OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ |
| 2 # Copyright (c) 2011, Nexenta Systems Inc. |
| 3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a |
| 6 # copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- |
| 11 # lowing conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be included |
| 14 # in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 22 # IN THE SOFTWARE. |
| 23 |
| 24 import mimetypes |
| 25 import os |
| 26 import re |
| 27 import rfc822 |
| 28 import StringIO |
| 29 import base64 |
| 30 import binascii |
| 31 import math |
| 32 import urllib |
| 33 import boto.utils |
| 34 from boto.exception import BotoClientError |
| 35 from boto.provider import Provider |
| 36 from boto.s3.keyfile import KeyFile |
| 37 from boto.s3.user import User |
| 38 from boto import UserAgent |
| 39 from boto.utils import compute_md5 |
| 40 try: |
| 41 from hashlib import md5 |
| 42 except ImportError: |
| 43 from md5 import md5 |
| 44 |
| 45 |
| 46 class Key(object): |
| 47 """ |
| 48 Represents a key (object) in an S3 bucket. |
| 49 |
| 50 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. |
| 51 :ivar name: The name of this Key object. |
| 52 :ivar metadata: A dictionary containing user metadata that you |
| 53 wish to store with the object or that has been retrieved from |
| 54 an existing object. |
| 55 :ivar cache_control: The value of the `Cache-Control` HTTP header. |
| 56 :ivar content_type: The value of the `Content-Type` HTTP header. |
| 57 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. |
| 58 :ivar content_disposition: The value of the `Content-Disposition` HTTP |
| 59 header. |
| 60 :ivar content_language: The value of the `Content-Language` HTTP header. |
| 61 :ivar etag: The `etag` associated with this object. |
| 62 :ivar last_modified: The string timestamp representing the last |
| 63 time this object was modified in S3. |
| 64 :ivar owner: The ID of the owner of this object. |
| 65 :ivar storage_class: The storage class of the object. Currently, one of: |
| 66 STANDARD | REDUCED_REDUNDANCY | GLACIER |
| 67 :ivar md5: The MD5 hash of the contents of the object. |
| 68 :ivar size: The size, in bytes, of the object. |
| 69 :ivar version_id: The version ID of this object, if it is a versioned |
| 70 object. |
| 71 :ivar encrypted: Whether the object is encrypted while at rest on |
| 72 the server. |
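| |
| Example (an illustrative sketch; the bucket name, credentials, and |
| object contents are assumptions, not part of this class): |
| |
| >>> import boto |
| >>> bucket = boto.connect_s3().get_bucket('mybucket') |
| >>> k = bucket.new_key('hello.txt') |
| >>> k.set_contents_from_string('hello world') |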
| 73 """ |
| 74 |
| 75 DefaultContentType = 'application/octet-stream' |
| 76 |
| 77 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> |
| 78 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> |
| 79 <Days>%s</Days> |
| 80 </RestoreRequest>""" |
| 81 |
| 82 |
| 83 BufferSize = 8192 |
| 84 |
| 85 # The object metadata fields a user can set, other than custom metadata |
| 86 # fields (i.e., those beginning with a provider-specific prefix like |
| 87 # x-amz-meta). |
| 88 base_user_settable_fields = set(["cache-control", "content-disposition", |
| 89 "content-encoding", "content-language", |
| 90 "content-md5", "content-type"]) |
| 91 _underscore_base_user_settable_fields = set() |
| 92 for f in base_user_settable_fields: |
| 93 _underscore_base_user_settable_fields.add(f.replace('-', '_')) |
| 94 |
| 95 |
| 96 |
| 97 def __init__(self, bucket=None, name=None): |
| 98 self.bucket = bucket |
| 99 self.name = name |
| 100 self.metadata = {} |
| 101 self.cache_control = None |
| 102 self.content_type = self.DefaultContentType |
| 103 self.content_encoding = None |
| 104 self.content_disposition = None |
| 105 self.content_language = None |
| 106 self.filename = None |
| 107 self.etag = None |
| 108 self.is_latest = False |
| 109 self.last_modified = None |
| 110 self.owner = None |
| 111 self.storage_class = 'STANDARD' |
| 112 self.md5 = None |
| 113 self.base64md5 = None |
| 114 self.path = None |
| 115 self.resp = None |
| 116 self.mode = None |
| 117 self.size = None |
| 118 self.version_id = None |
| 119 self.source_version_id = None |
| 120 self.delete_marker = False |
| 121 self.encrypted = None |
| 122 # If the object is being restored, this attribute will be set to True. |
| 123 # If the object is restored, it will be set to False. Otherwise this |
| 124 # value will be None. If the restore is completed (ongoing_restore = |
| 125 # False), the expiry_date will be populated with the expiry date of the |
| 126 # restored object. |
| 127 self.ongoing_restore = None |
| 128 self.expiry_date = None |
| 129 |
| 130 def __repr__(self): |
| 131 if self.bucket: |
| 132 return '<Key: %s,%s>' % (self.bucket.name, self.name) |
| 133 else: |
| 134 return '<Key: None,%s>' % self.name |
| 135 |
| 136 def __getattr__(self, name): |
| 137 if name == 'key': |
| 138 return self.name |
| 139 else: |
| 140 raise AttributeError |
| 141 |
| 142 def __setattr__(self, name, value): |
| 143 if name == 'key': |
| 144 self.__dict__['name'] = value |
| 145 else: |
| 146 self.__dict__[name] = value |
| 147 |
| 148 def __iter__(self): |
| 149 return self |
| 150 |
| 151 @property |
| 152 def provider(self): |
| 153 provider = None |
| 154 if self.bucket and self.bucket.connection: |
| 155 provider = self.bucket.connection.provider |
| 156 return provider |
| 157 |
| 158 def get_md5_from_hexdigest(self, md5_hexdigest): |
| 159 """ |
| 160 A utility function to create the 2-tuple (md5hexdigest, base64md5) |
| 161 from just having a precalculated md5_hexdigest. |
| 162 """ |
| 163 digest = binascii.unhexlify(md5_hexdigest) |
| 164 base64md5 = base64.encodestring(digest) |
| 165 if base64md5[-1] == '\n': |
| 166 base64md5 = base64md5[0:-1] |
| 167 return (md5_hexdigest, base64md5) |
| 168 |
| 169 def handle_encryption_headers(self, resp): |
| 170 provider = self.bucket.connection.provider |
| 171 if provider.server_side_encryption_header: |
| 172 self.encrypted = resp.getheader(provider.server_side_encryption_header, None) |
| 173 else: |
| 174 self.encrypted = None |
| 175 |
| 176 def handle_version_headers(self, resp, force=False): |
| 177 provider = self.bucket.connection.provider |
| 178 # If the Key object already has a version_id attribute value, it |
| 179 # means that it represents an explicit version and the user is |
| 180 # doing a get_contents_*(version_id=<foo>) to retrieve another |
| 181 # version of the Key. In that case, we don't really want to |
| 182 # overwrite the version_id in this Key object. Comprende? |
| 183 if self.version_id is None or force: |
| 184 self.version_id = resp.getheader(provider.version_id, None) |
| 185 self.source_version_id = resp.getheader(provider.copy_source_version_id, |
| 186 None) |
| 187 if resp.getheader(provider.delete_marker, 'false') == 'true': |
| 188 self.delete_marker = True |
| 189 else: |
| 190 self.delete_marker = False |
| 191 |
| 192 def handle_restore_headers(self, response): |
| 193 header = response.getheader('x-amz-restore') |
| 194 if header is None: |
| 195 return |
| 196 parts = header.split(',', 1) |
| 197 for part in parts: |
| 198 key, val = [i.strip() for i in part.split('=')] |
| 199 val = val.replace('"', '') |
| 200 if key == 'ongoing-request': |
| 201 self.ongoing_restore = (val.lower() == 'true') |
| 202 elif key == 'expiry-date': |
| 203 self.expiry_date = val |
| 204 |
| 205 def open_read(self, headers=None, query_args='', |
| 206 override_num_retries=None, response_headers=None): |
| 207 """ |
| 208 Open this key for reading |
| 209 |
| 210 :type headers: dict |
| 211 :param headers: Headers to pass in the web request |
| 212 |
| 213 :type query_args: string |
| 214 :param query_args: Arguments to pass in the query string |
| 215 (i.e., 'torrent') |
| 216 |
| 217 :type override_num_retries: int |
| 218 :param override_num_retries: If not None will override configured |
| 219 num_retries parameter for underlying GET. |
| 220 |
| 221 :type response_headers: dict |
| 222 :param response_headers: A dictionary containing HTTP |
| 223 headers/values that will override any headers associated |
| 224 with the stored object in the response. See |
| 225 http://goo.gl/EWOPb for details. |
| 226 """ |
| 227 if self.resp is None: |
| 228 self.mode = 'r' |
| 229 |
| 230 provider = self.bucket.connection.provider |
| 231 self.resp = self.bucket.connection.make_request( |
| 232 'GET', self.bucket.name, self.name, headers, |
| 233 query_args=query_args, |
| 234 override_num_retries=override_num_retries) |
| 235 if self.resp.status < 200 or self.resp.status > 299: |
| 236 body = self.resp.read() |
| 237 raise provider.storage_response_error(self.resp.status, |
| 238 self.resp.reason, body) |
| 239 response_headers = self.resp.msg |
| 240 self.metadata = boto.utils.get_aws_metadata(response_headers, |
| 241 provider) |
| 242 for name, value in response_headers.items(): |
| 243 # To get correct size for Range GETs, use Content-Range |
| 244 # header if one was returned. If not, use Content-Length |
| 245 # header. |
| 246 if (name.lower() == 'content-length' and |
| 247 'Content-Range' not in response_headers): |
| 248 self.size = int(value) |
| 249 elif name.lower() == 'content-range': |
| 250 end_range = re.sub('.*/(.*)', '\\1', value) |
| 251 self.size = int(end_range) |
| 252 elif name.lower() == 'etag': |
| 253 self.etag = value |
| 254 elif name.lower() == 'content-type': |
| 255 self.content_type = value |
| 256 elif name.lower() == 'content-encoding': |
| 257 self.content_encoding = value |
| 258 elif name.lower() == 'content-language': |
| 259 self.content_language = value |
| 260 elif name.lower() == 'last-modified': |
| 261 self.last_modified = value |
| 262 elif name.lower() == 'cache-control': |
| 263 self.cache_control = value |
| 264 elif name.lower() == 'content-disposition': |
| 265 self.content_disposition = value |
| 266 self.handle_version_headers(self.resp) |
| 267 self.handle_encryption_headers(self.resp) |
| 268 |
| 269 def open_write(self, headers=None, override_num_retries=None): |
| 270 """ |
| 271 Open this key for writing. |
| 272 Not yet implemented |
| 273 |
| 274 :type headers: dict |
| 275 :param headers: Headers to pass in the write request |
| 276 |
| 277 :type override_num_retries: int |
| 278 :param override_num_retries: If not None will override configured |
| 279 num_retries parameter for underlying PUT. |
| 280 """ |
| 281 raise BotoClientError('Not Implemented') |
| 282 |
| 283 def open(self, mode='r', headers=None, query_args=None, |
| 284 override_num_retries=None): |
| 285 if mode == 'r': |
| 286 self.mode = 'r' |
| 287 self.open_read(headers=headers, query_args=query_args, |
| 288 override_num_retries=override_num_retries) |
| 289 elif mode == 'w': |
| 290 self.mode = 'w' |
| 291 self.open_write(headers=headers, |
| 292 override_num_retries=override_num_retries) |
| 293 else: |
| 294 raise BotoClientError('Invalid mode: %s' % mode) |
| 295 |
| 296 closed = False |
| 297 |
| 298 def close(self, fast=False): |
| 299 """ |
| 300 Close this key. |
| 301 |
| 302 :type fast: bool |
| 303 :param fast: True if you want the connection to be closed without first |
| 304 reading the content. This should only be used in cases where subsequent |
| 305 calls don't need to return the content from the open HTTP connection. |
| 306 Note: As explained at |
| 307 http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse, |
| 308 callers must read the whole response before sending a new request to the |
| 309 server. Calling Key.close(fast=True) and making a subsequent request to |
| 310 the server will work because boto will get an httplib exception and |
| 311 close/reopen the connection. |
| 312 |
| 313 """ |
| 314 if self.resp and not fast: |
| 315 self.resp.read() |
| 316 self.resp = None |
| 317 self.mode = None |
| 318 self.closed = True |
| 319 |
| 320 def next(self): |
| 321 """ |
| 322 By providing a next method, the key object supports use as an iterator. |
| 323 For example, you can now say: |
| 324 |
| 325 for bytes in key: |
| 326 write bytes to a file or whatever |
| 327 |
| 328 All of the HTTP connection stuff is handled for you. |
| 329 """ |
| 330 self.open_read() |
| 331 data = self.resp.read(self.BufferSize) |
| 332 if not data: |
| 333 self.close() |
| 334 raise StopIteration |
| 335 return data |
| 336 |
| 337 def read(self, size=0): |
| 338 self.open_read() |
| 339 if size == 0: |
| 340 data = self.resp.read() |
| 341 else: |
| 342 data = self.resp.read(size) |
| 343 if not data: |
| 344 self.close() |
| 345 return data |
| 346 |
| 347 def change_storage_class(self, new_storage_class, dst_bucket=None, |
| 348 validate_dst_bucket=True): |
| 349 """ |
| 350 Change the storage class of an existing key. |
| 351 Depending on whether a different destination bucket is supplied |
| 352 or not, this will either move the item within the bucket, preserving |
| 353 all metadata and ACL info bucket changing the storage class or it |
| 354 will copy the item to the provided destination bucket, also |
| 355 preserving metadata and ACL info. |
| 356 |
| 357 :type new_storage_class: string |
| 358 :param new_storage_class: The new storage class for the Key. |
| 359 Possible values are: |
| 360 * STANDARD |
| 361 * REDUCED_REDUNDANCY |
| 362 |
| 363 :type dst_bucket: string |
| 364 :param dst_bucket: The name of a destination bucket. If not |
| 365 provided the current bucket of the key will be used. |
| 366 |
| 367 :type validate_dst_bucket: bool |
| 368 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 369 by using an extra list request. |
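| |
| A minimal sketch (assumes an existing Key instance 'k'): |
| |
| >>> k.change_storage_class('REDUCED_REDUNDANCY') |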
| 370 """ |
| 371 if new_storage_class == 'STANDARD': |
| 372 return self.copy(self.bucket.name, self.name, |
| 373 reduced_redundancy=False, preserve_acl=True, |
| 374 validate_dst_bucket=validate_dst_bucket) |
| 375 elif new_storage_class == 'REDUCED_REDUNDANCY': |
| 376 return self.copy(self.bucket.name, self.name, |
| 377 reduced_redundancy=True, preserve_acl=True, |
| 378 validate_dst_bucket=validate_dst_bucket) |
| 379 else: |
| 380 raise BotoClientError('Invalid storage class: %s' % |
| 381 new_storage_class) |
| 382 |
| 383 def copy(self, dst_bucket, dst_key, metadata=None, |
| 384 reduced_redundancy=False, preserve_acl=False, |
| 385 encrypt_key=False, validate_dst_bucket=True): |
| 386 """ |
| 387 Copy this Key to another bucket. |
| 388 |
| 389 :type dst_bucket: string |
| 390 :param dst_bucket: The name of the destination bucket |
| 391 |
| 392 :type dst_key: string |
| 393 :param dst_key: The name of the destination key |
| 394 |
| 395 :type metadata: dict |
| 396 :param metadata: Metadata to be associated with new key. If |
| 397 metadata is supplied, it will replace the metadata of the |
| 398 source key being copied. If no metadata is supplied, the |
| 399 source key's metadata will be copied to the new key. |
| 400 |
| 401 :type reduced_redundancy: bool |
| 402 :param reduced_redundancy: If True, this will force the |
| 403 storage class of the new Key to be REDUCED_REDUNDANCY |
| 404 regardless of the storage class of the key being copied. |
| 405 The Reduced Redundancy Storage (RRS) feature of S3 |
| 406 provides lower redundancy at lower storage cost. |
| 407 |
| 408 :type preserve_acl: bool |
| 409 :param preserve_acl: If True, the ACL from the source key will |
| 410 be copied to the destination key. If False, the |
| 411 destination key will have the default ACL. Note that |
| 412 preserving the ACL in the new key object will require two |
| 413 additional API calls to S3, one to retrieve the current |
| 414 ACL and one to set that ACL on the new object. If you |
| 415 don't care about the ACL, a value of False will be |
| 416 significantly more efficient. |
| 417 |
| 418 :type encrypt_key: bool |
| 419 :param encrypt_key: If True, the new copy of the object will |
| 420 be encrypted on the server-side by S3 and will be stored |
| 421 in an encrypted form while at rest in S3. |
| 422 |
| 423 :type validate_dst_bucket: bool |
| 424 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 425 by using an extra list request. |
| 426 |
| 427 :rtype: :class:`boto.s3.key.Key` or subclass |
| 428 :returns: An instance of the newly created key object |
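| |
| Illustrative usage (assumes a Key 'k'; the destination bucket |
| name is hypothetical): |
| |
| >>> new_key = k.copy('backup-bucket', k.name, preserve_acl=True) |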
| 429 """ |
| 430 dst_bucket = self.bucket.connection.lookup(dst_bucket, |
| 431 validate_dst_bucket) |
| 432 if reduced_redundancy: |
| 433 storage_class = 'REDUCED_REDUNDANCY' |
| 434 else: |
| 435 storage_class = self.storage_class |
| 436 return dst_bucket.copy_key(dst_key, self.bucket.name, |
| 437 self.name, metadata, |
| 438 storage_class=storage_class, |
| 439 preserve_acl=preserve_acl, |
| 440 encrypt_key=encrypt_key) |
| 441 |
| 442 def startElement(self, name, attrs, connection): |
| 443 if name == 'Owner': |
| 444 self.owner = User(self) |
| 445 return self.owner |
| 446 else: |
| 447 return None |
| 448 |
| 449 def endElement(self, name, value, connection): |
| 450 if name == 'Key': |
| 451 self.name = value |
| 452 elif name == 'ETag': |
| 453 self.etag = value |
| 454 elif name == 'IsLatest': |
| 455 if value == 'true': |
| 456 self.is_latest = True |
| 457 else: |
| 458 self.is_latest = False |
| 459 elif name == 'LastModified': |
| 460 self.last_modified = value |
| 461 elif name == 'Size': |
| 462 self.size = int(value) |
| 463 elif name == 'StorageClass': |
| 464 self.storage_class = value |
| 465 elif name == 'Owner': |
| 466 pass |
| 467 elif name == 'VersionId': |
| 468 self.version_id = value |
| 469 else: |
| 470 setattr(self, name, value) |
| 471 |
| 472 def exists(self): |
| 473 """ |
| 474 Returns True if the key exists |
| 475 |
| 476 :rtype: bool |
| 477 :return: Whether the key exists on S3 |
| 478 """ |
| 479 return bool(self.bucket.lookup(self.name)) |
| 480 |
| 481 def delete(self): |
| 482 """ |
| 483 Delete this key from S3 |
| 484 """ |
| 485 return self.bucket.delete_key(self.name, version_id=self.version_id) |
| 486 |
| 487 def get_metadata(self, name): |
| 488 return self.metadata.get(name) |
| 489 |
| 490 def set_metadata(self, name, value): |
| 491 self.metadata[name] = value |
| 492 |
| 493 def update_metadata(self, d): |
| 494 self.metadata.update(d) |
| 495 |
| 496 # convenience methods for setting/getting ACL |
| 497 def set_acl(self, acl_str, headers=None): |
| 498 if self.bucket is not None: |
| 499 self.bucket.set_acl(acl_str, self.name, headers=headers) |
| 500 |
| 501 def get_acl(self, headers=None): |
| 502 if self.bucket is not None: |
| 503 return self.bucket.get_acl(self.name, headers=headers) |
| 504 |
| 505 def get_xml_acl(self, headers=None): |
| 506 if self.bucket is not None: |
| 507 return self.bucket.get_xml_acl(self.name, headers=headers) |
| 508 |
| 509 def set_xml_acl(self, acl_str, headers=None): |
| 510 if self.bucket is not None: |
| 511 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers) |
| 512 |
| 513 def set_canned_acl(self, acl_str, headers=None): |
| 514 return self.bucket.set_canned_acl(acl_str, self.name, headers) |
| 515 |
| 516 def get_redirect(self): |
| 517 """Return the redirect location configured for this key. |
| 518 |
| 519 If no redirect is configured (via set_redirect), then None |
| 520 will be returned. |
| 521 |
| 522 """ |
| 523 response = self.bucket.connection.make_request( |
| 524 'HEAD', self.bucket.name, self.name) |
| 525 if response.status == 200: |
| 526 return response.getheader('x-amz-website-redirect-location') |
| 527 else: |
| 528 raise self.provider.storage_response_error( |
| 529 response.status, response.reason, response.read()) |
| 530 |
| 531 def set_redirect(self, redirect_location): |
| 532 """Configure this key to redirect to another location. |
| 533 |
| 534 When the bucket associated with this key is accessed from the website |
| 535 endpoint, a 301 redirect will be issued to the specified |
| 536 `redirect_location`. |
| 537 |
| 538 :type redirect_location: string |
| 539 :param redirect_location: The location to redirect. |
| 540 |
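| Example (a sketch; the target URL is hypothetical): |
| |
| >>> key.set_redirect('http://example.com/new-location') |
| |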
| 541 """ |
| 542 headers = {'x-amz-website-redirect-location': redirect_location} |
| 543 response = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 544 self.name, headers) |
| 545 if response.status == 200: |
| 546 return True |
| 547 else: |
| 548 raise self.provider.storage_response_error( |
| 549 response.status, response.reason, response.read()) |
| 550 |
| 551 def make_public(self, headers=None): |
| 552 return self.bucket.set_canned_acl('public-read', self.name, headers) |
| 553 |
| 554 def generate_url(self, expires_in, method='GET', headers=None, |
| 555 query_auth=True, force_http=False, response_headers=None, |
| 556 expires_in_absolute=False, version_id=None, |
| 557 policy=None, reduced_redundancy=False, encrypt_key=False): |
| 558 """ |
| 559 Generate a URL to access this key. |
| 560 |
| 561 :type expires_in: int |
| 562 :param expires_in: How long the url is valid for, in seconds |
| 563 |
| 564 :type method: string |
| 565 :param method: The method to use for retrieving the file |
| 566 (default is GET) |
| 567 |
| 568 :type headers: dict |
| 569 :param headers: Any headers to pass along in the request |
| 570 |
| 571 :type query_auth: bool |
| 572 :param query_auth: If True, include query string authentication parameters in the URL (a signed URL); if False, return an unsigned URL. |
| 573 |
| 574 :type force_http: bool |
| 575 :param force_http: If True, http will be used instead of https. |
| 576 |
| 577 :type response_headers: dict |
| 578 :param response_headers: A dictionary containing HTTP |
| 579 headers/values that will override any headers associated |
| 580 with the stored object in the response. See |
| 581 http://goo.gl/EWOPb for details. |
| 582 |
| 583 :type expires_in_absolute: bool |
| 584 :param expires_in_absolute: If True, 'expires_in' is treated as an absolute epoch timestamp rather than a number of seconds from now. |
| 585 |
| 586 :type version_id: string |
| 587 :param version_id: The version_id of the object to GET. If specified |
| 588 this overrides any value in the key. |
| 589 |
| 590 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 591 :param policy: A canned ACL policy that will be applied to the |
| 592 new key in S3. |
| 593 |
| 594 :type reduced_redundancy: bool |
| 595 :param reduced_redundancy: If True, this will set the storage |
| 596 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 597 Redundancy Storage (RRS) feature of S3 provides lower |
| 598 redundancy at lower storage cost. |
| 599 |
| 600 :type encrypt_key: bool |
| 601 :param encrypt_key: If True, the new copy of the object will |
| 602 be encrypted on the server-side by S3 and will be stored |
| 603 in an encrypted form while at rest in S3. |
| 604 |
| 605 :rtype: string |
| 606 :return: The URL to access the key |
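| |
| A typical use, sketched (one-hour expiry; assumes a Key 'k'): |
| |
| >>> url = k.generate_url(3600) |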
| 607 """ |
| 608 provider = self.bucket.connection.provider |
| 609 version_id = version_id or self.version_id |
| 610 if headers is None: |
| 611 headers = {} |
| 612 else: |
| 613 headers = headers.copy() |
| 614 |
| 615 # add headers accordingly (usually PUT case) |
| 616 if policy: |
| 617 headers[provider.acl_header] = policy |
| 618 if reduced_redundancy: |
| 619 self.storage_class = 'REDUCED_REDUNDANCY' |
| 620 if provider.storage_class_header: |
| 621 headers[provider.storage_class_header] = self.storage_class |
| 622 if encrypt_key: |
| 623 headers[provider.server_side_encryption_header] = 'AES256' |
| 624 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 625 |
| 626 return self.bucket.connection.generate_url(expires_in, method, |
| 627 self.bucket.name, self.name, |
| 628 headers, query_auth, |
| 629 force_http, |
| 630 response_headers, |
| 631 expires_in_absolute, |
| 632 version_id) |
| 633 |
| 634 def send_file(self, fp, headers=None, cb=None, num_cb=10, |
| 635 query_args=None, chunked_transfer=False, size=None): |
| 636 """ |
| 637 Upload a file to a key into a bucket on S3. |
| 638 |
| 639 :type fp: file |
| 640 :param fp: The file pointer to upload. The file pointer must |
| 641 point at the offset from which you wish to upload; |
| 642 i.e., if uploading the full file, it should point at the |
| 643 start of the file. Normally when a file is opened for |
| 644 reading, the fp will point at the first byte. See the |
| 645 size parameter below for more info. |
| 646 |
| 647 :type headers: dict |
| 648 :param headers: The headers to pass along with the PUT request |
| 649 |
| 650 :type cb: function |
| 651 :param cb: a callback function that will be called to report |
| 652 progress on the upload. The callback should accept two |
| 653 integer parameters, the first representing the number of |
| 654 bytes that have been successfully transmitted to S3 and |
| 655 the second representing the size of the object to be |
| 656 transmitted. |
| 657 |
| 658 :type num_cb: int |
| 659 :param num_cb: (optional) If a callback is specified with the |
| 660 cb parameter, this parameter determines the granularity of |
| 661 the callback by defining the maximum number of times the |
| 662 callback will be called during the file |
| 663 transfer. Providing a negative integer will cause your |
| 664 callback to be called with each buffer read. |
| 665 |
| 666 :type size: int |
| 667 :param size: (optional) The maximum number of bytes to read |
| 668 from the file pointer (fp). This is useful when uploading |
| 669 a file in multiple parts where you are splitting the file |
| 670 up into different ranges to be uploaded. If not specified, |
| 671 the default behaviour is to read all bytes from the file |
| 672 pointer. Fewer bytes may be available. |
| 673 """ |
| 674 provider = self.bucket.connection.provider |
| 675 try: |
| 676 spos = fp.tell() |
| 677 except IOError: |
| 678 spos = None |
| 679 self.read_from_stream = False |
| 680 |
| 681 def sender(http_conn, method, path, data, headers): |
| 682 # This function is called repeatedly for temporary retries |
| 683 # so we must be sure the file pointer is pointing at the |
| 684 # start of the data. |
| 685 if spos is not None and spos != fp.tell(): |
| 686 fp.seek(spos) |
| 687 elif spos is None and self.read_from_stream: |
| 688 # if seek is not supported, and we've read from this |
| 689 # stream already, then we need to abort retries to |
| 690 # avoid setting bad data. |
| 691 raise provider.storage_data_error( |
| 692 'Cannot retry failed request. fp does not support seeking.') |
| 693 |
| 694 http_conn.putrequest(method, path) |
| 695 for key in headers: |
| 696 http_conn.putheader(key, headers[key]) |
| 697 http_conn.endheaders() |
| 698 |
| 699 # Calculate all MD5 checksums on the fly, if not already computed |
| 700 if not self.base64md5: |
| 701 m = md5() |
| 702 else: |
| 703 m = None |
| 704 |
| 705 save_debug = self.bucket.connection.debug |
| 706 self.bucket.connection.debug = 0 |
| 707 # If the debuglevel < 4 we don't want to show connection |
| 708 # payload, so turn off HTTP connection-level debug output (to |
| 709 # be restored below). |
| 710 # Use the getattr approach to allow this to work in AppEngine. |
| 711 if getattr(http_conn, 'debuglevel', 0) < 4: |
| 712 http_conn.set_debuglevel(0) |
| 713 |
| 714 data_len = 0 |
| 715 if cb: |
| 716 if size: |
| 717 cb_size = size |
| 718 elif self.size: |
| 719 cb_size = self.size |
| 720 else: |
| 721 cb_size = 0 |
| 722 if chunked_transfer and cb_size == 0: |
| 723 # For chunked transfer, we call the cb for every 1MB |
| 724 # of data transferred, except when we know size. |
| 725 cb_count = (1024 * 1024) / self.BufferSize |
| 726 elif num_cb > 1: |
| 727 cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) |
| 728 elif num_cb < 0: |
| 729 cb_count = -1 |
| 730 else: |
| 731 cb_count = 0 |
| 732 i = 0 |
| 733 cb(data_len, cb_size) |
| 734 |
| 735 bytes_togo = size |
| 736 if bytes_togo and bytes_togo < self.BufferSize: |
| 737 chunk = fp.read(bytes_togo) |
| 738 else: |
| 739 chunk = fp.read(self.BufferSize) |
| 740 if spos is None: |
| 741 # read at least something from a non-seekable fp. |
| 742 self.read_from_stream = True |
| 743 while chunk: |
| 744 chunk_len = len(chunk) |
| 745 data_len += chunk_len |
| 746 if chunked_transfer: |
| 747 http_conn.send('%x;\r\n' % chunk_len) |
| 748 http_conn.send(chunk) |
| 749 http_conn.send('\r\n') |
| 750 else: |
| 751 http_conn.send(chunk) |
| 752 if m: |
| 753 m.update(chunk) |
| 754 if bytes_togo: |
| 755 bytes_togo -= chunk_len |
| 756 if bytes_togo <= 0: |
| 757 break |
| 758 if cb: |
| 759 i += 1 |
| 760 if i == cb_count or cb_count == -1: |
| 761 cb(data_len, cb_size) |
| 762 i = 0 |
| 763 if bytes_togo and bytes_togo < self.BufferSize: |
| 764 chunk = fp.read(bytes_togo) |
| 765 else: |
| 766 chunk = fp.read(self.BufferSize) |
| 767 |
| 768 self.size = data_len |
| 769 |
| 770 if m: |
| 771 # Use the chunked trailer for the digest |
| 772 hd = m.hexdigest() |
| 773 self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd) |
| 774 |
| 775 if chunked_transfer: |
| 776 http_conn.send('0\r\n') |
| 777 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) |
| 778 http_conn.send('\r\n') |
| 779 |
| 780 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 781 cb(data_len, cb_size) |
| 782 |
| 783 http_conn.set_debuglevel(save_debug) |
| 784 self.bucket.connection.debug = save_debug |
| 785 response = http_conn.getresponse() |
| 786 body = response.read() |
| 787 if ((response.status == 500 or response.status == 503 or |
| 788 response.getheader('location')) and not chunked_transfer): |
| 789 # we'll try again. |
| 790 return response |
| 791 elif response.status >= 200 and response.status <= 299: |
| 792 self.etag = response.getheader('etag') |
| 793 if self.etag != '"%s"' % self.md5: |
| 794 raise provider.storage_data_error( |
| 795 'ETag from S3 did not match computed MD5') |
| 796 return response |
| 797 else: |
| 798 raise provider.storage_response_error( |
| 799 response.status, response.reason, body) |
| 800 |
| 801 if not headers: |
| 802 headers = {} |
| 803 else: |
| 804 headers = headers.copy() |
| 805 headers['User-Agent'] = UserAgent |
| 806 if self.storage_class != 'STANDARD': |
| 807 headers[provider.storage_class_header] = self.storage_class |
| 808 if 'Content-Encoding' in headers: |
| 809 self.content_encoding = headers['Content-Encoding'] |
| 810 if 'Content-Language' in headers: |
| 811 self.content_language = headers['Content-Language'] |
| 812 if 'Content-Type' in headers: |
| 813 # Some use cases need to suppress sending of the Content-Type |
| 814 # header and depend on the receiving server to set the content |
| 815 # type. This can be achieved by setting headers['Content-Type'] |
| 816 # to None when calling this method. |
| 817 if headers['Content-Type'] is None: |
| 818 # Delete null Content-Type value to skip sending that header. |
| 819 del headers['Content-Type'] |
| 820 else: |
| 821 self.content_type = headers['Content-Type'] |
| 822 elif self.path: |
| 823 self.content_type = mimetypes.guess_type(self.path)[0] |
| 824 if self.content_type is None: |
| 825 self.content_type = self.DefaultContentType |
| 826 headers['Content-Type'] = self.content_type |
| 827 else: |
| 828 headers['Content-Type'] = self.content_type |
| 829 if self.base64md5: |
| 830 headers['Content-MD5'] = self.base64md5 |
| 831 if chunked_transfer: |
| 832 headers['Transfer-Encoding'] = 'chunked' |
| 833 #if not self.base64md5: |
| 834 # headers['Trailer'] = "Content-MD5" |
| 835 else: |
| 836 headers['Content-Length'] = str(self.size) |
| 837 headers['Expect'] = '100-Continue' |
| 838 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 839 resp = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 840 self.name, headers, |
| 841 sender=sender, |
| 842 query_args=query_args) |
| 843 self.handle_version_headers(resp, force=True) |
| 844 |
| 845 def compute_md5(self, fp, size=None): |
| 846 """ |
| 847 :type fp: file |
| 848 :param fp: File pointer to the file to MD5 hash. The file |
| 849 pointer will be reset to the same position before the |
| 850 method returns. |
| 851 |
| 852 :type size: int |
| 853 :param size: (optional) The maximum number of bytes to read |
| 854 from the file pointer (fp). This is useful when uploading |
| 855 a file in multiple parts where the file is being split |
| 856 in place into different parts. Fewer bytes may be available. |
| 857 |
| 858 :rtype: tuple |
| 859 :return: A tuple containing the hex digest version of the MD5 |
| 860 hash as the first element and the base64 encoded version |
| 861 of the plain digest as the second element. |
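| |
| Illustrative sketch (the local path is an assumption): |
| |
| >>> fp = open('/tmp/data.bin', 'rb') |
| >>> hex_md5, b64_md5 = k.compute_md5(fp) |
| >>> fp.close() |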
| 862 """ |
| 863 tup = compute_md5(fp, size=size) |
| 864 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. |
| 865 # The internal implementation of compute_md5() needs to return the |
| 866 # data size but we don't want to return that value to the external |
| 867 # caller because it changes the class interface (i.e. it might |
| 868 # break some code) so we consume the third tuple value here and |
| 869 # return the remainder of the tuple to the caller, thereby preserving |
| 870 # the existing interface. |
| 871 self.size = tup[2] |
| 872 return tup[0:2] |
| 873 |
| 874 def set_contents_from_stream(self, fp, headers=None, replace=True, |
| 875 cb=None, num_cb=10, policy=None, |
| 876 reduced_redundancy=False, query_args=None, |
| 877 size=None): |
| 878 """ |
| 879 Store an object using the name of the Key object as the key in |
| 880 cloud and the contents of the data stream pointed to by 'fp' as |
| 881 the contents. |
| 882 |
| 883 The stream object is not seekable and total size is not known. |
| 884 This has the implication that we can't specify the |
| 885 Content-Length and Content-MD5 headers. So for huge |
| 886 uploads, the delay in calculating MD5 is avoided, but at the |
| 887 cost of being unable to verify the integrity of the uploaded |
| 888 data. |
| 889 |
| 890 :type fp: file |
| 891 :param fp: the file whose contents are to be uploaded |
| 892 |
| 893 :type headers: dict |
| 894 :param headers: additional HTTP headers to be sent with the |
| 895 PUT request. |
| 896 |
| 897 :type replace: bool |
| 898 :param replace: If this parameter is False, the method will first check |
| 899 to see if an object exists in the bucket with the same key. If it |
| 900 does, it won't overwrite it. The default value is True which will |
| 901 overwrite the object. |
| 902 |
| 903 :type cb: function |
| 904 :param cb: a callback function that will be called to report |
| 905 progress on the upload. The callback should accept two integer |
| 906 parameters, the first representing the number of bytes that have |
| 907 been successfully transmitted to GS and the second representing the |
| 908 total number of bytes that need to be transmitted. |
| 909 |
| 910 :type num_cb: int |
| 911 :param num_cb: (optional) If a callback is specified with the |
| 912 cb parameter, this parameter determines the granularity of |
| 913 the callback by defining the maximum number of times the |
| 914 callback will be called during the file transfer. |
| 915 |
| 916 :type policy: :class:`boto.gs.acl.CannedACLStrings` |
| 917 :param policy: A canned ACL policy that will be applied to the new key |
| 918 in GS. |
| 919 |
| 920 :type reduced_redundancy: bool |
| 921 :param reduced_redundancy: If True, this will set the storage |
| 922 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 923 Redundancy Storage (RRS) feature of S3 provides lower |
| 924 redundancy at lower storage cost. |
| 925 |
| 926 :type size: int |
| 927 :param size: (optional) The maximum number of bytes to read from |
| 928 the file pointer (fp). This is useful when uploading a |
| 929 file in multiple parts where you are splitting the file up |
| 930 into different ranges to be uploaded. If not specified, |
| 931 the default behaviour is to read all bytes from the file |
| 932 pointer. Fewer bytes may be available. |
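| |
| A hedged sketch (assumes a non-seekable source such as a pipe): |
| |
| >>> import sys |
| >>> k.set_contents_from_stream(sys.stdin) |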
| 933 """ |
| 934 |
| 935 provider = self.bucket.connection.provider |
| 936 if not provider.supports_chunked_transfer(): |
| 937 raise BotoClientError('%s does not support chunked transfer' |
| 938 % provider.get_provider_name()) |
| 939 |
| 940 # Name of the Object should be specified explicitly for Streams. |
| 941 if not self.name: |
| 942 raise BotoClientError('Cannot determine the destination ' |
| 943 'object name for the given stream') |
| 944 |
| 945 if headers is None: |
| 946 headers = {} |
| 947 if policy: |
| 948 headers[provider.acl_header] = policy |
| 949 |
| 950 if reduced_redundancy: |
| 951 self.storage_class = 'REDUCED_REDUNDANCY' |
| 952 if provider.storage_class_header: |
| 953 headers[provider.storage_class_header] = self.storage_class |
| 954 |
| 955 if self.bucket is not None: |
| 956 if not replace: |
| 957 if self.bucket.lookup(self.name): |
| 958 return |
| 959 self.send_file(fp, headers, cb, num_cb, query_args, |
| 960 chunked_transfer=True, size=size) |
| 961 |
| 962 def set_contents_from_file(self, fp, headers=None, replace=True, |
| 963 cb=None, num_cb=10, policy=None, md5=None, |
| 964 reduced_redundancy=False, query_args=None, |
| 965 encrypt_key=False, size=None, rewind=False): |
| 966 """ |
| 967 Store an object in S3 using the name of the Key object as the |
| 968 key in S3 and the contents of the file pointed to by 'fp' as the |
| 969 contents. The data is read from 'fp' from its current position until |
| 970 'size' bytes have been read or EOF. |
| 971 |
| 972 :type fp: file |
| 973 :param fp: the file whose contents to upload |
| 974 |
| 975 :type headers: dict |
| 976 :param headers: Additional HTTP headers that will be sent with |
| 977 the PUT request. |
| 978 |
| 979 :type replace: bool |
| 980 :param replace: If this parameter is False, the method will |
| 981 first check to see if an object exists in the bucket with |
| 982 the same key. If it does, it won't overwrite it. The |
| 983 default value is True which will overwrite the object. |
| 984 |
| 985 :type cb: function |
| 986 :param cb: a callback function that will be called to report |
| 987 progress on the upload. The callback should accept two |
| 988 integer parameters, the first representing the number of |
| 989 bytes that have been successfully transmitted to S3 and |
| 990 the second representing the size of the object to be |
| 991 transmitted. |
| 992 |
| 993 :type num_cb: int |
| 994 :param num_cb: (optional) If a callback is specified with the |
| 995 cb parameter, this parameter determines the granularity of |
| 996 the callback by defining the maximum number of times the |
| 997 callback will be called during the file transfer. |
| 998 |
| 999 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1000 :param policy: A canned ACL policy that will be applied to the |
| 1001 new key in S3. |
| 1002 |
| 1003 :type md5: tuple |
| 1004 :param md5: A tuple containing the hexdigest version of the MD5 |
| 1005 checksum of the file as the first element and the |
| 1006 Base64-encoded version of the plain checksum as the second |
| 1007 element. This is the same format returned by the |
| 1008 compute_md5 method. If you need to compute the MD5 for any |
| 1009 reason prior to upload, it's silly to have to do it twice, |
| 1010 so this param, if present, will be used as the MD5 values |
| 1011 of the file. Otherwise, the checksum will be computed. |
| 1012 |
| 1013 :type reduced_redundancy: bool |
| 1014 :param reduced_redundancy: If True, this will set the storage |
| 1015 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1016 Redundancy Storage (RRS) feature of S3 provides lower |
| 1017 redundancy at lower storage cost. |
| 1018 |
| 1019 :type encrypt_key: bool |
| 1020 :param encrypt_key: If True, the new copy of the object will |
| 1021 be encrypted on the server-side by S3 and will be stored |
| 1022 in an encrypted form while at rest in S3. |
| 1023 |
| 1024 :type size: int |
| 1025 :param size: (optional) The maximum number of bytes to read |
| 1026 from the file pointer (fp). This is useful when uploading |
| 1027 a file in multiple parts where you are splitting the file |
| 1028 up into different ranges to be uploaded. If not specified, |
| 1029 the default behaviour is to read all bytes from the file |
| 1030 pointer. Fewer bytes may be available. |
| 1031 |
| 1032 :type rewind: bool |
| 1033 :param rewind: (optional) If True, the file pointer (fp) will |
| 1034 be rewound to the start before any bytes are read from |
| 1035 it. The default behaviour is False which reads from the |
| 1036 current position of the file pointer (fp). |
| 1037 |
| 1038 :rtype: int |
| 1039 :return: The number of bytes written to the key. |
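| |
| Illustrative sketch (the local path is an assumption): |
| |
| >>> with open('/tmp/report.csv', 'rb') as fp: |
| ...     k.set_contents_from_file(fp, rewind=True) |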
| 1040 """ |
| 1041 provider = self.bucket.connection.provider |
| 1042 headers = headers or {} |
| 1043 if policy: |
| 1044 headers[provider.acl_header] = policy |
| 1045 if encrypt_key: |
| 1046 headers[provider.server_side_encryption_header] = 'AES256' |
| 1047 |
| 1048 if rewind: |
| 1049 # caller requests reading from beginning of fp. |
| 1050 fp.seek(0, os.SEEK_SET) |
| 1051 else: |
| 1052 # The following seek/tell/seek logic is intended |
| 1053 # to detect applications using the older interface to |
| 1054 # set_contents_from_file(), which automatically rewound the |
| 1055 # file each time the Key was reused. This changed with commit |
| 1056 # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads |
| 1057 # split into multiple parts and uploaded in parallel, and at |
| 1058 # the time of that commit this check was added because otherwise |
| 1059 # older programs would get a success status and upload an empty |
| 1060 object. Unfortunately, it's very inefficient for fp's implemented |
| 1061 # by KeyFile (used, for example, by gsutil when copying between |
| 1062 # providers). So, we skip the check for the KeyFile case. |
| 1063 # TODO: At some point consider removing this seek/tell/seek |
| 1064 # logic, after enough time has passed that it's unlikely any |
| 1065 # programs remain that assume the older auto-rewind interface. |
| 1066 if not isinstance(fp, KeyFile): |
| 1067 spos = fp.tell() |
| 1068 fp.seek(0, os.SEEK_END) |
| 1069 if fp.tell() == spos: |
| 1070 fp.seek(0, os.SEEK_SET) |
| 1071 if fp.tell() != spos: |
| 1072 # Raise an exception as this is likely a programming |
| 1073 # error whereby there is data before the fp but nothing |
| 1074 # after it. |
| 1075 fp.seek(spos) |
| 1076 raise AttributeError('fp is at EOF. Use rewind option ' |
| 1077 'or seek() to data start.') |
| 1078 # seek back to the correct position. |
| 1079 fp.seek(spos) |
| 1080 |
| 1081 if reduced_redundancy: |
| 1082 self.storage_class = 'REDUCED_REDUNDANCY' |
| 1083 if provider.storage_class_header: |
| 1084 headers[provider.storage_class_header] = self.storage_class |
| 1085 # TODO - What if provider doesn't support reduced redundancy? |
| 1086 # What if different providers provide different classes? |
| 1087 if hasattr(fp, 'name'): |
| 1088 self.path = fp.name |
| 1089 if self.bucket is not None: |
| 1090 if not md5 and provider.supports_chunked_transfer(): |
| 1091 # defer md5 calculation to on the fly and |
| 1092 # we don't know anything about size yet. |
| 1093 chunked_transfer = True |
| 1094 self.size = None |
| 1095 else: |
| 1096 chunked_transfer = False |
| 1097 if isinstance(fp, KeyFile): |
| 1098 # Avoid EOF seek for KeyFile case as it's very inefficient. |
| 1099 key = fp.getkey() |
| 1100 size = key.size - fp.tell() |
| 1101 self.size = size |
| 1102 # At present both GCS and S3 use MD5 for the etag for |
| 1103 # non-multipart-uploaded objects. If the etag is 32 hex |
| 1104 # chars use it as an MD5, to avoid having to read the file |
| 1105 # twice while transferring. |
| 1106 if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)): |
| 1107 etag = key.etag.strip('"') |
| 1108 md5 = (etag, base64.b64encode(binascii.unhexlify(etag))) |
| 1109 if not md5: |
| 1110 # compute_md5() and also set self.size to actual |
| 1111 # size of the bytes read computing the md5. |
| 1112 md5 = self.compute_md5(fp, size) |
| 1113 # adjust size if required |
| 1114 size = self.size |
| 1115 elif size: |
| 1116 self.size = size |
| 1117 else: |
| 1118 # If md5 is provided, we still need the size, so |
| 1119 # calculate it from the bytes remaining to end of content. |
| 1120 spos = fp.tell() |
| 1121 fp.seek(0, os.SEEK_END) |
| 1122 self.size = fp.tell() - spos |
| 1123 fp.seek(spos) |
| 1124 size = self.size |
| 1125 self.md5 = md5[0] |
| 1126 self.base64md5 = md5[1] |
| 1127 |
| 1128 if self.name is None: |
| 1129 self.name = self.md5 |
| 1130 if not replace: |
| 1131 if self.bucket.lookup(self.name): |
| 1132 return |
| 1133 |
| 1134 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1135 query_args=query_args, |
| 1136 chunked_transfer=chunked_transfer, size=size) |
| 1137 # return number of bytes written. |
| 1138 return self.size |
| 1139 |
| 1140 def set_contents_from_filename(self, filename, headers=None, replace=True, |
| 1141 cb=None, num_cb=10, policy=None, md5=None, |
| 1142 reduced_redundancy=False, |
| 1143 encrypt_key=False): |
| 1144 """ |
| 1145 Store an object in S3 using the name of the Key object as the |
| 1146 key in S3 and the contents of the file named by 'filename'. |
| 1147 See set_contents_from_file method for details about the |
| 1148 parameters. |
| 1149 |
| 1150 :type filename: string |
| 1151 :param filename: The name of the file that you want to put onto S3 |
| 1152 |
| 1153 :type headers: dict |
| 1154 :param headers: Additional headers to pass along with the |
| 1155 request to AWS. |
| 1156 |
| 1157 :type replace: bool |
| 1158 :param replace: If True, replaces the contents of the key |
| 1159 if it already exists. |
| 1160 |
| 1161 :type cb: function |
| 1162 :param cb: a callback function that will be called to report |
| 1163 progress on the upload. The callback should accept two |
| 1164 integer parameters, the first representing the number of |
| 1165 bytes that have been successfully transmitted to S3 and |
| 1166 the second representing the size of the object to be |
| 1167 transmitted. |
| 1168 |
| 1169 :type num_cb: int |
| 1170 :param num_cb: (optional) If a callback is specified with the |
| 1171 cb parameter, this parameter determines the granularity of |
| 1172 the callback by defining the maximum number of times the |
| 1173 callback will be called during the file transfer. |
| 1174 |
| 1175 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1176 :param policy: A canned ACL policy that will be applied to the |
| 1177 new key in S3. |
| 1178 |
| 1179 :type md5: tuple |
| 1180 :param md5: A tuple containing the hexdigest version of the MD5 |
| 1181 checksum of the file as the first element and the |
| 1182 Base64-encoded version of the plain checksum as the second |
| 1183 element. This is the same format returned by the |
| 1184 compute_md5 method. If you need to compute the MD5 for any |
| 1185 reason prior to upload, it's silly to have to do it twice, |
| 1186 so this param, if present, will be used as the MD5 values |
| 1187 of the file. Otherwise, the checksum will be computed. |
| 1188 |
| 1189 :type reduced_redundancy: bool |
| 1190 :param reduced_redundancy: If True, this will set the storage |
| 1191 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1192 Redundancy Storage (RRS) feature of S3 provides lower |
| 1193 redundancy at lower storage cost. |
| |
| :type encrypt_key: bool |
| 1194 :param encrypt_key: If True, the new copy of the object |
| 1195 will be encrypted on the server-side by S3 and will be |
| 1196 stored in an encrypted form while at rest in S3. |
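| |
| A minimal sketch (the filename is an assumption): |
| |
| >>> k.set_contents_from_filename('/tmp/photo.jpg', |
| ...                              reduced_redundancy=True) |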
| 1197 """ |
| 1198 fp = open(filename, 'rb') |
| 1199 try: |
| 1200 self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1201 policy, md5, reduced_redundancy, |
| 1202 encrypt_key=encrypt_key) |
| 1203 finally: |
| 1204 fp.close() |
| 1205 |
| 1206 def set_contents_from_string(self, s, headers=None, replace=True, |
| 1207 cb=None, num_cb=10, policy=None, md5=None, |
| 1208 reduced_redundancy=False, |
| 1209 encrypt_key=False): |
| 1210 """ |
| 1211 Store an object in S3 using the name of the Key object as the |
| 1212 key in S3 and the string 's' as the contents. |
| 1213 See set_contents_from_file method for details about the |
| 1214 parameters. |
| 1215 |
| 1216 :type headers: dict |
| 1217 :param headers: Additional headers to pass along with the |
| 1218 request to AWS. |
| 1219 |
| 1220 :type replace: bool |
| 1221 :param replace: If True, replaces the contents of the key if |
| 1222 it already exists. |
| 1223 |
| 1224 :type cb: function |
| 1225 :param cb: a callback function that will be called to report |
| 1226 progress on the upload. The callback should accept two |
| 1227 integer parameters, the first representing the number of |
| 1228 bytes that have been successfully transmitted to S3 and |
| 1229 the second representing the size of the object to be |
| 1230 transmitted. |
| 1231 |
| 1232 :type num_cb: int |
| 1233 :param num_cb: (optional) If a callback is specified with the |
| 1234 cb parameter, this parameter determines the granularity of |
| 1235 the callback by defining the maximum number of times the |
| 1236 callback will be called during the file transfer. |
| 1237 |
| 1238 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1239 :param policy: A canned ACL policy that will be applied to the |
| 1240 new key in S3. |
| 1241 |
| 1242 :type md5: tuple |
| 1243 :param md5: A tuple containing the hexdigest version of the MD5 |
| 1244 checksum of the file as the first element and the |
| 1245 Base64-encoded version of the plain checksum as the second |
| 1246 element. This is the same format returned by the |
| 1247 compute_md5 method. If you need to compute the MD5 for any |
| 1248 reason prior to upload, it's silly to have to do it twice, |
| 1249 so this param, if present, will be used as the MD5 values |
| 1250 of the file. Otherwise, the checksum will be computed. |
| 1251 |
| 1252 :type reduced_redundancy: bool |
| 1253 :param reduced_redundancy: If True, this will set the storage |
| 1254 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1255 Redundancy Storage (RRS) feature of S3 provides lower |
| 1256 redundancy at lower storage cost. |
| 1257 |
| 1258 :type encrypt_key: bool |
| 1259 :param encrypt_key: If True, the new copy of the object will |
| 1260 be encrypted on the server-side by S3 and will be stored |
| 1261 in an encrypted form while at rest in S3. |
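| |
| For example (a sketch; the key contents are arbitrary): |
| |
| >>> k.set_contents_from_string('hello world') |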
| 1262 """ |
| 1263 if isinstance(s, unicode): |
| 1264 s = s.encode("utf-8") |
| 1265 fp = StringIO.StringIO(s) |
| 1266 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1267 policy, md5, reduced_redundancy, |
| 1268 encrypt_key=encrypt_key) |
| 1269 fp.close() |
| 1270 return r |
| 1271 |
| 1272 def get_file(self, fp, headers=None, cb=None, num_cb=10, |
| 1273 torrent=False, version_id=None, override_num_retries=None, |
| 1274 response_headers=None): |
| 1275 """ |
| 1276 Retrieves a file from an S3 Key |
| 1277 |
| 1278 :type fp: file |
| 1279 :param fp: File pointer to put the data into |
| 1280 |
| 1281 :type headers: dict |
| 1282 :param headers: headers to send when retrieving the file |
| 1283 |
| 1284 :type cb: function |
| 1285 :param cb: a callback function that will be called to report |
| 1286 progress on the download. The callback should accept two |
| 1287 integer parameters, the first representing the number of |
| 1288 bytes that have been successfully received from S3 and |
| 1289 the second representing the size of the object to be |
| 1290 downloaded. |
| 1291 |
| 1292 :type num_cb: int |
| 1293 :param num_cb: (optional) If a callback is specified with the |
| 1294 cb parameter, this parameter determines the granularity of |
| 1295 the callback by defining the maximum number of times the |
| 1296 callback will be called during the file transfer. |
| 1297 |
| 1298 :type torrent: bool |
| 1299 :param torrent: Flag for whether to get a torrent for the file |
| 1300 |
| 1301 :type override_num_retries: int |
| 1302 :param override_num_retries: If not None will override configured |
| 1303 num_retries parameter for underlying GET. |
| 1304 |
| 1305 :type response_headers: dict |
| 1306 :param response_headers: A dictionary containing HTTP |
| 1307 headers/values that will override any headers associated |
| 1308 with the stored object in the response. See |
| 1309 http://goo.gl/EWOPb for details. |
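| |
| Illustrative sketch (the local path is an assumption): |
| |
| >>> fp = open('/tmp/download.bin', 'wb') |
| >>> k.get_file(fp) |
| >>> fp.close() |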
| 1310 """ |
| 1311 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1312 torrent=torrent, version_id=version_id, |
| 1313 override_num_retries=override_num_retries, |
| 1314 response_headers=response_headers, |
| 1315 query_args=None) |
| 1316 |
| 1317 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, |
| 1318 torrent=False, version_id=None, override_num_retries=None, |
| 1319 response_headers=None, query_args=None): |
| 1320 if headers is None: |
| 1321 headers = {} |
| 1322 save_debug = self.bucket.connection.debug |
| 1323 if self.bucket.connection.debug == 1: |
| 1324 self.bucket.connection.debug = 0 |
| 1325 |
| 1326 query_args = query_args or [] |
| 1327 if torrent: |
| 1328 query_args.append('torrent') |
| 1329 m = None |
| 1330 else: |
| 1331 m = md5() |
| 1332 # If a version_id is passed in, use that. If not, check to see |
| 1333 # if the Key object has an explicit version_id and, if so, use that. |
| 1334 # Otherwise, don't pass a version_id query param. |
| 1335 if version_id is None: |
| 1336 version_id = self.version_id |
| 1337 if version_id: |
| 1338 query_args.append('versionId=%s' % version_id) |
| 1339 if response_headers: |
| 1340 for key in response_headers: |
| 1341 query_args.append('%s=%s' % (key, urllib.quote(response_headers[key]))) |
| 1342 query_args = '&'.join(query_args) |
| 1343 self.open('r', headers, query_args=query_args, |
| 1344 override_num_retries=override_num_retries) |
| 1345 |
| 1346 data_len = 0 |
| 1347 if cb: |
| 1348 if self.size is None: |
| 1349 cb_size = 0 |
| 1350 else: |
| 1351 cb_size = self.size |
| 1352 if self.size is None and num_cb != -1: |
| 1353 # If size is not available due to chunked transfer for example, |
| 1354 # we'll call the cb for every 1MB of data transferred. |
| 1355 cb_count = (1024 * 1024) / self.BufferSize |
| 1356 elif num_cb > 1: |
| 1357 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) |
| 1358 elif num_cb < 0: |
| 1359 cb_count = -1 |
| 1360 else: |
| 1361 cb_count = 0 |
| 1362 i = 0 |
| 1363 cb(data_len, cb_size) |
| 1364 for bytes in self: |
| 1365 fp.write(bytes) |
| 1366 data_len += len(bytes) |
| 1367 if m: |
| 1368 m.update(bytes) |
| 1369 if cb: |
| 1370 if cb_size > 0 and data_len >= cb_size: |
| 1371 break |
| 1372 i += 1 |
| 1373 if i == cb_count or cb_count == -1: |
| 1374 cb(data_len, cb_size) |
| 1375 i = 0 |
| 1376 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 1377 cb(data_len, cb_size) |
| 1378 if m: |
| 1379 self.md5 = m.hexdigest() |
| 1380 if self.size is None and not torrent and "Range" not in headers: |
| 1381 self.size = data_len |
| 1382 self.close() |
| 1383 self.bucket.connection.debug = save_debug |
| 1384 |
| 1385 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): |
| 1386 """ |
| 1387 Get a torrent file (see get_file) |
| 1388 |
| 1389 :type fp: file |
| 1390 :param fp: The file pointer of where to put the torrent |
| 1391 |
| 1392 :type headers: dict |
| 1393 :param headers: Headers to be passed |
| 1394 |
| 1395 :type cb: function |
| 1396 :param cb: a callback function that will be called to report |
| 1397 progress on the download. The callback should accept two |
| 1398 integer parameters, the first representing the number of |
| 1399 bytes that have been successfully received from S3 and |
| 1400 the second representing the size of the object to be |
| 1401 downloaded. |
| 1402 |
| 1403 :type num_cb: int |
| 1404 :param num_cb: (optional) If a callback is specified with the |
| 1405 cb parameter, this parameter determines the granularity of |
| 1406 the callback by defining the maximum number of times the |
| 1407 callback will be called during the file transfer. |
| 1408 |
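| A minimal sketch (the output path is an assumption): |
| |
| >>> fp = open('/tmp/key.torrent', 'wb') |
| >>> k.get_torrent_file(fp) |
| >>> fp.close() |
| |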
| 1409 """ |
| 1410 return self.get_file(fp, headers, cb, num_cb, torrent=True) |
| 1411 |
| 1412 def get_contents_to_file(self, fp, headers=None, |
| 1413 cb=None, num_cb=10, |
| 1414 torrent=False, |
| 1415 version_id=None, |
| 1416 res_download_handler=None, |
| 1417 response_headers=None): |
| 1418 """ |
| 1419 Retrieve an object from S3 using the name of the Key object as the |
| 1420 key in S3. Write the contents of the object to the file pointed |
| 1421 to by 'fp'. |
| 1422 |
        :type fp: file-like object
        :param fp: A file-like object to which the contents of the
            object will be written.
| 1425 |
| 1426 :type headers: dict |
| 1427 :param headers: additional HTTP headers that will be sent with |
| 1428 the GET request. |
| 1429 |
        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transferred from S3 and
            the second representing the total size of the object.

        :type num_cb: int
| 1439 :param num_cb: (optional) If a callback is specified with the |
| 1440 cb parameter this parameter determines the granularity of |
| 1441 the callback by defining the maximum number of times the |
| 1442 callback will be called during the file transfer. |
| 1443 |
        :type torrent: bool
        :param torrent: If True, retrieve the torrent file for the
            object rather than the object itself.
| 1447 |
        :type res_download_handler: ResumableDownloadHandler
| 1449 :param res_download_handler: If provided, this handler will |
| 1450 perform the download. |
| 1451 |
| 1452 :type response_headers: dict |
| 1453 :param response_headers: A dictionary containing HTTP |
| 1454 headers/values that will override any headers associated |
| 1455 with the stored object in the response. See |
| 1456 http://goo.gl/EWOPb for details. |
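
        Example (a minimal sketch; the bucket and key names below are
        placeholders for objects that already exist)::

            import boto
            key = boto.connect_s3().get_bucket('mybucket').get_key('mykey')
            with open('/tmp/mykey', 'wb') as fp:
                key.get_contents_to_file(fp)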
| 1457 """ |
        if self.bucket is not None:
| 1459 if res_download_handler: |
| 1460 res_download_handler.get_file(self, fp, headers, cb, num_cb, |
| 1461 torrent=torrent, |
| 1462 version_id=version_id) |
| 1463 else: |
| 1464 self.get_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1465 version_id=version_id, |
| 1466 response_headers=response_headers) |
| 1467 |
| 1468 def get_contents_to_filename(self, filename, headers=None, |
| 1469 cb=None, num_cb=10, |
| 1470 torrent=False, |
| 1471 version_id=None, |
| 1472 res_download_handler=None, |
| 1473 response_headers=None): |
| 1474 """ |
| 1475 Retrieve an object from S3 using the name of the Key object as the |
| 1476 key in S3. Store contents of the object to a file named by 'filename'. |
| 1477 See get_contents_to_file method for details about the |
| 1478 parameters. |
| 1479 |
| 1480 :type filename: string |
        :param filename: The path of the local file to write the
            contents to
| 1482 |
| 1483 :type headers: dict |
| 1484 :param headers: Any additional headers to send in the request |
| 1485 |
        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transferred from S3 and
            the second representing the total size of the object.

        :type num_cb: int
| 1495 :param num_cb: (optional) If a callback is specified with the |
| 1496 cb parameter this parameter determines the granularity of |
| 1497 the callback by defining the maximum number of times the |
| 1498 callback will be called during the file transfer. |
| 1499 |
        :type torrent: bool
        :param torrent: If True, retrieve the torrent file for the
            object rather than the object itself.
| 1503 |
        :type res_download_handler: ResumableDownloadHandler
| 1505 :param res_download_handler: If provided, this handler will |
| 1506 perform the download. |
| 1507 |
| 1508 :type response_headers: dict |
| 1509 :param response_headers: A dictionary containing HTTP |
| 1510 headers/values that will override any headers associated |
| 1511 with the stored object in the response. See |
| 1512 http://goo.gl/EWOPb for details. |
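
        Example (a minimal sketch; assumes ``bucket`` is an existing
        :class:`boto.s3.bucket.Bucket`)::

            key = bucket.get_key('logs/2012-06-01.gz')
            key.get_contents_to_filename('/tmp/2012-06-01.gz')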
| 1513 """ |
        try:
            with open(filename, 'wb') as fp:
                self.get_contents_to_file(fp, headers, cb, num_cb,
                                          torrent=torrent,
                                          version_id=version_id,
                                          res_download_handler=res_download_handler,
                                          response_headers=response_headers)
        except Exception:
            # The file is closed by the time we get here, so the partial
            # download can be removed safely, even on Windows.
            os.remove(filename)
            raise
        # if last_modified date was sent from s3, try to set file's timestamp
        if self.last_modified is not None:
            try:
                modified_tuple = rfc822.parsedate_tz(self.last_modified)
                modified_stamp = int(rfc822.mktime_tz(modified_tuple))
                os.utime(filename, (modified_stamp, modified_stamp))
            except Exception:
                pass
| 1533 |
| 1534 def get_contents_as_string(self, headers=None, |
| 1535 cb=None, num_cb=10, |
| 1536 torrent=False, |
| 1537 version_id=None, |
| 1538 response_headers=None): |
| 1539 """ |
| 1540 Retrieve an object from S3 using the name of the Key object as the |
| 1541 key in S3. Return the contents of the object as a string. |
| 1542 See get_contents_to_file method for details about the |
| 1543 parameters. |
| 1544 |
| 1545 :type headers: dict |
| 1546 :param headers: Any additional headers to send in the request |
| 1547 |
        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transferred from S3 and
            the second representing the total size of the object.

        :type num_cb: int
| 1557 :param num_cb: (optional) If a callback is specified with the |
| 1558 cb parameter this parameter determines the granularity of |
| 1559 the callback by defining the maximum number of times the |
| 1560 callback will be called during the file transfer. |
| 1561 |
| 1562 :type torrent: bool |
| 1563 :param torrent: If True, returns the contents of a torrent file |
| 1564 as a string. |
| 1565 |
| 1566 :type response_headers: dict |
| 1567 :param response_headers: A dictionary containing HTTP |
| 1568 headers/values that will override any headers associated |
| 1569 with the stored object in the response. See |
| 1570 http://goo.gl/EWOPb for details. |
| 1571 |
| 1572 :rtype: string |
| 1573 :returns: The contents of the file as a string |
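
        Example (a minimal sketch; assumes ``bucket`` is an existing
        :class:`boto.s3.bucket.Bucket`)::

            data = bucket.get_key('config.json').get_contents_as_string()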
| 1574 """ |
| 1575 fp = StringIO.StringIO() |
| 1576 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1577 version_id=version_id, |
| 1578 response_headers=response_headers) |
| 1579 return fp.getvalue() |
| 1580 |
| 1581 def add_email_grant(self, permission, email_address, headers=None): |
| 1582 """ |
| 1583 Convenience method that provides a quick way to add an email grant |
| 1584 to a key. This method retrieves the current ACL, creates a new |
| 1585 grant based on the parameters passed in, adds that grant to the ACL |
| 1586 and then PUT's the new ACL back to S3. |
| 1587 |
| 1588 :type permission: string |
| 1589 :param permission: The permission being granted. Should be one of: |
| 1590 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). |
| 1591 |
| 1592 :type email_address: string |
        :param email_address: The email address associated with the AWS
            account you are granting the permission to.
| 1595 |
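        Example (a minimal sketch; the address below is a placeholder
        for the email tied to the grantee's AWS account)::

            key = bucket.get_key('report.csv')
            key.add_email_grant('READ', 'user@example.com')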
| 1603 """ |
| 1604 policy = self.get_acl(headers=headers) |
| 1605 policy.acl.add_email_grant(permission, email_address) |
| 1606 self.set_acl(policy, headers=headers) |
| 1607 |
| 1608 def add_user_grant(self, permission, user_id, headers=None, |
| 1609 display_name=None): |
| 1610 """ |
| 1611 Convenience method that provides a quick way to add a canonical |
| 1612 user grant to a key. This method retrieves the current ACL, |
| 1613 creates a new grant based on the parameters passed in, adds that |
| 1614 grant to the ACL and then PUT's the new ACL back to S3. |
| 1615 |
| 1616 :type permission: string |
| 1617 :param permission: The permission being granted. Should be one of: |
| 1618 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). |
| 1619 |
| 1620 :type user_id: string |
        :param user_id: The canonical user id associated with the AWS
            account you are granting the permission to.
| 1623 |
| 1624 :type display_name: string |
        :param display_name: An optional string containing the user's
            Display Name. Only required on Walrus.
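
        Example (a minimal sketch; the id below stands in for a real
        64-character canonical user id)::

            key.add_user_grant('READ', '<canonical-user-id>')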
| 1627 """ |
| 1628 policy = self.get_acl(headers=headers) |
| 1629 policy.acl.add_user_grant(permission, user_id, |
| 1630 display_name=display_name) |
| 1631 self.set_acl(policy, headers=headers) |
| 1632 |
| 1633 def _normalize_metadata(self, metadata): |
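        # Metadata names are case-insensitive on the wire, so lower-case
        # every name; accepts either a set of names (used for deletions)
        # or a dict of name/value pairs (used for additions/overrides).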
        if isinstance(metadata, set):
| 1635 norm_metadata = set() |
| 1636 for k in metadata: |
| 1637 norm_metadata.add(k.lower()) |
| 1638 else: |
| 1639 norm_metadata = {} |
| 1640 for k in metadata: |
| 1641 norm_metadata[k.lower()] = metadata[k] |
| 1642 return norm_metadata |
| 1643 |
| 1644 def _get_remote_metadata(self, headers=None): |
| 1645 """ |
        Extracts metadata from the existing object into a dict, so we can
| 1647 overwrite/delete from it to form the new set of metadata to apply to a |
| 1648 key. |
| 1649 """ |
| 1650 metadata = {} |
| 1651 for underscore_name in self._underscore_base_user_settable_fields: |
| 1652 if hasattr(self, underscore_name): |
| 1653 value = getattr(self, underscore_name) |
| 1654 if value: |
| 1655 # Generate HTTP field name corresponding to "_" named field. |
| 1656 field_name = underscore_name.replace('_', '-') |
| 1657 metadata[field_name.lower()] = value |
| 1658 # self.metadata contains custom metadata, which are all user-settable. |
| 1659 prefix = self.provider.metadata_prefix |
| 1660 for underscore_name in self.metadata: |
| 1661 field_name = underscore_name.replace('_', '-') |
| 1662 metadata['%s%s' % (prefix, field_name.lower())] = ( |
| 1663 self.metadata[underscore_name]) |
| 1664 return metadata |
| 1665 |
| 1666 def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl, |
| 1667 headers=None): |
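        """
        Update this key's metadata in place by copying the key onto
        itself: names in metadata_plus are added or overwritten, names
        in metadata_minus are removed, and all other metadata is
        preserved.
        """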
| 1668 metadata_plus = self._normalize_metadata(metadata_plus) |
| 1669 metadata_minus = self._normalize_metadata(metadata_minus) |
| 1670 metadata = self._get_remote_metadata() |
| 1671 metadata.update(metadata_plus) |
| 1672 for h in metadata_minus: |
| 1673 if h in metadata: |
| 1674 del metadata[h] |
| 1675 src_bucket = self.bucket |
| 1676 # Boto prepends the meta prefix when adding headers, so strip prefix in |
| 1677 # metadata before sending back in to copy_key() call. |
| 1678 rewritten_metadata = {} |
| 1679 for h in metadata: |
| 1680 if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')): |
| 1681 rewritten_h = (h.replace('x-goog-meta-', '') |
| 1682 .replace('x-amz-meta-', '')) |
| 1683 else: |
| 1684 rewritten_h = h |
| 1685 rewritten_metadata[rewritten_h] = metadata[h] |
| 1686 metadata = rewritten_metadata |
| 1687 src_bucket.copy_key(self.name, self.bucket.name, self.name, |
| 1688 metadata=metadata, preserve_acl=preserve_acl, |
| 1689 headers=headers) |
| 1690 |
| 1691 def restore(self, days, headers=None): |
| 1692 """Restore an object from an archive. |
| 1693 |
| 1694 :type days: int |
| 1695 :param days: The lifetime of the restored object (must |
| 1696 be at least 1 day). If the object is already restored |
| 1697 then this parameter can be used to readjust the lifetime |
| 1698 of the restored object. In this case, the days |
| 1699 param is with respect to the initial time of the request. |
| 1700 If the object has not been restored, this param is with |
| 1701 respect to the completion time of the request. |
| 1702 |
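        Example (a minimal sketch; assumes the object behind ``key`` has
        been archived to Glacier, e.g. by a lifecycle rule)::

            key.restore(days=5)
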
| 1703 """ |
| 1704 response = self.bucket.connection.make_request( |
| 1705 'POST', self.bucket.name, self.name, |
| 1706 data=self.RestoreBody % days, |
| 1707 headers=headers, query_args='restore') |
| 1708 if response.status not in (200, 202): |
| 1709 provider = self.bucket.connection.provider |
| 1710 raise provider.storage_response_error(response.status, |
| 1711 response.reason, |
| 1712 response.read()) |