| 1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ |
| 2 # Copyright (c) 2011, Nexenta Systems Inc. |
| 3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a |
| 6 # copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- |
| 11 # lowing conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be included |
| 14 # in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 22 # IN THE SOFTWARE. |
| 23 |
| 24 import mimetypes |
| 25 import os |
| 26 import re |
| 27 import rfc822 |
| 28 import StringIO |
| 29 import base64 |
| 30 import math |
| 31 import urllib |
| 32 import boto.utils |
| 33 from boto.exception import BotoClientError |
| 34 from boto.provider import Provider |
| 35 from boto.s3.user import User |
| 36 from boto import UserAgent |
| 37 from boto.utils import compute_md5 |
| 38 try: |
| 39 from hashlib import md5 |
| 40 except ImportError: |
| 41 from md5 import md5 |
| 42 |
| 43 |
| 44 class Key(object): |
| 45 """ |
| 46 Represents a key (object) in an S3 bucket. |
| 47 |
| 48 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. |
| 49 :ivar name: The name of this Key object. |
| 50 :ivar metadata: A dictionary containing user metadata that you |
| 51 wish to store with the object or that has been retrieved from |
| 52 an existing object. |
| 53 :ivar cache_control: The value of the `Cache-Control` HTTP header. |
| 54 :ivar content_type: The value of the `Content-Type` HTTP header. |
| 55 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. |
| 56 :ivar content_disposition: The value of the `Content-Disposition` HTTP |
| 57 header. |
| 58 :ivar content_language: The value of the `Content-Language` HTTP header. |
| 59 :ivar etag: The `etag` associated with this object. |
| 60 :ivar last_modified: The string timestamp representing the last |
| 61 time this object was modified in S3. |
| 62 :ivar owner: The ID of the owner of this object. |
| 63 :ivar storage_class: The storage class of the object. Currently, one of: |
| 64 STANDARD | REDUCED_REDUNDANCY | GLACIER |
| 65 :ivar md5: The MD5 hash of the contents of the object. |
| 66 :ivar size: The size, in bytes, of the object. |
| 67 :ivar version_id: The version ID of this object, if it is a versioned |
| 68 object. |
| 69 :ivar encrypted: Whether the object is encrypted while at rest on |
| 70 the server. |
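| |
| Example (an illustrative sketch; the bucket and key names are |
| hypothetical):: |
| |
| import boto |
| conn = boto.connect_s3() |
| bucket = conn.get_bucket('mybucket') |
| key = bucket.new_key('hello.txt') |
| key.set_contents_from_string('Hello World!') |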
| 71 """ |
| 72 |
| 73 DefaultContentType = 'application/octet-stream' |
| 74 |
| 75 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> |
| 76 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> |
| 77 <Days>%s</Days> |
| 78 </RestoreRequest>""" |
| 79 |
| 80 |
| 81 BufferSize = 8192 |
| 82 |
| 83 # The object metadata fields a user can set, other than custom metadata |
| 84 # fields (i.e., those beginning with a provider-specific prefix like |
| 85 # x-amz-meta). |
| 86 base_user_settable_fields = set(["cache-control", "content-disposition", |
| 87 "content-encoding", "content-language", |
| 88 "content-md5", "content-type"]) |
| 89 _underscore_base_user_settable_fields = set() |
| 90 for f in base_user_settable_fields: |
| 91 _underscore_base_user_settable_fields.add(f.replace('-', '_')) |
| 92 |
| 93 |
| 94 |
| 95 def __init__(self, bucket=None, name=None): |
| 96 self.bucket = bucket |
| 97 self.name = name |
| 98 self.metadata = {} |
| 99 self.cache_control = None |
| 100 self.content_type = self.DefaultContentType |
| 101 self.content_encoding = None |
| 102 self.content_disposition = None |
| 103 self.content_language = None |
| 104 self.filename = None |
| 105 self.etag = None |
| 106 self.is_latest = False |
| 107 self.last_modified = None |
| 108 self.owner = None |
| 109 self.storage_class = 'STANDARD' |
| 110 self.md5 = None |
| 111 self.base64md5 = None |
| 112 self.path = None |
| 113 self.resp = None |
| 114 self.mode = None |
| 115 self.size = None |
| 116 self.version_id = None |
| 117 self.source_version_id = None |
| 118 self.delete_marker = False |
| 119 self.encrypted = None |
| 120 |
| 121 def __repr__(self): |
| 122 if self.bucket: |
| 123 return '<Key: %s,%s>' % (self.bucket.name, self.name) |
| 124 else: |
| 125 return '<Key: None,%s>' % self.name |
| 126 |
| 127 def __getattr__(self, name): |
| 128 if name == 'key': |
| 129 return self.name |
| 130 else: |
| 131 raise AttributeError |
| 132 |
| 133 def __setattr__(self, name, value): |
| 134 if name == 'key': |
| 135 self.__dict__['name'] = value |
| 136 else: |
| 137 self.__dict__[name] = value |
| 138 |
| 139 def __iter__(self): |
| 140 return self |
| 141 |
| 142 @property |
| 143 def provider(self): |
| 144 provider = None |
| 145 if self.bucket: |
| 146 if self.bucket.connection: |
| 147 provider = self.bucket.connection.provider |
| 148 return provider |
| 149 |
| 150 def get_md5_from_hexdigest(self, md5_hexdigest): |
| 151 """ |
| 152 A utility function to create the 2-tuple (md5hexdigest, base64md5) |
| 153 from a precalculated md5_hexdigest. |
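| |
| Example (sketch; the digest shown is the well-known MD5 of |
| "The quick brown fox jumps over the lazy dog"):: |
| |
| hexdigest = '9e107d9d372bb6826bd81d3542a419d6' |
| key.md5, key.base64md5 = key.get_md5_from_hexdigest(hexdigest) |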
| 154 """ |
| 155 import binascii |
| 156 digest = binascii.unhexlify(md5_hexdigest) |
| 157 base64md5 = base64.encodestring(digest) |
| 158 if base64md5[-1] == '\n': |
| 159 base64md5 = base64md5[0:-1] |
| 160 return (md5_hexdigest, base64md5) |
| 161 |
| 162 def handle_encryption_headers(self, resp): |
| 163 provider = self.bucket.connection.provider |
| 164 if provider.server_side_encryption_header: |
| 165 self.encrypted = resp.getheader(provider.server_side_encryption_header, None) |
| 166 else: |
| 167 self.encrypted = None |
| 168 |
| 169 def handle_version_headers(self, resp, force=False): |
| 170 provider = self.bucket.connection.provider |
| 171 # If the Key object already has a version_id attribute value, it |
| 172 # means that it represents an explicit version and the user is |
| 173 # doing a get_contents_*(version_id=<foo>) to retrieve another |
| 174 # version of the Key. In that case, we don't really want to |
| 175 # overwrite the version_id in this Key object. Comprende? |
| 176 if self.version_id is None or force: |
| 177 self.version_id = resp.getheader(provider.version_id, None) |
| 178 self.source_version_id = resp.getheader(provider.copy_source_version_id, |
| 179 None) |
| 180 if resp.getheader(provider.delete_marker, 'false') == 'true': |
| 181 self.delete_marker = True |
| 182 else: |
| 183 self.delete_marker = False |
| 184 |
| 185 def open_read(self, headers=None, query_args='', |
| 186 override_num_retries=None, response_headers=None): |
| 187 """ |
| 188 Open this key for reading |
| 189 |
| 190 :type headers: dict |
| 191 :param headers: Headers to pass in the web request |
| 192 |
| 193 :type query_args: string |
| 194 :param query_args: Arguments to pass in the query string |
| 195 (i.e., 'torrent') |
| 196 |
| 197 :type override_num_retries: int |
| 198 :param override_num_retries: If not None will override configured |
| 199 num_retries parameter for underlying GET. |
| 200 |
| 201 :type response_headers: dict |
| 202 :param response_headers: A dictionary containing HTTP |
| 203 headers/values that will override any headers associated |
| 204 with the stored object in the response. See |
| 205 http://goo.gl/EWOPb for details. |
| 206 """ |
| 207 if self.resp is None: |
| 208 self.mode = 'r' |
| 209 |
| 210 provider = self.bucket.connection.provider |
| 211 self.resp = self.bucket.connection.make_request( |
| 212 'GET', self.bucket.name, self.name, headers, |
| 213 query_args=query_args, |
| 214 override_num_retries=override_num_retries) |
| 215 if self.resp.status < 200 or self.resp.status > 299: |
| 216 body = self.resp.read() |
| 217 raise provider.storage_response_error(self.resp.status, |
| 218 self.resp.reason, body) |
| 219 response_headers = self.resp.msg |
| 220 self.metadata = boto.utils.get_aws_metadata(response_headers, |
| 221 provider) |
| 222 for name, value in response_headers.items(): |
| 223 # To get correct size for Range GETs, use Content-Range |
| 224 # header if one was returned. If not, use Content-Length |
| 225 # header. |
| 226 if (name.lower() == 'content-length' and |
| 227 'Content-Range' not in response_headers): |
| 228 self.size = int(value) |
| 229 elif name.lower() == 'content-range': |
| 230 end_range = re.sub('.*/(.*)', '\\1', value) |
| 231 self.size = int(end_range) |
| 232 elif name.lower() == 'etag': |
| 233 self.etag = value |
| 234 elif name.lower() == 'content-type': |
| 235 self.content_type = value |
| 236 elif name.lower() == 'content-encoding': |
| 237 self.content_encoding = value |
| 238 elif name.lower() == 'content-language': |
| 239 self.content_language = value |
| 240 elif name.lower() == 'last-modified': |
| 241 self.last_modified = value |
| 242 elif name.lower() == 'cache-control': |
| 243 self.cache_control = value |
| 244 elif name.lower() == 'content-disposition': |
| 245 self.content_disposition = value |
| 246 self.handle_version_headers(self.resp) |
| 247 self.handle_encryption_headers(self.resp) |
| 248 |
| 249 def open_write(self, headers=None, override_num_retries=None): |
| 250 """ |
| 251 Open this key for writing. |
| 252 Not yet implemented |
| 253 |
| 254 :type headers: dict |
| 255 :param headers: Headers to pass in the write request |
| 256 |
| 257 :type override_num_retries: int |
| 258 :param override_num_retries: If not None will override configured |
| 259 num_retries parameter for underlying PUT. |
| 260 """ |
| 261 raise BotoClientError('Not Implemented') |
| 262 |
| 263 def open(self, mode='r', headers=None, query_args=None, |
| 264 override_num_retries=None): |
| 265 if mode == 'r': |
| 266 self.mode = 'r' |
| 267 self.open_read(headers=headers, query_args=query_args, |
| 268 override_num_retries=override_num_retries) |
| 269 elif mode == 'w': |
| 270 self.mode = 'w' |
| 271 self.open_write(headers=headers, |
| 272 override_num_retries=override_num_retries) |
| 273 else: |
| 274 raise BotoClientError('Invalid mode: %s' % mode) |
| 275 |
| 276 closed = False |
| 277 |
| 278 def close(self): |
| 279 if self.resp: |
| 280 self.resp.read() |
| 281 self.resp = None |
| 282 self.mode = None |
| 283 self.closed = True |
| 284 |
| 285 def next(self): |
| 286 """ |
| 287 By providing a next method, the key object supports use as an iterator. |
| 288 For example, you can now say: |
| 289 |
| 290 for chunk in key: |
| 291 fp.write(chunk) |
| 292 |
| 293 All of the HTTP connection stuff is handled for you. |
| 294 """ |
| 295 self.open_read() |
| 296 data = self.resp.read(self.BufferSize) |
| 297 if not data: |
| 298 self.close() |
| 299 raise StopIteration |
| 300 return data |
| 301 |
| 302 def read(self, size=0): |
| 303 self.open_read() |
| 304 if size == 0: |
| 305 data = self.resp.read() |
| 306 else: |
| 307 data = self.resp.read(size) |
| 308 if not data: |
| 309 self.close() |
| 310 return data |
| 311 |
| 312 def change_storage_class(self, new_storage_class, dst_bucket=None, |
| 313 validate_dst_bucket=True): |
| 314 """ |
| 315 Change the storage class of an existing key. |
| 316 Depending on whether a different destination bucket is supplied |
| 317 or not, this will either move the item within the bucket, preserving |
| 318 all metadata and ACL info while changing the storage class, or it |
| 319 will copy the item to the provided destination bucket, also |
| 320 preserving metadata and ACL info. |
| 321 |
| 322 :type new_storage_class: string |
| 323 :param new_storage_class: The new storage class for the Key. |
| 324 Possible values are: |
| 325 * STANDARD |
| 326 * REDUCED_REDUNDANCY |
| 327 |
| 328 :type dst_bucket: string |
| 329 :param dst_bucket: The name of a destination bucket. If not |
| 330 provided the current bucket of the key will be used. |
| 331 |
| 332 :type validate_dst_bucket: bool |
| 333 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 334 by using an extra list request. |
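| |
| Example (sketch):: |
| |
| key.change_storage_class('REDUCED_REDUNDANCY') |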
| 335 """ |
| 336 if new_storage_class == 'STANDARD': |
| 337 return self.copy(self.bucket.name, self.name, |
| 338 reduced_redundancy=False, preserve_acl=True, |
| 339 validate_dst_bucket=validate_dst_bucket) |
| 340 elif new_storage_class == 'REDUCED_REDUNDANCY': |
| 341 return self.copy(self.bucket.name, self.name, |
| 342 reduced_redundancy=True, preserve_acl=True, |
| 343 validate_dst_bucket=validate_dst_bucket) |
| 344 else: |
| 345 raise BotoClientError('Invalid storage class: %s' % |
| 346 new_storage_class) |
| 347 |
| 348 def copy(self, dst_bucket, dst_key, metadata=None, |
| 349 reduced_redundancy=False, preserve_acl=False, |
| 350 encrypt_key=False, validate_dst_bucket=True): |
| 351 """ |
| 352 Copy this Key to another bucket. |
| 353 |
| 354 :type dst_bucket: string |
| 355 :param dst_bucket: The name of the destination bucket |
| 356 |
| 357 :type dst_key: string |
| 358 :param dst_key: The name of the destination key |
| 359 |
| 360 :type metadata: dict |
| 361 :param metadata: Metadata to be associated with new key. If |
| 362 metadata is supplied, it will replace the metadata of the |
| 363 source key being copied. If no metadata is supplied, the |
| 364 source key's metadata will be copied to the new key. |
| 365 |
| 366 :type reduced_redundancy: bool |
| 367 :param reduced_redundancy: If True, this will force the |
| 368 storage class of the new Key to be REDUCED_REDUNDANCY |
| 369 regardless of the storage class of the key being copied. |
| 370 The Reduced Redundancy Storage (RRS) feature of S3 |
| 371 provides lower redundancy at lower storage cost. |
| 372 |
| 373 :type preserve_acl: bool |
| 374 :param preserve_acl: If True, the ACL from the source key will |
| 375 be copied to the destination key. If False, the |
| 376 destination key will have the default ACL. Note that |
| 377 preserving the ACL in the new key object will require two |
| 378 additional API calls to S3, one to retrieve the current |
| 379 ACL and one to set that ACL on the new object. If you |
| 380 don't care about the ACL, a value of False will be |
| 381 significantly more efficient. |
| 382 |
| 383 :type encrypt_key: bool |
| 384 :param encrypt_key: If True, the new copy of the object will |
| 385 be encrypted on the server-side by S3 and will be stored |
| 386 in an encrypted form while at rest in S3. |
| 387 |
| 388 :type validate_dst_bucket: bool |
| 389 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 390 by using an extra list request. |
| 391 |
| 392 :rtype: :class:`boto.s3.key.Key` or subclass |
| 393 :returns: An instance of the newly created key object |
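| |
| Example (sketch; the destination bucket name 'backup-bucket' is |
| hypothetical):: |
| |
| new_key = key.copy('backup-bucket', key.name, preserve_acl=True) |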
| 394 """ |
| 395 dst_bucket = self.bucket.connection.lookup(dst_bucket, |
| 396 validate_dst_bucket) |
| 397 if reduced_redundancy: |
| 398 storage_class = 'REDUCED_REDUNDANCY' |
| 399 else: |
| 400 storage_class = self.storage_class |
| 401 return dst_bucket.copy_key(dst_key, self.bucket.name, |
| 402 self.name, metadata, |
| 403 storage_class=storage_class, |
| 404 preserve_acl=preserve_acl, |
| 405 encrypt_key=encrypt_key) |
| 406 |
| 407 def startElement(self, name, attrs, connection): |
| 408 if name == 'Owner': |
| 409 self.owner = User(self) |
| 410 return self.owner |
| 411 else: |
| 412 return None |
| 413 |
| 414 def endElement(self, name, value, connection): |
| 415 if name == 'Key': |
| 416 self.name = value |
| 417 elif name == 'ETag': |
| 418 self.etag = value |
| 419 elif name == 'IsLatest': |
| 420 if value == 'true': |
| 421 self.is_latest = True |
| 422 else: |
| 423 self.is_latest = False |
| 424 elif name == 'LastModified': |
| 425 self.last_modified = value |
| 426 elif name == 'Size': |
| 427 self.size = int(value) |
| 428 elif name == 'StorageClass': |
| 429 self.storage_class = value |
| 430 elif name == 'Owner': |
| 431 pass |
| 432 elif name == 'VersionId': |
| 433 self.version_id = value |
| 434 else: |
| 435 setattr(self, name, value) |
| 436 |
| 437 def exists(self): |
| 438 """ |
| 439 Returns True if the key exists |
| 440 |
| 441 :rtype: bool |
| 442 :return: Whether the key exists on S3 |
| 443 """ |
| 444 return bool(self.bucket.lookup(self.name)) |
| 445 |
| 446 def delete(self): |
| 447 """ |
| 448 Delete this key from S3 |
| 449 """ |
| 450 return self.bucket.delete_key(self.name, version_id=self.version_id) |
| 451 |
| 452 def get_metadata(self, name): |
| 453 return self.metadata.get(name) |
| 454 |
| 455 def set_metadata(self, name, value): |
| 456 self.metadata[name] = value |
| 457 |
| 458 def update_metadata(self, d): |
| 459 self.metadata.update(d) |
| 460 |
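| # Example (sketch): user metadata must be set before the object is |
| # uploaded; it is sent with the PUT request as provider metadata |
| # headers (e.g. x-amz-meta-* for S3): |
| # |
| #     key.set_metadata('reviewed-by', 'alice') |
| #     key.set_contents_from_filename('report.csv') |
| |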
| 461 # convenience methods for setting/getting ACL |
| 462 def set_acl(self, acl_str, headers=None): |
| 463 if self.bucket is not None: |
| 464 self.bucket.set_acl(acl_str, self.name, headers=headers) |
| 465 |
| 466 def get_acl(self, headers=None): |
| 467 if self.bucket is not None: |
| 468 return self.bucket.get_acl(self.name, headers=headers) |
| 469 |
| 470 def get_xml_acl(self, headers=None): |
| 471 if self.bucket is not None: |
| 472 return self.bucket.get_xml_acl(self.name, headers=headers) |
| 473 |
| 474 def set_xml_acl(self, acl_str, headers=None): |
| 475 if self.bucket is not None: |
| 476 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers) |
| 477 |
| 478 def set_canned_acl(self, acl_str, headers=None): |
| 479 return self.bucket.set_canned_acl(acl_str, self.name, headers) |
| 480 |
| 481 def get_redirect(self): |
| 482 """Return the redirect location configured for this key. |
| 483 |
| 484 If no redirect is configured (via set_redirect), then None |
| 485 will be returned. |
| 486 |
| 487 """ |
| 488 response = self.bucket.connection.make_request( |
| 489 'GET', self.bucket.name, self.name) |
| 490 if response.status == 200: |
| 491 return response.getheader('x-amz-website-redirect-location') |
| 492 else: |
| 493 raise self.provider.storage_response_error( |
| 494 response.status, response.reason, response.read()) |
| 495 |
| 496 def set_redirect(self, redirect_location): |
| 497 """Configure this key to redirect to another location. |
| 498 |
| 499 When the bucket associated with this key is accessed from the website |
| 500 endpoint, a 301 redirect will be issued to the specified |
| 501 `redirect_location`. |
| 502 |
| 503 :type redirect_location: string |
| 504 :param redirect_location: The location to redirect. |
| 505 |
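| Example (sketch; the target URL is illustrative):: |
| |
| key.set_redirect('http://example.com/') |
| |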
| 506 """ |
| 507 headers = {'x-amz-website-redirect-location': redirect_location} |
| 508 response = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 509 self.name, headers) |
| 510 if response.status == 200: |
| 511 return True |
| 512 else: |
| 513 raise self.provider.storage_response_error( |
| 514 response.status, response.reason, response.read()) |
| 515 |
| 516 def make_public(self, headers=None): |
| 517 return self.bucket.set_canned_acl('public-read', self.name, headers) |
| 518 |
| 519 def generate_url(self, expires_in, method='GET', headers=None, |
| 520 query_auth=True, force_http=False, response_headers=None, |
| 521 expires_in_absolute=False, version_id=None, |
| 522 policy=None, reduced_redundancy=False, encrypt_key=False): |
| 523 """ |
| 524 Generate a URL to access this key. |
| 525 |
| 526 :type expires_in: int |
| 527 :param expires_in: How long the url is valid for, in seconds |
| 528 |
| 529 :type method: string |
| 530 :param method: The method to use for retrieving the file |
| 531 (default is GET) |
| 532 |
| 533 :type headers: dict |
| 534 :param headers: Any headers to pass along in the request |
| 535 |
| 536 :type query_auth: bool |
| 537 :param query_auth: |
| 538 |
| 539 :type force_http: bool |
| 540 :param force_http: If True, http will be used instead of https. |
| 541 |
| 542 :type response_headers: dict |
| 543 :param response_headers: A dictionary containing HTTP |
| 544 headers/values that will override any headers associated |
| 545 with the stored object in the response. See |
| 546 http://goo.gl/EWOPb for details. |
| 547 |
| 548 :type expires_in_absolute: bool |
| 549 :param expires_in_absolute: If True, ``expires_in`` is treated |
| as an absolute epoch timestamp rather than a number of |
| seconds from now. |
| 550 |
| 551 :type version_id: string |
| 552 :param version_id: The version_id of the object to GET. If specified |
| 553 this overrides any value in the key. |
| 554 |
| 555 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 556 :param policy: A canned ACL policy that will be applied to the |
| 557 new key in S3. |
| 558 |
| 559 :type reduced_redundancy: bool |
| 560 :param reduced_redundancy: If True, this will set the storage |
| 561 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 562 Redundancy Storage (RRS) feature of S3 provides lower |
| 563 redundancy at lower storage cost. |
| 564 |
| 565 :type encrypt_key: bool |
| 566 :param encrypt_key: If True, the new copy of the object will |
| 567 be encrypted on the server-side by S3 and will be stored |
| 568 in an encrypted form while at rest in S3. |
| 569 |
| 570 :rtype: string |
| 571 :return: The URL to access the key |
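| |
| Example (sketch):: |
| |
| # Signed URL, valid for one hour. |
| url = key.generate_url(3600) |
| # Plain, unsigned HTTP URL (object must be publicly readable). |
| url = key.generate_url(0, query_auth=False, force_http=True) |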
| 572 """ |
| 573 provider = self.bucket.connection.provider |
| 574 version_id = version_id or self.version_id |
| 575 if headers is None: |
| 576 headers = {} |
| 577 else: |
| 578 headers = headers.copy() |
| 579 |
| 580 # add headers accordingly (usually PUT case) |
| 581 if policy: |
| 582 headers[provider.acl_header] = policy |
| 583 if reduced_redundancy: |
| 584 self.storage_class = 'REDUCED_REDUNDANCY' |
| 585 if provider.storage_class_header: |
| 586 headers[provider.storage_class_header] = self.storage_class |
| 587 if encrypt_key: |
| 588 headers[provider.server_side_encryption_header] = 'AES256' |
| 589 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 590 |
| 591 return self.bucket.connection.generate_url(expires_in, method, |
| 592 self.bucket.name, self.name, |
| 593 headers, query_auth, |
| 594 force_http, |
| 595 response_headers, |
| 596 expires_in_absolute, |
| 597 version_id) |
| 598 |
| 599 def send_file(self, fp, headers=None, cb=None, num_cb=10, |
| 600 query_args=None, chunked_transfer=False, size=None): |
| 601 """ |
| 602 Upload a file to a key into a bucket on S3. |
| 603 |
| 604 :type fp: file |
| 605 :param fp: The file pointer to upload. The file pointer must |
| 606 point at the offset from which you wish to upload, |
| 607 i.e. if uploading the full file, it should point at the |
| 608 start of the file. Normally when a file is opened for |
| 609 reading, the fp will point at the first byte. See the |
| 610 size parameter below for more info. |
| 611 |
| 612 :type headers: dict |
| 613 :param headers: The headers to pass along with the PUT request |
| 614 |
| 615 :type cb: function |
| 616 :param cb: a callback function that will be called to report |
| 617 progress on the upload. The callback should accept two |
| 618 integer parameters, the first representing the number of |
| 619 bytes that have been successfully transmitted to S3 and |
| 620 the second representing the total size of the object |
| 621 being transmitted. |
| 622 |
| 623 :type num_cb: int |
| 624 :param num_cb: (optional) If a callback is specified with the |
| 625 cb parameter this parameter determines the granularity of |
| 626 the callback by defining the maximum number of times the |
| 627 callback will be called during the file |
| 628 transfer. Providing a negative integer will cause your |
| 629 callback to be called with each buffer read. |
| 630 |
| 631 :type size: int |
| 632 :param size: (optional) The maximum number of bytes to read |
| 633 from the file pointer (fp). This is useful when uploading |
| 634 a file in multiple parts where you are splitting the file |
| 635 up into different ranges to be uploaded. If not specified, |
| 636 the default behaviour is to read all bytes from the file |
| 637 pointer. Fewer bytes may be available. |
| 638 """ |
| 639 provider = self.bucket.connection.provider |
| 640 try: |
| 641 spos = fp.tell() |
| 642 except IOError: |
| 643 spos = None |
| 644 self.read_from_stream = False |
| 645 |
| 646 def sender(http_conn, method, path, data, headers): |
| 647 # This function is called repeatedly for temporary retries |
| 648 # so we must be sure the file pointer is pointing at the |
| 649 # start of the data. |
| 650 if spos is not None and spos != fp.tell(): |
| 651 fp.seek(spos) |
| 652 elif spos is None and self.read_from_stream: |
| 653 # if seek is not supported, and we've read from this |
| 654 # stream already, then we need to abort retries to |
| 655 # avoid setting bad data. |
| 656 raise provider.storage_data_error( |
| 657 'Cannot retry failed request. fp does not support seeking.') |
| 658 |
| 659 http_conn.putrequest(method, path) |
| 660 for key in headers: |
| 661 http_conn.putheader(key, headers[key]) |
| 662 http_conn.endheaders() |
| 663 |
| 664 # Calculate all MD5 checksums on the fly, if not already computed |
| 665 if not self.base64md5: |
| 666 m = md5() |
| 667 else: |
| 668 m = None |
| 669 |
| 670 save_debug = self.bucket.connection.debug |
| 671 self.bucket.connection.debug = 0 |
| 672 # If the debuglevel < 3 we don't want to show connection |
| 673 # payload, so turn off HTTP connection-level debug output (to |
| 674 # be restored below). |
| 675 # Use the getattr approach to allow this to work in AppEngine. |
| 676 if getattr(http_conn, 'debuglevel', 0) < 3: |
| 677 http_conn.set_debuglevel(0) |
| 678 |
| 679 data_len = 0 |
| 680 if cb: |
| 681 if size: |
| 682 cb_size = size |
| 683 elif self.size: |
| 684 cb_size = self.size |
| 685 else: |
| 686 cb_size = 0 |
| 687 if chunked_transfer and cb_size == 0: |
| 688 # For chunked Transfer, we call the cb for every 1MB |
| 689 # of data transferred, except when we know size. |
| 690 cb_count = (1024 * 1024) / self.BufferSize |
| 691 elif num_cb > 1: |
| 692 cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) |
| 693 elif num_cb < 0: |
| 694 cb_count = -1 |
| 695 else: |
| 696 cb_count = 0 |
| 697 i = 0 |
| 698 cb(data_len, cb_size) |
| 699 |
| 700 bytes_togo = size |
| 701 if bytes_togo and bytes_togo < self.BufferSize: |
| 702 chunk = fp.read(bytes_togo) |
| 703 else: |
| 704 chunk = fp.read(self.BufferSize) |
| 705 if spos is None: |
| 706 # read at least something from a non-seekable fp. |
| 707 self.read_from_stream = True |
| 708 while chunk: |
| 709 chunk_len = len(chunk) |
| 710 data_len += chunk_len |
| 711 if chunked_transfer: |
| 712 http_conn.send('%x;\r\n' % chunk_len) |
| 713 http_conn.send(chunk) |
| 714 http_conn.send('\r\n') |
| 715 else: |
| 716 http_conn.send(chunk) |
| 717 if m: |
| 718 m.update(chunk) |
| 719 if bytes_togo: |
| 720 bytes_togo -= chunk_len |
| 721 if bytes_togo <= 0: |
| 722 break |
| 723 if cb: |
| 724 i += 1 |
| 725 if i == cb_count or cb_count == -1: |
| 726 cb(data_len, cb_size) |
| 727 i = 0 |
| 728 if bytes_togo and bytes_togo < self.BufferSize: |
| 729 chunk = fp.read(bytes_togo) |
| 730 else: |
| 731 chunk = fp.read(self.BufferSize) |
| 732 |
| 733 self.size = data_len |
| 734 |
| 735 if m: |
| 736 # Use the chunked trailer for the digest |
| 737 hd = m.hexdigest() |
| 738 self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd) |
| 739 |
| 740 if chunked_transfer: |
| 741 http_conn.send('0\r\n') |
| 742 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) |
| 743 http_conn.send('\r\n') |
| 744 |
| 745 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 746 cb(data_len, cb_size) |
| 747 |
| 748 response = http_conn.getresponse() |
| 749 body = response.read() |
| 750 http_conn.set_debuglevel(save_debug) |
| 751 self.bucket.connection.debug = save_debug |
| 752 if ((response.status == 500 or response.status == 503 or |
| 753 response.getheader('location')) and not chunked_transfer): |
| 754 # we'll try again. |
| 755 return response |
| 756 elif response.status >= 200 and response.status <= 299: |
| 757 self.etag = response.getheader('etag') |
| 758 if self.etag != '"%s"' % self.md5: |
| 759 raise provider.storage_data_error( |
| 760 'ETag from S3 did not match computed MD5') |
| 761 return response |
| 762 else: |
| 763 raise provider.storage_response_error( |
| 764 response.status, response.reason, body) |
| 765 |
| 766 if not headers: |
| 767 headers = {} |
| 768 else: |
| 769 headers = headers.copy() |
| 770 headers['User-Agent'] = UserAgent |
| 771 if self.storage_class != 'STANDARD': |
| 772 headers[provider.storage_class_header] = self.storage_class |
| 773 if 'Content-Encoding' in headers: |
| 774 self.content_encoding = headers['Content-Encoding'] |
| 775 if 'Content-Language' in headers: |
| 776 self.content_language = headers['Content-Language'] |
| 777 if 'Content-Type' in headers: |
| 778 # Some use cases need to suppress sending of the Content-Type |
| 779 # header and depend on the receiving server to set the content |
| 780 # type. This can be achieved by setting headers['Content-Type'] |
| 781 # to None when calling this method. |
| 782 if headers['Content-Type'] is None: |
| 783 # Delete null Content-Type value to skip sending that header. |
| 784 del headers['Content-Type'] |
| 785 else: |
| 786 self.content_type = headers['Content-Type'] |
| 787 elif self.path: |
| 788 self.content_type = mimetypes.guess_type(self.path)[0] |
| 789 if self.content_type is None: |
| 790 self.content_type = self.DefaultContentType |
| 791 headers['Content-Type'] = self.content_type |
| 792 else: |
| 793 headers['Content-Type'] = self.content_type |
| 794 if self.base64md5: |
| 795 headers['Content-MD5'] = self.base64md5 |
| 796 if chunked_transfer: |
| 797 headers['Transfer-Encoding'] = 'chunked' |
| 798 #if not self.base64md5: |
| 799 # headers['Trailer'] = "Content-MD5" |
| 800 else: |
| 801 headers['Content-Length'] = str(self.size) |
| 802 headers['Expect'] = '100-Continue' |
| 803 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 804 resp = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 805 self.name, headers, |
| 806 sender=sender, |
| 807 query_args=query_args) |
| 808 self.handle_version_headers(resp, force=True) |
| 809 |
| 810 def compute_md5(self, fp, size=None): |
| 811 """ |
| 812 :type fp: file |
| 813 :param fp: File pointer to the file to MD5 hash. The file |
| 814 pointer will be reset to the same position before the |
| 815 method returns. |
| 816 |
| 817 :type size: int |
| 818 :param size: (optional) The maximum number of bytes to read |
| 819 from the file pointer (fp). This is useful when uploading |
| 820 a file in multiple parts where the file is being split |
| 821 in place into different parts. Fewer bytes may be available. |
| 822 |
| 823 :rtype: tuple |
| 824 :return: A tuple containing the hex digest version of the MD5 |
| 825 hash as the first element and the base64 encoded version |
| 826 of the plain digest as the second element. |
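| |
| Example (sketch; the file name is hypothetical):: |
| |
| fp = open('data.bin', 'rb') |
| hex_md5, b64_md5 = key.compute_md5(fp) |
| # fp is back at its original position, ready for upload. |
| key.set_contents_from_file(fp, md5=(hex_md5, b64_md5)) |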
| 827 """ |
| 828 tup = compute_md5(fp, size=size) |
| 829 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. |
| 830 # The internal implementation of compute_md5() needs to return the |
| 831 # data size but we don't want to return that value to the external |
| 832 # caller because it changes the class interface (i.e. it might |
| 833 # break some code) so we consume the third tuple value here and |
| 834 # return the remainder of the tuple to the caller, thereby preserving |
| 835 # the existing interface. |
| 836 self.size = tup[2] |
| 837 return tup[0:2] |
| 838 |
| 839 def set_contents_from_stream(self, fp, headers=None, replace=True, |
| 840 cb=None, num_cb=10, policy=None, |
| 841 reduced_redundancy=False, query_args=None, |
| 842 size=None): |
| 843 """ |
| 844 Store an object using the name of the Key object as the key in |
| 845 cloud and the contents of the data stream pointed to by 'fp' as |
| 846 the contents. |
| 847 |
| 848 The stream object is not seekable and the total size is not known. |
| 849 This means we cannot specify the Content-Length or Content-MD5 |
| 850 headers. For large uploads this avoids the delay of calculating |
| 851 the MD5 digest, at the cost of being unable to verify the |
| 852 integrity of the uploaded data. |
| 854 |
| 855 :type fp: file |
| 856 :param fp: the file whose contents are to be uploaded |
| 857 |
| 858 :type headers: dict |
| 859 :param headers: additional HTTP headers to be sent with the |
| 860 PUT request. |
| 861 |
| 862 :type replace: bool |
| 863 :param replace: If this parameter is False, the method will first check |
| 864 to see if an object exists in the bucket with the same key. If it |
| 865 does, it won't overwrite it. The default value is True which will |
| 866 overwrite the object. |
| 867 |
| 868 :type cb: function |
| 869 :param cb: a callback function that will be called to report |
| 870 progress on the upload. The callback should accept two integer |
| 871 parameters, the first representing the number of bytes that have |
| 872 been successfully transmitted to the provider and the second representing the |
| 873 total number of bytes that need to be transmitted. |
| 874 |
| 875 :type num_cb: int |
| 876 :param num_cb: (optional) If a callback is specified with the |
| 877 cb parameter, this parameter determines the granularity of |
| 878 the callback by defining the maximum number of times the |
| 879 callback will be called during the file transfer. |
| 880 |
| 881 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 882 :param policy: A canned ACL policy that will be applied to the |
| 883 new key. |
| 884 |
| 885 :type reduced_redundancy: bool |
| 886 :param reduced_redundancy: If True, this will set the storage |
| 887 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 888 Redundancy Storage (RRS) feature of S3 provides lower |
| 889 redundancy at lower storage cost. |
| 890 |
| 891 :type size: int |
| 892 :param size: (optional) The maximum number of bytes to read from |
| 893 the file pointer (fp). This is useful when uploading a |
| 894 file in multiple parts where you are splitting the file up |
| 895 into different ranges to be uploaded. If not specified, |
| 896 the default behaviour is to read all bytes from the file |
| 897 pointer. Fewer bytes may be available. |
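| |
| Example (sketch; assumes the provider supports chunked transfer):: |
| |
| import subprocess |
| proc = subprocess.Popen(['tar', 'czf', '-', 'mydir'], |
| stdout=subprocess.PIPE) |
| key.set_contents_from_stream(proc.stdout) |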
| 898 """ |
| 899 |
| 900 provider = self.bucket.connection.provider |
| 901 if not provider.supports_chunked_transfer(): |
| 902 raise BotoClientError('%s does not support chunked transfer' |
| 903 % provider.get_provider_name()) |
| 904 |
| 905 # Name of the Object should be specified explicitly for Streams. |
| 906 if not self.name or self.name == '': |
| 907 raise BotoClientError('Cannot determine the destination ' |
| 908 'object name for the given stream') |
| 909 |
| 910 if headers is None: |
| 911 headers = {} |
| 912 if policy: |
| 913 headers[provider.acl_header] = policy |
| 914 |
| 915 if reduced_redundancy: |
| 916 self.storage_class = 'REDUCED_REDUNDANCY' |
| 917 if provider.storage_class_header: |
| 918 headers[provider.storage_class_header] = self.storage_class |
| 919 |
| 920 if self.bucket is not None: |
| 921 if not replace: |
| 922 if self.bucket.lookup(self.name): |
| 923 return |
| 924 self.send_file(fp, headers, cb, num_cb, query_args, |
| 925 chunked_transfer=True, size=size) |
| 926 |
| 927 def set_contents_from_file(self, fp, headers=None, replace=True, |
| 928 cb=None, num_cb=10, policy=None, md5=None, |
| 929 reduced_redundancy=False, query_args=None, |
| 930 encrypt_key=False, size=None, rewind=False): |
| 931 """ |
| 932 Store an object in S3 using the name of the Key object as the |
| 933 key in S3 and the contents of the file pointed to by 'fp' as the |
| 934 contents. The data is read from 'fp' from its current position until |
| 935 'size' bytes have been read or EOF. |
| 936 |
| 937 :type fp: file |
| 938 :param fp: the file whose contents to upload |
| 939 |
| 940 :type headers: dict |
| 941 :param headers: Additional HTTP headers that will be sent with |
| 942 the PUT request. |
| 943 |
| 944 :type replace: bool |
| 945 :param replace: If this parameter is False, the method will |
| 946 first check to see if an object exists in the bucket with |
| 947 the same key. If it does, it won't overwrite it. The |
| 948 default value is True which will overwrite the object. |
| 949 |
| 950 :type cb: function |
| 951 :param cb: a callback function that will be called to report |
| 952 progress on the upload. The callback should accept two |
| 953 integer parameters, the first representing the number of |
| 954 bytes that have been successfully transmitted to S3 and |
| 955 the second representing the total size of the object |
| 956 being transmitted. |
| 957 |
| 958 :type num_cb: int |
| 959 :param num_cb: (optional) If a callback is specified with the |
| 960 cb parameter this parameter determines the granularity of |
| 961 the callback by defining the maximum number of times the |
| 962 callback will be called during the file transfer. |
| 963 |
| 964 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 965 :param policy: A canned ACL policy that will be applied to the |
| 966 new key in S3. |
| 967 |
| 968 :type md5: A tuple containing the hexdigest version of the MD5 |
| 969 checksum of the file as the first element and the |
| 970 Base64-encoded version of the plain checksum as the second |
| 971 element. This is the same format returned by the |
| 972 compute_md5 method. |
| 973 :param md5: If you need to compute the MD5 for any reason |
| 974 prior to upload, it's silly to have to do it twice so this |
| 975 param, if present, will be used as the MD5 values of the |
| 976 file. Otherwise, the checksum will be computed. |
| 977 |
| 978 :type reduced_redundancy: bool |
| 979 :param reduced_redundancy: If True, this will set the storage |
| 980 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 981 Redundancy Storage (RRS) feature of S3 provides lower |
| 982 redundancy at lower storage cost. |
| 983 |
| 984 :type encrypt_key: bool |
| 985 :param encrypt_key: If True, the new copy of the object will |
| 986 be encrypted on the server-side by S3 and will be stored |
| 987 in an encrypted form while at rest in S3. |
| 988 |
| 989 :type size: int |
| 990 :param size: (optional) The maximum number of bytes to read |
| 991 from the file pointer (fp). This is useful when uploading |
| 992 a file in multiple parts where you are splitting the file |
| 993 up into different ranges to be uploaded. If not specified, |
| 994 the default behaviour is to read all bytes from the file |
| 995 pointer. Fewer bytes may be available. |
| 996 |
| 997 :type rewind: bool |
| 998 :param rewind: (optional) If True, the file pointer (fp) will |
| 999 be rewound to the start before any bytes are read from |
| 1000 it. The default behaviour is False which reads from the |
| 1001 current position of the file pointer (fp). |
| 1002 |
| 1003 :rtype: int |
| 1004 :return: The number of bytes written to the key. |
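| |
| Example (sketch; the file name is hypothetical):: |
| |
| with open('photo.jpg', 'rb') as fp: |
| key.set_contents_from_file(fp, rewind=True) |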
| 1005 """ |
| 1006 provider = self.bucket.connection.provider |
| 1007 headers = headers or {} |
| 1008 if policy: |
| 1009 headers[provider.acl_header] = policy |
| 1010 if encrypt_key: |
| 1011 headers[provider.server_side_encryption_header] = 'AES256' |
| 1012 |
| 1013 if rewind: |
| 1014 # caller requests reading from beginning of fp. |
| 1015 fp.seek(0, os.SEEK_SET) |
| 1016 else: |
| 1017 spos = fp.tell() |
| 1018 fp.seek(0, os.SEEK_END) |
| 1019 if fp.tell() == spos: |
| 1020 fp.seek(0, os.SEEK_SET) |
| 1021 if fp.tell() != spos: |
| 1022 # Raise an exception as this is likely a programming error |
| 1023 # whereby there is data before the fp but nothing after it. |
| 1024 fp.seek(spos) |
| 1025 raise AttributeError( |
| 1026 'fp is at EOF. Use rewind option or seek() to data start.') |
| 1027 # seek back to the correct position. |
| 1028 fp.seek(spos) |
| 1029 |
| 1030 if reduced_redundancy: |
| 1031 self.storage_class = 'REDUCED_REDUNDANCY' |
| 1032 if provider.storage_class_header: |
| 1033 headers[provider.storage_class_header] = self.storage_class |
| 1034 # TODO - What if provider doesn't support reduced redundancy? |
| 1035 # What if different providers provide different classes? |
| 1036 if hasattr(fp, 'name'): |
| 1037 self.path = fp.name |
| 1038 |
| 1039 if self.bucket is not None: |
| 1040 if not md5 and provider.supports_chunked_transfer(): |
| 1041 # defer md5 calculation to on the fly and |
| 1042 # we don't know anything about size yet. |
| 1043 chunked_transfer = True |
| 1044 self.size = None |
| 1045 else: |
| 1046 chunked_transfer = False |
| 1047 if not md5: |
| 1048 # compute_md5() will also set self.size to the actual |
| 1049 # number of bytes read while computing the md5. |
| 1050 md5 = self.compute_md5(fp, size) |
| 1051 # adjust size if required |
| 1052 size = self.size |
| 1053 elif size: |
| 1054 self.size = size |
| 1055 else: |
| 1056 # If md5 is provided, we still need the size, so |
| 1057 # compute it from the bytes remaining to end of file. |
| 1058 spos = fp.tell() |
| 1059 fp.seek(0, os.SEEK_END) |
| 1060 self.size = fp.tell() - spos |
| 1061 fp.seek(spos) |
| 1062 size = self.size |
| 1063 self.md5 = md5[0] |
| 1064 self.base64md5 = md5[1] |
| 1065 |
| 1066 if self.name is None: |
| 1067 self.name = self.md5 |
| 1068 if not replace: |
| 1069 if self.bucket.lookup(self.name): |
| 1070 return |
| 1071 |
| 1072 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1073 query_args=query_args, |
| 1074 chunked_transfer=chunked_transfer, size=size) |
| 1075 # return number of bytes written. |
| 1076 return self.size |
| 1077 |
| 1078 def set_contents_from_filename(self, filename, headers=None, replace=True, |
| 1079 cb=None, num_cb=10, policy=None, md5=None, |
| 1080 reduced_redundancy=False, |
| 1081 encrypt_key=False): |
| 1082 """ |
| 1083 Store an object in S3 using the name of the Key object as the |
| 1084 key in S3 and the contents of the file named by 'filename'. |
| 1085 See set_contents_from_file method for details about the |
| 1086 parameters. |
| 1087 |
| 1088 :type filename: string |
| 1089 :param filename: The name of the file that you want to put onto S3 |
| 1090 |
| 1091 :type headers: dict |
| 1092 :param headers: Additional headers to pass along with the |
| 1093 request to AWS. |
| 1094 |
| 1095 :type replace: bool |
| 1096 :param replace: If True, replaces the contents of the key |
| 1097 if it already exists. |
| 1098 |
| 1099 :type cb: function |
| 1100 :param cb: a callback function that will be called to report |
| 1101 progress on the upload. The callback should accept two |
| 1102 integer parameters, the first representing the number of |
| 1103 bytes that have been successfully transmitted to S3 and |
| 1104 the second representing the total size of the object |
| 1105 being transmitted. |
| 1106 |
| 1107 :type num_cb: int |
| 1108 :param num_cb: (optional) If a callback is specified with the |
| 1109 cb parameter this parameter determines the granularity of |
| 1110 the callback by defining the maximum number of times the |
| 1111 callback will be called during the file transfer. |
| 1112 |
| 1113 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1114 :param policy: A canned ACL policy that will be applied to the |
| 1115 new key in S3. |
| 1116 |
| 1117 :type md5: A tuple containing the hexdigest version of the MD5 |
| 1118 checksum of the file as the first element and the |
| 1119 Base64-encoded version of the plain checksum as the second |
| 1120 element. This is the same format returned by the |
| 1121 compute_md5 method. |
| 1122 :param md5: If you need to compute the MD5 for any reason |
| 1123 prior to upload, it's silly to have to do it twice so this |
| 1124 param, if present, will be used as the MD5 values of the |
| 1125 file. Otherwise, the checksum will be computed. |
| 1126 |
| 1127 :type reduced_redundancy: bool |
| 1128 :param reduced_redundancy: If True, this will set the storage |
| 1129 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1130 Redundancy Storage (RRS) feature of S3 provides lower |
| 1131 redundancy at lower storage cost. |
| |
| :type encrypt_key: bool |
| 1132 :param encrypt_key: If True, the new copy of the object |
| 1133 will be encrypted on the server-side by S3 and will be |
| 1134 stored in an encrypted form while at rest in S3. |
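| |
| Example (sketch; the path is hypothetical):: |
| |
| key.set_contents_from_filename('/tmp/report.csv', |
| reduced_redundancy=True) |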
| 1135 """ |
| 1136 fp = open(filename, 'rb') |
| 1137 self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1138 policy, md5, reduced_redundancy, |
| 1139 encrypt_key=encrypt_key) |
| 1140 fp.close() |
| 1141 |
| 1142 def set_contents_from_string(self, s, headers=None, replace=True, |
| 1143 cb=None, num_cb=10, policy=None, md5=None, |
| 1144 reduced_redundancy=False, |
| 1145 encrypt_key=False): |
| 1146 """ |
| 1147 Store an object in S3 using the name of the Key object as the |
| 1148 key in S3 and the string 's' as the contents. |
| 1149 See set_contents_from_file method for details about the |
| 1150 parameters. |
| 1151 |
| 1152 :type headers: dict |
| 1153 :param headers: Additional headers to pass along with the |
| 1154 request to AWS. |
| 1155 |
| 1156 :type replace: bool |
| 1157 :param replace: If True, replaces the contents of the key if |
| 1158 it already exists. |
| 1159 |
| 1160 :type cb: function |
| 1161 :param cb: a callback function that will be called to report |
| 1162 progress on the upload. The callback should accept two |
| 1163 integer parameters, the first representing the number of |
| 1164 bytes that have been successfully transmitted to S3 and |
| 1165 the second representing the total size of the object |
| 1166 being transmitted. |
| 1167 |
| 1168 :type num_cb: int |
| 1169 :param num_cb: (optional) If a callback is specified with the |
| 1170 cb parameter this parameter determines the granularity of |
| 1171 the callback by defining the maximum number of times the |
| 1172 callback will be called during the file transfer. |
| 1173 |
| 1174 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1175 :param policy: A canned ACL policy that will be applied to the |
| 1176 new key in S3. |
| 1177 |
| 1178 :type md5: A tuple containing the hexdigest version of the MD5 |
| 1179 checksum of the file as the first element and the |
| 1180 Base64-encoded version of the plain checksum as the second |
| 1181 element. This is the same format returned by the |
| 1182 compute_md5 method. |
| 1183 :param md5: If you need to compute the MD5 for any reason |
| 1184 prior to upload, it's silly to have to do it twice so this |
| 1185 param, if present, will be used as the MD5 values of the |
| 1186 file. Otherwise, the checksum will be computed. |
| 1187 |
| 1188 :type reduced_redundancy: bool |
| 1189 :param reduced_redundancy: If True, this will set the storage |
| 1190 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1191 Redundancy Storage (RRS) feature of S3 provides lower |
| 1192 redundancy at lower storage cost. |
| 1193 |
| 1194 :type encrypt_key: bool |
| 1195 :param encrypt_key: If True, the new copy of the object will |
| 1196 be encrypted on the server-side by S3 and will be stored |
| 1197 in an encrypted form while at rest in S3. |
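| |
| Example (sketch):: |
| |
| key.set_contents_from_string('Hello World!', policy='public-read') |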
| 1198 """ |
| 1199 if isinstance(s, unicode): |
| 1200 s = s.encode("utf-8") |
| 1201 fp = StringIO.StringIO(s) |
| 1202 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1203 policy, md5, reduced_redundancy, |
| 1204 encrypt_key=encrypt_key) |
| 1205 fp.close() |
| 1206 return r |
| 1207 |
| 1208 def get_file(self, fp, headers=None, cb=None, num_cb=10, |
| 1209 torrent=False, version_id=None, override_num_retries=None, |
| 1210 response_headers=None): |
| 1211 """ |
| 1212 Retrieves a file from an S3 Key |
| 1213 |
| 1214 :type fp: file |
| 1215 :param fp: File pointer to put the data into |
| 1216 |
| 1217 :type headers: dict |
| 1218 :param headers: headers to send when retrieving the file |
| 1219 |
| 1220 :type cb: function |
| 1221 :param cb: a callback function that will be called to report |
| 1222 progress on the download. The callback should accept two |
| 1223 integer parameters, the first representing the number of |
| 1224 bytes that have been successfully received from S3 and |
| 1225 the second representing the total size of the object |
| 1226 being transferred. |
| 1227 |
| 1228 :type num_cb: int |
| 1229 :param num_cb: (optional) If a callback is specified with the |
| 1230 cb parameter this parameter determines the granularity of |
| 1231 the callback by defining the maximum number of times the |
| 1232 callback will be called during the file transfer. |
| 1233 |
| 1234 :type torrent: bool |
| 1235 :param torrent: Flag for whether to get a torrent for the file |
| 1236 |
| 1237 :type override_num_retries: int |
| 1238 :param override_num_retries: If not None will override configured |
| 1239 num_retries parameter for underlying GET. |
| 1240 |
| 1241 :type response_headers: dict |
| 1242 :param response_headers: A dictionary containing HTTP |
| 1243 headers/values that will override any headers associated |
| 1244 with the stored object in the response. See |
| 1245 http://goo.gl/EWOPb for details. |
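| |
| Example (sketch; the local path is hypothetical):: |
| |
| fp = open('/tmp/download.bin', 'wb') |
| key.get_file(fp) |
| fp.close() |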
| 1246 """ |
| 1247 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1248 torrent=torrent, version_id=version_id, |
| 1249 override_num_retries=override_num_retries, |
| 1250 response_headers=response_headers, |
| 1251 query_args=None) |
| 1252 |
| 1253 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, |
| 1254 torrent=False, version_id=None, override_num_retries=None, |
| 1255 response_headers=None, query_args=None): |
| 1256 if headers is None: |
| 1257 headers = {} |
| 1258 save_debug = self.bucket.connection.debug |
| 1259 if self.bucket.connection.debug == 1: |
| 1260 self.bucket.connection.debug = 0 |
| 1261 |
| 1262 query_args = query_args or [] |
| 1263 if torrent: |
| 1264 query_args.append('torrent') |
| 1265 m = None |
| 1266 else: |
| 1267 m = md5() |
| 1268 # If a version_id is passed in, use that. If not, check to see |
| 1269 # if the Key object has an explicit version_id and, if so, use that. |
| 1270 # Otherwise, don't pass a version_id query param. |
| 1271 if version_id is None: |
| 1272 version_id = self.version_id |
| 1273 if version_id: |
| 1274 query_args.append('versionId=%s' % version_id) |
| 1275 if response_headers: |
| 1276 for key in response_headers: |
| 1277 query_args.append('%s=%s' % (key, urllib.quote(response_headers[key]))) |
| 1278 query_args = '&'.join(query_args) |
| 1279 self.open('r', headers, query_args=query_args, |
| 1280 override_num_retries=override_num_retries) |
| 1281 |
| 1282 data_len = 0 |
| 1283 if cb: |
| 1284 if self.size is None: |
| 1285 cb_size = 0 |
| 1286 else: |
| 1287 cb_size = self.size |
| 1288 if self.size is None and num_cb != -1: |
| 1289 # If size is not available due to chunked transfer for example, |
| 1290 # we'll call the cb for every 1MB of data transferred. |
| 1291 cb_count = (1024 * 1024) / self.BufferSize |
| 1292 elif num_cb > 1: |
| 1293 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) |
| 1294 elif num_cb < 0: |
| 1295 cb_count = -1 |
| 1296 else: |
| 1297 cb_count = 0 |
| 1298 i = 0 |
| 1299 cb(data_len, cb_size) |
| 1300 for chunk in self: |
| 1301 fp.write(chunk) |
| 1302 data_len += len(chunk) |
| 1303 if m: |
| 1304 m.update(chunk) |
| 1305 if cb: |
| 1306 if cb_size > 0 and data_len >= cb_size: |
| 1307 break |
| 1308 i += 1 |
| 1309 if i == cb_count or cb_count == -1: |
| 1310 cb(data_len, cb_size) |
| 1311 i = 0 |
| 1312 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 1313 cb(data_len, cb_size) |
| 1314 if m: |
| 1315 self.md5 = m.hexdigest() |
| 1316 if self.size is None and not torrent and "Range" not in headers: |
| 1317 self.size = data_len |
| 1318 self.close() |
| 1319 self.bucket.connection.debug = save_debug |
| 1320 |
| 1321 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): |
| 1322 """ |
| 1323 Get a torrent file (see get_file) |
| 1324 |
| 1325 :type fp: file |
| 1326 :param fp: The file pointer of where to put the torrent |
| 1327 |
| 1328 :type headers: dict |
| 1329 :param headers: Headers to be passed |
| 1330 |
| 1331 :type cb: function |
| 1332 :param cb: a callback function that will be called to report |
| 1333 progress on the download. The callback should accept two |
| 1334 integer parameters, the first representing the number of |
| 1335 bytes that have been successfully received from S3 and |
| 1336 the second representing the total size of the object |
| 1337 being transferred. |
| 1338 |
| 1339 :type num_cb: int |
| 1340 :param num_cb: (optional) If a callback is specified with the |
| 1341 cb parameter this parameter determines the granularity of |
| 1342 the callback by defining the maximum number of times the |
| 1343 callback will be called during the file transfer. |
| 1344 |
| 1345 """ |
| 1346 return self.get_file(fp, headers, cb, num_cb, torrent=True) |
| 1347 |
| 1348 def get_contents_to_file(self, fp, headers=None, |
| 1349 cb=None, num_cb=10, |
| 1350 torrent=False, |
| 1351 version_id=None, |
| 1352 res_download_handler=None, |
| 1353 response_headers=None): |
| 1354 """ |
| 1355 Retrieve an object from S3 using the name of the Key object as the |
| 1356 key in S3. Write the contents of the object to the file pointed |
| 1357 to by 'fp'. |
| 1358 |
| 1359 :type fp: file-like object |
| 1360 :param fp: the file pointer (or any file-like object) to which |
| the contents of the object will be written |
| 1361 |
| 1362 :type headers: dict |
| 1363 :param headers: additional HTTP headers that will be sent with |
| 1364 the GET request. |
| 1365 |
| 1366 :type cb: function |
| 1367 :param cb: a callback function that will be called to report |
| 1368 progress on the download. The callback should accept two |
| 1369 integer parameters, the first representing the number of |
| 1370 bytes that have been successfully received from S3 and |
| 1371 the second representing the total size of the object |
| 1372 being transferred. |
| 1373 |
| 1374 :type num_cb: int |
| 1375 :param num_cb: (optional) If a callback is specified with the |
| 1376 cb parameter this parameter determines the granularity of |
| 1377 the callback by defining the maximum number of times the |
| 1378 callback will be called during the file transfer. |
| 1379 |
| 1380 :type torrent: bool |
| 1381 :param torrent: If True, returns the contents of a torrent |
| 1382 file as a string. |
| 1383 |
| 1384 :type res_download_handler: ResumableDownloadHandler |
| 1385 :param res_download_handler: If provided, this handler will |
| 1386 perform the download. |
| 1387 |
| 1388 :type response_headers: dict |
| 1389 :param response_headers: A dictionary containing HTTP |
| 1390 headers/values that will override any headers associated |
| 1391 with the stored object in the response. See |
| 1392 http://goo.gl/EWOPb for details. |
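| |
| Example (sketch; the local path is hypothetical):: |
| |
| fp = open('/tmp/download.bin', 'wb') |
| key.get_contents_to_file(fp) |
| fp.close() |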
| 1393 """ |
| 1394 if self.bucket != None: |
| 1395 if res_download_handler: |
| 1396 res_download_handler.get_file(self, fp, headers, cb, num_cb, |
| 1397 torrent=torrent, |
| 1398 version_id=version_id) |
| 1399 else: |
| 1400 self.get_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1401 version_id=version_id, |
| 1402 response_headers=response_headers) |
| 1403 |
| 1404 def get_contents_to_filename(self, filename, headers=None, |
| 1405 cb=None, num_cb=10, |
| 1406 torrent=False, |
| 1407 version_id=None, |
| 1408 res_download_handler=None, |
| 1409 response_headers=None): |
| 1410 """ |
| 1411 Retrieve an object from S3 using the name of the Key object as the |
| 1412 key in S3. Store contents of the object to a file named by 'filename'. |
| 1413 See get_contents_to_file method for details about the |
| 1414 parameters. |
| 1415 |
| 1416 :type filename: string |
| 1417 :param filename: The filename of where to put the file contents |
| 1418 |
| 1419 :type headers: dict |
| 1420 :param headers: Any additional headers to send in the request |
| 1421 |
| 1422 :type cb: function |
| 1423 :param cb: a callback function that will be called to report |
| 1424 progress on the download. The callback should accept two |
| 1425 integer parameters, the first representing the number of |
| 1426 bytes that have been successfully received from S3 and |
| 1427 the second representing the total size of the object |
| 1428 being transferred. |
| 1429 |
| 1430 :type num_cb: int |
| 1431 :param num_cb: (optional) If a callback is specified with the |
| 1432 cb parameter this parameter determines the granularity of |
| 1433 the callback by defining the maximum number of times the |
| 1434 callback will be called during the file transfer. |
| 1435 |
| 1436 :type torrent: bool |
| 1437 :param torrent: If True, returns the contents of a torrent file |
| 1438 as a string. |
| 1439 |
| 1440 :type res_upload_handler: ResumableDownloadHandler |
| 1441 :param res_download_handler: If provided, this handler will |
| 1442 perform the download. |
| 1443 |
| 1444 :type response_headers: dict |
| 1445 :param response_headers: A dictionary containing HTTP |
| 1446 headers/values that will override any headers associated |
| 1447 with the stored object in the response. See |
| 1448 http://goo.gl/EWOPb for details. |
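
        Example (a minimal sketch; assumes 'key' was obtained from a
        bucket as in get_contents_to_file, and the path is illustrative)::

            key.get_contents_to_filename('/tmp/mykey')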
| 1449 """ |
| 1450 fp = open(filename, 'wb') |
| 1451 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1452 version_id=version_id, |
| 1453 res_download_handler=res_download_handler, |
| 1454 response_headers=response_headers) |
| 1455 fp.close() |
| 1456 # if last_modified date was sent from s3, try to set file's timestamp |
| 1457 if self.last_modified != None: |
| 1458 try: |
| 1459 modified_tuple = rfc822.parsedate_tz(self.last_modified) |
| 1460 modified_stamp = int(rfc822.mktime_tz(modified_tuple)) |
| 1461 os.utime(fp.name, (modified_stamp, modified_stamp)) |
| 1462 except Exception: |
| 1463 pass |
| 1464 |
    def get_contents_as_string(self, headers=None,
                               cb=None, num_cb=10,
                               torrent=False,
                               version_id=None,
                               response_headers=None):
| 1470 """ |
| 1471 Retrieve an object from S3 using the name of the Key object as the |
| 1472 key in S3. Return the contents of the object as a string. |
| 1473 See get_contents_to_file method for details about the |
| 1474 parameters. |
| 1475 |
| 1476 :type headers: dict |
| 1477 :param headers: Any additional headers to send in the request |
| 1478 |
| 1479 :type cb: function |
| 1480 :param cb: a callback function that will be called to report |
| 1481 progress on the upload. The callback should accept two |
| 1482 integer parameters, the first representing the number of |
| 1483 bytes that have been successfully transmitted to S3 and |
| 1484 the second representing the size of the to be transmitted |
| 1485 object. |
| 1486 |
| 1487 :type cb: int |
| 1488 :param num_cb: (optional) If a callback is specified with the |
| 1489 cb parameter this parameter determines the granularity of |
| 1490 the callback by defining the maximum number of times the |
| 1491 callback will be called during the file transfer. |
| 1492 |
| 1493 :type torrent: bool |
| 1494 :param torrent: If True, returns the contents of a torrent file |
| 1495 as a string. |
| 1496 |
| 1497 :type response_headers: dict |
| 1498 :param response_headers: A dictionary containing HTTP |
| 1499 headers/values that will override any headers associated |
| 1500 with the stored object in the response. See |
| 1501 http://goo.gl/EWOPb for details. |
| 1502 |
| 1503 :rtype: string |
| 1504 :returns: The contents of the file as a string |
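
        Example (a minimal sketch; the 'response-content-type' override
        shown is one of the standard S3 GET response overrides)::

            data = key.get_contents_as_string()
            # Override the returned Content-Type for this request only:
            text = key.get_contents_as_string(
                response_headers={'response-content-type': 'text/plain'})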
| 1505 """ |
| 1506 fp = StringIO.StringIO() |
| 1507 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1508 version_id=version_id, |
| 1509 response_headers=response_headers) |
| 1510 return fp.getvalue() |
| 1511 |
    def add_email_grant(self, permission, email_address, headers=None):
        """
        Convenience method that provides a quick way to add an email grant
        to a key.  This method retrieves the current ACL, creates a new
        grant based on the parameters passed in, adds that grant to the ACL
        and then PUTs the new ACL back to S3.

        :type permission: string
        :param permission: The permission being granted.  Should be one of:
            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).

        :type email_address: string
        :param email_address: The email address associated with the AWS
            account you are granting the permission to.

        :type headers: dict
        :param headers: Any additional headers to send in the request.
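
        Example (a minimal sketch; the address is illustrative)::

            key.add_email_grant('READ', 'user@example.com')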
| 1534 """ |
| 1535 policy = self.get_acl(headers=headers) |
| 1536 policy.acl.add_email_grant(permission, email_address) |
| 1537 self.set_acl(policy, headers=headers) |
| 1538 |
    def add_user_grant(self, permission, user_id, headers=None,
                       display_name=None):
        """
        Convenience method that provides a quick way to add a canonical
        user grant to a key.  This method retrieves the current ACL,
        creates a new grant based on the parameters passed in, adds that
        grant to the ACL and then PUTs the new ACL back to S3.

        :type permission: string
        :param permission: The permission being granted.  Should be one of:
            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).

        :type user_id: string
        :param user_id: The canonical user id associated with the AWS
            account you are granting the permission to.

        :type headers: dict
        :param headers: Any additional headers to send in the request.

        :type display_name: string
        :param display_name: An optional string containing the user's
            Display Name.  Only required on Walrus.
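
        Example (a minimal sketch; the id shown stands in for a real
        64-character canonical user id)::

            key.add_user_grant('FULL_CONTROL', '6e8a...f1d2')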
| 1558 """ |
| 1559 policy = self.get_acl(headers=headers) |
| 1560 policy.acl.add_user_grant(permission, user_id, |
| 1561 display_name=display_name) |
| 1562 self.set_acl(policy, headers=headers) |
| 1563 |
    def _normalize_metadata(self, metadata):
        """
        Lowercase all metadata names.  Accepts either a set of names or
        a dict mapping names to values; returns the same kind of
        collection with lowercased keys.
        """
        if isinstance(metadata, set):
            norm_metadata = set()
            for k in metadata:
                norm_metadata.add(k.lower())
        else:
            norm_metadata = {}
            for k in metadata:
                norm_metadata[k.lower()] = metadata[k]
        return norm_metadata

    def _get_remote_metadata(self, headers=None):
        """
        Extracts metadata from the existing key into a dict, so we can
        overwrite/delete from it to form the new set of metadata to apply
        to the key.
        """
        metadata = {}
        for underscore_name in self._underscore_base_user_settable_fields:
            if hasattr(self, underscore_name):
                value = getattr(self, underscore_name)
                if value:
                    # Generate the HTTP field name corresponding to the
                    # "_"-named field.
                    field_name = underscore_name.replace('_', '-')
                    metadata[field_name.lower()] = value
        # self.metadata contains custom metadata, which are all user-settable.
        prefix = self.provider.metadata_prefix
        for underscore_name in self.metadata:
            field_name = underscore_name.replace('_', '-')
            metadata['%s%s' % (prefix, field_name.lower())] = (
                self.metadata[underscore_name])
        return metadata

    def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
                            headers=None):
        """
        Replace the metadata on the remote object: merge 'metadata_plus'
        into the object's existing metadata, remove any names listed in
        'metadata_minus', and apply the result via an in-place copy_key()
        call, optionally preserving the object's ACL.
        """
        metadata_plus = self._normalize_metadata(metadata_plus)
        metadata_minus = self._normalize_metadata(metadata_minus)
        metadata = self._get_remote_metadata()
        metadata.update(metadata_plus)
        for h in metadata_minus:
            if h in metadata:
                del metadata[h]
        src_bucket = self.bucket
        # Boto prepends the meta prefix when adding headers, so strip the
        # prefix in the metadata before sending it back in to the
        # copy_key() call.
        rewritten_metadata = {}
        for h in metadata:
            if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
                rewritten_h = (h.replace('x-goog-meta-', '')
                               .replace('x-amz-meta-', ''))
            else:
                rewritten_h = h
            rewritten_metadata[rewritten_h] = metadata[h]
        metadata = rewritten_metadata
        src_bucket.copy_key(self.name, self.bucket.name, self.name,
                            metadata=metadata, preserve_acl=preserve_acl)

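    # Example usage of set_remote_metadata (a minimal sketch; the header
    # names and values are illustrative): add or overwrite one custom
    # metadata entry and remove another, preserving the object's ACL.
    #
    #     key.set_remote_metadata(metadata_plus={'x-amz-meta-color': 'blue'},
    #                             metadata_minus={'x-amz-meta-flavor'},
    #                             preserve_acl=True)
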
    def restore(self, days, headers=None):
        """Restore an object from an archive.

        :type days: int
        :param days: The lifetime of the restored object (must be at
            least 1 day).  If the object is already restored, this
            parameter can be used to readjust the lifetime of the
            restored copy; in that case, the days param is with respect
            to the initial time of the request.  If the object has not
            yet been restored, the days param is with respect to the
            completion time of the request.

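        Example (a minimal sketch; assumes this key's storage class has
        transitioned to GLACIER)::

            key.restore(days=5)
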
| 1633 """ |
| 1634 response = self.bucket.connection.make_request( |
| 1635 'POST', self.bucket.name, self.name, |
| 1636 data=self.RestoreBody % days, |
| 1637 headers=headers, query_args='restore') |
| 1638 if response.status not in (200, 202): |
| 1639 provider = self.bucket.connection.provider |
| 1640 raise provider.storage_response_error(response.status, |
| 1641 response.reason, |
| 1642 response.read()) |