| 1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ |
| 2 # Copyright (c) 2011, Nexenta Systems Inc. |
| 3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a |
| 6 # copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- |
| 11 # lowing conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be included |
| 14 # in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 22 # IN THE SOFTWARE. |
| 23 |
| 24 import mimetypes |
| 25 import os |
| 26 import re |
| 27 import rfc822 |
| 28 import StringIO |
| 29 import base64 |
| 30 import math |
| 31 import urllib |
| 32 import boto.utils |
| 33 from boto.exception import BotoClientError |
| 34 from boto.provider import Provider |
| 35 from boto.s3.user import User |
| 36 from boto import UserAgent |
| 37 from boto.utils import compute_md5 |
| 38 try: |
| 39 from hashlib import md5 |
| 40 except ImportError: |
| 41 from md5 import md5 |
| 42 |
| 43 |
| 44 class Key(object): |
| 45 """ |
| 46 Represents a key (object) in an S3 bucket. |
| 47 |
| 48 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. |
| 49 :ivar name: The name of this Key object. |
| 50 :ivar metadata: A dictionary containing user metadata that you |
| 51 wish to store with the object or that has been retrieved from |
| 52 an existing object. |
| 53 :ivar cache_control: The value of the `Cache-Control` HTTP header. |
| 54 :ivar content_type: The value of the `Content-Type` HTTP header. |
| 55 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. |
| 56 :ivar content_disposition: The value of the `Content-Disposition` HTTP |
| 57 header. |
| 58 :ivar content_language: The value of the `Content-Language` HTTP header. |
| 59 :ivar etag: The `etag` associated with this object. |
| 60 :ivar last_modified: The string timestamp representing the last |
| 61 time this object was modified in S3. |
| 62 :ivar owner: The ID of the owner of this object. |
| 63 :ivar storage_class: The storage class of the object. Currently, one of: |
| 64 STANDARD | REDUCED_REDUNDANCY | GLACIER |
| 65 :ivar md5: The MD5 hash of the contents of the object. |
| 66 :ivar size: The size, in bytes, of the object. |
| 67 :ivar version_id: The version ID of this object, if it is a versioned |
| 68 object. |
| 69 :ivar encrypted: Whether the object is encrypted while at rest on |
| 70 the server. |
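| |
| Example (an illustrative sketch; the bucket and key names are |
| hypothetical):: |
| |
| import boto |
| conn = boto.connect_s3() |
| bucket = conn.get_bucket('mybucket') |
| key = bucket.new_key('hello.txt') |
| key.set_contents_from_string('Hello World!') |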
| 71 """ |
| 72 |
| 73 DefaultContentType = 'application/octet-stream' |
| 74 |
| 75 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> |
| 76 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> |
| 77 <Days>%s</Days> |
| 78 </RestoreRequest>""" |
| 79 |
| 80 |
| 81 BufferSize = 8192 |
| 82 |
| 83 # The object metadata fields a user can set, other than custom metadata |
| 84 # fields (i.e., those beginning with a provider-specific prefix like |
| 85 # x-amz-meta). |
| 86 base_user_settable_fields = set(["cache-control", "content-disposition", |
| 87 "content-encoding", "content-language", |
| 88 "content-md5", "content-type"]) |
| 89 _underscore_base_user_settable_fields = set() |
| 90 for f in base_user_settable_fields: |
| 91 _underscore_base_user_settable_fields.add(f.replace('-', '_')) |
| 92 |
| 93 |
| 94 |
| 95 def __init__(self, bucket=None, name=None): |
| 96 self.bucket = bucket |
| 97 self.name = name |
| 98 self.metadata = {} |
| 99 self.cache_control = None |
| 100 self.content_type = self.DefaultContentType |
| 101 self.content_encoding = None |
| 102 self.content_disposition = None |
| 103 self.content_language = None |
| 104 self.filename = None |
| 105 self.etag = None |
| 106 self.is_latest = False |
| 107 self.last_modified = None |
| 108 self.owner = None |
| 109 self.storage_class = 'STANDARD' |
| 110 self.md5 = None |
| 111 self.base64md5 = None |
| 112 self.path = None |
| 113 self.resp = None |
| 114 self.mode = None |
| 115 self.size = None |
| 116 self.version_id = None |
| 117 self.source_version_id = None |
| 118 self.delete_marker = False |
| 119 self.encrypted = None |
| 120 |
| 121 def __repr__(self): |
| 122 if self.bucket: |
| 123 return '<Key: %s,%s>' % (self.bucket.name, self.name) |
| 124 else: |
| 125 return '<Key: None,%s>' % self.name |
| 126 |
| 127 def __getattr__(self, name): |
| 128 if name == 'key': |
| 129 return self.name |
| 130 else: |
| 131 raise AttributeError |
| 132 |
| 133 def __setattr__(self, name, value): |
| 134 if name == 'key': |
| 135 self.__dict__['name'] = value |
| 136 else: |
| 137 self.__dict__[name] = value |
| 138 |
| 139 def __iter__(self): |
| 140 return self |
| 141 |
| 142 @property |
| 143 def provider(self): |
| 144 provider = None |
| 145 if self.bucket: |
| 146 if self.bucket.connection: |
| 147 provider = self.bucket.connection.provider |
| 148 return provider |
| 149 |
| 150 def get_md5_from_hexdigest(self, md5_hexdigest): |
| 151 """ |
| 152 A utility function to create the 2-tuple (md5hexdigest, base64md5) |
| 153 from a precalculated md5_hexdigest. |
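| |
| Example (sketch; the digest shown is the well-known MD5 of |
| "The quick brown fox jumps over the lazy dog"):: |
| |
| hexdigest = '9e107d9d372bb6826bd81d3542a419d6' |
| key.md5, key.base64md5 = key.get_md5_from_hexdigest(hexdigest) |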
| 154 """ |
| 155 import binascii |
| 156 digest = binascii.unhexlify(md5_hexdigest) |
| 157 base64md5 = base64.encodestring(digest) |
| 158 if base64md5[-1] == '\n': |
| 159 base64md5 = base64md5[0:-1] |
| 160 return (md5_hexdigest, base64md5) |
| 161 |
| 162 def handle_encryption_headers(self, resp): |
| 163 provider = self.bucket.connection.provider |
| 164 if provider.server_side_encryption_header: |
| 165 self.encrypted = resp.getheader(provider.server_side_encryption_header, None) |
| 166 else: |
| 167 self.encrypted = None |
| 168 |
| 169 def handle_version_headers(self, resp, force=False): |
| 170 provider = self.bucket.connection.provider |
| 171 # If the Key object already has a version_id attribute value, it |
| 172 # means that it represents an explicit version and the user is |
| 173 # doing a get_contents_*(version_id=<foo>) to retrieve another |
| 174 # version of the Key. In that case, we don't really want to |
| 175 # overwrite the version_id in this Key object. Comprende? |
| 176 if self.version_id is None or force: |
| 177 self.version_id = resp.getheader(provider.version_id, None) |
| 178 self.source_version_id = resp.getheader(provider.copy_source_version_id, |
| 179 None) |
| 180 if resp.getheader(provider.delete_marker, 'false') == 'true': |
| 181 self.delete_marker = True |
| 182 else: |
| 183 self.delete_marker = False |
| 184 |
| 185 def open_read(self, headers=None, query_args='', |
| 186 override_num_retries=None, response_headers=None): |
| 187 """ |
| 188 Open this key for reading |
| 189 |
| 190 :type headers: dict |
| 191 :param headers: Headers to pass in the web request |
| 192 |
| 193 :type query_args: string |
| 194 :param query_args: Arguments to pass in the query string |
| 195 (i.e., 'torrent') |
| 196 |
| 197 :type override_num_retries: int |
| 198 :param override_num_retries: If not None will override configured |
| 199 num_retries parameter for underlying GET. |
| 200 |
| 201 :type response_headers: dict |
| 202 :param response_headers: A dictionary containing HTTP |
| 203 headers/values that will override any headers associated |
| 204 with the stored object in the response. See |
| 205 http://goo.gl/EWOPb for details. |
| 206 """ |
| 207 if self.resp is None: |
| 208 self.mode = 'r' |
| 209 |
| 210 provider = self.bucket.connection.provider |
| 211 self.resp = self.bucket.connection.make_request( |
| 212 'GET', self.bucket.name, self.name, headers, |
| 213 query_args=query_args, |
| 214 override_num_retries=override_num_retries) |
| 215 if self.resp.status < 200 or self.resp.status > 299: |
| 216 body = self.resp.read() |
| 217 raise provider.storage_response_error(self.resp.status, |
| 218 self.resp.reason, body) |
| 219 response_headers = self.resp.msg |
| 220 self.metadata = boto.utils.get_aws_metadata(response_headers, |
| 221 provider) |
| 222 for name, value in response_headers.items(): |
| 223 # To get correct size for Range GETs, use Content-Range |
| 224 # header if one was returned. If not, use Content-Length |
| 225 # header. |
| 226 if (name.lower() == 'content-length' and |
| 227 'Content-Range' not in response_headers): |
| 228 self.size = int(value) |
| 229 elif name.lower() == 'content-range': |
| 230 end_range = re.sub('.*/(.*)', '\\1', value) |
| 231 self.size = int(end_range) |
| 232 elif name.lower() == 'etag': |
| 233 self.etag = value |
| 234 elif name.lower() == 'content-type': |
| 235 self.content_type = value |
| 236 elif name.lower() == 'content-encoding': |
| 237 self.content_encoding = value |
| 238 elif name.lower() == 'content-language': |
| 239 self.content_language = value |
| 240 elif name.lower() == 'last-modified': |
| 241 self.last_modified = value |
| 242 elif name.lower() == 'cache-control': |
| 243 self.cache_control = value |
| 244 elif name.lower() == 'content-disposition': |
| 245 self.content_disposition = value |
| 246 self.handle_version_headers(self.resp) |
| 247 self.handle_encryption_headers(self.resp) |
| 248 |
| 249 def open_write(self, headers=None, override_num_retries=None): |
| 250 """ |
| 251 Open this key for writing. |
| 252 Not yet implemented |
| 253 |
| 254 :type headers: dict |
| 255 :param headers: Headers to pass in the write request |
| 256 |
| 257 :type override_num_retries: int |
| 258 :param override_num_retries: If not None will override configured |
| 259 num_retries parameter for underlying PUT. |
| 260 """ |
| 261 raise BotoClientError('Not Implemented') |
| 262 |
| 263 def open(self, mode='r', headers=None, query_args=None, |
| 264 override_num_retries=None): |
| 265 if mode == 'r': |
| 266 self.mode = 'r' |
| 267 self.open_read(headers=headers, query_args=query_args, |
| 268 override_num_retries=override_num_retries) |
| 269 elif mode == 'w': |
| 270 self.mode = 'w' |
| 271 self.open_write(headers=headers, |
| 272 override_num_retries=override_num_retries) |
| 273 else: |
| 274 raise BotoClientError('Invalid mode: %s' % mode) |
| 275 |
| 276 closed = False |
| 277 |
| 278 def close(self): |
| 279 if self.resp: |
| 280 self.resp.read() |
| 281 self.resp = None |
| 282 self.mode = None |
| 283 self.closed = True |
| 284 |
| 285 def next(self): |
| 286 """ |
| 287 By providing a next method, the key object supports use as an iterator. |
| 288 For example, you can now say: |
| 289 |
| 290 for chunk in key: |
| 291 fp.write(chunk) |
| 292 |
| 293 All of the HTTP connection stuff is handled for you. |
| 294 """ |
| 295 self.open_read() |
| 296 data = self.resp.read(self.BufferSize) |
| 297 if not data: |
| 298 self.close() |
| 299 raise StopIteration |
| 300 return data |
| 301 |
| 302 def read(self, size=0): |
| 303 self.open_read() |
| 304 if size == 0: |
| 305 data = self.resp.read() |
| 306 else: |
| 307 data = self.resp.read(size) |
| 308 if not data: |
| 309 self.close() |
| 310 return data |
| 311 |
| 312 def change_storage_class(self, new_storage_class, dst_bucket=None, |
| 313 validate_dst_bucket=True): |
| 314 """ |
| 315 Change the storage class of an existing key. |
| 316 Depending on whether a different destination bucket is supplied |
| 317 or not, this will either move the item within the bucket, preserving |
| 318 all metadata and ACL info while changing the storage class, or it |
| 319 will copy the item to the provided destination bucket, also |
| 320 preserving metadata and ACL info. |
| 321 |
| 322 :type new_storage_class: string |
| 323 :param new_storage_class: The new storage class for the Key. |
| 324 Possible values are: |
| 325 * STANDARD |
| 326 * REDUCED_REDUNDANCY |
| 327 |
| 328 :type dst_bucket: string |
| 329 :param dst_bucket: The name of a destination bucket. If not |
| 330 provided the current bucket of the key will be used. |
| 331 |
| 332 :type validate_dst_bucket: bool |
| 333 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 334 by using an extra list request. |
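| |
| Example (sketch):: |
| |
| key.change_storage_class('REDUCED_REDUNDANCY') |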
| 335 """ |
| 336 if new_storage_class == 'STANDARD': |
| 337 return self.copy(self.bucket.name, self.name, |
| 338 reduced_redundancy=False, preserve_acl=True, |
| 339 validate_dst_bucket=validate_dst_bucket) |
| 340 elif new_storage_class == 'REDUCED_REDUNDANCY': |
| 341 return self.copy(self.bucket.name, self.name, |
| 342 reduced_redundancy=True, preserve_acl=True, |
| 343 validate_dst_bucket=validate_dst_bucket) |
| 344 else: |
| 345 raise BotoClientError('Invalid storage class: %s' % |
| 346 new_storage_class) |
| 347 |
| 348 def copy(self, dst_bucket, dst_key, metadata=None, |
| 349 reduced_redundancy=False, preserve_acl=False, |
| 350 encrypt_key=False, validate_dst_bucket=True): |
| 351 """ |
| 352 Copy this Key to another bucket. |
| 353 |
| 354 :type dst_bucket: string |
| 355 :param dst_bucket: The name of the destination bucket |
| 356 |
| 357 :type dst_key: string |
| 358 :param dst_key: The name of the destination key |
| 359 |
| 360 :type metadata: dict |
| 361 :param metadata: Metadata to be associated with new key. If |
| 362 metadata is supplied, it will replace the metadata of the |
| 363 source key being copied. If no metadata is supplied, the |
| 364 source key's metadata will be copied to the new key. |
| 365 |
| 366 :type reduced_redundancy: bool |
| 367 :param reduced_redundancy: If True, this will force the |
| 368 storage class of the new Key to be REDUCED_REDUNDANCY |
| 369 regardless of the storage class of the key being copied. |
| 370 The Reduced Redundancy Storage (RRS) feature of S3 |
| 371 provides lower redundancy at lower storage cost. |
| 372 |
| 373 :type preserve_acl: bool |
| 374 :param preserve_acl: If True, the ACL from the source key will |
| 375 be copied to the destination key. If False, the |
| 376 destination key will have the default ACL. Note that |
| 377 preserving the ACL in the new key object will require two |
| 378 additional API calls to S3, one to retrieve the current |
| 379 ACL and one to set that ACL on the new object. If you |
| 380 don't care about the ACL, a value of False will be |
| 381 significantly more efficient. |
| 382 |
| 383 :type encrypt_key: bool |
| 384 :param encrypt_key: If True, the new copy of the object will |
| 385 be encrypted on the server-side by S3 and will be stored |
| 386 in an encrypted form while at rest in S3. |
| 387 |
| 388 :type validate_dst_bucket: bool |
| 389 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 390 by using an extra list request. |
| 391 |
| 392 :rtype: :class:`boto.s3.key.Key` or subclass |
| 393 :returns: An instance of the newly created key object |
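| |
| Example (sketch; the destination bucket name 'backup-bucket' is |
| hypothetical):: |
| |
| new_key = key.copy('backup-bucket', key.name, preserve_acl=True) |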
| 394 """ |
| 395 dst_bucket = self.bucket.connection.lookup(dst_bucket, |
| 396 validate_dst_bucket) |
| 397 if reduced_redundancy: |
| 398 storage_class = 'REDUCED_REDUNDANCY' |
| 399 else: |
| 400 storage_class = self.storage_class |
| 401 return dst_bucket.copy_key(dst_key, self.bucket.name, |
| 402 self.name, metadata, |
| 403 storage_class=storage_class, |
| 404 preserve_acl=preserve_acl, |
| 405 encrypt_key=encrypt_key) |
| 406 |
| 407 def startElement(self, name, attrs, connection): |
| 408 if name == 'Owner': |
| 409 self.owner = User(self) |
| 410 return self.owner |
| 411 else: |
| 412 return None |
| 413 |
| 414 def endElement(self, name, value, connection): |
| 415 if name == 'Key': |
| 416 self.name = value |
| 417 elif name == 'ETag': |
| 418 self.etag = value |
| 419 elif name == 'IsLatest': |
| 420 if value == 'true': |
| 421 self.is_latest = True |
| 422 else: |
| 423 self.is_latest = False |
| 424 elif name == 'LastModified': |
| 425 self.last_modified = value |
| 426 elif name == 'Size': |
| 427 self.size = int(value) |
| 428 elif name == 'StorageClass': |
| 429 self.storage_class = value |
| 430 elif name == 'Owner': |
| 431 pass |
| 432 elif name == 'VersionId': |
| 433 self.version_id = value |
| 434 else: |
| 435 setattr(self, name, value) |
| 436 |
| 437 def exists(self): |
| 438 """ |
| 439 Returns True if the key exists |
| 440 |
| 441 :rtype: bool |
| 442 :return: Whether the key exists on S3 |
| 443 """ |
| 444 return bool(self.bucket.lookup(self.name)) |
| 445 |
| 446 def delete(self): |
| 447 """ |
| 448 Delete this key from S3 |
| 449 """ |
| 450 return self.bucket.delete_key(self.name, version_id=self.version_id) |
| 451 |
| 452 def get_metadata(self, name): |
| 453 return self.metadata.get(name) |
| 454 |
| 455 def set_metadata(self, name, value): |
| 456 self.metadata[name] = value |
| 457 |
| 458 def update_metadata(self, d): |
| 459 self.metadata.update(d) |
| 460 |
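| # Example (sketch): user metadata must be set before the object is |
| # uploaded; it is sent with the PUT request as provider metadata |
| # headers (e.g. x-amz-meta-* for S3): |
| # |
| #     key.set_metadata('reviewed-by', 'alice') |
| #     key.set_contents_from_filename('report.csv') |
| |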
| 461 # convenience methods for setting/getting ACL |
| 462 def set_acl(self, acl_str, headers=None): |
| 463 if self.bucket is not None: |
| 464 self.bucket.set_acl(acl_str, self.name, headers=headers) |
| 465 |
| 466 def get_acl(self, headers=None): |
| 467 if self.bucket is not None: |
| 468 return self.bucket.get_acl(self.name, headers=headers) |
| 469 |
| 470 def get_xml_acl(self, headers=None): |
| 471 if self.bucket is not None: |
| 472 return self.bucket.get_xml_acl(self.name, headers=headers) |
| 473 |
| 474 def set_xml_acl(self, acl_str, headers=None): |
| 475 if self.bucket is not None: |
| 476 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers) |
| 477 |
| 478 def set_canned_acl(self, acl_str, headers=None): |
| 479 return self.bucket.set_canned_acl(acl_str, self.name, headers) |
| 480 |
| 481 def get_redirect(self): |
| 482 """Return the redirect location configured for this key. |
| 483 |
| 484 If no redirect is configured (via set_redirect), then None |
| 485 will be returned. |
| 486 |
| 487 """ |
| 488 response = self.bucket.connection.make_request( |
| 489 'GET', self.bucket.name, self.name) |
| 490 if response.status == 200: |
| 491 return response.getheader('x-amz-website-redirect-location') |
| 492 else: |
| 493 raise self.provider.storage_response_error( |
| 494 response.status, response.reason, response.read()) |
| 495 |
| 496 def set_redirect(self, redirect_location): |
| 497 """Configure this key to redirect to another location. |
| 498 |
| 499 When the bucket associated with this key is accessed from the website |
| 500 endpoint, a 301 redirect will be issued to the specified |
| 501 `redirect_location`. |
| 502 |
| 503 :type redirect_location: string |
| 504 :param redirect_location: The location to redirect. |
| 505 |
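| Example (sketch; the target URL is illustrative):: |
| |
| key.set_redirect('http://example.com/') |
| |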
| 506 """ |
| 507 headers = {'x-amz-website-redirect-location': redirect_location} |
| 508 response = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 509 self.name, headers) |
| 510 if response.status == 200: |
| 511 return True |
| 512 else: |
| 513 raise self.provider.storage_response_error( |
| 514 response.status, response.reason, response.read()) |
| 515 |
| 516 def make_public(self, headers=None): |
| 517 return self.bucket.set_canned_acl('public-read', self.name, headers) |
| 518 |
| 519 def generate_url(self, expires_in, method='GET', headers=None, |
| 520 query_auth=True, force_http=False, response_headers=None, |
| 521 expires_in_absolute=False, version_id=None, |
| 522 policy=None, reduced_redundancy=False, encrypt_key=False): |
| 523 """ |
| 524 Generate a URL to access this key. |
| 525 |
| 526 :type expires_in: int |
| 527 :param expires_in: How long the url is valid for, in seconds |
| 528 |
| 529 :type method: string |
| 530 :param method: The method to use for retrieving the file |
| 531 (default is GET) |
| 532 |
| 533 :type headers: dict |
| 534 :param headers: Any headers to pass along in the request |
| 535 |
| 536 :type query_auth: bool |
| 537 :param query_auth: |
| 538 |
| 539 :type force_http: bool |
| 540 :param force_http: If True, http will be used instead of https. |
| 541 |
| 542 :type response_headers: dict |
| 543 :param response_headers: A dictionary containing HTTP |
| 544 headers/values that will override any headers associated |
| 545 with the stored object in the response. See |
| 546 http://goo.gl/EWOPb for details. |
| 547 |
| 548 :type expires_in_absolute: bool |
| 549 :param expires_in_absolute: If True, ``expires_in`` is treated |
| as an absolute epoch timestamp rather than a number of |
| seconds from now. |
| 550 |
| 551 :type version_id: string |
| 552 :param version_id: The version_id of the object to GET. If specified |
| 553 this overrides any value in the key. |
| 554 |
| 555 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 556 :param policy: A canned ACL policy that will be applied to the |
| 557 new key in S3. |
| 558 |
| 559 :type reduced_redundancy: bool |
| 560 :param reduced_redundancy: If True, this will set the storage |
| 561 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 562 Redundancy Storage (RRS) feature of S3 provides lower |
| 563 redundancy at lower storage cost. |
| 564 |
| 565 :type encrypt_key: bool |
| 566 :param encrypt_key: If True, the new copy of the object will |
| 567 be encrypted on the server-side by S3 and will be stored |
| 568 in an encrypted form while at rest in S3. |
| 569 |
| 570 :rtype: string |
| 571 :return: The URL to access the key |
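| |
| Example (sketch):: |
| |
| # Signed URL, valid for one hour. |
| url = key.generate_url(3600) |
| # Plain, unsigned HTTP URL (object must be publicly readable). |
| url = key.generate_url(0, query_auth=False, force_http=True) |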
| 572 """ |
| 573 provider = self.bucket.connection.provider |
| 574 version_id = version_id or self.version_id |
| 575 if headers is None: |
| 576 headers = {} |
| 577 else: |
| 578 headers = headers.copy() |
| 579 |
| 580 # add headers accordingly (usually PUT case) |
| 581 if policy: |
| 582 headers[provider.acl_header] = policy |
| 583 if reduced_redundancy: |
| 584 self.storage_class = 'REDUCED_REDUNDANCY' |
| 585 if provider.storage_class_header: |
| 586 headers[provider.storage_class_header] = self.storage_class |
| 587 if encrypt_key: |
| 588 headers[provider.server_side_encryption_header] = 'AES256' |
| 589 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 590 |
| 591 return self.bucket.connection.generate_url(expires_in, method, |
| 592 self.bucket.name, self.name, |
| 593 headers, query_auth, |
| 594 force_http, |
| 595 response_headers, |
| 596 expires_in_absolute, |
| 597 version_id) |
| 598 |
| 599 def send_file(self, fp, headers=None, cb=None, num_cb=10, |
| 600 query_args=None, chunked_transfer=False, size=None): |
| 601 """ |
| 602 Upload a file to a key into a bucket on S3. |
| 603 |
| 604 :type fp: file |
| 605 :param fp: The file pointer to upload. The file pointer must |
| 606 point at the offset from which you wish to upload, |
| 607 i.e. if uploading the full file, it should point at the |
| 608 start of the file. Normally when a file is opened for |
| 609 reading, the fp will point at the first byte. See the |
| 610 size parameter below for more info. |
| 611 |
| 612 :type headers: dict |
| 613 :param headers: The headers to pass along with the PUT request |
| 614 |
| 615 :type cb: function |
| 616 :param cb: a callback function that will be called to report |
| 617 progress on the upload. The callback should accept two |
| 618 integer parameters, the first representing the number of |
| 619 bytes that have been successfully transmitted to S3 and |
| 620 the second representing the total size of the object |
| 621 being transmitted. |
| 622 |
| 623 :type num_cb: int |
| 624 :param num_cb: (optional) If a callback is specified with the |
| 625 cb parameter this parameter determines the granularity of |
| 626 the callback by defining the maximum number of times the |
| 627 callback will be called during the file |
| 628 transfer. Providing a negative integer will cause your |
| 629 callback to be called with each buffer read. |
| 630 |
| 631 :type size: int |
| 632 :param size: (optional) The maximum number of bytes to read |
| 633 from the file pointer (fp). This is useful when uploading |
| 634 a file in multiple parts where you are splitting the file |
| 635 up into different ranges to be uploaded. If not specified, |
| 636 the default behaviour is to read all bytes from the file |
| 637 pointer. Fewer bytes may be available. |
| 638 """ |
| 639 provider = self.bucket.connection.provider |
| 640 try: |
| 641 spos = fp.tell() |
| 642 except IOError: |
| 643 spos = None |
| 644 self.read_from_stream = False |
| 645 |
| 646 def sender(http_conn, method, path, data, headers): |
| 647 # This function is called repeatedly for temporary retries |
| 648 # so we must be sure the file pointer is pointing at the |
| 649 # start of the data. |
| 650 if spos is not None and spos != fp.tell(): |
| 651 fp.seek(spos) |
| 652 elif spos is None and self.read_from_stream: |
| 653 # if seek is not supported, and we've read from this |
| 654 # stream already, then we need to abort retries to |
| 655 # avoid setting bad data. |
| 656 raise provider.storage_data_error( |
| 657 'Cannot retry failed request. fp does not support seeking.') |
| 658 |
| 659 http_conn.putrequest(method, path) |
| 660 for key in headers: |
| 661 http_conn.putheader(key, headers[key]) |
| 662 http_conn.endheaders() |
| 663 |
| 664 # Calculate all MD5 checksums on the fly, if not already computed |
| 665 if not self.base64md5: |
| 666 m = md5() |
| 667 else: |
| 668 m = None |
| 669 |
| 670 save_debug = self.bucket.connection.debug |
| 671 self.bucket.connection.debug = 0 |
| 672 # If the debuglevel < 3 we don't want to show connection |
| 673 # payload, so turn off HTTP connection-level debug output (to |
| 674 # be restored below). |
| 675 # Use the getattr approach to allow this to work in AppEngine. |
| 676 if getattr(http_conn, 'debuglevel', 0) < 3: |
| 677 http_conn.set_debuglevel(0) |
| 678 |
| 679 data_len = 0 |
| 680 if cb: |
| 681 if size: |
| 682 cb_size = size |
| 683 elif self.size: |
| 684 cb_size = self.size |
| 685 else: |
| 686 cb_size = 0 |
| 687 if chunked_transfer and cb_size == 0: |
| 688 # For chunked Transfer, we call the cb for every 1MB |
| 689 # of data transferred, except when we know size. |
| 690 cb_count = (1024 * 1024) / self.BufferSize |
| 691 elif num_cb > 1: |
| 692 cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) |
| 693 elif num_cb < 0: |
| 694 cb_count = -1 |
| 695 else: |
| 696 cb_count = 0 |
| 697 i = 0 |
| 698 cb(data_len, cb_size) |
| 699 |
| 700 bytes_togo = size |
| 701 if bytes_togo and bytes_togo < self.BufferSize: |
| 702 chunk = fp.read(bytes_togo) |
| 703 else: |
| 704 chunk = fp.read(self.BufferSize) |
| 705 if spos is None: |
| 706 # read at least something from a non-seekable fp. |
| 707 self.read_from_stream = True |
| 708 while chunk: |
| 709 chunk_len = len(chunk) |
| 710 data_len += chunk_len |
| 711 if chunked_transfer: |
| 712 http_conn.send('%x;\r\n' % chunk_len) |
| 713 http_conn.send(chunk) |
| 714 http_conn.send('\r\n') |
| 715 else: |
| 716 http_conn.send(chunk) |
| 717 if m: |
| 718 m.update(chunk) |
| 719 if bytes_togo: |
| 720 bytes_togo -= chunk_len |
| 721 if bytes_togo <= 0: |
| 722 break |
| 723 if cb: |
| 724 i += 1 |
| 725 if i == cb_count or cb_count == -1: |
| 726 cb(data_len, cb_size) |
| 727 i = 0 |
| 728 if bytes_togo and bytes_togo < self.BufferSize: |
| 729 chunk = fp.read(bytes_togo) |
| 730 else: |
| 731 chunk = fp.read(self.BufferSize) |
| 732 |
| 733 self.size = data_len |
| 734 |
| 735 if m: |
| 736 # Use the chunked trailer for the digest |
| 737 hd = m.hexdigest() |
| 738 self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd) |
| 739 |
| 740 if chunked_transfer: |
| 741 http_conn.send('0\r\n') |
| 742 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) |
| 743 http_conn.send('\r\n') |
| 744 |
| 745 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 746 cb(data_len, cb_size) |
| 747 |
| 748 response = http_conn.getresponse() |
| 749 body = response.read() |
| 750 http_conn.set_debuglevel(save_debug) |
| 751 self.bucket.connection.debug = save_debug |
| 752 if ((response.status == 500 or response.status == 503 or |
| 753 response.getheader('location')) and not chunked_transfer): |
| 754 # we'll try again. |
| 755 return response |
| 756 elif response.status >= 200 and response.status <= 299: |
| 757 self.etag = response.getheader('etag') |
| 758 if self.etag != '"%s"' % self.md5: |
| 759 raise provider.storage_data_error( |
| 760 'ETag from S3 did not match computed MD5') |
| 761 return response |
| 762 else: |
| 763 raise provider.storage_response_error( |
| 764 response.status, response.reason, body) |
| 765 |
| 766 if not headers: |
| 767 headers = {} |
| 768 else: |
| 769 headers = headers.copy() |
| 770 headers['User-Agent'] = UserAgent |
| 771 if self.storage_class != 'STANDARD': |
| 772 headers[provider.storage_class_header] = self.storage_class |
| 773 if 'Content-Encoding' in headers: |
| 774 self.content_encoding = headers['Content-Encoding'] |
| 775 if 'Content-Language' in headers: |
| 776 self.content_language = headers['Content-Language'] |
| 777 if 'Content-Type' in headers: |
| 778 # Some use cases need to suppress sending of the Content-Type |
| 779 # header and depend on the receiving server to set the content |
| 780 # type. This can be achieved by setting headers['Content-Type'] |
| 781 # to None when calling this method. |
| 782 if headers['Content-Type'] is None: |
| 783 # Delete null Content-Type value to skip sending that header. |
| 784 del headers['Content-Type'] |
| 785 else: |
| 786 self.content_type = headers['Content-Type'] |
| 787 elif self.path: |
| 788 self.content_type = mimetypes.guess_type(self.path)[0] |
| 789 if self.content_type is None: |
| 790 self.content_type = self.DefaultContentType |
| 791 headers['Content-Type'] = self.content_type |
| 792 else: |
| 793 headers['Content-Type'] = self.content_type |
| 794 if self.base64md5: |
| 795 headers['Content-MD5'] = self.base64md5 |
| 796 if chunked_transfer: |
| 797 headers['Transfer-Encoding'] = 'chunked' |
| 798 #if not self.base64md5: |
| 799 # headers['Trailer'] = "Content-MD5" |
| 800 else: |
| 801 headers['Content-Length'] = str(self.size) |
| 802 headers['Expect'] = '100-Continue' |
| 803 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 804 resp = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 805 self.name, headers, |
| 806 sender=sender, |
| 807 query_args=query_args) |
| 808 self.handle_version_headers(resp, force=True) |
| 809 |
| 810 def compute_md5(self, fp, size=None): |
| 811 """ |
| 812 :type fp: file |
| 813 :param fp: File pointer to the file to MD5 hash. The file |
| 814 pointer will be reset to the same position before the |
| 815 method returns. |
| 816 |
| 817 :type size: int |
| 818 :param size: (optional) The maximum number of bytes to read |
| 819 from the file pointer (fp). This is useful when uploading |
| 820 a file in multiple parts where the file is being split |
| 821 in place into different parts. Fewer bytes may be available. |
| 822 |
| 823 :rtype: tuple |
| 824 :return: A tuple containing the hex digest version of the MD5 |
| 825 hash as the first element and the base64 encoded version |
| 826 of the plain digest as the second element. |
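| |
| Example (sketch; the file name is hypothetical):: |
| |
| fp = open('data.bin', 'rb') |
| hex_md5, b64_md5 = key.compute_md5(fp) |
| # fp is back at its original position, ready for upload. |
| key.set_contents_from_file(fp, md5=(hex_md5, b64_md5)) |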
| 827 """ |
| 828 tup = compute_md5(fp, size=size) |
| 829 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. |
| 830 # The internal implementation of compute_md5() needs to return the |
| 831 # data size but we don't want to return that value to the external |
| 832 # caller because it changes the class interface (i.e. it might |
| 833 # break some code) so we consume the third tuple value here and |
| 834 # return the remainder of the tuple to the caller, thereby preserving |
| 835 # the existing interface. |
| 836 self.size = tup[2] |
| 837 return tup[0:2] |
| 838 |
| 839 def set_contents_from_stream(self, fp, headers=None, replace=True, |
| 840 cb=None, num_cb=10, policy=None, |
| 841 reduced_redundancy=False, query_args=None, |
| 842 size=None): |
| 843 """ |
| 844 Store an object using the name of the Key object as the key in |
| 845 cloud and the contents of the data stream pointed to by 'fp' as |
| 846 the contents. |
| 847 |
| 848 The stream object is not seekable and the total size is not known. |
| 849 This means we cannot specify the Content-Length or Content-MD5 |
| 850 headers. For large uploads this avoids the delay of calculating |
| 851 the MD5 digest, at the cost of being unable to verify the |
| 852 integrity of the uploaded data. |
| 854 |
| 855 :type fp: file |
| 856 :param fp: the file whose contents are to be uploaded |
| 857 |
| 858 :type headers: dict |
| 859 :param headers: additional HTTP headers to be sent with the |
| 860 PUT request. |
| 861 |
| 862 :type replace: bool |
| 863 :param replace: If this parameter is False, the method will first check |
| 864 to see if an object exists in the bucket with the same key. If it |
| 865 does, it won't overwrite it. The default value is True which will |
| 866 overwrite the object. |
| 867 |
| 868 :type cb: function |
| 869 :param cb: a callback function that will be called to report |
| 870 progress on the upload. The callback should accept two integer |
| 871 parameters, the first representing the number of bytes that have |
| 872 been successfully transmitted to the provider and the second representing the |
| 873 total number of bytes that need to be transmitted. |
| 874 |
| 875 :type num_cb: int |
| 876 :param num_cb: (optional) If a callback is specified with the |
| 877 cb parameter, this parameter determines the granularity of |
| 878 the callback by defining the maximum number of times the |
| 879 callback will be called during the file transfer. |
| 880 |
| 881 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 882 :param policy: A canned ACL policy that will be applied to the |
| 883 new key. |
| 884 |
| 885 :type reduced_redundancy: bool |
| 886 :param reduced_redundancy: If True, this will set the storage |
| 887 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 888 Redundancy Storage (RRS) feature of S3 provides lower |
| 889 redundancy at lower storage cost. |
| 890 |
| 891 :type size: int |
| 892 :param size: (optional) The maximum number of bytes to read from |
| 893 the file pointer (fp). This is useful when uploading a |
| 894 file in multiple parts where you are splitting the file up |
| 895 into different ranges to be uploaded. If not specified, |
| 896 the default behaviour is to read all bytes from the file |
| 897 pointer. Fewer bytes may be available. |
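| |
| Example (sketch; assumes the provider supports chunked transfer):: |
| |
| import subprocess |
| proc = subprocess.Popen(['tar', 'czf', '-', 'mydir'], |
| stdout=subprocess.PIPE) |
| key.set_contents_from_stream(proc.stdout) |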
| 898 """ |
| 899 |
| 900 provider = self.bucket.connection.provider |
| 901 if not provider.supports_chunked_transfer(): |
| 902 raise BotoClientError('%s does not support chunked transfer' |
| 903 % provider.get_provider_name()) |
| 904 |
| 905 # Name of the Object should be specified explicitly for Streams. |
| 906 if not self.name or self.name == '': |
| 907 raise BotoClientError('Cannot determine the destination ' |
| 908 'object name for the given stream') |
| 909 |
| 910 if headers is None: |
| 911 headers = {} |
| 912 if policy: |
| 913 headers[provider.acl_header] = policy |
| 914 |
| 915 if reduced_redundancy: |
| 916 self.storage_class = 'REDUCED_REDUNDANCY' |
| 917 if provider.storage_class_header: |
| 918 headers[provider.storage_class_header] = self.storage_class |
| 919 |
| 920 if self.bucket is not None: |
| 921 if not replace: |
| 922 if self.bucket.lookup(self.name): |
| 923 return |
| 924 self.send_file(fp, headers, cb, num_cb, query_args, |
| 925 chunked_transfer=True, size=size) |
| 926 |
| 927 def set_contents_from_file(self, fp, headers=None, replace=True, |
| 928 cb=None, num_cb=10, policy=None, md5=None, |
| 929 reduced_redundancy=False, query_args=None, |
| 930 encrypt_key=False, size=None, rewind=False): |
| 931 """ |
| 932 Store an object in S3 using the name of the Key object as the |
| 933 key in S3 and the contents of the file pointed to by 'fp' as the |
| 934 contents. The data is read from 'fp' from its current position until |
| 935 'size' bytes have been read or EOF. |
| 936 |
| 937 :type fp: file |
| 938 :param fp: the file whose contents to upload |
| 939 |
| 940 :type headers: dict |
| 941 :param headers: Additional HTTP headers that will be sent with |
| 942 the PUT request. |
| 943 |
| 944 :type replace: bool |
| 945 :param replace: If this parameter is False, the method will |
| 946 first check to see if an object exists in the bucket with |
| 947 the same key. If it does, it won't overwrite it. The |
| 948 default value is True which will overwrite the object. |
| 949 |
| 950 :type cb: function |
| 951 :param cb: a callback function that will be called to report |
| 952 progress on the upload. The callback should accept two |
| 953 integer parameters, the first representing the number of |
| 954 bytes that have been successfully transmitted to S3 and |
| 955 the second representing the total size of the object |
| 956 being transmitted. |
| 957 |
| 958 :type num_cb: int |
| 959 :param num_cb: (optional) If a callback is specified with the |
| 960 cb parameter this parameter determines the granularity of |
| 961 the callback by defining the maximum number of times the |
| 962 callback will be called during the file transfer. |
| 963 |
| 964 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 965 :param policy: A canned ACL policy that will be applied to the |
| 966 new key in S3. |
| 967 |
| 968 :type md5: A tuple containing the hexdigest version of the MD5 |
| 969 checksum of the file as the first element and the |
| 970 Base64-encoded version of the plain checksum as the second |
| 971 element. This is the same format returned by the |
| 972 compute_md5 method. |
| 973 :param md5: If you need to compute the MD5 for any reason |
| 974 prior to upload, it's silly to have to do it twice so this |
| 975 param, if present, will be used as the MD5 values of the |
| 976 file. Otherwise, the checksum will be computed. |
| 977 |
| 978 :type reduced_redundancy: bool |
| 979 :param reduced_redundancy: If True, this will set the storage |
| 980 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 981 Redundancy Storage (RRS) feature of S3 provides lower |
| 982 redundancy at lower storage cost. |
| 983 |
| 984 :type encrypt_key: bool |
| 985 :param encrypt_key: If True, the new copy of the object will |
| 986 be encrypted on the server-side by S3 and will be stored |
| 987 in an encrypted form while at rest in S3. |
| 988 |
| 989 :type size: int |
| 990 :param size: (optional) The maximum number of bytes to read |
| 991 from the file pointer (fp). This is useful when uploading |
| 992 a file in multiple parts where you are splitting the file |
| 993 up into different ranges to be uploaded. If not specified, |
| 994 the default behaviour is to read all bytes from the file |
| 995 pointer. Fewer bytes may be available. |
| 996 |
| 997 :type rewind: bool |
| 998 :param rewind: (optional) If True, the file pointer (fp) will |
| 999 be rewound to the start before any bytes are read from |
| 1000 it. The default behaviour is False which reads from the |
| 1001 current position of the file pointer (fp). |
| 1002 |
| 1003 :rtype: int |
| 1004 :return: The number of bytes written to the key. |
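| |
| Example (sketch; the file name is hypothetical):: |
| |
| with open('photo.jpg', 'rb') as fp: |
| key.set_contents_from_file(fp, rewind=True) |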
| 1005 """ |
| 1006 provider = self.bucket.connection.provider |
| 1007 headers = headers or {} |
| 1008 if policy: |
| 1009 headers[provider.acl_header] = policy |
| 1010 if encrypt_key: |
| 1011 headers[provider.server_side_encryption_header] = 'AES256' |
| 1012 |
| 1013 if rewind: |
| 1014 # caller requests reading from beginning of fp. |
| 1015 fp.seek(0, os.SEEK_SET) |
| 1016 else: |
| 1017 spos = fp.tell() |
| 1018 fp.seek(0, os.SEEK_END) |
| 1019 if fp.tell() == spos: |
| 1020 fp.seek(0, os.SEEK_SET) |
| 1021 if fp.tell() != spos: |
| 1022 # Raise an exception as this is likely a programming error |
| 1023 # whereby there is data before the fp but nothing after it. |
| 1024 fp.seek(spos) |
| 1025 raise AttributeError( |
| 1026 'fp is at EOF. Use rewind option or seek() to data start.') |
| 1027 # seek back to the correct position. |
| 1028 fp.seek(spos) |
| 1029 |
| 1030 if reduced_redundancy: |
| 1031 self.storage_class = 'REDUCED_REDUNDANCY' |
| 1032 if provider.storage_class_header: |
| 1033 headers[provider.storage_class_header] = self.storage_class |
| 1034 # TODO - What if provider doesn't support reduced redundancy? |
| 1035 # What if different providers provide different classes? |
| 1036 if hasattr(fp, 'name'): |
| 1037 self.path = fp.name |
| 1038 |
| 1039 if self.bucket is not None: |
| 1040 if not md5 and provider.supports_chunked_transfer(): |
| 1041 # defer md5 calculation to on the fly and |
| 1042 # we don't know anything about size yet. |
| 1043 chunked_transfer = True |
| 1044 self.size = None |
| 1045 else: |
| 1046 chunked_transfer = False |
| 1047 if not md5: |
| 1048 # compute_md5() will also set self.size to the actual |
| 1049 # number of bytes read while computing the md5. |
| 1050 md5 = self.compute_md5(fp, size) |
| 1051 # adjust size if required |
| 1052 size = self.size |
| 1053 elif size: |
| 1054 self.size = size |
| 1055 else: |
| 1056 # If md5 is provided, we still need the size, so |
| 1057 # compute it from the bytes remaining to end of file. |
| 1058 spos = fp.tell() |
| 1059 fp.seek(0, os.SEEK_END) |
| 1060 self.size = fp.tell() - spos |
| 1061 fp.seek(spos) |
| 1062 size = self.size |
| 1063 self.md5 = md5[0] |
| 1064 self.base64md5 = md5[1] |
| 1065 |
| 1066 if self.name is None: |
| 1067 self.name = self.md5 |
| 1068 if not replace: |
| 1069 if self.bucket.lookup(self.name): |
| 1070 return |
| 1071 |
| 1072 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1073 query_args=query_args, |
| 1074 chunked_transfer=chunked_transfer, size=size) |
| 1075 # return number of bytes written. |
| 1076 return self.size |
| 1077 |
| 1078 def set_contents_from_filename(self, filename, headers=None, replace=True, |
| 1079 cb=None, num_cb=10, policy=None, md5=None, |
| 1080 reduced_redundancy=False, |
| 1081 encrypt_key=False): |
| 1082 """ |
| 1083 Store an object in S3 using the name of the Key object as the |
| 1084 key in S3 and the contents of the file named by 'filename'. |
| 1085 See set_contents_from_file method for details about the |
| 1086 parameters. |
| 1087 |
| 1088 :type filename: string |
| 1089 :param filename: The name of the file that you want to put onto S3 |
| 1090 |
| 1091 :type headers: dict |
| 1092 :param headers: Additional headers to pass along with the |
| 1093 request to AWS. |
| 1094 |
| 1095 :type replace: bool |
| 1096 :param replace: If True, replaces the contents of the key |
| 1097 if it already exists. |
| 1098 |
| 1099 :type cb: function |
| 1100 :param cb: a callback function that will be called to report |
| 1101 progress on the upload. The callback should accept two |
| 1102 integer parameters, the first representing the number of |
| 1103 bytes that have been successfully transmitted to S3 and |
| 1104 the second representing the total size of the object |
| 1105 being transmitted. |
| 1106 |
| 1107 :type num_cb: int |
| 1108 :param num_cb: (optional) If a callback is specified with the |
| 1109 cb parameter this parameter determines the granularity of |
| 1110 the callback by defining the maximum number of times the |
| 1111 callback will be called during the file transfer. |
| 1112 |
| 1113 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1114 :param policy: A canned ACL policy that will be applied to the |
| 1115 new key in S3. |
| 1116 |
| 1117 :type md5: A tuple containing the hexdigest version of the MD5 |
| 1118 checksum of the file as the first element and the |
| 1119 Base64-encoded version of the plain checksum as the second |
| 1120 element. This is the same format returned by the |
| 1121 compute_md5 method. |
| 1122 :param md5: If you need to compute the MD5 for any reason |
| 1123 prior to upload, it's silly to have to do it twice so this |
| 1124 param, if present, will be used as the MD5 values of the |
| 1125 file. Otherwise, the checksum will be computed. |
| 1126 |
| 1127 :type reduced_redundancy: bool |
| 1128 :param reduced_redundancy: If True, this will set the storage |
| 1129 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1130 Redundancy Storage (RRS) feature of S3 provides lower |
| 1131 redundancy at lower storage cost. |
| |
| :type encrypt_key: bool |
| 1132 :param encrypt_key: If True, the new copy of the object |
| 1133 will be encrypted on the server-side by S3 and will be |
| 1134 stored in an encrypted form while at rest in S3. |
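| |
| Example (sketch; the path is hypothetical):: |
| |
| key.set_contents_from_filename('/tmp/report.csv', |
| reduced_redundancy=True) |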
| 1135 """ |
| 1136 fp = open(filename, 'rb') |
| 1137 self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1138 policy, md5, reduced_redundancy, |
| 1139 encrypt_key=encrypt_key) |
| 1140 fp.close() |
| 1141 |
| 1142 def set_contents_from_string(self, s, headers=None, replace=True, |
| 1143 cb=None, num_cb=10, policy=None, md5=None, |
| 1144 reduced_redundancy=False, |
| 1145 encrypt_key=False): |
| 1146 """ |
| 1147 Store an object in S3 using the name of the Key object as the |
| 1148 key in S3 and the string 's' as the contents. |
| 1149 See set_contents_from_file method for details about the |
| 1150 parameters. |
| 1151 |
| 1152 :type headers: dict |
| 1153 :param headers: Additional headers to pass along with the |
| 1154 request to AWS. |
| 1155 |
| 1156 :type replace: bool |
| 1157 :param replace: If True, replaces the contents of the key if |
| 1158 it already exists. |
| 1159 |
| 1160 :type cb: function |
| 1161 :param cb: a callback function that will be called to report |
| 1162 progress on the upload. The callback should accept two |
| 1163 integer parameters, the first representing the number of |
| 1164 bytes that have been successfully transmitted to S3 and |
| 1165 the second representing the total size of the object |
| 1166 being transmitted. |
| 1167 |
| 1168 :type num_cb: int |
| 1169 :param num_cb: (optional) If a callback is specified with the |
| 1170 cb parameter this parameter determines the granularity of |
| 1171 the callback by defining the maximum number of times the |
| 1172 callback will be called during the file transfer. |
| 1173 |
| 1174 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1175 :param policy: A canned ACL policy that will be applied to the |
| 1176 new key in S3. |
| 1177 |
| 1178 :type md5: A tuple containing the hexdigest version of the MD5 |
| 1179 checksum of the file as the first element and the |
| 1180 Base64-encoded version of the plain checksum as the second |
| 1181 element. This is the same format returned by the |
| 1182 compute_md5 method. |
| 1183 :param md5: If you need to compute the MD5 for any reason |
| 1184 prior to upload, it's silly to have to do it twice so this |
| 1185 param, if present, will be used as the MD5 values of the |
| 1186 file. Otherwise, the checksum will be computed. |
| 1187 |
| 1188 :type reduced_redundancy: bool |
| 1189 :param reduced_redundancy: If True, this will set the storage |
| 1190 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1191 Redundancy Storage (RRS) feature of S3 provides lower |
| 1192 redundancy at lower storage cost. |
| 1193 |
| 1194 :type encrypt_key: bool |
| 1195 :param encrypt_key: If True, the new copy of the object will |
| 1196 be encrypted on the server-side by S3 and will be stored |
| 1197 in an encrypted form while at rest in S3. |
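| |
| Example (sketch):: |
| |
| key.set_contents_from_string('Hello World!', policy='public-read') |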
| 1198 """ |
| 1199 if isinstance(s, unicode): |
| 1200 s = s.encode("utf-8") |
| 1201 fp = StringIO.StringIO(s) |
| 1202 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1203 policy, md5, reduced_redundancy, |
| 1204 encrypt_key=encrypt_key) |
| 1205 fp.close() |
| 1206 return r |
| 1207 |
| 1208 def get_file(self, fp, headers=None, cb=None, num_cb=10, |
| 1209 torrent=False, version_id=None, override_num_retries=None, |
| 1210 response_headers=None): |
| 1211 """ |
| 1212 Retrieves a file from an S3 Key |
| 1213 |
| 1214 :type fp: file |
| 1215 :param fp: File pointer to put the data into |
| 1216 |
| 1217 :type headers: dict |
| 1218 :param headers: headers to send when retrieving the file |
| 1219 |
| 1220 :type cb: function |
| 1221 :param cb: a callback function that will be called to report |
| 1222 progress on the download. The callback should accept two |
| 1223 integer parameters, the first representing the number of |
| 1224 bytes that have been successfully received from S3 and |
| 1225 the second representing the total size of the object |
| 1226 being transferred. |
| 1227 |
| 1228 :type num_cb: int |
| 1229 :param num_cb: (optional) If a callback is specified with the |
| 1230 cb parameter this parameter determines the granularity of |
| 1231 the callback by defining the maximum number of times the |
| 1232 callback will be called during the file transfer. |
| 1233 |
| 1234 :type torrent: bool |
| 1235 :param torrent: Flag for whether to get a torrent for the file |
| 1236 |
| 1237 :type override_num_retries: int |
| 1238 :param override_num_retries: If not None will override configured |
| 1239 num_retries parameter for underlying GET. |
| 1240 |
| 1241 :type response_headers: dict |
| 1242 :param response_headers: A dictionary containing HTTP |
| 1243 headers/values that will override any headers associated |
| 1244 with the stored object in the response. See |
| 1245 http://goo.gl/EWOPb for details. |
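| |
| Example (sketch; the local path is hypothetical):: |
| |
| fp = open('/tmp/download.bin', 'wb') |
| key.get_file(fp) |
| fp.close() |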
| 1246 """ |
| 1247 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1248 torrent=torrent, version_id=version_id, |
| 1249 override_num_retries=override_num_retries, |
| 1250 response_headers=response_headers, |
| 1251 query_args=None) |
| 1252 |
| 1253 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, |
| 1254 torrent=False, version_id=None, override_num_retries=None, |
| 1255 response_headers=None, query_args=None): |
| 1256 if headers is None: |
| 1257 headers = {} |
| 1258 save_debug = self.bucket.connection.debug |
| 1259 if self.bucket.connection.debug == 1: |
| 1260 self.bucket.connection.debug = 0 |
| 1261 |
| 1262 query_args = query_args or [] |
| 1263 if torrent: |
| 1264 query_args.append('torrent') |
| 1265 m = None |
| 1266 else: |
| 1267 m = md5() |
| 1268 # If a version_id is passed in, use that. If not, check to see |
| 1269 # if the Key object has an explicit version_id and, if so, use that. |
| 1270 # Otherwise, don't pass a version_id query param. |
| 1271 if version_id is None: |
| 1272 version_id = self.version_id |
| 1273 if version_id: |
| 1274 query_args.append('versionId=%s' % version_id) |
| 1275 if response_headers: |
| 1276 for key in response_headers: |
| 1277 query_args.append('%s=%s' % (key, urllib.quote(response_headers[key]))) |
| 1278 query_args = '&'.join(query_args) |
| 1279 self.open('r', headers, query_args=query_args, |
| 1280 override_num_retries=override_num_retries) |
| 1281 |
| 1282 data_len = 0 |
| 1283 if cb: |
| 1284 if self.size is None: |
| 1285 cb_size = 0 |
| 1286 else: |
| 1287 cb_size = self.size |
| 1288 if self.size is None and num_cb != -1: |
| 1289 # If size is not available due to chunked transfer for example, |
| 1290 # we'll call the cb for every 1MB of data transferred. |
| 1291 cb_count = (1024 * 1024) / self.BufferSize |
| 1292 elif num_cb > 1: |
| 1293 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) |
| 1294 elif num_cb < 0: |
| 1295 cb_count = -1 |
| 1296 else: |
| 1297 cb_count = 0 |
| 1298 i = 0 |
| 1299 cb(data_len, cb_size) |
| 1300 for chunk in self: |
| 1301 fp.write(chunk) |
| 1302 data_len += len(chunk) |
| 1303 if m: |
| 1304 m.update(chunk) |
| 1305 if cb: |
| 1306 if cb_size > 0 and data_len >= cb_size: |
| 1307 break |
| 1308 i += 1 |
| 1309 if i == cb_count or cb_count == -1: |
| 1310 cb(data_len, cb_size) |
| 1311 i = 0 |
| 1312 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 1313 cb(data_len, cb_size) |
| 1314 if m: |
| 1315 self.md5 = m.hexdigest() |
| 1316 if self.size is None and not torrent and "Range" not in headers: |
| 1317 self.size = data_len |
| 1318 self.close() |
| 1319 self.bucket.connection.debug = save_debug |
| 1320 |
| 1321 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): |
| 1322 """ |
| 1323 Get a torrent file (see get_file) |
| 1324 |
| 1325 :type fp: file |
| 1326 :param fp: The file pointer of where to put the torrent |
| 1327 |
| 1328 :type headers: dict |
| 1329 :param headers: Headers to be passed |
| 1330 |
| 1331 :type cb: function |
| 1332 :param cb: a callback function that will be called to report |
| 1333 progress on the download. The callback should accept two |
| 1334 integer parameters, the first representing the number of |
| 1335 bytes that have been successfully received from S3 and |
| 1336 the second representing the total size of the object |
| 1337 being transferred. |
| 1338 |
| 1339 :type num_cb: int |
| 1340 :param num_cb: (optional) If a callback is specified with the |
| 1341 cb parameter this parameter determines the granularity of |
| 1342 the callback by defining the maximum number of times the |
| 1343 callback will be called during the file transfer. |
| 1344 |
| 1345 """ |
| 1346 return self.get_file(fp, headers, cb, num_cb, torrent=True) |
| 1347 |
| 1348 def get_contents_to_file(self, fp, headers=None, |
| 1349 cb=None, num_cb=10, |
| 1350 torrent=False, |
| 1351 version_id=None, |
| 1352 res_download_handler=None, |
| 1353 response_headers=None): |
| 1354 """ |
| 1355 Retrieve an object from S3 using the name of the Key object as the |
| 1356 key in S3. Write the contents of the object to the file pointed |
| 1357 to by 'fp'. |
| 1358 |
| 1359 :type fp: file-like object |
| 1360 :param fp: the file pointer (or any file-like object) to which |
| the contents of the object will be written |
| 1361 |
| 1362 :type headers: dict |
| 1363 :param headers: additional HTTP headers that will be sent with |
| 1364 the GET request. |
| 1365 |
| 1366 :type cb: function |
| 1367 :param cb: a callback function that will be called to report |
| 1368 progress on the download. The callback should accept two |
| 1369 integer parameters, the first representing the number of |
| 1370 bytes that have been successfully received from S3 and |
| 1371 the second representing the total size of the object |
| 1372 being transferred. |
| 1373 |
| 1374 :type num_cb: int |
| 1375 :param num_cb: (optional) If a callback is specified with the |
| 1376 cb parameter this parameter determines the granularity of |
| 1377 the callback by defining the maximum number of times the |
| 1378 callback will be called during the file transfer. |
| 1379 |
| 1380 :type torrent: bool |
| 1381 :param torrent: If True, returns the contents of a torrent |
| 1382 file as a string. |
| 1383 |
| 1384 :type res_download_handler: ResumableDownloadHandler |
| 1385 :param res_download_handler: If provided, this handler will |
| 1386 perform the download. |
| 1387 |
| 1388 :type response_headers: dict |
| 1389 :param response_headers: A dictionary containing HTTP |
| 1390 headers/values that will override any headers associated |
| 1391 with the stored object in the response. See |
| 1392 http://goo.gl/EWOPb for details. |
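| |
| Example (sketch; the local path is hypothetical):: |
| |
| fp = open('/tmp/download.bin', 'wb') |
| key.get_contents_to_file(fp) |
| fp.close() |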
| 1393 """ |
| 1394 if self.bucket != None: |
| 1395 if res_download_handler: |
| 1396 res_download_handler.get_file(self, fp, headers, cb, num_cb, |
| 1397 torrent=torrent, |
| 1398 version_id=version_id) |
| 1399 else: |
| 1400 self.get_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1401 version_id=version_id, |
| 1402 response_headers=response_headers) |
| 1403 |
| 1404 def get_contents_to_filename(self, filename, headers=None, |
| 1405 cb=None, num_cb=10, |
| 1406 torrent=False, |
| 1407 version_id=None, |
| 1408 res_download_handler=None, |
| 1409 response_headers=None): |
| 1410 """ |
| 1411 Retrieve an object from S3 using the name of the Key object as the |
| 1412 key in S3. Store contents of the object to a file named by 'filename'. |
| 1413 See get_contents_to_file method for details about the |
| 1414 parameters. |
| 1415 |
| 1416 :type filename: string |
| 1417 :param filename: The filename of where to put the file contents |
| 1418 |
| 1419 :type headers: dict |
| 1420 :param headers: Any additional headers to send in the request |
| 1421 |
| 1422 :type cb: function |
| 1423 :param cb: a callback function that will be called to report |
| 1424 progress on the download. The callback should accept two |
| 1425 integer parameters, the first representing the number of |
| 1426 bytes that have been successfully received from S3 and |
| 1427 the second representing the total size of the object |
| 1428 being transferred. |
| 1429 |
| 1430 :type num_cb: int |
| 1431 :param num_cb: (optional) If a callback is specified with the |
| 1432 cb parameter this parameter determines the granularity of |
| 1433 the callback by defining the maximum number of times the |
| 1434 callback will be called during the file transfer. |
| 1435 |
| 1436 :type torrent: bool |
| 1437 :param torrent: If True, returns the contents of a torrent file |
| 1438 as a string. |
| 1439 |
| 1440 :type res_upload_handler: ResumableDownloadHandler |
| 1441 :param res_download_handler: If provided, this handler will |
| 1442 perform the download. |
| 1443 |
| 1444 :type response_headers: dict |
| 1445 :param response_headers: A dictionary containing HTTP |
| 1446 headers/values that will override any headers associated |
| 1447 with the stored object in the response. See |
| 1448 http://goo.gl/EWOPb for details. |
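
        Example (a minimal sketch; assumes 'key' was obtained from a
        bucket as in get_contents_to_file, and the path is illustrative)::

            key.get_contents_to_filename('/tmp/mykey')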
| 1449 """ |
| 1450 fp = open(filename, 'wb') |
| 1451 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1452 version_id=version_id, |
| 1453 res_download_handler=res_download_handler, |
| 1454 response_headers=response_headers) |
| 1455 fp.close() |
| 1456 # if last_modified date was sent from s3, try to set file's timestamp |
| 1457 if self.last_modified != None: |
| 1458 try: |
| 1459 modified_tuple = rfc822.parsedate_tz(self.last_modified) |
| 1460 modified_stamp = int(rfc822.mktime_tz(modified_tuple)) |
| 1461 os.utime(fp.name, (modified_stamp, modified_stamp)) |
| 1462 except Exception: |
| 1463 pass |
| 1464 |
    def get_contents_as_string(self, headers=None,
                               cb=None, num_cb=10,
                               torrent=False,
                               version_id=None,
                               response_headers=None):
| 1470 """ |
| 1471 Retrieve an object from S3 using the name of the Key object as the |
| 1472 key in S3. Return the contents of the object as a string. |
| 1473 See get_contents_to_file method for details about the |
| 1474 parameters. |
| 1475 |
| 1476 :type headers: dict |
| 1477 :param headers: Any additional headers to send in the request |
| 1478 |
| 1479 :type cb: function |
| 1480 :param cb: a callback function that will be called to report |
| 1481 progress on the upload. The callback should accept two |
| 1482 integer parameters, the first representing the number of |
| 1483 bytes that have been successfully transmitted to S3 and |
| 1484 the second representing the size of the to be transmitted |
| 1485 object. |
| 1486 |
| 1487 :type cb: int |
| 1488 :param num_cb: (optional) If a callback is specified with the |
| 1489 cb parameter this parameter determines the granularity of |
| 1490 the callback by defining the maximum number of times the |
| 1491 callback will be called during the file transfer. |
| 1492 |
| 1493 :type torrent: bool |
| 1494 :param torrent: If True, returns the contents of a torrent file |
| 1495 as a string. |
| 1496 |
| 1497 :type response_headers: dict |
| 1498 :param response_headers: A dictionary containing HTTP |
| 1499 headers/values that will override any headers associated |
| 1500 with the stored object in the response. See |
| 1501 http://goo.gl/EWOPb for details. |
| 1502 |
| 1503 :rtype: string |
| 1504 :returns: The contents of the file as a string |
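
        Example (a minimal sketch; the 'response-content-type' override
        shown is one of the standard S3 GET response overrides)::

            data = key.get_contents_as_string()
            # Override the returned Content-Type for this request only:
            text = key.get_contents_as_string(
                response_headers={'response-content-type': 'text/plain'})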
| 1505 """ |
| 1506 fp = StringIO.StringIO() |
| 1507 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1508 version_id=version_id, |
| 1509 response_headers=response_headers) |
| 1510 return fp.getvalue() |
| 1511 |
    def add_email_grant(self, permission, email_address, headers=None):
        """
        Convenience method that provides a quick way to add an email grant
        to a key.  This method retrieves the current ACL, creates a new
        grant based on the parameters passed in, adds that grant to the ACL
        and then PUTs the new ACL back to S3.

        :type permission: string
        :param permission: The permission being granted.  Should be one of:
            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).

        :type email_address: string
        :param email_address: The email address associated with the AWS
            account you are granting the permission to.

        :type headers: dict
        :param headers: Any additional headers to send in the request.
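
        Example (a minimal sketch; the address is illustrative)::

            key.add_email_grant('READ', 'user@example.com')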
| 1534 """ |
| 1535 policy = self.get_acl(headers=headers) |
| 1536 policy.acl.add_email_grant(permission, email_address) |
| 1537 self.set_acl(policy, headers=headers) |
| 1538 |
    def add_user_grant(self, permission, user_id, headers=None,
                       display_name=None):
        """
        Convenience method that provides a quick way to add a canonical
        user grant to a key.  This method retrieves the current ACL,
        creates a new grant based on the parameters passed in, adds that
        grant to the ACL and then PUTs the new ACL back to S3.

        :type permission: string
        :param permission: The permission being granted.  Should be one of:
            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).

        :type user_id: string
        :param user_id: The canonical user id associated with the AWS
            account you are granting the permission to.

        :type headers: dict
        :param headers: Any additional headers to send in the request.

        :type display_name: string
        :param display_name: An optional string containing the user's
            Display Name.  Only required on Walrus.
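
        Example (a minimal sketch; the id shown stands in for a real
        64-character canonical user id)::

            key.add_user_grant('FULL_CONTROL', '6e8a...f1d2')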
| 1558 """ |
| 1559 policy = self.get_acl(headers=headers) |
| 1560 policy.acl.add_user_grant(permission, user_id, |
| 1561 display_name=display_name) |
| 1562 self.set_acl(policy, headers=headers) |
| 1563 |
    def _normalize_metadata(self, metadata):
        """
        Lowercase all metadata names.  Accepts either a set of names or
        a dict mapping names to values; returns the same kind of
        collection with lowercased keys.
        """
        if isinstance(metadata, set):
            norm_metadata = set()
            for k in metadata:
                norm_metadata.add(k.lower())
        else:
            norm_metadata = {}
            for k in metadata:
                norm_metadata[k.lower()] = metadata[k]
        return norm_metadata

    def _get_remote_metadata(self, headers=None):
        """
        Extracts metadata from the existing key into a dict, so we can
        overwrite/delete from it to form the new set of metadata to apply
        to the key.
        """
        metadata = {}
        for underscore_name in self._underscore_base_user_settable_fields:
            if hasattr(self, underscore_name):
                value = getattr(self, underscore_name)
                if value:
                    # Generate the HTTP field name corresponding to the
                    # "_"-named field.
                    field_name = underscore_name.replace('_', '-')
                    metadata[field_name.lower()] = value
        # self.metadata contains custom metadata, which are all user-settable.
        prefix = self.provider.metadata_prefix
        for underscore_name in self.metadata:
            field_name = underscore_name.replace('_', '-')
            metadata['%s%s' % (prefix, field_name.lower())] = (
                self.metadata[underscore_name])
        return metadata

    def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
                            headers=None):
        """
        Replace the metadata on the remote object: merge 'metadata_plus'
        into the object's existing metadata, remove any names listed in
        'metadata_minus', and apply the result via an in-place copy_key()
        call, optionally preserving the object's ACL.
        """
        metadata_plus = self._normalize_metadata(metadata_plus)
        metadata_minus = self._normalize_metadata(metadata_minus)
        metadata = self._get_remote_metadata()
        metadata.update(metadata_plus)
        for h in metadata_minus:
            if h in metadata:
                del metadata[h]
        src_bucket = self.bucket
        # Boto prepends the meta prefix when adding headers, so strip the
        # prefix in the metadata before sending it back in to the
        # copy_key() call.
        rewritten_metadata = {}
        for h in metadata:
            if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
                rewritten_h = (h.replace('x-goog-meta-', '')
                               .replace('x-amz-meta-', ''))
            else:
                rewritten_h = h
            rewritten_metadata[rewritten_h] = metadata[h]
        metadata = rewritten_metadata
        src_bucket.copy_key(self.name, self.bucket.name, self.name,
                            metadata=metadata, preserve_acl=preserve_acl)

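    # Example usage of set_remote_metadata (a minimal sketch; the header
    # names and values are illustrative): add or overwrite one custom
    # metadata entry and remove another, preserving the object's ACL.
    #
    #     key.set_remote_metadata(metadata_plus={'x-amz-meta-color': 'blue'},
    #                             metadata_minus={'x-amz-meta-flavor'},
    #                             preserve_acl=True)
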
    def restore(self, days, headers=None):
        """Restore an object from an archive.

        :type days: int
        :param days: The lifetime of the restored object (must be at
            least 1 day).  If the object is already restored, this
            parameter can be used to readjust the lifetime of the
            restored copy; in that case, the days param is with respect
            to the initial time of the request.  If the object has not
            yet been restored, the days param is with respect to the
            completion time of the request.

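        Example (a minimal sketch; assumes this key's storage class has
        transitioned to GLACIER)::

            key.restore(days=5)
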
| 1633 """ |
| 1634 response = self.bucket.connection.make_request( |
| 1635 'POST', self.bucket.name, self.name, |
| 1636 data=self.RestoreBody % days, |
| 1637 headers=headers, query_args='restore') |
| 1638 if response.status not in (200, 202): |
| 1639 provider = self.bucket.connection.provider |
| 1640 raise provider.storage_response_error(response.status, |
| 1641 response.reason, |
| 1642 response.read()) |