OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ |
| 2 # Copyright (c) 2011, Nexenta Systems Inc. |
| 3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a |
| 6 # copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- |
| 11 # lowing conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be included |
| 14 # in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 22 # IN THE SOFTWARE. |
| 23 |
| 24 import mimetypes |
| 25 import os |
| 26 import re |
| 27 import rfc822 |
| 28 import StringIO |
| 29 import base64 |
| 30 import binascii |
| 31 import math |
| 32 import urllib |
| 33 import boto.utils |
| 34 from boto.exception import BotoClientError |
| 35 from boto.provider import Provider |
| 36 from boto.s3.keyfile import KeyFile |
| 37 from boto.s3.user import User |
| 38 from boto import UserAgent |
| 39 from boto.utils import compute_md5 |
| 40 try: |
| 41 from hashlib import md5 |
| 42 except ImportError: |
| 43 from md5 import md5 |
| 44 |
| 45 |
| 46 class Key(object): |
| 47 """ |
| 48 Represents a key (object) in an S3 bucket. |
| 49 |
| 50 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. |
| 51 :ivar name: The name of this Key object. |
| 52 :ivar metadata: A dictionary containing user metadata that you |
| 53 wish to store with the object or that has been retrieved from |
| 54 an existing object. |
| 55 :ivar cache_control: The value of the `Cache-Control` HTTP header. |
| 56 :ivar content_type: The value of the `Content-Type` HTTP header. |
| 57 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. |
| 58 :ivar content_disposition: The value of the `Content-Disposition` HTTP |
| 59 header. |
| 60 :ivar content_language: The value of the `Content-Language` HTTP header. |
| 61 :ivar etag: The `etag` associated with this object. |
| 62 :ivar last_modified: The string timestamp representing the last |
| 63 time this object was modified in S3. |
| 64 :ivar owner: The ID of the owner of this object. |
| 65 :ivar storage_class: The storage class of the object. Currently, one of: |
| 66 STANDARD | REDUCED_REDUNDANCY | GLACIER |
| 67 :ivar md5: The MD5 hash of the contents of the object. |
| 68 :ivar size: The size, in bytes, of the object. |
| 69 :ivar version_id: The version ID of this object, if it is a versioned |
| 70 object. |
| 71 :ivar encrypted: Whether the object is encrypted while at rest on |
| 72 the server. |
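| |
| Example (an illustrative sketch; the bucket name, credentials, and |
| object contents are assumptions, not part of this class): |
| |
| >>> import boto |
| >>> bucket = boto.connect_s3().get_bucket('mybucket') |
| >>> k = bucket.new_key('hello.txt') |
| >>> k.set_contents_from_string('hello world') |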
| 73 """ |
| 74 |
| 75 DefaultContentType = 'application/octet-stream' |
| 76 |
| 77 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> |
| 78 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> |
| 79 <Days>%s</Days> |
| 80 </RestoreRequest>""" |
| 81 |
| 82 |
| 83 BufferSize = 8192 |
| 84 |
| 85 # The object metadata fields a user can set, other than custom metadata |
| 86 # fields (i.e., those beginning with a provider-specific prefix like |
| 87 # x-amz-meta). |
| 88 base_user_settable_fields = set(["cache-control", "content-disposition", |
| 89 "content-encoding", "content-language", |
| 90 "content-md5", "content-type"]) |
| 91 _underscore_base_user_settable_fields = set() |
| 92 for f in base_user_settable_fields: |
| 93 _underscore_base_user_settable_fields.add(f.replace('-', '_')) |
| 94 |
| 95 |
| 96 |
| 97 def __init__(self, bucket=None, name=None): |
| 98 self.bucket = bucket |
| 99 self.name = name |
| 100 self.metadata = {} |
| 101 self.cache_control = None |
| 102 self.content_type = self.DefaultContentType |
| 103 self.content_encoding = None |
| 104 self.content_disposition = None |
| 105 self.content_language = None |
| 106 self.filename = None |
| 107 self.etag = None |
| 108 self.is_latest = False |
| 109 self.last_modified = None |
| 110 self.owner = None |
| 111 self.storage_class = 'STANDARD' |
| 112 self.md5 = None |
| 113 self.base64md5 = None |
| 114 self.path = None |
| 115 self.resp = None |
| 116 self.mode = None |
| 117 self.size = None |
| 118 self.version_id = None |
| 119 self.source_version_id = None |
| 120 self.delete_marker = False |
| 121 self.encrypted = None |
| 122 # If the object is being restored, this attribute will be set to True. |
| 123 # If the object is restored, it will be set to False. Otherwise this |
| 124 # value will be None. If the restore is completed (ongoing_restore = |
| 125 # False), the expiry_date will be populated with the expiry date of the |
| 126 # restored object. |
| 127 self.ongoing_restore = None |
| 128 self.expiry_date = None |
| 129 |
| 130 def __repr__(self): |
| 131 if self.bucket: |
| 132 return '<Key: %s,%s>' % (self.bucket.name, self.name) |
| 133 else: |
| 134 return '<Key: None,%s>' % self.name |
| 135 |
| 136 def __getattr__(self, name): |
| 137 if name == 'key': |
| 138 return self.name |
| 139 else: |
| 140 raise AttributeError |
| 141 |
| 142 def __setattr__(self, name, value): |
| 143 if name == 'key': |
| 144 self.__dict__['name'] = value |
| 145 else: |
| 146 self.__dict__[name] = value |
| 147 |
| 148 def __iter__(self): |
| 149 return self |
| 150 |
| 151 @property |
| 152 def provider(self): |
| 153 provider = None |
| 154 if self.bucket and self.bucket.connection: |
| 155 provider = self.bucket.connection.provider |
| 156 return provider |
| 157 |
| 158 def get_md5_from_hexdigest(self, md5_hexdigest): |
| 159 """ |
| 160 A utility function to create the 2-tuple (md5hexdigest, base64md5) |
| 161 from just having a precalculated md5_hexdigest. |
| 162 """ |
| 163 digest = binascii.unhexlify(md5_hexdigest) |
| 164 base64md5 = base64.encodestring(digest) |
| 165 if base64md5[-1] == '\n': |
| 166 base64md5 = base64md5[0:-1] |
| 167 return (md5_hexdigest, base64md5) |
| 168 |
| 169 def handle_encryption_headers(self, resp): |
| 170 provider = self.bucket.connection.provider |
| 171 if provider.server_side_encryption_header: |
| 172 self.encrypted = resp.getheader(provider.server_side_encryption_header, None) |
| 173 else: |
| 174 self.encrypted = None |
| 175 |
| 176 def handle_version_headers(self, resp, force=False): |
| 177 provider = self.bucket.connection.provider |
| 178 # If the Key object already has a version_id attribute value, it |
| 179 # means that it represents an explicit version and the user is |
| 180 # doing a get_contents_*(version_id=<foo>) to retrieve another |
| 181 # version of the Key. In that case, we don't really want to |
| 182 # overwrite the version_id in this Key object. Comprende? |
| 183 if self.version_id is None or force: |
| 184 self.version_id = resp.getheader(provider.version_id, None) |
| 185 self.source_version_id = resp.getheader(provider.copy_source_version_id, |
| 186 None) |
| 187 if resp.getheader(provider.delete_marker, 'false') == 'true': |
| 188 self.delete_marker = True |
| 189 else: |
| 190 self.delete_marker = False |
| 191 |
| 192 def handle_restore_headers(self, response): |
| 193 header = response.getheader('x-amz-restore') |
| 194 if header is None: |
| 195 return |
| 196 parts = header.split(',', 1) |
| 197 for part in parts: |
| 198 key, val = [i.strip() for i in part.split('=')] |
| 199 val = val.replace('"', '') |
| 200 if key == 'ongoing-request': |
| 201 self.ongoing_restore = (val.lower() == 'true') |
| 202 elif key == 'expiry-date': |
| 203 self.expiry_date = val |
| 204 |
| 205 def open_read(self, headers=None, query_args='', |
| 206 override_num_retries=None, response_headers=None): |
| 207 """ |
| 208 Open this key for reading |
| 209 |
| 210 :type headers: dict |
| 211 :param headers: Headers to pass in the web request |
| 212 |
| 213 :type query_args: string |
| 214 :param query_args: Arguments to pass in the query string |
| 215 (i.e., 'torrent') |
| 216 |
| 217 :type override_num_retries: int |
| 218 :param override_num_retries: If not None will override configured |
| 219 num_retries parameter for underlying GET. |
| 220 |
| 221 :type response_headers: dict |
| 222 :param response_headers: A dictionary containing HTTP |
| 223 headers/values that will override any headers associated |
| 224 with the stored object in the response. See |
| 225 http://goo.gl/EWOPb for details. |
| 226 """ |
| 227 if self.resp is None: |
| 228 self.mode = 'r' |
| 229 |
| 230 provider = self.bucket.connection.provider |
| 231 self.resp = self.bucket.connection.make_request( |
| 232 'GET', self.bucket.name, self.name, headers, |
| 233 query_args=query_args, |
| 234 override_num_retries=override_num_retries) |
| 235 if self.resp.status < 200 or self.resp.status > 299: |
| 236 body = self.resp.read() |
| 237 raise provider.storage_response_error(self.resp.status, |
| 238 self.resp.reason, body) |
| 239 response_headers = self.resp.msg |
| 240 self.metadata = boto.utils.get_aws_metadata(response_headers, |
| 241 provider) |
| 242 for name, value in response_headers.items(): |
| 243 # To get correct size for Range GETs, use Content-Range |
| 244 # header if one was returned. If not, use Content-Length |
| 245 # header. |
| 246 if (name.lower() == 'content-length' and |
| 247 'Content-Range' not in response_headers): |
| 248 self.size = int(value) |
| 249 elif name.lower() == 'content-range': |
| 250 end_range = re.sub('.*/(.*)', '\\1', value) |
| 251 self.size = int(end_range) |
| 252 elif name.lower() == 'etag': |
| 253 self.etag = value |
| 254 elif name.lower() == 'content-type': |
| 255 self.content_type = value |
| 256 elif name.lower() == 'content-encoding': |
| 257 self.content_encoding = value |
| 258 elif name.lower() == 'content-language': |
| 259 self.content_language = value |
| 260 elif name.lower() == 'last-modified': |
| 261 self.last_modified = value |
| 262 elif name.lower() == 'cache-control': |
| 263 self.cache_control = value |
| 264 elif name.lower() == 'content-disposition': |
| 265 self.content_disposition = value |
| 266 self.handle_version_headers(self.resp) |
| 267 self.handle_encryption_headers(self.resp) |
| 268 |
| 269 def open_write(self, headers=None, override_num_retries=None): |
| 270 """ |
| 271 Open this key for writing. |
| 272 Not yet implemented |
| 273 |
| 274 :type headers: dict |
| 275 :param headers: Headers to pass in the write request |
| 276 |
| 277 :type override_num_retries: int |
| 278 :param override_num_retries: If not None will override configured |
| 279 num_retries parameter for underlying PUT. |
| 280 """ |
| 281 raise BotoClientError('Not Implemented') |
| 282 |
| 283 def open(self, mode='r', headers=None, query_args=None, |
| 284 override_num_retries=None): |
| 285 if mode == 'r': |
| 286 self.mode = 'r' |
| 287 self.open_read(headers=headers, query_args=query_args, |
| 288 override_num_retries=override_num_retries) |
| 289 elif mode == 'w': |
| 290 self.mode = 'w' |
| 291 self.open_write(headers=headers, |
| 292 override_num_retries=override_num_retries) |
| 293 else: |
| 294 raise BotoClientError('Invalid mode: %s' % mode) |
| 295 |
| 296 closed = False |
| 297 |
| 298 def close(self, fast=False): |
| 299 """ |
| 300 Close this key. |
| 301 |
| 302 :type fast: bool |
| 303 :param fast: True if you want the connection to be closed without first |
| 304 reading the content. This should only be used in cases where subsequent |
| 305 calls don't need to return the content from the open HTTP connection. |
| 306 Note: As explained at |
| 307 http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse, |
| 308 callers must read the whole response before sending a new request to the |
| 309 server. Calling Key.close(fast=True) and making a subsequent request to |
| 310 the server will work because boto will get an httplib exception and |
| 311 close/reopen the connection. |
| 312 |
| 313 """ |
| 314 if self.resp and not fast: |
| 315 self.resp.read() |
| 316 self.resp = None |
| 317 self.mode = None |
| 318 self.closed = True |
| 319 |
| 320 def next(self): |
| 321 """ |
| 322 By providing a next method, the key object supports use as an iterator. |
| 323 For example, you can now say: |
| 324 |
| 325 for bytes in key: |
| 326 write bytes to a file or whatever |
| 327 |
| 328 All of the HTTP connection stuff is handled for you. |
| 329 """ |
| 330 self.open_read() |
| 331 data = self.resp.read(self.BufferSize) |
| 332 if not data: |
| 333 self.close() |
| 334 raise StopIteration |
| 335 return data |
| 336 |
| 337 def read(self, size=0): |
| 338 self.open_read() |
| 339 if size == 0: |
| 340 data = self.resp.read() |
| 341 else: |
| 342 data = self.resp.read(size) |
| 343 if not data: |
| 344 self.close() |
| 345 return data |
| 346 |
| 347 def change_storage_class(self, new_storage_class, dst_bucket=None, |
| 348 validate_dst_bucket=True): |
| 349 """ |
| 350 Change the storage class of an existing key. |
| 351 Depending on whether a different destination bucket is supplied |
| 352 or not, this will either move the item within the bucket, preserving |
| 353 all metadata and ACL info bucket changing the storage class or it |
| 354 will copy the item to the provided destination bucket, also |
| 355 preserving metadata and ACL info. |
| 356 |
| 357 :type new_storage_class: string |
| 358 :param new_storage_class: The new storage class for the Key. |
| 359 Possible values are: |
| 360 * STANDARD |
| 361 * REDUCED_REDUNDANCY |
| 362 |
| 363 :type dst_bucket: string |
| 364 :param dst_bucket: The name of a destination bucket. If not |
| 365 provided the current bucket of the key will be used. |
| 366 |
| 367 :type validate_dst_bucket: bool |
| 368 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 369 by using an extra list request. |
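| |
| A minimal sketch (assumes an existing Key instance 'k'): |
| |
| >>> k.change_storage_class('REDUCED_REDUNDANCY') |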
| 370 """ |
| 371 if new_storage_class == 'STANDARD': |
| 372 return self.copy(self.bucket.name, self.name, |
| 373 reduced_redundancy=False, preserve_acl=True, |
| 374 validate_dst_bucket=validate_dst_bucket) |
| 375 elif new_storage_class == 'REDUCED_REDUNDANCY': |
| 376 return self.copy(self.bucket.name, self.name, |
| 377 reduced_redundancy=True, preserve_acl=True, |
| 378 validate_dst_bucket=validate_dst_bucket) |
| 379 else: |
| 380 raise BotoClientError('Invalid storage class: %s' % |
| 381 new_storage_class) |
| 382 |
| 383 def copy(self, dst_bucket, dst_key, metadata=None, |
| 384 reduced_redundancy=False, preserve_acl=False, |
| 385 encrypt_key=False, validate_dst_bucket=True): |
| 386 """ |
| 387 Copy this Key to another bucket. |
| 388 |
| 389 :type dst_bucket: string |
| 390 :param dst_bucket: The name of the destination bucket |
| 391 |
| 392 :type dst_key: string |
| 393 :param dst_key: The name of the destination key |
| 394 |
| 395 :type metadata: dict |
| 396 :param metadata: Metadata to be associated with new key. If |
| 397 metadata is supplied, it will replace the metadata of the |
| 398 source key being copied. If no metadata is supplied, the |
| 399 source key's metadata will be copied to the new key. |
| 400 |
| 401 :type reduced_redundancy: bool |
| 402 :param reduced_redundancy: If True, this will force the |
| 403 storage class of the new Key to be REDUCED_REDUNDANCY |
| 404 regardless of the storage class of the key being copied. |
| 405 The Reduced Redundancy Storage (RRS) feature of S3 |
| 406 provides lower redundancy at lower storage cost. |
| 407 |
| 408 :type preserve_acl: bool |
| 409 :param preserve_acl: If True, the ACL from the source key will |
| 410 be copied to the destination key. If False, the |
| 411 destination key will have the default ACL. Note that |
| 412 preserving the ACL in the new key object will require two |
| 413 additional API calls to S3, one to retrieve the current |
| 414 ACL and one to set that ACL on the new object. If you |
| 415 don't care about the ACL, a value of False will be |
| 416 significantly more efficient. |
| 417 |
| 418 :type encrypt_key: bool |
| 419 :param encrypt_key: If True, the new copy of the object will |
| 420 be encrypted on the server-side by S3 and will be stored |
| 421 in an encrypted form while at rest in S3. |
| 422 |
| 423 :type validate_dst_bucket: bool |
| 424 :param validate_dst_bucket: If True, will validate the dst_bucket |
| 425 by using an extra list request. |
| 426 |
| 427 :rtype: :class:`boto.s3.key.Key` or subclass |
| 428 :returns: An instance of the newly created key object |
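| |
| Illustrative usage (assumes a Key 'k'; the destination bucket |
| name is hypothetical): |
| |
| >>> new_key = k.copy('backup-bucket', k.name, preserve_acl=True) |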
| 429 """ |
| 430 dst_bucket = self.bucket.connection.lookup(dst_bucket, |
| 431 validate_dst_bucket) |
| 432 if reduced_redundancy: |
| 433 storage_class = 'REDUCED_REDUNDANCY' |
| 434 else: |
| 435 storage_class = self.storage_class |
| 436 return dst_bucket.copy_key(dst_key, self.bucket.name, |
| 437 self.name, metadata, |
| 438 storage_class=storage_class, |
| 439 preserve_acl=preserve_acl, |
| 440 encrypt_key=encrypt_key) |
| 441 |
| 442 def startElement(self, name, attrs, connection): |
| 443 if name == 'Owner': |
| 444 self.owner = User(self) |
| 445 return self.owner |
| 446 else: |
| 447 return None |
| 448 |
| 449 def endElement(self, name, value, connection): |
| 450 if name == 'Key': |
| 451 self.name = value |
| 452 elif name == 'ETag': |
| 453 self.etag = value |
| 454 elif name == 'IsLatest': |
| 455 if value == 'true': |
| 456 self.is_latest = True |
| 457 else: |
| 458 self.is_latest = False |
| 459 elif name == 'LastModified': |
| 460 self.last_modified = value |
| 461 elif name == 'Size': |
| 462 self.size = int(value) |
| 463 elif name == 'StorageClass': |
| 464 self.storage_class = value |
| 465 elif name == 'Owner': |
| 466 pass |
| 467 elif name == 'VersionId': |
| 468 self.version_id = value |
| 469 else: |
| 470 setattr(self, name, value) |
| 471 |
| 472 def exists(self): |
| 473 """ |
| 474 Returns True if the key exists |
| 475 |
| 476 :rtype: bool |
| 477 :return: Whether the key exists on S3 |
| 478 """ |
| 479 return bool(self.bucket.lookup(self.name)) |
| 480 |
| 481 def delete(self): |
| 482 """ |
| 483 Delete this key from S3 |
| 484 """ |
| 485 return self.bucket.delete_key(self.name, version_id=self.version_id) |
| 486 |
| 487 def get_metadata(self, name): |
| 488 return self.metadata.get(name) |
| 489 |
| 490 def set_metadata(self, name, value): |
| 491 self.metadata[name] = value |
| 492 |
| 493 def update_metadata(self, d): |
| 494 self.metadata.update(d) |
| 495 |
| 496 # convenience methods for setting/getting ACL |
| 497 def set_acl(self, acl_str, headers=None): |
| 498 if self.bucket is not None: |
| 499 self.bucket.set_acl(acl_str, self.name, headers=headers) |
| 500 |
| 501 def get_acl(self, headers=None): |
| 502 if self.bucket is not None: |
| 503 return self.bucket.get_acl(self.name, headers=headers) |
| 504 |
| 505 def get_xml_acl(self, headers=None): |
| 506 if self.bucket is not None: |
| 507 return self.bucket.get_xml_acl(self.name, headers=headers) |
| 508 |
| 509 def set_xml_acl(self, acl_str, headers=None): |
| 510 if self.bucket is not None: |
| 511 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers) |
| 512 |
| 513 def set_canned_acl(self, acl_str, headers=None): |
| 514 return self.bucket.set_canned_acl(acl_str, self.name, headers) |
| 515 |
| 516 def get_redirect(self): |
| 517 """Return the redirect location configured for this key. |
| 518 |
| 519 If no redirect is configured (via set_redirect), then None |
| 520 will be returned. |
| 521 |
| 522 """ |
| 523 response = self.bucket.connection.make_request( |
| 524 'HEAD', self.bucket.name, self.name) |
| 525 if response.status == 200: |
| 526 return response.getheader('x-amz-website-redirect-location') |
| 527 else: |
| 528 raise self.provider.storage_response_error( |
| 529 response.status, response.reason, response.read()) |
| 530 |
| 531 def set_redirect(self, redirect_location): |
| 532 """Configure this key to redirect to another location. |
| 533 |
| 534 When the bucket associated with this key is accessed from the website |
| 535 endpoint, a 301 redirect will be issued to the specified |
| 536 `redirect_location`. |
| 537 |
| 538 :type redirect_location: string |
| 539 :param redirect_location: The location to redirect. |
| 540 |
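| Example (a sketch; the target URL is hypothetical): |
| |
| >>> key.set_redirect('http://example.com/new-location') |
| |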
| 541 """ |
| 542 headers = {'x-amz-website-redirect-location': redirect_location} |
| 543 response = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 544 self.name, headers) |
| 545 if response.status == 200: |
| 546 return True |
| 547 else: |
| 548 raise self.provider.storage_response_error( |
| 549 response.status, response.reason, response.read()) |
| 550 |
| 551 def make_public(self, headers=None): |
| 552 return self.bucket.set_canned_acl('public-read', self.name, headers) |
| 553 |
| 554 def generate_url(self, expires_in, method='GET', headers=None, |
| 555 query_auth=True, force_http=False, response_headers=None, |
| 556 expires_in_absolute=False, version_id=None, |
| 557 policy=None, reduced_redundancy=False, encrypt_key=False): |
| 558 """ |
| 559 Generate a URL to access this key. |
| 560 |
| 561 :type expires_in: int |
| 562 :param expires_in: How long the url is valid for, in seconds |
| 563 |
| 564 :type method: string |
| 565 :param method: The method to use for retrieving the file |
| 566 (default is GET) |
| 567 |
| 568 :type headers: dict |
| 569 :param headers: Any headers to pass along in the request |
| 570 |
| 571 :type query_auth: bool |
| 572 :param query_auth: If True, include query string authentication parameters in the URL (a signed URL); if False, return an unsigned URL. |
| 573 |
| 574 :type force_http: bool |
| 575 :param force_http: If True, http will be used instead of https. |
| 576 |
| 577 :type response_headers: dict |
| 578 :param response_headers: A dictionary containing HTTP |
| 579 headers/values that will override any headers associated |
| 580 with the stored object in the response. See |
| 581 http://goo.gl/EWOPb for details. |
| 582 |
| 583 :type expires_in_absolute: bool |
| 584 :param expires_in_absolute: If True, 'expires_in' is treated as an absolute epoch timestamp rather than a number of seconds from now. |
| 585 |
| 586 :type version_id: string |
| 587 :param version_id: The version_id of the object to GET. If specified |
| 588 this overrides any value in the key. |
| 589 |
| 590 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 591 :param policy: A canned ACL policy that will be applied to the |
| 592 new key in S3. |
| 593 |
| 594 :type reduced_redundancy: bool |
| 595 :param reduced_redundancy: If True, this will set the storage |
| 596 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 597 Redundancy Storage (RRS) feature of S3 provides lower |
| 598 redundancy at lower storage cost. |
| 599 |
| 600 :type encrypt_key: bool |
| 601 :param encrypt_key: If True, the new copy of the object will |
| 602 be encrypted on the server-side by S3 and will be stored |
| 603 in an encrypted form while at rest in S3. |
| 604 |
| 605 :rtype: string |
| 606 :return: The URL to access the key |
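| |
| A typical use, sketched (one-hour expiry; assumes a Key 'k'): |
| |
| >>> url = k.generate_url(3600) |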
| 607 """ |
| 608 provider = self.bucket.connection.provider |
| 609 version_id = version_id or self.version_id |
| 610 if headers is None: |
| 611 headers = {} |
| 612 else: |
| 613 headers = headers.copy() |
| 614 |
| 615 # add headers accordingly (usually PUT case) |
| 616 if policy: |
| 617 headers[provider.acl_header] = policy |
| 618 if reduced_redundancy: |
| 619 self.storage_class = 'REDUCED_REDUNDANCY' |
| 620 if provider.storage_class_header: |
| 621 headers[provider.storage_class_header] = self.storage_class |
| 622 if encrypt_key: |
| 623 headers[provider.server_side_encryption_header] = 'AES256' |
| 624 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 625 |
| 626 return self.bucket.connection.generate_url(expires_in, method, |
| 627 self.bucket.name, self.name, |
| 628 headers, query_auth, |
| 629 force_http, |
| 630 response_headers, |
| 631 expires_in_absolute, |
| 632 version_id) |
| 633 |
| 634 def send_file(self, fp, headers=None, cb=None, num_cb=10, |
| 635 query_args=None, chunked_transfer=False, size=None): |
| 636 """ |
| 637 Upload a file to a key into a bucket on S3. |
| 638 |
| 639 :type fp: file |
| 640 :param fp: The file pointer to upload. The file pointer must |
| 641 point at the offset from which you wish to upload; |
| 642 i.e., if uploading the full file, it should point at the |
| 643 start of the file. Normally when a file is opened for |
| 644 reading, the fp will point at the first byte. See the |
| 645 size parameter below for more info. |
| 646 |
| 647 :type headers: dict |
| 648 :param headers: The headers to pass along with the PUT request |
| 649 |
| 650 :type cb: function |
| 651 :param cb: a callback function that will be called to report |
| 652 progress on the upload. The callback should accept two |
| 653 integer parameters, the first representing the number of |
| 654 bytes that have been successfully transmitted to S3 and |
| 655 the second representing the size of the object to be |
| 656 transmitted. |
| 657 |
| 658 :type num_cb: int |
| 659 :param num_cb: (optional) If a callback is specified with the |
| 660 cb parameter, this parameter determines the granularity of |
| 661 the callback by defining the maximum number of times the |
| 662 callback will be called during the file |
| 663 transfer. Providing a negative integer will cause your |
| 664 callback to be called with each buffer read. |
| 665 |
| 666 :type size: int |
| 667 :param size: (optional) The maximum number of bytes to read |
| 668 from the file pointer (fp). This is useful when uploading |
| 669 a file in multiple parts where you are splitting the file |
| 670 up into different ranges to be uploaded. If not specified, |
| 671 the default behaviour is to read all bytes from the file |
| 672 pointer. Fewer bytes may be available. |
| 673 """ |
| 674 provider = self.bucket.connection.provider |
| 675 try: |
| 676 spos = fp.tell() |
| 677 except IOError: |
| 678 spos = None |
| 679 self.read_from_stream = False |
| 680 |
| 681 def sender(http_conn, method, path, data, headers): |
| 682 # This function is called repeatedly for temporary retries |
| 683 # so we must be sure the file pointer is pointing at the |
| 684 # start of the data. |
| 685 if spos is not None and spos != fp.tell(): |
| 686 fp.seek(spos) |
| 687 elif spos is None and self.read_from_stream: |
| 688 # if seek is not supported, and we've read from this |
| 689 # stream already, then we need to abort retries to |
| 690 # avoid setting bad data. |
| 691 raise provider.storage_data_error( |
| 692 'Cannot retry failed request. fp does not support seeking.') |
| 693 |
| 694 http_conn.putrequest(method, path) |
| 695 for key in headers: |
| 696 http_conn.putheader(key, headers[key]) |
| 697 http_conn.endheaders() |
| 698 |
| 699 # Calculate all MD5 checksums on the fly, if not already computed |
| 700 if not self.base64md5: |
| 701 m = md5() |
| 702 else: |
| 703 m = None |
| 704 |
| 705 save_debug = self.bucket.connection.debug |
| 706 self.bucket.connection.debug = 0 |
| 707 # If the debuglevel < 4 we don't want to show connection |
| 708 # payload, so turn off HTTP connection-level debug output (to |
| 709 # be restored below). |
| 710 # Use the getattr approach to allow this to work in AppEngine. |
| 711 if getattr(http_conn, 'debuglevel', 0) < 4: |
| 712 http_conn.set_debuglevel(0) |
| 713 |
| 714 data_len = 0 |
| 715 if cb: |
| 716 if size: |
| 717 cb_size = size |
| 718 elif self.size: |
| 719 cb_size = self.size |
| 720 else: |
| 721 cb_size = 0 |
| 722 if chunked_transfer and cb_size == 0: |
| 723 # For chunked transfer, we call the cb for every 1MB |
| 724 # of data transferred, except when we know size. |
| 725 cb_count = (1024 * 1024) / self.BufferSize |
| 726 elif num_cb > 1: |
| 727 cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) |
| 728 elif num_cb < 0: |
| 729 cb_count = -1 |
| 730 else: |
| 731 cb_count = 0 |
| 732 i = 0 |
| 733 cb(data_len, cb_size) |
| 734 |
| 735 bytes_togo = size |
| 736 if bytes_togo and bytes_togo < self.BufferSize: |
| 737 chunk = fp.read(bytes_togo) |
| 738 else: |
| 739 chunk = fp.read(self.BufferSize) |
| 740 if spos is None: |
| 741 # read at least something from a non-seekable fp. |
| 742 self.read_from_stream = True |
| 743 while chunk: |
| 744 chunk_len = len(chunk) |
| 745 data_len += chunk_len |
| 746 if chunked_transfer: |
| 747 http_conn.send('%x;\r\n' % chunk_len) |
| 748 http_conn.send(chunk) |
| 749 http_conn.send('\r\n') |
| 750 else: |
| 751 http_conn.send(chunk) |
| 752 if m: |
| 753 m.update(chunk) |
| 754 if bytes_togo: |
| 755 bytes_togo -= chunk_len |
| 756 if bytes_togo <= 0: |
| 757 break |
| 758 if cb: |
| 759 i += 1 |
| 760 if i == cb_count or cb_count == -1: |
| 761 cb(data_len, cb_size) |
| 762 i = 0 |
| 763 if bytes_togo and bytes_togo < self.BufferSize: |
| 764 chunk = fp.read(bytes_togo) |
| 765 else: |
| 766 chunk = fp.read(self.BufferSize) |
| 767 |
| 768 self.size = data_len |
| 769 |
| 770 if m: |
| 771 # Use the chunked trailer for the digest |
| 772 hd = m.hexdigest() |
| 773 self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd) |
| 774 |
| 775 if chunked_transfer: |
| 776 http_conn.send('0\r\n') |
| 777 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) |
| 778 http_conn.send('\r\n') |
| 779 |
| 780 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 781 cb(data_len, cb_size) |
| 782 |
| 783 http_conn.set_debuglevel(save_debug) |
| 784 self.bucket.connection.debug = save_debug |
| 785 response = http_conn.getresponse() |
| 786 body = response.read() |
| 787 if ((response.status == 500 or response.status == 503 or |
| 788 response.getheader('location')) and not chunked_transfer): |
| 789 # we'll try again. |
| 790 return response |
| 791 elif response.status >= 200 and response.status <= 299: |
| 792 self.etag = response.getheader('etag') |
| 793 if self.etag != '"%s"' % self.md5: |
| 794 raise provider.storage_data_error( |
| 795 'ETag from S3 did not match computed MD5') |
| 796 return response |
| 797 else: |
| 798 raise provider.storage_response_error( |
| 799 response.status, response.reason, body) |
| 800 |
| 801 if not headers: |
| 802 headers = {} |
| 803 else: |
| 804 headers = headers.copy() |
| 805 headers['User-Agent'] = UserAgent |
| 806 if self.storage_class != 'STANDARD': |
| 807 headers[provider.storage_class_header] = self.storage_class |
| 808 if 'Content-Encoding' in headers: |
| 809 self.content_encoding = headers['Content-Encoding'] |
| 810 if 'Content-Language' in headers: |
| 811 self.content_language = headers['Content-Language'] |
| 812 if 'Content-Type' in headers: |
| 813 # Some use cases need to suppress sending of the Content-Type |
| 814 # header and depend on the receiving server to set the content |
| 815 # type. This can be achieved by setting headers['Content-Type'] |
| 816 # to None when calling this method. |
| 817 if headers['Content-Type'] is None: |
| 818 # Delete null Content-Type value to skip sending that header. |
| 819 del headers['Content-Type'] |
| 820 else: |
| 821 self.content_type = headers['Content-Type'] |
| 822 elif self.path: |
| 823 self.content_type = mimetypes.guess_type(self.path)[0] |
| 824 if self.content_type is None: |
| 825 self.content_type = self.DefaultContentType |
| 826 headers['Content-Type'] = self.content_type |
| 827 else: |
| 828 headers['Content-Type'] = self.content_type |
| 829 if self.base64md5: |
| 830 headers['Content-MD5'] = self.base64md5 |
| 831 if chunked_transfer: |
| 832 headers['Transfer-Encoding'] = 'chunked' |
| 833 #if not self.base64md5: |
| 834 # headers['Trailer'] = "Content-MD5" |
| 835 else: |
| 836 headers['Content-Length'] = str(self.size) |
| 837 headers['Expect'] = '100-Continue' |
| 838 headers = boto.utils.merge_meta(headers, self.metadata, provider) |
| 839 resp = self.bucket.connection.make_request('PUT', self.bucket.name, |
| 840 self.name, headers, |
| 841 sender=sender, |
| 842 query_args=query_args) |
| 843 self.handle_version_headers(resp, force=True) |
| 844 |
| 845 def compute_md5(self, fp, size=None): |
| 846 """ |
| 847 :type fp: file |
| 848 :param fp: File pointer to the file to MD5 hash. The file |
| 849 pointer will be reset to the same position before the |
| 850 method returns. |
| 851 |
| 852 :type size: int |
| 853 :param size: (optional) The maximum number of bytes to read |
| 854 from the file pointer (fp). This is useful when uploading |
| 855 a file in multiple parts where the file is being split |
| 856 in place into different parts. Fewer bytes may be available. |
| 857 |
| 858 :rtype: tuple |
| 859 :return: A tuple containing the hex digest version of the MD5 |
| 860 hash as the first element and the base64 encoded version |
| 861 of the plain digest as the second element. |
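| |
| Illustrative sketch (the local path is an assumption): |
| |
| >>> fp = open('/tmp/data.bin', 'rb') |
| >>> hex_md5, b64_md5 = k.compute_md5(fp) |
| >>> fp.close() |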
| 862 """ |
| 863 tup = compute_md5(fp, size=size) |
| 864 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. |
| 865 # The internal implementation of compute_md5() needs to return the |
| 866 # data size but we don't want to return that value to the external |
| 867 # caller because it changes the class interface (i.e. it might |
| 868 # break some code) so we consume the third tuple value here and |
| 869 # return the remainder of the tuple to the caller, thereby preserving |
| 870 # the existing interface. |
| 871 self.size = tup[2] |
| 872 return tup[0:2] |
| 873 |
| 874 def set_contents_from_stream(self, fp, headers=None, replace=True, |
| 875 cb=None, num_cb=10, policy=None, |
| 876 reduced_redundancy=False, query_args=None, |
| 877 size=None): |
| 878 """ |
| 879 Store an object using the name of the Key object as the key in |
| 880 cloud and the contents of the data stream pointed to by 'fp' as |
| 881 the contents. |
| 882 |
| 883 The stream object is not seekable and total size is not known. |
| 884 This has the implication that we can't specify the |
| 885 Content-Length and Content-MD5 headers. So for huge |
| 886 uploads, the delay in calculating MD5 is avoided, but at the |
| 887 cost of being unable to verify the integrity of the uploaded |
| 888 data. |
| 889 |
| 890 :type fp: file |
| 891 :param fp: the file whose contents are to be uploaded |
| 892 |
| 893 :type headers: dict |
| 894 :param headers: additional HTTP headers to be sent with the |
| 895 PUT request. |
| 896 |
| 897 :type replace: bool |
| 898 :param replace: If this parameter is False, the method will first check |
| 899 to see if an object exists in the bucket with the same key. If it |
| 900 does, it won't overwrite it. The default value is True which will |
| 901 overwrite the object. |
| 902 |
| 903 :type cb: function |
| 904 :param cb: a callback function that will be called to report |
| 905 progress on the upload. The callback should accept two integer |
| 906 parameters, the first representing the number of bytes that have |
| 907 been successfully transmitted to GS and the second representing the |
| 908 total number of bytes that need to be transmitted. |
| 909 |
| 910 :type num_cb: int |
| 911 :param num_cb: (optional) If a callback is specified with the |
| 912 cb parameter, this parameter determines the granularity of |
| 913 the callback by defining the maximum number of times the |
| 914 callback will be called during the file transfer. |
| 915 |
| 916 :type policy: :class:`boto.gs.acl.CannedACLStrings` |
| 917 :param policy: A canned ACL policy that will be applied to the new key |
| 918 in GS. |
| 919 |
| 920 :type reduced_redundancy: bool |
| 921 :param reduced_redundancy: If True, this will set the storage |
| 922 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 923 Redundancy Storage (RRS) feature of S3 provides lower |
| 924 redundancy at lower storage cost. |
| 925 |
| 926 :type size: int |
| 927 :param size: (optional) The maximum number of bytes to read from |
| 928 the file pointer (fp). This is useful when uploading a |
| 929 file in multiple parts where you are splitting the file up |
| 930 into different ranges to be uploaded. If not specified, |
| 931 the default behaviour is to read all bytes from the file |
| 932 pointer. Fewer bytes may be available. |
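| |
| A hedged sketch (assumes a non-seekable source such as a pipe): |
| |
| >>> import sys |
| >>> k.set_contents_from_stream(sys.stdin) |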
| 933 """ |
| 934 |
| 935 provider = self.bucket.connection.provider |
| 936 if not provider.supports_chunked_transfer(): |
| 937 raise BotoClientError('%s does not support chunked transfer' |
| 938 % provider.get_provider_name()) |
| 939 |
| 940 # Name of the Object should be specified explicitly for Streams. |
| 941 if not self.name: |
| 942 raise BotoClientError('Cannot determine the destination ' |
| 943 'object name for the given stream') |
| 944 |
| 945 if headers is None: |
| 946 headers = {} |
| 947 if policy: |
| 948 headers[provider.acl_header] = policy |
| 949 |
| 950 if reduced_redundancy: |
| 951 self.storage_class = 'REDUCED_REDUNDANCY' |
| 952 if provider.storage_class_header: |
| 953 headers[provider.storage_class_header] = self.storage_class |
| 954 |
| 955 if self.bucket is not None: |
| 956 if not replace: |
| 957 if self.bucket.lookup(self.name): |
| 958 return |
| 959 self.send_file(fp, headers, cb, num_cb, query_args, |
| 960 chunked_transfer=True, size=size) |
| 961 |
| 962 def set_contents_from_file(self, fp, headers=None, replace=True, |
| 963 cb=None, num_cb=10, policy=None, md5=None, |
| 964 reduced_redundancy=False, query_args=None, |
| 965 encrypt_key=False, size=None, rewind=False): |
| 966 """ |
| 967 Store an object in S3 using the name of the Key object as the |
| 968 key in S3 and the contents of the file pointed to by 'fp' as the |
| 969 contents. The data is read from 'fp' from its current position until |
| 970 'size' bytes have been read or EOF. |
| 971 |
| 972 :type fp: file |
| 973 :param fp: the file whose contents to upload |
| 974 |
| 975 :type headers: dict |
| 976 :param headers: Additional HTTP headers that will be sent with |
| 977 the PUT request. |
| 978 |
| 979 :type replace: bool |
| 980 :param replace: If this parameter is False, the method will |
| 981 first check to see if an object exists in the bucket with |
| 982 the same key. If it does, it won't overwrite it. The |
| 983 default value is True which will overwrite the object. |
| 984 |
| 985 :type cb: function |
| 986 :param cb: a callback function that will be called to report |
| 987 progress on the upload. The callback should accept two |
| 988 integer parameters, the first representing the number of |
| 989 bytes that have been successfully transmitted to S3 and |
| 990 the second representing the size of the object to be |
| 991 transmitted. |
| 992 |
| 993 :type num_cb: int |
| 994 :param num_cb: (optional) If a callback is specified with the |
| 995 cb parameter, this parameter determines the granularity of |
| 996 the callback by defining the maximum number of times the |
| 997 callback will be called during the file transfer. |
| 998 |
| 999 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1000 :param policy: A canned ACL policy that will be applied to the |
| 1001 new key in S3. |
| 1002 |
| 1003 :type md5: tuple |
| 1004 :param md5: A tuple containing the hexdigest version of the MD5 |
| 1005 checksum of the file as the first element and the |
| 1006 Base64-encoded version of the plain checksum as the second |
| 1007 element. This is the same format returned by the |
| 1008 compute_md5 method. If you need to compute the MD5 for any |
| 1009 reason prior to upload, it's silly to have to do it twice, |
| 1010 so this param, if present, will be used as the MD5 values |
| 1011 of the file. Otherwise, the checksum will be computed. |
| 1012 |
| 1013 :type reduced_redundancy: bool |
| 1014 :param reduced_redundancy: If True, this will set the storage |
| 1015 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1016 Redundancy Storage (RRS) feature of S3 provides lower |
| 1017 redundancy at lower storage cost. |
| 1018 |
| 1019 :type encrypt_key: bool |
| 1020 :param encrypt_key: If True, the new copy of the object will |
| 1021 be encrypted on the server-side by S3 and will be stored |
| 1022 in an encrypted form while at rest in S3. |
| 1023 |
| 1024 :type size: int |
| 1025 :param size: (optional) The maximum number of bytes to read |
| 1026 from the file pointer (fp). This is useful when uploading |
| 1027 a file in multiple parts where you are splitting the file |
| 1028 up into different ranges to be uploaded. If not specified, |
| 1029 the default behaviour is to read all bytes from the file |
| 1030 pointer. Fewer bytes may be available. |
| 1031 |
| 1032 :type rewind: bool |
| 1033 :param rewind: (optional) If True, the file pointer (fp) will |
| 1034 be rewound to the start before any bytes are read from |
| 1035 it. The default behaviour is False which reads from the |
| 1036 current position of the file pointer (fp). |
| 1037 |
| 1038 :rtype: int |
| 1039 :return: The number of bytes written to the key. |
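| |
| Illustrative sketch (the local path is an assumption): |
| |
| >>> with open('/tmp/report.csv', 'rb') as fp: |
| ...     k.set_contents_from_file(fp, rewind=True) |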
| 1040 """ |
| 1041 provider = self.bucket.connection.provider |
| 1042 headers = headers or {} |
| 1043 if policy: |
| 1044 headers[provider.acl_header] = policy |
| 1045 if encrypt_key: |
| 1046 headers[provider.server_side_encryption_header] = 'AES256' |
| 1047 |
| 1048 if rewind: |
| 1049 # caller requests reading from beginning of fp. |
| 1050 fp.seek(0, os.SEEK_SET) |
| 1051 else: |
| 1052 # The following seek/tell/seek logic is intended |
| 1053 # to detect applications using the older interface to |
| 1054 # set_contents_from_file(), which automatically rewound the |
| 1055 # file each time the Key was reused. This changed with commit |
| 1056 # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads |
| 1057 # split into multiple parts and uploaded in parallel, and at |
| 1058 # the time of that commit this check was added because otherwise |
| 1059 # older programs would get a success status and upload an empty |
| 1060 object. Unfortunately, it's very inefficient for fp's implemented |
| 1061 # by KeyFile (used, for example, by gsutil when copying between |
| 1062 # providers). So, we skip the check for the KeyFile case. |
| 1063 # TODO: At some point consider removing this seek/tell/seek |
| 1064 # logic, after enough time has passed that it's unlikely any |
| 1065 # programs remain that assume the older auto-rewind interface. |
| 1066 if not isinstance(fp, KeyFile): |
| 1067 spos = fp.tell() |
| 1068 fp.seek(0, os.SEEK_END) |
| 1069 if fp.tell() == spos: |
| 1070 fp.seek(0, os.SEEK_SET) |
| 1071 if fp.tell() != spos: |
| 1072 # Raise an exception as this is likely a programming |
| 1073 # error whereby there is data before the fp but nothing |
| 1074 # after it. |
| 1075 fp.seek(spos) |
| 1076 raise AttributeError('fp is at EOF. Use rewind option ' |
| 1077 'or seek() to data start.') |
| 1078 # seek back to the correct position. |
| 1079 fp.seek(spos) |
| 1080 |
| 1081 if reduced_redundancy: |
| 1082 self.storage_class = 'REDUCED_REDUNDANCY' |
| 1083 if provider.storage_class_header: |
| 1084 headers[provider.storage_class_header] = self.storage_class |
| 1085 # TODO - What if provider doesn't support reduced redundancy? |
| 1086 # What if different providers provide different classes? |
| 1087 if hasattr(fp, 'name'): |
| 1088 self.path = fp.name |
| 1089 if self.bucket is not None: |
| 1090 if not md5 and provider.supports_chunked_transfer(): |
| 1091 # defer md5 calculation to on the fly and |
| 1092 # we don't know anything about size yet. |
| 1093 chunked_transfer = True |
| 1094 self.size = None |
| 1095 else: |
| 1096 chunked_transfer = False |
| 1097 if isinstance(fp, KeyFile): |
| 1098 # Avoid EOF seek for KeyFile case as it's very inefficient. |
| 1099 key = fp.getkey() |
| 1100 size = key.size - fp.tell() |
| 1101 self.size = size |
| 1102 # At present both GCS and S3 use MD5 for the etag for |
| 1103 # non-multipart-uploaded objects. If the etag is 32 hex |
| 1104 # chars use it as an MD5, to avoid having to read the file |
| 1105 # twice while transferring. |
| 1106 if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)): |
| 1107 etag = key.etag.strip('"') |
| 1108 md5 = (etag, base64.b64encode(binascii.unhexlify(etag))) |
| 1109 if not md5: |
| 1110 # compute_md5() and also set self.size to actual |
| 1111 # size of the bytes read computing the md5. |
| 1112 md5 = self.compute_md5(fp, size) |
| 1113 # adjust size if required |
| 1114 size = self.size |
| 1115 elif size: |
| 1116 self.size = size |
| 1117 else: |
| 1118 # If md5 is provided, we still need the size, so |
| 1119 # calculate it from the bytes remaining to end of content. |
| 1120 spos = fp.tell() |
| 1121 fp.seek(0, os.SEEK_END) |
| 1122 self.size = fp.tell() - spos |
| 1123 fp.seek(spos) |
| 1124 size = self.size |
| 1125 self.md5 = md5[0] |
| 1126 self.base64md5 = md5[1] |
| 1127 |
| 1128 if self.name is None: |
| 1129 self.name = self.md5 |
| 1130 if not replace: |
| 1131 if self.bucket.lookup(self.name): |
| 1132 return |
| 1133 |
| 1134 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1135 query_args=query_args, |
| 1136 chunked_transfer=chunked_transfer, size=size) |
| 1137 # return number of bytes written. |
| 1138 return self.size |
| 1139 |
| 1140 def set_contents_from_filename(self, filename, headers=None, replace=True, |
| 1141 cb=None, num_cb=10, policy=None, md5=None, |
| 1142 reduced_redundancy=False, |
| 1143 encrypt_key=False): |
| 1144 """ |
| 1145 Store an object in S3 using the name of the Key object as the |
| 1146 key in S3 and the contents of the file named by 'filename'. |
| 1147 See set_contents_from_file method for details about the |
| 1148 parameters. |
| 1149 |
| 1150 :type filename: string |
| 1151 :param filename: The name of the file that you want to put onto S3 |
| 1152 |
| 1153 :type headers: dict |
| 1154 :param headers: Additional headers to pass along with the |
| 1155 request to AWS. |
| 1156 |
| 1157 :type replace: bool |
| 1158 :param replace: If True, replaces the contents of the key |
| 1159 if it already exists. |
| 1160 |
| 1161 :type cb: function |
| 1162 :param cb: a callback function that will be called to report |
| 1163 progress on the upload. The callback should accept two |
| 1164 integer parameters, the first representing the number of |
| 1165 bytes that have been successfully transmitted to S3 and |
| 1166 the second representing the size of the object to be |
| 1167 transmitted. |
| 1168 |
| 1169 :type num_cb: int |
| 1170 :param num_cb: (optional) If a callback is specified with the |
| 1171 cb parameter, this parameter determines the granularity of |
| 1172 the callback by defining the maximum number of times the |
| 1173 callback will be called during the file transfer. |
| 1174 |
| 1175 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1176 :param policy: A canned ACL policy that will be applied to the |
| 1177 new key in S3. |
| 1178 |
| 1179 :type md5: tuple |
| 1180 :param md5: A tuple containing the hexdigest version of the MD5 |
| 1181 checksum of the file as the first element and the |
| 1182 Base64-encoded version of the plain checksum as the second |
| 1183 element. This is the same format returned by the |
| 1184 compute_md5 method. If you need to compute the MD5 for any |
| 1185 reason prior to upload, it's silly to have to do it twice, |
| 1186 so this param, if present, will be used as the MD5 values |
| 1187 of the file. Otherwise, the checksum will be computed. |
| 1188 |
| 1189 :type reduced_redundancy: bool |
| 1190 :param reduced_redundancy: If True, this will set the storage |
| 1191 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1192 Redundancy Storage (RRS) feature of S3 provides lower |
| 1193 redundancy at lower storage cost. |
| |
| :type encrypt_key: bool |
| 1194 :param encrypt_key: If True, the new copy of the object |
| 1195 will be encrypted on the server-side by S3 and will be |
| 1196 stored in an encrypted form while at rest in S3. |
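| |
| A minimal sketch (the filename is an assumption): |
| |
| >>> k.set_contents_from_filename('/tmp/photo.jpg', |
| ...                              reduced_redundancy=True) |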
| 1197 """ |
| 1198 fp = open(filename, 'rb') |
| 1199 try: |
| 1200 self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1201 policy, md5, reduced_redundancy, |
| 1202 encrypt_key=encrypt_key) |
| 1203 finally: |
| 1204 fp.close() |
| 1205 |
| 1206 def set_contents_from_string(self, s, headers=None, replace=True, |
| 1207 cb=None, num_cb=10, policy=None, md5=None, |
| 1208 reduced_redundancy=False, |
| 1209 encrypt_key=False): |
| 1210 """ |
| 1211 Store an object in S3 using the name of the Key object as the |
| 1212 key in S3 and the string 's' as the contents. |
| 1213 See set_contents_from_file method for details about the |
| 1214 parameters. |
| 1215 |
| 1216 :type headers: dict |
| 1217 :param headers: Additional headers to pass along with the |
| 1218 request to AWS. |
| 1219 |
| 1220 :type replace: bool |
| 1221 :param replace: If True, replaces the contents of the key if |
| 1222 it already exists. |
| 1223 |
| 1224 :type cb: function |
| 1225 :param cb: a callback function that will be called to report |
| 1226 progress on the upload. The callback should accept two |
| 1227 integer parameters, the first representing the number of |
| 1228 bytes that have been successfully transmitted to S3 and |
| 1229 the second representing the size of the object to be |
| 1230 transmitted. |
| 1231 |
| 1232 :type num_cb: int |
| 1233 :param num_cb: (optional) If a callback is specified with the |
| 1234 cb parameter, this parameter determines the granularity of |
| 1235 the callback by defining the maximum number of times the |
| 1236 callback will be called during the file transfer. |
| 1237 |
| 1238 :type policy: :class:`boto.s3.acl.CannedACLStrings` |
| 1239 :param policy: A canned ACL policy that will be applied to the |
| 1240 new key in S3. |
| 1241 |
| 1242 :type md5: tuple |
| 1243 :param md5: A tuple containing the hexdigest version of the MD5 |
| 1244 checksum of the file as the first element and the |
| 1245 Base64-encoded version of the plain checksum as the second |
| 1246 element. This is the same format returned by the |
| 1247 compute_md5 method. If you need to compute the MD5 for any |
| 1248 reason prior to upload, it's silly to have to do it twice, |
| 1249 so this param, if present, will be used as the MD5 values |
| 1250 of the file. Otherwise, the checksum will be computed. |
| 1251 |
| 1252 :type reduced_redundancy: bool |
| 1253 :param reduced_redundancy: If True, this will set the storage |
| 1254 class of the new Key to be REDUCED_REDUNDANCY. The Reduced |
| 1255 Redundancy Storage (RRS) feature of S3 provides lower |
| 1256 redundancy at lower storage cost. |
| 1257 |
| 1258 :type encrypt_key: bool |
| 1259 :param encrypt_key: If True, the new copy of the object will |
| 1260 be encrypted on the server-side by S3 and will be stored |
| 1261 in an encrypted form while at rest in S3. |
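| |
| For example (a sketch; the key contents are arbitrary): |
| |
| >>> k.set_contents_from_string('hello world') |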
| 1262 """ |
| 1263 if isinstance(s, unicode): |
| 1264 s = s.encode("utf-8") |
| 1265 fp = StringIO.StringIO(s) |
| 1266 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, |
| 1267 policy, md5, reduced_redundancy, |
| 1268 encrypt_key=encrypt_key) |
| 1269 fp.close() |
| 1270 return r |
| 1271 |
| 1272 def get_file(self, fp, headers=None, cb=None, num_cb=10, |
| 1273 torrent=False, version_id=None, override_num_retries=None, |
| 1274 response_headers=None): |
| 1275 """ |
| 1276 Retrieves a file from an S3 Key |
| 1277 |
| 1278 :type fp: file |
| 1279 :param fp: File pointer to put the data into |
| 1280 |
| 1281 :type headers: dict |
| 1282 :param headers: headers to send when retrieving the file |
| 1283 |
| 1284 :type cb: function |
| 1285 :param cb: a callback function that will be called to report |
| 1286 progress on the download. The callback should accept two |
| 1287 integer parameters, the first representing the number of |
| 1288 bytes that have been successfully received from S3 and |
| 1289 the second representing the size of the object to be |
| 1290 downloaded. |
| 1291 |
| 1292 :type num_cb: int |
| 1293 :param num_cb: (optional) If a callback is specified with the |
| 1294 cb parameter, this parameter determines the granularity of |
| 1295 the callback by defining the maximum number of times the |
| 1296 callback will be called during the file transfer. |
| 1297 |
| 1298 :type torrent: bool |
| 1299 :param torrent: Flag for whether to get a torrent for the file |
| 1300 |
| 1301 :type override_num_retries: int |
| 1302 :param override_num_retries: If not None will override configured |
| 1303 num_retries parameter for underlying GET. |
| 1304 |
| 1305 :type response_headers: dict |
| 1306 :param response_headers: A dictionary containing HTTP |
| 1307 headers/values that will override any headers associated |
| 1308 with the stored object in the response. See |
| 1309 http://goo.gl/EWOPb for details. |
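| |
| Illustrative sketch (the local path is an assumption): |
| |
| >>> fp = open('/tmp/download.bin', 'wb') |
| >>> k.get_file(fp) |
| >>> fp.close() |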
| 1310 """ |
| 1311 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, |
| 1312 torrent=torrent, version_id=version_id, |
| 1313 override_num_retries=override_num_retries, |
| 1314 response_headers=response_headers, |
| 1315 query_args=None) |
| 1316 |
| 1317 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, |
| 1318 torrent=False, version_id=None, override_num_retries=None, |
| 1319 response_headers=None, query_args=None): |
| 1320 if headers is None: |
| 1321 headers = {} |
| 1322 save_debug = self.bucket.connection.debug |
| 1323 if self.bucket.connection.debug == 1: |
| 1324 self.bucket.connection.debug = 0 |
| 1325 |
| 1326 query_args = query_args or [] |
| 1327 if torrent: |
| 1328 query_args.append('torrent') |
| 1329 m = None |
| 1330 else: |
| 1331 m = md5() |
| 1332 # If a version_id is passed in, use that. If not, check to see |
| 1333 # if the Key object has an explicit version_id and, if so, use that. |
| 1334 # Otherwise, don't pass a version_id query param. |
| 1335 if version_id is None: |
| 1336 version_id = self.version_id |
| 1337 if version_id: |
| 1338 query_args.append('versionId=%s' % version_id) |
| 1339 if response_headers: |
| 1340 for key in response_headers: |
| 1341 query_args.append('%s=%s' % (key, urllib.quote(response_headers[key]))) |
| 1342 query_args = '&'.join(query_args) |
| 1343 self.open('r', headers, query_args=query_args, |
| 1344 override_num_retries=override_num_retries) |
| 1345 |
| 1346 data_len = 0 |
| 1347 if cb: |
| 1348 if self.size is None: |
| 1349 cb_size = 0 |
| 1350 else: |
| 1351 cb_size = self.size |
| 1352 if self.size is None and num_cb != -1: |
| 1353 # If size is not available due to chunked transfer for example, |
| 1354 # we'll call the cb for every 1MB of data transferred. |
| 1355 cb_count = (1024 * 1024) / self.BufferSize |
| 1356 elif num_cb > 1: |
| 1357 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) |
| 1358 elif num_cb < 0: |
| 1359 cb_count = -1 |
| 1360 else: |
| 1361 cb_count = 0 |
| 1362 i = 0 |
| 1363 cb(data_len, cb_size) |
| 1364 for bytes in self: |
| 1365 fp.write(bytes) |
| 1366 data_len += len(bytes) |
| 1367 if m: |
| 1368 m.update(bytes) |
| 1369 if cb: |
| 1370 if cb_size > 0 and data_len >= cb_size: |
| 1371 break |
| 1372 i += 1 |
| 1373 if i == cb_count or cb_count == -1: |
| 1374 cb(data_len, cb_size) |
| 1375 i = 0 |
| 1376 if cb and (cb_count <= 1 or i > 0) and data_len > 0: |
| 1377 cb(data_len, cb_size) |
| 1378 if m: |
| 1379 self.md5 = m.hexdigest() |
| 1380 if self.size is None and not torrent and "Range" not in headers: |
| 1381 self.size = data_len |
| 1382 self.close() |
| 1383 self.bucket.connection.debug = save_debug |
| 1384 |
| 1385 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): |
| 1386 """ |
| 1387 Get a torrent file (see get_file) |
| 1388 |
| 1389 :type fp: file |
| 1390 :param fp: The file pointer of where to put the torrent |
| 1391 |
| 1392 :type headers: dict |
| 1393 :param headers: Headers to be passed |
| 1394 |
| 1395 :type cb: function |
| 1396 :param cb: a callback function that will be called to report |
| 1397 progress on the download. The callback should accept two |
| 1398 integer parameters, the first representing the number of |
| 1399 bytes that have been successfully received from S3 and |
| 1400 the second representing the size of the object to be |
| 1401 downloaded. |
| 1402 |
| 1403 :type num_cb: int |
| 1404 :param num_cb: (optional) If a callback is specified with the |
| 1405 cb parameter, this parameter determines the granularity of |
| 1406 the callback by defining the maximum number of times the |
| 1407 callback will be called during the file transfer. |
| 1408 |
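| A minimal sketch (the output path is an assumption): |
| |
| >>> fp = open('/tmp/key.torrent', 'wb') |
| >>> k.get_torrent_file(fp) |
| >>> fp.close() |
| |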
| 1409 """ |
| 1410 return self.get_file(fp, headers, cb, num_cb, torrent=True) |
| 1411 |
| 1412 def get_contents_to_file(self, fp, headers=None, |
| 1413 cb=None, num_cb=10, |
| 1414 torrent=False, |
| 1415 version_id=None, |
| 1416 res_download_handler=None, |
| 1417 response_headers=None): |
| 1418 """ |
| 1419 Retrieve an object from S3 using the name of the Key object as the |
| 1420 key in S3. Write the contents of the object to the file pointed |
| 1421 to by 'fp'. |
| 1422 |
        :type fp: file-like object
        :param fp: A file-like object to which the contents of the
            object will be written.
| 1425 |
| 1426 :type headers: dict |
| 1427 :param headers: additional HTTP headers that will be sent with |
| 1428 the GET request. |
| 1429 |
        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transferred from S3 and
            the second representing the total size of the object.

        :type num_cb: int
| 1439 :param num_cb: (optional) If a callback is specified with the |
| 1440 cb parameter this parameter determines the granularity of |
| 1441 the callback by defining the maximum number of times the |
| 1442 callback will be called during the file transfer. |
| 1443 |
        :type torrent: bool
        :param torrent: If True, retrieve the torrent file for the
            object rather than the object itself.
| 1447 |
        :type res_download_handler: ResumableDownloadHandler
| 1449 :param res_download_handler: If provided, this handler will |
| 1450 perform the download. |
| 1451 |
| 1452 :type response_headers: dict |
| 1453 :param response_headers: A dictionary containing HTTP |
| 1454 headers/values that will override any headers associated |
| 1455 with the stored object in the response. See |
| 1456 http://goo.gl/EWOPb for details. |
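
        Example (a minimal sketch; the bucket and key names below are
        placeholders for objects that already exist)::

            import boto
            key = boto.connect_s3().get_bucket('mybucket').get_key('mykey')
            with open('/tmp/mykey', 'wb') as fp:
                key.get_contents_to_file(fp)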
| 1457 """ |
        if self.bucket is not None:
| 1459 if res_download_handler: |
| 1460 res_download_handler.get_file(self, fp, headers, cb, num_cb, |
| 1461 torrent=torrent, |
| 1462 version_id=version_id) |
| 1463 else: |
| 1464 self.get_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1465 version_id=version_id, |
| 1466 response_headers=response_headers) |
| 1467 |
| 1468 def get_contents_to_filename(self, filename, headers=None, |
| 1469 cb=None, num_cb=10, |
| 1470 torrent=False, |
| 1471 version_id=None, |
| 1472 res_download_handler=None, |
| 1473 response_headers=None): |
| 1474 """ |
| 1475 Retrieve an object from S3 using the name of the Key object as the |
| 1476 key in S3. Store contents of the object to a file named by 'filename'. |
| 1477 See get_contents_to_file method for details about the |
| 1478 parameters. |
| 1479 |
| 1480 :type filename: string |
        :param filename: The path of the local file to write the
            contents to
| 1482 |
| 1483 :type headers: dict |
| 1484 :param headers: Any additional headers to send in the request |
| 1485 |
        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transferred from S3 and
            the second representing the total size of the object.

        :type num_cb: int
| 1495 :param num_cb: (optional) If a callback is specified with the |
| 1496 cb parameter this parameter determines the granularity of |
| 1497 the callback by defining the maximum number of times the |
| 1498 callback will be called during the file transfer. |
| 1499 |
        :type torrent: bool
        :param torrent: If True, retrieve the torrent file for the
            object rather than the object itself.
| 1503 |
        :type res_download_handler: ResumableDownloadHandler
| 1505 :param res_download_handler: If provided, this handler will |
| 1506 perform the download. |
| 1507 |
| 1508 :type response_headers: dict |
| 1509 :param response_headers: A dictionary containing HTTP |
| 1510 headers/values that will override any headers associated |
| 1511 with the stored object in the response. See |
| 1512 http://goo.gl/EWOPb for details. |
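
        Example (a minimal sketch; assumes ``bucket`` is an existing
        :class:`boto.s3.bucket.Bucket`)::

            key = bucket.get_key('logs/2012-06-01.gz')
            key.get_contents_to_filename('/tmp/2012-06-01.gz')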
| 1513 """ |
        try:
            with open(filename, 'wb') as fp:
                self.get_contents_to_file(fp, headers, cb, num_cb,
                                          torrent=torrent,
                                          version_id=version_id,
                                          res_download_handler=res_download_handler,
                                          response_headers=response_headers)
        except Exception:
            # The file is closed by the time we get here, so the partial
            # download can be removed safely, even on Windows.
            os.remove(filename)
            raise
        # if last_modified date was sent from s3, try to set file's timestamp
        if self.last_modified is not None:
            try:
                modified_tuple = rfc822.parsedate_tz(self.last_modified)
                modified_stamp = int(rfc822.mktime_tz(modified_tuple))
                os.utime(filename, (modified_stamp, modified_stamp))
            except Exception:
                pass
| 1533 |
| 1534 def get_contents_as_string(self, headers=None, |
| 1535 cb=None, num_cb=10, |
| 1536 torrent=False, |
| 1537 version_id=None, |
| 1538 response_headers=None): |
| 1539 """ |
| 1540 Retrieve an object from S3 using the name of the Key object as the |
| 1541 key in S3. Return the contents of the object as a string. |
| 1542 See get_contents_to_file method for details about the |
| 1543 parameters. |
| 1544 |
| 1545 :type headers: dict |
| 1546 :param headers: Any additional headers to send in the request |
| 1547 |
        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the download. The callback should accept two
            integer parameters, the first representing the number of
            bytes that have been successfully transferred from S3 and
            the second representing the total size of the object.

        :type num_cb: int
| 1557 :param num_cb: (optional) If a callback is specified with the |
| 1558 cb parameter this parameter determines the granularity of |
| 1559 the callback by defining the maximum number of times the |
| 1560 callback will be called during the file transfer. |
| 1561 |
| 1562 :type torrent: bool |
| 1563 :param torrent: If True, returns the contents of a torrent file |
| 1564 as a string. |
| 1565 |
| 1566 :type response_headers: dict |
| 1567 :param response_headers: A dictionary containing HTTP |
| 1568 headers/values that will override any headers associated |
| 1569 with the stored object in the response. See |
| 1570 http://goo.gl/EWOPb for details. |
| 1571 |
| 1572 :rtype: string |
| 1573 :returns: The contents of the file as a string |
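
        Example (a minimal sketch; assumes ``bucket`` is an existing
        :class:`boto.s3.bucket.Bucket`)::

            data = bucket.get_key('config.json').get_contents_as_string()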
| 1574 """ |
| 1575 fp = StringIO.StringIO() |
| 1576 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, |
| 1577 version_id=version_id, |
| 1578 response_headers=response_headers) |
| 1579 return fp.getvalue() |
| 1580 |
| 1581 def add_email_grant(self, permission, email_address, headers=None): |
| 1582 """ |
| 1583 Convenience method that provides a quick way to add an email grant |
| 1584 to a key. This method retrieves the current ACL, creates a new |
| 1585 grant based on the parameters passed in, adds that grant to the ACL |
| 1586 and then PUT's the new ACL back to S3. |
| 1587 |
| 1588 :type permission: string |
| 1589 :param permission: The permission being granted. Should be one of: |
| 1590 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). |
| 1591 |
| 1592 :type email_address: string |
        :param email_address: The email address associated with the AWS
            account you are granting the permission to.
| 1595 |
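        Example (a minimal sketch; the address below is a placeholder
        for the email tied to the grantee's AWS account)::

            key = bucket.get_key('report.csv')
            key.add_email_grant('READ', 'user@example.com')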
| 1603 """ |
| 1604 policy = self.get_acl(headers=headers) |
| 1605 policy.acl.add_email_grant(permission, email_address) |
| 1606 self.set_acl(policy, headers=headers) |
| 1607 |
| 1608 def add_user_grant(self, permission, user_id, headers=None, |
| 1609 display_name=None): |
| 1610 """ |
| 1611 Convenience method that provides a quick way to add a canonical |
| 1612 user grant to a key. This method retrieves the current ACL, |
| 1613 creates a new grant based on the parameters passed in, adds that |
| 1614 grant to the ACL and then PUT's the new ACL back to S3. |
| 1615 |
| 1616 :type permission: string |
| 1617 :param permission: The permission being granted. Should be one of: |
| 1618 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). |
| 1619 |
| 1620 :type user_id: string |
        :param user_id: The canonical user id associated with the AWS
            account you are granting the permission to.
| 1623 |
| 1624 :type display_name: string |
        :param display_name: An optional string containing the user's
            Display Name. Only required on Walrus.
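
        Example (a minimal sketch; the id below stands in for a real
        64-character canonical user id)::

            key.add_user_grant('READ', '<canonical-user-id>')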
| 1627 """ |
| 1628 policy = self.get_acl(headers=headers) |
| 1629 policy.acl.add_user_grant(permission, user_id, |
| 1630 display_name=display_name) |
| 1631 self.set_acl(policy, headers=headers) |
| 1632 |
| 1633 def _normalize_metadata(self, metadata): |
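        # Metadata names are case-insensitive on the wire, so lower-case
        # every name; accepts either a set of names (used for deletions)
        # or a dict of name/value pairs (used for additions/overrides).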
        if isinstance(metadata, set):
| 1635 norm_metadata = set() |
| 1636 for k in metadata: |
| 1637 norm_metadata.add(k.lower()) |
| 1638 else: |
| 1639 norm_metadata = {} |
| 1640 for k in metadata: |
| 1641 norm_metadata[k.lower()] = metadata[k] |
| 1642 return norm_metadata |
| 1643 |
| 1644 def _get_remote_metadata(self, headers=None): |
| 1645 """ |
        Extracts metadata from the existing object into a dict, so we can
| 1647 overwrite/delete from it to form the new set of metadata to apply to a |
| 1648 key. |
| 1649 """ |
| 1650 metadata = {} |
| 1651 for underscore_name in self._underscore_base_user_settable_fields: |
| 1652 if hasattr(self, underscore_name): |
| 1653 value = getattr(self, underscore_name) |
| 1654 if value: |
| 1655 # Generate HTTP field name corresponding to "_" named field. |
| 1656 field_name = underscore_name.replace('_', '-') |
| 1657 metadata[field_name.lower()] = value |
| 1658 # self.metadata contains custom metadata, which are all user-settable. |
| 1659 prefix = self.provider.metadata_prefix |
| 1660 for underscore_name in self.metadata: |
| 1661 field_name = underscore_name.replace('_', '-') |
| 1662 metadata['%s%s' % (prefix, field_name.lower())] = ( |
| 1663 self.metadata[underscore_name]) |
| 1664 return metadata |
| 1665 |
| 1666 def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl, |
| 1667 headers=None): |
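        """
        Update this key's metadata in place by copying the key onto
        itself: names in metadata_plus are added or overwritten, names
        in metadata_minus are removed, and all other metadata is
        preserved.
        """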
| 1668 metadata_plus = self._normalize_metadata(metadata_plus) |
| 1669 metadata_minus = self._normalize_metadata(metadata_minus) |
| 1670 metadata = self._get_remote_metadata() |
| 1671 metadata.update(metadata_plus) |
| 1672 for h in metadata_minus: |
| 1673 if h in metadata: |
| 1674 del metadata[h] |
| 1675 src_bucket = self.bucket |
| 1676 # Boto prepends the meta prefix when adding headers, so strip prefix in |
| 1677 # metadata before sending back in to copy_key() call. |
| 1678 rewritten_metadata = {} |
| 1679 for h in metadata: |
| 1680 if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')): |
| 1681 rewritten_h = (h.replace('x-goog-meta-', '') |
| 1682 .replace('x-amz-meta-', '')) |
| 1683 else: |
| 1684 rewritten_h = h |
| 1685 rewritten_metadata[rewritten_h] = metadata[h] |
| 1686 metadata = rewritten_metadata |
| 1687 src_bucket.copy_key(self.name, self.bucket.name, self.name, |
| 1688 metadata=metadata, preserve_acl=preserve_acl, |
| 1689 headers=headers) |
| 1690 |
| 1691 def restore(self, days, headers=None): |
| 1692 """Restore an object from an archive. |
| 1693 |
| 1694 :type days: int |
| 1695 :param days: The lifetime of the restored object (must |
| 1696 be at least 1 day). If the object is already restored |
| 1697 then this parameter can be used to readjust the lifetime |
| 1698 of the restored object. In this case, the days |
| 1699 param is with respect to the initial time of the request. |
| 1700 If the object has not been restored, this param is with |
| 1701 respect to the completion time of the request. |
| 1702 |
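        Example (a minimal sketch; assumes the object behind ``key`` has
        been archived to Glacier, e.g. by a lifecycle rule)::

            key.restore(days=5)
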
| 1703 """ |
| 1704 response = self.bucket.connection.make_request( |
| 1705 'POST', self.bucket.name, self.name, |
| 1706 data=self.RestoreBody % days, |
| 1707 headers=headers, query_args='restore') |
| 1708 if response.status not in (200, 202): |
| 1709 provider = self.bucket.connection.provider |
| 1710 raise provider.storage_response_error(response.status, |
| 1711 response.reason, |
| 1712 response.read()) |