| Index: third_party/boto/s3/key.py
|
| diff --git a/third_party/boto/s3/key.py b/third_party/boto/s3/key.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d4a1a5f565e383b0a047aecad139f242e1e038cb
|
| --- /dev/null
|
| +++ b/third_party/boto/s3/key.py
|
| @@ -0,0 +1,1712 @@
|
| +# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
|
| +# Copyright (c) 2011, Nexenta Systems Inc.
|
| +# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
|
| +#
|
| +# Permission is hereby granted, free of charge, to any person obtaining a
|
| +# copy of this software and associated documentation files (the
|
| +# "Software"), to deal in the Software without restriction, including
|
| +# without limitation the rights to use, copy, modify, merge, publish, dis-
|
| +# tribute, sublicense, and/or sell copies of the Software, and to permit
|
| +# persons to whom the Software is furnished to do so, subject to the fol-
|
| +# lowing conditions:
|
| +#
|
| +# The above copyright notice and this permission notice shall be included
|
| +# in all copies or substantial portions of the Software.
|
| +#
|
| +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
| +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
|
| +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
| +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
| +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
| +# IN THE SOFTWARE.
|
| +
|
| +import mimetypes
|
| +import os
|
| +import re
|
| +import rfc822
|
| +import StringIO
|
| +import base64
|
| +import binascii
|
| +import math
|
| +import urllib
|
| +import boto.utils
|
| +from boto.exception import BotoClientError
|
| +from boto.provider import Provider
|
| +from boto.s3.keyfile import KeyFile
|
| +from boto.s3.user import User
|
| +from boto import UserAgent
|
| +from boto.utils import compute_md5
|
| +try:
|
| + from hashlib import md5
|
| +except ImportError:
|
| + from md5 import md5
|
| +
|
| +
|
| +class Key(object):
|
| + """
|
| + Represents a key (object) in an S3 bucket.
|
| +
|
| + :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`.
|
| + :ivar name: The name of this Key object.
|
| + :ivar metadata: A dictionary containing user metadata that you
|
| + wish to store with the object or that has been retrieved from
|
| + an existing object.
|
| + :ivar cache_control: The value of the `Cache-Control` HTTP header.
|
| + :ivar content_type: The value of the `Content-Type` HTTP header.
|
| + :ivar content_encoding: The value of the `Content-Encoding` HTTP header.
|
| + :ivar content_disposition: The value of the `Content-Disposition` HTTP
|
| + header.
|
| + :ivar content_language: The value of the `Content-Language` HTTP header.
|
| + :ivar etag: The `etag` associated with this object.
|
| + :ivar last_modified: The string timestamp representing the last
|
| + time this object was modified in S3.
|
| + :ivar owner: The ID of the owner of this object.
|
| + :ivar storage_class: The storage class of the object. Currently, one of:
|
| + STANDARD | REDUCED_REDUNDANCY | GLACIER
|
| + :ivar md5: The MD5 hash of the contents of the object.
|
| + :ivar size: The size, in bytes, of the object.
|
| + :ivar version_id: The version ID of this object, if it is a versioned
|
| + object.
|
| + :ivar encrypted: Whether the object is encrypted while at rest on
|
| + the server.
|
| + """
|
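| +
|
| + # Illustrative usage sketch (not part of the upstream boto source).
|
| + # It assumes valid AWS credentials and a hypothetical existing bucket
|
| + # named 'mybucket':
|
| + #
|
| + #     import boto
|
| + #     conn = boto.connect_s3()
|
| + #     bucket = conn.get_bucket('mybucket')
|
| + #     key = bucket.new_key('greeting.txt')
|
| + #     key.set_contents_from_string('hello world')
|
| + #     print key.get_contents_as_string()    # 'hello world'
|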
| +
|
| + DefaultContentType = 'application/octet-stream'
|
| +
|
| + RestoreBody = """<?xml version="1.0" encoding="UTF-8"?>
|
| + <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01">
|
| + <Days>%s</Days>
|
| + </RestoreRequest>"""
|
| +
|
| +
|
| + BufferSize = 8192
|
| +
|
| + # The object metadata fields a user can set, other than custom metadata
|
| + # fields (i.e., those beginning with a provider-specific prefix like
|
| + # x-amz-meta).
|
| + base_user_settable_fields = set(["cache-control", "content-disposition",
|
| + "content-encoding", "content-language",
|
| + "content-md5", "content-type"])
|
| + _underscore_base_user_settable_fields = set()
|
| + for f in base_user_settable_fields:
|
| + _underscore_base_user_settable_fields.add(f.replace('-', '_'))
|
| +
|
| +
|
| +
|
| + def __init__(self, bucket=None, name=None):
|
| + self.bucket = bucket
|
| + self.name = name
|
| + self.metadata = {}
|
| + self.cache_control = None
|
| + self.content_type = self.DefaultContentType
|
| + self.content_encoding = None
|
| + self.content_disposition = None
|
| + self.content_language = None
|
| + self.filename = None
|
| + self.etag = None
|
| + self.is_latest = False
|
| + self.last_modified = None
|
| + self.owner = None
|
| + self.storage_class = 'STANDARD'
|
| + self.md5 = None
|
| + self.base64md5 = None
|
| + self.path = None
|
| + self.resp = None
|
| + self.mode = None
|
| + self.size = None
|
| + self.version_id = None
|
| + self.source_version_id = None
|
| + self.delete_marker = False
|
| + self.encrypted = None
|
| + # If the object is being restored, this attribute will be set to True.
|
| + # If the object is restored, it will be set to False. Otherwise this
|
| + # value will be None. If the restore is completed (ongoing_restore =
|
| + # False), the expiry_date will be populated with the expiry date of the
|
| + # restored object.
|
| + self.ongoing_restore = None
|
| + self.expiry_date = None
|
| +
|
| + def __repr__(self):
|
| + if self.bucket:
|
| + return '<Key: %s,%s>' % (self.bucket.name, self.name)
|
| + else:
|
| + return '<Key: None,%s>' % self.name
|
| +
|
| + def __getattr__(self, name):
|
| + if name == 'key':
|
| + return self.name
|
| + else:
|
| + raise AttributeError
|
| +
|
| + def __setattr__(self, name, value):
|
| + if name == 'key':
|
| + self.__dict__['name'] = value
|
| + else:
|
| + self.__dict__[name] = value
|
| +
|
| + def __iter__(self):
|
| + return self
|
| +
|
| + @property
|
| + def provider(self):
|
| + provider = None
|
| + if self.bucket and self.bucket.connection:
|
| + provider = self.bucket.connection.provider
|
| + return provider
|
| +
|
| + def get_md5_from_hexdigest(self, md5_hexdigest):
|
| + """
|
| + A utility function to create the 2-tuple (md5hexdigest, base64md5)
|
| + from just having a precalculated md5_hexdigest.
|
| + """
|
| + digest = binascii.unhexlify(md5_hexdigest)
|
| + base64md5 = base64.encodestring(digest)
|
| + if base64md5[-1] == '\n':
|
| + base64md5 = base64md5[0:-1]
|
| + return (md5_hexdigest, base64md5)
|
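| +
|
| + # Illustrative sketch (not in the upstream source): given a hex MD5
|
| + # digest computed elsewhere, this helper derives the base64 form that
|
| + # S3 expects in the Content-MD5 header. Names below are hypothetical:
|
| + #
|
| + #     from hashlib import md5
|
| + #     from boto.s3.key import Key
|
| + #     hex_md5 = md5('some bytes').hexdigest()
|
| + #     hex_md5, b64_md5 = Key().get_md5_from_hexdigest(hex_md5)
|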
| +
|
| + def handle_encryption_headers(self, resp):
|
| + provider = self.bucket.connection.provider
|
| + if provider.server_side_encryption_header:
|
| + self.encrypted = resp.getheader(provider.server_side_encryption_header, None)
|
| + else:
|
| + self.encrypted = None
|
| +
|
| + def handle_version_headers(self, resp, force=False):
|
| + provider = self.bucket.connection.provider
|
| + # If the Key object already has a version_id attribute value, it
|
| + # means that it represents an explicit version and the user is
|
| + # doing a get_contents_*(version_id=<foo>) to retrieve another
|
| + # version of the Key. In that case, we don't really want to
|
| + # overwrite the version_id in this Key object. Comprende?
|
| + if self.version_id is None or force:
|
| + self.version_id = resp.getheader(provider.version_id, None)
|
| + self.source_version_id = resp.getheader(provider.copy_source_version_id,
|
| + None)
|
| + if resp.getheader(provider.delete_marker, 'false') == 'true':
|
| + self.delete_marker = True
|
| + else:
|
| + self.delete_marker = False
|
| +
|
| + def handle_restore_headers(self, response):
|
| + header = response.getheader('x-amz-restore')
|
| + if header is None:
|
| + return
|
| + parts = header.split(',', 1)
|
| + for part in parts:
|
| + key, val = [i.strip() for i in part.split('=')]
|
| + val = val.replace('"', '')
|
| + if key == 'ongoing-request':
|
| + self.ongoing_restore = True if val.lower() == 'true' else False
|
| + elif key == 'expiry-date':
|
| + self.expiry_date = val
|
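| +
|
| + # Illustrative example (not in the upstream source): a completed
|
| + # GLACIER restore typically returns a header such as
|
| + #
|
| + #     x-amz-restore: ongoing-request="false",
|
| + #         expiry-date="Fri, 23 Dec 2012 00:00:00 GMT"
|
| + #
|
| + # which the parser above turns into ongoing_restore = False and
|
| + # expiry_date = 'Fri, 23 Dec 2012 00:00:00 GMT'.
|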
| +
|
| + def open_read(self, headers=None, query_args='',
|
| + override_num_retries=None, response_headers=None):
|
| + """
|
| + Open this key for reading
|
| +
|
| + :type headers: dict
|
| + :param headers: Headers to pass in the web request
|
| +
|
| + :type query_args: string
|
| + :param query_args: Arguments to pass in the query string
|
| + (ie, 'torrent')
|
| +
|
| + :type override_num_retries: int
|
| + :param override_num_retries: If not None will override configured
|
| + num_retries parameter for underlying GET.
|
| +
|
| + :type response_headers: dict
|
| + :param response_headers: A dictionary containing HTTP
|
| + headers/values that will override any headers associated
|
| + with the stored object in the response. See
|
| + http://goo.gl/EWOPb for details.
|
| + """
|
| + if self.resp == None:
|
| + self.mode = 'r'
|
| +
|
| + provider = self.bucket.connection.provider
|
| + self.resp = self.bucket.connection.make_request(
|
| + 'GET', self.bucket.name, self.name, headers,
|
| + query_args=query_args,
|
| + override_num_retries=override_num_retries)
|
| + if self.resp.status < 199 or self.resp.status > 299:
|
| + body = self.resp.read()
|
| + raise provider.storage_response_error(self.resp.status,
|
| + self.resp.reason, body)
|
| + response_headers = self.resp.msg
|
| + self.metadata = boto.utils.get_aws_metadata(response_headers,
|
| + provider)
|
| + for name, value in response_headers.items():
|
| + # To get correct size for Range GETs, use Content-Range
|
| + # header if one was returned. If not, use Content-Length
|
| + # header.
|
| + if (name.lower() == 'content-length' and
|
| + 'Content-Range' not in response_headers):
|
| + self.size = int(value)
|
| + elif name.lower() == 'content-range':
|
| + end_range = re.sub('.*/(.*)', '\\1', value)
|
| + self.size = int(end_range)
|
| + elif name.lower() == 'etag':
|
| + self.etag = value
|
| + elif name.lower() == 'content-type':
|
| + self.content_type = value
|
| + elif name.lower() == 'content-encoding':
|
| + self.content_encoding = value
|
| + elif name.lower() == 'content-language':
|
| + self.content_language = value
|
| + elif name.lower() == 'last-modified':
|
| + self.last_modified = value
|
| + elif name.lower() == 'cache-control':
|
| + self.cache_control = value
|
| + elif name.lower() == 'content-disposition':
|
| + self.content_disposition = value
|
| + self.handle_version_headers(self.resp)
|
| + self.handle_encryption_headers(self.resp)
|
| +
|
| + def open_write(self, headers=None, override_num_retries=None):
|
| + """
|
| + Open this key for writing.
|
| + Not yet implemented
|
| +
|
| + :type headers: dict
|
| + :param headers: Headers to pass in the write request
|
| +
|
| + :type override_num_retries: int
|
| + :param override_num_retries: If not None will override configured
|
| + num_retries parameter for underlying PUT.
|
| + """
|
| + raise BotoClientError('Not Implemented')
|
| +
|
| + def open(self, mode='r', headers=None, query_args=None,
|
| + override_num_retries=None):
|
| + if mode == 'r':
|
| + self.mode = 'r'
|
| + self.open_read(headers=headers, query_args=query_args,
|
| + override_num_retries=override_num_retries)
|
| + elif mode == 'w':
|
| + self.mode = 'w'
|
| + self.open_write(headers=headers,
|
| + override_num_retries=override_num_retries)
|
| + else:
|
| + raise BotoClientError('Invalid mode: %s' % mode)
|
| +
|
| + closed = False
|
| +
|
| + def close(self, fast=False):
|
| + """
|
| + Close this key.
|
| +
|
| + :type fast: bool
|
| + :param fast: True if you want the connection to be closed without first
|
| + reading the content. This should only be used in cases where subsequent
|
| + calls don't need to return the content from the open HTTP connection.
|
| + Note: As explained at
|
| + http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
|
| + callers must read the whole response before sending a new request to the
|
| + server. Calling Key.close(fast=True) and making a subsequent request to
|
| + the server will work because boto will get an httplib exception and
|
| + close/reopen the connection.
|
| +
|
| + """
|
| + if self.resp and not fast:
|
| + self.resp.read()
|
| + self.resp = None
|
| + self.mode = None
|
| + self.closed = True
|
| +
|
| + def next(self):
|
| + """
|
| + By providing a next method, the key object supports use as an iterator.
|
| + For example, you can now say:
|
| +
|
| + for bytes in key:
|
| + write bytes to a file or whatever
|
| +
|
| + All of the HTTP connection stuff is handled for you.
|
| + """
|
| + self.open_read()
|
| + data = self.resp.read(self.BufferSize)
|
| + if not data:
|
| + self.close()
|
| + raise StopIteration
|
| + return data
|
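| +
|
| + # Illustrative sketch (not in the upstream source): because Key is
|
| + # iterable, an object can be streamed to a local file in BufferSize
|
| + # chunks without holding it all in memory. Bucket and key names are
|
| + # hypothetical:
|
| + #
|
| + #     key = bucket.get_key('big-file.bin')
|
| + #     with open('/tmp/big-file.bin', 'wb') as out:
|
| + #         for chunk in key:
|
| + #             out.write(chunk)
|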
| +
|
| + def read(self, size=0):
|
| + self.open_read()
|
| + if size == 0:
|
| + data = self.resp.read()
|
| + else:
|
| + data = self.resp.read(size)
|
| + if not data:
|
| + self.close()
|
| + return data
|
| +
|
| + def change_storage_class(self, new_storage_class, dst_bucket=None,
|
| + validate_dst_bucket=True):
|
| + """
|
| + Change the storage class of an existing key.
|
| + Depending on whether a different destination bucket is supplied
|
| + or not, this will either move the item within the bucket, preserving
|
| + all metadata and ACL info while changing the storage class, or it
|
| + will copy the item to the provided destination bucket, also
|
| + preserving metadata and ACL info.
|
| +
|
| + :type new_storage_class: string
|
| + :param new_storage_class: The new storage class for the Key.
|
| + Possible values are:
|
| + * STANDARD
|
| + * REDUCED_REDUNDANCY
|
| +
|
| + :type dst_bucket: string
|
| + :param dst_bucket: The name of a destination bucket. If not
|
| + provided the current bucket of the key will be used.
|
| +
|
| + :type validate_dst_bucket: bool
|
| + :param validate_dst_bucket: If True, will validate the dst_bucket
|
| + by using an extra list request.
|
| + """
|
| + if new_storage_class == 'STANDARD':
|
| + return self.copy(self.bucket.name, self.name,
|
| + reduced_redundancy=False, preserve_acl=True,
|
| + validate_dst_bucket=validate_dst_bucket)
|
| + elif new_storage_class == 'REDUCED_REDUNDANCY':
|
| + return self.copy(self.bucket.name, self.name,
|
| + reduced_redundancy=True, preserve_acl=True,
|
| + validate_dst_bucket=validate_dst_bucket)
|
| + else:
|
| + raise BotoClientError('Invalid storage class: %s' %
|
| + new_storage_class)
|
| +
|
| + def copy(self, dst_bucket, dst_key, metadata=None,
|
| + reduced_redundancy=False, preserve_acl=False,
|
| + encrypt_key=False, validate_dst_bucket=True):
|
| + """
|
| + Copy this Key to another bucket.
|
| +
|
| + :type dst_bucket: string
|
| + :param dst_bucket: The name of the destination bucket
|
| +
|
| + :type dst_key: string
|
| + :param dst_key: The name of the destination key
|
| +
|
| + :type metadata: dict
|
| + :param metadata: Metadata to be associated with new key. If
|
| + metadata is supplied, it will replace the metadata of the
|
| + source key being copied. If no metadata is supplied, the
|
| + source key's metadata will be copied to the new key.
|
| +
|
| + :type reduced_redundancy: bool
|
| + :param reduced_redundancy: If True, this will force the
|
| + storage class of the new Key to be REDUCED_REDUNDANCY
|
| + regardless of the storage class of the key being copied.
|
| + The Reduced Redundancy Storage (RRS) feature of S3
|
| + provides lower redundancy at lower storage cost.
|
| +
|
| + :type preserve_acl: bool
|
| + :param preserve_acl: If True, the ACL from the source key will
|
| + be copied to the destination key. If False, the
|
| + destination key will have the default ACL. Note that
|
| + preserving the ACL in the new key object will require two
|
| + additional API calls to S3, one to retrieve the current
|
| + ACL and one to set that ACL on the new object. If you
|
| + don't care about the ACL, a value of False will be
|
| + significantly more efficient.
|
| +
|
| + :type encrypt_key: bool
|
| + :param encrypt_key: If True, the new copy of the object will
|
| + be encrypted on the server-side by S3 and will be stored
|
| + in an encrypted form while at rest in S3.
|
| +
|
| + :type validate_dst_bucket: bool
|
| + :param validate_dst_bucket: If True, will validate the dst_bucket
|
| + by using an extra list request.
|
| +
|
| + :rtype: :class:`boto.s3.key.Key` or subclass
|
| + :returns: An instance of the newly created key object
|
| + """
|
| + dst_bucket = self.bucket.connection.lookup(dst_bucket,
|
| + validate_dst_bucket)
|
| + if reduced_redundancy:
|
| + storage_class = 'REDUCED_REDUNDANCY'
|
| + else:
|
| + storage_class = self.storage_class
|
| + return dst_bucket.copy_key(dst_key, self.bucket.name,
|
| + self.name, metadata,
|
| + storage_class=storage_class,
|
| + preserve_acl=preserve_acl,
|
| + encrypt_key=encrypt_key)
|
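| +
|
| + # Illustrative sketch (not in the upstream source), assuming both
|
| + # (hypothetical) buckets exist and are reachable with one set of
|
| + # credentials:
|
| + #
|
| + #     src = conn.get_bucket('source-bucket').get_key('photo.jpg')
|
| + #     dst = src.copy('destination-bucket', 'backups/photo.jpg',
|
| + #                    preserve_acl=True, encrypt_key=True)
|
| + #     print dst.name    # 'backups/photo.jpg'
|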
| +
|
| + def startElement(self, name, attrs, connection):
|
| + if name == 'Owner':
|
| + self.owner = User(self)
|
| + return self.owner
|
| + else:
|
| + return None
|
| +
|
| + def endElement(self, name, value, connection):
|
| + if name == 'Key':
|
| + self.name = value
|
| + elif name == 'ETag':
|
| + self.etag = value
|
| + elif name == 'IsLatest':
|
| + if value == 'true':
|
| + self.is_latest = True
|
| + else:
|
| + self.is_latest = False
|
| + elif name == 'LastModified':
|
| + self.last_modified = value
|
| + elif name == 'Size':
|
| + self.size = int(value)
|
| + elif name == 'StorageClass':
|
| + self.storage_class = value
|
| + elif name == 'Owner':
|
| + pass
|
| + elif name == 'VersionId':
|
| + self.version_id = value
|
| + else:
|
| + setattr(self, name, value)
|
| +
|
| + def exists(self):
|
| + """
|
| + Returns True if the key exists
|
| +
|
| + :rtype: bool
|
| + :return: Whether the key exists on S3
|
| + """
|
| + return bool(self.bucket.lookup(self.name))
|
| +
|
| + def delete(self):
|
| + """
|
| + Delete this key from S3
|
| + """
|
| + return self.bucket.delete_key(self.name, version_id=self.version_id)
|
| +
|
| + def get_metadata(self, name):
|
| + return self.metadata.get(name)
|
| +
|
| + def set_metadata(self, name, value):
|
| + self.metadata[name] = value
|
| +
|
| + def update_metadata(self, d):
|
| + self.metadata.update(d)
|
| +
|
| + # convenience methods for setting/getting ACL
|
| + def set_acl(self, acl_str, headers=None):
|
| + if self.bucket != None:
|
| + self.bucket.set_acl(acl_str, self.name, headers=headers)
|
| +
|
| + def get_acl(self, headers=None):
|
| + if self.bucket != None:
|
| + return self.bucket.get_acl(self.name, headers=headers)
|
| +
|
| + def get_xml_acl(self, headers=None):
|
| + if self.bucket != None:
|
| + return self.bucket.get_xml_acl(self.name, headers=headers)
|
| +
|
| + def set_xml_acl(self, acl_str, headers=None):
|
| + if self.bucket != None:
|
| + return self.bucket.set_xml_acl(acl_str, self.name, headers=headers)
|
| +
|
| + def set_canned_acl(self, acl_str, headers=None):
|
| + return self.bucket.set_canned_acl(acl_str, self.name, headers)
|
| +
|
| + def get_redirect(self):
|
| + """Return the redirect location configured for this key.
|
| +
|
| + If no redirect is configured (via set_redirect), then None
|
| + will be returned.
|
| +
|
| + """
|
| + response = self.bucket.connection.make_request(
|
| + 'HEAD', self.bucket.name, self.name)
|
| + if response.status == 200:
|
| + return response.getheader('x-amz-website-redirect-location')
|
| + else:
|
| + raise self.provider.storage_response_error(
|
| + response.status, response.reason, response.read())
|
| +
|
| + def set_redirect(self, redirect_location):
|
| + """Configure this key to redirect to another location.
|
| +
|
| + When the bucket associated with this key is accessed from the website
|
| + endpoint, a 301 redirect will be issued to the specified
|
| + `redirect_location`.
|
| +
|
| + :type redirect_location: string
|
| + :param redirect_location: The location to redirect.
|
| +
|
| + """
|
| + headers = {'x-amz-website-redirect-location': redirect_location}
|
| + response = self.bucket.connection.make_request('PUT', self.bucket.name,
|
| + self.name, headers)
|
| + if response.status == 200:
|
| + return True
|
| + else:
|
| + raise self.provider.storage_response_error(
|
| + response.status, response.reason, response.read())
|
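| +
|
| + # Illustrative sketch (not in the upstream source): for a bucket
|
| + # served through the S3 website endpoint, a zero-byte key can act as
|
| + # a 301 redirect. Names are hypothetical:
|
| + #
|
| + #     key = bucket.new_key('old-page.html')
|
| + #     key.set_redirect('http://example.com/new-page.html')
|
| + #     print key.get_redirect()    # 'http://example.com/new-page.html'
|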
| +
|
| + def make_public(self, headers=None):
|
| + return self.bucket.set_canned_acl('public-read', self.name, headers)
|
| +
|
| + def generate_url(self, expires_in, method='GET', headers=None,
|
| + query_auth=True, force_http=False, response_headers=None,
|
| + expires_in_absolute=False, version_id=None,
|
| + policy=None, reduced_redundancy=False, encrypt_key=False):
|
| + """
|
| + Generate a URL to access this key.
|
| +
|
| + :type expires_in: int
|
| + :param expires_in: How long the url is valid for, in seconds
|
| +
|
| + :type method: string
|
| + :param method: The method to use for retrieving the file
|
| + (default is GET)
|
| +
|
| + :type headers: dict
|
| + :param headers: Any headers to pass along in the request
|
| +
|
| + :type query_auth: bool
|
| + :param query_auth: If True, include signed query string
|
| + authentication parameters in the URL; if False, generate an
|
| + unsigned URL.
|
| +
|
| + :type force_http: bool
|
| + :param force_http: If True, http will be used instead of https.
|
| +
|
| + :type response_headers: dict
|
| + :param response_headers: A dictionary containing HTTP
|
| + headers/values that will override any headers associated
|
| + with the stored object in the response. See
|
| + http://goo.gl/EWOPb for details.
|
| +
|
| + :type expires_in_absolute: bool
|
| + :param expires_in_absolute: If True, expires_in is interpreted as
|
| + an absolute timestamp (seconds since the epoch) rather than a
|
| + number of seconds from now.
|
| +
|
| + :type version_id: string
|
| + :param version_id: The version_id of the object to GET. If specified
|
| + this overrides any value in the key.
|
| +
|
| + :type policy: :class:`boto.s3.acl.CannedACLStrings`
|
| + :param policy: A canned ACL policy that will be applied to the
|
| + new key in S3.
|
| +
|
| + :type reduced_redundancy: bool
|
| + :param reduced_redundancy: If True, this will set the storage
|
| + class of the new Key to be REDUCED_REDUNDANCY. The Reduced
|
| + Redundancy Storage (RRS) feature of S3 provides lower
|
| + redundancy at lower storage cost.
|
| +
|
| + :type encrypt_key: bool
|
| + :param encrypt_key: If True, the new copy of the object will
|
| + be encrypted on the server-side by S3 and will be stored
|
| + in an encrypted form while at rest in S3.
|
| +
|
| + :rtype: string
|
| + :return: The URL to access the key
|
| + """
|
| + provider = self.bucket.connection.provider
|
| + version_id = version_id or self.version_id
|
| + if headers is None:
|
| + headers = {}
|
| + else:
|
| + headers = headers.copy()
|
| +
|
| + # add headers accordingly (usually PUT case)
|
| + if policy:
|
| + headers[provider.acl_header] = policy
|
| + if reduced_redundancy:
|
| + self.storage_class = 'REDUCED_REDUNDANCY'
|
| + if provider.storage_class_header:
|
| + headers[provider.storage_class_header] = self.storage_class
|
| + if encrypt_key:
|
| + headers[provider.server_side_encryption_header] = 'AES256'
|
| + headers = boto.utils.merge_meta(headers, self.metadata, provider)
|
| +
|
| + return self.bucket.connection.generate_url(expires_in, method,
|
| + self.bucket.name, self.name,
|
| + headers, query_auth,
|
| + force_http,
|
| + response_headers,
|
| + expires_in_absolute,
|
| + version_id)
|
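| +
|
| + # Illustrative sketch (not in the upstream source): create a signed
|
| + # GET URL for a private object that expires in one hour. The key name
|
| + # is hypothetical:
|
| + #
|
| + #     key = bucket.get_key('private/report.pdf')
|
| + #     url = key.generate_url(3600)    # expires_in is in seconds
|
| + #     # e.g. 'https://mybucket.s3.amazonaws.com/private/report.pdf?...'
|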
| +
|
| + def send_file(self, fp, headers=None, cb=None, num_cb=10,
|
| + query_args=None, chunked_transfer=False, size=None):
|
| + """
|
| + Upload a file to a key into a bucket on S3.
|
| +
|
| + :type fp: file
|
| + :param fp: The file pointer to upload. The file pointer must
|
| + point at the offset from which you wish to upload.
|
| + i.e. if uploading the full file, it should point at the
|
| + start of the file. Normally when a file is opened for
|
| + reading, the fp will point at the first byte. See the
|
| + bytes parameter below for more info.
|
| +
|
| + :type headers: dict
|
| + :param headers: The headers to pass along with the PUT request
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully transmitted to S3 and
|
| + the second representing the size of the to be transmitted
|
| + object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file
|
| + transfer. Providing a negative integer will cause your
|
| + callback to be called with each buffer read.
|
| +
|
| + :type size: int
|
| + :param size: (optional) The Maximum number of bytes to read
|
| + from the file pointer (fp). This is useful when uploading
|
| + a file in multiple parts where you are splitting the file
|
| + up into different ranges to be uploaded. If not specified,
|
| + the default behaviour is to read all bytes from the file
|
| + pointer. Fewer bytes may be available.
|
| + """
|
| + provider = self.bucket.connection.provider
|
| + try:
|
| + spos = fp.tell()
|
| + except IOError:
|
| + spos = None
|
| + self.read_from_stream = False
|
| +
|
| + def sender(http_conn, method, path, data, headers):
|
| + # This function is called repeatedly for temporary retries
|
| + # so we must be sure the file pointer is pointing at the
|
| + # start of the data.
|
| + if spos is not None and spos != fp.tell():
|
| + fp.seek(spos)
|
| + elif spos is None and self.read_from_stream:
|
| + # if seek is not supported, and we've read from this
|
| + # stream already, then we need to abort retries to
|
| + # avoid setting bad data.
|
| + raise provider.storage_data_error(
|
| + 'Cannot retry failed request. fp does not support seeking.')
|
| +
|
| + http_conn.putrequest(method, path)
|
| + for key in headers:
|
| + http_conn.putheader(key, headers[key])
|
| + http_conn.endheaders()
|
| +
|
| + # Calculate all MD5 checksums on the fly, if not already computed
|
| + if not self.base64md5:
|
| + m = md5()
|
| + else:
|
| + m = None
|
| +
|
| + save_debug = self.bucket.connection.debug
|
| + self.bucket.connection.debug = 0
|
| + # If the debuglevel < 4 we don't want to show connection
|
| + # payload, so turn off HTTP connection-level debug output (to
|
| + # be restored below).
|
| + # Use the getattr approach to allow this to work in AppEngine.
|
| + if getattr(http_conn, 'debuglevel', 0) < 4:
|
| + http_conn.set_debuglevel(0)
|
| +
|
| + data_len = 0
|
| + if cb:
|
| + if size:
|
| + cb_size = size
|
| + elif self.size:
|
| + cb_size = self.size
|
| + else:
|
| + cb_size = 0
|
| + if chunked_transfer and cb_size == 0:
|
| + # For chunked Transfer, we call the cb for every 1MB
|
| + # of data transferred, except when we know size.
|
| + cb_count = (1024 * 1024) / self.BufferSize
|
| + elif num_cb > 1:
|
| + cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
|
| + elif num_cb < 0:
|
| + cb_count = -1
|
| + else:
|
| + cb_count = 0
|
| + i = 0
|
| + cb(data_len, cb_size)
|
| +
|
| + bytes_togo = size
|
| + if bytes_togo and bytes_togo < self.BufferSize:
|
| + chunk = fp.read(bytes_togo)
|
| + else:
|
| + chunk = fp.read(self.BufferSize)
|
| + if spos is None:
|
| + # read at least something from a non-seekable fp.
|
| + self.read_from_stream = True
|
| + while chunk:
|
| + chunk_len = len(chunk)
|
| + data_len += chunk_len
|
| + if chunked_transfer:
|
| + http_conn.send('%x;\r\n' % chunk_len)
|
| + http_conn.send(chunk)
|
| + http_conn.send('\r\n')
|
| + else:
|
| + http_conn.send(chunk)
|
| + if m:
|
| + m.update(chunk)
|
| + if bytes_togo:
|
| + bytes_togo -= chunk_len
|
| + if bytes_togo <= 0:
|
| + break
|
| + if cb:
|
| + i += 1
|
| + if i == cb_count or cb_count == -1:
|
| + cb(data_len, cb_size)
|
| + i = 0
|
| + if bytes_togo and bytes_togo < self.BufferSize:
|
| + chunk = fp.read(bytes_togo)
|
| + else:
|
| + chunk = fp.read(self.BufferSize)
|
| +
|
| + self.size = data_len
|
| +
|
| + if m:
|
| + # Use the chunked trailer for the digest
|
| + hd = m.hexdigest()
|
| + self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd)
|
| +
|
| + if chunked_transfer:
|
| + http_conn.send('0\r\n')
|
| + # http_conn.send("Content-MD5: %s\r\n" % self.base64md5)
|
| + http_conn.send('\r\n')
|
| +
|
| + if cb and (cb_count <= 1 or i > 0) and data_len > 0:
|
| + cb(data_len, cb_size)
|
| +
|
| + http_conn.set_debuglevel(save_debug)
|
| + self.bucket.connection.debug = save_debug
|
| + response = http_conn.getresponse()
|
| + body = response.read()
|
| + if ((response.status == 500 or response.status == 503 or
|
| + response.getheader('location')) and not chunked_transfer):
|
| + # we'll try again.
|
| + return response
|
| + elif response.status >= 200 and response.status <= 299:
|
| + self.etag = response.getheader('etag')
|
| + if self.etag != '"%s"' % self.md5:
|
| + raise provider.storage_data_error(
|
| + 'ETag from S3 did not match computed MD5')
|
| + return response
|
| + else:
|
| + raise provider.storage_response_error(
|
| + response.status, response.reason, body)
|
| +
|
| + if not headers:
|
| + headers = {}
|
| + else:
|
| + headers = headers.copy()
|
| + headers['User-Agent'] = UserAgent
|
| + if self.storage_class != 'STANDARD':
|
| + headers[provider.storage_class_header] = self.storage_class
|
| + if 'Content-Encoding' in headers:
|
| + self.content_encoding = headers['Content-Encoding']
|
| + if 'Content-Language' in headers:
|
| + self.content_language = headers['Content-Language']
|
| + if 'Content-Type' in headers:
|
| + # Some use cases need to suppress sending of the Content-Type
|
| + # header and depend on the receiving server to set the content
|
| + # type. This can be achieved by setting headers['Content-Type']
|
| + # to None when calling this method.
|
| + if headers['Content-Type'] is None:
|
| + # Delete null Content-Type value to skip sending that header.
|
| + del headers['Content-Type']
|
| + else:
|
| + self.content_type = headers['Content-Type']
|
| + elif self.path:
|
| + self.content_type = mimetypes.guess_type(self.path)[0]
|
| + if self.content_type == None:
|
| + self.content_type = self.DefaultContentType
|
| + headers['Content-Type'] = self.content_type
|
| + else:
|
| + headers['Content-Type'] = self.content_type
|
| + if self.base64md5:
|
| + headers['Content-MD5'] = self.base64md5
|
| + if chunked_transfer:
|
| + headers['Transfer-Encoding'] = 'chunked'
|
| + #if not self.base64md5:
|
| + # headers['Trailer'] = "Content-MD5"
|
| + else:
|
| + headers['Content-Length'] = str(self.size)
|
| + headers['Expect'] = '100-Continue'
|
| + headers = boto.utils.merge_meta(headers, self.metadata, provider)
|
| + resp = self.bucket.connection.make_request('PUT', self.bucket.name,
|
| + self.name, headers,
|
| + sender=sender,
|
| + query_args=query_args)
|
| + self.handle_version_headers(resp, force=True)
|
| +
|
| + def compute_md5(self, fp, size=None):
|
| + """
|
| + :type fp: file
|
| + :param fp: File pointer to the file to MD5 hash. The file
|
| + pointer will be reset to the same position before the
|
| + method returns.
|
| +
|
| + :type size: int
|
| + :param size: (optional) The Maximum number of bytes to read
|
| + from the file pointer (fp). This is useful when uploading
|
| + a file in multiple parts where the file is being split
|
| + in place into different parts. Fewer bytes may be available.
|
| +
|
| + :rtype: tuple
|
| + :return: A tuple containing the hex digest version of the MD5
|
| + hash as the first element and the base64 encoded version
|
| + of the plain digest as the second element.
|
| + """
|
| + tup = compute_md5(fp, size=size)
|
| + # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
|
| + # The internal implementation of compute_md5() needs to return the
|
| + # data size but we don't want to return that value to the external
|
| + # caller because it changes the class interface (i.e. it might
|
| + # break some code) so we consume the third tuple value here and
|
| + # return the remainder of the tuple to the caller, thereby preserving
|
| + # the existing interface.
|
| + self.size = tup[2]
|
| + return tup[0:2]
|
| +
|
| + def set_contents_from_stream(self, fp, headers=None, replace=True,
|
| + cb=None, num_cb=10, policy=None,
|
| + reduced_redundancy=False, query_args=None,
|
| + size=None):
|
| + """
|
| + Store an object using the name of the Key object as the key in
|
| + cloud and the contents of the data stream pointed to by 'fp' as
|
| + the contents.
|
| +
|
| + The stream object is not seekable and total size is not known.
|
| + This has the implication that we can't specify the
|
| + Content-Size and Content-MD5 in the header. So for huge
|
| + uploads, the delay in calculating MD5 is avoided but with a
|
| + penalty of inability to verify the integrity of the uploaded
|
| + data.
|
| +
|
| + :type fp: file
|
| + :param fp: the file whose contents are to be uploaded
|
| +
|
| + :type headers: dict
|
| + :param headers: additional HTTP headers to be sent with the
|
| + PUT request.
|
| +
|
| + :type replace: bool
|
| + :param replace: If this parameter is False, the method will first check
|
| + to see if an object exists in the bucket with the same key. If it
|
| + does, it won't overwrite it. The default value is True which will
|
| + overwrite the object.
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two integer
|
| + parameters, the first representing the number of bytes that have
|
| + been successfully transmitted to GS and the second representing the
|
| + total number of bytes that need to be transmitted.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter, this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type policy: :class:`boto.gs.acl.CannedACLStrings`
|
| + :param policy: A canned ACL policy that will be applied to the new key
|
| + in GS.
|
| +
|
| + :type reduced_redundancy: bool
|
| + :param reduced_redundancy: If True, this will set the storage
|
| + class of the new Key to be REDUCED_REDUNDANCY. The Reduced
|
| + Redundancy Storage (RRS) feature of S3 provides lower
|
| + redundancy at lower storage cost.
|
| +
|
| + :type size: int
|
| + :param size: (optional) The Maximum number of bytes to read from
|
| + the file pointer (fp). This is useful when uploading a
|
| + file in multiple parts where you are splitting the file up
|
| + into different ranges to be uploaded. If not specified,
|
| + the default behaviour is to read all bytes from the file
|
| + pointer. Fewer bytes may be available.
|
| + """
|
| +
|
| + provider = self.bucket.connection.provider
|
| + if not provider.supports_chunked_transfer():
|
| + raise BotoClientError('%s does not support chunked transfer'
|
| + % provider.get_provider_name())
|
| +
|
| + # Name of the Object should be specified explicitly for Streams.
|
| + if not self.name or self.name == '':
|
| + raise BotoClientError('Cannot determine the destination '
|
| + 'object name for the given stream')
|
| +
|
| + if headers is None:
|
| + headers = {}
|
| + if policy:
|
| + headers[provider.acl_header] = policy
|
| +
|
| + if reduced_redundancy:
|
| + self.storage_class = 'REDUCED_REDUNDANCY'
|
| + if provider.storage_class_header:
|
| + headers[provider.storage_class_header] = self.storage_class
|
| +
|
| + if self.bucket != None:
|
| + if not replace:
|
| + if self.bucket.lookup(self.name):
|
| + return
|
| + self.send_file(fp, headers, cb, num_cb, query_args,
|
| + chunked_transfer=True, size=size)
|
| +
|
| + def set_contents_from_file(self, fp, headers=None, replace=True,
|
| + cb=None, num_cb=10, policy=None, md5=None,
|
| + reduced_redundancy=False, query_args=None,
|
| + encrypt_key=False, size=None, rewind=False):
|
| + """
|
| + Store an object in S3 using the name of the Key object as the
|
| + key in S3 and the contents of the file pointed to by 'fp' as the
|
| + contents. The data is read from 'fp' from its current position until
|
| + 'size' bytes have been read or EOF.
|
| +
|
| + :type fp: file
|
| + :param fp: the file whose contents to upload
|
| +
|
| + :type headers: dict
|
| + :param headers: Additional HTTP headers that will be sent with
|
| + the PUT request.
|
| +
|
| + :type replace: bool
|
| + :param replace: If this parameter is False, the method will
|
| + first check to see if an object exists in the bucket with
|
| + the same key. If it does, it won't overwrite it. The
|
| + default value is True which will overwrite the object.
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully transmitted to S3 and
|
| + the second representing the size of the to be transmitted
|
| + object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type policy: :class:`boto.s3.acl.CannedACLStrings`
|
| + :param policy: A canned ACL policy that will be applied to the
|
| + new key in S3.
|
| +
|
| + :type md5: A tuple containing the hexdigest version of the MD5
|
| + checksum of the file as the first element and the
|
| + Base64-encoded version of the plain checksum as the second
|
| + element. This is the same format returned by the
|
| + compute_md5 method.
|
| + :param md5: If you need to compute the MD5 for any reason
|
| + prior to upload, it's silly to have to do it twice so this
|
| + param, if present, will be used as the MD5 values of the
|
| + file. Otherwise, the checksum will be computed.
|
| +
|
| + :type reduced_redundancy: bool
|
| + :param reduced_redundancy: If True, this will set the storage
|
| + class of the new Key to be REDUCED_REDUNDANCY. The Reduced
|
| + Redundancy Storage (RRS) feature of S3 provides lower
|
| + redundancy at lower storage cost.
|
| +
|
| + :type encrypt_key: bool
|
| + :param encrypt_key: If True, the new copy of the object will
|
| + be encrypted on the server-side by S3 and will be stored
|
| + in an encrypted form while at rest in S3.
|
| +
|
| + :type size: int
|
| + :param size: (optional) The Maximum number of bytes to read
|
| + from the file pointer (fp). This is useful when uploading
|
| + a file in multiple parts where you are splitting the file
|
| + up into different ranges to be uploaded. If not specified,
|
| + the default behaviour is to read all bytes from the file
|
| + pointer. Fewer bytes may be available.
|
| +
|
| + :type rewind: bool
|
| + :param rewind: (optional) If True, the file pointer (fp) will
|
| + be rewound to the start before any bytes are read from
|
| + it. The default behaviour is False which reads from the
|
| + current position of the file pointer (fp).
|
| +
|
| + :rtype: int
|
| + :return: The number of bytes written to the key.
|
| + """
|
| + provider = self.bucket.connection.provider
|
| + headers = headers or {}
|
| + if policy:
|
| + headers[provider.acl_header] = policy
|
| + if encrypt_key:
|
| + headers[provider.server_side_encryption_header] = 'AES256'
|
| +
|
| + if rewind:
|
| + # caller requests reading from beginning of fp.
|
| + fp.seek(0, os.SEEK_SET)
|
| + else:
|
| + # The following seek/tell/seek logic is intended
|
| + # to detect applications using the older interface to
|
| + # set_contents_from_file(), which automatically rewound the
|
| + # file each time the Key was reused. This changed with commit
|
| + # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
|
| + # split into multiple parts and uploaded in parallel, and at
|
| + # the time of that commit this check was added because otherwise
|
| + # older programs would get a success status and upload an empty
|
| + # object. Unfortunately, it's very inefficient for fp's implemented
|
| + # by KeyFile (used, for example, by gsutil when copying between
|
| + # providers). So, we skip the check for the KeyFile case.
|
| + # TODO: At some point consider removing this seek/tell/seek
|
| + # logic, after enough time has passed that it's unlikely any
|
| + # programs remain that assume the older auto-rewind interface.
|
| + if not isinstance(fp, KeyFile):
|
| + spos = fp.tell()
|
| + fp.seek(0, os.SEEK_END)
|
| + if fp.tell() == spos:
|
| + fp.seek(0, os.SEEK_SET)
|
| + if fp.tell() != spos:
|
| + # Raise an exception as this is likely a programming
|
| + # error whereby there is data before the fp but nothing
|
| + # after it.
|
| + fp.seek(spos)
|
| + raise AttributeError('fp is at EOF. Use rewind option '
|
| + 'or seek() to data start.')
|
| + # seek back to the correct position.
|
| + fp.seek(spos)
|
| +
|
| + if reduced_redundancy:
|
| + self.storage_class = 'REDUCED_REDUNDANCY'
|
| + if provider.storage_class_header:
|
| + headers[provider.storage_class_header] = self.storage_class
|
| + # TODO - What if provider doesn't support reduced reduncancy?
|
| + # What if different providers provide different classes?
|
| + if hasattr(fp, 'name'):
|
| + self.path = fp.name
|
| + if self.bucket != None:
|
| + if not md5 and provider.supports_chunked_transfer():
|
| + # Defer the MD5 calculation so it happens on the fly;
|
| + # we don't know anything about the size yet.
|
| + chunked_transfer = True
|
| + self.size = None
|
| + else:
|
| + chunked_transfer = False
|
| + if isinstance(fp, KeyFile):
|
| + # Avoid EOF seek for KeyFile case as it's very inefficient.
|
| + key = fp.getkey()
|
| + size = key.size - fp.tell()
|
| + self.size = size
|
| + # At present both GCS and S3 use MD5 for the etag for
|
| + # non-multipart-uploaded objects. If the etag is 32 hex
|
| + # chars use it as an MD5, to avoid having to read the file
|
| + # twice while transferring.
|
| + if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
|
| + etag = key.etag.strip('"')
|
| + md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
|
| + if not md5:
|
| + # compute_md5() also sets self.size to the actual number
|
| + # of bytes read while computing the MD5.
|
| + md5 = self.compute_md5(fp, size)
|
| + # adjust size if required
|
| + size = self.size
|
| + elif size:
|
| + self.size = size
|
| + else:
|
| + # If md5 is provided, we still need the size, so
|
| + # calculate it based on the bytes to the end of the content.
|
| + spos = fp.tell()
|
| + fp.seek(0, os.SEEK_END)
|
| + self.size = fp.tell() - spos
|
| + fp.seek(spos)
|
| + size = self.size
|
| + self.md5 = md5[0]
|
| + self.base64md5 = md5[1]
|
| +
|
| + if self.name == None:
|
| + self.name = self.md5
|
| + if not replace:
|
| + if self.bucket.lookup(self.name):
|
| + return
|
| +
|
| + self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
|
| + query_args=query_args,
|
| + chunked_transfer=chunked_transfer, size=size)
|
| + # return number of bytes written.
|
| + return self.size
|
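| +
|
| + # Illustrative sketch (not in the upstream source): upload from an
|
| + # already-open file starting at its current position. Paths and key
|
| + # names are hypothetical:
|
| + #
|
| + #     key = bucket.new_key('uploads/data.bin')
|
| + #     with open('/tmp/data.bin', 'rb') as fp:
|
| + #         fp.seek(1024)    # skip a 1 KB local header
|
| + #         bytes_written = key.set_contents_from_file(fp)
|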
| +
|
| + def set_contents_from_filename(self, filename, headers=None, replace=True,
|
| + cb=None, num_cb=10, policy=None, md5=None,
|
| + reduced_redundancy=False,
|
| + encrypt_key=False):
|
| + """
|
| + Store an object in S3 using the name of the Key object as the
|
| + key in S3 and the contents of the file named by 'filename'.
|
| + See set_contents_from_file method for details about the
|
| + parameters.
|
| +
|
| + :type filename: string
|
| + :param filename: The name of the file that you want to put onto S3
|
| +
|
| + :type headers: dict
|
| + :param headers: Additional headers to pass along with the
|
| + request to AWS.
|
| +
|
| + :type replace: bool
|
| + :param replace: If True, replaces the contents of the file
|
| + if it already exists.
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully transmitted to S3 and
|
| + the second representing the size of the to be transmitted
|
| + object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type policy: :class:`boto.s3.acl.CannedACLStrings`
|
| + :param policy: A canned ACL policy that will be applied to the
|
| + new key in S3.
|
| +
|
| + :type md5: A tuple containing the hexdigest version of the MD5
|
| + checksum of the file as the first element and the
|
| + Base64-encoded version of the plain checksum as the second
|
| + element. This is the same format returned by the
|
| + compute_md5 method.
|
| + :param md5: If you need to compute the MD5 for any reason
|
| + prior to upload, it's silly to have to do it twice so this
|
| + param, if present, will be used as the MD5 values of the
|
| + file. Otherwise, the checksum will be computed.
|
| +
|
| + :type reduced_redundancy: bool
|
| + :param reduced_redundancy: If True, this will set the storage
|
| + class of the new Key to be REDUCED_REDUNDANCY. The Reduced
|
| + Redundancy Storage (RRS) feature of S3 provides lower
|
| + redundancy at lower storage cost.
|
| +
|
| + :type encrypt_key: bool
|
| + :param encrypt_key: If True, the new copy of the object
|
| + will be encrypted on the server-side by S3 and will be
|
| + stored in an encrypted form while at rest in S3.
|
| + """
|
| + fp = open(filename, 'rb')
|
| + try:
|
| + self.set_contents_from_file(fp, headers, replace, cb, num_cb,
|
| + policy, md5, reduced_redundancy,
|
| + encrypt_key=encrypt_key)
|
| + finally:
|
| + fp.close()
|
| +
|
| + def set_contents_from_string(self, s, headers=None, replace=True,
|
| + cb=None, num_cb=10, policy=None, md5=None,
|
| + reduced_redundancy=False,
|
| + encrypt_key=False):
|
| + """
|
| + Store an object in S3 using the name of the Key object as the
|
| + key in S3 and the string 's' as the contents.
|
| + See set_contents_from_file method for details about the
|
| + parameters.
|
| +
|
| + :type headers: dict
|
| + :param headers: Additional headers to pass along with the
|
| + request to AWS.
|
| +
|
| + :type replace: bool
|
| + :param replace: If True, replaces the contents of the file if
|
| + it already exists.
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully transmitted to S3 and
|
| + the second representing the size of the to be transmitted
|
| + object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type policy: :class:`boto.s3.acl.CannedACLStrings`
|
| + :param policy: A canned ACL policy that will be applied to the
|
| + new key in S3.
|
| +
|
| + :type md5: A tuple containing the hexdigest version of the MD5
|
| + checksum of the file as the first element and the
|
| + Base64-encoded version of the plain checksum as the second
|
| + element. This is the same format returned by the
|
| + compute_md5 method.
|
| + :param md5: If you need to compute the MD5 for any reason
|
| + prior to upload, it's silly to have to do it twice so this
|
| + param, if present, will be used as the MD5 values of the
|
| + file. Otherwise, the checksum will be computed.
|
| +
|
| + :type reduced_redundancy: bool
|
| + :param reduced_redundancy: If True, this will set the storage
|
| + class of the new Key to be REDUCED_REDUNDANCY. The Reduced
|
| + Redundancy Storage (RRS) feature of S3 provides lower
|
| + redundancy at lower storage cost.
|
| +
|
| + :type encrypt_key: bool
|
| + :param encrypt_key: If True, the new copy of the object will
|
| + be encrypted on the server-side by S3 and will be stored
|
| + in an encrypted form while at rest in S3.
|
| + """
|
| + if isinstance(s, unicode):
|
| + s = s.encode("utf-8")
|
| + fp = StringIO.StringIO(s)
|
| + r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
|
| + policy, md5, reduced_redundancy,
|
| + encrypt_key=encrypt_key)
|
| + fp.close()
|
| + return r
|
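| +
|
| + # Illustrative sketch (not in the upstream source): store a small
|
| + # JSON document with an explicit Content-Type and reduced-cost
|
| + # storage. The key name is hypothetical:
|
| + #
|
| + #     key = bucket.new_key('config.json')
|
| + #     key.set_contents_from_string(
|
| + #         '{"enabled": true}',
|
| + #         headers={'Content-Type': 'application/json'},
|
| + #         reduced_redundancy=True)
|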
| +
|
| + def get_file(self, fp, headers=None, cb=None, num_cb=10,
|
| + torrent=False, version_id=None, override_num_retries=None,
|
| + response_headers=None):
|
| + """
|
| + Retrieves a file from an S3 Key
|
| +
|
| + :type fp: file
|
| + :param fp: File pointer to put the data into
|
| +
|
| + :type headers: dict
|
| + :param headers: headers to send when retrieving the files
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully transmitted to S3 and
|
| + the second representing the size of the to be transmitted
|
| + object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type torrent: bool
|
| + :param torrent: Flag for whether to get a torrent for the file
|
| +
|
| + :type override_num_retries: int
|
| + :param override_num_retries: If not None will override configured
|
| + num_retries parameter for underlying GET.
|
| +
|
| + :type response_headers: dict
|
| + :param response_headers: A dictionary containing HTTP
|
| + headers/values that will override any headers associated
|
| + with the stored object in the response. See
|
| + http://goo.gl/EWOPb for details.
|
| + """
|
| + self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
|
| + torrent=torrent, version_id=version_id,
|
| + override_num_retries=override_num_retries,
|
| + response_headers=response_headers,
|
| + query_args=None)
|
| +
|
| + def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
|
| + torrent=False, version_id=None, override_num_retries=None,
|
| + response_headers=None, query_args=None):
|
| + if headers is None:
|
| + headers = {}
|
| + save_debug = self.bucket.connection.debug
|
| + if self.bucket.connection.debug == 1:
|
| + self.bucket.connection.debug = 0
|
| +
|
| + query_args = query_args or []
|
| + if torrent:
|
| + query_args.append('torrent')
|
| + m = None
|
| + else:
|
| + m = md5()
|
| + # If a version_id is passed in, use that. If not, check to see
|
| + # if the Key object has an explicit version_id and, if so, use that.
|
| + # Otherwise, don't pass a version_id query param.
|
| + if version_id is None:
|
| + version_id = self.version_id
|
| + if version_id:
|
| + query_args.append('versionId=%s' % version_id)
|
| + if response_headers:
|
| + for key in response_headers:
|
| + query_args.append('%s=%s' % (key, urllib.quote(response_headers[key])))
|
| + query_args = '&'.join(query_args)
|
| + self.open('r', headers, query_args=query_args,
|
| + override_num_retries=override_num_retries)
|
| +
|
| + data_len = 0
|
| + if cb:
|
| + if self.size is None:
|
| + cb_size = 0
|
| + else:
|
| + cb_size = self.size
|
| + if self.size is None and num_cb != -1:
|
| + # If size is not available due to chunked transfer for example,
|
| + # we'll call the cb for every 1MB of data transferred.
|
| + cb_count = (1024 * 1024) / self.BufferSize
|
| + elif num_cb > 1:
|
| + cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0)))
|
| + elif num_cb < 0:
|
| + cb_count = -1
|
| + else:
|
| + cb_count = 0
|
| + i = 0
|
| + cb(data_len, cb_size)
|
| + for bytes in self:
|
| + fp.write(bytes)
|
| + data_len += len(bytes)
|
| + if m:
|
| + m.update(bytes)
|
| + if cb:
|
| + if cb_size > 0 and data_len >= cb_size:
|
| + break
|
| + i += 1
|
| + if i == cb_count or cb_count == -1:
|
| + cb(data_len, cb_size)
|
| + i = 0
|
| + if cb and (cb_count <= 1 or i > 0) and data_len > 0:
|
| + cb(data_len, cb_size)
|
| + if m:
|
| + self.md5 = m.hexdigest()
|
| + if self.size is None and not torrent and "Range" not in headers:
|
| + self.size = data_len
|
| + self.close()
|
| + self.bucket.connection.debug = save_debug
|
| +
|
| + def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10):
|
| + """
|
| + Get a torrent file (see get_file)
|
| +
|
| + :type fp: file
|
| + :param fp: The file pointer of where to put the torrent
|
| +
|
| + :type headers: dict
|
| + :param headers: Headers to be passed
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully transmitted to S3 and
|
| + the second representing the size of the to be transmitted
|
| + object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + """
|
| + return self.get_file(fp, headers, cb, num_cb, torrent=True)
|
| +
|
| + def get_contents_to_file(self, fp, headers=None,
|
| + cb=None, num_cb=10,
|
| + torrent=False,
|
| + version_id=None,
|
| + res_download_handler=None,
|
| + response_headers=None):
|
| + """
|
| + Retrieve an object from S3 using the name of the Key object as the
|
| + key in S3. Write the contents of the object to the file pointed
|
| + to by 'fp'.
|
| +
|
| + :type fp: file-like object
|
| + :param fp: File pointer to which the object's contents will be
|
| + written.
|
| +
|
| + :type headers: dict
|
| + :param headers: additional HTTP headers that will be sent with
|
| + the GET request.
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the upload. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully transmitted to S3 and
|
| + the second representing the size of the to be transmitted
|
| + object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type torrent: bool
|
| + :param torrent: If True, returns the contents of a torrent
|
| + file as a string.
|
| +
|
| + :type res_download_handler: ResumableDownloadHandler
|
| + :param res_download_handler: If provided, this handler will
|
| + perform the download.
|
| +
|
| + :type response_headers: dict
|
| + :param response_headers: A dictionary containing HTTP
|
| + headers/values that will override any headers associated
|
| + with the stored object in the response. See
|
| + http://goo.gl/EWOPb for details.
|
| + """
|
| + if self.bucket != None:
|
| + if res_download_handler:
|
| + res_download_handler.get_file(self, fp, headers, cb, num_cb,
|
| + torrent=torrent,
|
| + version_id=version_id)
|
| + else:
|
| + self.get_file(fp, headers, cb, num_cb, torrent=torrent,
|
| + version_id=version_id,
|
| + response_headers=response_headers)
|
| +
|
| + def get_contents_to_filename(self, filename, headers=None,
|
| + cb=None, num_cb=10,
|
| + torrent=False,
|
| + version_id=None,
|
| + res_download_handler=None,
|
| + response_headers=None):
|
| + """
|
| + Retrieve an object from S3 using the name of the Key object as the
|
| + key in S3. Store contents of the object to a file named by 'filename'.
|
| + See get_contents_to_file method for details about the
|
| + parameters.
|
| +
|
| + :type filename: string
|
| + :param filename: The filename of where to put the file contents
|
| +
|
| + :type headers: dict
|
| + :param headers: Any additional headers to send in the request
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the download. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully received from S3 and
|
| + the second representing the total size of the object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type torrent: bool
|
| + :param torrent: If True, retrieve the contents of the object's
|
| + torrent file instead of the object itself.
|
| +
|
| + :type res_download_handler: ResumableDownloadHandler
|
| + :param res_download_handler: If provided, this handler will
|
| + perform the download.
|
| +
|
| + :type response_headers: dict
|
| + :param response_headers: A dictionary containing HTTP
|
| + headers/values that will override any headers associated
|
| + with the stored object in the response. See
|
| + http://goo.gl/EWOPb for details.
|
| + """
|
| + fp = open(filename, 'wb')
|
| + try:
|
| + self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
|
| + version_id=version_id,
|
| + res_download_handler=res_download_handler,
|
| + response_headers=response_headers)
|
| + except Exception:
|
| + os.remove(filename)
|
| + raise
|
| + finally:
|
| + fp.close()
|
| + # if last_modified date was sent from s3, try to set file's timestamp
|
| + if self.last_modified is not None:
|
| + try:
|
| + modified_tuple = rfc822.parsedate_tz(self.last_modified)
|
| + modified_stamp = int(rfc822.mktime_tz(modified_tuple))
|
| + os.utime(fp.name, (modified_stamp, modified_stamp))
|
| + except Exception:
|
| + pass
|
| +
|
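| + # Usage sketch (editor's illustration, not part of the original file):
|
| + # download straight to a path; on success the local file's mtime is set
|
| + # from the object's Last-Modified time when S3 returns it.
|
| + #   import boto
|
| + #   key = boto.connect_s3().get_bucket('example-bucket').get_key('photo.jpg')
|
| + #   key.get_contents_to_filename('/tmp/photo.jpg')
|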
| + def get_contents_as_string(self, headers=None,
|
| + cb=None, num_cb=10,
|
| + torrent=False,
|
| + version_id=None,
|
| + response_headers=None):
|
| + """
|
| + Retrieve an object from S3 using the name of the Key object as the
|
| + key in S3. Return the contents of the object as a string.
|
| + See get_contents_to_file method for details about the
|
| + parameters.
|
| +
|
| + :type headers: dict
|
| + :param headers: Any additional headers to send in the request
|
| +
|
| + :type cb: function
|
| + :param cb: a callback function that will be called to report
|
| + progress on the download. The callback should accept two
|
| + integer parameters, the first representing the number of
|
| + bytes that have been successfully received from S3 and
|
| + the second representing the total size of the object.
|
| +
|
| + :type num_cb: int
|
| + :param num_cb: (optional) If a callback is specified with the
|
| + cb parameter this parameter determines the granularity of
|
| + the callback by defining the maximum number of times the
|
| + callback will be called during the file transfer.
|
| +
|
| + :type torrent: bool
|
| + :param torrent: If True, returns the contents of a torrent file
|
| + as a string.
|
| +
|
| + :type response_headers: dict
|
| + :param response_headers: A dictionary containing HTTP
|
| + headers/values that will override any headers associated
|
| + with the stored object in the response. See
|
| + http://goo.gl/EWOPb for details.
|
| +
|
| + :rtype: string
|
| + :returns: The contents of the file as a string
|
| + """
|
| + fp = StringIO.StringIO()
|
| + self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
|
| + version_id=version_id,
|
| + response_headers=response_headers)
|
| + return fp.getvalue()
|
| +
|
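| + # Usage sketch (editor's illustration, not part of the original file):
|
| + # read a small object directly into memory as a string. Names are
|
| + # placeholder assumptions.
|
| + #   import boto
|
| + #   key = boto.connect_s3().get_bucket('example-bucket').get_key('config.json')
|
| + #   data = key.get_contents_as_string()
|
| + #   print(len(data))
|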
| + def add_email_grant(self, permission, email_address, headers=None):
|
| + """
|
| + Convenience method that provides a quick way to add an email grant
|
| + to a key. This method retrieves the current ACL, creates a new
|
| + grant based on the parameters passed in, adds that grant to the ACL
|
| + and then PUT's the new ACL back to S3.
|
| +
|
| + :type permission: string
|
| + :param permission: The permission being granted. Should be one of:
|
| + (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
|
| +
|
| + :type email_address: string
|
| + :param email_address: The email address associated with the AWS
|
| + account you are granting the permission to.
|
| + """
|
| + policy = self.get_acl(headers=headers)
|
| + policy.acl.add_email_grant(permission, email_address)
|
| + self.set_acl(policy, headers=headers)
|
| +
|
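| + # Usage sketch (editor's illustration, not part of the original file):
|
| + # grant READ on this key to the AWS account behind a (placeholder) email.
|
| + #   import boto
|
| + #   key = boto.connect_s3().get_bucket('example-bucket').get_key('shared.txt')
|
| + #   key.add_email_grant('READ', 'user@example.com')
|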
| + def add_user_grant(self, permission, user_id, headers=None,
|
| + display_name=None):
|
| + """
|
| + Convenience method that provides a quick way to add a canonical
|
| + user grant to a key. This method retrieves the current ACL,
|
| + creates a new grant based on the parameters passed in, adds that
|
| + grant to the ACL and then PUT's the new ACL back to S3.
|
| +
|
| + :type permission: string
|
| + :param permission: The permission being granted. Should be one of:
|
| + (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
|
| +
|
| + :type user_id: string
|
| + :param user_id: The canonical user id associated with the AWS
|
| + account you are granting the permission to.
|
| +
|
| + :type display_name: string
|
| + :param display_name: An optional string containing the user's
|
| + Display Name. Only required on Walrus.
|
| + """
|
| + policy = self.get_acl(headers=headers)
|
| + policy.acl.add_user_grant(permission, user_id,
|
| + display_name=display_name)
|
| + self.set_acl(policy, headers=headers)
|
| +
|
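| + # Usage sketch (editor's illustration, not part of the original file):
|
| + # grant FULL_CONTROL to a canonical user id (placeholder value shown).
|
| + #   import boto
|
| + #   key = boto.connect_s3().get_bucket('example-bucket').get_key('shared.txt')
|
| + #   key.add_user_grant('FULL_CONTROL', '79a59df900b949e55d96a1e698fbacedexample')
|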
| + def _normalize_metadata(self, metadata):
|
| + if isinstance(metadata, set):
|
| + norm_metadata = set()
|
| + for k in metadata:
|
| + norm_metadata.add(k.lower())
|
| + else:
|
| + norm_metadata = {}
|
| + for k in metadata:
|
| + norm_metadata[k.lower()] = metadata[k]
|
| + return norm_metadata
|
| +
|
| + def _get_remote_metadata(self, headers=None):
|
| + """
|
| + Extracts metadata from existing URI into a dict, so we can
|
| + overwrite/delete from it to form the new set of metadata to apply to a
|
| + key.
|
| + """
|
| + metadata = {}
|
| + for underscore_name in self._underscore_base_user_settable_fields:
|
| + if hasattr(self, underscore_name):
|
| + value = getattr(self, underscore_name)
|
| + if value:
|
| + # Generate HTTP field name corresponding to "_" named field.
|
| + field_name = underscore_name.replace('_', '-')
|
| + metadata[field_name.lower()] = value
|
| + # self.metadata contains custom metadata, which are all user-settable.
|
| + prefix = self.provider.metadata_prefix
|
| + for underscore_name in self.metadata:
|
| + field_name = underscore_name.replace('_', '-')
|
| + metadata['%s%s' % (prefix, field_name.lower())] = (
|
| + self.metadata[underscore_name])
|
| + return metadata
|
| +
|
| + def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
|
| + headers=None):
|
| + metadata_plus = self._normalize_metadata(metadata_plus)
|
| + metadata_minus = self._normalize_metadata(metadata_minus)
|
| + metadata = self._get_remote_metadata()
|
| + metadata.update(metadata_plus)
|
| + for h in metadata_minus:
|
| + if h in metadata:
|
| + del metadata[h]
|
| + src_bucket = self.bucket
|
| + # Boto prepends the meta prefix when adding headers, so strip prefix in
|
| + # metadata before sending back in to copy_key() call.
|
| + rewritten_metadata = {}
|
| + for h in metadata:
|
| + if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
|
| + rewritten_h = (h.replace('x-goog-meta-', '')
|
| + .replace('x-amz-meta-', ''))
|
| + else:
|
| + rewritten_h = h
|
| + rewritten_metadata[rewritten_h] = metadata[h]
|
| + metadata = rewritten_metadata
|
| + src_bucket.copy_key(self.name, self.bucket.name, self.name,
|
| + metadata=metadata, preserve_acl=preserve_acl,
|
| + headers=headers)
|
| +
|
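| + # Usage sketch (editor's illustration, not part of the original file):
|
| + # add one metadata header and drop another; the key is copied onto itself,
|
| + # keeping the existing ACL because preserve_acl=True. Names are placeholders.
|
| + #   import boto
|
| + #   key = boto.connect_s3().get_bucket('example-bucket').get_key('doc.pdf')
|
| + #   key.set_remote_metadata({'content-type': 'application/pdf'},
|
| + #                           {'x-amz-meta-old-tag'}, preserve_acl=True)
|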
| + def restore(self, days, headers=None):
|
| + """Restore an object from an archive.
|
| +
|
| + :type days: int
|
| + :param days: The lifetime of the restored object (must
|
| + be at least 1 day). If the object is already restored
|
| + then this parameter can be used to readjust the lifetime
|
| + of the restored object. In this case, the days
|
| + param is with respect to the initial time of the request.
|
| + If the object has not been restored, this param is with
|
| + respect to the completion time of the request.
|
| +
|
| + """
|
| + response = self.bucket.connection.make_request(
|
| + 'POST', self.bucket.name, self.name,
|
| + data=self.RestoreBody % days,
|
| + headers=headers, query_args='restore')
|
| + if response.status not in (200, 202):
|
| + provider = self.bucket.connection.provider
|
| + raise provider.storage_response_error(response.status,
|
| + response.reason,
|
| + response.read())
|
|
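| + # Usage sketch (editor's illustration, not part of the original file):
|
| + # request a temporary restore of a Glacier-archived object for five days;
|
| + # the call returns once S3 accepts the request (HTTP 200 or 202).
|
| + #   import boto
|
| + #   key = boto.connect_s3().get_bucket('example-bucket').get_key('archive.tar')
|
| + #   key.restore(days=5)
|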
|