Chromium Code Reviews

Unified Diff: third_party/gsutil/boto/boto/glacier/vault.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago
Index: third_party/gsutil/boto/boto/glacier/vault.py
diff --git a/third_party/gsutil/boto/boto/glacier/vault.py b/third_party/gsutil/boto/boto/glacier/vault.py
new file mode 100644
index 0000000000000000000000000000000000000000..e037adc700ab7d72594d060f436ec891253daf12
--- /dev/null
+++ b/third_party/gsutil/boto/boto/glacier/vault.py
@@ -0,0 +1,387 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+from __future__ import with_statement
+from .exceptions import UploadArchiveError
+from .job import Job
+from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
+from .concurrent import ConcurrentUploader
+from .utils import minimum_part_size, DEFAULT_PART_SIZE
+import os.path
+
+
+_MEGABYTE = 1024 * 1024
+_GIGABYTE = 1024 * _MEGABYTE
+
+MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
+MAXIMUM_NUMBER_OF_PARTS = 10000
+
+
+class Vault(object):
+
+ DefaultPartSize = DEFAULT_PART_SIZE
+ SingleOperationThreshold = 100 * _MEGABYTE
+
+ ResponseDataElements = (('VaultName', 'name', None),
+ ('VaultARN', 'arn', None),
+ ('CreationDate', 'creation_date', None),
+ ('LastInventoryDate', 'last_inventory_date', None),
+ ('SizeInBytes', 'size', 0),
+ ('NumberOfArchives', 'number_of_archives', 0))
+
+ def __init__(self, layer1, response_data=None):
+ self.layer1 = layer1
+ if response_data:
+ for response_name, attr_name, default in self.ResponseDataElements:
+ value = response_data[response_name]
+ if isinstance(value, unicode):
+ value = value.encode('utf8')
+ setattr(self, attr_name, value)
+ else:
+ for response_name, attr_name, default in self.ResponseDataElements:
+ setattr(self, attr_name, default)
+
+ def __repr__(self):
+ return 'Vault("%s")' % self.arn
+
+ def delete(self):
+ """
+ Deletes this vault. WARNING: this operation cannot be undone.
+ """
+ self.layer1.delete_vault(self.name)
+
+ def upload_archive(self, filename, description=None):
+ """
+ Adds an archive to a vault. For archives larger than 100MB a
+ multipart upload is used.
+
+ :type filename: str
+ :param filename: The path of the file to upload.
+
+ :type description: str
+ :param description: An optional description for the archive.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
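+
+ Example (an illustrative sketch; assumes AWS credentials are
+ configured and that a vault named 'my-backups' already exists)::
+
+     from boto.glacier.layer2 import Layer2
+
+     vault = Layer2().get_vault('my-backups')
+     archive_id = vault.upload_archive('/tmp/backup.tar',
+                                       description='nightly backup')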
+ """
+ if os.path.getsize(filename) > self.SingleOperationThreshold:
+ return self.create_archive_from_file(filename, description=description)
+ return self._upload_archive_single_operation(filename, description)
+
+ def _upload_archive_single_operation(self, filename, description):
+ """
+ Adds an archive to a vault in a single operation. This is
+ recommended for archives smaller than 100MB.
+
+ :type filename: str
+ :param filename: The path of the file to upload.
+
+ :type description: str
+ :param description: A description for the archive.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
+ """
+ with open(filename, 'rb') as fileobj:
+ linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
+ fileobj.seek(0)
+ response = self.layer1.upload_archive(self.name, fileobj,
+ linear_hash, tree_hash,
+ description)
+ return response['ArchiveId']
+
+ def create_archive_writer(self, part_size=DefaultPartSize,
+ description=None):
+ """
+ Create a new archive and begin a multi-part upload to it.
+ Returns a file-like object to which the data for the archive
+ can be written. Once all the data has been written, the file-like
+ object should be closed; you can then call its get_archive_id
+ method to get the ID of the created archive.
+
+ :type part_size: int
+ :param part_size: The part size for the multipart upload.
+
+ :type description: str
+ :param description: An optional description for the archive.
+
+ :rtype: :class:`boto.glacier.writer.Writer`
+ :return: A Writer object to which the archive data
+ should be written.
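+
+ Example (an illustrative sketch; ``vault`` is assumed to be an
+ existing Vault instance)::
+
+     writer = vault.create_archive_writer(description='log bundle')
+     with open('/tmp/logs.tar', 'rb') as f:
+         chunk = f.read(vault.DefaultPartSize)
+         while chunk:
+             writer.write(chunk)
+             chunk = f.read(vault.DefaultPartSize)
+     writer.close()
+     archive_id = writer.get_archive_id()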
+ """
+ response = self.layer1.initiate_multipart_upload(self.name,
+ part_size,
+ description)
+ return Writer(self, response['UploadId'], part_size=part_size)
+
+ def create_archive_from_file(self, filename=None, file_obj=None,
+ description=None, upload_id_callback=None):
+ """
+ Create a new archive and upload the data from the given file
+ or file-like object.
+
+ :type filename: str
+ :param filename: A filename to upload
+
+ :type file_obj: file
+ :param file_obj: A file-like object to upload
+
+ :type description: str
+ :param description: An optional description for the archive.
+
+ :type upload_id_callback: function
+ :param upload_id_callback: if set, call with the upload_id as the
+ only parameter when it becomes known, to enable future calls
+ to resume_archive_from_file in case resume is needed.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
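+
+ Example (an illustrative sketch; recording the upload id allows a
+ later resume_archive_from_file call if the upload is interrupted)::
+
+     upload_ids = []
+     archive_id = vault.create_archive_from_file(
+         '/tmp/big-backup.tar',
+         description='weekly backup',
+         upload_id_callback=upload_ids.append)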
+ """
+ part_size = self.DefaultPartSize
+ if not file_obj:
+ file_size = os.path.getsize(filename)
+ try:
+ # Grow the part size when necessary so the file fits within
+ # Glacier's 10,000-part limit; minimum_part_size raises
+ # ValueError if no permissible part size is large enough.
+ part_size = minimum_part_size(file_size,
+ self.DefaultPartSize)
+ except ValueError:
+ raise UploadArchiveError("File size of %s bytes exceeds "
+ "40,000 GB archive limit of Glacier."
+ % file_size)
+ file_obj = open(filename, "rb")
+ writer = self.create_archive_writer(
+ description=description,
+ part_size=part_size)
+ if upload_id_callback:
+ upload_id_callback(writer.upload_id)
+ while True:
+ data = file_obj.read(part_size)
+ if not data:
+ break
+ writer.write(data)
+ writer.close()
+ return writer.get_archive_id()
+
+ @staticmethod
+ def _range_string_to_part_index(range_string, part_size):
+ start, inside_end = [int(value) for value in range_string.split('-')]
+ end = inside_end + 1
+ length = end - start
+ if length == part_size + 1:
+ # Off-by-one bug in Amazon's Glacier implementation,
+ # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
+ # Workaround: the reported range is one byte too long, so trim it.
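+ # For example, with part_size = 4194304 (4 MiB) Glacier may report
+ # the range "0-4194304" (4194305 bytes); trimming one byte yields
+ # the true 4 MiB part covering bytes 0-4194303.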
+ end -= 1
+ inside_end -= 1
+ length -= 1
+ assert not (start % part_size), (
+ "upload part start byte is not on a part boundary")
+ assert (length <= part_size), "upload part is bigger than part size"
+ return start // part_size
+
+ def resume_archive_from_file(self, upload_id, filename=None,
+ file_obj=None):
+ """Resume upload of a file already part-uploaded to Glacier.
+
+ The resumption of an upload where the part-uploaded section is empty
+ is a valid degenerate case that this function can handle.
+
+ One and only one of filename or file_obj must be specified.
+
+ :type upload_id: str
+ :param upload_id: existing Glacier upload id of upload being resumed.
+
+ :type filename: str
+ :param filename: file to open for resume
+
+ :type file_obj: file
+ :param file_obj: file-like object containing local data to resume.
+ This must read from the start of the entire upload, not just from
+ the point being resumed. Use file_obj.seek(0) to achieve this if
+ necessary.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
+
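+ Example (an illustrative sketch; ``upload_id`` is assumed to have
+ been recorded from an earlier, interrupted create_archive_from_file
+ call)::
+
+     archive_id = vault.resume_archive_from_file(
+         upload_id, filename='/tmp/big-backup.tar')
+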
+ """
+ part_list_response = self.list_all_parts(upload_id)
+ part_size = part_list_response['PartSizeInBytes']
+
+ part_hash_map = {}
+ for part_desc in part_list_response['Parts']:
+ part_index = self._range_string_to_part_index(
+ part_desc['RangeInBytes'], part_size)
+ part_tree_hash = part_desc['SHA256TreeHash'].decode('hex')
+ part_hash_map[part_index] = part_tree_hash
+
+ if not file_obj:
+ file_obj = open(filename, "rb")
+
+ return resume_file_upload(
+ self, upload_id, part_size, file_obj, part_hash_map)
+
+ def concurrent_create_archive_from_file(self, filename, description):
+ """
+ Create a new archive from the given file and upload it.
+
+ This is a convenience method around the
+ :class:`boto.glacier.concurrent.ConcurrentUploader`
+ class. This method will perform a multipart upload
+ and upload the parts of the file concurrently.
+
+ :type filename: str
+ :param filename: The path of the file to upload.
+
+ :type description: str
+ :param description: A description for the archive.
+
+ :raises: `boto.glacier.exceptions.UploadArchiveError` if an error
+ occurs during the upload process.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
+
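+ Example (an illustrative sketch; the parts are uploaded from
+ multiple threads, which can be considerably faster for large
+ files)::
+
+     archive_id = vault.concurrent_create_archive_from_file(
+         '/tmp/big-backup.tar', 'weekly backup')
+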
+ """
+ uploader = ConcurrentUploader(self.layer1, self.name)
+ archive_id = uploader.upload(filename, description)
+ return archive_id
+
+ def retrieve_archive(self, archive_id, sns_topic=None,
+ description=None):
+ """
+ Initiate an archive retrieval job to download the data from an
+ archive. You will need to wait for a notification from
+ Amazon (via SNS) before you can actually download the data;
+ this typically takes around 4 hours.
+
+ :type archive_id: str
+ :param archive_id: The id of the archive
+
+ :type description: str
+ :param description: An optional description for the job.
+
+ :type sns_topic: str
+ :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+ sends notification when the job is completed and the output
+ is ready for you to download.
+
+ :rtype: :class:`boto.glacier.job.Job`
+ :return: A Job object representing the retrieval job.
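+
+ Example (an illustrative sketch; the job output only becomes
+ downloadable once Glacier marks the job complete, typically a few
+ hours later)::
+
+     job = vault.retrieve_archive(archive_id)
+     # ... some hours later, e.g. from a polling task ...
+     job = vault.get_job(job.id)
+     if job.completed:
+         job.download_to_file('/tmp/restored.tar')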
+ """
+ job_data = {'Type': 'archive-retrieval',
+ 'ArchiveId': archive_id}
+ if sns_topic is not None:
+ job_data['SNSTopic'] = sns_topic
+ if description is not None:
+ job_data['Description'] = description
+
+ response = self.layer1.initiate_job(self.name, job_data)
+ return self.get_job(response['JobId'])
+
+ def retrieve_inventory(self, sns_topic=None,
+ description=None):
+ """
+ Initiate an inventory retrieval job to list the items in the
+ vault. You will need to wait for a notification from
+ Amazon (via SNS) before you can actually download the data;
+ this typically takes around 4 hours.
+
+ :type description: str
+ :param description: An optional description for the job.
+
+ :type sns_topic: str
+ :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+ sends notification when the job is completed and the output
+ is ready for you to download.
+
+ :rtype: str
+ :return: The id of the newly-initiated job; pass it to get_job to
+ obtain a :class:`boto.glacier.job.Job`.
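+
+ Example (an illustrative sketch)::
+
+     job_id = vault.retrieve_inventory()
+     # ... some hours later ...
+     job = vault.get_job(job_id)
+     if job.completed:
+         inventory = job.get_output()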
+ """
+ job_data = {'Type': 'inventory-retrieval'}
+ if sns_topic is not None:
+ job_data['SNSTopic'] = sns_topic
+ if description is not None:
+ job_data['Description'] = description
+
+ response = self.layer1.initiate_job(self.name, job_data)
+ return response['JobId']
+
+ def delete_archive(self, archive_id):
+ """
+ This operation deletes an archive from the vault.
+
+ :type archive_id: str
+ :param archive_id: The ID for the archive to be deleted.
+ """
+ return self.layer1.delete_archive(self.name, archive_id)
+
+ def get_job(self, job_id):
+ """
+ Get an object representing a job in progress.
+
+ :type job_id: str
+ :param job_id: The ID of the job
+
+ :rtype: :class:`boto.glacier.job.Job`
+ :return: A Job object representing the job.
+ """
+ response_data = self.layer1.describe_job(self.name, job_id)
+ return Job(self, response_data)
+
+ def list_jobs(self, completed=None, status_code=None):
+ """
+ Return a list of Job objects related to this vault.
+
+ :type completed: boolean
+ :param completed: Specifies the state of the jobs to return.
+ If a value of True is passed, only completed jobs will
+ be returned. If a value of False is passed, only
+ uncompleted jobs will be returned. If no value is
+ passed, all jobs will be returned.
+
+ :type status_code: string
+ :param status_code: Specifies the type of job status to return.
+ Valid values are: InProgress|Succeeded|Failed. If not
+ specified, jobs with all status codes are returned.
+
+ :rtype: list of :class:`boto.glacier.job.Job`
+ :return: A list of Job objects related to this vault.
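+
+ Example (an illustrative sketch; prints every job that has already
+ finished, whatever its outcome)::
+
+     for job in vault.list_jobs(completed=True):
+         print job.id, job.status_code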
+ """
+ response_data = self.layer1.list_jobs(self.name, completed,
+ status_code)
+ return [Job(self, jd) for jd in response_data['JobList']]
+
+ def list_all_parts(self, upload_id):
+ """Automatically make and combine multiple calls to list_parts.
+
+ Call list_parts as necessary, combining the results in case multiple
+ calls were required to get data on all available parts.
+
+ """
+ result = self.layer1.list_parts(self.name, upload_id)
+ marker = result['Marker']
+ while marker:
+ additional_result = self.layer1.list_parts(
+ self.name, upload_id, marker=marker)
+ result['Parts'].extend(additional_result['Parts'])
+ marker = additional_result['Marker']
+ # The marker makes no sense in an unpaginated result, and clearing it
+ # makes testing easier. This also has the nice property that the result
+ # is a normal (but expanded) response.
+ result['Marker'] = None
+ return result
