Index: third_party/gsutil/boto/boto/glacier/vault.py
diff --git a/third_party/gsutil/boto/boto/glacier/vault.py b/third_party/gsutil/boto/boto/glacier/vault.py
new file mode 100644
index 0000000000000000000000000000000000000000..e037adc700ab7d72594d060f436ec891253daf12
--- /dev/null
+++ b/third_party/gsutil/boto/boto/glacier/vault.py
@@ -0,0 +1,387 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+from __future__ import with_statement
+from .exceptions import UploadArchiveError
+from .job import Job
+from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
+from .concurrent import ConcurrentUploader
+from .utils import minimum_part_size, DEFAULT_PART_SIZE
+import os.path
+
+
+_MEGABYTE = 1024 * 1024
+_GIGABYTE = 1024 * _MEGABYTE
+
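+# Glacier multipart uploads allow at most 10,000 parts of up to 4 GB each,
+# which is where the 40,000 GB maximum archive size below comes from.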
+MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
+MAXIMUM_NUMBER_OF_PARTS = 10000
+
+
+class Vault(object):
+
+    DefaultPartSize = DEFAULT_PART_SIZE
+    SingleOperationThreshold = 100 * _MEGABYTE
+
+    ResponseDataElements = (('VaultName', 'name', None),
+                            ('VaultARN', 'arn', None),
+                            ('CreationDate', 'creation_date', None),
+                            ('LastInventoryDate', 'last_inventory_date', None),
+                            ('SizeInBytes', 'size', 0),
+                            ('NumberOfArchives', 'number_of_archives', 0))
+
+    def __init__(self, layer1, response_data=None):
+        self.layer1 = layer1
+        if response_data:
+            for response_name, attr_name, default in self.ResponseDataElements:
+                value = response_data[response_name]
+                if isinstance(value, unicode):
+                    value = value.encode('utf8')
+                setattr(self, attr_name, value)
+        else:
+            for response_name, attr_name, default in self.ResponseDataElements:
+                setattr(self, attr_name, default)
+
+    def __repr__(self):
+        return 'Vault("%s")' % self.arn
+
+    def delete(self):
+        """
+        Deletes this vault. WARNING! This operation cannot be undone.
+        """
+        self.layer1.delete_vault(self.name)
+
+    def upload_archive(self, filename, description=None):
+        """
+        Adds an archive to a vault. For archives larger than 100 MB,
+        a multipart upload is used.
+
+        :type filename: str
+        :param filename: The name of the file to upload.
+
+        :type description: str
+        :param description: An optional description for the archive.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
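+
+        Example (illustrative; ``vault`` is an existing Vault instance and
+        the file path is a placeholder)::
+
+            archive_id = vault.upload_archive('/tmp/backup.tar',
+                                              description='nightly backup')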
+        """
+        if os.path.getsize(filename) > self.SingleOperationThreshold:
+            return self.create_archive_from_file(filename, description=description)
+        return self._upload_archive_single_operation(filename, description)
+
+    def _upload_archive_single_operation(self, filename, description):
+        """
+        Adds an archive to a vault in a single operation. This is
+        intended for archives smaller than 100 MB.
+
+        :type filename: str
+        :param filename: The name of the file to upload.
+
+        :type description: str
+        :param description: A description for the archive.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
+        """
+        with open(filename, 'rb') as fileobj:
+            linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
+            fileobj.seek(0)
+            response = self.layer1.upload_archive(self.name, fileobj,
+                                                  linear_hash, tree_hash,
+                                                  description)
+            return response['ArchiveId']
+
+    def create_archive_writer(self, part_size=DefaultPartSize,
+                              description=None):
+        """
+        Create a new archive and begin a multi-part upload to it.
+        Returns a file-like object to which the data for the archive
+        can be written. Once all the data has been written, the
+        file-like object should be closed; you can then call its
+        get_archive_id method to get the ID of the created archive.
+
+        :type part_size: int
+        :param part_size: The part size for the multipart upload.
+
+        :type description: str
+        :param description: An optional description for the archive.
+
+        :rtype: :class:`boto.glacier.writer.Writer`
+        :return: A Writer object to which the archive data
+            should be written.
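+
+        Example (illustrative; ``vault`` and ``chunks`` are placeholders)::
+
+            writer = vault.create_archive_writer(description='log archive')
+            for chunk in chunks:
+                writer.write(chunk)
+            writer.close()
+            archive_id = writer.get_archive_id()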
+        """
+        response = self.layer1.initiate_multipart_upload(self.name,
+                                                         part_size,
+                                                         description)
+        return Writer(self, response['UploadId'], part_size=part_size)
+
+    def create_archive_from_file(self, filename=None, file_obj=None,
+                                 description=None, upload_id_callback=None):
+        """
+        Create a new archive and upload the data from the given file
+        or file-like object.
+
+        :type filename: str
+        :param filename: A filename to upload
+
+        :type file_obj: file
+        :param file_obj: A file-like object to upload
+
+        :type description: str
+        :param description: An optional description for the archive.
+
+        :type upload_id_callback: function
+        :param upload_id_callback: If set, this callable is invoked with the
+            upload_id as its only argument as soon as the upload_id is known,
+            so that a later call to resume_archive_from_file can resume the
+            upload if needed.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
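+
+        Example (illustrative; ``save_upload_id`` is a hypothetical callback
+        that records the upload id so the transfer can later be resumed via
+        resume_archive_from_file)::
+
+            archive_id = vault.create_archive_from_file(
+                '/tmp/backup.tar', description='nightly backup',
+                upload_id_callback=save_upload_id)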
+        """
+        part_size = self.DefaultPartSize
+        if not file_obj:
+            file_size = os.path.getsize(filename)
+            try:
+                # Pick a part size large enough to keep the upload within
+                # the maximum number of parts for this file size.
+                part_size = minimum_part_size(file_size,
+                                              self.DefaultPartSize)
+            except ValueError:
+                raise UploadArchiveError("File size of %s bytes exceeds "
+                                         "40,000 GB archive limit of Glacier."
+                                         % file_size)
+            file_obj = open(filename, "rb")
+        writer = self.create_archive_writer(
+            description=description,
+            part_size=part_size)
+        if upload_id_callback:
+            upload_id_callback(writer.upload_id)
+        while True:
+            data = file_obj.read(part_size)
+            if not data:
+                break
+            writer.write(data)
+        writer.close()
+        return writer.get_archive_id()
+
+    @staticmethod
+    def _range_string_to_part_index(range_string, part_size):
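+        """Convert a Glacier byte-range string into a zero-based part index.
+
+        For example, with a 4 MB part size the range string
+        '4194304-8388607' covers the second part and maps to index 1.
+        """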
+        start, inside_end = [int(value) for value in range_string.split('-')]
+        end = inside_end + 1
+        length = end - start
+        if length == part_size + 1:
+            # Off-by-one bug in Amazon's Glacier implementation,
+            # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
+            # Workaround: the reported range is one byte too long, so
+            # shrink it by one byte.
+            end -= 1
+            inside_end -= 1
+            length -= 1
+        assert not (start % part_size), (
+            "upload part start byte is not on a part boundary")
+        assert (length <= part_size), "upload part is bigger than part size"
+        return start // part_size
+
+    def resume_archive_from_file(self, upload_id, filename=None,
+                                 file_obj=None):
+        """Resume upload of a file already part-uploaded to Glacier.
+
+        Resuming an upload for which no parts have been uploaded yet is a
+        valid degenerate case that this function handles.
+
+        One and only one of filename or file_obj must be specified.
+
+        :type upload_id: str
+        :param upload_id: existing Glacier upload id of upload being resumed.
+
+        :type filename: str
+        :param filename: file to open for resume
+
+        :type file_obj: file
+        :param file_obj: file-like object containing local data to resume. This
+            must read from the start of the entire upload, not just from the
+            point being resumed. Use file_obj.seek(0) to achieve this if
+            necessary.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
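+
+        Example (illustrative; ``saved_upload_id`` is assumed to have been
+        recorded earlier, e.g. via the upload_id_callback of
+        create_archive_from_file)::
+
+            archive_id = vault.resume_archive_from_file(
+                saved_upload_id, filename='/tmp/backup.tar')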
+
+        """
+        part_list_response = self.list_all_parts(upload_id)
+        part_size = part_list_response['PartSizeInBytes']
+
+        part_hash_map = {}
+        for part_desc in part_list_response['Parts']:
+            part_index = self._range_string_to_part_index(
+                part_desc['RangeInBytes'], part_size)
+            part_tree_hash = part_desc['SHA256TreeHash'].decode('hex')
+            part_hash_map[part_index] = part_tree_hash
+
+        if not file_obj:
+            file_obj = open(filename, "rb")
+
+        return resume_file_upload(
+            self, upload_id, part_size, file_obj, part_hash_map)
+
+    def concurrent_create_archive_from_file(self, filename, description):
+        """
+        Create a new archive and concurrently upload the contents of the
+        given file.
+
+        This is a convenience method around the
+        :class:`boto.glacier.concurrent.ConcurrentUploader`
+        class. This method will perform a multipart upload
+        and upload the parts of the file concurrently.
+
+        :type filename: str
+        :param filename: A filename to upload
+
+        :type description: str
+        :param description: A description for the archive.
+
+        :raises: `boto.glacier.exceptions.UploadArchiveError` if an error
+            occurs during the upload process.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
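+
+        Example (illustrative)::
+
+            archive_id = vault.concurrent_create_archive_from_file(
+                '/tmp/backup.tar', 'nightly backup')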
+
+        """
+        uploader = ConcurrentUploader(self.layer1, self.name)
+        archive_id = uploader.upload(filename, description)
+        return archive_id
+
+    def retrieve_archive(self, archive_id, sns_topic=None,
+                         description=None):
+        """
+        Initiate an archive retrieval job to download the data from an
+        archive. You will need to wait for the notification from
+        Amazon (via SNS) before you can actually download the data;
+        this typically takes around four hours.
+
+        :type archive_id: str
+        :param archive_id: The id of the archive
+
+        :type description: str
+        :param description: An optional description for the job.
+
+        :type sns_topic: str
+        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+            sends notification when the job is completed and the output
+            is ready for you to download.
+
+        :rtype: :class:`boto.glacier.job.Job`
+        :return: A Job object representing the retrieval job.
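+
+        Example (illustrative; assumes ``archive_id`` is from a previous
+        upload, that the job has already completed, and that
+        :class:`boto.glacier.job.Job` provides a ``get_output`` method)::
+
+            job = vault.retrieve_archive(archive_id)
+            # ... wait for the SNS notification, then ...
+            output = job.get_output()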
+        """
+        job_data = {'Type': 'archive-retrieval',
+                    'ArchiveId': archive_id}
+        if sns_topic is not None:
+            job_data['SNSTopic'] = sns_topic
+        if description is not None:
+            job_data['Description'] = description
+
+        response = self.layer1.initiate_job(self.name, job_data)
+        return self.get_job(response['JobId'])
+
+    def retrieve_inventory(self, sns_topic=None,
+                           description=None):
+        """
+        Initiate an inventory retrieval job to list the items in the
+        vault. You will need to wait for the notification from
+        Amazon (via SNS) before you can actually download the data;
+        this typically takes around four hours.
+
+        :type description: str
+        :param description: An optional description for the job.
+
+        :type sns_topic: str
+        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+            sends notification when the job is completed and the output
+            is ready for you to download.
+
+        :rtype: str
+        :return: The ID of the newly initiated inventory retrieval job.
+        """
+        job_data = {'Type': 'inventory-retrieval'}
+        if sns_topic is not None:
+            job_data['SNSTopic'] = sns_topic
+        if description is not None:
+            job_data['Description'] = description
+
+        response = self.layer1.initiate_job(self.name, job_data)
+        return response['JobId']
+
+    def delete_archive(self, archive_id):
+        """
+        This operation deletes an archive from the vault.
+
+        :type archive_id: str
+        :param archive_id: The ID for the archive to be deleted.
+        """
+        return self.layer1.delete_archive(self.name, archive_id)
+
+    def get_job(self, job_id):
+        """
+        Get an object representing a job in progress.
+
+        :type job_id: str
+        :param job_id: The ID of the job
+
+        :rtype: :class:`boto.glacier.job.Job`
+        :return: A Job object representing the job.
+        """
+        response_data = self.layer1.describe_job(self.name, job_id)
+        return Job(self, response_data)
+
+    def list_jobs(self, completed=None, status_code=None):
+        """
+        Return a list of Job objects related to this vault.
+
+        :type completed: boolean
+        :param completed: Specifies the state of the jobs to return.
+            If a value of True is passed, only completed jobs will
+            be returned. If a value of False is passed, only
+            uncompleted jobs will be returned. If no value is
+            passed, all jobs will be returned.
+
+        :type status_code: string
+        :param status_code: Specifies the type of job status to return.
+            Valid values are: InProgress|Succeeded|Failed. If not
+            specified, jobs with all status codes are returned.
+
+        :rtype: list of :class:`boto.glacier.job.Job`
+        :return: A list of Job objects related to this vault.
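+
+        Example (illustrative)::
+
+            in_progress = vault.list_jobs(completed=False)
+            succeeded = vault.list_jobs(status_code='Succeeded')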
+        """
+        response_data = self.layer1.list_jobs(self.name, completed,
+                                              status_code)
+        return [Job(self, jd) for jd in response_data['JobList']]
+
+    def list_all_parts(self, upload_id):
+        """Automatically make and combine multiple calls to list_parts.
+
+        Call list_parts as necessary, combining the results in case multiple
+        calls were required to get data on all available parts.
+
+        """
+        result = self.layer1.list_parts(self.name, upload_id)
+        marker = result['Marker']
+        while marker:
+            additional_result = self.layer1.list_parts(
+                self.name, upload_id, marker=marker)
+            result['Parts'].extend(additional_result['Parts'])
+            marker = additional_result['Marker']
+        # The marker makes no sense in an unpaginated result, and clearing it
+        # makes testing easier. This also has the nice property that the result
+        # is a normal (but expanded) response.
+        result['Marker'] = None
+        return result