Chromium Code Reviews

Unified Diff: third_party/gsutil/boto/boto/glacier/vault.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago
Index: third_party/gsutil/boto/boto/glacier/vault.py
diff --git a/third_party/gsutil/boto/boto/glacier/vault.py b/third_party/gsutil/boto/boto/glacier/vault.py
new file mode 100644
index 0000000000000000000000000000000000000000..e037adc700ab7d72594d060f436ec891253daf12
--- /dev/null
+++ b/third_party/gsutil/boto/boto/glacier/vault.py
@@ -0,0 +1,387 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+from __future__ import with_statement
+from .exceptions import UploadArchiveError
+from .job import Job
+from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
+from .concurrent import ConcurrentUploader
+from .utils import minimum_part_size, DEFAULT_PART_SIZE
+import os.path
+
+
+_MEGABYTE = 1024 * 1024
+_GIGABYTE = 1024 * _MEGABYTE
+
+MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
+MAXIMUM_NUMBER_OF_PARTS = 10000
+
+
+class Vault(object):
+
+ DefaultPartSize = DEFAULT_PART_SIZE
+ SingleOperationThreshold = 100 * _MEGABYTE
+
+ ResponseDataElements = (('VaultName', 'name', None),
+ ('VaultARN', 'arn', None),
+ ('CreationDate', 'creation_date', None),
+ ('LastInventoryDate', 'last_inventory_date', None),
+ ('SizeInBytes', 'size', 0),
+ ('NumberOfArchives', 'number_of_archives', 0))
+
+ def __init__(self, layer1, response_data=None):
+ self.layer1 = layer1
+ if response_data:
+ for response_name, attr_name, default in self.ResponseDataElements:
+ value = response_data[response_name]
+ if isinstance(value, unicode):
+ value = value.encode('utf8')
+ setattr(self, attr_name, value)
+ else:
+ for response_name, attr_name, default in self.ResponseDataElements:
+ setattr(self, attr_name, default)
+
+ def __repr__(self):
+ return 'Vault("%s")' % self.arn
+
+ def delete(self):
+ """
+ Deletes this vault. WARNING: this operation cannot be undone.
+ """
+ self.layer1.delete_vault(self.name)
+
+ def upload_archive(self, filename, description=None):
+ """
+ Adds an archive to a vault. For archives larger than 100MB a
+ multipart upload is used.
+
+ :type filename: str
+ :param filename: The path of the file to upload.
+
+ :type description: str
+ :param description: An optional description for the archive.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
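+
+ Example (an illustrative sketch; assumes AWS credentials are
+ configured and that a vault named 'my-backups' already exists)::
+
+     from boto.glacier.layer2 import Layer2
+
+     vault = Layer2().get_vault('my-backups')
+     archive_id = vault.upload_archive('/tmp/backup.tar',
+                                       description='nightly backup')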
+ """
+ if os.path.getsize(filename) > self.SingleOperationThreshold:
+ return self.create_archive_from_file(filename, description=description)
+ return self._upload_archive_single_operation(filename, description)
+
+ def _upload_archive_single_operation(self, filename, description):
+ """
+ Adds an archive to a vault in a single operation. This is
+ recommended for archives smaller than 100MB.
+
+ :type filename: str
+ :param filename: The path of the file to upload.
+
+ :type description: str
+ :param description: A description for the archive.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
+ """
+ with open(filename, 'rb') as fileobj:
+ linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
+ fileobj.seek(0)
+ response = self.layer1.upload_archive(self.name, fileobj,
+ linear_hash, tree_hash,
+ description)
+ return response['ArchiveId']
+
+ def create_archive_writer(self, part_size=DefaultPartSize,
+ description=None):
+ """
+ Create a new archive and begin a multi-part upload to it.
+ Returns a file-like object to which the data for the archive
+ can be written. Once all the data has been written, the file-like
+ object should be closed; you can then call its get_archive_id
+ method to get the ID of the created archive.
+
+ :type part_size: int
+ :param part_size: The part size for the multipart upload.
+
+ :type description: str
+ :param description: An optional description for the archive.
+
+ :rtype: :class:`boto.glacier.writer.Writer`
+ :return: A Writer object to which the archive data
+ should be written.
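+
+ Example (an illustrative sketch; ``vault`` is assumed to be an
+ existing Vault instance)::
+
+     writer = vault.create_archive_writer(description='log bundle')
+     with open('/tmp/logs.tar', 'rb') as f:
+         chunk = f.read(vault.DefaultPartSize)
+         while chunk:
+             writer.write(chunk)
+             chunk = f.read(vault.DefaultPartSize)
+     writer.close()
+     archive_id = writer.get_archive_id()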
+ """
+ response = self.layer1.initiate_multipart_upload(self.name,
+ part_size,
+ description)
+ return Writer(self, response['UploadId'], part_size=part_size)
+
+ def create_archive_from_file(self, filename=None, file_obj=None,
+ description=None, upload_id_callback=None):
+ """
+ Create a new archive and upload the data from the given file
+ or file-like object.
+
+ :type filename: str
+ :param filename: A filename to upload
+
+ :type file_obj: file
+ :param file_obj: A file-like object to upload
+
+ :type description: str
+ :param description: An optional description for the archive.
+
+ :type upload_id_callback: function
+ :param upload_id_callback: if set, call with the upload_id as the
+ only parameter when it becomes known, to enable future calls
+ to resume_archive_from_file in case resume is needed.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
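+
+ Example (an illustrative sketch; recording the upload id allows a
+ later resume_archive_from_file call if the upload is interrupted)::
+
+     upload_ids = []
+     archive_id = vault.create_archive_from_file(
+         '/tmp/big-backup.tar',
+         description='weekly backup',
+         upload_id_callback=upload_ids.append)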
+ """
+ part_size = self.DefaultPartSize
+ if not file_obj:
+ file_size = os.path.getsize(filename)
+ try:
+ # Grow the part size when necessary so the file fits within
+ # Glacier's 10,000-part limit; minimum_part_size raises
+ # ValueError if no permissible part size is large enough.
+ part_size = minimum_part_size(file_size,
+ self.DefaultPartSize)
+ except ValueError:
+ raise UploadArchiveError("File size of %s bytes exceeds "
+ "40,000 GB archive limit of Glacier."
+ % file_size)
+ file_obj = open(filename, "rb")
+ writer = self.create_archive_writer(
+ description=description,
+ part_size=part_size)
+ if upload_id_callback:
+ upload_id_callback(writer.upload_id)
+ while True:
+ data = file_obj.read(part_size)
+ if not data:
+ break
+ writer.write(data)
+ writer.close()
+ return writer.get_archive_id()
+
+ @staticmethod
+ def _range_string_to_part_index(range_string, part_size):
+ start, inside_end = [int(value) for value in range_string.split('-')]
+ end = inside_end + 1
+ length = end - start
+ if length == part_size + 1:
+ # Off-by-one bug in Amazon's Glacier implementation,
+ # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
+ # Workaround: the reported range is one byte too long, so trim it.
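+ # For example, with part_size = 4194304 (4 MiB) Glacier may report
+ # the range "0-4194304" (4194305 bytes); trimming one byte yields
+ # the true 4 MiB part covering bytes 0-4194303.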
+ end -= 1
+ inside_end -= 1
+ length -= 1
+ assert not (start % part_size), (
+ "upload part start byte is not on a part boundary")
+ assert (length <= part_size), "upload part is bigger than part size"
+ return start // part_size
+
+ def resume_archive_from_file(self, upload_id, filename=None,
+ file_obj=None):
+ """Resume upload of a file already part-uploaded to Glacier.
+
+ The resumption of an upload where the part-uploaded section is empty
+ is a valid degenerate case that this function can handle.
+
+ One and only one of filename or file_obj must be specified.
+
+ :type upload_id: str
+ :param upload_id: existing Glacier upload id of upload being resumed.
+
+ :type filename: str
+ :param filename: file to open for resume
+
+ :type file_obj: file
+ :param file_obj: file-like object containing local data to resume.
+ This must read from the start of the entire upload, not just from
+ the point being resumed. Use file_obj.seek(0) to achieve this if
+ necessary.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
+
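+ Example (an illustrative sketch; ``upload_id`` is assumed to have
+ been recorded from an earlier, interrupted create_archive_from_file
+ call)::
+
+     archive_id = vault.resume_archive_from_file(
+         upload_id, filename='/tmp/big-backup.tar')
+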
+ """
+ part_list_response = self.list_all_parts(upload_id)
+ part_size = part_list_response['PartSizeInBytes']
+
+ part_hash_map = {}
+ for part_desc in part_list_response['Parts']:
+ part_index = self._range_string_to_part_index(
+ part_desc['RangeInBytes'], part_size)
+ part_tree_hash = part_desc['SHA256TreeHash'].decode('hex')
+ part_hash_map[part_index] = part_tree_hash
+
+ if not file_obj:
+ file_obj = open(filename, "rb")
+
+ return resume_file_upload(
+ self, upload_id, part_size, file_obj, part_hash_map)
+
+ def concurrent_create_archive_from_file(self, filename, description):
+ """
+ Create a new archive from the given file and upload it.
+
+ This is a convenience method around the
+ :class:`boto.glacier.concurrent.ConcurrentUploader`
+ class. This method will perform a multipart upload
+ and upload the parts of the file concurrently.
+
+ :type filename: str
+ :param filename: The path of the file to upload.
+
+ :type description: str
+ :param description: A description for the archive.
+
+ :raises: `boto.glacier.exceptions.UploadArchiveError` if an error
+ occurs during the upload process.
+
+ :rtype: str
+ :return: The archive id of the newly created archive
+
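+ Example (an illustrative sketch; the parts are uploaded from
+ multiple threads, which can be considerably faster for large
+ files)::
+
+     archive_id = vault.concurrent_create_archive_from_file(
+         '/tmp/big-backup.tar', 'weekly backup')
+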
+ """
+ uploader = ConcurrentUploader(self.layer1, self.name)
+ archive_id = uploader.upload(filename, description)
+ return archive_id
+
+ def retrieve_archive(self, archive_id, sns_topic=None,
+ description=None):
+ """
+ Initiate an archive retrieval job to download the data from an
+ archive. You will need to wait for a notification from
+ Amazon (via SNS) before you can actually download the data;
+ this typically takes around 4 hours.
+
+ :type archive_id: str
+ :param archive_id: The id of the archive
+
+ :type description: str
+ :param description: An optional description for the job.
+
+ :type sns_topic: str
+ :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+ sends notification when the job is completed and the output
+ is ready for you to download.
+
+ :rtype: :class:`boto.glacier.job.Job`
+ :return: A Job object representing the retrieval job.
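+
+ Example (an illustrative sketch; the job output only becomes
+ downloadable once Glacier marks the job complete, typically a few
+ hours later)::
+
+     job = vault.retrieve_archive(archive_id)
+     # ... some hours later, e.g. from a polling task ...
+     job = vault.get_job(job.id)
+     if job.completed:
+         job.download_to_file('/tmp/restored.tar')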
+ """
+ job_data = {'Type': 'archive-retrieval',
+ 'ArchiveId': archive_id}
+ if sns_topic is not None:
+ job_data['SNSTopic'] = sns_topic
+ if description is not None:
+ job_data['Description'] = description
+
+ response = self.layer1.initiate_job(self.name, job_data)
+ return self.get_job(response['JobId'])
+
+ def retrieve_inventory(self, sns_topic=None,
+ description=None):
+ """
+ Initiate an inventory retrieval job to list the items in the
+ vault. You will need to wait for a notification from
+ Amazon (via SNS) before you can actually download the data;
+ this typically takes around 4 hours.
+
+ :type description: str
+ :param description: An optional description for the job.
+
+ :type sns_topic: str
+ :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+ sends notification when the job is completed and the output
+ is ready for you to download.
+
+ :rtype: str
+ :return: The id of the newly-initiated job; pass it to get_job to
+ obtain a :class:`boto.glacier.job.Job`.
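+
+ Example (an illustrative sketch)::
+
+     job_id = vault.retrieve_inventory()
+     # ... some hours later ...
+     job = vault.get_job(job_id)
+     if job.completed:
+         inventory = job.get_output()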
+ """
+ job_data = {'Type': 'inventory-retrieval'}
+ if sns_topic is not None:
+ job_data['SNSTopic'] = sns_topic
+ if description is not None:
+ job_data['Description'] = description
+
+ response = self.layer1.initiate_job(self.name, job_data)
+ return response['JobId']
+
+ def delete_archive(self, archive_id):
+ """
+ This operation deletes an archive from the vault.
+
+ :type archive_id: str
+ :param archive_id: The ID for the archive to be deleted.
+ """
+ return self.layer1.delete_archive(self.name, archive_id)
+
+ def get_job(self, job_id):
+ """
+ Get an object representing a job in progress.
+
+ :type job_id: str
+ :param job_id: The ID of the job
+
+ :rtype: :class:`boto.glacier.job.Job`
+ :return: A Job object representing the job.
+ """
+ response_data = self.layer1.describe_job(self.name, job_id)
+ return Job(self, response_data)
+
+ def list_jobs(self, completed=None, status_code=None):
+ """
+ Return a list of Job objects related to this vault.
+
+ :type completed: boolean
+ :param completed: Specifies the state of the jobs to return.
+ If a value of True is passed, only completed jobs will
+ be returned. If a value of False is passed, only
+ uncompleted jobs will be returned. If no value is
+ passed, all jobs will be returned.
+
+ :type status_code: string
+ :param status_code: Specifies the type of job status to return.
+ Valid values are: InProgress|Succeeded|Failed. If not
+ specified, jobs with all status codes are returned.
+
+ :rtype: list of :class:`boto.glacier.job.Job`
+ :return: A list of Job objects related to this vault.
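+
+ Example (an illustrative sketch; prints every job that has already
+ finished, whatever its outcome)::
+
+     for job in vault.list_jobs(completed=True):
+         print job.id, job.status_code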
+ """
+ response_data = self.layer1.list_jobs(self.name, completed,
+ status_code)
+ return [Job(self, jd) for jd in response_data['JobList']]
+
+ def list_all_parts(self, upload_id):
+ """Automatically make and combine multiple calls to list_parts.
+
+ Call list_parts as necessary, combining the results in case multiple
+ calls were required to get data on all available parts.
+
+ """
+ result = self.layer1.list_parts(self.name, upload_id)
+ marker = result['Marker']
+ while marker:
+ additional_result = self.layer1.list_parts(
+ self.name, upload_id, marker=marker)
+ result['Parts'].extend(additional_result['Parts'])
+ marker = additional_result['Marker']
+ # The marker makes no sense in an unpaginated result, and clearing it
+ # makes testing easier. This also has the nice property that the result
+ # is a normal (but expanded) response.
+ result['Marker'] = None
+ return result
