third_party/gsutil/boto/boto/glacier/job.py - Issue 12042069: Scripts to download files from google storage based on sha1 sums

Unified Diff: third_party/gsutil/boto/boto/glacier/job.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master

Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« third_party/gsutil/README ('K') | « third_party/gsutil/boto/boto/glacier/exceptions.py ('k') | third_party/gsutil/boto/boto/glacier/layer1.py » ('j') | third_party/gsutil/boto/tests/__init__.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/gsutil/boto/boto/glacier/job.py

diff --git a/third_party/gsutil/boto/boto/glacier/job.py b/third_party/gsutil/boto/boto/glacier/job.py

new file mode 100644

index 0000000000000000000000000000000000000000..c740174117e5e1f78c18749f4e4275ebbbc1b019

--- /dev/null

+++ b/third_party/gsutil/boto/boto/glacier/job.py

@@ -0,0 +1,152 @@

+# -*- coding: utf-8 -*-

+# Permission is hereby granted, free of charge, to any person obtaining a

+# copy of this software and associated documentation files (the

+# "Software"), to deal in the Software without restriction, including

+# without limitation the rights to use, copy, modify, merge, publish, dis-

+# tribute, sublicense, and/or sell copies of the Software, and to permit

+# persons to whom the Software is furnished to do so, subject to the fol-

+# lowing conditions:

+# The above copyright notice and this permission notice shall be included

+# in all copies or substantial portions of the Software.

+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-

+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT

+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS

+# IN THE SOFTWARE.

+from __future__ import with_statement

+import math

+import socket

+from .exceptions import TreeHashDoesNotMatchError, DownloadArchiveError

+from .utils import tree_hash_from_str

+class Job(object):
+    """A single Amazon Glacier job that belongs to a vault.
+
+    The fields listed in ``ResponseDataElements`` are exposed as instance
+    attributes, and helper methods fetch the job output either as a raw
+    response or streamed chunk by chunk into a local file.
+    """
+
+    # Default size, in bytes, of each chunk requested by download_to_file.
+    DefaultPartSize = 4 * 1024 * 1024
+
+    # (response key, attribute name, default value) triples used by
+    # __init__ to populate the instance attributes.
+    ResponseDataElements = (('Action', 'action', None),
+                            ('ArchiveId', 'archive_id', None),
+                            ('ArchiveSizeInBytes', 'archive_size', 0),
+                            ('Completed', 'completed', False),
+                            ('CompletionDate', 'completion_date', None),
+                            ('CreationDate', 'creation_date', None),
+                            ('InventorySizeInBytes', 'inventory_size', 0),
+                            ('JobDescription', 'description', None),
+                            ('JobId', 'id', None),
+                            ('SHA256TreeHash', 'sha256_treehash', None),
+                            ('SNSTopic', 'sns_topic', None),
+                            ('StatusCode', 'status_code', None),
+                            ('StatusMessage', 'status_message', None),
+                            ('VaultARN', 'arn', None))
+
+    def __init__(self, vault, response_data=None):
+        """Create a job bound to ``vault``.
+
+        :type vault: object
+        :param vault: The vault this job belongs to. It is stored as
+            ``self.vault``; ``get_output`` reads its ``name`` and
+            ``layer1`` attributes.
+
+        :type response_data: dict
+        :param response_data: Mapping keyed by the response names in
+            ``ResponseDataElements``. A key that is missing falls back
+            to its declared default, as does every attribute when the
+            mapping is empty or ``None``.
+        """
+        self.vault = vault
+        # An empty dict makes every lookup below fall through to the
+        # declared default, which covers the "no response data" case.
+        response_data = response_data or {}
+        for response_name, attr_name, default in self.ResponseDataElements:
+            setattr(self, attr_name,
+                    response_data.get(response_name, default))
+
+    def __repr__(self):
+        return 'Job(%s)' % self.arn
+
+    def get_output(self, byte_range=None, validate_checksum=False):
+        """
+        This operation downloads the output of the job. Depending on
+        the job type you specified when you initiated the job, the
+        output will be either the content of an archive or a vault
+        inventory.
+
+        You can download all the job output or download a portion of
+        the output by specifying a byte range. In the case of an
+        archive retrieval job, depending on the byte range you
+        specify, Amazon Glacier returns the checksum for the portion
+        of the data. You can compute the checksum on the client and
+        verify that the values match to ensure the portion you
+        downloaded is the correct data.
+
+        :type byte_range: tuple
+        :param byte_range: A tuple of integers specifying the slice
+            (in bytes) of the archive you want to receive
+
+        :type validate_checksum: bool
+        :param validate_checksum: Specify whether or not to validate
+            the associated tree hash. If the response does not contain
+            a TreeHash, then no checksum will be verified.
+
+        :return: The response object returned by
+            ``vault.layer1.get_job_output``.
+
+        :raises TreeHashDoesNotMatchError: If validation was requested,
+            the response carries a ``TreeHash`` and it differs from the
+            hash computed over the downloaded data.
+        """
+        response = self.vault.layer1.get_job_output(self.vault.name,
+                                                    self.id,
+                                                    byte_range)
+        if validate_checksum and 'TreeHash' in response:
+            # NOTE(review): the body is read here to compute the hash.
+            # If read() consumes the underlying stream, callers cannot
+            # read the data again from the returned response - confirm.
+            data = response.read()
+            self._verify_tree_hash(data, response['TreeHash'], byte_range)
+        return response
+
+    def download_to_file(self, filename, chunk_size=DefaultPartSize,
+                         verify_hashes=True, retry_exceptions=(socket.error,)):
+        """Download an archive to a file.
+
+        :type filename: str
+        :param filename: The name of the file where the archive
+            contents will be saved.
+
+        :type chunk_size: int
+        :param chunk_size: The chunk size to use when downloading
+            the archive. Must be greater than zero.
+
+        :type verify_hashes: bool
+        :param verify_hashes: Indicates whether or not to verify
+            the tree hashes for each downloaded chunk.
+
+        :type retry_exceptions: tuple
+        :param retry_exceptions: Exception types that cause a chunk
+            download to be retried instead of failing immediately.
+
+        :raises ValueError: If ``chunk_size`` is not positive.
+        """
+        # Reject a bad chunk size before open() truncates the target
+        # file: a negative value would otherwise yield zero chunks and
+        # silently leave an empty file behind.
+        if chunk_size <= 0:
+            raise ValueError(
+                "chunk_size must be a positive number of bytes, "
+                "got %r" % (chunk_size,))
+        num_chunks = int(math.ceil(self.archive_size / float(chunk_size)))
+        with open(filename, 'wb') as output_file:
+            self._download_to_fileob(output_file, num_chunks, chunk_size,
+                                     verify_hashes, retry_exceptions)
+
+    def _verify_tree_hash(self, data, expected_tree_hash, byte_range):
+        # Compare the tree hash computed from ``data`` with the expected
+        # one; ``byte_range`` is only used to build the error message.
+        actual_tree_hash = tree_hash_from_str(data)
+        if expected_tree_hash != actual_tree_hash:
+            raise TreeHashDoesNotMatchError(
+                "The calculated tree hash %s does not match the "
+                "expected tree hash %s for the byte range %s" % (
+                    actual_tree_hash, expected_tree_hash, byte_range))
+
+    def _download_to_fileob(self, fileobj, num_chunks, chunk_size, verify_hashes,
+                            retry_exceptions):
+        # Fetch ``num_chunks`` consecutive ranges of ``chunk_size`` bytes
+        # and write each one to ``fileobj`` in order.
+        for i in xrange(num_chunks):
+            # Inclusive (first byte, last byte) pair. When the archive
+            # size is not a multiple of chunk_size the last range ends
+            # past the archive size.
+            byte_range = ((i * chunk_size), ((i + 1) * chunk_size) - 1)
+            data, expected_tree_hash = self._download_byte_range(
+                byte_range, retry_exceptions)
+            if verify_hashes:
+                self._verify_tree_hash(data, expected_tree_hash, byte_range)
+            fileobj.write(data)
+
+    def _download_byte_range(self, byte_range, retry_exceptions):
+        # You can occasionally get socket.errors when downloading
+        # chunks from Glacier, so each chunk can be retried up
+        # to 5 times.
+        last_error = None
+        for _ in xrange(5):
+            try:
+                response = self.get_output(byte_range)
+                data = response.read()
+                expected_tree_hash = response['TreeHash']
+                return data, expected_tree_hash
+            except retry_exceptions, e:
+                # Keep the most recent failure so it can be reported if
+                # every attempt fails.
+                last_error = e
+        # Only reached when all attempts raised a retryable exception.
+        raise DownloadArchiveError("There was an error downloading "
+                                   "byte range %s: %s" % (byte_range,
+                                                          last_error))