Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(345)

Unified Diff: third_party/gsutil/boto/boto/glacier/job.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/gsutil/boto/boto/glacier/job.py
diff --git a/third_party/gsutil/boto/boto/glacier/job.py b/third_party/gsutil/boto/boto/glacier/job.py
new file mode 100644
index 0000000000000000000000000000000000000000..c740174117e5e1f78c18749f4e4275ebbbc1b019
--- /dev/null
+++ b/third_party/gsutil/boto/boto/glacier/job.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+from __future__ import with_statement
+import math
+import socket
+
+from .exceptions import TreeHashDoesNotMatchError, DownloadArchiveError
+from .utils import tree_hash_from_str
+
+
+class Job(object):
+
+ DefaultPartSize = 4 * 1024 * 1024
+
+ ResponseDataElements = (('Action', 'action', None),
+ ('ArchiveId', 'archive_id', None),
+ ('ArchiveSizeInBytes', 'archive_size', 0),
+ ('Completed', 'completed', False),
+ ('CompletionDate', 'completion_date', None),
+ ('CreationDate', 'creation_date', None),
+ ('InventorySizeInBytes', 'inventory_size', 0),
+ ('JobDescription', 'description', None),
+ ('JobId', 'id', None),
+ ('SHA256TreeHash', 'sha256_treehash', None),
+ ('SNSTopic', 'sns_topic', None),
+ ('StatusCode', 'status_code', None),
+ ('StatusMessage', 'status_message', None),
+ ('VaultARN', 'arn', None))
+
+ def __init__(self, vault, response_data=None):
+ self.vault = vault
+ if response_data:
+ for response_name, attr_name, default in self.ResponseDataElements:
+ setattr(self, attr_name, response_data[response_name])
+ else:
+ for response_name, attr_name, default in self.ResponseDataElements:
+ setattr(self, attr_name, default)
+
+ def __repr__(self):
+ return 'Job(%s)' % self.arn
+
+ def get_output(self, byte_range=None, validate_checksum=False):
+ """
+ This operation downloads the output of the job. Depending on
+ the job type you specified when you initiated the job, the
+ output will be either the content of an archive or a vault
+ inventory.
+
+ You can download all the job output or download a portion of
+ the output by specifying a byte range. In the case of an
+ archive retrieval job, depending on the byte range you
+ specify, Amazon Glacier returns the checksum for the portion
+ of the data. You can compute the checksum on the client and
+ verify that the values match to ensure the portion you
+ downloaded is the correct data.
+
+ :type byte_range: tuple
+ :param range: A tuple of integer specifying the slice (in bytes)
+ of the archive you want to receive
+
+ :type validate_checksum: bool
+ :param validate_checksum: Specify whether or not to validate
+ the associate tree hash. If the response does not contain
+ a TreeHash, then no checksum will be verified.
+
+ """
+ response = self.vault.layer1.get_job_output(self.vault.name,
+ self.id,
+ byte_range)
+ if validate_checksum and 'TreeHash' in response:
+ data = response.read()
+ actual_tree_hash = tree_hash_from_str(data)
+ if response['TreeHash'] != actual_tree_hash:
+ raise TreeHashDoesNotMatchError(
+ "The calculated tree hash %s does not match the "
+ "expected tree hash %s for the byte range %s" % (
+ actual_tree_hash, response['TreeHash'], byte_range))
+ return response
+
+ def download_to_file(self, filename, chunk_size=DefaultPartSize,
+ verify_hashes=True, retry_exceptions=(socket.error,)):
+ """Download an archive to a file.
+
+ :type filename: str
+ :param filename: The name of the file where the archive
+ contents will be saved.
+
+ :type chunk_size: int
+ :param chunk_size: The chunk size to use when downloading
+ the archive.
+
+ :type verify_hashes: bool
+ :param verify_hashes: Indicates whether or not to verify
+ the tree hashes for each downloaded chunk.
+
+ """
+ num_chunks = int(math.ceil(self.archive_size / float(chunk_size)))
+ with open(filename, 'wb') as output_file:
+ self._download_to_fileob(output_file, num_chunks, chunk_size,
+ verify_hashes, retry_exceptions)
+
+ def _download_to_fileob(self, fileobj, num_chunks, chunk_size, verify_hashes,
+ retry_exceptions):
+ for i in xrange(num_chunks):
+ byte_range = ((i * chunk_size), ((i + 1) * chunk_size) - 1)
+ data, expected_tree_hash = self._download_byte_range(
+ byte_range, retry_exceptions)
+ if verify_hashes:
+ actual_tree_hash = tree_hash_from_str(data)
+ if expected_tree_hash != actual_tree_hash:
+ raise TreeHashDoesNotMatchError(
+ "The calculated tree hash %s does not match the "
+ "expected tree hash %s for the byte range %s" % (
+ actual_tree_hash, expected_tree_hash, byte_range))
+ fileobj.write(data)
+
+ def _download_byte_range(self, byte_range, retry_exceptions):
+ # You can occasionally get socket.errors when downloading
+ # chunks from Glacier, so each chunk can be retried up
+ # to 5 times.
+ for _ in xrange(5):
+ try:
+ response = self.get_output(byte_range)
+ data = response.read()
+ expected_tree_hash = response['TreeHash']
+ return data, expected_tree_hash
+ except retry_exceptions, e:
+ continue
+ else:
+ raise DownloadArchiveError("There was an error downloading"
+ "byte range %s: %s" % (byte_range,
+ e))

Powered by Google App Engine
This is Rietveld 408576698