Index: third_party/gsutil/boto/boto/glacier/utils.py |
diff --git a/third_party/gsutil/boto/boto/glacier/utils.py b/third_party/gsutil/boto/boto/glacier/utils.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..af779f5cc30c829a24dd33e2f389184d5b1496ad |
--- /dev/null |
+++ b/third_party/gsutil/boto/boto/glacier/utils.py |
@@ -0,0 +1,163 @@ |
+# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved |
+# |
+# Permission is hereby granted, free of charge, to any person obtaining a |
+# copy of this software and associated documentation files (the |
+# "Software"), to deal in the Software without restriction, including |
+# without limitation the rights to use, copy, modify, merge, publish, dis- |
+# tribute, sublicense, and/or sell copies of the Software, and to permit |
+# persons to whom the Software is furnished to do so, subject to the fol- |
+# lowing conditions: |
+# |
+# The above copyright notice and this permission notice shall be included |
+# in all copies or substantial portions of the Software. |
+# |
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
+# IN THE SOFTWARE. |
+# |
+import hashlib |
+import math |
+ |
+ |
+_MEGABYTE = 1024 * 1024 |
+DEFAULT_PART_SIZE = 4 * _MEGABYTE |
+MAXIMUM_NUMBER_OF_PARTS = 10000 |
+ |
+ |
+def minimum_part_size(size_in_bytes, default_part_size=DEFAULT_PART_SIZE): |
+ """Calculate the minimum part size needed for a multipart upload. |
+ |
+ Glacier allows a maximum of 10,000 parts per upload. It also |
+ states that the maximum archive size is 10,000 * 4 GB, which means |
+    the part size can range from 1MB to 4GB (provided it is
+    1MB multiplied by a power of 2).
+ |
+ This function will compute what the minimum part size must be in |
+ order to upload a file of size ``size_in_bytes``. |
+ |
+    It will first check whether ``default_part_size`` is a sufficient
+    part size for the given ``size_in_bytes``. If it is not, the
+    smallest part size that can accommodate a file of size
+    ``size_in_bytes`` will be returned.
+ |
+ If the file size is greater than the maximum allowed archive |
+ size of 10,000 * 4GB, a ``ValueError`` will be raised. |
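+
+    Illustrative values (a sketch assuming the 4 MB default part size
+    defined above):
+
+        >>> minimum_part_size(10 * 1024 * 1024)
+        4194304
+        >>> minimum_part_size(100 * 1024 * 1024 * 1024)
+        16777216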
+ |
+ """ |
+ # The default part size (4 MB) will be too small for a very large |
+ # archive, as there is a limit of 10,000 parts in a multipart upload. |
+ # This puts the maximum allowed archive size with the default part size |
+ # at 40,000 MB. We need to do a sanity check on the part size, and find |
+ # one that works if the default is too small. |
+ part_size = _MEGABYTE |
+ if (default_part_size * MAXIMUM_NUMBER_OF_PARTS) < size_in_bytes: |
+ if size_in_bytes > (4096 * _MEGABYTE * 10000): |
+ raise ValueError("File size too large: %s" % size_in_bytes) |
+ min_part_size = size_in_bytes / 10000 |
+ power = 3 |
+ while part_size < min_part_size: |
+ part_size = math.ldexp(_MEGABYTE, power) |
+ power += 1 |
+ part_size = int(part_size) |
+ else: |
+ part_size = default_part_size |
+ return part_size |
+ |
+ |
+def chunk_hashes(bytestring, chunk_size=_MEGABYTE): |
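+    """Return a list with the SHA-256 digest of each ``chunk_size``
+    block of ``bytestring``.
+
+    An empty input still yields a single hash (of the empty string), so
+    there is always at least one leaf for the tree hash. Illustrative
+    values:
+
+        >>> len(chunk_hashes('x' * (2 * 1024 * 1024 + 1)))
+        3
+        >>> len(chunk_hashes(''))
+        1
+    """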
+ chunk_count = int(math.ceil(len(bytestring) / float(chunk_size))) |
+ hashes = [] |
+ for i in xrange(chunk_count): |
+ start = i * chunk_size |
+ end = (i + 1) * chunk_size |
+ hashes.append(hashlib.sha256(bytestring[start:end]).digest()) |
+ if not hashes: |
+ return [hashlib.sha256('').digest()] |
+ return hashes |
+ |
+ |
+def tree_hash(fo): |
+ """ |
+    Given the hash of each 1MB chunk (from ``chunk_hashes``), hash
+    adjacent pairs together repeatedly until a single hash remains;
+    that is, build a tree of hashes and return its root.
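+
+    For input that fits in a single chunk, the result is simply that
+    chunk's digest, e.g. (illustrative):
+
+        >>> tree_hash(chunk_hashes('hello')) == hashlib.sha256('hello').digest()
+        True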
+ """ |
+ hashes = [] |
+ hashes.extend(fo) |
+ while len(hashes) > 1: |
+ new_hashes = [] |
+ while True: |
+ if len(hashes) > 1: |
+ first = hashes.pop(0) |
+ second = hashes.pop(0) |
+ new_hashes.append(hashlib.sha256(first + second).digest()) |
+ elif len(hashes) == 1: |
+ only = hashes.pop(0) |
+ new_hashes.append(only) |
+ else: |
+ break |
+ hashes.extend(new_hashes) |
+ return hashes[0] |
+ |
+ |
+def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024): |
+ """Compute the linear and tree hash from a fileobj. |
+ |
+ This function will compute the linear/tree hash of a fileobj |
+ in a single pass through the fileobj. |
+ |
+ :param fileobj: A file like object. |
+ |
+ :param chunk_size: The size of the chunks to use for the tree |
+ hash. This is also the buffer size used to read from |
+ `fileobj`. |
+ |
+ :rtype: tuple |
+ :return: A tuple of (linear_hash, tree_hash). Both hashes |
+ are returned in hex. |
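+
+    For input smaller than one chunk the two hex digests coincide, e.g.
+    (illustrative, Python 2 ``StringIO``):
+
+        >>> from StringIO import StringIO
+        >>> linear, tree = compute_hashes_from_fileobj(StringIO('hello world'))
+        >>> linear == tree
+        True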
+ |
+ """ |
+ linear_hash = hashlib.sha256() |
+ chunks = [] |
+ chunk = fileobj.read(chunk_size) |
+ while chunk: |
+ linear_hash.update(chunk) |
+ chunks.append(hashlib.sha256(chunk).digest()) |
+ chunk = fileobj.read(chunk_size) |
+ if not chunks: |
+ chunks = [hashlib.sha256('').digest()] |
+ return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks)) |
+ |
+ |
+def bytes_to_hex(str_as_bytes): |
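+    # e.g. '\x01\xff' -> '01ff'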
+ return ''.join(["%02x" % ord(x) for x in str_as_bytes]).strip() |
+ |
+ |
+def tree_hash_from_str(str_as_bytes): |
+ """ |
+ |
+ :type str_as_bytes: str |
+ :param str_as_bytes: The string for which to compute the tree hash. |
+ |
+ :rtype: str |
+ :return: The computed tree hash, returned as hex. |
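+
+    For example, the tree hash of the empty string is just the SHA-256
+    of the empty string (illustrative):
+
+        >>> tree_hash_from_str('')
+        'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'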
+ |
+ """ |
+ return bytes_to_hex(tree_hash(chunk_hashes(str_as_bytes))) |
+ |
+ |
+class ResettingFileSender(object): |
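+    """Send an archive file object, rewinding it after each request.
+
+    The file's position is recorded when the sender is created and
+    restored after every request (even on failure), so a retried
+    request re-reads the body from the same starting offset.
+    """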
+ def __init__(self, archive): |
+ self._archive = archive |
+ self._starting_offset = archive.tell() |
+ |
+ def __call__(self, connection, method, path, body, headers): |
+ try: |
+ connection.request(method, path, self._archive, headers) |
+ return connection.getresponse() |
+ finally: |
+ self._archive.seek(self._starting_offset) |