Chromium Code Reviews

Side by Side Diff: third_party/gsutil/boto/glacier/utils.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
(Old file is empty; this patch adds the file.)
# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import hashlib
import math


_MEGABYTE = 1024 * 1024
DEFAULT_PART_SIZE = 4 * _MEGABYTE
MAXIMUM_NUMBER_OF_PARTS = 10000


def minimum_part_size(size_in_bytes):
    # The default part size (4 MB) will be too small for a very large
    # archive, as there is a limit of 10,000 parts in a multipart upload.
    # This puts the maximum allowed archive size with the default part size
    # at 40,000 MB. We need to do a sanity check on the part size, and find
    # one that works if the default is too small.
    part_size = _MEGABYTE
    if (DEFAULT_PART_SIZE * MAXIMUM_NUMBER_OF_PARTS) < size_in_bytes:
        if size_in_bytes > (4096 * _MEGABYTE * 10000):
            raise ValueError("File size too large: %s" % size_in_bytes)
        min_part_size = size_in_bytes / 10000
        power = 3
        while part_size < min_part_size:
            part_size = math.ldexp(_MEGABYTE, power)
            power += 1
        part_size = int(part_size)
    else:
        part_size = DEFAULT_PART_SIZE
    return part_size
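# Worked example (editor's sketch; values are illustrative, not from the
# patch): a 50 GB archive cannot use the 4 MB default, since
# 50 GB / 4 MB = 12,800 parts, over the 10,000-part limit. The loop above
# settles on the next power-of-two part size that fits:
#
#   minimum_part_size(50 * 1024 ** 3)  # -> 8388608 (8 MB -> 6,400 parts)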


def chunk_hashes(bytestring, chunk_size=_MEGABYTE):
    chunk_count = int(math.ceil(len(bytestring) / float(chunk_size)))
    hashes = []
    for i in xrange(chunk_count):
        start = i * chunk_size
        end = (i + 1) * chunk_size
        hashes.append(hashlib.sha256(bytestring[start:end]).digest())
    return hashes


def tree_hash(fo):
    """
    Given a hash of each 1MB chunk (from chunk_hashes) this will hash
    together adjacent hashes until it ends up with one big one. So a
    tree of hashes.
    """
    hashes = []
    hashes.extend(fo)
    while len(hashes) > 1:
        new_hashes = []
        while True:
            if len(hashes) > 1:
                first = hashes.pop(0)
                second = hashes.pop(0)
                new_hashes.append(hashlib.sha256(first + second).digest())
            elif len(hashes) == 1:
                only = hashes.pop(0)
                new_hashes.append(only)
            else:
                break
        hashes.extend(new_hashes)
    return hashes[0]
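# Illustration (editor's sketch, not part of the patch): four 1 MB chunks
# yield four leaf digests, which tree_hash combines pairwise level by
# level into a single root digest:
#
#   leaves = chunk_hashes('\x00' * (4 * _MEGABYTE))  # 4 leaf digests
#   root = tree_hash(leaves)              # (h1,h2)->h12, (h3,h4)->h34,
#                                         # then (h12,h34) -> root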


def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024):
    """Compute the linear and tree hash from a fileobj.

    This function will compute the linear/tree hash of a fileobj
    in a single pass through the fileobj.

    :param fileobj: A file like object.

    :param chunk_size: The size of the chunks to use for the tree
        hash. This is also the buffer size used to read from
        `fileobj`.

    :rtype: tuple
    :return: A tuple of (linear_hash, tree_hash). Both hashes
        are returned in hex.

    """
    linear_hash = hashlib.sha256()
    chunks = []
    chunk = fileobj.read(chunk_size)
    while chunk:
        linear_hash.update(chunk)
        chunks.append(hashlib.sha256(chunk).digest())
        chunk = fileobj.read(chunk_size)
    return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks))
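# Usage sketch (editor's illustration; assumes an in-memory Python 2
# file object rather than a real file):
#
#   from StringIO import StringIO
#   linear, tree = compute_hashes_from_fileobj(StringIO('x' * 3000000))
#   # linear: hex SHA-256 of the whole stream, computed in one pass;
#   # tree: hex Glacier tree hash over the three 1 MB chunk digests.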


def bytes_to_hex(str_as_bytes):
    return ''.join(["%02x" % ord(x) for x in str_as_bytes]).strip()