Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: third_party/gsutil/boto/glacier/job.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # -*- coding: utf-8 -*-
2 # Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
3 #
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the
6 # "Software"), to deal in the Software without restriction, including
7 # without limitation the rights to use, copy, modify, merge, publish, dis-
8 # tribute, sublicense, and/or sell copies of the Software, and to permit
9 # persons to whom the Software is furnished to do so, subject to the fol-
10 # lowing conditions:
11 #
12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 # IN THE SOFTWARE.
22 #
23 from __future__ import with_statement
24 import math
25 import socket
26
27 from .exceptions import TreeHashDoesNotMatchError, DownloadArchiveError
28 from .utils import bytes_to_hex, chunk_hashes, tree_hash
29
30
class Job(object):
    """A single Glacier job attached to a vault.

    Each entry of ``ResponseDataElements`` becomes an instance attribute,
    filled from the job-description mapping passed to the constructor or,
    when a field is not supplied, from the default declared for it.
    """

    # Chunk size, in bytes, used by download_to_file() when the caller
    # does not pass one (4 * 1024 * 1024 = 4194304).
    DefaultPartSize = 4 * 1024 * 1024

    # (key in the response mapping, attribute name on the Job, default value)
    ResponseDataElements = (('Action', 'action', None),
                            ('ArchiveId', 'archive_id', None),
                            ('ArchiveSizeInBytes', 'archive_size', 0),
                            ('Completed', 'completed', False),
                            ('CompletionDate', 'completion_date', None),
                            ('CreationDate', 'creation_date', None),
                            ('InventorySizeInBytes', 'inventory_size', 0),
                            ('JobDescription', 'description', None),
                            ('JobId', 'id', None),
                            ('SHA256TreeHash', 'sha256_treehash', None),
                            ('SNSTopic', 'sns_topic', None),
                            ('StatusCode', 'status_code', None),
                            ('StatusMessage', 'status_message', None),
                            ('VaultARN', 'arn', None))

    def __init__(self, vault, response_data=None):
        """Create a job bound to ``vault``.

        :type vault: object
        :param vault: The owning vault. ``get_output`` reads its
            ``layer1`` and ``name`` attributes.

        :type response_data: mapping
        :param response_data: Optional job description keyed by the
            response names listed in ``ResponseDataElements``. When it
            is empty or omitted, every attribute gets its default. A key
            missing from a non-empty mapping also falls back to the
            declared default instead of raising ``KeyError``.
        """
        self.vault = vault
        for response_name, attr_name, default in self.ResponseDataElements:
            value = default
            if response_data:
                try:
                    value = response_data[response_name]
                except KeyError:
                    # Field absent from this response: keep the default.
                    pass
            setattr(self, attr_name, value)

    def __repr__(self):
        return 'Job(%s)' % self.arn

    def get_output(self, byte_range=None):
        """
        This operation downloads the output of the job.  Depending on
        the job type you specified when you initiated the job, the
        output will be either the content of an archive or a vault
        inventory.

        You can download all the job output or download a portion of
        the output by specifying a byte range. In the case of an
        archive retrieval job, depending on the byte range you
        specify, Amazon Glacier returns the checksum for the portion
        of the data. You can compute the checksum on the client and
        verify that the values match to ensure the portion you
        downloaded is the correct data.

        :type byte_range: tuple
        :param byte_range: A tuple of integers specifying the slice
            (in bytes) of the archive you want to receive

        :return: Whatever ``vault.layer1.get_job_output`` returns for
            this vault name, job id and byte range.
        """
        return self.vault.layer1.get_job_output(self.vault.name,
                                                self.id,
                                                byte_range)

    def download_to_file(self, filename, chunk_size=DefaultPartSize,
                         verify_hashes=True, retry_exceptions=(socket.error,)):
        """Download an archive to a file.

        :type filename: str
        :param filename: The name of the file where the archive
            contents will be saved.

        :type chunk_size: int
        :param chunk_size: The chunk size to use when downloading
            the archive.

        :type verify_hashes: bool
        :param verify_hashes: Indicates whether or not to verify
            the tree hashes for each downloaded chunk.

        :type retry_exceptions: tuple
        :param retry_exceptions: Exception types that cause the download
            of a chunk to be attempted again (each chunk is attempted
            up to 5 times).

        """
        # Round up so a trailing partial chunk is still requested.
        num_chunks = int(math.ceil(self.archive_size / float(chunk_size)))
        with open(filename, 'wb') as output_file:
            self._download_to_fileob(output_file, num_chunks, chunk_size,
                                     verify_hashes, retry_exceptions)

    def _download_to_fileob(self, fileobj, num_chunks, chunk_size,
                            verify_hashes, retry_exceptions):
        """Fetch ``num_chunks`` consecutive chunks and write them to ``fileobj``.

        Raises TreeHashDoesNotMatchError when ``verify_hashes`` is true
        and the tree hash computed over a chunk differs from the one
        returned with it; that chunk is not written.
        """
        for i in xrange(num_chunks):
            # Inclusive (first byte, last byte) offsets of chunk i.
            byte_range = ((i * chunk_size), ((i + 1) * chunk_size) - 1)
            data, expected_tree_hash = self._download_byte_range(
                byte_range, retry_exceptions)
            if verify_hashes:
                actual_tree_hash = bytes_to_hex(tree_hash(chunk_hashes(data)))
                if expected_tree_hash != actual_tree_hash:
                    raise TreeHashDoesNotMatchError(
                        "The calculated tree hash %s does not match the "
                        "expected tree hash %s for the byte range %s" % (
                            actual_tree_hash, expected_tree_hash, byte_range))
            fileobj.write(data)

    def _download_byte_range(self, byte_range, retry_exceptions):
        """Return ``(data, expected_tree_hash)`` for one byte range.

        Raises DownloadArchiveError when every attempt fails with one of
        ``retry_exceptions``; any other exception propagates immediately.
        """
        import sys

        # You can occasionally get socket.errors when downloading
        # chunks from Glacier, so each chunk is attempted up to
        # 5 times.
        last_error = None
        for _ in xrange(5):
            try:
                response = self.get_output(byte_range)
                data = response.read()
                expected_tree_hash = response['TreeHash']
                return data, expected_tree_hash
            except retry_exceptions:
                # sys.exc_info() instead of "except X, e" / "except X as e":
                # this spelling is valid on every Python version the module
                # may run under, and keeps the error available after the
                # handler exits.
                last_error = sys.exc_info()[1]
        raise DownloadArchiveError("There was an error downloading "
                                   "byte range %s: %s" % (byte_range,
                                                          last_error))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698