Chromium Code Reviews

Side by Side Diff: third_party/gsutil/boto/glacier/vault.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
1 # -*- coding: utf-8 -*-
2 # Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
3 # Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, dis-
9 # tribute, sublicense, and/or sell copies of the Software, and to permit
10 # persons to whom the Software is furnished to do so, subject to the fol-
11 # lowing conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 # IN THE SOFTWARE.
23 #
24 from __future__ import with_statement
25 from .exceptions import UploadArchiveError
26 from .job import Job
27 from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
28 from .concurrent import ConcurrentUploader
29 from .utils import minimum_part_size, DEFAULT_PART_SIZE
30 import os.path
31
32
33 _MEGABYTE = 1024 * 1024
34 _GIGABYTE = 1024 * _MEGABYTE
35
36 MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
37 MAXIMUM_NUMBER_OF_PARTS = 10000
38
39
40 class Vault(object):
41
42 DefaultPartSize = DEFAULT_PART_SIZE
43 SingleOperationThreshold = 100 * _MEGABYTE
44
45 ResponseDataElements = (('VaultName', 'name', None),
46 ('VaultARN', 'arn', None),
47 ('CreationDate', 'creation_date', None),
48 ('LastInventoryDate', 'last_inventory_date', None),
49 ('SizeInBytes', 'size', 0),
50 ('NumberOfArchives', 'number_of_archives', 0))
51
52 def __init__(self, layer1, response_data=None):
53 self.layer1 = layer1
54 if response_data:
55 for response_name, attr_name, default in self.ResponseDataElements:
56 value = response_data[response_name]
57 if isinstance(value, unicode):
58 value = value.encode('utf8')
59 setattr(self, attr_name, value)
60 else:
61 for response_name, attr_name, default in self.ResponseDataElements:
62 setattr(self, attr_name, default)
63
64 def __repr__(self):
65 return 'Vault("%s")' % self.arn
66
67 def delete(self):
68 """
69 Deletes this vault. WARNING!
70 """
71 self.layer1.delete_vault(self.name)
72
73 def upload_archive(self, filename, description=None):
74 """
75 Adds an archive to a vault. For archives larger than 100MB,
76 a multipart upload will be used.
77
78 :type filename: str
79 :param filename: A filename to upload
80
81 :type description: str
82 :param description: An optional description for the archive.
83
84 :rtype: str
85 :return: The archive id of the newly created archive
86 """
87 if os.path.getsize(filename) > self.SingleOperationThreshold:
88 return self.create_archive_from_file(filename, description=description)
89 return self._upload_archive_single_operation(filename, description)
90
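# A minimal usage sketch for upload_archive (editorial, not part of this
# module): assumes a Layer2 connection from boto.glacier and an existing
# vault named 'my-vault'; both names are illustrative.
#
#   import boto.glacier
#   layer2 = boto.glacier.connect_to_region('us-east-1')
#   vault = layer2.get_vault('my-vault')
#   # Files above SingleOperationThreshold (100MB) are transparently
#   # routed through the multipart create_archive_from_file path.
#   archive_id = vault.upload_archive('backup.tar', description='nightly')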
91 def _upload_archive_single_operation(self, filename, description):
92 """
93 Adds an archive to a vault in a single operation. This is
94 recommended only for archives smaller than 100MB.
95
96 :type filename: str
97 :param filename: A filename to upload
98
99 :type description: str
100 :param description: A description for the archive.
101
102 :rtype: str
103 :return: The archive id of the newly created archive
104 """
105 with open(filename, 'rb') as fileobj:
106 linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
107 fileobj.seek(0)
108 response = self.layer1.upload_archive(self.name, fileobj,
109 linear_hash, tree_hash,
110 description)
111 return response['ArchiveId']
112
113 def create_archive_writer(self, part_size=DefaultPartSize,
114 description=None):
115 """
116 Create a new archive and begin a multi-part upload to it.
117 Returns a file-like object to which the data for the archive
118 can be written. Once all the data is written, the file-like
119 object should be closed; you can then call its get_archive_id
120 method to get the ID of the created archive.
121
122 :type part_size: int
123 :param part_size: The part size for the multipart upload.
124
125 :type description: str
126 :param description: An optional description for the archive.
127
128 :rtype: :class:`boto.glacier.writer.Writer`
129 :return: A Writer object to which the archive data
130 should be written.
131 """
132 response = self.layer1.initiate_multipart_upload(self.name,
133 part_size,
134 description)
135 return Writer(self, response['UploadId'], part_size=part_size)
136
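# Sketch of the writer-based streaming flow (editorial; iter_chunks is a
# hypothetical data source):
#
#   writer = vault.create_archive_writer(description='streamed upload')
#   for chunk in iter_chunks():
#       writer.write(chunk)
#   writer.close()                       # finalizes the multipart upload
#   archive_id = writer.get_archive_id()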
137 def create_archive_from_file(self, filename=None, file_obj=None,
138 description=None, upload_id_callback=None):
139 """
140 Create a new archive and upload the data from the given file
141 or file-like object.
142
143 :type filename: str
144 :param filename: A filename to upload
145
146 :type file_obj: file
147 :param file_obj: A file-like object to upload
148
149 :type description: str
150 :param description: An optional description for the archive.
151
152 :type upload_id_callback: function
153 :param upload_id_callback: if set, called with the upload_id as its
154 only argument as soon as the id is known, enabling future calls
155 to resume_archive_from_file in case the upload needs resuming.
156
157 :rtype: str
158 :return: The archive id of the newly created archive
159 """
160 part_size = self.DefaultPartSize
161 if not file_obj:
162 file_size = os.path.getsize(filename)
163 try:
164 part_size = minimum_part_size(file_size)
165 except ValueError:
166 raise UploadArchiveError("File size of %s bytes exceeds "
167 "40,000 GB archive limit of Glacier." % file_size)
168 file_obj = open(filename, "rb")
169 writer = self.create_archive_writer(
170 description=description,
171 part_size=part_size)
172 if upload_id_callback:
173 upload_id_callback(writer.upload_id)
174 while True:
175 data = file_obj.read(part_size)
176 if not data:
177 break
178 writer.write(data)
179 writer.close()
180 return writer.get_archive_id()
181
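# Sketch of using upload_id_callback to persist the upload id so a failed
# transfer can later be resumed (editorial; file names are illustrative):
#
#   def remember_upload_id(upload_id):
#       with open('upload.id', 'w') as f:
#           f.write(upload_id)
#
#   archive_id = vault.create_archive_from_file(
#       'big.tar', upload_id_callback=remember_upload_id)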
182 @staticmethod
183 def _range_string_to_part_index(range_string, part_size):
184 start, inside_end = [int(value) for value in range_string.split('-')]
185 end = inside_end + 1
186 length = end - start
187 if length == part_size + 1:
188 # Off-by-one bug in Amazon's Glacier implementation,
189 # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
190 # Workaround: since part_size is too big by one byte, adjust it
191 end -= 1
192 inside_end -= 1
193 length -= 1
194 assert not (start % part_size), (
195 "upload part start byte is not on a part boundary")
196 assert (length <= part_size), "upload part is bigger than part size"
197 return start // part_size
198
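# Worked example (editorial): with part_size = 4 * _MEGABYTE (4194304
# bytes), the range string '4194304-8388607' gives start = 4194304 and
# inside_end = 8388607, so end = 8388608 and length = 4194304; start
# falls on a part boundary and the part index is 4194304 // 4194304 == 1.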
199 def resume_archive_from_file(self, upload_id, filename=None,
200 file_obj=None):
201 """Resume upload of a file already part-uploaded to Glacier.
202
203 The resumption of an upload where the part-uploaded section is empty
204 is a valid degenerate case that this function can handle.
205
206 One and only one of filename or file_obj must be specified.
207
208 :type upload_id: str
209 :param upload_id: existing Glacier upload id of upload being resumed.
210
211 :type filename: str
212 :param filename: file to open for resume
213
214 :type file_obj: file
215 :param file_obj: file-like object containing local data to resume. This
216 must read from the start of the entire upload, not just from the
217 point being resumed. Use file_obj.seek(0) to achieve this if necessary.
218
219 :rtype: str
220 :return: The archive id of the newly created archive
221
222 """
223 part_list_response = self.list_all_parts(upload_id)
224 part_size = part_list_response['PartSizeInBytes']
225
226 part_hash_map = {}
227 for part_desc in part_list_response['Parts']:
228 part_index = self._range_string_to_part_index(
229 part_desc['RangeInBytes'], part_size)
230 part_tree_hash = part_desc['SHA256TreeHash'].decode('hex')
231 part_hash_map[part_index] = part_tree_hash
232
233 if not file_obj:
234 file_obj = open(filename, "rb")
235
236 return resume_file_upload(
237 self, upload_id, part_size, file_obj, part_hash_map)
238
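# Sketch of resuming the upload persisted above (editorial; assumes
# 'upload.id' was written by the upload_id_callback example):
#
#   with open('upload.id') as f:
#       upload_id = f.read().strip()
#   archive_id = vault.resume_archive_from_file(upload_id,
#                                               filename='big.tar')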
239 def concurrent_create_archive_from_file(self, filename, description):
240 """
241 Create a new archive and concurrently upload the contents
242 of the given file.
243
244 This is a convenience method around the
245 :class:`boto.glacier.concurrent.ConcurrentUploader`
246 class. This method will perform a multipart upload
247 and upload the parts of the file concurrently.
248
249 :type filename: str
250 :param filename: A filename to upload
251
252 :raises: `boto.glacier.exceptions.UploadArchiveError` if an error
253 occurs during the upload process.
254
255 :rtype: str
256 :return: The archive id of the newly created archive
257
258 """
259 uploader = ConcurrentUploader(self.layer1, self.name)
260 archive_id = uploader.upload(filename, description)
261 return archive_id
262
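# Minimal usage sketch (editorial); part size and thread count come from
# the ConcurrentUploader defaults in boto.glacier.concurrent:
#
#   archive_id = vault.concurrent_create_archive_from_file(
#       'big.tar', 'concurrent nightly backup')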
263 def retrieve_archive(self, archive_id, sns_topic=None,
264 description=None):
265 """
266 Initiate an archive retrieval job to download the data from an
267 archive. You will need to wait for the notification from
268 Amazon (via SNS) before you can actually download the data;
269 this typically takes around 4 hours.
270
271 :type archive_id: str
272 :param archive_id: The id of the archive
273
274 :type description: str
275 :param description: An optional description for the job.
276
277 :type sns_topic: str
278 :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
279 sends notification when the job is completed and the output
280 is ready for you to download.
281
282 :rtype: :class:`boto.glacier.job.Job`
283 :return: A Job object representing the retrieval job.
284 """
285 job_data = {'Type': 'archive-retrieval',
286 'ArchiveId': archive_id}
287 if sns_topic is not None:
288 job_data['SNSTopic'] = sns_topic
289 if description is not None:
290 job_data['Description'] = description
291
292 response = self.layer1.initiate_job(self.name, job_data)
293 return self.get_job(response['JobId'])
294
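# Sketch of polling for completion instead of waiting on SNS (editorial;
# the interval is illustrative, and Job.completed / Job.download_to_file
# are assumed from boto.glacier.job):
#
#   import time
#   job = vault.retrieve_archive(archive_id)
#   while not vault.get_job(job.id).completed:
#       time.sleep(15 * 60)              # retrieval takes ~4 hours
#   vault.get_job(job.id).download_to_file('restored.tar')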
295 def retrieve_inventory(self, sns_topic=None,
296 description=None):
297 """
298 Initiate an inventory retrieval job to list the items in the
299 vault. You will need to wait for the notification from
300 Amazon (via SNS) before you can actually download the data;
301 this typically takes around 4 hours.
302
303 :type description: str
304 :param description: An optional description for the job.
305
306 :type sns_topic: str
307 :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
308 sends notification when the job is completed and the output
309 is ready for you to download.
310
311 :rtype: str
312 :return: The ID of the initiated inventory retrieval job.
313 """
314 job_data = {'Type': 'inventory-retrieval'}
315 if sns_topic is not None:
316 job_data['SNSTopic'] = sns_topic
317 if description is not None:
318 job_data['Description'] = description
319
320 response = self.layer1.initiate_job(self.name, job_data)
321 return response['JobId']
322
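# Note that, unlike retrieve_archive, this method returns the job id
# rather than a Job object. A usage sketch (editorial; Job.get_output is
# assumed from boto.glacier.job):
#
#   job_id = vault.retrieve_inventory()
#   inventory_job = vault.get_job(job_id)
#   # once inventory_job.completed is True, inventory_job.get_output()
#   # returns the vault listing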
323 def delete_archive(self, archive_id):
324 """
325 This operation deletes an archive from the vault.
326
327 :type archive_id: str
328 :param archive_id: The ID for the archive to be deleted.
329 """
330 return self.layer1.delete_archive(self.name, archive_id)
331
332 def get_job(self, job_id):
333 """
334 Get an object representing a job in progress.
335
336 :type job_id: str
337 :param job_id: The ID of the job
338
339 :rtype: :class:`boto.glacier.job.Job`
340 :return: A Job object representing the job.
341 """
342 response_data = self.layer1.describe_job(self.name, job_id)
343 return Job(self, response_data)
344
345 def list_jobs(self, completed=None, status_code=None):
346 """
347 Return a list of Job objects related to this vault.
348
349 :type completed: boolean
350 :param completed: Specifies the state of the jobs to return.
351 If a value of True is passed, only completed jobs will
352 be returned. If a value of False is passed, only
353 uncompleted jobs will be returned. If no value is
354 passed, all jobs will be returned.
355
356 :type status_code: string
357 :param status_code: Specifies the type of job status to return.
358 Valid values are: InProgress|Succeeded|Failed. If not
359 specified, jobs with all status codes are returned.
360
361 :rtype: list of :class:`boto.glacier.job.Job`
362 :return: A list of Job objects related to this vault.
363 """
364 response_data = self.layer1.list_jobs(self.name, completed,
365 status_code)
366 return [Job(self, jd) for jd in response_data['JobList']]
367
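# Filtering sketch (editorial; status codes are the documented Glacier
# values):
#
#   succeeded = vault.list_jobs(completed=True, status_code='Succeeded')
#   for job in succeeded:
#       print job.id, job.status_code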
368 def list_all_parts(self, upload_id):
369 """Automatically make and combine multiple calls to list_parts.
370
371 Call list_parts as necessary, combining the results in case multiple
372 calls were required to get data on all available parts.
373
374 """
375 result = self.layer1.list_parts(self.name, upload_id)
376 marker = result['Marker']
377 while marker:
378 additional_result = self.layer1.list_parts(
379 self.name, upload_id, marker=marker)
380 result['Parts'].extend(additional_result['Parts'])
381 marker = additional_result['Marker']
382 # The marker makes no sense in an unpaginated result, and clearing it
383 # makes testing easier. This also has the nice property that the result
384 # is a normal (but expanded) response.
385 result['Marker'] = None
386 return result
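# Pagination sketch (editorial): after this call every page has been
# merged into one response.
#
#   parts = vault.list_all_parts(upload_id)
#   # parts['Parts'] holds all parts across pages; parts['Marker'] is
#   # None, as in a single-page response.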