third_party/gsutil/boto/glacier/job.py - Issue 12042069: Scripts to download files from google storage based on sha1 sums

Side by Side Diff: third_party/gsutil/boto/glacier/job.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master

Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # -*- coding: utf-8 -*-

	2 # Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/

	3 #

	4 # Permission is hereby granted, free of charge, to any person obtaining a

	5 # copy of this software and associated documentation files (the

	6 # "Software"), to deal in the Software without restriction, including

	7 # without limitation the rights to use, copy, modify, merge, publish, dis-

	8 # tribute, sublicense, and/or sell copies of the Software, and to permit

	9 # persons to whom the Software is furnished to do so, subject to the fol-

	10 # lowing conditions:

	11 #

	12 # The above copyright notice and this permission notice shall be included

	13 # in all copies or substantial portions of the Software.

	14 #

	15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

	16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-

	17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT

	18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

	19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

	20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS

	21 # IN THE SOFTWARE.

	22 #

	23 from __future__ import with_statement

	24 import math

	25 import socket

	26

	27 from .exceptions import TreeHashDoesNotMatchError, DownloadArchiveError

	28 from .utils import bytes_to_hex, chunk_hashes, tree_hash

	29

	30

	31 class Job(object):

	32

	33 DefaultPartSize = 4 * 1024 * 1024

	34

	35 ResponseDataElements = (('Action', 'action', None),

	36 ('ArchiveId', 'archive_id', None),

	37 ('ArchiveSizeInBytes', 'archive_size', 0),

	38 ('Completed', 'completed', False),

	39 ('CompletionDate', 'completion_date', None),

	40 ('CreationDate', 'creation_date', None),

	41 ('InventorySizeInBytes', 'inventory_size', 0),

	42 ('JobDescription', 'description', None),

	43 ('JobId', 'id', None),

	44 ('SHA256TreeHash', 'sha256_treehash', None),

	45 ('SNSTopic', 'sns_topic', None),

	46 ('StatusCode', 'status_code', None),

	47 ('StatusMessage', 'status_message', None),

	48 ('VaultARN', 'arn', None))

	49

	50 def __init__(self, vault, response_data=None):

	51 self.vault = vault

	52 if response_data:

	53 for response_name, attr_name, default in self.ResponseDataElements:

	54 setattr(self, attr_name, response_data[response_name])

	55 else:

	56 for response_name, attr_name, default in self.ResponseDataElements:

	57 setattr(self, attr_name, default)

	58

	59 def __repr__(self):

	60 return 'Job(%s)' % self.arn

	61

	62 def get_output(self, byte_range=None):

	63 """

	64 This operation downloads the output of the job. Depending on

	65 the job type you specified when you initiated the job, the

	66 output will be either the content of an archive or a vault

	67 inventory.

	68

	69 You can download all the job output or download a portion of

	70 the output by specifying a byte range. In the case of an

	71 archive retrieval job, depending on the byte range you

	72 specify, Amazon Glacier returns the checksum for the portion

	73 of the data. You can compute the checksum on the client and

	74 verify that the values match to ensure the portion you

	75 downloaded is the correct data.

	76

	77 :type byte_range: tuple

	78 :param range: A tuple of integer specifying the slice (in bytes)

	79 of the archive you want to receive

	80 """

	81 return self.vault.layer1.get_job_output(self.vault.name,

	82 self.id,

	83 byte_range)

	84

	85 def download_to_file(self, filename, chunk_size=DefaultPartSize,

	86 verify_hashes=True, retry_exceptions=(socket.error,)):

	87 """Download an archive to a file.

	88

	89 :type filename: str

	90 :param filename: The name of the file where the archive

	91 contents will be saved.

	92

	93 :type chunk_size: int

	94 :param chunk_size: The chunk size to use when downloading

	95 the archive.

	96

	97 :type verify_hashes: bool

	98 :param verify_hashes: Indicates whether or not to verify

	99 the tree hashes for each downloaded chunk.

	100

	101 """

	102 num_chunks = int(math.ceil(self.archive_size / float(chunk_size)))

	103 with open(filename, 'wb') as output_file:

	104 self._download_to_fileob(output_file, num_chunks, chunk_size,

	105 verify_hashes, retry_exceptions)

	106

	107 def _download_to_fileob(self, fileobj, num_chunks, chunk_size, verify_hashes ,

	108 retry_exceptions):

	109 for i in xrange(num_chunks):

	110 byte_range = ((i * chunk_size), ((i + 1) * chunk_size) - 1)

	111 data, expected_tree_hash = self._download_byte_range(

	112 byte_range, retry_exceptions)

	113 if verify_hashes:

	114 actual_tree_hash = bytes_to_hex(tree_hash(chunk_hashes(data)))

	115 if expected_tree_hash != actual_tree_hash:

	116 raise TreeHashDoesNotMatchError(

	117 "The calculated tree hash %s does not match the "

	118 "expected tree hash %s for the byte range %s" % (

	119 actual_tree_hash, expected_tree_hash, byte_range))

	120 fileobj.write(data)

	121

	122 def _download_byte_range(self, byte_range, retry_exceptions):

	123 # You can occasionally get socket.errors when downloading

	124 # chunks from Glacier, so each chunk can be retried up

	125 # to 5 times.

	126 for _ in xrange(5):

	127 try:

	128 response = self.get_output(byte_range)

	129 data = response.read()

	130 expected_tree_hash = response['TreeHash']

	131 return data, expected_tree_hash

	132 except retry_exceptions, e:

	133 continue

	134 else:

	135 raise DownloadArchiveError("There was an error downloading"

	136 "byte range %s: %s" % (byte_range,

	137 e))

OLD	NEW

« download_from_google_storage.py ('K') | « third_party/gsutil/boto/glacier/exceptions.py ('k') | third_party/gsutil/boto/glacier/layer1.py » ('j') | upload_to_google_storage.py » ('J')