Chromium Code Reviews

Side by Side Diff: third_party/gsutil/boto/glacier/vault.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
1 # -*- coding: utf-8 -*-
2 # Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
3 # Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, dis-
9 # tribute, sublicense, and/or sell copies of the Software, and to permit
10 # persons to whom the Software is furnished to do so, subject to the fol-
11 # lowing conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 # IN THE SOFTWARE.
23 #
24 from __future__ import with_statement
25 from .exceptions import UploadArchiveError
26 from .job import Job
27 from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
28 from .concurrent import ConcurrentUploader
29 from .utils import minimum_part_size, DEFAULT_PART_SIZE
30 import os.path
31
32
33 _MEGABYTE = 1024 * 1024
34 _GIGABYTE = 1024 * _MEGABYTE
35
36 MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
37 MAXIMUM_NUMBER_OF_PARTS = 10000
38
39
40 class Vault(object):
41
42 DefaultPartSize = DEFAULT_PART_SIZE
43 SingleOperationThreshold = 100 * _MEGABYTE
44
45 ResponseDataElements = (('VaultName', 'name', None),
46 ('VaultARN', 'arn', None),
47 ('CreationDate', 'creation_date', None),
48 ('LastInventoryDate', 'last_inventory_date', None),
49 ('SizeInBytes', 'size', 0),
50 ('NumberOfArchives', 'number_of_archives', 0))
51
52 def __init__(self, layer1, response_data=None):
53 self.layer1 = layer1
54 if response_data:
55 for response_name, attr_name, default in self.ResponseDataElements:
56 value = response_data[response_name]
57 if isinstance(value, unicode):
58 value = value.encode('utf8')
59 setattr(self, attr_name, value)
60 else:
61 for response_name, attr_name, default in self.ResponseDataElements:
62 setattr(self, attr_name, default)
63
64 def __repr__(self):
65 return 'Vault("%s")' % self.arn
66
67 def delete(self):
68 """
69 Deletes this vault. WARNING!
70 """
71 self.layer1.delete_vault(self.name)
72
73 def upload_archive(self, filename, description=None):
74 """
75 Adds an archive to a vault. For archives larger than 100MB,
76 a multipart upload will be used.
77
78 :type filename: str
79 :param filename: A filename to upload
80
81 :type description: str
82 :param description: An optional description for the archive.
83
84 :rtype: str
85 :return: The archive id of the newly created archive
86 """
87 if os.path.getsize(filename) > self.SingleOperationThreshold:
88 return self.create_archive_from_file(filename, description=description)
89 return self._upload_archive_single_operation(filename, description)
90
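# A minimal usage sketch for upload_archive (editorial, not part of this
# module): assumes a Layer2 connection from boto.glacier and an existing
# vault named 'my-vault'; both names are illustrative.
#
#   import boto.glacier
#   layer2 = boto.glacier.connect_to_region('us-east-1')
#   vault = layer2.get_vault('my-vault')
#   # Files above SingleOperationThreshold (100MB) are transparently
#   # routed through the multipart create_archive_from_file path.
#   archive_id = vault.upload_archive('backup.tar', description='nightly')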
91 def _upload_archive_single_operation(self, filename, description):
92 """
93 Adds an archive to a vault in a single operation. This is
94 recommended only for archives smaller than 100MB.
95
96 :type filename: str
97 :param filename: A filename to upload
98
99 :type description: str
100 :param description: A description for the archive.
101
102 :rtype: str
103 :return: The archive id of the newly created archive
104 """
105 with open(filename, 'rb') as fileobj:
106 linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
107 fileobj.seek(0)
108 response = self.layer1.upload_archive(self.name, fileobj,
109 linear_hash, tree_hash,
110 description)
111 return response['ArchiveId']
112
113 def create_archive_writer(self, part_size=DefaultPartSize,
114 description=None):
115 """
116 Create a new archive and begin a multi-part upload to it.
117 Returns a file-like object to which the data for the archive
118 can be written. Once all the data is written, the file-like
119 object should be closed; you can then call its get_archive_id
120 method to get the ID of the created archive.
121
122 :type part_size: int
123 :param part_size: The part size for the multipart upload.
124
125 :type description: str
126 :param description: An optional description for the archive.
127
128 :rtype: :class:`boto.glacier.writer.Writer`
129 :return: A Writer object to which the archive data
130 should be written.
131 """
132 response = self.layer1.initiate_multipart_upload(self.name,
133 part_size,
134 description)
135 return Writer(self, response['UploadId'], part_size=part_size)
136
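# Sketch of the writer-based streaming flow (editorial; iter_chunks is a
# hypothetical data source):
#
#   writer = vault.create_archive_writer(description='streamed upload')
#   for chunk in iter_chunks():
#       writer.write(chunk)
#   writer.close()                       # finalizes the multipart upload
#   archive_id = writer.get_archive_id()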
137 def create_archive_from_file(self, filename=None, file_obj=None,
138 description=None, upload_id_callback=None):
139 """
140 Create a new archive and upload the data from the given file
141 or file-like object.
142
143 :type filename: str
144 :param filename: A filename to upload
145
146 :type file_obj: file
147 :param file_obj: A file-like object to upload
148
149 :type description: str
150 :param description: An optional description for the archive.
151
152 :type upload_id_callback: function
153 :param upload_id_callback: if set, called with the upload_id as its
154 only argument as soon as the id is known, enabling future calls
155 to resume_archive_from_file in case the upload needs resuming.
156
157 :rtype: str
158 :return: The archive id of the newly created archive
159 """
160 part_size = self.DefaultPartSize
161 if not file_obj:
162 file_size = os.path.getsize(filename)
163 try:
164 part_size = minimum_part_size(file_size)
165 except ValueError:
166 raise UploadArchiveError("File size of %s bytes exceeds "
167 "40,000 GB archive limit of Glacier." % file_size)
168 file_obj = open(filename, "rb")
169 writer = self.create_archive_writer(
170 description=description,
171 part_size=part_size)
172 if upload_id_callback:
173 upload_id_callback(writer.upload_id)
174 while True:
175 data = file_obj.read(part_size)
176 if not data:
177 break
178 writer.write(data)
179 writer.close()
180 return writer.get_archive_id()
181
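# Sketch of using upload_id_callback to persist the upload id so a failed
# transfer can later be resumed (editorial; file names are illustrative):
#
#   def remember_upload_id(upload_id):
#       with open('upload.id', 'w') as f:
#           f.write(upload_id)
#
#   archive_id = vault.create_archive_from_file(
#       'big.tar', upload_id_callback=remember_upload_id)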
182 @staticmethod
183 def _range_string_to_part_index(range_string, part_size):
184 start, inside_end = [int(value) for value in range_string.split('-')]
185 end = inside_end + 1
186 length = end - start
187 if length == part_size + 1:
188 # Off-by-one bug in Amazon's Glacier implementation,
189 # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
190 # Workaround: since part_size is too big by one byte, adjust it
191 end -= 1
192 inside_end -= 1
193 length -= 1
194 assert not (start % part_size), (
195 "upload part start byte is not on a part boundary")
196 assert (length <= part_size), "upload part is bigger than part size"
197 return start // part_size
198
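# Worked example (editorial): with part_size = 4 * _MEGABYTE (4194304
# bytes), the range string '4194304-8388607' gives start = 4194304 and
# inside_end = 8388607, so end = 8388608 and length = 4194304; start
# falls on a part boundary and the part index is 4194304 // 4194304 == 1.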
199 def resume_archive_from_file(self, upload_id, filename=None,
200 file_obj=None):
201 """Resume upload of a file already part-uploaded to Glacier.
202
203 The resumption of an upload where the part-uploaded section is empty
204 is a valid degenerate case that this function can handle.
205
206 One and only one of filename or file_obj must be specified.
207
208 :type upload_id: str
209 :param upload_id: existing Glacier upload id of upload being resumed.
210
211 :type filename: str
212 :param filename: file to open for resume
213
214 :type file_obj: file
215 :param file_obj: file-like object containing local data to resume. This
216 must read from the start of the entire upload, not just from the
217 point being resumed. Use file_obj.seek(0) to achieve this if necessary.
218
219 :rtype: str
220 :return: The archive id of the newly created archive
221
222 """
223 part_list_response = self.list_all_parts(upload_id)
224 part_size = part_list_response['PartSizeInBytes']
225
226 part_hash_map = {}
227 for part_desc in part_list_response['Parts']:
228 part_index = self._range_string_to_part_index(
229 part_desc['RangeInBytes'], part_size)
230 part_tree_hash = part_desc['SHA256TreeHash'].decode('hex')
231 part_hash_map[part_index] = part_tree_hash
232
233 if not file_obj:
234 file_obj = open(filename, "rb")
235
236 return resume_file_upload(
237 self, upload_id, part_size, file_obj, part_hash_map)
238
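# Sketch of resuming the upload persisted above (editorial; assumes
# 'upload.id' was written by the upload_id_callback example):
#
#   with open('upload.id') as f:
#       upload_id = f.read().strip()
#   archive_id = vault.resume_archive_from_file(upload_id,
#                                               filename='big.tar')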
239 def concurrent_create_archive_from_file(self, filename, description):
240 """
241 Create a new archive and concurrently upload the contents
242 of the given file.
243
244 This is a convenience method around the
245 :class:`boto.glacier.concurrent.ConcurrentUploader`
246 class. This method will perform a multipart upload
247 and upload the parts of the file concurrently.
248
249 :type filename: str
250 :param filename: A filename to upload
251
252 :raises: `boto.glacier.exceptions.UploadArchiveError` if an error
253 occurs during the upload process.
254
255 :rtype: str
256 :return: The archive id of the newly created archive
257
258 """
259 uploader = ConcurrentUploader(self.layer1, self.name)
260 archive_id = uploader.upload(filename, description)
261 return archive_id
262
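# Minimal usage sketch (editorial); part size and thread count come from
# the ConcurrentUploader defaults in boto.glacier.concurrent:
#
#   archive_id = vault.concurrent_create_archive_from_file(
#       'big.tar', 'concurrent nightly backup')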
263 def retrieve_archive(self, archive_id, sns_topic=None,
264 description=None):
265 """
266 Initiate an archive retrieval job to download the data from an
267 archive. You will need to wait for the notification from
268 Amazon (via SNS) before you can actually download the data;
269 this typically takes around 4 hours.
270
271 :type archive_id: str
272 :param archive_id: The id of the archive
273
274 :type description: str
275 :param description: An optional description for the job.
276
277 :type sns_topic: str
278 :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
279 sends notification when the job is completed and the output
280 is ready for you to download.
281
282 :rtype: :class:`boto.glacier.job.Job`
283 :return: A Job object representing the retrieval job.
284 """
285 job_data = {'Type': 'archive-retrieval',
286 'ArchiveId': archive_id}
287 if sns_topic is not None:
288 job_data['SNSTopic'] = sns_topic
289 if description is not None:
290 job_data['Description'] = description
291
292 response = self.layer1.initiate_job(self.name, job_data)
293 return self.get_job(response['JobId'])
294
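# Sketch of polling for completion instead of waiting on SNS (editorial;
# the interval is illustrative, and Job.completed / Job.download_to_file
# are assumed from boto.glacier.job):
#
#   import time
#   job = vault.retrieve_archive(archive_id)
#   while not vault.get_job(job.id).completed:
#       time.sleep(15 * 60)              # retrieval takes ~4 hours
#   vault.get_job(job.id).download_to_file('restored.tar')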
295 def retrieve_inventory(self, sns_topic=None,
296 description=None):
297 """
298 Initiate an inventory retrieval job to list the items in the
299 vault. You will need to wait for the notification from
300 Amazon (via SNS) before you can actually download the data;
301 this typically takes around 4 hours.
302
303 :type description: str
304 :param description: An optional description for the job.
305
306 :type sns_topic: str
307 :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
308 sends notification when the job is completed and the output
309 is ready for you to download.
310
311 :rtype: str
312 :return: The ID of the initiated inventory retrieval job.
313 """
314 job_data = {'Type': 'inventory-retrieval'}
315 if sns_topic is not None:
316 job_data['SNSTopic'] = sns_topic
317 if description is not None:
318 job_data['Description'] = description
319
320 response = self.layer1.initiate_job(self.name, job_data)
321 return response['JobId']
322
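# Note that, unlike retrieve_archive, this method returns the job id
# rather than a Job object. A usage sketch (editorial; Job.get_output is
# assumed from boto.glacier.job):
#
#   job_id = vault.retrieve_inventory()
#   inventory_job = vault.get_job(job_id)
#   # once inventory_job.completed is True, inventory_job.get_output()
#   # returns the vault listing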
323 def delete_archive(self, archive_id):
324 """
325 This operation deletes an archive from the vault.
326
327 :type archive_id: str
328 :param archive_id: The ID for the archive to be deleted.
329 """
330 return self.layer1.delete_archive(self.name, archive_id)
331
332 def get_job(self, job_id):
333 """
334 Get an object representing a job in progress.
335
336 :type job_id: str
337 :param job_id: The ID of the job
338
339 :rtype: :class:`boto.glacier.job.Job`
340 :return: A Job object representing the job.
341 """
342 response_data = self.layer1.describe_job(self.name, job_id)
343 return Job(self, response_data)
344
345 def list_jobs(self, completed=None, status_code=None):
346 """
347 Return a list of Job objects related to this vault.
348
349 :type completed: boolean
350 :param completed: Specifies the state of the jobs to return.
351 If a value of True is passed, only completed jobs will
352 be returned. If a value of False is passed, only
353 uncompleted jobs will be returned. If no value is
354 passed, all jobs will be returned.
355
356 :type status_code: string
357 :param status_code: Specifies the type of job status to return.
358 Valid values are: InProgress|Succeeded|Failed. If not
359 specified, jobs with all status codes are returned.
360
361 :rtype: list of :class:`boto.glacier.job.Job`
362 :return: A list of Job objects related to this vault.
363 """
364 response_data = self.layer1.list_jobs(self.name, completed,
365 status_code)
366 return [Job(self, jd) for jd in response_data['JobList']]
367
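# Filtering sketch (editorial; status codes are the documented Glacier
# values):
#
#   succeeded = vault.list_jobs(completed=True, status_code='Succeeded')
#   for job in succeeded:
#       print job.id, job.status_code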
368 def list_all_parts(self, upload_id):
369 """Automatically make and combine multiple calls to list_parts.
370
371 Call list_parts as necessary, combining the results in case multiple
372 calls were required to get data on all available parts.
373
374 """
375 result = self.layer1.list_parts(self.name, upload_id)
376 marker = result['Marker']
377 while marker:
378 additional_result = self.layer1.list_parts(
379 self.name, upload_id, marker=marker)
380 result['Parts'].extend(additional_result['Parts'])
381 marker = additional_result['Marker']
382 # The marker makes no sense in an unpaginated result, and clearing it
383 # makes testing easier. This also has the nice property that the result
384 # is a normal (but expanded) response.
385 result['Marker'] = None
386 return result
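# Pagination sketch (editorial): after this call every page has been
# merged into one response.
#
#   parts = vault.list_all_parts(upload_id)
#   # parts['Parts'] holds all parts across pages; parts['Marker'] is
#   # None, as in a single-page response.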