third_party/gsutil/boto/glacier/layer1.py - Issue 12042069: Scripts to download files from google storage based on sha1 sums

Side by Side Diff: third_party/gsutil/boto/glacier/layer1.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master

Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # -*- coding: utf-8 -*-

	2 # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/

	3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved

	4 #

	5 # Permission is hereby granted, free of charge, to any person obtaining a

	6 # copy of this software and associated documentation files (the

	7 # "Software"), to deal in the Software without restriction, including

	8 # without limitation the rights to use, copy, modify, merge, publish, dis-

	9 # tribute, sublicense, and/or sell copies of the Software, and to permit

	10 # persons to whom the Software is furnished to do so, subject to the fol-

	11 # lowing conditions:

	12 #

	13 # The above copyright notice and this permission notice shall be included

	14 # in all copies or substantial portions of the Software.

	15 #

	16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

	17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-

	18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT

	19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

	20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

	21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS

	22 # IN THE SOFTWARE.

	23 #

	24

	25 import os

	26 import urllib

	27

	28 import boto.glacier

	29 from boto.compat import json

	30 from boto.connection import AWSAuthConnection

	31 from .exceptions import UnexpectedHTTPResponseError

	32 from .response import GlacierResponse

	33

	34

	35 class Layer1(AWSAuthConnection):

	36

	37 Version = '2012-06-01'

	38 """Glacier API version."""

	39

	40 def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,

	41 account_id='-', is_secure=True, port=None,

	42 proxy=None, proxy_port=None,

	43 proxy_user=None, proxy_pass=None, debug=0,

	44 https_connection_factory=None, path='/',

	45 provider='aws', security_token=None,

	46 suppress_consec_slashes=True,

	47 region=None, region_name='us-east-1'):

	48

	49 if not region:

	50 for reg in boto.glacier.regions():

	51 if reg.name == region_name:

	52 region = reg

	53 break

	54

	55 self.region = region

	56 self.account_id = account_id

	57 AWSAuthConnection.__init__(self, region.endpoint,

	58 aws_access_key_id, aws_secret_access_key,

	59 True, port, proxy, proxy_port,

	60 proxy_user, proxy_pass, debug,

	61 https_connection_factory,

	62 path, provider, security_token,

	63 suppress_consec_slashes)

	64

	65 def _required_auth_capability(self):

	66 return ['hmac-v4']

	67

	68 def make_request(self, verb, resource, headers=None,

	69 data='', ok_responses=(200,), params=None,

	70 response_headers=None):

	71 if headers is None:

	72 headers = {}

	73 headers['x-amz-glacier-version'] = self.Version

	74 uri = '/%s/%s' % (self.account_id, resource)

	75 response = AWSAuthConnection.make_request(self, verb, uri,

	76 params=params,

	77 headers=headers,

	78 data=data)

	79 if response.status in ok_responses:

	80 return GlacierResponse(response, response_headers)

	81 else:

	82 # create glacier-specific exceptions

	83 raise UnexpectedHTTPResponseError(ok_responses, response)

	84

	85 # Vaults

	86

	87 def list_vaults(self, limit=None, marker=None):

	88 """

	89 This operation lists all vaults owned by the calling user’s

	90 account. The list returned in the response is ASCII-sorted by

	91 vault name.

	92

	93 By default, this operation returns up to 1,000 items. If there

	94 are more vaults to list, the marker field in the response body

	95 contains the vault Amazon Resource Name (ARN) at which to

	96 continue the list with a new List Vaults request; otherwise,

	97 the marker field is null. In your next List Vaults request you

	98 set the marker parameter to the value Amazon Glacier returned

	99 in the responses to your previous List Vaults request. You can

	100 also limit the number of vaults returned in the response by

	101 specifying the limit parameter in the request.

	102

	103 :type limit: int

	104 :param limit: The maximum number of items returned in the

	105 response. If you don't specify a value, the List Vaults

	106 operation returns up to 1,000 items.

	107

	108 :type marker: str

	109 :param marker: A string used for pagination. marker specifies

	110 the vault ARN after which the listing of vaults should

	111 begin. (The vault specified by marker is not included in

	112 the returned list.) Get the marker value from a previous

	113 List Vaults response. You need to include the marker only

	114 if you are continuing the pagination of results started in

	115 a previous List Vaults request. Specifying an empty value

	116 ("") for the marker returns a list of vaults starting

	117 from the first vault.

	118 """

	119 params = {}

	120 if limit:

	121 params['limit'] = limit

	122 if marker:

	123 params['marker'] = marker

	124 return self.make_request('GET', 'vaults', params=params)

	125

	126 def describe_vault(self, vault_name):

	127 """

	128 This operation returns information about a vault, including

	129 the vault Amazon Resource Name (ARN), the date the vault was

	130 created, the number of archives contained within the vault,

	131 and the total size of all the archives in the vault. The

	132 number of archives and their total size are as of the last

	133 vault inventory Amazon Glacier generated. Amazon Glacier

	134 generates vault inventories approximately daily. This means

	135 that if you add or remove an archive from a vault, and then

	136 immediately send a Describe Vault request, the response might

	137 not reflect the changes.

	138

	139 :type vault_name: str

	140 :param vault_name: The name of the vault

	141 """

	142 uri = 'vaults/%s' % vault_name

	143 return self.make_request('GET', uri)

	144

	145 def create_vault(self, vault_name):

	146 """

	147 This operation creates a new vault with the specified name.

	148 The name of the vault must be unique within a region for an

	149 AWS account. You can create up to 1,000 vaults per

	150 account. For information on creating more vaults, go to the

	151 Amazon Glacier product detail page.

	152

	153 You must use the following guidelines when naming a vault.

	154

	155 Names can be between 1 and 255 characters long.

	156

	157 Allowed characters are a–z, A–Z, 0–9, '_' (underscore),

	158 '-' (hyphen), and '.' (period).

	159

	160 This operation is idempotent, you can send the same request

	161 multiple times and it has no further effect after the first

	162 time Amazon Glacier creates the specified vault.

	163

	164 :type vault_name: str

	165 :param vault_name: The name of the new vault

	166 """

	167 uri = 'vaults/%s' % vault_name

	168 return self.make_request('PUT', uri, ok_responses=(201,),

	169 response_headers=[('Location', 'Location')])

	170

	171 def delete_vault(self, vault_name):

	172 """

	173 This operation deletes a vault. Amazon Glacier will delete a

	174 vault only if there are no archives in the vault as per the

	175 last inventory and there have been no writes to the vault

	176 since the last inventory. If either of these conditions is not

	177 satisfied, the vault deletion fails (that is, the vault is not

	178 removed) and Amazon Glacier returns an error.

	179

	180 This operation is idempotent, you can send the same request

	181 multiple times and it has no further effect after the first

	182 time Amazon Glacier deletes the specified vault.

	183

	184 :type vault_name: str

	185 :param vault_name: The name of the vault

	186 """

	187 uri = 'vaults/%s' % vault_name

	188 return self.make_request('DELETE', uri, ok_responses=(204,))

	189

	190 def get_vault_notifications(self, vault_name):

	191 """

	192 This operation retrieves the notification-configuration

	193 subresource set on the vault.

	194

	195 :type vault_name: str

	196 :param vault_name: The name of the vault

	197 """

	198 uri = 'vaults/%s/notification-configuration' % vault_name

	199 return self.make_request('GET', uri)

	200

	201 def set_vault_notifications(self, vault_name, notification_config):

	202 """

	203 This operation configures the notification-configuration

	204 subresource of the vault.

	205

	206 :type vault_name: str

	207 :param vault_name: The name of the vault

	208

	209 :type notification_config: dict

	210 :param notification_config: A Python dictionary containing

	211 an SNS Topic and events for which you want Amazon Glacier

	212 to send notifications to the topic. Possible events are:

	213

	214 * ArchiveRetrievalCompleted - occurs when a job that was

	215 initiated for an archive retrieval is completed.

	216 * InventoryRetrievalCompleted - occurs when a job that was

	217 initiated for an inventory retrieval is completed.

	218

	219 The format of the dictionary is:

	220

	221 {'SNSTopic': 'mytopic',

	222 'Events': [event1,...]}

	223 """

	224 uri = 'vaults/%s/notification-configuration' % vault_name

	225 json_config = json.dumps(notification_config)

	226 return self.make_request('PUT', uri, data=json_config,

	227 ok_responses=(204,))

	228

	229 def delete_vault_notifications(self, vault_name):

	230 """

	231 This operation deletes the notification-configuration

	232 subresource set on the vault.

	233

	234 :type vault_name: str

	235 :param vault_name: The name of the vault

	236 """

	237 uri = 'vaults/%s/notification-configuration' % vault_name

	238 return self.make_request('DELETE', uri, ok_responses=(204,))

	239

	240 # Jobs

	241

	242 def list_jobs(self, vault_name, completed=None, status_code=None,

	243 limit=None, marker=None):

	244 """

	245 This operation lists jobs for a vault including jobs that are

	246 in-progress and jobs that have recently finished.

	247

	248 :type vault_name: str

	249 :param vault_name: The name of the vault.

	250

	251 :type completed: boolean

	252 :param completed: Specifies the state of the jobs to return.

	253 If a value of True is passed, only completed jobs will

	254 be returned. If a value of False is passed, only

	255 uncompleted jobs will be returned. If no value is

	256 passed, all jobs will be returned.

	257

	258 :type status_code: string

	259 :param status_code: Specifies the type of job status to return.

	260 Valid values are: InProgress|Succeeded|Failed. If not

	261 specified, jobs with all status codes are returned.

	262

	263 :type limit: int

	264 :param limit: The maximum number of items returned in the

	265 response. If you don't specify a value, the List Jobs

	266 operation returns up to 1,000 items.

	267

	268 :type marker: str

	269 :param marker: An opaque string used for pagination. marker

	270 specifies the job at which the listing of jobs should

	271 begin. Get the marker value from a previous List Jobs

	272 response. You need only include the marker if you are

	273 continuing the pagination of results started in a previous

	274 List Jobs request.

	275

	276 """

	277 params = {}

	278 if limit:

	279 params['limit'] = limit

	280 if marker:

	281 params['marker'] = marker

	282 if status_code:

	283 params['statuscode'] = status_code

	284 if completed is not None:

	285 params['completed'] = 'true' if completed else 'false'

	286 uri = 'vaults/%s/jobs' % vault_name

	287 return self.make_request('GET', uri, params=params)

	288

	289 def describe_job(self, vault_name, job_id):

	290 """

	291 This operation returns information about a job you previously

	292 initiated, including the job initiation date, the user who

	293 initiated the job, the job status code/message and the Amazon

	294 Simple Notification Service (Amazon SNS) topic to notify after

	295 Amazon Glacier completes the job.

	296

	297 :type vault_name: str

	298 :param vault_name: The name of the vault

	299

	300 :type job_id: str

	301 :param job_id: The ID of the job.

	302 """

	303 uri = 'vaults/%s/jobs/%s' % (vault_name, job_id)

	304 return self.make_request('GET', uri, ok_responses=(200,))

	305

	306 def initiate_job(self, vault_name, job_data):

	307 """

	308 This operation initiates a job of the specified

	309 type. Retrieving an archive or a vault inventory are

	310 asynchronous operations that require you to initiate a job. It

	311 is a two-step process:

	312

	313 * Initiate a retrieval job.

	314 * After the job completes, download the bytes.

	315

	316 The retrieval is executed asynchronously. When you initiate

	317 a retrieval job, Amazon Glacier creates a job and returns a

	318 job ID in the response.

	319

	320 :type vault_name: str

	321 :param vault_name: The name of the vault

	322

	323 :type job_data: dict

	324 :param job_data: A Python dictionary containing the

	325 information about the requested job. The dictionary

	326 can contain the following attributes:

	327

	328 * ArchiveId - The ID of the archive you want to retrieve.

	329 This field is required only if the Type is set to

	330 archive-retrieval.

	331 * Description - The optional description for the job.

	332 * Format - When initiating a job to retrieve a vault

	333 inventory, you can optionally add this parameter to

	334 specify the output format. Valid values are: CSV|JSON.

	335 * SNSTopic - The Amazon SNS topic ARN where Amazon Glacier

	336 sends a notification when the job is completed and the

	337 output is ready for you to download.

	338 * Type - The job type. Valid values are:

	339 archive-retrieval|inventory-retrieval

	340 * RetrievalByteRange - Optionally specify the range of

	341 bytes to retrieve.

	342

	343 """

	344 uri = 'vaults/%s/jobs' % vault_name

	345 response_headers = [('x-amz-job-id', u'JobId'),

	346 ('Location', u'Location')]

	347 json_job_data = json.dumps(job_data)

	348 return self.make_request('POST', uri, data=json_job_data,

	349 ok_responses=(202,),

	350 response_headers=response_headers)

	351

	352 def get_job_output(self, vault_name, job_id, byte_range=None):

	353 """

	354 This operation downloads the output of the job you initiated

	355 using Initiate a Job. Depending on the job type

	356 you specified when you initiated the job, the output will be

	357 either the content of an archive or a vault inventory.

	358

	359 You can download all the job output or download a portion of

	360 the output by specifying a byte range. In the case of an

	361 archive retrieval job, depending on the byte range you

	362 specify, Amazon Glacier returns the checksum for the portion

	363 of the data. You can compute the checksum on the client and

	364 verify that the values match to ensure the portion you

	365 downloaded is the correct data.

	366

	367 :type vault_name: str

	368 :param vault_name: The name of the vault

	369

	370 :type job_id: str

	371 :param job_id: The ID of the job.

	372

	373 :type byte_range: tuple

	374 :param byte_range: A tuple of integers specifying the slice (in bytes)

	375 of the archive you want to receive

	376 """

	377 response_headers = [('x-amz-sha256-tree-hash', u'TreeHash'),

	378 ('Content-Range', u'ContentRange'),

	379 ('Content-Type', u'ContentType')]

	380 headers = None

	381 if byte_range:

	382 headers = {'Range': 'bytes=%d-%d' % byte_range}

	383 uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id)

	384 response = self.make_request('GET', uri, headers=headers,

	385 ok_responses=(200, 206),

	386 response_headers=response_headers)

	387 return response

	388

	389 # Archives

	390

	391 def upload_archive(self, vault_name, archive,

	392 linear_hash, tree_hash, description=None):

	393 """

	394 This operation adds an archive to a vault. For a successful

	395 upload, your data is durably persisted. In response, Amazon

	396 Glacier returns the archive ID in the x-amz-archive-id header

	397 of the response. You should save the archive ID returned so

	398 that you can access the archive later.

	399

	400 :type vault_name: str

	401 :param vault_name: The name of the vault

	402

	403 :type archive: bytes

	404 :param archive: The data to upload.

	405

	406 :type linear_hash: str

	407 :param linear_hash: The SHA256 checksum (a linear hash) of the

	408 payload.

	409

	410 :type tree_hash: str

	411 :param tree_hash: The user-computed SHA256 tree hash of the

	412 payload. For more information on computing the

	413 tree hash, see http://goo.gl/u7chF.

	414

	415 :type description: str

	416 :param description: An optional description of the archive.

	417 """

	418 response_headers = [('x-amz-archive-id', u'ArchiveId'),

	419 ('Location', u'Location'),

	420 ('x-amz-sha256-tree-hash', u'TreeHash')]

	421 uri = 'vaults/%s/archives' % vault_name

	422 try:

	423 content_length = str(len(archive))

	424 except TypeError:

	425 # If a file like object is provided, try to retrieve

	426 # the file size via fstat.

	427 content_length = str(os.fstat(archive.fileno()).st_size)

	428 headers = {'x-amz-content-sha256': linear_hash,

	429 'x-amz-sha256-tree-hash': tree_hash,

	430 'Content-Length': content_length}

	431 if description:

	432 headers['x-amz-archive-description'] = description

	433 return self.make_request('POST', uri, headers=headers,

	434 data=archive, ok_responses=(201,),

	435 response_headers=response_headers)

	436

	437 def delete_archive(self, vault_name, archive_id):

	438 """

	439 This operation deletes an archive from a vault.

	440

	441 :type vault_name: str

	442 :param vault_name: The name of the vault

	443

	444 :type archive_id: str

	445 :param archive_id: The ID for the archive to be deleted.

	446 """

	447 uri = 'vaults/%s/archives/%s' % (vault_name, archive_id)

	448 return self.make_request('DELETE', uri, ok_responses=(204,))

	449

	450 # Multipart

	451

	452 def initiate_multipart_upload(self, vault_name, part_size,

	453 description=None):

	454 """

	455 Initiate a multipart upload. Amazon Glacier creates a

	456 multipart upload resource and returns its ID. You use this

	457 ID in subsequent multipart upload operations.

	458

	459 :type vault_name: str

	460 :param vault_name: The name of the vault.

	461

	462 :type description: str

	463 :param description: An optional description of the archive.

	464

	465 :type part_size: int

	466 :param part_size: The size of each part except the last, in bytes.

	467 The part size must be a multiple of 1024 KB multiplied by

	468 a power of 2. The minimum allowable part size is 1MB and the

	469 maximum is 4GB.

	470 """

	471 response_headers = [('x-amz-multipart-upload-id', u'UploadId'),

	472 ('Location', u'Location')]

	473 headers = {'x-amz-part-size': str(part_size)}

	474 if description:

	475 headers['x-amz-archive-description'] = description

	476 uri = 'vaults/%s/multipart-uploads' % vault_name

	477 response = self.make_request('POST', uri, headers=headers,

	478 ok_responses=(201,),

	479 response_headers=response_headers)

	480 return response

	481

	482 def complete_multipart_upload(self, vault_name, upload_id,

	483 sha256_treehash, archive_size):

	484 """

	485 Call this to inform Amazon Glacier that all of the archive parts

	486 have been uploaded and Amazon Glacier can now assemble the archive

	487 from the uploaded parts.

	488

	489 :type vault_name: str

	490 :param vault_name: The name of the vault.

	491

	492 :type upload_id: str

	493 :param upload_id: The unique ID associated with this upload

	494 operation.

	495

	496 :type sha256_treehash: str

	497 :param sha256_treehash: The SHA256 tree hash of the entire

	498 archive. It is the tree hash of the SHA256 tree hashes of the

	499 individual parts. If the value you specify in the request

	500 does not match the SHA256 tree hash of the final assembled

	501 archive as computed by Amazon Glacier, Amazon Glacier

	502 returns an error and the request fails.

	503

	504 :type archive_size: int

	505 :param archive_size: The total size, in bytes, of the entire

	506 archive. This value should be the sum of all the sizes of

	507 the individual parts that you uploaded.

	508 """

	509 response_headers = [('x-amz-archive-id', u'ArchiveId'),

	510 ('Location', u'Location')]

	511 headers = {'x-amz-sha256-tree-hash': sha256_treehash,

	512 'x-amz-archive-size': str(archive_size)}

	513 uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id)

	514 response = self.make_request('POST', uri, headers=headers,

	515 ok_responses=(201,),

	516 response_headers=response_headers)

	517 return response

	518

	519 def abort_multipart_upload(self, vault_name, upload_id):

	520 """

	521 Call this to abort a multipart upload identified by the upload ID.

	522

	523 :type vault_name: str

	524 :param vault_name: The name of the vault.

	525

	526 :type upload_id: str

	527 :param upload_id: The unique ID associated with this upload

	528 operation.

	529 """

	530 uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id)

	531 return self.make_request('DELETE', uri, ok_responses=(204,))

	532

	533 def list_multipart_uploads(self, vault_name, limit=None, marker=None):

	534 """

	535 Lists in-progress multipart uploads for the specified vault.

	536

	537 :type vault_name: str

	538 :param vault_name: The name of the vault.

	539

	540 :type limit: int

	541 :param limit: The maximum number of items returned in the

	542 response. If you don't specify a value, the operation

	543 returns up to 1,000 items.

	544

	545 :type marker: str

	546 :param marker: An opaque string used for pagination. marker

	547 specifies the item at which the listing should

	548 begin. Get the marker value from a previous

	549 response. You need only include the marker if you are

	550 continuing the pagination of results started in a previous

	551 request.

	552 """

	553 params = {}

	554 if limit:

	555 params['limit'] = limit

	556 if marker:

	557 params['marker'] = marker

	558 uri = 'vaults/%s/multipart-uploads' % vault_name

	559 return self.make_request('GET', uri, params=params)

	560

	561 def list_parts(self, vault_name, upload_id, limit=None, marker=None):

	562 """

	563 Lists the parts that have been uploaded for the specified multipart upload.

	564

	565 :type vault_name: str

	566 :param vault_name: The name of the vault.

	567

	568 :type upload_id: str

	569 :param upload_id: The unique ID associated with this upload

	570 operation.

	571

	572 :type limit: int

	573 :param limit: The maximum number of items returned in the

	574 response. If you don't specify a value, the operation

	575 returns up to 1,000 items.

	576

	577 :type marker: str

	578 :param marker: An opaque string used for pagination. marker

	579 specifies the item at which the listing should

	580 begin. Get the marker value from a previous

	581 response. You need only include the marker if you are

	582 continuing the pagination of results started in a previous

	583 request.

	584 """

	585 params = {}

	586 if limit:

	587 params['limit'] = limit

	588 if marker:

	589 params['marker'] = marker

	590 uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id)

	591 return self.make_request('GET', uri, params=params)

	592

	593 def upload_part(self, vault_name, upload_id, linear_hash,

	594 tree_hash, byte_range, part_data):

	595 """

	596 Uploads one part of an archive as part of the specified multipart upload.

	597

	598 :type vault_name: str

	599 :param vault_name: The name of the vault.

	600

	601 :type linear_hash: str

	602 :param linear_hash: The SHA256 checksum (a linear hash) of the

	603 payload.

	604

	605 :type tree_hash: str

	606 :param tree_hash: The user-computed SHA256 tree hash of the

	607 payload. For more information on computing the

	608 tree hash, see http://goo.gl/u7chF.

	609

	610 :type upload_id: str

	611 :param upload_id: The unique ID associated with this upload

	612 operation.

	613

	614 :type byte_range: tuple of ints

	615 :param byte_range: Identifies the range of bytes in the assembled

	616 archive that will be uploaded in this part.

	617

	618 :type part_data: bytes

	619 :param part_data: The data to be uploaded for the part

	620 """

	621 headers = {'x-amz-content-sha256': linear_hash,

	622 'x-amz-sha256-tree-hash': tree_hash,

	623 'Content-Range': 'bytes %d-%d/*' % byte_range}

	624 response_headers = [('x-amz-sha256-tree-hash', u'TreeHash')]

	625 uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id)

	626 return self.make_request('PUT', uri, headers=headers,

	627 data=part_data, ok_responses=(204,),

	628 response_headers=response_headers)

OLD	NEW

« download_from_google_storage.py ('K') | « third_party/gsutil/boto/glacier/job.py ('k') | third_party/gsutil/boto/glacier/layer2.py » ('j') | upload_to_google_storage.py » ('J')