# -*- coding: utf-8 -*-
# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
from __future__ import with_statement
from .exceptions import UploadArchiveError
from .job import Job
from .writer import compute_hashes_from_fileobj, resume_file_upload, Writer
from .concurrent import ConcurrentUploader
from .utils import minimum_part_size, DEFAULT_PART_SIZE
import os.path


_MEGABYTE = 1024 * 1024
_GIGABYTE = 1024 * _MEGABYTE

MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
MAXIMUM_NUMBER_OF_PARTS = 10000


class Vault(object):

    DefaultPartSize = DEFAULT_PART_SIZE
    SingleOperationThreshold = 100 * _MEGABYTE

    ResponseDataElements = (('VaultName', 'name', None),
                            ('VaultARN', 'arn', None),
                            ('CreationDate', 'creation_date', None),
                            ('LastInventoryDate', 'last_inventory_date', None),
                            ('SizeInBytes', 'size', 0),
                            ('NumberOfArchives', 'number_of_archives', 0))

    def __init__(self, layer1, response_data=None):
        self.layer1 = layer1
        if response_data:
            for response_name, attr_name, default in self.ResponseDataElements:
                value = response_data[response_name]
                if isinstance(value, unicode):
                    value = value.encode('utf8')
                setattr(self, attr_name, value)
        else:
            for response_name, attr_name, default in self.ResponseDataElements:
                setattr(self, attr_name, default)

    def __repr__(self):
        return 'Vault("%s")' % self.arn

    def delete(self):
        """
        Deletes this vault. WARNING!
| 70 """ |
| 71 self.layer1.delete_vault(self.name) |
| 72 |
| 73 def upload_archive(self, filename, description=None): |
| 74 """ |
| 75 Adds an archive to a vault. For archives greater than 100MB the |
| 76 multipart upload will be used. |
| 77 |
        :type filename: str
        :param filename: A filename to upload

        :type description: str
        :param description: An optional description for the archive.

        :rtype: str
        :return: The archive id of the newly created archive
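
        Example (an illustrative sketch; ``layer1`` is assumed to be an
        already-configured :class:`boto.glacier.layer1.Layer1` connection,
        and the vault and file names are placeholders)::

            vault = Vault(layer1, layer1.describe_vault('myvault'))
            archive_id = vault.upload_archive('/tmp/backup.tar',
                                              description='nightly backup')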
| 86 """ |
| 87 if os.path.getsize(filename) > self.SingleOperationThreshold: |
| 88 return self.create_archive_from_file(filename, description=descripti
on) |
| 89 return self._upload_archive_single_operation(filename, description) |
| 90 |
| 91 def _upload_archive_single_operation(self, filename, description): |
| 92 """ |
| 93 Adds an archive to a vault in a single operation. It's recommended for |
| 94 archives less than 100MB |
| 95 |
| 96 :type file: str |
| 97 :param file: A filename to upload |
| 98 |
| 99 :type description: str |
| 100 :param description: A description for the archive. |
| 101 |
| 102 :rtype: str |
| 103 :return: The archive id of the newly created archive |
| 104 """ |
        with open(filename, 'rb') as fileobj:
            linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
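            # Computing the hashes consumed the file object, so rewind it
            # before handing it to the upload call.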
            fileobj.seek(0)
            response = self.layer1.upload_archive(self.name, fileobj,
                                                  linear_hash, tree_hash,
                                                  description)
        return response['ArchiveId']

    def create_archive_writer(self, part_size=DefaultPartSize,
                              description=None):
        """
        Create a new archive and begin a multi-part upload to it.
        Returns a file-like object to which the data for the archive
        can be written. Once all the data is written the file-like
        object should be closed; you can then call its get_archive_id
        method to get the ID of the created archive.

        :type part_size: int
        :param part_size: The part size for the multipart upload.

        :type description: str
        :param description: An optional description for the archive.

        :rtype: :class:`boto.glacier.writer.Writer`
        :return: A Writer object to which the archive data
            should be written.
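
        Example (a minimal sketch mirroring the read/write loop in
        create_archive_from_file; ``vault`` is a Vault instance and the
        file name is a placeholder)::

            writer = vault.create_archive_writer(description='logs')
            with open('/tmp/logs.tar', 'rb') as fileobj:
                while True:
                    data = fileobj.read(vault.DefaultPartSize)
                    if not data:
                        break
                    writer.write(data)
            writer.close()
            archive_id = writer.get_archive_id()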
| 131 """ |
| 132 response = self.layer1.initiate_multipart_upload(self.name, |
| 133 part_size, |
| 134 description) |
| 135 return Writer(self, response['UploadId'], part_size=part_size) |
| 136 |
    def create_archive_from_file(self, filename=None, file_obj=None,
                                 description=None, upload_id_callback=None):
        """
        Create a new archive and upload the data from the given file
        or file-like object.

        :type filename: str
        :param filename: A filename to upload

        :type file_obj: file
        :param file_obj: A file-like object to upload

        :type description: str
        :param description: An optional description for the archive.

        :type upload_id_callback: function
        :param upload_id_callback: If set, this function is called with
            the upload_id as its only argument as soon as it is known,
            so that a later call to resume_archive_from_file can resume
            the upload if needed.

        :rtype: str
        :return: The archive id of the newly created archive
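
        Example (a sketch; ``save_upload_id`` is a hypothetical callback
        that persists the upload id for a possible later resume)::

            def save_upload_id(upload_id):
                with open('/tmp/upload.id', 'w') as f:
                    f.write(upload_id)

            archive_id = vault.create_archive_from_file(
                '/tmp/backup.tar', upload_id_callback=save_upload_id)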
| 159 """ |
| 160 part_size = self.DefaultPartSize |
| 161 if not file_obj: |
| 162 file_size = os.path.getsize(filename) |
| 163 try: |
| 164 part_size = minimum_part_size(file_size) |
| 165 except ValueError: |
                raise UploadArchiveError("File size of %s bytes exceeds "
                                         "40,000 GB archive limit of Glacier."
                                         % file_size)
            file_obj = open(filename, "rb")
        writer = self.create_archive_writer(
            description=description,
            part_size=part_size)
        if upload_id_callback:
            upload_id_callback(writer.upload_id)
        while True:
            data = file_obj.read(part_size)
            if not data:
                break
            writer.write(data)
        writer.close()
        return writer.get_archive_id()

    @staticmethod
    def _range_string_to_part_index(range_string, part_size):
        start, inside_end = [int(value) for value in range_string.split('-')]
        end = inside_end + 1
        length = end - start
        if length == part_size + 1:
            # Off-by-one bug in Amazon's Glacier implementation,
            # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
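            # For example, with a 4 MiB (4194304 byte) part size, Glacier
            # may report the first part's range as '0-4194304' (4194305
            # bytes) instead of '0-4194303'.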
            # Workaround: since part_size is too big by one byte, adjust it
            end -= 1
            inside_end -= 1
            length -= 1
        assert not (start % part_size), (
            "upload part start byte is not on a part boundary")
        assert (length <= part_size), "upload part is bigger than part size"
        return start // part_size

    def resume_archive_from_file(self, upload_id, filename=None,
                                 file_obj=None):
        """Resume upload of a file already part-uploaded to Glacier.

        The resumption of an upload where the part-uploaded section is empty
        is a valid degenerate case that this function can handle.

        One and only one of filename or file_obj must be specified.

        :type upload_id: str
        :param upload_id: existing Glacier upload id of upload being resumed.

        :type filename: str
        :param filename: file to open for resume

        :type file_obj: file
        :param file_obj: file-like object containing local data to resume.
            This must read from the start of the entire upload, not just
            from the point being resumed. Use file_obj.seek(0) to achieve
            this if necessary.

        :rtype: str
        :return: The archive id of the newly created archive

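        Example (a sketch; assumes the upload id was saved earlier, e.g.
        by an upload_id_callback passed to create_archive_from_file)::

            with open('/tmp/upload.id') as f:
                upload_id = f.read()
            archive_id = vault.resume_archive_from_file(
                upload_id, filename='/tmp/backup.tar')
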
| 222 """ |
| 223 part_list_response = self.list_all_parts(upload_id) |
| 224 part_size = part_list_response['PartSizeInBytes'] |
| 225 |
| 226 part_hash_map = {} |
| 227 for part_desc in part_list_response['Parts']: |
| 228 part_index = self._range_string_to_part_index( |
| 229 part_desc['RangeInBytes'], part_size) |
| 230 part_tree_hash = part_desc['SHA256TreeHash'].decode('hex') |
| 231 part_hash_map[part_index] = part_tree_hash |
| 232 |
| 233 if not file_obj: |
| 234 file_obj = open(filename, "rb") |
| 235 |
| 236 return resume_file_upload( |
| 237 self, upload_id, part_size, file_obj, part_hash_map) |
| 238 |
    def concurrent_create_archive_from_file(self, filename, description):
        """
        Create a new archive and concurrently upload the contents
        of the given file.

        This is a convenience method around the
        :class:`boto.glacier.concurrent.ConcurrentUploader`
        class. This method will perform a multipart upload
        and upload the parts of the file concurrently.

        :type filename: str
        :param filename: A filename to upload

        :raises: :class:`boto.glacier.exceptions.UploadArchiveError` if an
            error occurs during the upload process.

        :rtype: str
        :return: The archive id of the newly created archive

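        Example (a sketch; file name and description are placeholders)::

            try:
                archive_id = vault.concurrent_create_archive_from_file(
                    '/tmp/backup.tar', 'nightly backup')
            except UploadArchiveError as e:
                print 'upload failed: %s' % e
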
| 258 """ |
| 259 uploader = ConcurrentUploader(self.layer1, self.name) |
| 260 archive_id = uploader.upload(filename, description) |
| 261 return archive_id |
| 262 |
    def retrieve_archive(self, archive_id, sns_topic=None,
                         description=None):
        """
        Initiate an archive retrieval job to download the data from an
        archive. You will need to wait for the notification from
        Amazon (via SNS) before you can actually download the data;
        this takes around 4 hours.

        :type archive_id: str
        :param archive_id: The id of the archive

        :type description: str
        :param description: An optional description for the job.

        :type sns_topic: str
        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
            sends notification when the job is completed and the output
            is ready for you to download.

        :rtype: :class:`boto.glacier.job.Job`
        :return: A Job object representing the retrieval job.
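
        Example (a sketch; ``topic_arn`` is a placeholder, and the
        download step assumes the job has already completed)::

            job = vault.retrieve_archive(archive_id, sns_topic=topic_arn)
            # ...later, once notified that the job is complete:
            job.download_to_file('/tmp/restored.tar')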
| 284 """ |
| 285 job_data = {'Type': 'archive-retrieval', |
| 286 'ArchiveId': archive_id} |
| 287 if sns_topic is not None: |
| 288 job_data['SNSTopic'] = sns_topic |
| 289 if description is not None: |
| 290 job_data['Description'] = description |
| 291 |
| 292 response = self.layer1.initiate_job(self.name, job_data) |
| 293 return self.get_job(response['JobId']) |
| 294 |
    def retrieve_inventory(self, sns_topic=None,
                           description=None):
        """
        Initiate an inventory retrieval job to list the items in the
        vault. You will need to wait for the notification from
        Amazon (via SNS) before you can actually download the data;
        this takes around 4 hours.

        :type description: str
        :param description: An optional description for the job.

        :type sns_topic: str
        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
            sends notification when the job is completed and the output
            is ready for you to download.

        :rtype: str
        :return: The ID of the retrieval job.
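
        Example (a sketch showing how to turn the returned id into a
        :class:`boto.glacier.job.Job` via get_job)::

            job_id = vault.retrieve_inventory()
            inventory_job = vault.get_job(job_id)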
| 313 """ |
| 314 job_data = {'Type': 'inventory-retrieval'} |
| 315 if sns_topic is not None: |
| 316 job_data['SNSTopic'] = sns_topic |
| 317 if description is not None: |
| 318 job_data['Description'] = description |
| 319 |
| 320 response = self.layer1.initiate_job(self.name, job_data) |
| 321 return response['JobId'] |
| 322 |
    def delete_archive(self, archive_id):
        """
        This operation deletes an archive from the vault.

        :type archive_id: str
        :param archive_id: The ID for the archive to be deleted.
        """
        return self.layer1.delete_archive(self.name, archive_id)

    def get_job(self, job_id):
        """
        Get an object representing a job in progress.

        :type job_id: str
        :param job_id: The ID of the job

        :rtype: :class:`boto.glacier.job.Job`
        :return: A Job object representing the job.
        """
        response_data = self.layer1.describe_job(self.name, job_id)
        return Job(self, response_data)

    def list_jobs(self, completed=None, status_code=None):
        """
        Return a list of Job objects related to this vault.

        :type completed: boolean
        :param completed: Specifies the state of the jobs to return.
            If a value of True is passed, only completed jobs will
            be returned. If a value of False is passed, only
            uncompleted jobs will be returned. If no value is
            passed, all jobs will be returned.

        :type status_code: string
        :param status_code: Specifies the type of job status to return.
            Valid values are: InProgress|Succeeded|Failed. If not
            specified, jobs with all status codes are returned.

        :rtype: list of :class:`boto.glacier.job.Job`
        :return: A list of Job objects related to this vault.
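
        Example (a sketch; assumes, per boto's Job class, that the job
        id is exposed as the ``id`` attribute)::

            for job in vault.list_jobs(completed=True):
                print job.id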
| 363 """ |
| 364 response_data = self.layer1.list_jobs(self.name, completed, |
| 365 status_code) |
| 366 return [Job(self, jd) for jd in response_data['JobList']] |
| 367 |
    def list_all_parts(self, upload_id):
        """Automatically make and combine multiple calls to list_parts.

        Call list_parts as necessary, combining the results in case multiple
        calls were required to get data on all available parts.

        """
        result = self.layer1.list_parts(self.name, upload_id)
        marker = result['Marker']
        while marker:
            additional_result = self.layer1.list_parts(
                self.name, upload_id, marker=marker)
            result['Parts'].extend(additional_result['Parts'])
            marker = additional_result['Marker']
        # The marker makes no sense in an unpaginated result, and clearing it
        # makes testing easier. This also has the nice property that the
        # result is a normal (but expanded) response.
        result['Marker'] = None
        return result