#!/usr/bin/env python
# Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import getopt
import sys
import os
import boto

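# Multipart support is optional: it depends on the third-party "filechunkio"
# package (installable separately, e.g. via "pip install filechunkio").  If
# the import below fails, the script still runs but only offers single-part
# uploads, and the usage text reports which module is missing.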
try:
    # multipart portions copyright Fabian Topfstedt
    # https://gist.github.com/924094

    import math
    import mimetypes
    from multiprocessing import Pool
    from boto.s3.connection import S3Connection
    from filechunkio import FileChunkIO
    multipart_capable = True
    usage_flag_multipart_capable = """ [--multipart]"""
    usage_string_multipart_capable = """
        multipart - Upload files as multiple parts. This needs filechunkio."""
except ImportError as err:
    multipart_capable = False
    usage_flag_multipart_capable = ""
    usage_string_multipart_capable = '\n\n "' + \
        err.message[len('No module named '):] + \
        '" is missing for multipart support '


usage_string = """
SYNOPSIS
    s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>]
          -b/--bucket <bucket_name> [-c/--callback <num_cb>]
          [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>]
          [-n/--no_op] [-p/--prefix <prefix>] [-k/--key_prefix <key_prefix>]
          [-q/--quiet] [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced]
          [--header] [--host <s3_host>]""" + usage_flag_multipart_capable + """
          path [path...]

    Where
        access_key - Your AWS Access Key ID.  If not supplied, boto will
                     use the value of the environment variable
                     AWS_ACCESS_KEY_ID
        secret_key - Your AWS Secret Access Key.  If not supplied, boto
                     will use the value of the environment variable
                     AWS_SECRET_ACCESS_KEY
        bucket_name - The name of the S3 bucket the file(s) should be
                      copied to.
        path - A path to a directory or file that represents the items
               to be uploaded.  If the path points to an individual file,
               that file will be uploaded to the specified bucket.  If the
               path points to a directory, it will recursively traverse
               the directory and upload all files to the specified bucket.
        debug_level - 0 means no debug output (default), 1 means normal
                      debug output from boto, and 2 means boto debug output
                      plus request/response output from httplib
        ignore_dirs - a comma-separated list of directory names that will
                      be ignored and not uploaded to S3.
        num_cb - The number of progress callbacks to display.  The default
                 is zero which means no callbacks.  If you supplied a value
                 of "-c 10" for example, the progress callback would be
                 called 10 times for each file transferred.
        prefix - A file path prefix that will be stripped from the full
                 path of the file when determining the key name in S3.
                 For example, if the full path of a file is:
                     /home/foo/bar/fie.baz
                 and the prefix is specified as "-p /home/foo/" the
                 resulting key name in S3 will be:
                     /bar/fie.baz
                 The prefix must end in a trailing separator; if it
                 does not, one will be added.
        key_prefix - A prefix to be added to the S3 key name, after any
                     stripping of the file path is done based on the
                     "-p/--prefix" option.
        reduced - Use Reduced Redundancy storage
        grant - A canned ACL policy that will be granted on each file
                transferred to S3.  The value provided must be one
                of the "canned" ACL policies supported by S3:
                private|public-read|public-read-write|authenticated-read
        no_overwrite - No files will be overwritten on S3; if the file/key
                       already exists on S3 it will be kept.  This is useful
                       for resuming interrupted transfers.  Note that this is
                       not a sync: even if the file has been updated locally,
                       it will not be re-uploaded if the key already exists
                       on S3.
        header - key=value pairs of extra header(s) to pass along in the
                 request
        host - Hostname override, for using an endpoint other than AWS S3
""" + usage_string_multipart_capable + """


     If the -n option is provided, no files will be transferred to S3 but
     informational messages will be printed about what would happen.
"""
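# Illustrative invocations (bucket names and paths are placeholders, not part
# of this script); credentials fall back to the AWS_* environment variables:
#
#   # upload a directory tree, stripping /home/foo from the key names
#   s3put -b mybucket -p /home/foo /home/foo/bar
#
#   # upload one file as a multipart upload with 10 progress callbacks
#   s3put -b mybucket --multipart -c 10 /home/foo/bar/fie.baz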


def usage():
    print usage_string
    sys.exit()


def submit_cb(bytes_so_far, total_bytes):
    print '%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes)


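# get_key_name() turns a local file path into an S3 key: the prefix (already
# normalized by expand_path() in main()) is stripped from the front, path
# separators are converted to "/", and key_prefix is prepended verbatim.
# For example, with prefix "/home/foo" and an empty key_prefix,
# "/home/foo/bar/fie.baz" becomes "/bar/fie.baz".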
def get_key_name(fullpath, prefix, key_prefix):
    if fullpath.startswith(prefix):
        key_name = fullpath[len(prefix):]
    else:
        key_name = fullpath
    parts = key_name.split(os.sep)
    return key_prefix + '/'.join(parts)


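# Each part is uploaded by a separate worker process from the Pool created in
# multipart_upload(), so _upload_part opens its own S3 connection rather than
# sharing one, looks up the in-progress multipart upload by id, and retries a
# failed part up to amount_of_retries times before giving up.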
def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num,
                 source_path, offset, bytes, debug, cb, num_cb,
                 amount_of_retries=10):
    """
    Uploads a part with retries.
    """
    if debug == 1:
        print "_upload_part(%s, %s, %s)" % (source_path, offset, bytes)

    def _upload(retries_left=amount_of_retries):
        try:
            if debug == 1:
                print 'Start uploading part #%d ...' % part_num
            conn = S3Connection(aws_key, aws_secret)
            conn.debug = debug
            bucket = conn.get_bucket(bucketname)
            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(source_path, 'r', offset=offset,
                                     bytes=bytes) as fp:
                        mp.upload_part_from_file(fp=fp, part_num=part_num,
                                                 cb=cb, num_cb=num_cb)
                    break
        except Exception as exc:
            if retries_left:
                _upload(retries_left=retries_left - 1)
            else:
                print 'Failed uploading part #%d' % part_num
                raise exc
        else:
            if debug == 1:
                print '... Uploaded part #%d' % part_num

    _upload()


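# multipart_upload() fans the file's chunks out to a small pool of worker
# processes (4 by default) and only completes the upload when S3 reports the
# expected number of parts; otherwise the upload is cancelled.  With the
# square-root sizing below, a 1 GB file is split into roughly 75 MB parts,
# i.e. about 15 parts.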
def multipart_upload(bucketname, aws_key, aws_secret, source_path, keyname,
                     reduced, debug, cb, num_cb, acl='private', headers=None,
                     guess_mimetype=True, parallel_processes=4):
    """
    Parallel multipart upload.
    """
    # Avoid mutating a shared default dict when the caller passes no headers.
    if headers is None:
        headers = {}
    conn = S3Connection(aws_key, aws_secret)
    conn.debug = debug
    bucket = conn.get_bucket(bucketname)

    if guess_mimetype:
        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
        headers.update({'Content-Type': mtype})

    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
                                          reduced_redundancy=reduced)

    source_size = os.stat(source_path).st_size
    # Part size grows with the square root of the file size but never drops
    # below S3's 5 MB minimum part size.
    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                          5242880)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

    pool = Pool(processes=parallel_processes)
    for i in range(chunk_amount):
        offset = i * bytes_per_chunk
        remaining_bytes = source_size - offset
        bytes = min([bytes_per_chunk, remaining_bytes])
        part_num = i + 1
        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id,
                                        part_num, source_path, offset, bytes,
                                        debug, cb, num_cb])
    pool.close()
    pool.join()

    if len(mp.get_all_parts()) == chunk_amount:
        mp.complete_upload()
        key = bucket.get_key(keyname)
        key.set_acl(acl)
    else:
        mp.cancel_upload()


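# singlepart_upload() is the fallback path: it writes the whole file with a
# single PUT via Key.set_contents_from_filename(), passing through whatever
# callback, ACL policy, reduced-redundancy and header arguments the caller
# supplies.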
def singlepart_upload(bucket, key_name, fullpath, *kargs, **kwargs):
    """
    Single upload.
    """
    k = bucket.new_key(key_name)
    k.set_contents_from_filename(fullpath, *kargs, **kwargs)


def expand_path(path):
    path = os.path.expanduser(path)
    path = os.path.expandvars(path)
    return os.path.abspath(path)


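# main() parses the command line with getopt, expands each path argument
# (directories are walked recursively, skipping ignored directories and
# dot-files), and uploads every candidate file either as a multipart upload
# or as a single PUT, honouring --no_op and --no_overwrite.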
def main():

    # default values
    aws_access_key_id = None
    aws_secret_access_key = None
    bucket_name = ''
    ignore_dirs = []
    debug = 0
    cb = None
    num_cb = 0
    quiet = False
    no_op = False
    prefix = '/'
    key_prefix = ''
    grant = None
    no_overwrite = False
    reduced = False
    headers = {}
    host = None
    multipart_requested = False

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'a:b:c:d:g:hi:k:np:qs:wr',
            ['access_key=', 'bucket=', 'callback=', 'debug=', 'help', 'grant=',
             'ignore=', 'key_prefix=', 'no_op', 'prefix=', 'quiet',
             'secret_key=', 'no_overwrite', 'reduced', 'header=', 'multipart',
             'host='])
    except getopt.GetoptError:
        usage()

    # parse opts
    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
        if o in ('-a', '--access_key'):
            aws_access_key_id = a
        if o in ('-b', '--bucket'):
            bucket_name = a
        if o in ('-c', '--callback'):
            num_cb = int(a)
            cb = submit_cb
        if o in ('-d', '--debug'):
            debug = int(a)
        if o in ('-g', '--grant'):
            grant = a
        if o in ('-i', '--ignore'):
            ignore_dirs = a.split(',')
        if o in ('-n', '--no_op'):
            no_op = True
        if o in ('-w', '--no_overwrite'):
            no_overwrite = True
        if o in ('-p', '--prefix'):
            prefix = a
            if prefix[-1] != os.sep:
                prefix = prefix + os.sep
            prefix = expand_path(prefix)
        if o in ('-k', '--key_prefix'):
            key_prefix = a
        if o in ('-q', '--quiet'):
            quiet = True
        if o in ('-s', '--secret_key'):
            aws_secret_access_key = a
        if o in ('-r', '--reduced'):
            reduced = True
        if o in ('--header',):
            (k, v) = a.split("=", 1)
            headers[k] = v
        if o in ('--host',):
            host = a
        if o in ('--multipart',):
            if multipart_capable:
                multipart_requested = True
            else:
                print "multipart upload requested but not capable"
                sys.exit()

    if len(args) < 1:
        usage()

    if not bucket_name:
        print "bucket name is required!"
        usage()

    if host:
        c = boto.connect_s3(host=host, aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key)
    else:
        c = boto.connect_s3(aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key)
    c.debug = debug
    b = c.get_bucket(bucket_name)
    existing_keys_to_check_against = []
    files_to_check_for_upload = []

    for path in args:
        path = expand_path(path)
        # upload a directory of files recursively
        if os.path.isdir(path):
            if no_overwrite:
                if not quiet:
                    print 'Getting list of existing keys to check against'
                for key in b.list(get_key_name(path, prefix, key_prefix)):
                    existing_keys_to_check_against.append(key.name)
            for root, dirs, files in os.walk(path):
                for ignore in ignore_dirs:
                    if ignore in dirs:
                        dirs.remove(ignore)
                for filename in files:
                    if filename.startswith("."):
                        continue
                    files_to_check_for_upload.append(os.path.join(root,
                                                                  filename))

        # upload a single file
        elif os.path.isfile(path):
            fullpath = os.path.abspath(path)
            key_name = get_key_name(fullpath, prefix, key_prefix)
            files_to_check_for_upload.append(fullpath)
            existing_keys_to_check_against.append(key_name)

        # we are trying to upload something unknown
        else:
            print "I don't know what %s is, so I can't upload it" % path

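    # Final pass: skip keys that already exist when --no_overwrite was given,
    # then upload each remaining file, using multipart only for non-empty
    # files when it was both requested and available.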
    for fullpath in files_to_check_for_upload:
        key_name = get_key_name(fullpath, prefix, key_prefix)

        if no_overwrite and key_name in existing_keys_to_check_against:
            if not quiet:
                print 'Skipping %s as it exists in s3' % fullpath
            continue

        if not quiet:
            print 'Copying %s to %s/%s' % (fullpath, bucket_name, key_name)

        if not no_op:
            # 0-byte files don't work and also don't need multipart upload
            if os.stat(fullpath).st_size != 0 and multipart_capable and \
                    multipart_requested:
                multipart_upload(bucket_name, aws_access_key_id,
                                 aws_secret_access_key, fullpath, key_name,
                                 reduced, debug, cb, num_cb,
                                 grant or 'private', headers)
            else:
                singlepart_upload(b, key_name, fullpath, cb=cb, num_cb=num_cb,
                                  policy=grant, reduced_redundancy=reduced,
                                  headers=headers)

if __name__ == "__main__":
    main()