Index: third_party/gsutil/boto/bin/s3put |
diff --git a/third_party/gsutil/boto/bin/s3put b/third_party/gsutil/boto/bin/s3put |
new file mode 100755 |
index 0000000000000000000000000000000000000000..01d9fcb195c66a0fc28fc926088167b88d99f258 |
--- /dev/null |
+++ b/third_party/gsutil/boto/bin/s3put |
@@ -0,0 +1,374 @@ |
+#!/usr/bin/env python |
+# Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/ |
+# |
+# Permission is hereby granted, free of charge, to any person obtaining a |
+# copy of this software and associated documentation files (the |
+# "Software"), to deal in the Software without restriction, including |
+# without limitation the rights to use, copy, modify, merge, publish, dis- |
+# tribute, sublicense, and/or sell copies of the Software, and to permit |
+# persons to whom the Software is furnished to do so, subject to the fol- |
+# lowing conditions: |
+# |
+# The above copyright notice and this permission notice shall be included |
+# in all copies or substantial portions of the Software. |
+# |
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
+# IN THE SOFTWARE. |
+# |
+import getopt |
+import sys |
+import os |
+import boto |
+ |
+try: |
+ # multipart portions copyright Fabian Topfstedt |
+ # https://gist.github.com/924094 |
+ |
+ import math |
+ import mimetypes |
+ from multiprocessing import Pool |
+ from boto.s3.connection import S3Connection |
+ from filechunkio import FileChunkIO |
+ multipart_capable = True |
+ usage_flag_multipart_capable = """ [--multipart]""" |
+ usage_string_multipart_capable = """ |
+ multipart - Upload files as multiple parts. This needs filechunkio.""" |
+except ImportError as err: |
+ multipart_capable = False |
+ usage_flag_multipart_capable = "" |
+ usage_string_multipart_capable = '\n\n "' + \ |
+ err.message[len('No module named '):] + \ |
+ '" is missing for multipart support ' |
+ |
+ |
+usage_string = """ |
+SYNOPSIS |
+ s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>] |
+ -b/--bucket <bucket_name> [-c/--callback <num_cb>] |
+ [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>] |
+ [-n/--no_op] [-p/--prefix <prefix>] [-k/--key_prefix <key_prefix>] |
+ [-q/--quiet] [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced] |
+ [--header] [--host <s3_host>]""" + usage_flag_multipart_capable + """ path [path...] |
+ |
+ Where |
+ access_key - Your AWS Access Key ID. If not supplied, boto will |
+ use the value of the environment variable |
+ AWS_ACCESS_KEY_ID |
+ secret_key - Your AWS Secret Access Key. If not supplied, boto |
+ will use the value of the environment variable |
+ AWS_SECRET_ACCESS_KEY |
+ bucket_name - The name of the S3 bucket the file(s) should be |
+ copied to. |
+ path - A path to a directory or file that represents the items |
+ to be uploaded. If the path points to an individual file, |
+ that file will be uploaded to the specified bucket. If the |
+ path points to a directory, it will recursively traverse |
+ the directory and upload all files to the specified bucket. |
+ debug_level - 0 means no debug output (default), 1 means normal |
+ debug output from boto, and 2 means boto debug output |
+ plus request/response output from httplib |
+ ignore_dirs - a comma-separated list of directory names that will |
+ be ignored and not uploaded to S3. |
+ num_cb - The number of progress callbacks to display. The default |
+ is zero which means no callbacks. If you supplied a value |
+ of "-c 10" for example, the progress callback would be |
+ called 10 times for each file transferred. |
+ prefix - A file path prefix that will be stripped from the full |
+ path of the file when determining the key name in S3. |
+ For example, if the full path of a file is: |
+ /home/foo/bar/fie.baz |
+ and the prefix is specified as "-p /home/foo/" the |
+ resulting key name in S3 will be: |
+ /bar/fie.baz |
+              The prefix must end in a trailing separator; if it
+              does not, one will be added.
+ key_prefix - A prefix to be added to the S3 key name, after any |
+ stripping of the file path is done based on the |
+ "-p/--prefix" option. |
+ reduced - Use Reduced Redundancy storage |
+ grant - A canned ACL policy that will be granted on each file |
+                   transferred to S3. The value provided must be one
+ of the "canned" ACL policies supported by S3: |
+ private|public-read|public-read-write|authenticated-read |
+     no_overwrite - No files will be overwritten on S3; if the file/key
+                    already exists on S3, it will be kept. This is useful
+                    for resuming interrupted transfers. Note that this is
+                    not a sync: even if a file has been updated locally,
+                    it will not be re-uploaded if its key already exists
+                    on S3.
+ header - key=value pairs of extra header(s) to pass along in the |
+ request |
+     host - Hostname override, for using an endpoint other than AWS S3
+""" + usage_string_multipart_capable + """ |
+ |
+ |
+ If the -n option is provided, no files will be transferred to S3 but |
+ informational messages will be printed about what would happen. |
+""" |
+ |
+ |
+def usage(): |
+ print usage_string |
+ sys.exit() |
+ |
+ |
+def submit_cb(bytes_so_far, total_bytes): |
+ print '%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes) |
+ |
+ |
+def get_key_name(fullpath, prefix, key_prefix): |
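+    # Strip the local filesystem prefix, then join the remaining path pieces
+    # with '/' (regardless of os.sep) and prepend key_prefix verbatim.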
+ if fullpath.startswith(prefix): |
+ key_name = fullpath[len(prefix):] |
+ else: |
+ key_name = fullpath |
+ l = key_name.split(os.sep) |
+ return key_prefix + '/'.join(l) |
+ |
+ |
+def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num, |
+ source_path, offset, bytes, debug, cb, num_cb, |
+ amount_of_retries=10): |
+ """ |
+ Uploads a part with retries. |
+ """ |
+ if debug == 1: |
+ print "_upload_part(%s, %s, %s)" % (source_path, offset, bytes) |
+ |
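+    # Each attempt opens its own S3 connection and looks the multipart upload
+    # up by id, so this helper can safely run in a separate worker process.
+    # On failure it recurses until amount_of_retries attempts are exhausted.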
+ def _upload(retries_left=amount_of_retries): |
+ try: |
+ if debug == 1: |
+ print 'Start uploading part #%d ...' % part_num |
+ conn = S3Connection(aws_key, aws_secret) |
+ conn.debug = debug |
+ bucket = conn.get_bucket(bucketname) |
+ for mp in bucket.get_all_multipart_uploads(): |
+ if mp.id == multipart_id: |
+ with FileChunkIO(source_path, 'r', offset=offset, |
+ bytes=bytes) as fp: |
+ mp.upload_part_from_file(fp=fp, part_num=part_num, |
+ cb=cb, num_cb=num_cb) |
+ break |
+ except Exception, exc: |
+ if retries_left: |
+ _upload(retries_left=retries_left - 1) |
+ else: |
+ print 'Failed uploading part #%d' % part_num |
+ raise exc |
+ else: |
+ if debug == 1: |
+ print '... Uploaded part #%d' % part_num |
+ |
+ _upload() |
+ |
+ |
+def multipart_upload(bucketname, aws_key, aws_secret, source_path, keyname, |
+ reduced, debug, cb, num_cb, acl='private', headers={}, |
+ guess_mimetype=True, parallel_processes=4): |
+ """ |
+ Parallel multipart upload. |
+ """ |
+ conn = S3Connection(aws_key, aws_secret) |
+ conn.debug = debug |
+ bucket = conn.get_bucket(bucketname) |
+ |
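+    # Guess a Content-Type from the key name, falling back to the generic
+    # application/octet-stream.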
+ if guess_mimetype: |
+ mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream' |
+ headers.update({'Content-Type': mtype}) |
+ |
+ mp = bucket.initiate_multipart_upload(keyname, headers=headers, |
+ reduced_redundancy=reduced) |
+ |
+ source_size = os.stat(source_path).st_size |
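+    # Part size grows with the square root of the file size but never falls
+    # below S3's 5 MB (5242880 byte) minimum part size.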
+ bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)), |
+ 5242880) |
+ chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk))) |
+ |
+ pool = Pool(processes=parallel_processes) |
+ for i in range(chunk_amount): |
+ offset = i * bytes_per_chunk |
+ remaining_bytes = source_size - offset |
+ bytes = min([bytes_per_chunk, remaining_bytes]) |
+ part_num = i + 1 |
+ pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id, |
+ part_num, source_path, offset, bytes, |
+ debug, cb, num_cb]) |
+ pool.close() |
+ pool.join() |
+ |
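+    # Complete the upload only if every part made it; otherwise cancel it so
+    # the partial parts are not left behind in the bucket.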
+ if len(mp.get_all_parts()) == chunk_amount: |
+ mp.complete_upload() |
+ key = bucket.get_key(keyname) |
+ key.set_acl(acl) |
+ else: |
+ mp.cancel_upload() |
+ |
+ |
+def singlepart_upload(bucket, key_name, fullpath, *kargs, **kwargs): |
+ """ |
+ Single upload. |
+ """ |
+ k = bucket.new_key(key_name) |
+ k.set_contents_from_filename(fullpath, *kargs, **kwargs) |
+ |
+ |
+def expand_path(path): |
+ path = os.path.expanduser(path) |
+ path = os.path.expandvars(path) |
+ return os.path.abspath(path) |
+ |
+ |
+def main(): |
+ |
+ # default values |
+ aws_access_key_id = None |
+ aws_secret_access_key = None |
+ bucket_name = '' |
+ ignore_dirs = [] |
+ debug = 0 |
+ cb = None |
+ num_cb = 0 |
+ quiet = False |
+ no_op = False |
+ prefix = '/' |
+ key_prefix = '' |
+ grant = None |
+ no_overwrite = False |
+ reduced = False |
+ headers = {} |
+ host = None |
+ multipart_requested = False |
+ |
+ try: |
+ opts, args = getopt.getopt( |
+ sys.argv[1:], 'a:b:c::d:g:hi:k:np:qs:wr', |
+ ['access_key=', 'bucket=', 'callback=', 'debug=', 'help', 'grant=', |
+ 'ignore=', 'key_prefix=', 'no_op', 'prefix=', 'quiet', |
+ 'secret_key=', 'no_overwrite', 'reduced', 'header=', 'multipart', |
+ 'host=']) |
+    except getopt.GetoptError:
+ usage() |
+ |
+ # parse opts |
+ for o, a in opts: |
+ if o in ('-h', '--help'): |
+ usage() |
+ if o in ('-a', '--access_key'): |
+ aws_access_key_id = a |
+ if o in ('-b', '--bucket'): |
+ bucket_name = a |
+ if o in ('-c', '--callback'): |
+ num_cb = int(a) |
+ cb = submit_cb |
+ if o in ('-d', '--debug'): |
+ debug = int(a) |
+ if o in ('-g', '--grant'): |
+ grant = a |
+ if o in ('-i', '--ignore'): |
+ ignore_dirs = a.split(',') |
+ if o in ('-n', '--no_op'): |
+ no_op = True |
+ if o in ('-w', '--no_overwrite'): |
+ no_overwrite = True |
+ if o in ('-p', '--prefix'): |
+ prefix = a |
+ if prefix[-1] != os.sep: |
+ prefix = prefix + os.sep |
+ prefix = expand_path(prefix) |
+ if o in ('-k', '--key_prefix'): |
+ key_prefix = a |
+ if o in ('-q', '--quiet'): |
+ quiet = True |
+ if o in ('-s', '--secret_key'): |
+ aws_secret_access_key = a |
+ if o in ('-r', '--reduced'): |
+ reduced = True |
+        if o == '--header':
+ (k, v) = a.split("=") |
+ headers[k] = v |
+        if o == '--host':
+ host = a |
+        if o == '--multipart':
+ if multipart_capable: |
+ multipart_requested = True |
+ else: |
+                print "multipart upload requested but required modules are missing"
+ sys.exit() |
+ |
+ if len(args) < 1: |
+ usage() |
+ |
+ if not bucket_name: |
+ print "bucket name is required!" |
+ usage() |
+ |
+ if host: |
+ c = boto.connect_s3(host=host, aws_access_key_id=aws_access_key_id, |
+ aws_secret_access_key=aws_secret_access_key) |
+ else: |
+ c = boto.connect_s3(aws_access_key_id=aws_access_key_id, |
+ aws_secret_access_key=aws_secret_access_key) |
+ c.debug = debug |
+ b = c.get_bucket(bucket_name) |
+ existing_keys_to_check_against = [] |
+ files_to_check_for_upload = [] |
+ |
+ for path in args: |
+ path = expand_path(path) |
+ # upload a directory of files recursively |
+ if os.path.isdir(path): |
+ if no_overwrite: |
+ if not quiet: |
+ print 'Getting list of existing keys to check against' |
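+                # List only keys under this directory's computed key name so
+                # the overwrite check does not have to scan the whole bucket.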
+ for key in b.list(get_key_name(path, prefix, key_prefix)): |
+ existing_keys_to_check_against.append(key.name) |
+ for root, dirs, files in os.walk(path): |
+ for ignore in ignore_dirs: |
+ if ignore in dirs: |
+ dirs.remove(ignore) |
+ for path in files: |
+ if path.startswith("."): |
+ continue |
+ files_to_check_for_upload.append(os.path.join(root, path)) |
+ |
+ # upload a single file |
+ elif os.path.isfile(path): |
+ fullpath = os.path.abspath(path) |
+ key_name = get_key_name(fullpath, prefix, key_prefix) |
+ files_to_check_for_upload.append(fullpath) |
+ existing_keys_to_check_against.append(key_name) |
+ |
+ # we are trying to upload something unknown |
+ else: |
+            print "I don't know what %s is, so I can't upload it" % path
+ |
+ for fullpath in files_to_check_for_upload: |
+ key_name = get_key_name(fullpath, prefix, key_prefix) |
+ |
+ if no_overwrite and key_name in existing_keys_to_check_against: |
+ if not quiet: |
+ print 'Skipping %s as it exists in s3' % fullpath |
+ continue |
+ |
+ if not quiet: |
+ print 'Copying %s to %s/%s' % (fullpath, bucket_name, key_name) |
+ |
+ if not no_op: |
+ # 0-byte files don't work and also don't need multipart upload |
+ if os.stat(fullpath).st_size != 0 and multipart_capable and \ |
+ multipart_requested: |
+ multipart_upload(bucket_name, aws_access_key_id, |
+ aws_secret_access_key, fullpath, key_name, |
+ reduced, debug, cb, num_cb, |
+ grant or 'private', headers) |
+ else: |
+ singlepart_upload(b, key_name, fullpath, cb=cb, num_cb=num_cb, |
+ policy=grant, reduced_redundancy=reduced, |
+ headers=headers) |
+ |
+if __name__ == "__main__": |
+ main() |