Index: third_party/gsutil/boto/bin/s3put
diff --git a/third_party/gsutil/boto/bin/s3put b/third_party/gsutil/boto/bin/s3put
new file mode 100755
index 0000000000000000000000000000000000000000..01d9fcb195c66a0fc28fc926088167b88d99f258
--- /dev/null
+++ b/third_party/gsutil/boto/bin/s3put
@@ -0,0 +1,396 @@
+#!/usr/bin/env python
+# Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+import getopt
+import sys
+import os
+import boto
+
+try:
+    # multipart portions copyright Fabian Topfstedt
+    # https://gist.github.com/924094
+
+    import math
+    import mimetypes
+    from multiprocessing import Pool
+    from boto.s3.connection import S3Connection
+    from filechunkio import FileChunkIO
+    multipart_capable = True
+    usage_flag_multipart_capable = """ [--multipart]"""
+    usage_string_multipart_capable = """
+        multipart - Upload files as multiple parts. This needs filechunkio."""
+except ImportError as err:
+    multipart_capable = False
+    usage_flag_multipart_capable = ""
+    usage_string_multipart_capable = '\n\n     "' + \
+        err.message[len('No module named '):] + \
+        '" is missing for multipart support '
+
+
+usage_string = """
+SYNOPSIS
+    s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>]
+          -b/--bucket <bucket_name> [-c/--callback <num_cb>]
+          [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>]
+          [-n/--no_op] [-p/--prefix <prefix>] [-k/--key_prefix <key_prefix>]
+          [-q/--quiet] [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced]
+          [--header] [--host <s3_host>]""" + usage_flag_multipart_capable + """ path [path...]
+
+    Where
+        access_key - Your AWS Access Key ID.  If not supplied, boto will
+                     use the value of the environment variable
+                     AWS_ACCESS_KEY_ID
+        secret_key - Your AWS Secret Access Key.  If not supplied, boto
+                     will use the value of the environment variable
+                     AWS_SECRET_ACCESS_KEY
+        bucket_name - The name of the S3 bucket the file(s) should be
+                      copied to.
+        path - A path to a directory or file that represents the items
+               to be uploaded.  If the path points to an individual file,
+               that file will be uploaded to the specified bucket.  If the
+               path points to a directory, it will recursively traverse
+               the directory and upload all files to the specified bucket.
+        debug_level - 0 means no debug output (default), 1 means normal
+                      debug output from boto, and 2 means boto debug output
+                      plus request/response output from httplib
+        ignore_dirs - a comma-separated list of directory names that will
+                      be ignored and not uploaded to S3.
+        num_cb - The number of progress callbacks to display.  The default
+                 is zero which means no callbacks.  If you supplied a value
+                 of "-c 10" for example, the progress callback would be
+                 called 10 times for each file transferred.
+        prefix - A file path prefix that will be stripped from the full
+                 path of the file when determining the key name in S3.
+                 For example, if the full path of a file is:
+                     /home/foo/bar/fie.baz
+                 and the prefix is specified as "-p /home/foo/" the
+                 resulting key name in S3 will be:
+                     /bar/fie.baz
+                 The prefix should end in a trailing separator; if it
+                 does not, one will be added.
+        key_prefix - A prefix to be added to the S3 key name, after any
+                     stripping of the file path is done based on the
+                     "-p/--prefix" option.
+        reduced - Use Reduced Redundancy storage
+        grant - A canned ACL policy that will be granted on each file
+                transferred to S3.  The value provided must be one
+                of the "canned" ACL policies supported by S3:
+                private|public-read|public-read-write|authenticated-read
+        no_overwrite - No files will be overwritten on S3; if the file/key
+                       already exists on S3 it will be kept. This is useful
+                       for resuming interrupted transfers. Note this is not
+                       a sync: even if the file has been updated locally,
+                       the file on S3 will not be updated if the key
+                       already exists.
+        header - key=value pairs of extra header(s) to pass along in the
+                 request
+        host - Hostname override, for using an endpoint other than AWS S3
+""" + usage_string_multipart_capable + """
+
+
+     If the -n option is provided, no files will be transferred to S3 but
+     informational messages will be printed about what would happen.
+"""
+
+
+def usage():
+    print usage_string
+    sys.exit()
+
+
+def submit_cb(bytes_so_far, total_bytes):
+    print '%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes)
+
+
+def get_key_name(fullpath, prefix, key_prefix):
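+    # Strip the local path prefix, then rejoin on '/' so the resulting key
+    # uses forward slashes regardless of the local os.sep.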
+    if fullpath.startswith(prefix):
+        key_name = fullpath[len(prefix):]
+    else:
+        key_name = fullpath
+    parts = key_name.split(os.sep)
+    return key_prefix + '/'.join(parts)
+
+
+def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num,
+                 source_path, offset, bytes, debug, cb, num_cb,
+                 amount_of_retries=10):
+    """
+    Uploads a part with retries.
+    """
+    if debug == 1:
+        print "_upload_part(%s, %s, %s)" % (source_path, offset, bytes)
+
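+    # Retries are recursive: each failure re-invokes the closure with one
+    # fewer retry, and the last exception is re-raised once the budget runs out.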
+    def _upload(retries_left=amount_of_retries):
+        try:
+            if debug == 1:
+                print 'Start uploading part #%d ...' % part_num
+            conn = S3Connection(aws_key, aws_secret)
+            conn.debug = debug
+            bucket = conn.get_bucket(bucketname)
+            for mp in bucket.get_all_multipart_uploads():
+                if mp.id == multipart_id:
+                    with FileChunkIO(source_path, 'r', offset=offset,
+                                     bytes=bytes) as fp:
+                        mp.upload_part_from_file(fp=fp, part_num=part_num,
+                                                 cb=cb, num_cb=num_cb)
+                    break
+        except Exception as exc:
+            if retries_left:
+                _upload(retries_left=retries_left - 1)
+            else:
+                print 'Failed uploading part #%d' % part_num
+                raise exc
+        else:
+            if debug == 1:
+                print '... Uploaded part #%d' % part_num
+
+    _upload()
+
+
+def multipart_upload(bucketname, aws_key, aws_secret, source_path, keyname,
+                     reduced, debug, cb, num_cb, acl='private', headers=None,
+                     guess_mimetype=True, parallel_processes=4):
+    """
+    Parallel multipart upload.
+    """
+    # Copy so the caller's dict is never mutated by the update below.
+    headers = dict(headers or {})
+    conn = S3Connection(aws_key, aws_secret)
+    conn.debug = debug
+    bucket = conn.get_bucket(bucketname)
+
+    if guess_mimetype:
+        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
+        headers.update({'Content-Type': mtype})
+
+    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
+                                          reduced_redundancy=reduced)
+
+    source_size = os.stat(source_path).st_size
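+    # 5242880 bytes is 5 MB, the S3 minimum part size.  Using the geometric
+    # mean of 5 MB and the file size keeps the part count near
+    # sqrt(source_size / 5 MB), comfortably below S3's 10,000-part limit.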
+    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
+                          5242880)
+    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
+
+    pool = Pool(processes=parallel_processes)
+    for i in range(chunk_amount):
+        offset = i * bytes_per_chunk
+        remaining_bytes = source_size - offset
+        bytes = min(bytes_per_chunk, remaining_bytes)
+        part_num = i + 1
+        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id,
+                                        part_num, source_path, offset, bytes,
+                                        debug, cb, num_cb])
+    pool.close()
+    pool.join()
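+    # The AsyncResults from apply_async are never collected, so a part that
+    # exhausts its retries does not propagate its exception here; the
+    # part-count check below is what detects an incomplete upload and cancels.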
+
+    if len(mp.get_all_parts()) == chunk_amount:
+        mp.complete_upload()
+        key = bucket.get_key(keyname)
+        key.set_acl(acl)
+    else:
+        mp.cancel_upload()
+
+
+def singlepart_upload(bucket, key_name, fullpath, *args, **kwargs):
+    """
+    Single-part upload.
+    """
+    k = bucket.new_key(key_name)
+    k.set_contents_from_filename(fullpath, *args, **kwargs)
+
+
+def expand_path(path):
+    path = os.path.expanduser(path)
+    path = os.path.expandvars(path)
+    return os.path.abspath(path)
+
+
+def main():
+
+    # default values
+    aws_access_key_id = None
+    aws_secret_access_key = None
+    bucket_name = ''
+    ignore_dirs = []
+    debug = 0
+    cb = None
+    num_cb = 0
+    quiet = False
+    no_op = False
+    prefix = '/'
+    key_prefix = ''
+    grant = None
+    no_overwrite = False
+    reduced = False
+    headers = {}
+    host = None
+    multipart_requested = False
+
+    try:
+        opts, args = getopt.getopt(
+            sys.argv[1:], 'a:b:c:d:g:hi:k:np:qs:wr',
+            ['access_key=', 'bucket=', 'callback=', 'debug=', 'help', 'grant=',
+             'ignore=', 'key_prefix=', 'no_op', 'prefix=', 'quiet',
+             'secret_key=', 'no_overwrite', 'reduced', 'header=', 'multipart',
+             'host='])
+    except getopt.GetoptError:
+        usage()
+
+    # parse opts
+    for o, a in opts:
+        if o in ('-h', '--help'):
+            usage()
+        if o in ('-a', '--access_key'):
+            aws_access_key_id = a
+        if o in ('-b', '--bucket'):
+            bucket_name = a
+        if o in ('-c', '--callback'):
+            num_cb = int(a)
+            cb = submit_cb
+        if o in ('-d', '--debug'):
+            debug = int(a)
+        if o in ('-g', '--grant'):
+            grant = a
+        if o in ('-i', '--ignore'):
+            ignore_dirs = a.split(',')
+        if o in ('-n', '--no_op'):
+            no_op = True
+        if o in ('-w', '--no_overwrite'):
+            no_overwrite = True
+        if o in ('-p', '--prefix'):
+            prefix = a
+            if prefix[-1] != os.sep:
+                prefix = prefix + os.sep
+            prefix = expand_path(prefix)
+        if o in ('-k', '--key_prefix'):
+            key_prefix = a
+        if o in ('-q', '--quiet'):
+            quiet = True
+        if o in ('-s', '--secret_key'):
+            aws_secret_access_key = a
+        if o in ('-r', '--reduced'):
+            reduced = True
+        if o == '--header':
+            (k, v) = a.split('=', 1)
+            headers[k] = v
+        if o == '--host':
+            host = a
+        if o == '--multipart':
+            if multipart_capable:
+                multipart_requested = True
+            else:
+                print "multipart upload requested but not capable"
+                sys.exit()
+
+    if len(args) < 1:
+        usage()
+
+    if not bucket_name:
+        print "bucket name is required!"
+        usage()
+
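+    # Only pass host= when one was given, so boto's default S3 endpoint is
+    # not overridden with None.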
+    if host:
+        c = boto.connect_s3(host=host, aws_access_key_id=aws_access_key_id,
+                            aws_secret_access_key=aws_secret_access_key)
+    else:
+        c = boto.connect_s3(aws_access_key_id=aws_access_key_id,
+                            aws_secret_access_key=aws_secret_access_key)
+    c.debug = debug
+    b = c.get_bucket(bucket_name)
+    existing_keys_to_check_against = []
+    files_to_check_for_upload = []
+
+    for path in args:
+        path = expand_path(path)
+        # upload a directory of files recursively
+        if os.path.isdir(path):
+            if no_overwrite:
+                if not quiet:
+                    print 'Getting list of existing keys to check against'
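+                # List every existing key under this prefix once up front,
+                # rather than probing S3 once per file.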
+                for key in b.list(get_key_name(path, prefix, key_prefix)):
+                    existing_keys_to_check_against.append(key.name)
+            for root, dirs, files in os.walk(path):
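+                # Prune ignored directories in place so os.walk skips them.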
+                for ignore in ignore_dirs:
+                    if ignore in dirs:
+                        dirs.remove(ignore)
+                for fname in files:
+                    if fname.startswith("."):
+                        continue
+                    files_to_check_for_upload.append(os.path.join(root, fname))
+
+        # upload a single file
+        elif os.path.isfile(path):
+            fullpath = os.path.abspath(path)
+            key_name = get_key_name(fullpath, prefix, key_prefix)
+            files_to_check_for_upload.append(fullpath)
+            existing_keys_to_check_against.append(key_name)
+
+        # we are trying to upload something unknown
+        else:
+            print "I don't know what %s is, so I can't upload it" % path
+
+    for fullpath in files_to_check_for_upload:
+        key_name = get_key_name(fullpath, prefix, key_prefix)
+
+        if no_overwrite and key_name in existing_keys_to_check_against:
+            if not quiet:
+                print 'Skipping %s as it exists on S3' % fullpath
+            continue
+
+        if not quiet:
+            print 'Copying %s to %s/%s' % (fullpath, bucket_name, key_name)
+
+        if not no_op:
+            # 0-byte files don't work and also don't need multipart upload
+            if os.stat(fullpath).st_size != 0 and multipart_capable and \
+                    multipart_requested:
+                multipart_upload(bucket_name, aws_access_key_id,
+                                 aws_secret_access_key, fullpath, key_name,
+                                 reduced, debug, cb, num_cb,
+                                 grant or 'private', headers)
+            else:
+                singlepart_upload(b, key_name, fullpath, cb=cb, num_cb=num_cb,
+                                  policy=grant, reduced_redundancy=reduced,
+                                  headers=headers)
+
+if __name__ == "__main__":
+    main()