#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Uploads files to Google Storage, content addressed by their SHA-1 hash."""

import optparse
import os
import Queue
import re
import sys
import threading
import time

from download_from_google_storage import CheckBucketPermissions
from download_from_google_storage import GetMD5
from download_from_google_storage import GetMD5Cached
from download_from_google_storage import GetSHA1
from download_from_google_storage import Gsutil

GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')

USAGE_STRING = """%prog [options] target [target2 ...].
Target is the file intended to be uploaded to Google Storage.
If target is "-", then a list of files will be taken from standard input.

This script will generate a file (original filename).sha1 containing the
SHA-1 sum of the uploaded file.
It is recommended that the .sha1 file is checked into the repository,
the original file removed from the repository, and a hook added to the
DEPS file to call download_from_google_storage.py.
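For example (hook layout and flag names here are illustrative; check your
checkout's download_from_google_storage.py --help for the exact interface):
hooks = [
  {
    'pattern': '.',
    'action': ['download_from_google_storage',
               '--bucket', 'my-bucket',
               '--sha1_file', 'path/to/target_file.sha1'],
  },
]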

Example usages
--------------

Scan the current directory and upload all files larger than 1MB:
find . -name .svn -prune -o -size +1000k -type f -print0 | %prog -0 -
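
Upload a single file ("my-bucket" here is only a placeholder bucket name):
%prog -b my-bucket path/to/target_file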
"""


def _upload_worker(
    thread_num, q, base_url, gsutil, md5_lock, force, use_md5, ret_codes):
  while True:
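    # Pull work items until the (None, None) sentinel arrives.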
    filename, sha1_sum = q.get()
    if not filename:
      break
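    # Uploads are content addressed: the remote object name is the file's
    # SHA-1, so identical content always maps to the same URL.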
    file_url = '%s/%s' % (base_url, sha1_sum)
    if gsutil.check_call('ls', file_url)[0] == 0 and not force:
      # File exists, check MD5 hash.
      _, out, _ = gsutil.check_call('ls', '-L', file_url)
      etag_match = re.search(r'ETag:\s+([a-z0-9]{32})', out)
      if etag_match:
        remote_md5 = etag_match.group(1)
        # Calculate the MD5 checksum to match it to Google Storage's ETag.
        if use_md5:
          local_md5 = GetMD5Cached(filename, md5_lock)
        else:
          local_md5 = GetMD5(filename, md5_lock)
        if local_md5 == remote_md5:
          print ('File %s already exists at %s and MD5 matches, skipping' %
                 (filename, file_url))
          continue
    print 'Uploading %s to %s' % (filename, file_url)
    code, _, err = gsutil.check_call('cp', '-q', filename, file_url)
    if code != 0:
      ret_codes.put(
          (code,
           'Encountered error on uploading %s to %s\n%s' %
           (filename, file_url, err)))


def get_targets(args, parser, use_null_terminator):
  if not args:
    parser.error('Missing target.')

  if len(args) == 1 and args[0] == '-':
    # Take stdin as a newline or null separated list of files.
    if use_null_terminator:
      return sys.stdin.read().split('\0')
    else:
      return sys.stdin.read().splitlines()
  else:
    return args


def upload_to_google_storage(
    input_filenames, base_url, gsutil, force,
    use_md5, num_threads, skip_hashing):
  # We only want one MD5 calculation happening at a time to avoid HD thrashing.
  md5_lock = threading.Lock()

  # Start up all the worker threads.
  all_threads = []
  ret_codes = Queue.Queue()
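  # Seed the return-code queue with a baseline (0, no message) entry.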
  ret_codes.put((0, None))
  upload_queue = Queue.Queue()
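  # Started before hashing kicks off, so the "Uploading took" figure below is
  # total wall-clock time for the whole run, not just the upload phase.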
  upload_timer = time.time()
  for thread_num in range(num_threads):
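    # Each worker thread gets its own clone of the gsutil wrapper rather than
    # sharing one instance across threads.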
    t = threading.Thread(
        target=_upload_worker,
        args=[thread_num, upload_queue, base_url,
              gsutil.clone(), md5_lock, force, use_md5, ret_codes])
    t.daemon = True
    t.start()
    all_threads.append(t)

  # We want to hash everything in a single thread since it's faster.
  # The bottleneck is in disk IO, not CPU.
  hash_timer = time.time()  # For timing statistics.
  for filename in input_filenames:
    if not os.path.exists(filename):
      print 'Error: %s not found, skipping.' % filename
      continue
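    # An existing .sha1 file is trusted as-is in this mode; a stale hash
    # (file modified after the .sha1 was written) will not be detected.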
    if os.path.exists('%s.sha1' % filename) and skip_hashing:
      print 'Found hash for %s, skipping.' % filename
      upload_queue.put((filename, open('%s.sha1' % filename).read()))
      continue
    print 'Calculating hash for %s...' % filename,
    sha1_sum = GetSHA1(filename)
    with open(filename + '.sha1', 'wb') as f:
      f.write(sha1_sum)
    print 'done'
    upload_queue.put((filename, sha1_sum))
  hash_time = time.time() - hash_timer

  # Wait for everything to finish.
  for _ in all_threads:
    upload_queue.put((None, None))  # To mark the end of the work queue.
  for t in all_threads:
    t.join()

  # Print timing information.
  print 'Hashing %s files took %.1f seconds' % (
      len(input_filenames), hash_time)
  print 'Uploading took %.1f seconds' % (time.time() - upload_timer)

  # See if we ran into any errors.
  max_ret_code = 0
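  # All workers have been joined, so it is safe to read the queue's
  # underlying deque directly instead of draining it with get().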
  for ret_code, message in ret_codes.queue:
    max_ret_code = max(ret_code, max_ret_code)
    if message:
      print >> sys.stderr, message

  if not max_ret_code:
    print 'Success.'
  else:
    print 'We encountered some error(s).'

  return max_ret_code


def main(args):
  parser = optparse.OptionParser(USAGE_STRING)
  parser.add_option('-b', '--bucket',
                    help='Google Storage bucket to upload to.')
  parser.add_option('-e', '--boto', help='Specify a custom boto file.')
  parser.add_option('-f', '--force', action='store_true',
                    help='Force upload even if remote file exists.')
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')
  parser.add_option('-m', '--use_md5', action='store_true',
                    help='Generate MD5 files when scanning, and don\'t check '
                    'the MD5 checksum if a .md5 file is found.')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of uploader threads to run.')
  parser.add_option('-s', '--skip_hashing', action='store_true',
                    help='Skip hashing if .sha1 file exists.')
  parser.add_option('-0', '--use_null_terminator', action='store_true',
                    help='Use \\0 instead of \\n when parsing '
                    'the file list from stdin.  This is useful if the input '
                    'is coming from "find ... -print0".')
  # Strip the program name so the parser only sees the actual arguments.
  (options, args) = parser.parse_args(args[1:])

  # Enumerate our inputs.
  input_filenames = get_targets(args, parser, options.use_null_terminator)

  # Make sure we can find a working instance of gsutil.
  if os.path.exists(options.gsutil_path):
    gsutil = Gsutil(options.gsutil_path)
  else:
    print >> sys.stderr, ('gsutil not found in %s, bad depot_tools checkout?' %
                          options.gsutil_path)
    return 1

  # Check we have a valid bucket with valid permissions.
  base_url, code = CheckBucketPermissions(options.bucket, gsutil)
  if code:
    return code

  return upload_to_google_storage(
      input_filenames, base_url, gsutil, options.force, options.use_md5,
      options.num_threads, options.skip_hashing)


if __name__ == '__main__':
  sys.exit(main(sys.argv))