Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: upload_to_google_storage.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Added some unittests Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« tests/gstools_unittest.py ('K') | « tests/gstools_unittest.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Uploads files to Google Storage content addressed."""
7
8 import optparse
9 import os
10 import Queue
11 import re
12 import sys
13 import threading
14 import time
15
16 import gstools
17
# Default gsutil wrapper location: depot_tools/third_party/gsutil/gsutil,
# resolved relative to this script so it works from any working directory.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')

# optparse usage text; '%prog' is substituted with the script name.
USAGE_STRING = """%prog [options] target [target2 ...].
Target is the file intended to be uploaded to Google Storage.
If target is "-", then a list of files will be taken from standard input

This script will generate a file (original filename).sha1 containing the
sha1 sum of the uploaded file.
It is recommended that the .sha1 file is checked into the repository,
the original file removed from the repository, and a hook added to the
DEPS file to call download_from_google_storage.py.

Example usages
--------------

Scan the current directory and upload all files larger than 1MB:
find . -name .svn -prune -o -size +1000k -type f -print0 | %prog -0 -
"""
38
39
40 def _upload_worker(thread_num, q, base_url, gsutil, options, md5_lock):
41 while True:
42 filename, sha1_sum = q.get()
43 if filename is None:
M-A Ruel 2013/02/27 21:52:07 if not filename: break optional style nit: Pers
Ryan Tseng 2013/02/27 23:34:41 Done.
44 break # A (None, None) item is inserted for each thread to mark EOF.
45 file_url = '%s/%s' % (base_url, sha1_sum)
46 if gsutil.check_call('ls', file_url)[0] == 0 and not options.force:
47 # File exists, check MD5 hash.
48 _, out, _ = gsutil.check_call('ls', '-L', file_url)
49 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out)
50 if etag_match:
51 remote_md5 = etag_match.group(1)
52 # Calculate the MD5 checksum to match it to Google Storage's ETag.
53 if options.use_md5:
54 local_md5 = gstools.GetMD5Cached(filename, md5_lock)
55 else:
56 local_md5 = gstools.GetMD5(filename, md5_lock)
57 if local_md5 == remote_md5:
58 print ('File %s already exists at %s and MD5 matches, exiting' %
59 (filename, file_url))
60 continue
61 print 'Uploading %s to %s' % (filename, file_url)
62 code = gsutil.call('cp', '-q', filename, file_url)
M-A Ruel 2013/02/27 21:52:07 Does this library throws exceptions when receiving
Ryan Tseng 2013/02/27 23:34:41 It doesn't throw an exception, it just returns a n
M-A Ruel 2013/02/28 14:53:56 No, my point was more that occasionally, exception
63 if code != 0:
64 print >> sys.stderr, gsutil.stderr
65 continue
66
67
def get_targets(options, args, parser):
  """Resolves the list of files to upload from the positional arguments.

  A lone '-' argument means the file list is read from stdin, split on
  newlines (or on NULs when --use_null_terminator is set, which matches
  the output of "find ... -print0").

  Args:
    options: Parsed options; reads options.use_null_terminator.
    args: Positional command-line arguments.
    parser: optparse.OptionParser, used to report a missing target.

  Returns:
    List of filenames to upload.
  """
  if not args:
    parser.error('Missing target.')

  if len(args) == 1 and args[0] == '-':
    # Take stdin as a newline or null separated list of files.
    raw = sys.stdin.read()
    if options.use_null_terminator:
      return raw.split('\0')
    return raw.splitlines()

  return args
81
82
83 def upload_to_google_storage(input_filenames, base_url, gsutil, options):
84 # We only want one MD5 calculation happening at a time to avoid HD thrashing.
85 md5_lock = threading.Lock()
86
87 # Start up all the worker threads.
88 all_threads = []
89 upload_queue = Queue.Queue()
90 upload_timer = time.time()
91 for thread_num in range(options.num_threads):
92 t = threading.Thread(
93 target=_upload_worker,
94 args=[thread_num, upload_queue, base_url,
95 gsutil.clone(), options, md5_lock])
96 t.daemon = True
97 t.start()
98 all_threads.append(t)
99
100 # We want to hash everything in a single thread since its faster.
101 # The bottleneck is in disk IO, not CPU.
102 hash_timer = time.time() # For timing statistics.
103 for filename in input_filenames:
104 if not os.path.exists(filename):
105 print 'Error: %s not found, skipping.' % filename
106 continue
107 if os.path.exists('%s.sha1' % filename) and options.skip_hashing:
108 print 'Found hash for %s, skipping.' % filename
109 upload_queue.put((filename, open('%s.sha1' % filename).read()))
110 continue
111 print 'Calculating hash for %s...' % filename,
112 sha1_sum = gstools.GetSHA1(filename)
113 with open(filename + '.sha1', 'wb') as f:
114 f.write(sha1_sum)
115 print 'done'
116 upload_queue.put((filename, sha1_sum))
117 hash_time = time.time() - hash_timer
118
119 # Wait for everything to finish.
120 for _ in all_threads:
121 upload_queue.put((None, None)) # To mark the end of the work queue.
122 for t in all_threads:
123 t.join()
124
125 print 'Success.'
126 print 'Hashing %s files took %1f seconds' % (len(input_filenames), hash_time)
127 print 'Uploading took %1f seconds' % (time.time() - upload_timer)
128 return 0
129
130
def main(args):
  """Parses flags, locates gsutil, checks the bucket, and runs the upload.

  NOTE(review): the |args| parameter is currently ignored --
  parser.parse_args() reads sys.argv directly.  Changing this would require
  also changing the __main__ call site, so it is left as-is.

  Returns:
    0 on success, nonzero on missing gsutil or bad bucket permissions.
  """
  parser = optparse.OptionParser(USAGE_STRING)
  parser.add_option('-b', '--bucket',
                    help='Google Storage bucket to upload to.')
  parser.add_option('-e', '--boto', help='Specify a custom boto file.')
  parser.add_option('-f', '--force', action='store_true',
                    help='Force upload even if remote file exists.')
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')
  parser.add_option('-m', '--use_md5', action='store_true',
                    help='Generate MD5 files when scanning, and don\'t check '
                    'the MD5 checksum if a .md5 file is found.')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of uploader threads to run.')
  parser.add_option('-s', '--skip_hashing', action='store_true',
                    help='Skip hashing if .sha1 file exists.')
  parser.add_option('-0', '--use_null_terminator', action='store_true',
                    help='Use \\0 instead of \\n when parsing '
                    'the file list from stdin. This is useful if the input '
                    'is coming from "find ... -print0".')
  (options, args) = parser.parse_args()

  # Enumerate our inputs.
  input_filenames = get_targets(options, args, parser)

  # Make sure we can find a working instance of gsutil.
  # Bug fix: honor the -g/--gsutil_path flag; the option was parsed but the
  # code always used GSUTIL_DEFAULT_PATH (the flag's default) regardless.
  if os.path.exists(options.gsutil_path):
    gsutil = gstools.Gsutil(options.gsutil_path)
  else:
    print >> sys.stderr, ('gsutil not found in %s, bad depot_tools checkout?' %
                          options.gsutil_path)
    return 1

  # Check we have a valid bucket with valid permissions.
  base_url, code = gstools.CheckBucketPermissions(options.bucket, gsutil)
  if code:
    return code

  return upload_to_google_storage(input_filenames, base_url, gsutil, options)
170
171
# Script entry point.  sys.argv is passed but currently unused by main(),
# which calls parser.parse_args() with no arguments.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW
« tests/gstools_unittest.py ('K') | « tests/gstools_unittest.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698