Chromium Code Reviews

Side by Side Diff: upload_to_google_storage.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed)
Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes (created 7 years, 9 months ago)
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Uploads files to Google Storage, content-addressed by SHA-1 sum."""

import optparse
import os
import Queue
import re
import sys
import threading
import time

import gstools

GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')
USAGE_STRING = """%prog [options] target [target2 ...].
Target is the file intended to be uploaded to Google Storage.
If target is "-", then a list of files will be taken from standard input.

This script will generate a file (original filename).sha1 containing the
SHA-1 sum of the uploaded file.
It is recommended that the .sha1 file is checked into the repository,
the original file removed from the repository, and a hook added to the
DEPS file to call download_from_google_storage.py.

Example usages
--------------

Scan the current directory and upload all files larger than 1MB:
find . -name .svn -prune -o -size +1000k -type f -print0 | %prog -0 -
"""


def _upload_worker(
    thread_num, q, base_url, gsutil, md5_lock, force, use_md5, ret_codes):
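  # Worker thread: consumes (filename, sha1_sum) pairs from |q| until the
  # (None, None) sentinel arrives, uploading each file to base_url/sha1_sum.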
  while True:
    filename, sha1_sum = q.get()
    if not filename:
      break
    file_url = '%s/%s' % (base_url, sha1_sum)
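    # Objects are named by content hash, so if an object with this SHA-1
    # already exists, the same bytes should already be in the bucket.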
    if gsutil.check_call('ls', file_url)[0] == 0 and not force:
      # File exists, check MD5 hash.
      _, out, _ = gsutil.check_call('ls', '-L', file_url)
      etag_match = re.search(r'ETag:\s+([a-z0-9]{32})', out)
      if etag_match:
        remote_md5 = etag_match.group(1)
        # Calculate the MD5 checksum to match it to Google Storage's ETag.
        if use_md5:
          local_md5 = gstools.GetMD5Cached(filename, md5_lock)
        else:
          local_md5 = gstools.GetMD5(filename, md5_lock)
        if local_md5 == remote_md5:
          print ('File %s already exists at %s and MD5 matches, skipping' %
                 (filename, file_url))
          continue
    print 'Uploading %s to %s' % (filename, file_url)
    code, _, err = gsutil.check_call('cp', '-q', filename, file_url)
    if code != 0:
      ret_codes.put(
          (code,
           'Encountered error on uploading %s to %s\n%s' %
           (filename, file_url, err)))
      continue


def get_targets(args, parser, use_null_terminator):
  if not args:
    parser.error('Missing target.')

  if len(args) == 1 and args[0] == '-':
    # Take stdin as a newline- or null-separated list of files.
    if use_null_terminator:
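      # Note: "find ... -print0" output is itself NUL-terminated, so this
      # split can yield a trailing empty string; it is skipped later because
      # os.path.exists('') is False.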
      return sys.stdin.read().split('\0')
    else:
      return sys.stdin.read().splitlines()
  else:
    return args


def upload_to_google_storage(
    input_filenames, base_url, gsutil, force,
    use_md5, num_threads, skip_hashing):
  # We only want one MD5 calculation happening at a time to avoid HD thrashing.
  md5_lock = threading.Lock()

  # Start up all the worker threads.
  all_threads = []
  ret_codes = Queue.Queue()
  ret_codes.put((0, None))
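  # Seed the return-code queue with a success entry so it is never empty
  # when the results are collected below.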
  upload_queue = Queue.Queue()
  upload_timer = time.time()
  for thread_num in range(num_threads):
    t = threading.Thread(
        target=_upload_worker,
        args=[thread_num, upload_queue, base_url,
              gsutil.clone(), md5_lock, force, use_md5, ret_codes])
    t.daemon = True
    t.start()
    all_threads.append(t)

  # We want to hash everything in a single thread since it's faster.
  # The bottleneck is in disk IO, not CPU.
  hash_timer = time.time()  # For timing statistics.
  for filename in input_filenames:
    if not os.path.exists(filename):
      print 'Error: %s not found, skipping.' % filename
      continue
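    # With --skip_hashing, trust an existing .sha1 file instead of re-reading
    # and re-hashing the target.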
    if os.path.exists('%s.sha1' % filename) and skip_hashing:
      print 'Found hash for %s, skipping.' % filename
      upload_queue.put((filename, open('%s.sha1' % filename).read()))
      continue
    print 'Calculating hash for %s...' % filename,
    sha1_sum = gstools.GetSHA1(filename)
    with open(filename + '.sha1', 'wb') as f:
      f.write(sha1_sum)
    print 'done'
    upload_queue.put((filename, sha1_sum))
  hash_time = time.time() - hash_timer

  # Wait for everything to finish.
  for _ in all_threads:
    upload_queue.put((None, None))  # To mark the end of the work queue.
  for t in all_threads:
    t.join()

  # Print timing information.
  print 'Hashing %s files took %.1f seconds' % (len(input_filenames), hash_time)
  print 'Uploading took %.1f seconds' % (time.time() - upload_timer)

  # See if we ran into any errors.
  max_ret_code = 0
  for ret_code, message in ret_codes.queue:
    max_ret_code = max(ret_code, max_ret_code)
    if message:
      print >> sys.stderr, message

  if not max_ret_code:
    print 'Success.'
  else:
    print 'We encountered some error(s).'

  return max_ret_code


def main(args):
  parser = optparse.OptionParser(USAGE_STRING)
  parser.add_option('-b', '--bucket',
                    help='Google Storage bucket to upload to.')
  parser.add_option('-e', '--boto', help='Specify a custom boto file.')
  parser.add_option('-f', '--force', action='store_true',
                    help='Force upload even if remote file exists.')
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')
  parser.add_option('-m', '--use_md5', action='store_true',
                    help='Generate MD5 files when scanning, and don\'t check '
                         'the MD5 checksum if a .md5 file is found.')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of uploader threads to run.')
  parser.add_option('-s', '--skip_hashing', action='store_true',
                    help='Skip hashing if .sha1 file exists.')
  parser.add_option('-0', '--use_null_terminator', action='store_true',
                    help='Use \\0 instead of \\n when parsing '
                         'the file list from stdin. This is useful if the '
                         'input is coming from "find ... -print0".')
  (options, args) = parser.parse_args()

  # Enumerate our inputs.
  input_filenames = get_targets(args, parser, options.use_null_terminator)

  # Make sure we can find a working instance of gsutil.
  if os.path.exists(options.gsutil_path):
    gsutil = gstools.Gsutil(options.gsutil_path)
  else:
    print >> sys.stderr, ('gsutil not found in %s, bad depot_tools checkout?' %
                          options.gsutil_path)
    return 1

  # Check we have a valid bucket with valid permissions.
  base_url, code = gstools.CheckBucketPermissions(options.bucket, gsutil)
  if code:
    return code

  return upload_to_google_storage(
      input_filenames, base_url, gsutil, options.force, options.use_md5,
      options.num_threads, options.skip_hashing)


if __name__ == '__main__':
  sys.exit(main(sys.argv))
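
For context, the end-to-end flow the docstring describes looks roughly like
this. A sketch only: the bucket name, file path, and the
download_from_google_storage.py hook arguments below are illustrative
assumptions, not part of this change.

  # Upload the file; this writes data/big_file.bin.sha1 next to the original.
  upload_to_google_storage.py -b my-bucket data/big_file.bin

  # Check data/big_file.bin.sha1 into the repository, remove the original,
  # and add a DEPS hook along these lines to fetch it at sync time:
  hooks = [
    {
      'name': 'big_file',
      'pattern': '.',
      'action': ['python', 'download_from_google_storage.py',
                 '--bucket', 'my-bucket',
                 '-s', 'data/big_file.bin.sha1'],
    },
  ]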