Chromium Code Reviews

Side by Side Diff: upload_to_google_storage.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gstools.py, added more error messages Created 7 years, 9 months ago
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Uploads files to Google Storage, content-addressed by their SHA1 sum."""

import optparse
import os
import Queue
import re
import sys
import threading
import time

from download_from_google_storage import CheckBucketPermissions
from download_from_google_storage import GetMD5
from download_from_google_storage import GetMD5Cached
from download_from_google_storage import GetSHA1
from download_from_google_storage import Gsutil

GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')

USAGE_STRING = """%prog [options] target [target2 ...].
Target is the file intended to be uploaded to Google Storage.
If target is "-", then a list of files will be taken from standard input.

This script will generate a file (original filename).sha1 containing the
SHA1 sum of the uploaded file.
It is recommended that the .sha1 file is checked into the repository,
the original file removed from the repository, and a hook added to the
DEPS file to call download_from_google_storage.py.

Example usage
-------------

Scan the current directory and upload all files larger than 1MB:
find . -name .svn -prune -o -size +1000k -type f -print0 | %prog -0 -
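
Upload a single file to a bucket (the bucket name "my-bucket" is illustrative):
%prog -b my-bucket path/to/target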
41 """
42
43
def _upload_worker(
    thread_num, q, base_url, gsutil, md5_lock, force, use_md5, ret_codes):
  while True:
    filename, sha1_sum = q.get()
    if not filename:
      break
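    # Content-addressed: the object's name in the bucket is the file's SHA1.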
    file_url = '%s/%s' % (base_url, sha1_sum)
    if gsutil.check_call('ls', file_url)[0] == 0 and not force:
      # File exists, check MD5 hash.
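      # Google Storage reports an object's MD5 digest as its ETag (for
      # non-composite objects), so it can be compared against the local MD5.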
      _, out, _ = gsutil.check_call('ls', '-L', file_url)
      etag_match = re.search(r'ETag:\s+([a-z0-9]{32})', out)
      if etag_match:
        remote_md5 = etag_match.group(1)
        # Calculate the MD5 checksum to match it to Google Storage's ETag.
        if use_md5:
          local_md5 = GetMD5Cached(filename, md5_lock)
        else:
          local_md5 = GetMD5(filename, md5_lock)
        if local_md5 == remote_md5:
          print ('File %s already exists at %s and MD5 matches, skipping' %
                 (filename, file_url))
          continue
    print 'Uploading %s to %s' % (filename, file_url)
    code, _, err = gsutil.check_call('cp', '-q', filename, file_url)
    if code != 0:
      ret_codes.put(
          (code,
           'Encountered error on uploading %s to %s\n%s' %
           (filename, file_url, err)))
      continue


def get_targets(args, parser, use_null_terminator):
  if not args:
    parser.error('Missing target.')

  if len(args) == 1 and args[0] == '-':
    # Take stdin as a newline- or null-separated list of files.
    if use_null_terminator:
      return sys.stdin.read().split('\0')
    else:
      return sys.stdin.read().splitlines()
  else:
    return args


def upload_to_google_storage(
    input_filenames, base_url, gsutil, force,
    use_md5, num_threads, skip_hashing):
  # We only want one MD5 calculation happening at a time to avoid HD thrashing.
  md5_lock = threading.Lock()

  # Start up all the worker threads.
  all_threads = []
  ret_codes = Queue.Queue()
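  # Seed the queue with a success code so the final scan over ret_codes
  # always has a baseline entry, even when no worker reports an error.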
  ret_codes.put((0, None))
  upload_queue = Queue.Queue()
  upload_timer = time.time()
  for thread_num in range(num_threads):
    t = threading.Thread(
        target=_upload_worker,
        args=[thread_num, upload_queue, base_url,
              gsutil.clone(), md5_lock, force, use_md5, ret_codes])
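    # Daemon threads die with the main process, so a wedged worker cannot
    # keep the script alive on its own.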
    t.daemon = True
    t.start()
    all_threads.append(t)

  # We want to hash everything in a single thread since it's faster.
  # The bottleneck is in disk IO, not CPU.
  hash_timer = time.time()  # For timing statistics.
  for filename in input_filenames:
    if not os.path.exists(filename):
      print 'Error: %s not found, skipping.' % filename
      continue
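    # With --skip_hashing, trust an existing .sha1 file rather than rehashing.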
    if os.path.exists('%s.sha1' % filename) and skip_hashing:
      print 'Found hash for %s, skipping.' % filename
      upload_queue.put((filename, open('%s.sha1' % filename).read()))
      continue
    print 'Calculating hash for %s...' % filename,
    sha1_sum = GetSHA1(filename)
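    # Persist the hash next to the original; this .sha1 file is what gets
    # checked into the repository in place of the binary.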
    with open(filename + '.sha1', 'wb') as f:
      f.write(sha1_sum)
    print 'done'
    upload_queue.put((filename, sha1_sum))
  hash_time = time.time() - hash_timer

  # Wait for everything to finish.
  for _ in all_threads:
    upload_queue.put((None, None))  # To mark the end of the work queue.
  for t in all_threads:
    t.join()

  # Print timing information.
  print 'Hashing %d files took %.1f seconds' % (len(input_filenames), hash_time)
  print 'Uploading took %.1f seconds' % (time.time() - upload_timer)

  # See if we ran into any errors.
  max_ret_code = 0
  for ret_code, message in ret_codes.queue:
    max_ret_code = max(ret_code, max_ret_code)
    if message:
      print >> sys.stderr, message

  if not max_ret_code:
    print 'Success.'
  else:
    print 'We encountered some error(s).'

  return max_ret_code


def main(args):
  parser = optparse.OptionParser(USAGE_STRING)
  parser.add_option('-b', '--bucket',
                    help='Google Storage bucket to upload to.')
  parser.add_option('-e', '--boto', help='Specify a custom boto file.')
  parser.add_option('-f', '--force', action='store_true',
                    help='Force upload even if remote file exists.')
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')
  parser.add_option('-m', '--use_md5', action='store_true',
                    help='Generate MD5 files when scanning, and don\'t check '
                    'the MD5 checksum if a .md5 file is found.')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of uploader threads to run.')
  parser.add_option('-s', '--skip_hashing', action='store_true',
                    help='Skip hashing if .sha1 file exists.')
  parser.add_option('-0', '--use_null_terminator', action='store_true',
                    help='Use \\0 instead of \\n when parsing '
                    'the file list from stdin. This is useful if the input '
                    'is coming from "find ... -print0".')
  (options, args) = parser.parse_args(args[1:])

  # Enumerate our inputs.
  input_filenames = get_targets(args, parser, options.use_null_terminator)

  # Make sure we can find a working instance of gsutil.
  if os.path.exists(options.gsutil_path):
    gsutil = Gsutil(options.gsutil_path)
  else:
    print >> sys.stderr, ('gsutil not found in %s, bad depot_tools checkout?' %
                          options.gsutil_path)
    return 1

  # Check that we have a valid bucket with valid permissions.
  base_url, code = CheckBucketPermissions(options.bucket, gsutil)
  if code:
    return code

  return upload_to_google_storage(
      input_filenames, base_url, gsutil, options.force, options.use_md5,
      options.num_threads, options.skip_hashing)


if __name__ == '__main__':
  sys.exit(main(sys.argv))
