Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: download_from_google_storage.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Download files from Google Storage based on SHA1 sums."""
7
8
9 import optparse
10 import os
11 import Queue
12 import re
13 import sys
14 import threading
15 import time
16
17 from common import Gsutil
18 from common import GetSHA1
19
20
# Default gsutil location: third_party/gsutil/gsutil under this script's
# directory.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'third_party', 'gsutil',
    'gsutil')
24
25
def _downloader_worker_thread(thread_num, q, options, base_url, gsutil, out_q):
  """Worker loop that fetches files named by SHA1 sum from Google Storage.

  Takes (input_sha1_sum, output_filename) pairs from q until it receives a
  pair whose sum is None. Every status or error line is posted to out_q
  instead of being printed.

  Args:
    thread_num: Number used to identify this worker in its final message.
    q: Queue of (input_sha1_sum, output_filename) tuples; a tuple whose first
        element is None tells the worker to stop.
    options: Parsed options; only options.force is read here.
    base_url: URL prefix; each object is addressed as base_url/<sha1 sum>.
    gsutil: Object providing check_call(), call() and a stderr attribute.
    out_q: Queue that receives the progress and error lines.
  """
  while True:
    input_sha1_sum, output_filename = q.get()
    if input_sha1_sum is None:
      out_q.put('Thread %d is done' % thread_num)
      return

    # Without --force, an existing file with the right checksum is kept.
    if (not options.force and os.path.exists(output_filename) and
        GetSHA1(output_filename) == input_sha1_sum):
      out_q.put('File %s exists and SHA1 sum (%s) matches. Skipping.' % (
          output_filename , input_sha1_sum))
      continue

    # Check if file exists.
    file_url = '%s/%s' % (base_url, input_sha1_sum)
    if gsutil.check_call('ls', file_url)[0] != 0:
      out_q.put('File %s for %s does not exist, skipping.' % (
          file_url, output_filename))
      continue

    # Fetch the file.
    out_q.put('Downloading %s to %s...' % (file_url, output_filename))
    code = gsutil.call('cp', '-q', file_url, output_filename)
    if code != 0:
      # Report the failure and move on to the next item. Returning here would
      # only stop this worker (a thread target's return value is discarded),
      # leaving queued files unfetched and no "done" message posted.
      out_q.put(gsutil.stderr)
49
50
def _find_gsutil(options):
  """Returns a Gsutil for options.gsutil_path or PATH, or None if not found."""
  if os.path.exists(options.gsutil_path):
    return Gsutil(options.gsutil_path, boto_path=options.boto)
  for path in os.environ.get('PATH', '').split(os.pathsep):
    candidate = os.path.join(path, 'gsutil')
    if path and os.path.isfile(candidate):
      return Gsutil(candidate, boto_path=options.boto)
  return None


def _read_sha1_sum(sha1_path):
  """Returns the SHA1 sum on the first line of sha1_path, or None."""
  with open(sha1_path) as f:
    first_line = f.readline(1024).rstrip()
  sha1_match = re.match(r'^([A-Za-z0-9]{40})$', first_line)
  return sha1_match.group(1) if sha1_match else None


def _enqueue_sha1_files(directory, recursive, work_queue):
  """Queues (sha1 sum, output path) for each .sha1 file; returns the count."""
  suffix = '.sha1'
  queued = 0
  for root, dirs, files in os.walk(directory):
    # os.walk honours in-place edits of dirs, so assign through a slice
    # rather than removing entries while iterating over the same list.
    if recursive:
      dirs[:] = [d for d in dirs if d not in ('.svn', '.git')]
    else:
      del dirs[:]
    for filename in files:
      if not filename.endswith(suffix):
        continue
      full_path = os.path.join(root, filename)
      sha1_sum = _read_sha1_sum(full_path)
      if sha1_sum is None:
        sys.stderr.write('No sha1 sum found in %s.\n' % full_path)
        continue
      # Strip only the trailing extension; str.replace would also remove
      # '.sha1' occurring elsewhere in the path.
      work_queue.put((sha1_sum, full_path[:-len(suffix)]))
      queued += 1
  return queued


def download_from_google_storage(input_filename, options):
  """Downloads one file, or every file named by .sha1 files in a directory.

  Args:
    input_filename: A SHA1 sum, the path of a .sha1 file (options.sha1_file)
        or a directory to scan for .sha1 files (options.directory).
    options: Parsed options. Reads bucket, gsutil_path, boto, directory,
        recursive, sha1_file, output and num_threads; options is also handed
        to each worker thread.

  Returns:
    0 once every worker has finished; 1 if gsutil or the SHA1 sum cannot be
    found; otherwise 403, 404 or the gsutil exit code from the bucket check.
  """
  # Single-argument print(...) and sys.stderr.write() are used throughout so
  # the output is the same as with the Python 2 print statement.
  base_url = 'gs://%s' % options.bucket

  # Make sure we can find a working instance of gsutil.
  gsutil = _find_gsutil(options)
  if gsutil is None:
    sys.stderr.write('gsutil not found at %s or on PATH.\n' %
                     options.gsutil_path)
    return 1

  # Check if we have permissions to the Google Storage bucket.
  code, _, ls_err = gsutil.check_call('ls', base_url)
  if code == 403:
    code = gsutil.call('config')
    if code != 0:
      sys.stderr.write('Error while authenticating to %s.\n' % base_url)
      return 403
  elif code == 404:
    sys.stderr.write('%s not found.\n' % base_url)
    return 404
  elif code != 0:
    sys.stderr.write('%s\n' % ls_err)
    return code

  # Enumerate our work queue.
  work_queue = Queue.Queue()
  if options.directory:
    work_queue_size = _enqueue_sha1_files(
        input_filename, options.recursive, work_queue)
  else:
    sha1_sum = input_filename
    if getattr(options, 'sha1_file', False):
      # The target is a .sha1 file: download the sum stored in it, not an
      # object named after the file's path.
      sha1_sum = _read_sha1_sum(input_filename)
      if sha1_sum is None:
        sys.stderr.write('No sha1 sum found in %s.\n' % input_filename)
        return 1
    work_queue.put((sha1_sum, options.output))
    work_queue_size = 1

  # Start up all the worker threads, then queue one stop marker per thread.
  all_threads = []
  download_timer = time.time()
  stdout_queue = Queue.Queue()  # Lines the workers want printed.
  for thread_num in range(options.num_threads):
    worker_args = [
        thread_num, work_queue, options, base_url, gsutil.clone(),
        stdout_queue]
    t = threading.Thread(target=_downloader_worker_thread, args=worker_args)
    t.daemon = True
    t.start()
    all_threads.append(t)
  for _ in all_threads:
    work_queue.put((None, None))  # Used to tell worker threads to stop.

  # Print worker output until every thread has exited. The timeout keeps this
  # loop from blocking forever when a worker exits without queueing anything
  # after the last liveness check.
  while True:
    try:
      print(stdout_queue.get(timeout=0.1))
    except Queue.Empty:
      if not any(t.is_alive() for t in all_threads):
        break
  # Flush lines queued between the last timeout and the liveness check.
  while True:
    try:
      print(stdout_queue.get_nowait())
    except Queue.Empty:
      break

  # NOTE(review): workers report failures only as text lines, so this message
  # and the 0 return value do not reflect individual download errors.
  print('Success.')
  print('Downloading %d files took %1f second(s)' % (
      work_queue_size, time.time() - download_timer))
  return 0
131
132
def main(args):
  """Parses the command line and runs the download.

  Args:
    args: Full argument vector including the program name, as in sys.argv.

  Returns:
    The value returned by download_from_google_storage(). Invalid usage is
    reported through parser.error(), which exits the process.
  """
  usage = ('usage: %prog [options] target\nTarget must be:\n'
           '(default) a sha1 sum ([A-Za-z0-9]{40}).\n(-s or --sha1_file) a '
           '.sha1 file, containing a sha1 sum on the first line. (-d or '
           '--directory) A directory to scan for .sha1 files. ')
  parser = optparse.OptionParser(usage)
  parser.add_option('-o', '--output',
                    help='Specify the output file name. Defaults to:\n'
                         '(a) Given a SHA1 hash, the name is the SHA1 hash.\n'
                         '(b) Given a .sha1 file or directory, the name will '
                         'match (.*).sha1.')
  parser.add_option('-b', '--bucket',
                    help='Google Storage bucket to fetch from.')
  parser.add_option('-e', '--boto',
                    help='Specify a custom boto file.')
  parser.add_option('-c', '--no_resume', action='store_true',
                    help='Do not resume: fail if the output file already '
                         'exists.')
  parser.add_option('-f', '--force', action='store_true',
                    help='Force download even if local file exists.')
  parser.add_option('-r', '--recursive', action='store_true',
                    help='Scan folders recursively for .sha1 files. '
                         'Must be used with -d/--directory')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of downloader threads to run.')
  parser.add_option('-d', '--directory', action='store_true',
                    help='The target is a directory. '
                         'Cannot be used with -s/--sha1_file.')
  parser.add_option('-s', '--sha1_file', action='store_true',
                    help='The target is a file containing a sha1 sum. '
                         'Cannot be used with -d/--directory.')
  # This file should be stored in tools/deps_scripts/ and we want the path to
  # third_party/gsutil/gsutil
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')

  # Parse the vector we were given (minus the program name) instead of
  # silently falling back to sys.argv.
  (options, targets) = parser.parse_args(args[1:])
  if len(targets) < 1:
    parser.error('Missing target.')
  if len(targets) > 1:
    parser.error('Too many targets.')
  if not options.bucket:
    parser.error('Missing bucket.  Specify bucket with --bucket.')
  if options.sha1_file and options.directory:
    parser.error('Both --directory and --sha1_file are specified, '
                 'can only specify one.')
  if options.recursive and not options.directory:
    parser.error('--recursive specified but --directory not specified.')
  if options.output and options.directory:
    parser.error('--directory is specified, so --output has no effect.')
  input_filename = targets[0]

  # Set output filename if not specified.
  if not options.output and not options.directory:
    if options.sha1_file:
      # Target is a .sha1 file.
      if not input_filename.endswith('.sha1'):
        parser.error('--sha1_file is specified, but the input filename '
                     'does not end with .sha1, and no --output is specified. '
                     'Either make sure the input filename has a .sha1 '
                     'extension, or specify --output.')
      options.output = input_filename[:-len('.sha1')]
    else:
      # Target is a sha1 sum, so output filename would also be the sha1 sum.
      options.output = input_filename

  # Refuse to touch an existing output file only when --no_resume was given;
  # --force overrides it. Previously this fired when --no_resume was absent,
  # contradicting the error message.
  if not options.directory and not options.force and options.no_resume:
    if os.path.exists(options.output):
      parser.error('Output file %s exists and --no_resume is specified.'
                   % options.output)

  return download_from_google_storage(input_filename, options)
208
209
# Script entry point: hand the full argument vector to main() and use its
# return value as the process exit status.
if __name__ == '__main__':
  exit_code = main(sys.argv)
  sys.exit(exit_code)
OLDNEW
« common.py ('K') | « common.py ('k') | gsdl » ('j') | gsdl » ('J')

Powered by Google App Engine
This is Rietveld 408576698