Chromium Code Reviews

Side by Side Diff: third_party/gsutil/bin/s3put

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
#!/usr/bin/env python
# Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import getopt
import sys
import os
import boto

try:
    # multipart portions copyright Fabian Topfstedt
    # https://gist.github.com/924094

    import math
    import mimetypes
    from multiprocessing import Pool
    from boto.s3.connection import S3Connection
    from filechunkio import FileChunkIO
    multipart_capable = True
    usage_flag_multipart_capable = """ [--multipart]"""
    usage_string_multipart_capable = """
        multipart - Upload files as multiple parts. This needs filechunkio."""
except ImportError as err:
    multipart_capable = False
    usage_flag_multipart_capable = ""
    usage_string_multipart_capable = '\n\n     "' + \
        err.message[len('No module named '):] + \
        '" is missing for multipart support '


usage_string = """
SYNOPSIS
    s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>]
          -b/--bucket <bucket_name> [-c/--callback <num_cb>]
          [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>]
          [-n/--no_op] [-p/--prefix <prefix>] [-k/--key_prefix <key_prefix>]
          [-q/--quiet] [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced]
          [--header]""" + usage_flag_multipart_capable + """ path [path...]

    Where
        access_key - Your AWS Access Key ID. If not supplied, boto will
                     use the value of the environment variable
                     AWS_ACCESS_KEY_ID
        secret_key - Your AWS Secret Access Key. If not supplied, boto
                     will use the value of the environment variable
                     AWS_SECRET_ACCESS_KEY
        bucket_name - The name of the S3 bucket the file(s) should be
                      copied to.
        path - A path to a directory or file that represents the items
               to be uploaded. If the path points to an individual file,
               that file will be uploaded to the specified bucket. If the
               path points to a directory, s3put will recursively traverse
               the directory and upload all files to the specified bucket.
        debug_level - 0 means no debug output (default), 1 means normal
                      debug output from boto, and 2 means boto debug output
                      plus request/response output from httplib
        ignore_dirs - a comma-separated list of directory names that will
                      be ignored and not uploaded to S3.
        num_cb - The number of progress callbacks to display. The default
                 is zero, which means no callbacks. If you supply a value
                 of "-c 10", for example, the progress callback will be
                 called 10 times for each file transferred.
        prefix - A file path prefix that will be stripped from the full
                 path of the file when determining the key name in S3.
                 For example, if the full path of a file is:
                     /home/foo/bar/fie.baz
                 and the prefix is specified as "-p /home/foo/", the
                 resulting key name in S3 will be:
                     /bar/fie.baz
                 The prefix must end in a trailing separator; if it
                 does not, one will be added.
        key_prefix - A prefix to be added to the S3 key name, after any
                     stripping of the file path is done based on the
                     "-p/--prefix" option.
        reduced - Use Reduced Redundancy storage
        grant - A canned ACL policy that will be granted on each file
                transferred to S3. The value provided must be one
                of the "canned" ACL policies supported by S3:
                private|public-read|public-read-write|authenticated-read
        no_overwrite - No files will be overwritten on S3. If the file/key
                       already exists on S3, it will be kept. This is useful
                       for resuming interrupted transfers. Note that this is
                       not a sync: even if the file has been updated locally,
                       the key on S3 will not be updated if it already
                       exists.
        header - key=value pairs of extra header(s) to pass along in the
                 request""" + usage_string_multipart_capable + """


If the -n option is provided, no files will be transferred to S3 but
informational messages will be printed about what would happen.
"""


def usage():
    print usage_string
    sys.exit()


def submit_cb(bytes_so_far, total_bytes):
    print '%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes)


def get_key_name(fullpath, prefix, key_prefix):
    if fullpath.startswith(prefix):
        key_name = fullpath[len(prefix):]
    else:
        key_name = fullpath
    parts = key_name.split(os.sep)
    return key_prefix + '/'.join(parts)

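# For illustration (hypothetical paths): with prefix '/home/foo/' and
# key_prefix 'rel/', get_key_name('/home/foo/bar/fie.baz', '/home/foo/',
# 'rel/') returns 'rel/bar/fie.baz'. os.sep is replaced with '/' so the
# resulting key is the same on every platform.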

def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num,
                 source_path, offset, bytes, debug, cb, num_cb,
                 amount_of_retries=10):
    """
    Uploads a part with retries.

    Runs in a Pool worker process, so it opens its own S3Connection
    rather than sharing one with the parent process.
    """
    if debug == 1:
        print "_upload_part(%s, %s, %s)" % (source_path, offset, bytes)

    def _upload(retries_left=amount_of_retries):
        try:
            if debug == 1:
                print 'Start uploading part #%d ...' % part_num
            conn = S3Connection(aws_key, aws_secret)
            conn.debug = debug
            bucket = conn.get_bucket(bucketname)
            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(source_path, 'r', offset=offset,
                                     bytes=bytes) as fp:
                        mp.upload_part_from_file(fp=fp, part_num=part_num,
                                                 cb=cb, num_cb=num_cb)
                    break
        except Exception as exc:
            if retries_left:
                _upload(retries_left=retries_left - 1)
            else:
                print 'Failed uploading part #%d' % part_num
                raise exc
        else:
            if debug == 1:
                print '... Uploaded part #%d' % part_num

    _upload()


def multipart_upload(bucketname, aws_key, aws_secret, source_path, keyname,
                     reduced, debug, cb, num_cb, acl='private', headers=None,
                     guess_mimetype=True, parallel_processes=4):
    """
    Parallel multipart upload.
    """
    # Avoid a shared mutable default argument: headers is updated below,
    # so a module-level default dict would leak values between calls.
    if headers is None:
        headers = {}
    conn = S3Connection(aws_key, aws_secret)
    conn.debug = debug
    bucket = conn.get_bucket(bucketname)

    if guess_mimetype:
        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
        headers.update({'Content-Type': mtype})

    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
                                          reduced_redundancy=reduced)

    source_size = os.stat(source_path).st_size
    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                          5242880)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
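    # The chunk size above grows with the square root of the file size but
    # never drops below S3's 5 MB minimum part size (5242880 bytes). As a
    # worked example: a 500 MiB file (524288000 bytes) gets chunks of
    # sqrt(5242880 * 524288000) = 52428800 bytes (50 MiB), i.e. 10 parts.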

    pool = Pool(processes=parallel_processes)
    for i in range(chunk_amount):
        offset = i * bytes_per_chunk
        remaining_bytes = source_size - offset
        bytes = min(bytes_per_chunk, remaining_bytes)
        part_num = i + 1
        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id,
                                        part_num, source_path, offset, bytes,
                                        debug, cb, num_cb])
    pool.close()
    pool.join()

    if len(mp.get_all_parts()) == chunk_amount:
        mp.complete_upload()
        key = bucket.get_key(keyname)
        key.set_acl(acl)
    else:
        mp.cancel_upload()

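# A hypothetical call, for illustration only (bucket and paths are made up):
#
#   multipart_upload('mybucket', access_key, secret_key, '/tmp/big.iso',
#                    'big.iso', reduced=False, debug=0, cb=None, num_cb=0)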

def singlepart_upload(bucket, key_name, fullpath, *args, **kwargs):
    """
    Single upload.
    """
    k = bucket.new_key(key_name)
    k.set_contents_from_filename(fullpath, *args, **kwargs)


def expand_path(path):
    path = os.path.expanduser(path)
    path = os.path.expandvars(path)
    return os.path.abspath(path)

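# For example (hypothetical user): expand_path('~/media') resolves to
# '/home/<user>/media', and environment variables such as '$HOME' are
# expanded the same way before the path is made absolute.
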
def main():

    # default values
    aws_access_key_id = None
    aws_secret_access_key = None
    bucket_name = ''
    ignore_dirs = []
    debug = 0
    cb = None
    num_cb = 0
    quiet = False
    no_op = False
    prefix = '/'
    key_prefix = ''
    grant = None
    no_overwrite = False
    reduced = False
    headers = {}
    multipart_requested = False

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'a:b:c:d:g:hi:k:np:qs:wr',
            ['access_key=', 'bucket=', 'callback=', 'debug=', 'help', 'grant=',
             'ignore=', 'key_prefix=', 'no_op', 'prefix=', 'quiet',
             'secret_key=', 'no_overwrite', 'reduced', 'header=', 'multipart'])
    except getopt.GetoptError:
        usage()

    # parse opts
    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
        if o in ('-a', '--access_key'):
            aws_access_key_id = a
        if o in ('-b', '--bucket'):
            bucket_name = a
        if o in ('-c', '--callback'):
            num_cb = int(a)
            cb = submit_cb
        if o in ('-d', '--debug'):
            debug = int(a)
        if o in ('-g', '--grant'):
            grant = a
        if o in ('-i', '--ignore'):
            ignore_dirs = a.split(',')
        if o in ('-n', '--no_op'):
            no_op = True
        if o in ('-w', '--no_overwrite'):
            no_overwrite = True
        if o in ('-p', '--prefix'):
            prefix = a
            if prefix[-1] != os.sep:
                prefix = prefix + os.sep
            prefix = expand_path(prefix)
        if o in ('-k', '--key_prefix'):
            key_prefix = a
        if o in ('-q', '--quiet'):
            quiet = True
        if o in ('-s', '--secret_key'):
            aws_secret_access_key = a
        if o in ('-r', '--reduced'):
            reduced = True
        if o == '--header':
            # split only on the first '=' so header values may contain '='
            (k, v) = a.split('=', 1)
            headers[k] = v
        if o == '--multipart':
            if multipart_capable:
                multipart_requested = True
            else:
                print "multipart upload requested but not capable"
                sys.exit()

    if len(args) < 1:
        usage()

    if not bucket_name:
        print "bucket name is required!"
        usage()

    c = boto.connect_s3(aws_access_key_id=aws_access_key_id,
                        aws_secret_access_key=aws_secret_access_key)
    c.debug = debug
    b = c.get_bucket(bucket_name)
    existing_keys_to_check_against = []
    files_to_check_for_upload = []

    for path in args:
        path = expand_path(path)
        # upload a directory of files recursively
        if os.path.isdir(path):
            if no_overwrite:
                if not quiet:
                    print 'Getting list of existing keys to check against'
                for key in b.list(get_key_name(path, prefix, key_prefix)):
                    existing_keys_to_check_against.append(key.name)
            for root, dirs, files in os.walk(path):
                for ignore in ignore_dirs:
                    if ignore in dirs:
                        dirs.remove(ignore)
                # use a distinct name so the outer 'path' is not clobbered
                for fname in files:
                    # skip hidden files
                    if fname.startswith('.'):
                        continue
                    files_to_check_for_upload.append(os.path.join(root, fname))

        # upload a single file
        elif os.path.isfile(path):
            fullpath = os.path.abspath(path)
            key_name = get_key_name(fullpath, prefix, key_prefix)
            files_to_check_for_upload.append(fullpath)
            existing_keys_to_check_against.append(key_name)

        # we are trying to upload something unknown
        else:
            print "I don't know what %s is, so I can't upload it" % path

    for fullpath in files_to_check_for_upload:
        key_name = get_key_name(fullpath, prefix, key_prefix)

        if no_overwrite and key_name in existing_keys_to_check_against:
            if not quiet:
                print 'Skipping %s as it exists in s3' % fullpath
            continue

        if not quiet:
            print 'Copying %s to %s/%s' % (fullpath, bucket_name, key_name)

        if not no_op:
            # 0-byte files don't work and also don't need multipart upload
            if os.stat(fullpath).st_size != 0 and multipart_capable and \
                    multipart_requested:
                multipart_upload(bucket_name, aws_access_key_id,
                                 aws_secret_access_key, fullpath, key_name,
                                 reduced, debug, cb, num_cb,
                                 grant or 'private', headers)
            else:
                singlepart_upload(b, key_name, fullpath, cb=cb, num_cb=num_cb,
                                  policy=grant, reduced_redundancy=reduced,
                                  headers=headers)


if __name__ == "__main__":
    main()