Chromium Code Reviews

Side by Side Diff: third_party/gsutil/boto/bin/s3put

Issue 12317103: Added gsutil to depot tools (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Created 7 years, 9 months ago
#!/usr/bin/env python
# Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import getopt
import sys
import os
import boto

try:
    # multipart portions copyright Fabian Topfstedt
    # https://gist.github.com/924094

    import math
    import mimetypes
    from multiprocessing import Pool
    from boto.s3.connection import S3Connection
    from filechunkio import FileChunkIO
    multipart_capable = True
    usage_flag_multipart_capable = """ [--multipart]"""
    usage_string_multipart_capable = """
        multipart - Upload files as multiple parts. This needs filechunkio."""
except ImportError as err:
    multipart_capable = False
    usage_flag_multipart_capable = ""
    usage_string_multipart_capable = '\n\n "' + \
        err.message[len('No module named '):] + \
        '" is missing for multipart support '


usage_string = """
SYNOPSIS
    s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>]
          -b/--bucket <bucket_name> [-c/--callback <num_cb>]
          [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>]
          [-n/--no_op] [-p/--prefix <prefix>] [-k/--key_prefix <key_prefix>]
          [-q/--quiet] [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced]
          [--header] [--host <s3_host>]""" + usage_flag_multipart_capable + """ path [path...]

    Where
        access_key - Your AWS Access Key ID. If not supplied, boto will
                     use the value of the environment variable
                     AWS_ACCESS_KEY_ID
        secret_key - Your AWS Secret Access Key. If not supplied, boto
                     will use the value of the environment variable
                     AWS_SECRET_ACCESS_KEY
        bucket_name - The name of the S3 bucket the file(s) should be
                      copied to.
        path - A path to a directory or file that represents the items
               to be uploaded. If the path points to an individual file,
               that file will be uploaded to the specified bucket. If the
               path points to a directory, it will recursively traverse
               the directory and upload all files to the specified bucket.
        debug_level - 0 means no debug output (default), 1 means normal
                      debug output from boto, and 2 means boto debug output
                      plus request/response output from httplib
        ignore_dirs - a comma-separated list of directory names that will
                      be ignored and not uploaded to S3.
        num_cb - The number of progress callbacks to display. The default
                 is zero which means no callbacks. If you supplied a value
                 of "-c 10" for example, the progress callback would be
                 called 10 times for each file transferred.
        prefix - A file path prefix that will be stripped from the full
                 path of the file when determining the key name in S3.
                 For example, if the full path of a file is:
                     /home/foo/bar/fie.baz
                 and the prefix is specified as "-p /home/foo/" the
                 resulting key name in S3 will be:
                     /bar/fie.baz
                 The prefix must end in a trailing separator; if it
                 does not, one will be added.
        key_prefix - A prefix to be added to the S3 key name, after any
                     stripping of the file path is done based on the
                     "-p/--prefix" option.
        reduced - Use Reduced Redundancy storage
        grant - A canned ACL policy that will be granted on each file
                transferred to S3. The value provided must be one
                of the "canned" ACL policies supported by S3:
                private|public-read|public-read-write|authenticated-read
        no_overwrite - No files will be overwritten on S3; if the file/key
                       already exists on S3 it will be kept. This is useful
                       for resuming interrupted transfers. Note this is not
                       a sync: even if the file has been updated locally,
                       the file on S3 will not be updated if the key
                       already exists.
        header - key=value pairs of extra header(s) to pass along in the
                 request
        host - Hostname override, for using an endpoint other than AWS S3
""" + usage_string_multipart_capable + """


    If the -n option is provided, no files will be transferred to S3 but
    informational messages will be printed about what would happen.
"""


def usage():
    print usage_string
    sys.exit()


def submit_cb(bytes_so_far, total_bytes):
    print '%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes)


def get_key_name(fullpath, prefix, key_prefix):
    if fullpath.startswith(prefix):
        key_name = fullpath[len(prefix):]
    else:
        key_name = fullpath
    l = key_name.split(os.sep)
    return key_prefix + '/'.join(l)


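# Illustrative sketch of get_key_name()'s prefix/key_prefix mapping described
# in the usage text above (hypothetical paths; assumes os.sep == '/'). Note
# that main() passes the -p/--prefix value through expand_path(), which drops
# any trailing separator:
#
#   get_key_name('/home/foo/bar/fie.baz', '/home/foo', '')
#       -> '/bar/fie.baz'
#   get_key_name('/home/foo/bar/fie.baz', '/home/foo', 'backup')
#       -> 'backup/bar/fie.baz'

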
def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num,
                 source_path, offset, bytes, debug, cb, num_cb,
                 amount_of_retries=10):
    """
    Uploads a part with retries.
    """
    if debug == 1:
        print "_upload_part(%s, %s, %s)" % (source_path, offset, bytes)

    def _upload(retries_left=amount_of_retries):
        try:
            if debug == 1:
                print 'Start uploading part #%d ...' % part_num
            conn = S3Connection(aws_key, aws_secret)
            conn.debug = debug
            bucket = conn.get_bucket(bucketname)
            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(source_path, 'r', offset=offset,
                                     bytes=bytes) as fp:
                        mp.upload_part_from_file(fp=fp, part_num=part_num,
                                                 cb=cb, num_cb=num_cb)
                    break
        except Exception as exc:
            if retries_left:
                _upload(retries_left=retries_left - 1)
            else:
                print 'Failed uploading part #%d' % part_num
                raise exc
        else:
            if debug == 1:
                print '... Uploaded part #%d' % part_num

    _upload()


def multipart_upload(bucketname, aws_key, aws_secret, source_path, keyname,
                     reduced, debug, cb, num_cb, acl='private', headers={},
                     guess_mimetype=True, parallel_processes=4):
    """
    Parallel multipart upload.
    """
    conn = S3Connection(aws_key, aws_secret)
    conn.debug = debug
    bucket = conn.get_bucket(bucketname)

    if guess_mimetype:
        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
        headers.update({'Content-Type': mtype})

    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
                                          reduced_redundancy=reduced)

    source_size = os.stat(source_path).st_size
    # Part size grows with the square root of the file size but never drops
    # below S3's 5 MB minimum part size (5242880 bytes).
    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                          5242880)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

    pool = Pool(processes=parallel_processes)
    for i in range(chunk_amount):
        offset = i * bytes_per_chunk
        remaining_bytes = source_size - offset
        bytes = min([bytes_per_chunk, remaining_bytes])
        part_num = i + 1
        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id,
                                        part_num, source_path, offset, bytes,
                                        debug, cb, num_cb])
    pool.close()
    pool.join()

    if len(mp.get_all_parts()) == chunk_amount:
        mp.complete_upload()
        key = bucket.get_key(keyname)
        key.set_acl(acl)
    else:
        mp.cancel_upload()


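# Rough arithmetic for the chunk sizing in multipart_upload() above
# (illustrative only): for a 1 GiB (1073741824-byte) file,
# sqrt(5242880) * sqrt(1073741824) is roughly 2290 * 32768, i.e. about 75 MB
# per part, so the upload is split into ceil(1 GiB / ~75 MB) = 15 parts.
# Files of 5 MB or less are uploaded as a single part.

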
def singlepart_upload(bucket, key_name, fullpath, *kargs, **kwargs):
    """
    Single upload.
    """
    k = bucket.new_key(key_name)
    k.set_contents_from_filename(fullpath, *kargs, **kwargs)


def expand_path(path):
    path = os.path.expanduser(path)
    path = os.path.expandvars(path)
    return os.path.abspath(path)


def main():

    # default values
    aws_access_key_id = None
    aws_secret_access_key = None
    bucket_name = ''
    ignore_dirs = []
    debug = 0
    cb = None
    num_cb = 0
    quiet = False
    no_op = False
    prefix = '/'
    key_prefix = ''
    grant = None
    no_overwrite = False
    reduced = False
    headers = {}
    host = None
    multipart_requested = False

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'a:b:c::d:g:hi:k:np:qs:wr',
            ['access_key=', 'bucket=', 'callback=', 'debug=', 'help', 'grant=',
             'ignore=', 'key_prefix=', 'no_op', 'prefix=', 'quiet',
             'secret_key=', 'no_overwrite', 'reduced', 'header=', 'multipart',
             'host='])
    except:
        usage()

    # parse opts
    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
        if o in ('-a', '--access_key'):
            aws_access_key_id = a
        if o in ('-b', '--bucket'):
            bucket_name = a
        if o in ('-c', '--callback'):
            num_cb = int(a)
            cb = submit_cb
        if o in ('-d', '--debug'):
            debug = int(a)
        if o in ('-g', '--grant'):
            grant = a
        if o in ('-i', '--ignore'):
            ignore_dirs = a.split(',')
        if o in ('-n', '--no_op'):
            no_op = True
        if o in ('-w', '--no_overwrite'):
            no_overwrite = True
        if o in ('-p', '--prefix'):
            prefix = a
            if prefix[-1] != os.sep:
                prefix = prefix + os.sep
            prefix = expand_path(prefix)
        if o in ('-k', '--key_prefix'):
            key_prefix = a
        if o in ('-q', '--quiet'):
            quiet = True
        if o in ('-s', '--secret_key'):
            aws_secret_access_key = a
        if o in ('-r', '--reduced'):
            reduced = True
        if o in ('--header',):
            (k, v) = a.split("=")
            headers[k] = v
        if o in ('--host',):
            host = a
        if o in ('--multipart',):
            if multipart_capable:
                multipart_requested = True
            else:
                print "multipart upload requested but not capable"
                sys.exit()

    if len(args) < 1:
        usage()

    if not bucket_name:
        print "bucket name is required!"
        usage()

    if host:
        c = boto.connect_s3(host=host, aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key)
    else:
        c = boto.connect_s3(aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key)
    c.debug = debug
    b = c.get_bucket(bucket_name)
    existing_keys_to_check_against = []
    files_to_check_for_upload = []

    for path in args:
        path = expand_path(path)
        # upload a directory of files recursively
        if os.path.isdir(path):
            if no_overwrite:
                if not quiet:
                    print 'Getting list of existing keys to check against'
                for key in b.list(get_key_name(path, prefix, key_prefix)):
                    existing_keys_to_check_against.append(key.name)
            for root, dirs, files in os.walk(path):
                for ignore in ignore_dirs:
                    if ignore in dirs:
                        dirs.remove(ignore)
                for path in files:
                    if path.startswith("."):
                        continue
                    files_to_check_for_upload.append(os.path.join(root, path))

        # upload a single file
        elif os.path.isfile(path):
            fullpath = os.path.abspath(path)
            key_name = get_key_name(fullpath, prefix, key_prefix)
            files_to_check_for_upload.append(fullpath)
            existing_keys_to_check_against.append(key_name)

        # we are trying to upload something unknown
        else:
            print "I don't know what %s is, so I can't upload it" % path

    for fullpath in files_to_check_for_upload:
        key_name = get_key_name(fullpath, prefix, key_prefix)

        if no_overwrite and key_name in existing_keys_to_check_against:
            if not quiet:
                print 'Skipping %s as it exists in s3' % fullpath
            continue

        if not quiet:
            print 'Copying %s to %s/%s' % (fullpath, bucket_name, key_name)

        if not no_op:
            # 0-byte files don't work and also don't need multipart upload
            if os.stat(fullpath).st_size != 0 and multipart_capable and \
                    multipart_requested:
                multipart_upload(bucket_name, aws_access_key_id,
                                 aws_secret_access_key, fullpath, key_name,
                                 reduced, debug, cb, num_cb,
                                 grant or 'private', headers)
            else:
                singlepart_upload(b, key_name, fullpath, cb=cb, num_cb=num_cb,
                                  policy=grant, reduced_redundancy=reduced,
                                  headers=headers)

if __name__ == "__main__":
    main()
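
# Example invocation (illustrative only; the bucket name and paths are
# hypothetical, and credentials fall back to the AWS_ACCESS_KEY_ID /
# AWS_SECRET_ACCESS_KEY environment variables when -a/-s are omitted):
#
#   s3put -b my-bucket -p /home/foo -k backup -c 10 /home/foo/bar
#
# This would recursively upload everything under /home/foo/bar to keys of the
# form backup/bar/..., reporting progress via 10 callbacks per file.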