Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: third_party/cloud_storage/cloudstorage/common.py

Issue 1031663002: Increase maximum file upload to 100MB, use cloudstorage python library (Closed) Base URL: https://github.com/dart-lang/pub-dartlang.git@master
Patch Set: Add deprecation comment to old cloud_storage.py:open() function Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2012 Google Inc. All Rights Reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing,
10 # software distributed under the License is distributed on an
11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 # either express or implied. See the License for the specific
13 # language governing permissions and limitations under the License.
14
15 """Helpers shared by cloudstorage_stub and cloudstorage_api."""
16
17
18
19
20
# Public API of this module: the names exported by `from ... import *`.
__all__ = ['CS_XML_NS',
           'CSFileStat',
           'dt_str_to_posix',
           'local_api_url',
           'LOCAL_GCS_ENDPOINT',
           'local_run',
           'get_access_token',
           'get_stored_content_length',
           'get_metadata',
           'GCSFileStat',
           'http_time_to_posix',
           'memory_usage',
           'posix_time_to_http',
           'posix_to_dt_str',
           'set_access_token',
           'validate_options',
           'validate_bucket_name',
           'validate_bucket_path',
           'validate_file_path',
          ]
41
42
import calendar
import datetime
from email import utils as email_utils
import functools
import logging
import os
import re
49
50 try:
51 from google.appengine.api import runtime
52 except ImportError:
53 from google.appengine.api import runtime
54
55
# Bucket names are 3-63 chars of lowercase letters, digits, dot, dash,
# underscore (per GCS bucket naming rules).
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# A bare bucket name, e.g. 'my-bucket'.
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
# A bucket path, e.g. '/my-bucket' (no trailing object name).
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
# A bucket path optionally followed by anything: '/bucket', '/bucket/',
# '/bucket/prefix'.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# A full object path: '/bucket/filename'.
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Header (prefixes) callers may set as user metadata when creating a file.
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
# All supported per-file options: metadata headers plus the canned ACL.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared OAuth token; set via set_access_token(), read via get_access_token().
_access_token = ''


# Maximum number of results a single GET-bucket request may return.
_MAX_GET_BUCKET_RESULT = 1000
72
73
def set_access_token(access_token):
  """Set the shared access token used to talk to Google Cloud Storage.

  When a token is set, the library always communicates with the real
  Google Cloud Storage service using it, even when running on dev
  appserver. Tokens expire, so renewing them is the caller's
  responsibility.

  When no token is set, the library requests and refreshes tokens
  automatically on the production appserver, or talks to the local
  Google Cloud Storage stub on dev appserver.

  Args:
    access_token: a token str, e.g. obtained by running 'gsutil -d ls'
      and copying the value after 'Bearer'.
  """
  global _access_token
  _access_token = access_token


def get_access_token():
  """Return the shared access token set by set_access_token."""
  return _access_token
96
97
class GCSFileStat(object):
  """Container for GCS file stat."""

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real file.
    """
    self.filename = filename
    self.is_dir = is_dir
    # Size/ctime/etag stay None for directories.
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      self.st_size = long(st_size)
      self.st_ctime = float(st_ctime)
      # GCS wraps the etag in double quotes; strip them. Using
      # startswith/endswith (rather than etag[0]/etag[-1]) avoids an
      # IndexError when the server hands back an empty etag str.
      if etag.startswith('"') and etag.endswith('"'):
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Order stats by filename; only same-typed stats are comparable.

    Raises:
      ValueError: if other is not a GCSFileStat.
    """
    if not isinstance(other, self.__class__):
      # BUGFIX: the message args were previously passed as extra
      # constructor arguments (logging style) and never %-formatted.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' % (self.__class__.__name__,
                                              other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    # Prefer the content hash when present; directories fall back to name.
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)
171
172
173 CSFileStat = GCSFileStat
174
175
def get_stored_content_length(headers):
  """Return the content length (in bytes) of the object as stored in GCS.

  x-goog-stored-content-length should always be present except when called
  via the local dev_appserver; in that case fall back to the standard
  content-length header.

  Args:
    headers: a dict of headers from the http response.

  Returns:
    the stored content length (str), or None if neither header is present.
  """
  stored = headers.get('x-goog-stored-content-length')
  if stored is not None:
    return stored
  return headers.get('content-length')
193
194
def get_metadata(headers):
  """Extract user defined metadata options from HTTP response headers."""
  metadata = {}
  for key, value in headers.iteritems():
    lowered = key.lower()
    if any(lowered.startswith(prefix) for prefix in _GCS_METADATA):
      metadata[key] = value
  return metadata
199
200
def validate_bucket_name(name):
  """Validate a Google Storage bucket name.

  Args:
    name: a Google Storage bucket name with no prefix or suffix.

  Raises:
    ValueError: if name is invalid.
  """
  _validate_path(name)
  if _GCS_BUCKET_REGEX.match(name):
    return
  raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                   '0-9, underscore, dash or dot but got %s' % name)
214
215
def validate_bucket_path(path):
  """Validate a Google Cloud Storage bucket path.

  Args:
    path: a Google Storage bucket path. It should have form '/bucket'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if _GCS_BUCKET_PATH_REGEX.match(path):
    return
  raise ValueError('Bucket should have format /bucket '
                   'but got %s' % path)
229
230
def validate_file_path(path):
  """Validate a Google Cloud Storage file path.

  Args:
    path: a Google Storage file path. It should have form '/bucket/filename'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if _GCS_FULLPATH_REGEX.match(path):
    return
  raise ValueError('Path should have format /bucket/filename '
                   'but got %s' % path)
244
245
def _process_path_prefix(path_prefix):
  """Validate and process a Google Cloud Storage path prefix.

  Args:
    path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix'
      or '/bucket/' or '/bucket'.

  Raises:
    ValueError: if path is invalid.

  Returns:
    a tuple of /bucket and prefix. prefix can be None.
  """
  _validate_path(path_prefix)
  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  # Look for a '/' after the leading one; everything before it is the bucket.
  sep = path_prefix.find('/', 1)
  if sep == -1:
    return path_prefix, None
  # An empty remainder ('/bucket/') normalizes to a None prefix.
  return path_prefix[:sep], path_prefix[sep + 1:] or None
270
271
272 def _validate_path(path):
273 """Basic validation of Google Storage paths.
274
275 Args:
276 path: a Google Storage path. It should have form '/bucket/filename'
277 or '/bucket'.
278
279 Raises:
280 ValueError: if path is invalid.
281 TypeError: if path is not of type basestring.
282 """
283 if not path:
284 raise ValueError('Path is empty')
285 if not isinstance(path, basestring):
286 raise TypeError('Path should be a string but is %s (%s).' %
287 (path.__class__, path))
288
289
def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud
      Storage. None or empty is accepted and validates trivially.

  Raises:
    ValueError: if option is not supported.
    TypeError: if option is not of type str or value of an option
      is not of type basestring.
  """
  if not options:
    return

  for key, value in options.iteritems():
    if not isinstance(key, str):
      raise TypeError('option %r should be a str.' % key)
    lowered = key.lower()
    if not any(lowered.startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % key)
    if not isinstance(value, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (value, key))
312
313
def http_time_to_posix(http_time):
  """Convert HTTP time format to posix time.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for http time format.

  Args:
    http_time: time in RFC 2616 format. e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT". May be None.

  Returns:
    A float of secs from unix epoch, or None if http_time is None.
  """
  if http_time is None:
    return None
  parsed = email_utils.parsedate_tz(http_time)
  return email_utils.mktime_tz(parsed)
329
330
def posix_time_to_http(posix_time):
  """Convert posix time to HTTP header time format.

  Args:
    posix_time: unix time (secs from epoch), or None.

  Returns:
    A datetime str in RFC 2616 format, or None if posix_time is None.
  """
  # BUGFIX: the old truthiness check ('if posix_time:') silently returned
  # None for posix time 0 (the epoch itself), which is a valid timestamp.
  # Testing 'is not None' also matches http_time_to_posix's contract.
  if posix_time is None:
    return None
  return email_utils.formatdate(posix_time, usegmt=True)
342
343
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """Convert a datetime str to posix time.

  datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian).

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse LastModified node from GCS's GET bucket XML response.

  Args:
    dt_str: A datetime str.

  Returns:
    A float of secs from unix epoch. By posix definition, epoch is midnight
    1970/1/1 UTC.
  """
  # Drop the trailing 'Z' (UTC designator) and any fractional seconds.
  # BUGFIX: the old "parsable, _ = dt_str.split('.')" raised ValueError on
  # timestamps without a fractional part (e.g. '2013-04-12T00:22:27Z');
  # partition tolerates both forms and is identical on the documented one.
  parsable = dt_str.rstrip('Z').partition('.')[0]
  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(dt.utctimetuple())
369
370
def posix_to_dt_str(posix):
  """Inverse of dt_str_to_posix.

  This is used by GCS stub to generate GET bucket XML response.

  Args:
    posix: A float of secs from unix epoch.

  Returns:
    A datetime str (with a fixed '.000Z' fractional-seconds suffix).
  """
  formatted = datetime.datetime.utcfromtimestamp(posix).strftime(_DT_FORMAT)
  return formatted + '.000Z'
385
386
def local_run():
  """Whether we should hit GCS dev appserver stub.

  Returns:
    True when running outside a server or on a local dev/test server;
    False on production or when going through remote_api.
  """
  software = os.environ.get('SERVER_SOFTWARE')
  if software is None:
    return True
  if 'remote_api' in software:
    return False
  return software.startswith(('Development', 'testutil'))
397
398
def local_api_url():
  """Return URL for GCS emulation on dev appserver."""
  host = os.environ.get('HTTP_HOST')
  return 'http://%s%s' % (host, LOCAL_GCS_ENDPOINT)
402
403
def memory_usage(method):
  """Decorator that logs memory usage before and after a method call.

  Readings come from the App Engine runtime API.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper with the same call signature that logs around the call and
    returns the method's result.
  """
  # functools.wraps preserves the wrapped function's name/docstring so the
  # decorated function is still introspectable (the old version exposed
  # every decorated function as 'wrapper').
  @functools.wraps(method)
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper
414
415
def _add_ns(tagname):
  """Qualify tagname with the GCS XML namespace: '{ns}tag' form."""
  return '{%s}%s' % (CS_XML_NS, tagname)
419
420
# Namespace-qualified element tags used when parsing GCS GET-bucket
# XML responses.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')
OLDNEW
« no previous file with comments | « third_party/cloud_storage/cloudstorage/cloudstorage_api.py ('k') | third_party/cloud_storage/cloudstorage/errors.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698