Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(392)

Unified Diff: third_party/cloud_storage/cloudstorage/common.py

Issue 1031663002: Increase maximum file upload to 100MB, use cloudstorage python library (Closed) Base URL: https://github.com/dart-lang/pub-dartlang.git@master
Patch Set: Add deprecation comment to old cloud_storage.py:open() function Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/cloud_storage/cloudstorage/common.py
diff --git a/third_party/cloud_storage/cloudstorage/common.py b/third_party/cloud_storage/cloudstorage/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab9c8df358ccd29731586525e8da52d0a836ae82
--- /dev/null
+++ b/third_party/cloud_storage/cloudstorage/common.py
@@ -0,0 +1,429 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+# either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+"""Helpers shared by cloudstorage_stub and cloudstorage_api."""
+
+
+
+
+
+# Public names of this module: exactly the names bound by a star-import.
+# Underscore-prefixed helpers and constants are intentionally not listed.
+__all__ = ['CS_XML_NS',
+ 'CSFileStat',
+ 'dt_str_to_posix',
+ 'local_api_url',
+ 'LOCAL_GCS_ENDPOINT',
+ 'local_run',
+ 'get_access_token',
+ 'get_stored_content_length',
+ 'get_metadata',
+ 'GCSFileStat',
+ 'http_time_to_posix',
+ 'memory_usage',
+ 'posix_time_to_http',
+ 'posix_to_dt_str',
+ 'set_access_token',
+ 'validate_options',
+ 'validate_bucket_name',
+ 'validate_bucket_path',
+ 'validate_file_path',
+ ]
+
+
+import calendar
+import datetime
+from email import utils as email_utils
+import logging
+import os
+import re
+
+try:
+ from google.appengine.api import runtime
+except ImportError:
+ from google.appengine.api import runtime
+
+
+# Characters and length (3-63) accepted for a bucket name.
+_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
+# The two fully anchored patterns end in \Z rather than $: $ also matches
+# just before a trailing newline, which would let 'bucket\n' pass validation.
+_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'\Z')
+_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'\Z')
+# '/bucket' followed by anything; only anchored at the start via match().
+_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
+# '/bucket/' followed by anything; only anchored at the start via match().
+_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
+# Lower-case header-name prefixes that get_metadata() treats as user options.
+_GCS_METADATA = ['x-goog-meta-',
+                 'content-disposition',
+                 'cache-control',
+                 'content-encoding']
+# Option-name prefixes accepted by validate_options().
+_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
+# XML namespace prepended to tag names by _add_ns().
+CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
+# URL path appended to the host by local_api_url().
+LOCAL_GCS_ENDPOINT = '/_ah/gcs'
+# Shared token read and written by get_access_token() / set_access_token().
+_access_token = ''
+
+
+_MAX_GET_BUCKET_RESULT = 1000
+
+
+def set_access_token(access_token):
+  """Store the module-wide access token used for Google Cloud Storage.
+
+  While a token is set, the library always tries to talk to the real
+  Google Cloud Storage with it, even on the dev appserver. The token may
+  expire; renewing it is the caller's responsibility.
+
+  Without a token, the library requests and refreshes one automatically on
+  appserver, and talks to a Google Cloud Storage stub on the dev appserver.
+
+  Args:
+    access_token: the token string. One way to obtain it is to run
+      'gsutil -d ls' and copy the str that follows 'Bearer'.
+  """
+  # Rebind the module-level name rather than creating a local.
+  global _access_token
+  _access_token = access_token
+
+
+def get_access_token():
+  """Return the module-wide access token last stored by set_access_token.
+
+  Returns:
+    The current value of the module-level _access_token.
+  """
+  return _access_token
+
+
+class GCSFileStat(object):
+  """Container for the stat information of a GCS file or directory."""
+
+  def __init__(self,
+               filename,
+               st_size,
+               etag,
+               st_ctime,
+               content_type=None,
+               metadata=None,
+               is_dir=False):
+    """Initialize.
+
+    For files, the non optional arguments are always set.
+    For directories, only filename and is_dir are set; st_size, st_ctime and
+    etag are ignored and stored as None.
+
+    Args:
+      filename: a Google Cloud Storage filename of form '/bucket/filename'.
+      st_size: file size in bytes. long compatible.
+      etag: hex digest of the md5 hash of the file's content. str. One pair
+        of surrounding double quotes, if present, is stripped.
+      st_ctime: posix file creation time. float compatible.
+      content_type: content type. str.
+      metadata: a str->str dict of user specified options when creating
+        the file. Possible keys are x-goog-meta-, content-disposition,
+        content-encoding, and cache-control.
+      is_dir: True if this represents a directory. False if this is a real
+        file.
+    """
+    self.filename = filename
+    self.is_dir = is_dir
+    self.st_size = None
+    self.st_ctime = None
+    self.etag = None
+    self.content_type = content_type
+    self.metadata = metadata
+
+    if not is_dir:
+      self.st_size = long(st_size)
+      self.st_ctime = float(st_ctime)
+      # Slices rather than indexes, so an empty etag is stored unchanged
+      # instead of raising IndexError.
+      if etag[:1] == '"' and etag[-1:] == '"':
+        etag = etag[1:-1]
+      self.etag = etag
+
+  def __repr__(self):
+    """Return a short description; directories show only their name."""
+    if self.is_dir:
+      return '(directory: %s)' % self.filename
+
+    return (
+        '(filename: %(filename)s, st_size: %(st_size)s, '
+        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
+        'content_type: %(content_type)s, '
+        'metadata: %(metadata)s)' %
+        dict(filename=self.filename,
+             st_size=self.st_size,
+             st_ctime=self.st_ctime,
+             etag=self.etag,
+             content_type=self.content_type,
+             metadata=self.metadata))
+
+  def __cmp__(self, other):
+    """Order stats by filename.
+
+    Args:
+      other: another instance of this class.
+
+    Returns:
+      1, -1 or 0 when self.filename is greater than, less than or equal to
+      other.filename.
+
+    Raises:
+      ValueError: if other is not an instance of this class.
+    """
+    if not isinstance(other, self.__class__):
+      # Format the message explicitly; exceptions do not interpolate extra
+      # positional arguments the way logging calls do.
+      raise ValueError('Argument to cmp must have the same type. '
+                       'Expect %s, got %s' % (self.__class__.__name__,
+                                              other.__class__.__name__))
+    if self.filename > other.filename:
+      return 1
+    elif self.filename < other.filename:
+      return -1
+    return 0
+
+  def __hash__(self):
+    """Hash on the etag when it is set and non-empty, else on the filename."""
+    # NOTE(review): comparison uses filename only, so two stats that compare
+    # equal can hash differently when their etags differ -- confirm intended.
+    if self.etag:
+      return hash(self.etag)
+    return hash(self.filename)
+
+
+# Second name for GCSFileStat; both names are exported in __all__.
+# NOTE(review): presumably kept for callers using the older name -- confirm.
+CSFileStat = GCSFileStat
+
+
+def get_stored_content_length(headers):
+  """Return the size (in bytes) of the object as stored in GCS.
+
+  The x-goog-stored-content-length header should always be present except
+  when called via the local dev_appserver, so when it is missing the plain
+  content-length header is used instead.
+
+  Args:
+    headers: a dict of headers from the http response.
+
+  Returns:
+    The header value exactly as found in headers (not converted), or None
+    when neither header is present.
+  """
+  stored = headers.get('x-goog-stored-content-length')
+  # Only a missing header triggers the fallback; an empty value is returned.
+  return headers.get('content-length') if stored is None else stored
+
+
+def get_metadata(headers):
+  """Pick the user defined options out of HTTP response headers.
+
+  Args:
+    headers: a dict of headers from the http response.
+
+  Returns:
+    A new dict with the entries of headers whose lower-cased name starts
+    with one of the prefixes in _GCS_METADATA. Names keep their original
+    case.
+  """
+  prefixes = tuple(_GCS_METADATA)
+  return dict((name, value) for name, value in headers.iteritems()
+              if name.lower().startswith(prefixes))
+
+
+def validate_bucket_name(name):
+  """Check that name is a well-formed Google Storage bucket name.
+
+  Args:
+    name: a Google Storage bucket name with no prefix or suffix.
+
+  Raises:
+    ValueError: if name is empty or does not match _GCS_BUCKET_REGEX.
+    TypeError: if name is not of type basestring.
+  """
+  _validate_path(name)
+  if _GCS_BUCKET_REGEX.match(name):
+    return
+  raise ValueError('Bucket should be 3-63 characters long using only a-z,'
+                   '0-9, underscore, dash or dot but got %s' % name)
+
+
+def validate_bucket_path(path):
+  """Check that path is a Google Cloud Storage bucket path.
+
+  Args:
+    path: a Google Storage bucket path. It should have form '/bucket'.
+
+  Raises:
+    ValueError: if path is empty or does not match _GCS_BUCKET_PATH_REGEX.
+    TypeError: if path is not of type basestring.
+  """
+  _validate_path(path)
+  if _GCS_BUCKET_PATH_REGEX.match(path):
+    return
+  raise ValueError('Bucket should have format /bucket '
+                   'but got %s' % path)
+
+
+def validate_file_path(path):
+  """Check that path is a Google Cloud Storage file path.
+
+  Args:
+    path: a Google Storage file path. It should have form
+      '/bucket/filename'.
+
+  Raises:
+    ValueError: if path is empty or does not match _GCS_FULLPATH_REGEX.
+    TypeError: if path is not of type basestring.
+  """
+  _validate_path(path)
+  if _GCS_FULLPATH_REGEX.match(path):
+    return
+  raise ValueError('Path should have format /bucket/filename '
+                   'but got %s' % path)
+
+
+def _process_path_prefix(path_prefix):
+  """Validate a Google Cloud Storage path prefix and split it in two.
+
+  Args:
+    path_prefix: a Google Cloud Storage path prefix of format
+      '/bucket/prefix' or '/bucket/' or '/bucket'.
+
+  Raises:
+    ValueError: if path_prefix is empty or malformed.
+    TypeError: if path_prefix is not of type basestring.
+
+  Returns:
+    a tuple of /bucket and prefix. prefix is None when nothing follows the
+    bucket (forms '/bucket' and '/bucket/').
+  """
+  _validate_path(path_prefix)
+  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
+    raise ValueError('Path prefix should have format /bucket, /bucket/, '
+                     'or /bucket/prefix but got %s.' % path_prefix)
+  # Search from index 1 so the leading '/' of the bucket is skipped.
+  slash = path_prefix.find('/', 1)
+  if slash == -1:
+    return path_prefix, None
+  # An empty remainder ('/bucket/') is reported as None, not ''.
+  return path_prefix[:slash], path_prefix[slash + 1:] or None
+
+
+def _validate_path(path):
+  """Run the checks common to all Google Storage paths.
+
+  Args:
+    path: a Google Storage path. It should have form '/bucket/filename'
+      or '/bucket'.
+
+  Raises:
+    ValueError: if path is falsy (the emptiness test runs before the type
+      test, so None, 0 or an empty container also raise ValueError).
+    TypeError: if path is truthy but not of type basestring.
+  """
+  if not path:
+    raise ValueError('Path is empty')
+  if isinstance(path, basestring):
+    return
+  raise TypeError('Path should be a string but is %s (%s).' %
+                  (path.__class__, path))
+
+
+def validate_options(options):
+  """Check a dict of Google Cloud Storage options.
+
+  Each entry is checked in turn: the name must be a str, the lower-cased
+  name must start with one of the prefixes in _GCS_OPTIONS, and the value
+  must be a basestring. The first failing check raises.
+
+  Args:
+    options: a str->basestring dict of options to pass to Google Cloud
+      Storage. None or an empty dict is accepted without any checks.
+
+  Raises:
+    ValueError: if option is not supported.
+    TypeError: if option is not of type str or value of an option
+      is not of type basestring.
+  """
+  if not options:
+    return
+
+  supported = tuple(_GCS_OPTIONS)
+  for name, value in options.iteritems():
+    if not isinstance(name, str):
+      raise TypeError('option %r should be a str.' % name)
+    if not name.lower().startswith(supported):
+      raise ValueError('option %s is not supported.' % name)
+    if not isinstance(value, basestring):
+      raise TypeError('value %r for option %s should be of type basestring.' %
+                      (value, name))
+
+
+def http_time_to_posix(http_time):
+  """Convert a time in HTTP header format to posix time.
+
+  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
+  for http time format.
+
+  Args:
+    http_time: time in RFC 2616 format. e.g.
+      "Mon, 20 Nov 1995 19:12:08 GMT". May be None.
+
+  Returns:
+    Seconds from unix epoch, or None when http_time is None.
+  """
+  if http_time is None:
+    return None
+  parsed = email_utils.parsedate_tz(http_time)
+  return email_utils.mktime_tz(parsed)
+
+
+def posix_time_to_http(posix_time):
+  """Convert posix time to HTTP header time format.
+
+  Args:
+    posix_time: unix time.
+
+  Returns:
+    A datetime str in RFC 2616 format, or None when posix_time is falsy
+    (None, and also 0).
+  """
+  if not posix_time:
+    return None
+  return email_utils.formatdate(posix_time, usegmt=True)
+
+
+# strptime/strftime layout of the date and whole-second time part; the
+# fractional seconds and the trailing 'Z' are handled separately.
+_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'
+
+
+def dt_str_to_posix(dt_str):
+  """Convert a datetime str to posix time.
+
+  datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
+  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
+  between date and time when they are on the same line.
+  Z indicates UTC (zero meridian). A str without the fractional part,
+  e.g. 2013-04-12T00:22:27Z, is accepted as well.
+
+  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html
+
+  This is used to parse LastModified node from GCS's GET bucket XML response.
+
+  Args:
+    dt_str: A datetime str.
+
+  Returns:
+    Whole seconds from unix epoch; the fractional part of dt_str is
+    discarded. By posix definition, epoch is midnight 1970/1/1 UTC.
+
+  Raises:
+    ValueError: if the part before the first '.' does not match _DT_FORMAT.
+  """
+  # Keep only what precedes the fraction; tolerate a missing fraction
+  # instead of failing on tuple unpacking.
+  parsable = dt_str.split('.', 1)[0]
+  if parsable.endswith('Z'):
+    # No fraction was present, so the UTC marker is still attached.
+    parsable = parsable[:-1]
+  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
+  return calendar.timegm(dt.utctimetuple())
+
+
+def posix_to_dt_str(posix):
+  """Reverse of dt_str_to_posix.
+
+  This is used by GCS stub to generate GET bucket XML response.
+
+  Args:
+    posix: A float of secs from unix epoch.
+
+  Returns:
+    A datetime str in UTC, formatted with _DT_FORMAT. The fractional part
+    is always written as '.000', followed by 'Z'.
+  """
+  utc_dt = datetime.datetime.utcfromtimestamp(posix)
+  return utc_dt.strftime(_DT_FORMAT) + '.000Z'
+
+
+def local_run():
+  """Tell whether we should hit the GCS dev appserver stub.
+
+  The decision is based on the SERVER_SOFTWARE environment variable.
+
+  Returns:
+    True when the variable is unset, or when it starts with 'Development'
+    or 'testutil' and does not contain 'remote_api'. False otherwise.
+  """
+  software = os.environ.get('SERVER_SOFTWARE')
+  if software is None:
+    return True
+  # 'remote_api' wins even when the value has a local-looking prefix.
+  if 'remote_api' in software:
+    return False
+  return software.startswith(('Development', 'testutil'))
+
+
+def local_api_url():
+  """Build the URL for GCS emulation on dev appserver.
+
+  Returns:
+    'http://' followed by the HTTP_HOST environment variable and
+    LOCAL_GCS_ENDPOINT. An unset HTTP_HOST is formatted as 'None'.
+  """
+  host = os.environ.get('HTTP_HOST')
+  return 'http://%s%s' % (host, LOCAL_GCS_ENDPOINT)
+
+
+def memory_usage(method):
+  """Decorator that logs memory usage before and after a method.
+
+  The value logged is runtime.memory_usage().current(), read once just
+  before and once just after the call. Nothing is logged after the call if
+  method raises.
+
+  Args:
+    method: the callable to wrap.
+
+  Returns:
+    A wrapper that forwards all arguments to method and returns its result.
+    The wrapper keeps the name and docstring of method.
+  """
+  # Local import: functools is not among this module's top-level imports.
+  import functools
+
+  # Preserve __name__/__doc__ so the decorated callable stays identifiable.
+  @functools.wraps(method)
+  def wrapper(*args, **kwargs):
+    logging.info('Memory before method %s is %s.',
+                 method.__name__, runtime.memory_usage().current())
+    result = method(*args, **kwargs)
+    logging.info('Memory after method %s is %s',
+                 method.__name__, runtime.memory_usage().current())
+    return result
+  return wrapper
+
+
+def _add_ns(tagname):
+  """Qualify an XML tag name with the CS_XML_NS namespace.
+
+  Args:
+    tagname: the bare tag name, e.g. 'Contents'.
+
+  Returns:
+    A str of the form '{namespace}tagname'.
+  """
+  return '{%s}%s' % (CS_XML_NS, tagname)
+
+
+# Namespace-qualified XML tag names, each in the '{namespace}tag' form
+# built by _add_ns.
+# NOTE(review): presumably the tags of the GET bucket XML response -- confirm
+# against the code that parses it.
+_T_CONTENTS = _add_ns('Contents')
+_T_LAST_MODIFIED = _add_ns('LastModified')
+_T_ETAG = _add_ns('ETag')
+_T_KEY = _add_ns('Key')
+_T_SIZE = _add_ns('Size')
+_T_PREFIX = _add_ns('Prefix')
+_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
+_T_NEXT_MARKER = _add_ns('NextMarker')
+_T_IS_TRUNCATED = _add_ns('IsTruncated')
« no previous file with comments | « third_party/cloud_storage/cloudstorage/cloudstorage_api.py ('k') | third_party/cloud_storage/cloudstorage/errors.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698