Index: third_party/cloud_storage/cloudstorage/common.py |
diff --git a/third_party/cloud_storage/cloudstorage/common.py b/third_party/cloud_storage/cloudstorage/common.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..ab9c8df358ccd29731586525e8da52d0a836ae82 |
--- /dev/null |
+++ b/third_party/cloud_storage/cloudstorage/common.py |
@@ -0,0 +1,429 @@ |
+# Copyright 2012 Google Inc. All Rights Reserved. |
+# |
+# Licensed under the Apache License, Version 2.0 (the "License"); |
+# you may not use this file except in compliance with the License. |
+# You may obtain a copy of the License at |
+# |
+# http://www.apache.org/licenses/LICENSE-2.0 |
+# |
+# Unless required by applicable law or agreed to in writing, |
+# software distributed under the License is distributed on an |
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, |
+# either express or implied. See the License for the specific |
+# language governing permissions and limitations under the License. |
+ |
+"""Helpers shared by cloudstorage_stub and cloudstorage_api.""" |
+ |
+ |
+ |
+ |
+ |
# Public API of this module. CSFileStat is a backward-compatibility alias
# for GCSFileStat (see below).
__all__ = ['CS_XML_NS',
           'CSFileStat',
           'dt_str_to_posix',
           'local_api_url',
           'LOCAL_GCS_ENDPOINT',
           'local_run',
           'get_access_token',
           'get_stored_content_length',
           'get_metadata',
           'GCSFileStat',
           'http_time_to_posix',
           'memory_usage',
           'posix_time_to_http',
           'posix_to_dt_str',
           'set_access_token',
           'validate_options',
           'validate_bucket_name',
           'validate_bucket_path',
           'validate_file_path',
           ]
+ |
+ |
+import calendar |
+import datetime |
+from email import utils as email_utils |
+import logging |
+import os |
+import re |
+ |
try:
  from google.appengine.api import runtime
except ImportError:
  # NOTE(review): this fallback imports the exact same module as the try
  # branch, so the except clause is a no-op — if the first import raises
  # ImportError, the second raises it again. Upstream presumably intended a
  # different import path here (e.g. a non-SDK location); confirm against
  # the upstream appengine-gcs-client before changing.
  from google.appengine.api import runtime
+ |
+ |
# Core bucket-name pattern: 3-63 characters of lowercase letters, digits,
# dot, dash, or underscore. Unanchored; the derived regexes below add their
# own prefixes/anchors.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# Bare bucket name, e.g. 'bucket' (no leading slash).
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
# Bucket path, e.g. '/bucket' exactly (nothing after the bucket).
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
# Path prefix: '/bucket', '/bucket/', or '/bucket/prefix...'.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# Full object path: '/bucket/' followed by anything (the object name).
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Header names (or name prefixes) treated as user-specified object metadata.
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
# Options accepted when creating an object: the metadata headers plus ACL.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
# XML namespace of GCS's S3-compatible GET-bucket responses.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# URL path of the GCS stub served by dev_appserver.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared access token set via set_access_token(); empty string means unset.
_access_token = ''


# Maximum number of results one GET-bucket call may return.
# NOTE(review): not referenced in this module; presumably used by siblings.
_MAX_GET_BUCKET_RESULT = 1000
+ |
+ |
def set_access_token(access_token):
  """Store a shared OAuth2 token used to talk to Google Cloud Storage.

  Once a token is stored, the library always talks to the real Google Cloud
  Storage service with it, even when running under dev appserver. Tokens
  expire, so renewing them is the caller's responsibility.

  If no token is stored, the library fetches and refreshes one by itself on
  production appserver, and falls back to the local Cloud Storage stub on
  dev appserver.

  Args:
    access_token: a bearer token string; for example, run 'gsutil -d ls'
      and copy the value following 'Bearer'.
  """
  global _access_token
  _access_token = access_token
+ |
+ |
def get_access_token():
  """Return the token previously stored via set_access_token ('' if unset)."""
  return _access_token
+ |
+ |
class GCSFileStat(object):
  """Container for GCS file stat."""

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real
        file.
    """
    self.filename = filename
    self.is_dir = is_dir
    # Size, ctime, and etag stay None for directories; only files carry them.
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      self.st_size = long(st_size)
      self.st_ctime = float(st_ctime)
      # GCS wraps the etag in double quotes; strip them for callers.
      if etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Order stats by filename.

    Raises:
      ValueError: if other is not a GCSFileStat.
    """
    if not isinstance(other, self.__class__):
      # Bug fix: the message was previously passed logging-style as
      # ('... %s, got %s', a, b), so the placeholders were never filled in
      # and the exception carried an unformatted tuple.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' % (self.__class__.__name__,
                                              other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    # Prefer the content hash when available; directories (etag is None)
    # fall back to hashing the filename.
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)
+ |
+ |
# Backward-compatibility alias for the older 'CS'-prefixed class name.
CSFileStat = GCSFileStat
+ |
+ |
def get_stored_content_length(headers):
  """Extract the stored size (in bytes) of a GCS object from HTTP headers.

  GCS normally reports the stored size via 'x-goog-stored-content-length'.
  The local dev_appserver stub omits that header, in which case the plain
  'content-length' header is used instead.

  Args:
    headers: dict of HTTP response headers.

  Returns:
    The stored content length value, or None if neither header is present.
  """
  stored = headers.get('x-goog-stored-content-length')
  if stored is not None:
    return stored
  return headers.get('content-length')
+ |
+ |
def get_metadata(headers):
  """Get user defined options from HTTP response headers.

  Args:
    headers: a dict of HTTP response headers.

  Returns:
    A dict containing only the headers whose names (compared
    case-insensitively) are user metadata: 'x-goog-meta-*',
    'content-disposition', 'cache-control', and 'content-encoding'.
    Original key casing is preserved.
  """
  # items() instead of the Python-2-only iteritems(): identical behavior on
  # Python 2 here, and keeps this vendored module Python-3 ready.
  return dict((k, v) for k, v in headers.items()
              if any(k.lower().startswith(valid) for valid in _GCS_METADATA))
+ |
+ |
def validate_bucket_name(name):
  """Check that a bare Google Storage bucket name is well formed.

  Args:
    name: a bucket name with no leading '/' and no object suffix.

  Raises:
    ValueError: if the name does not satisfy GCS bucket naming rules.
  """
  _validate_path(name)
  if _GCS_BUCKET_REGEX.match(name):
    return
  raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                   '0-9, underscore, dash or dot but got %s' % name)
+ |
+ |
def validate_bucket_path(path):
  """Check that a bucket path of the form '/bucket' is well formed.

  Args:
    path: the bucket path to check; must be exactly '/<bucket-name>'.

  Raises:
    ValueError: if the path has any other shape.
  """
  _validate_path(path)
  if _GCS_BUCKET_PATH_REGEX.match(path):
    return
  raise ValueError('Bucket should have format /bucket '
                   'but got %s' % path)
+ |
+ |
def validate_file_path(path):
  """Check that an object path of the form '/bucket/filename' is well formed.

  Args:
    path: the object path to check.

  Raises:
    ValueError: if the path lacks a bucket segment or an object segment.
  """
  _validate_path(path)
  if _GCS_FULLPATH_REGEX.match(path):
    return
  raise ValueError('Path should have format /bucket/filename '
                   'but got %s' % path)
+ |
+ |
def _process_path_prefix(path_prefix):
  """Split a Google Cloud Storage path prefix into bucket and prefix parts.

  Accepts '/bucket', '/bucket/', or '/bucket/prefix'.

  Args:
    path_prefix: the path prefix string to validate and split.

  Raises:
    ValueError: if the prefix is malformed.

  Returns:
    A (bucket, prefix) tuple: bucket is '/bucket'; prefix is everything
    after the bucket's trailing slash, or None when absent or empty.
  """
  _validate_path(path_prefix)
  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  sep = path_prefix.find('/', 1)
  if sep == -1:
    return path_prefix, None
  return path_prefix[:sep], path_prefix[sep + 1:] or None
+ |
+ |
def _validate_path(path):
  """Basic sanity checks shared by all Google Storage path validators.

  Args:
    path: a Google Storage path such as '/bucket/filename' or '/bucket'.

  Raises:
    ValueError: if path is empty or falsy.
    TypeError: if path is not a string.
  """
  if not path:
    raise ValueError('Path is empty')
  if isinstance(path, basestring):
    return
  raise TypeError('Path should be a string but is %s (%s).' %
                  (path.__class__, path))
+ |
+ |
def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud
      Storage. May be None or empty, in which case nothing is checked.

  Raises:
    ValueError: if an option is not supported.
    TypeError: if an option name is not of type str or the value of an
      option is not of type basestring.
  """
  if not options:
    return

  # items() instead of the Python-2-only iteritems(): identical behavior on
  # Python 2 here, and keeps this vendored module Python-3 ready.
  for k, v in options.items():
    if not isinstance(k, str):
      raise TypeError('option %r should be a str.' % k)
    if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % k)
    if not isinstance(v, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (v, k))
+ |
+ |
def http_time_to_posix(http_time):
  """Parse an RFC 2616 HTTP date into seconds since the Unix epoch.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for the date format, e.g. "Mon, 20 Nov 1995 19:12:08 GMT".

  Args:
    http_time: the HTTP date string, or None.

  Returns:
    Seconds since the epoch, or None when http_time is None.
  """
  if http_time is None:
    return None
  return email_utils.mktime_tz(email_utils.parsedate_tz(http_time))
+ |
+ |
def posix_time_to_http(posix_time):
  """Format a posix timestamp as an RFC 2616 HTTP date string.

  Args:
    posix_time: seconds since the Unix epoch.

  Returns:
    An RFC 2616 date string, or None when posix_time is falsy (0 or None).
  """
  if not posix_time:
    return None
  return email_utils.formatdate(posix_time, usegmt=True)
+ |
+ |
# strftime/strptime format for the date-time part of GCS timestamps:
# ISO 8601 without fractional seconds or the trailing 'Z'.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'
+ |
+ |
def dt_str_to_posix(dt_str):
  """Convert a GCS LastModified timestamp string to posix time.

  The input has the shape '%Y-%m-%dT%H:%M:%S.%fZ', e.g.
  '2013-04-12T00:22:27.978Z'. Per ISO 8601, 'T' separates date from time
  and 'Z' denotes UTC (zero meridian); the fractional-seconds part (with
  the 'Z' glued to it) is split off and discarded.

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  Used to parse the LastModified node of GCS's GET bucket XML response.

  Args:
    dt_str: the datetime string.

  Returns:
    Seconds since the Unix epoch (midnight 1970/1/1 UTC).
  """
  whole_seconds, _ = dt_str.split('.')
  parsed = datetime.datetime.strptime(whole_seconds, _DT_FORMAT)
  return calendar.timegm(parsed.utctimetuple())
+ |
+ |
def posix_to_dt_str(posix):
  """Inverse of dt_str_to_posix; used by the GCS stub.

  Formats a posix timestamp the way GCS's GET bucket XML response does,
  always with a zero fractional-seconds part.

  Args:
    posix: seconds since the Unix epoch.

  Returns:
    A datetime string of the form '%Y-%m-%dT%H:%M:%S.000Z'.
  """
  as_utc = datetime.datetime.utcfromtimestamp(posix)
  return '%s.000Z' % as_utc.strftime(_DT_FORMAT)
+ |
+ |
def local_run():
  """Decide whether to talk to the GCS dev appserver stub.

  Returns:
    True when running locally: SERVER_SOFTWARE is unset, or names a
    'Development'/'testutil' server. False on production servers and
    under remote_api.
  """
  software = os.environ.get('SERVER_SOFTWARE')
  if software is None:
    return True
  if 'remote_api' in software:
    return False
  return software.startswith(('Development', 'testutil'))
+ |
+ |
def local_api_url():
  """Return the URL of the GCS emulation endpoint on dev appserver."""
  return 'http://%(host)s%(endpoint)s' % {
      'host': os.environ.get('HTTP_HOST'),
      'endpoint': LOCAL_GCS_ENDPOINT}
+ |
+ |
def memory_usage(method):
  """Decorator that logs app memory usage before and after a method runs.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper with the same call signature that logs
    runtime.memory_usage().current() around the call and returns the
    method's result.
  """
  # Local import keeps the module's top-level import surface unchanged.
  from functools import wraps

  # wraps() preserves method.__name__/__doc__ on the wrapper; without it the
  # decorated function reported itself as 'wrapper' to any introspection.
  @wraps(method)
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper
+ |
+ |
def _add_ns(tagname):
  """Qualify an XML tag name with the GCS response namespace (CS_XML_NS)."""
  return '{%s}%s' % (CS_XML_NS, tagname)
+ |
+ |
# Namespace-qualified tag names appearing in GCS's GET bucket XML response,
# precomputed once at import time. NOTE(review): not referenced in this
# module; presumably consumed by sibling modules that parse/emit that XML.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')