| Index: third_party/cloud_storage/cloudstorage/common.py
|
| diff --git a/third_party/cloud_storage/cloudstorage/common.py b/third_party/cloud_storage/cloudstorage/common.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..ab9c8df358ccd29731586525e8da52d0a836ae82
|
| --- /dev/null
|
| +++ b/third_party/cloud_storage/cloudstorage/common.py
|
| @@ -0,0 +1,429 @@
|
| +# Copyright 2012 Google Inc. All Rights Reserved.
|
| +#
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at
|
| +#
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| +#
|
| +# Unless required by applicable law or agreed to in writing,
|
| +# software distributed under the License is distributed on an
|
| +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
| +# either express or implied. See the License for the specific
|
| +# language governing permissions and limitations under the License.
|
| +
|
| +"""Helpers shared by cloudstorage_stub and cloudstorage_api."""
|
| +
|
| +
|
| +
|
| +
|
| +
|
# Public API of this module. Names not listed here (regex constants,
# _validate_path, _process_path_prefix, ...) are implementation details
# shared between cloudstorage_stub and cloudstorage_api.
__all__ = ['CS_XML_NS',
           'CSFileStat',
           'dt_str_to_posix',
           'local_api_url',
           'LOCAL_GCS_ENDPOINT',
           'local_run',
           'get_access_token',
           'get_stored_content_length',
           'get_metadata',
           'GCSFileStat',
           'http_time_to_posix',
           'memory_usage',
           'posix_time_to_http',
           'posix_to_dt_str',
           'set_access_token',
           'validate_options',
           'validate_bucket_name',
           'validate_bucket_path',
           'validate_file_path',
           ]
|
| +
|
| +
|
| +import calendar
|
| +import datetime
|
| +from email import utils as email_utils
|
| +import logging
|
| +import os
|
| +import re
|
| +
|
| +try:
|
| + from google.appengine.api import runtime
|
| +except ImportError:
|
| + from google.appengine.api import runtime
|
| +
|
| +
|
# A bucket name: 3-63 characters drawn from lowercase letters, digits,
# dot, dash and underscore.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# Bare bucket name, e.g. 'my-bucket' (no leading slash).
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
# Bucket path, e.g. '/my-bucket' (nothing after the bucket).
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
# Path prefix: '/bucket', '/bucket/' or '/bucket/prefix'.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# Full file path: '/bucket/' followed by the object name.
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Header names (and one prefix, 'x-goog-meta-') that count as
# user-specified object metadata.
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
# All options accepted when creating a file: metadata headers plus ACL.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
# XML namespace of the S3-compatible GET bucket response documents.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# URL path of the GCS emulation endpoint on dev appserver.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Module-wide access token set via set_access_token(); '' means unset.
_access_token = ''


# Maximum number of results one GET bucket request may return.
_MAX_GET_BUCKET_RESULT = 1000
|
| +
|
| +
|
def set_access_token(access_token):
  """Set the shared access token to authenticate with Google Cloud Storage.

  When set, the library will always attempt to communicate with the
  real Google Cloud Storage with this token even when running on dev
  appserver. Note the token could expire, so it's up to you to renew it.

  When absent, the library will automatically request and refresh a token
  on appserver, or, when on dev appserver, talk to a Google Cloud Storage
  stub.

  Args:
    access_token: an OAuth2 access token str. You can get one by running
      'gsutil -d ls' and copying the str after 'Bearer'.
  """
  # Stored in a module-level global so all callers in this process share
  # the same token (see get_access_token).
  global _access_token
  _access_token = access_token
|
| +
|
| +
|
def get_access_token():
  """Returns the shared access token set via set_access_token() ('' if unset)."""
  return _access_token
|
| +
|
| +
|
class GCSFileStat(object):
  """Container for GCS file stat.

  For real files every field is populated. For directories only
  filename and is_dir are meaningful; st_size, st_ctime and etag
  stay None.
  """

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real file.
    """
    self.filename = filename
    self.is_dir = is_dir
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      self.st_size = long(st_size)
      self.st_ctime = float(st_ctime)
      # GCS wraps the etag in double quotes; strip them so callers can
      # compare against a bare md5 hex digest.
      if etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Order stats by filename. Raises ValueError for mismatched types."""
    if not isinstance(other, self.__class__):
      # Bug fix: this message was previously built with logging-style
      # extra arguments, leaving the %s placeholders unformatted.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' % (self.__class__.__name__,
                                              other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    # Prefer the etag (content hash) when present; directories fall back
    # to the filename.
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)
|
| +
|
| +
|
| +CSFileStat = GCSFileStat
|
| +
|
| +
|
def get_stored_content_length(headers):
  """Return the content length (in bytes) of the object as stored in GCS.

  x-goog-stored-content-length should always be present except when the
  response comes from the local dev_appserver; in that case we fall back
  to the standard content-length header.

  Args:
    headers: a dict of headers from the http response.

  Returns:
    the stored content length, or None if neither header is present.
  """
  stored = headers.get('x-goog-stored-content-length')
  if stored is not None:
    return stored
  return headers.get('content-length')
|
| +
|
| +
|
def get_metadata(headers):
  """Get user defined options from HTTP response headers."""
  metadata = {}
  for name, value in headers.iteritems():
    lowered = name.lower()
    # Keep only headers recognized as user metadata (see _GCS_METADATA).
    if any(lowered.startswith(prefix) for prefix in _GCS_METADATA):
      metadata[name] = value
  return metadata
|
| +
|
| +
|
def validate_bucket_name(name):
  """Validate a Google Storage bucket name.

  Args:
    name: a Google Storage bucket name with no prefix or suffix.

  Raises:
    ValueError: if name is invalid.
  """
  _validate_path(name)
  if _GCS_BUCKET_REGEX.match(name):
    return
  raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                   '0-9, underscore, dash or dot but got %s' % name)
|
| +
|
| +
|
def validate_bucket_path(path):
  """Validate a Google Cloud Storage bucket path.

  Args:
    path: a Google Storage bucket path. It should have form '/bucket'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if _GCS_BUCKET_PATH_REGEX.match(path):
    return
  raise ValueError('Bucket should have format /bucket '
                   'but got %s' % path)
|
| +
|
| +
|
def validate_file_path(path):
  """Validate a Google Cloud Storage file path.

  Args:
    path: a Google Storage file path. It should have form '/bucket/filename'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if _GCS_FULLPATH_REGEX.match(path):
    return
  raise ValueError('Path should have format /bucket/filename '
                   'but got %s' % path)
|
| +
|
| +
|
def _process_path_prefix(path_prefix):
  """Validate and process a Google Cloud Storage path prefix.

  Args:
    path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix'
      or '/bucket/' or '/bucket'.

  Raises:
    ValueError: if path is invalid.

  Returns:
    a tuple of /bucket and prefix. prefix can be None.
  """
  _validate_path(path_prefix)
  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  # Split on the first '/' after the leading one: the part before it is
  # the bucket, the part after it is the (possibly empty) prefix.
  bucket_part, slash, prefix_part = path_prefix[1:].partition('/')
  if not slash:
    # No second '/': the whole path is just the bucket.
    return path_prefix, None
  return '/' + bucket_part, prefix_part or None
|
| +
|
| +
|
| +def _validate_path(path):
|
| + """Basic validation of Google Storage paths.
|
| +
|
| + Args:
|
| + path: a Google Storage path. It should have form '/bucket/filename'
|
| + or '/bucket'.
|
| +
|
| + Raises:
|
| + ValueError: if path is invalid.
|
| + TypeError: if path is not of type basestring.
|
| + """
|
| + if not path:
|
| + raise ValueError('Path is empty')
|
| + if not isinstance(path, basestring):
|
| + raise TypeError('Path should be a string but is %s (%s).' %
|
| + (path.__class__, path))
|
| +
|
| +
|
def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud Storage.

  Raises:
    ValueError: if option is not supported.
    TypeError: if option is not of type str or value of an option
      is not of type basestring.
  """
  if not options:
    return

  for key, value in options.iteritems():
    if not isinstance(key, str):
      raise TypeError('option %r should be a str.' % key)
    lowered = key.lower()
    # Only metadata headers and x-goog-acl are accepted (_GCS_OPTIONS).
    if not any(lowered.startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % key)
    if not isinstance(value, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (value, key))
|
| +
|
| +
|
def http_time_to_posix(http_time):
  """Convert HTTP time format to posix time.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for http time format.

  Args:
    http_time: time in RFC 2616 format. e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT".

  Returns:
    Seconds from unix epoch, or None if http_time is None.
  """
  if http_time is None:
    return None
  parsed = email_utils.parsedate_tz(http_time)
  return email_utils.mktime_tz(parsed)
|
| +
|
| +
|
def posix_time_to_http(posix_time):
  """Convert posix time to HTML header time format.

  Args:
    posix_time: unix time.

  Returns:
    A datetime str in RFC 2616 format, or None for falsy input
    (None or 0).
  """
  if not posix_time:
    return None
  return email_utils.formatdate(posix_time, usegmt=True)
|
| +
|
| +
|
# Date-and-time portion (up to, not including, fractional seconds) of the
# ISO 8601 timestamps GCS emits.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """Convert a datetime str to posix time.

  datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian).

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse LastModified node from GCS's GET bucket XML response.

  Args:
    dt_str: A datetime str as described above.

  Returns:
    Seconds from unix epoch (midnight 1970/1/1 UTC) as returned by
    calendar.timegm.
  """
  # Drop the fractional-seconds + 'Z' suffix; strptime handles the rest.
  parsable, _ = dt_str.split('.')
  parsed = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(parsed.utctimetuple())
|
| +
|
| +
|
def posix_to_dt_str(posix):
  """Reverse of dt_str_to_posix.

  This is used by the GCS stub to generate GET bucket XML responses.

  Args:
    posix: seconds from unix epoch.

  Returns:
    An ISO 8601 datetime str with a fixed '.000Z' (UTC, zero fractional
    seconds) suffix.
  """
  stamp = datetime.datetime.utcfromtimestamp(posix)
  return '%s.000Z' % stamp.strftime(_DT_FORMAT)
|
| +
|
| +
|
def local_run():
  """Whether we should hit GCS dev appserver stub.

  Returns:
    True when running outside a server or on dev appserver/testutil;
    False on production or when going through remote_api.
  """
  server_software = os.environ.get('SERVER_SOFTWARE')
  if server_software is None:
    return True
  # remote_api must be checked first: its banner can also contain
  # 'Development'.
  if 'remote_api' in server_software:
    return False
  return server_software.startswith(('Development', 'testutil'))
|
| +
|
| +
|
def local_api_url():
  """Return URL for GCS emulation on dev appserver."""
  host = os.environ.get('HTTP_HOST')
  return 'http://{0}{1}'.format(host, LOCAL_GCS_ENDPOINT)
|
| +
|
| +
|
def memory_usage(method):
  """Log memory usage before and after a method.

  Decorator. The wrapper logs the App Engine runtime's current memory
  usage immediately before and after each call to method, then returns
  method's result unchanged.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper accepting the same arguments as method.
  """
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper
|
| +
|
| +
|
def _add_ns(tagname):
  """Qualify tagname with the GCS XML namespace, ElementTree-style."""
  return '{%s}%s' % (CS_XML_NS, tagname)
|
| +
|
| +
|
# Namespace-qualified tag names used when processing GET bucket XML
# responses with ElementTree.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')
|
|
|