Index: third_party/cloud_storage/cloudstorage/common.py |
diff --git a/third_party/cloud_storage/cloudstorage/common.py b/third_party/cloud_storage/cloudstorage/common.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..ab9c8df358ccd29731586525e8da52d0a836ae82 |
--- /dev/null |
+++ b/third_party/cloud_storage/cloudstorage/common.py |
@@ -0,0 +1,429 @@ |
+# Copyright 2012 Google Inc. All Rights Reserved. |
+# |
+# Licensed under the Apache License, Version 2.0 (the "License"); |
+# you may not use this file except in compliance with the License. |
+# You may obtain a copy of the License at |
+# |
+# http://www.apache.org/licenses/LICENSE-2.0 |
+# |
+# Unless required by applicable law or agreed to in writing, |
+# software distributed under the License is distributed on an |
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, |
+# either express or implied. See the License for the specific |
+# language governing permissions and limitations under the License. |
+ |
+"""Helpers shared by cloudstorage_stub and cloudstorage_api.""" |
+ |
+ |
+ |
+ |
+ |
# Public API of this module. CSFileStat is a backward-compatibility alias
# for GCSFileStat (see below).
__all__ = ['CS_XML_NS',
           'CSFileStat',
           'dt_str_to_posix',
           'local_api_url',
           'LOCAL_GCS_ENDPOINT',
           'local_run',
           'get_access_token',
           'get_stored_content_length',
           'get_metadata',
           'GCSFileStat',
           'http_time_to_posix',
           'memory_usage',
           'posix_time_to_http',
           'posix_to_dt_str',
           'set_access_token',
           'validate_options',
           'validate_bucket_name',
           'validate_bucket_path',
           'validate_file_path',
           ]
+ |
+ |
+import calendar |
+import datetime |
+from email import utils as email_utils |
+import logging |
+import os |
+import re |
+ |
try:
  from google.appengine.api import runtime
except ImportError:
  # NOTE(review): this fallback imports the exact same module as the try
  # branch, so the except clause is a no-op — if the first import raises
  # ImportError, the second raises it again. Upstream presumably intended a
  # different import path here (e.g. a non-SDK location); confirm against
  # the upstream appengine-gcs-client before changing.
  from google.appengine.api import runtime
+ |
+ |
# Core bucket-name pattern: 3-63 characters of lowercase letters, digits,
# dot, dash, or underscore. Unanchored; the derived regexes below add their
# own prefixes/anchors.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# Bare bucket name, e.g. 'bucket' (no leading slash).
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
# Bucket path, e.g. '/bucket' exactly (nothing after the bucket).
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
# Path prefix: '/bucket', '/bucket/', or '/bucket/prefix...'.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# Full object path: '/bucket/' followed by anything (the object name).
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Header names (or name prefixes) treated as user-specified object metadata.
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
# Options accepted when creating an object: the metadata headers plus ACL.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
# XML namespace of GCS's S3-compatible GET-bucket responses.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# URL path of the GCS stub served by dev_appserver.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared access token set via set_access_token(); empty string means unset.
_access_token = ''


# Maximum number of results one GET-bucket call may return.
# NOTE(review): not referenced in this module; presumably used by siblings.
_MAX_GET_BUCKET_RESULT = 1000
+ |
+ |
def set_access_token(access_token):
  """Store a shared OAuth2 token used to talk to Google Cloud Storage.

  Once a token is stored, the library always talks to the real Google Cloud
  Storage service with it, even when running under dev appserver. Tokens
  expire, so renewing them is the caller's responsibility.

  If no token is stored, the library fetches and refreshes one by itself on
  production appserver, and falls back to the local Cloud Storage stub on
  dev appserver.

  Args:
    access_token: a bearer token string; for example, run 'gsutil -d ls'
      and copy the value following 'Bearer'.
  """
  global _access_token
  _access_token = access_token
+ |
+ |
def get_access_token():
  """Return the token previously stored via set_access_token ('' if unset)."""
  return _access_token
+ |
+ |
class GCSFileStat(object):
  """Container for GCS file stat."""

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real
        file.
    """
    self.filename = filename
    self.is_dir = is_dir
    # Size, ctime, and etag stay None for directories; only files carry them.
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      self.st_size = long(st_size)
      self.st_ctime = float(st_ctime)
      # GCS wraps the etag in double quotes; strip them for callers.
      if etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Order stats by filename.

    Raises:
      ValueError: if other is not a GCSFileStat.
    """
    if not isinstance(other, self.__class__):
      # Bug fix: the message was previously passed logging-style as
      # ('... %s, got %s', a, b), so the placeholders were never filled in
      # and the exception carried an unformatted tuple.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' % (self.__class__.__name__,
                                              other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    # Prefer the content hash when available; directories (etag is None)
    # fall back to hashing the filename.
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)
+ |
+ |
# Backward-compatibility alias for the older 'CS'-prefixed class name.
CSFileStat = GCSFileStat
+ |
+ |
def get_stored_content_length(headers):
  """Extract the stored size (in bytes) of a GCS object from HTTP headers.

  GCS normally reports the stored size via 'x-goog-stored-content-length'.
  The local dev_appserver stub omits that header, in which case the plain
  'content-length' header is used instead.

  Args:
    headers: dict of HTTP response headers.

  Returns:
    The stored content length value, or None if neither header is present.
  """
  stored = headers.get('x-goog-stored-content-length')
  if stored is not None:
    return stored
  return headers.get('content-length')
+ |
+ |
def get_metadata(headers):
  """Get user defined options from HTTP response headers.

  Args:
    headers: a dict of HTTP response headers.

  Returns:
    A dict containing only the headers whose names (compared
    case-insensitively) are user metadata: 'x-goog-meta-*',
    'content-disposition', 'cache-control', and 'content-encoding'.
    Original key casing is preserved.
  """
  # items() instead of the Python-2-only iteritems(): identical behavior on
  # Python 2 here, and keeps this vendored module Python-3 ready.
  return dict((k, v) for k, v in headers.items()
              if any(k.lower().startswith(valid) for valid in _GCS_METADATA))
+ |
+ |
def validate_bucket_name(name):
  """Check that a bare Google Storage bucket name is well formed.

  Args:
    name: a bucket name with no leading '/' and no object suffix.

  Raises:
    ValueError: if the name does not satisfy GCS bucket naming rules.
  """
  _validate_path(name)
  if _GCS_BUCKET_REGEX.match(name):
    return
  raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                   '0-9, underscore, dash or dot but got %s' % name)
+ |
+ |
def validate_bucket_path(path):
  """Check that a bucket path of the form '/bucket' is well formed.

  Args:
    path: the bucket path to check; must be exactly '/<bucket-name>'.

  Raises:
    ValueError: if the path has any other shape.
  """
  _validate_path(path)
  if _GCS_BUCKET_PATH_REGEX.match(path):
    return
  raise ValueError('Bucket should have format /bucket '
                   'but got %s' % path)
+ |
+ |
def validate_file_path(path):
  """Check that an object path of the form '/bucket/filename' is well formed.

  Args:
    path: the object path to check.

  Raises:
    ValueError: if the path lacks a bucket segment or an object segment.
  """
  _validate_path(path)
  if _GCS_FULLPATH_REGEX.match(path):
    return
  raise ValueError('Path should have format /bucket/filename '
                   'but got %s' % path)
+ |
+ |
def _process_path_prefix(path_prefix):
  """Split a Google Cloud Storage path prefix into bucket and prefix parts.

  Accepts '/bucket', '/bucket/', or '/bucket/prefix'.

  Args:
    path_prefix: the path prefix string to validate and split.

  Raises:
    ValueError: if the prefix is malformed.

  Returns:
    A (bucket, prefix) tuple: bucket is '/bucket'; prefix is everything
    after the bucket's trailing slash, or None when absent or empty.
  """
  _validate_path(path_prefix)
  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  sep = path_prefix.find('/', 1)
  if sep == -1:
    return path_prefix, None
  return path_prefix[:sep], path_prefix[sep + 1:] or None
+ |
+ |
def _validate_path(path):
  """Basic sanity checks shared by all Google Storage path validators.

  Args:
    path: a Google Storage path such as '/bucket/filename' or '/bucket'.

  Raises:
    ValueError: if path is empty or falsy.
    TypeError: if path is not a string.
  """
  if not path:
    raise ValueError('Path is empty')
  if isinstance(path, basestring):
    return
  raise TypeError('Path should be a string but is %s (%s).' %
                  (path.__class__, path))
+ |
+ |
def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud
      Storage. May be None or empty, in which case nothing is checked.

  Raises:
    ValueError: if an option is not supported.
    TypeError: if an option name is not of type str or the value of an
      option is not of type basestring.
  """
  if not options:
    return

  # items() instead of the Python-2-only iteritems(): identical behavior on
  # Python 2 here, and keeps this vendored module Python-3 ready.
  for k, v in options.items():
    if not isinstance(k, str):
      raise TypeError('option %r should be a str.' % k)
    if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % k)
    if not isinstance(v, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (v, k))
+ |
+ |
def http_time_to_posix(http_time):
  """Parse an RFC 2616 HTTP date into seconds since the Unix epoch.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for the date format, e.g. "Mon, 20 Nov 1995 19:12:08 GMT".

  Args:
    http_time: the HTTP date string, or None.

  Returns:
    Seconds since the epoch, or None when http_time is None.
  """
  if http_time is None:
    return None
  return email_utils.mktime_tz(email_utils.parsedate_tz(http_time))
+ |
+ |
def posix_time_to_http(posix_time):
  """Format a posix timestamp as an RFC 2616 HTTP date string.

  Args:
    posix_time: seconds since the Unix epoch.

  Returns:
    An RFC 2616 date string, or None when posix_time is falsy (0 or None).
  """
  if not posix_time:
    return None
  return email_utils.formatdate(posix_time, usegmt=True)
+ |
+ |
# strftime/strptime format for the date-time part of GCS timestamps:
# ISO 8601 without fractional seconds or the trailing 'Z'.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'
+ |
+ |
def dt_str_to_posix(dt_str):
  """Convert a GCS LastModified timestamp string to posix time.

  The input has the shape '%Y-%m-%dT%H:%M:%S.%fZ', e.g.
  '2013-04-12T00:22:27.978Z'. Per ISO 8601, 'T' separates date from time
  and 'Z' denotes UTC (zero meridian); the fractional-seconds part (with
  the 'Z' glued to it) is split off and discarded.

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  Used to parse the LastModified node of GCS's GET bucket XML response.

  Args:
    dt_str: the datetime string.

  Returns:
    Seconds since the Unix epoch (midnight 1970/1/1 UTC).
  """
  whole_seconds, _ = dt_str.split('.')
  parsed = datetime.datetime.strptime(whole_seconds, _DT_FORMAT)
  return calendar.timegm(parsed.utctimetuple())
+ |
+ |
def posix_to_dt_str(posix):
  """Inverse of dt_str_to_posix; used by the GCS stub.

  Formats a posix timestamp the way GCS's GET bucket XML response does,
  always with a zero fractional-seconds part.

  Args:
    posix: seconds since the Unix epoch.

  Returns:
    A datetime string of the form '%Y-%m-%dT%H:%M:%S.000Z'.
  """
  as_utc = datetime.datetime.utcfromtimestamp(posix)
  return '%s.000Z' % as_utc.strftime(_DT_FORMAT)
+ |
+ |
def local_run():
  """Decide whether to talk to the GCS dev appserver stub.

  Returns:
    True when running locally: SERVER_SOFTWARE is unset, or names a
    'Development'/'testutil' server. False on production servers and
    under remote_api.
  """
  software = os.environ.get('SERVER_SOFTWARE')
  if software is None:
    return True
  if 'remote_api' in software:
    return False
  return software.startswith(('Development', 'testutil'))
+ |
+ |
def local_api_url():
  """Return the URL of the GCS emulation endpoint on dev appserver."""
  return 'http://%(host)s%(endpoint)s' % {
      'host': os.environ.get('HTTP_HOST'),
      'endpoint': LOCAL_GCS_ENDPOINT}
+ |
+ |
def memory_usage(method):
  """Decorator that logs app memory usage before and after a method runs.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper with the same call signature that logs
    runtime.memory_usage().current() around the call and returns the
    method's result.
  """
  # Local import keeps the module's top-level import surface unchanged.
  from functools import wraps

  # wraps() preserves method.__name__/__doc__ on the wrapper; without it the
  # decorated function reported itself as 'wrapper' to any introspection.
  @wraps(method)
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper
+ |
+ |
def _add_ns(tagname):
  """Qualify an XML tag name with the GCS response namespace (CS_XML_NS)."""
  return '{%s}%s' % (CS_XML_NS, tagname)
+ |
+ |
# Namespace-qualified tag names appearing in GCS's GET bucket XML response,
# precomputed once at import time. NOTE(review): not referenced in this
# module; presumably consumed by sibling modules that parse/emit that XML.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')