Chromium Code Reviews

Unified Diff: third_party/cloud_storage/cloudstorage/cloudstorage_api.py

Issue 1031663002: Increase maximum file upload to 100MB, use cloudstorage python library (Closed) Base URL: https://github.com/dart-lang/pub-dartlang.git@master
Patch Set: Add deprecation comment to old cloud_storage.py:open() function Created 5 years, 9 months ago
Index: third_party/cloud_storage/cloudstorage/cloudstorage_api.py
diff --git a/third_party/cloud_storage/cloudstorage/cloudstorage_api.py b/third_party/cloud_storage/cloudstorage/cloudstorage_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba8be862fb5210e65d1b0525069679f58d5bfb12
--- /dev/null
+++ b/third_party/cloud_storage/cloudstorage/cloudstorage_api.py
@@ -0,0 +1,451 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+# either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+"""File Interface for Google Cloud Storage."""
+
+
+
+from __future__ import with_statement
+
+
+
+__all__ = ['delete',
+ 'listbucket',
+ 'open',
+ 'stat',
+ ]
+
+import logging
+import StringIO
+import urllib
+import xml.etree.cElementTree as ET
+from . import api_utils
+from . import common
+from . import errors
+from . import storage_api
+
+
+
+def open(filename,
+ mode='r',
+ content_type=None,
+ options=None,
+ read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE,
+ retry_params=None,
+ _account_id=None):
+ """Opens a Google Cloud Storage file and returns it as a File-like object.
+
+ Args:
+ filename: A Google Cloud Storage filename of form '/bucket/filename'.
+ mode: 'r' for reading mode. 'w' for writing mode.
+ In reading mode, the file must exist. In writing mode, a file will
+ be created or overwritten.
+ content_type: The MIME type of the file. str. Only valid in writing mode.
+ options: A str->basestring dict to specify additional headers to pass to
+ GCS e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
+ Supported options are x-goog-acl, x-goog-meta-, cache-control,
+ content-disposition, and content-encoding.
+ Only valid in writing mode.
+ See https://developers.google.com/storage/docs/reference-headers
+ for details.
+ read_buffer_size: The buffer size for reads. The reader keeps one
+ buffer and prefetches another. To minimize blocking for large files,
+ always read in buffer-size chunks. To minimize the number of RPC
+ requests for small files, set a large buffer size. Max is 30MB.
+ retry_params: An instance of api_utils.RetryParams for subsequent calls
+ to GCS from this file handle. If None, the default one is used.
+ _account_id: Internal-use only.
+
+ Returns:
+ A reading or writing buffer that supports a file-like interface. The
+ buffer must be closed after operations are done.
+
+ Raises:
+ errors.AuthorizationError: if authorization failed.
+ errors.NotFoundError: if an object that's expected to exist doesn't.
+ ValueError: invalid open mode or if content_type or options are specified
+ in reading mode.
+ """
+ common.validate_file_path(filename)
+ api = storage_api._get_storage_api(retry_params=retry_params,
+ account_id=_account_id)
+ filename = api_utils._quote_filename(filename)
+
+ if mode == 'w':
+ common.validate_options(options)
+ return storage_api.StreamingBuffer(api, filename, content_type, options)
+ elif mode == 'r':
+ if content_type or options:
+ raise ValueError('Options and content_type can only be specified '
+ 'for writing mode.')
+ return storage_api.ReadBuffer(api,
+ filename,
+ buffer_size=read_buffer_size)
+ else:
+ raise ValueError('Invalid mode %s.' % mode)
+
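+# Editor's sketch (not part of the library): a minimal round trip through
+# open(). Assumes an App Engine runtime where this module's dependencies
+# are available; the bucket and object names below are hypothetical.
+def _example_open_round_trip():
+  with open('/my-bucket/demo.txt', 'w', content_type='text/plain',
+            options={'x-goog-acl': 'private'}) as gcs_file:
+    gcs_file.write('hello world\n')
+  with open('/my-bucket/demo.txt') as gcs_file:
+    return gcs_file.read()
+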
+
+def delete(filename, retry_params=None, _account_id=None):
+ """Delete a Google Cloud Storage file.
+
+ Args:
+ filename: A Google Cloud Storage filename of form '/bucket/filename'.
+ retry_params: An api_utils.RetryParams for this call to GCS. If None,
+ the default one is used.
+ _account_id: Internal-use only.
+
+ Raises:
+ errors.NotFoundError: if the file doesn't exist prior to deletion.
+ """
+ api = storage_api._get_storage_api(retry_params=retry_params,
+ account_id=_account_id)
+ common.validate_file_path(filename)
+ filename = api_utils._quote_filename(filename)
+ status, resp_headers, content = api.delete_object(filename)
+ errors.check_status(status, [204], filename, resp_headers=resp_headers,
+ body=content)
+
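+# Editor's sketch (hypothetical helper): delete() raises NotFoundError for
+# a missing object, so callers that want idempotent deletion can swallow it.
+def _example_delete_if_present(filename):
+  try:
+    delete(filename)
+  except errors.NotFoundError:
+    pass
+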
+
+def stat(filename, retry_params=None, _account_id=None):
+ """Get GCSFileStat of a Google Cloud storage file.
+
+ Args:
+ filename: A Google Cloud Storage filename of form '/bucket/filename'.
+ retry_params: An api_utils.RetryParams for this call to GCS. If None,
+ the default one is used.
+ _account_id: Internal-use only.
+
+ Returns:
+ A GCSFileStat object containing info about this file.
+
+ Raises:
+ errors.AuthorizationError: if authorization failed.
+ errors.NotFoundError: if an object that's expected to exist doesn't.
+ """
+ common.validate_file_path(filename)
+ api = storage_api._get_storage_api(retry_params=retry_params,
+ account_id=_account_id)
+ status, headers, content = api.head_object(
+ api_utils._quote_filename(filename))
+ errors.check_status(status, [200], filename, resp_headers=headers,
+ body=content)
+ file_stat = common.GCSFileStat(
+ filename=filename,
+ st_size=common.get_stored_content_length(headers),
+ st_ctime=common.http_time_to_posix(headers.get('last-modified')),
+ etag=headers.get('etag'),
+ content_type=headers.get('content-type'),
+ metadata=common.get_metadata(headers))
+
+ return file_stat
+
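+# Editor's sketch: reading the fields that stat() fills in on the returned
+# GCSFileStat. The object path is hypothetical.
+def _example_log_stat(filename='/my-bucket/demo.txt'):
+  file_stat = stat(filename)
+  logging.info('%s: %s bytes, content-type %s, etag %s',
+               file_stat.filename, file_stat.st_size,
+               file_stat.content_type, file_stat.etag)
+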
+
+def _copy2(src, dst, metadata=None, retry_params=None):
+ """Copy the file content from src to dst.
+
+ Internal use only!
+
+ Args:
+ src: /bucket/filename
+ dst: /bucket/filename
+ metadata: a dict of metadata for this copy. If None, old metadata is copied.
+ For example, {'x-goog-meta-foo': 'bar'}.
+ retry_params: An api_utils.RetryParams for this call to GCS. If None,
+ the default one is used.
+
+ Raises:
+ errors.AuthorizationError: if authorization failed.
+ errors.NotFoundError: if an object that's expected to exist doesn't.
+ """
+ common.validate_file_path(src)
+ common.validate_file_path(dst)
+
+ if metadata is None:
+ metadata = {}
+ copy_meta = 'COPY'
+ else:
+ copy_meta = 'REPLACE'
+ metadata.update({'x-goog-copy-source': src,
+ 'x-goog-metadata-directive': copy_meta})
+
+ api = storage_api._get_storage_api(retry_params=retry_params)
+ status, resp_headers, content = api.put_object(
+ api_utils._quote_filename(dst), headers=metadata)
+ errors.check_status(status, [200], src, metadata, resp_headers, body=content)
+
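+# Editor's sketch (names hypothetical; _copy2 is internal-use only): a bare
+# _copy2(src, dst) preserves the source metadata (COPY directive), while
+# passing a metadata dict rewrites it (REPLACE directive).
+def _example_copy_replacing_metadata():
+  _copy2('/my-bucket/src.txt', '/my-bucket/dst.txt',
+         metadata={'x-goog-meta-owner': 'editor'})
+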
+
+def listbucket(path_prefix, marker=None, prefix=None, max_keys=None,
+ delimiter=None, retry_params=None, _account_id=None):
+ """Returns a GCSFileStat iterator over a bucket.
+
+ Optional arguments can limit the result to a subset of files under bucket.
+
+ This function has two modes:
+ 1. List bucket mode: Lists all files in the bucket without any concept of
+ hierarchy. GCS doesn't have real directory hierarchies.
+ 2. Directory emulation mode: If you specify the 'delimiter' argument,
+ it is used as a path separator to emulate a hierarchy of directories.
+ In this mode, the "path_prefix" argument should end in the specified
+ delimiter (thus designating a logical directory). The logical
+ directory's contents, both files and subdirectories, are listed. The
+ names of subdirectories returned will end with the delimiter, so
+ listbucket can be called with a subdirectory name to list that
+ subdirectory's contents.
+
+ Args:
+ path_prefix: A Google Cloud Storage path of format "/bucket" or
+ "/bucket/prefix". Only objects whose fullpath starts with the
+ path_prefix will be returned.
+ marker: Another path prefix. Only objects whose full path sorts
+ lexicographically after the marker will be returned (exclusive).
+ prefix: Deprecated. Use path_prefix.
+ max_keys: The limit on the number of objects to return. int.
+ For best performance, specify max_keys only if you know how many objects
+ you want. Otherwise, this method requests large batches and handles
+ pagination for you.
+ delimiter: Used to turn on directory mode. A str of one or more chars
+ that your bucket uses as its directory separator.
+ retry_params: An api_utils.RetryParams for this call to GCS. If None,
+ the default one is used.
+ _account_id: Internal-use only.
+
+ Examples:
+ For files "/bucket/a", "/bucket/bar/1", "/bucket/foo",
+ "/bucket/foo/1", "/bucket/foo/2/1", "/bucket/foo/3/1":
+
+ Regular mode:
+ listbucket("/bucket/f", marker="/bucket/foo/1")
+ will match "/bucket/foo/2/1", "/bucket/foo/3/1".
+
+ Directory mode:
+ listbucket("/bucket/", delimiter="/")
+ will match "/bucket/a, "/bucket/bar/" "/bucket/foo", "/bucket/foo/".
+ listbucket("/bucket/foo/", delimiter="/")
+ will match "/bucket/foo/1", "/bucket/foo/2/", "/bucket/foo/3/"
+
+ Returns:
+ Regular mode:
+ A GCSFileStat iterator over matched files ordered by filename.
+ The iterator returns GCSFileStat objects. filename, etag, st_size,
+ st_ctime, and is_dir are set.
+
+ Directory emulation mode:
+ A GCSFileStat iterator over matched files and directories ordered by
+ name. The iterator returns GCSFileStat objects. For directories,
+ only the filename and is_dir fields are set.
+
+ The last name yielded can be used as next call's marker.
+ """
+ if prefix:
+ common.validate_bucket_path(path_prefix)
+ bucket = path_prefix
+ else:
+ bucket, prefix = common._process_path_prefix(path_prefix)
+
+ if marker and marker.startswith(bucket):
+ marker = marker[len(bucket) + 1:]
+
+ api = storage_api._get_storage_api(retry_params=retry_params,
+ account_id=_account_id)
+ options = {}
+ if marker:
+ options['marker'] = marker
+ if max_keys:
+ options['max-keys'] = max_keys
+ if prefix:
+ options['prefix'] = prefix
+ if delimiter:
+ options['delimiter'] = delimiter
+
+ return _Bucket(api, bucket, options)
+
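+# Editor's sketch of the two listing modes described above (bucket and
+# object names hypothetical). Regular mode walks every object under the
+# prefix; directory mode stops at the delimiter and yields is_dir entries.
+def _example_listbucket_modes():
+  # Regular mode: every object whose full path starts with '/my-bucket/foo'.
+  for file_stat in listbucket('/my-bucket/foo'):
+    logging.info('file: %s', file_stat.filename)
+  # Directory emulation mode: immediate children of '/my-bucket/' only.
+  for file_stat in listbucket('/my-bucket/', delimiter='/'):
+    if file_stat.is_dir:
+      logging.info('dir: %s', file_stat.filename)
+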
+
+class _Bucket(object):
+ """A wrapper for a GCS bucket as the return value of listbucket."""
+
+ def __init__(self, api, path, options):
+ """Initialize.
+
+ Args:
+ api: storage_api instance.
+ path: bucket path of form '/bucket'.
+ options: a dict of listbucket options. Please see listbucket doc.
+ """
+ self._init(api, path, options)
+
+ def _init(self, api, path, options):
+ self._api = api
+ self._path = path
+ self._options = options.copy()
+ self._get_bucket_fut = self._api.get_bucket_async(
+ self._path + '?' + urllib.urlencode(self._options))
+ self._last_yield = None
+ self._new_max_keys = self._options.get('max-keys')
+
+ def __getstate__(self):
+ options = self._options
+ if self._last_yield:
+ options['marker'] = self._last_yield.filename[len(self._path) + 1:]
+ if self._new_max_keys is not None:
+ options['max-keys'] = self._new_max_keys
+ return {'api': self._api,
+ 'path': self._path,
+ 'options': options}
+
+ def __setstate__(self, state):
+ self._init(state['api'], state['path'], state['options'])
+
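+  # Editor's note (illustrative, names hypothetical): these two methods
+  # make a partially consumed _Bucket picklable. The pickled state records
+  # the last yielded filename as the next request's marker, so iteration
+  # resumes where it stopped:
+  #
+  #   stats = listbucket('/my-bucket/')
+  #   it = iter(stats)
+  #   first = it.next()                        # consume one entry
+  #   resumed = pickle.loads(pickle.dumps(stats))
+  #   for file_stat in resumed:                # continues after `first`
+  #     ...
+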
+ def __iter__(self):
+ """Iter over the bucket.
+
+ Yields:
+ GCSFileStat: a GCSFileStat for an object in the bucket.
+ They are ordered by GCSFileStat.filename.
+ """
+ total = 0
+ max_keys = self._options.get('max-keys')
+
+ while self._get_bucket_fut:
+ status, resp_headers, content = self._get_bucket_fut.get_result()
+ errors.check_status(status, [200], self._path, resp_headers=resp_headers,
+ body=content, extras=self._options)
+
+ if self._should_get_another_batch(content):
+ self._get_bucket_fut = self._api.get_bucket_async(
+ self._path + '?' + urllib.urlencode(self._options))
+ else:
+ self._get_bucket_fut = None
+
+ root = ET.fromstring(content)
+ dirs = self._next_dir_gen(root)
+ files = self._next_file_gen(root)
+ next_file = files.next()
+ next_dir = dirs.next()
+
+ while ((max_keys is None or total < max_keys) and
+ not (next_file is None and next_dir is None)):
+ total += 1
+ if next_file is None:
+ self._last_yield = next_dir
+ next_dir = dirs.next()
+ elif next_dir is None:
+ self._last_yield = next_file
+ next_file = files.next()
+ elif next_dir < next_file:
+ self._last_yield = next_dir
+ next_dir = dirs.next()
+ elif next_file < next_dir:
+ self._last_yield = next_file
+ next_file = files.next()
+ else:
+ logging.error(
+ 'Should never reach here. next file is %r. next dir is %r.',
+ next_file, next_dir)
+ if self._new_max_keys:
+ self._new_max_keys -= 1
+ yield self._last_yield
+
+ def _next_file_gen(self, root):
+ """Generator for next file element in the document.
+
+ Args:
+ root: root element of the XML tree.
+
+ Yields:
+ GCSFileStat for the next file.
+ """
+ for e in root.getiterator(common._T_CONTENTS):
+ st_ctime, size, etag, key = None, None, None, None
+ for child in e.getiterator('*'):
+ if child.tag == common._T_LAST_MODIFIED:
+ st_ctime = common.dt_str_to_posix(child.text)
+ elif child.tag == common._T_ETAG:
+ etag = child.text
+ elif child.tag == common._T_SIZE:
+ size = child.text
+ elif child.tag == common._T_KEY:
+ key = child.text
+ yield common.GCSFileStat(self._path + '/' + key,
+ size, etag, st_ctime)
+ e.clear()
+ yield None
+
+ def _next_dir_gen(self, root):
+ """Generator for next directory element in the document.
+
+ Args:
+ root: root element in the XML tree.
+
+ Yields:
+ GCSFileStat for the next directory.
+ """
+ for e in root.getiterator(common._T_COMMON_PREFIXES):
+ yield common.GCSFileStat(
+ self._path + '/' + e.find(common._T_PREFIX).text,
+ st_size=None, etag=None, st_ctime=None, is_dir=True)
+ e.clear()
+ yield None
+
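+  # Editor's note: the two generators above, and _should_get_another_batch
+  # below, all walk a GCS ListBucketResult document. Schematically
+  # (abridged; pagination fields sit near the top of the response):
+  #
+  #   <ListBucketResult>
+  #     <IsTruncated>true</IsTruncated>
+  #     <NextMarker>foo/3</NextMarker>
+  #     <Contents>
+  #       <Key>foo/1</Key>
+  #       <LastModified>2015-03-24T00:00:00.000Z</LastModified>
+  #       <ETag>"..."</ETag>
+  #       <Size>11</Size>
+  #     </Contents>
+  #     <CommonPrefixes><Prefix>foo/2/</Prefix></CommonPrefixes>
+  #   </ListBucketResult>
+  #
+  # The trailing `yield None` in each generator is a sentinel that lets
+  # __iter__ merge the two sorted streams without handling StopIteration.
+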
+ def _should_get_another_batch(self, content):
+ """Whether to issue another GET bucket call.
+
+ Args:
+ content: response XML.
+
+ Returns:
+ True if another request should be issued; in that case self._options
+ is also updated for the next request. False otherwise.
+ """
+ if ('max-keys' in self._options and
+ self._options['max-keys'] <= common._MAX_GET_BUCKET_RESULT):
+ return False
+
+ elements = self._find_elements(
+ content, set([common._T_IS_TRUNCATED,
+ common._T_NEXT_MARKER]))
+ if elements.get(common._T_IS_TRUNCATED, 'false').lower() != 'true':
+ return False
+
+ next_marker = elements.get(common._T_NEXT_MARKER)
+ if next_marker is None:
+ self._options.pop('marker', None)
+ return False
+ self._options['marker'] = next_marker
+ return True
+
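+  # Editor's note: the max-keys check above short-circuits pagination when
+  # the caller asked for no more results than a single GET bucket call can
+  # return (common._MAX_GET_BUCKET_RESULT); only truncated, larger listings
+  # trigger a follow-up request keyed on the NextMarker.
+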
+ def _find_elements(self, result, elements):
+ """Find interesting elements from XML.
+
+ This function tries to look only for the specified elements,
+ without parsing the entire XML document. It works best when the
+ specified elements are located near the beginning.
+
+ Args:
+ result: response XML.
+ elements: a set of interesting element tags.
+
+ Returns:
+ A dict from element tag to element value.
+ """
+ element_mapping = {}
+ result = StringIO.StringIO(result)
+ for _, e in ET.iterparse(result, events=('end',)):
+ if not elements:
+ break
+ if e.tag in elements:
+ element_mapping[e.tag] = e.text
+ elements.remove(e.tag)
+ return element_mapping
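+
+
+# Editor's sketch (self-contained, toy document): the early-exit parse that
+# _find_elements performs, shown on a hand-written ListBucketResult. With
+# 'end' events, iterparse stops before ever reaching <Contents>.
+def _example_early_exit_parse():
+  document = ('<ListBucketResult>'
+              '<IsTruncated>true</IsTruncated>'
+              '<NextMarker>foo/3</NextMarker>'
+              '<Contents><Key>never parsed</Key></Contents>'
+              '</ListBucketResult>')
+  wanted = set(['IsTruncated', 'NextMarker'])
+  found = {}
+  for _, element in ET.iterparse(StringIO.StringIO(document),
+                                 events=('end',)):
+    if element.tag in wanted:
+      found[element.tag] = element.text
+      wanted.remove(element.tag)
+    if not wanted:
+      break
+  return found  # {'IsTruncated': 'true', 'NextMarker': 'foo/3'}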
