Index: third_party/cloud_storage/cloudstorage/cloudstorage_api.py |
diff --git a/third_party/cloud_storage/cloudstorage/cloudstorage_api.py b/third_party/cloud_storage/cloudstorage/cloudstorage_api.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..ba8be862fb5210e65d1b0525069679f58d5bfb12 |
--- /dev/null |
+++ b/third_party/cloud_storage/cloudstorage/cloudstorage_api.py |
@@ -0,0 +1,451 @@ |
+# Copyright 2012 Google Inc. All Rights Reserved. |
+# |
+# Licensed under the Apache License, Version 2.0 (the "License"); |
+# you may not use this file except in compliance with the License. |
+# You may obtain a copy of the License at |
+# |
+# http://www.apache.org/licenses/LICENSE-2.0 |
+# |
+# Unless required by applicable law or agreed to in writing, |
+# software distributed under the License is distributed on an |
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, |
+# either express or implied. See the License for the specific |
+# language governing permissions and limitations under the License. |
+ |
+"""File Interface for Google Cloud Storage.""" |
+ |
+ |
+ |
+from __future__ import with_statement |
+ |
+ |
+ |
+__all__ = ['delete', |
+ 'listbucket', |
+ 'open', |
+ 'stat', |
+ ] |
+ |
+import logging |
+import StringIO |
+import urllib |
+import xml.etree.cElementTree as ET |
+from . import api_utils |
+from . import common |
+from . import errors |
+from . import storage_api |
+ |
+ |
+ |
def open(filename,
         mode='r',
         content_type=None,
         options=None,
         read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE,
         retry_params=None,
         _account_id=None):
  """Open a Google Cloud Storage file and return a File-like object.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    mode: 'r' for reading mode. 'w' for writing mode.
      In reading mode, the file must exist. In writing mode, a file will
      be created or be overwritten.
    content_type: The MIME type of the file. str. Only valid in writing mode.
    options: A str->basestring dict to specify additional headers to pass to
      GCS e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
      Supported options are x-goog-acl, x-goog-meta-, cache-control,
      content-disposition, and content-encoding.
      Only valid in writing mode.
      See https://developers.google.com/storage/docs/reference-headers
      for details.
    read_buffer_size: The buffer size for read. Read keeps a buffer
      and prefetches another one. To minimize blocking for large files,
      always read by buffer size. To minimize number of RPC requests for
      small files, set a large buffer size. Max is 30MB.
    retry_params: An instance of api_utils.RetryParams for subsequent calls
      to GCS from this file handle. If None, the default one is used.
    _account_id: Internal-use only.

  Returns:
    A reading or writing buffer that supports a File-like interface. The
    buffer must be closed after operations are done.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
    ValueError: invalid open mode or if content_type or options are specified
      in reading mode.
  """
  # Validate and normalize the path before dispatching on mode, so path
  # errors surface the same way regardless of mode.
  common.validate_file_path(filename)
  client = storage_api._get_storage_api(retry_params=retry_params,
                                        account_id=_account_id)
  quoted_filename = api_utils._quote_filename(filename)

  if mode not in ('r', 'w'):
    raise ValueError('Invalid mode %s.' % mode)

  if mode == 'w':
    common.validate_options(options)
    return storage_api.StreamingBuffer(client, quoted_filename,
                                       content_type, options)

  # Reading mode: write-only arguments are rejected.
  if content_type or options:
    raise ValueError('Options and content_type can only be specified '
                     'for writing mode.')
  return storage_api.ReadBuffer(client,
                                quoted_filename,
                                buffer_size=read_buffer_size)
+ |
+ |
def delete(filename, retry_params=None, _account_id=None):
  """Delete a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Raises:
    errors.NotFoundError: if the file doesn't exist prior to deletion.
  """
  # Validate the path before constructing the API client, consistent with
  # open() and stat(), so an invalid filename never triggers client setup.
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  filename = api_utils._quote_filename(filename)
  status, resp_headers, content = api.delete_object(filename)
  # A successful GCS DELETE returns 204 No Content; anything else raises.
  errors.check_status(status, [204], filename, resp_headers=resp_headers,
                      body=content)
+ |
+ |
def stat(filename, retry_params=None, _account_id=None):
  """Get the GCSFileStat of a Google Cloud Storage file.

  Issues a HEAD request for the object and translates the response headers
  into a GCSFileStat.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Returns:
    A GCSFileStat object containing info about this file.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(filename)
  client = storage_api._get_storage_api(retry_params=retry_params,
                                        account_id=_account_id)
  status, headers, content = client.head_object(
      api_utils._quote_filename(filename))
  errors.check_status(status, [200], filename, resp_headers=headers,
                      body=content)
  # All stat fields come straight from the HEAD response headers.
  return common.GCSFileStat(
      filename=filename,
      st_size=common.get_stored_content_length(headers),
      st_ctime=common.http_time_to_posix(headers.get('last-modified')),
      etag=headers.get('etag'),
      content_type=headers.get('content-type'),
      metadata=common.get_metadata(headers))
+ |
+ |
def _copy2(src, dst, metadata=None, retry_params=None):
  """Copy the file content from src to dst.

  Internal use only!

  Args:
    src: /bucket/filename
    dst: /bucket/filename
    metadata: a dict of metadata for this copy. If None, old metadata is
      copied. For example, {'x-goog-meta-foo': 'bar'}.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(src)
  common.validate_file_path(dst)

  # Build a private header dict instead of mutating the caller's metadata
  # dict in place: the original implementation leaked the
  # x-goog-copy-source/x-goog-metadata-directive keys into the argument.
  if metadata is None:
    # COPY directive: preserve the source object's existing metadata.
    copy_headers = {'x-goog-metadata-directive': 'COPY'}
  else:
    # REPLACE directive: the supplied metadata replaces the old metadata.
    copy_headers = dict(metadata)
    copy_headers['x-goog-metadata-directive'] = 'REPLACE'
  copy_headers['x-goog-copy-source'] = src

  api = storage_api._get_storage_api(retry_params=retry_params)
  status, resp_headers, content = api.put_object(
      api_utils._quote_filename(dst), headers=copy_headers)
  errors.check_status(status, [200], src, copy_headers, resp_headers,
                      body=content)
+ |
+ |
def listbucket(path_prefix, marker=None, prefix=None, max_keys=None,
               delimiter=None, retry_params=None, _account_id=None):
  """Return a GCSFileStat iterator over a bucket.

  Optional arguments can limit the result to a subset of files under bucket.

  This function has two modes:
  1. List bucket mode: Lists all files in the bucket without any concept of
     hierarchy. GCS doesn't have real directory hierarchies.
  2. Directory emulation mode: If you specify the 'delimiter' argument,
     it is used as a path separator to emulate a hierarchy of directories.
     In this mode, the "path_prefix" argument should end in the delimiter
     specified (thus designates a logical directory). The logical
     directory's contents, both files and subdirectories, are listed. The
     names of subdirectories returned will end with the delimiter. So
     listbucket can be called with the subdirectory name to list the
     subdirectory's contents.

  Args:
    path_prefix: A Google Cloud Storage path of format "/bucket" or
      "/bucket/prefix". Only objects whose fullpath starts with the
      path_prefix will be returned.
    marker: Another path prefix. Only objects whose fullpath starts
      lexicographically after marker will be returned (exclusive).
    prefix: Deprecated. Use path_prefix.
    max_keys: The limit on the number of objects to return. int.
      For best performance, specify max_keys only if you know how many
      objects you want. Otherwise, this method requests large batches and
      handles pagination for you.
    delimiter: Use to turn on directory mode. str of one or multiple chars
      that your bucket uses as its directory separator.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Examples:
    For files "/bucket/a",
              "/bucket/bar/1"
              "/bucket/foo",
              "/bucket/foo/1", "/bucket/foo/2/1", "/bucket/foo/3/1",

    Regular mode:
      listbucket("/bucket/f", marker="/bucket/foo/1")
      will match "/bucket/foo/2/1", "/bucket/foo/3/1".

    Directory mode:
      listbucket("/bucket/", delimiter="/")
      will match "/bucket/a, "/bucket/bar/" "/bucket/foo", "/bucket/foo/".
      listbucket("/bucket/foo/", delimiter="/")
      will match "/bucket/foo/1", "/bucket/foo/2/", "/bucket/foo/3/"

  Returns:
    Regular mode:
      A GCSFileStat iterator over matched files ordered by filename.
      The iterator returns GCSFileStat objects. filename, etag, st_size,
      st_ctime, and is_dir are set.

    Directory emulation mode:
      A GCSFileStat iterator over matched files and directories ordered by
      name. The iterator returns GCSFileStat objects. For directories,
      only the filename and is_dir fields are set.

    The last name yielded can be used as next call's marker.
  """
  # Legacy calling convention: an explicit (deprecated) prefix argument
  # means path_prefix is a bare bucket path.
  if prefix:
    common.validate_bucket_path(path_prefix)
    bucket = path_prefix
  else:
    bucket, prefix = common._process_path_prefix(path_prefix)

  # The GCS XML API expects a marker relative to the bucket, so strip the
  # leading "/bucket/" if the caller passed a full path.
  if marker and marker.startswith(bucket):
    marker = marker[len(bucket) + 1:]

  client = storage_api._get_storage_api(retry_params=retry_params,
                                        account_id=_account_id)

  # Collect only the options the caller actually supplied.
  request_options = {}
  for option_name, option_value in (('marker', marker),
                                    ('max-keys', max_keys),
                                    ('prefix', prefix),
                                    ('delimiter', delimiter)):
    if option_value:
      request_options[option_name] = option_value

  return _Bucket(client, bucket, request_options)
+ |
+ |
class _Bucket(object):
  """A wrapper for a GCS bucket as the return value of listbucket.

  Iterable and picklable: iteration merges the file and subdirectory
  entries of successive GET-bucket pages, and pickling captures enough
  state (marker of the last yielded name, remaining max-keys budget) to
  resume iteration where it left off.
  """

  def __init__(self, api, path, options):
    """Initialize.

    Args:
      api: storage_api instance.
      path: bucket path of form '/bucket'.
      options: a dict of listbucket options. Please see listbucket doc.
    """
    self._init(api, path, options)

  def _init(self, api, path, options):
    # Shared by __init__ and __setstate__ so unpickling rebuilds the
    # same state, including the in-flight request.
    self._api = api
    self._path = path
    # Copy so later marker/max-keys updates don't mutate the caller's dict.
    self._options = options.copy()
    # Kick off the first GET-bucket request asynchronously right away so
    # the response can arrive while the caller does other work.
    self._get_bucket_fut = self._api.get_bucket_async(
        self._path + '?' + urllib.urlencode(self._options))
    # Full path of the most recently yielded GCSFileStat; used as the
    # resume marker when this object is pickled mid-iteration.
    self._last_yield = None
    # Remaining max-keys budget, decremented per yield; None if unlimited.
    self._new_max_keys = self._options.get('max-keys')

  def __getstate__(self):
    # Serialize as (api, path, options) with the options rewritten so a
    # restored instance resumes right after the last yielded name.
    options = self._options
    if self._last_yield:
      # Markers are bucket-relative; strip the '/bucket/' prefix.
      options['marker'] = self._last_yield.filename[len(self._path) + 1:]
    if self._new_max_keys is not None:
      options['max-keys'] = self._new_max_keys
    return {'api': self._api,
            'path': self._path,
            'options': options}

  def __setstate__(self, state):
    self._init(state['api'], state['path'], state['options'])

  def __iter__(self):
    """Iter over the bucket.

    Yields:
      GCSFileStat: a GCSFileStat for an object in the bucket.
        They are ordered by GCSFileStat.filename.
    """
    total = 0
    max_keys = self._options.get('max-keys')

    while self._get_bucket_fut:
      status, resp_headers, content = self._get_bucket_fut.get_result()
      errors.check_status(status, [200], self._path, resp_headers=resp_headers,
                          body=content, extras=self._options)

      # Issue the next page request before parsing this page, so network
      # latency overlaps with XML processing and yielding.
      if self._should_get_another_batch(content):
        self._get_bucket_fut = self._api.get_bucket_async(
            self._path + '?' + urllib.urlencode(self._options))
      else:
        self._get_bucket_fut = None

      root = ET.fromstring(content)
      dirs = self._next_dir_gen(root)
      files = self._next_file_gen(root)
      # Both generators yield a trailing None sentinel when exhausted.
      next_file = files.next()
      next_dir = dirs.next()

      # Merge the two name-ordered streams (files and subdirectories)
      # so output stays ordered by name, stopping at max_keys.
      while ((max_keys is None or total < max_keys) and
             not (next_file is None and next_dir is None)):
        total += 1
        if next_file is None:
          self._last_yield = next_dir
          next_dir = dirs.next()
        elif next_dir is None:
          self._last_yield = next_file
          next_file = files.next()
        elif next_dir < next_file:
          self._last_yield = next_dir
          next_dir = dirs.next()
        elif next_file < next_dir:
          self._last_yield = next_file
          next_file = files.next()
        else:
          # A name can't be both a file and a directory in one listing.
          logging.error(
              'Should never reach. next file is %r. next dir is %r.',
              next_file, next_dir)
        if self._new_max_keys:
          self._new_max_keys -= 1
        yield self._last_yield

  def _next_file_gen(self, root):
    """Generator for next file element in the document.

    Args:
      root: root element of the XML tree.

    Yields:
      GCSFileStat for the next file, then a final None sentinel.
    """
    for e in root.getiterator(common._T_CONTENTS):
      st_ctime, size, etag, key = None, None, None, None
      for child in e.getiterator('*'):
        if child.tag == common._T_LAST_MODIFIED:
          st_ctime = common.dt_str_to_posix(child.text)
        elif child.tag == common._T_ETAG:
          etag = child.text
        elif child.tag == common._T_SIZE:
          size = child.text
        elif child.tag == common._T_KEY:
          key = child.text
      yield common.GCSFileStat(self._path + '/' + key,
                               size, etag, st_ctime)
      # Drop child elements already consumed to keep memory bounded.
      e.clear()
    yield None

  def _next_dir_gen(self, root):
    """Generator for next directory element in the document.

    Args:
      root: root element in the XML tree.

    Yields:
      GCSFileStat for the next directory, then a final None sentinel.
    """
    for e in root.getiterator(common._T_COMMON_PREFIXES):
      yield common.GCSFileStat(
          self._path + '/' + e.find(common._T_PREFIX).text,
          st_size=None, etag=None, st_ctime=None, is_dir=True)
      e.clear()
    yield None

  def _should_get_another_batch(self, content):
    """Whether to issue another GET bucket call.

    Args:
      content: response XML.

    Returns:
      True if should, also update self._options for the next request.
      False otherwise.
    """
    # If the whole request fits in one server batch, this page is final.
    if ('max-keys' in self._options and
        self._options['max-keys'] <= common._MAX_GET_BUCKET_RESULT):
      return False

    elements = self._find_elements(
        content, set([common._T_IS_TRUNCATED,
                      common._T_NEXT_MARKER]))
    if elements.get(common._T_IS_TRUNCATED, 'false').lower() != 'true':
      return False

    next_marker = elements.get(common._T_NEXT_MARKER)
    if next_marker is None:
      # Truncated but no marker given: cannot continue; clear any stale
      # marker so pickled state doesn't re-request the same page.
      self._options.pop('marker', None)
      return False
    self._options['marker'] = next_marker
    return True

  def _find_elements(self, result, elements):
    """Find interesting elements from XML.

    This function tries to only look for specified elements
    without parsing the entire XML. The specified elements is better
    located near the beginning.

    Args:
      result: response XML.
      elements: a set of interesting element tags.

    Returns:
      A dict from element tag to element value.
    """
    element_mapping = {}
    result = StringIO.StringIO(result)
    # Incremental parse; stop as soon as all wanted tags are found.
    for _, e in ET.iterparse(result, events=('end',)):
      if not elements:
        break
      if e.tag in elements:
        element_mapping[e.tag] = e.text
        elements.remove(e.tag)
    return element_mapping