| Index: third_party/gsutil/gslib/commands/setmeta.py
 | 
| diff --git a/third_party/gsutil/gslib/commands/setmeta.py b/third_party/gsutil/gslib/commands/setmeta.py
 | 
| new file mode 100644
 | 
| index 0000000000000000000000000000000000000000..9c39d75befe8f519eb28a0abff26fe9d687523da
 | 
| --- /dev/null
 | 
| +++ b/third_party/gsutil/gslib/commands/setmeta.py
 | 
| @@ -0,0 +1,420 @@
 | 
| +# Copyright 2012 Google Inc. All Rights Reserved.
 | 
| +#coding=utf8
 | 
| +#
 | 
| +# Licensed under the Apache License, Version 2.0 (the "License");
 | 
| +# you may not use this file except in compliance with the License.
 | 
| +# You may obtain a copy of the License at
 | 
| +#
 | 
| +#     http://www.apache.org/licenses/LICENSE-2.0
 | 
| +#
 | 
| +# Unless required by applicable law or agreed to in writing, software
 | 
| +# distributed under the License is distributed on an "AS IS" BASIS,
 | 
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
| +# See the License for the specific language governing permissions and
 | 
| +# limitations under the License.
 | 
| +
 | 
| +import boto
 | 
| +import csv
 | 
| +import random
 | 
| +import StringIO
 | 
| +import time
 | 
| +
 | 
| +from boto.exception import GSResponseError
 | 
| +from boto.s3.key import Key
 | 
| +from gslib.command import COMMAND_NAME
 | 
| +from gslib.command import COMMAND_NAME_ALIASES
 | 
| +from gslib.command import CONFIG_REQUIRED
 | 
| +from gslib.command import Command
 | 
| +from gslib.command import FILE_URIS_OK
 | 
| +from gslib.command import MAX_ARGS
 | 
| +from gslib.command import MIN_ARGS
 | 
| +from gslib.command import PROVIDER_URIS_OK
 | 
| +from gslib.command import SUPPORTED_SUB_ARGS
 | 
| +from gslib.command import URIS_START_ARG
 | 
| +from gslib.exception import CommandException
 | 
| +from gslib.help_provider import HELP_NAME
 | 
| +from gslib.help_provider import HELP_NAME_ALIASES
 | 
| +from gslib.help_provider import HELP_ONE_LINE_SUMMARY
 | 
| +from gslib.help_provider import HELP_TEXT
 | 
| +from gslib.help_provider import HELP_TYPE
 | 
| +from gslib.help_provider import HelpType
 | 
| +from gslib.name_expansion import NameExpansionIterator
 | 
| +from gslib.util import NO_MAX
 | 
| +from gslib.util import Retry
 | 
| +
 | 
| +_detailed_help_text = ("""
 | 
| +<B>SYNOPSIS</B>
 | 
| +    gsutil setmeta [-n] -h [header:value|header] ... uri...
 | 
| +
 | 
| +
 | 
| +<B>DESCRIPTION</B>
 | 
| +  The gsutil setmeta command allows you to set or remove the metadata on one
 | 
| +  or more objects. It takes one or more header arguments followed by one or
 | 
| +  more URIs, where each header argument is in one of two forms:
 | 
| +
 | 
| +    - if you specify header:value, it will set the given header on all
 | 
| +      named objects.
 | 
| +
 | 
| +    - if you specify header (with no value), it will remove the given header
 | 
| +      from all named objects.
 | 
| +
 | 
| +  For example, the following command would set the Content-Type and
 | 
| +  Cache-Control and remove the Content-Disposition on the specified objects:
 | 
| +
 | 
| +    gsutil setmeta -h "Content-Type:text/html" \\
 | 
| +      -h "Cache-Control:public, max-age=3600" \\
 | 
| +      -h "Content-Disposition" gs://bucket/*.html
 | 
| +
 | 
| +  If you have a large number of objects to update you might want to use the
 | 
| +  gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
 | 
| +  update:
 | 
| +
 | 
| +    gsutil -m setmeta -h "Content-Type:text/html" \\
 | 
| +      -h "Cache-Control:public, max-age=3600" \\
 | 
| +      -h "Content-Disposition" gs://bucket/*.html
 | 
| +
 | 
| +  See "gsutil help metadata" for details about how you can set metadata
 | 
| +  while uploading objects, what metadata fields can be set and the meaning of
 | 
| +  these fields, use of custom metadata, and how to view currently set metadata.
 | 
| +
 | 
| +
 | 
| +<B>OPERATION COST</B>
 | 
| +  This command uses four operations per URI (one to read the ACL, one to read
 | 
| +  the current metadata, one to set the new metadata, and one to set the ACL).
 | 
| +
 | 
| +  For cases where you want all objects to have the same ACL you can avoid half
 | 
| +  these operations by setting a default ACL on the bucket(s) containing the
 | 
| +  named objects, and using the setmeta -n option. See "gsutil help setdefacl".
 | 
| +
 | 
| +
 | 
| +<B>OPTIONS</B>
 | 
| +  -h          Specifies a header:value to be added, or header to be removed,
 | 
| +              from each named object.
 | 
| +  -n          Causes the operations for reading and writing the ACL to be
 | 
| +              skipped. This halves the number of operations performed per
 | 
| +              request, improving the speed and reducing the cost of performing
 | 
| +              the operations. This option makes sense for cases where you want
 | 
| +              all objects to have the same ACL, for which you have set a default
 | 
| +              ACL on the bucket(s) containing the objects. See "gsutil help
 | 
| +              setdefacl".
 | 
| +
 | 
| +
 | 
| +<B>OLDER SYNTAX (DEPRECATED)</B>
 | 
| +  The first version of the setmeta command used more complicated syntax
 | 
| +  (described below). gsutil still supports this syntax, to avoid breaking
 | 
| +  existing customer uses, but it is now deprecated and will eventually
 | 
| +  be removed.
 | 
| +
 | 
| +  With this older syntax, the setmeta command accepts a single metadata
 | 
| +  argument in one of two forms:
 | 
| +
 | 
| +    gsutil setmeta [-n] header:value uri...
 | 
| +
 | 
| +  or
 | 
| +
 | 
| +    gsutil setmeta [-n] '"header:value","-header",...' uri...
 | 
| +
 | 
| +  The first form allows you to specify a single header name and value to
 | 
| +  set. For example, the following command would set only the Content-Type
 | 
| +  header (changing nothing else) on each of the specified objects:
 | 
| +
 | 
| +    gsutil setmeta \\
 | 
| +      "Content-Type:text/html" \\
 | 
| +      gs://bucket/*.html
 | 
| +
 | 
| +  This form only works if the header name and value don't contain double
 | 
| +  quotes or commas, and only works for setting the header value (not for
 | 
| +  removing it).
 | 
| +
 | 
| +  The more general form of the first argument allows both setting and removing
 | 
| +  multiple fields, without any of the content restrictions noted above. For
 | 
| +  this variant the first argument is a CSV-formatted list of headers to add
 | 
| +  or remove. Getting the CSV-formatted list to be passed correctly into gsutil
 | 
| +  requires different syntax on Linux or MacOS than it does on Windows.
 | 
| +
 | 
| +  On Linux or MacOS you need to surround the entire argument in single quotes
 | 
| +  to avoid having the shell interpret/strip out the double-quotes in the CSV
 | 
| +  data. For example, the following command would set the Content-Type and
 | 
| +  Cache-Control and remove the Content-Disposition on the specified objects:
 | 
| +
 | 
| +    gsutil setmeta '"Content-Type:text/html","Cache-Control:public, max-age=3600","-Content-Disposition"' gs://bucket/*.html
 | 
| +
 | 
| +  To pass CSV data on Windows you need two sets of double quotes around
 | 
| +  each header/value pair, and one set of double quotes around the entire
 | 
| +  expression. For example, the following command would set the Content-Type
 | 
| +  and Cache-Control and remove the Content-Disposition on the specified objects:
 | 
| +
 | 
| +    gsutil setmeta "\""Content-Type:text/html"",""Cache-Control:public, max-age=3600"",""-Content-Disposition""\" gs://bucket/*.html
 | 
| +
 | 
| +
 | 
| +<B>WARNING ABOUT USING SETMETA WITH VERSIONING ENABLED</B>
 | 
| +
 | 
| +Note that if you use the gsutil setmeta command on an object in a bucket
 | 
| +with versioning enabled (see 'gsutil help versioning'), it will create
 | 
| +a new object version (and thus, you will get charged for the space required
 | 
| +for holding the additional version).
 | 
| +""")
 | 
| +
 | 
| +
 | 
| +class SetMetaCommand(Command):
 | 
| +  """Implementation of gsutil setmeta command."""
 | 
| +
 | 
| +  # Command specification (processed by parent class).
 | 
| +  command_spec = {
 | 
| +    # Name of command.
 | 
| +    COMMAND_NAME : 'setmeta',
 | 
| +    # List of command name aliases.
 | 
| +    COMMAND_NAME_ALIASES : ['setheader'],
 | 
| +    # Min number of args required by this command.
 | 
| +    MIN_ARGS : 1,
 | 
| +    # Max number of args required by this command, or NO_MAX.
 | 
| +    MAX_ARGS : NO_MAX,
 | 
| +    # Getopt-style string specifying acceptable sub args.
 | 
| +    SUPPORTED_SUB_ARGS : 'h:n',
 | 
| +    # True if file URIs acceptable for this command.
 | 
| +    FILE_URIS_OK : False,
 | 
| +    # True if provider-only URIs acceptable for this command.
 | 
| +    PROVIDER_URIS_OK : False,
 | 
| +    # Index in args of first URI arg.
 | 
| +    URIS_START_ARG : 1,
 | 
| +    # True if must configure gsutil before running command.
 | 
| +    CONFIG_REQUIRED : True,
 | 
| +  }
 | 
| +  help_spec = {
 | 
| +    # Name of command or auxiliary help info for which this help applies.
 | 
| +    HELP_NAME : 'setmeta',
 | 
| +    # List of help name aliases.
 | 
| +    HELP_NAME_ALIASES : ['setheader'],
 | 
| +    # Type of help:
 | 
| +    HELP_TYPE : HelpType.COMMAND_HELP,
 | 
| +    # One line summary of this help.
 | 
| +    HELP_ONE_LINE_SUMMARY : 'Set metadata on already uploaded objects',
 | 
| +    # The full help text.
 | 
| +    HELP_TEXT : _detailed_help_text,
 | 
| +  }
 | 
| +
 | 
| +  # Command entry point.
 | 
| +  def RunCommand(self):
 | 
| +    headers = []
 | 
| +    preserve_acl = True
 | 
| +    if self.sub_opts:
 | 
| +      for o, a in self.sub_opts:
 | 
| +        if o == '-n':
 | 
| +          preserve_acl = False
 | 
| +        elif o == '-h':
 | 
| +          headers.append(a)
 | 
| +
 | 
| +    if headers:
 | 
| +      (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)
 | 
| +      uri_args = self.args
 | 
| +    else:
 | 
| +      (metadata_minus, metadata_plus) = self._ParseMetadataSpec(self.args[0])
 | 
| +      uri_args = self.args[1:]
 | 
| +
 | 
| +    if (len(uri_args) == 1
 | 
| +        and not self.suri_builder.StorageUri(uri_args[0]).names_object()):
 | 
| +      raise CommandException('URI (%s) must name an object' % uri_args[0])
 | 
| +
 | 
| +    # Used to track if any objects' metadata failed to be set.
 | 
| +    self.everything_set_okay = True
 | 
| +
 | 
| +    def _SetMetadataExceptionHandler(e):
 | 
| +      """Simple exception handler to allow post-completion status."""
 | 
| +      self.THREADED_LOGGER.error(str(e))
 | 
| +      self.everything_set_okay = False
 | 
| +
 | 
| +    @Retry(GSResponseError, tries=3, delay=1, backoff=2)
 | 
| +    def _SetMetadataFunc(name_expansion_result):
 | 
| +      exp_src_uri = self.suri_builder.StorageUri(
 | 
| +          name_expansion_result.GetExpandedUriStr())
 | 
| +      self.THREADED_LOGGER.info('Setting metadata on %s...', exp_src_uri)
 | 
| +      
 | 
| +      key = exp_src_uri.get_key()
 | 
| +      meta_generation = key.meta_generation
 | 
| +      generation = key.generation
 | 
| +            
 | 
| +      headers = {}
 | 
| +      if generation:
 | 
| +        headers['x-goog-if-generation-match'] = generation
 | 
| +      if meta_generation:
 | 
| +        headers['x-goog-if-metageneration-match'] = meta_generation
 | 
| +          
 | 
| +      # If this fails because of a precondition, it will raise a 
 | 
| +      # GSResponseError for @Retry to handle.
 | 
| +      exp_src_uri.set_metadata(metadata_plus, metadata_minus, preserve_acl, 
 | 
| +                                 headers=headers)
 | 
| +      
 | 
| +    name_expansion_iterator = NameExpansionIterator(
 | 
| +        self.command_name, self.proj_id_handler, self.headers, self.debug,
 | 
| +        self.bucket_storage_uri_class, uri_args, self.recursion_requested,
 | 
| +        self.recursion_requested)
 | 
| +
 | 
| +    # Perform requests in parallel (-m) mode, if requested, using
 | 
| +    # configured number of parallel processes and threads. Otherwise,
 | 
| +    # perform requests with sequential function calls in current process.
 | 
| +    self.Apply(_SetMetadataFunc, name_expansion_iterator,
 | 
| +               _SetMetadataExceptionHandler)
 | 
| +
 | 
| +    if not self.everything_set_okay:
 | 
| +      raise CommandException('Metadata for some objects could not be set.')
 | 
| +
 | 
| +    return 0
 | 
| +
 | 
| +  def _ParseMetadataHeaders(self, headers):
 | 
| +    metadata_minus = set()
 | 
| +    cust_metadata_minus = set()
 | 
| +    metadata_plus = {}
 | 
| +    cust_metadata_plus = {}
 | 
| +    # Build a count of the keys encountered from each plus and minus arg so we
 | 
| +    # can check for dupe field specs.
 | 
| +    num_metadata_plus_elems = 0
 | 
| +    num_cust_metadata_plus_elems = 0
 | 
| +    num_metadata_minus_elems = 0
 | 
| +    num_cust_metadata_minus_elems = 0
 | 
| +
 | 
| +    for md_arg in headers:
 | 
| +      parts = md_arg.split(':')
 | 
| +      if len(parts) not in (1, 2):
 | 
| +        raise CommandException(
 | 
| +            'Invalid argument: must be either header or header:value (%s)' %
 | 
| +            md_arg)
 | 
| +      if len(parts) == 2:
 | 
| +        (header, value) = parts
 | 
| +      else:
 | 
| +        (header, value) = (parts[0], None)
 | 
| +      _InsistAsciiHeader(header)
 | 
| +      # Translate headers to lowercase to match the casing assumed by our
 | 
| +      # sanity-checking operations.
 | 
| +      header = header.lower()
 | 
| +      if value:
 | 
| +        if _IsCustomMeta(header):
 | 
| +          # Allow non-ASCII data for custom metadata fields. Don't unicode
 | 
| +          # encode other fields because that would perturb their content
 | 
| +          # (e.g., adding %2F's into the middle of a Cache-Control value).
 | 
| +          value = unicode(value, 'utf-8')
 | 
| +          cust_metadata_plus[header] = value
 | 
| +          num_cust_metadata_plus_elems += 1
 | 
| +        else:
 | 
| +          metadata_plus[header] = value
 | 
| +          num_metadata_plus_elems += 1
 | 
| +      else:
 | 
| +        if _IsCustomMeta(header):
 | 
| +          cust_metadata_minus.add(header)
 | 
| +          num_cust_metadata_minus_elems += 1
 | 
| +        else:
 | 
| +          metadata_minus.add(header)
 | 
| +          num_metadata_minus_elems += 1
 | 
| +    if (num_metadata_plus_elems != len(metadata_plus)
 | 
| +        or num_cust_metadata_plus_elems != len(cust_metadata_plus)
 | 
| +        or num_metadata_minus_elems != len(metadata_minus)
 | 
| +        or num_cust_metadata_minus_elems != len(cust_metadata_minus)
 | 
| +        or metadata_minus.intersection(set(metadata_plus.keys()))):
 | 
| +      raise CommandException('Each header must appear at most once.')
 | 
| +    other_than_base_fields = (set(metadata_plus.keys())
 | 
| +        .difference(Key.base_user_settable_fields))
 | 
| +    other_than_base_fields.update(
 | 
| +        metadata_minus.difference(Key.base_user_settable_fields))
 | 
| +    for f in other_than_base_fields:
 | 
| +      # This check is overly simple; it would be stronger to check, for each
 | 
| +      # URI argument, whether f.startswith the
 | 
| +      # uri.get_provider().metadata_prefix, but here we just parse the spec
 | 
| +      # once, before processing any of the URIs. This means we will not
 | 
| +      # detect if the user tries to set an x-goog-meta- field on another
 | 
| +      # provider's object, for example.
 | 
| +      if not _IsCustomMeta(f):
 | 
| +        raise CommandException('Invalid or disallowed header (%s).\n'
 | 
| +                               'Only these fields (plus x-goog-meta-* fields)'
 | 
| +                               ' can be set or unset:\n%s' % (f,
 | 
| +                               sorted(list(Key.base_user_settable_fields))))
 | 
| +    metadata_plus.update(cust_metadata_plus)
 | 
| +    metadata_minus.update(cust_metadata_minus)
 | 
| +    return (metadata_minus, metadata_plus)
 | 
| +
 | 
| +  def _ParseMetadataSpec(self, spec):
 | 
| +    self.THREADED_LOGGER.info('WARNING: metadata spec syntax (%s)\nis '
 | 
| +                              'deprecated and will eventually be removed.\n'
 | 
| +                              'Please see "gsutil help setmeta" for current '
 | 
| +                              'syntax' % spec)
 | 
| +    metadata_minus = set()
 | 
| +    cust_metadata_minus = set()
 | 
| +    metadata_plus = {}
 | 
| +    cust_metadata_plus = {}
 | 
| +    # Build a count of the keys encountered from each plus and minus arg so we
 | 
| +    # can check for dupe field specs.
 | 
| +    num_metadata_plus_elems = 0
 | 
| +    num_cust_metadata_plus_elems = 0
 | 
| +    num_metadata_minus_elems = 0
 | 
| +    num_cust_metadata_minus_elems = 0
 | 
| +
 | 
| +    mdf = StringIO.StringIO(spec)
 | 
| +    for md_arg in csv.reader(mdf).next():
 | 
| +      if not md_arg:
 | 
| +        raise CommandException(
 | 
| +            'Invalid empty metadata specification component.')
 | 
| +      if md_arg[0] == '-':
 | 
| +        header = md_arg[1:]
 | 
| +        if header.find(':') != -1:
 | 
| +          raise CommandException('Removal spec may not contain ":" (%s).' %
 | 
| +                                 header)
 | 
| +        _InsistAsciiHeader(header)
 | 
| +        # Translate headers to lowercase to match the casing required by
 | 
| +        # uri.set_metadata().
 | 
| +        header = header.lower()
 | 
| +        if _IsCustomMeta(header):
 | 
| +          cust_metadata_minus.add(header)
 | 
| +          num_cust_metadata_minus_elems += 1
 | 
| +        else:
 | 
| +          metadata_minus.add(header)
 | 
| +          num_metadata_minus_elems += 1
 | 
| +      else:
 | 
| +        parts = md_arg.split(':', 1)
 | 
| +        if len(parts) != 2:
 | 
| +          raise CommandException(
 | 
| +              'Fields being added must include values (%s).' % md_arg)
 | 
| +        (header, value) = parts
 | 
| +        _InsistAsciiHeader(header)
 | 
| +        header = header.lower()
 | 
| +        if _IsCustomMeta(header):
 | 
| +          # Allow non-ASCII data for custom metadata fields. Don't unicode
 | 
| +          # encode other fields because that would perturb their content
 | 
| +          # (e.g., adding %2F's into the middle of a Cache-Control value).
 | 
| +          value = unicode(value, 'utf-8')
 | 
| +          cust_metadata_plus[header] = value
 | 
| +          num_cust_metadata_plus_elems += 1
 | 
| +        else:
 | 
| +          metadata_plus[header] = value
 | 
| +          num_metadata_plus_elems += 1
 | 
| +    mdf.close()
 | 
| +    if (num_metadata_plus_elems != len(metadata_plus)
 | 
| +        or num_cust_metadata_plus_elems != len(cust_metadata_plus)
 | 
| +        or num_metadata_minus_elems != len(metadata_minus)
 | 
| +        or num_cust_metadata_minus_elems != len(cust_metadata_minus)
 | 
| +        or metadata_minus.intersection(set(metadata_plus.keys()))):
 | 
| +      raise CommandException('Each header must appear at most once.')
 | 
| +    other_than_base_fields = (set(metadata_plus.keys())
 | 
| +        .difference(Key.base_user_settable_fields))
 | 
| +    other_than_base_fields.update(
 | 
| +        metadata_minus.difference(Key.base_user_settable_fields))
 | 
| +    for f in other_than_base_fields:
 | 
| +      # This check is overly simple; it would be stronger to check, for each
 | 
| +      # URI argument, whether f.startswith the
 | 
| +      # uri.get_provider().metadata_prefix, but here we just parse the spec
 | 
| +      # once, before processing any of the URIs. This means we will not
 | 
| +      # detect if the user tries to set an x-goog-meta- field on another
 | 
| +      # provider's object, for example.
 | 
| +      if not _IsCustomMeta(f):
 | 
| +        raise CommandException('Invalid or disallowed header (%s).\n'
 | 
| +                               'Only these fields (plus x-goog-meta-* fields)'
 | 
| +                               ' can be set or unset:\n%s' % (f,
 | 
| +                               sorted(list(Key.base_user_settable_fields))))
 | 
| +    metadata_plus.update(cust_metadata_plus)
 | 
| +    metadata_minus.update(cust_metadata_minus)
 | 
| +    return (metadata_minus, metadata_plus)
 | 
| +
 | 
| +
 | 
| +def _InsistAsciiHeader(header):
 | 
| +  if not all(ord(c) < 128 for c in header):
 | 
| +    raise CommandException('Invalid non-ASCII header (%s).' % header)
 | 
| +
 | 
| +def _IsCustomMeta(header):
 | 
| +  return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-')
 | 
| 
 |