Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(326)

Unified Diff: third_party/gsutil/gslib/commands/setmeta.py

Issue 12317103: Added gsutil to depot tools (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/gsutil/gslib/commands/setdefacl.py ('k') | third_party/gsutil/gslib/commands/setversioning.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/gsutil/gslib/commands/setmeta.py
diff --git a/third_party/gsutil/gslib/commands/setmeta.py b/third_party/gsutil/gslib/commands/setmeta.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c39d75befe8f519eb28a0abff26fe9d687523da
--- /dev/null
+++ b/third_party/gsutil/gslib/commands/setmeta.py
@@ -0,0 +1,420 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+#coding=utf8
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import boto
+import csv
+import random
+import StringIO
+import time
+
+from boto.exception import GSResponseError
+from boto.s3.key import Key
+from gslib.command import COMMAND_NAME
+from gslib.command import COMMAND_NAME_ALIASES
+from gslib.command import CONFIG_REQUIRED
+from gslib.command import Command
+from gslib.command import FILE_URIS_OK
+from gslib.command import MAX_ARGS
+from gslib.command import MIN_ARGS
+from gslib.command import PROVIDER_URIS_OK
+from gslib.command import SUPPORTED_SUB_ARGS
+from gslib.command import URIS_START_ARG
+from gslib.exception import CommandException
+from gslib.help_provider import HELP_NAME
+from gslib.help_provider import HELP_NAME_ALIASES
+from gslib.help_provider import HELP_ONE_LINE_SUMMARY
+from gslib.help_provider import HELP_TEXT
+from gslib.help_provider import HELP_TYPE
+from gslib.help_provider import HelpType
+from gslib.name_expansion import NameExpansionIterator
+from gslib.util import NO_MAX
+from gslib.util import Retry
+
+_detailed_help_text = ("""
+<B>SYNOPSIS</B>
+ gsutil setmeta [-n] -h [header:value|header] ... uri...
+
+
+<B>DESCRIPTION</B>
+ The gsutil setmeta command allows you to set or remove the metadata on one
+ or more objects. It takes one or more header arguments followed by one or
+ more URIs, where each header argument is in one of two forms:
+
+ - if you specify header:value, it will set the given header on all
+ named objects.
+
+ - if you specify header (with no value), it will remove the given header
+ from all named objects.
+
+ For example, the following command would set the Content-Type and
+ Cache-Control and remove the Content-Disposition on the specified objects:
+
+ gsutil setmeta -h "Content-Type:text/html" \\
+ -h "Cache-Control:public, max-age=3600" \\
+ -h "Content-Disposition" gs://bucket/*.html
+
+ If you have a large number of objects to update you might want to use the
+ gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
+ update:
+
+ gsutil -m setmeta -h "Content-Type:text/html" \\
+ -h "Cache-Control:public, max-age=3600" \\
+ -h "Content-Disposition" gs://bucket/*.html
+
+ See "gsutil help metadata" for details about how you can set metadata
+ while uploading objects, what metadata fields can be set and the meaning of
+ these fields, use of custom metadata, and how to view currently set metadata.
+
+
+<B>OPERATION COST</B>
+ This command uses four operations per URI (one to read the ACL, one to read
+ the current metadata, one to set the new metadata, and one to set the ACL).
+
+ For cases where you want all objects to have the same ACL you can avoid half
+ these operations by setting a default ACL on the bucket(s) containing the
+ named objects, and using the setmeta -n option. See "help gsutil setdefacl".
+
+
+<B>OPTIONS</B>
+ -h Specifies a header:value to be added, or header to be removed,
+ from each named object.
+ -n Causes the operations for reading and writing the ACL to be
+ skipped. This halves the number of operations performed per
+ request, improving the speed and reducing the cost of performing
+ the operations. This option makes sense for cases where you want
+ all objects to have the same ACL, for which you have set a default
+ ACL on the bucket(s) containing the objects. See "help gsutil
+ setdefacl".
+
+
+<B>OLDER SYNTAX (DEPRECATED)</B>
+ The first version of the setmeta command used more complicated syntax
+ (described below). gsutil still supports this syntax, to avoid breaking
+ existing customer uses, but it is now deprecated and will eventually
+ be removed.
+
+ With this older syntax, the setmeta command accepts a single metadata
+ argument in one of two forms:
+
+ gsutil setmeta [-n] header:value uri...
+
+ or
+
+ gsutil setmeta [-n] '"header:value","-header",...' uri...
+
+ The first form allows you to specify a single header name and value to
+ set. For example, the following command would set the Content-Type and
+ Cache-Control and remove the Content-Disposition on the specified objects:
+
+ gsutil setmeta -h "Content-Type:text/html" \\
+ -h "Cache-Control:public, max-age=3600" \\
+ -h "Content-Disposition" gs://bucket/*.html
+
+ This form only works if the header name and value don't contain double
+ quotes or commas, and only works for setting the header value (not for
+ removing it).
+
+ The more general form of the first argument allows both setting and removing
+ multiple fields, without any of the content restrictions noted above. For
+ this variant the first argument is a CSV-formatted list of headers to add
+ or remove. Getting the CSV-formatted list to be passed correctly into gsutil
+ requires different syntax on Linux or MacOS than it does on Windows.
+
+ On Linux or MacOS you need to surround the entire argument in single quotes
+ to avoid having the shell interpret/strip out the double-quotes in the CSV
+ data. For example, the following command would set the Content-Type and
+ Cache-Control and remove the Content-Disposition on the specified objects:
+
+ gsutil setmeta '"Content-Type:text/html","Cache-Control:public, max-age=3600","-Content-Disposition"' gs://bucket/*.html
+
+ To pass CSV data on Windows you need two sets of double quotes around
+ each header/value pair, and one set of double quotes around the entire
+ expression. For example, the following command would set the Content-Type
+ and Cache-Control and remove the Content-Disposition on the specified objects:
+
+ gsutil setmeta "\""Content-Type:text/html"",""Cache-Control:public, max-age=3600"",""-Content-Disposition""\" gs://bucket/*.html
+
+
+<B>WARNING ABOUT USING SETMETA WITH VERSIONING ENABLED</B>
+
+Note that if you use the gsutil setmeta command on an object in a bucket
+with versioning enabled (see 'gsutil help versioning'), it will create
+a new object version (and thus, you will get charged for the space required
+for holding the additional version).
+""")
+
+
+class SetMetaCommand(Command):
+ """Implementation of gsutil setmeta command."""
+
+ # Command specification (processed by parent class).
+ command_spec = {
+ # Name of command.
+ COMMAND_NAME : 'setmeta',
+ # List of command name aliases.
+ COMMAND_NAME_ALIASES : ['setheader'],
+ # Min number of args required by this command.
+ MIN_ARGS : 1,
+ # Max number of args required by this command, or NO_MAX.
+ MAX_ARGS : NO_MAX,
+ # Getopt-style string specifying acceptable sub args.
+ SUPPORTED_SUB_ARGS : 'h:n',
+ # True if file URIs acceptable for this command.
+ FILE_URIS_OK : False,
+ # True if provider-only URIs acceptable for this command.
+ PROVIDER_URIS_OK : False,
+ # Index in args of first URI arg.
+ URIS_START_ARG : 1,
+ # True if must configure gsutil before running command.
+ CONFIG_REQUIRED : True,
+ }
+ help_spec = {
+ # Name of command or auxiliary help info for which this help applies.
+ HELP_NAME : 'setmeta',
+ # List of help name aliases.
+ HELP_NAME_ALIASES : ['setheader'],
+ # Type of help:
+ HELP_TYPE : HelpType.COMMAND_HELP,
+ # One line summary of this help.
+ HELP_ONE_LINE_SUMMARY : 'Set metadata on already uploaded objects',
+ # The full help text.
+ HELP_TEXT : _detailed_help_text,
+ }
+
+ # Command entry point.
+ def RunCommand(self):
+ headers = []
+ preserve_acl = True
+ if self.sub_opts:
+ for o, a in self.sub_opts:
+ if o == '-n':
+ preserve_acl = False
+ elif o == '-h':
+ headers.append(a)
+
+ if headers:
+ (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)
+ uri_args = self.args
+ else:
+ (metadata_minus, metadata_plus) = self._ParseMetadataSpec(self.args[0])
+ uri_args = self.args[1:]
+
+ if (len(uri_args) == 1
+ and not self.suri_builder.StorageUri(uri_args[0]).names_object()):
+ raise CommandException('URI (%s) must name an object' % uri_args[0])
+
+ # Used to track if any objects' metadata failed to be set.
+ self.everything_set_okay = True
+
+ def _SetMetadataExceptionHandler(e):
+ """Simple exception handler to allow post-completion status."""
+ self.THREADED_LOGGER.error(str(e))
+ self.everything_set_okay = False
+
+ @Retry(GSResponseError, tries=3, delay=1, backoff=2)
+ def _SetMetadataFunc(name_expansion_result):
+ exp_src_uri = self.suri_builder.StorageUri(
+ name_expansion_result.GetExpandedUriStr())
+ self.THREADED_LOGGER.info('Setting metadata on %s...', exp_src_uri)
+
+ key = exp_src_uri.get_key()
+ meta_generation = key.meta_generation
+ generation = key.generation
+
+ headers = {}
+ if generation:
+ headers['x-goog-if-generation-match'] = generation
+ if meta_generation:
+ headers['x-goog-if-metageneration-match'] = meta_generation
+
+ # If this fails because of a precondition, it will raise a
+ # GSResponseError for @Retry to handle.
+ exp_src_uri.set_metadata(metadata_plus, metadata_minus, preserve_acl,
+ headers=headers)
+
+ name_expansion_iterator = NameExpansionIterator(
+ self.command_name, self.proj_id_handler, self.headers, self.debug,
+ self.bucket_storage_uri_class, uri_args, self.recursion_requested,
+ self.recursion_requested)
+
+ # Perform requests in parallel (-m) mode, if requested, using
+ # configured number of parallel processes and threads. Otherwise,
+ # perform requests with sequential function calls in current process.
+ self.Apply(_SetMetadataFunc, name_expansion_iterator,
+ _SetMetadataExceptionHandler)
+
+ if not self.everything_set_okay:
+ raise CommandException('Metadata for some objects could not be set.')
+
+ return 0
+
+ def _ParseMetadataHeaders(self, headers):
+ metadata_minus = set()
+ cust_metadata_minus = set()
+ metadata_plus = {}
+ cust_metadata_plus = {}
+ # Build a count of the keys encountered from each plus and minus arg so we
+ # can check for dupe field specs.
+ num_metadata_plus_elems = 0
+ num_cust_metadata_plus_elems = 0
+ num_metadata_minus_elems = 0
+ num_cust_metadata_minus_elems = 0
+
+ for md_arg in headers:
+ parts = md_arg.split(':')
+ if len(parts) not in (1, 2):
+ raise CommandException(
+ 'Invalid argument: must be either header or header:value (%s)' %
+ md_arg)
+ if len(parts) == 2:
+ (header, value) = parts
+ else:
+ (header, value) = (parts[0], None)
+ _InsistAsciiHeader(header)
+ # Translate headers to lowercase to match the casing assumed by our
+ # sanity-checking operations.
+ header = header.lower()
+ if value:
+ if _IsCustomMeta(header):
+ # Allow non-ASCII data for custom metadata fields. Don't unicode
+ # encode other fields because that would perturb their content
+ # (e.g., adding %2F's into the middle of a Cache-Control value).
+ value = unicode(value, 'utf-8')
+ cust_metadata_plus[header] = value
+ num_cust_metadata_plus_elems += 1
+ else:
+ metadata_plus[header] = value
+ num_metadata_plus_elems += 1
+ else:
+ if _IsCustomMeta(header):
+ cust_metadata_minus.add(header)
+ num_cust_metadata_minus_elems += 1
+ else:
+ metadata_minus.add(header)
+ num_metadata_minus_elems += 1
+ if (num_metadata_plus_elems != len(metadata_plus)
+ or num_cust_metadata_plus_elems != len(cust_metadata_plus)
+ or num_metadata_minus_elems != len(metadata_minus)
+ or num_cust_metadata_minus_elems != len(cust_metadata_minus)
+ or metadata_minus.intersection(set(metadata_plus.keys()))):
+ raise CommandException('Each header must appear at most once.')
+ other_than_base_fields = (set(metadata_plus.keys())
+ .difference(Key.base_user_settable_fields))
+ other_than_base_fields.update(
+ metadata_minus.difference(Key.base_user_settable_fields))
+ for f in other_than_base_fields:
+ # This check is overly simple; it would be stronger to check, for each
+ # URI argument, whether f.startswith the
+ # uri.get_provider().metadata_prefix, but here we just parse the spec
+ # once, before processing any of the URIs. This means we will not
+ # detect if the user tries to set an x-goog-meta- field on an another
+ # provider's object, for example.
+ if not _IsCustomMeta(f):
+ raise CommandException('Invalid or disallowed header (%s).\n'
+ 'Only these fields (plus x-goog-meta-* fields)'
+ ' can be set or unset:\n%s' % (f,
+ sorted(list(Key.base_user_settable_fields))))
+ metadata_plus.update(cust_metadata_plus)
+ metadata_minus.update(cust_metadata_minus)
+ return (metadata_minus, metadata_plus)
+
+ def _ParseMetadataSpec(self, spec):
+ self.THREADED_LOGGER.info('WARNING: metadata spec syntax (%s)\nis '
+ 'deprecated and will eventually be removed.\n'
+ 'Please see "gsutil help setmeta" for current '
+ 'syntax' % spec)
+ metadata_minus = set()
+ cust_metadata_minus = set()
+ metadata_plus = {}
+ cust_metadata_plus = {}
+ # Build a count of the keys encountered from each plus and minus arg so we
+ # can check for dupe field specs.
+ num_metadata_plus_elems = 0
+ num_cust_metadata_plus_elems = 0
+ num_metadata_minus_elems = 0
+ num_cust_metadata_minus_elems = 0
+
+ mdf = StringIO.StringIO(spec)
+ for md_arg in csv.reader(mdf).next():
+ if not md_arg:
+ raise CommandException(
+ 'Invalid empty metadata specification component.')
+ if md_arg[0] == '-':
+ header = md_arg[1:]
+ if header.find(':') != -1:
+ raise CommandException('Removal spec may not contain ":" (%s).' %
+ header)
+ _InsistAsciiHeader(header)
+ # Translate headers to lowercase to match the casing required by
+ # uri.set_metadata().
+ header = header.lower()
+ if _IsCustomMeta(header):
+ cust_metadata_minus.add(header)
+ num_cust_metadata_minus_elems += 1
+ else:
+ metadata_minus.add(header)
+ num_metadata_minus_elems += 1
+ else:
+ parts = md_arg.split(':', 1)
+ if len(parts) != 2:
+ raise CommandException(
+ 'Fields being added must include values (%s).' % md_arg)
+ (header, value) = parts
+ _InsistAsciiHeader(header)
+ header = header.lower()
+ if _IsCustomMeta(header):
+ # Allow non-ASCII data for custom metadata fields. Don't unicode
+ # encode other fields because that would perturb their content
+ # (e.g., adding %2F's into the middle of a Cache-Control value).
+ value = unicode(value, 'utf-8')
+ cust_metadata_plus[header] = value
+ num_cust_metadata_plus_elems += 1
+ else:
+ metadata_plus[header] = value
+ num_metadata_plus_elems += 1
+ mdf.close()
+ if (num_metadata_plus_elems != len(metadata_plus)
+ or num_cust_metadata_plus_elems != len(cust_metadata_plus)
+ or num_metadata_minus_elems != len(metadata_minus)
+ or num_cust_metadata_minus_elems != len(cust_metadata_minus)
+ or metadata_minus.intersection(set(metadata_plus.keys()))):
+ raise CommandException('Each header must appear at most once.')
+ other_than_base_fields = (set(metadata_plus.keys())
+ .difference(Key.base_user_settable_fields))
+ other_than_base_fields.update(
+ metadata_minus.difference(Key.base_user_settable_fields))
+ for f in other_than_base_fields:
+ # This check is overly simple; it would be stronger to check, for each
+ # URI argument, whether f.startswith the
+ # uri.get_provider().metadata_prefix, but here we just parse the spec
+ # once, before processing any of the URIs. This means we will not
+ # detect if the user tries to set an x-goog-meta- field on an another
+ # provider's object, for example.
+ if not _IsCustomMeta(f):
+ raise CommandException('Invalid or disallowed header (%s).\n'
+ 'Only these fields (plus x-goog-meta-* fields)'
+ ' can be set or unset:\n%s' % (f,
+ sorted(list(Key.base_user_settable_fields))))
+ metadata_plus.update(cust_metadata_plus)
+ metadata_minus.update(cust_metadata_minus)
+ return (metadata_minus, metadata_plus)
+
+
+def _InsistAsciiHeader(header):
+ if not all(ord(c) < 128 for c in header):
+ raise CommandException('Invalid non-ASCII header (%s).' % header)
+
+def _IsCustomMeta(header):
+ return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-')
« no previous file with comments | « third_party/gsutil/gslib/commands/setdefacl.py ('k') | third_party/gsutil/gslib/commands/setversioning.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698