third_party/gsutil/gslib/commands/cp.py - Issue 12317103: Added gsutil to depot tools

Side by Side Diff: third_party/gsutil/gslib/commands/cp.py

Issue 12317103: Added gsutil to depot tools (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # Copyright 2011 Google Inc. All Rights Reserved.

	2 # Copyright 2011, Nexenta Systems Inc.

	3 #

	4 # Licensed under the Apache License, Version 2.0 (the "License");

	5 # you may not use this file except in compliance with the License.

	6 # You may obtain a copy of the License at

	7 #

	8 # http://www.apache.org/licenses/LICENSE-2.0

	9 #

	10 # Unless required by applicable law or agreed to in writing, software

	11 # distributed under the License is distributed on an "AS IS" BASIS,

	12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

	13 # See the License for the specific language governing permissions and

	14 # limitations under the License.

	15

	16 import boto

	17 import errno

	18 import gzip

	19 import hashlib

	20 import mimetypes

	21 import os

	22 import platform

	23 import re

	24 import subprocess

	25 import stat

	26 import sys

	27 import tempfile

	28 import threading

	29 import time

	30

	31 from boto import config

	32 from boto.exception import GSResponseError

	33 from boto.exception import ResumableUploadException

	34 from boto.gs.resumable_upload_handler import ResumableUploadHandler

	35 from boto.s3.keyfile import KeyFile

	36 from boto.s3.resumable_download_handler import ResumableDownloadHandler

	37 from boto.storage_uri import BucketStorageUri

	38 from gslib.command import COMMAND_NAME

	39 from gslib.command import COMMAND_NAME_ALIASES

	40 from gslib.command import CONFIG_REQUIRED

	41 from gslib.command import Command

	42 from gslib.command import FILE_URIS_OK

	43 from gslib.command import MAX_ARGS

	44 from gslib.command import MIN_ARGS

	45 from gslib.command import PROVIDER_URIS_OK

	46 from gslib.command import SUPPORTED_SUB_ARGS

	47 from gslib.command import URIS_START_ARG

	48 from gslib.exception import CommandException

	49 from gslib.help_provider import HELP_NAME

	50 from gslib.help_provider import HELP_NAME_ALIASES

	51 from gslib.help_provider import HELP_ONE_LINE_SUMMARY

	52 from gslib.help_provider import HELP_TEXT

	53 from gslib.help_provider import HELP_TYPE

	54 from gslib.help_provider import HelpType

	55 from gslib.name_expansion import NameExpansionIterator

	56 from gslib.util import ExtractErrorDetail

	57 from gslib.util import IS_WINDOWS

	58 from gslib.util import MakeHumanReadable

	59 from gslib.util import NO_MAX

	60 from gslib.util import TWO_MB

	61 from gslib.wildcard_iterator import ContainsWildcard

	62

	63 _detailed_help_text = ("""

	64 <B>SYNOPSIS</B>

	65 gsutil cp [OPTION]... src_uri dst_uri

	66 - or -

	67 gsutil cp [OPTION]... src_uri... dst_uri

	68 - or -

	69 gsutil cp [OPTION]... -I dst_uri

	70

	71

	72 <B>DESCRIPTION</B>

	73 The gsutil cp command allows you to copy data between your local file

	74 system and the cloud, copy data within the cloud, and copy data between

	75 cloud storage providers. For example, to copy all text files from the

	76 local directory to a bucket you could do:

	77

	78 gsutil cp *.txt gs://my_bucket

	79

	80 Similarly, you can download text files from a bucket by doing:

	81

	82 gsutil cp gs://my_bucket/*.txt .

	83

	84 If you want to copy an entire directory tree you need to use the -R option:

	85

	86 gsutil cp -R dir gs://my_bucket

	87

	88 If you have a large number of files to upload you might want to use the

	89 gsutil -m option, to perform a parallel (multi-threaded/multi-processing)

	90 copy:

	91

	92 gsutil -m cp -R dir gs://my_bucket

	93

	94 You can pass a list of URIs to copy on STDIN instead of as command line

	95 arguments by using the -I option. This allows you to use gsutil in a

	96 pipeline to copy files and objects as generated by a program, such as:

	97

	98 some_program \| gsutil -m cp -I gs://my_bucket

	99

	100 The contents of STDIN can name files, cloud URIs, and wildcards of files

	101 and cloud URIs.

	102

	103

	104 <B>HOW NAMES ARE CONSTRUCTED</B>

	105 The gsutil cp command strives to name objects in a way consistent with how

	106 Linux cp works, which causes names to be constructed in varying ways depending

	107 on whether you're performing a recursive directory copy or copying

	108 individually named objects; and whether you're copying to an existing or

	109 non-existent directory.

	110

	111 When performing recursive directory copies, object names are constructed

	112 that mirror the source directory structure starting at the point of

	113 recursive processing. For example, the command:

	114

	115 gsutil cp -R dir1/dir2 gs://my_bucket

	116

	117 will create objects named like gs://my_bucket/dir2/a/b/c, assuming

	118 dir1/dir2 contains the file a/b/c.

	119

	120 In contrast, copying individually named files will result in objects named

	121 by the final path component of the source files. For example, the command:

	122

	123 gsutil cp dir1/dir2/** gs://my_bucket

	124

	125 will create objects named like gs://my_bucket/c.

	126

	127 The same rules apply for downloads: recursive copies of buckets and

	128 bucket subdirectories produce a mirrored filename structure, while copying

	129 individually (or wildcard) named objects produce flatly named files.

	130

	131 Note that in the above example the '**' wildcard matches all names

	132 anywhere under dir. The wildcard '*' will match names just one level deep. For

	133 more details see 'gsutil help wildcards'.

	134

	135 There's an additional wrinkle when working with subdirectories: the resulting

	136 names depend on whether the destination subdirectory exists. For example,

	137 if gs://my_bucket/subdir exists as a subdirectory, the command:

	138

	139 gsutil cp -R dir1/dir2 gs://my_bucket/subdir

	140

	141 will create objects named like gs://my_bucket/subdir/dir2/a/b/c. In contrast,

	142 if gs://my_bucket/subdir does not exist, this same gsutil cp command will

	143 create objects named like gs://my_bucket/subdir/a/b/c.

	144

	145

	146 <B>COPYING TO/FROM SUBDIRECTORIES; DISTRIBUTING TRANSFERS ACROSS MACHINES</B>

	147 You can use gsutil to copy to and from subdirectories by using a command like:

	148

	149 gsutil cp -R dir gs://my_bucket/data

	150

	151 This will cause dir and all of its files and nested subdirectories to be

	152 copied under the specified destination, resulting in objects with names like

	153 gs://my_bucket/data/dir/a/b/c. Similarly you can download from bucket

	154 subdirectories by using a command like:

	155

	156 gsutil cp -R gs://my_bucket/data dir

	157

	158 This will cause everything nested under gs://my_bucket/data to be downloaded

	159 into dir, resulting in files with names like dir/data/a/b/c.

	160

	161 Copying subdirectories is useful if you want to add data to an existing

	162 bucket directory structure over time. It's also useful if you want

	163 to parallelize uploads and downloads across multiple machines (often

	164 reducing overall transfer time compared with simply running gsutil -m

	165 cp on one machine). For example, if your bucket contains this structure:

	166

	167 gs://my_bucket/data/result_set_01/

	168 gs://my_bucket/data/result_set_02/

	169 ...

	170 gs://my_bucket/data/result_set_99/

	171

	172 you could perform concurrent downloads across 3 machines by running these

	173 commands on each machine, respectively:

	174

	175 gsutil -m cp -R gs://my_bucket/data/result_set_[0-3]* dir

	176 gsutil -m cp -R gs://my_bucket/data/result_set_[4-6]* dir

	177 gsutil -m cp -R gs://my_bucket/data/result_set_[7-9]* dir

	178

	179 Note that dir could be a local directory on each machine, or it could

	180 be a directory mounted off of a shared file server; whether the latter

	181 performs acceptably may depend on a number of things, so we recommend

	182 you experiment and find out what works best for you.

	183

	184

	185 <B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B>

	186 If both the source and destination URI are cloud URIs from the same

	187 provider, gsutil copies data "in the cloud" (i.e., without downloading

	188 to and uploading from the machine where you run gsutil). In addition to

	189 the performance and cost advantages of doing this, copying in the cloud

	190 preserves metadata (like Content-Type and Cache-Control). In contrast,

	191 when you download data from the cloud it ends up in a file, which has

	192 no associated metadata. Thus, unless you have some way to hold on to

	193 or re-create that metadata, downloading to a file will not retain the

	194 metadata.

	195

	196 Note that by default, the gsutil cp command does not copy the object

	197 ACL to the new object, and instead will use the default bucket ACL (see

	198 "gsutil help setdefacl"). You can override this behavior with the -p

	199 option (see OPTIONS below).

	200

	201 gsutil does not preserve metadata when copying objects between providers.

	202

	203

	204 <B>RESUMABLE TRANSFERS</B>

	205 gsutil automatically uses the Google Cloud Storage resumable upload

	206 feature whenever you use the cp command to upload an object that is larger

	207 than 2 MB. You do not need to specify any special command line options

	208 to make this happen. If your upload is interrupted you can restart the

	209 upload by running the same cp command that you ran to start the upload.

	210

	211 Similarly, gsutil automatically performs resumable downloads (using HTTP

	212 standard Range GET operations) whenever you use the cp command to download an

	213 object larger than 2 MB.

	214

	215 Resumable uploads and downloads store some state information in a file

	216 in ~/.gsutil named by the destination object or file. If you attempt to

	217 resume a transfer from a machine with a different directory, the transfer

	218 will start over from scratch.

	219

	220 See also "gsutil help prod" for details on using resumable transfers

	221 in production.

	222

	223

	224 <B>STREAMING TRANSFERS</B>

	225 Use '-' in place of src_uri or dst_uri to perform a streaming

	226 transfer. For example:

	227 long_running_computation \| gsutil cp - gs://my_bucket/obj

	228

	229 Streaming transfers do not support resumable uploads/downloads.

	230 (The Google resumable transfer protocol has a way to support streaming

	231 transfers, but gsutil doesn't currently implement support for this.)

	232

	233

	234 <B>CHANGING TEMP DIRECTORIES</B>

	235 gsutil writes data to a temporary directory in several cases:

	236 - when compressing data to be uploaded (see the -z option)

	237 - when decompressing data being downloaded (when the data has

	238 Content-Encoding:gzip, e.g., as happens when uploaded using gsutil cp -z)

	239 - when running integration tests (using the gsutil test command)

	240

	241 In these cases it's possible the temp file location on your system that

	242 gsutil selects by default may not have enough space. If you find that

	243 gsutil runs out of space during one of these operations (e.g., raising

	244 "CommandException: Inadequate temp space available to compress <your file>"

	245 during a gsutil cp -z operation), you can change where it writes these

	246 temp files by setting the TMPDIR environment variable. On Linux and MacOS

	247 you can do this either by running gsutil this way:

	248

	249 TMPDIR=/some/directory gsutil cp ...

	250

	251 or by adding this line to your ~/.bashrc file and then restarting the shell

	252 before running gsutil:

	253

	254 export TMPDIR=/some/directory

	255

	256 On Windows 7 you can change the TMPDIR environment variable from Start ->

	257 Computer -> System -> Advanced System Settings -> Environment Variables.

	258 You need to reboot after making this change for it to take effect. (Rebooting

	259 is not necessary after running the export command on Linux and MacOS.)

	260

	261

	262 <B>OPTIONS</B>

	263 -a canned_acl Sets named canned_acl when uploaded objects are created. See

	264 'gsutil help acls' for further details.

	265

	266 -c If an error occurs, continue to attempt to copy the remaining

	267 files.

	268

	269 -D Copy in "daisy chain" mode, i.e., copying between two buckets by

	270 hooking a download to an upload, via the machine where gsutil is

	271 run. By default, data are copied between two buckets "in the

	272 cloud", i.e., without needing to copy via the machine where

	273 gsutil runs. However, copy-in-the-cloud is not supported when

	274 copying between different locations (like US and EU) or between

	275 different storage classes (like STANDARD and

	276 DURABLE_REDUCED_AVAILABILITY). For these cases, you can use the

	277 -D option to copy data between buckets.

	278 Note: Daisy chain mode is automatically used when copying

	279 between providers (e.g., to copy data from Google Cloud Storage

	280 to another provider).

	281

	282 -e Exclude symlinks. When specified, symbolic links will not be

	283 copied.

	284

	285 -n No-clobber. When specified, existing files or objects at the

	286 destination will not be overwritten. Any items that are skipped

	287 by this option will be reported as being skipped. This option

	288 will perform an additional HEAD request to check if an item

	289 exists before attempting to upload the data. This will save

	290 retransmitting data, but the additional HTTP requests may make

	291 small object transfers slower and more expensive.

	292

	293 This option can be combined with the -c option to build a script

	294 that copies a large number of objects, allowing retries when

	295 some failures occur from which gsutil doesn't automatically

	296 recover, using a bash script like the following:

	297

	298 status=1

	299 while [ $status -ne 0 ] ; do

	300 gsutil cp -c -n -R ./dir gs://bucket

	301 status=$?

	302 done

	303

	304 The -c option will cause copying to continue after failures

	305 occur, and the -n option will cause objects already copied to be

	306 skipped on subsequent iterations. The loop will continue running

	307 as long as gsutil exits with a non-zero status (such a status

	308 indicates there was at least one failure during the gsutil run).

	309

	310 -p Causes ACLs to be preserved when copying in the cloud. Note that

	311 this option has performance and cost implications, because it

	312 is essentially performing three requests (getacl, cp, setacl).

	313 (The performance issue can be mitigated to some degree by

	314 using gsutil -m cp to cause parallel copying.)

	315

	316 You can avoid the additional performance and cost of using cp -p

	317 if you want all objects in the destination bucket to end up with

	318 the same ACL by setting a default ACL on that bucket instead of

	319 using cp -p. See "gsutil help setdefacl".

	320

	321 Note that it's not valid to specify both the -a and -p options

	322 together.

	323

	324 -q Causes copies to be performed quietly, i.e., without reporting

	325 progress indicators of files being copied. Errors are still

	326 reported. This option can be useful for running gsutil from a

	327 cron job that logs its output to a file, for which the only

	328 information desired in the log is failures.

	329

	330 -R, -r Causes directories, buckets, and bucket subdirectories to be

	331 copied recursively. If you neglect to use this option for

	332 an upload, gsutil will copy any files it finds and skip any

	333 directories. Similarly, neglecting to specify -R for a download

	334 will cause gsutil to copy any objects at the current bucket

	335 directory level, and skip any subdirectories.

	336

	337 -v Requests that the version-specific URI for each uploaded object

	338 be printed. Given this URI you can make future upload requests

	339 that are safe in the face of concurrent updates, because Google

	340 Cloud Storage will refuse to perform the update if the current

	341 object version doesn't match the version-specific URI. See

	342 'gsutil help versioning' for more details. Note: at present this

	343 option does not work correctly for objects copied "in the cloud"

	344 (e.g., gsutil cp gs://bucket/obj1 gs://bucket/obj2).

	345

	346 -z ext1,... Compresses file uploads with the given extensions. If you are

	347 uploading a large file with compressible content, such as

	348 a .js, .css, or .html file, you can gzip-compress the file

	349 during the upload process by specifying the -z <extensions>

	350 option. Compressing data before upload saves on usage charges

	351 because you are uploading a smaller amount of data.

	352

	353 When you specify the -z option, the data from your files is

	354 compressed before it is uploaded, but your actual files are left

	355 uncompressed on the local disk. The uploaded objects retain the

	356 original content type and name as the original files but are

	357 given a Content-Encoding header with the value "gzip" to

	358 indicate that the object data stored are compressed on the

	359 Google Cloud Storage servers.

	360

	361 For example, the following command:

	362

	363 gsutil cp -z html -a public-read cattypes.html gs://mycats

	364

	365 will do all of the following:

	366 - Upload as the object gs://mycats/cattypes.html (cp command)

	367 - Set the Content-Type to text/html (based on file extension)

	368 - Compress the data in the file cattypes.html (-z option)

	369 - Set the Content-Encoding to gzip (-z option)

	370 - Set the ACL to public-read (-a option)

	371 - If a user tries to view cattypes.html in a browser, the

	372 browser will know to uncompress the data based on the

	373 Content-Encoding header, and to render it as HTML based on

	374 the Content-Type header.

	375 """)

	376

	377 class CpCommand(Command):

	378 """

	379 Implementation of gsutil cp command.

	380

	381 Note that CpCommand is run for both gsutil cp and gsutil mv. The latter

	382 happens by MvCommand calling CpCommand and passing the hidden (undocumented)

	383 -M option. This allows the copy and remove needed for each mv to run

	384 together (rather than first running all the cp's and then all the rm's, as

	385 we originally had implemented), which in turn avoids the following problem

	386 with removing the wrong objects: starting with a bucket containing only

	387 the object gs://bucket/obj, say the user does:

	388 gsutil mv gs://bucket/* gs://bucket/d.txt

	389 If we ran all the cp's and then all the rm's and we didn't expand the wildcard

	390 first, the cp command would first copy gs://bucket/obj to gs://bucket/d.txt,

	391 and the rm command would then remove that object. In the implementation

	392 prior to gsutil release 3.12 we avoided this by building a list of objects

	393 to process and then running the copies and then the removes; but building

	394 the list up front limits scalability (compared with the current approach

	395 of processing the bucket listing iterator on the fly).

	396 """

	397

	398 # Set default Content-Type type.

	399 DEFAULT_CONTENT_TYPE = 'application/octet-stream'

	400 USE_MAGICFILE = boto.config.getbool('GSUtil', 'use_magicfile', False)

	401

	402 # Command specification (processed by parent class).

	403 command_spec = {

	404 # Name of command.

	405 COMMAND_NAME : 'cp',

	406 # List of command name aliases.

	407 COMMAND_NAME_ALIASES : ['copy'],

	408 # Min number of args required by this command.

	409 MIN_ARGS : 1,

	410 # Max number of args required by this command, or NO_MAX.

	411 MAX_ARGS : NO_MAX,

	412 # Getopt-style string specifying acceptable sub args.

	413 # -t is deprecated but leave intact for now to avoid breakage.

	414 SUPPORTED_SUB_ARGS : 'a:cDeIMNnpqrRtvz:',

	415 # True if file URIs acceptable for this command.

	416 FILE_URIS_OK : True,

	417 # True if provider-only URIs acceptable for this command.

	418 PROVIDER_URIS_OK : False,

	419 # Index in args of first URI arg.

	420 URIS_START_ARG : 0,

	421 # True if must configure gsutil before running command.

	422 CONFIG_REQUIRED : True,

	423 }

	424 help_spec = {

	425 # Name of command or auxiliary help info for which this help applies.

	426 HELP_NAME : 'cp',

	427 # List of help name aliases.

	428 HELP_NAME_ALIASES : ['copy'],

	429 # Type of help:

	430 HELP_TYPE : HelpType.COMMAND_HELP,

	431 # One line summary of this help.

	432 HELP_ONE_LINE_SUMMARY : 'Copy files and objects',

	433 # The full help text.

	434 HELP_TEXT : _detailed_help_text,

	435 }

	436

	437 def _CheckFinalMd5(self, key, file_name):

	438 """

	439 Checks that etag from server agrees with md5 computed after the

	440 download completes.

	441 """

	442 obj_md5 = key.etag.strip('"\'')

	443 file_md5 = None

	444

	445 if hasattr(key, 'md5') and key.md5:

	446 file_md5 = key.md5

	447 else:

	448 print 'Computing MD5 from scratch for resumed download'

	449

	450 # Open file in binary mode to avoid surprises in Windows.

	451 fp = open(file_name, 'rb')

	452 try:

	453 file_md5 = key.compute_md5(fp)[0]

	454 finally:

	455 fp.close()

	456

	457 if self.debug:

	458 print 'Checking file md5 against etag. (%s/%s)' % (file_md5, obj_md5)

	459 if file_md5 != obj_md5:

	460 # Checksums don't match - remove file and raise exception.

	461 os.unlink(file_name)

	462 raise CommandException(

	463 'File changed during download: md5 signature doesn\'t match '

	464 'etag (incorrect downloaded file deleted)')

	465

	466 def _CheckForDirFileConflict(self, exp_src_uri, dst_uri):

	467 """Checks whether copying exp_src_uri into dst_uri is not possible.

	468

	469 This happens if a directory exists in local file system where a file

	470 needs to go or vice versa. In that case we print an error message and

	471 exits. Example: if the file "./x" exists and you try to do:

	472 gsutil cp gs://mybucket/x/y .

	473 the request can't succeed because it requires a directory where

	474 the file x exists.

	475

	476 Note that we don't enforce any corresponding restrictions for buckets,

	477 because the flat namespace semantics for buckets doesn't prohibit such

	478 cases the way hierarchical file systems do. For example, if a bucket

	479 contains an object called gs://bucket/dir and then you run the command:

	480 gsutil cp file1 file2 gs://bucket/dir

	481 you'll end up with objects gs://bucket/dir, gs://bucket/dir/file1, and

	482 gs://bucket/dir/file2.

	483

	484 Args:

	485 exp_src_uri: Expanded source StorageUri of copy.

	486 dst_uri: Destination URI.

	487

	488 Raises:

	489 CommandException: if errors encountered.

	490 """

	491 if dst_uri.is_cloud_uri():

	492 # The problem can only happen for file destination URIs.

	493 return

	494 dst_path = dst_uri.object_name

	495 final_dir = os.path.dirname(dst_path)

	496 if os.path.isfile(final_dir):

	497 raise CommandException('Cannot retrieve %s because a file exists '

	498 'where a directory needs to be created (%s).' %

	499 (exp_src_uri, final_dir))

	500 if os.path.isdir(dst_path):

	501 raise CommandException('Cannot retrieve %s because a directory exists '

	502 '(%s) where the file needs to be created.' %

	503 (exp_src_uri, dst_path))

	504

	505 def _InsistDstUriNamesContainer(self, exp_dst_uri,

	506 have_existing_dst_container, command_name):

	507 """

	508 Raises an exception if URI doesn't name a directory, bucket, or bucket

	509 subdir, with special exception for cp -R (see comments below).

	510

	511 Args:

	512 exp_dst_uri: Wildcard-expanding dst_uri.

	513 have_existing_dst_container: bool indicator of whether exp_dst_uri

	514 names a container (directory, bucket, or existing bucket subdir).

	515 command_name: Name of command making call. May not be the same as

	516 self.command_name in the case of commands implemented atop other

	517 commands (like mv command).

	518

	519 Raises:

	520 CommandException: if the URI being checked does not name a container.

	521 """

	522 if exp_dst_uri.is_file_uri():

	523 ok = exp_dst_uri.names_directory()

	524 else:

	525 if have_existing_dst_container:

	526 ok = True

	527 else:

	528 # It's ok to specify a non-existing bucket subdir, for example:

	529 # gsutil cp -R dir gs://bucket/abc

	530 # where gs://bucket/abc isn't an existing subdir.

	531 ok = exp_dst_uri.names_object()

	532 if not ok:

	533 raise CommandException('Destination URI must name a directory, bucket, '

	534 'or bucket\nsubdirectory for the multiple '

	535 'source form of the %s command.' % command_name)

	536

	537 class _FileCopyCallbackHandler(object):

	538 """Outputs progress info for large copy requests."""

	539

	540 def __init__(self, upload):

	541 if upload:

	542 self.announce_text = 'Uploading'

	543 else:

	544 self.announce_text = 'Downloading'

	545

	546 def call(self, total_bytes_transferred, total_size):

	547 sys.stderr.write('%s: %s/%s \r' % (

	548 self.announce_text,

	549 MakeHumanReadable(total_bytes_transferred),

	550 MakeHumanReadable(total_size)))

	551 if total_bytes_transferred == total_size:

	552 sys.stderr.write('\n')

	553

	554 class _StreamCopyCallbackHandler(object):

	555 """Outputs progress info for Stream copy to cloud.

	556 Total Size of the stream is not known, so we output

	557 only the bytes transferred.

	558 """

	559

	560 def call(self, total_bytes_transferred, total_size):

	561 sys.stderr.write('Uploading: %s \r' % (

	562 MakeHumanReadable(total_bytes_transferred)))

	563 if total_size and total_bytes_transferred == total_size:

	564 sys.stderr.write('\n')

	565

	566 def _GetTransferHandlers(self, dst_uri, size, upload):

	567 """

	568 Selects upload/download and callback handlers.

	569

	570 We use a callback handler that shows a simple textual progress indicator

	571 if size is above the configurable threshold.

	572

	573 We use a resumable transfer handler if size is >= the configurable

	574 threshold and resumable transfers are supported by the given provider.

	575 boto supports resumable downloads for all providers, but resumable

	576 uploads are currently only supported by GS.

	577

	578 Args:

	579 dst_uri: the destination URI.

	580 size: size of file (object) being uploaded (downloaded).

	581 upload: bool indication of whether transfer is an upload.

	582 """

	583 config = boto.config

	584 resumable_threshold = config.getint('GSUtil', 'resumable_threshold', TWO_MB)

	585 transfer_handler = None

	586 cb = None

	587 num_cb = None

	588

	589 # Checks whether the destination file is a "special" file, like /dev/null on

	590 # Linux platforms or null on Windows platforms, so we can disable resumable

	591 # download support since the file size of the destination won't ever be

	592 # correct.

	593 dst_is_special = False

	594 if dst_uri.is_file_uri():

	595 # Check explicitly first because os.stat doesn't work on 'nul' in Windows.

	596 if dst_uri.object_name == os.devnull:

	597 dst_is_special = True

	598 try:

	599 mode = os.stat(dst_uri.object_name).st_mode

	600 if stat.S_ISCHR(mode):

	601 dst_is_special = True

	602 except OSError:

	603 pass

	604

	605 if size >= resumable_threshold and not dst_is_special:

	606 if not self.quiet:

	607 cb = self._FileCopyCallbackHandler(upload).call

	608 num_cb = int(size / TWO_MB)

	609

	610 resumable_tracker_dir = config.get(

	611 'GSUtil', 'resumable_tracker_dir',

	612 os.path.expanduser('~' + os.sep + '.gsutil'))

	613 if not os.path.exists(resumable_tracker_dir):

	614 os.makedirs(resumable_tracker_dir)

	615

	616 if upload:

	617 # Encode the dest bucket and object name into the tracker file name.

	618 res_tracker_file_name = (

	619 re.sub('[/\\\\]', '_', 'resumable_upload__%s__%s.url' %

	620 (dst_uri.bucket_name, dst_uri.object_name)))

	621 else:

	622 # Encode the fully-qualified dest file name into the tracker file name.

	623 res_tracker_file_name = (

	624 re.sub('[/\\\\]', '_', 'resumable_download__%s.etag' %

	625 (os.path.realpath(dst_uri.object_name))))

	626

	627 res_tracker_file_name = _hash_filename(res_tracker_file_name)

	628 tracker_file = '%s%s%s' % (resumable_tracker_dir, os.sep,

	629 res_tracker_file_name)

	630 if upload:

	631 if dst_uri.scheme == 'gs':

	632 transfer_handler = ResumableUploadHandler(tracker_file)

	633 else:

	634 transfer_handler = ResumableDownloadHandler(tracker_file)

	635

	636 return (cb, num_cb, transfer_handler)

	637

	638 def _LogCopyOperation(self, src_uri, dst_uri, headers):

	639 """

	640 Logs copy operation being performed, including Content-Type if appropriate.

	641 """

	642 if self.quiet:

	643 return

	644 if 'Content-Type' in headers and dst_uri.is_cloud_uri():

	645 content_type_msg = ' [Content-Type=%s]' % headers['Content-Type']

	646 else:

	647 content_type_msg = ''

	648 if src_uri.is_stream():

	649 self.THREADED_LOGGER.info('Copying from <STDIN>%s...', content_type_msg)

	650 else:

	651 self.THREADED_LOGGER.info('Copying %s%s...', src_uri, content_type_msg)

	652

	653 # We pass the headers explicitly to this call instead of using self.headers

	654 # so we can set different metadata (like Content-Type type) for each object.

	655 def _CopyObjToObjInTheCloud(self, src_key, src_uri, dst_uri, headers):

	656 """Performs copy-in-the cloud from specified src to dest object.

	657

	658 Args:

	659 src_key: Source Key.

	660 src_uri: Source StorageUri.

	661 dst_uri: Destination StorageUri.

	662 headers: A copy of the headers dictionary.

	663

	664 Returns:

	665 (elapsed_time, bytes_transferred, dst_uri) excluding overhead like initial

	666 HEAD. Note: At present copy-in-the-cloud doesn't return the generation of

	667 the created object, so the returned URI is actually not version-specific

	668 (unlike other cp cases).

	669

	670 Raises:

	671 CommandException: if errors encountered.

	672 """

	673 self._SetContentTypeHeader(src_uri, headers)

	674 self._LogCopyOperation(src_uri, dst_uri, headers)

	675 # Do Object -> object copy within same provider (uses

	676 # x-<provider>-copy-source metadata HTTP header to request copying at the

	677 # server).

	678 src_bucket = src_uri.get_bucket(False, headers)

	679 preserve_acl = False

	680 canned_acl = None

	681 if self.sub_opts:

	682 for o, a in self.sub_opts:

	683 if o == '-a':

	684 canned_acls = dst_uri.canned_acls()

	685 if a not in canned_acls:

	686 raise CommandException('Invalid canned ACL "%s".' % a)

	687 canned_acl = a

	688 headers[dst_uri.get_provider().acl_header] = canned_acl

	689 if o == '-p':

	690 preserve_acl = True

	691 if preserve_acl and canned_acl:

	692 raise CommandException(

	693 'Specifying both the -p and -a options together is invalid.')

	694 start_time = time.time()

	695 # Pass headers in headers param not metadata param, so boto will copy

	696 # existing key's metadata and just set the additional headers specified

	697 # in the headers param (rather than using the headers to override existing

	698 # metadata). In particular this allows us to copy the existing key's

	699 # Content-Type and other metadata users need while still being able to

	700 # set headers the API needs (like x-goog-project-id). Note that this means

	701 # you can't do something like:

	702 # gsutil cp -t Content-Type text/html gs://bucket/* gs://bucket2

	703 # to change the Content-Type while copying.

	704

	705 try:

	706 dst_key = dst_uri.copy_key(

	707 src_bucket.name, src_uri.object_name, preserve_acl=False,

	708 headers=headers, src_version_id=src_uri.version_id,

	709 src_generation=src_uri.generation)

	710 except GSResponseError as e:

	711 exc_name, error_detail = ExtractErrorDetail(e)

	712 if (exc_name == 'GSResponseError'

	713 and ('Copy-in-the-cloud disallowed' in error_detail)):

	714 raise CommandException('%s.\nNote: you can copy between locations '

	715 'and between storage classes by using the '

	716 'gsutil cp -D option.' % error_detail)

	717 else:

	718 raise

	719 end_time = time.time()

	720 return (end_time - start_time, src_key.size,

	721 dst_uri.clone_replace_key(dst_key))

	722

	723 def _CheckFreeSpace(self, path):

	724 """Return path/drive free space (in bytes)."""

	725 if platform.system() == 'Windows':

	726 from ctypes import c_int, c_uint64, c_wchar_p, windll, POINTER, WINFUNCTYP E, WinError

	727 try:

	728 GetDiskFreeSpaceEx = WINFUNCTYPE(c_int, c_wchar_p, POINTER(c_uint64),

	729 POINTER(c_uint64), POINTER(c_uint64))

	730 GetDiskFreeSpaceEx = GetDiskFreeSpaceEx(

	731 ('GetDiskFreeSpaceExW', windll.kernel32), (

	732 (1, 'lpszPathName'),

	733 (2, 'lpFreeUserSpace'),

	734 (2, 'lpTotalSpace'),

	735 (2, 'lpFreeSpace'),))

	736 except AttributeError:

	737 GetDiskFreeSpaceEx = WINFUNCTYPE(c_int, c_char_p, POINTER(c_uint64),

	738 POINTER(c_uint64), POINTER(c_uint64))

	739 GetDiskFreeSpaceEx = GetDiskFreeSpaceEx(

	740 ('GetDiskFreeSpaceExA', windll.kernel32), (

	741 (1, 'lpszPathName'),

	742 (2, 'lpFreeUserSpace'),

	743 (2, 'lpTotalSpace'),

	744 (2, 'lpFreeSpace'),))

	745

	746 def GetDiskFreeSpaceEx_errcheck(result, func, args):

	747 if not result:

	748 raise WinError()

	749 return args[1].value

	750 GetDiskFreeSpaceEx.errcheck = GetDiskFreeSpaceEx_errcheck

	751

	752 return GetDiskFreeSpaceEx(os.getenv('SystemDrive'))

	753 else:

	754 (_, f_frsize, _, _, f_bavail, _, _, _, _, _) = os.statvfs(path)

	755 return f_frsize * f_bavail

	756

	757 def _PerformResumableUploadIfApplies(self, fp, dst_uri, canned_acl, headers):

	758 """

	759 Performs resumable upload if supported by provider and file is above

	760 threshold, else performs non-resumable upload.

	761

	762 Returns (elapsed_time, bytes_transferred, version-specific dst_uri).

	763 """

	764 start_time = time.time()

	765 # Determine file size different ways for case where fp is actually a wrapper

	766 # around a Key vs an actual file.

	767 if isinstance(fp, KeyFile):

	768 file_size = fp.getkey().size

	769 else:

	770 file_size = os.path.getsize(fp.name)

	771 (cb, num_cb, res_upload_handler) = self._GetTransferHandlers(

	772 dst_uri, file_size, True)

	773 if dst_uri.scheme == 'gs':

	774 # Resumable upload protocol is Google Cloud Storage-specific.

	775 dst_uri.set_contents_from_file(fp, headers, policy=canned_acl,

	776 cb=cb, num_cb=num_cb,

	777 res_upload_handler=res_upload_handler)

	778 else:

	779 dst_uri.set_contents_from_file(fp, headers, policy=canned_acl,

	780 cb=cb, num_cb=num_cb)

	781 if res_upload_handler:

	782 # ResumableUploadHandler does not update upload_start_point from its

	783 # initial value of -1 if transferring the whole file, so clamp at 0

	784 bytes_transferred = file_size - max(

	785 res_upload_handler.upload_start_point, 0)

	786 else:

	787 bytes_transferred = file_size

	788 end_time = time.time()

	789 return (end_time - start_time, bytes_transferred, dst_uri)

	790

	791 def _PerformStreamingUpload(self, fp, dst_uri, headers, canned_acl=None):

	792 """

	793 Performs a streaming upload to the cloud.

	794

	795 Args:

	796 fp: The file whose contents to upload.

	797 dst_uri: Destination StorageUri.

	798 headers: A copy of the headers dictionary.

	799 canned_acl: Optional canned ACL to set on the object.

	800

	801 Returns (elapsed_time, bytes_transferred, version-specific dst_uri).

	802 """

	803 start_time = time.time()

	804

	805 if self.quiet:

	806 cb = None

	807 else:

	808 cb = self._StreamCopyCallbackHandler().call

	809 dst_uri.set_contents_from_stream(

	810 fp, headers, policy=canned_acl, cb=cb)

	811 try:

	812 bytes_transferred = fp.tell()

	813 except:

	814 bytes_transferred = 0

	815

	816 end_time = time.time()

	817 return (end_time - start_time, bytes_transferred, dst_uri)

	818

	819 def _SetContentTypeHeader(self, src_uri, headers):

	820 """

	821 Sets content type header to value specified in '-h Content-Type' option (if

	822 specified); else sets using Content-Type detection.

	823 """

	824 if 'Content-Type' in headers:

	825 # If empty string specified (i.e., -h "Content-Type:") set header to None,

	826 # which will inhibit boto from sending the CT header. Otherwise, boto will

	827 # pass through the user specified CT header.

	828 if not headers['Content-Type']:

	829 headers['Content-Type'] = None

	830 # else we'll keep the value passed in via -h option (not performing

	831 # content type detection).

	832 else:

	833 # Only do content type recognition is src_uri is a file. Object-to-object

	834 # copies with no -h Content-Type specified re-use the content type of the

	835 # source object.

	836 if src_uri.is_file_uri():

	837 object_name = src_uri.object_name

	838 content_type = None

	839 # Streams (denoted by '-') are expected to be 'application/octet-stream'

	840 # and 'file' would partially consume them.

	841 if object_name != '-':

	842 if self.USE_MAGICFILE:

	843 p = subprocess.Popen(['file', '--mime-type', object_name],

	844 stdout=subprocess.PIPE, stderr=subprocess.PIPE)

	845 output, error = p.communicate()

	846 if p.returncode != 0 or error:

	847 raise CommandException(

	848 'Encountered error running "file --mime-type %s" '

	849 '(returncode=%d).\n%s' % (object_name, p.returncode, error))

	850 # Parse output by removing line delimiter and splitting on last ":

	851 content_type = output.rstrip().rpartition(': ')[2]

	852 else:

	853 content_type = mimetypes.guess_type(object_name)[0]

	854 if not content_type:

	855 content_type = self.DEFAULT_CONTENT_TYPE

	856 headers['Content-Type'] = content_type

	857

	858 def _UploadFileToObject(self, src_key, src_uri, dst_uri, headers,

	859 should_log=True):

	860 """Uploads a local file to an object.

	861

	862 Args:

	863 src_key: Source StorageUri. Must be a file URI.

	864 src_uri: Source StorageUri.

	865 dst_uri: Destination StorageUri.

	866 headers: The headers dictionary.

	867 should_log: bool indicator whether we should log this operation.

	868 Returns:

	869 (elapsed_time, bytes_transferred, version-specific dst_uri), excluding

	870 overhead like initial HEAD.

	871

	872 Raises:

	873 CommandException: if errors encountered.

	874 """

	875 gzip_exts = []

	876 canned_acl = None

	877 if self.sub_opts:

	878 for o, a in self.sub_opts:

	879 if o == '-a':

	880 canned_acls = dst_uri.canned_acls()

	881 if a not in canned_acls:

	882 raise CommandException('Invalid canned ACL "%s".' % a)

	883 canned_acl = a

	884 elif o == '-t':

	885 print('Warning: -t is deprecated, and will be removed in the future. '

	886 'Content type\ndetection is '

	887 'now performed by default, unless inhibited by specifying '

	888 'a\nContent-Type header via the -h option.')

	889 elif o == '-z':

	890 gzip_exts = a.split(',')

	891

	892 self._SetContentTypeHeader(src_uri, headers)

	893 if should_log:

	894 self._LogCopyOperation(src_uri, dst_uri, headers)

	895

	896 if 'Content-Language' not in headers:

	897 content_language = config.get_value('GSUtil', 'content_language')

	898 if content_language:

	899 headers['Content-Language'] = content_language

	900

	901 fname_parts = src_uri.object_name.split('.')

	902 if len(fname_parts) > 1 and fname_parts[-1] in gzip_exts:

	903 if self.debug:

	904 print 'Compressing %s (to tmp)...' % src_key

	905 (gzip_fh, gzip_path) = tempfile.mkstemp()

	906 gzip_fp = None

	907 try:

	908 # Check for temp space. Assume the compressed object is at most 2x

	909 # the size of the object (normally should compress to smaller than

	910 # the object)

	911 if (self._CheckFreeSpace(gzip_path)

	912 < 2*int(os.path.getsize(src_key.name))):

	913 raise CommandException('Inadequate temp space available to compress '

	914 '%s' % src_key.name)

	915 gzip_fp = gzip.open(gzip_path, 'wb')

	916 gzip_fp.writelines(src_key.fp)

	917 finally:

	918 if gzip_fp:

	919 gzip_fp.close()

	920 os.close(gzip_fh)

	921 headers['Content-Encoding'] = 'gzip'

	922 gzip_fp = open(gzip_path, 'rb')

	923 try:

	924 (elapsed_time, bytes_transferred, result_uri) = (

	925 self._PerformResumableUploadIfApplies(gzip_fp, dst_uri,

	926 canned_acl, headers))

	927 finally:

	928 gzip_fp.close()

	929 try:

	930 os.unlink(gzip_path)

	931 # Windows sometimes complains the temp file is locked when you try to

	932 # delete it.

	933 except Exception, e:

	934 pass

	935 elif (src_key.is_stream()

	936 and dst_uri.get_provider().supports_chunked_transfer()):

	937 (elapsed_time, bytes_transferred, result_uri) = (

	938 self._PerformStreamingUpload(src_key.fp, dst_uri, headers,

	939 canned_acl))

	940 else:

	941 if src_key.is_stream():

	942 # For Providers that doesn't support chunked Transfers

	943 tmp = tempfile.NamedTemporaryFile()

	944 file_uri = self.suri_builder.StorageUri('file://%s' % tmp.name)

	945 try:

	946 file_uri.new_key(False, headers).set_contents_from_file(

	947 src_key.fp, headers)

	948 src_key = file_uri.get_key()

	949 finally:

	950 file_uri.close()

	951 try:

	952 (elapsed_time, bytes_transferred, result_uri) = (

	953 self._PerformResumableUploadIfApplies(src_key.fp, dst_uri,

	954 canned_acl, headers))

	955 finally:

	956 if src_key.is_stream():

	957 tmp.close()

	958 else:

	959 src_key.close()

	960

	961 return (elapsed_time, bytes_transferred, result_uri)

	962

	963 def _DownloadObjectToFile(self, src_key, src_uri, dst_uri, headers,

	964 should_log=True):

	965 """Downloads an object to a local file.

	966

	967 Args:

	968 src_key: Source StorageUri. Must be a file URI.

	969 src_uri: Source StorageUri.

	970 dst_uri: Destination StorageUri.

	971 headers: The headers dictionary.

	972 should_log: bool indicator whether we should log this operation.

	973 Returns:

	974 (elapsed_time, bytes_transferred, dst_uri), excluding overhead like

	975 initial HEAD.

	976

	977 Raises:

	978 CommandException: if errors encountered.

	979 """

	980 if should_log:

	981 self._LogCopyOperation(src_uri, dst_uri, headers)

	982 (cb, num_cb, res_download_handler) = self._GetTransferHandlers(

	983 dst_uri, src_key.size, False)

	984 file_name = dst_uri.object_name

	985 dir_name = os.path.dirname(file_name)

	986 if dir_name and not os.path.exists(dir_name):

	987 # Do dir creation in try block so can ignore case where dir already

	988 # exists. This is needed to avoid a race condition when running gsutil

	989 # -m cp.

	990 try:

	991 os.makedirs(dir_name)

	992 except OSError, e:

	993 if e.errno != errno.EEXIST:

	994 raise

	995 # For gzipped objects not named *.gz download to a temp file and unzip.

	996 if (hasattr(src_key, 'content_encoding')

	997 and src_key.content_encoding == 'gzip'

	998 and not file_name.endswith('.gz')):

	999 # We can't use tempfile.mkstemp() here because we need a predictable

	1000 # filename for resumable downloads.

	1001 download_file_name = '%s_.gztmp' % file_name

	1002 need_to_unzip = True

	1003 else:

	1004 download_file_name = file_name

	1005 need_to_unzip = False

	1006 fp = None

	1007 try:

	1008 if res_download_handler:

	1009 fp = open(download_file_name, 'ab')

	1010 else:

	1011 fp = open(download_file_name, 'wb')

	1012 start_time = time.time()

	1013 src_key.get_contents_to_file(fp, headers, cb=cb, num_cb=num_cb,

	1014 res_download_handler=res_download_handler)

	1015 # If a custom test method is defined, call it here. For the copy command,

	1016 # test methods are expected to take one argument: an open file pointer,

	1017 # and are used to perturb the open file during download to exercise

	1018 # download error detection.

	1019 if self.test_method:

	1020 self.test_method(fp)

	1021 end_time = time.time()

	1022 finally:

	1023 if fp:

	1024 fp.close()

	1025

	1026 # Discard the md5 if we are resuming a partial download.

	1027 if res_download_handler and res_download_handler.download_start_point:

	1028 src_key.md5 = None

	1029

	1030 # Verify downloaded file checksum matched source object's checksum.

	1031 self._CheckFinalMd5(src_key, download_file_name)

	1032

	1033 if res_download_handler:

	1034 bytes_transferred = (

	1035 src_key.size - res_download_handler.download_start_point)

	1036 else:

	1037 bytes_transferred = src_key.size

	1038 if need_to_unzip:

	1039 # Log that we're uncompressing if the file is big enough that

	1040 # decompressing would make it look like the transfer "stalled" at the end.

	1041 if not self.quiet and bytes_transferred > 10 * 1024 * 1024:

	1042 self.THREADED_LOGGER.info('Uncompressing downloaded tmp file to %s...',

	1043 file_name)

	1044 # Downloaded gzipped file to a filename w/o .gz extension, so unzip.

	1045 f_in = gzip.open(download_file_name, 'rb')

	1046 f_out = open(file_name, 'wb')

	1047 try:

	1048 while True:

	1049 data = f_in.read(8192)

	1050 if not data:

	1051 break

	1052 f_out.write(data)

	1053 finally:

	1054 f_out.close()

	1055 f_in.close()

	1056 os.unlink(download_file_name)

	1057 return (end_time - start_time, bytes_transferred, dst_uri)

	1058

	1059 def _PerformDownloadToStream(self, src_key, src_uri, str_fp, headers):

	1060 (cb, num_cb, res_download_handler) = self._GetTransferHandlers(

	1061 src_uri, src_key.size, False)

	1062 start_time = time.time()

	1063 src_key.get_contents_to_file(str_fp, headers, cb=cb, num_cb=num_cb)

	1064 end_time = time.time()

	1065 bytes_transferred = src_key.size

	1066 end_time = time.time()

	1067 return (end_time - start_time, bytes_transferred)

	1068

	1069 def _CopyFileToFile(self, src_key, src_uri, dst_uri, headers):

	1070 """Copies a local file to a local file.

	1071

	1072 Args:

	1073 src_key: Source StorageUri. Must be a file URI.

	1074 src_uri: Source StorageUri.

	1075 dst_uri: Destination StorageUri.

	1076 headers: The headers dictionary.

	1077 Returns:

	1078 (elapsed_time, bytes_transferred, dst_uri), excluding

	1079 overhead like initial HEAD.

	1080

	1081 Raises:

	1082 CommandException: if errors encountered.

	1083 """

	1084 self._LogCopyOperation(src_uri, dst_uri, headers)

	1085 dst_key = dst_uri.new_key(False, headers)

	1086 start_time = time.time()

	1087 dst_key.set_contents_from_file(src_key.fp, headers)

	1088 end_time = time.time()

	1089 return (end_time - start_time, os.path.getsize(src_key.fp.name), dst_uri)

	1090

	1091 def _CopyObjToObjDaisyChainMode(self, src_key, src_uri, dst_uri, headers):

	1092 """Copies from src_uri to dst_uri in "daisy chain" mode.

	1093 See -D OPTION documentation about what daisy chain mode is.

	1094

	1095 Args:

	1096 src_key: Source Key.

	1097 src_uri: Source StorageUri.

	1098 dst_uri: Destination StorageUri.

	1099 headers: A copy of the headers dictionary.

	1100

	1101 Returns:

	1102 (elapsed_time, bytes_transferred, version-specific dst_uri) excluding

	1103 overhead like initial HEAD.

	1104

	1105 Raises:

	1106 CommandException: if errors encountered.

	1107 """

	1108 self._SetContentTypeHeader(src_uri, headers)

	1109 self._LogCopyOperation(src_uri, dst_uri, headers)

	1110 canned_acl = None

	1111 if self.sub_opts:

	1112 for o, a in self.sub_opts:

	1113 if o == '-a':

	1114 canned_acls = dst_uri.canned_acls()

	1115 if a not in canned_acls:

	1116 raise CommandException('Invalid canned ACL "%s".' % a)

	1117 canned_acl = a

	1118 elif o == '-p':

	1119 # We don't attempt to preserve ACLs across providers because

	1120 # GCS and S3 support different ACLs and disjoint principals.

	1121 raise NotImplementedError('Cross-provider cp -p not supported')

	1122 return self._PerformResumableUploadIfApplies(KeyFile(src_key), dst_uri,

	1123 canned_acl, headers)

	1124

	1125 def _PerformCopy(self, src_uri, dst_uri):

	1126 """Performs copy from src_uri to dst_uri, handling various special cases.

	1127

	1128 Args:

	1129 src_uri: Source StorageUri.

	1130 dst_uri: Destination StorageUri.

	1131

	1132 Returns:

	1133 (elapsed_time, bytes_transferred, version-specific dst_uri) excluding

	1134 overhead like initial HEAD.

	1135

	1136 Raises:

	1137 CommandException: if errors encountered.

	1138 """

	1139 # Make a copy of the input headers each time so we can set a different

	1140 # content type for each object.

	1141 if self.headers:

	1142 headers = self.headers.copy()

	1143 else:

	1144 headers = {}

	1145

	1146 src_key = src_uri.get_key(False, headers)

	1147 if not src_key:

	1148 raise CommandException('"%s" does not exist.' % src_uri)

	1149

	1150 # On Windows, stdin is opened as text mode instead of binary which causes

	1151 # problems when piping a binary file, so this switches it to binary mode.

	1152 if IS_WINDOWS and src_uri.is_file_uri() and src_key.is_stream():

	1153 import msvcrt

	1154 msvcrt.setmode(src_key.fp.fileno(), os.O_BINARY)

	1155

	1156 if self.no_clobber:

	1157 # There are two checks to prevent clobbering:

	1158 # 1) The first check is to see if the item

	1159 # already exists at the destination and prevent the upload/download

	1160 # from happening. This is done by the exists() call.

	1161 # 2) The second check is only relevant if we are writing to gs. We can

	1162 # enforce that the server only writes the object if it doesn't exist

	1163 # by specifying the header below. This check only happens at the

	1164 # server after the complete file has been uploaded. We specify this

	1165 # header to prevent a race condition where a destination file may

	1166 # be created after the first check and before the file is fully

	1167 # uploaded.

	1168 # In order to save on unnecessary uploads/downloads we perform both

	1169 # checks. However, this may come at the cost of additional HTTP calls.

	1170 if dst_uri.exists(headers):

	1171 if not self.quiet:

	1172 self.THREADED_LOGGER.info('Skipping existing item: %s' %

	1173 dst_uri.uri)

	1174 return (0, 0, None)

	1175 if dst_uri.is_cloud_uri() and dst_uri.scheme == 'gs':

	1176 headers['x-goog-if-generation-match'] = '0'

	1177

	1178 if src_uri.is_cloud_uri() and dst_uri.is_cloud_uri():

	1179 if src_uri.scheme == dst_uri.scheme and not self.daisy_chain:

	1180 return self._CopyObjToObjInTheCloud(src_key, src_uri, dst_uri, headers)

	1181 else:

	1182 return self._CopyObjToObjDaisyChainMode(src_key, src_uri, dst_uri,

	1183 headers)

	1184 elif src_uri.is_file_uri() and dst_uri.is_cloud_uri():

	1185 return self._UploadFileToObject(src_key, src_uri, dst_uri, headers)

	1186 elif src_uri.is_cloud_uri() and dst_uri.is_file_uri():

	1187 return self._DownloadObjectToFile(src_key, src_uri, dst_uri, headers)

	1188 elif src_uri.is_file_uri() and dst_uri.is_file_uri():

	1189 return self._CopyFileToFile(src_key, src_uri, dst_uri, headers)

	1190 else:

	1191 raise CommandException('Unexpected src/dest case')

	1192

	1193 def _ExpandDstUri(self, dst_uri_str):

	1194 """

	1195 Expands wildcard if present in dst_uri_str.

	1196

	1197 Args:

	1198 dst_uri_str: String representation of requested dst_uri.

	1199

	1200 Returns:

	1201 (exp_dst_uri, have_existing_dst_container)

	1202 where have_existing_dst_container is a bool indicating whether

	1203 exp_dst_uri names an existing directory, bucket, or bucket subdirectory.

	1204

	1205 Raises:

	1206 CommandException: if dst_uri_str matched more than 1 URI.

	1207 """

	1208 dst_uri = self.suri_builder.StorageUri(dst_uri_str)

	1209

	1210 # Handle wildcarded dst_uri case.

	1211 if ContainsWildcard(dst_uri):

	1212 blr_expansion = list(self.WildcardIterator(dst_uri))

	1213 if len(blr_expansion) != 1:

	1214 raise CommandException('Destination (%s) must match exactly 1 URI' %

	1215 dst_uri_str)

	1216 blr = blr_expansion[0]

	1217 uri = blr.GetUri()

	1218 if uri.is_cloud_uri():

	1219 return (uri, uri.names_bucket() or blr.HasPrefix()

	1220 or blr.GetKey().endswith('/'))

	1221 else:

	1222 return (uri, uri.names_directory())

	1223

	1224 # Handle non-wildcarded dst_uri:

	1225 if dst_uri.is_file_uri():

	1226 return (dst_uri, dst_uri.names_directory())

	1227 if dst_uri.names_bucket():

	1228 return (dst_uri, True)

	1229 # For object URIs check 3 cases: (a) if the name ends with '/' treat as a

	1230 # subdir; else, perform a wildcard expansion with dst_uri + "*" and then

	1231 # find if (b) there's a Prefix matching dst_uri, or (c) name is of form

	1232 # dir_$folder$ (and in both these cases also treat dir as a subdir).

	1233 if dst_uri.is_cloud_uri() and dst_uri_str.endswith('/'):

	1234 return (dst_uri, True)

	1235 blr_expansion = list(self.WildcardIterator(

	1236 '%s*' % dst_uri_str.rstrip(dst_uri.delim)))

	1237 for blr in blr_expansion:

	1238 if blr.GetRStrippedUriString().endswith('_$folder$'):

	1239 return (dst_uri, True)

	1240 if blr.GetRStrippedUriString() == dst_uri_str.rstrip(dst_uri.delim):

	1241 return (dst_uri, blr.HasPrefix())

	1242 return (dst_uri, False)

	1243

	1244 def _ConstructDstUri(self, src_uri, exp_src_uri,

	1245 src_uri_names_container, src_uri_expands_to_multi,

	1246 have_multiple_srcs, exp_dst_uri,

	1247 have_existing_dest_subdir):

	1248 """

	1249 Constructs the destination URI for a given exp_src_uri/exp_dst_uri pair,

	1250 using context-dependent naming rules that mimic Linux cp and mv behavior.

	1251

	1252 Args:

	1253 src_uri: src_uri to be copied.

	1254 exp_src_uri: Single StorageUri from wildcard expansion of src_uri.

	1255 src_uri_names_container: True if src_uri names a container (including the

	1256 case of a wildcard-named bucket subdir (like gs://bucket/abc,

	1257 where gs://bucket/abc/* matched some objects). Note that this is

	1258 additional semantics tha src_uri.names_container() doesn't understand

	1259 because the latter only understands StorageUris, not wildcards.

	1260 src_uri_expands_to_multi: True if src_uri expanded to multiple URIs.

	1261 have_multiple_srcs: True if this is a multi-source request. This can be

	1262 true if src_uri wildcard-expanded to multiple URIs or if there were

	1263 multiple source URIs in the request.

	1264 exp_dst_uri: the expanded StorageUri requested for the cp destination.

	1265 Final written path is constructed from this plus a context-dependent

	1266 variant of src_uri.

	1267 have_existing_dest_subdir: bool indicator whether dest is an existing

	1268 subdirectory.

	1269

	1270 Returns:

	1271 StorageUri to use for copy.

	1272

	1273 Raises:

	1274 CommandException if destination object name not specified for

	1275 source and source is a stream.

	1276 """

	1277 if self._ShouldTreatDstUriAsSingleton(

	1278 have_multiple_srcs, have_existing_dest_subdir, exp_dst_uri):

	1279 # We're copying one file or object to one file or object.

	1280 return exp_dst_uri

	1281

	1282 if exp_src_uri.is_stream():

	1283 if exp_dst_uri.names_container():

	1284 raise CommandException('Destination object name needed when '

	1285 'source is a stream')

	1286 return exp_dst_uri

	1287

	1288 if not self.recursion_requested and not have_multiple_srcs:

	1289 # We're copying one file or object to a subdirectory. Append final comp

	1290 # of exp_src_uri to exp_dst_uri.

	1291 src_final_comp = exp_src_uri.object_name.rpartition(src_uri.delim)[-1]

	1292 return self.suri_builder.StorageUri('%s%s%s' % (

	1293 exp_dst_uri.uri.rstrip(exp_dst_uri.delim), exp_dst_uri.delim,

	1294 src_final_comp))

	1295

	1296 # Else we're copying multiple sources to a directory, bucket, or a bucket

	1297 # "sub-directory".

	1298

	1299 # Ensure exp_dst_uri ends in delim char if we're doing a multi-src copy or

	1300 # a copy to a directory. (The check for copying to a directory needs

	1301 # special-case handling so that the command:

	1302 # gsutil cp gs://bucket/obj dir

	1303 # will turn into file://dir/ instead of file://dir -- the latter would cause

	1304 # the file "dirobj" to be created.)

	1305 # Note: need to check have_multiple_srcs or src_uri.names_container()

	1306 # because src_uri could be a bucket containing a single object, named

	1307 # as gs://bucket.

	1308 if ((have_multiple_srcs or src_uri.names_container()

	1309 or os.path.isdir(exp_dst_uri.object_name))

	1310 and not exp_dst_uri.uri.endswith(exp_dst_uri.delim)):

	1311 exp_dst_uri = exp_dst_uri.clone_replace_name(

	1312 '%s%s' % (exp_dst_uri.object_name, exp_dst_uri.delim)

	1313 )

	1314

	1315 # Making naming behavior match how things work with local Linux cp and mv

	1316 # operations depends on many factors, including whether the destination is a

	1317 # container, the plurality of the source(s), and whether the mv command is

	1318 # being used:

	1319 # 1. For the "mv" command that specifies a non-existent destination subdir,

	1320 # renaming should occur at the level of the src subdir, vs appending that

	1321 # subdir beneath the dst subdir like is done for copying. For example:

	1322 # gsutil rm -R gs://bucket

	1323 # gsutil cp -R dir1 gs://bucket

	1324 # gsutil cp -R dir2 gs://bucket/subdir1

	1325 # gsutil mv gs://bucket/subdir1 gs://bucket/subdir2

	1326 # would (if using cp naming behavior) end up with paths like:

	1327 # gs://bucket/subdir2/subdir1/dir2/.svn/all-wcprops

	1328 # whereas mv naming behavior should result in:

	1329 # gs://bucket/subdir2/dir2/.svn/all-wcprops

	1330 # 2. Copying from directories, buckets, or bucket subdirs should result in

	1331 # objects/files mirroring the source directory hierarchy. For example:

	1332 # gsutil cp dir1/dir2 gs://bucket

	1333 # should create the object gs://bucket/dir2/file2, assuming dir1/dir2

	1334 # contains file2).

	1335 # To be consistent with Linux cp behavior, there's one more wrinkle when

	1336 # working with subdirs: The resulting object names depend on whether the

	1337 # destination subdirectory exists. For example, if gs://bucket/subdir

	1338 # exists, the command:

	1339 # gsutil cp -R dir1/dir2 gs://bucket/subdir

	1340 # should create objects named like gs://bucket/subdir/dir2/a/b/c. In

	1341 # contrast, if gs://bucket/subdir does not exist, this same command

	1342 # should create objects named like gs://bucket/subdir/a/b/c.

	1343 # 3. Copying individual files or objects to dirs, buckets or bucket subdirs

	1344 # should result in objects/files named by the final source file name

	1345 # component. Example:

	1346 # gsutil cp dir1/*.txt gs://bucket

	1347 # should create the objects gs://bucket/f1.txt and gs://bucket/f2.txt,

	1348 # assuming dir1 contains f1.txt and f2.txt.

	1349

	1350 if (self.perform_mv and self.recursion_requested

	1351 and src_uri_expands_to_multi and not have_existing_dest_subdir):

	1352 # Case 1. Handle naming rules for bucket subdir mv. Here we want to

	1353 # line up the src_uri against its expansion, to find the base to build

	1354 # the new name. For example, running the command:

	1355 # gsutil mv gs://bucket/abcd gs://bucket/xyz

	1356 # when processing exp_src_uri=gs://bucket/abcd/123

	1357 # exp_src_uri_tail should become /123

	1358 # Note: mv.py code disallows wildcard specification of source URI.

	1359 exp_src_uri_tail = exp_src_uri.uri[len(src_uri.uri):]

	1360 dst_key_name = '%s/%s' % (exp_dst_uri.object_name.rstrip('/'),

	1361 exp_src_uri_tail.strip('/'))

	1362 return exp_dst_uri.clone_replace_name(dst_key_name)

	1363

	1364 if src_uri_names_container and not exp_dst_uri.names_file():

	1365 # Case 2. Build dst_key_name from subpath of exp_src_uri past

	1366 # where src_uri ends. For example, for src_uri=gs://bucket/ and

	1367 # exp_src_uri=gs://bucket/src_subdir/obj, dst_key_name should be

	1368 # src_subdir/obj.

	1369 src_uri_path_sans_final_dir = _GetPathBeforeFinalDir(src_uri)

	1370 dst_key_name = exp_src_uri.uri[

	1371 len(src_uri_path_sans_final_dir):].lstrip(src_uri.delim)

	1372 # Handle case where dst_uri is a non-existent subdir.

	1373 if not have_existing_dest_subdir:

	1374 dst_key_name = dst_key_name.partition(src_uri.delim)[-1]

	1375 # Handle special case where src_uri was a directory named with '.' or

	1376 # './', so that running a command like:

	1377 # gsutil cp -r . gs://dest

	1378 # will produce obj names of the form gs://dest/abc instead of

	1379 # gs://dest/./abc.

	1380 if dst_key_name.startswith('.%s' % os.sep):

	1381 dst_key_name = dst_key_name[2:]

	1382

	1383 else:

	1384 # Case 3.

	1385 dst_key_name = exp_src_uri.object_name.rpartition(src_uri.delim)[-1]

	1386

	1387 if (exp_dst_uri.is_file_uri()

	1388 or self._ShouldTreatDstUriAsBucketSubDir(

	1389 have_multiple_srcs, exp_dst_uri, have_existing_dest_subdir)):

	1390 if exp_dst_uri.object_name.endswith(exp_dst_uri.delim):

	1391 dst_key_name = '%s%s%s' % (

	1392 exp_dst_uri.object_name.rstrip(exp_dst_uri.delim),

	1393 exp_dst_uri.delim, dst_key_name)

	1394 else:

	1395 delim = exp_dst_uri.delim if exp_dst_uri.object_name else ''

	1396 dst_key_name = '%s%s%s' % (exp_dst_uri.object_name, delim, dst_key_name)

	1397

	1398 return exp_dst_uri.clone_replace_name(dst_key_name)

	1399

	1400 def _FixWindowsNaming(self, src_uri, dst_uri):

	1401 """

	1402 Rewrites the destination URI built by _ConstructDstUri() to translate

	1403 Windows pathnames to cloud pathnames if needed.

	1404

	1405 Args:

	1406 src_uri: Source URI to be copied.

	1407 dst_uri: The destination URI built by _ConstructDstUri().

	1408

	1409 Returns:

	1410 StorageUri to use for copy.

	1411 """

	1412 if (src_uri.is_file_uri() and src_uri.delim == '\\'

	1413 and dst_uri.is_cloud_uri()):

	1414 trans_uri_str = re.sub(r'\\', '/', dst_uri.uri)

	1415 dst_uri = self.suri_builder.StorageUri(trans_uri_str)

	1416 return dst_uri

	1417

	1418 # Command entry point.

	1419 def RunCommand(self):

	1420

	1421 # Inner funcs.

	1422 def _CopyExceptionHandler(e):

	1423 """Simple exception handler to allow post-completion status."""

	1424 self.THREADED_LOGGER.error(str(e))

	1425 self.copy_failure_count += 1

	1426

	1427 def _CopyFunc(name_expansion_result):

	1428 """Worker function for performing the actual copy (and rm, for mv)."""

	1429 if self.perform_mv:

	1430 cmd_name = 'mv'

	1431 else:

	1432 cmd_name = self.command_name

	1433 src_uri = self.suri_builder.StorageUri(

	1434 name_expansion_result.GetSrcUriStr())

	1435 exp_src_uri = self.suri_builder.StorageUri(

	1436 name_expansion_result.GetExpandedUriStr())

	1437 src_uri_names_container = name_expansion_result.NamesContainer()

	1438 src_uri_expands_to_multi = name_expansion_result.NamesContainer()

	1439 have_multiple_srcs = name_expansion_result.IsMultiSrcRequest()

	1440 have_existing_dest_subdir = (

	1441 name_expansion_result.HaveExistingDstContainer())

	1442

	1443 if src_uri.names_provider():

	1444 raise CommandException(

	1445 'The %s command does not allow provider-only source URIs (%s)' %

	1446 (cmd_name, src_uri))

	1447 if have_multiple_srcs:

	1448 self._InsistDstUriNamesContainer(exp_dst_uri,

	1449 have_existing_dst_container,

	1450 cmd_name)

	1451

	1452 if self.perform_mv:

	1453 if name_expansion_result.NamesContainer():

	1454 # Use recursion_requested when performing name expansion for the

	1455 # directory mv case so we can determine if any of the source URIs are

	1456 # directories (and then use cp -R and rm -R to perform the move, to

	1457 # match the behavior of Linux mv (which when moving a directory moves

	1458 # all the contained files).

	1459 self.recursion_requested = True

	1460 # Disallow wildcard src URIs when moving directories, as supporting it

	1461 # would make the name transformation too complex and would also be

	1462 # dangerous (e.g., someone could accidentally move many objects to the

	1463 # wrong name, or accidentally overwrite many objects).

	1464 if ContainsWildcard(src_uri):

	1465 raise CommandException('The mv command disallows naming source '

	1466 'directories using wildcards')

	1467

	1468 if (exp_dst_uri.is_file_uri()

	1469 and not os.path.exists(exp_dst_uri.object_name)

	1470 and have_multiple_srcs):

	1471 os.makedirs(exp_dst_uri.object_name)

	1472

	1473 dst_uri = self._ConstructDstUri(src_uri, exp_src_uri,

	1474 src_uri_names_container,

	1475 src_uri_expands_to_multi,

	1476 have_multiple_srcs, exp_dst_uri,

	1477 have_existing_dest_subdir)

	1478 dst_uri = self._FixWindowsNaming(src_uri, dst_uri)

	1479

	1480 self._CheckForDirFileConflict(exp_src_uri, dst_uri)

	1481 if self._SrcDstSame(exp_src_uri, dst_uri):

	1482 raise CommandException('%s: "%s" and "%s" are the same file - '

	1483 'abort.' % (cmd_name, exp_src_uri, dst_uri))

	1484

	1485 if dst_uri.is_cloud_uri() and dst_uri.is_version_specific:

	1486 raise CommandException('%s: a version-specific URI\n(%s)\ncannot be '

	1487 'the destination for gsutil cp - abort.'

	1488 % (cmd_name, dst_uri))

	1489

	1490 elapsed_time = bytes_transferred = 0

	1491 try:

	1492 (elapsed_time, bytes_transferred, result_uri) = (

	1493 self._PerformCopy(exp_src_uri, dst_uri))

	1494 except Exception, e:

	1495 if self._IsNoClobberServerException(e):

	1496 if not self.quiet:

	1497 self.THREADED_LOGGER.info('Rejected (noclobber): %s' % dst_uri.uri)

	1498 elif self.continue_on_error:

	1499 if not self.quiet:

	1500 self.THREADED_LOGGER.error('Error copying %s: %s' % (src_uri.uri,

	1501 str(e)))

	1502 self.copy_failure_count += 1

	1503 else:

	1504 raise

	1505 if self.print_ver:

	1506 # Some cases don't return a version-specific URI (e.g., if destination

	1507 # is a file).

	1508 if hasattr(result_uri, 'version_specific_uri'):

	1509 self.THREADED_LOGGER.info('Created: %s' %

	1510 result_uri.version_specific_uri)

	1511 else:

	1512 self.THREADED_LOGGER.info('Created: %s' % result_uri.uri)

	1513

	1514 # TODO: If we ever use -n (noclobber) with -M (move) (not possible today

	1515 # since we call copy internally from move and don't specify the -n flag)

	1516 # we'll need to only remove the source when we have not skipped the

	1517 # destination.

	1518 if self.perform_mv:

	1519 if not self.quiet:

	1520 self.THREADED_LOGGER.info('Removing %s...', exp_src_uri)

	1521 exp_src_uri.delete_key(validate=False, headers=self.headers)

	1522 stats_lock.acquire()

	1523 self.total_elapsed_time += elapsed_time

	1524 self.total_bytes_transferred += bytes_transferred

	1525 stats_lock.release()

	1526

	1527 # Start of RunCommand code.

	1528 self._ParseArgs()

	1529

	1530 self.total_elapsed_time = self.total_bytes_transferred = 0

	1531 if self.args[-1] == '-' or self.args[-1] == 'file://-':

	1532 self._HandleStreamingDownload()

	1533 return 0

	1534

	1535 if self.read_args_from_stdin:

	1536 if len(self.args) != 1:

	1537 raise CommandException('Source URIs cannot be specified with -I option')

	1538 uri_strs = self._StdinIterator()

	1539 else:

	1540 if len(self.args) < 2:

	1541 raise CommandException('Wrong number of arguments for "cp" command.')

	1542 uri_strs = self.args[0:len(self.args)-1]

	1543

	1544 (exp_dst_uri, have_existing_dst_container) = self._ExpandDstUri(

	1545 self.args[-1])

	1546 name_expansion_iterator = NameExpansionIterator(

	1547 self.command_name, self.proj_id_handler, self.headers, self.debug,

	1548 self.bucket_storage_uri_class, uri_strs,

	1549 self.recursion_requested or self.perform_mv,

	1550 have_existing_dst_container)

	1551

	1552 # Use a lock to ensure accurate statistics in the face of

	1553 # multi-threading/multi-processing.

	1554 stats_lock = threading.Lock()

	1555

	1556 # Tracks if any copies failed.

	1557 self.copy_failure_count = 0

	1558

	1559 # Start the clock.

	1560 start_time = time.time()

	1561

	1562 # Tuple of attributes to share/manage across multiple processes in

	1563 # parallel (-m) mode.

	1564 shared_attrs = ('copy_failure_count', 'total_bytes_transferred')

	1565

	1566 # Perform copy requests in parallel (-m) mode, if requested, using

	1567 # configured number of parallel processes and threads. Otherwise,

	1568 # perform requests with sequential function calls in current process.

	1569 self.Apply(_CopyFunc, name_expansion_iterator, _CopyExceptionHandler,

	1570 shared_attrs)

	1571 if self.debug:

	1572 print 'total_bytes_transferred:' + str(self.total_bytes_transferred)

	1573

	1574 end_time = time.time()

	1575 self.total_elapsed_time = end_time - start_time

	1576

	1577 # Sometimes, particularly when running unit tests, the total elapsed time

	1578 # is really small. On Windows, the timer resolution is too small and

	1579 # causes total_elapsed_time to be zero.

	1580 try:

	1581 float(self.total_bytes_transferred) / float(self.total_elapsed_time)

	1582 except ZeroDivisionError:

	1583 self.total_elapsed_time = 0.01

	1584

	1585 self.total_bytes_per_second = (float(self.total_bytes_transferred) /

	1586 float(self.total_elapsed_time))

	1587

	1588 if self.debug == 3:

	1589 # Note that this only counts the actual GET and PUT bytes for the copy

	1590 # - not any transfers for doing wildcard expansion, the initial HEAD

	1591 # request boto performs when doing a bucket.get_key() operation, etc.

	1592 if self.total_bytes_transferred != 0:

	1593 self.THREADED_LOGGER.info(

	1594 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)',

	1595 self.total_bytes_transferred, self.total_elapsed_time,

	1596 MakeHumanReadable(self.total_bytes_per_second))

	1597 if self.copy_failure_count:

	1598 plural_str = ''

	1599 if self.copy_failure_count > 1:

	1600 plural_str = 's'

	1601 raise CommandException('%d file%s/object%s could not be transferred.' % (

	1602 self.copy_failure_count, plural_str, plural_str))

	1603

	1604 return 0

	1605

	1606 def _ParseArgs(self):

	1607 self.perform_mv = False

	1608 self.exclude_symlinks = False

	1609 self.quiet = False

	1610 self.no_clobber = False

	1611 self.continue_on_error = False

	1612 self.daisy_chain = False

	1613 self.read_args_from_stdin = False

	1614 self.print_ver = False

	1615 # self.recursion_requested initialized in command.py (so can be checked

	1616 # in parent class for all commands).

	1617 if self.sub_opts:

	1618 for o, unused_a in self.sub_opts:

	1619 if o == '-c':

	1620 self.continue_on_error = True

	1621 elif o == '-D':

	1622 self.daisy_chain = True

	1623 elif o == '-e':

	1624 self.exclude_symlinks = True

	1625 elif o == '-I':

	1626 self.read_args_from_stdin = True

	1627 elif o == '-M':

	1628 # Note that we signal to the cp command to perform a move (copy

	1629 # followed by remove) and use directory-move naming rules by passing

	1630 # the undocumented (for internal use) -M option when running the cp

	1631 # command from mv.py.

	1632 self.perform_mv = True

	1633 elif o == '-n':

	1634 self.no_clobber = True

	1635 elif o == '-q':

	1636 self.quiet = True

	1637 elif o == '-r' or o == '-R':

	1638 self.recursion_requested = True

	1639 elif o == '-v':

	1640 self.print_ver = True

	1641

	1642 def _HandleStreamingDownload(self):

	1643 # Destination is <STDOUT>. Manipulate sys.stdout so as to redirect all

	1644 # debug messages to <STDERR>.

	1645 stdout_fp = sys.stdout

	1646 sys.stdout = sys.stderr

	1647 did_some_work = False

	1648 for uri_str in self.args[0:len(self.args)-1]:

	1649 for uri in self.WildcardIterator(uri_str).IterUris():

	1650 did_some_work = True

	1651 key = uri.get_key(False, self.headers)

	1652 (elapsed_time, bytes_transferred) = self._PerformDownloadToStream(

	1653 key, uri, stdout_fp, self.headers)

	1654 self.total_elapsed_time += elapsed_time

	1655 self.total_bytes_transferred += bytes_transferred

	1656 if not did_some_work:

	1657 raise CommandException('No URIs matched')

	1658 if self.debug == 3:

	1659 if self.total_bytes_transferred != 0:

	1660 self.THREADED_LOGGER.info(

	1661 'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)',

	1662 self.total_bytes_transferred, self.total_elapsed_time,

	1663 MakeHumanReadable(float(self.total_bytes_transferred) /

	1664 float(self.total_elapsed_time)))

	1665

	1666 def _StdinIterator(self):

	1667 """A generator function that returns lines from stdin."""

	1668 for line in sys.stdin:

	1669 # Strip CRLF.

	1670 yield line.rstrip()

	1671

	1672 def _SrcDstSame(self, src_uri, dst_uri):

	1673 """Checks if src_uri and dst_uri represent the same object or file.

	1674

	1675 We don't handle anything about hard or symbolic links.

	1676

	1677 Args:

	1678 src_uri: Source StorageUri.

	1679 dst_uri: Destination StorageUri.

	1680

	1681 Returns:

	1682 Bool indicator.

	1683 """

	1684 if src_uri.is_file_uri() and dst_uri.is_file_uri():

	1685 # Translate a/b/./c to a/b/c, so src=dst comparison below works.

	1686 new_src_path = os.path.normpath(src_uri.object_name)

	1687 new_dst_path = os.path.normpath(dst_uri.object_name)

	1688 return (src_uri.clone_replace_name(new_src_path).uri ==

	1689 dst_uri.clone_replace_name(new_dst_path).uri)

	1690 else:

	1691 return (src_uri.uri == dst_uri.uri and

	1692 src_uri.generation == dst_uri.generation and

	1693 src_uri.version_id == dst_uri.version_id)

	1694

	1695 def _ShouldTreatDstUriAsBucketSubDir(self, have_multiple_srcs, dst_uri,

	1696 have_existing_dest_subdir):

	1697 """

	1698 Checks whether dst_uri should be treated as a bucket "sub-directory". The

	1699 decision about whether something constitutes a bucket "sub-directory"

	1700 depends on whether there are multiple sources in this request and whether

	1701 there is an existing bucket subdirectory. For example, when running the

	1702 command:

	1703 gsutil cp file gs://bucket/abc

	1704 if there's no existing gs://bucket/abc bucket subdirectory we should copy

	1705 file to the object gs://bucket/abc. In contrast, if

	1706 there's an existing gs://bucket/abc bucket subdirectory we should copy

	1707 file to gs://bucket/abc/file. And regardless of whether gs://bucket/abc

	1708 exists, when running the command:

	1709 gsutil cp file1 file2 gs://bucket/abc

	1710 we should copy file1 to gs://bucket/abc/file1 (and similarly for file2).

	1711

	1712 Note that we don't disallow naming a bucket "sub-directory" where there's

	1713 already an object at that URI. For example it's legitimate (albeit

	1714 confusing) to have an object called gs://bucket/dir and

	1715 then run the command

	1716 gsutil cp file1 file2 gs://bucket/dir

	1717 Doing so will end up with objects gs://bucket/dir, gs://bucket/dir/file1,

	1718 and gs://bucket/dir/file2.

	1719

	1720 Args:

	1721 have_multiple_srcs: Bool indicator of whether this is a multi-source

	1722 operation.

	1723 dst_uri: StorageUri to check.

	1724 have_existing_dest_subdir: bool indicator whether dest is an existing

	1725 subdirectory.

	1726

	1727 Returns:

	1728 bool indicator.

	1729 """

	1730 return ((have_multiple_srcs and dst_uri.is_cloud_uri())

	1731 or (have_existing_dest_subdir))

	1732

	1733 def _ShouldTreatDstUriAsSingleton(self, have_multiple_srcs,

	1734 have_existing_dest_subdir, dst_uri):

	1735 """

	1736 Checks that dst_uri names a singleton (file or object) after

	1737 dir/wildcard expansion. The decision is more nuanced than simply

	1738 dst_uri.names_singleton()) because of the possibility that an object path

	1739 might name a bucket sub-directory.

	1740

	1741 Args:

	1742 have_multiple_srcs: Bool indicator of whether this is a multi-source

	1743 operation.

	1744 have_existing_dest_subdir: bool indicator whether dest is an existing

	1745 subdirectory.

	1746 dst_uri: StorageUri to check.

	1747

	1748 Returns:

	1749 bool indicator.

	1750 """

	1751 if have_multiple_srcs:

	1752 # Only a file meets the criteria in this case.

	1753 return dst_uri.names_file()

	1754 return not have_existing_dest_subdir and dst_uri.names_singleton()

	1755

	1756 def _IsNoClobberServerException(self, e):

	1757 """

	1758 Checks to see if the server attempted to clobber a file after we specified

	1759 in the header that we didn't want the file clobbered.

	1760

	1761 Args:

	1762 e: The Exception that was generated by a failed copy operation

	1763

	1764 Returns:

	1765 bool indicator - True indicates that the server did attempt to clobber

	1766 an existing file.

	1767 """

	1768 return self.no_clobber and (

	1769 (isinstance(e, GSResponseError) and e.status==412) or

	1770 (isinstance(e, ResumableUploadException) and 'code 412' in e.message))

	1771

	1772 def _GetPathBeforeFinalDir(uri):

	1773 """

	1774 Returns the part of the path before the final directory component for the

	1775 given URI, handling cases for file system directories, bucket, and bucket

	1776 subdirectories. Example: for gs://bucket/dir/ we'll return 'gs://bucket',

	1777 and for file://dir we'll return file://

	1778

	1779 Args:

	1780 uri: StorageUri.

	1781

	1782 Returns:

	1783 String name of above-described path, sans final path separator.

	1784 """

	1785 sep = uri.delim

	1786 assert not uri.names_file()

	1787 if uri.names_directory():

	1788 past_scheme = uri.uri[len('file://'):]

	1789 if past_scheme.find(sep) == -1:

	1790 return 'file://'

	1791 else:

	1792 return 'file://%s' % past_scheme.rstrip(sep).rpartition(sep)[0]

	1793 if uri.names_bucket():

	1794 return '%s://' % uri.scheme

	1795 # Else it names a bucket subdir.

	1796 return uri.uri.rstrip(sep).rpartition(sep)[0]

	1797

	1798 def _hash_filename(filename):

	1799 """

	1800 Apply a hash function (SHA1) to shorten the passed file name. The spec

	1801 for the hashed file name is as follows:

	1802

	1803 TRACKER_<hash>_<trailing>

	1804

	1805 where hash is a SHA1 hash on the original file name and trailing is

	1806 the last 16 chars from the original file name. Max file name lengths

	1807 vary by operating system so the goal of this function is to ensure

	1808 the hashed version takes fewer than 100 characters.

	1809

	1810 Args:

	1811 filename: file name to be hashed.

	1812

	1813 Returns:

	1814 shorter, hashed version of passed file name

	1815 """

	1816 if not isinstance(filename, unicode):

	1817 filename = unicode(filename, 'utf8').encode('utf-8')

	1818 m = hashlib.sha1(filename)

	1819 return "TRACKER_" + m.hexdigest() + '.' + filename[-16:]

OLD	NEW

« no previous file with comments | « third_party/gsutil/gslib/commands/config.py ('k') | third_party/gsutil/gslib/commands/disablelogging.py » ('j') | no next file with comments »