| Index: third_party/gsutil/gslib/tests/test_cp.py
|
| diff --git a/third_party/gsutil/gslib/tests/test_cp.py b/third_party/gsutil/gslib/tests/test_cp.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..f9bb7309a6db2a2a9c89b8d9e61f7b2ac7a643b3
|
| --- /dev/null
|
| +++ b/third_party/gsutil/gslib/tests/test_cp.py
|
| @@ -0,0 +1,366 @@
|
| +# Copyright 2013 Google Inc. All Rights Reserved.
|
| +#
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at
|
| +#
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| +#
|
| +# Unless required by applicable law or agreed to in writing, software
|
| +# distributed under the License is distributed on an "AS IS" BASIS,
|
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| +# See the License for the specific language governing permissions and
|
| +# limitations under the License.
|
| +
|
| +import boto
|
| +import os
|
| +import re
|
| +import gslib.tests.testcase as testcase
|
| +from gslib.util import Retry
|
| +from gslib.util import TWO_MB
|
| +from boto import storage_uri
|
| +from gslib.tests.util import ObjectToURI as suri
|
| +
|
| +
|
# Absolute path of the directory that contains this test module.
CURDIR = os.path.dirname(os.path.abspath(__file__))
# Fixture files referenced by the tests (e.g. test.mp3, test.gif) are looked
# up under this directory.
TEST_DATA_DIR = os.path.join(CURDIR, 'test_data')
|
| +
|
| +
|
class TestCp(testcase.GsUtilIntegrationTestCase):
  """Integration tests for cp command.

  Every test drives gsutil through self.RunGsUtil and then inspects the
  captured stdout/stderr, the resulting objects, or the resulting local files.
  """

  def _get_test_file(self, name):
    """Returns the path of the fixture file `name` under TEST_DATA_DIR."""
    return os.path.join(TEST_DATA_DIR, name)

  def _assert_content_type(self, uri_str, content_type):
    """Asserts that 'ls -L uri_str' reports the given Content-Type.

    Args:
      uri_str: URI string of the object to list.
      content_type: Expected value of the Content-Type line in the listing.
    """
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, delay=1, backoff=1)
    def _Check():
      stdout = self.RunGsUtil(['ls', '-L', uri_str], return_stdout=True)
      self.assertIn('Content-Type:\t%s' % content_type, stdout)
    _Check()

  def test_noclobber(self):
    """Tests that cp -n leaves an existing object and existing file alone."""
    key_uri = self.CreateObject(contents='foo')
    fpath = self.CreateTempFile(contents='bar')
    # Upload onto an existing object: it must be skipped and keep 'foo'.
    stderr = self.RunGsUtil(['cp', '-n', fpath, suri(key_uri)],
                            return_stderr=True)
    self.assertIn('Skipping existing item: %s' % suri(key_uri), stderr)
    self.assertEqual(key_uri.get_contents_as_string(), 'foo')
    # Download onto an existing file: it must be skipped and keep 'bar'.
    stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), fpath],
                            return_stderr=True)
    with open(fpath, 'r') as f:
      # NOTE(review): suri() is handed the open file object here, not fpath;
      # presumably ObjectToURI accepts file objects - confirm in tests/util.
      self.assertIn('Skipping existing item: %s' % suri(f), stderr)
      self.assertEqual(f.read(), 'bar')

  def test_copy_in_cloud_noclobber(self):
    """Tests that cp -n skips an object already copied to another bucket."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
    # First copy (without -n) is expected to actually copy the object.
    stderr = self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)],
                            return_stderr=True)
    self.assertEqual(stderr.count('Copying'), 1)
    # Second copy (with -n) must report the destination as skipped.
    stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), suri(bucket2_uri)],
                            return_stderr=True)
    self.assertIn('Skipping existing item: %s' % suri(bucket2_uri,
                  key_uri.object_name), stderr)

  def test_streaming(self):
    """Tests uploading from stdin ('-') to an object."""
    bucket_uri = self.CreateBucket()
    stderr = self.RunGsUtil(['cp', '-', '%s' % suri(bucket_uri, 'foo')],
                            stdin='bar', return_stderr=True)
    self.assertIn('Copying from <STDIN>', stderr)
    key_uri = bucket_uri.clone_replace_name('foo')
    self.assertEqual(key_uri.get_contents_as_string(), 'bar')

  # TODO: Implement a way to test both with and without using magic file.

  def test_detect_content_type(self):
    """Tests that Content-Type is detected for mp3 and gif uploads."""
    bucket_uri = self.CreateBucket()
    dsturi = suri(bucket_uri, 'foo')

    self.RunGsUtil(['cp', self._get_test_file('test.mp3'), dsturi])
    self._assert_content_type(dsturi, 'audio/mpeg')

    self.RunGsUtil(['cp', self._get_test_file('test.gif'), dsturi])
    self._assert_content_type(dsturi, 'image/gif')

  def test_content_type_override_default(self):
    """Tests that an empty Content-Type header yields binary/octet-stream."""
    bucket_uri = self.CreateBucket()
    dsturi = suri(bucket_uri, 'foo')

    self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                    self._get_test_file('test.mp3'), dsturi])
    self._assert_content_type(dsturi, 'binary/octet-stream')

    self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                    self._get_test_file('test.gif'), dsturi])
    self._assert_content_type(dsturi, 'binary/octet-stream')

  def test_content_type_override(self):
    """Tests uploads made with an empty Content-Type header."""
    # NOTE(review): this test runs exactly the same commands and assertions
    # as test_content_type_override_default; confirm whether a non-empty
    # Content-Type override was intended here.
    bucket_uri = self.CreateBucket()
    dsturi = suri(bucket_uri, 'foo')

    self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                    self._get_test_file('test.mp3'), dsturi])
    self._assert_content_type(dsturi, 'binary/octet-stream')

    self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                    self._get_test_file('test.gif'), dsturi])
    self._assert_content_type(dsturi, 'binary/octet-stream')

  def test_foo_noct(self):
    """Tests the Content-Type of a small text upload with no type given."""
    bucket_uri = self.CreateBucket()
    dsturi = suri(bucket_uri, 'foo')
    fpath = self.CreateTempFile(contents='foo/bar\n')
    self.RunGsUtil(['cp', fpath, dsturi])
    # The expected type depends on the 'use_magicfile' boto config setting.
    use_magicfile = boto.config.getbool('GSUtil', 'use_magicfile', False)
    content_type = ('text/plain' if use_magicfile
                    else 'application/octet-stream')
    self._assert_content_type(dsturi, content_type)

  def test_content_type_mismatches(self):
    """Tests that an explicit Content-Type wins over the file's contents."""
    bucket_uri = self.CreateBucket()
    dsturi = suri(bucket_uri, 'foo')
    fpath = self.CreateTempFile(contents='foo/bar\n')

    # Same header for an mp3, a gif and a text file; each upload must be
    # listed as image/gif.
    for src in (self._get_test_file('test.mp3'),
                self._get_test_file('test.gif'),
                fpath):
      self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp', src, dsturi])
      self._assert_content_type(dsturi, 'image/gif')

  def test_versioning(self):
    """Tests copying to and from specific generations of an object."""
    bucket_uri = self.CreateVersionedBucket()
    k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
    k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
    g1 = k2_uri.generation
    # Overwrite k2 with k1's contents, then re-read k2 to pick up the new
    # generation.
    self.RunGsUtil(['cp', suri(k1_uri), suri(k2_uri)])
    k2_uri = bucket_uri.clone_replace_name(k2_uri.object_name)
    k2_uri = bucket_uri.clone_replace_key(k2_uri.get_key())
    g2 = k2_uri.generation
    k2_uri.set_contents_from_string('data3')
    g3 = k2_uri.generation

    fpath = self.CreateTempFile()
    # Check to make sure current version is data3.
    self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
    with open(fpath, 'r') as f:
      self.assertEqual(f.read(), 'data3')

    # Check contents of all three versions
    self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g1), fpath])
    with open(fpath, 'r') as f:
      self.assertEqual(f.read(), 'data1')
    self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g2), fpath])
    with open(fpath, 'r') as f:
      self.assertEqual(f.read(), 'data2')
    self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g3), fpath])
    with open(fpath, 'r') as f:
      self.assertEqual(f.read(), 'data3')

    # Copy first version to current and verify.
    self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g1),
                    k2_uri.versionless_uri])
    self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
    with open(fpath, 'r') as f:
      self.assertEqual(f.read(), 'data1')

    # Attempt to specify a version-specific URI for destination.
    stderr = self.RunGsUtil(['cp', fpath, k2_uri.uri], return_stderr=True,
                            expected_status=1)
    self.assertIn('cannot be the destination for gsutil cp', stderr)

  def test_cp_v_option(self):
    """Tests that cp -v reports the created object's version-specific URI."""
    bucket_uri = self.CreateVersionedBucket()
    k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
    k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')

    # Case 1: Upload file to object using one-shot PUT.
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
    self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

    # Case 2: Upload file to object using resumable upload.
    # getint (not get) is required: a threshold configured in the boto config
    # is stored as a string, and os.urandom() needs an integer byte count.
    size_threshold = boto.config.getint('GSUtil', 'resumable_threshold',
                                        TWO_MB)
    file_as_string = os.urandom(size_threshold)
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=file_as_string)
    self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

    # Case 3: Upload stream to object.
    self._run_cp_minus_v_test('-v', '-', k2_uri.uri)

    # Case 4: Download object to file. For this case we just expect output of
    # gsutil cp -v to be the URI of the file.
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir)
    dst_uri = storage_uri(fpath1)
    stderr = self.RunGsUtil(['cp', '-v', suri(k1_uri), suri(dst_uri)],
                            return_stderr=True)
    self.assertIn('Created: %s' % dst_uri.uri, stderr.split('\n')[-2])

    # Case 5: Daisy-chain from object to object.
    self._run_cp_minus_v_test('-Dv', k1_uri.uri, k2_uri.uri)

    # Case 6: Copy object to object in-the-cloud.
    # TODO: Uncomment this test once copy-in-the-cloud returns version-specific
    # URI.
    #self._run_cp_minus_v_test('-v', k1_uri.uri, k2_uri.uri)

  def _run_cp_minus_v_test(self, opt, src_str, dst_str):
    """Runs 'cp opt src_str dst_str' and checks the reported "Created:" URI.

    Args:
      opt: Option string passed to cp (e.g. '-v' or '-Dv').
      src_str: Source argument for cp.
      dst_str: Destination argument for cp; also listed with 'ls -a'.
    """
    stderr = self.RunGsUtil(['cp', opt, src_str, dst_str], return_stderr=True)
    match = re.search(r'Created: (.*)\n', stderr)
    self.assertIsNotNone(match)
    created_uri = match.group(1)
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, delay=1, backoff=1)
    def _Check1():
      stdout = self.RunGsUtil(['ls', '-a', dst_str], return_stdout=True)
      lines = stdout.split('\n')
      # Final (most recent) object should match the "Created:" URI. This is
      # the second-to-last element: splitting output that ends in a newline
      # leaves an empty string as the last element.
      self.assertEqual(created_uri, lines[-2])
    _Check1()

  def test_stdin_args(self):
    """Tests cp -I, which reads the list of source files from stdin."""
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
    fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
    bucket_uri = self.CreateBucket()
    self.RunGsUtil(['cp', '-I', suri(bucket_uri)],
                   stdin='\n'.join((fpath1, fpath2)))
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, delay=1, backoff=1)
    def _Check1():
      stdout = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True)
      self.assertIn(os.path.basename(fpath1), stdout)
      self.assertIn(os.path.basename(fpath2), stdout)
      self.assertNumLines(stdout, 2)
    _Check1()

  def test_daisy_chain_cp(self):
    """Tests copying across storage classes without and with -D."""
    # Daisy chain mode is required for copying across storage classes,
    # so create 2 buckets and attempt to copy without vs with daisy chain mode.
    bucket1_uri = self.CreateBucket(storage_class='STANDARD')
    bucket2_uri = self.CreateBucket(
        storage_class='DURABLE_REDUCED_AVAILABILITY')
    key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
    stderr = self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)],
                            return_stderr=True, expected_status=1)
    self.assertIn('Copy-in-the-cloud disallowed', stderr)
    key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
    stderr = self.RunGsUtil(['cp', '-D', suri(key_uri), suri(bucket2_uri)],
                            return_stderr=True)
    self.assertNotIn('Copy-in-the-cloud disallowed', stderr)

  def test_cp_key_to_local_stream(self):
    """Tests copying an object to stdout ('-')."""
    bucket_uri = self.CreateBucket()
    contents = 'foo'
    key_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents)
    stdout = self.RunGsUtil(['cp', suri(key_uri), '-'], return_stdout=True)
    self.assertIn(contents, stdout)

  def test_cp_local_file_to_local_stream(self):
    """Tests copying a local file to stdout ('-')."""
    contents = 'content'
    fpath = self.CreateTempFile(contents=contents)
    stdout = self.RunGsUtil(['cp', fpath, '-'], return_stdout=True)
    self.assertIn(contents, stdout)

  def test_copy_bucket_to_bucket(self):
    """Tests object naming for a recursive bucket-to-bucket copy."""
    # Tests that recursively copying from bucket to bucket produces identically
    # named objects (and not, in particular, destination objects named by the
    # version-specific URI from source objects).
    src_bucket_uri = self.CreateVersionedBucket()
    dst_bucket_uri = self.CreateVersionedBucket()
    self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0',
                      contents='abc')
    self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1',
                      contents='def')
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, delay=1, backoff=1)
    def _CopyAndCheck():
      self.RunGsUtil(['cp', '-R', suri(src_bucket_uri),
                      suri(dst_bucket_uri)])
      stdout = self.RunGsUtil(['ls', '-R', dst_bucket_uri.uri],
                              return_stdout=True)
      self.assertIn('%s%s/obj0\n' % (dst_bucket_uri,
                                     src_bucket_uri.bucket_name), stdout)
      self.assertIn('%s%s/obj1\n' % (dst_bucket_uri,
                                     src_bucket_uri.bucket_name), stdout)
    _CopyAndCheck()

  def test_copy_bucket_to_dir(self):
    """Tests file naming for a recursive bucket-to-directory copy."""
    # Tests that recursively copying from bucket to dir produces identically
    # named objects (and not, in particular, destination objects named by the
    # version-specific URI from source objects).
    src_bucket_uri = self.CreateBucket()
    dst_dir = self.CreateTempDir()
    self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0',
                      contents='abc')
    self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1',
                      contents='def')
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, delay=1, backoff=1)
    def _CopyAndCheck():
      self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
      # Collect every file below dst_dir, sorted so obj0 precedes obj1.
      dir_list = sorted(
          os.path.join(dirname, filename)
          for dirname, _, filenames in os.walk(dst_dir)
          for filename in filenames)
      self.assertEqual(len(dir_list), 2)
      self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name,
                                    "obj0"), dir_list[0])
      self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name,
                                    "obj1"), dir_list[1])
    _CopyAndCheck()
|
|
|