Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Unified Diff: third_party/gsutil/gslib/tests/test_wildcard_iterator.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/gsutil/gslib/tests/test_wildcard_iterator.py
diff --git a/third_party/gsutil/gslib/tests/test_wildcard_iterator.py b/third_party/gsutil/gslib/tests/test_wildcard_iterator.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3c144a8d1f478ff3df0ededaed60243024de1be
--- /dev/null
+++ b/third_party/gsutil/gslib/tests/test_wildcard_iterator.py
@@ -0,0 +1,364 @@
+# Copyright 2010 Google Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+"""Unit tests for gslib wildcard_iterator"""
+
+import os.path
+import tempfile
+
+from boto import InvalidUriError
+
+from gslib import wildcard_iterator
+from gslib.project_id import ProjectIdHandler
+import gslib.tests.testcase as testcase
+from gslib.wildcard_iterator import ContainsWildcard
+from gslib.tests.util import ObjectToURI as suri
+
+
+class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase):
+ """CloudWildcardIterator test suite"""
+
+ def setUp(self):
+ """Creates 2 mock buckets, each containing 4 objects, including 1 nested."""
+ super(CloudWildcardIteratorTests, self).setUp()
+ self.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
+ self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
+ 'nested1/nested2/xyz2', 'nested1/nfile_abc']
+
+ self.base_bucket_uri = self.CreateBucket()
+ self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61]
+ self.base_uri_str = suri(self.base_bucket_uri)
+ self.base_uri_str = self.base_uri_str.replace(
+ self.base_bucket_uri.bucket_name, self.prefix_bucket_name)
+
+ self.test_bucket0_uri = self.CreateBucket(
+ bucket_name='%s0' % self.prefix_bucket_name)
+ self.test_bucket0_obj_uri_strs = set()
+ for obj_name in self.all_obj_names:
+ obj_uri = self.CreateObject(bucket_uri=self.test_bucket0_uri,
+ object_name=obj_name, contents='')
+ self.test_bucket0_obj_uri_strs.add(suri(obj_uri))
+
+ self.test_bucket1_uri = self.CreateBucket(
+ bucket_name='%s1' % self.prefix_bucket_name)
+ self.test_bucket1_obj_uri_strs = set()
+ for obj_name in self.all_obj_names:
+ obj_uri = self.CreateObject(bucket_uri=self.test_bucket1_uri,
+ object_name=obj_name, contents='')
+ self.test_bucket1_obj_uri_strs.add(suri(obj_uri))
+
+ def testNoOpObjectIterator(self):
+ """Tests that bucket-only URI iterates just that one URI"""
+ results = list(
+ self._test_wildcard_iterator(self.test_bucket0_uri).IterUris())
+ self.assertEqual(1, len(results))
+ self.assertEqual(str(self.test_bucket0_uri), str(results[0]))
+
+ def testMatchingAllObjects(self):
+ """Tests matching all objects, based on wildcard"""
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('**')).IterUris())
+ self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)
+
+ def testMatchingObjectSubset(self):
+ """Tests matching a subset of objects, based on wildcard"""
+ exp_obj_uri_strs = set(
+ [str(self.test_bucket0_uri.clone_replace_name('abcd')),
+ str(self.test_bucket0_uri.clone_replace_name('abdd'))])
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('ab??')).IterUris())
+ self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)
+
+ def testMatchingNonWildcardedUri(self):
+ """Tests matching a single named object"""
+ exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name('abcd')
+ )])
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('abcd')).IterUris())
+ self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)
+
+ def testWildcardedObjectUriWithVsWithoutPrefix(self):
+ """Tests that wildcarding w/ and w/o server prefix get same result"""
+ # (It's just more efficient to query w/o a prefix; wildcard
+ # iterator will filter the matches either way.)
+ with_prefix_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('abcd')).IterUris())
+ # By including a wildcard at the start of the string no prefix can be
+ # used in server request.
+ no_prefix_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('?bcd')).IterUris())
+ self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)
+
+ def testWildcardedObjectUriNestedSubdirMatch(self):
+ """Tests wildcarding with a nested subdir"""
+ uri_strs = set()
+ prefixes = set()
+ for blr in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('*')):
+ if blr.HasPrefix():
+ prefixes.add(blr.GetPrefix().name)
+ else:
+ uri_strs.add(blr.GetUri().uri)
+ exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x)
+ for x in self.immed_child_obj_names])
+ self.assertEqual(exp_obj_uri_strs, uri_strs)
+ self.assertEqual(1, len(prefixes))
+ self.assertTrue('nested1/' in prefixes)
+
+ def testWildcardedObjectUriNestedSubSubdirMatch(self):
+ """Tests wildcarding with a nested sub-subdir"""
+ for final_char in ('', '/'):
+ uri_strs = set()
+ prefixes = set()
+ for blr in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('nested1/*%s' % final_char)):
+ if blr.HasPrefix():
+ prefixes.add(blr.GetPrefix().name)
+ else:
+ uri_strs.add(blr.GetUri().uri)
+ self.assertEqual(1, len(uri_strs))
+ self.assertEqual(1, len(prefixes))
+ self.assertTrue('nested1/nested2/' in prefixes)
+
+ def testWildcardPlusSubdirMatch(self):
+ """Tests gs://bucket/*/subdir matching"""
+ actual_uri_strs = set()
+ actual_prefixes = set()
+ for blr in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('*/nested1')):
+ if blr.HasPrefix():
+ actual_prefixes.add(blr.GetPrefix().name)
+ else:
+ actual_uri_strs.add(blr.GetUri().uri)
+ expected_uri_strs = set()
+ expected_prefixes = set(['nested1/'])
+ self.assertEqual(expected_prefixes, actual_prefixes)
+ self.assertEqual(expected_uri_strs, actual_uri_strs)
+
+ def testWildcardPlusSubdirSubdirMatch(self):
+ """Tests gs://bucket/*/subdir/* matching"""
+ actual_uri_strs = set()
+ actual_prefixes = set()
+ for blr in self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('*/nested2/*')):
+ if blr.HasPrefix():
+ actual_prefixes.add(blr.GetPrefix().name)
+ else:
+ actual_uri_strs.add(blr.GetUri().uri)
+ expected_uri_strs = set([
+ self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
+ self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
+ expected_prefixes = set()
+ self.assertEqual(expected_prefixes, actual_prefixes)
+ self.assertEqual(expected_uri_strs, actual_uri_strs)
+
+ def testNoMatchingWildcardedObjectUri(self):
+ """Tests that get back an empty iterator for non-matching wildcarded URI"""
+ res = list(self._test_wildcard_iterator(
+ self.test_bucket0_uri.clone_replace_name('*x0')).IterUris())
+ self.assertEqual(0, len(res))
+
+ def testWildcardedInvalidObjectUri(self):
+ """Tests that we raise an exception for wildcarded invalid URI"""
+ try:
+ for unused_ in self._test_wildcard_iterator(
+ 'badscheme://asdf').IterUris():
+ self.assertFalse('Expected InvalidUriError not raised.')
+ except InvalidUriError, e:
+ # Expected behavior.
+ self.assertTrue(e.message.find('Unrecognized scheme') != -1)
+
+ def testSingleMatchWildcardedBucketUri(self):
+ """Tests matching a single bucket based on a wildcarded bucket URI"""
+ exp_obj_uri_strs = set([
+ suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ '%s*1' % self.base_uri_str).IterUris())
+ self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)
+
+ def testMultiMatchWildcardedBucketUri(self):
+ """Tests matching a multiple buckets based on a wildcarded bucket URI"""
+ exp_obj_uri_strs = set([
+ suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim,
+ suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ '%s*' % self.base_uri_str).IterUris())
+ self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)
+
+ def testWildcardBucketAndObjectUri(self):
+ """Tests matching with both bucket and object wildcards"""
+ exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
+ 'abcd'))])
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ '%s0*/abc*' % self.base_uri_str).IterUris())
+ self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)
+
+ def testWildcardUpToFinalCharSubdirPlusObjectName(self):
+ """Tests wildcard subd*r/obj name"""
+ exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
+ 'nested1/nested2/xyz1'))])
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri).IterUris())
+ self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)
+
+ def testPostRecursiveWildcard(self):
+ """Tests that wildcard containing ** followed by an additional wildcard works"""
+ exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
+ 'nested1/nested2/xyz2'))])
+ actual_obj_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(
+ '%s**/*y*2' % self.test_bucket0_uri.uri).IterUris())
+ self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)
+
+ def testCallingGetKeyOnProviderOnlyWildcardIteration(self):
+ """Tests that attempting iterating provider-only wildcard raises"""
+ try:
+ from gslib.bucket_listing_ref import BucketListingRefException
+ for iter_result in wildcard_iterator.wildcard_iterator(
+ 'gs://', ProjectIdHandler(),
+ bucket_storage_uri_class=self.mock_bucket_storage_uri):
+ iter_result.GetKey()
+ self.fail('Expected BucketListingRefException not raised.')
+ except BucketListingRefException, e:
+ self.assertTrue(str(e).find(
+ 'Attempt to call GetKey() on Key-less BucketListingRef') != -1)
+
+
+class FileIteratorTests(testcase.GsUtilUnitTestCase):
+ """FileWildcardIterator test suite"""
+
+ def setUp(self):
+ """
+ Creates a test dir containing 3 files and one nested subdirectory + file.
+ """
+ super(FileIteratorTests, self).setUp()
+
+ self.test_dir = self.CreateTempDir(test_files=[
+ 'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')])
+
+ self.root_files_uri_strs = set([
+ suri(self.test_dir, 'abcd'),
+ suri(self.test_dir, 'abdd'),
+ suri(self.test_dir, 'ade$')])
+
+ self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')])
+
+ self.nested_files_uri_strs = set([
+ suri(self.test_dir, 'dir1', 'dir2', 'zzz')])
+
+ self.immed_child_uri_strs = self.root_files_uri_strs | self.subdirs_uri_strs
+ self.all_file_uri_strs = (
+ self.root_files_uri_strs | self.nested_files_uri_strs)
+
+ def testContainsWildcard(self):
+ """Tests ContainsWildcard call"""
+ self.assertTrue(ContainsWildcard('a*.txt'))
+ self.assertTrue(ContainsWildcard('a[0-9].txt'))
+ self.assertFalse(ContainsWildcard('0-9.txt'))
+ self.assertTrue(ContainsWildcard('?.txt'))
+
+ def testNoOpDirectoryIterator(self):
+ """Tests that directory-only URI iterates just that one URI"""
+ results = list(
+ self._test_wildcard_iterator(suri(tempfile.tempdir)).IterUris())
+ self.assertEqual(1, len(results))
+ self.assertEqual(suri(tempfile.tempdir), str(results[0]))
+
+ def testMatchingAllFiles(self):
+ """Tests matching all files, based on wildcard"""
+ uri = self._test_storage_uri(suri(self.test_dir, '*'))
+ actual_uri_strs = set(str(u) for u in
+ self._test_wildcard_iterator(uri).IterUris()
+ )
+ self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)
+
+ def testMatchingFileSubset(self):
+ """Tests matching a subset of files, based on wildcard"""
+ exp_uri_strs = set(
+ [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')])
+ uri = self._test_storage_uri(suri(self.test_dir, 'ab??'))
+ actual_uri_strs = set(str(u) for u in
+ self._test_wildcard_iterator(uri).IterUris()
+ )
+ self.assertEqual(exp_uri_strs, actual_uri_strs)
+
+ def testMatchingNonWildcardedUri(self):
+ """Tests matching a single named file"""
+ exp_uri_strs = set([suri(self.test_dir, 'abcd')])
+ uri = self._test_storage_uri(suri(self.test_dir, 'abcd'))
+ actual_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(uri).IterUris())
+ self.assertEqual(exp_uri_strs, actual_uri_strs)
+
+ def testMatchingFilesIgnoringOtherRegexChars(self):
+ """Tests ignoring non-wildcard regex chars (e.g., ^ and $)"""
+
+ exp_uri_strs = set([suri(self.test_dir, 'ade$')])
+ uri = self._test_storage_uri(suri(self.test_dir, 'ad*$'))
+ actual_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(uri).IterUris())
+ self.assertEqual(exp_uri_strs, actual_uri_strs)
+
+ def testRecursiveDirectoryOnlyWildcarding(self):
+ """Tests recursive expansion of directory-only '**' wildcard"""
+ uri = self._test_storage_uri(suri(self.test_dir, '**'))
+ actual_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(uri).IterUris())
+ self.assertEqual(self.all_file_uri_strs, actual_uri_strs)
+
+ def testRecursiveDirectoryPlusFileWildcarding(self):
+ """Tests recursive expansion of '**' directory plus '*' wildcard"""
+ uri = self._test_storage_uri(suri(self.test_dir, '**', '*'))
+ actual_uri_strs = set(
+ str(u) for u in self._test_wildcard_iterator(uri).IterUris())
+ self.assertEqual(self.all_file_uri_strs, actual_uri_strs)
+
+ def testInvalidRecursiveDirectoryWildcard(self):
+ """Tests that wildcard containing '***' raises exception"""
+ try:
+ uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd'))
+ for unused_ in self._test_wildcard_iterator(uri).IterUris():
+ self.fail('Expected WildcardException not raised.')
+ except wildcard_iterator.WildcardException, e:
+ # Expected behavior.
+ self.assertTrue(str(e).find('more than 2 consecutive') != -1)
+
+ def testMissingDir(self):
+ """Tests that wildcard gets empty iterator when directory doesn't exist"""
+ res = list(
+ self._test_wildcard_iterator(suri('no_such_dir', '*')).IterUris())
+ self.assertEqual(0, len(res))
+
+ def testExistingDirNoFileMatch(self):
+ """Tests that wildcard returns empty iterator when there's no match"""
+ uri = self._test_storage_uri(
+ suri(self.test_dir, 'non_existent*'))
+ res = list(self._test_wildcard_iterator(uri).IterUris())
+ self.assertEqual(0, len(res))

Powered by Google App Engine
This is Rietveld 408576698