OLD | NEW |
(Empty) | |
| 1 # Copyright 2010 Google Inc. All Rights Reserved. |
| 2 # |
| 3 # Permission is hereby granted, free of charge, to any person obtaining a |
| 4 # copy of this software and associated documentation files (the |
| 5 # "Software"), to deal in the Software without restriction, including |
| 6 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 7 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 8 # persons to whom the Software is furnished to do so, subject to the fol- |
| 9 # lowing conditions: |
| 10 # |
| 11 # The above copyright notice and this permission notice shall be included |
| 12 # in all copies or substantial portions of the Software. |
| 13 # |
| 14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 15 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 16 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 17 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 18 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 20 # IN THE SOFTWARE. |
| 21 |
| 22 """Unit tests for gslib wildcard_iterator""" |
| 23 |
| 24 import os.path |
| 25 import tempfile |
| 26 |
| 27 from boto import InvalidUriError |
| 28 |
| 29 from gslib import wildcard_iterator |
| 30 from gslib.project_id import ProjectIdHandler |
| 31 import gslib.tests.testcase as testcase |
| 32 from gslib.wildcard_iterator import ContainsWildcard |
| 33 from gslib.tests.util import ObjectToURI as suri |
| 34 |
| 35 |
class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase):
  """CloudWildcardIterator test suite."""

  def setUp(self):
    """Creates two test buckets, each holding the same six objects.

    Three of the objects sit directly under the bucket and three are nested
    under 'nested1/'. The two test bucket names share a common prefix derived
    from a third (base) bucket, so that bucket wildcards built from
    self.base_uri_str can match them.
    """
    super(CloudWildcardIteratorTests, self).setUp()
    self.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                          'nested1/nested2/xyz2', 'nested1/nfile_abc']

    self.base_bucket_uri = self.CreateBucket()
    # The base name is cut to 61 chars before '_' and a digit are appended;
    # presumably this keeps the derived names within the bucket name length
    # limit (TODO confirm).
    self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61]
    self.base_uri_str = suri(self.base_bucket_uri).replace(
        self.base_bucket_uri.bucket_name, self.prefix_bucket_name)

    self.test_bucket0_uri, self.test_bucket0_obj_uri_strs = (
        self._CreatePopulatedBucket('0'))
    self.test_bucket1_uri, self.test_bucket1_obj_uri_strs = (
        self._CreatePopulatedBucket('1'))

  def _CreatePopulatedBucket(self, suffix):
    """Creates a bucket containing one empty object per all_obj_names entry.

    Args:
      suffix: String appended to self.prefix_bucket_name to form the name.

    Returns:
      Tuple (bucket_uri, obj_uri_strs), where obj_uri_strs is the set of URI
      strings of the objects created in the bucket.
    """
    bucket_uri = self.CreateBucket(
        bucket_name='%s%s' % (self.prefix_bucket_name, suffix))
    obj_uri_strs = set()
    for obj_name in self.all_obj_names:
      obj_uri = self.CreateObject(bucket_uri=bucket_uri,
                                  object_name=obj_name, contents='')
      obj_uri_strs.add(suri(obj_uri))
    return bucket_uri, obj_uri_strs

  def _UriStrsMatching(self, uri):
    """Returns the set of URI strings yielded by IterUris() for uri."""
    return set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())

  def _UrisAndPrefixesMatching(self, uri):
    """Returns (object URI strings, prefix names) sets yielded for uri."""
    uri_strs = set()
    prefixes = set()
    for blr in self._test_wildcard_iterator(uri):
      if blr.HasPrefix():
        prefixes.add(blr.GetPrefix().name)
      else:
        uri_strs.add(blr.GetUri().uri)
    return uri_strs, prefixes

  def testNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI."""
    results = list(
        self._test_wildcard_iterator(self.test_bucket0_uri).IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def testMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard."""
    actual_obj_uri_strs = self._UriStrsMatching(
        self.test_bucket0_uri.clone_replace_name('**'))
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = self._UriStrsMatching(
        self.test_bucket0_uri.clone_replace_name('ab??'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named object."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._UriStrsMatching(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result."""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = self._UriStrsMatching(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = self._UriStrsMatching(
        self.test_bucket0_uri.clone_replace_name('?bcd'))
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def testWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir."""
    uri_strs, prefixes = self._UrisAndPrefixesMatching(
        self.test_bucket0_uri.clone_replace_name('*'))
    exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def testWildcardedObjectUriNestedSubSubdirMatch(self):
    """Tests wildcarding with a nested sub-subdir."""
    # The result must be the same with and without a trailing delimiter, so
    # the assertions run once per variant.
    for final_char in ('', '/'):
      uri_strs, prefixes = self._UrisAndPrefixesMatching(
          self.test_bucket0_uri.clone_replace_name('nested1/*%s' % final_char))
      self.assertEqual(1, len(uri_strs))
      self.assertEqual(1, len(prefixes))
      self.assertTrue('nested1/nested2/' in prefixes)

  def testWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching."""
    actual_uri_strs, actual_prefixes = self._UrisAndPrefixesMatching(
        self.test_bucket0_uri.clone_replace_name('*/nested1'))
    expected_uri_strs = set()
    expected_prefixes = set(['nested1/'])
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching."""
    actual_uri_strs, actual_prefixes = self._UrisAndPrefixesMatching(
        self.test_bucket0_uri.clone_replace_name('*/nested2/*'))
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    expected_prefixes = set()
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI."""
    res = list(self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterUris())
    self.assertEqual(0, len(res))

  def testWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI."""
    try:
      # Drain the iterator; the error may surface on construction or on
      # iteration, and either is acceptable.
      for unused_ in self._test_wildcard_iterator(
          'badscheme://asdf').IterUris():
        pass
    except InvalidUriError as e:
      # Expected behavior.
      self.assertTrue('Unrecognized scheme' in e.message)
    else:
      # Reached only when nothing was raised, including the case where the
      # iterator simply yielded no results.
      self.fail('Expected InvalidUriError not raised.')

  def testSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = self._UriStrsMatching('%s*1' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMultiMatchWildcardedBucketUri(self):
    """Tests matching multiple buckets based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim,
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = self._UriStrsMatching('%s*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._UriStrsMatching(
        '%s0*/abc*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1'))])
    actual_obj_uri_strs = self._UriStrsMatching(
        '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testPostRecursiveWildcard(self):
    """Tests that ** followed by an additional wildcard works."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2'))])
    actual_obj_uri_strs = self._UriStrsMatching(
        '%s**/*y*2' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testCallingGetKeyOnProviderOnlyWildcardIteration(self):
    """Tests that GetKey() on provider-only wildcard results raises."""
    # Imported locally, as in the original test; kept outside the try block
    # so an import failure is reported as such.
    from gslib.bucket_listing_ref import BucketListingRefException
    try:
      for iter_result in wildcard_iterator.wildcard_iterator(
          'gs://', ProjectIdHandler(),
          bucket_storage_uri_class=self.mock_bucket_storage_uri):
        iter_result.GetKey()
    except BucketListingRefException as e:
      self.assertTrue(
          'Attempt to call GetKey() on Key-less BucketListingRef' in str(e))
    else:
      # Also covers the case where the iteration produced no results at all.
      self.fail('Expected BucketListingRefException not raised.')
| 252 |
| 253 |
class FileIteratorTests(testcase.GsUtilUnitTestCase):
  """FileWildcardIterator test suite."""

  def setUp(self):
    """Creates a test dir with 3 files plus one file in a nested subdir."""
    super(FileIteratorTests, self).setUp()

    self.test_dir = self.CreateTempDir(test_files=[
        'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')])

    self.root_files_uri_strs = set([
        suri(self.test_dir, 'abcd'),
        suri(self.test_dir, 'abdd'),
        suri(self.test_dir, 'ade$')])

    self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')])

    self.nested_files_uri_strs = set([
        suri(self.test_dir, 'dir1', 'dir2', 'zzz')])

    self.immed_child_uri_strs = self.root_files_uri_strs | self.subdirs_uri_strs
    self.all_file_uri_strs = (
        self.root_files_uri_strs | self.nested_files_uri_strs)

  def _UriStrsMatching(self, *path_parts):
    """Returns URI strings yielded for path_parts joined under test_dir.

    Args:
      *path_parts: Path components (may contain wildcards) appended to
          self.test_dir to build the URI to iterate.

    Returns:
      Set of the string forms of the URIs produced by IterUris().
    """
    uri = self._test_storage_uri(suri(self.test_dir, *path_parts))
    return set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())

  def testContainsWildcard(self):
    """Tests ContainsWildcard call."""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def testNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI."""
    # Use gettempdir() rather than the tempfile.tempdir global: the global
    # stays None until some tempfile function has initialised it, so reading
    # it directly only works as a side effect of earlier temp-file creation.
    temp_dir_uri_str = suri(tempfile.gettempdir())
    results = list(
        self._test_wildcard_iterator(temp_dir_uri_str).IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual(temp_dir_uri_str, str(results[0]))

  def testMatchingAllFiles(self):
    """Tests matching all files, based on wildcard."""
    actual_uri_strs = self._UriStrsMatching('*')
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def testMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard."""
    exp_uri_strs = set(
        [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')])
    actual_uri_strs = self._UriStrsMatching('ab??')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named file."""
    exp_uri_strs = set([suri(self.test_dir, 'abcd')])
    actual_uri_strs = self._UriStrsMatching('abcd')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)."""
    exp_uri_strs = set([suri(self.test_dir, 'ade$')])
    actual_uri_strs = self._UriStrsMatching('ad*$')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard."""
    actual_uri_strs = self._UriStrsMatching('**')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard."""
    actual_uri_strs = self._UriStrsMatching('**', '*')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception."""
    try:
      uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd'))
      # Drain the iterator so that an error raised lazily is still seen.
      for unused_ in self._test_wildcard_iterator(uri).IterUris():
        pass
    except wildcard_iterator.WildcardException as e:
      # Expected behavior.
      self.assertTrue('more than 2 consecutive' in str(e))
    else:
      # Reached only when nothing was raised, including the case where the
      # iterator simply yielded no results.
      self.fail('Expected WildcardException not raised.')

  def testMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist."""
    res = list(
        self._test_wildcard_iterator(suri('no_such_dir', '*')).IterUris())
    self.assertEqual(0, len(res))

  def testExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match."""
    uri = self._test_storage_uri(
        suri(self.test_dir, 'non_existent*'))
    res = list(self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(0, len(res))
OLD | NEW |