Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(521)

Side by Side Diff: third_party/gsutil/gslib/tests/test_wildcard_iterator.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2010 Google Inc. All Rights Reserved.
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a
4 # copy of this software and associated documentation files (the
5 # "Software"), to deal in the Software without restriction, including
6 # without limitation the rights to use, copy, modify, merge, publish, dis-
7 # tribute, sublicense, and/or sell copies of the Software, and to permit
8 # persons to whom the Software is furnished to do so, subject to the fol-
9 # lowing conditions:
10 #
11 # The above copyright notice and this permission notice shall be included
12 # in all copies or substantial portions of the Software.
13 #
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
16 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
17 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
21
22 """Unit tests for gslib wildcard_iterator"""
23
24 import os.path
25 import tempfile
26
27 from boto import InvalidUriError
28
29 from gslib import wildcard_iterator
30 from gslib.project_id import ProjectIdHandler
31 import gslib.tests.testcase as testcase
32 from gslib.wildcard_iterator import ContainsWildcard
33 from gslib.tests.util import ObjectToURI as suri
34
35
class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase):
  """CloudWildcardIterator test suite.

  Exercises wildcard expansion of bucket and object URIs against the two
  buckets populated in setUp().
  """

  def setUp(self):
    """Creates 2 mock buckets, each with 3 top-level and 3 nested objects."""
    super(CloudWildcardIteratorTests, self).setUp()
    self.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                          'nested1/nested2/xyz2', 'nested1/nfile_abc']

    self.base_bucket_uri = self.CreateBucket()
    # The base name is truncated before '_' and a one-character suffix are
    # appended; presumably this keeps the derived names within the maximum
    # bucket-name length -- TODO confirm.
    self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61]
    # URI string whose bucket component is the shared name prefix; the
    # bucket-wildcard tests append patterns such as '*' or '*1' to it.
    self.base_uri_str = suri(self.base_bucket_uri)
    self.base_uri_str = self.base_uri_str.replace(
        self.base_bucket_uri.bucket_name, self.prefix_bucket_name)

    self.test_bucket0_uri, self.test_bucket0_obj_uri_strs = (
        self._CreatePopulatedBucket('0'))
    self.test_bucket1_uri, self.test_bucket1_obj_uri_strs = (
        self._CreatePopulatedBucket('1'))

  def _CreatePopulatedBucket(self, suffix):
    """Creates a bucket named <prefix><suffix> holding all_obj_names.

    Args:
      suffix: String appended to self.prefix_bucket_name to form the name.

    Returns:
      (bucket_uri, obj_uri_strs) tuple, where obj_uri_strs is the set of URI
      strings of the (empty) objects created in the bucket.
    """
    bucket_uri = self.CreateBucket(
        bucket_name='%s%s' % (self.prefix_bucket_name, suffix))
    obj_uri_strs = set()
    for obj_name in self.all_obj_names:
      obj_uri = self.CreateObject(bucket_uri=bucket_uri,
                                  object_name=obj_name, contents='')
      obj_uri_strs.add(suri(obj_uri))
    return bucket_uri, obj_uri_strs

  def _SplitPrefixesAndUriStrs(self, uri):
    """Expands uri and separates prefix results from object results.

    Args:
      uri: URI (object or string) handed to self._test_wildcard_iterator.

    Returns:
      (prefixes, uri_strs) tuple of sets: the names of the results for which
      HasPrefix() is true, and the URI strings of all remaining results.
    """
    prefixes = set()
    uri_strs = set()
    for blr in self._test_wildcard_iterator(uri):
      if blr.HasPrefix():
        prefixes.add(blr.GetPrefix().name)
      else:
        uri_strs.add(blr.GetUri().uri)
    return prefixes, uri_strs

  def testNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI"""
    results = list(
        self._test_wildcard_iterator(self.test_bucket0_uri).IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def testMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard"""
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterUris())
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('ab??')).IterUris())
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named object"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('abcd')).IterUris())
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result"""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('abcd')).IterUris())
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('?bcd')).IterUris())
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def testWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir"""
    prefixes, uri_strs = self._SplitPrefixesAndUriStrs(
        self.test_bucket0_uri.clone_replace_name('*'))
    exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def testWildcardedObjectUriNestedSubSubdirMatch(self):
    """Tests wildcarding with a nested sub-subdir"""
    # The pattern is tried both with and without a trailing delimiter; the
    # same assertions are applied to each form.
    for final_char in ('', '/'):
      prefixes, uri_strs = self._SplitPrefixesAndUriStrs(
          self.test_bucket0_uri.clone_replace_name(
              'nested1/*%s' % final_char))
      self.assertEqual(1, len(uri_strs))
      self.assertEqual(1, len(prefixes))
      self.assertTrue('nested1/nested2/' in prefixes)

  def testWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching"""
    actual_prefixes, actual_uri_strs = self._SplitPrefixesAndUriStrs(
        self.test_bucket0_uri.clone_replace_name('*/nested1'))
    expected_uri_strs = set()
    expected_prefixes = set(['nested1/'])
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching"""
    actual_prefixes, actual_uri_strs = self._SplitPrefixesAndUriStrs(
        self.test_bucket0_uri.clone_replace_name('*/nested2/*'))
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    expected_prefixes = set()
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI"""
    res = list(self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterUris())
    self.assertEqual(0, len(res))

  def testWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI"""
    try:
      for unused_ in self._test_wildcard_iterator(
          'badscheme://asdf').IterUris():
        self.fail('Expected InvalidUriError not raised.')
    except InvalidUriError as e:
      # Expected behavior.
      self.assertTrue('Unrecognized scheme' in e.message)
    else:
      # Reached only when iteration finished without yielding or raising;
      # without this branch the test would pass vacuously.
      self.fail('Expected InvalidUriError not raised.')

  def testSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI"""
    exp_obj_uri_strs = set([
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*1' % self.base_uri_str).IterUris())
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMultiMatchWildcardedBucketUri(self):
    """Tests matching multiple buckets based on a wildcarded bucket URI"""
    exp_obj_uri_strs = set([
        suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim,
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*' % self.base_uri_str).IterUris())
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s0*/abc*' % self.base_uri_str).IterUris())
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name(
            'nested1/nested2/xyz1'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri).IterUris())
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testPostRecursiveWildcard(self):
    """Tests that '**' followed by an additional wildcard works"""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name(
            'nested1/nested2/xyz2'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s**/*y*2' % self.test_bucket0_uri.uri).IterUris())
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testCallingGetKeyOnProviderOnlyWildcardIteration(self):
    """Tests that calling GetKey() on provider-only wildcard results raises"""
    # Imported before the try block so that an import failure surfaces as
    # itself rather than as a NameError while evaluating the except clause.
    from gslib.bucket_listing_ref import BucketListingRefException
    try:
      for iter_result in wildcard_iterator.wildcard_iterator(
          'gs://', ProjectIdHandler(),
          bucket_storage_uri_class=self.mock_bucket_storage_uri):
        iter_result.GetKey()
        self.fail('Expected BucketListingRefException not raised.')
    except BucketListingRefException as e:
      self.assertTrue(
          'Attempt to call GetKey() on Key-less BucketListingRef' in str(e))
    else:
      # Reached only when the iterator yielded nothing at all; without this
      # branch the test would pass vacuously.
      self.fail('Expected BucketListingRefException not raised.')
252
253
class FileIteratorTests(testcase.GsUtilUnitTestCase):
  """FileWildcardIterator test suite.

  Exercises wildcard expansion of file URIs against the temporary directory
  tree created in setUp().
  """

  def setUp(self):
    """Creates a test dir with 3 files and one nested subdirectory + file."""
    super(FileIteratorTests, self).setUp()

    self.test_dir = self.CreateTempDir(test_files=[
        'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')])

    self.root_files_uri_strs = set([
        suri(self.test_dir, 'abcd'),
        suri(self.test_dir, 'abdd'),
        suri(self.test_dir, 'ade$')])

    self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')])

    self.nested_files_uri_strs = set([
        suri(self.test_dir, 'dir1', 'dir2', 'zzz')])

    # Immediate children include the subdirectory itself; the "all files"
    # set instead contains only files, at any depth.
    self.immed_child_uri_strs = (
        self.root_files_uri_strs | self.subdirs_uri_strs)
    self.all_file_uri_strs = (
        self.root_files_uri_strs | self.nested_files_uri_strs)

  def testContainsWildcard(self):
    """Tests ContainsWildcard call"""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def testNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI"""
    # tempfile.gettempdir() is used rather than the tempfile.tempdir global,
    # which is None until something has initialized it.
    tmp_dir_uri_str = suri(tempfile.gettempdir())
    results = list(self._test_wildcard_iterator(tmp_dir_uri_str).IterUris())
    self.assertEqual(1, len(results))
    self.assertEqual(tmp_dir_uri_str, str(results[0]))

  def testMatchingAllFiles(self):
    """Tests matching all files, based on wildcard"""
    uri = self._test_storage_uri(suri(self.test_dir, '*'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def testMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard"""
    exp_uri_strs = set(
        [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')])
    uri = self._test_storage_uri(suri(self.test_dir, 'ab??'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named file"""
    exp_uri_strs = set([suri(self.test_dir, 'abcd')])
    uri = self._test_storage_uri(suri(self.test_dir, 'abcd'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)"""
    exp_uri_strs = set([suri(self.test_dir, 'ade$')])
    uri = self._test_storage_uri(suri(self.test_dir, 'ad*$'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard"""
    uri = self._test_storage_uri(suri(self.test_dir, '**'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard"""
    uri = self._test_storage_uri(suri(self.test_dir, '**', '*'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception"""
    try:
      uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd'))
      for unused_ in self._test_wildcard_iterator(uri).IterUris():
        self.fail('Expected WildcardException not raised.')
    except wildcard_iterator.WildcardException as e:
      # Expected behavior.
      self.assertTrue('more than 2 consecutive' in str(e))
    else:
      # Reached only when iteration finished without yielding or raising;
      # without this branch the test would pass vacuously.
      self.fail('Expected WildcardException not raised.')

  def testMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist"""
    res = list(
        self._test_wildcard_iterator(suri('no_such_dir', '*')).IterUris())
    self.assertEqual(0, len(res))

  def testExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match"""
    uri = self._test_storage_uri(
        suri(self.test_dir, 'non_existent*'))
    res = list(self._test_wildcard_iterator(uri).IterUris())
    self.assertEqual(0, len(res))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698