Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import re | |
| 6 | |
| 7 class SubversionDataSource(object): | |
| **not at google - send to devlin** (2012/07/11 00:35:09): How about just putting a list-files method on the …<br>**cduvall** (2012/07/11 20:56:30): Done. | |
| 8 """This class gets files and lists directories from subversion. | |
| 9 """ | |
| 10 def __init__(self, cache_builder, base_path, ignore_path): | |
| 11 self._dir_cache = cache_builder.build(self._ListFilesInDirectory) | |
| 12 self._file_cache = cache_builder.build(lambda x: x) | |
| 13 self._base_path = base_path | |
| 14 self._ignore_path = ignore_path | |
| 15 | |
| 16 def _GetDirFromPage(self, page): | |
| 17 page_dir = re.search('<title>.* (.*)</title>', page).group(1) | |
| **not at google - send to devlin** (2012/07/11 00:35:09): Could we try using an HTML parser here rather than …<br>**Aaron Boodman** (2012/07/11 02:54:51): The directory listing page looks like valid xml to …<br>**not at google - send to devlin** (2012/07/11 07:23:58): I think there's actually an HTML parser in python? | |
| 18 if self._ignore_path: | |
| 19 page_dir = page_dir.split(self._ignore_path)[-1] | |
| 20 return page_dir | |
| 21 | |
| 22 def _GetFilesFromPage(self, page): | |
| 23 dir_contents = re.findall('<a.*>(.*)</a>', page) | |
| 24 if '..' in dir_contents: | |
| 25 dir_contents.remove('..') | |
| 26 return dir_contents | |
| 27 | |
| 28 def _ListFilesInDirectory(self, directory, add_dir=False): | |
| 29 dir_contents = self._GetFilesFromPage(directory) | |
| 30 file_list = filter(lambda x: not x.endswith('/'), dir_contents) | |
| 31 for file_name in dir_contents: | |
| 32 if file_name.endswith('/'): | |
| 33 par_dir = self._GetDirFromPage(directory) | |
| 34 next_dir = par_dir + '/' + file_name | |
| 35 # Add the full path to each file. | |
| 36 file_list.extend(map(lambda x: file_name + x, | |
| 37 self._dir_cache.get(self._base_path + '/' + next_dir))) | |
| 38 if add_dir: | |
| 39 par_dir = self._GetDirFromPage(directory) | |
| 40 return map(lambda x: par_dir + '/' + x, file_list) | |
| 41 return file_list | |
| OLD | NEW |