Chromium Code Reviews| Index: chrome/common/extensions/docs/server2/subversion_data_source.py |
| diff --git a/chrome/common/extensions/docs/server2/subversion_data_source.py b/chrome/common/extensions/docs/server2/subversion_data_source.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..bad93c8bbc2b8dd119713407d16314e2b5405904 |
| --- /dev/null |
| +++ b/chrome/common/extensions/docs/server2/subversion_data_source.py |
| @@ -0,0 +1,41 @@ |
| +# Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import re |
| + |
| +class SubversionDataSource(object): |
|
not at google - send to devlin
2012/07/11 00:35:09
How about just putting a list-files method on the
cduvall
2012/07/11 20:56:30
Done.
|
| + """This class gets files and lists directories from subversion. |
| + """ |
| +  def __init__(self, cache_builder, base_path, ignore_path): |
| +    """Builds the directory and file caches and stores the path settings. |
| + |
| +    Args: |
| +      cache_builder: object providing build(fn). Its contract is not visible |
| +          here; presumably build() returns a cache exposing get(), since |
| +          _ListFilesInDirectory calls self._dir_cache.get(...) - confirm. |
| +      base_path: prefix joined with '/' in front of subdirectory paths before |
| +          they are looked up in the directory cache. |
| +      ignore_path: if truthy, the directory read from a page title is split |
| +          on this string and only the last piece is kept. |
| +    """ |
| +    # The directory cache is built around _ListFilesInDirectory. |
| +    self._dir_cache = cache_builder.build(self._ListFilesInDirectory) |
| +    # The file cache is built around an identity function. |
| +    self._file_cache = cache_builder.build(lambda x: x) |
| +    self._base_path = base_path |
| +    self._ignore_path = ignore_path |
| + |
| +  def _GetDirFromPage(self, page): |
| +    """Returns the directory named in the <title> element of |page|. |
| + |
| +    The regex captures the text between the last space inside the title and |
| +    the closing </title> tag. If self._ignore_path is truthy, the captured |
| +    text is split on it and only the final piece is returned. |
| +    """ |
| +    # NOTE(review): re.search() returns None when the pattern does not match |
| +    # (e.g. no <title>, or a title without a space), in which case .group(1) |
| +    # raises AttributeError. |
| +    page_dir = re.search('<title>.* (.*)</title>', page).group(1) |
|
not at google - send to devlin
2012/07/11 00:35:09
Could we try using an HTML parser here rather than
Aaron Boodman
2012/07/11 02:54:51
The directory listing page looks like valid xml to
not at google - send to devlin
2012/07/11 07:23:58
I think there's actually an HTML parser in python?
|
| +    if self._ignore_path: |
| +      # Keep only what follows the last occurrence of the ignored prefix. |
| +      page_dir = page_dir.split(self._ignore_path)[-1] |
| +    return page_dir |
| + |
| +  def _GetFilesFromPage(self, page): |
| +    """Returns the link texts found in |page|, without the '..' entry. |
| + |
| +    Each match of '<a.*>(.*)</a>' contributes the text between the tags. |
| +    """ |
| +    # NOTE(review): both '.*' are greedy and '.' does not cross newlines, so |
| +    # when several anchors share one line only the last link text on that |
| +    # line is captured. Presumably the listing has one anchor per line - |
| +    # confirm against the real page. |
| +    dir_contents = re.findall('<a.*>(.*)</a>', page) |
| +    # list.remove() drops only the first '..' entry (the parent link). |
| +    if '..' in dir_contents: |
| +      dir_contents.remove('..') |
| +    return dir_contents |
| + |
| +  def _ListFilesInDirectory(self, directory, add_dir=False): |
| +    """Lists the files named in a directory listing, including subdirectories. |
| + |
| +    Args: |
| +      directory: handed unchanged to _GetFilesFromPage and _GetDirFromPage, so |
| +          despite its name it is the listing page text rather than a path. |
| +      add_dir: when true, every returned path is prefixed with the directory |
| +          read from the page title plus '/'. |
| + |
| +    Returns: |
| +      The file paths. Files found in a subdirectory are prefixed with that |
| +      subdirectory's entry name (which already ends with '/'). |
| +    """ |
| +    dir_contents = self._GetFilesFromPage(directory) |
| +    # Entries ending with '/' are subdirectories; everything else is a file. |
| +    # NOTE(review): file_list.extend() below relies on filter() returning a |
| +    # list (Python 2 behaviour); on Python 3 filter() returns an iterator. |
| +    file_list = filter(lambda x: not x.endswith('/'), dir_contents) |
| +    for file_name in dir_contents: |
| +      if file_name.endswith('/'): |
| +        # NOTE(review): par_dir does not depend on file_name, yet the title |
| +        # is re-parsed for every subdirectory. |
| +        par_dir = self._GetDirFromPage(directory) |
| +        next_dir = par_dir + '/' + file_name |
| +        # Add the full path to each file. |
| +        # The cache key is base_path + '/' + par_dir + '/' + file_name; |
| +        # presumably the cache returns that subdirectory's file list |
| +        # (recursing through this method) - confirm against cache_builder. |
| +        file_list.extend(map(lambda x: file_name + x, |
| +            self._dir_cache.get(self._base_path + '/' + next_dir))) |
| +    if add_dir: |
| +      par_dir = self._GetDirFromPage(directory) |
| +      return map(lambda x: par_dir + '/' + x, file_list) |
| +    return file_list |