Index: chrome/common/extensions/docs/server2/subversion_file_system.py |
diff --git a/chrome/common/extensions/docs/server2/subversion_file_system.py b/chrome/common/extensions/docs/server2/subversion_file_system.py |
index a27d788ddfb66e7e47402911586c56741d5d2f05..0d3113df76886c3083f17b67319a18e4e5f3163b 100644 |
--- a/chrome/common/extensions/docs/server2/subversion_file_system.py |
+++ b/chrome/common/extensions/docs/server2/subversion_file_system.py |
@@ -4,6 +4,7 @@ |
import re |
import xml.dom.minidom as xml |
+from xml.parsers.expat import ExpatError |
import file_system |
from future import Future |
@@ -11,18 +12,64 @@ from future import Future |
class SubversionFileSystem(file_system.FileSystem): |
"""Class to fetch resources from src.chromium.org. |
""" |
def __init__(self, fetcher, stat_fetcher):
  """Stores the two fetchers this file system delegates to.

  |fetcher| is handed to the delegate created by Read(); |stat_fetcher| is
  the one Stat() calls Fetch() on.
  """
  self._fetcher, self._stat_fetcher = fetcher, stat_fetcher
def Read(self, paths, binary=False):
  """Returns a Future whose delegate fetches |paths|.

  The delegate is an _AsyncFetchFuture built from |paths|, the fetcher given
  to the constructor, and the |binary| flag, which is passed through
  unchanged.
  """
  fetch_delegate = _AsyncFetchFuture(paths, self._fetcher, binary)
  return Future(delegate=fetch_delegate)
def _ParseHTML(self, html):
  """Parses |html| with the XML parser, dropping lines it cannot handle.

  Unfortunately, the viewvc page has a stray </div> tag, so whenever the
  parser reports an error the line it points at is removed and the parse is
  retried. This takes care of all mismatched tags.

  Returns the parsed DOM document.

  Raises ExpatError when the reported line cannot be removed (for example the
  document is empty or truncated), because retrying could never succeed.
  """
  lines = html.split('\n')
  while True:
    try:
      return xml.parseString('\n'.join(lines))
    except ExpatError as e:
      # ExpatError line numbers are 1-based.
      bad_index = e.lineno - 1
      if not 0 <= bad_index < len(lines):
        # Nothing left to strip; give up instead of retrying forever.
        raise
      lines = [line for i, line in enumerate(lines) if i != bad_index]
not at google - send to devlin
2012/08/13 05:34:15
I love a good list comprehension challenge.
retur
cduvall
2012/08/13 19:45:45
I didn't use one of the HTML parsing libraries bec
not at google - send to devlin
2012/08/13 23:02:14
No list comprehension? :(
cduvall
2012/08/14 18:15:00
Oops forgot to change it! Done :)
|
+ |
def _CreateStatInfo(self, html):
  """Builds a StatInfo from the HTML of a viewvc directory listing.

  The directory revision is the text of the first <a> inside the <td> that
  comes right after the <td> whose leading text is 'Directory revision:'.

  Child revisions are read from consecutive <a> tags collected from every
  <td>: an anchor carrying a |name| attribute, immediately followed by an
  anchor whose first <strong> holds the revision. When the second anchor's
  |title| contains 'file' the entry is keyed by the name, otherwise by the
  name plus a trailing '/'.

  Anchors that do not fit this layout are skipped rather than raising, so a
  small change in the page does not break every Stat call. If the directory
  revision cannot be found it is passed to StatInfo as None.
  """
  dom = self._ParseHTML(html)
  # Brace yourself, this is about to get ugly. The page returned from viewvc
  # was not the prettiest.
  anchors = []
  dir_revision = None
  expect_dir_revision = False
  for td in dom.getElementsByTagName('td'):
    td_anchors = td.getElementsByTagName('a')
    if expect_dir_revision:
      # This cell follows the 'Directory revision:' label.
      expect_dir_revision = False
      if td_anchors and td_anchors[0].firstChild is not None:
        dir_revision = td_anchors[0].firstChild.nodeValue
    anchors.extend(td_anchors)
    label = td.firstChild
    if label is not None and label.nodeValue == 'Directory revision:':
      expect_dir_revision = True

  child_revisions = {}
  # Pairing each anchor with its successor means a trailing named anchor is
  # simply never paired, instead of indexing past the end of the list.
  for name_anchor, rev_anchor in zip(anchors, anchors[1:]):
    name = name_anchor.getAttribute('name')
    if not name:
      continue
    strongs = rev_anchor.getElementsByTagName('strong')
    if not strongs or strongs[0].firstChild is None:
      continue
    is_file = 'file' in rev_anchor.getAttribute('title')
    key = name if is_file else name + '/'
    child_revisions[key] = strongs[0].firstChild.nodeValue
  return self.StatInfo(dir_revision, child_revisions)
+ |
def Stat(self, path):
  """Returns the StatInfo for |path|, built from its directory listing.

  The listing of the directory containing |path| is fetched with the stat
  fetcher and parsed by _CreateStatInfo. When |path| ends with '/', that
  StatInfo is returned as is. Otherwise its |version| is replaced by the
  version recorded for the file name in |child_versions|.

  Raises file_system.FileNotFoundError if the fetch returns a 404, or if
  |path| names a file that is not present in the directory listing.
  """
  # NOTE(review): for a |path| without any '/', the whole path is treated as
  # the directory to fetch -- confirm callers never pass such paths.
  directory = path.rsplit('/', 1)[0]
  result = self._stat_fetcher.Fetch(directory + '/')
  if result.status_code == 404:
    raise file_system.FileNotFoundError(path)
  stat_info = self._CreateStatInfo(result.content)
  if path.endswith('/'):
    return stat_info
  filename = path.rsplit('/', 1)[-1]
  if filename not in stat_info.child_versions:
    # Do not report the directory's version for a file that does not exist.
    raise file_system.FileNotFoundError(path)
  stat_info.version = stat_info.child_versions[filename]
  return stat_info
class _AsyncFetchFuture(object): |
def __init__(self, paths, fetcher, binary): |