Index: chrome/common/extensions/docs/server2/subversion_file_system.py
diff --git a/chrome/common/extensions/docs/server2/subversion_file_system.py b/chrome/common/extensions/docs/server2/subversion_file_system.py
index b5d26429776543cff0e0a62ed5ec4791ec4d89dc..019c8e87abdfed3d8b96cdc2b36e7c2903b949b1 100644
--- a/chrome/common/extensions/docs/server2/subversion_file_system.py
+++ b/chrome/common/extensions/docs/server2/subversion_file_system.py
@@ -4,6 +4,7 @@
import re
import xml.dom.minidom as xml
+from xml.parsers.expat import ExpatError
import file_system
from future import Future
@@ -11,16 +12,57 @@ from future import Future
class SubversionFileSystem(file_system.FileSystem):
  """Class to fetch resources from src.chromium.org.
  """
-  def __init__(self, fetcher):
+  def __init__(self, fetcher, stat_fetcher):
    self._fetcher = fetcher
+    self._stat_fetcher = stat_fetcher
  def Read(self, paths, binary=False):
    return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary))
+  def _ParseHTML(self, html):
+    """Unfortunately, the viewvc page has a stray </div> tag, so this takes care
+    of all mismatched tags.
+    """
+    try:
+      return xml.parseString(html)
+    except ExpatError as e:
+      new_html = []
+      for lineno, line in enumerate(html.split('\n')):
+        if e.lineno != lineno + 1:
+          new_html.append(line)
+      return self._ParseHTML('\n'.join(new_html))
+
+  def _CreateStatInfo(self, html):
+    dom = self._ParseHTML(html)
+    # Brace yourself, this is about to get ugly. The page returned from viewvc
+    # was not the prettiest.

not at google - send to devlin, 2012/08/10 06:42:24:
heh. Let's hope it doesn't change; but if it does,

+    tds = dom.getElementsByTagName('td')
+    a_list = []
+    found = False
+    dir_revision = None
+    for td in tds:
+      if found:
+        dir_revision = td.getElementsByTagName('a')[0].firstChild.nodeValue
+        found = False
+      a_list.extend(td.getElementsByTagName('a'))
+      if (td.firstChild is not None and
+          td.firstChild.nodeValue == 'Directory revision:'):
+        found = True
+    child_revisions = {}
+    for i in range(len(a_list)):
+      name = a_list[i].getAttribute('name')
+      if name:
+        rev = a_list[i + 1].getElementsByTagName('strong')[0]
+        if 'file' in a_list[i + 1].getAttribute('title'):
+          child_revisions[name] = rev.firstChild.nodeValue
+        else:
+          child_revisions[name + '/'] = rev.firstChild.nodeValue
+    return self.StatInfo(dir_revision, child_revisions)
+
  def Stat(self, path):
    directory = path.rsplit('/', 1)[0]
-    dir_html = self._fetcher.Fetch(directory + '/').content
-    return self.StatInfo(int(re.search('([0-9]+)', dir_html).group(0)))
+    dir_html = self._stat_fetcher.Fetch(directory + '/').content
+    return self._CreateStatInfo(dir_html)
class _AsyncFetchFuture(object):
  def __init__(self, paths, fetcher, binary):
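
For reference, here is a minimal, self-contained sketch (not part of the change) of the markup that _CreateStatInfo() scrapes. The FAKE_VIEWVC_HTML string, the create_stat_info() helper, and the anchor name/title values are all invented approximations of a viewvc directory listing, chosen only so the traversal above has something to run against; the real page may differ.

# Standalone sketch: invented viewvc-style markup, plus a helper that mirrors
# the traversal SubversionFileSystem._CreateStatInfo() performs in this change.
import xml.dom.minidom as xml

FAKE_VIEWVC_HTML = '''
<table>
  <tr>
    <td>Directory revision:</td>
    <td><a href="?pathrev=151000">151000</a></td>
  </tr>
  <tr>
    <td>
      <a name="app_window.html"></a>
      <a title="view file contents" href="app_window.html"><strong>150123</strong></a>
    </td>
  </tr>
  <tr>
    <td>
      <a name="examples"></a>
      <a title="view directory contents" href="examples/"><strong>149876</strong></a>
    </td>
  </tr>
</table>
'''

def create_stat_info(html):
  dom = xml.parseString(html)
  tds = dom.getElementsByTagName('td')
  a_list = []
  found = False
  dir_revision = None
  for td in tds:
    if found:
      # The <td> right after the 'Directory revision:' label holds the
      # revision of the directory itself.
      dir_revision = td.getElementsByTagName('a')[0].firstChild.nodeValue
      found = False
    a_list.extend(td.getElementsByTagName('a'))
    if (td.firstChild is not None and
        td.firstChild.nodeValue == 'Directory revision:'):
      found = True
  child_revisions = {}
  for i in range(len(a_list)):
    # Each child entry is a named anchor immediately followed by a link whose
    # <strong> text is that child's last-changed revision; a 'file' in the
    # link's title distinguishes files from directories.
    name = a_list[i].getAttribute('name')
    if name:
      rev = a_list[i + 1].getElementsByTagName('strong')[0]
      if 'file' in a_list[i + 1].getAttribute('title'):
        child_revisions[name] = rev.firstChild.nodeValue
      else:
        child_revisions[name + '/'] = rev.firstChild.nodeValue
  return dir_revision, child_revisions

print(create_stat_info(FAKE_VIEWVC_HTML))
# ('151000', {'app_window.html': '150123', 'examples/': '149876'})

In the CL itself the page is first run through _ParseHTML(), which drops the line that triggered the ExpatError (viewvc emits a stray </div>) and retries, and the two values feed StatInfo's directory revision and per-child revision map. If viewvc's markup changes, this traversal is what would need updating, which is the concern raised in the review comment above.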