OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import re | 5 import re |
6 import xml.dom.minidom as xml | 6 import xml.dom.minidom as xml |
| 7 from xml.parsers.expat import ExpatError |
7 | 8 |
8 import file_system | 9 import file_system |
9 from future import Future | 10 from future import Future |
10 | 11 |
class SubversionFileSystem(file_system.FileSystem):
  """Class to fetch resources from src.chromium.org.

  File contents are read through |fetcher|. Version information is scraped
  from the HTML page returned by |stat_fetcher| for a directory.
  """
  def __init__(self, fetcher, stat_fetcher):
    """Stores the two fetchers.

    |fetcher| must provide FetchAsync(path); it is used by Read().
    |stat_fetcher| must provide Fetch(path) returning an object with
    |status_code| and |content|; it is used by Stat().
    """
    self._fetcher = fetcher
    self._stat_fetcher = stat_fetcher

  def Read(self, paths, binary=False):
    """Starts fetching every path in |paths| and returns a Future wrapping
    the pending fetches.
    """
    return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary))

  def _ParseHTML(self, html):
    """Parses |html| into a minidom document.

    Unfortunately, the viewvc page has a stray </div> tag, so this takes care
    of all mismatched tags: whenever the parser rejects the markup, the line
    it reports is dropped and parsing is retried.

    Raises ExpatError if the reported line cannot be removed, which is the
    case for empty input. Every retry removes one line, so the loop always
    terminates.
    """
    lines = html.split('\n')
    while True:
      try:
        return xml.parseString('\n'.join(lines))
      except ExpatError as e:
        # ExpatError.lineno is 1-based.
        bad_index = e.lineno - 1
        if not 0 <= bad_index < len(lines):
          # Nothing left to drop; retrying would fail identically forever.
          raise
        del lines[bad_index]

  def _CreateStatInfo(self, html):
    """Builds a StatInfo from the HTML of a viewvc directory page.

    The directory revision is the text of the first link in the table cell
    that follows the cell reading 'Directory revision:'. Child revisions are
    taken from each named anchor: the link right after it carries the
    revision in a <strong> element, and its title tells files and directories
    apart. Directory names are stored with a trailing '/'.
    """
    dom = self._ParseHTML(html)
    # Brace yourself, this is about to get ugly. The page returned from viewvc
    # was not the prettiest.
    anchors = []
    dir_revision = None
    expect_revision = False
    for td in dom.getElementsByTagName('td'):
      td_anchors = td.getElementsByTagName('a')
      if expect_revision:
        # The previous cell was the 'Directory revision:' label.
        if td_anchors and td_anchors[0].firstChild is not None:
          dir_revision = td_anchors[0].firstChild.nodeValue
      anchors.extend(td_anchors)
      expect_revision = (td.firstChild is not None and
                         td.firstChild.nodeValue == 'Directory revision:')

    child_revisions = {}
    # Walk adjacent (anchor, following anchor) pairs in document order.
    for anchor, next_anchor in zip(anchors, anchors[1:]):
      name = anchor.getAttribute('name')
      if not name:
        continue
      strongs = next_anchor.getElementsByTagName('strong')
      if not strongs or strongs[0].firstChild is None:
        # A named anchor that is not followed by a revision link is not a
        # directory entry; skip it rather than fail on the lookup.
        continue
      revision = strongs[0].firstChild.nodeValue
      if 'file' in next_anchor.getAttribute('title'):
        child_revisions[name] = revision
      else:
        child_revisions[name + '/'] = revision
    return self.StatInfo(dir_revision, child_revisions)

  def Stat(self, path):
    """Returns the StatInfo for |path|.

    The listing of the directory containing |path| is fetched and parsed.
    For a directory (a path that is empty or ends with '/') the StatInfo of
    that directory is returned. For a file, the returned StatInfo has its
    version replaced by the revision recorded for that file.

    Raises file_system.FileNotFoundError if the directory fetch returns 404
    or if the file is not listed in its directory.
    """
    # rpartition yields an empty directory for a path without '/', so a
    # root-level file is looked up in the root listing.
    directory, _, filename = path.rpartition('/')
    result = self._stat_fetcher.Fetch(directory + '/')
    if result.status_code == 404:
      raise file_system.FileNotFoundError(path)
    stat_info = self._CreateStatInfo(result.content)
    if filename:
      if filename not in stat_info.child_versions:
        raise file_system.FileNotFoundError(path)
      stat_info.version = stat_info.child_versions[filename]
    return stat_info
26 | 75 |
27 class _AsyncFetchFuture(object): | 76 class _AsyncFetchFuture(object): |
28 def __init__(self, paths, fetcher, binary): | 77 def __init__(self, paths, fetcher, binary): |
29 # A list of tuples of the form (path, Future). | 78 # A list of tuples of the form (path, Future). |
30 self._fetches = [] | 79 self._fetches = [] |
31 self._value = {} | 80 self._value = {} |
32 self._error = None | 81 self._error = None |
33 self._fetches = [(path, fetcher.FetchAsync(path)) for path in paths] | 82 self._fetches = [(path, fetcher.FetchAsync(path)) for path in paths] |
34 self._binary = binary | 83 self._binary = binary |
35 | 84 |
(...skipping 12 matching lines...) Expand all Loading... |
48 elif path.endswith('/'): | 97 elif path.endswith('/'): |
49 self._value[path] = self._ListDir(result.content) | 98 self._value[path] = self._ListDir(result.content) |
50 elif not self._binary: | 99 elif not self._binary: |
51 self._value[path] = file_system._ProcessFileData(result.content, path) | 100 self._value[path] = file_system._ProcessFileData(result.content, path) |
52 else: | 101 else: |
53 self._value[path] = result.content | 102 self._value[path] = result.content |
54 if self._error is not None: | 103 if self._error is not None: |
55 raise self._error | 104 raise self._error |
56 return self._value | 105 return self._value |
57 | 106 |
OLD | NEW |