OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | |
6 | |
7 from HTMLParser import HTMLParser | 5 from HTMLParser import HTMLParser |
| 6 import re |
8 | 7 |
9 from docs_server_utils import FormatKey | 8 from docs_server_utils import FormatKey |
10 from third_party.handlebar import Handlebar | 9 from third_party.handlebar import Handlebar |
11 | 10 |
12 class _IntroParser(HTMLParser): | 11 class _IntroParser(HTMLParser): |
13 """ An HTML parser which will parse table of contents and page title info out | 12 """ An HTML parser which will parse table of contents and page title info out |
14 of an intro. | 13 of an intro. |
15 """ | 14 """ |
16 def __init__(self): | 15 def __init__(self): |
17 HTMLParser.__init__(self) | 16 HTMLParser.__init__(self) |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
51 elif self._recent_tag in ['h2', 'h3']: | 50 elif self._recent_tag in ['h2', 'h3']: |
52 self._current_heading['title'] += data | 51 self._current_heading['title'] += data |
53 | 52 |
class IntroDataSource(object):
  """Fetches the intros for a given API.

  From each intro a table of contents dictionary is created, which contains
  the headings in the intro. The first <h1> element is removed from the intro
  markup that is handed to the template, while the parsed page title is
  returned separately under the 'title' key.
  """

  # Matches one complete <h1>...</h1> element, including any attributes on the
  # opening tag. The attribute part is "[^>]" (anything up to the closing
  # bracket); it must not exclude '.', otherwise a heading such as
  # <h1 id="chrome.tabs"> would never match. DOTALL lets the heading text span
  # several lines.
  _INTRO_REGEX = re.compile(r'<h1[^>]*?>.*?</h1>', flags=re.DOTALL)

  def __init__(self, cache_builder, base_paths):
    """Creates the data source.

    Args:
      cache_builder: object whose build() method is given _MakeIntroDict and
          returns the cache that get() reads from.
      base_paths: iterable of directory paths that get() tries in order.
    """
    self._cache = cache_builder.build(self._MakeIntroDict)
    self._base_paths = base_paths
    # Kept as an instance attribute for compatibility with code that reads it.
    self._intro_regex = self._INTRO_REGEX

  @staticmethod
  def _StripPageTitle(intro):
    """Returns |intro| with its first <h1>...</h1> element removed."""
    return IntroDataSource._INTRO_REGEX.sub('', intro, count=1)

  def _MakeIntroDict(self, intro):
    """Builds the dictionary stored in the cache for one intro.

    Args:
      intro: the intro's HTML as a string.

    Returns:
      A dict with the keys 'intro' (a Handlebar built from the intro without
      its first <h1>), 'toc' and 'title' (both taken from the parser).
    """
    parser = _IntroParser()
    # Parse before stripping, so the parser still sees the <h1> element.
    parser.feed(intro)
    return {
      'intro': Handlebar(self._StripPageTitle(intro)),
      'toc': parser.toc,
      'title': parser.page_title
    }

  def __getitem__(self, key):
    """Dictionary-style access; equivalent to get(key)."""
    return self.get(key)

  def get(self, key):
    """Returns the cached intro for |key|, or None if no base path has it.

    Each base path is tried in order and the first successful lookup wins.
    """
    real_path = FormatKey(key)
    for base_path in self._base_paths:
      try:
        return self._cache.GetFromFile(base_path + '/' + real_path)
      except Exception:
        # Best effort: a failed lookup under this base path just means the
        # next one should be tried.
        continue
    return None
OLD | NEW |