Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Side by Side Diff: chrome/common/extensions/docs/server2/intro_data_source.py

Issue 10810047: Extensions Docs Server: HTML parser in IDS (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: No HTML in toc and only take first h1 Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | chrome/common/extensions/docs/server2/templates/private/table_of_contents.html » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import re 5 import logging
6
7 from HTMLParser import HTMLParser
8
6 from path_utils import FormatKey 9 from path_utils import FormatKey
7 from third_party.handlebar import Handlebar 10 from third_party.handlebar import Handlebar
8 11
12 class _IntroParser(HTMLParser):
13 """ An HTML parser which will parse table of contents and page title info out
14 of an intro.
15 """
16 def __init__(self):
17 HTMLParser.__init__(self)
18 self.toc = []
19 self.page_title = None
20 self._recent_tag = None
21 self._current_heading = {}
22
23 def handle_starttag(self, tag, attrs):
24 id_ = ''
25 if tag not in ['h1', 'h2', 'h3']:
26 return
27 if tag != 'h1' or self.page_title is None:
28 self._recent_tag = tag
29 for attr in attrs:
30 if attr[0] == 'id':
31 id_ = attr[1]
32 if tag == 'h2':
33 self._current_heading = { 'link': id_, 'subheadings': [], 'title': '' }
34 self.toc.append(self._current_heading)
35 elif tag == 'h3':
36 self._current_heading = { 'link': id_, 'title': '' }
37 self.toc[-1]['subheadings'].append(self._current_heading)
38
39 def handle_endtag(self, tag):
40 if tag in ['h1', 'h2', 'h3']:
41 self._recent_tag = None
42
43 def handle_data(self, data):
44 if self._recent_tag is None:
45 return
46 if self._recent_tag == 'h1':
47 if self.page_title is None:
48 self.page_title = data
49 else:
50 self.page_title += data
51 elif self._recent_tag in ['h2', 'h3']:
52 self._current_heading['title'] += data
53
9 class IntroDataSource(object): 54 class IntroDataSource(object):
10 """This class fetches the intros for a given API. From this intro, a table 55 """This class fetches the intros for a given API. From this intro, a table
11 of contents dictionary is created, which contains the headings in the intro. 56 of contents dictionary is created, which contains the headings in the intro.
12 """ 57 """
13 def __init__(self, cache_builder, base_paths): 58 def __init__(self, cache_builder, base_paths):
14 self._cache = cache_builder.build(self._MakeIntroDict) 59 self._cache = cache_builder.build(self._MakeIntroDict)
15 self._base_paths = base_paths 60 self._base_paths = base_paths
16 61
17 def _MakeIntroDict(self, intro): 62 def _MakeIntroDict(self, intro):
18 h1s = re.findall('<h1.*>(.+)</h1>', intro) 63 try:
19 if len(h1s) > 0: 64 parser = _IntroParser()
20 page_title = h1s[0] 65 parser.feed(intro)
21 else: 66 return {
22 page_title = '' 67 'intro': Handlebar(intro),
23 headings = re.findall('<h([23]) id\="(.+)">(.+)</h[23]>', intro) 68 'toc': parser.toc,
24 toc = [] 69 'title': parser.page_title
25 for heading in headings: 70 }
26 level, link, title = heading 71 except Exception as e:
27 if level == '2': 72 logging.info(e)
28 toc.append({ 'link': link, 'title': title, 'subheadings': [] })
29 else:
30 toc[-1]['subheadings'].append({ 'link': link, 'title': title })
31 return { 'intro': Handlebar(intro), 'toc': toc , 'title': page_title }
32 73
33 def __getitem__(self, key): 74 def __getitem__(self, key):
34 return self.get(key) 75 return self.get(key)
35 76
36 def get(self, key): 77 def get(self, key):
37 real_path = FormatKey(key) 78 real_path = FormatKey(key)
38 for base_path in self._base_paths: 79 for base_path in self._base_paths:
39 try: 80 try:
40 return self._cache.GetFromFile(base_path + '/' + real_path) 81 return self._cache.GetFromFile(base_path + '/' + real_path)
41 except Exception: 82 except Exception:
42 pass 83 pass
43 return None 84 return None
OLDNEW
« no previous file with comments | « no previous file | chrome/common/extensions/docs/server2/templates/private/table_of_contents.html » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698