Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(396)

Side by Side Diff: chrome/common/extensions/docs/server2/intro_data_source.py

Issue 10829348: Extensions Docs Server: Large performance increase (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fixes and ObjectStore Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from HTMLParser import HTMLParser 5 from HTMLParser import HTMLParser
6 import logging
7 import re 6 import re
8 7
9 from docs_server_utils import FormatKey 8 from docs_server_utils import FormatKey
10 from file_system import FileNotFoundError 9 from file_system import FileNotFoundError
10 import file_system_cache as fs_cache
11 from third_party.handlebar import Handlebar 11 from third_party.handlebar import Handlebar
12 12
13 _intro_regex = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL)
not at google - send to devlin 2012/08/21 00:30:11 This is a module-level constant, so it should be named in upper case: _INTRO_REGEX or _H1_REGEX.
cduvall 2012/08/21 01:33:33 Done.
14
13 class _IntroParser(HTMLParser): 15 class _IntroParser(HTMLParser):
14 """ An HTML parser which will parse table of contents and page title info out 16 """ An HTML parser which will parse table of contents and page title info out
15 of an intro. 17 of an intro.
16 """ 18 """
17 def __init__(self): 19 def __init__(self):
18 HTMLParser.__init__(self) 20 HTMLParser.__init__(self)
19 self.toc = [] 21 self.toc = []
20 self.page_title = None 22 self.page_title = None
21 self._recent_tag = None 23 self._recent_tag = None
22 self._current_heading = {} 24 self._current_heading = {}
(...skipping 23 matching lines...) Expand all
46 return 48 return
47 if self._recent_tag == 'h1': 49 if self._recent_tag == 'h1':
48 if self.page_title is None: 50 if self.page_title is None:
49 self.page_title = data 51 self.page_title = data
50 else: 52 else:
51 self.page_title += data 53 self.page_title += data
52 elif self._recent_tag in ['h2', 'h3']: 54 elif self._recent_tag in ['h2', 'h3']:
53 self._current_heading['title'] += data 55 self._current_heading['title'] += data
54 56
55 class IntroDataSource(object): 57 class IntroDataSource(object):
58
59 class Factory(object):
60 def __init__(self, cache_builder, base_paths):
61 self._cache = cache_builder.build(self._MakeIntroDict,
62 fs_cache.INTRO)
63 self._base_paths = base_paths
64
65 def _MakeIntroDict(self, intro):
66 parser = _IntroParser()
67 parser.feed(intro)
68 intro = re.sub(_intro_regex, '', intro, count=1)
69 return {
70 'intro': Handlebar(intro),
71 'toc': parser.toc,
72 'title': parser.page_title
73 }
74
75 def Create(self):
76 return IntroDataSource(self._cache, self._base_paths)
77
56 """This class fetches the intros for a given API. From this intro, a table 78 """This class fetches the intros for a given API. From this intro, a table
57 of contents dictionary is created, which contains the headings in the intro. 79 of contents dictionary is created, which contains the headings in the intro.
58 """ 80 """
59 def __init__(self, cache_builder, base_paths): 81 def __init__(self, cache, base_paths):
60 self._cache = cache_builder.build(self._MakeIntroDict) 82 self._cache = cache
61 self._base_paths = base_paths 83 self._base_paths = base_paths
62 self._intro_regex = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL)
63
64 def _MakeIntroDict(self, intro):
65 parser = _IntroParser()
66 parser.feed(intro)
67 intro = re.sub(self._intro_regex, '', intro, count=1)
68 return {
69 'intro': Handlebar(intro),
70 'toc': parser.toc,
71 'title': parser.page_title
72 }
73 84
74 def __getitem__(self, key): 85 def __getitem__(self, key):
75 return self.get(key) 86 return self.get(key)
76 87
77 def get(self, key): 88 def get(self, key):
78 real_path = FormatKey(key) 89 real_path = FormatKey(key)
79 error = None 90 error = None
80 for base_path in self._base_paths: 91 for base_path in self._base_paths:
81 try: 92 try:
82 return self._cache.GetFromFile(base_path + '/' + real_path) 93 return self._cache.GetFromFile(base_path + '/' + real_path)
83 except FileNotFoundError as error: 94 except FileNotFoundError as error:
84 pass 95 pass
85 raise ValueError(str(error) + ': No intro found for "%s".' % key) 96 raise ValueError(str(error) + ': No intro found for "%s".' % key)
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698