chrome/common/extensions/docs/server2/intro_data_source.py - Issue 10829348: Extensions Docs Server: Large performance increase

Side by Side Diff: chrome/common/extensions/docs/server2/intro_data_source.py

Issue 10829348: Extensions Docs Server: Large performance increase (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: fixes and ObjectStore Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/common/extensions/docs/server2/in_memory_object_store.py ('K') | « chrome/common/extensions/docs/server2/integration_test.py ('k') | chrome/common/extensions/docs/server2/local_file_system.py » ('j') | chrome/common/extensions/docs/server2/memcache_file_system.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from HTMLParser import HTMLParser	5 from HTMLParser import HTMLParser

6 import logging

7 import re	6 import re

8	7

9 from docs_server_utils import FormatKey	8 from docs_server_utils import FormatKey

10 from file_system import FileNotFoundError	9 from file_system import FileNotFoundError

	10 import file_system_cache as fs_cache

11 from third_party.handlebar import Handlebar	11 from third_party.handlebar import Handlebar

12	12

	13 _intro_regex = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL)
	not at google - send to devlin (kalman), 2012/08/21 00:30:11: should be _INTRO_REGEX or _H1_REGEX
	cduvall, 2012/08/21 01:33:33: On 2012/08/21 00:30:11, kalman wrote: > should be _INTRO_REGEX > > or _H1_REGEX — Done.
	14

13 class _IntroParser(HTMLParser):	15 class _IntroParser(HTMLParser):

14 """ An HTML parser which will parse table of contents and page title info out	16 """ An HTML parser which will parse table of contents and page title info out

15 of an intro.	17 of an intro.

16 """	18 """

17 def __init__(self):	19 def __init__(self):

18 HTMLParser.__init__(self)	20 HTMLParser.__init__(self)

19 self.toc = []	21 self.toc = []

20 self.page_title = None	22 self.page_title = None

21 self._recent_tag = None	23 self._recent_tag = None

22 self._current_heading = {}	24 self._current_heading = {}

(...skipping 23 matching lines...) Expand all Loading...
46 return	48 return

47 if self._recent_tag == 'h1':	49 if self._recent_tag == 'h1':

48 if self.page_title is None:	50 if self.page_title is None:

49 self.page_title = data	51 self.page_title = data

50 else:	52 else:

51 self.page_title += data	53 self.page_title += data

52 elif self._recent_tag in ['h2', 'h3']:	54 elif self._recent_tag in ['h2', 'h3']:

53 self._current_heading['title'] += data	55 self._current_heading['title'] += data

54	56

55 class IntroDataSource(object):	57 class IntroDataSource(object):

	58

	59 class Factory(object):

	60 def __init__(self, cache_builder, base_paths):

	61 self._cache = cache_builder.build(self._MakeIntroDict,

	62 fs_cache.INTRO)

	63 self._base_paths = base_paths

	64

	65 def _MakeIntroDict(self, intro):

	66 parser = _IntroParser()

	67 parser.feed(intro)

	68 intro = re.sub(_intro_regex, '', intro, count=1)

	69 return {

	70 'intro': Handlebar(intro),

	71 'toc': parser.toc,

	72 'title': parser.page_title

	73 }

	74

	75 def Create(self):

	76 return IntroDataSource(self._cache, self._base_paths)

	77

56 """This class fetches the intros for a given API. From this intro, a table	78 """This class fetches the intros for a given API. From this intro, a table

57 of contents dictionary is created, which contains the headings in the intro.	79 of contents dictionary is created, which contains the headings in the intro.

58 """	80 """

59 def __init__(self, cache_builder, base_paths):	81 def __init__(self, cache, base_paths):

60 self._cache = cache_builder.build(self._MakeIntroDict)	82 self._cache = cache

61 self._base_paths = base_paths	83 self._base_paths = base_paths

62 self._intro_regex = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL)

63

64 def _MakeIntroDict(self, intro):

65 parser = _IntroParser()

66 parser.feed(intro)

67 intro = re.sub(self._intro_regex, '', intro, count=1)

68 return {

69 'intro': Handlebar(intro),

70 'toc': parser.toc,

71 'title': parser.page_title

72 }

73	84

74 def __getitem__(self, key):	85 def __getitem__(self, key):

75 return self.get(key)	86 return self.get(key)

76	87

77 def get(self, key):	88 def get(self, key):

78 real_path = FormatKey(key)	89 real_path = FormatKey(key)

79 error = None	90 error = None

80 for base_path in self._base_paths:	91 for base_path in self._base_paths:

81 try:	92 try:

82 return self._cache.GetFromFile(base_path + '/' + real_path)	93 return self._cache.GetFromFile(base_path + '/' + real_path)

83 except FileNotFoundError as error:	94 except FileNotFoundError as error:

84 pass	95 pass

85 raise ValueError(str(error) + ': No intro found for "%s".' % key)	96 raise ValueError(str(error) + ': No intro found for "%s".' % key)

OLD	NEW