Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Side by Side Diff: chrome/common/extensions/docs/server2/path_canonicalizer.py

Issue 148293018: Docserver: Make the .html extension unnecessary for content pages, for example, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: . Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 # Copyright 2013 The Chromium Authors. All rights reserved. 1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from collections import defaultdict 5 from collections import defaultdict
6 import posixpath 6 import posixpath
7 7
8 from path_util import SplitParent 8 from path_util import SplitParent
9 from special_paths import WEBMASTER_PROOF
9 10
10 11
11 def _SimplifyFileName(file_name): 12 def _SimplifyFileName(file_name):
12 return (posixpath.splitext(file_name)[0] 13 return (posixpath.splitext(file_name)[0]
13 .lower() 14 .lower()
14 .replace('.', '') 15 .replace('.', '')
15 .replace('-', '') 16 .replace('-', '')
16 .replace('_', '')) 17 .replace('_', ''))
17 18
18 19
19 class PathCanonicalizer(object): 20 class PathCanonicalizer(object):
20 '''Transforms paths into their canonical forms. Since the dev server has had 21 '''Transforms paths into their canonical forms. Since the docserver has had
21 many incarnations - e.g. there didn't use to be apps/ - there may be old 22 many incarnations - e.g. there didn't use to be apps/ - there may be old
22 paths lying around the webs. We try to redirect those to where they are now. 23 paths lying around the webs. We try to redirect those to where they are now.
23 ''' 24 '''
24 def __init__(self, compiled_fs_factory, file_system): 25 def __init__(self,
26 file_system,
27 object_store_creator,
28 strip_extensions):
29 # |strip_extensions| is a list of file extensions (e.g. .html) that should
30 # be stripped for a path's canonical form.
31 self._cache = object_store_creator.Create(
32 PathCanonicalizer, category=file_system.GetIdentity())
25 self._file_system = file_system 33 self._file_system = file_system
26 # A lazily populated mapping of file names to a list of full paths that 34 self._strip_extensions = strip_extensions
27 # contain them. For example,
28 # - browserAction.html: [extensions/browserAction.html]
29 # - storage.html: [apps/storage.html, extensions/storage.html]
30 self._files_to_paths = None
31 35
32 def _GetPotentialPaths(self, filename): 36 def _LoadCache(self):
33 '''Returns the paths to any file called |filename|. 37 cached = self._cache.GetMulti(('canonical_paths',
34 ''' 38 'simplified_to_paths')).Get()
35 if self._files_to_paths is None: 39
36 self._files_to_paths = defaultdict(list) 40 # |canonical_paths| is the pre-calculated set of canonical paths.
41 # |simplified_to_paths| is a lazily populated mapping of simplified file
Yoyo Zhou 2014/02/12 22:14:05 simplified_paths_map?
not at google - send to devlin 2014/02/13 03:34:34 Done.
42 # names to a list of full paths that contain them. For example,
43 # - browseraction: [extensions/browserAction.html]
44 # - storage: [apps/storage.html, extensions/storage.html]
45 canonical_paths, simplified_to_paths = (
46 cached.get('canonical_paths'), cached.get('simplified_to_paths'))
47
48 if canonical_paths is None:
49 assert simplified_to_paths is None
50 canonical_paths = set()
51 simplified_to_paths = {}
37 for base, dirs, files in self._file_system.Walk(''): 52 for base, dirs, files in self._file_system.Walk(''):
38 for f in dirs + files: 53 for path in dirs + files:
Yoyo Zhou 2014/02/12 22:14:05 Be consistent: path_without_ext + ext = path here,
not at google - send to devlin 2014/02/13 03:34:34 Done.
39 self._files_to_paths[_SimplifyFileName(f)].append( 54 # Update |canonical_paths|.
40 posixpath.join(base, f)) 55 path_without_ext, ext = posixpath.splitext(path)
41 return self._files_to_paths.get(_SimplifyFileName(filename)) 56 canonical_path = posixpath.join(
57 base,
58 path_without_ext if ext in self._strip_extensions else path)
59 canonical_paths.add(canonical_path)
60 # Update |simplified_to_paths|.
61 simplified = _SimplifyFileName(path)
62 if simplified not in simplified_to_paths:
Yoyo Zhou 2014/02/12 22:14:05 Why not use defaultdict for simplified_to_paths?
not at google - send to devlin 2014/02/13 03:34:34 Ooh, it pickles.
63 simplified_to_paths[simplified] = [canonical_path]
64 simplified_to_paths[simplified].append(canonical_path)
65 # Store each path list in |simplified_to_paths| sorted by length, then
66 # lexicographically, so the shortest, smallest path always comes first.
67 for path_list in simplified_to_paths.itervalues():
68 path_list.sort(key=lambda p: (len(p), p))
69 self._cache.SetMulti({
70 'canonical_paths': canonical_paths,
71 'simplified_to_paths': simplified_to_paths,
72 })
73 else:
74 assert simplified_to_paths is not None
75
76 return canonical_paths, simplified_to_paths
42 77
43 def Canonicalize(self, path): 78 def Canonicalize(self, path):
44 '''Returns the canonical path for |path|. 79 '''Returns the canonical path for |path|.
45 ''' 80 '''
46 # Path may already be the canonical path. 81 if path == WEBMASTER_PROOF:
Yoyo Zhou 2014/02/12 22:14:05 Are there tests that cover this path?
not at google - send to devlin 2014/02/13 03:34:34 There are now. And I decided the current behaviour
47 if self._file_system.Exists(path).Get():
48 return path 82 return path
49 83
50 # Path not found. Our single heuristic: find |basename| in the directory 84 canonical_paths, simplified_to_paths = self._LoadCache()
85
86 # Path may already be the canonical path.
87 if path in canonical_paths:
88 return path
89
90 # Path not found. Our single heuristic: find |base| in the directory
51 # structure with the longest common prefix of |path|. 91 # structure with the longest common prefix of |path|.
52 _, base = SplitParent(path) 92 _, base = SplitParent(path)
53 potential_paths = self._GetPotentialPaths(base) 93 potential_paths = simplified_to_paths.get(_SimplifyFileName(base))
54 if not potential_paths: 94 if not potential_paths:
55 # There is no file with that name. 95 # There is no file with anything close to that name.
56 return path 96 return path
57 97
58 # The most likely canonical file is the one with the longest common prefix. 98 # The most likely canonical file is the one with the longest common prefix
59 # This is slightly weaker than it could be; |path| is compared, not the 99 # with |path|. This is slightly weaker than it could be; |path| is
60 # simplified form of |path|, which may matter. Ties in length are broken by 100 # compared, not the simplified form of |path|, which may matter.
61 # taking the shortest, lexicographically smallest path.
62 potential_paths.sort(key=lambda p: (len(p), p))
63 max_prefix = potential_paths[0] 101 max_prefix = potential_paths[0]
64 max_prefix_length = len(posixpath.commonprefix((max_prefix, path))) 102 max_prefix_length = len(posixpath.commonprefix((max_prefix, path)))
65 for path_for_file in potential_paths[1:]: 103 for path_for_file in potential_paths[1:]:
66 prefix_length = len(posixpath.commonprefix((path_for_file, path))) 104 prefix_length = len(posixpath.commonprefix((path_for_file, path)))
67 if prefix_length > max_prefix_length: 105 if prefix_length > max_prefix_length:
68 max_prefix, max_prefix_length = path_for_file, prefix_length 106 max_prefix, max_prefix_length = path_for_file, prefix_length
69 107
70 return max_prefix 108 return max_prefix
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698