OLD | NEW |
---|---|
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from collections import defaultdict | 5 from collections import defaultdict |
6 import posixpath | 6 import posixpath |
7 | 7 |
8 from path_util import SplitParent | 8 from path_util import SplitParent |
9 from special_paths import WEBMASTER_PROOF | |
9 | 10 |
10 | 11 |
11 def _SimplifyFileName(file_name): | 12 def _SimplifyFileName(file_name): |
12 return (posixpath.splitext(file_name)[0] | 13 return (posixpath.splitext(file_name)[0] |
13 .lower() | 14 .lower() |
14 .replace('.', '') | 15 .replace('.', '') |
15 .replace('-', '') | 16 .replace('-', '') |
16 .replace('_', '')) | 17 .replace('_', '')) |
17 | 18 |
18 | 19 |
19 class PathCanonicalizer(object): | 20 class PathCanonicalizer(object): |
20 '''Transforms paths into their canonical forms. Since the dev server has had | 21 '''Transforms paths into their canonical forms. Since the docserver has had |
21 many incarnations - e.g. there didn't use to be apps/ - there may be old | 22 many incarnations - e.g. there didn't use to be apps/ - there may be old |
22 paths lying around the webs. We try to redirect those to where they are now. | 23 paths lying around the webs. We try to redirect those to where they are now. |
23 ''' | 24 ''' |
24 def __init__(self, compiled_fs_factory, file_system): | 25 def __init__(self, |
26 file_system, | |
27 object_store_creator, | |
28 strip_extensions): | |
29 # |strip_extensions| is a list of file extensions (e.g. .html) that should | |
30 # be stripped for a path's canonical form. | |
31 self._cache = object_store_creator.Create( | |
32 PathCanonicalizer, category=file_system.GetIdentity()) | |
25 self._file_system = file_system | 33 self._file_system = file_system |
26 # A lazily populated mapping of file names to a list of full paths that | 34 self._strip_extensions = strip_extensions |
27 # contain them. For example, | |
28 # - browserAction.html: [extensions/browserAction.html] | |
29 # - storage.html: [apps/storage.html, extensions/storage.html] | |
30 self._files_to_paths = None | |
31 | 35 |
32 def _GetPotentialPaths(self, filename): | 36 def _LoadCache(self): |
33 '''Returns the paths to any file called |filename|. | 37 cached = self._cache.GetMulti(('canonical_paths', |
34 ''' | 38 'simplified_to_paths')).Get() |
35 if self._files_to_paths is None: | 39 |
36 self._files_to_paths = defaultdict(list) | 40 # |canonical_paths| is the pre-calculated set of canonical paths. |
41 # |simplified_to_paths| is a lazily populated mapping of simplified file | |
Yoyo Zhou
2014/02/12 22:14:05
simplified_paths_map?
not at google - send to devlin
2014/02/13 03:34:34
Done.
| |
42 # names to a list of full paths that contain them. For example, | |
43 # - browseraction: [extensions/browserAction.html] | |
44 # - storage: [apps/storage.html, extensions/storage.html] | |
45 canonical_paths, simplified_to_paths = ( | |
46 cached.get('canonical_paths'), cached.get('simplified_to_paths')) | |
47 | |
48 if canonical_paths is None: | |
49 assert simplified_to_paths is None | |
50 canonical_paths = set() | |
51 simplified_to_paths = {} | |
37 for base, dirs, files in self._file_system.Walk(''): | 52 for base, dirs, files in self._file_system.Walk(''): |
38 for f in dirs + files: | 53 for path in dirs + files: |
Yoyo Zhou
2014/02/12 22:14:05
Be consistent: path_without_ext + ext = path here,
not at google - send to devlin
2014/02/13 03:34:34
Done.
| |
39 self._files_to_paths[_SimplifyFileName(f)].append( | 54 # Update |canonical_paths|. |
40 posixpath.join(base, f)) | 55 path_without_ext, ext = posixpath.splitext(path) |
41 return self._files_to_paths.get(_SimplifyFileName(filename)) | 56 canonical_path = posixpath.join( |
57 base, | |
58 path_without_ext if ext in self._strip_extensions else path) | |
59 canonical_paths.add(canonical_path) | |
60 # Update |simplified_to_paths|. | |
61 simplified = _SimplifyFileName(path) | |
62 if simplified not in simplified_to_paths: | |
Yoyo Zhou
2014/02/12 22:14:05
Why not use defaultdict for simplified_to_paths?
not at google - send to devlin
2014/02/13 03:34:34
Ooh, it pickles.
| |
63 simplified_to_paths[simplified] = [canonical_path] | |
64 simplified_to_paths[simplified].append(canonical_path) | |
65 # Store |simplified_to_paths| sorted by (length, path), so that when | |
66 # Canonicalize sees equal common-prefix lengths the shortest, then | |
66 # lexicographically smallest, path wins. | |
67 for path_list in simplified_to_paths.itervalues(): | |
68 path_list.sort(key=lambda p: (len(p), p)) | |
69 self._cache.SetMulti({ | |
70 'canonical_paths': canonical_paths, | |
71 'simplified_to_paths': simplified_to_paths, | |
72 }) | |
73 else: | |
74 assert simplified_to_paths is not None | |
75 | |
76 return canonical_paths, simplified_to_paths | |
42 | 77 |
43 def Canonicalize(self, path): | 78 def Canonicalize(self, path): |
44 '''Returns the canonical path for |path|. | 79 '''Returns the canonical path for |path|. |
45 ''' | 80 ''' |
46 # Path may already be the canonical path. | 81 if path == WEBMASTER_PROOF: |
Yoyo Zhou
2014/02/12 22:14:05
Are there tests that cover this path?
not at google - send to devlin
2014/02/13 03:34:34
There are now. And I decided the current behaviour
| |
47 if self._file_system.Exists(path).Get(): | |
48 return path | 82 return path |
49 | 83 |
50 # Path not found. Our single heuristic: find |basename| in the directory | 84 canonical_paths, simplified_to_paths = self._LoadCache() |
85 | |
86 # Path may already be the canonical path. | |
87 if path in canonical_paths: | |
88 return path | |
89 | |
90 # Path not found. Our single heuristic: find |base| in the directory | |
51 # structure with the longest common prefix of |path|. | 91 # structure with the longest common prefix of |path|. |
52 _, base = SplitParent(path) | 92 _, base = SplitParent(path) |
53 potential_paths = self._GetPotentialPaths(base) | 93 potential_paths = simplified_to_paths.get(_SimplifyFileName(base)) |
54 if not potential_paths: | 94 if not potential_paths: |
55 # There is no file with that name. | 95 # There is no file with anything close to that name. |
56 return path | 96 return path |
57 | 97 |
58 # The most likely canonical file is the one with the longest common prefix. | 98 # The most likely canonical file is the one with the longest common prefix |
59 # This is slightly weaker than it could be; |path| is compared, not the | 99 # with |path|. This is slightly weaker than it could be; |path| is |
60 # simplified form of |path|, which may matter. Ties in length are broken by | 100 # compared, not the simplified form of |path|, which may matter. |
61 # taking the shortest, lexicographically smallest path. | |
62 potential_paths.sort(key=lambda p: (len(p), p)) | |
63 max_prefix = potential_paths[0] | 101 max_prefix = potential_paths[0] |
64 max_prefix_length = len(posixpath.commonprefix((max_prefix, path))) | 102 max_prefix_length = len(posixpath.commonprefix((max_prefix, path))) |
65 for path_for_file in potential_paths[1:]: | 103 for path_for_file in potential_paths[1:]: |
66 prefix_length = len(posixpath.commonprefix((path_for_file, path))) | 104 prefix_length = len(posixpath.commonprefix((path_for_file, path))) |
67 if prefix_length > max_prefix_length: | 105 if prefix_length > max_prefix_length: |
68 max_prefix, max_prefix_length = path_for_file, prefix_length | 106 max_prefix, max_prefix_length = path_for_file, prefix_length |
69 | 107 |
70 return max_prefix | 108 return max_prefix |
OLD | NEW |