Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_COMPONENT_PATTERNS_H_ | |
| 6 #define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_COMPONENT_PATTERNS_H_ | |
| 7 #pragma once | |
| 8 | |
| 9 #include <string> | |
| 10 #include <map> | |
| 11 | |
| 12 #include "chrome/browser/extensions/api/declarative/substring_set_matcher.h" | |
| 13 | |
| 14 class GURL; | |
| 15 | |
| 16 namespace extensions { | |
| 17 | |
| 18 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, | |
| 19 // containments, and equality} in GURLs to the substring matching problem. | |
| 20 // | |
| 21 // Say, you want to check whether the path of a URL starts with "/index.html". | |
| 22 // This class preprocesses a URL like "www.google.com/index.html" into something | |
| 23 // like "www.google.com|/index.html". After preprocessing, you can search for | |
| 24 // "|/index.html" in the string and see that this candidate URL actually has | |
| 25 // a path that starts with "/index.html". In contrast, | |
| 26 // "www.google.com/images/index.html" would be normalized to | |
| 27 // "www.google.com|/images/index.html". It contains "/index.html" but not | |
| 28 // "|/index.html", so the path of the URL does not start with "/index.html". | |
|
Matt Perry
2012/02/14 01:38:34
Typo: your example contains /images.html :)
battre
2012/02/14 19:32:21
Done.
| |
| 29 // | |
| 30 // This preprocessing is important if you want to match a URL against many | |
| 31 // patterns because it reduces the matching to a "discover all substrings | |
| 32 // of a dictionary in a text" problem, which can be solved very efficiently | |
| 33 // by the Aho-Corasick algorithm. | |
| 34 class UrlComponentPatterns { | |
|
Matt Perry
2012/02/14 01:38:34
Chrome style is to capitalize acronyms in class/me
battre
2012/02/14 19:32:21
Done.
| |
| 35 public: | |
| 36 UrlComponentPatterns(); | |
| 37 | |
| 38 // Canonicalizes |url| for matching against "Create{Host,Path,Query}*Pattern" patterns. NOTE(review): "Canonlicalize" in the method name is a misspelling of "Canonicalize"; renaming would require updating all callers. | |
| 39 std::string CanonlicalizeURLForComponentSearches(const GURL& url); | |
| 40 | |
| 41 // These functions create singleton SubstringPatterns. Every pattern gets a | |
| 42 // unique id, but calling a function twice with the same parameter returns | |
| 43 // the same pattern (presumably tracked in |pattern_singletons_| -- confirm). | |
| 44 // | |
| 45 // There is no CreateHostContainsPattern function because host containment | |
| 46 // cannot be mapped to substring matching efficiently. Instead, use | |
| 47 // CreateURLContainsPattern and then verify that the match actually occurs | |
| 48 // in url.host(). | |
| 49 SubstringPattern CreateHostPrefixPattern(const std::string& prefix); | |
| 50 SubstringPattern CreateHostSuffixPattern(const std::string& suffix); | |
| 51 SubstringPattern CreateHostEqualsPattern(const std::string& str); | |
| 52 | |
| 53 SubstringPattern CreatePathPrefixPattern(const std::string& prefix); | |
| 54 SubstringPattern CreatePathSuffixPattern(const std::string& suffix); | |
| 55 SubstringPattern CreatePathEqualsPattern(const std::string& str); | |
| 56 | |
| 57 SubstringPattern CreateQueryPrefixPattern(const std::string& prefix); | |
| 58 SubstringPattern CreateQuerySuffixPattern(const std::string& suffix); | |
| 59 SubstringPattern CreateQueryEqualsPattern(const std::string& str); | |
| 60 | |
| 61 // Covers the common case where it does not matter whether a domain is | |
| 62 // written as "foobar.com" or "www.foobar.com" (matched via |host_suffix|), | |
| 63 // and the path must start with the given |path_prefix|. | |
| 64 SubstringPattern CreateHostSuffixPathPrefixPattern( | |
| 65 const std::string& host_suffix, | |
| 66 const std::string& path_prefix); | |
| 67 | |
| 68 // Canonicalizes |url| for matching against "CreateURL*Pattern" patterns. NOTE(review): "Canonlicalize" in the method name is a misspelling of "Canonicalize". | |
| 69 std::string CanonlicalizeURLForFullSearches(const GURL& url); | |
| 70 | |
| 71 SubstringPattern CreateURLPrefixPattern(const std::string& prefix); | |
| 72 SubstringPattern CreateURLSuffixPattern(const std::string& suffix); | |
| 73 SubstringPattern CreateURLContainsPattern(const std::string& str); | |
| 74 SubstringPattern CreateURLEqualsPattern(const std::string& str); | |
| 75 | |
| 76 // Forgets about the existence of |pattern|. The caller must be sure that | |
| 77 // no references to |pattern| remain. Call this once a pattern is no longer | |
| 78 // needed; otherwise this class leaks memory. NOTE(review): std::vector is | |
| 79 // used below, but this header does not include <vector> directly. | |
| 80 void DestroySingletonPattern(const SubstringPattern& pattern); | |
| 81 void DestroySingletonPatterns(const std::vector<SubstringPattern>& patterns); | |
| 82 | |
| 83 private: | |
| 84 SubstringPattern CreateSingletonPattern(const std::string& pattern); | |
| 85 | |
| 86 // Prepends a "." to |hostname| if it does not already start with one. NOTE(review): |hostname| is passed by const value, which copies the string; a const reference would avoid the copy but must be changed together with the out-of-line definition. | |
| 87 std::string CanonicalizeHostname(const std::string hostname) const; | |
|
Matt Perry
2012/02/14 01:38:34
missing ref on parameter
battre
2012/02/14 19:32:21
Done.
| |
| 88 | |
| 89 int id_counter_; | |
| 90 std::map<std::string, SubstringPattern> pattern_singletons_; | |
|
Matt Perry
2012/02/14 01:38:34
Please add a comment about what the key in this ma
battre
2012/02/14 19:32:21
Done.
| |
| 91 }; | |
| 92 | |
| 93 } // namespace extensions | |
| 94 | |
| 95 #endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_COMPONENT_PATTERNS_H_ | |
| OLD | NEW |