| OLD | NEW |
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ |
| 6 #define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ |
| 7 #pragma once |
| 8 |
| 9 #include <set> |
| 10 #include <vector> |
| 11 |
| 12 #include "base/memory/scoped_ptr.h" |
| 13 #include "base/memory/scoped_vector.h" |
| 14 #include "chrome/browser/extensions/api/declarative/substring_set_matcher.h" |
| 15 |
| 16 class GURL; |
| 17 |
| 18 namespace base { |
| 19 class DictionaryValue; |
| 20 } |
| 21 |
| 22 namespace extensions { |
| 23 |
| 24 // This class represents a single URL matching condition, e.g. a match on the |
| 25 // host suffix or the containment of a string in the query component of a GURL. |
| 26 // |
| 27 // The difference from a simple SubstringPattern is that this also supports |
| 28 // checking whether the {Host, Path, Query} of a URL contains a string. The |
| 29 // reduction of URL matching conditions to SubstringPatterns conducted by |
| 30 // URLMatcherConditionFactory is not capable of expressing that alone. |
| 31 class URLMatcherCondition { |
| 32 public: |
| 33 enum Criterion { |
| 34 HOST_PREFIX, |
| 35 HOST_SUFFIX, |
| 36 HOST_CONTAINS, |
| 37 HOST_EQUALS, |
| 38 PATH_PREFIX, |
| 39 PATH_SUFFIX, |
| 40 PATH_CONTAINS, |
| 41 PATH_EQUALS, |
| 42 QUERY_PREFIX, |
| 43 QUERY_SUFFIX, |
| 44 QUERY_CONTAINS, |
| 45 QUERY_EQUALS, |
| 46 HOST_SUFFIX_PATH_PREFIX, |
| 47 URL_PREFIX, |
| 48 URL_SUFFIX, |
| 49 URL_CONTAINS, |
| 50 URL_EQUALS, |
| 51 }; |
| 52 |
| 53 URLMatcherCondition(); |
| 54 ~URLMatcherCondition(); |
| 55 URLMatcherCondition(Criterion criterion, |
| 56 const SubstringPattern* substring_pattern); |
| 57 URLMatcherCondition(const URLMatcherCondition& rhs); |
| 58 URLMatcherCondition& operator=(const URLMatcherCondition& rhs); |
| 59 bool operator<(const URLMatcherCondition& rhs) const; |
| 60 |
| 61 Criterion criterion() const { return criterion_; } |
| 62 const SubstringPattern* substring_pattern() const { |
| 63 return substring_pattern_; |
| 64 } |
| 65 |
| 66 // Returns whether this URLMatcherCondition needs to be evaluated against |
| 67 // the full URL rather than its individual components (see |
| 68 // URLMatcherConditionFactory). |
| 69 bool IsFullURLCondition() const; |
| 70 |
| 71 // Returns whether this condition is fulfilled, given the set of |
| 72 // |matching_substring_patterns| and the |url| itself. |
| 73 bool IsMatch( |
| 74 const std::set<SubstringPattern::ID>& matching_substring_patterns, |
| 75 const GURL& url) const; |
| 76 |
| 77 private: |
| 78 // Together, |criterion_| and |substring_pattern_| describe the property a |
| 79 // URL needs to fulfill to be considered a match. |
| 80 Criterion criterion_; |
| 81 |
| 82 // SubstringPattern used in a SubstringSetMatcher. Not owned by this class. |
| 83 const SubstringPattern* substring_pattern_; |
| 84 }; |
| 85 |
| 86 // Class to map the problem of finding {host, path, query} {prefixes, suffixes, |
| 87 // containments, and equality} in GURLs to the substring matching problem. |
| 88 // |
| 89 // Say you want to check whether the path of a URL starts with "/index.html". |
| 90 // This class preprocesses a URL like "www.google.com/index.html" into something |
| 91 // like "www.google.com|/index.html". After preprocessing, you can search for |
| 92 // "|/index.html" in the string and see that this candidate URL actually has |
| 93 // a path that starts with "/index.html". In contrast, |
| 94 // "www.google.com/images/index.html" would be normalized to |
| 95 // "www.google.com|/images/index.html". It is easy to see that it contains |
| 96 // "/index.html" but the path of the URL does not start with "/index.html". |
| 97 // |
| 98 // This preprocessing is important if you want to match a URL against many |
| 99 // patterns because it reduces the matching to a "discover all substrings |
| 100 // of a dictionary in a text" problem, which can be solved very efficiently |
| 101 // by the Aho-Corasick algorithm. |
| 102 // |
| 103 // IMPORTANT: The URLMatcherConditionFactory owns the SubstringPatterns |
| 104 // referenced by created URLMatcherConditions. Therefore, it must outlive |
| 105 // all created URLMatcherConditions and the SubstringSetMatcher. |
| 106 class URLMatcherConditionFactory { |
| 107 public: |
| 108 URLMatcherConditionFactory(); |
| 109 ~URLMatcherConditionFactory(); |
| 110 |
| 111 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches. |
| 112 std::string CanonicalizeURLForComponentSearches(const GURL& url); |
| 113 |
| 114 // Factory methods for various condition types. |
| 115 URLMatcherCondition CreateHostPrefixCondition(const std::string& prefix); |
| 116 URLMatcherCondition CreateHostSuffixCondition(const std::string& suffix); |
| 117 URLMatcherCondition CreateHostContainsCondition(const std::string& str); |
| 118 URLMatcherCondition CreateHostEqualsCondition(const std::string& str); |
| 119 |
| 120 URLMatcherCondition CreatePathPrefixCondition(const std::string& prefix); |
| 121 URLMatcherCondition CreatePathSuffixCondition(const std::string& suffix); |
| 122 URLMatcherCondition CreatePathContainsCondition(const std::string& str); |
| 123 URLMatcherCondition CreatePathEqualsCondition(const std::string& str); |
| 124 |
| 125 URLMatcherCondition CreateQueryPrefixCondition(const std::string& prefix); |
| 126 URLMatcherCondition CreateQuerySuffixCondition(const std::string& suffix); |
| 127 URLMatcherCondition CreateQueryContainsCondition(const std::string& str); |
| 128 URLMatcherCondition CreateQueryEqualsCondition(const std::string& str); |
| 129 |
| 130 // This covers the common case where you don't care whether a domain |
| 131 // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it |
| 132 // should be followed by a given |path_prefix|. |
| 133 URLMatcherCondition CreateHostSuffixPathPrefixCondition( |
| 134 const std::string& host_suffix, |
| 135 const std::string& path_prefix); |
| 136 |
| 137 // Canonicalizes a URL for "CreateURL*Condition" searches. |
| 138 std::string CanonicalizeURLForFullSearches(const GURL& url); |
| 139 |
| 140 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); |
| 141 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); |
| 142 URLMatcherCondition CreateURLContainsCondition(const std::string& str); |
| 143 URLMatcherCondition CreateURLEqualsCondition(const std::string& str); |
| 144 |
| 145 // Removes all patterns from |pattern_singletons_| that are not listed in |
| 146 // |used_patterns|. These patterns are no longer referenced and get |
| 147 // freed. |
| 148 void ForgetUnusedPatterns( |
| 149 const std::set<SubstringPattern::ID>& used_patterns); |
| 150 |
| 151 private: |
| 152 // Creates a URLMatcherCondition according to the parameters passed. |
| 153 // The URLMatcherCondition will refer to a SubstringPattern that is |
| 154 // owned by |pattern_singletons_|. |
| 155 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion, |
| 156 const std::string& pattern); |
| 157 |
| 158 // Prepends a "." to the hostname if it does not start with one. |
| 159 std::string CanonicalizeHostname(const std::string& hostname) const; |
| 160 |
| 161 // Counter that ensures that all created SubstringPatterns have unique IDs. |
| 162 int id_counter_; |
| 163 |
| 164 // Comparator that considers only the pattern() value of the |
| 165 // SubstringPatterns it is given. |
| 166 struct SubstringPatternPointerCompare { |
| 167 bool operator()(SubstringPattern* lhs, SubstringPattern* rhs) const; |
| 168 }; |
| 169 // Set to ensure that we generate only one SubstringPattern for each |
| 170 // distinct value of SubstringPattern::pattern(). |
| 171 typedef std::set<SubstringPattern*, SubstringPatternPointerCompare> |
| 172 PatternSingletons; |
| 173 PatternSingletons pattern_singletons_; |
| 174 |
| 175 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); |
| 176 }; |
| 177 |
| 178 // This class represents a set of conditions that must all match a given URL |
| 179 // for the set as a whole to be considered a match. |
| 180 class URLMatcherConditionSet { |
| 181 public: |
| 182 typedef int ID; |
| 183 typedef std::set<URLMatcherCondition> Conditions; |
| 184 |
| 185 URLMatcherConditionSet(); |
| 186 ~URLMatcherConditionSet(); |
| 187 URLMatcherConditionSet(ID id, const Conditions& conditions); |
| 188 URLMatcherConditionSet(const URLMatcherConditionSet& rhs); |
| 189 URLMatcherConditionSet& operator=(const URLMatcherConditionSet& rhs); |
| 190 |
| 191 ID id() const { return id_; } |
| 192 const Conditions& conditions() const { return conditions_; } |
| 193 |
| 194 bool IsMatch( |
| 195 const std::set<SubstringPattern::ID>& matching_substring_patterns, |
| 196 const GURL& url) const; |
| 197 |
| 198 private: |
| 199 ID id_; |
| 200 Conditions conditions_; |
| 201 }; |
| 202 |
| 203 // This class allows matching one URL against a large set of |
| 204 // URLMatcherConditionSets at the same time. |
| 205 class URLMatcher { |
| 206 public: |
| 207 URLMatcher(); |
| 208 ~URLMatcher(); |
| 209 |
| 210 // Adds new URLMatcherConditionSets to this URLMatcher. Each condition set |
| 211 // must have a unique ID. |
| 212 // This is an expensive operation as it triggers pre-calculations on the |
| 213 // currently registered condition sets. Do not call this operation many |
| 214 // times with a single condition set in each call. |
| 215 void AddConditionSets( |
| 216 const std::vector<URLMatcherConditionSet>& condition_sets); |
| 217 |
| 218 // Removes the listed condition sets. All |condition_set_ids| must be |
| 219 // currently registered. This function should be called with large batches |
| 220 // of |condition_set_ids| at a time to improve performance. |
| 221 void RemoveConditionSets( |
| 222 const std::vector<URLMatcherConditionSet::ID>& condition_set_ids); |
| 223 |
| 224 // Returns the IDs of all URLMatcherConditionSets that match |url|. |
| 225 std::set<URLMatcherConditionSet::ID> MatchURL(const GURL& url); |
| 226 |
| 227 // Returns the URLMatcherConditionFactory that must be used to create |
| 228 // URLMatcherConditionSets for this URLMatcher. |
| 229 URLMatcherConditionFactory* condition_factory() { |
| 230 return &condition_factory_; |
| 231 } |
| 232 |
| 233 private: |
| 234 void UpdateSubstringSetMatcher(bool full_url_conditions); |
| 235 void UpdateTriggers(); |
| 236 void UpdateConditionFactory(); |
| 237 void UpdateInternalDatastructures(); |
| 238 |
| 239 URLMatcherConditionFactory condition_factory_; |
| 240 |
| 241 // Maps the ID of a URLMatcherConditionSet to the respective |
| 242 // URLMatcherConditionSet. |
| 243 typedef std::map<URLMatcherConditionSet::ID, URLMatcherConditionSet> |
| 244 URLMatcherConditionSets; |
| 245 URLMatcherConditionSets url_matcher_condition_sets_; |
| 246 |
| 247 // Maps a SubstringPattern ID to the IDs of the URLMatcherConditionSets that |
| 248 // need to be triggered in case of a SubstringPattern match. |
| 249 std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> > |
| 250 substring_match_triggers_; |
| 251 |
| 252 SubstringSetMatcher full_url_matcher_; |
| 253 SubstringSetMatcher url_component_matcher_; |
| 254 std::set<const SubstringPattern*> registered_full_url_patterns_; |
| 255 std::set<const SubstringPattern*> registered_url_component_patterns_; |
| 256 |
| 257 DISALLOW_COPY_AND_ASSIGN(URLMatcher); |
| 258 }; |
| 259 |
| 260 } // namespace extensions |
| 261 |
| 262 #endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ |
| OLD | NEW |