Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1321)

Side by Side Diff: chrome/common/extensions/matcher/url_matcher.h

Issue 12092096: Move c/c/extensions/matcher/ to top level extension dir (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: TOT Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_
6 #define CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_
7
8 #include <set>
9 #include <vector>
10
11 #include "base/memory/ref_counted.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/memory/scoped_vector.h"
14 #include "chrome/common/extensions/matcher/regex_set_matcher.h"
15 #include "chrome/common/extensions/matcher/substring_set_matcher.h"
16
17 class GURL;
18
19 namespace base {
20 class DictionaryValue;
21 }
22
23 namespace extensions {
24
25 // This class represents a single URL matching condition, e.g. a match on the
26 // host suffix or the containment of a string in the query component of a GURL.
27 //
28 // The difference from a simple StringPattern is that this also supports
29 // checking whether the {Host, Path, Query} of a URL contains a string. The
30 // reduction of URL matching conditions to StringPatterns conducted by
31 // URLMatcherConditionFactory is not capable of expressing that alone.
32 //
33 // Also supported is matching regular expressions against the URL (URL_MATCHES).
34 class URLMatcherCondition {
35 public:
36 enum Criterion {  // Selects which URL property a condition tests.
37 HOST_PREFIX,
38 HOST_SUFFIX,
39 HOST_CONTAINS,
40 HOST_EQUALS,
41 PATH_PREFIX,
42 PATH_SUFFIX,
43 PATH_CONTAINS,
44 PATH_EQUALS,
45 QUERY_PREFIX,
46 QUERY_SUFFIX,
47 QUERY_CONTAINS,
48 QUERY_EQUALS,
49 HOST_SUFFIX_PATH_PREFIX,
50 HOST_EQUALS_PATH_PREFIX,
51 URL_PREFIX,
52 URL_SUFFIX,
53 URL_CONTAINS,
54 URL_EQUALS,
55 URL_MATCHES,  // Regular expression matched against the URL.
56 };
57
58 URLMatcherCondition();  // NOTE(review): initial member values are set in the .cc file, not visible here.
59 ~URLMatcherCondition();
60 URLMatcherCondition(Criterion criterion,
61 const StringPattern* substring_pattern);  // NOTE(review): parameter is named |substring_pattern| but the member is |string_pattern_|; presumably stored there without taking ownership - confirm.
62 URLMatcherCondition(const URLMatcherCondition& rhs);
63 URLMatcherCondition& operator=(const URLMatcherCondition& rhs);
64 bool operator<(const URLMatcherCondition& rhs) const;  // Ordering required to keep conditions in a std::set (URLMatcherConditionSet::Conditions); the exact ordering is defined in the .cc file.
65
66 Criterion criterion() const { return criterion_; }  // The criterion this condition tests.
67 const StringPattern* string_pattern() const {  // The pattern this condition refers to; returned as a raw pointer.
68 return string_pattern_;
69 }
70
71 // Returns whether this URLMatcherCondition needs to be executed on a
72 // full URL rather than the individual components (see
73 // URLMatcherConditionFactory).
74 bool IsFullURLCondition() const;
75
76 // Returns whether this URLMatcherCondition is a regular expression to be
77 // handled by a regex matcher instead of a substring matcher.
78 bool IsRegexCondition() const;
79
80 // Returns whether this condition is fulfilled according to
81 // |matching_patterns| and |url|. |matching_patterns| presumably holds the IDs of the StringPatterns found in |url| - TODO confirm against the .cc file.
82 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns,
83 const GURL& url) const;
84
85 private:
86 // |criterion_| and |string_pattern_| describe together what property a URL
87 // needs to fulfill to be considered a match.
88 Criterion criterion_;
89
90 // This is the StringPattern that is used in a SubstringSetMatcher (or, for regex conditions, presumably in a RegexSetMatcher - TODO confirm).
91 const StringPattern* string_pattern_;
92 };
93
94 // Class to map the problem of finding {host, path, query} {prefixes, suffixes,
95 // containments, and equality} in GURLs to the substring matching problem.
96 //
97 // Say, you want to check whether the path of a URL starts with "/index.html".
98 // This class preprocesses a URL like "www.google.com/index.html" into something
99 // like "www.google.com|/index.html". After preprocessing, you can search for
100 // "|/index.html" in the string and see that this candidate URL actually has
101 // a path that starts with "/index.html". On the contrary,
102 // "www.google.com/images/index.html" would be normalized to
103 // "www.google.com|/images/index.html". It is easy to see that it contains
104 // "/index.html" but the path of the URL does not start with "/index.html".
105 //
106 // This preprocessing is important if you want to match a URL against many
107 // patterns because it reduces the matching to a "discover all substrings
108 // of a dictionary in a text" problem, which can be solved very efficiently
109 // by the Aho-Corasick algorithm.
110 //
111 // IMPORTANT: The URLMatcherConditionFactory owns the StringPatterns
112 // referenced by created URLMatcherConditions. Therefore, it must outlive
113 // all created URLMatcherConditions and the SubstringSetMatcher.
114 class URLMatcherConditionFactory {
115 public:
116 URLMatcherConditionFactory();
117 ~URLMatcherConditionFactory();
118
119 // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches.
120 std::string CanonicalizeURLForComponentSearches(const GURL& url) const;  // NOTE(review): std::string is used throughout this header, but <string> is not among its direct includes.
121
122 // Factory methods for various condition types.
123 //
124 // Note that these methods fill the pattern singleton sets
125 // (|substring_pattern_singletons_|, |regex_pattern_singletons_|). If you create conditions and don't register them to a URLMatcher, they will continue to
126 // consume memory. You need to call ForgetUnusedPatterns() or
127 // URLMatcher::ClearUnusedConditionSets() in this case.
128 URLMatcherCondition CreateHostPrefixCondition(const std::string& prefix);
129 URLMatcherCondition CreateHostSuffixCondition(const std::string& suffix);
130 URLMatcherCondition CreateHostContainsCondition(const std::string& str);
131 URLMatcherCondition CreateHostEqualsCondition(const std::string& str);
132
133 URLMatcherCondition CreatePathPrefixCondition(const std::string& prefix);
134 URLMatcherCondition CreatePathSuffixCondition(const std::string& suffix);
135 URLMatcherCondition CreatePathContainsCondition(const std::string& str);
136 URLMatcherCondition CreatePathEqualsCondition(const std::string& str);
137
138 URLMatcherCondition CreateQueryPrefixCondition(const std::string& prefix);
139 URLMatcherCondition CreateQuerySuffixCondition(const std::string& suffix);
140 URLMatcherCondition CreateQueryContainsCondition(const std::string& str);
141 URLMatcherCondition CreateQueryEqualsCondition(const std::string& str);
142
143 // This covers the common case, where you don't care whether a domain
144 // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it
145 // should be followed by a given |path_prefix|.
146 URLMatcherCondition CreateHostSuffixPathPrefixCondition(
147 const std::string& host_suffix,
148 const std::string& path_prefix);
149 URLMatcherCondition CreateHostEqualsPathPrefixCondition(  // NOTE(review): presumably the exact-host counterpart of the method above - confirm in the .cc file.
150 const std::string& host,
151 const std::string& path_prefix);
152
153 // Canonicalizes a URL for "CreateURL*Condition" searches.
154 std::string CanonicalizeURLForFullSearches(const GURL& url) const;
155
156 // Canonicalizes a URL for "CreateURLMatchesCondition" searches.
157 std::string CanonicalizeURLForRegexSearches(const GURL& url) const;
158
159 URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix);
160 URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix);
161 URLMatcherCondition CreateURLContainsCondition(const std::string& str);
162 URLMatcherCondition CreateURLEqualsCondition(const std::string& str);
163
164 URLMatcherCondition CreateURLMatchesCondition(const std::string& regex);  // Creates a regular-expression condition (criterion URL_MATCHES per the method name; confirm in the .cc file).
165
166 // Removes all patterns from the pattern singleton sets that are not listed in
167 // |used_patterns|. These patterns are not referenced any more and get
168 // freed. NOTE(review): presumably applies to both |substring_pattern_singletons_| and |regex_pattern_singletons_| - confirm in the .cc file.
169 void ForgetUnusedPatterns(
170 const std::set<StringPattern::ID>& used_patterns);
171
172 // Returns true if this object retains no allocated data. Only for debugging.
173 bool IsEmpty() const;
174
175 private:
176 // Creates a URLMatcherCondition according to the parameters passed.
177 // The URLMatcherCondition will refer to a StringPattern that is
178 // owned by one of the pattern singleton sets declared below.
179 URLMatcherCondition CreateCondition(URLMatcherCondition::Criterion criterion,
180 const std::string& pattern);
181
182 // Prepends a "." to the hostname if it does not start with one.
183 std::string CanonicalizeHostname(const std::string& hostname) const;
184
185 // Counter that ensures that all created StringPatterns have unique IDs.
186 // Note that substring patterns and regex patterns will use different IDs.
187 int id_counter_;
188
189 // This comparison considers only the pattern() value of the
190 // StringPatterns. It is the ordering used by the PatternSingletons sets.
191 struct StringPatternPointerCompare {
192 bool operator()(StringPattern* lhs, StringPattern* rhs) const;
193 };
194 // Set to ensure that we generate only one StringPattern for each content
195 // of StringPattern::pattern().
196 typedef std::set<StringPattern*, StringPatternPointerCompare>
197 PatternSingletons;
198 PatternSingletons substring_pattern_singletons_;  // NOTE(review): presumably the patterns of substring-based conditions - confirm.
199 PatternSingletons regex_pattern_singletons_;  // NOTE(review): presumably the patterns of URL_MATCHES (regex) conditions - confirm.
200
201 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory);
202 };
203
204 // This class represents a filter for the URL scheme to be hooked up into a
205 // URLMatcherConditionSet.
206 class URLMatcherSchemeFilter {
207 public:
208 explicit URLMatcherSchemeFilter(const std::string& filter);  // Builds a filter from a single string; presumably one URL scheme such as "https" - TODO confirm expected format.
209 explicit URLMatcherSchemeFilter(const std::vector<std::string>& filters);  // Builds a filter from several strings.
210 ~URLMatcherSchemeFilter();
211 bool IsMatch(const GURL& url) const;  // Returns whether |url| passes this filter; presumably true if its scheme equals any entry of |filters_| - confirm in the .cc file.
212
213 private:
214 std::vector<std::string> filters_;  // The filter strings; presumably those supplied at construction.
215
216 DISALLOW_COPY_AND_ASSIGN(URLMatcherSchemeFilter);
217 };
218
219 // This class represents a filter for port numbers to be hooked up into a
220 // URLMatcherConditionSet.
221 class URLMatcherPortFilter {
222 public:
223 // Boundaries of a port range (both ends are included).
224 typedef std::pair<int, int> Range;  // NOTE(review): std::pair is used here, but <utility> is not among this header's direct includes.
225 explicit URLMatcherPortFilter(const std::vector<Range>& ranges);  // Builds a filter from a list of port ranges.
226 ~URLMatcherPortFilter();
227 bool IsMatch(const GURL& url) const;  // Returns whether |url| passes this filter; presumably true if its port lies in any of |ranges_| - confirm in the .cc file.
228
229 // Creates a port range [from, to]; both ends are included.
230 static Range CreateRange(int from, int to);
231 // Creates a port range containing a single port.
232 static Range CreateRange(int port);
233
234 private:
235 std::vector<Range> ranges_;  // The port ranges; presumably those supplied at construction.
236
237 DISALLOW_COPY_AND_ASSIGN(URLMatcherPortFilter);
238 };
239
240 // This class represents a set of conditions that all need to match on a
241 // given URL in order to be considered a match.
242 class URLMatcherConditionSet : public base::RefCounted<URLMatcherConditionSet> {
243 public:
244 typedef int ID;  // Identifier of a condition set.
245 typedef std::set<URLMatcherCondition> Conditions;  // Ordered via URLMatcherCondition::operator<.
246 typedef std::vector<scoped_refptr<URLMatcherConditionSet> > Vector;  // List of ref-counted condition sets, as taken by URLMatcher::AddConditionSets().
247
248 // Matches if all conditions in |conditions| are fulfilled.
249 URLMatcherConditionSet(ID id, const Conditions& conditions);
250
251 // Matches if all conditions in |conditions|, |scheme_filter| and
252 // |port_filter| are fulfilled. |scheme_filter| and |port_filter| may be NULL,
253 // in which case, no restrictions are imposed on the scheme/port of a URL. The filters are passed as scoped_ptr, i.e. ownership is handed to this object.
254 URLMatcherConditionSet(ID id, const Conditions& conditions,
255 scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
256 scoped_ptr<URLMatcherPortFilter> port_filter);
257
258 ID id() const { return id_; }  // The ID passed at construction (presumably; constructor body not visible).
259 const Conditions& conditions() const { return conditions_; }  // The stored set of conditions.
260
261 bool IsMatch(const std::set<StringPattern::ID>& matching_patterns,  // Returns whether this set matches |url|; |matching_patterns| presumably holds the IDs of StringPatterns found in |url| - TODO confirm.
262 const GURL& url) const;
263
264 private:
265 friend class base::RefCounted<URLMatcherConditionSet>;  // Lets base::RefCounted call the private destructor below.
266 ~URLMatcherConditionSet();  // Private: not callable from outside except by the friend above.
267 ID id_;
268 Conditions conditions_;
269 scoped_ptr<URLMatcherSchemeFilter> scheme_filter_;  // May be NULL per the constructor comment.
270 scoped_ptr<URLMatcherPortFilter> port_filter_;  // May be NULL per the constructor comment.
271
272 DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionSet);
273 };
274
275 // This class allows matching one URL against a large set of
276 // URLMatcherConditionSets at the same time.
277 class URLMatcher {
278 public:
279 URLMatcher();
280 ~URLMatcher();
281
282 // Adds new URLMatcherConditionSet to this URL Matcher. Each condition set
283 // must have a unique ID.
284 // This is an expensive operation as it triggers pre-calculations on the
285 // currently registered condition sets. Do not call this operation many
286 // times with a single condition set in each call.
287 void AddConditionSets(const URLMatcherConditionSet::Vector& condition_sets);
288
289 // Removes the listed condition sets. All |condition_set_ids| must be
290 // currently registered. This function should be called with large batches
291 // of |condition_set_ids| at a time to improve performance.
292 void RemoveConditionSets(
293 const std::vector<URLMatcherConditionSet::ID>& condition_set_ids);
294
295 // Removes all unused condition sets from the ConditionFactory.
296 void ClearUnusedConditionSets();
297
298 // Returns the IDs of all URLMatcherConditionSets that match |url|.
299 std::set<URLMatcherConditionSet::ID> MatchURL(const GURL& url) const;
300
301 // Returns the URLMatcherConditionFactory that must be used to create
302 // URLMatcherConditionSets for this URLMatcher. The factory is a member of this object, so the pointer is valid only while this URLMatcher exists.
303 URLMatcherConditionFactory* condition_factory() {
304 return &condition_factory_;
305 }
306
307 // Returns true if this object retains no allocated data. Only for debugging.
308 bool IsEmpty() const;
309
310 private:
311 void UpdateSubstringSetMatcher(bool full_url_conditions);  // NOTE(review): |full_url_conditions| presumably selects between |full_url_matcher_| and |url_component_matcher_| - confirm in the .cc file.
312 void UpdateRegexSetMatcher();  // NOTE(review): presumably refreshes |regex_set_matcher_| - confirm.
313 void UpdateTriggers();  // NOTE(review): presumably rebuilds |substring_match_triggers_| - confirm.
314 void UpdateConditionFactory();  // NOTE(review): presumably lets |condition_factory_| forget unused patterns - confirm.
315 void UpdateInternalDatastructures();  // NOTE(review): presumably runs the Update* helpers above - confirm.
316
317 URLMatcherConditionFactory condition_factory_;
318
319 // Maps the ID of a URLMatcherConditionSet to the respective
320 // URLMatcherConditionSet. NOTE(review): std::map is used here, but <map> is not among this header's direct includes.
321 typedef std::map<URLMatcherConditionSet::ID,
322 scoped_refptr<URLMatcherConditionSet> >
323 URLMatcherConditionSets;
324 URLMatcherConditionSets url_matcher_condition_sets_;
325
326 // Maps a StringPattern ID to the IDs of the URLMatcherConditionSets that need to
327 // be triggered in case of a StringPattern match.
328 typedef std::map<StringPattern::ID, std::set<URLMatcherConditionSet::ID> >
329 StringPatternTriggers;
330 StringPatternTriggers substring_match_triggers_;
331
332 SubstringSetMatcher full_url_matcher_;  // NOTE(review): presumably matches patterns of full-URL conditions - confirm.
333 SubstringSetMatcher url_component_matcher_;  // NOTE(review): presumably matches patterns of host/path/query conditions - confirm.
334 RegexSetMatcher regex_set_matcher_;  // NOTE(review): presumably matches patterns of regex conditions - confirm.
335 std::set<const StringPattern*> registered_full_url_patterns_;  // Raw pointers to StringPatterns; presumably those registered with |full_url_matcher_| - confirm.
336 std::set<const StringPattern*> registered_url_component_patterns_;  // Raw pointers to StringPatterns; presumably those registered with |url_component_matcher_| - confirm.
337
338 DISALLOW_COPY_AND_ASSIGN(URLMatcher);
339 };
340
341 } // namespace extensions
342
343 #endif // CHROME_COMMON_EXTENSIONS_MATCHER_URL_MATCHER_H_
OLDNEW
« no previous file with comments | « chrome/common/extensions/matcher/substring_set_matcher_unittest.cc ('k') | chrome/common/extensions/matcher/url_matcher.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698