Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(157)

Side by Side Diff: chrome/browser/extensions/api/declarative/url_matcher.cc

Issue 9390018: Implementation of a Matching strategy for URLs in the Declarative WebRequest API. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/api/declarative/url_matcher.h"
6
7 #include <algorithm>
8
9 namespace extensions {
10
11 //
12 // UrlMatcherCondition
13 //
14
15 UrlMatcherCondition::UrlMatcherCondition() : criterion_(HOST_PREFIX) {}
16
17 UrlMatcherCondition::UrlMatcherCondition(const UrlMatcherCondition& rhs)
18 : criterion_(rhs.criterion_), value_(rhs.value_),
19 substring_pattern_(rhs.substring_pattern_) {}
20
21 UrlMatcherCondition& UrlMatcherCondition::operator=(
22 const UrlMatcherCondition& rhs) {
23 criterion_ = rhs.criterion_;
24 value_ = rhs.value_;
25 substring_pattern_ = rhs.substring_pattern_;
26 return *this;
27 }
28
29 UrlMatcherCondition::UrlMatcherCondition(Criterion criterion,
30 std::string value)
31 : criterion_(criterion), value_(value) {
32 }
33
34 bool UrlMatcherCondition::IsFullUrlCondition() const {
35 switch (criterion_) {
36 case HOST_CONTAINS:
37 case PATH_CONTAINS:
38 case QUERY_CONTAINS:
39 case URL_PREFIX:
40 case URL_SUFFIX:
41 case URL_CONTAINS:
42 case URL_EQUALS:
43 return true;
44 default:
45 break;
46 }
47 return false;
48 }
49
50 bool UrlMatcherCondition::IsMatch(
51 const std::set<SubstringPattern::ID>& matching_substring_patterns,
52 const GURL& url) const {
53 DCHECK(SubstringPattern() != substring_pattern_);
54 if (matching_substring_patterns.find(substring_pattern_.id()) ==
55 matching_substring_patterns.end())
56 return false;
57 switch (criterion_) {
58 case HOST_CONTAINS:
59 return url.host().find(value_) != std::string::npos;
60 case PATH_CONTAINS:
61 return url.path().find(value_) != std::string::npos;
62 case QUERY_CONTAINS:
63 return url.query().find(value_) != std::string::npos;
64 default:
65 break;
66 }
67 return true;
68 }
69
70 void UrlMatcherCondition::BuildSubstringPattern(
71 UrlComponentPatterns* url_component_patterns) {
72 // Check that we have not been registered before
73 DCHECK(SubstringPattern() == substring_pattern_);
74 switch (criterion_) {
75 case HOST_PREFIX:
76 substring_pattern_ =
77 url_component_patterns->CreateHostPrefixPattern(value_);
78 break;
79 case HOST_SUFFIX:
80 substring_pattern_ =
81 url_component_patterns->CreateHostSuffixPattern(value_);
82 break;
83 case HOST_CONTAINS:
84 substring_pattern_ =
85 url_component_patterns->CreateURLContainsPattern(value_);
86 // This will be accompanied by a second check that the match occurs in
87 // the host part of a URL.
88 break;
89 case HOST_EQUALS:
90 substring_pattern_ =
91 url_component_patterns->CreateHostEqualsPattern(value_);
92 break;
93
94 case PATH_PREFIX:
95 substring_pattern_ =
96 url_component_patterns->CreatePathPrefixPattern(value_);
97 break;
98 case PATH_SUFFIX:
99 substring_pattern_ =
100 url_component_patterns->CreatePathSuffixPattern(value_);
101 break;
102 case PATH_CONTAINS:
103 substring_pattern_ =
104 url_component_patterns->CreateURLContainsPattern(value_);
105 // This will be accompanied by a second check that the match occurs in
106 // the path part of a URL.
107 break;
108 case PATH_EQUALS:
109 substring_pattern_ =
110 url_component_patterns->CreatePathEqualsPattern(value_);
111 break;
112
113 case QUERY_PREFIX:
114 substring_pattern_ =
115 url_component_patterns->CreateQueryPrefixPattern(value_);
116 break;
117 case QUERY_SUFFIX:
118 substring_pattern_ =
119 url_component_patterns->CreateQuerySuffixPattern(value_);
120 break;
121 case QUERY_CONTAINS:
122 substring_pattern_ =
123 url_component_patterns->CreateURLContainsPattern(value_);
124 // This will be accompanied by a second check that the match occurs in
125 // the query part of a URL.
126 break;
127 case QUERY_EQUALS:
128 substring_pattern_ =
129 url_component_patterns->CreateQueryEqualsPattern(value_);
130 break;
131
132 case URL_PREFIX:
133 substring_pattern_ =
134 url_component_patterns->CreateURLPrefixPattern(value_);
135 break;
136 case URL_SUFFIX:
137 substring_pattern_ =
138 url_component_patterns->CreateURLSuffixPattern(value_);
139 break;
140 case URL_CONTAINS:
141 substring_pattern_ =
142 url_component_patterns->CreateURLContainsPattern(value_);
143 break;
144 case URL_EQUALS:
145 substring_pattern_ =
146 url_component_patterns->CreateURLEqualsPattern(value_);
147 break;
148 }
149 }
150
151
152 //
153 // UrlMatcherConditionSet
154 //
155
156 UrlMatcherConditionSet::UrlMatcherConditionSet() {}
157
158 UrlMatcherConditionSet::UrlMatcherConditionSet(
159 const UrlMatcherConditionSet::UrlMatcherConditionSet& rhs)
160 : id_(rhs.id_), conditions_(rhs.conditions_) {}
161
162 UrlMatcherConditionSet& UrlMatcherConditionSet::operator=(
163 const UrlMatcherConditionSet& rhs) {
164 id_ = rhs.id_;
165 conditions_ = rhs.conditions_;
166 return *this;
167 }
168
169 UrlMatcherConditionSet::UrlMatcherConditionSet(
170 ID id, const std::vector<UrlMatcherCondition>& conditions)
171 : id_(id), conditions_(conditions) {}
172
173 bool UrlMatcherConditionSet::IsMatch(
174 const std::set<SubstringPattern::ID>& matching_substring_patterns,
175 const GURL& url) const {
176 for (Conditions::const_iterator i = conditions_.begin();
177 i != conditions_.end(); ++i) {
178 if (!i->IsMatch(matching_substring_patterns, url))
179 return false;
180 }
181 return true;
182 }
183
184 //
185 // UrlMatcher
186 //
187
188 UrlMatcher::UrlMatcher() {}
189
190 void UrlMatcher::AddConditionSets(
191 const std::vector<UrlMatcherConditionSet>& conditions) {
192 for (std::vector<UrlMatcherConditionSet>::const_iterator i =
193 conditions.begin(); i != conditions.end(); ++i) {
194 AddConditionSet(*i);
195 }
196 RecreateTriggers();
197 }
198
199 void UrlMatcher::AddConditionSet(
200 const UrlMatcherConditionSet& new_condition_set) {
201 DCHECK(url_matcher_condition_sets_.find(new_condition_set.id()) ==
202 url_matcher_condition_sets_.end());
203 url_matcher_condition_sets_[new_condition_set.id()] = new_condition_set;
204
205 // Initialize.
206 UrlMatcherConditionSet& condition_set =
207 url_matcher_condition_sets_[new_condition_set.id()];
208 std::vector<UrlMatcherCondition>& conditions =
209 condition_set.mutable_conditions();
210 for (std::vector<UrlMatcherCondition>::iterator i = conditions.begin();
211 i != conditions.end(); ++i) {
212 i->BuildSubstringPattern(&url_component_patterns_);
213 }
214 }
215
216 void UrlMatcher::RemoveConditionSets(
217 const std::vector<UrlMatcherConditionSet::ID>& condition_ids) {
218 for (std::vector<UrlMatcherConditionSet::ID>::const_iterator i =
219 condition_ids.begin(); i != condition_ids.end(); ++i) {
220 RemoveConditionSet(*i);
221 }
222 RecreateTriggers();
223 }
224
225 void UrlMatcher::RemoveConditionSet(UrlMatcherConditionSet::ID id) {
226 DCHECK(url_matcher_condition_sets_.find(id) !=
227 url_matcher_condition_sets_.end());
228 url_matcher_condition_sets_.erase(id);
229 }
230
231 std::set<UrlMatcherConditionSet::ID> UrlMatcher::MatchUrl(const GURL& url) {
232 // See UrlComponentPatterns for the canonicalization of URLs and the
233 // distinction between full url searches and url component searches.
234
235 // Perform all full url searches.
236 std::string full_url =
237 url_component_patterns_.CanonlicalizeURLForFullSearches(url);
238 std::set<SubstringPattern::ID> full_url_matches;
239 full_url_matcher_.Match(full_url, &full_url_matches);
240
241 // Perform all url component searches.
242 std::string component_url =
243 url_component_patterns_.CanonlicalizeURLForComponentSearches(url);
244 std::set<SubstringPattern::ID> component_url_matches;
245 url_component_matcher_.Match(component_url, &component_url_matches);
246
247 // Build the union of both result sets.
248 std::set<SubstringPattern::ID> matches;
249 matches.swap(full_url_matches);
250 matches.insert(component_url_matches.begin(), component_url_matches.end());
251
252 // Calculate all UrlMatcherConditionSet for which all UrlMatcherConditions
253 // were fulfilled.
254 std::set<UrlMatcherConditionSet::ID> result;
255 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin();
256 i != matches.end(); ++i) {
257 // For each UrlMatcherConditionSet there is exactly one condition
258 // registered in substring_match_triggers_. This means that the following
259 // logic tests each UrlMatcherConditionSet exactly once if it can be
260 // completely fulfilled.
261 std::set<UrlMatcherConditionSet::ID>& condition_sets =
262 substring_match_triggers_[*i];
263 for (std::set<UrlMatcherConditionSet::ID>::const_iterator j =
264 condition_sets.begin(); j != condition_sets.end(); ++j) {
265 if (url_matcher_condition_sets_[*j].IsMatch(matches, url))
266 result.insert(url_matcher_condition_sets_[*j].id());
267 }
268 }
269
270 return result;
271 }
272
273 void UrlMatcher::UpdateSubstringSetMatcher(
274 bool full_url_conditions,
275 std::vector<SubstringPattern>* unregistered_patterns) {
276 // The purpose of |full_url_conditions| is just that we need to execute
277 // the same logic once for Full URL searches and once for URL Component
278 // searches (see UrlComponentPatterns).
279
280 // Determine which patterns need to be registered when this function
281 // terminates.
282 std::set<SubstringPattern> new_patterns;
283 for (UrlMatcherConditionSets::const_iterator condition_set_iter =
284 url_matcher_condition_sets_.begin();
285 condition_set_iter != url_matcher_condition_sets_.end();
286 ++condition_set_iter) {
287 const UrlMatcherConditionSet::Conditions& conditions =
288 condition_set_iter->second.conditions();
289 for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter =
290 conditions.begin(); condition_iter != conditions.end();
291 ++condition_iter) {
292 // If we are called to process Full URL searches, ignore all others,
293 // and vice versa.
294 if (full_url_conditions == condition_iter->IsFullUrlCondition())
295 new_patterns.insert(condition_iter->substring_pattern());
296 }
297 }
298
299 // This is the set of patterns that were registered before this function
300 // is called.
301 std::set<SubstringPattern>& registered_patterns =
302 full_url_conditions ? registered_full_url_patterns_
303 : registered_url_component_patterns_;
304
305 // Add all patterns that are in new_patterns but not in registered_patterns.
306 std::vector<SubstringPattern> patterns_to_register;
307 std::set_difference(
308 new_patterns.begin(), new_patterns.end(),
309 registered_patterns.begin(), registered_patterns.end(),
310 std::back_inserter(patterns_to_register));
311
312 // Remove all patterns that are in registered_patterns but not in
313 // new_patterns.
314 std::vector<SubstringPattern> patterns_to_unregister;
315 std::set_difference(
316 registered_patterns.begin(), registered_patterns.end(),
317 new_patterns.begin(), new_patterns.end(),
318 std::back_inserter(patterns_to_unregister));
319
320 // Update the SubstringSetMatcher.
321 SubstringSetMatcher& url_matcher =
322 full_url_conditions ? full_url_matcher_ : url_component_matcher_;
323 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
324 patterns_to_unregister);
325
326 // Update the set of registered_patterns for the next time this function
327 // is being called.
328 registered_patterns.swap(new_patterns);
329 unregistered_patterns->swap(patterns_to_unregister);
330 }
331
332 void UrlMatcher::UpdateFrequenciesAndTriggers() {
333 // Count substring pattern frequencies.
334 substring_pattern_frequencies_.clear();
335 for (UrlMatcherConditionSets::const_iterator condition_set_iter =
336 url_matcher_condition_sets_.begin();
337 condition_set_iter != url_matcher_condition_sets_.end();
338 ++condition_set_iter) {
339 const UrlMatcherConditionSet::Conditions& conditions =
340 condition_set_iter->second.conditions();
341 for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter =
342 conditions.begin(); condition_iter != conditions.end();
343 ++condition_iter) {
344 const SubstringPattern& pattern = condition_iter->substring_pattern();
345 substring_pattern_frequencies_[pattern.id()]++;
346 }
347 }
348
349 // Update trigger conditions: Determine for each UrlMatcherConditionSet which
350 // UrlMatcherCondition occurs least frequently in this UrlMatcher. We assume
351 // that this condition is very specific and occurs rarely in URLs. If a match
352 // occurs for this UrlMatcherCondition, we want to test all other
353 // UrlMatcherCondition in the respective UrlMatcherConditionSet as well to
354 // see whether the entire UrlMatcherConditionSet is considered matching.
355 substring_match_triggers_.clear();
356 for (UrlMatcherConditionSets::const_iterator condition_set_iter =
357 url_matcher_condition_sets_.begin();
358 condition_set_iter != url_matcher_condition_sets_.end();
359 ++condition_set_iter) {
360 const UrlMatcherConditionSet::Conditions& conditions =
361 condition_set_iter->second.conditions();
362 if (conditions.empty())
363 continue;
364 SubstringPattern::ID trigger = conditions.front().substring_pattern().id();
365 for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter =
366 conditions.begin() + 1; condition_iter != conditions.end();
367 ++condition_iter) {
368 SubstringPattern::ID current_id =
369 condition_iter->substring_pattern().id();
370 if (substring_pattern_frequencies_[trigger] >
371 substring_pattern_frequencies_[current_id]) {
372 trigger = current_id;
373 }
374 }
375 substring_match_triggers_[trigger].insert(condition_set_iter->second.id());
376 }
377 }
378
379 void UrlMatcher::RecreateTriggers() {
380 std::vector<SubstringPattern> unregistered1;
381 UpdateSubstringSetMatcher(false, &unregistered1);
382 url_component_patterns_.DestroySingletonPatterns(unregistered1);
383
384 std::vector<SubstringPattern> unregistered2;
385 UpdateSubstringSetMatcher(true, &unregistered2);
386 url_component_patterns_.DestroySingletonPatterns(unregistered2);
387
388 UpdateFrequenciesAndTriggers();
389 }
390
391 } // namespace extensions
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698