Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: chrome/common/extensions/matcher/url_matcher.cc

Issue 10910179: Event matching by regular expression matching on URLs. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: 1 Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/extensions/matcher/url_matcher.h" 5 #include "chrome/common/extensions/matcher/url_matcher.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "content/public/common/url_constants.h" 11 #include "content/public/common/url_constants.h"
12 #include "googleurl/src/gurl.h" 12 #include "googleurl/src/gurl.h"
13 #include "googleurl/src/url_canon.h" 13 #include "googleurl/src/url_canon.h"
14 14
15 namespace extensions { 15 namespace extensions {
16 16
17 // This set of classes implements a mapping of URL Component Patterns, such as 17 // This set of classes implements a mapping of URL Component Patterns, such as
18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. 18 // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns.
19 // 19 //
20 // The idea of this mapping is to reduce the problem of comparing many 20 // The idea of this mapping is to reduce the problem of comparing many
21 // URL Component Patterns against one URL to the problem of searching many 21 // URL Component Patterns against one URL to the problem of searching many
22 // substrings in one string: 22 // substrings in one string:
23 // 23 //
24 // ---------------------- -------------------- 24 // ---------------------- --------------------
battre 2012/09/12 18:04:50 nit: adapt ASCII drawing
Yoyo Zhou 2012/09/12 20:25:56 I'm not sure this helps for regular expressions; t
battre 2012/09/12 20:50:39 Oh, I was just referring to the point that the ---
Yoyo Zhou 2012/09/12 21:58:06 Ah, it's that the art is ugly. Fixed.
25 // | URL Query operator | ----translate----> | SubstringPattern | 25 // | URL Query operator | ----translate----> | StringPattern |
26 // ---------------------- -------------------- 26 // ---------------------- --------------------
27 // ^ 27 // ^
28 // | 28 // |
29 // compare 29 // compare
30 // | 30 // |
31 // v 31 // v
32 // ---------------------- -------------------- 32 // ---------------------- --------------------
33 // | URL to compare | | | 33 // | URL to compare | | |
34 // | to all URL Query | ----translate----> | String | 34 // | to all URL Query | ----translate----> | String |
35 // | operators | | | 35 // | operators | | |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 // 77 //
78 // host_suffix(suffix) = suffix ED 78 // host_suffix(suffix) = suffix ED
79 // -> host_suffix("example.com") = example.com ED 79 // -> host_suffix("example.com") = example.com ED
80 // -> host_suffix(".example.com") = .example.com ED 80 // -> host_suffix(".example.com") = .example.com ED
81 // 81 //
82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED 82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED
83 // -> host_equals("www.example.com") = BU .www.example.com ED 83 // -> host_equals("www.example.com") = BU .www.example.com ED
84 // 84 //
85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). 85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
86 // 86 //
87 // With this, we can search the SubstringPatterns in the normalized URL. 87 // With this, we can search the StringPatterns in the normalized URL.
88 // 88 //
89 // 89 //
90 // Case 2: url_{prefix,suffix,equals,contains} searches. 90 // Case 2: url_{prefix,suffix,equals,contains} searches.
91 // ===================================================== 91 // =====================================================
92 // 92 //
93 // Step 1: as above, except that 93 // Step 1: as above, except that
94 // - the scheme is not removed 94 // - the scheme is not removed
95 // - the port is not removed if it is specified and does not match the default 95 // - the port is not removed if it is specified and does not match the default
96 // port for the given scheme. 96 // port for the given scheme.
97 // 97 //
(...skipping 19 matching lines...) Expand all
117 // 117 //
118 // These kinds of searches are not supported directly but can be derived 118 // These kinds of searches are not supported directly but can be derived
119 // by a combination of a url_contains() query followed by an explicit test: 119 // by a combination of a url_contains() query followed by an explicit test:
120 // 120 //
121 // host_contains(str) = url_contains(str) followed by test whether str occurs 121 // host_contains(str) = url_contains(str) followed by test whether str occurs
122 // in host component of original URL. 122 // in host component of original URL.
123 // -> host_contains("example.co") = example.co 123 // -> host_contains("example.co") = example.co
124 // followed by gurl.host().find("example.co"); 124 // followed by gurl.host().find("example.co");
125 // 125 //
126 // [similarly for path_contains and query_contains]. 126 // [similarly for path_contains and query_contains].
127 //
128 //
129 // Regular expression matching (url_matches searches)
130 // ==================================================
131 //
132 // This class also supports matching regular expressions (RE2 syntax)
133 // against full URLs, which are transformed as in case 2.
battre 2012/09/12 18:04:50 I think we want to point this out in the documenta
Yoyo Zhou 2012/09/12 20:25:56 Do you mean the comments at the top of this file?
battre 2012/09/12 20:50:39 I think you fixed this by addressing my comment to
127 134
135 namespace {
136
137 bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) {
138 return criterion == URLMatcherCondition::URL_MATCHES;
139 }
140
141 } // namespace
128 142
129 // 143 //
130 // URLMatcherCondition 144 // URLMatcherCondition
131 // 145 //
132 146
133 URLMatcherCondition::URLMatcherCondition() 147 URLMatcherCondition::URLMatcherCondition()
134 : criterion_(HOST_PREFIX), 148 : criterion_(HOST_PREFIX),
135 substring_pattern_(NULL) {} 149 string_pattern_(NULL) {}
136 150
137 URLMatcherCondition::~URLMatcherCondition() {} 151 URLMatcherCondition::~URLMatcherCondition() {}
138 152
139 URLMatcherCondition::URLMatcherCondition( 153 URLMatcherCondition::URLMatcherCondition(
140 Criterion criterion, 154 Criterion criterion,
141 const SubstringPattern* substring_pattern) 155 const StringPattern* string_pattern)
142 : criterion_(criterion), 156 : criterion_(criterion),
143 substring_pattern_(substring_pattern) {} 157 string_pattern_(string_pattern) {}
144 158
145 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) 159 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
146 : criterion_(rhs.criterion_), 160 : criterion_(rhs.criterion_),
147 substring_pattern_(rhs.substring_pattern_) {} 161 string_pattern_(rhs.string_pattern_) {}
148 162
149 URLMatcherCondition& URLMatcherCondition::operator=( 163 URLMatcherCondition& URLMatcherCondition::operator=(
150 const URLMatcherCondition& rhs) { 164 const URLMatcherCondition& rhs) {
151 criterion_ = rhs.criterion_; 165 criterion_ = rhs.criterion_;
152 substring_pattern_ = rhs.substring_pattern_; 166 string_pattern_ = rhs.string_pattern_;
153 return *this; 167 return *this;
154 } 168 }
155 169
156 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { 170 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
157 if (criterion_ < rhs.criterion_) return true; 171 if (criterion_ < rhs.criterion_) return true;
158 if (criterion_ > rhs.criterion_) return false; 172 if (criterion_ > rhs.criterion_) return false;
159 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) 173 if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
160 return *substring_pattern_ < *rhs.substring_pattern_; 174 return *string_pattern_ < *rhs.string_pattern_;
161 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; 175 if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true;
162 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, 176 // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
163 // or both are NULL. 177 // or both are NULL.
164 return false; 178 return false;
165 } 179 }
166 180
167 bool URLMatcherCondition::IsFullURLCondition() const { 181 bool URLMatcherCondition::IsFullURLCondition() const {
168 // For these criteria the SubstringSetMatcher needs to be executed on the 182 // For these criteria the SubstringSetMatcher needs to be executed on the
169 // GURL that is canonicalized with 183 // GURL that is canonicalized with
170 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. 184 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
171 switch (criterion_) { 185 switch (criterion_) {
172 case HOST_CONTAINS: 186 case HOST_CONTAINS:
173 case PATH_CONTAINS: 187 case PATH_CONTAINS:
174 case QUERY_CONTAINS: 188 case QUERY_CONTAINS:
175 case URL_PREFIX: 189 case URL_PREFIX:
176 case URL_SUFFIX: 190 case URL_SUFFIX:
177 case URL_CONTAINS: 191 case URL_CONTAINS:
178 case URL_EQUALS: 192 case URL_EQUALS:
179 return true; 193 return true;
180 default: 194 default:
181 break; 195 break;
182 } 196 }
183 return false; 197 return false;
184 } 198 }
185 199
200 bool URLMatcherCondition::IsRegexCondition() const {
201 return IsRegexCriterion(criterion_);
202 }
203
186 bool URLMatcherCondition::IsMatch( 204 bool URLMatcherCondition::IsMatch(
187 const std::set<SubstringPattern::ID>& matching_substring_patterns, 205 const std::set<StringPattern::ID>& matching_patterns,
188 const GURL& url) const { 206 const GURL& url) const {
189 DCHECK(substring_pattern_); 207 DCHECK(string_pattern_);
190 if (matching_substring_patterns.find(substring_pattern_->id()) == 208 if (!ContainsKey(matching_patterns, string_pattern_->id()))
battre 2012/09/12 18:04:50 Thanks for introducing me to this.
191 matching_substring_patterns.end())
192 return false; 209 return false;
193 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on 210 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
194 // a substring match on the raw URL. In case of a match, we need to verify 211 // a substring match on the raw URL. In case of a match, we need to verify
195 // that the match was found in the correct component of the URL. 212 // that the match was found in the correct component of the URL.
196 switch (criterion_) { 213 switch (criterion_) {
197 case HOST_CONTAINS: 214 case HOST_CONTAINS:
198 return url.host().find(substring_pattern_->pattern()) != 215 return url.host().find(string_pattern_->pattern()) !=
199 std::string::npos; 216 std::string::npos;
200 case PATH_CONTAINS: 217 case PATH_CONTAINS:
201 return url.path().find(substring_pattern_->pattern()) != 218 return url.path().find(string_pattern_->pattern()) !=
202 std::string::npos; 219 std::string::npos;
203 case QUERY_CONTAINS: 220 case QUERY_CONTAINS:
204 return url.query().find(substring_pattern_->pattern()) != 221 return url.query().find(string_pattern_->pattern()) !=
205 std::string::npos; 222 std::string::npos;
206 default: 223 default:
207 break; 224 break;
208 } 225 }
209 return true; 226 return true;
210 } 227 }
211 228
212 // 229 //
213 // URLMatcherConditionFactory 230 // URLMatcherConditionFactory
214 // 231 //
215 232
216 namespace { 233 namespace {
217 // These are symbols that are not contained in 7-bit ASCII used in GURLs. 234 // These are symbols that are not contained in 7-bit ASCII used in GURLs.
218 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; 235 const char kBeginningOfURL[] = {static_cast<char>(-1), 0};
219 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; 236 const char kEndOfDomain[] = {static_cast<char>(-2), 0};
220 const char kEndOfPath[] = {static_cast<char>(-3), 0}; 237 const char kEndOfPath[] = {static_cast<char>(-3), 0};
221 const char kEndOfURL[] = {static_cast<char>(-4), 0}; 238 const char kEndOfURL[] = {static_cast<char>(-4), 0};
222 } // namespace 239 } // namespace
223 240
224 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} 241 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
225 242
226 URLMatcherConditionFactory::~URLMatcherConditionFactory() { 243 URLMatcherConditionFactory::~URLMatcherConditionFactory() {
227 STLDeleteElements(&pattern_singletons_); 244 STLDeleteElements(&substring_pattern_singletons_);
245 STLDeleteElements(&regex_pattern_singletons_);
228 } 246 }
229 247
230 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( 248 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
231 const GURL& url) { 249 const GURL& url) {
232 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + 250 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain +
233 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + 251 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") +
234 kEndOfURL; 252 kEndOfURL;
235 } 253 }
236 254
237 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( 255 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition(
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
329 const std::string& port = url.scheme(); 347 const std::string& port = url.scheme();
330 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == 348 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
331 url.EffectiveIntPort()) { 349 url.EffectiveIntPort()) {
332 replacements.ClearPort(); 350 replacements.ClearPort();
333 } 351 }
334 } 352 }
335 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + 353 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() +
336 kEndOfURL; 354 kEndOfURL;
337 } 355 }
338 356
357 std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches(
358 const GURL& url) {
359 GURL::Replacements replacements;
360 replacements.ClearPassword();
361 replacements.ClearUsername();
362 replacements.ClearRef();
363 // Clear port if it is implicit from scheme.
364 if (url.has_port()) {
365 const std::string& port = url.scheme();
366 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
367 url.EffectiveIntPort()) {
368 replacements.ClearPort();
369 }
370 }
371 return url.ReplaceComponents(replacements).spec();
372 }
373
339 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( 374 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
340 const std::string& prefix) { 375 const std::string& prefix) {
341 return CreateCondition(URLMatcherCondition::URL_PREFIX, 376 return CreateCondition(URLMatcherCondition::URL_PREFIX,
342 kBeginningOfURL + prefix); 377 kBeginningOfURL + prefix);
343 } 378 }
344 379
345 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( 380 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
346 const std::string& suffix) { 381 const std::string& suffix) {
347 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); 382 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
348 } 383 }
349 384
350 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( 385 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
351 const std::string& str) { 386 const std::string& str) {
352 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); 387 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
353 } 388 }
354 389
355 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( 390 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
356 const std::string& str) { 391 const std::string& str) {
357 return CreateCondition(URLMatcherCondition::URL_EQUALS, 392 return CreateCondition(URLMatcherCondition::URL_EQUALS,
358 kBeginningOfURL + str + kEndOfURL); 393 kBeginningOfURL + str + kEndOfURL);
359 } 394 }
360 395
396 URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition(
397 const std::string& regex) {
398 return CreateCondition(URLMatcherCondition::URL_MATCHES, regex);
399 }
400
361 void URLMatcherConditionFactory::ForgetUnusedPatterns( 401 void URLMatcherConditionFactory::ForgetUnusedPatterns(
362 const std::set<SubstringPattern::ID>& used_patterns) { 402 const std::set<StringPattern::ID>& used_patterns) {
363 PatternSingletons::iterator i = pattern_singletons_.begin(); 403 PatternSingletons::iterator i = substring_pattern_singletons_.begin();
364 while (i != pattern_singletons_.end()) { 404 while (i != substring_pattern_singletons_.end()) {
365 if (used_patterns.find((*i)->id()) != used_patterns.end()) { 405 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
366 ++i; 406 ++i;
367 } else { 407 } else {
368 delete *i; 408 delete *i;
369 pattern_singletons_.erase(i++); 409 substring_pattern_singletons_.erase(i++);
410 }
411 }
412 i = regex_pattern_singletons_.begin();
413 while (i != regex_pattern_singletons_.end()) {
414 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
415 ++i;
416 } else {
417 delete *i;
418 regex_pattern_singletons_.erase(i++);
370 } 419 }
371 } 420 }
372 } 421 }
373 422
374 bool URLMatcherConditionFactory::IsEmpty() const { 423 bool URLMatcherConditionFactory::IsEmpty() const {
375 return pattern_singletons_.empty(); 424 return substring_pattern_singletons_.empty() &&
425 regex_pattern_singletons_.empty();
376 } 426 }
377 427
378 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( 428 URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
379 URLMatcherCondition::Criterion criterion, 429 URLMatcherCondition::Criterion criterion,
380 const std::string& pattern) { 430 const std::string& pattern) {
381 SubstringPattern search_pattern(pattern, 0); 431 StringPattern search_pattern(pattern, 0);
432 PatternSingletons* pattern_singletons =
433 IsRegexCriterion(criterion) ? &regex_pattern_singletons_
434 : &substring_pattern_singletons_;
435
382 PatternSingletons::const_iterator iter = 436 PatternSingletons::const_iterator iter =
383 pattern_singletons_.find(&search_pattern); 437 pattern_singletons->find(&search_pattern);
384 if (iter != pattern_singletons_.end()) { 438
439 if (iter != pattern_singletons->end()) {
385 return URLMatcherCondition(criterion, *iter); 440 return URLMatcherCondition(criterion, *iter);
386 } else { 441 } else {
387 SubstringPattern* new_pattern = 442 StringPattern* new_pattern =
388 new SubstringPattern(pattern, id_counter_++); 443 new StringPattern(pattern, id_counter_++);
389 pattern_singletons_.insert(new_pattern); 444 pattern_singletons->insert(new_pattern);
390 return URLMatcherCondition(criterion, new_pattern); 445 return URLMatcherCondition(criterion, new_pattern);
391 } 446 }
392 } 447 }
393 448
394 std::string URLMatcherConditionFactory::CanonicalizeHostname( 449 std::string URLMatcherConditionFactory::CanonicalizeHostname(
395 const std::string& hostname) const { 450 const std::string& hostname) const {
396 if (!hostname.empty() && hostname[0] == '.') 451 if (!hostname.empty() && hostname[0] == '.')
397 return hostname; 452 return hostname;
398 else 453 else
399 return "." + hostname; 454 return "." + hostname;
400 } 455 }
401 456
402 bool URLMatcherConditionFactory::SubstringPatternPointerCompare::operator()( 457 bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()(
403 SubstringPattern* lhs, 458 StringPattern* lhs,
404 SubstringPattern* rhs) const { 459 StringPattern* rhs) const {
405 if (lhs == NULL && rhs != NULL) return true; 460 if (lhs == NULL && rhs != NULL) return true;
406 if (lhs != NULL && rhs != NULL) 461 if (lhs != NULL && rhs != NULL)
407 return lhs->pattern() < rhs->pattern(); 462 return lhs->pattern() < rhs->pattern();
408 // Either both are NULL or only rhs is NULL. 463 // Either both are NULL or only rhs is NULL.
409 return false; 464 return false;
410 } 465 }
411 466
412 // 467 //
413 // URLMatcherSchemeFilter 468 // URLMatcherSchemeFilter
414 // 469 //
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 ID id, 531 ID id,
477 const Conditions& conditions, 532 const Conditions& conditions,
478 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, 533 scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
479 scoped_ptr<URLMatcherPortFilter> port_filter) 534 scoped_ptr<URLMatcherPortFilter> port_filter)
480 : id_(id), 535 : id_(id),
481 conditions_(conditions), 536 conditions_(conditions),
482 scheme_filter_(scheme_filter.Pass()), 537 scheme_filter_(scheme_filter.Pass()),
483 port_filter_(port_filter.Pass()) {} 538 port_filter_(port_filter.Pass()) {}
484 539
485 bool URLMatcherConditionSet::IsMatch( 540 bool URLMatcherConditionSet::IsMatch(
486 const std::set<SubstringPattern::ID>& matching_substring_patterns, 541 const std::set<StringPattern::ID>& matching_patterns,
487 const GURL& url) const { 542 const GURL& url) const {
488 for (Conditions::const_iterator i = conditions_.begin(); 543 for (Conditions::const_iterator i = conditions_.begin();
489 i != conditions_.end(); ++i) { 544 i != conditions_.end(); ++i) {
490 if (!i->IsMatch(matching_substring_patterns, url)) 545 if (!i->IsMatch(matching_patterns, url))
491 return false; 546 return false;
492 } 547 }
493 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) 548 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
494 return false; 549 return false;
495 if (port_filter_.get() && !port_filter_->IsMatch(url)) 550 if (port_filter_.get() && !port_filter_->IsMatch(url))
496 return false; 551 return false;
497 return true; 552 return true;
498 } 553 }
499 554
500
501 // 555 //
502 // URLMatcher 556 // URLMatcher
503 // 557 //
504 558
505 URLMatcher::URLMatcher() {} 559 URLMatcher::URLMatcher() {}
506 560
507 URLMatcher::~URLMatcher() {} 561 URLMatcher::~URLMatcher() {}
508 562
509 void URLMatcher::AddConditionSets( 563 void URLMatcher::AddConditionSets(
510 const URLMatcherConditionSet::Vector& condition_sets) { 564 const URLMatcherConditionSet::Vector& condition_sets) {
(...skipping 15 matching lines...) Expand all
526 url_matcher_condition_sets_.erase(*i); 580 url_matcher_condition_sets_.erase(*i);
527 } 581 }
528 UpdateInternalDatastructures(); 582 UpdateInternalDatastructures();
529 } 583 }
530 584
531 void URLMatcher::ClearUnusedConditionSets() { 585 void URLMatcher::ClearUnusedConditionSets() {
532 UpdateConditionFactory(); 586 UpdateConditionFactory();
533 } 587 }
534 588
535 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { 589 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) {
536 // Find all IDs of SubstringPatterns that match |url|. 590 // Find all IDs of StringPatterns that match |url|.
537 // See URLMatcherConditionFactory for the canonicalization of URLs and the 591 // See URLMatcherConditionFactory for the canonicalization of URLs and the
538 // distinction between full url searches and url component searches. 592 // distinction between full url searches and url component searches.
539 std::set<SubstringPattern::ID> matches; 593 std::set<StringPattern::ID> matches;
540 full_url_matcher_.Match( 594 full_url_matcher_.Match(
541 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); 595 condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
542 url_component_matcher_.Match( 596 url_component_matcher_.Match(
543 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); 597 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches);
598 regex_set_matcher_.Match(
599 condition_factory_.CanonicalizeURLForRegexSearches(url), &matches);
544 600
545 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions 601 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions
546 // were fulfilled. 602 // were fulfilled.
547 std::set<URLMatcherConditionSet::ID> result; 603 std::set<URLMatcherConditionSet::ID> result;
548 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin(); 604 for (std::set<StringPattern::ID>::const_iterator i = matches.begin();
549 i != matches.end(); ++i) { 605 i != matches.end(); ++i) {
550 // For each URLMatcherConditionSet there is exactly one condition 606 // For each URLMatcherConditionSet there is exactly one condition
551 // registered in substring_match_triggers_. This means that the following 607 // registered in substring_match_triggers_. This means that the following
552 // logic tests each URLMatcherConditionSet exactly once if it can be 608 // logic tests each URLMatcherConditionSet exactly once if it can be
553 // completely fulfilled. 609 // completely fulfilled.
554 std::set<URLMatcherConditionSet::ID>& condition_sets = 610 std::set<URLMatcherConditionSet::ID>& condition_sets =
555 substring_match_triggers_[*i]; 611 substring_match_triggers_[*i];
556 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = 612 for (std::set<URLMatcherConditionSet::ID>::const_iterator j =
557 condition_sets.begin(); j != condition_sets.end(); ++j) { 613 condition_sets.begin(); j != condition_sets.end(); ++j) {
558 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) 614 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url))
(...skipping 14 matching lines...) Expand all
573 registered_url_component_patterns_.empty(); 629 registered_url_component_patterns_.empty();
574 } 630 }
575 631
576 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { 632 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) {
577 // The purpose of |full_url_conditions| is just that we need to execute 633 // The purpose of |full_url_conditions| is just that we need to execute
578 // the same logic once for Full URL searches and once for URL Component 634 // the same logic once for Full URL searches and once for URL Component
579 // searches (see URLMatcherConditionFactory). 635 // searches (see URLMatcherConditionFactory).
580 636
581 // Determine which patterns need to be registered when this function 637 // Determine which patterns need to be registered when this function
582 // terminates. 638 // terminates.
583 std::set<const SubstringPattern*> new_patterns; 639 std::set<const StringPattern*> new_patterns;
584 for (URLMatcherConditionSets::const_iterator condition_set_iter = 640 for (URLMatcherConditionSets::const_iterator condition_set_iter =
585 url_matcher_condition_sets_.begin(); 641 url_matcher_condition_sets_.begin();
586 condition_set_iter != url_matcher_condition_sets_.end(); 642 condition_set_iter != url_matcher_condition_sets_.end();
587 ++condition_set_iter) { 643 ++condition_set_iter) {
588 const URLMatcherConditionSet::Conditions& conditions = 644 const URLMatcherConditionSet::Conditions& conditions =
589 condition_set_iter->second->conditions(); 645 condition_set_iter->second->conditions();
590 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 646 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
591 conditions.begin(); condition_iter != conditions.end(); 647 conditions.begin(); condition_iter != conditions.end();
592 ++condition_iter) { 648 ++condition_iter) {
593 // If we are called to process Full URL searches, ignore all others, 649 // If we are called to process Full URL searches, ignore others, and
594 // and vice versa. 650 // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.)
595 if (full_url_conditions == condition_iter->IsFullURLCondition()) 651 if (!condition_iter->IsRegexCondition() &&
596 new_patterns.insert(condition_iter->substring_pattern()); 652 full_url_conditions == condition_iter->IsFullURLCondition())
653 new_patterns.insert(condition_iter->string_pattern());
597 } 654 }
598 } 655 }
599 656
600 // This is the set of patterns that were registered before this function 657 // This is the set of patterns that were registered before this function
601 // is called. 658 // is called.
602 std::set<const SubstringPattern*>& registered_patterns = 659 std::set<const StringPattern*>& registered_patterns =
603 full_url_conditions ? registered_full_url_patterns_ 660 full_url_conditions ? registered_full_url_patterns_
604 : registered_url_component_patterns_; 661 : registered_url_component_patterns_;
605 662
606 // Add all patterns that are in new_patterns but not in registered_patterns. 663 // Add all patterns that are in new_patterns but not in registered_patterns.
607 std::vector<const SubstringPattern*> patterns_to_register; 664 std::vector<const StringPattern*> patterns_to_register;
608 std::set_difference( 665 std::set_difference(
609 new_patterns.begin(), new_patterns.end(), 666 new_patterns.begin(), new_patterns.end(),
610 registered_patterns.begin(), registered_patterns.end(), 667 registered_patterns.begin(), registered_patterns.end(),
611 std::back_inserter(patterns_to_register)); 668 std::back_inserter(patterns_to_register));
612 669
613 // Remove all patterns that are in registered_patterns but not in 670 // Remove all patterns that are in registered_patterns but not in
614 // new_patterns. 671 // new_patterns.
615 std::vector<const SubstringPattern*> patterns_to_unregister; 672 std::vector<const StringPattern*> patterns_to_unregister;
616 std::set_difference( 673 std::set_difference(
617 registered_patterns.begin(), registered_patterns.end(), 674 registered_patterns.begin(), registered_patterns.end(),
618 new_patterns.begin(), new_patterns.end(), 675 new_patterns.begin(), new_patterns.end(),
619 std::back_inserter(patterns_to_unregister)); 676 std::back_inserter(patterns_to_unregister));
620 677
621 // Update the SubstringSetMatcher. 678 // Update the SubstringSetMatcher.
622 SubstringSetMatcher& url_matcher = 679 SubstringSetMatcher& url_matcher =
623 full_url_conditions ? full_url_matcher_ : url_component_matcher_; 680 full_url_conditions ? full_url_matcher_ : url_component_matcher_;
624 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, 681 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
625 patterns_to_unregister); 682 patterns_to_unregister);
626 683
627 // Update the set of registered_patterns for the next time this function 684 // Update the set of registered_patterns for the next time this function
628 // is being called. 685 // is being called.
629 registered_patterns.swap(new_patterns); 686 registered_patterns.swap(new_patterns);
630 } 687 }
631 688
// Rebuilds regex_set_matcher_ from the current contents of
// url_matcher_condition_sets_: the string pattern of every condition for which
// IsRegexCondition() is true is collected, the matcher is cleared, and the
// collected patterns are added back in one call.
// (This function exists only in the new column of the diff.)
689 void URLMatcher::UpdateRegexSetMatcher() {
690 std::vector<const StringPattern*> new_patterns;
691
692 for (URLMatcherConditionSets::const_iterator condition_set_iter =
693 url_matcher_condition_sets_.begin();
694 condition_set_iter != url_matcher_condition_sets_.end();
695 ++condition_set_iter) {
696 const URLMatcherConditionSet::Conditions& conditions =
697 condition_set_iter->second->conditions();
698 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
699 conditions.begin(); condition_iter != conditions.end();
700 ++condition_iter) {
// One entry is appended per regex condition; nothing here de-duplicates.
// NOTE(review): if several conditions can share one StringPattern, the same
// pointer is passed to AddPatterns() more than once -- confirm that is fine.
701 if (condition_iter->IsRegexCondition())
702 new_patterns.push_back(condition_iter->string_pattern());
703 }
704 }
705
706 // Start over from scratch. We can't really do better than this, since the
707 // FilteredRE2 backend doesn't support incremental updates.
708 regex_set_matcher_.ClearPatterns();
709 regex_set_matcher_.AddPatterns(new_patterns);
710 }
711
// Recomputes substring_match_triggers_ in two passes over
// url_matcher_condition_sets_:
//   1. Count, per pattern ID, how many conditions (across all condition sets)
//      refer to that pattern.
//   2. For each non-empty condition set, pick the pattern ID with the lowest
//      count and record the set's ID under that pattern in
//      substring_match_triggers_.
// NOTE(review): in the new column neither pass checks IsRegexCondition(), so
// regex conditions appear to be counted and eligible as triggers as well --
// confirm this is intended given the "substring" naming.
632 void URLMatcher::UpdateTriggers() { 712 void URLMatcher::UpdateTriggers() {
633 // Count substring pattern frequencies. 713 // Count substring pattern frequencies.
634 std::map<SubstringPattern::ID, size_t> substring_pattern_frequencies; 714 std::map<StringPattern::ID, size_t> substring_pattern_frequencies;
635 for (URLMatcherConditionSets::const_iterator condition_set_iter = 715 for (URLMatcherConditionSets::const_iterator condition_set_iter =
636 url_matcher_condition_sets_.begin(); 716 url_matcher_condition_sets_.begin();
637 condition_set_iter != url_matcher_condition_sets_.end(); 717 condition_set_iter != url_matcher_condition_sets_.end();
638 ++condition_set_iter) { 718 ++condition_set_iter) {
639 const URLMatcherConditionSet::Conditions& conditions = 719 const URLMatcherConditionSet::Conditions& conditions =
640 condition_set_iter->second->conditions(); 720 condition_set_iter->second->conditions();
641 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 721 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
642 conditions.begin(); condition_iter != conditions.end(); 722 conditions.begin(); condition_iter != conditions.end();
643 ++condition_iter) { 723 ++condition_iter) {
644 const SubstringPattern* pattern = condition_iter->substring_pattern(); 724 const StringPattern* pattern = condition_iter->string_pattern();
645 substring_pattern_frequencies[pattern->id()]++; 725 substring_pattern_frequencies[pattern->id()]++;
646 } 726 }
647 } 727 }
648 728
649 // Update trigger conditions: Determine for each URLMatcherConditionSet which 729 // Update trigger conditions: Determine for each URLMatcherConditionSet which
650 // URLMatcherCondition contains a SubstringPattern that occurs least 730 // URLMatcherCondition contains a StringPattern that occurs least
651 // frequently in this URLMatcher. We assume that this condition is very 731 // frequently in this URLMatcher. We assume that this condition is very
652 // specific and occurs rarely in URLs. If a match occurs for this 732 // specific and occurs rarely in URLs. If a match occurs for this
653 // URLMatcherCondition, we want to test all other URLMatcherCondition in the 733 // URLMatcherCondition, we want to test all other URLMatcherCondition in the
654 // respective URLMatcherConditionSet as well to see whether the entire 734 // respective URLMatcherConditionSet as well to see whether the entire
655 // URLMatcherConditionSet is considered matching. 735 // URLMatcherConditionSet is considered matching.
656 substring_match_triggers_.clear(); 736 substring_match_triggers_.clear();
657 for (URLMatcherConditionSets::const_iterator condition_set_iter = 737 for (URLMatcherConditionSets::const_iterator condition_set_iter =
658 url_matcher_condition_sets_.begin(); 738 url_matcher_condition_sets_.begin();
659 condition_set_iter != url_matcher_condition_sets_.end(); 739 condition_set_iter != url_matcher_condition_sets_.end();
660 ++condition_set_iter) { 740 ++condition_set_iter) {
661 const URLMatcherConditionSet::Conditions& conditions = 741 const URLMatcherConditionSet::Conditions& conditions =
662 condition_set_iter->second->conditions(); 742 condition_set_iter->second->conditions();
// A condition set without conditions is skipped, so it is registered under no
// trigger at all.
663 if (conditions.empty()) 743 if (conditions.empty())
664 continue; 744 continue;
665 URLMatcherConditionSet::Conditions::const_iterator condition_iter = 745 URLMatcherConditionSet::Conditions::const_iterator condition_iter =
666 conditions.begin(); 746 conditions.begin();
// Seed the candidate trigger with the first condition's pattern ID.
667 SubstringPattern::ID trigger = condition_iter->substring_pattern()->id(); 747 StringPattern::ID trigger = condition_iter->string_pattern()->id();
668 // We skip the first element in the following loop. 748 // We skip the first element in the following loop.
669 ++condition_iter; 749 ++condition_iter;
670 for (; condition_iter != conditions.end(); ++condition_iter) { 750 for (; condition_iter != conditions.end(); ++condition_iter) {
671 SubstringPattern::ID current_id = 751 StringPattern::ID current_id =
672 condition_iter->substring_pattern()->id(); 752 condition_iter->string_pattern()->id();
// Strict '>' means that on a tie the condition visited earlier stays the
// trigger.
673 if (substring_pattern_frequencies[trigger] > 753 if (substring_pattern_frequencies[trigger] >
674 substring_pattern_frequencies[current_id]) { 754 substring_pattern_frequencies[current_id]) {
675 trigger = current_id; 755 trigger = current_id;
676 } 756 }
677 } 757 }
// Map the chosen pattern ID to the ID of the condition set it triggers.
678 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); 758 substring_match_triggers_[trigger].insert(condition_set_iter->second->id());
679 } 759 }
680 } 760 }
681 761
// Collects the IDs of all patterns referenced by any condition in
// url_matcher_condition_sets_ and hands that set to
// condition_factory_.ForgetUnusedPatterns().
// NOTE(review): presumably the factory then drops every pattern whose ID is
// not in the set -- its implementation is not visible here, verify.
682 void URLMatcher::UpdateConditionFactory() { 762 void URLMatcher::UpdateConditionFactory() {
// std::set, so an ID shared by several conditions is stored only once.
683 std::set<SubstringPattern::ID> used_patterns; 763 std::set<StringPattern::ID> used_patterns;
684 for (URLMatcherConditionSets::const_iterator condition_set_iter = 764 for (URLMatcherConditionSets::const_iterator condition_set_iter =
685 url_matcher_condition_sets_.begin(); 765 url_matcher_condition_sets_.begin();
686 condition_set_iter != url_matcher_condition_sets_.end(); 766 condition_set_iter != url_matcher_condition_sets_.end();
687 ++condition_set_iter) { 767 ++condition_set_iter) {
688 const URLMatcherConditionSet::Conditions& conditions = 768 const URLMatcherConditionSet::Conditions& conditions =
689 condition_set_iter->second->conditions(); 769 condition_set_iter->second->conditions();
690 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 770 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
691 conditions.begin(); condition_iter != conditions.end(); 771 conditions.begin(); condition_iter != conditions.end();
692 ++condition_iter) { 772 ++condition_iter) {
693 used_patterns.insert(condition_iter->substring_pattern()->id()); 773 used_patterns.insert(condition_iter->string_pattern()->id());
694 } 774 }
695 } 775 }
696 condition_factory_.ForgetUnusedPatterns(used_patterns); 776 condition_factory_.ForgetUnusedPatterns(used_patterns);
697 } 777 }
698 778
// Refreshes every derived structure of the URLMatcher by running the
// individual Update*() steps in a fixed order: the two substring matchers,
// then (new column only) the regex matcher, then the triggers, and finally
// the condition factory.
// NOTE(review): whether the later steps depend on the earlier ones having run
// first is not visible from here -- keep the order unless verified.
699 void URLMatcher::UpdateInternalDatastructures() { 779 void URLMatcher::UpdateInternalDatastructures() {
// The bool picks the matcher to update: false -> url_component_matcher_,
// true -> full_url_matcher_ (see the selection in UpdateSubstringSetMatcher).
700 UpdateSubstringSetMatcher(false); 780 UpdateSubstringSetMatcher(false);
701 UpdateSubstringSetMatcher(true); 781 UpdateSubstringSetMatcher(true);
782 UpdateRegexSetMatcher();
702 UpdateTriggers(); 783 UpdateTriggers();
703 UpdateConditionFactory(); 784 UpdateConditionFactory();
704 } 785 }
705 786
706 } // namespace extensions 787 } // namespace extensions
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698