OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/extensions/matcher/url_matcher.h" | 5 #include "chrome/common/extensions/matcher/url_matcher.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 | 9 |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "content/public/common/url_constants.h" | 11 #include "content/public/common/url_constants.h" |
12 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
13 #include "googleurl/src/url_canon.h" | 13 #include "googleurl/src/url_canon.h" |
14 | 14 |
15 namespace extensions { | 15 namespace extensions { |
16 | 16 |
17 // This set of classes implement a mapping of URL Component Patterns, such as | 17 // This set of classes implement a mapping of URL Component Patterns, such as |
18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. | 18 // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns. |
19 // | 19 // |
20 // The idea of this mapping is to reduce the problem of comparing many | 20 // The idea of this mapping is to reduce the problem of comparing many |
21 // URL Component Patterns against one URL to the problem of searching many | 21 // URL Component Patterns against one URL to the problem of searching many |
22 // substrings in one string: | 22 // substrings in one string: |
23 // | 23 // |
24 // ---------------------- -------------------- | 24 // ---------------------- -------------------- |
battre
2012/09/12 18:04:50
nit: adapt ASCII drawing
Yoyo Zhou
2012/09/12 20:25:56
I'm not sure this helps for regular expressions; t
battre
2012/09/12 20:50:39
Oh, I was just referring to the point that the ---
Yoyo Zhou
2012/09/12 21:58:06
Ah, it's that the art is ugly. Fixed.
| |
25 // | URL Query operator | ----translate----> | SubstringPattern | | 25 // | URL Query operator | ----translate----> | StringPattern | |
26 // ---------------------- -------------------- | 26 // ---------------------- -------------------- |
27 // ^ | 27 // ^ |
28 // | | 28 // | |
29 // compare | 29 // compare |
30 // | | 30 // | |
31 // v | 31 // v |
32 // ---------------------- -------------------- | 32 // ---------------------- -------------------- |
33 // | URL to compare | | | | 33 // | URL to compare | | | |
34 // | to all URL Query | ----translate----> | String | | 34 // | to all URL Query | ----translate----> | String | |
35 // | operators | | | | 35 // | operators | | | |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
77 // | 77 // |
78 // host_suffix(suffix) = suffix ED | 78 // host_suffix(suffix) = suffix ED |
79 // -> host_suffix("example.com") = example.com ED | 79 // -> host_suffix("example.com") = example.com ED |
80 // -> host_suffix(".example.com") = .example.com ED | 80 // -> host_suffix(".example.com") = .example.com ED |
81 // | 81 // |
82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED | 82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED |
83 // -> host_equals("www.example.com") = BU .www.example.com ED | 83 // -> host_equals("www.example.com") = BU .www.example.com ED |
84 // | 84 // |
85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). | 85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). |
86 // | 86 // |
87 // With this, we can search the SubstringPatterns in the normalized URL. | 87 // With this, we can search the StringPatterns in the normalized URL. |
88 // | 88 // |
89 // | 89 // |
90 // Case 2: url_{prefix,suffix,equals,contains} searches. | 90 // Case 2: url_{prefix,suffix,equals,contains} searches. |
91 // ===================================================== | 91 // ===================================================== |
92 // | 92 // |
93 // Step 1: as above, except that | 93 // Step 1: as above, except that |
94 // - the scheme is not removed | 94 // - the scheme is not removed |
95 // - the port is not removed if it is specified and does not match the default | 95 // - the port is not removed if it is specified and does not match the default |
96 // port for the given scheme. | 96 // port for the given scheme. |
97 // | 97 // |
(...skipping 19 matching lines...) Expand all Loading... | |
117 // | 117 // |
118 // These kinds of searches are not supported directly but can be derived | 118 // These kinds of searches are not supported directly but can be derived |
119 // by a combination of a url_contains() query followed by an explicit test: | 119 // by a combination of a url_contains() query followed by an explicit test: |
120 // | 120 // |
121 // host_contains(str) = url_contains(str) followed by test whether str occurs | 121 // host_contains(str) = url_contains(str) followed by test whether str occurs |
122 // in host component of original URL. | 122 // in host component of original URL. |
123 // -> host_contains("example.co") = example.co | 123 // -> host_contains("example.co") = example.co |
124 // followed by gurl.host().find("example.co"); | 124 // followed by gurl.host().find("example.co"); |
125 // | 125 // |
126 // [similarly for path_contains and query_contains]. | 126 // [similarly for path_contains and query_contains]. |
127 // | |
128 // | |
129 // Regular expression matching (url_matches searches) | |
130 // ================================================== | |
131 // | |
132 // This class also supports matching regular expressions (RE2 syntax) | |
133 // against full URLs, which are transformed as in case 2. | |
battre
2012/09/12 18:04:50
I think we want to point this out in the documenta
Yoyo Zhou
2012/09/12 20:25:56
Do you mean the comments at the top of this file?
battre
2012/09/12 20:50:39
I think you fixed this by addressing my comment to
| |
127 | 134 |
135 namespace { | |
136 | |
137 bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) { | |
138 return criterion == URLMatcherCondition::URL_MATCHES; | |
139 } | |
140 | |
141 } // namespace | |
128 | 142 |
129 // | 143 // |
130 // URLMatcherCondition | 144 // URLMatcherCondition |
131 // | 145 // |
132 | 146 |
133 URLMatcherCondition::URLMatcherCondition() | 147 URLMatcherCondition::URLMatcherCondition() |
134 : criterion_(HOST_PREFIX), | 148 : criterion_(HOST_PREFIX), |
135 substring_pattern_(NULL) {} | 149 string_pattern_(NULL) {} |
136 | 150 |
137 URLMatcherCondition::~URLMatcherCondition() {} | 151 URLMatcherCondition::~URLMatcherCondition() {} |
138 | 152 |
139 URLMatcherCondition::URLMatcherCondition( | 153 URLMatcherCondition::URLMatcherCondition( |
140 Criterion criterion, | 154 Criterion criterion, |
141 const SubstringPattern* substring_pattern) | 155 const StringPattern* string_pattern) |
142 : criterion_(criterion), | 156 : criterion_(criterion), |
143 substring_pattern_(substring_pattern) {} | 157 string_pattern_(string_pattern) {} |
144 | 158 |
145 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) | 159 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) |
146 : criterion_(rhs.criterion_), | 160 : criterion_(rhs.criterion_), |
147 substring_pattern_(rhs.substring_pattern_) {} | 161 string_pattern_(rhs.string_pattern_) {} |
148 | 162 |
149 URLMatcherCondition& URLMatcherCondition::operator=( | 163 URLMatcherCondition& URLMatcherCondition::operator=( |
150 const URLMatcherCondition& rhs) { | 164 const URLMatcherCondition& rhs) { |
151 criterion_ = rhs.criterion_; | 165 criterion_ = rhs.criterion_; |
152 substring_pattern_ = rhs.substring_pattern_; | 166 string_pattern_ = rhs.string_pattern_; |
153 return *this; | 167 return *this; |
154 } | 168 } |
155 | 169 |
156 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { | 170 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { |
157 if (criterion_ < rhs.criterion_) return true; | 171 if (criterion_ < rhs.criterion_) return true; |
158 if (criterion_ > rhs.criterion_) return false; | 172 if (criterion_ > rhs.criterion_) return false; |
159 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) | 173 if (string_pattern_ != NULL && rhs.string_pattern_ != NULL) |
160 return *substring_pattern_ < *rhs.substring_pattern_; | 174 return *string_pattern_ < *rhs.string_pattern_; |
161 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; | 175 if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true; |
162 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, | 176 // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL, |
163 // or both are NULL. | 177 // or both are NULL. |
164 return false; | 178 return false; |
165 } | 179 } |
166 | 180 |
167 bool URLMatcherCondition::IsFullURLCondition() const { | 181 bool URLMatcherCondition::IsFullURLCondition() const { |
168 // For these criteria the SubstringMatcher needs to be executed on the | 182 // For these criteria the SubstringMatcher needs to be executed on the |
169 // GURL that is canonicalized with | 183 // GURL that is canonicalized with |
170 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 184 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. |
171 switch (criterion_) { | 185 switch (criterion_) { |
172 case HOST_CONTAINS: | 186 case HOST_CONTAINS: |
173 case PATH_CONTAINS: | 187 case PATH_CONTAINS: |
174 case QUERY_CONTAINS: | 188 case QUERY_CONTAINS: |
175 case URL_PREFIX: | 189 case URL_PREFIX: |
176 case URL_SUFFIX: | 190 case URL_SUFFIX: |
177 case URL_CONTAINS: | 191 case URL_CONTAINS: |
178 case URL_EQUALS: | 192 case URL_EQUALS: |
179 return true; | 193 return true; |
180 default: | 194 default: |
181 break; | 195 break; |
182 } | 196 } |
183 return false; | 197 return false; |
184 } | 198 } |
185 | 199 |
200 bool URLMatcherCondition::IsRegexCondition() const { | |
201 return IsRegexCriterion(criterion_); | |
202 } | |
203 | |
186 bool URLMatcherCondition::IsMatch( | 204 bool URLMatcherCondition::IsMatch( |
187 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 205 const std::set<StringPattern::ID>& matching_patterns, |
188 const GURL& url) const { | 206 const GURL& url) const { |
189 DCHECK(substring_pattern_); | 207 DCHECK(string_pattern_); |
190 if (matching_substring_patterns.find(substring_pattern_->id()) == | 208 if (!ContainsKey(matching_patterns, string_pattern_->id())) |
battre
2012/09/12 18:04:50
Thanks for introducing me to this.
| |
191 matching_substring_patterns.end()) | |
192 return false; | 209 return false; |
193 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on | 210 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on |
194 // a substring match on the raw URL. In case of a match, we need to verify | 211 // a substring match on the raw URL. In case of a match, we need to verify |
195 // that the match was found in the correct component of the URL. | 212 // that the match was found in the correct component of the URL. |
196 switch (criterion_) { | 213 switch (criterion_) { |
197 case HOST_CONTAINS: | 214 case HOST_CONTAINS: |
198 return url.host().find(substring_pattern_->pattern()) != | 215 return url.host().find(string_pattern_->pattern()) != |
199 std::string::npos; | 216 std::string::npos; |
200 case PATH_CONTAINS: | 217 case PATH_CONTAINS: |
201 return url.path().find(substring_pattern_->pattern()) != | 218 return url.path().find(string_pattern_->pattern()) != |
202 std::string::npos; | 219 std::string::npos; |
203 case QUERY_CONTAINS: | 220 case QUERY_CONTAINS: |
204 return url.query().find(substring_pattern_->pattern()) != | 221 return url.query().find(string_pattern_->pattern()) != |
205 std::string::npos; | 222 std::string::npos; |
206 default: | 223 default: |
207 break; | 224 break; |
208 } | 225 } |
209 return true; | 226 return true; |
210 } | 227 } |
211 | 228 |
212 // | 229 // |
213 // URLMatcherConditionFactory | 230 // URLMatcherConditionFactory |
214 // | 231 // |
215 | 232 |
216 namespace { | 233 namespace { |
217 // These are symbols that are not contained in 7-bit ASCII used in GURLs. | 234 // These are symbols that are not contained in 7-bit ASCII used in GURLs. |
218 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; | 235 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; |
219 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; | 236 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; |
220 const char kEndOfPath[] = {static_cast<char>(-3), 0}; | 237 const char kEndOfPath[] = {static_cast<char>(-3), 0}; |
221 const char kEndOfURL[] = {static_cast<char>(-4), 0}; | 238 const char kEndOfURL[] = {static_cast<char>(-4), 0}; |
222 } // namespace | 239 } // namespace |
223 | 240 |
224 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} | 241 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} |
225 | 242 |
226 URLMatcherConditionFactory::~URLMatcherConditionFactory() { | 243 URLMatcherConditionFactory::~URLMatcherConditionFactory() { |
227 STLDeleteElements(&pattern_singletons_); | 244 STLDeleteElements(&substring_pattern_singletons_); |
245 STLDeleteElements(&regex_pattern_singletons_); | |
228 } | 246 } |
229 | 247 |
230 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( | 248 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( |
231 const GURL& url) { | 249 const GURL& url) { |
232 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + | 250 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + |
233 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + | 251 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + |
234 kEndOfURL; | 252 kEndOfURL; |
235 } | 253 } |
236 | 254 |
237 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( | 255 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
329 const std::string& port = url.scheme(); | 347 const std::string& port = url.scheme(); |
330 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == | 348 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == |
331 url.EffectiveIntPort()) { | 349 url.EffectiveIntPort()) { |
332 replacements.ClearPort(); | 350 replacements.ClearPort(); |
333 } | 351 } |
334 } | 352 } |
335 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + | 353 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + |
336 kEndOfURL; | 354 kEndOfURL; |
337 } | 355 } |
338 | 356 |
357 std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( | |
358 const GURL& url) { | |
359 GURL::Replacements replacements; | |
360 replacements.ClearPassword(); | |
361 replacements.ClearUsername(); | |
362 replacements.ClearRef(); | |
363 // Clear port if it is implicit from scheme. | |
364 if (url.has_port()) { | |
365 const std::string& port = url.scheme(); | |
366 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == | |
367 url.EffectiveIntPort()) { | |
368 replacements.ClearPort(); | |
369 } | |
370 } | |
371 return url.ReplaceComponents(replacements).spec(); | |
372 } | |
373 | |
339 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 374 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( |
340 const std::string& prefix) { | 375 const std::string& prefix) { |
341 return CreateCondition(URLMatcherCondition::URL_PREFIX, | 376 return CreateCondition(URLMatcherCondition::URL_PREFIX, |
342 kBeginningOfURL + prefix); | 377 kBeginningOfURL + prefix); |
343 } | 378 } |
344 | 379 |
345 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 380 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( |
346 const std::string& suffix) { | 381 const std::string& suffix) { |
347 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 382 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); |
348 } | 383 } |
349 | 384 |
350 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 385 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( |
351 const std::string& str) { | 386 const std::string& str) { |
352 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 387 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); |
353 } | 388 } |
354 | 389 |
355 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 390 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( |
356 const std::string& str) { | 391 const std::string& str) { |
357 return CreateCondition(URLMatcherCondition::URL_EQUALS, | 392 return CreateCondition(URLMatcherCondition::URL_EQUALS, |
358 kBeginningOfURL + str + kEndOfURL); | 393 kBeginningOfURL + str + kEndOfURL); |
359 } | 394 } |
360 | 395 |
396 URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition( | |
397 const std::string& regex) { | |
398 return CreateCondition(URLMatcherCondition::URL_MATCHES, regex); | |
399 } | |
400 | |
361 void URLMatcherConditionFactory::ForgetUnusedPatterns( | 401 void URLMatcherConditionFactory::ForgetUnusedPatterns( |
362 const std::set<SubstringPattern::ID>& used_patterns) { | 402 const std::set<StringPattern::ID>& used_patterns) { |
363 PatternSingletons::iterator i = pattern_singletons_.begin(); | 403 PatternSingletons::iterator i = substring_pattern_singletons_.begin(); |
364 while (i != pattern_singletons_.end()) { | 404 while (i != substring_pattern_singletons_.end()) { |
365 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | 405 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
366 ++i; | 406 ++i; |
367 } else { | 407 } else { |
368 delete *i; | 408 delete *i; |
369 pattern_singletons_.erase(i++); | 409 substring_pattern_singletons_.erase(i++); |
410 } | |
411 } | |
412 i = regex_pattern_singletons_.begin(); | |
413 while (i != regex_pattern_singletons_.end()) { | |
414 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | |
415 ++i; | |
416 } else { | |
417 delete *i; | |
418 regex_pattern_singletons_.erase(i++); | |
370 } | 419 } |
371 } | 420 } |
372 } | 421 } |
373 | 422 |
374 bool URLMatcherConditionFactory::IsEmpty() const { | 423 bool URLMatcherConditionFactory::IsEmpty() const { |
375 return pattern_singletons_.empty(); | 424 return substring_pattern_singletons_.empty() && |
425 regex_pattern_singletons_.empty(); | |
376 } | 426 } |
377 | 427 |
378 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( | 428 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( |
379 URLMatcherCondition::Criterion criterion, | 429 URLMatcherCondition::Criterion criterion, |
380 const std::string& pattern) { | 430 const std::string& pattern) { |
381 SubstringPattern search_pattern(pattern, 0); | 431 StringPattern search_pattern(pattern, 0); |
432 PatternSingletons* pattern_singletons = | |
433 IsRegexCriterion(criterion) ? &regex_pattern_singletons_ | |
434 : &substring_pattern_singletons_; | |
435 | |
382 PatternSingletons::const_iterator iter = | 436 PatternSingletons::const_iterator iter = |
383 pattern_singletons_.find(&search_pattern); | 437 pattern_singletons->find(&search_pattern); |
384 if (iter != pattern_singletons_.end()) { | 438 |
439 if (iter != pattern_singletons->end()) { | |
385 return URLMatcherCondition(criterion, *iter); | 440 return URLMatcherCondition(criterion, *iter); |
386 } else { | 441 } else { |
387 SubstringPattern* new_pattern = | 442 StringPattern* new_pattern = |
388 new SubstringPattern(pattern, id_counter_++); | 443 new StringPattern(pattern, id_counter_++); |
389 pattern_singletons_.insert(new_pattern); | 444 pattern_singletons->insert(new_pattern); |
390 return URLMatcherCondition(criterion, new_pattern); | 445 return URLMatcherCondition(criterion, new_pattern); |
391 } | 446 } |
392 } | 447 } |
393 | 448 |
394 std::string URLMatcherConditionFactory::CanonicalizeHostname( | 449 std::string URLMatcherConditionFactory::CanonicalizeHostname( |
395 const std::string& hostname) const { | 450 const std::string& hostname) const { |
396 if (!hostname.empty() && hostname[0] == '.') | 451 if (!hostname.empty() && hostname[0] == '.') |
397 return hostname; | 452 return hostname; |
398 else | 453 else |
399 return "." + hostname; | 454 return "." + hostname; |
400 } | 455 } |
401 | 456 |
402 bool URLMatcherConditionFactory::SubstringPatternPointerCompare::operator()( | 457 bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()( |
403 SubstringPattern* lhs, | 458 StringPattern* lhs, |
404 SubstringPattern* rhs) const { | 459 StringPattern* rhs) const { |
405 if (lhs == NULL && rhs != NULL) return true; | 460 if (lhs == NULL && rhs != NULL) return true; |
406 if (lhs != NULL && rhs != NULL) | 461 if (lhs != NULL && rhs != NULL) |
407 return lhs->pattern() < rhs->pattern(); | 462 return lhs->pattern() < rhs->pattern(); |
408 // Either both are NULL or only rhs is NULL. | 463 // Either both are NULL or only rhs is NULL. |
409 return false; | 464 return false; |
410 } | 465 } |
411 | 466 |
412 // | 467 // |
413 // URLMatcherSchemeFilter | 468 // URLMatcherSchemeFilter |
414 // | 469 // |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
476 ID id, | 531 ID id, |
477 const Conditions& conditions, | 532 const Conditions& conditions, |
478 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, | 533 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, |
479 scoped_ptr<URLMatcherPortFilter> port_filter) | 534 scoped_ptr<URLMatcherPortFilter> port_filter) |
480 : id_(id), | 535 : id_(id), |
481 conditions_(conditions), | 536 conditions_(conditions), |
482 scheme_filter_(scheme_filter.Pass()), | 537 scheme_filter_(scheme_filter.Pass()), |
483 port_filter_(port_filter.Pass()) {} | 538 port_filter_(port_filter.Pass()) {} |
484 | 539 |
485 bool URLMatcherConditionSet::IsMatch( | 540 bool URLMatcherConditionSet::IsMatch( |
486 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 541 const std::set<StringPattern::ID>& matching_patterns, |
487 const GURL& url) const { | 542 const GURL& url) const { |
488 for (Conditions::const_iterator i = conditions_.begin(); | 543 for (Conditions::const_iterator i = conditions_.begin(); |
489 i != conditions_.end(); ++i) { | 544 i != conditions_.end(); ++i) { |
490 if (!i->IsMatch(matching_substring_patterns, url)) | 545 if (!i->IsMatch(matching_patterns, url)) |
491 return false; | 546 return false; |
492 } | 547 } |
493 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) | 548 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) |
494 return false; | 549 return false; |
495 if (port_filter_.get() && !port_filter_->IsMatch(url)) | 550 if (port_filter_.get() && !port_filter_->IsMatch(url)) |
496 return false; | 551 return false; |
497 return true; | 552 return true; |
498 } | 553 } |
499 | 554 |
500 | |
501 // | 555 // |
502 // URLMatcher | 556 // URLMatcher |
503 // | 557 // |
504 | 558 |
505 URLMatcher::URLMatcher() {} | 559 URLMatcher::URLMatcher() {} |
506 | 560 |
507 URLMatcher::~URLMatcher() {} | 561 URLMatcher::~URLMatcher() {} |
508 | 562 |
509 void URLMatcher::AddConditionSets( | 563 void URLMatcher::AddConditionSets( |
510 const URLMatcherConditionSet::Vector& condition_sets) { | 564 const URLMatcherConditionSet::Vector& condition_sets) { |
(...skipping 15 matching lines...) Expand all Loading... | |
526 url_matcher_condition_sets_.erase(*i); | 580 url_matcher_condition_sets_.erase(*i); |
527 } | 581 } |
528 UpdateInternalDatastructures(); | 582 UpdateInternalDatastructures(); |
529 } | 583 } |
530 | 584 |
531 void URLMatcher::ClearUnusedConditionSets() { | 585 void URLMatcher::ClearUnusedConditionSets() { |
532 UpdateConditionFactory(); | 586 UpdateConditionFactory(); |
533 } | 587 } |
534 | 588 |
535 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { | 589 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { |
536 // Find all IDs of SubstringPatterns that match |url|. | 590 // Find all IDs of StringPatterns that match |url|. |
537 // See URLMatcherConditionFactory for the canonicalization of URLs and the | 591 // See URLMatcherConditionFactory for the canonicalization of URLs and the |
538 // distinction between full url searches and url component searches. | 592 // distinction between full url searches and url component searches. |
539 std::set<SubstringPattern::ID> matches; | 593 std::set<StringPattern::ID> matches; |
540 full_url_matcher_.Match( | 594 full_url_matcher_.Match( |
541 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); | 595 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); |
542 url_component_matcher_.Match( | 596 url_component_matcher_.Match( |
543 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); | 597 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); |
598 regex_set_matcher_.Match( | |
599 condition_factory_.CanonicalizeURLForRegexSearches(url), &matches); | |
544 | 600 |
545 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions | 601 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions |
546 // were fulfilled. | 602 // were fulfilled. |
547 std::set<URLMatcherConditionSet::ID> result; | 603 std::set<URLMatcherConditionSet::ID> result; |
548 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin(); | 604 for (std::set<StringPattern::ID>::const_iterator i = matches.begin(); |
549 i != matches.end(); ++i) { | 605 i != matches.end(); ++i) { |
550 // For each URLMatcherConditionSet there is exactly one condition | 606 // For each URLMatcherConditionSet there is exactly one condition |
551 // registered in substring_match_triggers_. This means that the following | 607 // registered in substring_match_triggers_. This means that the following |
552 // logic tests each URLMatcherConditionSet exactly once if it can be | 608 // logic tests each URLMatcherConditionSet exactly once if it can be |
553 // completely fulfilled. | 609 // completely fulfilled. |
554 std::set<URLMatcherConditionSet::ID>& condition_sets = | 610 std::set<URLMatcherConditionSet::ID>& condition_sets = |
555 substring_match_triggers_[*i]; | 611 substring_match_triggers_[*i]; |
556 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = | 612 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = |
557 condition_sets.begin(); j != condition_sets.end(); ++j) { | 613 condition_sets.begin(); j != condition_sets.end(); ++j) { |
558 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) | 614 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) |
(...skipping 14 matching lines...) Expand all Loading... | |
573 registered_url_component_patterns_.empty(); | 629 registered_url_component_patterns_.empty(); |
574 } | 630 } |
575 | 631 |
576 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { | 632 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { |
577 // The purpose of |full_url_conditions| is just that we need to execute | 633 // The purpose of |full_url_conditions| is just that we need to execute |
578 // the same logic once for Full URL searches and once for URL Component | 634 // the same logic once for Full URL searches and once for URL Component |
579 // searches (see URLMatcherConditionFactory). | 635 // searches (see URLMatcherConditionFactory). |
580 | 636 |
581 // Determine which patterns need to be registered when this function | 637 // Determine which patterns need to be registered when this function |
582 // terminates. | 638 // terminates. |
583 std::set<const SubstringPattern*> new_patterns; | 639 std::set<const StringPattern*> new_patterns; |
584 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 640 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
585 url_matcher_condition_sets_.begin(); | 641 url_matcher_condition_sets_.begin(); |
586 condition_set_iter != url_matcher_condition_sets_.end(); | 642 condition_set_iter != url_matcher_condition_sets_.end(); |
587 ++condition_set_iter) { | 643 ++condition_set_iter) { |
588 const URLMatcherConditionSet::Conditions& conditions = | 644 const URLMatcherConditionSet::Conditions& conditions = |
589 condition_set_iter->second->conditions(); | 645 condition_set_iter->second->conditions(); |
590 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 646 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
591 conditions.begin(); condition_iter != conditions.end(); | 647 conditions.begin(); condition_iter != conditions.end(); |
592 ++condition_iter) { | 648 ++condition_iter) { |
593 // If we are called to process Full URL searches, ignore all others, | 649 // If we are called to process Full URL searches, ignore others, and |
594 // and vice versa. | 650 // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.) |
595 if (full_url_conditions == condition_iter->IsFullURLCondition()) | 651 if (!condition_iter->IsRegexCondition() && |
596 new_patterns.insert(condition_iter->substring_pattern()); | 652 full_url_conditions == condition_iter->IsFullURLCondition()) |
653 new_patterns.insert(condition_iter->string_pattern()); | |
597 } | 654 } |
598 } | 655 } |
599 | 656 |
600 // This is the set of patterns that were registered before this function | 657 // This is the set of patterns that were registered before this function |
601 // is called. | 658 // is called. |
602 std::set<const SubstringPattern*>& registered_patterns = | 659 std::set<const StringPattern*>& registered_patterns = |
603 full_url_conditions ? registered_full_url_patterns_ | 660 full_url_conditions ? registered_full_url_patterns_ |
604 : registered_url_component_patterns_; | 661 : registered_url_component_patterns_; |
605 | 662 |
606 // Add all patterns that are in new_patterns but not in registered_patterns. | 663 // Add all patterns that are in new_patterns but not in registered_patterns. |
607 std::vector<const SubstringPattern*> patterns_to_register; | 664 std::vector<const StringPattern*> patterns_to_register; |
608 std::set_difference( | 665 std::set_difference( |
609 new_patterns.begin(), new_patterns.end(), | 666 new_patterns.begin(), new_patterns.end(), |
610 registered_patterns.begin(), registered_patterns.end(), | 667 registered_patterns.begin(), registered_patterns.end(), |
611 std::back_inserter(patterns_to_register)); | 668 std::back_inserter(patterns_to_register)); |
612 | 669 |
613 // Remove all patterns that are in registered_patterns but not in | 670 // Remove all patterns that are in registered_patterns but not in |
614 // new_patterns. | 671 // new_patterns. |
615 std::vector<const SubstringPattern*> patterns_to_unregister; | 672 std::vector<const StringPattern*> patterns_to_unregister; |
616 std::set_difference( | 673 std::set_difference( |
617 registered_patterns.begin(), registered_patterns.end(), | 674 registered_patterns.begin(), registered_patterns.end(), |
618 new_patterns.begin(), new_patterns.end(), | 675 new_patterns.begin(), new_patterns.end(), |
619 std::back_inserter(patterns_to_unregister)); | 676 std::back_inserter(patterns_to_unregister)); |
620 | 677 |
621 // Update the SubstringSetMatcher. | 678 // Update the SubstringSetMatcher. |
622 SubstringSetMatcher& url_matcher = | 679 SubstringSetMatcher& url_matcher = |
623 full_url_conditions ? full_url_matcher_ : url_component_matcher_; | 680 full_url_conditions ? full_url_matcher_ : url_component_matcher_; |
624 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, | 681 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, |
625 patterns_to_unregister); | 682 patterns_to_unregister); |
626 | 683 |
627 // Update the set of registered_patterns for the next time this function | 684 // Update the set of registered_patterns for the next time this function |
628 // is called. | 685 // is called. |
629 registered_patterns.swap(new_patterns); | 686 registered_patterns.swap(new_patterns); |
630 } | 687 } |
631 | 688 |
689 void URLMatcher::UpdateRegexSetMatcher() { | |
690 std::vector<const StringPattern*> new_patterns; | |
691 | |
692 for (URLMatcherConditionSets::const_iterator condition_set_iter = | |
693 url_matcher_condition_sets_.begin(); | |
694 condition_set_iter != url_matcher_condition_sets_.end(); | |
695 ++condition_set_iter) { | |
696 const URLMatcherConditionSet::Conditions& conditions = | |
697 condition_set_iter->second->conditions(); | |
698 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | |
699 conditions.begin(); condition_iter != conditions.end(); | |
700 ++condition_iter) { | |
701 if (condition_iter->IsRegexCondition()) | |
702 new_patterns.push_back(condition_iter->string_pattern()); | |
703 } | |
704 } | |
705 | |
706 // Start over from scratch. We can't really do better than this, since the | |
707 // FilteredRE2 backend doesn't support incremental updates. | |
708 regex_set_matcher_.ClearPatterns(); | |
709 regex_set_matcher_.AddPatterns(new_patterns); | |
710 } | |
711 | |
632 void URLMatcher::UpdateTriggers() { | 712 void URLMatcher::UpdateTriggers() { |
633 // Count substring pattern frequencies. | 713 // Count substring pattern frequencies. |
634 std::map<SubstringPattern::ID, size_t> substring_pattern_frequencies; | 714 std::map<StringPattern::ID, size_t> substring_pattern_frequencies; |
635 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 715 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
636 url_matcher_condition_sets_.begin(); | 716 url_matcher_condition_sets_.begin(); |
637 condition_set_iter != url_matcher_condition_sets_.end(); | 717 condition_set_iter != url_matcher_condition_sets_.end(); |
638 ++condition_set_iter) { | 718 ++condition_set_iter) { |
639 const URLMatcherConditionSet::Conditions& conditions = | 719 const URLMatcherConditionSet::Conditions& conditions = |
640 condition_set_iter->second->conditions(); | 720 condition_set_iter->second->conditions(); |
641 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 721 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
642 conditions.begin(); condition_iter != conditions.end(); | 722 conditions.begin(); condition_iter != conditions.end(); |
643 ++condition_iter) { | 723 ++condition_iter) { |
644 const SubstringPattern* pattern = condition_iter->substring_pattern(); | 724 const StringPattern* pattern = condition_iter->string_pattern(); |
645 substring_pattern_frequencies[pattern->id()]++; | 725 substring_pattern_frequencies[pattern->id()]++; |
646 } | 726 } |
647 } | 727 } |
648 | 728 |
649 // Update trigger conditions: Determine for each URLMatcherConditionSet which | 729 // Update trigger conditions: Determine for each URLMatcherConditionSet which |
650 // URLMatcherCondition contains a SubstringPattern that occurs least | 730 // URLMatcherCondition contains a StringPattern that occurs least |
651 // frequently in this URLMatcher. We assume that this condition is very | 731 // frequently in this URLMatcher. We assume that this condition is very |
652 // specific and occurs rarely in URLs. If a match occurs for this | 732 // specific and occurs rarely in URLs. If a match occurs for this |
653 // URLMatcherCondition, we want to test all other URLMatcherConditions in the | 733 // URLMatcherCondition, we want to test all other URLMatcherConditions in the |
654 // respective URLMatcherConditionSet as well to see whether the entire | 734 // respective URLMatcherConditionSet as well to see whether the entire |
655 // URLMatcherConditionSet is considered matching. | 735 // URLMatcherConditionSet is considered matching. |
656 substring_match_triggers_.clear(); | 736 substring_match_triggers_.clear(); |
657 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 737 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
658 url_matcher_condition_sets_.begin(); | 738 url_matcher_condition_sets_.begin(); |
659 condition_set_iter != url_matcher_condition_sets_.end(); | 739 condition_set_iter != url_matcher_condition_sets_.end(); |
660 ++condition_set_iter) { | 740 ++condition_set_iter) { |
661 const URLMatcherConditionSet::Conditions& conditions = | 741 const URLMatcherConditionSet::Conditions& conditions = |
662 condition_set_iter->second->conditions(); | 742 condition_set_iter->second->conditions(); |
663 if (conditions.empty()) | 743 if (conditions.empty()) |
664 continue; | 744 continue; |
665 URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 745 URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
666 conditions.begin(); | 746 conditions.begin(); |
667 SubstringPattern::ID trigger = condition_iter->substring_pattern()->id(); | 747 StringPattern::ID trigger = condition_iter->string_pattern()->id(); |
668 // We skip the first element in the following loop. | 748 // We skip the first element in the following loop. |
669 ++condition_iter; | 749 ++condition_iter; |
670 for (; condition_iter != conditions.end(); ++condition_iter) { | 750 for (; condition_iter != conditions.end(); ++condition_iter) { |
671 SubstringPattern::ID current_id = | 751 StringPattern::ID current_id = |
672 condition_iter->substring_pattern()->id(); | 752 condition_iter->string_pattern()->id(); |
673 if (substring_pattern_frequencies[trigger] > | 753 if (substring_pattern_frequencies[trigger] > |
674 substring_pattern_frequencies[current_id]) { | 754 substring_pattern_frequencies[current_id]) { |
675 trigger = current_id; | 755 trigger = current_id; |
676 } | 756 } |
677 } | 757 } |
678 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); | 758 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); |
679 } | 759 } |
680 } | 760 } |
681 | 761 |
682 void URLMatcher::UpdateConditionFactory() { | 762 void URLMatcher::UpdateConditionFactory() { |
683 std::set<SubstringPattern::ID> used_patterns; | 763 std::set<StringPattern::ID> used_patterns; |
684 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 764 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
685 url_matcher_condition_sets_.begin(); | 765 url_matcher_condition_sets_.begin(); |
686 condition_set_iter != url_matcher_condition_sets_.end(); | 766 condition_set_iter != url_matcher_condition_sets_.end(); |
687 ++condition_set_iter) { | 767 ++condition_set_iter) { |
688 const URLMatcherConditionSet::Conditions& conditions = | 768 const URLMatcherConditionSet::Conditions& conditions = |
689 condition_set_iter->second->conditions(); | 769 condition_set_iter->second->conditions(); |
690 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 770 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
691 conditions.begin(); condition_iter != conditions.end(); | 771 conditions.begin(); condition_iter != conditions.end(); |
692 ++condition_iter) { | 772 ++condition_iter) { |
693 used_patterns.insert(condition_iter->substring_pattern()->id()); | 773 used_patterns.insert(condition_iter->string_pattern()->id()); |
694 } | 774 } |
695 } | 775 } |
696 condition_factory_.ForgetUnusedPatterns(used_patterns); | 776 condition_factory_.ForgetUnusedPatterns(used_patterns); |
697 } | 777 } |
698 | 778 |
699 void URLMatcher::UpdateInternalDatastructures() { | 779 void URLMatcher::UpdateInternalDatastructures() { |
700 UpdateSubstringSetMatcher(false); | 780 UpdateSubstringSetMatcher(false); |
701 UpdateSubstringSetMatcher(true); | 781 UpdateSubstringSetMatcher(true); |
782 UpdateRegexSetMatcher(); | |
702 UpdateTriggers(); | 783 UpdateTriggers(); |
703 UpdateConditionFactory(); | 784 UpdateConditionFactory(); |
704 } | 785 } |
705 | 786 |
706 } // namespace extensions | 787 } // namespace extensions |
OLD | NEW |