Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/common/extensions/matcher/url_matcher.h" | 5 #include "chrome/common/extensions/matcher/url_matcher.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <iterator> | 8 #include <iterator> |
| 9 | 9 |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| 11 #include "content/public/common/url_constants.h" | 11 #include "content/public/common/url_constants.h" |
| 12 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
| 13 #include "googleurl/src/url_canon.h" | 13 #include "googleurl/src/url_canon.h" |
| 14 | 14 |
| 15 namespace extensions { | 15 namespace extensions { |
| 16 | 16 |
| 17 // This set of classes implement a mapping of URL Component Patterns, such as | 17 // This set of classes implement a mapping of URL Component Patterns, such as |
| 18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. | 18 // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns. |
| 19 // | 19 // |
| 20 // The idea of this mapping is to reduce the problem of comparing many | 20 // The idea of this mapping is to reduce the problem of comparing many |
| 21 // URL Component Patterns against one URL to the problem of searching many | 21 // URL Component Patterns against one URL to the problem of searching many |
| 22 // substrings in one string: | 22 // substrings in one string: |
| 23 // | 23 // |
| 24 // ---------------------- -------------------- | 24 // ---------------------- -------------------- |
|
battre
2012/09/12 18:04:50
nit: adapt ASCII drawing
Yoyo Zhou
2012/09/12 20:25:56
I'm not sure this helps for regular expressions; t
battre
2012/09/12 20:50:39
Oh, I was just referring to the point that the ---
Yoyo Zhou
2012/09/12 21:58:06
Ah, it's that the art is ugly. Fixed.
| |
| 25 // | URL Query operator | ----translate----> | SubstringPattern | | 25 // | URL Query operator | ----translate----> | StringPattern | |
| 26 // ---------------------- -------------------- | 26 // ---------------------- -------------------- |
| 27 // ^ | 27 // ^ |
| 28 // | | 28 // | |
| 29 // compare | 29 // compare |
| 30 // | | 30 // | |
| 31 // v | 31 // v |
| 32 // ---------------------- -------------------- | 32 // ---------------------- -------------------- |
| 33 // | URL to compare | | | | 33 // | URL to compare | | | |
| 34 // | to all URL Query | ----translate----> | String | | 34 // | to all URL Query | ----translate----> | String | |
| 35 // | operators | | | | 35 // | operators | | | |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 77 // | 77 // |
| 78 // host_suffix(suffix) = suffix ED | 78 // host_suffix(suffix) = suffix ED |
| 79 // -> host_suffix("example.com") = example.com ED | 79 // -> host_suffix("example.com") = example.com ED |
| 80 // -> host_suffix(".example.com") = .example.com ED | 80 // -> host_suffix(".example.com") = .example.com ED |
| 81 // | 81 // |
| 82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED | 82 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED |
| 83 // -> host_equals("www.example.com") = BU .www.example.com ED | 83 // -> host_equals("www.example.com") = BU .www.example.com ED |
| 84 // | 84 // |
| 85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). | 85 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). |
| 86 // | 86 // |
| 87 // With this, we can search the SubstringPatterns in the normalized URL. | 87 // With this, we can search the StringPatterns in the normalized URL. |
| 88 // | 88 // |
| 89 // | 89 // |
| 90 // Case 2: url_{prefix,suffix,equals,contains} searches. | 90 // Case 2: url_{prefix,suffix,equals,contains} searches. |
| 91 // ===================================================== | 91 // ===================================================== |
| 92 // | 92 // |
| 93 // Step 1: as above, except that | 93 // Step 1: as above, except that |
| 94 // - the scheme is not removed | 94 // - the scheme is not removed |
| 95 // - the port is not removed if it is specified and does not match the default | 95 // - the port is not removed if it is specified and does not match the default |
| 96 // port for the given scheme. | 96 // port for the given scheme. |
| 97 // | 97 // |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 117 // | 117 // |
| 118 // These kinds of searches are not supported directly but can be derived | 118 // These kinds of searches are not supported directly but can be derived |
| 119 // by a combination of a url_contains() query followed by an explicit test: | 119 // by a combination of a url_contains() query followed by an explicit test: |
| 120 // | 120 // |
| 121 // host_contains(str) = url_contains(str) followed by test whether str occurs | 121 // host_contains(str) = url_contains(str) followed by test whether str occurs |
| 122 // in host component of original URL. | 122 // in host component of original URL. |
| 123 // -> host_contains("example.co") = example.co | 123 // -> host_contains("example.co") = example.co |
| 124 // followed by gurl.host().find("example.co"); | 124 // followed by gurl.host().find("example.co"); |
| 125 // | 125 // |
| 126 // [similarly for path_contains and query_contains]. | 126 // [similarly for path_contains and query_contains]. |
| 127 // | |
| 128 // | |
| 129 // Regular expression matching (url_matches searches) | |
| 130 // ================================================== | |
| 131 // | |
| 132 // This class also supports matching regular expressions (RE2 syntax) | |
| 133 // against full URLs, which are transformed as in case 2. | |
|
battre
2012/09/12 18:04:50
I think we want to point this out in the documenta
Yoyo Zhou
2012/09/12 20:25:56
Do you mean the comments at the top of this file?
battre
2012/09/12 20:50:39
I think you fixed this by addressing my comment to
| |
| 127 | 134 |
| 135 namespace { | |
| 136 | |
| 137 bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) { | |
| 138 return criterion == URLMatcherCondition::URL_MATCHES; | |
| 139 } | |
| 140 | |
| 141 } // namespace | |
| 128 | 142 |
| 129 // | 143 // |
| 130 // URLMatcherCondition | 144 // URLMatcherCondition |
| 131 // | 145 // |
| 132 | 146 |
| 133 URLMatcherCondition::URLMatcherCondition() | 147 URLMatcherCondition::URLMatcherCondition() |
| 134 : criterion_(HOST_PREFIX), | 148 : criterion_(HOST_PREFIX), |
| 135 substring_pattern_(NULL) {} | 149 string_pattern_(NULL) {} |
| 136 | 150 |
| 137 URLMatcherCondition::~URLMatcherCondition() {} | 151 URLMatcherCondition::~URLMatcherCondition() {} |
| 138 | 152 |
| 139 URLMatcherCondition::URLMatcherCondition( | 153 URLMatcherCondition::URLMatcherCondition( |
| 140 Criterion criterion, | 154 Criterion criterion, |
| 141 const SubstringPattern* substring_pattern) | 155 const StringPattern* string_pattern) |
| 142 : criterion_(criterion), | 156 : criterion_(criterion), |
| 143 substring_pattern_(substring_pattern) {} | 157 string_pattern_(string_pattern) {} |
| 144 | 158 |
| 145 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) | 159 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) |
| 146 : criterion_(rhs.criterion_), | 160 : criterion_(rhs.criterion_), |
| 147 substring_pattern_(rhs.substring_pattern_) {} | 161 string_pattern_(rhs.string_pattern_) {} |
| 148 | 162 |
| 149 URLMatcherCondition& URLMatcherCondition::operator=( | 163 URLMatcherCondition& URLMatcherCondition::operator=( |
| 150 const URLMatcherCondition& rhs) { | 164 const URLMatcherCondition& rhs) { |
| 151 criterion_ = rhs.criterion_; | 165 criterion_ = rhs.criterion_; |
| 152 substring_pattern_ = rhs.substring_pattern_; | 166 string_pattern_ = rhs.string_pattern_; |
| 153 return *this; | 167 return *this; |
| 154 } | 168 } |
| 155 | 169 |
| 156 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { | 170 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { |
| 157 if (criterion_ < rhs.criterion_) return true; | 171 if (criterion_ < rhs.criterion_) return true; |
| 158 if (criterion_ > rhs.criterion_) return false; | 172 if (criterion_ > rhs.criterion_) return false; |
| 159 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) | 173 if (string_pattern_ != NULL && rhs.string_pattern_ != NULL) |
| 160 return *substring_pattern_ < *rhs.substring_pattern_; | 174 return *string_pattern_ < *rhs.string_pattern_; |
| 161 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; | 175 if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true; |
| 162 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, | 176 // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL, |
| 163 // or both are NULL. | 177 // or both are NULL. |
| 164 return false; | 178 return false; |
| 165 } | 179 } |
| 166 | 180 |
| 167 bool URLMatcherCondition::IsFullURLCondition() const { | 181 bool URLMatcherCondition::IsFullURLCondition() const { |
| 168 // For these criteria the SubstringMatcher needs to be executed on the | 182 // For these criteria the SubstringMatcher needs to be executed on the |
| 169 // GURL that is canonicalized with | 183 // GURL that is canonicalized with |
| 170 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 184 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. |
| 171 switch (criterion_) { | 185 switch (criterion_) { |
| 172 case HOST_CONTAINS: | 186 case HOST_CONTAINS: |
| 173 case PATH_CONTAINS: | 187 case PATH_CONTAINS: |
| 174 case QUERY_CONTAINS: | 188 case QUERY_CONTAINS: |
| 175 case URL_PREFIX: | 189 case URL_PREFIX: |
| 176 case URL_SUFFIX: | 190 case URL_SUFFIX: |
| 177 case URL_CONTAINS: | 191 case URL_CONTAINS: |
| 178 case URL_EQUALS: | 192 case URL_EQUALS: |
| 179 return true; | 193 return true; |
| 180 default: | 194 default: |
| 181 break; | 195 break; |
| 182 } | 196 } |
| 183 return false; | 197 return false; |
| 184 } | 198 } |
| 185 | 199 |
| 200 bool URLMatcherCondition::IsRegexCondition() const { | |
| 201 return IsRegexCriterion(criterion_); | |
| 202 } | |
| 203 | |
| 186 bool URLMatcherCondition::IsMatch( | 204 bool URLMatcherCondition::IsMatch( |
| 187 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 205 const std::set<StringPattern::ID>& matching_patterns, |
| 188 const GURL& url) const { | 206 const GURL& url) const { |
| 189 DCHECK(substring_pattern_); | 207 DCHECK(string_pattern_); |
| 190 if (matching_substring_patterns.find(substring_pattern_->id()) == | 208 if (!ContainsKey(matching_patterns, string_pattern_->id())) |
|
battre
2012/09/12 18:04:50
Thanks for introducing me to this.
| |
| 191 matching_substring_patterns.end()) | |
| 192 return false; | 209 return false; |
| 193 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on | 210 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on |
| 194 // a substring match on the raw URL. In case of a match, we need to verify | 211 // a substring match on the raw URL. In case of a match, we need to verify |
| 195 // that the match was found in the correct component of the URL. | 212 // that the match was found in the correct component of the URL. |
| 196 switch (criterion_) { | 213 switch (criterion_) { |
| 197 case HOST_CONTAINS: | 214 case HOST_CONTAINS: |
| 198 return url.host().find(substring_pattern_->pattern()) != | 215 return url.host().find(string_pattern_->pattern()) != |
| 199 std::string::npos; | 216 std::string::npos; |
| 200 case PATH_CONTAINS: | 217 case PATH_CONTAINS: |
| 201 return url.path().find(substring_pattern_->pattern()) != | 218 return url.path().find(string_pattern_->pattern()) != |
| 202 std::string::npos; | 219 std::string::npos; |
| 203 case QUERY_CONTAINS: | 220 case QUERY_CONTAINS: |
| 204 return url.query().find(substring_pattern_->pattern()) != | 221 return url.query().find(string_pattern_->pattern()) != |
| 205 std::string::npos; | 222 std::string::npos; |
| 206 default: | 223 default: |
| 207 break; | 224 break; |
| 208 } | 225 } |
| 209 return true; | 226 return true; |
| 210 } | 227 } |
| 211 | 228 |
| 212 // | 229 // |
| 213 // URLMatcherConditionFactory | 230 // URLMatcherConditionFactory |
| 214 // | 231 // |
| 215 | 232 |
| 216 namespace { | 233 namespace { |
| 217 // These are symbols that are not contained in 7-bit ASCII used in GURLs. | 234 // These are symbols that are not contained in 7-bit ASCII used in GURLs. |
| 218 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; | 235 const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; |
| 219 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; | 236 const char kEndOfDomain[] = {static_cast<char>(-2), 0}; |
| 220 const char kEndOfPath[] = {static_cast<char>(-3), 0}; | 237 const char kEndOfPath[] = {static_cast<char>(-3), 0}; |
| 221 const char kEndOfURL[] = {static_cast<char>(-4), 0}; | 238 const char kEndOfURL[] = {static_cast<char>(-4), 0}; |
| 222 } // namespace | 239 } // namespace |
| 223 | 240 |
| 224 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} | 241 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} |
| 225 | 242 |
| 226 URLMatcherConditionFactory::~URLMatcherConditionFactory() { | 243 URLMatcherConditionFactory::~URLMatcherConditionFactory() { |
| 227 STLDeleteElements(&pattern_singletons_); | 244 STLDeleteElements(&substring_pattern_singletons_); |
| 245 STLDeleteElements(&regex_pattern_singletons_); | |
| 228 } | 246 } |
| 229 | 247 |
| 230 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( | 248 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( |
| 231 const GURL& url) { | 249 const GURL& url) { |
| 232 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + | 250 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + |
| 233 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + | 251 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") + |
| 234 kEndOfURL; | 252 kEndOfURL; |
| 235 } | 253 } |
| 236 | 254 |
| 237 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( | 255 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 329 const std::string& port = url.scheme(); | 347 const std::string& port = url.scheme(); |
| 330 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == | 348 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == |
| 331 url.EffectiveIntPort()) { | 349 url.EffectiveIntPort()) { |
| 332 replacements.ClearPort(); | 350 replacements.ClearPort(); |
| 333 } | 351 } |
| 334 } | 352 } |
| 335 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + | 353 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + |
| 336 kEndOfURL; | 354 kEndOfURL; |
| 337 } | 355 } |
| 338 | 356 |
| 357 std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( | |
| 358 const GURL& url) { | |
| 359 GURL::Replacements replacements; | |
| 360 replacements.ClearPassword(); | |
| 361 replacements.ClearUsername(); | |
| 362 replacements.ClearRef(); | |
| 363 // Clear port if it is implicit from scheme. | |
| 364 if (url.has_port()) { | |
| 365 const std::string& port = url.scheme(); | |
| 366 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == | |
| 367 url.EffectiveIntPort()) { | |
| 368 replacements.ClearPort(); | |
| 369 } | |
| 370 } | |
| 371 return url.ReplaceComponents(replacements).spec(); | |
| 372 } | |
| 373 | |
| 339 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 374 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( |
| 340 const std::string& prefix) { | 375 const std::string& prefix) { |
| 341 return CreateCondition(URLMatcherCondition::URL_PREFIX, | 376 return CreateCondition(URLMatcherCondition::URL_PREFIX, |
| 342 kBeginningOfURL + prefix); | 377 kBeginningOfURL + prefix); |
| 343 } | 378 } |
| 344 | 379 |
| 345 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 380 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( |
| 346 const std::string& suffix) { | 381 const std::string& suffix) { |
| 347 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 382 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); |
| 348 } | 383 } |
| 349 | 384 |
| 350 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 385 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( |
| 351 const std::string& str) { | 386 const std::string& str) { |
| 352 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 387 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); |
| 353 } | 388 } |
| 354 | 389 |
| 355 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 390 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( |
| 356 const std::string& str) { | 391 const std::string& str) { |
| 357 return CreateCondition(URLMatcherCondition::URL_EQUALS, | 392 return CreateCondition(URLMatcherCondition::URL_EQUALS, |
| 358 kBeginningOfURL + str + kEndOfURL); | 393 kBeginningOfURL + str + kEndOfURL); |
| 359 } | 394 } |
| 360 | 395 |
| 396 URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition( | |
| 397 const std::string& regex) { | |
| 398 return CreateCondition(URLMatcherCondition::URL_MATCHES, regex); | |
| 399 } | |
| 400 | |
| 361 void URLMatcherConditionFactory::ForgetUnusedPatterns( | 401 void URLMatcherConditionFactory::ForgetUnusedPatterns( |
| 362 const std::set<SubstringPattern::ID>& used_patterns) { | 402 const std::set<StringPattern::ID>& used_patterns) { |
| 363 PatternSingletons::iterator i = pattern_singletons_.begin(); | 403 PatternSingletons::iterator i = substring_pattern_singletons_.begin(); |
| 364 while (i != pattern_singletons_.end()) { | 404 while (i != substring_pattern_singletons_.end()) { |
| 365 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | 405 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
| 366 ++i; | 406 ++i; |
| 367 } else { | 407 } else { |
| 368 delete *i; | 408 delete *i; |
| 369 pattern_singletons_.erase(i++); | 409 substring_pattern_singletons_.erase(i++); |
| 410 } | |
| 411 } | |
| 412 i = regex_pattern_singletons_.begin(); | |
| 413 while (i != regex_pattern_singletons_.end()) { | |
| 414 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | |
| 415 ++i; | |
| 416 } else { | |
| 417 delete *i; | |
| 418 regex_pattern_singletons_.erase(i++); | |
| 370 } | 419 } |
| 371 } | 420 } |
| 372 } | 421 } |
| 373 | 422 |
| 374 bool URLMatcherConditionFactory::IsEmpty() const { | 423 bool URLMatcherConditionFactory::IsEmpty() const { |
| 375 return pattern_singletons_.empty(); | 424 return substring_pattern_singletons_.empty() && |
| 425 regex_pattern_singletons_.empty(); | |
| 376 } | 426 } |
| 377 | 427 |
| 378 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( | 428 URLMatcherCondition URLMatcherConditionFactory::CreateCondition( |
| 379 URLMatcherCondition::Criterion criterion, | 429 URLMatcherCondition::Criterion criterion, |
| 380 const std::string& pattern) { | 430 const std::string& pattern) { |
| 381 SubstringPattern search_pattern(pattern, 0); | 431 StringPattern search_pattern(pattern, 0); |
| 432 PatternSingletons* pattern_singletons = | |
| 433 IsRegexCriterion(criterion) ? &regex_pattern_singletons_ | |
| 434 : &substring_pattern_singletons_; | |
| 435 | |
| 382 PatternSingletons::const_iterator iter = | 436 PatternSingletons::const_iterator iter = |
| 383 pattern_singletons_.find(&search_pattern); | 437 pattern_singletons->find(&search_pattern); |
| 384 if (iter != pattern_singletons_.end()) { | 438 |
| 439 if (iter != pattern_singletons->end()) { | |
| 385 return URLMatcherCondition(criterion, *iter); | 440 return URLMatcherCondition(criterion, *iter); |
| 386 } else { | 441 } else { |
| 387 SubstringPattern* new_pattern = | 442 StringPattern* new_pattern = |
| 388 new SubstringPattern(pattern, id_counter_++); | 443 new StringPattern(pattern, id_counter_++); |
| 389 pattern_singletons_.insert(new_pattern); | 444 pattern_singletons->insert(new_pattern); |
| 390 return URLMatcherCondition(criterion, new_pattern); | 445 return URLMatcherCondition(criterion, new_pattern); |
| 391 } | 446 } |
| 392 } | 447 } |
| 393 | 448 |
| 394 std::string URLMatcherConditionFactory::CanonicalizeHostname( | 449 std::string URLMatcherConditionFactory::CanonicalizeHostname( |
| 395 const std::string& hostname) const { | 450 const std::string& hostname) const { |
| 396 if (!hostname.empty() && hostname[0] == '.') | 451 if (!hostname.empty() && hostname[0] == '.') |
| 397 return hostname; | 452 return hostname; |
| 398 else | 453 else |
| 399 return "." + hostname; | 454 return "." + hostname; |
| 400 } | 455 } |
| 401 | 456 |
| 402 bool URLMatcherConditionFactory::SubstringPatternPointerCompare::operator()( | 457 bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()( |
| 403 SubstringPattern* lhs, | 458 StringPattern* lhs, |
| 404 SubstringPattern* rhs) const { | 459 StringPattern* rhs) const { |
| 405 if (lhs == NULL && rhs != NULL) return true; | 460 if (lhs == NULL && rhs != NULL) return true; |
| 406 if (lhs != NULL && rhs != NULL) | 461 if (lhs != NULL && rhs != NULL) |
| 407 return lhs->pattern() < rhs->pattern(); | 462 return lhs->pattern() < rhs->pattern(); |
| 408 // Either both are NULL or only rhs is NULL. | 463 // Either both are NULL or only rhs is NULL. |
| 409 return false; | 464 return false; |
| 410 } | 465 } |
| 411 | 466 |
| 412 // | 467 // |
| 413 // URLMatcherSchemeFilter | 468 // URLMatcherSchemeFilter |
| 414 // | 469 // |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 476 ID id, | 531 ID id, |
| 477 const Conditions& conditions, | 532 const Conditions& conditions, |
| 478 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, | 533 scoped_ptr<URLMatcherSchemeFilter> scheme_filter, |
| 479 scoped_ptr<URLMatcherPortFilter> port_filter) | 534 scoped_ptr<URLMatcherPortFilter> port_filter) |
| 480 : id_(id), | 535 : id_(id), |
| 481 conditions_(conditions), | 536 conditions_(conditions), |
| 482 scheme_filter_(scheme_filter.Pass()), | 537 scheme_filter_(scheme_filter.Pass()), |
| 483 port_filter_(port_filter.Pass()) {} | 538 port_filter_(port_filter.Pass()) {} |
| 484 | 539 |
| 485 bool URLMatcherConditionSet::IsMatch( | 540 bool URLMatcherConditionSet::IsMatch( |
| 486 const std::set<SubstringPattern::ID>& matching_substring_patterns, | 541 const std::set<StringPattern::ID>& matching_patterns, |
| 487 const GURL& url) const { | 542 const GURL& url) const { |
| 488 for (Conditions::const_iterator i = conditions_.begin(); | 543 for (Conditions::const_iterator i = conditions_.begin(); |
| 489 i != conditions_.end(); ++i) { | 544 i != conditions_.end(); ++i) { |
| 490 if (!i->IsMatch(matching_substring_patterns, url)) | 545 if (!i->IsMatch(matching_patterns, url)) |
| 491 return false; | 546 return false; |
| 492 } | 547 } |
| 493 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) | 548 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) |
| 494 return false; | 549 return false; |
| 495 if (port_filter_.get() && !port_filter_->IsMatch(url)) | 550 if (port_filter_.get() && !port_filter_->IsMatch(url)) |
| 496 return false; | 551 return false; |
| 497 return true; | 552 return true; |
| 498 } | 553 } |
| 499 | 554 |
| 500 | |
| 501 // | 555 // |
| 502 // URLMatcher | 556 // URLMatcher |
| 503 // | 557 // |
| 504 | 558 |
| 505 URLMatcher::URLMatcher() {} | 559 URLMatcher::URLMatcher() {} |
| 506 | 560 |
| 507 URLMatcher::~URLMatcher() {} | 561 URLMatcher::~URLMatcher() {} |
| 508 | 562 |
| 509 void URLMatcher::AddConditionSets( | 563 void URLMatcher::AddConditionSets( |
| 510 const URLMatcherConditionSet::Vector& condition_sets) { | 564 const URLMatcherConditionSet::Vector& condition_sets) { |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 526 url_matcher_condition_sets_.erase(*i); | 580 url_matcher_condition_sets_.erase(*i); |
| 527 } | 581 } |
| 528 UpdateInternalDatastructures(); | 582 UpdateInternalDatastructures(); |
| 529 } | 583 } |
| 530 | 584 |
| 531 void URLMatcher::ClearUnusedConditionSets() { | 585 void URLMatcher::ClearUnusedConditionSets() { |
| 532 UpdateConditionFactory(); | 586 UpdateConditionFactory(); |
| 533 } | 587 } |
| 534 | 588 |
| 535 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { | 589 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) { |
| 536 // Find all IDs of SubstringPatterns that match |url|. | 590 // Find all IDs of StringPatterns that match |url|. |
| 537 // See URLMatcherConditionFactory for the canonicalization of URLs and the | 591 // See URLMatcherConditionFactory for the canonicalization of URLs and the |
| 538 // distinction between full url searches and url component searches. | 592 // distinction between full url searches and url component searches. |
| 539 std::set<SubstringPattern::ID> matches; | 593 std::set<StringPattern::ID> matches; |
| 540 full_url_matcher_.Match( | 594 full_url_matcher_.Match( |
| 541 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); | 595 condition_factory_.CanonicalizeURLForFullSearches(url), &matches); |
| 542 url_component_matcher_.Match( | 596 url_component_matcher_.Match( |
| 543 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); | 597 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); |
| 598 regex_set_matcher_.Match( | |
| 599 condition_factory_.CanonicalizeURLForRegexSearches(url), &matches); | |
| 544 | 600 |
| 545 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions | 601 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions |
| 546 // were fulfilled. | 602 // were fulfilled. |
| 547 std::set<URLMatcherConditionSet::ID> result; | 603 std::set<URLMatcherConditionSet::ID> result; |
| 548 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin(); | 604 for (std::set<StringPattern::ID>::const_iterator i = matches.begin(); |
| 549 i != matches.end(); ++i) { | 605 i != matches.end(); ++i) { |
| 550 // For each URLMatcherConditionSet there is exactly one condition | 606 // For each URLMatcherConditionSet there is exactly one condition |
| 551 // registered in substring_match_triggers_. This means that the following | 607 // registered in substring_match_triggers_. This means that the following |
| 552 // logic tests each URLMatcherConditionSet exactly once if it can be | 608 // logic tests each URLMatcherConditionSet exactly once if it can be |
| 553 // completely fulfilled. | 609 // completely fulfilled. |
| 554 std::set<URLMatcherConditionSet::ID>& condition_sets = | 610 std::set<URLMatcherConditionSet::ID>& condition_sets = |
| 555 substring_match_triggers_[*i]; | 611 substring_match_triggers_[*i]; |
| 556 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = | 612 for (std::set<URLMatcherConditionSet::ID>::const_iterator j = |
| 557 condition_sets.begin(); j != condition_sets.end(); ++j) { | 613 condition_sets.begin(); j != condition_sets.end(); ++j) { |
| 558 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) | 614 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url)) |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 573 registered_url_component_patterns_.empty(); | 629 registered_url_component_patterns_.empty(); |
| 574 } | 630 } |
| 575 | 631 |
| 576 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { | 632 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { |
| 577 // The purpose of |full_url_conditions| is just that we need to execute | 633 // The purpose of |full_url_conditions| is just that we need to execute |
| 578 // the same logic once for Full URL searches and once for URL Component | 634 // the same logic once for Full URL searches and once for URL Component |
| 579 // searches (see URLMatcherConditionFactory). | 635 // searches (see URLMatcherConditionFactory). |
| 580 | 636 |
| 581 // Determine which patterns need to be registered when this function | 637 // Determine which patterns need to be registered when this function |
| 582 // terminates. | 638 // terminates. |
| 583 std::set<const SubstringPattern*> new_patterns; | 639 std::set<const StringPattern*> new_patterns; |
| 584 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 640 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
| 585 url_matcher_condition_sets_.begin(); | 641 url_matcher_condition_sets_.begin(); |
| 586 condition_set_iter != url_matcher_condition_sets_.end(); | 642 condition_set_iter != url_matcher_condition_sets_.end(); |
| 587 ++condition_set_iter) { | 643 ++condition_set_iter) { |
| 588 const URLMatcherConditionSet::Conditions& conditions = | 644 const URLMatcherConditionSet::Conditions& conditions = |
| 589 condition_set_iter->second->conditions(); | 645 condition_set_iter->second->conditions(); |
| 590 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 646 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 591 conditions.begin(); condition_iter != conditions.end(); | 647 conditions.begin(); condition_iter != conditions.end(); |
| 592 ++condition_iter) { | 648 ++condition_iter) { |
| 593 // If we are called to process Full URL searches, ignore all others, | 649 // If we are called to process Full URL searches, ignore others, and |
| 594 // and vice versa. | 650 // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.) |
| 595 if (full_url_conditions == condition_iter->IsFullURLCondition()) | 651 if (!condition_iter->IsRegexCondition() && |
| 596 new_patterns.insert(condition_iter->substring_pattern()); | 652 full_url_conditions == condition_iter->IsFullURLCondition()) |
| 653 new_patterns.insert(condition_iter->string_pattern()); | |
| 597 } | 654 } |
| 598 } | 655 } |
| 599 | 656 |
| 600 // This is the set of patterns that were registered before this function | 657 // This is the set of patterns that were registered before this function |
| 601 // is called. | 658 // is called. |
| 602 std::set<const SubstringPattern*>& registered_patterns = | 659 std::set<const StringPattern*>& registered_patterns = |
| 603 full_url_conditions ? registered_full_url_patterns_ | 660 full_url_conditions ? registered_full_url_patterns_ |
| 604 : registered_url_component_patterns_; | 661 : registered_url_component_patterns_; |
| 605 | 662 |
| 606 // Add all patterns that are in new_patterns but not in registered_patterns. | 663 // Add all patterns that are in new_patterns but not in registered_patterns. |
| 607 std::vector<const SubstringPattern*> patterns_to_register; | 664 std::vector<const StringPattern*> patterns_to_register; |
| 608 std::set_difference( | 665 std::set_difference( |
| 609 new_patterns.begin(), new_patterns.end(), | 666 new_patterns.begin(), new_patterns.end(), |
| 610 registered_patterns.begin(), registered_patterns.end(), | 667 registered_patterns.begin(), registered_patterns.end(), |
| 611 std::back_inserter(patterns_to_register)); | 668 std::back_inserter(patterns_to_register)); |
| 612 | 669 |
| 613 // Remove all patterns that are in registered_patterns but not in | 670 // Remove all patterns that are in registered_patterns but not in |
| 614 // new_patterns. | 671 // new_patterns. |
| 615 std::vector<const SubstringPattern*> patterns_to_unregister; | 672 std::vector<const StringPattern*> patterns_to_unregister; |
| 616 std::set_difference( | 673 std::set_difference( |
| 617 registered_patterns.begin(), registered_patterns.end(), | 674 registered_patterns.begin(), registered_patterns.end(), |
| 618 new_patterns.begin(), new_patterns.end(), | 675 new_patterns.begin(), new_patterns.end(), |
| 619 std::back_inserter(patterns_to_unregister)); | 676 std::back_inserter(patterns_to_unregister)); |
| 620 | 677 |
| 621 // Update the SubstringSetMatcher. | 678 // Update the SubstringSetMatcher. |
| 622 SubstringSetMatcher& url_matcher = | 679 SubstringSetMatcher& url_matcher = |
| 623 full_url_conditions ? full_url_matcher_ : url_component_matcher_; | 680 full_url_conditions ? full_url_matcher_ : url_component_matcher_; |
| 624 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, | 681 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, |
| 625 patterns_to_unregister); | 682 patterns_to_unregister); |
| 626 | 683 |
| 627 // Update the set of registered_patterns for the next time this function | 684 // Update the set of registered_patterns for the next time this function |
| 628 // is being called. | 685 // is being called. |
| 629 registered_patterns.swap(new_patterns); | 686 registered_patterns.swap(new_patterns); |
| 630 } | 687 } |
| 631 | 688 |
| 689 void URLMatcher::UpdateRegexSetMatcher() { | |
| 690 std::vector<const StringPattern*> new_patterns; | |
| 691 | |
| 692 for (URLMatcherConditionSets::const_iterator condition_set_iter = | |
| 693 url_matcher_condition_sets_.begin(); | |
| 694 condition_set_iter != url_matcher_condition_sets_.end(); | |
| 695 ++condition_set_iter) { | |
| 696 const URLMatcherConditionSet::Conditions& conditions = | |
| 697 condition_set_iter->second->conditions(); | |
| 698 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | |
| 699 conditions.begin(); condition_iter != conditions.end(); | |
| 700 ++condition_iter) { | |
| 701 if (condition_iter->IsRegexCondition()) | |
| 702 new_patterns.push_back(condition_iter->string_pattern()); | |
| 703 } | |
| 704 } | |
| 705 | |
| 706 // Start over from scratch. We can't really do better than this, since the | |
| 707 // FilteredRE2 backend doesn't support incremental updates. | |
| 708 regex_set_matcher_.ClearPatterns(); | |
| 709 regex_set_matcher_.AddPatterns(new_patterns); | |
| 710 } | |
| 711 | |
| 632 void URLMatcher::UpdateTriggers() { | 712 void URLMatcher::UpdateTriggers() { |
| 633 // Count substring pattern frequencies. | 713 // Count substring pattern frequencies. |
| 634 std::map<SubstringPattern::ID, size_t> substring_pattern_frequencies; | 714 std::map<StringPattern::ID, size_t> substring_pattern_frequencies; |
| 635 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 715 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
| 636 url_matcher_condition_sets_.begin(); | 716 url_matcher_condition_sets_.begin(); |
| 637 condition_set_iter != url_matcher_condition_sets_.end(); | 717 condition_set_iter != url_matcher_condition_sets_.end(); |
| 638 ++condition_set_iter) { | 718 ++condition_set_iter) { |
| 639 const URLMatcherConditionSet::Conditions& conditions = | 719 const URLMatcherConditionSet::Conditions& conditions = |
| 640 condition_set_iter->second->conditions(); | 720 condition_set_iter->second->conditions(); |
| 641 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 721 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 642 conditions.begin(); condition_iter != conditions.end(); | 722 conditions.begin(); condition_iter != conditions.end(); |
| 643 ++condition_iter) { | 723 ++condition_iter) { |
| 644 const SubstringPattern* pattern = condition_iter->substring_pattern(); | 724 const StringPattern* pattern = condition_iter->string_pattern(); |
| 645 substring_pattern_frequencies[pattern->id()]++; | 725 substring_pattern_frequencies[pattern->id()]++; |
| 646 } | 726 } |
| 647 } | 727 } |
| 648 | 728 |
| 649 // Update trigger conditions: Determine for each URLMatcherConditionSet which | 729 // Update trigger conditions: Determine for each URLMatcherConditionSet which |
| 650 // URLMatcherCondition contains a SubstringPattern that occurs least | 730 // URLMatcherCondition contains a StringPattern that occurs least |
| 651 // frequently in this URLMatcher. We assume that this condition is very | 731 // frequently in this URLMatcher. We assume that this condition is very |
| 652 // specific and occurs rarely in URLs. If a match occurs for this | 732 // specific and occurs rarely in URLs. If a match occurs for this |
| 653 // URLMatcherCondition, we want to test all other URLMatcherCondition in the | 733 // URLMatcherCondition, we want to test all other URLMatcherCondition in the |
| 654 // respective URLMatcherConditionSet as well to see whether the entire | 734 // respective URLMatcherConditionSet as well to see whether the entire |
| 655 // URLMatcherConditionSet is considered matching. | 735 // URLMatcherConditionSet is considered matching. |
| 656 substring_match_triggers_.clear(); | 736 substring_match_triggers_.clear(); |
| 657 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 737 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
| 658 url_matcher_condition_sets_.begin(); | 738 url_matcher_condition_sets_.begin(); |
| 659 condition_set_iter != url_matcher_condition_sets_.end(); | 739 condition_set_iter != url_matcher_condition_sets_.end(); |
| 660 ++condition_set_iter) { | 740 ++condition_set_iter) { |
| 661 const URLMatcherConditionSet::Conditions& conditions = | 741 const URLMatcherConditionSet::Conditions& conditions = |
| 662 condition_set_iter->second->conditions(); | 742 condition_set_iter->second->conditions(); |
| 663 if (conditions.empty()) | 743 if (conditions.empty()) |
| 664 continue; | 744 continue; |
| 665 URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 745 URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 666 conditions.begin(); | 746 conditions.begin(); |
| 667 SubstringPattern::ID trigger = condition_iter->substring_pattern()->id(); | 747 StringPattern::ID trigger = condition_iter->string_pattern()->id(); |
| 668 // We skip the first element in the following loop. | 748 // We skip the first element in the following loop. |
| 669 ++condition_iter; | 749 ++condition_iter; |
| 670 for (; condition_iter != conditions.end(); ++condition_iter) { | 750 for (; condition_iter != conditions.end(); ++condition_iter) { |
| 671 SubstringPattern::ID current_id = | 751 StringPattern::ID current_id = |
| 672 condition_iter->substring_pattern()->id(); | 752 condition_iter->string_pattern()->id(); |
| 673 if (substring_pattern_frequencies[trigger] > | 753 if (substring_pattern_frequencies[trigger] > |
| 674 substring_pattern_frequencies[current_id]) { | 754 substring_pattern_frequencies[current_id]) { |
| 675 trigger = current_id; | 755 trigger = current_id; |
| 676 } | 756 } |
| 677 } | 757 } |
| 678 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); | 758 substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); |
| 679 } | 759 } |
| 680 } | 760 } |
| 681 | 761 |
| 682 void URLMatcher::UpdateConditionFactory() { | 762 void URLMatcher::UpdateConditionFactory() { |
| 683 std::set<SubstringPattern::ID> used_patterns; | 763 std::set<StringPattern::ID> used_patterns; |
| 684 for (URLMatcherConditionSets::const_iterator condition_set_iter = | 764 for (URLMatcherConditionSets::const_iterator condition_set_iter = |
| 685 url_matcher_condition_sets_.begin(); | 765 url_matcher_condition_sets_.begin(); |
| 686 condition_set_iter != url_matcher_condition_sets_.end(); | 766 condition_set_iter != url_matcher_condition_sets_.end(); |
| 687 ++condition_set_iter) { | 767 ++condition_set_iter) { |
| 688 const URLMatcherConditionSet::Conditions& conditions = | 768 const URLMatcherConditionSet::Conditions& conditions = |
| 689 condition_set_iter->second->conditions(); | 769 condition_set_iter->second->conditions(); |
| 690 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = | 770 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 691 conditions.begin(); condition_iter != conditions.end(); | 771 conditions.begin(); condition_iter != conditions.end(); |
| 692 ++condition_iter) { | 772 ++condition_iter) { |
| 693 used_patterns.insert(condition_iter->substring_pattern()->id()); | 773 used_patterns.insert(condition_iter->string_pattern()->id()); |
| 694 } | 774 } |
| 695 } | 775 } |
| 696 condition_factory_.ForgetUnusedPatterns(used_patterns); | 776 condition_factory_.ForgetUnusedPatterns(used_patterns); |
| 697 } | 777 } |
| 698 | 778 |
| 699 void URLMatcher::UpdateInternalDatastructures() { | 779 void URLMatcher::UpdateInternalDatastructures() { |
| 700 UpdateSubstringSetMatcher(false); | 780 UpdateSubstringSetMatcher(false); |
| 701 UpdateSubstringSetMatcher(true); | 781 UpdateSubstringSetMatcher(true); |
| 782 UpdateRegexSetMatcher(); | |
| 702 UpdateTriggers(); | 783 UpdateTriggers(); |
| 703 UpdateConditionFactory(); | 784 UpdateConditionFactory(); |
| 704 } | 785 } |
| 705 | 786 |
| 706 } // namespace extensions | 787 } // namespace extensions |
| OLD | NEW |