Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Unified Diff: chrome/common/extensions/matcher/url_matcher.cc

Issue 12092096: Move c/c/extensions/matcher/ to top level extension dir (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: TOT Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/common/extensions/matcher/url_matcher.cc
diff --git a/chrome/common/extensions/matcher/url_matcher.cc b/chrome/common/extensions/matcher/url_matcher.cc
deleted file mode 100644
index a6a6215b36857b3d3e1c50a42252262f50babe1f..0000000000000000000000000000000000000000
--- a/chrome/common/extensions/matcher/url_matcher.cc
+++ /dev/null
@@ -1,821 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/common/extensions/matcher/url_matcher.h"
-
-#include <algorithm>
-#include <iterator>
-
-#include "base/logging.h"
-#include "content/public/common/url_constants.h"
-#include "googleurl/src/gurl.h"
-#include "googleurl/src/url_canon.h"
-
-namespace extensions {
-
-// This set of classes implement a mapping of URL Component Patterns, such as
-// host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns
-// for use in substring comparisons.
-//
-// The idea of this mapping is to reduce the problem of comparing many
-// URL Component Patterns against one URL to the problem of searching many
-// substrings in one string:
-//
-// ---------------------- -----------------
-// | URL Query operator | ----translate----> | StringPattern |
-// ---------------------- -----------------
-// ^
-// |
-// compare
-// |
-// v
-// ---------------------- -----------------
-// | URL to compare | | |
-// | to all URL Query | ----translate----> | String |
-// | operators | | |
-// ---------------------- -----------------
-//
-// The reason for this problem reduction is that there are efficient algorithms
-// for searching many substrings in one string (see Aho-Corasick algorithm).
-//
-// Additionally, some of the same pieces are reused to implement regular
-// expression comparisons. The FilteredRE2 implementation for matching many
-// regular expressions against one string uses prefiltering, in which a set
-// of substrings (derived from the regexes) are first searched for, to reduce
-// the number of regular expressions to test; the prefiltering step also
-// uses Aho-Corasick.
-//
-// Case 1: {host,path,query}_{prefix,suffix,equals} searches.
-// ==========================================================
-//
-// For searches in this class, we normalize URLs as follows:
-//
-// Step 1:
-// Remove scheme, port and segment from URL:
-// -> http://www.example.com:8080/index.html?search=foo#first_match becomes
-// www.example.com/index.html?search=foo
-//
-// We remove the scheme and port number because they can be checked later
-// in a secondary filter step. We remove the segment (the #... part) because
-// this is not guaranteed to be ASCII-7 encoded.
-//
-// Step 2:
-// Translate URL to String and add the following position markers:
-// - BU = Beginning of URL
-// - ED = End of Domain
-// - EP = End of Path
-// - EU = End of URL
-// Furthermore, the hostname is canonicalized to start with a ".".
-//
-// Position markers are represented as characters >127, which are therefore
-// guaranteed not to be part of the ASCII-7 encoded URL character set.
-//
-// -> www.example.com/index.html?search=foo becomes
-// BU .www.example.com ED /index.html EP ?search=foo EU
-//
-// -> www.example.com/index.html becomes
-// BU .www.example.com ED /index.html EP EU
-//
-// Step 3:
-// Translate URL Component Patterns as follows:
-//
-// host_prefix(prefix) = BU add_missing_dot_prefix(prefix)
-// -> host_prefix("www.example") = BU .www.example
-//
-// host_suffix(suffix) = suffix ED
-// -> host_suffix("example.com") = example.com ED
-// -> host_suffix(".example.com") = .example.com ED
-//
-// host_equals(domain) = BU add_missing_dot_prefix(domain) ED
-// -> host_equals("www.example.com") = BU .www.example.com ED
-//
-// Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
-//
-// With this, we can search the StringPatterns in the normalized URL.
-//
-//
-// Case 2: url_{prefix,suffix,equals,contains} searches.
-// =====================================================
-//
-// Step 1: as above, except that
-// - the scheme is not removed
-// - the port is not removed if it is specified and does not match the default
-// port for the given scheme.
-//
-// Step 2:
-// Translate URL to String and add the following position markers:
-// - BU = Beginning of URL
-// - EU = End of URL
-//
-// -> http://www.example.com:8080/index.html?search=foo#first_match becomes
-// BU http://www.example.com:8080/index.html?search=foo EU
-// -> http://www.example.com:80/index.html?search=foo#first_match becomes
-// BU http://www.example.com/index.html?search=foo EU
-//
-// url_prefix(prefix) = BU prefix
-// -> url_prefix("http://www.example") = BU http://www.example
-//
-// url_contains(substring) = substring
-// -> url_contains("index") = index
-//
-//
-// Case 3: {host,path,query}_contains searches.
-// ============================================
-//
-// These kinds of searches are not supported directly but can be derived
-// by a combination of a url_contains() query followed by an explicit test:
-//
-// host_contains(str) = url_contains(str) followed by test whether str occurs
-// in host component of original URL.
-// -> host_contains("example.co") = example.co
-// followed by gurl.host().find("example.co");
-//
-// [similarly for path_contains and query_contains].
-//
-//
-// Regular expression matching (url_matches searches)
-// ==================================================
-//
-// This class also supports matching regular expressions (RE2 syntax)
-// against full URLs, which are transformed as in case 2.
-
-namespace {
-
-bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) {
- return criterion == URLMatcherCondition::URL_MATCHES;
-}
-
-} // namespace
-
-//
-// URLMatcherCondition
-//
-
-URLMatcherCondition::URLMatcherCondition()
- : criterion_(HOST_PREFIX),
- string_pattern_(NULL) {}
-
-URLMatcherCondition::~URLMatcherCondition() {}
-
-URLMatcherCondition::URLMatcherCondition(
- Criterion criterion,
- const StringPattern* string_pattern)
- : criterion_(criterion),
- string_pattern_(string_pattern) {}
-
-URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
- : criterion_(rhs.criterion_),
- string_pattern_(rhs.string_pattern_) {}
-
-URLMatcherCondition& URLMatcherCondition::operator=(
- const URLMatcherCondition& rhs) {
- criterion_ = rhs.criterion_;
- string_pattern_ = rhs.string_pattern_;
- return *this;
-}
-
-bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
- if (criterion_ < rhs.criterion_) return true;
- if (criterion_ > rhs.criterion_) return false;
- if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
- return *string_pattern_ < *rhs.string_pattern_;
- if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true;
- // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
- // or both are NULL.
- return false;
-}
-
-bool URLMatcherCondition::IsFullURLCondition() const {
- // For these criteria the SubstringMatcher needs to be executed on the
- // GURL that is canonicalized with
- // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
- switch (criterion_) {
- case HOST_CONTAINS:
- case PATH_CONTAINS:
- case QUERY_CONTAINS:
- case URL_PREFIX:
- case URL_SUFFIX:
- case URL_CONTAINS:
- case URL_EQUALS:
- return true;
- default:
- break;
- }
- return false;
-}
-
-bool URLMatcherCondition::IsRegexCondition() const {
- return IsRegexCriterion(criterion_);
-}
-
-bool URLMatcherCondition::IsMatch(
- const std::set<StringPattern::ID>& matching_patterns,
- const GURL& url) const {
- DCHECK(string_pattern_);
- if (!ContainsKey(matching_patterns, string_pattern_->id()))
- return false;
- // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
- // a substring match on the raw URL. In case of a match, we need to verify
- // that the match was found in the correct component of the URL.
- switch (criterion_) {
- case HOST_CONTAINS:
- return url.host().find(string_pattern_->pattern()) !=
- std::string::npos;
- case PATH_CONTAINS:
- return url.path().find(string_pattern_->pattern()) !=
- std::string::npos;
- case QUERY_CONTAINS:
- return url.query().find(string_pattern_->pattern()) !=
- std::string::npos;
- default:
- break;
- }
- return true;
-}
-
-//
-// URLMatcherConditionFactory
-//
-
-namespace {
-// These are symbols that are not contained in 7-bit ASCII used in GURLs.
-const char kBeginningOfURL[] = {static_cast<char>(-1), 0};
-const char kEndOfDomain[] = {static_cast<char>(-2), 0};
-const char kEndOfPath[] = {static_cast<char>(-3), 0};
-const char kEndOfURL[] = {static_cast<char>(-4), 0};
-} // namespace
-
-URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
-
-URLMatcherConditionFactory::~URLMatcherConditionFactory() {
- STLDeleteElements(&substring_pattern_singletons_);
- STLDeleteElements(&regex_pattern_singletons_);
-}
-
-std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
- const GURL& url) const {
- return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain +
- url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") +
- kEndOfURL;
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition(
- const std::string& prefix) {
- return CreateCondition(URLMatcherCondition::HOST_PREFIX,
- kBeginningOfURL + CanonicalizeHostname(prefix));
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateHostSuffixCondition(
- const std::string& suffix) {
- return CreateCondition(URLMatcherCondition::HOST_SUFFIX,
- suffix + kEndOfDomain);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateHostContainsCondition(
- const std::string& str) {
- return CreateCondition(URLMatcherCondition::HOST_CONTAINS, str);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateHostEqualsCondition(
- const std::string& str) {
- return CreateCondition(URLMatcherCondition::HOST_EQUALS,
- kBeginningOfURL + CanonicalizeHostname(str) + kEndOfDomain);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreatePathPrefixCondition(
- const std::string& prefix) {
- return CreateCondition(URLMatcherCondition::PATH_PREFIX,
- kEndOfDomain + prefix);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreatePathSuffixCondition(
- const std::string& suffix) {
- return CreateCondition(URLMatcherCondition::PATH_SUFFIX, suffix + kEndOfPath);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreatePathContainsCondition(
- const std::string& str) {
- return CreateCondition(URLMatcherCondition::PATH_CONTAINS, str);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreatePathEqualsCondition(
- const std::string& str) {
- return CreateCondition(URLMatcherCondition::PATH_EQUALS,
- kEndOfDomain + str + kEndOfPath);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateQueryPrefixCondition(
- const std::string& prefix) {
- std::string pattern;
- if (!prefix.empty() && prefix[0] == '?')
- pattern = kEndOfPath + prefix;
- else
- pattern = kEndOfPath + ('?' + prefix);
-
- return CreateCondition(URLMatcherCondition::QUERY_PREFIX, pattern);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateQuerySuffixCondition(
- const std::string& suffix) {
- if (!suffix.empty() && suffix[0] == '?') {
- return CreateQueryEqualsCondition(suffix);
- } else {
- return CreateCondition(URLMatcherCondition::QUERY_SUFFIX,
- suffix + kEndOfURL);
- }
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateQueryContainsCondition(
- const std::string& str) {
- if (!str.empty() && str[0] == '?')
- return CreateQueryPrefixCondition(str);
- else
- return CreateCondition(URLMatcherCondition::QUERY_CONTAINS, str);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateQueryEqualsCondition(
- const std::string& str) {
- std::string pattern;
- if (!str.empty() && str[0] == '?')
- pattern = kEndOfPath + str + kEndOfURL;
- else
- pattern = kEndOfPath + ('?' + str) + kEndOfURL;
-
- return CreateCondition(URLMatcherCondition::QUERY_EQUALS, pattern);
-}
-
-URLMatcherCondition
- URLMatcherConditionFactory::CreateHostSuffixPathPrefixCondition(
- const std::string& host_suffix,
- const std::string& path_prefix) {
- return CreateCondition(URLMatcherCondition::HOST_SUFFIX_PATH_PREFIX,
- host_suffix + kEndOfDomain + path_prefix);
-}
-
-URLMatcherCondition
-URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition(
- const std::string& host,
- const std::string& path_prefix) {
- return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX,
- kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain +
- path_prefix);
-}
-
-std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches(
- const GURL& url) const {
- GURL::Replacements replacements;
- replacements.ClearPassword();
- replacements.ClearUsername();
- replacements.ClearRef();
- // Clear port if it is implicit from scheme.
- if (url.has_port()) {
- const std::string& port = url.scheme();
- if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
- url.EffectiveIntPort()) {
- replacements.ClearPort();
- }
- }
- return kBeginningOfURL + url.ReplaceComponents(replacements).spec() +
- kEndOfURL;
-}
-
-std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches(
- const GURL& url) const {
- GURL::Replacements replacements;
- replacements.ClearPassword();
- replacements.ClearUsername();
- replacements.ClearRef();
- // Clear port if it is implicit from scheme.
- if (url.has_port()) {
- const std::string& port = url.scheme();
- if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
- url.EffectiveIntPort()) {
- replacements.ClearPort();
- }
- }
- return url.ReplaceComponents(replacements).spec();
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
- const std::string& prefix) {
- return CreateCondition(URLMatcherCondition::URL_PREFIX,
- kBeginningOfURL + prefix);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
- const std::string& suffix) {
- return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
- const std::string& str) {
- return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
- const std::string& str) {
- return CreateCondition(URLMatcherCondition::URL_EQUALS,
- kBeginningOfURL + str + kEndOfURL);
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition(
- const std::string& regex) {
- return CreateCondition(URLMatcherCondition::URL_MATCHES, regex);
-}
-
-void URLMatcherConditionFactory::ForgetUnusedPatterns(
- const std::set<StringPattern::ID>& used_patterns) {
- PatternSingletons::iterator i = substring_pattern_singletons_.begin();
- while (i != substring_pattern_singletons_.end()) {
- if (used_patterns.find((*i)->id()) != used_patterns.end()) {
- ++i;
- } else {
- delete *i;
- substring_pattern_singletons_.erase(i++);
- }
- }
- i = regex_pattern_singletons_.begin();
- while (i != regex_pattern_singletons_.end()) {
- if (used_patterns.find((*i)->id()) != used_patterns.end()) {
- ++i;
- } else {
- delete *i;
- regex_pattern_singletons_.erase(i++);
- }
- }
-}
-
-bool URLMatcherConditionFactory::IsEmpty() const {
- return substring_pattern_singletons_.empty() &&
- regex_pattern_singletons_.empty();
-}
-
-URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
- URLMatcherCondition::Criterion criterion,
- const std::string& pattern) {
- StringPattern search_pattern(pattern, 0);
- PatternSingletons* pattern_singletons =
- IsRegexCriterion(criterion) ? &regex_pattern_singletons_
- : &substring_pattern_singletons_;
-
- PatternSingletons::const_iterator iter =
- pattern_singletons->find(&search_pattern);
-
- if (iter != pattern_singletons->end()) {
- return URLMatcherCondition(criterion, *iter);
- } else {
- StringPattern* new_pattern =
- new StringPattern(pattern, id_counter_++);
- pattern_singletons->insert(new_pattern);
- return URLMatcherCondition(criterion, new_pattern);
- }
-}
-
-std::string URLMatcherConditionFactory::CanonicalizeHostname(
- const std::string& hostname) const {
- if (!hostname.empty() && hostname[0] == '.')
- return hostname;
- else
- return "." + hostname;
-}
-
-bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()(
- StringPattern* lhs,
- StringPattern* rhs) const {
- if (lhs == NULL && rhs != NULL) return true;
- if (lhs != NULL && rhs != NULL)
- return lhs->pattern() < rhs->pattern();
- // Either both are NULL or only rhs is NULL.
- return false;
-}
-
-//
-// URLMatcherSchemeFilter
-//
-
-URLMatcherSchemeFilter::URLMatcherSchemeFilter(const std::string& filter)
- : filters_(1) {
- filters_.push_back(filter);
-}
-
-URLMatcherSchemeFilter::URLMatcherSchemeFilter(
- const std::vector<std::string>& filters)
- : filters_(filters) {}
-
-URLMatcherSchemeFilter::~URLMatcherSchemeFilter() {}
-
-bool URLMatcherSchemeFilter::IsMatch(const GURL& url) const {
- return std::find(filters_.begin(), filters_.end(), url.scheme()) !=
- filters_.end();
-}
-
-//
-// URLMatcherPortFilter
-//
-
-URLMatcherPortFilter::URLMatcherPortFilter(
- const std::vector<URLMatcherPortFilter::Range>& ranges)
- : ranges_(ranges) {}
-
-URLMatcherPortFilter::~URLMatcherPortFilter() {}
-
-bool URLMatcherPortFilter::IsMatch(const GURL& url) const {
- int port = url.EffectiveIntPort();
- for (std::vector<Range>::const_iterator i = ranges_.begin();
- i != ranges_.end(); ++i) {
- if (i->first <= port && port <= i->second)
- return true;
- }
- return false;
-}
-
-// static
-URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int from,
- int to) {
- return Range(from, to);
-}
-
-// static
-URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int port) {
- return Range(port, port);
-}
-
-//
-// URLMatcherConditionSet
-//
-
-URLMatcherConditionSet::~URLMatcherConditionSet() {}
-
-URLMatcherConditionSet::URLMatcherConditionSet(
- ID id,
- const Conditions& conditions)
- : id_(id),
- conditions_(conditions) {}
-
-URLMatcherConditionSet::URLMatcherConditionSet(
- ID id,
- const Conditions& conditions,
- scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
- scoped_ptr<URLMatcherPortFilter> port_filter)
- : id_(id),
- conditions_(conditions),
- scheme_filter_(scheme_filter.Pass()),
- port_filter_(port_filter.Pass()) {}
-
-bool URLMatcherConditionSet::IsMatch(
- const std::set<StringPattern::ID>& matching_patterns,
- const GURL& url) const {
- for (Conditions::const_iterator i = conditions_.begin();
- i != conditions_.end(); ++i) {
- if (!i->IsMatch(matching_patterns, url))
- return false;
- }
- if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
- return false;
- if (port_filter_.get() && !port_filter_->IsMatch(url))
- return false;
- return true;
-}
-
-//
-// URLMatcher
-//
-
-URLMatcher::URLMatcher() {}
-
-URLMatcher::~URLMatcher() {}
-
-void URLMatcher::AddConditionSets(
- const URLMatcherConditionSet::Vector& condition_sets) {
- for (URLMatcherConditionSet::Vector::const_iterator i =
- condition_sets.begin(); i != condition_sets.end(); ++i) {
- DCHECK(url_matcher_condition_sets_.find((*i)->id()) ==
- url_matcher_condition_sets_.end());
- url_matcher_condition_sets_[(*i)->id()] = *i;
- }
- UpdateInternalDatastructures();
-}
-
-void URLMatcher::RemoveConditionSets(
- const std::vector<URLMatcherConditionSet::ID>& condition_set_ids) {
- for (std::vector<URLMatcherConditionSet::ID>::const_iterator i =
- condition_set_ids.begin(); i != condition_set_ids.end(); ++i) {
- DCHECK(url_matcher_condition_sets_.find(*i) !=
- url_matcher_condition_sets_.end());
- url_matcher_condition_sets_.erase(*i);
- }
- UpdateInternalDatastructures();
-}
-
-void URLMatcher::ClearUnusedConditionSets() {
- UpdateConditionFactory();
-}
-
-std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(
- const GURL& url) const {
- // Find all IDs of StringPatterns that match |url|.
- // See URLMatcherConditionFactory for the canonicalization of URLs and the
- // distinction between full url searches and url component searches.
- std::set<StringPattern::ID> matches;
- full_url_matcher_.Match(
- condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
- url_component_matcher_.Match(
- condition_factory_.CanonicalizeURLForComponentSearches(url), &matches);
- regex_set_matcher_.Match(
- condition_factory_.CanonicalizeURLForRegexSearches(url), &matches);
-
- // Calculate all URLMatcherConditionSets for which all URLMatcherConditions
- // were fulfilled.
- std::set<URLMatcherConditionSet::ID> result;
- for (std::set<StringPattern::ID>::const_iterator i = matches.begin();
- i != matches.end(); ++i) {
- // For each URLMatcherConditionSet there is exactly one condition
- // registered in substring_match_triggers_. This means that the following
- // logic tests each URLMatcherConditionSet exactly once if it can be
- // completely fulfilled.
- StringPatternTriggers::const_iterator triggered_condition_sets_iter =
- substring_match_triggers_.find(*i);
- if (triggered_condition_sets_iter == substring_match_triggers_.end())
- continue; // Not all substring matches are triggers for a condition set.
- const std::set<URLMatcherConditionSet::ID>& condition_sets =
- triggered_condition_sets_iter->second;
- for (std::set<URLMatcherConditionSet::ID>::const_iterator j =
- condition_sets.begin(); j != condition_sets.end(); ++j) {
- URLMatcherConditionSets::const_iterator condition_set_iter =
- url_matcher_condition_sets_.find(*j);
- DCHECK(condition_set_iter != url_matcher_condition_sets_.end());
- if (condition_set_iter->second->IsMatch(matches, url))
- result.insert(*j);
- }
- }
-
- return result;
-}
-
-bool URLMatcher::IsEmpty() const {
- return condition_factory_.IsEmpty() &&
- url_matcher_condition_sets_.empty() &&
- substring_match_triggers_.empty() &&
- full_url_matcher_.IsEmpty() &&
- url_component_matcher_.IsEmpty() &&
- registered_full_url_patterns_.empty() &&
- registered_url_component_patterns_.empty();
-}
-
-void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) {
- // The purpose of |full_url_conditions| is just that we need to execute
- // the same logic once for Full URL searches and once for URL Component
- // searches (see URLMatcherConditionFactory).
-
- // Determine which patterns need to be registered when this function
- // terminates.
- std::set<const StringPattern*> new_patterns;
- for (URLMatcherConditionSets::const_iterator condition_set_iter =
- url_matcher_condition_sets_.begin();
- condition_set_iter != url_matcher_condition_sets_.end();
- ++condition_set_iter) {
- const URLMatcherConditionSet::Conditions& conditions =
- condition_set_iter->second->conditions();
- for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
- conditions.begin(); condition_iter != conditions.end();
- ++condition_iter) {
- // If we are called to process Full URL searches, ignore others, and
- // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.)
- if (!condition_iter->IsRegexCondition() &&
- full_url_conditions == condition_iter->IsFullURLCondition())
- new_patterns.insert(condition_iter->string_pattern());
- }
- }
-
- // This is the set of patterns that were registered before this function
- // is called.
- std::set<const StringPattern*>& registered_patterns =
- full_url_conditions ? registered_full_url_patterns_
- : registered_url_component_patterns_;
-
- // Add all patterns that are in new_patterns but not in registered_patterns.
- std::vector<const StringPattern*> patterns_to_register;
- std::set_difference(
- new_patterns.begin(), new_patterns.end(),
- registered_patterns.begin(), registered_patterns.end(),
- std::back_inserter(patterns_to_register));
-
- // Remove all patterns that are in registered_patterns but not in
- // new_patterns.
- std::vector<const StringPattern*> patterns_to_unregister;
- std::set_difference(
- registered_patterns.begin(), registered_patterns.end(),
- new_patterns.begin(), new_patterns.end(),
- std::back_inserter(patterns_to_unregister));
-
- // Update the SubstringSetMatcher.
- SubstringSetMatcher& url_matcher =
- full_url_conditions ? full_url_matcher_ : url_component_matcher_;
- url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
- patterns_to_unregister);
-
- // Update the set of registered_patterns for the next time this function
- // is being called.
- registered_patterns.swap(new_patterns);
-}
-
-void URLMatcher::UpdateRegexSetMatcher() {
- std::vector<const StringPattern*> new_patterns;
-
- for (URLMatcherConditionSets::const_iterator condition_set_iter =
- url_matcher_condition_sets_.begin();
- condition_set_iter != url_matcher_condition_sets_.end();
- ++condition_set_iter) {
- const URLMatcherConditionSet::Conditions& conditions =
- condition_set_iter->second->conditions();
- for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
- conditions.begin(); condition_iter != conditions.end();
- ++condition_iter) {
- if (condition_iter->IsRegexCondition())
- new_patterns.push_back(condition_iter->string_pattern());
- }
- }
-
- // Start over from scratch. We can't really do better than this, since the
- // FilteredRE2 backend doesn't support incremental updates.
- regex_set_matcher_.ClearPatterns();
- regex_set_matcher_.AddPatterns(new_patterns);
-}
-
-void URLMatcher::UpdateTriggers() {
- // Count substring pattern frequencies.
- std::map<StringPattern::ID, size_t> substring_pattern_frequencies;
- for (URLMatcherConditionSets::const_iterator condition_set_iter =
- url_matcher_condition_sets_.begin();
- condition_set_iter != url_matcher_condition_sets_.end();
- ++condition_set_iter) {
- const URLMatcherConditionSet::Conditions& conditions =
- condition_set_iter->second->conditions();
- for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
- conditions.begin(); condition_iter != conditions.end();
- ++condition_iter) {
- const StringPattern* pattern = condition_iter->string_pattern();
- substring_pattern_frequencies[pattern->id()]++;
- }
- }
-
- // Update trigger conditions: Determine for each URLMatcherConditionSet which
- // URLMatcherCondition contains a StringPattern that occurs least
- // frequently in this URLMatcher. We assume that this condition is very
- // specific and occurs rarely in URLs. If a match occurs for this
- // URLMatcherCondition, we want to test all other URLMatcherCondition in the
- // respective URLMatcherConditionSet as well to see whether the entire
- // URLMatcherConditionSet is considered matching.
- substring_match_triggers_.clear();
- for (URLMatcherConditionSets::const_iterator condition_set_iter =
- url_matcher_condition_sets_.begin();
- condition_set_iter != url_matcher_condition_sets_.end();
- ++condition_set_iter) {
- const URLMatcherConditionSet::Conditions& conditions =
- condition_set_iter->second->conditions();
- if (conditions.empty())
- continue;
- URLMatcherConditionSet::Conditions::const_iterator condition_iter =
- conditions.begin();
- StringPattern::ID trigger = condition_iter->string_pattern()->id();
- // We skip the first element in the following loop.
- ++condition_iter;
- for (; condition_iter != conditions.end(); ++condition_iter) {
- StringPattern::ID current_id =
- condition_iter->string_pattern()->id();
- if (substring_pattern_frequencies[trigger] >
- substring_pattern_frequencies[current_id]) {
- trigger = current_id;
- }
- }
- substring_match_triggers_[trigger].insert(condition_set_iter->second->id());
- }
-}
-
-void URLMatcher::UpdateConditionFactory() {
- std::set<StringPattern::ID> used_patterns;
- for (URLMatcherConditionSets::const_iterator condition_set_iter =
- url_matcher_condition_sets_.begin();
- condition_set_iter != url_matcher_condition_sets_.end();
- ++condition_set_iter) {
- const URLMatcherConditionSet::Conditions& conditions =
- condition_set_iter->second->conditions();
- for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
- conditions.begin(); condition_iter != conditions.end();
- ++condition_iter) {
- used_patterns.insert(condition_iter->string_pattern()->id());
- }
- }
- condition_factory_.ForgetUnusedPatterns(used_patterns);
-}
-
-void URLMatcher::UpdateInternalDatastructures() {
- UpdateSubstringSetMatcher(false);
- UpdateSubstringSetMatcher(true);
- UpdateRegexSetMatcher();
- UpdateTriggers();
- UpdateConditionFactory();
-}
-
-} // namespace extensions
« no previous file with comments | « chrome/common/extensions/matcher/url_matcher.h ('k') | chrome/common/extensions/matcher/url_matcher_constants.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698