Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(811)

Unified Diff: chrome/browser/extensions/api/declarative/url_matcher.cc

Issue 9390018: Implementation of a Matching strategy for URLs in the Declarative WebRequest API. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/extensions/api/declarative/url_matcher.cc
diff --git a/chrome/browser/extensions/api/declarative/url_matcher.cc b/chrome/browser/extensions/api/declarative/url_matcher.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3ffd928cd401569cfad1fe0b6cd98e7bded05d98
--- /dev/null
+++ b/chrome/browser/extensions/api/declarative/url_matcher.cc
@@ -0,0 +1,391 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/extensions/api/declarative/url_matcher.h"
+
+#include <algorithm>
+
+namespace extensions {
+
+//
+// UrlMatcherCondition
+//
+
+UrlMatcherCondition::UrlMatcherCondition() : criterion_(HOST_PREFIX) {}
+
+UrlMatcherCondition::UrlMatcherCondition(const UrlMatcherCondition& rhs)
+ : criterion_(rhs.criterion_), value_(rhs.value_),
+ substring_pattern_(rhs.substring_pattern_) {}
+
+UrlMatcherCondition& UrlMatcherCondition::operator=(
+ const UrlMatcherCondition& rhs) {
+ criterion_ = rhs.criterion_;
+ value_ = rhs.value_;
+ substring_pattern_ = rhs.substring_pattern_;
+ return *this;
+}
+
+UrlMatcherCondition::UrlMatcherCondition(Criterion criterion,
+ std::string value)
+ : criterion_(criterion), value_(value) {
+}
+
+bool UrlMatcherCondition::IsFullUrlCondition() const {
+ switch (criterion_) {
+ case HOST_CONTAINS:
+ case PATH_CONTAINS:
+ case QUERY_CONTAINS:
+ case URL_PREFIX:
+ case URL_SUFFIX:
+ case URL_CONTAINS:
+ case URL_EQUALS:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool UrlMatcherCondition::IsMatch(
+ const std::set<SubstringPattern::ID>& matching_substring_patterns,
+ const GURL& url) const {
+ DCHECK(SubstringPattern() != substring_pattern_);
+ if (matching_substring_patterns.find(substring_pattern_.id()) ==
+ matching_substring_patterns.end())
+ return false;
+ switch (criterion_) {
+ case HOST_CONTAINS:
+ return url.host().find(value_) != std::string::npos;
+ case PATH_CONTAINS:
+ return url.path().find(value_) != std::string::npos;
+ case QUERY_CONTAINS:
+ return url.query().find(value_) != std::string::npos;
+ default:
+ break;
+ }
+ return true;
+}
+
+void UrlMatcherCondition::BuildSubstringPattern(
+ UrlComponentPatterns* url_component_patterns) {
+ // Check that we have not been registered before
+ DCHECK(SubstringPattern() == substring_pattern_);
+ switch (criterion_) {
+ case HOST_PREFIX:
+ substring_pattern_ =
+ url_component_patterns->CreateHostPrefixPattern(value_);
+ break;
+ case HOST_SUFFIX:
+ substring_pattern_ =
+ url_component_patterns->CreateHostSuffixPattern(value_);
+ break;
+ case HOST_CONTAINS:
+ substring_pattern_ =
+ url_component_patterns->CreateURLContainsPattern(value_);
+ // This will be accompanied by a second check that the match occurs in
+ // the host part of a URL.
+ break;
+ case HOST_EQUALS:
+ substring_pattern_ =
+ url_component_patterns->CreateHostEqualsPattern(value_);
+ break;
+
+ case PATH_PREFIX:
+ substring_pattern_ =
+ url_component_patterns->CreatePathPrefixPattern(value_);
+ break;
+ case PATH_SUFFIX:
+ substring_pattern_ =
+ url_component_patterns->CreatePathSuffixPattern(value_);
+ break;
+ case PATH_CONTAINS:
+ substring_pattern_ =
+ url_component_patterns->CreateURLContainsPattern(value_);
+ // This will be accompanied by a second check that the match occurs in
+ // the path part of a URL.
+ break;
+ case PATH_EQUALS:
+ substring_pattern_ =
+ url_component_patterns->CreatePathEqualsPattern(value_);
+ break;
+
+ case QUERY_PREFIX:
+ substring_pattern_ =
+ url_component_patterns->CreateQueryPrefixPattern(value_);
+ break;
+ case QUERY_SUFFIX:
+ substring_pattern_ =
+ url_component_patterns->CreateQuerySuffixPattern(value_);
+ break;
+ case QUERY_CONTAINS:
+ substring_pattern_ =
+ url_component_patterns->CreateURLContainsPattern(value_);
+ // This will be accompanied by a second check that the match occurs in
+ // the query part of a URL.
+ break;
+ case QUERY_EQUALS:
+ substring_pattern_ =
+ url_component_patterns->CreateQueryEqualsPattern(value_);
+ break;
+
+ case URL_PREFIX:
+ substring_pattern_ =
+ url_component_patterns->CreateURLPrefixPattern(value_);
+ break;
+ case URL_SUFFIX:
+ substring_pattern_ =
+ url_component_patterns->CreateURLSuffixPattern(value_);
+ break;
+ case URL_CONTAINS:
+ substring_pattern_ =
+ url_component_patterns->CreateURLContainsPattern(value_);
+ break;
+ case URL_EQUALS:
+ substring_pattern_ =
+ url_component_patterns->CreateURLEqualsPattern(value_);
+ break;
+ }
+}
+
+
+//
+// UrlMatcherConditionSet
+//
+
+UrlMatcherConditionSet::UrlMatcherConditionSet() {}
+
+UrlMatcherConditionSet::UrlMatcherConditionSet(
+ const UrlMatcherConditionSet::UrlMatcherConditionSet& rhs)
+ : id_(rhs.id_), conditions_(rhs.conditions_) {}
+
+UrlMatcherConditionSet& UrlMatcherConditionSet::operator=(
+ const UrlMatcherConditionSet& rhs) {
+ id_ = rhs.id_;
+ conditions_ = rhs.conditions_;
+ return *this;
+}
+
+UrlMatcherConditionSet::UrlMatcherConditionSet(
+ ID id, const std::vector<UrlMatcherCondition>& conditions)
+ : id_(id), conditions_(conditions) {}
+
+bool UrlMatcherConditionSet::IsMatch(
+ const std::set<SubstringPattern::ID>& matching_substring_patterns,
+ const GURL& url) const {
+ for (Conditions::const_iterator i = conditions_.begin();
+ i != conditions_.end(); ++i) {
+ if (!i->IsMatch(matching_substring_patterns, url))
+ return false;
+ }
+ return true;
+}
+
+//
+// UrlMatcher
+//
+
+UrlMatcher::UrlMatcher() {}
+
+void UrlMatcher::AddConditionSets(
+ const std::vector<UrlMatcherConditionSet>& conditions) {
+ for (std::vector<UrlMatcherConditionSet>::const_iterator i =
+ conditions.begin(); i != conditions.end(); ++i) {
+ AddConditionSet(*i);
+ }
+ RecreateTriggers();
+}
+
+void UrlMatcher::AddConditionSet(
+ const UrlMatcherConditionSet& new_condition_set) {
+ DCHECK(url_matcher_condition_sets_.find(new_condition_set.id()) ==
+ url_matcher_condition_sets_.end());
+ url_matcher_condition_sets_[new_condition_set.id()] = new_condition_set;
+
+ // Initialize.
+ UrlMatcherConditionSet& condition_set =
+ url_matcher_condition_sets_[new_condition_set.id()];
+ std::vector<UrlMatcherCondition>& conditions =
+ condition_set.mutable_conditions();
+ for (std::vector<UrlMatcherCondition>::iterator i = conditions.begin();
+ i != conditions.end(); ++i) {
+ i->BuildSubstringPattern(&url_component_patterns_);
+ }
+}
+
+void UrlMatcher::RemoveConditionSets(
+ const std::vector<UrlMatcherConditionSet::ID>& condition_ids) {
+ for (std::vector<UrlMatcherConditionSet::ID>::const_iterator i =
+ condition_ids.begin(); i != condition_ids.end(); ++i) {
+ RemoveConditionSet(*i);
+ }
+ RecreateTriggers();
+}
+
+void UrlMatcher::RemoveConditionSet(UrlMatcherConditionSet::ID id) {
+ DCHECK(url_matcher_condition_sets_.find(id) !=
+ url_matcher_condition_sets_.end());
+ url_matcher_condition_sets_.erase(id);
+}
+
+std::set<UrlMatcherConditionSet::ID> UrlMatcher::MatchUrl(const GURL& url) {
+ // See UrlComponentPatterns for the canonicalization of URLs and the
+ // distinction between full url searches and url component searches.
+
+ // Perform all full url searches.
+ std::string full_url =
+ url_component_patterns_.CanonlicalizeURLForFullSearches(url);
+ std::set<SubstringPattern::ID> full_url_matches;
+ full_url_matcher_.Match(full_url, &full_url_matches);
+
+ // Perform all url component searches.
+ std::string component_url =
+ url_component_patterns_.CanonlicalizeURLForComponentSearches(url);
+ std::set<SubstringPattern::ID> component_url_matches;
+ url_component_matcher_.Match(component_url, &component_url_matches);
+
+ // Build the union of both result sets.
+ std::set<SubstringPattern::ID> matches;
+ matches.swap(full_url_matches);
+ matches.insert(component_url_matches.begin(), component_url_matches.end());
+
+ // Calculate all UrlMatcherConditionSet for which all UrlMatcherConditions
+ // were fulfilled.
+ std::set<UrlMatcherConditionSet::ID> result;
+ for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin();
+ i != matches.end(); ++i) {
+ // For each UrlMatcherConditionSet there is exactly one condition
+ // registered in substring_match_triggers_. This means that the following
+ // logic tests each UrlMatcherConditionSet exactly once if it can be
+ // completely fulfilled.
+ std::set<UrlMatcherConditionSet::ID>& condition_sets =
+ substring_match_triggers_[*i];
+ for (std::set<UrlMatcherConditionSet::ID>::const_iterator j =
+ condition_sets.begin(); j != condition_sets.end(); ++j) {
+ if (url_matcher_condition_sets_[*j].IsMatch(matches, url))
+ result.insert(url_matcher_condition_sets_[*j].id());
+ }
+ }
+
+ return result;
+}
+
+void UrlMatcher::UpdateSubstringSetMatcher(
+ bool full_url_conditions,
+ std::vector<SubstringPattern>* unregistered_patterns) {
+ // The purpose of |full_url_conditions| is just that we need to execute
+ // the same logic once for Full URL searches and once for URL Component
+ // searches (see UrlComponentPatterns).
+
+ // Determine which patterns need to be registered when this function
+ // terminates.
+ std::set<SubstringPattern> new_patterns;
+ for (UrlMatcherConditionSets::const_iterator condition_set_iter =
+ url_matcher_condition_sets_.begin();
+ condition_set_iter != url_matcher_condition_sets_.end();
+ ++condition_set_iter) {
+ const UrlMatcherConditionSet::Conditions& conditions =
+ condition_set_iter->second.conditions();
+ for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter =
+ conditions.begin(); condition_iter != conditions.end();
+ ++condition_iter) {
+ // If we are called to process Full URL searches, ignore all others,
+ // and vice versa.
+ if (full_url_conditions == condition_iter->IsFullUrlCondition())
+ new_patterns.insert(condition_iter->substring_pattern());
+ }
+ }
+
+ // This is the set of patterns that were registered before this function
+ // is called.
+ std::set<SubstringPattern>& registered_patterns =
+ full_url_conditions ? registered_full_url_patterns_
+ : registered_url_component_patterns_;
+
+ // Add all patterns that are in new_patterns but not in registered_patterns.
+ std::vector<SubstringPattern> patterns_to_register;
+ std::set_difference(
+ new_patterns.begin(), new_patterns.end(),
+ registered_patterns.begin(), registered_patterns.end(),
+ std::back_inserter(patterns_to_register));
+
+ // Remove all patterns that are in registered_patterns but not in
+ // new_patterns.
+ std::vector<SubstringPattern> patterns_to_unregister;
+ std::set_difference(
+ registered_patterns.begin(), registered_patterns.end(),
+ new_patterns.begin(), new_patterns.end(),
+ std::back_inserter(patterns_to_unregister));
+
+ // Update the SubstringSetMatcher.
+ SubstringSetMatcher& url_matcher =
+ full_url_conditions ? full_url_matcher_ : url_component_matcher_;
+ url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
+ patterns_to_unregister);
+
+ // Update the set of registered_patterns for the next time this function
+ // is being called.
+ registered_patterns.swap(new_patterns);
+ unregistered_patterns->swap(patterns_to_unregister);
+}
+
+void UrlMatcher::UpdateFrequenciesAndTriggers() {
+ // Count substring pattern frequencies.
+ substring_pattern_frequencies_.clear();
+ for (UrlMatcherConditionSets::const_iterator condition_set_iter =
+ url_matcher_condition_sets_.begin();
+ condition_set_iter != url_matcher_condition_sets_.end();
+ ++condition_set_iter) {
+ const UrlMatcherConditionSet::Conditions& conditions =
+ condition_set_iter->second.conditions();
+ for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter =
+ conditions.begin(); condition_iter != conditions.end();
+ ++condition_iter) {
+ const SubstringPattern& pattern = condition_iter->substring_pattern();
+ substring_pattern_frequencies_[pattern.id()]++;
+ }
+ }
+
+ // Update trigger conditions: Determine for each UrlMatcherConditionSet which
+ // UrlMatcherCondition occurs least frequently in this UrlMatcher. We assume
+ // that this condition is very specific and occurs rarely in URLs. If a match
+ // occurs for this UrlMatcherCondition, we want to test all other
+ // UrlMatcherCondition in the respective UrlMatcherConditionSet as well to
+ // see whether the entire UrlMatcherConditionSet is considered matching.
+ substring_match_triggers_.clear();
+ for (UrlMatcherConditionSets::const_iterator condition_set_iter =
+ url_matcher_condition_sets_.begin();
+ condition_set_iter != url_matcher_condition_sets_.end();
+ ++condition_set_iter) {
+ const UrlMatcherConditionSet::Conditions& conditions =
+ condition_set_iter->second.conditions();
+ if (conditions.empty())
+ continue;
+ SubstringPattern::ID trigger = conditions.front().substring_pattern().id();
+ for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter =
+ conditions.begin() + 1; condition_iter != conditions.end();
+ ++condition_iter) {
+ SubstringPattern::ID current_id =
+ condition_iter->substring_pattern().id();
+ if (substring_pattern_frequencies_[trigger] >
+ substring_pattern_frequencies_[current_id]) {
+ trigger = current_id;
+ }
+ }
+ substring_match_triggers_[trigger].insert(condition_set_iter->second.id());
+ }
+}
+
+void UrlMatcher::RecreateTriggers() {
+ std::vector<SubstringPattern> unregistered1;
+ UpdateSubstringSetMatcher(false, &unregistered1);
+ url_component_patterns_.DestroySingletonPatterns(unregistered1);
+
+ std::vector<SubstringPattern> unregistered2;
+ UpdateSubstringSetMatcher(true, &unregistered2);
+ url_component_patterns_.DestroySingletonPatterns(unregistered2);
+
+ UpdateFrequenciesAndTriggers();
+}
+
+} // namespace extensions

Powered by Google App Engine
This is Rietveld 408576698