| OLD | NEW |
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/extensions/api/declarative/url_matcher.h" |
| 6 |
| 7 #include <algorithm> |
| 8 |
| 9 namespace extensions { |
| 10 |
| 11 // |
| 12 // UrlMatcherCondition |
| 13 // |
| 14 |
| 15 UrlMatcherCondition::UrlMatcherCondition() : criterion_(HOST_PREFIX) {} |
| 16 |
| 17 UrlMatcherCondition::UrlMatcherCondition(const UrlMatcherCondition& rhs) |
| 18 : criterion_(rhs.criterion_), value_(rhs.value_), |
| 19 substring_pattern_(rhs.substring_pattern_) {} |
| 20 |
| 21 UrlMatcherCondition& UrlMatcherCondition::operator=( |
| 22 const UrlMatcherCondition& rhs) { |
| 23 criterion_ = rhs.criterion_; |
| 24 value_ = rhs.value_; |
| 25 substring_pattern_ = rhs.substring_pattern_; |
| 26 return *this; |
| 27 } |
| 28 |
| 29 UrlMatcherCondition::UrlMatcherCondition(Criterion criterion, |
| 30 std::string value) |
| 31 : criterion_(criterion), value_(value) { |
| 32 } |
| 33 |
| 34 bool UrlMatcherCondition::IsFullUrlCondition() const { |
| 35 switch (criterion_) { |
| 36 case HOST_CONTAINS: |
| 37 case PATH_CONTAINS: |
| 38 case QUERY_CONTAINS: |
| 39 case URL_PREFIX: |
| 40 case URL_SUFFIX: |
| 41 case URL_CONTAINS: |
| 42 case URL_EQUALS: |
| 43 return true; |
| 44 default: |
| 45 break; |
| 46 } |
| 47 return false; |
| 48 } |
| 49 |
| 50 bool UrlMatcherCondition::IsMatch( |
| 51 const std::set<SubstringPattern::ID>& matching_substring_patterns, |
| 52 const GURL& url) const { |
| 53 DCHECK(SubstringPattern() != substring_pattern_); |
| 54 if (matching_substring_patterns.find(substring_pattern_.id()) == |
| 55 matching_substring_patterns.end()) |
| 56 return false; |
| 57 switch (criterion_) { |
| 58 case HOST_CONTAINS: |
| 59 return url.host().find(value_) != std::string::npos; |
| 60 case PATH_CONTAINS: |
| 61 return url.path().find(value_) != std::string::npos; |
| 62 case QUERY_CONTAINS: |
| 63 return url.query().find(value_) != std::string::npos; |
| 64 default: |
| 65 break; |
| 66 } |
| 67 return true; |
| 68 } |
| 69 |
| 70 void UrlMatcherCondition::BuildSubstringPattern( |
| 71 UrlComponentPatterns* url_component_patterns) { |
| 72 // Check that we have not been registered before |
| 73 DCHECK(SubstringPattern() == substring_pattern_); |
| 74 switch (criterion_) { |
| 75 case HOST_PREFIX: |
| 76 substring_pattern_ = |
| 77 url_component_patterns->CreateHostPrefixPattern(value_); |
| 78 break; |
| 79 case HOST_SUFFIX: |
| 80 substring_pattern_ = |
| 81 url_component_patterns->CreateHostSuffixPattern(value_); |
| 82 break; |
| 83 case HOST_CONTAINS: |
| 84 substring_pattern_ = |
| 85 url_component_patterns->CreateURLContainsPattern(value_); |
| 86 // This will be accompanied by a second check that the match occurs in |
| 87 // the host part of a URL. |
| 88 break; |
| 89 case HOST_EQUALS: |
| 90 substring_pattern_ = |
| 91 url_component_patterns->CreateHostEqualsPattern(value_); |
| 92 break; |
| 93 |
| 94 case PATH_PREFIX: |
| 95 substring_pattern_ = |
| 96 url_component_patterns->CreatePathPrefixPattern(value_); |
| 97 break; |
| 98 case PATH_SUFFIX: |
| 99 substring_pattern_ = |
| 100 url_component_patterns->CreatePathSuffixPattern(value_); |
| 101 break; |
| 102 case PATH_CONTAINS: |
| 103 substring_pattern_ = |
| 104 url_component_patterns->CreateURLContainsPattern(value_); |
| 105 // This will be accompanied by a second check that the match occurs in |
| 106 // the path part of a URL. |
| 107 break; |
| 108 case PATH_EQUALS: |
| 109 substring_pattern_ = |
| 110 url_component_patterns->CreatePathEqualsPattern(value_); |
| 111 break; |
| 112 |
| 113 case QUERY_PREFIX: |
| 114 substring_pattern_ = |
| 115 url_component_patterns->CreateQueryPrefixPattern(value_); |
| 116 break; |
| 117 case QUERY_SUFFIX: |
| 118 substring_pattern_ = |
| 119 url_component_patterns->CreateQuerySuffixPattern(value_); |
| 120 break; |
| 121 case QUERY_CONTAINS: |
| 122 substring_pattern_ = |
| 123 url_component_patterns->CreateURLContainsPattern(value_); |
| 124 // This will be accompanied by a second check that the match occurs in |
| 125 // the query part of a URL. |
| 126 break; |
| 127 case QUERY_EQUALS: |
| 128 substring_pattern_ = |
| 129 url_component_patterns->CreateQueryEqualsPattern(value_); |
| 130 break; |
| 131 |
| 132 case URL_PREFIX: |
| 133 substring_pattern_ = |
| 134 url_component_patterns->CreateURLPrefixPattern(value_); |
| 135 break; |
| 136 case URL_SUFFIX: |
| 137 substring_pattern_ = |
| 138 url_component_patterns->CreateURLSuffixPattern(value_); |
| 139 break; |
| 140 case URL_CONTAINS: |
| 141 substring_pattern_ = |
| 142 url_component_patterns->CreateURLContainsPattern(value_); |
| 143 break; |
| 144 case URL_EQUALS: |
| 145 substring_pattern_ = |
| 146 url_component_patterns->CreateURLEqualsPattern(value_); |
| 147 break; |
| 148 } |
| 149 } |
| 150 |
| 151 |
| 152 // |
| 153 // UrlMatcherConditionSet |
| 154 // |
| 155 |
| 156 UrlMatcherConditionSet::UrlMatcherConditionSet() {} |
| 157 |
| 158 UrlMatcherConditionSet::UrlMatcherConditionSet( |
| 159 const UrlMatcherConditionSet::UrlMatcherConditionSet& rhs) |
| 160 : id_(rhs.id_), conditions_(rhs.conditions_) {} |
| 161 |
| 162 UrlMatcherConditionSet& UrlMatcherConditionSet::operator=( |
| 163 const UrlMatcherConditionSet& rhs) { |
| 164 id_ = rhs.id_; |
| 165 conditions_ = rhs.conditions_; |
| 166 return *this; |
| 167 } |
| 168 |
| 169 UrlMatcherConditionSet::UrlMatcherConditionSet( |
| 170 ID id, const std::vector<UrlMatcherCondition>& conditions) |
| 171 : id_(id), conditions_(conditions) {} |
| 172 |
| 173 bool UrlMatcherConditionSet::IsMatch( |
| 174 const std::set<SubstringPattern::ID>& matching_substring_patterns, |
| 175 const GURL& url) const { |
| 176 for (Conditions::const_iterator i = conditions_.begin(); |
| 177 i != conditions_.end(); ++i) { |
| 178 if (!i->IsMatch(matching_substring_patterns, url)) |
| 179 return false; |
| 180 } |
| 181 return true; |
| 182 } |
| 183 |
| 184 // |
| 185 // UrlMatcher |
| 186 // |
| 187 |
| 188 UrlMatcher::UrlMatcher() {} |
| 189 |
| 190 void UrlMatcher::AddConditionSets( |
| 191 const std::vector<UrlMatcherConditionSet>& conditions) { |
| 192 for (std::vector<UrlMatcherConditionSet>::const_iterator i = |
| 193 conditions.begin(); i != conditions.end(); ++i) { |
| 194 AddConditionSet(*i); |
| 195 } |
| 196 RecreateTriggers(); |
| 197 } |
| 198 |
| 199 void UrlMatcher::AddConditionSet( |
| 200 const UrlMatcherConditionSet& new_condition_set) { |
| 201 DCHECK(url_matcher_condition_sets_.find(new_condition_set.id()) == |
| 202 url_matcher_condition_sets_.end()); |
| 203 url_matcher_condition_sets_[new_condition_set.id()] = new_condition_set; |
| 204 |
| 205 // Initialize. |
| 206 UrlMatcherConditionSet& condition_set = |
| 207 url_matcher_condition_sets_[new_condition_set.id()]; |
| 208 std::vector<UrlMatcherCondition>& conditions = |
| 209 condition_set.mutable_conditions(); |
| 210 for (std::vector<UrlMatcherCondition>::iterator i = conditions.begin(); |
| 211 i != conditions.end(); ++i) { |
| 212 i->BuildSubstringPattern(&url_component_patterns_); |
| 213 } |
| 214 } |
| 215 |
| 216 void UrlMatcher::RemoveConditionSets( |
| 217 const std::vector<UrlMatcherConditionSet::ID>& condition_ids) { |
| 218 for (std::vector<UrlMatcherConditionSet::ID>::const_iterator i = |
| 219 condition_ids.begin(); i != condition_ids.end(); ++i) { |
| 220 RemoveConditionSet(*i); |
| 221 } |
| 222 RecreateTriggers(); |
| 223 } |
| 224 |
| 225 void UrlMatcher::RemoveConditionSet(UrlMatcherConditionSet::ID id) { |
| 226 DCHECK(url_matcher_condition_sets_.find(id) != |
| 227 url_matcher_condition_sets_.end()); |
| 228 url_matcher_condition_sets_.erase(id); |
| 229 } |
| 230 |
| 231 std::set<UrlMatcherConditionSet::ID> UrlMatcher::MatchUrl(const GURL& url) { |
| 232 // See UrlComponentPatterns for the canonicalization of URLs and the |
| 233 // distinction between full url searches and url component searches. |
| 234 |
| 235 // Perform all full url searches. |
| 236 std::string full_url = |
| 237 url_component_patterns_.CanonlicalizeURLForFullSearches(url); |
| 238 std::set<SubstringPattern::ID> full_url_matches; |
| 239 full_url_matcher_.Match(full_url, &full_url_matches); |
| 240 |
| 241 // Perform all url component searches. |
| 242 std::string component_url = |
| 243 url_component_patterns_.CanonlicalizeURLForComponentSearches(url); |
| 244 std::set<SubstringPattern::ID> component_url_matches; |
| 245 url_component_matcher_.Match(component_url, &component_url_matches); |
| 246 |
| 247 // Build the union of both result sets. |
| 248 std::set<SubstringPattern::ID> matches; |
| 249 matches.swap(full_url_matches); |
| 250 matches.insert(component_url_matches.begin(), component_url_matches.end()); |
| 251 |
| 252 // Calculate all UrlMatcherConditionSet for which all UrlMatcherConditions |
| 253 // were fulfilled. |
| 254 std::set<UrlMatcherConditionSet::ID> result; |
| 255 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin(); |
| 256 i != matches.end(); ++i) { |
| 257 // For each UrlMatcherConditionSet there is exactly one condition |
| 258 // registered in substring_match_triggers_. This means that the following |
| 259 // logic tests each UrlMatcherConditionSet exactly once if it can be |
| 260 // completely fulfilled. |
| 261 std::set<UrlMatcherConditionSet::ID>& condition_sets = |
| 262 substring_match_triggers_[*i]; |
| 263 for (std::set<UrlMatcherConditionSet::ID>::const_iterator j = |
| 264 condition_sets.begin(); j != condition_sets.end(); ++j) { |
| 265 if (url_matcher_condition_sets_[*j].IsMatch(matches, url)) |
| 266 result.insert(url_matcher_condition_sets_[*j].id()); |
| 267 } |
| 268 } |
| 269 |
| 270 return result; |
| 271 } |
| 272 |
| 273 void UrlMatcher::UpdateSubstringSetMatcher( |
| 274 bool full_url_conditions, |
| 275 std::vector<SubstringPattern>* unregistered_patterns) { |
| 276 // The purpose of |full_url_conditions| is just that we need to execute |
| 277 // the same logic once for Full URL searches and once for URL Component |
| 278 // searches (see UrlComponentPatterns). |
| 279 |
| 280 // Determine which patterns need to be registered when this function |
| 281 // terminates. |
| 282 std::set<SubstringPattern> new_patterns; |
| 283 for (UrlMatcherConditionSets::const_iterator condition_set_iter = |
| 284 url_matcher_condition_sets_.begin(); |
| 285 condition_set_iter != url_matcher_condition_sets_.end(); |
| 286 ++condition_set_iter) { |
| 287 const UrlMatcherConditionSet::Conditions& conditions = |
| 288 condition_set_iter->second.conditions(); |
| 289 for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 290 conditions.begin(); condition_iter != conditions.end(); |
| 291 ++condition_iter) { |
| 292 // If we are called to process Full URL searches, ignore all others, |
| 293 // and vice versa. |
| 294 if (full_url_conditions == condition_iter->IsFullUrlCondition()) |
| 295 new_patterns.insert(condition_iter->substring_pattern()); |
| 296 } |
| 297 } |
| 298 |
| 299 // This is the set of patterns that were registered before this function |
| 300 // is called. |
| 301 std::set<SubstringPattern>& registered_patterns = |
| 302 full_url_conditions ? registered_full_url_patterns_ |
| 303 : registered_url_component_patterns_; |
| 304 |
| 305 // Add all patterns that are in new_patterns but not in registered_patterns. |
| 306 std::vector<SubstringPattern> patterns_to_register; |
| 307 std::set_difference( |
| 308 new_patterns.begin(), new_patterns.end(), |
| 309 registered_patterns.begin(), registered_patterns.end(), |
| 310 std::back_inserter(patterns_to_register)); |
| 311 |
| 312 // Remove all patterns that are in registered_patterns but not in |
| 313 // new_patterns. |
| 314 std::vector<SubstringPattern> patterns_to_unregister; |
| 315 std::set_difference( |
| 316 registered_patterns.begin(), registered_patterns.end(), |
| 317 new_patterns.begin(), new_patterns.end(), |
| 318 std::back_inserter(patterns_to_unregister)); |
| 319 |
| 320 // Update the SubstringSetMatcher. |
| 321 SubstringSetMatcher& url_matcher = |
| 322 full_url_conditions ? full_url_matcher_ : url_component_matcher_; |
| 323 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, |
| 324 patterns_to_unregister); |
| 325 |
| 326 // Update the set of registered_patterns for the next time this function |
| 327 // is being called. |
| 328 registered_patterns.swap(new_patterns); |
| 329 unregistered_patterns->swap(patterns_to_unregister); |
| 330 } |
| 331 |
| 332 void UrlMatcher::UpdateFrequenciesAndTriggers() { |
| 333 // Count substring pattern frequencies. |
| 334 substring_pattern_frequencies_.clear(); |
| 335 for (UrlMatcherConditionSets::const_iterator condition_set_iter = |
| 336 url_matcher_condition_sets_.begin(); |
| 337 condition_set_iter != url_matcher_condition_sets_.end(); |
| 338 ++condition_set_iter) { |
| 339 const UrlMatcherConditionSet::Conditions& conditions = |
| 340 condition_set_iter->second.conditions(); |
| 341 for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 342 conditions.begin(); condition_iter != conditions.end(); |
| 343 ++condition_iter) { |
| 344 const SubstringPattern& pattern = condition_iter->substring_pattern(); |
| 345 substring_pattern_frequencies_[pattern.id()]++; |
| 346 } |
| 347 } |
| 348 |
| 349 // Update trigger conditions: Determine for each UrlMatcherConditionSet which |
| 350 // UrlMatcherCondition occurs least frequently in this UrlMatcher. We assume |
| 351 // that this condition is very specific and occurs rarely in URLs. If a match |
| 352 // occurs for this UrlMatcherCondition, we want to test all other |
| 353 // UrlMatcherCondition in the respective UrlMatcherConditionSet as well to |
| 354 // see whether the entire UrlMatcherConditionSet is considered matching. |
| 355 substring_match_triggers_.clear(); |
| 356 for (UrlMatcherConditionSets::const_iterator condition_set_iter = |
| 357 url_matcher_condition_sets_.begin(); |
| 358 condition_set_iter != url_matcher_condition_sets_.end(); |
| 359 ++condition_set_iter) { |
| 360 const UrlMatcherConditionSet::Conditions& conditions = |
| 361 condition_set_iter->second.conditions(); |
| 362 if (conditions.empty()) |
| 363 continue; |
| 364 SubstringPattern::ID trigger = conditions.front().substring_pattern().id(); |
| 365 for (UrlMatcherConditionSet::Conditions::const_iterator condition_iter = |
| 366 conditions.begin() + 1; condition_iter != conditions.end(); |
| 367 ++condition_iter) { |
| 368 SubstringPattern::ID current_id = |
| 369 condition_iter->substring_pattern().id(); |
| 370 if (substring_pattern_frequencies_[trigger] > |
| 371 substring_pattern_frequencies_[current_id]) { |
| 372 trigger = current_id; |
| 373 } |
| 374 } |
| 375 substring_match_triggers_[trigger].insert(condition_set_iter->second.id()); |
| 376 } |
| 377 } |
| 378 |
| 379 void UrlMatcher::RecreateTriggers() { |
| 380 std::vector<SubstringPattern> unregistered1; |
| 381 UpdateSubstringSetMatcher(false, &unregistered1); |
| 382 url_component_patterns_.DestroySingletonPatterns(unregistered1); |
| 383 |
| 384 std::vector<SubstringPattern> unregistered2; |
| 385 UpdateSubstringSetMatcher(true, &unregistered2); |
| 386 url_component_patterns_.DestroySingletonPatterns(unregistered2); |
| 387 |
| 388 UpdateFrequenciesAndTriggers(); |
| 389 } |
| 390 |
| 391 } // namespace extensions |
| OLD | NEW |