Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(136)

Side by Side Diff: chrome/common/extensions/matcher/url_matcher.cc

Issue 12092096: Move c/c/extensions/matcher/ to top level extension dir (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: TOT Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/common/extensions/matcher/url_matcher.h"
6
7 #include <algorithm>
8 #include <iterator>
9
10 #include "base/logging.h"
11 #include "content/public/common/url_constants.h"
12 #include "googleurl/src/gurl.h"
13 #include "googleurl/src/url_canon.h"
14
15 namespace extensions {
16
17 // This set of classes implement a mapping of URL Component Patterns, such as
18 // host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns
19 // for use in substring comparisons.
20 //
21 // The idea of this mapping is to reduce the problem of comparing many
22 // URL Component Patterns against one URL to the problem of searching many
23 // substrings in one string:
24 //
25 // ---------------------- -----------------
26 // | URL Query operator | ----translate----> | StringPattern |
27 // ---------------------- -----------------
28 // ^
29 // |
30 // compare
31 // |
32 // v
33 // ---------------------- -----------------
34 // | URL to compare | | |
35 // | to all URL Query | ----translate----> | String |
36 // | operators | | |
37 // ---------------------- -----------------
38 //
39 // The reason for this problem reduction is that there are efficient algorithms
40 // for searching many substrings in one string (see Aho-Corasick algorithm).
41 //
42 // Additionally, some of the same pieces are reused to implement regular
43 // expression comparisons. The FilteredRE2 implementation for matching many
44 // regular expressions against one string uses prefiltering, in which a set
45 // of substrings (derived from the regexes) are first searched for, to reduce
46 // the number of regular expressions to test; the prefiltering step also
47 // uses Aho-Corasick.
48 //
49 // Case 1: {host,path,query}_{prefix,suffix,equals} searches.
50 // ==========================================================
51 //
52 // For searches in this class, we normalize URLs as follows:
53 //
54 // Step 1:
55 // Remove scheme, port and segment from URL:
56 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes
57 // www.example.com/index.html?search=foo
58 //
59 // We remove the scheme and port number because they can be checked later
60 // in a secondary filter step. We remove the segment (the #... part) because
61 // this is not guaranteed to be ASCII-7 encoded.
62 //
63 // Step 2:
64 // Translate URL to String and add the following position markers:
65 // - BU = Beginning of URL
66 // - ED = End of Domain
67 // - EP = End of Path
68 // - EU = End of URL
69 // Furthermore, the hostname is canonicalized to start with a ".".
70 //
71 // Position markers are represented as characters >127, which are therefore
72 // guaranteed not to be part of the ASCII-7 encoded URL character set.
73 //
74 // -> www.example.com/index.html?search=foo becomes
75 // BU .www.example.com ED /index.html EP ?search=foo EU
76 //
77 // -> www.example.com/index.html becomes
78 // BU .www.example.com ED /index.html EP EU
79 //
80 // Step 3:
81 // Translate URL Component Patterns as follows:
82 //
83 // host_prefix(prefix) = BU add_missing_dot_prefix(prefix)
84 // -> host_prefix("www.example") = BU .www.example
85 //
86 // host_suffix(suffix) = suffix ED
87 // -> host_suffix("example.com") = example.com ED
88 // -> host_suffix(".example.com") = .example.com ED
89 //
90 // host_equals(domain) = BU add_missing_dot_prefix(domain) ED
91 // -> host_equals("www.example.com") = BU .www.example.com ED
92 //
93 // Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
94 //
95 // With this, we can search the StringPatterns in the normalized URL.
96 //
97 //
98 // Case 2: url_{prefix,suffix,equals,contains} searches.
99 // =====================================================
100 //
101 // Step 1: as above, except that
102 // - the scheme is not removed
103 // - the port is not removed if it is specified and does not match the default
104 // port for the given scheme.
105 //
106 // Step 2:
107 // Translate URL to String and add the following position markers:
108 // - BU = Beginning of URL
109 // - EU = End of URL
110 //
111 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes
112 // BU http://www.example.com:8080/index.html?search=foo EU
113 // -> http://www.example.com:80/index.html?search=foo#first_match becomes
114 // BU http://www.example.com/index.html?search=foo EU
115 //
116 // url_prefix(prefix) = BU prefix
117 // -> url_prefix("http://www.example") = BU http://www.example
118 //
119 // url_contains(substring) = substring
120 // -> url_contains("index") = index
121 //
122 //
123 // Case 3: {host,path,query}_contains searches.
124 // ============================================
125 //
126 // These kinds of searches are not supported directly but can be derived
127 // by a combination of a url_contains() query followed by an explicit test:
128 //
129 // host_contains(str) = url_contains(str) followed by test whether str occurs
130 // in host component of original URL.
131 // -> host_contains("example.co") = example.co
132 // followed by gurl.host().find("example.co");
133 //
134 // [similarly for path_contains and query_contains].
135 //
136 //
137 // Regular expression matching (url_matches searches)
138 // ==================================================
139 //
140 // This class also supports matching regular expressions (RE2 syntax)
141 // against full URLs, which are transformed as in case 2.
142
143 namespace {
144
145 bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) {
146 return criterion == URLMatcherCondition::URL_MATCHES;
147 }
148
149 } // namespace
150
151 //
152 // URLMatcherCondition
153 //
154
155 URLMatcherCondition::URLMatcherCondition()
156 : criterion_(HOST_PREFIX),
157 string_pattern_(NULL) {}
158
159 URLMatcherCondition::~URLMatcherCondition() {}
160
161 URLMatcherCondition::URLMatcherCondition(
162 Criterion criterion,
163 const StringPattern* string_pattern)
164 : criterion_(criterion),
165 string_pattern_(string_pattern) {}
166
167 URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
168 : criterion_(rhs.criterion_),
169 string_pattern_(rhs.string_pattern_) {}
170
171 URLMatcherCondition& URLMatcherCondition::operator=(
172 const URLMatcherCondition& rhs) {
173 criterion_ = rhs.criterion_;
174 string_pattern_ = rhs.string_pattern_;
175 return *this;
176 }
177
178 bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
179 if (criterion_ < rhs.criterion_) return true;
180 if (criterion_ > rhs.criterion_) return false;
181 if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
182 return *string_pattern_ < *rhs.string_pattern_;
183 if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true;
184 // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
185 // or both are NULL.
186 return false;
187 }
188
189 bool URLMatcherCondition::IsFullURLCondition() const {
190 // For these criteria the SubstringMatcher needs to be executed on the
191 // GURL that is canonicalized with
192 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
193 switch (criterion_) {
194 case HOST_CONTAINS:
195 case PATH_CONTAINS:
196 case QUERY_CONTAINS:
197 case URL_PREFIX:
198 case URL_SUFFIX:
199 case URL_CONTAINS:
200 case URL_EQUALS:
201 return true;
202 default:
203 break;
204 }
205 return false;
206 }
207
208 bool URLMatcherCondition::IsRegexCondition() const {
209 return IsRegexCriterion(criterion_);
210 }
211
212 bool URLMatcherCondition::IsMatch(
213 const std::set<StringPattern::ID>& matching_patterns,
214 const GURL& url) const {
215 DCHECK(string_pattern_);
216 if (!ContainsKey(matching_patterns, string_pattern_->id()))
217 return false;
218 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
219 // a substring match on the raw URL. In case of a match, we need to verify
220 // that the match was found in the correct component of the URL.
221 switch (criterion_) {
222 case HOST_CONTAINS:
223 return url.host().find(string_pattern_->pattern()) !=
224 std::string::npos;
225 case PATH_CONTAINS:
226 return url.path().find(string_pattern_->pattern()) !=
227 std::string::npos;
228 case QUERY_CONTAINS:
229 return url.query().find(string_pattern_->pattern()) !=
230 std::string::npos;
231 default:
232 break;
233 }
234 return true;
235 }
236
237 //
238 // URLMatcherConditionFactory
239 //
240
241 namespace {
242 // These are symbols that are not contained in 7-bit ASCII used in GURLs.
243 const char kBeginningOfURL[] = {static_cast<char>(-1), 0};
244 const char kEndOfDomain[] = {static_cast<char>(-2), 0};
245 const char kEndOfPath[] = {static_cast<char>(-3), 0};
246 const char kEndOfURL[] = {static_cast<char>(-4), 0};
247 } // namespace
248
249 URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
250
251 URLMatcherConditionFactory::~URLMatcherConditionFactory() {
252 STLDeleteElements(&substring_pattern_singletons_);
253 STLDeleteElements(&regex_pattern_singletons_);
254 }
255
256 std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
257 const GURL& url) const {
258 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain +
259 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") +
260 kEndOfURL;
261 }
262
263 URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition(
264 const std::string& prefix) {
265 return CreateCondition(URLMatcherCondition::HOST_PREFIX,
266 kBeginningOfURL + CanonicalizeHostname(prefix));
267 }
268
269 URLMatcherCondition URLMatcherConditionFactory::CreateHostSuffixCondition(
270 const std::string& suffix) {
271 return CreateCondition(URLMatcherCondition::HOST_SUFFIX,
272 suffix + kEndOfDomain);
273 }
274
275 URLMatcherCondition URLMatcherConditionFactory::CreateHostContainsCondition(
276 const std::string& str) {
277 return CreateCondition(URLMatcherCondition::HOST_CONTAINS, str);
278 }
279
280 URLMatcherCondition URLMatcherConditionFactory::CreateHostEqualsCondition(
281 const std::string& str) {
282 return CreateCondition(URLMatcherCondition::HOST_EQUALS,
283 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfDomain);
284 }
285
286 URLMatcherCondition URLMatcherConditionFactory::CreatePathPrefixCondition(
287 const std::string& prefix) {
288 return CreateCondition(URLMatcherCondition::PATH_PREFIX,
289 kEndOfDomain + prefix);
290 }
291
292 URLMatcherCondition URLMatcherConditionFactory::CreatePathSuffixCondition(
293 const std::string& suffix) {
294 return CreateCondition(URLMatcherCondition::PATH_SUFFIX, suffix + kEndOfPath);
295 }
296
297 URLMatcherCondition URLMatcherConditionFactory::CreatePathContainsCondition(
298 const std::string& str) {
299 return CreateCondition(URLMatcherCondition::PATH_CONTAINS, str);
300 }
301
302 URLMatcherCondition URLMatcherConditionFactory::CreatePathEqualsCondition(
303 const std::string& str) {
304 return CreateCondition(URLMatcherCondition::PATH_EQUALS,
305 kEndOfDomain + str + kEndOfPath);
306 }
307
308 URLMatcherCondition URLMatcherConditionFactory::CreateQueryPrefixCondition(
309 const std::string& prefix) {
310 std::string pattern;
311 if (!prefix.empty() && prefix[0] == '?')
312 pattern = kEndOfPath + prefix;
313 else
314 pattern = kEndOfPath + ('?' + prefix);
315
316 return CreateCondition(URLMatcherCondition::QUERY_PREFIX, pattern);
317 }
318
319 URLMatcherCondition URLMatcherConditionFactory::CreateQuerySuffixCondition(
320 const std::string& suffix) {
321 if (!suffix.empty() && suffix[0] == '?') {
322 return CreateQueryEqualsCondition(suffix);
323 } else {
324 return CreateCondition(URLMatcherCondition::QUERY_SUFFIX,
325 suffix + kEndOfURL);
326 }
327 }
328
329 URLMatcherCondition URLMatcherConditionFactory::CreateQueryContainsCondition(
330 const std::string& str) {
331 if (!str.empty() && str[0] == '?')
332 return CreateQueryPrefixCondition(str);
333 else
334 return CreateCondition(URLMatcherCondition::QUERY_CONTAINS, str);
335 }
336
337 URLMatcherCondition URLMatcherConditionFactory::CreateQueryEqualsCondition(
338 const std::string& str) {
339 std::string pattern;
340 if (!str.empty() && str[0] == '?')
341 pattern = kEndOfPath + str + kEndOfURL;
342 else
343 pattern = kEndOfPath + ('?' + str) + kEndOfURL;
344
345 return CreateCondition(URLMatcherCondition::QUERY_EQUALS, pattern);
346 }
347
348 URLMatcherCondition
349 URLMatcherConditionFactory::CreateHostSuffixPathPrefixCondition(
350 const std::string& host_suffix,
351 const std::string& path_prefix) {
352 return CreateCondition(URLMatcherCondition::HOST_SUFFIX_PATH_PREFIX,
353 host_suffix + kEndOfDomain + path_prefix);
354 }
355
356 URLMatcherCondition
357 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition(
358 const std::string& host,
359 const std::string& path_prefix) {
360 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX,
361 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain +
362 path_prefix);
363 }
364
365 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches(
366 const GURL& url) const {
367 GURL::Replacements replacements;
368 replacements.ClearPassword();
369 replacements.ClearUsername();
370 replacements.ClearRef();
371 // Clear port if it is implicit from scheme.
372 if (url.has_port()) {
373 const std::string& port = url.scheme();
374 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
375 url.EffectiveIntPort()) {
376 replacements.ClearPort();
377 }
378 }
379 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() +
380 kEndOfURL;
381 }
382
383 std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches(
384 const GURL& url) const {
385 GURL::Replacements replacements;
386 replacements.ClearPassword();
387 replacements.ClearUsername();
388 replacements.ClearRef();
389 // Clear port if it is implicit from scheme.
390 if (url.has_port()) {
391 const std::string& port = url.scheme();
392 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) ==
393 url.EffectiveIntPort()) {
394 replacements.ClearPort();
395 }
396 }
397 return url.ReplaceComponents(replacements).spec();
398 }
399
400 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
401 const std::string& prefix) {
402 return CreateCondition(URLMatcherCondition::URL_PREFIX,
403 kBeginningOfURL + prefix);
404 }
405
406 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
407 const std::string& suffix) {
408 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
409 }
410
411 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
412 const std::string& str) {
413 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
414 }
415
416 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
417 const std::string& str) {
418 return CreateCondition(URLMatcherCondition::URL_EQUALS,
419 kBeginningOfURL + str + kEndOfURL);
420 }
421
422 URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition(
423 const std::string& regex) {
424 return CreateCondition(URLMatcherCondition::URL_MATCHES, regex);
425 }
426
427 void URLMatcherConditionFactory::ForgetUnusedPatterns(
428 const std::set<StringPattern::ID>& used_patterns) {
429 PatternSingletons::iterator i = substring_pattern_singletons_.begin();
430 while (i != substring_pattern_singletons_.end()) {
431 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
432 ++i;
433 } else {
434 delete *i;
435 substring_pattern_singletons_.erase(i++);
436 }
437 }
438 i = regex_pattern_singletons_.begin();
439 while (i != regex_pattern_singletons_.end()) {
440 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
441 ++i;
442 } else {
443 delete *i;
444 regex_pattern_singletons_.erase(i++);
445 }
446 }
447 }
448
449 bool URLMatcherConditionFactory::IsEmpty() const {
450 return substring_pattern_singletons_.empty() &&
451 regex_pattern_singletons_.empty();
452 }
453
454 URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
455 URLMatcherCondition::Criterion criterion,
456 const std::string& pattern) {
457 StringPattern search_pattern(pattern, 0);
458 PatternSingletons* pattern_singletons =
459 IsRegexCriterion(criterion) ? &regex_pattern_singletons_
460 : &substring_pattern_singletons_;
461
462 PatternSingletons::const_iterator iter =
463 pattern_singletons->find(&search_pattern);
464
465 if (iter != pattern_singletons->end()) {
466 return URLMatcherCondition(criterion, *iter);
467 } else {
468 StringPattern* new_pattern =
469 new StringPattern(pattern, id_counter_++);
470 pattern_singletons->insert(new_pattern);
471 return URLMatcherCondition(criterion, new_pattern);
472 }
473 }
474
475 std::string URLMatcherConditionFactory::CanonicalizeHostname(
476 const std::string& hostname) const {
477 if (!hostname.empty() && hostname[0] == '.')
478 return hostname;
479 else
480 return "." + hostname;
481 }
482
483 bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()(
484 StringPattern* lhs,
485 StringPattern* rhs) const {
486 if (lhs == NULL && rhs != NULL) return true;
487 if (lhs != NULL && rhs != NULL)
488 return lhs->pattern() < rhs->pattern();
489 // Either both are NULL or only rhs is NULL.
490 return false;
491 }
492
493 //
494 // URLMatcherSchemeFilter
495 //
496
497 URLMatcherSchemeFilter::URLMatcherSchemeFilter(const std::string& filter)
498 : filters_(1) {
499 filters_.push_back(filter);
500 }
501
502 URLMatcherSchemeFilter::URLMatcherSchemeFilter(
503 const std::vector<std::string>& filters)
504 : filters_(filters) {}
505
506 URLMatcherSchemeFilter::~URLMatcherSchemeFilter() {}
507
508 bool URLMatcherSchemeFilter::IsMatch(const GURL& url) const {
509 return std::find(filters_.begin(), filters_.end(), url.scheme()) !=
510 filters_.end();
511 }
512
513 //
514 // URLMatcherPortFilter
515 //
516
517 URLMatcherPortFilter::URLMatcherPortFilter(
518 const std::vector<URLMatcherPortFilter::Range>& ranges)
519 : ranges_(ranges) {}
520
521 URLMatcherPortFilter::~URLMatcherPortFilter() {}
522
523 bool URLMatcherPortFilter::IsMatch(const GURL& url) const {
524 int port = url.EffectiveIntPort();
525 for (std::vector<Range>::const_iterator i = ranges_.begin();
526 i != ranges_.end(); ++i) {
527 if (i->first <= port && port <= i->second)
528 return true;
529 }
530 return false;
531 }
532
533 // static
534 URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int from,
535 int to) {
536 return Range(from, to);
537 }
538
539 // static
540 URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int port) {
541 return Range(port, port);
542 }
543
544 //
545 // URLMatcherConditionSet
546 //
547
548 URLMatcherConditionSet::~URLMatcherConditionSet() {}
549
550 URLMatcherConditionSet::URLMatcherConditionSet(
551 ID id,
552 const Conditions& conditions)
553 : id_(id),
554 conditions_(conditions) {}
555
556 URLMatcherConditionSet::URLMatcherConditionSet(
557 ID id,
558 const Conditions& conditions,
559 scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
560 scoped_ptr<URLMatcherPortFilter> port_filter)
561 : id_(id),
562 conditions_(conditions),
563 scheme_filter_(scheme_filter.Pass()),
564 port_filter_(port_filter.Pass()) {}
565
566 bool URLMatcherConditionSet::IsMatch(
567 const std::set<StringPattern::ID>& matching_patterns,
568 const GURL& url) const {
569 for (Conditions::const_iterator i = conditions_.begin();
570 i != conditions_.end(); ++i) {
571 if (!i->IsMatch(matching_patterns, url))
572 return false;
573 }
574 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
575 return false;
576 if (port_filter_.get() && !port_filter_->IsMatch(url))
577 return false;
578 return true;
579 }
580
581 //
582 // URLMatcher
583 //
584
585 URLMatcher::URLMatcher() {}
586
587 URLMatcher::~URLMatcher() {}
588
589 void URLMatcher::AddConditionSets(
590 const URLMatcherConditionSet::Vector& condition_sets) {
591 for (URLMatcherConditionSet::Vector::const_iterator i =
592 condition_sets.begin(); i != condition_sets.end(); ++i) {
593 DCHECK(url_matcher_condition_sets_.find((*i)->id()) ==
594 url_matcher_condition_sets_.end());
595 url_matcher_condition_sets_[(*i)->id()] = *i;
596 }
597 UpdateInternalDatastructures();
598 }
599
600 void URLMatcher::RemoveConditionSets(
601 const std::vector<URLMatcherConditionSet::ID>& condition_set_ids) {
602 for (std::vector<URLMatcherConditionSet::ID>::const_iterator i =
603 condition_set_ids.begin(); i != condition_set_ids.end(); ++i) {
604 DCHECK(url_matcher_condition_sets_.find(*i) !=
605 url_matcher_condition_sets_.end());
606 url_matcher_condition_sets_.erase(*i);
607 }
608 UpdateInternalDatastructures();
609 }
610
611 void URLMatcher::ClearUnusedConditionSets() {
612 UpdateConditionFactory();
613 }
614
615 std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(
616 const GURL& url) const {
617 // Find all IDs of StringPatterns that match |url|.
618 // See URLMatcherConditionFactory for the canonicalization of URLs and the
619 // distinction between full url searches and url component searches.
620 std::set<StringPattern::ID> matches;
621 full_url_matcher_.Match(
622 condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
623 url_component_matcher_.Match(
624 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches);
625 regex_set_matcher_.Match(
626 condition_factory_.CanonicalizeURLForRegexSearches(url), &matches);
627
628 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions
629 // were fulfilled.
630 std::set<URLMatcherConditionSet::ID> result;
631 for (std::set<StringPattern::ID>::const_iterator i = matches.begin();
632 i != matches.end(); ++i) {
633 // For each URLMatcherConditionSet there is exactly one condition
634 // registered in substring_match_triggers_. This means that the following
635 // logic tests each URLMatcherConditionSet exactly once if it can be
636 // completely fulfilled.
637 StringPatternTriggers::const_iterator triggered_condition_sets_iter =
638 substring_match_triggers_.find(*i);
639 if (triggered_condition_sets_iter == substring_match_triggers_.end())
640 continue; // Not all substring matches are triggers for a condition set.
641 const std::set<URLMatcherConditionSet::ID>& condition_sets =
642 triggered_condition_sets_iter->second;
643 for (std::set<URLMatcherConditionSet::ID>::const_iterator j =
644 condition_sets.begin(); j != condition_sets.end(); ++j) {
645 URLMatcherConditionSets::const_iterator condition_set_iter =
646 url_matcher_condition_sets_.find(*j);
647 DCHECK(condition_set_iter != url_matcher_condition_sets_.end());
648 if (condition_set_iter->second->IsMatch(matches, url))
649 result.insert(*j);
650 }
651 }
652
653 return result;
654 }
655
656 bool URLMatcher::IsEmpty() const {
657 return condition_factory_.IsEmpty() &&
658 url_matcher_condition_sets_.empty() &&
659 substring_match_triggers_.empty() &&
660 full_url_matcher_.IsEmpty() &&
661 url_component_matcher_.IsEmpty() &&
662 registered_full_url_patterns_.empty() &&
663 registered_url_component_patterns_.empty();
664 }
665
666 void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) {
667 // The purpose of |full_url_conditions| is just that we need to execute
668 // the same logic once for Full URL searches and once for URL Component
669 // searches (see URLMatcherConditionFactory).
670
671 // Determine which patterns need to be registered when this function
672 // terminates.
673 std::set<const StringPattern*> new_patterns;
674 for (URLMatcherConditionSets::const_iterator condition_set_iter =
675 url_matcher_condition_sets_.begin();
676 condition_set_iter != url_matcher_condition_sets_.end();
677 ++condition_set_iter) {
678 const URLMatcherConditionSet::Conditions& conditions =
679 condition_set_iter->second->conditions();
680 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
681 conditions.begin(); condition_iter != conditions.end();
682 ++condition_iter) {
683 // If we are called to process Full URL searches, ignore others, and
684 // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.)
685 if (!condition_iter->IsRegexCondition() &&
686 full_url_conditions == condition_iter->IsFullURLCondition())
687 new_patterns.insert(condition_iter->string_pattern());
688 }
689 }
690
691 // This is the set of patterns that were registered before this function
692 // is called.
693 std::set<const StringPattern*>& registered_patterns =
694 full_url_conditions ? registered_full_url_patterns_
695 : registered_url_component_patterns_;
696
697 // Add all patterns that are in new_patterns but not in registered_patterns.
698 std::vector<const StringPattern*> patterns_to_register;
699 std::set_difference(
700 new_patterns.begin(), new_patterns.end(),
701 registered_patterns.begin(), registered_patterns.end(),
702 std::back_inserter(patterns_to_register));
703
704 // Remove all patterns that are in registered_patterns but not in
705 // new_patterns.
706 std::vector<const StringPattern*> patterns_to_unregister;
707 std::set_difference(
708 registered_patterns.begin(), registered_patterns.end(),
709 new_patterns.begin(), new_patterns.end(),
710 std::back_inserter(patterns_to_unregister));
711
712 // Update the SubstringSetMatcher.
713 SubstringSetMatcher& url_matcher =
714 full_url_conditions ? full_url_matcher_ : url_component_matcher_;
715 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
716 patterns_to_unregister);
717
718 // Update the set of registered_patterns for the next time this function
719 // is being called.
720 registered_patterns.swap(new_patterns);
721 }
722
723 void URLMatcher::UpdateRegexSetMatcher() {
724 std::vector<const StringPattern*> new_patterns;
725
726 for (URLMatcherConditionSets::const_iterator condition_set_iter =
727 url_matcher_condition_sets_.begin();
728 condition_set_iter != url_matcher_condition_sets_.end();
729 ++condition_set_iter) {
730 const URLMatcherConditionSet::Conditions& conditions =
731 condition_set_iter->second->conditions();
732 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
733 conditions.begin(); condition_iter != conditions.end();
734 ++condition_iter) {
735 if (condition_iter->IsRegexCondition())
736 new_patterns.push_back(condition_iter->string_pattern());
737 }
738 }
739
740 // Start over from scratch. We can't really do better than this, since the
741 // FilteredRE2 backend doesn't support incremental updates.
742 regex_set_matcher_.ClearPatterns();
743 regex_set_matcher_.AddPatterns(new_patterns);
744 }
745
746 void URLMatcher::UpdateTriggers() {
747 // Count substring pattern frequencies.
748 std::map<StringPattern::ID, size_t> substring_pattern_frequencies;
749 for (URLMatcherConditionSets::const_iterator condition_set_iter =
750 url_matcher_condition_sets_.begin();
751 condition_set_iter != url_matcher_condition_sets_.end();
752 ++condition_set_iter) {
753 const URLMatcherConditionSet::Conditions& conditions =
754 condition_set_iter->second->conditions();
755 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
756 conditions.begin(); condition_iter != conditions.end();
757 ++condition_iter) {
758 const StringPattern* pattern = condition_iter->string_pattern();
759 substring_pattern_frequencies[pattern->id()]++;
760 }
761 }
762
763 // Update trigger conditions: Determine for each URLMatcherConditionSet which
764 // URLMatcherCondition contains a StringPattern that occurs least
765 // frequently in this URLMatcher. We assume that this condition is very
766 // specific and occurs rarely in URLs. If a match occurs for this
767 // URLMatcherCondition, we want to test all other URLMatcherCondition in the
768 // respective URLMatcherConditionSet as well to see whether the entire
769 // URLMatcherConditionSet is considered matching.
770 substring_match_triggers_.clear();
771 for (URLMatcherConditionSets::const_iterator condition_set_iter =
772 url_matcher_condition_sets_.begin();
773 condition_set_iter != url_matcher_condition_sets_.end();
774 ++condition_set_iter) {
775 const URLMatcherConditionSet::Conditions& conditions =
776 condition_set_iter->second->conditions();
777 if (conditions.empty())
778 continue;
779 URLMatcherConditionSet::Conditions::const_iterator condition_iter =
780 conditions.begin();
781 StringPattern::ID trigger = condition_iter->string_pattern()->id();
782 // We skip the first element in the following loop.
783 ++condition_iter;
784 for (; condition_iter != conditions.end(); ++condition_iter) {
785 StringPattern::ID current_id =
786 condition_iter->string_pattern()->id();
787 if (substring_pattern_frequencies[trigger] >
788 substring_pattern_frequencies[current_id]) {
789 trigger = current_id;
790 }
791 }
792 substring_match_triggers_[trigger].insert(condition_set_iter->second->id());
793 }
794 }
795
796 void URLMatcher::UpdateConditionFactory() {
797 std::set<StringPattern::ID> used_patterns;
798 for (URLMatcherConditionSets::const_iterator condition_set_iter =
799 url_matcher_condition_sets_.begin();
800 condition_set_iter != url_matcher_condition_sets_.end();
801 ++condition_set_iter) {
802 const URLMatcherConditionSet::Conditions& conditions =
803 condition_set_iter->second->conditions();
804 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
805 conditions.begin(); condition_iter != conditions.end();
806 ++condition_iter) {
807 used_patterns.insert(condition_iter->string_pattern()->id());
808 }
809 }
810 condition_factory_.ForgetUnusedPatterns(used_patterns);
811 }
812
813 void URLMatcher::UpdateInternalDatastructures() {
814 UpdateSubstringSetMatcher(false);
815 UpdateSubstringSetMatcher(true);
816 UpdateRegexSetMatcher();
817 UpdateTriggers();
818 UpdateConditionFactory();
819 }
820
821 } // namespace extensions
OLDNEW
« no previous file with comments | « chrome/common/extensions/matcher/url_matcher.h ('k') | chrome/common/extensions/matcher/url_matcher_constants.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698