OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/extensions/matcher/url_matcher.h" | 5 #include "chrome/common/extensions/matcher/url_matcher.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 | 9 |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "content/public/common/url_constants.h" | |
11 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
13 #include "googleurl/src/url_canon.h" | |
12 | 14 |
13 namespace extensions { | 15 namespace extensions { |
14 | 16 |
15 // This set of classes implements a mapping of URL Component Patterns, such as | 17 // This set of classes implements a mapping of URL Component Patterns, such as |
16 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. | 18 // host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns. |
17 // | 19 // |
18 // The idea of this mapping is to reduce the problem of comparing many | 20 // The idea of this mapping is to reduce the problem of comparing many |
19 // URL Component Patterns against one URL to the problem of searching many | 21 // URL Component Patterns against one URL to the problem of searching many |
20 // substrings in one string: | 22 // substrings in one string: |
21 // | 23 // |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
81 // -> host_equals("www.example.com") = BU .www.example.com ED | 83 // -> host_equals("www.example.com") = BU .www.example.com ED |
82 // | 84 // |
83 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). | 85 // Similarly for path and query parameters ({path, query}_{prefix, suffix, equals}). |
84 // | 86 // |
85 // With this, we can search the SubstringPatterns in the normalized URL. | 87 // With this, we can search the SubstringPatterns in the normalized URL. |
86 // | 88 // |
87 // | 89 // |
88 // Case 2: url_{prefix,suffix,equals,contains} searches. | 90 // Case 2: url_{prefix,suffix,equals,contains} searches. |
89 // ===================================================== | 91 // ===================================================== |
90 // | 92 // |
91 // Step 1: as above | 93 // Step 1: as above, except that the scheme and non-default ports are kept. |
Yoyo Zhou
2012/08/16 17:52:32
and port
battre
2012/08/16 18:29:16
Done.
| |
92 // | 94 // |
93 // Step 2: | 95 // Step 2: |
94 // Translate URL to String and add the following position markers: | 96 // Translate URL to String and add the following position markers: |
95 // - BU = Beginning of URL | 97 // - BU = Beginning of URL |
96 // - EU = End of URL | 98 // - EU = End of URL |
97 // Furthermore, the hostname is canonicalized to start with a ".". | |
98 // | 99 // |
99 // -> www.example.com/index.html?search=foo becomes | 100 // -> http://www.example.com:8080/index.html?search=foo#first_match becomes |
100 // BU .www.example.com/index.html?search=foo EU | 101 // BU http://www.example.com:8080/index.html?search=foo EU |
Yoyo Zhou
2012/08/16 17:52:32
This comment seems to be at odds with the port cha
battre
2012/08/16 18:29:16
Done.
| |
101 // | 102 // |
102 // url_prefix(prefix) = BU add_missing_dot_prefix(prefix) | 103 // url_prefix(prefix) = BU prefix |
103 // -> url_prefix("www.example") = BU .www.example | 104 // -> url_prefix("http://www.example") = BU http://www.example |
104 // | 105 // |
105 // url_contains(substring) = substring | 106 // url_contains(substring) = substring |
106 // -> url_contains("index") = index | 107 // -> url_contains("index") = index |
107 // | 108 // |
108 // | 109 // |
109 // Case 3: {host,path,query}_contains searches. | 110 // Case 3: {host,path,query}_contains searches. |
110 // ============================================ | 111 // ============================================ |
111 // | 112 // |
112 // These kinds of searches are not supported directly but can be derived | 113 // These kinds of searches are not supported directly but can be derived |
113 // by a combination of a url_contains() query followed by an explicit test: | 114 // by a combination of a url_contains() query followed by an explicit test: |
114 // | 115 // |
115 // host_contains(str) = url_contains(str) followed by test whether str occurs | 116 // host_contains(str) = url_contains(str) followed by test whether str occurs |
116 // in host comonent of original URL. | 117 // in host component of original URL. |
117 // -> host_contains("example.co") = example.co | 118 // -> host_contains("example.co") = example.co |
118 // followed by gurl.host().find("example.co"); | 119 // followed by gurl.host().find("example.co"); |
119 // | 120 // |
120 // [similarly for path_contains and query_contains]. | 121 // [similarly for path_contains and query_contains]. |
121 | 122 |
122 | 123 |
123 // | 124 // |
124 // URLMatcherCondition | 125 // URLMatcherCondition |
125 // | 126 // |
126 | 127 |
(...skipping 26 matching lines...) Expand all Loading... | |
153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) | 154 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL) |
154 return *substring_pattern_ < *rhs.substring_pattern_; | 155 return *substring_pattern_ < *rhs.substring_pattern_; |
155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; | 156 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true; |
156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, | 157 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL, |
157 // or both are NULL. | 158 // or both are NULL. |
158 return false; | 159 return false; |
159 } | 160 } |
160 | 161 |
161 bool URLMatcherCondition::IsFullURLCondition() const { | 162 bool URLMatcherCondition::IsFullURLCondition() const { |
162 // For these criteria the SubstringMatcher needs to be executed on the | 163 // For these criteria the SubstringMatcher needs to be executed on the |
163 // GURL that is canonlizaliced with | 164 // GURL that is canonicalized with |
164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. | 165 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. |
165 switch (criterion_) { | 166 switch (criterion_) { |
166 case HOST_CONTAINS: | 167 case HOST_CONTAINS: |
167 case PATH_CONTAINS: | 168 case PATH_CONTAINS: |
168 case QUERY_CONTAINS: | 169 case QUERY_CONTAINS: |
169 case URL_PREFIX: | 170 case URL_PREFIX: |
170 case URL_SUFFIX: | 171 case URL_SUFFIX: |
171 case URL_CONTAINS: | 172 case URL_CONTAINS: |
172 case URL_EQUALS: | 173 case URL_EQUALS: |
173 return true; | 174 return true; |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
307 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( | 308 URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( |
308 const std::string& host, | 309 const std::string& host, |
309 const std::string& path_prefix) { | 310 const std::string& path_prefix) { |
310 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, | 311 return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, |
311 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + | 312 kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + |
312 path_prefix); | 313 path_prefix); |
313 } | 314 } |
314 | 315 |
315 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( | 316 std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( |
316 const GURL& url) { | 317 const GURL& url) { |
317 return kBeginningOfURL + CanonicalizeHostname(url.host()) + url.path() + | 318 GURL::Replacements replacements; |
318 (url.has_query() ? "?" + url.query() : "") + kEndOfURL; | 319 replacements.ClearPassword(); |
320 replacements.ClearUsername(); | |
321 replacements.ClearRef(); | |
322 // Clear port if it is implicit from scheme. | |
323 if (url.has_port()) { | |
324 const std::string& port = url.scheme(); | |
325 if (url_canon::DefaultPortForScheme(port.c_str(), port.size()) == | |
326 url.EffectiveIntPort()) { | |
327 replacements.ClearPort(); | |
328 } | |
329 } | |
330 return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + | |
331 kEndOfURL; | |
319 } | 332 } |
320 | 333 |
321 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( | 334 URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( |
322 const std::string& prefix) { | 335 const std::string& prefix) { |
323 return CreateCondition(URLMatcherCondition::URL_PREFIX, | 336 return CreateCondition(URLMatcherCondition::URL_PREFIX, |
324 kBeginningOfURL + CanonicalizeHostname(prefix)); | 337 kBeginningOfURL + prefix); |
325 } | 338 } |
326 | 339 |
327 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( | 340 URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( |
328 const std::string& suffix) { | 341 const std::string& suffix) { |
329 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); | 342 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); |
330 } | 343 } |
331 | 344 |
332 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( | 345 URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( |
333 const std::string& str) { | 346 const std::string& str) { |
334 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); | 347 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); |
335 } | 348 } |
336 | 349 |
337 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( | 350 URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( |
338 const std::string& str) { | 351 const std::string& str) { |
339 return CreateCondition(URLMatcherCondition::URL_EQUALS, | 352 return CreateCondition(URLMatcherCondition::URL_EQUALS, |
340 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfURL); | 353 kBeginningOfURL + str + kEndOfURL); |
341 } | 354 } |
342 | 355 |
343 void URLMatcherConditionFactory::ForgetUnusedPatterns( | 356 void URLMatcherConditionFactory::ForgetUnusedPatterns( |
344 const std::set<SubstringPattern::ID>& used_patterns) { | 357 const std::set<SubstringPattern::ID>& used_patterns) { |
345 PatternSingletons::iterator i = pattern_singletons_.begin(); | 358 PatternSingletons::iterator i = pattern_singletons_.begin(); |
346 while (i != pattern_singletons_.end()) { | 359 while (i != pattern_singletons_.end()) { |
347 if (used_patterns.find((*i)->id()) != used_patterns.end()) { | 360 if (used_patterns.find((*i)->id()) != used_patterns.end()) { |
348 ++i; | 361 ++i; |
349 } else { | 362 } else { |
350 delete *i; | 363 delete *i; |
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
679 } | 692 } |
680 | 693 |
681 void URLMatcher::UpdateInternalDatastructures() { | 694 void URLMatcher::UpdateInternalDatastructures() { |
682 UpdateSubstringSetMatcher(false); | 695 UpdateSubstringSetMatcher(false); |
683 UpdateSubstringSetMatcher(true); | 696 UpdateSubstringSetMatcher(true); |
684 UpdateTriggers(); | 697 UpdateTriggers(); |
685 UpdateConditionFactory(); | 698 UpdateConditionFactory(); |
686 } | 699 } |
687 | 700 |
688 } // namespace extensions | 701 } // namespace extensions |
OLD | NEW |