Chromium Code Reviews| Index: chrome/browser/extensions/api/declarative/url_component_patterns.h |
| diff --git a/chrome/browser/extensions/api/declarative/url_component_patterns.h b/chrome/browser/extensions/api/declarative/url_component_patterns.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..2e8fa3b9f68e376d502dfb20e68809911d8b25d9 |
| --- /dev/null |
| +++ b/chrome/browser/extensions/api/declarative/url_component_patterns.h |
| @@ -0,0 +1,95 @@ |
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_COMPONENT_PATTERNS_H_ |
| +#define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_COMPONENT_PATTERNS_H_ |
| +#pragma once |
| + |
| +#include <map> |
| +#include <string> |
| +#include <vector> |
| + |
| +#include "chrome/browser/extensions/api/declarative/substring_set_matcher.h" |
| + |
| +class GURL; |
| + |
| +namespace extensions { |
| + |
| +// Class to map the problem of finding {host, path, query} {prefixes, suffixes, |
| +// containments, and equality} in GURLs to the substring matching problem. |
| +// |
| +// Say, you want to check whether the path of a URL starts with "/index.html". |
| +// This class preprocesses a URL like "www.google.com/index.html" into something |
| +// like "www.google.com|/index.html". After preprocessing, you can search for |
| +// "|/index.html" in the string and see that this candidate URL actually has |
| +// a path that starts with "/index.html". On the contrary, |
| +// "www.google.com/images/index.html" would be normalized to |
| +// "www.google.com|/images/index.html". It is easy to see that it contains |
| +// "/index.html" but the path of the URL does not start with "/index.html". |
|
Matt Perry
2012/02/14 01:38:34
Typo: your example contains /images.html :)
battre
2012/02/14 19:32:21
Done.
|
| +// |
| +// This preprocessing is important if you want to match a URL against many |
| +// patterns because it reduces the matching to a "discover all substrings |
| +// of a dictionary in a text" problem, which can be solved very efficiently |
| +// by the Aho-Corasick algorithm. |
| +class UrlComponentPatterns { |
|
Matt Perry
2012/02/14 01:38:34
Chrome style is to capitalize acronyms in class/me
battre
2012/02/14 19:32:21
Done.
|
| + public: |
| + UrlComponentPatterns(); |
| + |
| + // Canonicalizes a URL for "Create{Host,Path,Query}*Pattern" searches. |
| + // The returned string is the text in which patterns created by those |
| + // functions are meant to be searched. |
| + // NOTE(review): "Canonlicalize" is a misspelling of "Canonicalize"; fixing |
| + // it requires renaming the definition and all callers in the same change. |
| + std::string CanonlicalizeURLForComponentSearches(const GURL& url); |
| + |
| + // These functions create singleton SubstringPatterns. Each pattern gets a |
| + // unique id, but if a function is called twice with the same parameter, it |
| + // returns the same pattern. |
| + // |
| + // There is no CreateHostContainsPattern query because this cannot be |
| + // mapped to the substring matching efficiently. Instead you have to use |
| + // a CreateURLContainsPattern query followed by a verification whether the |
| + // pattern actually occurs in url.host(). |
| + // |
| + // Prefix / suffix / equality patterns for the host component. |
| + SubstringPattern CreateHostPrefixPattern(const std::string& prefix); |
| + SubstringPattern CreateHostSuffixPattern(const std::string& suffix); |
| + SubstringPattern CreateHostEqualsPattern(const std::string& str); |
| + |
| + // Prefix / suffix / equality patterns for the path component. |
| + SubstringPattern CreatePathPrefixPattern(const std::string& prefix); |
| + SubstringPattern CreatePathSuffixPattern(const std::string& suffix); |
| + SubstringPattern CreatePathEqualsPattern(const std::string& str); |
| + |
| + // Prefix / suffix / equality patterns for the query component. |
| + SubstringPattern CreateQueryPrefixPattern(const std::string& prefix); |
| + SubstringPattern CreateQuerySuffixPattern(const std::string& suffix); |
| + SubstringPattern CreateQueryEqualsPattern(const std::string& str); |
| + |
| + // This covers the common case, where you don't care whether a domain |
| + // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it |
| + // should be followed by a given |path_prefix|. |
| + SubstringPattern CreateHostSuffixPathPrefixPattern( |
| + const std::string& host_suffix, |
| + const std::string& path_prefix); |
| + |
| + // Canonicalizes a URL for "CreateURL*Pattern" searches. |
| + // NOTE(review): same "Canonlicalize" misspelling as in |
| + // CanonlicalizeURLForComponentSearches(). |
| + std::string CanonlicalizeURLForFullSearches(const GURL& url); |
| + |
| + // Prefix / suffix / containment / equality patterns for the URL as a |
| + // whole. They are meant to be searched in the output of |
| + // CanonlicalizeURLForFullSearches(). |
| + SubstringPattern CreateURLPrefixPattern(const std::string& prefix); |
| + SubstringPattern CreateURLSuffixPattern(const std::string& suffix); |
| + SubstringPattern CreateURLContainsPattern(const std::string& str); |
| + SubstringPattern CreateURLEqualsPattern(const std::string& str); |
| + |
| + // Forgets about the existence of |pattern|. The owner must be really |
| + // sure that there exists no remaining reference to |pattern|. |
| + // This function should be called to prevent this class from leaking |
| + // memory. |
| + void DestroySingletonPattern(const SubstringPattern& pattern); |
| + // Batch variant of DestroySingletonPattern(); presumably forgets every |
| + // element of |patterns| -- TODO confirm against the definition. |
| + void DestroySingletonPatterns(const std::vector<SubstringPattern>& patterns); |
| + |
| + private: |
| + // Presumably the shared helper behind the public Create*Pattern functions: |
| + // returns the singleton SubstringPattern for |pattern|, creating it on |
| + // first use -- TODO confirm against the definition. |
| + SubstringPattern CreateSingletonPattern(const std::string& pattern); |
| + |
| + // Prepends a "." to the hostname if it does not start with one. |
| + // NOTE(review): |hostname| is taken by value, so each call copies the |
| + // string; a const reference would avoid that, but the definition has to be |
| + // changed in the same step. |
| + std::string CanonicalizeHostname(const std::string hostname) const; |
|
Matt Perry
2012/02/14 01:38:34
missing ref on parameter
battre
2012/02/14 19:32:21
Done.
|
| + |
| + // Presumably the counter from which the unique pattern ids are drawn -- |
| + // TODO confirm against the definition. |
| + int id_counter_; |
| + // Registry of the singleton patterns handed out by this class. Presumably |
| + // keyed by the pattern string passed to CreateSingletonPattern() -- TODO |
| + // confirm against the definition. |
| + std::map<std::string, SubstringPattern> pattern_singletons_; |
|
Matt Perry
2012/02/14 01:38:34
Please add a comment about what the key in this ma
battre
2012/02/14 19:32:21
Done.
|
| +}; |
| + |
| +} // namespace extensions |
| + |
| +#endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_COMPONENT_PATTERNS_H_ |