Index: chrome/common/extensions/matcher/substring_set_matcher.h |
diff --git a/chrome/common/extensions/matcher/substring_set_matcher.h b/chrome/common/extensions/matcher/substring_set_matcher.h |
deleted file mode 100644 |
index 0b4bbc352b56ff640589f794b1d785f44f4b4fdc..0000000000000000000000000000000000000000 |
--- a/chrome/common/extensions/matcher/substring_set_matcher.h |
+++ /dev/null |
@@ -1,136 +0,0 @@ |
-// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#ifndef CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ |
-#define CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ |
- |
-#include <map> |
-#include <set> |
-#include <string> |
-#include <vector> |
- |
-#include "base/basictypes.h" |
-#include "chrome/common/extensions/matcher/string_pattern.h" |
- |
-namespace extensions { |
- |
-// Class that store a set of string patterns and can find for a string S, |
-// which string patterns occur in S. |
-class SubstringSetMatcher { |
- public: |
- SubstringSetMatcher(); |
- ~SubstringSetMatcher(); |
- |
- // Registers all |patterns|. The ownership remains with the caller. |
- // The same pattern cannot be registered twice and each pattern needs to have |
- // a unique ID. |
- // Ownership of the patterns remains with the caller. |
- void RegisterPatterns(const std::vector<const StringPattern*>& patterns); |
- |
- // Unregisters the passed |patterns|. |
- void UnregisterPatterns(const std::vector<const StringPattern*>& patterns); |
- |
- // Analogous to RegisterPatterns and UnregisterPatterns but executes both |
- // operations in one step, which is cheaper in the execution. |
- void RegisterAndUnregisterPatterns( |
- const std::vector<const StringPattern*>& to_register, |
- const std::vector<const StringPattern*>& to_unregister); |
- |
- // Matches |text| against all registered StringPatterns. Stores the IDs |
- // of matching patterns in |matches|. |matches| is not cleared before adding |
- // to it. |
- bool Match(const std::string& text, |
- std::set<StringPattern::ID>* matches) const; |
- |
- // Returns true if this object retains no allocated data. Only for debugging. |
- bool IsEmpty() const; |
- |
- private: |
- // A node of an Aho Corasick Tree. This is implemented according to |
- // http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf |
- // |
- // The algorithm is based on the idea of building a trie of all registered |
- // patterns. Each node of the tree is annotated with a set of pattern |
- // IDs that are used to report matches. |
- // |
- // The root of the trie represents an empty match. If we were looking whether |
- // any registered pattern matches a text at the beginning of the text (i.e. |
- // whether any pattern is a prefix of the text), we could just follow |
- // nodes in the trie according to the matching characters in the text. |
- // E.g., if text == "foobar", we would follow the trie from the root node |
- // to its child labeled 'f', from there to child 'o', etc. In this process we |
- // would report all pattern IDs associated with the trie nodes as matches. |
- // |
- // As we are not looking for all prefix matches but all substring matches, |
- // this algorithm would need to compare text.substr(0), text.substr(1), ... |
- // against the trie, which is in O(|text|^2). |
- // |
- // The Aho Corasick algorithm improves this runtime by using failure edges. |
- // In case we have found a partial match of length k in the text |
- // (text[i, ..., i + k - 1]) in the trie starting at the root and ending at |
- // a node at depth k, but cannot find a match in the trie for character |
- // text[i + k] at depth k + 1, we follow a failure edge. This edge |
- // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that |
- // is a prefix of any registered pattern. |
- // |
- // If your brain thinks "Forget it, let's go shopping.", don't worry. |
- // Take a nap and read an introductory text on the Aho Corasick algorithm. |
- // It will make sense. Eventually. |
- class AhoCorasickNode { |
- public: |
- // Key: label of the edge, value: node index in |tree_| of parent class. |
- typedef std::map<char, int> Edges; |
- typedef std::set<StringPattern::ID> Matches; |
- |
- AhoCorasickNode(); |
- ~AhoCorasickNode(); |
- AhoCorasickNode(const AhoCorasickNode& other); |
- AhoCorasickNode& operator=(const AhoCorasickNode& other); |
- |
- bool HasEdge(char c) const; |
- int GetEdge(char c) const; |
- void SetEdge(char c, int node); |
- const Edges& edges() const { return edges_; } |
- |
- int failure() const { return failure_; } |
- void set_failure(int failure) { failure_ = failure; } |
- |
- void AddMatch(StringPattern::ID id); |
- void AddMatches(const Matches& matches); |
- const Matches& matches() const { return matches_; } |
- |
- private: |
- // Outgoing edges of current node. |
- Edges edges_; |
- |
- // Node index that failure edge leads to. |
- int failure_; |
- |
- // Identifiers of matches. |
- Matches matches_; |
- }; |
- |
- void RebuildAhoCorasickTree(); |
- |
- // Inserts a path for |pattern->pattern()| into the tree and adds |
- // |pattern->id()| to the set of matches. Ownership of |pattern| remains with |
- // the caller. |
- void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); |
- void CreateFailureEdges(); |
- |
- // Set of all registered StringPatterns. Used to regenerate the |
- // Aho-Corasick tree in case patterns are registered or unregistered. |
- typedef std::map<StringPattern::ID, const StringPattern*> |
- SubstringPatternSet; |
- SubstringPatternSet patterns_; |
- |
- // The nodes of a Aho-Corasick tree. |
- std::vector<AhoCorasickNode> tree_; |
- |
- DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); |
-}; |
- |
-} // namespace extensions |
- |
-#endif // CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_ |