Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1645)

Unified Diff: chrome/common/extensions/matcher/substring_set_matcher.h

Issue 12092096: Move c/c/extensions/matcher/ to top level extension dir (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: TOT Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/common/extensions/matcher/substring_set_matcher.h
diff --git a/chrome/common/extensions/matcher/substring_set_matcher.h b/chrome/common/extensions/matcher/substring_set_matcher.h
deleted file mode 100644
index 0b4bbc352b56ff640589f794b1d785f44f4b4fdc..0000000000000000000000000000000000000000
--- a/chrome/common/extensions/matcher/substring_set_matcher.h
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_
-#define CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_
-
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "base/basictypes.h"
-#include "chrome/common/extensions/matcher/string_pattern.h"
-
-namespace extensions {
-
-// Class that store a set of string patterns and can find for a string S,
-// which string patterns occur in S.
-class SubstringSetMatcher {
- public:
- SubstringSetMatcher();
- ~SubstringSetMatcher();
-
- // Registers all |patterns|. The ownership remains with the caller.
- // The same pattern cannot be registered twice and each pattern needs to have
- // a unique ID.
- // Ownership of the patterns remains with the caller.
- void RegisterPatterns(const std::vector<const StringPattern*>& patterns);
-
- // Unregisters the passed |patterns|.
- void UnregisterPatterns(const std::vector<const StringPattern*>& patterns);
-
- // Analogous to RegisterPatterns and UnregisterPatterns but executes both
- // operations in one step, which is cheaper in the execution.
- void RegisterAndUnregisterPatterns(
- const std::vector<const StringPattern*>& to_register,
- const std::vector<const StringPattern*>& to_unregister);
-
- // Matches |text| against all registered StringPatterns. Stores the IDs
- // of matching patterns in |matches|. |matches| is not cleared before adding
- // to it.
- bool Match(const std::string& text,
- std::set<StringPattern::ID>* matches) const;
-
- // Returns true if this object retains no allocated data. Only for debugging.
- bool IsEmpty() const;
-
- private:
- // A node of an Aho Corasick Tree. This is implemented according to
- // http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf
- //
- // The algorithm is based on the idea of building a trie of all registered
- // patterns. Each node of the tree is annotated with a set of pattern
- // IDs that are used to report matches.
- //
- // The root of the trie represents an empty match. If we were looking whether
- // any registered pattern matches a text at the beginning of the text (i.e.
- // whether any pattern is a prefix of the text), we could just follow
- // nodes in the trie according to the matching characters in the text.
- // E.g., if text == "foobar", we would follow the trie from the root node
- // to its child labeled 'f', from there to child 'o', etc. In this process we
- // would report all pattern IDs associated with the trie nodes as matches.
- //
- // As we are not looking for all prefix matches but all substring matches,
- // this algorithm would need to compare text.substr(0), text.substr(1), ...
- // against the trie, which is in O(|text|^2).
- //
- // The Aho Corasick algorithm improves this runtime by using failure edges.
- // In case we have found a partial match of length k in the text
- // (text[i, ..., i + k - 1]) in the trie starting at the root and ending at
- // a node at depth k, but cannot find a match in the trie for character
- // text[i + k] at depth k + 1, we follow a failure edge. This edge
- // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that
- // is a prefix of any registered pattern.
- //
- // If your brain thinks "Forget it, let's go shopping.", don't worry.
- // Take a nap and read an introductory text on the Aho Corasick algorithm.
- // It will make sense. Eventually.
- class AhoCorasickNode {
- public:
- // Key: label of the edge, value: node index in |tree_| of parent class.
- typedef std::map<char, int> Edges;
- typedef std::set<StringPattern::ID> Matches;
-
- AhoCorasickNode();
- ~AhoCorasickNode();
- AhoCorasickNode(const AhoCorasickNode& other);
- AhoCorasickNode& operator=(const AhoCorasickNode& other);
-
- bool HasEdge(char c) const;
- int GetEdge(char c) const;
- void SetEdge(char c, int node);
- const Edges& edges() const { return edges_; }
-
- int failure() const { return failure_; }
- void set_failure(int failure) { failure_ = failure; }
-
- void AddMatch(StringPattern::ID id);
- void AddMatches(const Matches& matches);
- const Matches& matches() const { return matches_; }
-
- private:
- // Outgoing edges of current node.
- Edges edges_;
-
- // Node index that failure edge leads to.
- int failure_;
-
- // Identifiers of matches.
- Matches matches_;
- };
-
- void RebuildAhoCorasickTree();
-
- // Inserts a path for |pattern->pattern()| into the tree and adds
- // |pattern->id()| to the set of matches. Ownership of |pattern| remains with
- // the caller.
- void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern);
- void CreateFailureEdges();
-
- // Set of all registered StringPatterns. Used to regenerate the
- // Aho-Corasick tree in case patterns are registered or unregistered.
- typedef std::map<StringPattern::ID, const StringPattern*>
- SubstringPatternSet;
- SubstringPatternSet patterns_;
-
- // The nodes of a Aho-Corasick tree.
- std::vector<AhoCorasickNode> tree_;
-
- DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher);
-};
-
-} // namespace extensions
-
-#endif // CHROME_COMMON_EXTENSIONS_MATCHER_SUBSTRING_SET_MATCHER_H_
« no previous file with comments | « chrome/common/extensions/matcher/string_pattern_unittest.cc ('k') | chrome/common/extensions/matcher/substring_set_matcher.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698