Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(78)

Side by Side Diff: extensions/common/matcher/substring_set_matcher.h

Issue 15027008: Speed improvements to SubstringSetMatcher (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix errors detected by the win dbg trybot Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | extensions/common/matcher/substring_set_matcher.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ 5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_
6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ 6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_
7 7
8 #include <limits>
8 #include <map> 9 #include <map>
9 #include <set> 10 #include <set>
10 #include <string> 11 #include <string>
11 #include <vector> 12 #include <vector>
12 13
13 #include "base/basictypes.h" 14 #include "base/basictypes.h"
14 #include "extensions/common/matcher/string_pattern.h" 15 #include "extensions/common/matcher/string_pattern.h"
15 16
16 namespace extensions { 17 namespace extensions {
17 18
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
73 // text[i + k] at depth k + 1, we follow a failure edge. This edge 74 // text[i + k] at depth k + 1, we follow a failure edge. This edge
74 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that 75 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that
75 // is a prefix of any registered pattern. 76 // is a prefix of any registered pattern.
76 // 77 //
77 // If your brain thinks "Forget it, let's go shopping.", don't worry. 78 // If your brain thinks "Forget it, let's go shopping.", don't worry.
78 // Take a nap and read an introductory text on the Aho Corasick algorithm. 79 // Take a nap and read an introductory text on the Aho Corasick algorithm.
79 // It will make sense. Eventually. 80 // It will make sense. Eventually.
80 class AhoCorasickNode { 81 class AhoCorasickNode {
81 public: 82 public:
82 // Key: label of the edge, value: node index in |tree_| of parent class. 83 // Key: label of the edge, value: node index in |tree_| of parent class.
83 typedef std::map<char, int> Edges; 84 typedef std::map<char, uint32> Edges;
84 typedef std::set<StringPattern::ID> Matches; 85 typedef std::set<StringPattern::ID> Matches;
85 86
87 static const uint32 kNoSuchEdge; // Represents an invalid node index.
88
86 AhoCorasickNode(); 89 AhoCorasickNode();
87 ~AhoCorasickNode(); 90 ~AhoCorasickNode();
88 AhoCorasickNode(const AhoCorasickNode& other); 91 AhoCorasickNode(const AhoCorasickNode& other);
89 AhoCorasickNode& operator=(const AhoCorasickNode& other); 92 AhoCorasickNode& operator=(const AhoCorasickNode& other);
90 93
91 bool HasEdge(char c) const; 94 uint32 GetEdge(char c) const;
92 int GetEdge(char c) const; 95 void SetEdge(char c, uint32 node);
93 void SetEdge(char c, int node);
94 const Edges& edges() const { return edges_; } 96 const Edges& edges() const { return edges_; }
95 97
96 int failure() const { return failure_; } 98 uint32 failure() const { return failure_; }
97 void set_failure(int failure) { failure_ = failure; } 99 void set_failure(uint32 failure) { failure_ = failure; }
98 100
99 void AddMatch(StringPattern::ID id); 101 void AddMatch(StringPattern::ID id);
100 void AddMatches(const Matches& matches); 102 void AddMatches(const Matches& matches);
101 const Matches& matches() const { return matches_; } 103 const Matches& matches() const { return matches_; }
102 104
103 private: 105 private:
104 // Outgoing edges of current node. 106 // Outgoing edges of current node.
105 Edges edges_; 107 Edges edges_;
106 108
107 // Node index that failure edge leads to. 109 // Node index that failure edge leads to.
108 int failure_; 110 uint32 failure_;
109 111
110 // Identifiers of matches. 112 // Identifiers of matches.
111 Matches matches_; 113 Matches matches_;
112 }; 114 };
113 115
114 void RebuildAhoCorasickTree(); 116 typedef std::map<StringPattern::ID, const StringPattern*> SubstringPatternMap;
117 typedef std::vector<const StringPattern*> SubstringPatternVector;
118
119 // |sorted_patterns| is a copy of |patterns_| sorted by the pattern string.
120 void RebuildAhoCorasickTree(const SubstringPatternVector& sorted_patterns);
115 121
116 // Inserts a path for |pattern->pattern()| into the tree and adds 122 // Inserts a path for |pattern->pattern()| into the tree and adds
117 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with 123 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with
118 // the caller. 124 // the caller.
119 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); 125 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern);
120 void CreateFailureEdges(); 126 void CreateFailureEdges();
121 127
122 // Set of all registered StringPatterns. Used to regenerate the 128 // Set of all registered StringPatterns. Used to regenerate the
123 // Aho-Corasick tree in case patterns are registered or unregistered. 129 // Aho-Corasick tree in case patterns are registered or unregistered.
124 typedef std::map<StringPattern::ID, const StringPattern*> 130 SubstringPatternMap patterns_;
125 SubstringPatternSet;
126 SubstringPatternSet patterns_;
127 131
128 // The nodes of a Aho-Corasick tree. 132 // The nodes of a Aho-Corasick tree.
129 std::vector<AhoCorasickNode> tree_; 133 std::vector<AhoCorasickNode> tree_;
130 134
131 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); 135 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher);
132 }; 136 };
133 137
134 } // namespace extensions 138 } // namespace extensions
135 139
136 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ 140 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_
OLDNEW
« no previous file with comments | « no previous file | extensions/common/matcher/substring_set_matcher.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698