OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/common/extensions/matcher/substring_set_matcher.h" | |
6 | |
7 #include <queue> | |
8 | |
9 #include "base/logging.h" | |
10 #include "base/stl_util.h" | |
11 | |
12 namespace extensions { | |
13 | |
14 // | |
15 // SubstringSetMatcher | |
16 // | |
17 | |
18 SubstringSetMatcher::SubstringSetMatcher() { | |
19 RebuildAhoCorasickTree(); | |
20 } | |
21 | |
22 SubstringSetMatcher::~SubstringSetMatcher() {} | |
23 | |
24 void SubstringSetMatcher::RegisterPatterns( | |
25 const std::vector<const StringPattern*>& patterns) { | |
26 RegisterAndUnregisterPatterns(patterns, | |
27 std::vector<const StringPattern*>()); | |
28 } | |
29 | |
30 void SubstringSetMatcher::UnregisterPatterns( | |
31 const std::vector<const StringPattern*>& patterns) { | |
32 RegisterAndUnregisterPatterns(std::vector<const StringPattern*>(), | |
33 patterns); | |
34 } | |
35 | |
36 void SubstringSetMatcher::RegisterAndUnregisterPatterns( | |
37 const std::vector<const StringPattern*>& to_register, | |
38 const std::vector<const StringPattern*>& to_unregister) { | |
39 // Register patterns. | |
40 for (std::vector<const StringPattern*>::const_iterator i = | |
41 to_register.begin(); i != to_register.end(); ++i) { | |
42 DCHECK(patterns_.find((*i)->id()) == patterns_.end()); | |
43 patterns_[(*i)->id()] = *i; | |
44 } | |
45 | |
46 // Unregister patterns | |
47 for (std::vector<const StringPattern*>::const_iterator i = | |
48 to_unregister.begin(); i != to_unregister.end(); ++i) { | |
49 patterns_.erase((*i)->id()); | |
50 } | |
51 | |
52 RebuildAhoCorasickTree(); | |
53 } | |
54 | |
55 bool SubstringSetMatcher::Match(const std::string& text, | |
56 std::set<StringPattern::ID>* matches) const { | |
57 size_t old_number_of_matches = matches->size(); | |
58 | |
59 // Handle patterns matching the empty string. | |
60 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end()); | |
61 | |
62 int current_node = 0; | |
63 size_t text_length = text.length(); | |
64 for (size_t i = 0; i < text_length; ++i) { | |
65 while (!tree_[current_node].HasEdge(text[i]) && current_node != 0) | |
66 current_node = tree_[current_node].failure(); | |
67 if (tree_[current_node].HasEdge(text[i])) { | |
68 current_node = tree_[current_node].GetEdge(text[i]); | |
69 matches->insert(tree_[current_node].matches().begin(), | |
70 tree_[current_node].matches().end()); | |
71 } else { | |
72 DCHECK_EQ(0, current_node); | |
73 } | |
74 } | |
75 | |
76 return old_number_of_matches != matches->size(); | |
77 } | |
78 | |
79 bool SubstringSetMatcher::IsEmpty() const { | |
80 // An empty tree consists of only the root node. | |
81 return patterns_.empty() && tree_.size() == 1u; | |
82 } | |
83 | |
84 void SubstringSetMatcher::RebuildAhoCorasickTree() { | |
85 tree_.clear(); | |
86 | |
87 // Initialize root note of tree. | |
88 AhoCorasickNode root; | |
89 root.set_failure(0); | |
90 tree_.push_back(root); | |
91 | |
92 // Insert all patterns. | |
93 for (SubstringPatternSet::const_iterator i = patterns_.begin(); | |
94 i != patterns_.end(); ++i) { | |
95 InsertPatternIntoAhoCorasickTree(i->second); | |
96 } | |
97 | |
98 CreateFailureEdges(); | |
99 } | |
100 | |
101 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree( | |
102 const StringPattern* pattern) { | |
103 const std::string& text = pattern->pattern(); | |
104 size_t text_length = text.length(); | |
105 | |
106 // Iterators on the tree and the text. | |
107 int current_node = 0; | |
108 size_t text_pos = 0; | |
109 | |
110 // Follow existing paths for as long as possible. | |
111 while (text_pos < text_length && | |
112 tree_[current_node].HasEdge(text[text_pos])) { | |
113 current_node = tree_[current_node].GetEdge(text[text_pos]); | |
114 ++text_pos; | |
115 } | |
116 | |
117 // Create new nodes if necessary. | |
118 while (text_pos < text_length) { | |
119 AhoCorasickNode new_node; | |
120 tree_.push_back(new_node); | |
121 tree_[current_node].SetEdge(text[text_pos], tree_.size() - 1); | |
122 current_node = tree_.size() - 1; | |
123 ++text_pos; | |
124 } | |
125 | |
126 // Register match. | |
127 tree_[current_node].AddMatch(pattern->id()); | |
128 } | |
129 | |
130 void SubstringSetMatcher::CreateFailureEdges() { | |
131 typedef AhoCorasickNode::Edges Edges; | |
132 | |
133 std::queue<int> queue; | |
134 | |
135 AhoCorasickNode& root = tree_[0]; | |
136 root.set_failure(0); | |
137 const AhoCorasickNode::Edges& root_edges = root.edges(); | |
138 for (Edges::const_iterator e = root_edges.begin(); e != root_edges.end(); | |
139 ++e) { | |
140 int leads_to = e->second; | |
141 tree_[leads_to].set_failure(0); | |
142 queue.push(leads_to); | |
143 } | |
144 | |
145 while (!queue.empty()) { | |
146 AhoCorasickNode& current_node = tree_[queue.front()]; | |
147 queue.pop(); | |
148 for (Edges::const_iterator e = current_node.edges().begin(); | |
149 e != current_node.edges().end(); ++e) { | |
150 char edge_label = e->first; | |
151 int leads_to = e->second; | |
152 queue.push(leads_to); | |
153 | |
154 int failure = current_node.failure(); | |
155 while (!tree_[failure].HasEdge(edge_label) && failure != 0) | |
156 failure = tree_[failure].failure(); | |
157 | |
158 int follow_in_case_of_failure = tree_[failure].HasEdge(e->first) ? | |
159 tree_[failure].GetEdge(edge_label) : 0; | |
160 tree_[leads_to].set_failure(follow_in_case_of_failure); | |
161 tree_[leads_to].AddMatches(tree_[follow_in_case_of_failure].matches()); | |
162 } | |
163 } | |
164 } | |
165 | |
166 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode() | |
167 : failure_(-1) {} | |
168 | |
169 SubstringSetMatcher::AhoCorasickNode::~AhoCorasickNode() {} | |
170 | |
171 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode( | |
172 const SubstringSetMatcher::AhoCorasickNode& other) | |
173 : edges_(other.edges_), | |
174 failure_(other.failure_), | |
175 matches_(other.matches_) {} | |
176 | |
177 SubstringSetMatcher::AhoCorasickNode& | |
178 SubstringSetMatcher::AhoCorasickNode::operator=( | |
179 const SubstringSetMatcher::AhoCorasickNode& other) { | |
180 edges_ = other.edges_; | |
181 failure_ = other.failure_; | |
182 matches_ = other.matches_; | |
183 return *this; | |
184 } | |
185 | |
186 bool SubstringSetMatcher::AhoCorasickNode::HasEdge(char c) const { | |
187 return edges_.find(c) != edges_.end(); | |
188 } | |
189 | |
190 int SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const { | |
191 std::map<char, int>::const_iterator i = edges_.find(c); | |
192 return i == edges_.end() ? -1 : i->second; | |
193 } | |
194 | |
195 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, int node) { | |
196 edges_[c] = node; | |
197 } | |
198 | |
199 void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) { | |
200 matches_.insert(id); | |
201 } | |
202 | |
203 void SubstringSetMatcher::AhoCorasickNode::AddMatches( | |
204 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) { | |
205 matches_.insert(matches.begin(), matches.end()); | |
206 } | |
207 | |
208 } // namespace extensions | |
OLD | NEW |