Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(437)

Side by Side Diff: chrome/common/extensions/matcher/substring_set_matcher.cc

Issue 12092096: Move c/c/extensions/matcher/ to top level extension dir (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: TOT Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/common/extensions/matcher/substring_set_matcher.h"
6
7 #include <queue>
8
9 #include "base/logging.h"
10 #include "base/stl_util.h"
11
12 namespace extensions {
13
14 //
15 // SubstringSetMatcher
16 //
17
18 SubstringSetMatcher::SubstringSetMatcher() {
19 RebuildAhoCorasickTree();
20 }
21
22 SubstringSetMatcher::~SubstringSetMatcher() {}
23
24 void SubstringSetMatcher::RegisterPatterns(
25 const std::vector<const StringPattern*>& patterns) {
26 RegisterAndUnregisterPatterns(patterns,
27 std::vector<const StringPattern*>());
28 }
29
30 void SubstringSetMatcher::UnregisterPatterns(
31 const std::vector<const StringPattern*>& patterns) {
32 RegisterAndUnregisterPatterns(std::vector<const StringPattern*>(),
33 patterns);
34 }
35
36 void SubstringSetMatcher::RegisterAndUnregisterPatterns(
37 const std::vector<const StringPattern*>& to_register,
38 const std::vector<const StringPattern*>& to_unregister) {
39 // Register patterns.
40 for (std::vector<const StringPattern*>::const_iterator i =
41 to_register.begin(); i != to_register.end(); ++i) {
42 DCHECK(patterns_.find((*i)->id()) == patterns_.end());
43 patterns_[(*i)->id()] = *i;
44 }
45
46 // Unregister patterns
47 for (std::vector<const StringPattern*>::const_iterator i =
48 to_unregister.begin(); i != to_unregister.end(); ++i) {
49 patterns_.erase((*i)->id());
50 }
51
52 RebuildAhoCorasickTree();
53 }
54
55 bool SubstringSetMatcher::Match(const std::string& text,
56 std::set<StringPattern::ID>* matches) const {
57 size_t old_number_of_matches = matches->size();
58
59 // Handle patterns matching the empty string.
60 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end());
61
62 int current_node = 0;
63 size_t text_length = text.length();
64 for (size_t i = 0; i < text_length; ++i) {
65 while (!tree_[current_node].HasEdge(text[i]) && current_node != 0)
66 current_node = tree_[current_node].failure();
67 if (tree_[current_node].HasEdge(text[i])) {
68 current_node = tree_[current_node].GetEdge(text[i]);
69 matches->insert(tree_[current_node].matches().begin(),
70 tree_[current_node].matches().end());
71 } else {
72 DCHECK_EQ(0, current_node);
73 }
74 }
75
76 return old_number_of_matches != matches->size();
77 }
78
79 bool SubstringSetMatcher::IsEmpty() const {
80 // An empty tree consists of only the root node.
81 return patterns_.empty() && tree_.size() == 1u;
82 }
83
84 void SubstringSetMatcher::RebuildAhoCorasickTree() {
85 tree_.clear();
86
87 // Initialize root note of tree.
88 AhoCorasickNode root;
89 root.set_failure(0);
90 tree_.push_back(root);
91
92 // Insert all patterns.
93 for (SubstringPatternSet::const_iterator i = patterns_.begin();
94 i != patterns_.end(); ++i) {
95 InsertPatternIntoAhoCorasickTree(i->second);
96 }
97
98 CreateFailureEdges();
99 }
100
101 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree(
102 const StringPattern* pattern) {
103 const std::string& text = pattern->pattern();
104 size_t text_length = text.length();
105
106 // Iterators on the tree and the text.
107 int current_node = 0;
108 size_t text_pos = 0;
109
110 // Follow existing paths for as long as possible.
111 while (text_pos < text_length &&
112 tree_[current_node].HasEdge(text[text_pos])) {
113 current_node = tree_[current_node].GetEdge(text[text_pos]);
114 ++text_pos;
115 }
116
117 // Create new nodes if necessary.
118 while (text_pos < text_length) {
119 AhoCorasickNode new_node;
120 tree_.push_back(new_node);
121 tree_[current_node].SetEdge(text[text_pos], tree_.size() - 1);
122 current_node = tree_.size() - 1;
123 ++text_pos;
124 }
125
126 // Register match.
127 tree_[current_node].AddMatch(pattern->id());
128 }
129
130 void SubstringSetMatcher::CreateFailureEdges() {
131 typedef AhoCorasickNode::Edges Edges;
132
133 std::queue<int> queue;
134
135 AhoCorasickNode& root = tree_[0];
136 root.set_failure(0);
137 const AhoCorasickNode::Edges& root_edges = root.edges();
138 for (Edges::const_iterator e = root_edges.begin(); e != root_edges.end();
139 ++e) {
140 int leads_to = e->second;
141 tree_[leads_to].set_failure(0);
142 queue.push(leads_to);
143 }
144
145 while (!queue.empty()) {
146 AhoCorasickNode& current_node = tree_[queue.front()];
147 queue.pop();
148 for (Edges::const_iterator e = current_node.edges().begin();
149 e != current_node.edges().end(); ++e) {
150 char edge_label = e->first;
151 int leads_to = e->second;
152 queue.push(leads_to);
153
154 int failure = current_node.failure();
155 while (!tree_[failure].HasEdge(edge_label) && failure != 0)
156 failure = tree_[failure].failure();
157
158 int follow_in_case_of_failure = tree_[failure].HasEdge(e->first) ?
159 tree_[failure].GetEdge(edge_label) : 0;
160 tree_[leads_to].set_failure(follow_in_case_of_failure);
161 tree_[leads_to].AddMatches(tree_[follow_in_case_of_failure].matches());
162 }
163 }
164 }
165
166 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode()
167 : failure_(-1) {}
168
169 SubstringSetMatcher::AhoCorasickNode::~AhoCorasickNode() {}
170
171 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode(
172 const SubstringSetMatcher::AhoCorasickNode& other)
173 : edges_(other.edges_),
174 failure_(other.failure_),
175 matches_(other.matches_) {}
176
177 SubstringSetMatcher::AhoCorasickNode&
178 SubstringSetMatcher::AhoCorasickNode::operator=(
179 const SubstringSetMatcher::AhoCorasickNode& other) {
180 edges_ = other.edges_;
181 failure_ = other.failure_;
182 matches_ = other.matches_;
183 return *this;
184 }
185
186 bool SubstringSetMatcher::AhoCorasickNode::HasEdge(char c) const {
187 return edges_.find(c) != edges_.end();
188 }
189
190 int SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const {
191 std::map<char, int>::const_iterator i = edges_.find(c);
192 return i == edges_.end() ? -1 : i->second;
193 }
194
195 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, int node) {
196 edges_[c] = node;
197 }
198
199 void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) {
200 matches_.insert(id);
201 }
202
203 void SubstringSetMatcher::AhoCorasickNode::AddMatches(
204 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) {
205 matches_.insert(matches.begin(), matches.end());
206 }
207
208 } // namespace extensions
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698