OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/common/extensions/matcher/regex_set_matcher.h" | |
6 | |
7 #include "base/logging.h" | |
8 #include "base/string_util.h" | |
9 #include "base/stl_util.h" | |
10 #include "chrome/common/extensions/matcher/substring_set_matcher.h" | |
11 #include "third_party/re2/re2/filtered_re2.h" | |
12 #include "third_party/re2/re2/re2.h" | |
13 | |
14 namespace extensions { | |
15 | |
16 RegexSetMatcher::RegexSetMatcher() {} | |
17 | |
18 RegexSetMatcher::~RegexSetMatcher() { | |
19 DeleteSubstringPatterns(); | |
20 } | |
21 | |
22 void RegexSetMatcher::AddPatterns( | |
23 const std::vector<const StringPattern*>& regex_list) { | |
24 if (regex_list.empty()) | |
25 return; | |
26 for (size_t i = 0; i < regex_list.size(); ++i) { | |
27 regexes_[regex_list[i]->id()] = regex_list[i]; | |
28 } | |
29 | |
30 RebuildMatcher(); | |
31 } | |
32 | |
33 void RegexSetMatcher::ClearPatterns() { | |
34 regexes_.clear(); | |
35 RebuildMatcher(); | |
36 } | |
37 | |
38 bool RegexSetMatcher::Match(const std::string& text, | |
39 std::set<StringPattern::ID>* matches) const { | |
40 size_t old_number_of_matches = matches->size(); | |
41 if (regexes_.empty()) | |
42 return false; | |
43 if (!filtered_re2_.get()) { | |
44 LOG(ERROR) << "RegexSetMatcher was not initialized"; | |
45 return false; | |
46 } | |
47 | |
48 // FilteredRE2 expects lowercase for prefiltering, but we still | |
49 // match case-sensitively. | |
50 std::vector<RE2ID> atoms(FindSubstringMatches( | |
51 StringToLowerASCII(text))); | |
52 | |
53 std::vector<RE2ID> re2_ids; | |
54 filtered_re2_->AllMatches(text, atoms, &re2_ids); | |
55 | |
56 std::set<StringPattern::ID> matched_ids; | |
57 for (size_t i = 0; i < re2_ids.size(); ++i) { | |
58 StringPattern::ID id = re2_id_map_[re2_ids[i]]; | |
59 matches->insert(id); | |
60 } | |
61 return old_number_of_matches != matches->size(); | |
62 } | |
63 | |
64 std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( | |
65 const std::string& text) const { | |
66 std::set<int> atoms_set; | |
67 substring_matcher_->Match(text, &atoms_set); | |
68 return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end()); | |
69 } | |
70 | |
71 void RegexSetMatcher::RebuildMatcher() { | |
72 re2_id_map_.clear(); | |
73 filtered_re2_.reset(new re2::FilteredRE2()); | |
74 if (regexes_.empty()) | |
75 return; | |
76 | |
77 for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) { | |
78 RE2ID re2_id; | |
79 RE2::ErrorCode error = filtered_re2_->Add( | |
80 it->second->pattern(), RE2::DefaultOptions, &re2_id); | |
81 if (error == RE2::NoError) { | |
82 DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); | |
83 re2_id_map_.push_back(it->first); | |
84 } else { | |
85 // TODO(yoz): Return an unparseable regex error as soon as possible. | |
86 LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " | |
87 << it->second->pattern() << ")"; | |
battre
2012/09/12 18:04:50
I think it would be great if we could report such
Yoyo Zhou
2012/09/12 20:25:56
Yes, I looked into this and it seemed like it woul
battre
2012/09/12 20:50:39
SGTM
| |
88 } | |
89 } | |
90 | |
91 std::vector<std::string> strings_to_match; | |
92 filtered_re2_->Compile(&strings_to_match); | |
93 DeleteSubstringPatterns(); | |
battre
2012/09/12 18:04:50
Here you delete objects, that substring_matcher_ s
Yoyo Zhou
2012/09/12 20:25:56
I just moved the substring_matcher_.reset line to
| |
94 | |
95 // Build SubstringSetMatcher from |strings_to_match|. | |
96 // SubstringSetMatcher doesn't own its strings. | |
97 for (size_t i = 0; i < strings_to_match.size(); ++i) { | |
98 substring_patterns_.push_back( | |
99 new StringPattern(strings_to_match[i], i)); | |
100 } | |
101 substring_matcher_.reset(new SubstringSetMatcher); | |
102 substring_matcher_->RegisterPatterns(substring_patterns_); | |
103 } | |
104 | |
105 void RegexSetMatcher::DeleteSubstringPatterns() { | |
106 STLDeleteElements(&substring_patterns_); | |
107 } | |
108 | |
109 } // namespace extensions | |
OLD | NEW |