Chromium Code Reviews| Index: chrome/common/extensions/matcher/regex_set_matcher.cc |
| diff --git a/chrome/common/extensions/matcher/regex_set_matcher.cc b/chrome/common/extensions/matcher/regex_set_matcher.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..8ae7aa4e4c34776c4c34aca2e94e065635bc3f48 |
| --- /dev/null |
| +++ b/chrome/common/extensions/matcher/regex_set_matcher.cc |
| @@ -0,0 +1,109 @@ |
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "chrome/common/extensions/matcher/regex_set_matcher.h" |
| + |
| +#include "base/logging.h" |
| +#include "base/string_util.h" |
| +#include "base/stl_util.h" |
| +#include "chrome/common/extensions/matcher/substring_set_matcher.h" |
| +#include "third_party/re2/re2/filtered_re2.h" |
| +#include "third_party/re2/re2/re2.h" |
| + |
| +namespace extensions { |
| + |
| +// Creates a matcher with no registered patterns. The FilteredRE2 and the |
| +// substring matcher are only built later, by RebuildMatcher(). |
| +RegexSetMatcher::RegexSetMatcher() {} |
| + |
| +// Destroys the matcher. The substring matcher is released first because it |
| +// was registered with the raw pointers stored in |substring_patterns_| and |
| +// does not own them; freeing the patterns while it is still alive would |
| +// leave it holding dangling pointers. |
| +RegexSetMatcher::~RegexSetMatcher() { |
| +  substring_matcher_.reset(); |
| +  DeleteSubstringPatterns(); |
| +} |
| + |
| +// Registers every pattern in |regex_list|, keyed by its ID (a pattern whose |
| +// ID is already registered replaces the earlier entry), then rebuilds the |
| +// matcher. An empty list is a no-op and does not trigger a rebuild. |
| +void RegexSetMatcher::AddPatterns( |
| +    const std::vector<const StringPattern*>& regex_list) { |
| +  if (!regex_list.empty()) { |
| +    typedef std::vector<const StringPattern*>::const_iterator PatternIter; |
| +    for (PatternIter it = regex_list.begin(); it != regex_list.end(); ++it) { |
| +      const StringPattern* pattern = *it; |
| +      regexes_[pattern->id()] = pattern; |
| +    } |
| + |
| +    RebuildMatcher(); |
| +  } |
| +} |
| + |
| +// Forgets every registered regex and rebuilds the matcher so that the |
| +// compiled state reflects the now-empty pattern set. |
| +void RegexSetMatcher::ClearPatterns() { |
| +  regexes_.clear(); |
| +  RebuildMatcher(); |
| +} |
| + |
| +// Matches |text| against all registered regexes and inserts the IDs of the |
| +// matching patterns into |matches|. IDs already present in |matches| are |
| +// left untouched. Returns true only if at least one ID was newly inserted; |
| +// returns false when no patterns are registered or the matcher has not been |
| +// built yet. |
| +bool RegexSetMatcher::Match(const std::string& text, |
| +                            std::set<StringPattern::ID>* matches) const { |
| +  if (regexes_.empty()) |
| +    return false; |
| +  if (!filtered_re2_.get()) { |
| +    LOG(ERROR) << "RegexSetMatcher was not initialized"; |
| +    return false; |
| +  } |
| + |
| +  // FilteredRE2 expects lowercase for prefiltering, but we still |
| +  // match case-sensitively. |
| +  const std::vector<RE2ID> atoms( |
| +      FindSubstringMatches(StringToLowerASCII(text))); |
| + |
| +  std::vector<RE2ID> re2_ids; |
| +  filtered_re2_->AllMatches(text, atoms, &re2_ids); |
| + |
| +  // Translate FilteredRE2's IDs back into StringPattern IDs. insert() tells |
| +  // us whether the ID was new, which is exactly what the return value reports. |
| +  bool added_new_match = false; |
| +  for (size_t i = 0; i < re2_ids.size(); ++i) { |
| +    if (matches->insert(re2_id_map_[re2_ids[i]]).second) |
| +      added_new_match = true; |
| +  } |
| +  return added_new_match; |
| +} |
| + |
| +// Runs the substring matcher over |text| and returns the IDs of the |
| +// substring patterns it found, in ascending order and without duplicates |
| +// (they are collected in a std::set first). |
| +std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( |
| +    const std::string& text) const { |
| +  std::set<int> matched_atoms; |
| +  substring_matcher_->Match(text, &matched_atoms); |
| + |
| +  std::vector<RE2ID> result; |
| +  result.assign(matched_atoms.begin(), matched_atoms.end()); |
| +  return result; |
| +} |
| + |
| +// Recompiles all matching state from |regexes_|: a fresh FilteredRE2, the |
| +// |re2_id_map_| that translates its IDs back to StringPattern IDs, and the |
| +// SubstringSetMatcher whose results are fed to FilteredRE2 as atoms. |
| +// Regexes that fail to parse are logged and left out. With no registered |
| +// regexes only the FilteredRE2 and the ID map are reset. |
| +void RegexSetMatcher::RebuildMatcher() { |
| +  re2_id_map_.clear(); |
| +  filtered_re2_.reset(new re2::FilteredRE2()); |
| +  if (regexes_.empty()) |
| +    return; |
| + |
| +  for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) { |
| +    RE2ID re2_id; |
| +    RE2::ErrorCode error = filtered_re2_->Add( |
| +        it->second->pattern(), RE2::DefaultOptions, &re2_id); |
| +    if (error == RE2::NoError) { |
| +      // |re2_id_map_| is indexed by the ID FilteredRE2 hands out, so that ID |
| +      // must equal the position the entry is about to be appended at. |
| +      DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); |
| +      re2_id_map_.push_back(it->first); |
| +    } else { |
| +      // TODO(yoz): Return an unparseable regex error as soon as possible. |
| +      LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " |
| +                 << it->second->pattern() << ")";

battre
2012/09/12 18:04:50
I think it would be great if we could report such
Yoyo Zhou
2012/09/12 20:25:56
Yes, I looked into this and it seemed like it woul
battre
2012/09/12 20:50:39
SGTM
|
| +    } |
| +  } |
| + |
| +  std::vector<std::string> strings_to_match; |
| +  filtered_re2_->Compile(&strings_to_match); |
| + |
| +  // Replace the old substring matcher *before* freeing the patterns: it does |
| +  // not own them and would otherwise be left pointing at deleted objects. |
| +  substring_matcher_.reset(new SubstringSetMatcher); |
| +  DeleteSubstringPatterns();

battre
2012/09/12 18:04:50
Here you delete objects, that substring_matcher_ s
Yoyo Zhou
2012/09/12 20:25:56
I just moved the substring_matcher_.reset line to
|
| + |
| +  // Build SubstringSetMatcher from |strings_to_match|. |
| +  // SubstringSetMatcher doesn't own its strings. |
| +  // Each pattern's ID is its index in |strings_to_match|. |
| +  for (size_t i = 0; i < strings_to_match.size(); ++i) { |
| +    substring_patterns_.push_back( |
| +        new StringPattern(strings_to_match[i], i)); |
| +  } |
| +  substring_matcher_->RegisterPatterns(substring_patterns_); |
| +} |
| + |
| +// Frees the heap-allocated StringPatterns held in |substring_patterns_| |
| +// (they are created with new in RebuildMatcher()). |
| +void RegexSetMatcher::DeleteSubstringPatterns() { |
| +  STLDeleteElements(&substring_patterns_); |
| +} |
| + |
| +} // namespace extensions |