| Index: chrome/common/extensions/matcher/regex_set_matcher.cc
|
| diff --git a/chrome/common/extensions/matcher/regex_set_matcher.cc b/chrome/common/extensions/matcher/regex_set_matcher.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..455bece0bd7c662ceb1a9683e3471f95fe362d36
|
| --- /dev/null
|
| +++ b/chrome/common/extensions/matcher/regex_set_matcher.cc
|
| @@ -0,0 +1,109 @@
|
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "chrome/common/extensions/matcher/regex_set_matcher.h"
|
| +
|
| +#include "base/logging.h"
|
| +#include "base/string_util.h"
|
| +#include "base/stl_util.h"
|
| +#include "chrome/common/extensions/matcher/substring_set_matcher.h"
|
| +#include "third_party/re2/re2/filtered_re2.h"
|
| +#include "third_party/re2/re2/re2.h"
|
| +
|
| +namespace extensions {
|
| +
|
| +RegexSetMatcher::RegexSetMatcher() {}  // State is built by RebuildMatcher().
|
| +
|
| +RegexSetMatcher::~RegexSetMatcher() {
|
| + DeleteSubstringPatterns();  // Frees the StringPatterns new'd in RebuildMatcher().
|
| +}
|
| +
|
| +// Registers every pattern in |regex_list| under its own ID and rebuilds the
|
| +// matcher. The patterns are stored by pointer only; they are not copied.
|
| +void RegexSetMatcher::AddPatterns(
|
| +    const std::vector<const StringPattern*>& regex_list) {
|
| +  // Nothing new to register, so the existing matcher stays as it is.
|
| +  if (regex_list.empty())
|
| +    return;
|
| +
|
| +  typedef std::vector<const StringPattern*>::const_iterator PatternIterator;
|
| +  for (PatternIterator pattern = regex_list.begin();
|
| +       pattern != regex_list.end(); ++pattern) {
|
| +    // A pattern whose ID is already registered replaces the earlier entry.
|
| +    regexes_[(*pattern)->id()] = *pattern;
|
| +  }
|
| +
|
| +  RebuildMatcher();
|
| +}
|
| +
|
| +void RegexSetMatcher::ClearPatterns() {  // Unregisters every regex.
|
| + regexes_.clear();
|
| + RebuildMatcher();  // Resets filtered_re2_ and re2_id_map_ to the empty state.
|
| +}
|
| +
|
| +// Matches |text| against the registered regexes that compiled successfully
|
| +// and inserts the IDs of the matching patterns into |matches|. Entries that
|
| +// are already in |matches| are kept. Returns true iff at least one ID was
|
| +// newly added to |matches|.
|
| +bool RegexSetMatcher::Match(const std::string& text,
|
| +                            std::set<StringPattern::ID>* matches) const {
|
| +  size_t old_number_of_matches = matches->size();
|
| +  if (regexes_.empty())
|
| +    return false;
|
| +  if (!filtered_re2_.get()) {
|
| +    LOG(ERROR) << "RegexSetMatcher was not initialized";
|
| +    return false;
|
| +  }
|
| +
|
| +  // FilteredRE2 expects lowercase for prefiltering, but we still
|
| +  // match case-sensitively.
|
| +  std::vector<RE2ID> atoms(FindSubstringMatches(
|
| +      StringToLowerASCII(text)));
|
| +
|
| +  std::vector<RE2ID> re2_ids;
|
| +  filtered_re2_->AllMatches(text, atoms, &re2_ids);
|
| +
|
| +  // Translate FilteredRE2's indices back into the callers' pattern IDs.
|
| +  for (size_t i = 0; i < re2_ids.size(); ++i)
|
| +    matches->insert(re2_id_map_[re2_ids[i]]);
|
| +
|
| +  // Compare sizes rather than counting hits: an ID that was already present
|
| +  // in |matches| must not count as a new match.
|
| +  return old_number_of_matches != matches->size();
|
| +}
|
| +
|
| +// Runs the substring matcher over |text| and returns the IDs of the
|
| +// substring patterns it reports, in ascending order without duplicates.
|
| +std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
|
| +    const std::string& text) const {
|
| +  std::set<int> matched_atoms;
|
| +  substring_matcher_->Match(text, &matched_atoms);
|
| +
|
| +  // Callers need a vector, so copy the set's contents over in order.
|
| +  std::vector<RE2ID> atoms;
|
| +  atoms.assign(matched_atoms.begin(), matched_atoms.end());
|
| +  return atoms;
|
| +}
|
| +
|
| +// Rebuilds all matching state from |regexes_|: the FilteredRE2 instance, the
|
| +// map from FilteredRE2 indices to pattern IDs, and the substring matcher
|
| +// that is fed with the strings FilteredRE2::Compile() asks for.
|
| +void RegexSetMatcher::RebuildMatcher() {
|
| +  re2_id_map_.clear();
|
| +  filtered_re2_.reset(new re2::FilteredRE2());
|
| +
|
| +  // Drop the previous substring state up front so that it is released even
|
| +  // when no regexes are left (e.g. after ClearPatterns()). The matcher is
|
| +  // replaced before the patterns are deleted because it doesn't own them.
|
| +  substring_matcher_.reset(new SubstringSetMatcher);
|
| +  DeleteSubstringPatterns();
|
| +
|
| +  if (regexes_.empty())
|
| +    return;
|
| +
|
| +  for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) {
|
| +    RE2ID re2_id;
|
| +    RE2::ErrorCode error = filtered_re2_->Add(
|
| +        it->second->pattern(), RE2::DefaultOptions, &re2_id);
|
| +    if (error == RE2::NoError) {
|
| +      // Only successfully added regexes get an entry, so |re2_id| must be
|
| +      // the next free index of |re2_id_map_|.
|
| +      DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
|
| +      re2_id_map_.push_back(it->first);
|
| +    } else {
|
| +      // TODO(yoz): Return an unparseable regex error as soon as possible.
|
| +      LOG(ERROR) << "Could not parse regex (id=" << it->first << ", "
|
| +                 << it->second->pattern() << ")";
|
| +    }
|
| +  }
|
| +
|
| +  std::vector<std::string> strings_to_match;
|
| +  filtered_re2_->Compile(&strings_to_match);
|
| +
|
| +  // Build SubstringSetMatcher from |strings_to_match|. Each pattern's ID is
|
| +  // its index in |strings_to_match|; Match() hands the matched IDs back to
|
| +  // FilteredRE2 as atoms.
|
| +  // SubstringSetMatcher doesn't own its strings.
|
| +  for (size_t i = 0; i < strings_to_match.size(); ++i) {
|
| +    substring_patterns_.push_back(
|
| +        new StringPattern(strings_to_match[i], i));
|
| +  }
|
| +  substring_matcher_->RegisterPatterns(substring_patterns_);
|
| +}
|
| +
|
| +void RegexSetMatcher::DeleteSubstringPatterns() {
|
| + STLDeleteElements(&substring_patterns_);  // Patterns new'd in RebuildMatcher().
|
| +}
|
| +
|
| +} // namespace extensions
|
|
|