Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(192)

Unified Diff: third_party/re2/re2/prefilter_tree.cc

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Less intrusive fix for Android Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/prefilter_tree.h ('k') | third_party/re2/re2/prog.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/prefilter_tree.cc
diff --git a/third_party/re2/re2/prefilter_tree.cc b/third_party/re2/re2/prefilter_tree.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cdcf77e532297f433e061433c1247b60625a8e44
--- /dev/null
+++ b/third_party/re2/re2/prefilter_tree.cc
@@ -0,0 +1,403 @@
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "util/util.h"
+#include "util/flags.h"
+#include "re2/prefilter.h"
+#include "re2/prefilter_tree.h"
+#include "re2/re2.h"
+
+#ifdef WIN32
+#include <stdio.h>
+#define snprintf _snprintf
+#endif
+
+DEFINE_int32(filtered_re2_min_atom_len,
+ 3,
+ "Strings less than this length are not stored as atoms");
+
+namespace re2 {
+
+PrefilterTree::PrefilterTree()
+ : compiled_(false) {
+}
+
+PrefilterTree::~PrefilterTree() {
+ for (int i = 0; i < prefilter_vec_.size(); i++)
+ delete prefilter_vec_[i];
+
+ for (int i = 0; i < entries_.size(); i++)
+ delete entries_[i].parents;
+}
+
+// Functions used for adding and Compiling prefilters to the
+// PrefilterTree.
+static bool KeepPart(Prefilter* prefilter, int level) {
+ if (prefilter == NULL)
+ return false;
+
+ switch (prefilter->op()) {
+ default:
+ LOG(DFATAL) << "Unexpected op in KeepPart: "
+ << prefilter->op();
+ return false;
+
+ case Prefilter::ALL:
+ return false;
+
+ case Prefilter::ATOM:
+ return prefilter->atom().size() >=
+ FLAGS_filtered_re2_min_atom_len;
+
+ case Prefilter::AND: {
+ int j = 0;
+ vector<Prefilter*>* subs = prefilter->subs();
+ for (int i = 0; i < subs->size(); i++)
+ if (KeepPart((*subs)[i], level + 1))
+ (*subs)[j++] = (*subs)[i];
+ else
+ delete (*subs)[i];
+
+ subs->resize(j);
+ return j > 0;
+ }
+
+ case Prefilter::OR:
+ for (int i = 0; i < prefilter->subs()->size(); i++)
+ if (!KeepPart((*prefilter->subs())[i], level + 1))
+ return false;
+ return true;
+ }
+}
+
+void PrefilterTree::Add(Prefilter *f) {
+ if (compiled_) {
+ LOG(DFATAL) << "Add after Compile.";
+ return;
+ }
+ if (f != NULL && !KeepPart(f, 0)) {
+ delete f;
+ f = NULL;
+ }
+
+ prefilter_vec_.push_back(f);
+}
+
+void PrefilterTree::Compile(vector<string>* atom_vec) {
+ if (compiled_) {
+ LOG(DFATAL) << "Compile after Compile.";
+ return;
+ }
+
+ // We do this check to support some legacy uses of
+ // PrefilterTree that call Compile before adding any regexps,
+ // and expect Compile not to have effect.
+ if (prefilter_vec_.empty())
+ return;
+
+ compiled_ = true;
+
+ AssignUniqueIds(atom_vec);
+
+ // Identify nodes that are too common among prefilters and are
+ // triggering too many parents. Then get rid of them if possible.
+ // Note that getting rid of a prefilter node simply means they are
+ // no longer necessary for their parent to trigger; that is, we do
+ // not miss out on any regexps triggering by getting rid of a
+ // prefilter node.
+ for (int i = 0; i < entries_.size(); i++) {
+ IntMap* parents = entries_[i].parents;
+ if (parents->size() > 8) {
+ // This one triggers too many things. If all the parents are AND
+ // nodes and have other things guarding them, then get rid of
+ // this trigger. TODO(vsri): Adjust the threshold appropriately,
+ // make it a function of total number of nodes?
+ bool have_other_guard = true;
+ for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+ have_other_guard = have_other_guard &&
+ (entries_[it->index()].propagate_up_at_count > 1);
+
+ if (have_other_guard) {
+ for (IntMap::iterator it = parents->begin();
+ it != parents->end(); ++it)
+ entries_[it->index()].propagate_up_at_count -= 1;
+
+ parents->clear(); // Forget the parents
+ }
+ }
+ }
+
+ PrintDebugInfo();
+}
+
+Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) {
+ string node_string = NodeString(node);
+ map<string, Prefilter*>::iterator iter = node_map_.find(node_string);
+ if (iter == node_map_.end())
+ return NULL;
+ return (*iter).second;
+}
+
+static string Itoa(int n) {
+ char buf[100];
+ snprintf(buf, sizeof buf, "%d", n);
+ return string(buf);
+}
+
+string PrefilterTree::NodeString(Prefilter* node) const {
+ // Adding the operation disambiguates AND/OR/atom nodes.
+ string s = Itoa(node->op()) + ":";
+ if (node->op() == Prefilter::ATOM) {
+ s += node->atom();
+ } else {
+ for (int i = 0; i < node->subs()->size() ; i++) {
+ if (i > 0)
+ s += ',';
+ s += Itoa((*node->subs())[i]->unique_id());
+ }
+ }
+ return s;
+}
+
+void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
+ atom_vec->clear();
+
+ // Build vector of all filter nodes, sorted topologically
+ // from top to bottom in v.
+ vector<Prefilter*> v;
+
+ // Add the top level nodes of each regexp prefilter.
+ for (int i = 0; i < prefilter_vec_.size(); i++) {
+ Prefilter* f = prefilter_vec_[i];
+ if (f == NULL)
+ unfiltered_.push_back(i);
+
+ // We push NULL also on to v, so that we maintain the
+ // mapping of index==regexpid for level=0 prefilter nodes.
+ v.push_back(f);
+ }
+
+ // Now add all the descendant nodes.
+ for (int i = 0; i < v.size(); i++) {
+ Prefilter* f = v[i];
+ if (f == NULL)
+ continue;
+ if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) {
+ const vector<Prefilter*>& subs = *f->subs();
+ for (int j = 0; j < subs.size(); j++)
+ v.push_back(subs[j]);
+ }
+ }
+
+ // Identify unique nodes.
+ int unique_id = 0;
+ for (int i = v.size() - 1; i >= 0; i--) {
+ Prefilter *node = v[i];
+ if (node == NULL)
+ continue;
+ node->set_unique_id(-1);
+ Prefilter* canonical = CanonicalNode(node);
+ if (canonical == NULL) {
+ // Any further nodes that have the same node string
+ // will find this node as the canonical node.
+ node_map_[NodeString(node)] = node;
+ if (node->op() == Prefilter::ATOM) {
+ atom_vec->push_back(node->atom());
+ atom_index_to_id_.push_back(unique_id);
+ }
+ node->set_unique_id(unique_id++);
+ } else {
+ node->set_unique_id(canonical->unique_id());
+ }
+ }
+ entries_.resize(node_map_.size());
+
+ // Create parent IntMap for the entries.
+ for (int i = v.size() - 1; i >= 0; i--) {
+ Prefilter* prefilter = v[i];
+ if (prefilter == NULL)
+ continue;
+
+ if (CanonicalNode(prefilter) != prefilter)
+ continue;
+
+ Entry* entry = &entries_[prefilter->unique_id()];
+ entry->parents = new IntMap(node_map_.size());
+ }
+
+ // Fill the entries.
+ for (int i = v.size() - 1; i >= 0; i--) {
+ Prefilter* prefilter = v[i];
+ if (prefilter == NULL)
+ continue;
+
+ if (CanonicalNode(prefilter) != prefilter)
+ continue;
+
+ Entry* entry = &entries_[prefilter->unique_id()];
+
+ switch (prefilter->op()) {
+ default:
+ case Prefilter::ALL:
+ LOG(DFATAL) << "Unexpected op: " << prefilter->op();
+ return;
+
+ case Prefilter::ATOM:
+ entry->propagate_up_at_count = 1;
+ break;
+
+ case Prefilter::OR:
+ case Prefilter::AND: {
+ IntMap uniq_child(node_map_.size());
+ for (int j = 0; j < prefilter->subs()->size() ; j++) {
+ Prefilter* child = (*prefilter->subs())[j];
+ Prefilter* canonical = CanonicalNode(child);
+ if (canonical == NULL) {
+ LOG(DFATAL) << "Null canonical node";
+ return;
+ }
+ int child_id = canonical->unique_id();
+ if (!uniq_child.has_index(child_id))
+ uniq_child.set_new(child_id, 1);
+ // To the child, we want to add to parent indices.
+ Entry* child_entry = &entries_[child_id];
+ if (!child_entry->parents->has_index(prefilter->unique_id()))
+ child_entry->parents->set_new(prefilter->unique_id(), 1);
+ }
+ entry->propagate_up_at_count =
+ prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
+
+ break;
+ }
+ }
+ }
+
+ // For top level nodes, populate regexp id.
+ for (int i = 0; i < prefilter_vec_.size(); i++) {
+ if (prefilter_vec_[i] == NULL)
+ continue;
+ int id = CanonicalNode(prefilter_vec_[i])->unique_id();
+ DCHECK_LE(0, id);
+ Entry* entry = &entries_[id];
+ entry->regexps.push_back(i);
+ }
+}
+
+// Functions for triggering during search.
+void PrefilterTree::RegexpsGivenStrings(
+ const vector<int>& matched_atoms,
+ vector<int>* regexps) const {
+ regexps->clear();
+ if (!compiled_) {
+ LOG(WARNING) << "Compile() not called";
+ for (int i = 0; i < prefilter_vec_.size(); ++i)
+ regexps->push_back(i);
+ } else {
+ if (!prefilter_vec_.empty()) {
+ IntMap regexps_map(prefilter_vec_.size());
+ vector<int> matched_atom_ids;
+ for (int j = 0; j < matched_atoms.size(); j++) {
+ matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
+ VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]];
+ }
+ PropagateMatch(matched_atom_ids, &regexps_map);
+ for (IntMap::iterator it = regexps_map.begin();
+ it != regexps_map.end();
+ ++it)
+ regexps->push_back(it->index());
+
+ regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());
+ }
+ }
+ sort(regexps->begin(), regexps->end());
+}
+
+void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
+ IntMap* regexps) const {
+ IntMap count(entries_.size());
+ IntMap work(entries_.size());
+ for (int i = 0; i < atom_ids.size(); i++)
+ work.set(atom_ids[i], 1);
+ for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
+ const Entry& entry = entries_[it->index()];
+ VLOG(10) << "Processing: " << it->index();
+ // Record regexps triggered.
+ for (int i = 0; i < entry.regexps.size(); i++) {
+ VLOG(10) << "Regexp triggered: " << entry.regexps[i];
+ regexps->set(entry.regexps[i], 1);
+ }
+ int c;
+ // Pass trigger up to parents.
+ for (IntMap::iterator it = entry.parents->begin();
+ it != entry.parents->end();
+ ++it) {
+ int j = it->index();
+ const Entry& parent = entries_[j];
+ VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
+ // Delay until all the children have succeeded.
+ if (parent.propagate_up_at_count > 1) {
+ if (count.has_index(j)) {
+ c = count.get_existing(j) + 1;
+ count.set_existing(j, c);
+ } else {
+ c = 1;
+ count.set_new(j, c);
+ }
+ if (c < parent.propagate_up_at_count)
+ continue;
+ }
+ VLOG(10) << "Triggering: " << j;
+ // Trigger the parent.
+ work.set(j, 1);
+ }
+ }
+}
+
+// Debugging help.
+void PrefilterTree::PrintPrefilter(int regexpid) {
+ LOG(INFO) << DebugNodeString(prefilter_vec_[regexpid]);
+}
+
+void PrefilterTree::PrintDebugInfo() {
+ VLOG(10) << "#Unique Atoms: " << atom_index_to_id_.size();
+ VLOG(10) << "#Unique Nodes: " << entries_.size();
+
+ for (int i = 0; i < entries_.size(); ++i) {
+ IntMap* parents = entries_[i].parents;
+ const vector<int>& regexps = entries_[i].regexps;
+ VLOG(10) << "EntryId: " << i
+ << " N: " << parents->size() << " R: " << regexps.size();
+ for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+ VLOG(10) << it->index();
+ }
+ VLOG(10) << "Map:";
+ for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
+ iter != node_map_.end(); ++iter)
+ VLOG(10) << "NodeId: " << (*iter).second->unique_id()
+ << " Str: " << (*iter).first;
+}
+
+string PrefilterTree::DebugNodeString(Prefilter* node) const {
+ string node_string = "";
+
+ if (node->op() == Prefilter::ATOM) {
+ DCHECK(!node->atom().empty());
+ node_string += node->atom();
+ } else {
+ // Adding the operation disambiguates AND and OR nodes.
+ node_string += node->op() == Prefilter::AND ? "AND" : "OR";
+ node_string += "(";
+ for (int i = 0; i < node->subs()->size() ; i++) {
+ if (i > 0)
+ node_string += ',';
+ node_string += Itoa((*node->subs())[i]->unique_id());
+ node_string += ":";
+ node_string += DebugNodeString((*node->subs())[i]);
+ }
+ node_string += ")";
+ }
+ return node_string;
+}
+
+} // namespace re2
« no previous file with comments | « third_party/re2/re2/prefilter_tree.h ('k') | third_party/re2/re2/prog.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698