Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1663)

Unified Diff: third_party/re2/re2/prefilter_tree.h

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Less intrusive fix for Android Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/prefilter.cc ('k') | third_party/re2/re2/prefilter_tree.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/prefilter_tree.h
diff --git a/third_party/re2/re2/prefilter_tree.h b/third_party/re2/re2/prefilter_tree.h
new file mode 100644
index 0000000000000000000000000000000000000000..596b734ff6a5f05f84780ce693089351bcc7e92b
--- /dev/null
+++ b/third_party/re2/re2/prefilter_tree.h
@@ -0,0 +1,130 @@
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The PrefilterTree class is used to form an AND-OR tree of strings
+// that would trigger each regexp. The 'prefilter' of each regexp is
+// added tp PrefilterTree, and then PrefilterTree is used to find all
+// the unique strings across the prefilters. During search, by using
+// matches from a string matching engine, PrefilterTree deduces the
+// set of regexps that are to be triggered. The 'string matching
+// engine' itself is outside of this class, and the caller can use any
+// favorite engine. PrefilterTree provides a set of strings (called
+// atoms) that the user of this class should use to do the string
+// matching.
+//
+#ifndef RE2_PREFILTER_TREE_H_
+#define RE2_PREFILTER_TREE_H_
+
+#include "util/util.h"
+#include "util/sparse_array.h"
+
+namespace re2 {
+
+typedef SparseArray<int> IntMap;
+
+class Prefilter;
+
+class PrefilterTree {
+ public:
+ PrefilterTree();
+ ~PrefilterTree();
+
+ // Adds the prefilter for the next regexp. Note that we assume that
+ // Add called sequentially for all regexps. All Add calls
+ // must precede Compile.
+ void Add(Prefilter* prefilter);
+
+ // The Compile returns a vector of string in atom_vec.
+ // Call this after all the prefilters are added through Add.
+ // No calls to Add after Compile are allowed.
+ // The caller should use the returned set of strings to do string matching.
+ // Each time a string matches, the corresponding index then has to be
+ // and passed to RegexpsGivenStrings below.
+ void Compile(vector<string>* atom_vec);
+
+ // Given the indices of the atoms that matched, returns the indexes
+ // of regexps that should be searched. The matched_atoms should
+ // contain all the ids of string atoms that were found to match the
+ // content. The caller can use any string match engine to perform
+ // this function. This function is thread safe.
+ void RegexpsGivenStrings(const vector<int>& matched_atoms,
+ vector<int>* regexps) const;
+
+ // Print debug prefilter. Also prints unique ids associated with
+ // nodes of the prefilter of the regexp.
+ void PrintPrefilter(int regexpid);
+
+
+ // Each unique node has a corresponding Entry that helps in
+ // passing the matching trigger information along the tree.
+ struct Entry {
+ public:
+ // How many children should match before this node triggers the
+ // parent. For an atom and an OR node, this is 1 and for an AND
+ // node, it is the number of unique children.
+ int propagate_up_at_count;
+
+ // When this node is ready to trigger the parent, what are the indices
+ // of the parent nodes to trigger. The reason there may be more than
+ // one is because of sharing. For example (abc | def) and (xyz | def)
+ // are two different nodes, but they share the atom 'def'. So when
+ // 'def' matches, it triggers two parents, corresponding to the two
+ // different OR nodes.
+ IntMap* parents;
+
+ // When this node is ready to trigger the parent, what are the
+ // regexps that are triggered.
+ vector<int> regexps;
+ };
+
+ private:
+ // This function assigns unique ids to various parts of the
+ // prefilter, by looking at if these nodes are already in the
+ // PrefilterTree.
+ void AssignUniqueIds(vector<string>* atom_vec);
+
+ // Given the matching atoms, find the regexps to be triggered.
+ void PropagateMatch(const vector<int>& atom_ids,
+ IntMap* regexps) const;
+
+ // Returns the prefilter node that has the same NodeString as this
+ // node. For the canonical node, returns node.
+ Prefilter* CanonicalNode(Prefilter* node);
+
+ // A string that uniquely identifies the node. Assumes that the
+ // children of node has already been assigned unique ids.
+ string NodeString(Prefilter* node) const;
+
+ // Recursively constructs a readable prefilter string.
+ string DebugNodeString(Prefilter* node) const;
+
+ // Used for debugging.
+ void PrintDebugInfo();
+
+ // These are all the nodes formed by Compile. Essentially, there is
+ // one node for each unique atom and each unique AND/OR node.
+ vector<Entry> entries_;
+
+ // Map node string to canonical Prefilter node.
+ map<string, Prefilter*> node_map_;
+
+ // indices of regexps that always pass through the filter (since we
+ // found no required literals in these regexps).
+ vector<int> unfiltered_;
+
+ // vector of Prefilter for all regexps.
+ vector<Prefilter*> prefilter_vec_;
+
+ // Atom index in returned strings to entry id mapping.
+ vector<int> atom_index_to_id_;
+
+ // Has the prefilter tree been compiled.
+ bool compiled_;
+
+ DISALLOW_EVIL_CONSTRUCTORS(PrefilterTree);
+};
+
+} // namespace
+
+#endif // RE2_PREFILTER_TREE_H_
« no previous file with comments | « third_party/re2/re2/prefilter.cc ('k') | third_party/re2/re2/prefilter_tree.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698