Index: third_party/re2/re2/filtered_re2.h |
diff --git a/third_party/re2/re2/filtered_re2.h b/third_party/re2/re2/filtered_re2.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..64b35be6c2c392e5f5a6b5d5093b4f28eda0ae70 |
--- /dev/null |
+++ b/third_party/re2/re2/filtered_re2.h |
@@ -0,0 +1,101 @@ |
+// Copyright 2009 The RE2 Authors. All Rights Reserved. |
+// Use of this source code is governed by a BSD-style |
+// license that can be found in the LICENSE file. |
+ |
+// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. |
+// It provides a prefilter mechanism that helps in cutting down the |
+// number of regexps that need to be actually searched. |
+// |
+// By design, it does not include a string matching engine. This is to |
+// allow the user of the class to use their favorite string match |
+// engine. The overall flow is: Add all the regexps using Add, then |
+// Compile the FilteredRE2. The compile returns strings that need to |
+// be matched. Note that all returned strings are lowercase. For |
+// applying regexps to a search text, the caller does the string |
+// matching using the strings returned. When doing the string match, |
+// note that the caller has to do that on a lowercased version of the |
+// search text. Then call FirstMatch or AllMatches with a vector of |
+// indices of strings that were found in the text to get the actual |
+// regexp matches. |
+ |
+#ifndef RE2_FILTERED_RE2_H_ |
+#define RE2_FILTERED_RE2_H_ |
+ |
+#include <vector> |
+#include "re2/re2.h" |
+ |
+namespace re2 { |
+using std::vector; |
+ |
+class PrefilterTree; |
+ |
+class FilteredRE2 { |
+ public: |
+ FilteredRE2(); |
+ ~FilteredRE2(); |
+ |
+ // Creates a RE2 object (re) from pattern and options via the RE2 constructor |
+ // and returns re->error_code(). If that is other than NoError, re is deleted |
+ // and not added to re2_vec_. NOTE(review): id presumably receives the index of the added regexp -- confirm. |
+ RE2::ErrorCode Add(const StringPiece& pattern, |
+ const RE2::Options& options, |
+ int *id); |
+ |
+ // Prepares the regexps added by Add for filtering. Hands back, through |
+ // strings_to_match, the set of strings that the caller should check |
+ // for in candidate texts. These strings are lowercased, so the search |
+ // text should be lowercased first when looking for occurrences of the |
+ // strings produced by Compile. Call this only after all Add calls |
+ // are done. |
+ void Compile(vector<string>* strings_to_match); |
+ |
+ // Returns the index of the first matching regexp, or -1 on no |
+ // match. Can be called prior to Compile. |
+ // Does not do any filtering: simply tries to Match the |
+ // regexps in a loop. |
+ int SlowFirstMatch(const StringPiece& text) const; |
+ |
+ // Returns the index of the first matching regexp, or -1 on no match. |
+ // atoms lists the indices of the strings that were found in text. |
+ // Compile has to be called before calling this. |
+ int FirstMatch(const StringPiece& text, |
+ const vector<int>& atoms) const; |
+ |
+ // Fills matching_regexps, after first clearing it, with the indices of all |
+ // matching regexps. NOTE(review): meaning of the bool result is not shown here -- confirm. |
+ bool AllMatches(const StringPiece& text, |
+ const vector<int>& atoms, |
+ vector<int>* matching_regexps) const; |
+ |
+ // Returns the number of regexps added, i.e. the size of re2_vec_. |
+ int NumRegexps() const { return re2_vec_.size(); } |
+ |
+ private: |
+ |
+ // Returns re2_vec_[regexpid], with no bounds check. Useful for testing. |
+ RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; } |
+ |
+ // Print prefilter (presumably the one for regexp regexpid -- confirm). |
+ void PrintPrefilter(int regexpid); |
+ |
+ // Useful for testing and debugging. Takes matched_atoms; writes passed_regexps. |
+ void RegexpsGivenStrings(const vector<int>& matched_atoms, |
+ vector<int>* passed_regexps); |
+ |
+ // All the regexps in the FilteredRE2. |
+ vector<RE2*> re2_vec_; |
+ |
+ // Whether the FilteredRE2 has been compiled using Compile(). |
+ bool compiled_; |
+ |
+ // An AND-OR tree of string atoms used for filtering regexps. |
+ PrefilterTree* prefilter_tree_; |
+ |
+ // Private copy constructor/assignment, in place of DISALLOW_EVIL_CONSTRUCTORS(FilteredRE2). |
+ FilteredRE2(const FilteredRE2&); |
+ void operator=(const FilteredRE2&); |
+}; |
+ |
+} // namespace re2 |
+ |
+#endif // RE2_FILTERED_RE2_H_ |