Index: third_party/re2/re2/testing/filtered_re2_test.cc |
diff --git a/third_party/re2/re2/testing/filtered_re2_test.cc b/third_party/re2/re2/testing/filtered_re2_test.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..7755d30622e29ac7e65f6244a45c4c11eb37918c |
--- /dev/null |
+++ b/third_party/re2/re2/testing/filtered_re2_test.cc |
@@ -0,0 +1,258 @@ |
+// Copyright 2009 The RE2 Authors. All Rights Reserved. |
+// Use of this source code is governed by a BSD-style |
+// license that can be found in the LICENSE file. |
+ |
+#include "util/test.h" |
+#include "re2/filtered_re2.h" |
+#include "re2/re2.h" |
+ |
+DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc |
+ |
+namespace re2 { |
+ |
+struct FilterTestVars { |
+ vector<string> atoms; |
+ vector<int> atom_indices; |
+ vector<int> matches; |
+ RE2::Options opts; |
+ FilteredRE2 f; |
+}; |
+ |
+TEST(FilteredRE2Test, EmptyTest) { |
+ FilterTestVars v; |
+ v.f.AllMatches("foo", v.atom_indices, &v.matches); |
+ EXPECT_EQ(0, v.matches.size()); |
+} |
+ |
+TEST(FilteredRE2Test, SmallOrTest) { |
+ FLAGS_filtered_re2_min_atom_len = 4; |
+ |
+ FilterTestVars v; |
+ int id; |
+ v.f.Add("(foo|bar)", v.opts, &id); |
+ |
+ v.f.Compile(&v.atoms); |
+ EXPECT_EQ(0, v.atoms.size()); |
+ |
+ v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches); |
+ EXPECT_EQ(1, v.matches.size()); |
+ EXPECT_EQ(id, v.matches[0]); |
+} |
+ |
+struct AtomTest { |
+ const char* testname; |
+ // If any test needs more than this many regexps or atoms, increase |
+ // the size of the corresponding array. |
+ const char* regexps[20]; |
+ const char* atoms[20]; |
+}; |
+ |
+AtomTest atom_tests[] = { |
+ { |
+ // This test checks to make sure empty patterns are allowed. |
+ "CheckEmptyPattern", |
+ {""}, |
+ {} |
+ }, { |
+ // This test checks that all atoms of length greater than min length |
+ // are found, and no atoms that are of smaller length are found. |
+ "AllAtomsGtMinLengthFound", { |
+ "(abc123|def456|ghi789).*mnop[x-z]+", |
+ "abc..yyy..zz", |
+ "mnmnpp[a-z]+PPP" |
+ }, { |
+ "abc123", |
+ "def456", |
+ "ghi789", |
+ "mnop", |
+ "abc", |
+ "yyy", |
+ "mnmnpp", |
+ "ppp" |
+ } |
+ }, { |
+ // Test to make sure that any atoms that have another atom as a |
+ // substring in an OR are removed; that is, only the shortest |
+ // substring is kept. |
+ "SubstrAtomRemovesSuperStrInOr", { |
+ "(abc123|abc|ghi789|abc1234).*[x-z]+", |
+ "abcd..yyy..yyyzzz", |
+ "mnmnpp[a-z]+PPP" |
+ }, { |
+ "abc", |
+ "ghi789", |
+ "abcd", |
+ "yyy", |
+ "yyyzzz", |
+ "mnmnpp", |
+ "ppp" |
+ } |
+ }, { |
+ // Test character class expansion. |
+ "CharClassExpansion", { |
+ "m[a-c][d-f]n.*[x-z]+", |
+ "[x-y]bcde[ab]" |
+ }, { |
+ "madn", "maen", "mafn", |
+ "mbdn", "mben", "mbfn", |
+ "mcdn", "mcen", "mcfn", |
+ "xbcdea", "xbcdeb", |
+ "ybcdea", "ybcdeb" |
+ } |
+ }, { |
+ // Test upper/lower of non-ASCII. |
+ "UnicodeLower", { |
+ "(?i)ΔδΠϖπΣςσ", |
+ "ΛΜΝΟΠ", |
+ "ψρστυ", |
+ }, { |
+ "δδπππσσσ", |
+ "λμνοπ", |
+ "ψρστυ", |
+ }, |
+ }, |
+}; |
+ |
+void AddRegexpsAndCompile(const char* regexps[], |
+ int n, |
+ struct FilterTestVars* v) { |
+ for (int i = 0; i < n; i++) { |
+ int id; |
+ v->f.Add(regexps[i], v->opts, &id); |
+ } |
+ v->f.Compile(&v->atoms); |
+} |
+ |
+bool CheckExpectedAtoms(const char* atoms[], |
+ int n, |
+ const char* testname, |
+ struct FilterTestVars* v) { |
+ vector<string> expected; |
+ for (int i = 0; i < n; i++) |
+ expected.push_back(atoms[i]); |
+ |
+ bool pass = expected.size() == v->atoms.size(); |
+ |
+ sort(v->atoms.begin(), v->atoms.end()); |
+ sort(expected.begin(), expected.end()); |
+ for (int i = 0; pass && i < n; i++) |
+ pass = pass && expected[i] == v->atoms[i]; |
+ |
+ if (!pass) { |
+ LOG(WARNING) << "Failed " << testname; |
+ LOG(WARNING) << "Expected #atoms = " << expected.size(); |
+ for (int i = 0; i < expected.size(); i++) |
+ LOG(WARNING) << expected[i]; |
+ LOG(WARNING) << "Found #atoms = " << v->atoms.size(); |
+ for (int i = 0; i < v->atoms.size(); i++) |
+ LOG(WARNING) << v->atoms[i]; |
+ } |
+ |
+ return pass; |
+} |
+ |
+TEST(FilteredRE2Test, AtomTests) { |
+ FLAGS_filtered_re2_min_atom_len = 3; |
+ |
+ int nfail = 0; |
+ for (int i = 0; i < arraysize(atom_tests); i++) { |
+ FilterTestVars v; |
+ AtomTest* t = &atom_tests[i]; |
+ int natom, nregexp; |
+ for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) |
+ if (t->regexps[nregexp] == NULL) |
+ break; |
+ for (natom = 0; natom < arraysize(t->atoms); natom++) |
+ if (t->atoms[natom] == NULL) |
+ break; |
+ AddRegexpsAndCompile(t->regexps, nregexp, &v); |
+ if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v)) |
+ nfail++; |
+ } |
+ EXPECT_EQ(0, nfail); |
+} |
+ |
+void FindAtomIndices(const vector<string> atoms, |
+ const vector<string> matched_atoms, |
+ vector<int>* atom_indices) { |
+ atom_indices->clear(); |
+ for (int i = 0; i < matched_atoms.size(); i++) { |
+ int j = 0; |
+ for (; j < atoms.size(); j++) { |
+ if (matched_atoms[i] == atoms[j]) { |
+ atom_indices->push_back(j); |
+ break; |
+ } |
+ EXPECT_LT(j, atoms.size()); |
+ } |
+ } |
+} |
+ |
+TEST(FilteredRE2Test, MatchEmptyPattern) { |
+ FLAGS_filtered_re2_min_atom_len = 3; |
+ |
+ FilterTestVars v; |
+ AtomTest* t = &atom_tests[0]; |
+ // We are using the regexps used in one of the atom tests |
+ // for this test. Adding the EXPECT here to make sure |
+ // the index we use for the test is for the correct test. |
+ EXPECT_EQ("CheckEmptyPattern", string(t->testname)); |
+ int nregexp; |
+ for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) |
+ if (t->regexps[nregexp] == NULL) |
+ break; |
+ AddRegexpsAndCompile(t->regexps, nregexp, &v); |
+ string text = "0123"; |
+ vector<int> atom_ids; |
+ vector<int> matching_regexps; |
+ EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids)); |
+} |
+ |
+TEST(FilteredRE2Test, MatchTests) { |
+ FLAGS_filtered_re2_min_atom_len = 3; |
+ |
+ FilterTestVars v; |
+ AtomTest* t = &atom_tests[2]; |
+ // We are using the regexps used in one of the atom tests |
+ // for this test. |
+ EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname)); |
+ int nregexp; |
+ for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) |
+ if (t->regexps[nregexp] == NULL) |
+ break; |
+ AddRegexpsAndCompile(t->regexps, nregexp, &v); |
+ |
+ string text = "abc121212xyz"; |
+ // atoms = abc |
+ vector<int> atom_ids; |
+ vector<string> atoms; |
+ atoms.push_back("abc"); |
+ FindAtomIndices(v.atoms, atoms, &atom_ids); |
+ vector<int> matching_regexps; |
+ v.f.AllMatches(text, atom_ids, &matching_regexps); |
+ EXPECT_EQ(1, matching_regexps.size()); |
+ |
+ text = "abc12312yyyzzz"; |
+ atoms.clear(); |
+ atoms.push_back("abc"); |
+ atoms.push_back("yyy"); |
+ atoms.push_back("yyyzzz"); |
+ FindAtomIndices(v.atoms, atoms, &atom_ids); |
+ v.f.AllMatches(text, atom_ids, &matching_regexps); |
+ EXPECT_EQ(1, matching_regexps.size()); |
+ |
+ text = "abcd12yyy32yyyzzz"; |
+ atoms.clear(); |
+ atoms.push_back("abc"); |
+ atoms.push_back("abcd"); |
+ atoms.push_back("yyy"); |
+ atoms.push_back("yyyzzz"); |
+ FindAtomIndices(v.atoms, atoms, &atom_ids); |
+ LOG(INFO) << "S: " << atom_ids.size(); |
+ for (int i = 0; i < atom_ids.size(); i++) |
+ LOG(INFO) << "i: " << i << " : " << atom_ids[i]; |
+ v.f.AllMatches(text, atom_ids, &matching_regexps); |
+ EXPECT_EQ(2, matching_regexps.size()); |
+} |
+ |
+} // namespace re2 |