Index: third_party/re2/re2/testing/possible_match_test.cc |
diff --git a/third_party/re2/re2/testing/possible_match_test.cc b/third_party/re2/re2/testing/possible_match_test.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..7c2400eb5b4320670859db9a2624c761e36c357c |
--- /dev/null |
+++ b/third_party/re2/re2/testing/possible_match_test.cc |
@@ -0,0 +1,240 @@ |
+// Copyright 2006-2008 The RE2 Authors. All Rights Reserved. |
+// Use of this source code is governed by a BSD-style |
+// license that can be found in the LICENSE file. |
+ |
+#include <vector> |
+#include "util/test.h" |
+#include "re2/prog.h" |
+#include "re2/re2.h" |
+#include "re2/regexp.h" |
+#include "re2/testing/regexp_generator.h" |
+#include "re2/testing/string_generator.h" |
+ |
+namespace re2 { |
+ |
+// Test that C++ strings are compared as uint8s, not int8s. |
+// PossibleMatchRange doesn't depend on this, but callers probably will. |
+TEST(CplusplusStrings, EightBit) { |
+ string s = "\x70"; |
+ string t = "\xA0"; |
+ EXPECT_LT(s, t); |
+} |
+ |
+struct PrefixTest { |
+ const char* regexp; |
+ int maxlen; |
+ const char* min; |
+ const char* max; |
+}; |
+ |
+static PrefixTest tests[] = { |
+ { "", 10, "", "", }, |
+ { "Abcdef", 10, "Abcdef", "Abcdef" }, |
+ { "abc(def|ghi)", 10, "abcdef", "abcghi" }, |
+ { "a+hello", 10, "aa", "ahello" }, |
+ { "a*hello", 10, "a", "hello" }, |
+ { "def|abc", 10, "abc", "def" }, |
+ { "a(b)(c)[d]", 10, "abcd", "abcd" }, |
+ { "ab(cab|cat)", 10, "abcab", "abcat" }, |
+ { "ab(cab|ca)x", 10, "abcabx", "abcax" }, |
+ { "(ab|x)(c|de)", 10, "abc", "xde" }, |
+ { "(ab|x)?(c|z)?", 10, "", "z" }, |
+ { "[^\\s\\S]", 10, "", "" }, |
+ { "(abc)+", 5, "abc", "abcac" }, |
+ { "(abc)+", 2, "ab", "ac" }, |
+ { "(abc)+", 1, "a", "b" }, |
+ { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, |
+ { "a*", 10, "", "ab" }, |
+ |
+ { "(?i)Abcdef", 10, "ABCDEF", "abcdef" }, |
+ { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" }, |
+ { "(?i)a+hello", 10, "AA", "ahello" }, |
+ { "(?i)a*hello", 10, "A", "hello" }, |
+ { "(?i)def|abc", 10, "ABC", "def" }, |
+ { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" }, |
+ { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" }, |
+ { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" }, |
+ { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" }, |
+ { "(?i)(ab|x)?(c|z)?", 10, "", "z" }, |
+ { "(?i)[^\\s\\S]", 10, "", "" }, |
+ { "(?i)(abc)+", 5, "ABC", "abcac" }, |
+ { "(?i)(abc)+", 2, "AB", "ac" }, |
+ { "(?i)(abc)+", 1, "A", "b" }, |
+ { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, |
+ { "(?i)a*", 10, "", "ab" }, |
+ { "(?i)A*", 10, "", "ab" }, |
+ |
+ { "\\AAbcdef", 10, "Abcdef", "Abcdef" }, |
+ { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" }, |
+ { "\\Aa+hello", 10, "aa", "ahello" }, |
+ { "\\Aa*hello", 10, "a", "hello" }, |
+ { "\\Adef|abc", 10, "abc", "def" }, |
+ { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" }, |
+ { "\\Aab(cab|cat)", 10, "abcab", "abcat" }, |
+ { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" }, |
+ { "\\A(ab|x)(c|de)", 10, "abc", "xde" }, |
+ { "\\A(ab|x)?(c|z)?", 10, "", "z" }, |
+ { "\\A[^\\s\\S]", 10, "", "" }, |
+ { "\\A(abc)+", 5, "abc", "abcac" }, |
+ { "\\A(abc)+", 2, "ab", "ac" }, |
+ { "\\A(abc)+", 1, "a", "b" }, |
+ { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, |
+ { "\\Aa*", 10, "", "ab" }, |
+ |
+ { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" }, |
+ { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" }, |
+ { "(?i)\\Aa+hello", 10, "AA", "ahello" }, |
+ { "(?i)\\Aa*hello", 10, "A", "hello" }, |
+ { "(?i)\\Adef|abc", 10, "ABC", "def" }, |
+ { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" }, |
+ { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" }, |
+ { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" }, |
+ { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" }, |
+ { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" }, |
+ { "(?i)\\A[^\\s\\S]", 10, "", "" }, |
+ { "(?i)\\A(abc)+", 5, "ABC", "abcac" }, |
+ { "(?i)\\A(abc)+", 2, "AB", "ac" }, |
+ { "(?i)\\A(abc)+", 1, "A", "b" }, |
+ { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, |
+ { "(?i)\\Aa*", 10, "", "ab" }, |
+ { "(?i)\\AA*", 10, "", "ab" }, |
+}; |
+ |
+TEST(PossibleMatchRange, HandWritten) { |
+ for (int i = 0; i < arraysize(tests); i++) { |
+ for (int j = 0; j < 2; j++) { |
+ const PrefixTest& t = tests[i]; |
+ string min, max; |
+ if (j == 0) { |
+ LOG(INFO) << "Checking regexp=" << CEscape(t.regexp); |
+ Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); |
+ CHECK(re); |
+ Prog* prog = re->CompileToProg(0); |
+ CHECK(prog); |
+ CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen)) |
+ << " " << t.regexp; |
+ delete prog; |
+ re->Decref(); |
+ } else { |
+ CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen)); |
+ } |
+ EXPECT_EQ(t.min, min) << t.regexp; |
+ EXPECT_EQ(t.max, max) << t.regexp; |
+ } |
+ } |
+} |
+ |
+// Test cases where PossibleMatchRange should return false. |
+TEST(PossibleMatchRange, Failures) { |
+ string min, max; |
+ |
+ // Fails because no room to write max. |
+ EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0)); |
+ |
+ // Fails because there is no max -- any non-empty string matches |
+ // or begins a match. Have to use Latin-1 input, because there |
+ // are no valid UTF-8 strings beginning with byte 0xFF. |
+ EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1). |
+ PossibleMatchRange(&min, &max, 10)) |
+ << "min=" << CEscape(min) << ", max=" << CEscape(max); |
+ EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1). |
+ PossibleMatchRange(&min, &max, 10)) |
+ << "min=" << CEscape(min) << ", max=" << CEscape(max); |
+ EXPECT_FALSE(RE2(".+hello", RE2::Latin1). |
+ PossibleMatchRange(&min, &max, 10)) |
+ << "min=" << CEscape(min) << ", max=" << CEscape(max); |
+ EXPECT_FALSE(RE2(".*hello", RE2::Latin1). |
+ PossibleMatchRange(&min, &max, 10)) |
+ << "min=" << CEscape(min) << ", max=" << CEscape(max); |
+ EXPECT_FALSE(RE2(".*", RE2::Latin1). |
+ PossibleMatchRange(&min, &max, 10)) |
+ << "min=" << CEscape(min) << ", max=" << CEscape(max); |
+ EXPECT_FALSE(RE2("\\C*"). |
+ PossibleMatchRange(&min, &max, 10)) |
+ << "min=" << CEscape(min) << ", max=" << CEscape(max); |
+ |
+ // Fails because it's a malformed regexp. |
+ EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10)) |
+ << "min=" << CEscape(min) << ", max=" << CEscape(max); |
+} |
+ |
+// Exhaustive test: generate all regexps within parameters, |
+// then generate all strings of a given length over a given alphabet, |
+// then check that the prefix information agrees with whether |
+// the regexp matches each of the strings. |
+class PossibleMatchTester : public RegexpGenerator { |
+ public: |
+ PossibleMatchTester(int maxatoms, |
+ int maxops, |
+ const vector<string>& alphabet, |
+ const vector<string>& ops, |
+ int maxstrlen, |
+ const vector<string>& stralphabet) |
+ : RegexpGenerator(maxatoms, maxops, alphabet, ops), |
+ strgen_(maxstrlen, stralphabet), |
+ regexps_(0), tests_(0) { } |
+ |
+ int regexps() { return regexps_; } |
+ int tests() { return tests_; } |
+ |
+ // Needed for RegexpGenerator interface. |
+ void HandleRegexp(const string& regexp); |
+ |
+ private: |
+ StringGenerator strgen_; |
+ |
+ int regexps_; // Number of HandleRegexp calls |
+ int tests_; // Number of regexp tests. |
+ |
+ DISALLOW_EVIL_CONSTRUCTORS(PossibleMatchTester); |
+}; |
+ |
+// Processes a single generated regexp. |
+// Checks that all accepted strings agree with the prefix range. |
+void PossibleMatchTester::HandleRegexp(const string& regexp) { |
+ regexps_++; |
+ |
+ VLOG(3) << CEscape(regexp); |
+ |
+ RE2 re(regexp, RE2::Latin1); |
+ CHECK_EQ(re.error(), ""); |
+ |
+ string min, max; |
+ if(!re.PossibleMatchRange(&min, &max, 10)) { |
+ // There's no good max for "\\C*". Can't use strcmp |
+ // because sometimes it gets embedded in more |
+ // complicated expressions. |
+ if(strstr(regexp.c_str(), "\\C*")) |
+ return; |
+ LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp); |
+ } |
+ |
+ strgen_.Reset(); |
+ while (strgen_.HasNext()) { |
+ const StringPiece& s = strgen_.Next(); |
+ tests_++; |
+ if (!RE2::FullMatch(s, re)) |
+ continue; |
+ CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max; |
+ CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min; |
+ } |
+} |
+ |
+TEST(PossibleMatchRange, Exhaustive) { |
+ int natom = 3; |
+ int noperator = 3; |
+ int stringlen = 5; |
+ if (DEBUG_MODE) { |
+ natom = 2; |
+ noperator = 3; |
+ stringlen = 3; |
+ } |
+ PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"), |
+ RegexpGenerator::EgrepOps(), |
+ stringlen, Explode("ab4")); |
+ t.Generate(); |
+ LOG(INFO) << t.regexps() << " regexps, " |
+ << t.tests() << " tests"; |
+} |
+ |
+} // namespace re2 |