Index: third_party/re2/re2/testing/exhaustive_tester.cc |
diff --git a/third_party/re2/re2/testing/exhaustive_tester.cc b/third_party/re2/re2/testing/exhaustive_tester.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..54de85748e7b9b484ae48dc53600465deb7d4bcb |
--- /dev/null |
+++ b/third_party/re2/re2/testing/exhaustive_tester.cc |
@@ -0,0 +1,188 @@ |
+// Copyright 2008 The RE2 Authors. All Rights Reserved. |
+// Use of this source code is governed by a BSD-style |
+// license that can be found in the LICENSE file. |
+ |
+// Exhaustive testing of regular expression matching. |
+ |
+// Each test picks an alphabet (e.g., "abc"), a maximum string length, |
+// a maximum regular expression length, and a maximum number of letters |
+// that can appear in the regular expression. Given these parameters, |
+// it tries every possible regular expression and string, verifying that |
+// the NFA, DFA, and a trivial backtracking implementation agree about |
+// the location of the match. |
+ |
+#include <stdlib.h> |
+#include <stdio.h> |
+ |
+#ifndef LOGGING |
+#define LOGGING 0 |
+#endif |
+ |
+#include "util/test.h" |
+#include "re2/testing/exhaustive_tester.h" |
+#include "re2/testing/tester.h" |
+ |
+DEFINE_bool(show_regexps, false, "show regexps during testing"); |
+ |
+DEFINE_int32(max_bad_regexp_inputs, 1, |
+ "Stop testing a regular expression after finding this many " |
+ "strings that break it."); |
+ |
+// Compiled in debug mode, the usual tests run for over an hour. |
+// Have to cut it down to make the unit test machines happy. |
+DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode."); |
+ |
+namespace re2 { |
+ |
+static char* escape(const StringPiece& sp) { |
+ static char buf[512]; |
+ char* p = buf; |
+ *p++ = '\"'; |
+ for (int i = 0; i < sp.size(); i++) { |
+ if(p+5 >= buf+sizeof buf) |
+ LOG(FATAL) << "ExhaustiveTester escape: too long"; |
+ if(sp[i] == '\\' || sp[i] == '\"') { |
+ *p++ = '\\'; |
+ *p++ = sp[i]; |
+ } else if(sp[i] == '\n') { |
+ *p++ = '\\'; |
+ *p++ = 'n'; |
+ } else { |
+ *p++ = sp[i]; |
+ } |
+ } |
+ *p++ = '\"'; |
+ *p = '\0'; |
+ return buf; |
+} |
+ |
+static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) { |
+ if (!re.Match(input, 0, input.size(), anchor, m, n)) { |
+ printf("-"); |
+ return; |
+ } |
+ for (int i = 0; i < n; i++) { |
+ if (i > 0) |
+ printf(" "); |
+ if (m[i].begin() == NULL) |
+ printf("-"); |
+ else |
+ printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin())); |
+ } |
+} |
+ |
+// Processes a single generated regexp. |
+// Compiles it using Regexp interface and PCRE, and then |
+// checks that NFA, DFA, and PCRE all return the same results. |
+void ExhaustiveTester::HandleRegexp(const string& const_regexp) { |
+ regexps_++; |
+ string regexp = const_regexp; |
+ if (!topwrapper_.empty()) |
+ regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str()); |
+ |
+ if (FLAGS_show_regexps) { |
+ printf("\r%s", regexp.c_str()); |
+ fflush(stdout); |
+ } |
+ |
+ if (LOGGING) { |
+ // Write out test cases and answers for use in testing |
+ // other implementations, such as Go's regexp package. |
+ if (randomstrings_) |
+ LOG(ERROR) << "Cannot log with random strings."; |
+ if (regexps_ == 1) { // first |
+ printf("strings\n"); |
+ strgen_.Reset(); |
+ while (strgen_.HasNext()) |
+ printf("%s\n", escape(strgen_.Next())); |
+ printf("regexps\n"); |
+ } |
+ printf("%s\n", escape(regexp)); |
+ |
+ RE2 re(regexp); |
+ RE2::Options longest; |
+ longest.set_longest_match(true); |
+ RE2 relongest(regexp, longest); |
+ int ngroup = re.NumberOfCapturingGroups()+1; |
+ StringPiece* group = new StringPiece[ngroup]; |
+ |
+ strgen_.Reset(); |
+ while (strgen_.HasNext()) { |
+ StringPiece input = strgen_.Next(); |
+ PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup); |
+ printf(";"); |
+ PrintResult(re, input, RE2::UNANCHORED, group, ngroup); |
+ printf(";"); |
+ PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup); |
+ printf(";"); |
+ PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup); |
+ printf("\n"); |
+ } |
+ delete[] group; |
+ return; |
+ } |
+ |
+ Tester tester(regexp); |
+ if (tester.error()) |
+ return; |
+ |
+ strgen_.Reset(); |
+ strgen_.GenerateNULL(); |
+ if (randomstrings_) |
+ strgen_.Random(stringseed_, stringcount_); |
+ int bad_inputs = 0; |
+ while (strgen_.HasNext()) { |
+ tests_++; |
+ if (!tester.TestInput(strgen_.Next())) { |
+ failures_++; |
+ if (++bad_inputs >= FLAGS_max_bad_regexp_inputs) |
+ break; |
+ } |
+ } |
+} |
+ |
+// Runs an exhaustive test on the given parameters. |
+void ExhaustiveTest(int maxatoms, int maxops, |
+ const vector<string>& alphabet, |
+ const vector<string>& ops, |
+ int maxstrlen, const vector<string>& stralphabet, |
+ const string& wrapper, |
+ const string& topwrapper) { |
+ if (DEBUG_MODE && FLAGS_quick_debug_mode) { |
+ if (maxatoms > 1) |
+ maxatoms--; |
+ if (maxops > 1) |
+ maxops--; |
+ if (maxstrlen > 1) |
+ maxstrlen--; |
+ } |
+ ExhaustiveTester t(maxatoms, maxops, alphabet, ops, |
+ maxstrlen, stralphabet, wrapper, |
+ topwrapper); |
+ t.Generate(); |
+ if (!LOGGING) { |
+ printf("%d regexps, %d tests, %d failures [%d/%d str]\n", |
+ t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size()); |
+ } |
+ EXPECT_EQ(0, t.failures()); |
+} |
+ |
+// Runs an exhaustive test using the given parameters and |
+// the basic egrep operators. |
+void EgrepTest(int maxatoms, int maxops, const string& alphabet, |
+ int maxstrlen, const string& stralphabet, |
+ const string& wrapper) { |
+ const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" }; |
+ |
+ for (int i = 0; i < arraysize(tops); i++) { |
+ ExhaustiveTest(maxatoms, maxops, |
+ Split("", alphabet), |
+ RegexpGenerator::EgrepOps(), |
+ maxstrlen, |
+ Split("", stralphabet), |
+ wrapper, |
+ tops[i]); |
+ } |
+} |
+ |
+} // namespace re2 |