Index: third_party/re2/re2/testing/exhaustive2_test.cc |
diff --git a/third_party/re2/re2/testing/exhaustive2_test.cc b/third_party/re2/re2/testing/exhaustive2_test.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..c5fec5b3e4a884b03a5717653864499fa7730bb2 |
--- /dev/null |
+++ b/third_party/re2/re2/testing/exhaustive2_test.cc |
@@ -0,0 +1,70 @@ |
+// Copyright 2008 The RE2 Authors. All Rights Reserved. |
+// Use of this source code is governed by a BSD-style |
+// license that can be found in the LICENSE file. |
+ |
+// Exhaustive testing of regular expression matching. |
+ |
+#include "util/test.h" |
+#include "re2/re2.h" |
+#include "re2/testing/exhaustive_tester.h" |
+ |
+DECLARE_string(regexp_engines); |
+ |
+namespace re2 { |
+ |
+// Exercise regexps that can match the empty string (written "(?:)"). |
+TEST(EmptyString, Exhaustive) { |
+  const vector<string> atoms = Split(" ", "(?:) a"); |
+  const vector<string> letters = Split("", "ab"); |
+  ExhaustiveTest(2, 2, atoms, RegexpGenerator::EgrepOps(), 5, letters, "", ""); |
+} |
+ |
+// Test escaped versions of regexp syntax (every alphabet entry prefixed by \). |
+TEST(Punctuation, Literals) { |
+  vector<string> alphabet = Explode("()*+?{}[]\\^$.");  // raw punctuation |
+  vector<string> escaped = alphabet;  // becomes the backslash-escaped copy |
+  for (size_t i = 0; i < escaped.size(); i++)  // size_t: size() is unsigned |
+    escaped[i] = "\\" + escaped[i]; |
+  ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(), |
+                 2, alphabet, "", ""); |
+} |
+ |
+// Check ^ $ . \A \z when line endings are present. The zero-width ones are |
+// wrapped in (?:) so they can be repeated: PCRE rejects ^*, allows (?:^)*. |
+TEST(LineEnds, Exhaustive) { |
+  const vector<string> atoms = |
+      Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"); |
+  const vector<string> text = Explode("ab\n"); |
+  ExhaustiveTest(2, 2, atoms, RegexpGenerator::EgrepOps(), 4, text, "", ""); |
+} |
+ |
+// Test what does and does not match \n. |
+// This would be a good test, except that PCRE seems to have a bug: |
+// in single-byte character set mode (the default), |
+// [^a] matches \n, but in UTF-8 mode it does not. |
+// So when we run the test, the tester complains that |
+// we don't agree with PCRE, but it's PCRE that is at fault. |
+// For what it's worth, Perl gets this right (matches |
+// regardless of whether UTF-8 input is selected): |
+// |
+// #!/usr/bin/perl |
+// use POSIX qw(locale_h); |
+// print "matches in latin1\n" if "\n" =~ /[^a]/; |
+// setlocale("en_US.utf8"); |
+// print "matches in utf8\n" if "\n" =~ /[^a]/; |
+// |
+// The rule chosen for RE2 is that by default, like Perl, |
+// dot does not match \n but negated character classes [^a] do. |
+// (?s) will allow dot to match \n; there is no way in RE2 |
+// to stop [^a] from matching \n, though the underlying library |
+// provides a mechanism, and RE2 could add new syntax if needed. |
+// |
+// TEST(Newlines, Exhaustive) { |
+// vector<string> empty_vector; |
+// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"), |
+// RegexpGenerator::EgrepOps(), |
+// 4, Explode("a\n"), "", ""); |
+// } |
+ |
+} // namespace re2 |
+ |