| Index: third_party/re2/re2/testing/exhaustive2_test.cc
|
| diff --git a/third_party/re2/re2/testing/exhaustive2_test.cc b/third_party/re2/re2/testing/exhaustive2_test.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..c5fec5b3e4a884b03a5717653864499fa7730bb2
|
| --- /dev/null
|
| +++ b/third_party/re2/re2/testing/exhaustive2_test.cc
|
| @@ -0,0 +1,70 @@
|
| +// Copyright 2008 The RE2 Authors. All Rights Reserved.
|
| +// Use of this source code is governed by a BSD-style
|
| +// license that can be found in the LICENSE file.
|
| +
|
| +// Exhaustive testing of regular expression matching.
|
| +
|
| +#include "util/test.h"
|
| +#include "re2/re2.h"
|
| +#include "re2/testing/exhaustive_tester.h"
|
| +
|
| +DECLARE_string(regexp_engines);
|
| +
|
| +namespace re2 {
|
| +
|
| +// Exhaustive test of empty-string matches, written "(?:)" in regexp syntax.
|
| +TEST(EmptyString, Exhaustive) {
|
| + vector<string> pieces = Split(" ", "(?:) a");
|
| + vector<string> letters = Split("", "ab");
|
| + ExhaustiveTest(2, 2, pieces, RegexpGenerator::EgrepOps(), 5, letters, "", "");
|
| +}
|
| +
|
| +// Test backslash-escaped versions of the regexp punctuation characters.
|
| +TEST(Punctuation, Literals) {
|
| + vector<string> alphabet = Explode("()*+?{}[]\\^$.");
|
| + vector<string> escaped = alphabet;
|
| + for (size_t i = 0; i < escaped.size(); i++)  // size() is unsigned; index with a matching type.
|
| + escaped[i] = "\\" + escaped[i];  // Prefix each entry with a literal backslash.
|
| + ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
|
| + 2, alphabet, "", "");
|
| +}
|
| +
|
| +// Exercise ^ $ . \A \z when line endings are present.
|
| +// The empty-width operators are wrapped in (?:) to make them
|
| +// repeatable -- PCRE rejects ^* but allows (?:^)*
|
| +TEST(LineEnds, Exhaustive) {
|
| + vector<string> pieces = Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)");
|
| + vector<string> letters = Explode("ab\n");
|
| + ExhaustiveTest(2, 2, pieces, RegexpGenerator::EgrepOps(), 4, letters, "", "");
|
| +}
|
| +
|
| +// Test what does and does not match \n.
|
| +// This would be a good test, except that PCRE seems to have a bug:
|
| +// in single-byte character set mode (the default),
|
| +// [^a] matches \n, but in UTF-8 mode it does not.
|
| +// So when we run the test, the tester complains that
|
| +// we don't agree with PCRE, but it's PCRE that is at fault.
|
| +// For what it's worth, Perl gets this right (matches
|
| +// regardless of whether UTF-8 input is selected):
|
| +//
|
| +// #!/usr/bin/perl
|
| +// use POSIX qw(locale_h);
|
| +// print "matches in latin1\n" if "\n" =~ /[^a]/;
|
| +// setlocale(LC_ALL, "en_US.utf8");
|
| +// print "matches in utf8\n" if "\n" =~ /[^a]/;
|
| +//
|
| +// The rule chosen for RE2 is that by default, like Perl,
|
| +// dot does not match \n but negated character classes [^a] do.
|
| +// (?s) will allow dot to match \n; there is no way in RE2
|
| +// to stop [^a] from matching \n, though the underlying library
|
| +// provides a mechanism, and RE2 could add new syntax if needed.
|
| +//
|
| +// TEST(Newlines, Exhaustive) {
|
| +// vector<string> empty_vector;
|
| +// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
|
| +// RegexpGenerator::EgrepOps(),
|
| +// 4, Explode("a\n"), "");
|
| +// }
|
| +
|
| +} // namespace re2
|
| +
|
|
|