third_party/re2/re2/testing/exhaustive2_test.cc - Issue 10575037: Include RE2 library

Unified Diff: third_party/re2/re2/testing/exhaustive2_test.cc

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Less intrusive fix for Android Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/re2/re2/testing/exhaustive2_test.cc

diff --git a/third_party/re2/re2/testing/exhaustive2_test.cc b/third_party/re2/re2/testing/exhaustive2_test.cc

new file mode 100644

index 0000000000000000000000000000000000000000..c5fec5b3e4a884b03a5717653864499fa7730bb2

--- /dev/null

+++ b/third_party/re2/re2/testing/exhaustive2_test.cc

@@ -0,0 +1,70 @@

+// Use of this source code is governed by a BSD-style

+// license that can be found in the LICENSE file.

+// Exhaustive testing of regular expression matching.

+#include "util/test.h"

+#include "re2/re2.h"

+#include "re2/testing/exhaustive_tester.h"

+DECLARE_string(regexp_engines);

+namespace re2 {

+// Test empty string matches (aka "(?:)").

+// Passes the atom list built from "(?:) a" and the egrep operator set to

+// ExhaustiveTest, matching against strings drawn from Split("", "ab").

+// NOTE(review): the numeric arguments (2, 2, 5) presumably bound the number

+// of atoms, the number of operators and the subject-string length, and the

+// two trailing "" arguments are presumably wrapper templates -- confirm

+// against re2/testing/exhaustive_tester.h.

+TEST(EmptyString, Exhaustive) {

+  ExhaustiveTest(2, 2, Split(" ", "(?:) a"),

+                 RegexpGenerator::EgrepOps(),

+                 5, Split("", "ab"), "", "");

+}

+// Test escaped versions of regexp syntax.

+// `alphabet` holds the regexp punctuation characters; `escaped` is a copy in

+// which every entry is prefixed with a backslash.  The escaped forms are

+// passed to ExhaustiveTest as the atom list and the raw characters as the

+// string alphabet.

+// NOTE(review): Explode presumably splits its argument into one string per

+// character, and the numeric arguments (1, 1, 2) presumably bound atoms,

+// operators and subject-string length -- confirm against the tester headers.

+TEST(Punctuation, Literals) {

+  vector<string> alphabet = Explode("()*+?{}[]\\^$.");

+  vector<string> escaped = alphabet;

+  // size() is unsigned; use an unsigned index to avoid a signed/unsigned

+  // comparison in the loop condition.

+  for (size_t i = 0; i < escaped.size(); i++)

+    escaped[i] = "\\" + escaped[i];

+  ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),

+                 2, alphabet, "", "");

+}

+// Test ^ $ . \A \z in presence of line endings.

+// Have to wrap the empty-width ones in (?:) so that

+// they can be repeated -- PCRE rejects ^* but allows (?:^)*

+// The subject alphabet Explode("ab\n") includes a newline so that the

+// line-ending cases are actually exercised.

+// NOTE(review): the numeric arguments (2, 2, 4) presumably bound atoms,

+// operators and subject-string length -- confirm against

+// re2/testing/exhaustive_tester.h.

+TEST(LineEnds, Exhaustive) {

+  ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),

+                 RegexpGenerator::EgrepOps(),

+                 4, Explode("ab\n"), "", "");

+}

+// Test what does and does not match \n.

+// This would be a good test, except that PCRE seems to have a bug:

+// in single-byte character set mode (the default),

+// [^a] matches \n, but in UTF-8 mode it does not.

+// So when we run the test, the tester complains that

+// we don't agree with PCRE, but it's PCRE that is at fault.

+// For what it's worth, Perl gets this right (matches

+// regardless of whether UTF-8 input is selected):

+//

+// #!/usr/bin/perl

+// use POSIX qw(locale_h);

+// print "matches in latin1\n" if "\n" =~ /[^a]/;

+// setlocale("en_US.utf8");

+// print "matches in utf8\n" if "\n" =~ /[^a]/;

+//

+// The rule chosen for RE2 is that by default, like Perl,

+// dot does not match \n but negated character classes [^a] do.

+// (?s) will allow dot to match \n; there is no way in RE2

+// to stop [^a] from matching \n, though the underlying library

+// provides a mechanism, and RE2 could add new syntax if needed.

+//

+// TEST(Newlines, Exhaustive) {

+// vector<string> empty_vector;

+// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),

+// RegexpGenerator::EgrepOps(),

+// 4, Explode("a\n"), "");

+// }

+} // namespace re2

« no previous file with comments | « third_party/re2/re2/testing/exhaustive1_test.cc ('k') | third_party/re2/re2/testing/exhaustive3_test.cc » ('j') | no next file with comments »