Index: third_party/re2/re2/testing/compile_test.cc |
diff --git a/third_party/re2/re2/testing/compile_test.cc b/third_party/re2/re2/testing/compile_test.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..8d92105e21501ea9003c4ab2bdb2bda6a94884a8 |
--- /dev/null |
+++ b/third_party/re2/re2/testing/compile_test.cc |
@@ -0,0 +1,171 @@ |
+// Copyright 2007 The RE2 Authors. All Rights Reserved. |
+// Use of this source code is governed by a BSD-style |
+// license that can be found in the LICENSE file. |
+ |
+// Test prog.cc, compile.cc |
+ |
+#include <string> |
+#include <vector> |
+#include "util/test.h" |
+#include "re2/regexp.h" |
+#include "re2/prog.h" |
+ |
+DEFINE_string(show, "", "regular expression to compile and dump"); |
+ |
+namespace re2 { |
+ |
+// Simple input/output tests checking that |
+// the regexp compiles to the expected code. |
+// These are just to sanity check the basic implementation. |
+// The real confidence tests happen by testing the NFA/DFA |
+// that run the compiled code. |
+ |
+struct Test { |
+ const char* regexp; |
+ const char* code; |
+}; |
+ |
+static Test tests[] = { |
+ { "a", |
+ "1. byte [61-61] -> 2\n" |
+ "2. match! 0\n" }, |
+ { "ab", |
+ "1. byte [61-61] -> 2\n" |
+ "2. byte [62-62] -> 3\n" |
+ "3. match! 0\n" }, |
+ { "a|c", |
+ "3. alt -> 1 | 2\n" |
+ "1. byte [61-61] -> 4\n" |
+ "2. byte [63-63] -> 4\n" |
+ "4. match! 0\n" }, |
+ { "a|b", |
+ "1. byte [61-62] -> 2\n" |
+ "2. match! 0\n" }, |
+ { "[ab]", |
+ "1. byte [61-62] -> 2\n" |
+ "2. match! 0\n" }, |
+ { "a+", |
+ "1. byte [61-61] -> 2\n" |
+ "2. alt -> 1 | 3\n" |
+ "3. match! 0\n" }, |
+ { "a+?", |
+ "1. byte [61-61] -> 2\n" |
+ "2. alt -> 3 | 1\n" |
+ "3. match! 0\n" }, |
+ { "a*", |
+ "2. alt -> 1 | 3\n" |
+ "1. byte [61-61] -> 2\n" |
+ "3. match! 0\n" }, |
+ { "a*?", |
+ "2. alt -> 3 | 1\n" |
+ "3. match! 0\n" |
+ "1. byte [61-61] -> 2\n" }, |
+ { "a?", |
+ "2. alt -> 1 | 3\n" |
+ "1. byte [61-61] -> 3\n" |
+ "3. match! 0\n" }, |
+ { "a??", |
+ "2. alt -> 3 | 1\n" |
+ "3. match! 0\n" |
+ "1. byte [61-61] -> 3\n" }, |
+ { "a{4}", |
+ "1. byte [61-61] -> 2\n" |
+ "2. byte [61-61] -> 3\n" |
+ "3. byte [61-61] -> 4\n" |
+ "4. byte [61-61] -> 5\n" |
+ "5. match! 0\n" }, |
+ { "(a)", |
+ "2. capture 2 -> 1\n" |
+ "1. byte [61-61] -> 3\n" |
+ "3. capture 3 -> 4\n" |
+ "4. match! 0\n" }, |
+ { "(?:a)", |
+ "1. byte [61-61] -> 2\n" |
+ "2. match! 0\n" }, |
+ { "", |
+ "2. match! 0\n" }, |
+ { ".", |
+ "3. alt -> 1 | 2\n" |
+ "1. byte [00-09] -> 4\n" |
+ "2. byte [0b-ff] -> 4\n" |
+ "4. match! 0\n" }, |
+ { "[^ab]", |
+ "5. alt -> 3 | 4\n" |
+ "3. alt -> 1 | 2\n" |
+ "4. byte [63-ff] -> 6\n" |
+ "1. byte [00-09] -> 6\n" |
+ "2. byte [0b-60] -> 6\n" |
+ "6. match! 0\n" }, |
+ { "[Aa]", |
+ "1. byte/i [61-61] -> 2\n" |
+ "2. match! 0\n" }, |
+}; |
+ |
+TEST(TestRegexpCompileToProg, Simple) { |
+ int failed = 0; |
+ for (int i = 0; i < arraysize(tests); i++) { |
+ const re2::Test& t = tests[i]; |
+ Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL); |
+ if (re == NULL) { |
+ LOG(ERROR) << "Cannot parse: " << t.regexp; |
+ failed++; |
+ continue; |
+ } |
+ Prog* prog = re->CompileToProg(0); |
+ if (prog == NULL) { |
+ LOG(ERROR) << "Cannot compile: " << t.regexp; |
+ re->Decref(); |
+ failed++; |
+ continue; |
+ } |
+ CHECK(re->CompileToProg(1) == NULL); |
+ string s = prog->Dump(); |
+ if (s != t.code) { |
+ LOG(ERROR) << "Incorrect compiled code for: " << t.regexp; |
+ LOG(ERROR) << "Want:\n" << t.code; |
+ LOG(ERROR) << "Got:\n" << s; |
+ failed++; |
+ } |
+ delete prog; |
+ re->Decref(); |
+ } |
+ EXPECT_EQ(failed, 0); |
+} |
+ |
+// The distinct byte ranges involved in the UTF-8 dot ([^\n]). |
+// Once, erroneously split between 0x3f and 0x40 because it is |
+// a 6-bit boundary. |
+static struct UTF8ByteRange { |
+ int lo; |
+ int hi; |
+} utf8ranges[] = { |
+ { 0x00, 0x09 }, |
+ { 0x0A, 0x0A }, |
+ { 0x10, 0x7F }, |
+ { 0x80, 0x8F }, |
+ { 0x90, 0x9F }, |
+ { 0xA0, 0xBF }, |
+ { 0xC0, 0xC1 }, |
+ { 0xC2, 0xDF }, |
+ { 0xE0, 0xE0 }, |
+ { 0xE1, 0xEF }, |
+ { 0xF0, 0xF0 }, |
+ { 0xF1, 0xF3 }, |
+ { 0xF4, 0xF4 }, |
+ { 0xF5, 0xFF }, |
+}; |
+ |
+TEST(TestCompile, ByteRanges) { |
+ Regexp* re = Regexp::Parse(".", Regexp::PerlX, NULL); |
+ EXPECT_TRUE(re != NULL); |
+ Prog* prog = re->CompileToProg(0); |
+ EXPECT_TRUE(prog != NULL); |
+ EXPECT_EQ(prog->bytemap_range(), arraysize(utf8ranges)); |
+ for (int i = 0; i < arraysize(utf8ranges); i++) |
+ for (int j = utf8ranges[i].lo; j <= utf8ranges[i].hi; j++) |
+ EXPECT_EQ(prog->bytemap()[j], i) << " byte " << j; |
+ delete prog; |
+ re->Decref(); |
+} |
+ |
+} // namespace re2 |