Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(195)

Unified Diff: third_party/re2/re2/testing/compile_test.cc

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Less intrusive fix for Android Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/testing/charclass_test.cc ('k') | third_party/re2/re2/testing/dfa_test.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/testing/compile_test.cc
diff --git a/third_party/re2/re2/testing/compile_test.cc b/third_party/re2/re2/testing/compile_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8d92105e21501ea9003c4ab2bdb2bda6a94884a8
--- /dev/null
+++ b/third_party/re2/re2/testing/compile_test.cc
@@ -0,0 +1,171 @@
+// Copyright 2007 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test prog.cc, compile.cc
+
+#include <string>
+#include <vector>
+#include "util/test.h"
+#include "re2/regexp.h"
+#include "re2/prog.h"
+
+DEFINE_string(show, "", "regular expression to compile and dump"); // NOTE(review): no use of this flag appears in the visible tests
+
+namespace re2 {
+
+// Simple input/output tests checking that
+// the regexp compiles to the expected code.
+// These are just to sanity check the basic implementation.
+// The real confidence tests happen by testing the NFA/DFA
+// that run the compiled code.
+
+struct Test { // one table entry: a pattern and the listing it must compile to
+ const char* regexp; // pattern text handed to Regexp::Parse
+ const char* code; // expected text of Prog::Dump() for that pattern
+};
+
+static Test tests[] = { // pattern / expected-dump pairs walked by the Simple test
+ { "a",
+ "1. byte [61-61] -> 2\n"
+ "2. match! 0\n" },
+ { "ab",
+ "1. byte [61-61] -> 2\n"
+ "2. byte [62-62] -> 3\n"
+ "3. match! 0\n" },
+ { "a|c", // 61 and 63 are not adjacent: an alt over two byte instructions
+ "3. alt -> 1 | 2\n"
+ "1. byte [61-61] -> 4\n"
+ "2. byte [63-63] -> 4\n"
+ "4. match! 0\n" },
+ { "a|b", // expected listing is the same single range as for "[ab]"
+ "1. byte [61-62] -> 2\n"
+ "2. match! 0\n" },
+ { "[ab]",
+ "1. byte [61-62] -> 2\n"
+ "2. match! 0\n" },
+ { "a+",
+ "1. byte [61-61] -> 2\n"
+ "2. alt -> 1 | 3\n"
+ "3. match! 0\n" },
+ { "a+?", // like "a+" with the two alt targets swapped
+ "1. byte [61-61] -> 2\n"
+ "2. alt -> 3 | 1\n"
+ "3. match! 0\n" },
+ { "a*",
+ "2. alt -> 1 | 3\n"
+ "1. byte [61-61] -> 2\n"
+ "3. match! 0\n" },
+ { "a*?", // like "a*" with the two alt targets swapped
+ "2. alt -> 3 | 1\n"
+ "3. match! 0\n"
+ "1. byte [61-61] -> 2\n" },
+ { "a?",
+ "2. alt -> 1 | 3\n"
+ "1. byte [61-61] -> 3\n"
+ "3. match! 0\n" },
+ { "a??", // like "a?" with the two alt targets swapped
+ "2. alt -> 3 | 1\n"
+ "3. match! 0\n"
+ "1. byte [61-61] -> 3\n" },
+ { "a{4}", // four consecutive byte instructions
+ "1. byte [61-61] -> 2\n"
+ "2. byte [61-61] -> 3\n"
+ "3. byte [61-61] -> 4\n"
+ "4. byte [61-61] -> 5\n"
+ "5. match! 0\n" },
+ { "(a)", // byte instruction bracketed by capture 2 and capture 3
+ "2. capture 2 -> 1\n"
+ "1. byte [61-61] -> 3\n"
+ "3. capture 3 -> 4\n"
+ "4. match! 0\n" },
+ { "(?:a)", // same listing as plain "a"
+ "1. byte [61-61] -> 2\n"
+ "2. match! 0\n" },
+ { "", // empty pattern: only the match instruction is listed
+ "2. match! 0\n" },
+ { ".", // 0a is the only byte value missing from the two ranges
+ "3. alt -> 1 | 2\n"
+ "1. byte [00-09] -> 4\n"
+ "2. byte [0b-ff] -> 4\n"
+ "4. match! 0\n" },
+ { "[^ab]", // the three ranges leave out 0a, 61 and 62
+ "5. alt -> 3 | 4\n"
+ "3. alt -> 1 | 2\n"
+ "4. byte [63-ff] -> 6\n"
+ "1. byte [00-09] -> 6\n"
+ "2. byte [0b-60] -> 6\n"
+ "6. match! 0\n" },
+ { "[Aa]", // listed as a single byte/i instruction
+ "1. byte/i [61-61] -> 2\n"
+ "2. match! 0\n" },
+};
+
+// Runs every entry of tests[] through parse -> compile -> dump and
+// compares the dump with the expected listing.  Failures are logged
+// and counted, so all entries are exercised before the final check.
+TEST(TestRegexpCompileToProg, Simple) {
+  int failed = 0;
+  for (int idx = 0; idx < arraysize(tests); ++idx) {
+    const re2::Test& tc = tests[idx];
+    Regexp* parsed =
+        Regexp::Parse(tc.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
+    if (parsed == NULL) {
+      LOG(ERROR) << "Cannot parse: " << tc.regexp;
+      ++failed;
+      continue;
+    }
+
+    Prog* compiled = parsed->CompileToProg(0);
+    if (compiled != NULL) {
+      // A second compile with an argument of 1 is required to fail.
+      CHECK(parsed->CompileToProg(1) == NULL);
+      const string dump = compiled->Dump();
+      if (dump != tc.code) {
+        LOG(ERROR) << "Incorrect compiled code for: " << tc.regexp;
+        LOG(ERROR) << "Want:\n" << tc.code;
+        LOG(ERROR) << "Got:\n" << dump;
+        ++failed;
+      }
+      delete compiled;
+    } else {
+      LOG(ERROR) << "Cannot compile: " << tc.regexp;
+      ++failed;
+    }
+    // Every path that obtained a Regexp releases its reference here.
+    parsed->Decref();
+  }
+  EXPECT_EQ(failed, 0);
+}
+
+// The distinct byte ranges involved in the UTF-8 dot ([^\n]).
+// Once, erroneously split between 0x3f and 0x40 because it is
+// a 6-bit boundary.
+//
+// Entries are in increasing order and must tile 0x00-0xFF without
+// gaps: the ByteRanges test only looks at the byte values listed
+// here, so a hole would leave those values unverified.
+static struct UTF8ByteRange {
+  int lo;  // first byte value of the range (inclusive)
+  int hi;  // last byte value of the range (inclusive)
+} utf8ranges[] = {
+  { 0x00, 0x09 },
+  { 0x0A, 0x0A },  // '\n', the one byte excluded by [^\n]
+  { 0x0B, 0x7F },  // starts right after '\n' so 0x0B-0x0F are checked too
+  { 0x80, 0x8F },
+  { 0x90, 0x9F },
+  { 0xA0, 0xBF },
+  { 0xC0, 0xC1 },
+  { 0xC2, 0xDF },
+  { 0xE0, 0xE0 },
+  { 0xE1, 0xEF },
+  { 0xF0, 0xF0 },
+  { 0xF1, 0xF3 },
+  { 0xF4, 0xF4 },
+  { 0xF5, 0xFF },
+};
+
+// Compiles "." without the Latin1 flag and checks the program's byte
+// map against utf8ranges: bytemap_range() must equal the number of
+// table entries, and bytemap()[j] must equal i for every byte j in
+// utf8ranges[i].
+TEST(TestCompile, ByteRanges) {
+  Regexp* re = Regexp::Parse(".", Regexp::PerlX, NULL);
+  EXPECT_TRUE(re != NULL);
+  if (re == NULL)
+    return;  // EXPECT_TRUE may be non-fatal; never dereference NULL.
+  Prog* prog = re->CompileToProg(0);
+  EXPECT_TRUE(prog != NULL);
+  if (prog == NULL) {
+    re->Decref();  // Still release the parsed regexp on this path.
+    return;
+  }
+  EXPECT_EQ(prog->bytemap_range(), arraysize(utf8ranges));
+  for (int i = 0; i < arraysize(utf8ranges); i++) {
+    for (int j = utf8ranges[i].lo; j <= utf8ranges[i].hi; j++) {
+      EXPECT_EQ(prog->bytemap()[j], i) << " byte " << j;
+    }
+  }
+  delete prog;
+  re->Decref();
+}
+
+} // namespace re2
« no previous file with comments | « third_party/re2/re2/testing/charclass_test.cc ('k') | third_party/re2/re2/testing/dfa_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698