third_party/re2/re2/testing/simplify_test.cc - Issue 10575037: Include RE2 library

Unified Diff: third_party/re2/re2/testing/simplify_test.cc

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Less intrusive fix for Android Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/re2/re2/testing/simplify_test.cc

diff --git a/third_party/re2/re2/testing/simplify_test.cc b/third_party/re2/re2/testing/simplify_test.cc

new file mode 100644

index 0000000000000000000000000000000000000000..d54837c950723c4f0b16ba9420e3b126f10a00d6

--- /dev/null

+++ b/third_party/re2/re2/testing/simplify_test.cc

@@ -0,0 +1,167 @@

+// Use of this source code is governed by a BSD-style

+// license that can be found in the LICENSE file.

+// Test simplify.cc.

+#include <stddef.h>

+#include <string.h>

+#include <string>

+#include <vector>

+#include "util/test.h"

+#include "re2/regexp.h"

+namespace re2 {

+// One table-driven case for the simplifier test below: an input regexp and

+// the string the test expects after parsing and simplifying it.

+struct Test {

+ // Regexp source text handed to Regexp::Parse.

+ const char* regexp;

+ // Expected result of ToString() on the simplified regexp.

+ const char* simplified;

+};

+// Table of { input regexp, expected simplified form } pairs consumed by

+// TEST(TestSimplify, SimpleRegexps). When both strings of an entry are

+// identical, that test additionally requires Simplify() to return the

+// very same Regexp object it was called on.

+static Test tests[] = {

+ // Already-simple constructs

+ { "a", "a" },

+ { "ab", "ab" },

+ { "a|b", "[a-b]" },

+ { "ab|cd", "ab|cd" },

+ { "(ab)*", "(ab)*" },

+ { "(ab)+", "(ab)+" },

+ { "(ab)?", "(ab)?" },

+ { ".", "." },

+ { "^", "^" },

+ { "$", "$" },

+ { "[ac]", "[ac]" },

+ { "[^ac]", "[^ac]" },

+ // Posix character classes

+ { "[[:alnum:]]", "[0-9A-Za-z]" },

+ { "[[:alpha:]]", "[A-Za-z]" },

+ { "[[:blank:]]", "[\\t ]" },

+ { "[[:cntrl:]]", "[\\x00-\\x1f\\x7f]" },

+ { "[[:digit:]]", "[0-9]" },

+ { "[[:graph:]]", "[!-~]" },

+ { "[[:lower:]]", "[a-z]" },

+ { "[[:print:]]", "[ -~]" },

+ { "[[:punct:]]", "[!-/:-@\\[-`{-~]" },

+ { "[[:space:]]" , "[\\t-\\r ]" },

+ { "[[:upper:]]", "[A-Z]" },

+ { "[[:xdigit:]]", "[0-9A-Fa-f]" },

+ // Perl character classes

+ { "\\d", "[0-9]" },

+ { "\\s", "[\\t-\\n\\f-\\r ]" },

+ { "\\w", "[0-9A-Z_a-z]" },

+ { "\\D", "[^0-9]" },

+ { "\\S", "[^\\t-\\n\\f-\\r ]" },

+ { "\\W", "[^0-9A-Z_a-z]" },

+ { "[\\d]", "[0-9]" },

+ { "[\\s]", "[\\t-\\n\\f-\\r ]" },

+ { "[\\w]", "[0-9A-Z_a-z]" },

+ { "[\\D]", "[^0-9]" },

+ { "[\\S]", "[^\\t-\\n\\f-\\r ]" },

+ { "[\\W]", "[^0-9A-Z_a-z]" },

+ // Posix repetitions

+ { "a{1}", "a" },

+ { "a{2}", "aa" },

+ { "a{5}", "aaaaa" },

+ { "a{0,1}", "a?" },

+ // The next three are illegible because Simplify inserts (?:)

+ // parens instead of () parens to avoid creating extra

+ // captured subexpressions. The comments show a version with fewer parens.

+ { "(a){0,2}", "(?:(a)(a)?)?" }, // (aa?)?

+ { "(a){0,4}", "(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // (a(a(aa?)?)?)?

+ { "(a){2,6}", "(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" }, // aa(a(a(aa?)?)?)?

+ { "a{0,2}", "(?:aa?)?" }, // (aa?)?

+ { "a{0,4}", "(?:a(?:a(?:aa?)?)?)?" }, // (a(a(aa?)?)?)?

+ { "a{2,6}", "aa(?:a(?:a(?:aa?)?)?)?" }, // aa(a(a(aa?)?)?)?

+ { "a{0,}", "a*" },

+ { "a{1,}", "a+" },

+ { "a{2,}", "aa+" },

+ { "a{5,}", "aaaaa+" },

+ // Test that operators simplify their arguments.

+ // (Simplify used to not simplify arguments to a {} repeat.)

+ { "(?:a{1,}){1,}", "a+" },

+ { "(a{1,}b{1,})", "(a+b+)" },

+ { "a{1,}|b{1,}", "a+|b+" },

+ { "(?:a{1,})*", "(?:a+)*" },

+ { "(?:a{1,})+", "a+" },

+ { "(?:a{1,})?", "(?:a+)?" },

+ { "a{0}", "" },

+ // Character class simplification

+ { "[ab]", "[a-b]" },

+ { "[a-za-za-z]", "[a-z]" },

+ { "[A-Za-zA-Za-z]", "[A-Za-z]" },

+ { "[ABCDEFGH]", "[A-H]" },

+ { "[AB-CD-EF-GH]", "[A-H]" },

+ { "[W-ZP-XE-R]", "[E-Z]" },

+ { "[a-ee-gg-m]", "[a-m]" },

+ { "[a-ea-ha-m]", "[a-m]" },

+ { "[a-ma-ha-e]", "[a-m]" },

+ { "[a-zA-Z0-9 -~]", "[ -~]" },

+ // Empty character classes

+ { "[^[:cntrl:][:^cntrl:]]", "[^\\x00-\\x{10ffff}]" },

+ // Full character classes

+ { "[[:cntrl:][:^cntrl:]]", "." },

+ // Unicode case folding.

+ { "(?i)A", "[Aa]" },

+ { "(?i)a", "[Aa]" },

+ { "(?i)K", "[Kk\\x{212a}]" },

+ { "(?i)k", "[Kk\\x{212a}]" },

+ { "(?i)\\x{212a}", "[Kk\\x{212a}]" },

+ { "(?i)[a-z]", "[A-Za-z\\x{17f}\\x{212a}]" },

+ { "(?i)[\\x00-\\x{FFFD}]", "[\\x00-\\x{fffd}]" },

+ { "(?i)[\\x00-\\x{10ffff}]", "." },

+ // Empty string as a regular expression.

+ // Empty string must be preserved inside parens in order

+ // to make submatches work right, so these are less

+ // interesting than they used to be. ToString inserts

+ // explicit (?:) in place of non-parenthesized empty strings,

+ // to make them easier to spot for other parsers.

+ { "(a|b|)", "([a-b]|(?:))" },

+ { "(|)", "()" },

+ { "a()", "a()" },

+ { "(()|())", "(()|())" },

+ { "(a|)", "(a|(?:))" },

+ { "ab()cd()", "ab()cd()" },

+ { "()", "()" },

+ { "()*", "()*" },

+ { "()+", "()+" },

+ { "()?" , "()?" },

+ { "(){0}", "" },

+ { "(){1}", "()" },

+ { "(){1,}", "()+" },

+ { "(){0,2}", "(?:()()?)?" },

+};

+// Parses every entry of tests[], simplifies the parsed regexp, and checks

+// that the simplified form prints (via ToString) as the expected string.

+TEST(TestSimplify, SimpleRegexps) {

+  // Unsigned index: arraysize() is presumably sizeof-based, so an int

+  // counter would mix signed and unsigned in the loop condition.

+  for (size_t i = 0; i < arraysize(tests); i++) {

+    RegexpStatus status;

+    VLOG(1) << "Testing " << tests[i].regexp;

+    Regexp* re = Regexp::Parse(tests[i].regexp,

+                               Regexp::MatchNL | (Regexp::LikePerl &

+                                                  ~Regexp::OneLine),

+                               &status);

+    CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text();

+    Regexp* sre = re->Simplify();

+    CHECK(sre != NULL);

+    // Check that already-simple regexps don't allocate new ones.

+    if (strcmp(tests[i].regexp, tests[i].simplified) == 0) {

+      CHECK(re == sre) << " " << tests[i].regexp

+                       << " " << re->ToString() << " " << sre->ToString();

+    }

+    EXPECT_EQ(tests[i].simplified, sre->ToString())

+        << " " << tests[i].regexp << " " << sre->Dump();

+    // Both pointers are released, including when re == sre.

+    re->Decref();

+    sre->Decref();

+  }

+}

+} // namespace re2

« no previous file with comments | « third_party/re2/re2/testing/set_test.cc ('k') | third_party/re2/re2/testing/string_generator.h » ('j') | no next file with comments »