Index: third_party/re2/re2/tostring.cc |
diff --git a/third_party/re2/re2/tostring.cc b/third_party/re2/re2/tostring.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..555524f291bca34a4a3e6dba0a29c3f9a98e8a6a |
--- /dev/null |
+++ b/third_party/re2/re2/tostring.cc |
@@ -0,0 +1,341 @@ |
+// Copyright 2006 The RE2 Authors. All Rights Reserved. |
+// Use of this source code is governed by a BSD-style |
+// license that can be found in the LICENSE file. |
+ |
+// Format a regular expression structure as a string. |
+// Tested by parse_test.cc |
+ |
+#include "util/util.h" |
+#include "re2/regexp.h" |
+#include "re2/walker-inl.h" |
+ |
+namespace re2 { |
+ |
+enum { |
+ PrecAtom, |
+ PrecUnary, |
+ PrecConcat, |
+ PrecAlternate, |
+ PrecEmpty, |
+ PrecParen, |
+ PrecToplevel, |
+}; |
+ |
+// Helper function. See description below. |
+static void AppendCCRange(string* t, Rune lo, Rune hi); |
+ |
+// Walker to generate string in s_. |
+// The arg pointers are actually integers giving the |
+// context precedence. |
+// The child_args are always NULL. |
+class ToStringWalker : public Regexp::Walker<int> { |
+ public: |
+ explicit ToStringWalker(string* t) : t_(t) {} |
+ |
+ virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); |
+ virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, |
+ int* child_args, int nchild_args); |
+ virtual int ShortVisit(Regexp* re, int parent_arg) { |
+ return 0; |
+ } |
+ |
+ private: |
+ string* t_; // The string the walker appends to. |
+ |
+ DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker); |
+}; |
+ |
+string Regexp::ToString() { |
+ string t; |
+ ToStringWalker w(&t); |
+ w.WalkExponential(this, PrecToplevel, 100000); |
+ if (w.stopped_early()) |
+ t += " [truncated]"; |
+ return t; |
+} |
+ |
+#define ToString DontCallToString // Avoid accidental recursion. |
+ |
+// Visits re before children are processed. |
+// Appends ( if needed and passes new precedence to children. |
+int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { |
+ int prec = parent_arg; |
+ int nprec = PrecAtom; |
+ |
+ switch (re->op()) { |
+ case kRegexpNoMatch: |
+ case kRegexpEmptyMatch: |
+ case kRegexpLiteral: |
+ case kRegexpAnyChar: |
+ case kRegexpAnyByte: |
+ case kRegexpBeginLine: |
+ case kRegexpEndLine: |
+ case kRegexpBeginText: |
+ case kRegexpEndText: |
+ case kRegexpWordBoundary: |
+ case kRegexpNoWordBoundary: |
+ case kRegexpCharClass: |
+ case kRegexpHaveMatch: |
+ nprec = PrecAtom; |
+ break; |
+ |
+ case kRegexpConcat: |
+ case kRegexpLiteralString: |
+ if (prec < PrecConcat) |
+ t_->append("(?:"); |
+ nprec = PrecConcat; |
+ break; |
+ |
+ case kRegexpAlternate: |
+ if (prec < PrecAlternate) |
+ t_->append("(?:"); |
+ nprec = PrecAlternate; |
+ break; |
+ |
+ case kRegexpCapture: |
+ t_->append("("); |
+ if (re->name()) { |
+ t_->append("?P<"); |
+ t_->append(*re->name()); |
+ t_->append(">"); |
+ } |
+ nprec = PrecParen; |
+ break; |
+ |
+ case kRegexpStar: |
+ case kRegexpPlus: |
+ case kRegexpQuest: |
+ case kRegexpRepeat: |
+ if (prec < PrecUnary) |
+ t_->append("(?:"); |
+ // The subprecedence here is PrecAtom instead of PrecUnary |
+ // because PCRE treats two unary ops in a row as a parse error. |
+ nprec = PrecAtom; |
+ break; |
+ } |
+ |
+ return nprec; |
+} |
+ |
+static void AppendLiteral(string *t, Rune r, bool foldcase) { |
+ if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { |
+ t->append(1, '\\'); |
+ t->append(1, r); |
+ } else if (foldcase && 'a' <= r && r <= 'z') { |
+ if ('a' <= r && r <= 'z') |
+ r += 'A' - 'a'; |
+ t->append(1, '['); |
+ t->append(1, r); |
+ t->append(1, r + 'a' - 'A'); |
+ t->append(1, ']'); |
+ } else { |
+ AppendCCRange(t, r, r); |
+ } |
+} |
+ |
+// Visits re after children are processed. |
+// For childless regexps, all the work is done here. |
+// For regexps with children, append any unary suffixes or ). |
+int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, |
+ int* child_args, int nchild_args) { |
+ int prec = parent_arg; |
+ switch (re->op()) { |
+ case kRegexpNoMatch: |
+ // There's no simple symbol for "no match", but |
+ // [^0-Runemax] excludes everything. |
+ t_->append("[^\\x00-\\x{10ffff}]"); |
+ break; |
+ |
+ case kRegexpEmptyMatch: |
+ // Append (?:) to make empty string visible, |
+ // unless this is already being parenthesized. |
+ if (prec < PrecEmpty) |
+ t_->append("(?:)"); |
+ break; |
+ |
+ case kRegexpLiteral: |
+ AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase); |
+ break; |
+ |
+ case kRegexpLiteralString: |
+ for (int i = 0; i < re->nrunes(); i++) |
+ AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase); |
+ if (prec < PrecConcat) |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpConcat: |
+ if (prec < PrecConcat) |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpAlternate: |
+ // Clumsy but workable: the children all appended | |
+ // at the end of their strings, so just remove the last one. |
+ if ((*t_)[t_->size()-1] == '|') |
+ t_->erase(t_->size()-1); |
+ else |
+ LOG(DFATAL) << "Bad final char: " << t_; |
+ if (prec < PrecAlternate) |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpStar: |
+ t_->append("*"); |
+ if (re->parse_flags() & Regexp::NonGreedy) |
+ t_->append("?"); |
+ if (prec < PrecUnary) |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpPlus: |
+ t_->append("+"); |
+ if (re->parse_flags() & Regexp::NonGreedy) |
+ t_->append("?"); |
+ if (prec < PrecUnary) |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpQuest: |
+ t_->append("?"); |
+ if (re->parse_flags() & Regexp::NonGreedy) |
+ t_->append("?"); |
+ if (prec < PrecUnary) |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpRepeat: |
+ if (re->max() == -1) |
+ t_->append(StringPrintf("{%d,}", re->min())); |
+ else if (re->min() == re->max()) |
+ t_->append(StringPrintf("{%d}", re->min())); |
+ else |
+ t_->append(StringPrintf("{%d,%d}", re->min(), re->max())); |
+ if (re->parse_flags() & Regexp::NonGreedy) |
+ t_->append("?"); |
+ if (prec < PrecUnary) |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpAnyChar: |
+ t_->append("."); |
+ break; |
+ |
+ case kRegexpAnyByte: |
+ t_->append("\\C"); |
+ break; |
+ |
+ case kRegexpBeginLine: |
+ t_->append("^"); |
+ break; |
+ |
+ case kRegexpEndLine: |
+ t_->append("$"); |
+ break; |
+ |
+ case kRegexpBeginText: |
+ t_->append("(?-m:^)"); |
+ break; |
+ |
+ case kRegexpEndText: |
+ if (re->parse_flags() & Regexp::WasDollar) |
+ t_->append("(?-m:$)"); |
+ else |
+ t_->append("\\z"); |
+ break; |
+ |
+ case kRegexpWordBoundary: |
+ t_->append("\\b"); |
+ break; |
+ |
+ case kRegexpNoWordBoundary: |
+ t_->append("\\B"); |
+ break; |
+ |
+ case kRegexpCharClass: { |
+ if (re->cc()->size() == 0) { |
+ t_->append("[^\\x00-\\x{10ffff}]"); |
+ break; |
+ } |
+ t_->append("["); |
+ // Heuristic: show class as negated if it contains the |
+ // non-character 0xFFFE. |
+ CharClass* cc = re->cc(); |
+ if (cc->Contains(0xFFFE)) { |
+ cc = cc->Negate(); |
+ t_->append("^"); |
+ } |
+ for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) |
+ AppendCCRange(t_, i->lo, i->hi); |
+ if (cc != re->cc()) |
+ cc->Delete(); |
+ t_->append("]"); |
+ break; |
+ } |
+ |
+ case kRegexpCapture: |
+ t_->append(")"); |
+ break; |
+ |
+ case kRegexpHaveMatch: |
+ // There's no syntax accepted by the parser to generate |
+ // this node (it is generated by RE2::Set) so make something |
+ // up that is readable but won't compile. |
+ t_->append("(?HaveMatch:%d)", re->match_id()); |
+ break; |
+ } |
+ |
+ // If the parent is an alternation, append the | for it. |
+ if (prec == PrecAlternate) |
+ t_->append("|"); |
+ |
+ return 0; |
+} |
+ |
+// Appends a rune for use in a character class to the string t. |
+static void AppendCCChar(string* t, Rune r) { |
+ if (0x20 <= r && r <= 0x7E) { |
+ if (strchr("[]^-\\", r)) |
+ t->append("\\"); |
+ t->append(1, r); |
+ return; |
+ } |
+ switch (r) { |
+ default: |
+ break; |
+ |
+ case '\r': |
+ t->append("\\r"); |
+ return; |
+ |
+ case '\t': |
+ t->append("\\t"); |
+ return; |
+ |
+ case '\n': |
+ t->append("\\n"); |
+ return; |
+ |
+ case '\f': |
+ t->append("\\f"); |
+ return; |
+ } |
+ |
+ if (r < 0x100) { |
+ StringAppendF(t, "\\x%02x", static_cast<int>(r)); |
+ return; |
+ } |
+ StringAppendF(t, "\\x{%x}", static_cast<int>(r)); |
+} |
+ |
+static void AppendCCRange(string* t, Rune lo, Rune hi) { |
+ if (lo > hi) |
+ return; |
+ AppendCCChar(t, lo); |
+ if (lo < hi) { |
+ t->append("-"); |
+ AppendCCChar(t, hi); |
+ } |
+} |
+ |
+} // namespace re2 |