Index: third_party/cython/src/Cython/Plex/Traditional.py |
diff --git a/third_party/cython/src/Cython/Plex/Traditional.py b/third_party/cython/src/Cython/Plex/Traditional.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..6d3e48fa4a2ed8f4240327222d2e415dba032024 |
--- /dev/null |
+++ b/third_party/cython/src/Cython/Plex/Traditional.py |
@@ -0,0 +1,154 @@ |
+#======================================================================= |
+# |
+# Python Lexical Analyser |
+# |
+# Traditional Regular Expression Syntax |
+# |
+#======================================================================= |
+ |
+from Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char |
+from Errors import PlexError |
+ |
class RegexpSyntaxError(PlexError):
    """
    Signals a syntax error in a traditional regular expression string.
    Raised by REParser.error().
    """
+ |
def re(s):
    """
    Translate |s|, a regular expression written in traditional string
    syntax, into the equivalent Plex regexp object.
    """
    parser = REParser(s)
    return parser.parse_re()
+ |
class REParser(object):
    """
    Recursive-descent parser for traditional regular expression strings.

    The parser walks the string |s| one character at a time and builds the
    corresponding Plex expression out of Alt, Seq, Rep, Rep1, Opt, Any,
    AnyBut, Bol, Eol and Char.

    Attributes:
      s    the regexp string being parsed
      i    index of the current character within |s|
      c    the current character, or '' once the end has been reached
      end  true once |i| has moved past the last character of |s|
    """

    def __init__(self, s):
        self.s = s
        self.i = -1
        self.end = 0
        # Load the first character into self.c (or flag the end if |s| is empty).
        self.next()

    def parse_re(self):
        """
        Parse the whole string and return the resulting regexp.
        Raises RegexpSyntaxError if any input is left over.
        """
        result = self.parse_alt()
        if not self.end:
            # parse_alt only stops before the end at a ')' that nobody opened.
            self.error("Unexpected %s" % repr(self.c))
        return result

    def parse_alt(self):
        """Parse a set of alternative regexps separated by '|'."""
        result = self.parse_seq()
        if self.c == '|':
            alternatives = [result]
            while self.c == '|':
                self.next()
                alternatives.append(self.parse_seq())
            result = Alt(*alternatives)
        return result

    def parse_seq(self):
        """Parse a sequence of regexps, stopping at '|', ')' or the end."""
        items = []
        while not self.end and self.c not in "|)":
            items.append(self.parse_mod())
        return Seq(*items)

    def parse_mod(self):
        """Parse a primitive regexp followed by *, +, ? modifiers."""
        result = self.parse_prim()
        # Modifiers may be stacked; each one wraps the result so far.
        while not self.end and self.c in "*+?":
            if self.c == '*':
                result = Rep(result)
            elif self.c == '+':
                result = Rep1(result)
            else:  # self.c == '?'
                result = Opt(result)
            self.next()
        return result

    def parse_prim(self):
        """
        Parse a primitive regexp.

        Recognises '.' (any character except newline), '^' (Bol), '$' (Eol),
        a parenthesised group and a bracketed charset. A backslash makes the
        following character literal; any other character stands for itself.
        Raises RegexpSyntaxError at the end of the string or when a closing
        ')' or ']' is missing.
        """
        c = self.get()
        if c == '.':
            result = AnyBut("\n")
        elif c == '^':
            result = Bol
        elif c == '$':
            result = Eol
        elif c == '(':
            result = self.parse_alt()
            self.expect(')')
        elif c == '[':
            result = self.parse_charset()
            self.expect(']')
        else:
            if c == '\\':
                c = self.get()
            result = Char(c)
        return result

    def parse_charset(self):
        """
        Parse a charset. Does not include the surrounding [].

        A leading '^' inverts the set. A ']' that comes first (after the
        optional '^') is a literal member, and so is a '-' that comes
        directly before the closing ']'. Returns Any(chars), or
        AnyBut(chars) for an inverted set.
        """
        char_list = []
        invert = False
        if self.c == '^':
            invert = True
            self.next()
        if self.c == ']':
            char_list.append(']')
            self.next()
        while not self.end and self.c != ']':
            c1 = self.get()
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                c2 = self.get()
                # range() rather than the Python-2-only xrange(), so this
                # runs on both Python 2 and 3. A reversed range such as
                # "z-a" contributes no characters.
                for a in range(ord(c1), ord(c2) + 1):
                    char_list.append(chr(a))
            else:
                char_list.append(c1)
        chars = ''.join(char_list)
        if invert:
            return AnyBut(chars)
        else:
            return Any(chars)

    def next(self):
        """Advance to the next char, setting |end| when there is none."""
        s = self.s
        i = self.i = self.i + 1
        if i < len(s):
            self.c = s[i]
        else:
            self.c = ''
            self.end = 1

    def get(self):
        """
        Return the current char and advance past it.
        Raises RegexpSyntaxError if the end of the string has been reached.
        """
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """Look ahead n chars. Returns '' when that is past the end."""
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        else:
            return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position.
        Raises an exception otherwise.
        """
        if self.c == c:
            self.next()
        else:
            self.error("Missing %s" % repr(c))

    def error(self, mess):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
+ |
+ |
+ |