| Index: third_party/cython/src/Cython/Plex/Traditional.py
|
| diff --git a/third_party/cython/src/Cython/Plex/Traditional.py b/third_party/cython/src/Cython/Plex/Traditional.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..6d3e48fa4a2ed8f4240327222d2e415dba032024
|
| --- /dev/null
|
| +++ b/third_party/cython/src/Cython/Plex/Traditional.py
|
| @@ -0,0 +1,154 @@
|
| +#=======================================================================
|
| +#
|
| +# Python Lexical Analyser
|
| +#
|
| +# Traditional Regular Expression Syntax
|
| +#
|
| +#=======================================================================
|
| +
|
| +from Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
|
| +from Errors import PlexError
|
| +
|
class RegexpSyntaxError(PlexError):
    """
    Signals a syntax error in a traditional-style regular expression string.

    Adds no behaviour of its own beyond what PlexError provides.
    """
|
| +
|
def re(s):
    """
    Build the Plex regular expression equivalent to the traditional-syntax
    pattern string |s|.

    The string is handed to an REParser and the result of that parser's
    parse_re() method is returned.
    """
    parser = REParser(s)
    return parser.parse_re()
|
| +
|
class REParser(object):
    """
    Recursive-descent parser that turns a traditional regular expression
    string into Plex regexp objects (Alt, Seq, Rep, Rep1, Opt, Any, AnyBut,
    Bol, Eol, Char).

    Parser state:
      s    -- the pattern string being parsed
      i    -- index in s of the current character
      c    -- the current character, or '' once the end has been reached
      end  -- 0 while characters remain, 1 once the end has been reached
    """

    def __init__(self, s):
        """Start parsing |s|, positioned on its first character."""
        self.s = s
        # Start one position before the string so that the first next()
        # lands on index 0 and initialises self.c (and self.end if empty).
        self.i = -1
        self.end = 0
        self.next()

    def parse_re(self):
        """
        Parse the whole pattern and return the resulting regexp.

        Raises RegexpSyntaxError if input remains after the top-level
        alternation has been parsed (for example an unmatched ')').
        """
        result = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return result

    def parse_alt(self):
        """
        Parse a set of alternative regexps separated by '|'.

        A lone sequence is returned as is; two or more are wrapped in Alt.
        """
        first = self.parse_seq()
        if self.c != '|':
            return first
        alternatives = [first]
        while self.c == '|':
            self.next()
            alternatives.append(self.parse_seq())
        return Alt(*alternatives)

    def parse_seq(self):
        """
        Parse a sequence of regexps and return them wrapped in Seq.

        Parsing stops at the end of the input or at a '|' or ')', which
        belong to the enclosing alternation or group. The sequence may be
        empty, in which case Seq is called with no arguments.
        """
        items = []
        while not self.end and self.c not in "|)":
            items.append(self.parse_mod())
        return Seq(*items)

    def parse_mod(self):
        """
        Parse a primitive regexp followed by any number of *, +, ?
        modifiers, each one wrapping the result built so far.
        """
        node = self.parse_prim()
        while not self.end and self.c in "*+?":
            if self.c == '*':
                node = Rep(node)
            elif self.c == '+':
                node = Rep1(node)
            else:  # self.c == '?'
                node = Opt(node)
            self.next()
        return node

    def parse_prim(self):
        """
        Parse a primitive regexp: '.', '^', '$', a parenthesised group,
        a bracketed charset, or a single (possibly backslash-escaped)
        character.

        Raises RegexpSyntaxError on premature end of input or when the
        closing ')' or ']' is missing.
        """
        c = self.get()
        if c == '.':
            node = AnyBut("\n")
        elif c == '^':
            node = Bol
        elif c == '$':
            node = Eol
        elif c == '(':
            node = self.parse_alt()
            self.expect(')')
        elif c == '[':
            node = self.parse_charset()
            self.expect(']')
        else:
            if c == '\\':
                # A backslash makes the following character literal.
                c = self.get()
            node = Char(c)
        return node

    def parse_charset(self):
        """
        Parse a charset. Does not include the surrounding [].

        Returns AnyBut(chars) when the set starts with '^', otherwise
        Any(chars).
        """
        invert, chars = self._scan_charset()
        if invert:
            return AnyBut(chars)
        return Any(chars)

    def _scan_charset(self):
        """
        Consume the body of a charset up to, but not including, the
        closing ']'.

        Returns a tuple (invert, chars): invert is 1 if the set began with
        '^' and 0 otherwise; chars is a string of the characters listed,
        with ranges such as a-z expanded.
        """
        char_list = []
        invert = 0
        if self.c == '^':
            invert = 1
            self.next()
        if self.c == ']':
            # A ']' in first position is a literal member, not the closer.
            char_list.append(']')
            self.next()
        while not self.end and self.c != ']':
            c1 = self.get()
            # A '-' directly before the closing ']' is a literal '-',
            # not a range operator; it is picked up on the next iteration.
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                c2 = self.get()
                # range() rather than xrange() so this also runs on
                # Python 3. A reversed range (c2 < c1) adds no characters.
                char_list.extend(
                    chr(a) for a in range(ord(c1), ord(c2) + 1))
            else:
                char_list.append(c1)
        return invert, ''.join(char_list)

    def next(self):
        """
        Advance to the next char.

        Past the last character, self.c becomes '' and self.end becomes 1.
        """
        s = self.s
        i = self.i = self.i + 1
        if i < len(s):
            self.c = s[i]
        else:
            self.c = ''
            self.end = 1

    def get(self):
        """
        Return the current char and advance past it.

        Raises RegexpSyntaxError if the end of the string has already
        been reached.
        """
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """
        Look ahead n chars from the current position without consuming
        anything. Returns '' if that position is past the end of the string.
        """
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position and skip it.
        Raises RegexpSyntaxError otherwise.
        """
        if self.c == c:
            self.next()
        else:
            self.error("Missing %s" % repr(c))

    def error(self, mess):
        """
        Raise RegexpSyntaxError to signal a syntax error in the regexp.

        The message includes the pattern string and the current index
        self.i, followed by |mess|.
        """
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
|
| +
|
| +
|
| +
|
|
|