| Index: third_party/cython/src/Cython/Compiler/Scanning.py
|
| diff --git a/third_party/cython/src/Cython/Compiler/Scanning.py b/third_party/cython/src/Cython/Compiler/Scanning.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..93905e09792bdec62616bbf1f0252e524fd9a4ff
|
| --- /dev/null
|
| +++ b/third_party/cython/src/Cython/Compiler/Scanning.py
|
| @@ -0,0 +1,483 @@
|
| +# cython: infer_types=True, language_level=3, py2_import=True
|
| +#
|
| +# Cython Scanner
|
| +#
|
| +
|
| +import os
|
| +import platform
|
| +
|
| +import cython
|
| +cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
|
| + print_function=object)
|
| +
|
| +from Cython import Utils
|
| +from Cython.Plex.Scanners import Scanner
|
| +from Cython.Plex.Errors import UnrecognizedInput
|
| +from Errors import error
|
| +from Lexicon import any_string_prefix, make_lexicon, IDENT
|
| +from Future import print_function
|
| +
|
| +from StringEncoding import EncodedString
|
| +
|
| +debug_scanner = 0
|
| +trace_scanner = 0
|
| +scanner_debug_flags = 0
|
| +scanner_dump_file = None
|
| +
|
| +lexicon = None
|
| +
|
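| +# The Plex lexicon is built lazily on first use and cached at module level.
|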
| +def get_lexicon():
|
| + global lexicon
|
| + if not lexicon:
|
| + lexicon = make_lexicon()
|
| + return lexicon
|
| +
|
| +#------------------------------------------------------------------
|
| +
|
| +py_reserved_words = [
|
| + "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
|
| + "continue", "return", "raise", "import", "exec", "try",
|
| + "except", "finally", "while", "if", "elif", "else", "for",
|
| + "in", "assert", "and", "or", "not", "is", "in", "lambda",
|
| + "from", "yield", "with", "nonlocal",
|
| +]
|
| +
|
| +pyx_reserved_words = py_reserved_words + [
|
| + "include", "ctypedef", "cdef", "cpdef",
|
| + "cimport", "DEF", "IF", "ELIF", "ELSE"
|
| +]
|
| +
|
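| +# Lexicon action helper: calling an instance invokes the named scanner method with the matched text.
|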
| +class Method(object):
|
| +
|
| + def __init__(self, name):
|
| + self.name = name
|
| + self.__name__ = name # for Plex tracing
|
| +
|
| + def __call__(self, stream, text):
|
| + return getattr(stream, self.name)(text)
|
| +
|
| +#------------------------------------------------------------------
|
| +
|
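| +# Nested name-to-value scope for the compile-time (DEF/IF) environment;
|
| +# lookup() falls back to the enclosing scope on a miss.
|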
| +class CompileTimeScope(object):
|
| +
|
| + def __init__(self, outer = None):
|
| + self.entries = {}
|
| + self.outer = outer
|
| +
|
| + def declare(self, name, value):
|
| + self.entries[name] = value
|
| +
|
| + def update(self, other):
|
| + self.entries.update(other)
|
| +
|
| + def lookup_here(self, name):
|
| + return self.entries[name]
|
| +
|
| + def __contains__(self, name):
|
| + return name in self.entries
|
| +
|
| + def lookup(self, name):
|
| + try:
|
| + return self.lookup_here(name)
|
| + except KeyError:
|
| + outer = self.outer
|
| + if outer:
|
| + return outer.lookup(name)
|
| + else:
|
| + raise
|
| +
|
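| +# Build the root compile-time environment: UNAME_* values from platform.uname()
|
| +# plus a whitelisted subset of builtins, wrapped in a fresh child scope.
|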
| +def initial_compile_time_env():
|
| + benv = CompileTimeScope()
|
| + names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
|
| + 'UNAME_VERSION', 'UNAME_MACHINE')
|
| + for name, value in zip(names, platform.uname()):
|
| + benv.declare(name, value)
|
| + try:
|
| + import __builtin__ as builtins
|
| + except ImportError:
|
| + import builtins
|
| +
|
| + names = ('False', 'True',
|
| + 'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
|
| + 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
|
| + 'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
|
| + 'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
|
| + 'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
|
| + 'sum', 'tuple', 'xrange', 'zip')
|
| +
|
| + for name in names:
|
| + try:
|
| + benv.declare(name, getattr(builtins, name))
|
| + except AttributeError:
|
| + # ignore, likely Py3
|
| + pass
|
| + denv = CompileTimeScope(benv)
|
| + return denv
|
| +
|
| +#------------------------------------------------------------------
|
| +
|
| +class SourceDescriptor(object):
|
| + """
|
| + A SourceDescriptor should be considered immutable.
|
| + """
|
| + _file_type = 'pyx'
|
| +
|
| + _escaped_description = None
|
| + _cmp_name = ''
|
| + def __str__(self):
|
| + assert False # To catch all places where a descriptor is used directly as a filename
|
| +
|
| + def set_file_type_from_name(self, filename):
|
| + name, ext = os.path.splitext(filename)
|
| + self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'
|
| +
|
| + def is_cython_file(self):
|
| + return self._file_type in ('pyx', 'pxd')
|
| +
|
| + def is_python_file(self):
|
| + return self._file_type == 'py'
|
| +
|
| + def get_escaped_description(self):
|
| + if self._escaped_description is None:
|
| + self._escaped_description = \
|
| + self.get_description().encode('ASCII', 'replace').decode("ASCII")
|
| + return self._escaped_description
|
| +
|
| + def __gt__(self, other):
|
| + # this is only used to provide some sort of order
|
| + try:
|
| + return self._cmp_name > other._cmp_name
|
| + except AttributeError:
|
| + return False
|
| +
|
| + def __lt__(self, other):
|
| + # this is only used to provide some sort of order
|
| + try:
|
| + return self._cmp_name < other._cmp_name
|
| + except AttributeError:
|
| + return False
|
| +
|
| + def __le__(self, other):
|
| + # this is only used to provide some sort of order
|
| + try:
|
| + return self._cmp_name <= other._cmp_name
|
| + except AttributeError:
|
| + return False
|
| +
|
| +class FileSourceDescriptor(SourceDescriptor):
|
| + """
|
| + Represents a code source. A code source is a more generic abstraction
|
| + for a "filename" (as sometimes the code doesn't come from a file).
|
| + Instances of code sources are passed to Scanner.__init__ as the
|
| + optional name argument and will be passed back when asking for
|
| + the position()-tuple.
|
| + """
|
| + def __init__(self, filename, path_description=None):
|
| + filename = Utils.decode_filename(filename)
|
| + self.path_description = path_description or filename
|
| + self.filename = filename
|
| + self.set_file_type_from_name(filename)
|
| + self._cmp_name = filename
|
| + self._lines = {}
|
| +
|
| + def get_lines(self, encoding=None, error_handling=None):
|
| + # we cache the lines only the second time this is called, in
|
| + # order to save memory when they are only used once
|
| + key = (encoding, error_handling)
|
| + try:
|
| + lines = self._lines[key]
|
| + if lines is not None:
|
| + return lines
|
| + except KeyError:
|
| + pass
|
| + f = Utils.open_source_file(
|
| + self.filename, encoding=encoding,
|
| + error_handling=error_handling,
|
| + # newline normalisation is costly before Py2.6
|
| + require_normalised_newlines=False)
|
| + try:
|
| + lines = list(f)
|
| + finally:
|
| + f.close()
|
| + if key in self._lines:
|
| + self._lines[key] = lines
|
| + else:
|
| + # do not cache the first access, but remember that we
|
| + # already read it once
|
| + self._lines[key] = None
|
| + return lines
|
| +
|
| + def get_description(self):
|
| + return self.path_description
|
| +
|
| + def get_error_description(self):
|
| + path = self.filename
|
| + cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
|
| + if path.startswith(cwd):
|
| + return path[len(cwd):]
|
| + return path
|
| +
|
| + def get_filenametable_entry(self):
|
| + return self.filename
|
| +
|
| + def __eq__(self, other):
|
| + return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
|
| +
|
| + def __hash__(self):
|
| + return hash(self.filename)
|
| +
|
| + def __repr__(self):
|
| + return "<FileSourceDescriptor:%s>" % self.filename
|
| +
|
| +class StringSourceDescriptor(SourceDescriptor):
|
| + """
|
| +    Instances of this class can be used instead of a filename if the
|
| + code originates from a string object.
|
| + """
|
| + filename = None
|
| +
|
| + def __init__(self, name, code):
|
| + self.name = name
|
| + #self.set_file_type_from_name(name)
|
| + self.codelines = [x + "\n" for x in code.split("\n")]
|
| + self._cmp_name = name
|
| +
|
| + def get_lines(self, encoding=None, error_handling=None):
|
| + if not encoding:
|
| + return self.codelines
|
| + else:
|
| + return [ line.encode(encoding, error_handling).decode(encoding)
|
| + for line in self.codelines ]
|
| +
|
| + def get_description(self):
|
| + return self.name
|
| +
|
| + get_error_description = get_description
|
| +
|
| + def get_filenametable_entry(self):
|
| + return "stringsource"
|
| +
|
| + def __hash__(self):
|
| + return id(self)
|
| + # Do not hash on the name, an identical string source should be the
|
| + # same object (name is often defaulted in other places)
|
| + # return hash(self.name)
|
| +
|
| + def __eq__(self, other):
|
| + return isinstance(other, StringSourceDescriptor) and self.name == other.name
|
| +
|
| + def __repr__(self):
|
| + return "<StringSourceDescriptor:%s>" % self.name
|
| +
|
| +#------------------------------------------------------------------
|
| +
|
| +class PyrexScanner(Scanner):
|
| + # context Context Compilation context
|
| + # included_files [string] Files included with 'include' statement
|
| + # compile_time_env dict Environment for conditional compilation
|
| + # compile_time_eval boolean In a true conditional compilation context
|
| + # compile_time_expr boolean In a compile-time expression context
|
| +
|
| + def __init__(self, file, filename, parent_scanner = None,
|
| + scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
|
| + Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
|
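| +        # A child scanner (e.g. for an 'include' file) inherits the parent's
|
| +        # context and compile-time state.
|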
| + if parent_scanner:
|
| + self.context = parent_scanner.context
|
| + self.included_files = parent_scanner.included_files
|
| + self.compile_time_env = parent_scanner.compile_time_env
|
| + self.compile_time_eval = parent_scanner.compile_time_eval
|
| + self.compile_time_expr = parent_scanner.compile_time_expr
|
| + else:
|
| + self.context = context
|
| + self.included_files = scope.included_files
|
| + self.compile_time_env = initial_compile_time_env()
|
| + self.compile_time_eval = 1
|
| + self.compile_time_expr = 0
|
| + if hasattr(context.options, 'compile_time_env') and \
|
| + context.options.compile_time_env is not None:
|
| + self.compile_time_env.update(context.options.compile_time_env)
|
| + self.parse_comments = parse_comments
|
| + self.source_encoding = source_encoding
|
| + if filename.is_python_file():
|
| + self.in_python_file = True
|
| + self.keywords = set(py_reserved_words)
|
| + else:
|
| + self.in_python_file = False
|
| + self.keywords = set(pyx_reserved_words)
|
| + self.trace = trace_scanner
|
| + self.indentation_stack = [0]
|
| + self.indentation_char = None
|
| + self.bracket_nesting_level = 0
|
| + self.begin('INDENT')
|
| + self.sy = ''
|
| + self.next()
|
| +
|
| + def commentline(self, text):
|
| + if self.parse_comments:
|
| + self.produce('commentline', text)
|
| +
|
| + def current_level(self):
|
| + return self.indentation_stack[-1]
|
| +
|
| + def open_bracket_action(self, text):
|
| + self.bracket_nesting_level = self.bracket_nesting_level + 1
|
| + return text
|
| +
|
| + def close_bracket_action(self, text):
|
| + self.bracket_nesting_level = self.bracket_nesting_level - 1
|
| + return text
|
| +
|
| + def newline_action(self, text):
|
| + if self.bracket_nesting_level == 0:
|
| + self.begin('INDENT')
|
| + self.produce('NEWLINE', '')
|
| +
|
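| +    # Maps the quote that opened a string literal to the lexicon state
|
| +    # used to scan its body.
|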
| + string_states = {
|
| + "'": 'SQ_STRING',
|
| + '"': 'DQ_STRING',
|
| + "'''": 'TSQ_STRING',
|
| + '"""': 'TDQ_STRING'
|
| + }
|
| +
|
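| +    # Strip any string prefix characters to find the bare quote, then enter
|
| +    # the matching string state.
|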
| + def begin_string_action(self, text):
|
| + while text[:1] in any_string_prefix:
|
| + text = text[1:]
|
| + self.begin(self.string_states[text])
|
| + self.produce('BEGIN_STRING')
|
| +
|
| + def end_string_action(self, text):
|
| + self.begin('')
|
| + self.produce('END_STRING')
|
| +
|
| + def unclosed_string_action(self, text):
|
| + self.end_string_action(text)
|
| + self.error("Unclosed string literal")
|
| +
|
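| +    # Compare the new indentation against the stack and emit INDENT/DEDENT
|
| +    # tokens; also enforce consistent use of tabs vs. spaces.
|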
| + def indentation_action(self, text):
|
| + self.begin('')
|
| + # Indentation within brackets should be ignored.
|
| + #if self.bracket_nesting_level > 0:
|
| + # return
|
| + # Check that tabs and spaces are being used consistently.
|
| + if text:
|
| + c = text[0]
|
| + #print "Scanner.indentation_action: indent with", repr(c) ###
|
| + if self.indentation_char is None:
|
| + self.indentation_char = c
|
| + #print "Scanner.indentation_action: setting indent_char to", repr(c)
|
| + else:
|
| + if self.indentation_char != c:
|
| + self.error("Mixed use of tabs and spaces")
|
| + if text.replace(c, "") != "":
|
| + self.error("Mixed use of tabs and spaces")
|
| + # Figure out how many indents/dedents to do
|
| + current_level = self.current_level()
|
| + new_level = len(text)
|
| + #print "Changing indent level from", current_level, "to", new_level ###
|
| + if new_level == current_level:
|
| + return
|
| + elif new_level > current_level:
|
| + #print "...pushing level", new_level ###
|
| + self.indentation_stack.append(new_level)
|
| + self.produce('INDENT', '')
|
| + else:
|
| + while new_level < self.current_level():
|
| + #print "...popping level", self.indentation_stack[-1] ###
|
| + self.indentation_stack.pop()
|
| + self.produce('DEDENT', '')
|
| + #print "...current level now", self.current_level() ###
|
| + if new_level != self.current_level():
|
| + self.error("Inconsistent indentation")
|
| +
|
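| +    # Emit a DEDENT for every open indentation level, then EOF.
|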
| + def eof_action(self, text):
|
| + while len(self.indentation_stack) > 1:
|
| + self.produce('DEDENT', '')
|
| + self.indentation_stack.pop()
|
| + self.produce('EOF', '')
|
| +
|
| + def next(self):
|
| + try:
|
| + sy, systring = self.read()
|
| + except UnrecognizedInput:
|
| + self.error("Unrecognized character")
|
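| +        # Keywords are returned with sy set to the keyword itself; 'print' and
|
| +        # 'exec' revert to plain identifiers under the print_function future
|
| +        # directive or language_level >= 3, respectively.
|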
| + if sy == IDENT:
|
| + if systring in self.keywords:
|
| + if systring == u'print' and print_function in self.context.future_directives:
|
| + self.keywords.discard('print')
|
| + systring = EncodedString(systring)
|
| + elif systring == u'exec' and self.context.language_level >= 3:
|
| + self.keywords.discard('exec')
|
| + systring = EncodedString(systring)
|
| + else:
|
| + sy = systring
|
| + else:
|
| + systring = EncodedString(systring)
|
| + self.sy = sy
|
| + self.systring = systring
|
| + if False: # debug_scanner:
|
| + _, line, col = self.position()
|
| + if not self.systring or self.sy == self.systring:
|
| + t = self.sy
|
| + else:
|
| + t = "%s %s" % (self.sy, self.systring)
|
| + print("--- %3d %2d %s" % (line, col, t))
|
| +
|
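| +    # Read one token ahead, push it back onto the queue, and restore the
|
| +    # current token before returning the lookahead.
|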
| + def peek(self):
|
| + saved = self.sy, self.systring
|
| + self.next()
|
| + next = self.sy, self.systring
|
| + self.unread(*next)
|
| + self.sy, self.systring = saved
|
| + return next
|
| +
|
| + def put_back(self, sy, systring):
|
| + self.unread(self.sy, self.systring)
|
| + self.sy = sy
|
| + self.systring = systring
|
| +
|
| + def unread(self, token, value):
|
| + # This method should be added to Plex
|
| + self.queue.insert(0, (token, value))
|
| +
|
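| +    # Report via Errors.error(); when the scanner sits on an INDENT token,
|
| +    # first hint at possibly inconsistent indentation. Raises when fatal.
|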
| + def error(self, message, pos = None, fatal = True):
|
| + if pos is None:
|
| + pos = self.position()
|
| + if self.sy == 'INDENT':
|
| + err = error(pos, "Possible inconsistent indentation")
|
| + err = error(pos, message)
|
| + if fatal: raise err
|
| +
|
| + def expect(self, what, message = None):
|
| + if self.sy == what:
|
| + self.next()
|
| + else:
|
| + self.expected(what, message)
|
| +
|
| + def expect_keyword(self, what, message = None):
|
| + if self.sy == IDENT and self.systring == what:
|
| + self.next()
|
| + else:
|
| + self.expected(what, message)
|
| +
|
| + def expected(self, what, message = None):
|
| + if message:
|
| + self.error(message)
|
| + else:
|
| + if self.sy == IDENT:
|
| + found = self.systring
|
| + else:
|
| + found = self.sy
|
| + self.error("Expected '%s', found '%s'" % (what, found))
|
| +
|
| + def expect_indent(self):
|
| + self.expect('INDENT',
|
| + "Expected an increase in indentation level")
|
| +
|
| + def expect_dedent(self):
|
| + self.expect('DEDENT',
|
| + "Expected a decrease in indentation level")
|
| +
|
| + def expect_newline(self, message = "Expected a newline"):
|
| + # Expect either a newline or end of file
|
| + if self.sy != 'EOF':
|
| + self.expect('NEWLINE', message)
|
|
|