OLD | NEW |
(Empty) | |
| 1 # cython: infer_types=True, language_level=3, py2_import=True |
| 2 # |
| 3 # Cython Scanner |
| 4 # |
| 5 |
| 6 import os |
| 7 import platform |
| 8 |
| 9 import cython |
| 10 cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode, |
| 11 print_function=object) |
| 12 |
| 13 from Cython import Utils |
| 14 from Cython.Plex.Scanners import Scanner |
| 15 from Cython.Plex.Errors import UnrecognizedInput |
| 16 from Errors import error |
| 17 from Lexicon import any_string_prefix, make_lexicon, IDENT |
| 18 from Future import print_function |
| 19 |
| 20 from StringEncoding import EncodedString |
| 21 |
# Module-level debugging switches for the scanner.
# Not consulted in the visible part of this module: the token dump at the end
# of PyrexScanner.next() is guarded by a literal "if False".
debug_scanner = 0
# Copied into PyrexScanner.trace by PyrexScanner.__init__.
trace_scanner = 0
# NOTE(review): the next two names are not read anywhere in the visible part
# of this module -- presumably consumed elsewhere; confirm before removing.
scanner_debug_flags = 0
scanner_dump_file = None
| 26 |
# Lexicon shared by every scanner; built on demand by get_lexicon().
lexicon = None


def get_lexicon():
    """Return the module-wide lexicon, creating it with make_lexicon() on first use."""
    global lexicon
    if lexicon:
        return lexicon
    lexicon = make_lexicon()
    return lexicon
| 34 |
| 35 #------------------------------------------------------------------ |
| 36 |
# Reserved words of plain Python sources.  Each word is listed exactly once
# (the list previously repeated "nonlocal" and "in"); PyrexScanner turns the
# list into a set of keywords.
py_reserved_words = [
    "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
    "continue", "return", "raise", "import", "exec", "try",
    "except", "finally", "while", "if", "elif", "else", "for",
    "in", "assert", "and", "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# Sources that are not Python files (see PyrexScanner.__init__) additionally
# reserve these words.
pyx_reserved_words = py_reserved_words + [
    "include", "ctypedef", "cdef", "cpdef",
    "cimport", "DEF", "IF", "ELIF", "ELSE",
]
| 49 |
class Method(object):
    """Callable that forwards (stream, text) to the method of `stream` with a given name."""

    def __init__(self, name):
        # __name__ mirrors the method name; it is kept for Plex tracing.
        self.name = self.__name__ = name

    def __call__(self, stream, text):
        """Look up the named method on `stream` and call it with `text`."""
        action = getattr(stream, self.name)
        return action(text)
| 58 |
| 59 #------------------------------------------------------------------ |
| 60 |
class CompileTimeScope(object):
    """Mapping of names to values, optionally chained to an outer scope."""

    def __init__(self, outer = None):
        self.outer = outer
        self.entries = {}

    def declare(self, name, value):
        """Bind `name` to `value` in this scope, replacing any earlier binding."""
        self.entries[name] = value

    def update(self, other):
        """Merge `other` (anything dict.update() accepts) into this scope."""
        self.entries.update(other)

    def lookup_here(self, name):
        """Return the value bound in this scope only; raises KeyError if absent."""
        return self.entries[name]

    def __contains__(self, name):
        """Report whether `name` is bound in this scope; outer scopes are not checked."""
        return name in self.entries

    def lookup(self, name):
        """Return the value of `name` from this scope or the nearest outer scope.

        Raises KeyError when no scope in the chain binds the name.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            if not self.outer:
                raise
            return self.outer.lookup(name)
| 88 |
def initial_compile_time_env():
    """Build the default compile-time environment.

    The returned CompileTimeScope is empty; its outer scope holds the
    UNAME_* values taken from platform.uname() and those of the listed
    builtins that exist in the running interpreter.
    """
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins

    uname_names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
                   'UNAME_VERSION', 'UNAME_MACHINE')
    builtin_names = ('False', 'True',
                     'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
                     'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
                     'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
                     'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
                     'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
                     'sum', 'tuple', 'xrange', 'zip')

    benv = CompileTimeScope()
    # zip() stops at the shorter sequence, so only the first five uname
    # fields are declared.
    for name, value in zip(uname_names, platform.uname()):
        benv.declare(name, value)
    for name in builtin_names:
        # Names missing from this interpreter's builtins (likely Py3) are skipped.
        if hasattr(builtins, name):
            benv.declare(name, getattr(builtins, name))
    return CompileTimeScope(benv)
| 116 |
| 117 #------------------------------------------------------------------ |
| 118 |
class SourceDescriptor(object):
    """
    A SourceDescriptor should be considered immutable.

    Subclasses are expected to provide get_description(), which
    get_escaped_description() calls.
    """
    # File type used until set_file_type_from_name() says otherwise.
    _file_type = 'pyx'

    # Cache for get_escaped_description().
    _escaped_description = None
    # Key used by the ordering methods below.
    _cmp_name = ''

    def __str__(self):
        assert False # To catch all places where a descriptor is used directly as a filename

    def set_file_type_from_name(self, filename):
        """Set the file type from the extension of `filename`.

        '.pyx', '.pxd' and '.py' map to 'pyx', 'pxd' and 'py'; any other
        extension falls back to 'pyx'.
        """
        name, ext = os.path.splitext(filename)
        if ext in ('.pyx', '.pxd', '.py'):
            self._file_type = ext[1:]
        else:
            self._file_type = 'pyx'

    def is_cython_file(self):
        """Return True if the file type is 'pyx' or 'pxd'."""
        return self._file_type in ('pyx', 'pxd')

    def is_python_file(self):
        """Return True if the file type is 'py'."""
        return self._file_type == 'py'

    def get_escaped_description(self):
        """Return get_description() with non-ASCII characters replaced by '?'.

        The result is computed once and cached on the instance.
        """
        if self._escaped_description is None:
            self._escaped_description = \
                self.get_description().encode('ASCII', 'replace').decode("ASCII")
        return self._escaped_description

    # The comparison methods are only used to provide some sort of order.
    # Comparing against an object without a _cmp_name yields False.

    def __gt__(self, other):
        try:
            return self._cmp_name > other._cmp_name
        except AttributeError:
            return False

    def __lt__(self, other):
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

    def __le__(self, other):
        try:
            return self._cmp_name <= other._cmp_name
        except AttributeError:
            return False

    def __ge__(self, other):
        # Added so that ">=" behaves like the other three operators instead
        # of raising TypeError for objects that are not descriptors.
        try:
            return self._cmp_name >= other._cmp_name
        except AttributeError:
            return False
| 166 |
class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """
    def __init__(self, filename, path_description=None):
        decoded = Utils.decode_filename(filename)
        self.filename = decoded
        self._cmp_name = decoded
        # Without an explicit description the decoded filename is used.
        self.path_description = path_description or decoded
        self.set_file_type_from_name(decoded)
        # Maps (encoding, error_handling) to the cached lines, or to None
        # once the file has been read a single time.
        self._lines = {}

    def get_lines(self, encoding=None, error_handling=None):
        """Return the lines of the file as a list.

        The lines are cached only from the second call with the same
        (encoding, error_handling) pair onwards, in order to save memory
        when they are only used once.
        """
        key = (encoding, error_handling)
        cached = self._lines.get(key)
        if cached is not None:
            return cached
        source = Utils.open_source_file(
            self.filename, encoding=encoding,
            error_handling=error_handling,
            # newline normalisation is costly before Py2.6
            require_normalised_newlines=False)
        try:
            lines = list(source)
        finally:
            source.close()
        if key not in self._lines:
            # first access: do not cache, only remember that the file
            # has been read once
            self._lines[key] = None
        else:
            self._lines[key] = lines
        return lines

    def get_description(self):
        """Return the path description given at construction (or the filename)."""
        return self.path_description

    def get_error_description(self):
        """Return the filename, with the current working directory prefix stripped if present."""
        cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
        if self.filename.startswith(cwd):
            return self.filename[len(cwd):]
        return self.filename

    def get_filenametable_entry(self):
        """Return the decoded filename."""
        return self.filename

    def __eq__(self, other):
        # Equal only to another FileSourceDescriptor with the same filename.
        if not isinstance(other, FileSourceDescriptor):
            return False
        return self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
| 231 |
class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filenames if the
    code originates from a string object.
    """
    filename = None

    def __init__(self, name, code):
        self.name = name
        self._cmp_name = name
        # The file type is not derived from the name here.
        # Every piece of the split gets a newline appended, including the last.
        self.codelines = []
        for line in code.split("\n"):
            self.codelines.append(line + "\n")

    def get_lines(self, encoding=None, error_handling=None):
        """Return the code lines, round-tripped through `encoding` when one is given."""
        if encoding:
            return [ line.encode(encoding, error_handling).decode(encoding)
                     for line in self.codelines ]
        return self.codelines

    def get_description(self):
        """Return the name given at construction."""
        return self.name

    get_error_description = get_description

    def get_filenametable_entry(self):
        """Return the fixed entry "stringsource"."""
        return "stringsource"

    def __hash__(self):
        # Do not hash on the name, an identical string source should be the
        # same object (name is often defaulted in other places)
        return id(self)

    def __eq__(self, other):
        # Equal only to another StringSourceDescriptor with the same name.
        if not isinstance(other, StringSourceDescriptor):
            return False
        return self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
| 271 |
| 272 #------------------------------------------------------------------ |
| 273 |
class PyrexScanner(Scanner):
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    def __init__(self, file, filename, parent_scanner = None,
                 scope = None, context = None, source_encoding=None,
                 parse_comments=True, initial_pos=None):
        """Set up the scanner and read the first token.

        With a parent_scanner, the context, included files and compile-time
        state are taken from it; otherwise they come from `context` and
        `scope`, and a fresh compile-time environment is created.
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if parent_scanner:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        else:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            # Values supplied through the options extend the initial
            # environment.
            extra_env = getattr(context.options, 'compile_time_env', None)
            if extra_env is not None:
                self.compile_time_env.update(extra_env)
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        # Python files reserve fewer words than the other file types.
        if filename.is_python_file():
            self.in_python_file = True
            self.keywords = set(py_reserved_words)
        else:
            self.in_python_file = False
            self.keywords = set(pyx_reserved_words)
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def commentline(self, text):
        """Produce a 'commentline' token, but only when comments are being parsed."""
        if self.parse_comments:
            self.produce('commentline', text)

    def current_level(self):
        """Return the indentation level on top of the indentation stack."""
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        """Count one more level of bracket nesting and return the text unchanged."""
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        """Count one less level of bracket nesting and return the text unchanged."""
        self.bracket_nesting_level -= 1
        return text

    def newline_action(self, text):
        """Outside brackets, switch to the 'INDENT' state and produce NEWLINE."""
        if self.bracket_nesting_level == 0:
            self.begin('INDENT')
            self.produce('NEWLINE', '')

    # Scanner state entered for each kind of opening quote.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        """Strip the string prefix characters, enter the state for the quote and produce BEGIN_STRING."""
        while text[:1] in any_string_prefix:
            text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        """Return to the default state and produce END_STRING."""
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        """Close the string as end_string_action() does, then report the error."""
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        """Turn the leading whitespace `text` of a line into INDENT/DEDENT tokens.

        Reports an error when tabs and spaces are mixed, or when a dedent
        does not land on a level that is on the indentation stack.
        """
        self.begin('')
        # Check that tabs and spaces are being used consistently.
        if text:
            first = text[0]
            if self.indentation_char is None:
                # The first indented line decides the indentation character.
                self.indentation_char = first
            elif self.indentation_char != first:
                self.error("Mixed use of tabs and spaces")
            if text.replace(first, "") != "":
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        new_level = len(text)
        current_level = self.current_level()
        if new_level == current_level:
            return
        if new_level > current_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
            return
        while new_level < self.current_level():
            self.indentation_stack.pop()
            self.produce('DEDENT', '')
        if new_level != self.current_level():
            self.error("Inconsistent indentation")

    def eof_action(self, text):
        """Produce a DEDENT for every open indentation level, then EOF."""
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        """Read the next token into self.sy and self.systring.

        For an identifier that is a keyword, the keyword text becomes the
        token type.  'print' and 'exec' stop being keywords when the context
        says so; like every other identifier, their text is then wrapped in
        EncodedString.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
        if sy == IDENT:
            if systring not in self.keywords:
                systring = EncodedString(systring)
            elif systring == u'print' and print_function in self.context.future_directives:
                self.keywords.discard('print')
                systring = EncodedString(systring)
            elif systring == u'exec' and self.context.language_level >= 3:
                self.keywords.discard('exec')
                systring = EncodedString(systring)
            else:
                sy = systring
        self.sy = sy
        self.systring = systring
        if False: # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        """Return the (sy, systring) pair of the following token without consuming it."""
        saved = self.sy, self.systring
        self.next()
        upcoming = self.sy, self.systring
        self.unread(*upcoming)
        self.sy, self.systring = saved
        return upcoming

    def put_back(self, sy, systring):
        """Queue the current token again and make (sy, systring) the current one."""
        self.unread(self.sy, self.systring)
        self.sy = sy
        self.systring = systring

    def unread(self, token, value):
        """Insert (token, value) at the front of the token queue."""
        # This method should be added to Plex
        self.queue.insert(0, (token, value))

    def error(self, message, pos = None, fatal = True):
        """Report `message` at `pos` (default: the current position).

        When the current token is 'INDENT', a hint about inconsistent
        indentation is reported first.  The object returned for `message`
        is raised if `fatal` is true.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal:
            raise err

    def expect(self, what, message = None):
        """Consume the current token if its type is `what`; otherwise report an error."""
        if self.sy != what:
            self.expected(what, message)
        else:
            self.next()

    def expect_keyword(self, what, message = None):
        """Consume the current token if it is an identifier spelled `what`; otherwise report an error."""
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message = None):
        """Report `message`, or a default "Expected ..., found ..." error when none is given."""
        if message:
            self.error(message)
            return
        # For identifiers the text is more useful than the token type.
        if self.sy == IDENT:
            found = self.systring
        else:
            found = self.sy
        self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        """Consume an INDENT token or report an error."""
        self.expect('INDENT',
                    "Expected an increase in indentation level")

    def expect_dedent(self):
        """Consume a DEDENT token or report an error."""
        self.expect('DEDENT',
                    "Expected a decrease in indentation level")

    def expect_newline(self, message = "Expected a newline"):
        """Consume a NEWLINE token unless the current token is EOF."""
        # Expect either a newline or end of file
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)
OLD | NEW |