| Index: third_party/cython/src/Cython/Plex/Lexicons.py
|
| diff --git a/third_party/cython/src/Cython/Plex/Lexicons.py b/third_party/cython/src/Cython/Plex/Lexicons.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..88074666b014ced0daa542c7c5accceb6c119e61
|
| --- /dev/null
|
| +++ b/third_party/cython/src/Cython/Plex/Lexicons.py
|
| @@ -0,0 +1,195 @@
|
| +#=======================================================================
|
| +#
|
| +# Python Lexical Analyser
|
| +#
|
| +# Lexical Analyser Specification
|
| +#
|
| +#=======================================================================
|
| +
|
| +import types
|
| +
|
| +import Actions
|
| +import DFA
|
| +import Errors
|
| +import Machines
|
| +import Regexps
|
| +
|
# debug_flags for Lexicon constructor: a bitmask controlling which
# intermediate machines are written to the |debug| file.
DUMP_NFA = 1  # bit 0: dump the constructed NFA (checked via debug_flags & 1)
DUMP_DFA = 2  # bit 1: dump the converted DFA (checked via debug_flags & 2)
|
| +
|
class State(object):
    """
    Wraps a named user-defined scanner state for use inside a
    Plex.Lexicon specification.

    Constructor:

       State(name, token_specifications)

    |name| is the string identifying the state and
    |token_specifications| is the list of (pattern, action) token
    definitions that are active while the scanner is in this state.
    """

    # Class-level defaults; each instance overwrites both in __init__.
    name = None
    tokens = None

    def __init__(self, name, tokens):
        # Store the state name and its token definitions unchanged.
        self.name, self.tokens = name, tokens
|
| +
|
class Lexicon(object):
    """
    Lexicon(specification) builds a lexical analyser from the given
    |specification|. The specification consists of a list of
    specification items. Each specification item may be either:

       1) A token definition, which is a tuple:

             (pattern, action)

          The |pattern| is a regular expression built using the
          constructors defined in the Plex module.

          The |action| is the action to be performed when this pattern
          is recognised (see below).

       2) A state definition:

             State(name, tokens)

          where |name| is a character string naming the state,
          and |tokens| is a list of token definitions as
          above. The meaning and usage of states is described
          below.

    Actions
    -------

    The |action| in a token specification may be one of three things:

       1) A function, which is called as follows:

             function(scanner, text)

          where |scanner| is the relevant Scanner instance, and |text|
          is the matched text. If the function returns anything
          other than None, that value is returned as the value of the
          token. If it returns None, scanning continues as if the IGNORE
          action were specified (see below).

       2) One of the following special actions:

          IGNORE means that the recognised characters will be treated as
                 white space and ignored. Scanning will continue until
                 the next non-ignored token is recognised before returning.

          TEXT   causes the scanned text itself to be returned as the
                 value of the token.

       3) Any other value, which is returned as the value of the token.

    States
    ------

    At any given time, the scanner is in one of a number of states.
    Associated with each state is a set of possible tokens. When scanning,
    only tokens associated with the current state are recognised.

    There is a default state, whose name is the empty string. Token
    definitions which are not inside any State definition belong to
    the default state.

    The initial state of the scanner is the default state. The state can
    be changed in one of two ways:

       1) Using Begin(state_name) as the action of a token.

       2) Calling the begin(state_name) method of the Scanner.

    To change back to the default state, use '' as the state name.
    """

    machine = None    # Machine -- the compiled DFA, set at the end of __init__
    tables = None     # StateTableMachine -- never assigned here; presumably
                      # filled in elsewhere (TODO: confirm against callers)

    def __init__(self, specifications, debug = None, debug_flags = 7, timings = None):
        # |specifications|: list of token tuples and/or State instances.
        # |debug|: optional writable file-like object for machine dumps.
        # |debug_flags|: bitmask of DUMP_NFA / DUMP_DFA (default 7: dump all).
        # |timings|: optional writable file-like object for phase timings.
        # Raises Errors.InvalidScanner / Errors.InvalidToken on bad input.
        if type(specifications) != types.ListType:
            raise Errors.InvalidScanner("Scanner definition is not a list")
        if timings:
            from Timing import time
            total_time = 0.0
            time1 = time()
        # Phase 1: build an NFA with one initial state per scanner state.
        nfa = Machines.Machine()
        default_initial_state = nfa.new_initial_state('')
        token_number = 1
        for spec in specifications:
            if isinstance(spec, State):
                # User-defined state: its tokens hang off their own
                # initial state, keyed by the state's name.
                user_initial_state = nfa.new_initial_state(spec.name)
                for token in spec.tokens:
                    self.add_token_to_machine(
                        nfa, user_initial_state, token, token_number)
                    token_number = token_number + 1
            elif type(spec) == types.TupleType:
                # Bare token tuple: belongs to the default ('') state.
                self.add_token_to_machine(
                    nfa, default_initial_state, spec, token_number)
                token_number = token_number + 1
            else:
                raise Errors.InvalidToken(
                    token_number,
                    "Expected a token definition (tuple) or State instance")
        if timings:
            time2 = time()
            total_time = total_time + (time2 - time1)
            time3 = time()
        if debug and (debug_flags & 1):
            # DUMP_NFA requested.
            debug.write("\n============= NFA ===========\n")
            nfa.dump(debug)
        # Phase 2: determinise. Debug tracing of the conversion itself is
        # only enabled when BOTH dump flags are set (debug_flags & 3 == 3).
        dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug)
        if timings:
            time4 = time()
            total_time = total_time + (time4 - time3)
        if debug and (debug_flags & 2):
            # DUMP_DFA requested.
            debug.write("\n============= DFA ===========\n")
            dfa.dump(debug)
        if timings:
            timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
            timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
            timings.write("TOTAL            : %5.2f\n" % total_time)
        self.machine = dfa

    def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
        # Compile one (pattern, action) pair into |machine|, rooted at
        # |initial_state|. Any PlexError raised during compilation is
        # re-raised with the token number prepended for diagnosis.
        try:
            (re, action_spec) = self.parse_token_definition(token_spec)
            # Disabled this -- matching empty strings can be useful
            #if re.nullable:
            #  raise Errors.InvalidToken(
            #    token_number, "Pattern can match 0 input symbols")
            if isinstance(action_spec, Actions.Action):
                action = action_spec
            else:
                # EAFP: anything callable becomes a Call action, any
                # other value is simply returned as the token value.
                try:
                    action_spec.__call__
                except AttributeError:
                    action = Actions.Return(action_spec)
                else:
                    action = Actions.Call(action_spec)
            final_state = machine.new_state()
            re.build_machine(machine, initial_state, final_state,
                             match_bol = 1, nocase = 0)
            # Negated token number as priority: earlier-listed tokens win
            # when several patterns match the same text.
            final_state.set_action(action, priority = -token_number)
        except Errors.PlexError, e:
            raise e.__class__("Token number %d: %s" % (token_number, e))

    def parse_token_definition(self, token_spec):
        # Validate a token spec and split it into (pattern, action).
        # Raises Errors.InvalidToken if the spec is not a 2-tuple whose
        # first item is a Regexps.RE instance.
        if type(token_spec) != types.TupleType:
            raise Errors.InvalidToken("Token definition is not a tuple")
        if len(token_spec) != 2:
            raise Errors.InvalidToken("Wrong number of items in token definition")
        pattern, action = token_spec
        if not isinstance(pattern, Regexps.RE):
            raise Errors.InvalidToken("Pattern is not an RE instance")
        return (pattern, action)

    def get_initial_state(self, name):
        # Look up the DFA initial state for scanner state |name|
        # ('' selects the default state).
        return self.machine.get_initial_state(name)
|
| +
|
| +
|
| +
|
|
|