OLD | NEW |
(Empty) | |
| 1 #======================================================================= |
| 2 # |
| 3 # Python Lexical Analyser |
| 4 # |
| 5 # Lexical Analyser Specification |
| 6 # |
| 7 #======================================================================= |
| 8 |
| 9 import types |
| 10 |
| 11 import Actions |
| 12 import DFA |
| 13 import Errors |
| 14 import Machines |
| 15 import Regexps |
| 16 |
| 17 # debug_flags for Lexicon constructor |
| 18 DUMP_NFA = 1 |
| 19 DUMP_DFA = 2 |
| 20 |
class State(object):
    """
    Part of a Plex.Lexicon specification: declares a user-defined
    scanner state together with the token definitions that are
    recognised while the scanner is in that state.

    Constructor:

      State(name, token_specifications)
    """

    name = None    # state name (string)
    tokens = None  # list of (pattern, action) token definitions

    def __init__(self, name, tokens):
        self.name, self.tokens = name, tokens
| 37 |
class Lexicon(object):
    """
    Lexicon(specification) builds a lexical analyser from the given
    |specification|. The specification consists of a list of
    specification items. Each specification item may be either:

    1) A token definition, which is a tuple:

       (pattern, action)

       The |pattern| is a regular expression built using the
       constructors defined in the Plex module.

       The |action| is the action to be performed when this pattern
       is recognised (see below).

    2) A state definition:

       State(name, tokens)

       where |name| is a character string naming the state,
       and |tokens| is a list of token definitions as
       above. The meaning and usage of states is described
       below.

    Actions
    -------

    The |action| in a token specification may be one of three things:

    1) A function, which is called as follows:

       function(scanner, text)

       where |scanner| is the relevant Scanner instance, and |text|
       is the matched text. If the function returns anything
       other than None, that value is returned as the value of the
       token. If it returns None, scanning continues as if the IGNORE
       action were specified (see below).

    2) One of the following special actions:

       IGNORE means that the recognised characters will be treated as
       white space and ignored. Scanning will continue until
       the next non-ignored token is recognised before returning.

       TEXT causes the scanned text itself to be returned as the
       value of the token.

    3) Any other value, which is returned as the value of the token.

    States
    ------

    At any given time, the scanner is in one of a number of states.
    Associated with each state is a set of possible tokens. When scanning,
    only tokens associated with the current state are recognised.

    There is a default state, whose name is the empty string. Token
    definitions which are not inside any State definition belong to
    the default state.

    The initial state of the scanner is the default state. The state can
    be changed in one of two ways:

    1) Using Begin(state_name) as the action of a token.

    2) Calling the begin(state_name) method of the Scanner.

    To change back to the default state, use '' as the state name.
    """

    machine = None  # DFA built from the specification (Machines module)
    tables = None   # StateTableMachine

    def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
        """
        Build the scanner DFA from |specifications|.

        specifications -- list of token tuples and/or State instances
        debug          -- optional file to which NFA/DFA dumps are written
        debug_flags    -- bitmask of DUMP_NFA | DUMP_DFA selecting which
                          dumps are written to |debug|
        timings        -- optional file to which construction timings
                          are written

        Raises Errors.InvalidScanner or Errors.InvalidToken on a
        malformed specification.
        """
        if not isinstance(specifications, list):
            raise Errors.InvalidScanner("Scanner definition is not a list")
        if timings:
            from Timing import time
            total_time = 0.0
            time1 = time()
        nfa = Machines.Machine()
        default_initial_state = nfa.new_initial_state('')
        token_number = 1
        for spec in specifications:
            if isinstance(spec, State):
                # A user-defined state: its tokens hang off their own
                # initial state, named after the state.
                user_initial_state = nfa.new_initial_state(spec.name)
                for token in spec.tokens:
                    self.add_token_to_machine(
                        nfa, user_initial_state, token, token_number)
                    token_number = token_number + 1
            elif isinstance(spec, tuple):
                # A bare (pattern, action) token belongs to the default state.
                self.add_token_to_machine(
                    nfa, default_initial_state, spec, token_number)
                token_number = token_number + 1
            else:
                raise Errors.InvalidToken(
                    token_number,
                    "Expected a token definition (tuple) or State instance")
        if timings:
            time2 = time()
            total_time = total_time + (time2 - time1)
            time3 = time()
        if debug and (debug_flags & DUMP_NFA):
            debug.write("\n============= NFA ===========\n")
            nfa.dump(debug)
        # Pass |debug| through only when both dump flags are set, so the
        # conversion itself is traced only at maximum verbosity.
        dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
        if timings:
            time4 = time()
            total_time = total_time + (time4 - time3)
        if debug and (debug_flags & DUMP_DFA):
            debug.write("\n============= DFA ===========\n")
            dfa.dump(debug)
        if timings:
            timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
            timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
            timings.write("TOTAL : %5.2f\n" % total_time)
        self.machine = dfa

    def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
        """
        Add one (pattern, action) token to the NFA |machine|, starting
        from |initial_state|. Priority is the negated token number, so
        earlier specifications win on equal-length matches.

        Raises Errors.PlexError subclasses annotated with the token number.
        """
        try:
            (re, action_spec) = self.parse_token_definition(token_spec)
            # Disabled this -- matching empty strings can be useful
            #if re.nullable:
            #    raise Errors.InvalidToken(
            #        token_number, "Pattern can match 0 input symbols")
            if isinstance(action_spec, Actions.Action):
                action = action_spec
            else:
                # EAFP: any callable becomes a Call action, any other
                # value is wrapped in a Return action.
                try:
                    action_spec.__call__
                except AttributeError:
                    action = Actions.Return(action_spec)
                else:
                    action = Actions.Call(action_spec)
            final_state = machine.new_state()
            re.build_machine(machine, initial_state, final_state,
                             match_bol=1, nocase=0)
            final_state.set_action(action, priority=-token_number)
        except Errors.PlexError as e:
            # Re-raise the same error class with the token number prepended.
            raise e.__class__("Token number %d: %s" % (token_number, e))

    def parse_token_definition(self, token_spec):
        """
        Validate a token specification and return (pattern, action).

        Raises Errors.InvalidToken if the specification is not a
        2-tuple whose first item is a Regexps.RE instance.
        """
        if not isinstance(token_spec, tuple):
            raise Errors.InvalidToken("Token definition is not a tuple")
        if len(token_spec) != 2:
            raise Errors.InvalidToken("Wrong number of items in token definition")
        pattern, action = token_spec
        if not isinstance(pattern, Regexps.RE):
            raise Errors.InvalidToken("Pattern is not an RE instance")
        return (pattern, action)

    def get_initial_state(self, name):
        """Return the DFA initial state for the scanner state |name|."""
        return self.machine.get_initial_state(name)
| 193 |
| 194 |
| 195 |
OLD | NEW |