OLD | NEW |
(Empty) | |
| 1 /* Type definitions for nondeterministic finite state machine for Bison. |
| 2 |
| 3 Copyright (C) 1984, 1989, 2000, 2001, 2002, 2003, 2004, 2007 Free |
| 4 Software Foundation, Inc. |
| 5 |
| 6 This file is part of Bison, the GNU Compiler Compiler. |
| 7 |
| 8 This program is free software: you can redistribute it and/or modify |
| 9 it under the terms of the GNU General Public License as published by |
| 10 the Free Software Foundation, either version 3 of the License, or |
| 11 (at your option) any later version. |
| 12 |
| 13 This program is distributed in the hope that it will be useful, |
| 14 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 GNU General Public License for more details. |
| 17 |
| 18 You should have received a copy of the GNU General Public License |
| 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| 20 |
| 21 |
| 22 /* These type definitions are used to represent a nondeterministic |
| 23 finite state machine that parses the specified grammar. This |
| 24 information is generated by the function generate_states in the |
| 25 file LR0. |
| 26 |
| 27 Each state of the machine is described by a set of items -- |
| 28 particular positions in particular rules -- that are the possible |
| 29 places where parsing could continue when the machine is in this |
| 30 state. These symbols at these items are the allowable inputs that |
| 31 can follow now. |
| 32 |
| 33 A core represents one state. States are numbered in the NUMBER |
| 34 field. When generate_states is finished, the starting state is |
| 35 state 0 and NSTATES is the number of states. (FIXME: This sentence |
| 36 is no longer true: A transition to a state whose state number is |
| 37 NSTATES indicates termination.) All the cores are chained together |
| 38 and FIRST_STATE points to the first one (state 0). |
| 39 |
| 40 For each state there is a particular symbol which must have been |
| 41 the last thing accepted to reach that state. It is the |
| 42 ACCESSING_SYMBOL of the core. |
| 43 |
| 44 Each core contains a vector of NITEMS items which are the indices |
| 45 in the RITEM vector of the items that are selected in this state. |
| 46 |
| 47 The two types of actions are shifts/gotos (push the lookahead token |
| 48 and read another/goto to the state designated by a nterm) and |
| 49 reductions (combine the last n things on the stack via a rule, |
| 50 replace them with the symbol that the rule derives, and leave the |
| 51 lookahead token alone). When the states are generated, these |
| 52 actions are represented in two other lists. |
| 53 |
| 54 Each transition structure describes the possible transitions out |
| 55 of one state, the state whose number is in the number field. Each |
| 56 contains a vector of numbers of the states that transitions can go |
| 57 to. The accessing_symbol fields of those states' cores say what |
| 58 kind of input leads to them. |
| 59 |
| 60 A transition to state zero should be ignored: conflict resolution |
| 61 deletes transitions by having them point to zero. |
| 62 |
| 63 Each reductions structure describes the possible reductions at the |
| 64 state whose number is in the number field. rules is an array of |
| 65 num rules. lookahead_tokens is an array of bitsets, one per rule. |
| 66 |
| 67 Conflict resolution can decide that certain tokens in certain |
| 68 states should explicitly be errors (for implementing %nonassoc). |
| 69 For each state, the tokens that are errors for this reason are |
| 70 recorded in an errs structure, which holds the token numbers. |
| 71 |
| 72 There is at least one goto transition present in state zero. It |
| 73 leads to a next-to-final state whose accessing_symbol is the |
| 74 grammar's start symbol. The next-to-final state has one shift to |
| 75 the final state, whose accessing_symbol is zero (end of input). |
| 76 The final state has one shift, which goes to the termination state. |
| 77 The reason for the extra state at the end is to placate the |
| 78 parser's strategy of making all decisions one token ahead of its |
| 79 actions. */ |
| 80 |
| 81 #ifndef STATE_H_ |
| 82 # define STATE_H_ |
| 83 |
| 84 # include <bitset.h> |
| 85 |
| 86 # include "gram.h" |
| 87 # include "symtab.h" |
| 88 |
| 89 |
| 90 /*-------------------. |
| 91 | Numbering states. | |
| 92 `-------------------*/ |
| 93 |
| 94 typedef int state_number; |
| 95 # define STATE_NUMBER_MAXIMUM INT_MAX |
| 96 |
| 97 /* Be ready to map a state_number to an int. */ |
| 98 static inline int |
| 99 state_number_as_int (state_number s) |
| 100 { |
| 101 return s; |
| 102 } |
| 103 |
| 104 |
| 105 typedef struct state state; |
| 106 |
| 107 /*--------------. |
| 108 | Transitions. | |
| 109 `--------------*/ |
| 110 |
| 111 typedef struct |
| 112 { |
| 113 int num; |
| 114 state *states[1]; |
| 115 } transitions; |
| 116 |
| 117 |
| 118 /* What is the symbol labelling the transition to |
| 119 TRANSITIONS->states[Num]? Can be a token (amongst which the error |
| 120 token), or non terminals in case of gotos. */ |
| 121 |
| 122 #define TRANSITION_SYMBOL(Transitions, Num) \ |
| 123 (Transitions->states[Num]->accessing_symbol) |
| 124 |
| 125 /* Is the TRANSITIONS->states[Num] a shift? (as opposed to gotos). */ |
| 126 |
| 127 #define TRANSITION_IS_SHIFT(Transitions, Num) \ |
| 128 (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num))) |
| 129 |
| 130 /* Is the TRANSITIONS->states[Num] a goto?. */ |
| 131 |
| 132 #define TRANSITION_IS_GOTO(Transitions, Num) \ |
| 133 (!TRANSITION_IS_SHIFT (Transitions, Num)) |
| 134 |
| 135 /* Is the TRANSITIONS->states[Num] labelled by the error token? */ |
| 136 |
| 137 #define TRANSITION_IS_ERROR(Transitions, Num) \ |
| 138 (TRANSITION_SYMBOL (Transitions, Num) == errtoken->number) |
| 139 |
| 140 /* When resolving a SR conflicts, if the reduction wins, the shift is |
| 141 disabled. */ |
| 142 |
| 143 #define TRANSITION_DISABLE(Transitions, Num) \ |
| 144 (Transitions->states[Num] = NULL) |
| 145 |
| 146 #define TRANSITION_IS_DISABLED(Transitions, Num) \ |
| 147 (Transitions->states[Num] == NULL) |
| 148 |
| 149 |
| 150 /* Iterate over each transition over a token (shifts). */ |
| 151 #define FOR_EACH_SHIFT(Transitions, Iter) \ |
| 152 for (Iter = 0; \ |
| 153 Iter < Transitions->num \ |
| 154 && (TRANSITION_IS_DISABLED (Transitions, Iter) \ |
| 155 || TRANSITION_IS_SHIFT (Transitions, Iter)); \ |
| 156 ++Iter) \ |
| 157 if (!TRANSITION_IS_DISABLED (Transitions, Iter)) |
| 158 |
| 159 |
| 160 /* Return the state such SHIFTS contain a shift/goto to it on SYM. |
| 161 Abort if none found. */ |
| 162 struct state *transitions_to (transitions *shifts, symbol_number sym); |
| 163 |
| 164 |
| 165 /*-------. |
| 166 | Errs. | |
| 167 `-------*/ |
| 168 |
| 169 typedef struct |
| 170 { |
| 171 int num; |
| 172 symbol *symbols[1]; |
| 173 } errs; |
| 174 |
| 175 errs *errs_new (int num, symbol **tokens); |
| 176 |
| 177 |
| 178 /*-------------. |
| 179 | Reductions. | |
| 180 `-------------*/ |
| 181 |
| 182 typedef struct |
| 183 { |
| 184 int num; |
| 185 bitset *lookahead_tokens; |
| 186 /* Sorted ascendingly on rule number. */ |
| 187 rule *rules[1]; |
| 188 } reductions; |
| 189 |
| 190 |
| 191 |
| 192 /*---------. |
| 193 | states. | |
| 194 `---------*/ |
| 195 |
| 196 struct state |
| 197 { |
| 198 state_number number; |
| 199 symbol_number accessing_symbol; |
| 200 transitions *transitions; |
| 201 reductions *reductions; |
| 202 errs *errs; |
| 203 |
| 204 /* If non-zero, then no lookahead sets on reduce actions are needed to |
| 205 decide what to do in state S. */ |
| 206 char consistent; |
| 207 |
| 208 /* If some conflicts were solved thanks to precedence/associativity, |
| 209 a human readable description of the resolution. */ |
| 210 const char *solved_conflicts; |
| 211 const char *solved_conflicts_xml; |
| 212 |
| 213 /* Its items. Must be last, since ITEMS can be arbitrarily large. Sorted |
| 214 ascendingly on item index in RITEM, which is sorted on rule number. */ |
| 215 size_t nitems; |
| 216 item_number items[1]; |
| 217 }; |
| 218 |
| 219 extern state_number nstates; |
| 220 extern state *final_state; |
| 221 |
| 222 /* Create a new state with ACCESSING_SYMBOL for those items. */ |
| 223 state *state_new (symbol_number accessing_symbol, |
| 224 size_t core_size, item_number *core); |
| 225 |
| 226 /* Set the transitions of STATE. */ |
| 227 void state_transitions_set (state *s, int num, state **trans); |
| 228 |
| 229 /* Set the reductions of STATE. */ |
| 230 void state_reductions_set (state *s, int num, rule **reds); |
| 231 |
| 232 int state_reduction_find (state *s, rule *r); |
| 233 |
| 234 /* Set the errs of STATE. */ |
| 235 void state_errs_set (state *s, int num, symbol **errors); |
| 236 |
| 237 /* Print on OUT all the lookahead tokens such that this STATE wants to |
| 238 reduce R. */ |
| 239 void state_rule_lookahead_tokens_print (state *s, rule *r, FILE *out); |
| 240 void state_rule_lookahead_tokens_print_xml (state *s, rule *r, |
| 241 FILE *out, int level); |
| 242 |
| 243 /* Create/destroy the states hash table. */ |
| 244 void state_hash_new (void); |
| 245 void state_hash_free (void); |
| 246 |
| 247 /* Find the state associated to the CORE, and return it. If it does |
| 248 not exist yet, return NULL. */ |
| 249 state *state_hash_lookup (size_t core_size, item_number *core); |
| 250 |
| 251 /* Insert STATE in the state hash table. */ |
| 252 void state_hash_insert (state *s); |
| 253 |
| 254 /* Remove unreachable states, renumber remaining states, update NSTATES, and |
| 255 write to OLD_TO_NEW a mapping of old state numbers to new state numbers such |
| 256 that the old value of NSTATES is written as the new state number for removed |
| 257 states. The size of OLD_TO_NEW must be the old value of NSTATES. */ |
| 258 void state_remove_unreachable_states (state_number old_to_new[]); |
| 259 |
| 260 /* All the states, indexed by the state number. */ |
| 261 extern state **states; |
| 262 |
| 263 /* Free all the states. */ |
| 264 void states_free (void); |
| 265 |
| 266 #endif /* !STATE_H_ */ |
OLD | NEW |