OLD | NEW |
(Empty) | |
| 1 /* Compute lookahead criteria for Bison. |
| 2 |
| 3 Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005, |
| 4 2006, 2007 Free Software Foundation, Inc. |
| 5 |
| 6 This file is part of Bison, the GNU Compiler Compiler. |
| 7 |
| 8 This program is free software: you can redistribute it and/or modify |
| 9 it under the terms of the GNU General Public License as published by |
| 10 the Free Software Foundation, either version 3 of the License, or |
| 11 (at your option) any later version. |
| 12 |
| 13 This program is distributed in the hope that it will be useful, |
| 14 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 GNU General Public License for more details. |
| 17 |
| 18 You should have received a copy of the GNU General Public License |
| 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| 20 |
| 21 |
| 22 /* Compute how to make the finite state machine deterministic; find |
| 23 which rules need lookahead in each state, and which lookahead |
| 24 tokens they accept. */ |
| 25 |
| 26 #include <config.h> |
| 27 #include "system.h" |
| 28 |
| 29 #include <bitset.h> |
| 30 #include <bitsetv.h> |
| 31 #include <quotearg.h> |
| 32 |
| 33 #include "LR0.h" |
| 34 #include "complain.h" |
| 35 #include "derives.h" |
| 36 #include "getargs.h" |
| 37 #include "gram.h" |
| 38 #include "lalr.h" |
| 39 #include "nullable.h" |
| 40 #include "reader.h" |
| 41 #include "relation.h" |
| 42 #include "symtab.h" |
| 43 |
| 44 goto_number *goto_map; |
| 45 static goto_number ngotos; |
| 46 state_number *from_state; |
| 47 state_number *to_state; |
| 48 |
| 49 /* Linked list of goto numbers. */ |
| 50 typedef struct goto_list |
| 51 { |
| 52 struct goto_list *next; |
| 53 goto_number value; |
| 54 } goto_list; |
| 55 |
| 56 |
| 57 /* LA is an NLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule |
| 58 LArule[l] is applicable in the appropriate state when the next |
| 59 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, |
| 60 it is a conflict. */ |
| 61 |
| 62 static bitsetv LA = NULL; |
| 63 size_t nLA; |
| 64 |
| 65 |
| 66 /* And for the famous F variable, which name is so descriptive that a |
| 67 comment is hardly needed. <grin>. */ |
| 68 static bitsetv F = NULL; |
| 69 |
| 70 static goto_number **includes; |
| 71 static goto_list **lookback; |
| 72 |
| 73 |
| 74 |
| 75 |
| 76 static void |
| 77 set_goto_map (void) |
| 78 { |
| 79 state_number s; |
| 80 goto_number *temp_map; |
| 81 |
| 82 goto_map = xcalloc (nvars + 1, sizeof *goto_map); |
| 83 temp_map = xnmalloc (nvars + 1, sizeof *temp_map); |
| 84 |
| 85 ngotos = 0; |
| 86 for (s = 0; s < nstates; ++s) |
| 87 { |
| 88 transitions *sp = states[s]->transitions; |
| 89 int i; |
| 90 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
| 91 { |
| 92 ngotos++; |
| 93 |
| 94 /* Abort if (ngotos + 1) would overflow. */ |
| 95 aver (ngotos != GOTO_NUMBER_MAXIMUM); |
| 96 |
| 97 goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; |
| 98 } |
| 99 } |
| 100 |
| 101 { |
| 102 goto_number k = 0; |
| 103 int i; |
| 104 for (i = ntokens; i < nsyms; i++) |
| 105 { |
| 106 temp_map[i - ntokens] = k; |
| 107 k += goto_map[i - ntokens]; |
| 108 } |
| 109 |
| 110 for (i = ntokens; i < nsyms; i++) |
| 111 goto_map[i - ntokens] = temp_map[i - ntokens]; |
| 112 |
| 113 goto_map[nsyms - ntokens] = ngotos; |
| 114 temp_map[nsyms - ntokens] = ngotos; |
| 115 } |
| 116 |
| 117 from_state = xcalloc (ngotos, sizeof *from_state); |
| 118 to_state = xcalloc (ngotos, sizeof *to_state); |
| 119 |
| 120 for (s = 0; s < nstates; ++s) |
| 121 { |
| 122 transitions *sp = states[s]->transitions; |
| 123 int i; |
| 124 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
| 125 { |
| 126 goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; |
| 127 from_state[k] = s; |
| 128 to_state[k] = sp->states[i]->number; |
| 129 } |
| 130 } |
| 131 |
| 132 free (temp_map); |
| 133 } |
| 134 |
| 135 |
| 136 |
| 137 /*----------------------------------------------------------. |
| 138 | Map a state/symbol pair into its numeric representation. | |
| 139 `----------------------------------------------------------*/ |
| 140 |
| 141 static goto_number |
| 142 map_goto (state_number s0, symbol_number sym) |
| 143 { |
| 144 goto_number high; |
| 145 goto_number low; |
| 146 goto_number middle; |
| 147 state_number s; |
| 148 |
| 149 low = goto_map[sym - ntokens]; |
| 150 high = goto_map[sym - ntokens + 1] - 1; |
| 151 |
| 152 for (;;) |
| 153 { |
| 154 aver (low <= high); |
| 155 middle = (low + high) / 2; |
| 156 s = from_state[middle]; |
| 157 if (s == s0) |
| 158 return middle; |
| 159 else if (s < s0) |
| 160 low = middle + 1; |
| 161 else |
| 162 high = middle - 1; |
| 163 } |
| 164 } |
| 165 |
| 166 |
| 167 static void |
| 168 initialize_F (void) |
| 169 { |
| 170 goto_number **reads = xnmalloc (ngotos, sizeof *reads); |
| 171 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); |
| 172 goto_number nedges = 0; |
| 173 |
| 174 goto_number i; |
| 175 |
| 176 F = bitsetv_create (ngotos, ntokens, BITSET_FIXED); |
| 177 |
| 178 for (i = 0; i < ngotos; i++) |
| 179 { |
| 180 state_number stateno = to_state[i]; |
| 181 transitions *sp = states[stateno]->transitions; |
| 182 |
| 183 int j; |
| 184 FOR_EACH_SHIFT (sp, j) |
| 185 bitset_set (F[i], TRANSITION_SYMBOL (sp, j)); |
| 186 |
| 187 for (; j < sp->num; j++) |
| 188 { |
| 189 symbol_number sym = TRANSITION_SYMBOL (sp, j); |
| 190 if (nullable[sym - ntokens]) |
| 191 edge[nedges++] = map_goto (stateno, sym); |
| 192 } |
| 193 |
| 194 if (nedges == 0) |
| 195 reads[i] = NULL; |
| 196 else |
| 197 { |
| 198 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]); |
| 199 memcpy (reads[i], edge, nedges * sizeof edge[0]); |
| 200 reads[i][nedges] = END_NODE; |
| 201 nedges = 0; |
| 202 } |
| 203 } |
| 204 |
| 205 relation_digraph (reads, ngotos, &F); |
| 206 |
| 207 for (i = 0; i < ngotos; i++) |
| 208 free (reads[i]); |
| 209 |
| 210 free (reads); |
| 211 free (edge); |
| 212 } |
| 213 |
| 214 |
| 215 static void |
| 216 add_lookback_edge (state *s, rule *r, goto_number gotono) |
| 217 { |
| 218 int ri = state_reduction_find (s, r); |
| 219 goto_list *sp = xmalloc (sizeof *sp); |
| 220 sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri]; |
| 221 sp->value = gotono; |
| 222 lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp; |
| 223 } |
| 224 |
| 225 |
| 226 |
| 227 static void |
| 228 build_relations (void) |
| 229 { |
| 230 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); |
| 231 state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1); |
| 232 goto_number i; |
| 233 |
| 234 includes = xnmalloc (ngotos, sizeof *includes); |
| 235 |
| 236 for (i = 0; i < ngotos; i++) |
| 237 { |
| 238 int nedges = 0; |
| 239 symbol_number symbol1 = states[to_state[i]]->accessing_symbol; |
| 240 rule **rulep; |
| 241 |
| 242 for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++) |
| 243 { |
| 244 bool done; |
| 245 int length = 1; |
| 246 item_number const *rp; |
| 247 state *s = states[from_state[i]]; |
| 248 states1[0] = s->number; |
| 249 |
| 250 for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++) |
| 251 { |
| 252 s = transitions_to (s->transitions, |
| 253 item_number_as_symbol_number (*rp)); |
| 254 states1[length++] = s->number; |
| 255 } |
| 256 |
| 257 if (!s->consistent) |
| 258 add_lookback_edge (s, *rulep, i); |
| 259 |
| 260 length--; |
| 261 done = false; |
| 262 while (!done) |
| 263 { |
| 264 done = true; |
| 265 /* Each rhs ends in a rule number, and there is a |
| 266 sentinel before the first rhs, so it is safe to |
| 267 decrement RP here. */ |
| 268 rp--; |
| 269 if (ISVAR (*rp)) |
| 270 { |
| 271 /* Downcasting from item_number to symbol_number. */ |
| 272 edge[nedges++] = map_goto (states1[--length], |
| 273 item_number_as_symbol_number (*rp))
; |
| 274 if (nullable[*rp - ntokens]) |
| 275 done = false; |
| 276 } |
| 277 } |
| 278 } |
| 279 |
| 280 if (nedges == 0) |
| 281 includes[i] = NULL; |
| 282 else |
| 283 { |
| 284 int j; |
| 285 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]); |
| 286 for (j = 0; j < nedges; j++) |
| 287 includes[i][j] = edge[j]; |
| 288 includes[i][nedges] = END_NODE; |
| 289 } |
| 290 } |
| 291 |
| 292 free (edge); |
| 293 free (states1); |
| 294 |
| 295 relation_transpose (&includes, ngotos); |
| 296 } |
| 297 |
| 298 |
| 299 |
| 300 static void |
| 301 compute_FOLLOWS (void) |
| 302 { |
| 303 goto_number i; |
| 304 |
| 305 relation_digraph (includes, ngotos, &F); |
| 306 |
| 307 for (i = 0; i < ngotos; i++) |
| 308 free (includes[i]); |
| 309 |
| 310 free (includes); |
| 311 } |
| 312 |
| 313 |
| 314 static void |
| 315 compute_lookahead_tokens (void) |
| 316 { |
| 317 size_t i; |
| 318 goto_list *sp; |
| 319 |
| 320 for (i = 0; i < nLA; i++) |
| 321 for (sp = lookback[i]; sp; sp = sp->next) |
| 322 bitset_or (LA[i], LA[i], F[sp->value]); |
| 323 |
| 324 /* Free LOOKBACK. */ |
| 325 for (i = 0; i < nLA; i++) |
| 326 LIST_FREE (goto_list, lookback[i]); |
| 327 |
| 328 free (lookback); |
| 329 bitsetv_free (F); |
| 330 } |
| 331 |
| 332 |
| 333 /*----------------------------------------------------. |
| 334 | Count the number of lookahead tokens required for S | |
| 335 | (N_LOOKAHEAD_TOKENS member). | |
| 336 `----------------------------------------------------*/ |
| 337 |
| 338 static int |
| 339 state_lookahead_tokens_count (state *s) |
| 340 { |
| 341 int n_lookahead_tokens = 0; |
| 342 reductions *rp = s->reductions; |
| 343 transitions *sp = s->transitions; |
| 344 |
| 345 /* We need a lookahead either to distinguish different |
| 346 reductions (i.e., there are two or more), or to distinguish a |
| 347 reduction from a shift. Otherwise, it is straightforward, |
| 348 and the state is `consistent'. There is no need to check that |
| 349 transition 0 hasn't been disabled before checking if it is a |
| 350 shift since transitions are only disabled during conflict |
| 351 resolution, and that hasn't happened yet. */ |
| 352 aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0)); |
| 353 if (rp->num > 1 |
| 354 || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))) |
| 355 n_lookahead_tokens += rp->num; |
| 356 else |
| 357 s->consistent = 1; |
| 358 |
| 359 return n_lookahead_tokens; |
| 360 } |
| 361 |
| 362 |
| 363 /*----------------------------------------------------. |
| 364 | Compute LA, NLA, and the lookahead_tokens members. | |
| 365 `----------------------------------------------------*/ |
| 366 |
| 367 static void |
| 368 initialize_LA (void) |
| 369 { |
| 370 state_number i; |
| 371 bitsetv pLA; |
| 372 |
| 373 /* Compute the total number of reductions requiring a lookahead. */ |
| 374 nLA = 0; |
| 375 for (i = 0; i < nstates; i++) |
| 376 nLA += state_lookahead_tokens_count (states[i]); |
| 377 /* Avoid having to special case 0. */ |
| 378 if (!nLA) |
| 379 nLA = 1; |
| 380 |
| 381 pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); |
| 382 lookback = xcalloc (nLA, sizeof *lookback); |
| 383 |
| 384 /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions |
| 385 require lookahead tokens. */ |
| 386 for (i = 0; i < nstates; i++) |
| 387 { |
| 388 int count = state_lookahead_tokens_count (states[i]); |
| 389 if (count) |
| 390 { |
| 391 states[i]->reductions->lookahead_tokens = pLA; |
| 392 pLA += count; |
| 393 } |
| 394 } |
| 395 } |
| 396 |
| 397 |
| 398 /*---------------------------------------------. |
| 399 | Output the lookahead tokens for each state. | |
| 400 `---------------------------------------------*/ |
| 401 |
| 402 static void |
| 403 lookahead_tokens_print (FILE *out) |
| 404 { |
| 405 state_number i; |
| 406 int j, k; |
| 407 fprintf (out, "Lookahead tokens: BEGIN\n"); |
| 408 for (i = 0; i < nstates; ++i) |
| 409 { |
| 410 reductions *reds = states[i]->reductions; |
| 411 bitset_iterator iter; |
| 412 int n_lookahead_tokens = 0; |
| 413 |
| 414 if (reds->lookahead_tokens) |
| 415 for (k = 0; k < reds->num; ++k) |
| 416 if (reds->lookahead_tokens[k]) |
| 417 ++n_lookahead_tokens; |
| 418 |
| 419 fprintf (out, "State %d: %d lookahead tokens\n", |
| 420 i, n_lookahead_tokens); |
| 421 |
| 422 if (reds->lookahead_tokens) |
| 423 for (j = 0; j < reds->num; ++j) |
| 424 BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0) |
| 425 { |
| 426 fprintf (out, " on %d (%s) -> rule %d\n", |
| 427 k, symbols[k]->tag, |
| 428 reds->rules[j]->number); |
| 429 }; |
| 430 } |
| 431 fprintf (out, "Lookahead tokens: END\n"); |
| 432 } |
| 433 |
| 434 void |
| 435 lalr (void) |
| 436 { |
| 437 initialize_LA (); |
| 438 set_goto_map (); |
| 439 initialize_F (); |
| 440 build_relations (); |
| 441 compute_FOLLOWS (); |
| 442 compute_lookahead_tokens (); |
| 443 |
| 444 if (trace_flag & trace_sets) |
| 445 lookahead_tokens_print (stderr); |
| 446 } |
| 447 |
| 448 |
| 449 void |
| 450 lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old) |
| 451 { |
| 452 goto_number ngotos_reachable = 0; |
| 453 symbol_number nonterminal = 0; |
| 454 aver (nsyms == nvars + ntokens); |
| 455 { |
| 456 goto_number i; |
| 457 for (i = 0; i < ngotos; ++i) |
| 458 { |
| 459 while (i == goto_map[nonterminal]) |
| 460 goto_map[nonterminal++] = ngotos_reachable; |
| 461 /* If old_to_new[from_state[i]] = nstates_old, remove this goto |
| 462 entry. */ |
| 463 if (old_to_new[from_state[i]] != nstates_old) |
| 464 { |
| 465 /* from_state[i] is not removed, so it and thus to_state[i] are |
| 466 reachable, so to_state[i] != nstates_old. */ |
| 467 aver (old_to_new[to_state[i]] != nstates_old); |
| 468 from_state[ngotos_reachable] = old_to_new[from_state[i]]; |
| 469 to_state[ngotos_reachable] = old_to_new[to_state[i]]; |
| 470 ++ngotos_reachable; |
| 471 } |
| 472 } |
| 473 } |
| 474 while (nonterminal <= nvars) |
| 475 { |
| 476 aver (ngotos == goto_map[nonterminal]); |
| 477 goto_map[nonterminal++] = ngotos_reachable; |
| 478 } |
| 479 ngotos = ngotos_reachable; |
| 480 } |
| 481 |
| 482 |
| 483 void |
| 484 lalr_free (void) |
| 485 { |
| 486 state_number s; |
| 487 for (s = 0; s < nstates; ++s) |
| 488 states[s]->reductions->lookahead_tokens = NULL; |
| 489 bitsetv_free (LA); |
| 490 } |
OLD | NEW |