OLD | NEW |
(Empty) | |
| 1 /* Data definitions for internal representation of Bison's input. |
| 2 |
| 3 Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002, 2003, 2004, 2005, 2006 |
| 4 2007 Free Software Foundation, Inc. |
| 5 |
| 6 This file is part of Bison, the GNU Compiler Compiler. |
| 7 |
| 8 This program is free software: you can redistribute it and/or modify |
| 9 it under the terms of the GNU General Public License as published by |
| 10 the Free Software Foundation, either version 3 of the License, or |
| 11 (at your option) any later version. |
| 12 |
| 13 This program is distributed in the hope that it will be useful, |
| 14 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 GNU General Public License for more details. |
| 17 |
| 18 You should have received a copy of the GNU General Public License |
| 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| 20 |
| 21 #ifndef GRAM_H_ |
| 22 # define GRAM_H_ |
| 23 |
| 24 /* Representation of the grammar rules: |
| 25 |
| 26 NTOKENS is the number of tokens, and NVARS is the number of |
| 27 variables (nonterminals). NSYMS is the total number, ntokens + |
| 28 nvars. |
| 29 |
| 30 Each symbol (either token or variable) receives a symbol number. |
| 31 Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 |
| 32 are for variables. Symbol number zero is the end-of-input token. |
| 33 This token is counted in ntokens. The true number of token values |
| 34 assigned is NTOKENS reduced by one for each alias declaration. |
| 35 |
| 36 The rules receive rule numbers 1 to NRULES in the order they are |
| 37 written. More precisely Bison augments the grammar with the |
| 38 initial rule, `$accept: START-SYMBOL $end', which is numbered 1, |
| 39 all the user rules are 2, 3 etc. Each time a rule number is |
| 40 presented to the user, we subtract 1, so *displayed* rule numbers |
| 41 are 0, 1, 2... |
| 42 |
| 43 Internally, we cannot use the number 0 for a rule because for |
| 44 instance RITEM stores both symbol (the RHS) and rule numbers: the |
| 45 symbols are shorts >= 0, and rule number are stored negative. |
| 46 Therefore 0 cannot be used, since it would be both the rule number |
| 47 0, and the token $end). |
| 48 |
| 49 Actions are accessed via the rule number. |
| 50 |
| 51 The rules themselves are described by several arrays: amongst which |
| 52 RITEM, and RULES. |
| 53 |
| 54 RULES is an array of rules, whose members are: |
| 55 |
| 56 RULES[R].lhs -- the symbol of the left hand side of rule R. |
| 57 |
| 58 RULES[R].rhs -- the index in RITEM of the beginning of the portion |
| 59 for rule R. |
| 60 |
| 61 RULES[R].prec -- the symbol providing the precedence level of R. |
| 62 |
| 63 RULES[R].precsym -- the symbol attached (via %prec) to give its |
| 64 precedence to R. Of course, if set, it is equal to `prec', but we |
| 65 need to distinguish one from the other when reducing: a symbol used |
| 66 in a %prec is not useless. |
| 67 |
| 68 RULES[R].assoc -- the associativity of R. |
| 69 |
| 70 RULES[R].dprec -- the dynamic precedence level of R (for GLR |
| 71 parsing). |
| 72 |
| 73 RULES[R].merger -- index of merging function for R (for GLR |
| 74 parsing). |
| 75 |
| 76 RULES[R].line -- the line where R was defined. |
| 77 |
| 78 RULES[R].useful -- true iff the rule is used (i.e., false if thrown |
| 79 away by reduce). |
| 80 |
| 81 The right hand side is stored as symbol numbers in a portion of |
| 82 RITEM. |
| 83 |
| 84 The length of the portion is one greater than the number of symbols |
| 85 in the rule's right hand side. The last element in the portion |
| 86 contains minus R, which identifies it as the end of a portion and |
| 87 says which rule it is for. |
| 88 |
| 89 The portions of RITEM come in order of increasing rule number. |
| 90 NRITEMS is the total length of RITEM. Each element of RITEM is |
| 91 called an "item" and its index in RITEM is an item number. |
| 92 |
| 93 Item numbers are used in the finite state machine to represent |
| 94 places that parsing can get to. |
| 95 |
| 96 SYMBOLS[I]->prec records the precedence level of each symbol. |
| 97 |
| 98 Precedence levels are assigned in increasing order starting with 1 |
| 99 so that numerically higher precedence values mean tighter binding |
| 100 as they ought to. Zero as a symbol or rule's precedence means none |
| 101 is assigned. |
| 102 |
| 103 Associativities are recorded similarly in SYMBOLS[I]->assoc. */ |
| 104 |
| 105 # include "location.h" |
| 106 # include "symtab.h" |
| 107 |
| 108 # define ISTOKEN(i) ((i) < ntokens) |
| 109 # define ISVAR(i) ((i) >= ntokens) |
| 110 |
| 111 extern int nsyms; |
| 112 extern int ntokens; |
| 113 extern int nvars; |
| 114 |
| 115 typedef int item_number; |
| 116 #define ITEM_NUMBER_MAX INT_MAX |
| 117 extern item_number *ritem; |
| 118 extern unsigned int nritems; |
| 119 |
| 120 /* There is weird relationship between OT1H item_number and OTOH |
| 121 symbol_number and rule_number: we store the latter in |
| 122 item_number. symbol_number values are stored as-is, while |
| 123 the negation of (rule_number + 1) is stored. |
| 124 |
| 125 Therefore, a symbol_number must be a valid item_number, and we |
| 126 sometimes have to perform the converse transformation. */ |
| 127 |
| 128 static inline item_number |
| 129 symbol_number_as_item_number (symbol_number sym) |
| 130 { |
| 131 return sym; |
| 132 } |
| 133 |
| 134 static inline symbol_number |
| 135 item_number_as_symbol_number (item_number i) |
| 136 { |
| 137 return i; |
| 138 } |
| 139 |
| 140 static inline bool |
| 141 item_number_is_symbol_number (item_number i) |
| 142 { |
| 143 return i >= 0; |
| 144 } |
| 145 |
| 146 /* Rule numbers. */ |
| 147 typedef int rule_number; |
| 148 #define RULE_NUMBER_MAX INT_MAX |
| 149 extern rule_number nrules; |
| 150 |
| 151 static inline item_number |
| 152 rule_number_as_item_number (rule_number r) |
| 153 { |
| 154 return -1 - r; |
| 155 } |
| 156 |
| 157 static inline rule_number |
| 158 item_number_as_rule_number (item_number i) |
| 159 { |
| 160 return -1 - i; |
| 161 } |
| 162 |
| 163 static inline bool |
| 164 item_number_is_rule_number (item_number i) |
| 165 { |
| 166 return i < 0; |
| 167 } |
| 168 |
| 169 /*--------. |
| 170 | Rules. | |
| 171 `--------*/ |
| 172 |
| 173 typedef struct |
| 174 { |
| 175 /* The number of the rule in the source. It is usually the index in |
| 176 RULES too, except if there are useless rules. */ |
| 177 rule_number user_number; |
| 178 |
| 179 /* The index in RULES. Usually the rule number in the source, |
| 180 except if some rules are useless. */ |
| 181 rule_number number; |
| 182 |
| 183 symbol *lhs; |
| 184 item_number *rhs; |
| 185 |
| 186 /* This symbol provides both the associativity, and the precedence. */ |
| 187 symbol *prec; |
| 188 |
| 189 int dprec; |
| 190 int merger; |
| 191 |
| 192 /* This symbol was attached to the rule via %prec. */ |
| 193 symbol *precsym; |
| 194 |
| 195 location location; |
| 196 bool useful; |
| 197 |
| 198 const char *action; |
| 199 location action_location; |
| 200 } rule; |
| 201 |
| 202 extern rule *rules; |
| 203 |
| 204 /* A function that selects a rule. */ |
| 205 typedef bool (*rule_filter) (rule *); |
| 206 |
| 207 /* Return true IFF the rule has a `number' smaller than NRULES. That is, it is |
| 208 useful in the grammar. */ |
| 209 bool rule_useful_in_grammar_p (rule *r); |
| 210 |
| 211 /* Return true IFF the rule has a `number' higher than NRULES. That is, it is |
| 212 useless in the grammar. */ |
| 213 bool rule_useless_in_grammar_p (rule *r); |
| 214 |
| 215 /* Return true IFF the rule is not flagged as useful but is useful in the |
| 216 grammar. In other words, it was discarded because of conflicts. */ |
| 217 bool rule_useless_in_parser_p (rule *r); |
| 218 |
| 219 /* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was |
| 220 already displayed (by a previous call for another rule), avoid |
| 221 useless repetitions. */ |
| 222 void rule_lhs_print (rule *r, symbol *previous_lhs, FILE *out); |
| 223 void rule_lhs_print_xml (rule *r, FILE *out, int level); |
| 224 |
| 225 /* Return the length of the RHS. */ |
| 226 int rule_rhs_length (rule *r); |
| 227 |
| 228 /* Print this rule's RHS on OUT. */ |
| 229 void rule_rhs_print (rule *r, FILE *out); |
| 230 |
| 231 /* Print this rule on OUT. */ |
| 232 void rule_print (rule *r, FILE *out); |
| 233 |
| 234 |
| 235 |
| 236 |
| 237 /* Table of the symbols, indexed by the symbol number. */ |
| 238 extern symbol **symbols; |
| 239 |
| 240 /* TOKEN_TRANSLATION -- a table indexed by a token number as returned |
| 241 by the user's yylex routine, it yields the internal token number |
| 242 used by the parser and throughout bison. */ |
| 243 extern symbol_number *token_translations; |
| 244 extern int max_user_token_number; |
| 245 |
| 246 |
| 247 |
| 248 /* Dump RITEM for traces. */ |
| 249 void ritem_print (FILE *out); |
| 250 |
| 251 /* Return the size of the longest rule RHS. */ |
| 252 size_t ritem_longest_rhs (void); |
| 253 |
| 254 /* Print the grammar's rules that match FILTER on OUT under TITLE. */ |
| 255 void grammar_rules_partial_print (FILE *out, const char *title, |
| 256 rule_filter filter); |
| 257 |
| 258 /* Print the grammar's useful rules on OUT. */ |
| 259 void grammar_rules_print (FILE *out); |
| 260 /* Print all of the grammar's rules with a "usefulness" attribute. */ |
| 261 void grammar_rules_print_xml (FILE *out, int level); |
| 262 |
| 263 /* Dump the grammar. */ |
| 264 void grammar_dump (FILE *out, const char *title); |
| 265 |
| 266 /* Report on STDERR the rules that are not flagged USEFUL, using the |
| 267 MESSAGE (which can be `rule useless in grammar' when invoked after grammar |
| 268 reduction, or `rule useless in parser due to conflicts' after conflicts |
| 269 were taken into account). */ |
| 270 void grammar_rules_useless_report (const char *message); |
| 271 |
| 272 /* Free the packed grammar. */ |
| 273 void grammar_free (void); |
| 274 |
| 275 #endif /* !GRAM_H_ */ |
OLD | NEW |