blob: eb827b769ca825458d73de7468b3d78c7fb30286 [file] [log] [blame]
/* Output the generated parsing program for Bison.
Copyright (C) 1984, 1986, 1989, 1992, 2000-2006, 2009-2012 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <bitsetv.h>
#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "reader.h"
#include "symtab.h"
#include "tables.h"
/* Several tables are indexed both by state and nonterminal numbers.
We call such an index a `vector'; i.e., a vector is either a state
or a nonterminal number.
Of course vector_number_t ought to be wide enough to contain
state_number and symbol_number. */
typedef int vector_number;
#if 0 /* Not currently used. */
static inline vector_number
state_number_to_vector_number (state_number s)
{
return s;
}
#endif
static inline vector_number
symbol_number_to_vector_number (symbol_number sym)
{
return state_number_as_int (nstates) + sym - ntokens;
}
int nvectors;
/* FROMS and TOS are indexed by vector_number.
If VECTOR is a nonterminal, (FROMS[VECTOR], TOS[VECTOR]) form an
array of state numbers of the non defaulted GOTO on VECTOR.
If VECTOR is a state, TOS[VECTOR] is the array of actions to do on
the (array of) symbols FROMS[VECTOR].
In both cases, TALLY[VECTOR] is the size of the arrays
FROMS[VECTOR], TOS[VECTOR]; and WIDTH[VECTOR] =
(FROMS[VECTOR][SIZE] - FROMS[VECTOR][0] + 1) where SIZE =
TALLY[VECTOR].
FROMS therefore contains symbol_number and action_number,
TOS state_number and action_number,
TALLY sizes,
WIDTH differences of FROMS.
Let base_number be the type of FROMS, TOS, and WIDTH. */
#define BASE_MAXIMUM INT_MAX
#define BASE_MINIMUM INT_MIN
static base_number **froms;
static base_number **tos;
static unsigned int **conflict_tos;
static int *tally;
static base_number *width;
/* For a given state, N = ACTROW[SYMBOL]:
If N = 0, stands for `run the default action'.
If N = MIN, stands for `raise a syntax error'.
If N > 0, stands for `shift SYMBOL and go to n'.
If N < 0, stands for `reduce -N'. */
typedef int action_number;
#define ACTION_NUMBER_MINIMUM INT_MIN
static action_number *actrow;
/* FROMS and TOS are reordered to be compressed. ORDER[VECTOR] is the
new vector number of VECTOR. We skip `empty' vectors (i.e.,
TALLY[VECTOR] = 0), and call these `entries'. */
static vector_number *order;
static int nentries;
base_number *base = NULL;
/* A distinguished value of BASE, negative infinite. During the
computation equals to BASE_MINIMUM, later mapped to BASE_NINF to
keep parser tables small. */
base_number base_ninf = 0;
static base_number *pos = NULL;
static unsigned int *conflrow;
unsigned int *conflict_table;
unsigned int *conflict_list;
int conflict_list_cnt;
static int conflict_list_free;
/* TABLE_SIZE is the allocated size of both TABLE and CHECK. We start
with more or less the original hard-coded value (which was
SHRT_MAX). */
static int table_size = 32768;
base_number *table;
base_number *check;
/* The value used in TABLE to denote explicit syntax errors
(%nonassoc), a negative infinite. First defaults to ACTION_NUMBER_MINIMUM,
but in order to keep small tables, renumbered as TABLE_ERROR, which
is the smallest (non error) value minus 1. */
base_number table_ninf = 0;
static int lowzero;
int high;
state_number *yydefgoto;
rule_number *yydefact;
/*----------------------------------------------------------------.
| If TABLE (and CHECK) appear to be small to be addressed at |
| DESIRED, grow them. Note that TABLE[DESIRED] is to be used, so |
| the desired size is at least DESIRED + 1. |
`----------------------------------------------------------------*/
static void
table_grow (int desired)
{
int old_size = table_size;
while (table_size <= desired)
table_size *= 2;
if (trace_flag & trace_resource)
fprintf (stderr, "growing table and check from: %d to %d\n",
old_size, table_size);
table = xnrealloc (table, table_size, sizeof *table);
conflict_table = xnrealloc (conflict_table, table_size,
sizeof *conflict_table);
check = xnrealloc (check, table_size, sizeof *check);
for (/* Nothing. */; old_size < table_size; ++old_size)
{
table[old_size] = 0;
conflict_table[old_size] = 0;
check[old_size] = -1;
}
}
/*-------------------------------------------------------------------.
| For GLR parsers, for each conflicted token in S, as indicated |
| by non-zero entries in CONFLROW, create a list of possible |
| reductions that are alternatives to the shift or reduction |
| currently recorded for that token in S. Store the alternative |
| reductions followed by a 0 in CONFLICT_LIST, updating |
| CONFLICT_LIST_CNT, and storing an index to the start of the list |
| back into CONFLROW. |
`-------------------------------------------------------------------*/
static void
conflict_row (state *s)
{
int i, j;
reductions *reds = s->reductions;
if (!nondeterministic_parser)
return;
for (j = 0; j < ntokens; j += 1)
if (conflrow[j])
{
conflrow[j] = conflict_list_cnt;
/* Find all reductions for token J, and record all that do not
match ACTROW[J]. */
for (i = 0; i < reds->num; i += 1)
if (bitset_test (reds->lookahead_tokens[i], j)
&& (actrow[j]
!= rule_number_as_item_number (reds->rules[i]->number)))
{
aver (0 < conflict_list_free);
conflict_list[conflict_list_cnt] = reds->rules[i]->number + 1;
conflict_list_cnt += 1;
conflict_list_free -= 1;
}
/* Leave a 0 at the end. */
aver (0 < conflict_list_free);
conflict_list[conflict_list_cnt] = 0;
conflict_list_cnt += 1;
conflict_list_free -= 1;
}
}
/*------------------------------------------------------------------.
| Decide what to do for each type of token if seen as the |
| lookahead in specified state. The value returned is used as the |
| default action (yydefact) for the state. In addition, ACTROW is |
| filled with what to do for each kind of token, index by symbol |
| number, with zero meaning do the default action. The value |
| ACTION_NUMBER_MINIMUM, a very negative number, means this |
| situation is an error. The parser recognizes this value |
| specially. |
| |
| This is where conflicts are resolved. The loop over lookahead |
| rules considered lower-numbered rules last, and the last rule |
| considered that likes a token gets to handle it. |
| |
| For GLR parsers, also sets CONFLROW[SYM] to an index into |
| CONFLICT_LIST iff there is an unresolved conflict (s/r or r/r) |
| with symbol SYM. The default reduction is not used for a symbol |
| that has any such conflicts. |
`------------------------------------------------------------------*/
static rule *
action_row (state *s)
{
int i;
rule *default_reduction = NULL;
reductions *reds = s->reductions;
transitions *trans = s->transitions;
errs *errp = s->errs;
/* Set to nonzero to inhibit having any default reduction. */
bool nodefault = false;
bool conflicted = false;
for (i = 0; i < ntokens; i++)
actrow[i] = conflrow[i] = 0;
if (reds->lookahead_tokens)
{
int j;
bitset_iterator biter;
/* loop over all the rules available here which require
lookahead (in reverse order to give precedence to the first
rule) */
for (i = reds->num - 1; i >= 0; --i)
/* and find each token which the rule finds acceptable
to come next */
BITSET_FOR_EACH (biter, reds->lookahead_tokens[i], j, 0)
{
/* and record this rule as the rule to use if that
token follows. */
if (actrow[j] != 0)
{
conflicted = true;
conflrow[j] = 1;
}
actrow[j] = rule_number_as_item_number (reds->rules[i]->number);
}
}
/* Now see which tokens are allowed for shifts in this state. For
them, record the shift as the thing to do. So shift is preferred
to reduce. */
FOR_EACH_SHIFT (trans, i)
{
symbol_number sym = TRANSITION_SYMBOL (trans, i);
state *shift_state = trans->states[i];
if (actrow[sym] != 0)
{
conflicted = true;
conflrow[sym] = 1;
}
actrow[sym] = state_number_as_int (shift_state->number);
/* Do not use any default reduction if there is a shift for
error */
if (sym == errtoken->number)
nodefault = true;
}
/* See which tokens are an explicit error in this state (due to
%nonassoc). For them, record ACTION_NUMBER_MINIMUM as the
action. */
for (i = 0; i < errp->num; i++)
{
symbol *sym = errp->symbols[i];
actrow[sym->number] = ACTION_NUMBER_MINIMUM;
}
/* Turn off default reductions where requested by the user. See
state_lookahead_tokens_count in lalr.c to understand when states are
labeled as consistent. */
{
char *default_reductions =
muscle_percent_define_get ("lr.default-reductions");
if (0 != strcmp (default_reductions, "most") && !s->consistent)
nodefault = true;
free (default_reductions);
}
/* Now find the most common reduction and make it the default action
for this state. */
if (reds->num >= 1 && !nodefault)
{
if (s->consistent)
default_reduction = reds->rules[0];
else
{
int max = 0;
for (i = 0; i < reds->num; i++)
{
int count = 0;
rule *r = reds->rules[i];
symbol_number j;
for (j = 0; j < ntokens; j++)
if (actrow[j] == rule_number_as_item_number (r->number))
count++;
if (count > max)
{
max = count;
default_reduction = r;
}
}
/* GLR parsers need space for conflict lists, so we can't
default conflicted entries. For non-conflicted entries
or as long as we are not building a GLR parser,
actions that match the default are replaced with zero,
which means "use the default". */
if (max > 0)
{
int j;
for (j = 0; j < ntokens; j++)
if (actrow[j]
== rule_number_as_item_number (default_reduction->number)
&& ! (nondeterministic_parser && conflrow[j]))
actrow[j] = 0;
}
}
}
/* If have no default reduction, the default is an error.
So replace any action which says "error" with "use default". */
if (!default_reduction)
for (i = 0; i < ntokens; i++)
if (actrow[i] == ACTION_NUMBER_MINIMUM)
actrow[i] = 0;
if (conflicted)
conflict_row (s);
return default_reduction;
}
/*----------------------------------------.
| Set FROMS, TOS, TALLY and WIDTH for S. |
`----------------------------------------*/
static void
save_row (state_number s)
{
symbol_number i;
int count;
base_number *sp;
base_number *sp1;
base_number *sp2;
unsigned int *sp3;
/* Number of non default actions in S. */
count = 0;
for (i = 0; i < ntokens; i++)
if (actrow[i] != 0)
count++;
if (count == 0)
return;
/* Allocate non defaulted actions. */
froms[s] = sp = sp1 = xnmalloc (count, sizeof *sp1);
tos[s] = sp2 = xnmalloc (count, sizeof *sp2);
conflict_tos[s] = sp3 =
nondeterministic_parser ? xnmalloc (count, sizeof *sp3) : NULL;
/* Store non defaulted actions. */
for (i = 0; i < ntokens; i++)
if (actrow[i] != 0)
{
*sp1++ = i;
*sp2++ = actrow[i];
if (nondeterministic_parser)
*sp3++ = conflrow[i];
}
tally[s] = count;
width[s] = sp1[-1] - sp[0] + 1;
}
/*------------------------------------------------------------------.
| Figure out the actions for the specified state, indexed by |
| lookahead token type. |
| |
| The YYDEFACT table is output now. The detailed info is saved for |
| putting into YYTABLE later. |
`------------------------------------------------------------------*/
static void
token_actions (void)
{
state_number i;
symbol_number j;
rule_number r;
int nconflict = nondeterministic_parser ? conflicts_total_count () : 0;
yydefact = xnmalloc (nstates, sizeof *yydefact);
actrow = xnmalloc (ntokens, sizeof *actrow);
conflrow = xnmalloc (ntokens, sizeof *conflrow);
conflict_list = xnmalloc (1 + 2 * nconflict, sizeof *conflict_list);
conflict_list_free = 2 * nconflict;
conflict_list_cnt = 1;
/* Find the rules which are reduced. */
if (!nondeterministic_parser)
for (r = 0; r < nrules; ++r)
rules[r].useful = false;
for (i = 0; i < nstates; ++i)
{
rule *default_reduction = action_row (states[i]);
yydefact[i] = default_reduction ? default_reduction->number + 1 : 0;
save_row (i);
/* Now that the parser was computed, we can find which rules are
really reduced, and which are not because of SR or RR
conflicts. */
if (!nondeterministic_parser)
{
for (j = 0; j < ntokens; ++j)
if (actrow[j] < 0 && actrow[j] != ACTION_NUMBER_MINIMUM)
rules[item_number_as_rule_number (actrow[j])].useful = true;
if (yydefact[i])
rules[yydefact[i] - 1].useful = true;
}
}
free (actrow);
free (conflrow);
}
/*------------------------------------------------------------------.
| Compute FROMS[VECTOR], TOS[VECTOR], TALLY[VECTOR], WIDTH[VECTOR], |
| i.e., the information related to non defaulted GOTO on the nterm |
| SYM. |
| |
| DEFAULT_STATE is the principal destination on SYM, i.e., the |
| default GOTO destination on SYM. |
`------------------------------------------------------------------*/
static void
save_column (symbol_number sym, state_number default_state)
{
goto_number i;
base_number *sp;
base_number *sp1;
base_number *sp2;
int count;
vector_number symno = symbol_number_to_vector_number (sym);
goto_number begin = goto_map[sym - ntokens];
goto_number end = goto_map[sym - ntokens + 1];
/* Number of non default GOTO. */
count = 0;
for (i = begin; i < end; i++)
if (to_state[i] != default_state)
count++;
if (count == 0)
return;
/* Allocate room for non defaulted gotos. */
froms[symno] = sp = sp1 = xnmalloc (count, sizeof *sp1);
tos[symno] = sp2 = xnmalloc (count, sizeof *sp2);
/* Store the state numbers of the non defaulted gotos. */
for (i = begin; i < end; i++)
if (to_state[i] != default_state)
{
*sp1++ = from_state[i];
*sp2++ = to_state[i];
}
tally[symno] = count;
width[symno] = sp1[-1] - sp[0] + 1;
}
/*-------------------------------------------------------------.
| Return `the' most common destination GOTO on SYM (a nterm). |
`-------------------------------------------------------------*/
static state_number
default_goto (symbol_number sym, size_t state_count[])
{
state_number s;
goto_number i;
goto_number m = goto_map[sym - ntokens];
goto_number n = goto_map[sym - ntokens + 1];
state_number default_state = -1;
size_t max = 0;
if (m == n)
return -1;
for (s = 0; s < nstates; s++)
state_count[s] = 0;
for (i = m; i < n; i++)
state_count[to_state[i]]++;
for (s = 0; s < nstates; s++)
if (state_count[s] > max)
{
max = state_count[s];
default_state = s;
}
return default_state;
}
/*-------------------------------------------------------------------.
| Figure out what to do after reducing with each rule, depending on |
| the saved state from before the beginning of parsing the data that |
| matched this rule. |
| |
| The YYDEFGOTO table is output now. The detailed info is saved for |
| putting into YYTABLE later. |
`-------------------------------------------------------------------*/
static void
goto_actions (void)
{
symbol_number i;
size_t *state_count = xnmalloc (nstates, sizeof *state_count);
yydefgoto = xnmalloc (nvars, sizeof *yydefgoto);
/* For a given nterm I, STATE_COUNT[S] is the number of times there
is a GOTO to S on I. */
for (i = ntokens; i < nsyms; ++i)
{
state_number default_state = default_goto (i, state_count);
save_column (i, default_state);
yydefgoto[i - ntokens] = default_state;
}
free (state_count);
}
/*------------------------------------------------------------------.
| Compute ORDER, a reordering of vectors, in order to decide how to |
| pack the actions and gotos information into yytable. |
`------------------------------------------------------------------*/
static void
sort_actions (void)
{
int i;
nentries = 0;
for (i = 0; i < nvectors; i++)
if (tally[i] > 0)
{
int k;
int t = tally[i];
int w = width[i];
int j = nentries - 1;
while (j >= 0 && (width[order[j]] < w))
j--;
while (j >= 0 && (width[order[j]] == w) && (tally[order[j]] < t))
j--;
for (k = nentries - 1; k > j; k--)
order[k + 1] = order[k];
order[j + 1] = i;
nentries++;
}
}
/* If VECTOR is a state which actions (reflected by FROMS, TOS, TALLY
and WIDTH of VECTOR) are common to a previous state, return this
state number.
In any other case, return -1. */
static state_number
matching_state (vector_number vector)
{
vector_number i = order[vector];
int t;
int w;
int prev;
/* If VECTOR is a nterm, return -1. */
if (nstates <= i)
return -1;
t = tally[i];
w = width[i];
/* If VECTOR has GLR conflicts, return -1 */
if (conflict_tos[i] != NULL)
{
int j;
for (j = 0; j < t; j += 1)
if (conflict_tos[i][j] != 0)
return -1;
}
for (prev = vector - 1; prev >= 0; prev--)
{
vector_number j = order[prev];
int k;
int match = 1;
/* Given how ORDER was computed, if the WIDTH or TALLY is
different, there cannot be a matching state. */
if (width[j] != w || tally[j] != t)
return -1;
for (k = 0; match && k < t; k++)
if (tos[j][k] != tos[i][k] || froms[j][k] != froms[i][k]
|| (conflict_tos[j] != NULL && conflict_tos[j][k] != 0))
match = 0;
if (match)
return j;
}
return -1;
}
static base_number
pack_vector (vector_number vector)
{
vector_number i = order[vector];
int j;
int t = tally[i];
int loc = 0;
base_number *from = froms[i];
base_number *to = tos[i];
unsigned int *conflict_to = conflict_tos[i];
aver (t != 0);
for (j = lowzero - from[0]; ; j++)
{
int k;
bool ok = true;
aver (j < table_size);
for (k = 0; ok && k < t; k++)
{
loc = j + state_number_as_int (from[k]);
if (table_size <= loc)
table_grow (loc);
if (table[loc] != 0)
ok = false;
}
for (k = 0; ok && k < vector; k++)
if (pos[k] == j)
ok = false;
if (ok)
{
for (k = 0; k < t; k++)
{
loc = j + from[k];
table[loc] = to[k];
if (nondeterministic_parser && conflict_to != NULL)
conflict_table[loc] = conflict_to[k];
check[loc] = from[k];
}
while (table[lowzero] != 0)
lowzero++;
if (loc > high)
high = loc;
aver (BASE_MINIMUM <= j && j <= BASE_MAXIMUM);
return j;
}
}
}
/*-------------------------------------------------------------.
| Remap the negative infinite in TAB from NINF to the greatest |
| possible smallest value. Return it. |
| |
| In most case this allows us to use shorts instead of ints in |
| parsers. |
`-------------------------------------------------------------*/
static base_number
table_ninf_remap (base_number tab[], int size, base_number ninf)
{
base_number res = 0;
int i;
for (i = 0; i < size; i++)
if (tab[i] < res && tab[i] != ninf)
res = tab[i];
--res;
for (i = 0; i < size; i++)
if (tab[i] == ninf)
tab[i] = res;
return res;
}
static void
pack_table (void)
{
int i;
base = xnmalloc (nvectors, sizeof *base);
pos = xnmalloc (nentries, sizeof *pos);
table = xcalloc (table_size, sizeof *table);
conflict_table = xcalloc (table_size, sizeof *conflict_table);
check = xnmalloc (table_size, sizeof *check);
lowzero = 0;
high = 0;
for (i = 0; i < nvectors; i++)
base[i] = BASE_MINIMUM;
for (i = 0; i < table_size; i++)
check[i] = -1;
for (i = 0; i < nentries; i++)
{
state_number s = matching_state (i);
base_number place;
if (s < 0)
/* A new set of state actions, or a nonterminal. */
place = pack_vector (i);
else
/* Action of I were already coded for S. */
place = base[s];
pos[i] = place;
base[order[i]] = place;
}
/* Use the greatest possible negative infinites. */
base_ninf = table_ninf_remap (base, nvectors, BASE_MINIMUM);
table_ninf = table_ninf_remap (table, high + 1, ACTION_NUMBER_MINIMUM);
free (pos);
}
/*-----------------------------------------------------------------.
| Compute and output yydefact, yydefgoto, yypact, yypgoto, yytable |
| and yycheck. |
`-----------------------------------------------------------------*/
void
tables_generate (void)
{
int i;
/* This is a poor way to make sure the sizes are properly
correlated. In particular the signedness is not taken into
account. But it's not useless. */
verify (sizeof nstates <= sizeof nvectors
&& sizeof nvars <= sizeof nvectors);
nvectors = state_number_as_int (nstates) + nvars;
froms = xcalloc (nvectors, sizeof *froms);
tos = xcalloc (nvectors, sizeof *tos);
conflict_tos = xcalloc (nvectors, sizeof *conflict_tos);
tally = xcalloc (nvectors, sizeof *tally);
width = xnmalloc (nvectors, sizeof *width);
token_actions ();
goto_actions ();
free (goto_map);
free (from_state);
free (to_state);
order = xcalloc (nvectors, sizeof *order);
sort_actions ();
pack_table ();
free (order);
free (tally);
free (width);
for (i = 0; i < nvectors; i++)
{
free (froms[i]);
free (tos[i]);
free (conflict_tos[i]);
}
free (froms);
free (tos);
free (conflict_tos);
}
/*-------------------------.
| Free the parser tables. |
`-------------------------*/
void
tables_free (void)
{
free (base);
free (conflict_table);
free (conflict_list);
free (table);
free (check);
free (yydefgoto);
free (yydefact);
}