blob: e6aa6f5ccac34ba86fd14c39dff87c4b8acabccb [file] [log] [blame]
/* IELR main implementation.
Copyright (C) 2009-2012 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "ielr.h"
#include <bitset.h>
#include <timevar.h>
#include "AnnotationList.h"
#include "derives.h"
#include "getargs.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "relation.h"
#include "state.h"
#include "symtab.h"
/** Records the value of the \%define variable lr.type. */
typedef enum { LR_TYPE__LALR, LR_TYPE__IELR, LR_TYPE__CANONICAL_LR } LrType;
/**
* \post:
* - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
* is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
* the same RHS are nullable nonterminals. In other words, the follows of
* a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
*/
static bitset
ielr_compute_ritem_sees_lookahead_set (void)
{
bitset result = bitset_create (nritems, BITSET_FIXED);
unsigned int i = nritems-1;
while (i>0)
{
--i;
while (!item_number_is_rule_number (ritem[i])
&& ISVAR (ritem[i])
&& nullable [item_number_as_symbol_number (ritem[i]) - ntokens])
bitset_set (result, i--);
if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
bitset_set (result, i--);
while (!item_number_is_rule_number (ritem[i]) && i>0)
--i;
}
if (trace_flag & trace_ielr)
{
fprintf (stderr, "ritem_sees_lookahead_set:\n");
debug_bitset (result);
fprintf (stderr, "\n");
}
return result;
}
/**
* \pre:
* - \c ritem_sees_lookahead_set was computed by
* \c ielr_compute_ritem_sees_lookahead_set.
* \post:
* - Each of \c *edgesp and \c *edge_countsp is a new array of size
* \c ::ngotos.
* - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
* <tt>(*edge_countsp)[i]+1</tt>.
* - In such a \c goto_number array, the last element is \c ::END_NODE.
* - All remaining elements are the indices of the gotos to which there is an
* internal follow edge from goto \c i.
* - There is an internal follow edge from goto \c i to goto \c j iff both:
* - The from states of gotos \c i and \c j are the same.
* - The transition nonterminal for goto \c i appears as the first RHS
* symbol of at least one production for which both:
* - The LHS is the transition symbol of goto \c j.
* - All other RHS symbols are nullable nonterminals.
* - In other words, the follows of goto \c i include the follows of
* goto \c j and it's an internal edge because the from states are the
* same.
*/
static void
ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
goto_number ***edgesp, int **edge_countsp)
{
*edgesp = xnmalloc (ngotos, sizeof **edgesp);
*edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
{
bitset sources = bitset_create (ngotos, BITSET_FIXED);
goto_number i;
for (i = 0; i < ngotos; ++i)
(*edge_countsp)[i] = 0;
for (i = 0; i < ngotos; ++i)
{
int nsources = 0;
{
rule **rulep;
for (rulep = derives[states[to_state[i]]->accessing_symbol
- ntokens];
*rulep;
++rulep)
{
/* If there is at least one RHS symbol, if the first RHS symbol
is a nonterminal, and if all remaining RHS symbols (if any)
are nullable nonterminals, create an edge from the LHS
symbol's goto to the first RHS symbol's goto such that the RHS
symbol's goto will be the source of the edge after the
eventual relation_transpose below.
Unlike in ielr_compute_always_follows, I use a bitset for
edges rather than an array because it is possible that
multiple RHS's with the same first symbol could fit and thus
that we could end up with redundant edges. With the
possibility of redundant edges, it's hard to know ahead of
time how large to make such an array. Another possible
redundancy is that source and destination might be the same
goto. Eliminating all these possible redundancies now might
possibly help performance a little. I have not proven any of
this, but I'm guessing the bitset shouldn't entail much of a
performance penalty, if any. */
if (bitset_test (ritem_sees_lookahead_set,
(*rulep)->rhs - ritem))
{
goto_number source =
map_goto (from_state[i],
item_number_as_symbol_number (*(*rulep)->rhs));
if (i != source && !bitset_test (sources, source))
{
bitset_set (sources, source);
++nsources;
++(*edge_countsp)[source];
}
}
}
}
if (nsources == 0)
(*edgesp)[i] = NULL;
else
{
(*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
{
bitset_iterator biter_source;
bitset_bindex source;
int j = 0;
BITSET_FOR_EACH (biter_source, sources, source, 0)
(*edgesp)[i][j++] = source;
}
(*edgesp)[i][nsources] = END_NODE;
}
bitset_zero (sources);
}
bitset_free (sources);
}
relation_transpose (edgesp, ngotos);
if (trace_flag & trace_ielr)
{
fprintf (stderr, "internal_follow_edges:\n");
relation_print (*edgesp, ngotos, stderr);
}
}
/**
* \pre:
* - \c ritem_sees_lookahead_set was computed by
* \c ielr_compute_ritem_sees_lookahead_set.
* - \c internal_follow_edges was computed by
* \c ielr_compute_internal_follow_edges.
* \post:
* - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
* is \c ngotos and the number of columns is maximum number of kernel items
* in any state.
* - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
* \c i include the lookahead set of item \c j in the from state of goto
* \c i.
* - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there is
* no item \c j in the from state of goto \c i.
*/
static void
ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
goto_number **internal_follow_edges,
bitsetv *follow_kernel_itemsp)
{
{
size_t max_nitems = 0;
state_number i;
for (i = 0; i < nstates; ++i)
if (states[i]->nitems > max_nitems)
max_nitems = states[i]->nitems;
*follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
}
{
goto_number i;
for (i = 0; i < ngotos; ++i)
{
size_t nitems = states[from_state[i]]->nitems;
item_number *items = states[from_state[i]]->items;
size_t j;
for (j = 0; j < nitems; ++j)
/* If this item has this goto and if all subsequent symbols in this
RHS (if any) are nullable nonterminals, then record this item as
one whose lookahead set is included in this goto's follows. */
if (item_number_is_symbol_number (ritem[items[j]])
&& item_number_as_symbol_number (ritem[items[j]])
== states[to_state[i]]->accessing_symbol
&& bitset_test (ritem_sees_lookahead_set, items[j]))
bitset_set ((*follow_kernel_itemsp)[i], j);
}
}
relation_digraph (internal_follow_edges, ngotos, follow_kernel_itemsp);
if (trace_flag & trace_ielr)
{
fprintf (stderr, "follow_kernel_items:\n");
debug_bitsetv (*follow_kernel_itemsp);
}
}
/**
* \pre
* - \c *edgesp and \c edge_counts were computed by
* \c ielr_compute_internal_follow_edges.
* \post
* - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
* \c ntokens columns.
* - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
* follow (that is, it's computed by internal and successor edges) of goto
* \c i.
* - Rows of \c *edgesp have been realloc'ed and extended to include
* successor follow edges. \c edge_counts has not been updated.
*/
static void
ielr_compute_always_follows (goto_number ***edgesp,
int const edge_counts[],
bitsetv *always_followsp)
{
*always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
{
goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
goto_number i;
for (i = 0; i < ngotos; ++i)
{
goto_number nedges = edge_counts[i];
{
int j;
transitions *trans = states[to_state[i]]->transitions;
FOR_EACH_SHIFT (trans, j)
bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
for (; j < trans->num; ++j)
{
symbol_number sym = TRANSITION_SYMBOL (trans, j);
if (nullable[sym - ntokens])
edge_array[nedges++] = map_goto (to_state[i], sym);
}
}
if (nedges - edge_counts[i])
{
(*edgesp)[i] =
xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
(nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
(*edgesp)[i][nedges] = END_NODE;
}
}
free (edge_array);
}
relation_digraph (*edgesp, ngotos, always_followsp);
if (trace_flag & trace_ielr)
{
fprintf (stderr, "always follow edges:\n");
relation_print (*edgesp, ngotos, stderr);
fprintf (stderr, "always_follows:\n");
debug_bitsetv (*always_followsp);
}
}
/**
* \post
* - \c result is a new array of size \c ::nstates.
* - <tt>result[i]</tt> is an array of pointers to the predecessor
* <tt>state</tt>'s of state \c i.
* - The last element of such an array is \c NULL.
*/
static state ***
ielr_compute_predecessors (void)
{
state_number i;
int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
state ***result = xnmalloc (nstates, sizeof *result);
for (i = 0; i < nstates; ++i)
predecessor_counts[i] = 0;
for (i = 0; i < nstates; ++i)
{
int j;
for (j = 0; j < states[i]->transitions->num; ++j)
++predecessor_counts[states[i]->transitions->states[j]->number];
}
for (i = 0; i < nstates; ++i)
{
result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]);
result[i][predecessor_counts[i]] = NULL;
predecessor_counts[i] = 0;
}
for (i = 0; i < nstates; ++i)
{
int j;
for (j = 0; j < states[i]->transitions->num; ++j)
{
state_number k = states[i]->transitions->states[j]->number;
result[k][predecessor_counts[k]++] = states[i];
}
}
free (predecessor_counts);
return result;
}
/**
* \post
* - \c *follow_kernel_itemsp and \c *always_followsp were computed by
* \c ielr_compute_follow_kernel_items and
* \c ielr_compute_always_follows.
* - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
* by \c ielr_compute_predecessors.
*/
static void
ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
bitsetv *always_followsp,
state ****predecessorsp)
{
goto_number **edges;
int *edge_counts;
{
bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
&edges, &edge_counts);
ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
follow_kernel_itemsp);
bitset_free (ritem_sees_lookahead_set);
}
ielr_compute_always_follows (&edges, edge_counts, always_followsp);
{
int i;
for (i = 0; i < ngotos; ++i)
free (edges[i]);
}
free (edges);
free (edge_counts);
if (predecessorsp)
*predecessorsp = ielr_compute_predecessors ();
}
/**
* \note
* - FIXME: It might be an interesting experiment to compare the space and
* time efficiency of computing \c item_lookahead_sets either:
* - Fully up front.
* - Just-in-time, as implemented below.
* - Not at all. That is, just let annotations continue even when
* unnecessary.
*/
bool
ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
symbol_number lookahead, state ***predecessors,
bitset **item_lookahead_sets)
{
if (!item_lookahead_sets[s->number])
{
size_t i;
item_lookahead_sets[s->number] =
xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
for (i = 0; i < s->nitems; ++i)
item_lookahead_sets[s->number][i] = NULL;
}
if (!item_lookahead_sets[s->number][item])
{
item_lookahead_sets[s->number][item] =
bitset_create (ntokens, BITSET_FIXED);
/* If this kernel item is the beginning of a RHS, it must be the kernel
item in the start state, and so its LHS has no follows and no goto to
check. If, instead, this kernel item is the successor of the start
state's kernel item, there are still no follows and no goto. This
situation is fortunate because we want to avoid the - 2 below in both
cases.
Actually, IELR(1) should never invoke this function for either of
those cases because (1) follow_kernel_items will never reference a
kernel item for this RHS because the end token blocks sight of the
lookahead set from the RHS's only nonterminal, and (2) no reduction
has a lookback dependency on this lookahead set. Nevertheless, I
didn't change this test to an aver just in case the usage of this
function evolves to need those two cases. In both cases, the current
implementation returns the right result. */
if (s->items[item] > 1)
{
/* If the LHS symbol of this item isn't known (because this is a
top-level invocation), go get it. */
if (!lhs)
{
unsigned int i;
for (i = s->items[item];
!item_number_is_rule_number (ritem[i]);
++i)
;
lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number;
}
/* If this kernel item is next to the beginning of the RHS, then
check all predecessors' goto follows for the LHS. */
if (item_number_is_rule_number (ritem[s->items[item] - 2]))
{
state **predecessor;
aver (lhs != accept->number);
for (predecessor = predecessors[s->number];
*predecessor;
++predecessor)
bitset_or (item_lookahead_sets[s->number][item],
item_lookahead_sets[s->number][item],
goto_follows[map_goto ((*predecessor)->number,
lhs)]);
}
/* If this kernel item is later in the RHS, then check all
predecessor items' lookahead sets. */
else
{
state **predecessor;
for (predecessor = predecessors[s->number];
*predecessor;
++predecessor)
{
size_t predecessor_item;
for (predecessor_item = 0;
predecessor_item < (*predecessor)->nitems;
++predecessor_item)
if ((*predecessor)->items[predecessor_item]
== s->items[item] - 1)
break;
aver (predecessor_item != (*predecessor)->nitems);
ielr_item_has_lookahead (*predecessor, lhs,
predecessor_item, 0 /*irrelevant*/,
predecessors, item_lookahead_sets);
bitset_or (item_lookahead_sets[s->number][item],
item_lookahead_sets[s->number][item],
item_lookahead_sets[(*predecessor)->number]
[predecessor_item]);
}
}
}
}
return bitset_test (item_lookahead_sets[s->number][item], lookahead);
}
/**
* \pre
* - \c follow_kernel_items, \c always_follows, and \c predecessors
* were computed by \c ielr_compute_auxiliary_tables.
* \post
* - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
* points to a new array of size \c ::nstates.
* - For <tt>0 <= i < ::nstates</tt>:
* - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
* node for <tt>states[i]</tt>.
* - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
* node for <tt>states[i]</tt>.
* - <tt>*max_annotationsp</tt> is the maximum number of annotations per
* state.
*/
static void
ielr_compute_annotation_lists (bitsetv follow_kernel_items,
bitsetv always_follows, state ***predecessors,
AnnotationIndex *max_annotationsp,
InadequacyList ***inadequacy_listsp,
AnnotationList ***annotation_listsp,
struct obstack *annotations_obstackp)
{
bitset **item_lookahead_sets =
xnmalloc (nstates, sizeof *item_lookahead_sets);
AnnotationIndex *annotation_counts =
xnmalloc (nstates, sizeof *annotation_counts);
ContributionIndex max_contributions = 0;
unsigned int total_annotations = 0;
state_number i;
*inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
*annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
for (i = 0; i < nstates; ++i)
{
item_lookahead_sets[i] = NULL;
(*inadequacy_listsp)[i] = NULL;
(*annotation_listsp)[i] = NULL;
annotation_counts[i] = 0;
}
{
InadequacyListNodeCount inadequacy_list_node_count = 0;
for (i = 0; i < nstates; ++i)
AnnotationList__compute_from_inadequacies (
states[i], follow_kernel_items, always_follows, predecessors,
item_lookahead_sets, *inadequacy_listsp, *annotation_listsp,
annotation_counts, &max_contributions, annotations_obstackp,
&inadequacy_list_node_count);
}
*max_annotationsp = 0;
for (i = 0; i < nstates; ++i)
{
if (annotation_counts[i] > *max_annotationsp)
*max_annotationsp = annotation_counts[i];
total_annotations += annotation_counts[i];
}
if (trace_flag & trace_ielr)
{
for (i = 0; i < nstates; ++i)
{
fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
AnnotationList__debug ((*annotation_listsp)[i],
states[i]->nitems, 2);
}
fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
fprintf (stderr, "Average number of annotations per state: %f\n",
(float)total_annotations/nstates);
fprintf (stderr, "Max number of annotations per state: %d\n",
*max_annotationsp);
fprintf (stderr, "Max number of contributions per annotation: %d\n",
max_contributions);
}
for (i = 0; i < nstates; ++i)
if (item_lookahead_sets[i])
{
size_t j;
for (j = 0; j < states[i]->nitems; ++j)
if (item_lookahead_sets[i][j])
bitset_free (item_lookahead_sets[i][j]);
free (item_lookahead_sets[i]);
}
free (item_lookahead_sets);
free (annotation_counts);
}
typedef struct state_list {
struct state_list *next;
state *state;
/** Has this state been recomputed as a successor of another state? */
bool recomputedAsSuccessor;
/**
* \c NULL iff all lookahead sets are empty. <tt>lookaheads[i] = NULL</tt>
* iff the lookahead set on item \c i is empty.
*/
bitset *lookaheads;
/**
* nextIsocore is the next state in a circularly linked-list of all states
* with the same core. The one originally computed by generate_states in
* LR0.c is lr0Isocore.
*/
struct state_list *lr0Isocore;
struct state_list *nextIsocore;
} state_list;
/**
* \pre
* - \c follow_kernel_items and \c always_follows were computed by
* \c ielr_compute_auxiliary_tables.
* - <tt>n->class = nterm_sym</tt>.
* \post
* - \c follow_set contains the follow set for the goto on nonterminal \c n
* in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
*/
static void
ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
bitsetv always_follows, state_list *s,
symbol *n, bitset follow_set)
{
goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
bitset_copy (follow_set, always_follows[n_goto]);
if (s->lookaheads)
{
bitset_iterator biter_item;
bitset_bindex item;
BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
if (s->lookaheads[item])
bitset_or (follow_set, follow_set, s->lookaheads[item]);
}
}
/**
* \pre
* - \c follow_kernel_items and \c always_follows were computed by
* \c ielr_compute_auxiliary_tables.
* - \c lookahead_filter was computed by
* \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
* of \c t.
* - The number of rows in \c lookaheads is at least the number of items in
* \c t, and the number of columns is \c ::ntokens.
* \post
* - <tt>lookaheads[i][j]</tt> is set iff both:
* - <tt>lookahead_filter[i][j]</tt> is set.
* - The isocore of \c t that will be the transition successor of \c s will
* inherit from \c s token \c j into the lookahead set of item \c i.
*/
static void
ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
state_list *s, state *t, bitsetv lookahead_filter,
bitsetv lookaheads)
{
size_t s_item = 0;
size_t t_item;
bitsetv_zero (lookaheads);
for (t_item = 0; t_item < t->nitems; ++t_item)
{
/* If this kernel item is the beginning of a RHS, it must be the
kernel item in the start state, but t is supposed to be a successor
state. If, instead, this kernel item is the successor of the start
state's kernel item, the lookahead set is empty. This second case is
a special case to avoid the - 2 below, but the next successor can be
handled fine without special casing it. */
aver (t->items[t_item] != 0);
if (t->items[t_item] > 1
&& !bitset_empty_p (lookahead_filter[t_item]))
{
if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
{
unsigned int rule_item;
for (rule_item = t->items[t_item];
!item_number_is_rule_number (ritem[rule_item]);
++rule_item)
;
ielr_compute_goto_follow_set (
follow_kernel_items, always_follows, s,
rules[item_number_as_rule_number (ritem[rule_item])].lhs,
lookaheads[t_item]);
}
else if (s->lookaheads)
{
/* We don't have to start the s item search at the beginning
every time because items from both states are sorted by their
indices in ritem. */
for (; s_item < s->state->nitems; ++s_item)
if (s->state->items[s_item] == t->items[t_item] - 1)
break;
aver (s_item != s->state->nitems);
if (s->lookaheads[s_item])
bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
}
bitset_and (lookaheads[t_item],
lookaheads[t_item], lookahead_filter[t_item]);
}
}
}
/**
* \pre
* - \c follow_kernel_items and \c always_follows were computed by
* \c ielr_compute_auxiliary_tables.
* - Either:
* - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
* - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
* - The number of rows in each of \c lookaheads and \c work2 is the maximum
* number of items in any state. The number of columns in each is
* \c ::ntokens.
* - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
* - \c ::nstates is the total number of states, some not yet fully computed,
* in the list ending at \c *last_statep. It is at least the number of
* original LR(0) states.
* - The size of \c work1 is at least the number of annotations for the LR(0)
* isocore of \c t.
* \post
* - Either:
* - In the case that <tt>annotation_lists != NULL</tt>,
* <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
* permitted by the annotations for the original LR(0) isocore of \c t.
* If this changed the lookaheads in that isocore, those changes were
* propagated to all already computed transition successors recursively
* possibly resulting in the splitting of some of those successors.
* - In the case that <tt>annotation_lists = NULL</tt>,
* <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
* isocore's lookahead sets were identical to those specified by
* <tt>lookaheads \@pre</tt>.
* - If no such merge was permitted, a new isocore of \c t containing
* <tt>lookaheads \@pre</tt> was appended to the state list whose
* previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
* incremented. It was also appended to \c t's isocore list.
* - <tt>*tp</tt> = the isocore of \c t into which
* <tt>lookaheads \@pre</tt> was placed/merged.
* - \c lookaheads, \c work1, and \c work2 may have been altered.
*/
static void
ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
AnnotationList **annotation_lists, state *t,
bitsetv lookaheads, state_list **last_statep,
ContributionIndex work1[], bitsetv work2, state **tp)
{
state_list *lr0_isocore = t->state_list->lr0Isocore;
state_list **this_isocorep;
bool has_lookaheads;
/* Determine whether there's an isocore of t with which these lookaheads can
be merged. */
{
AnnotationIndex ai;
AnnotationList *a;
if (annotation_lists)
for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
a;
++ai, a = a->next)
work1[ai] =
AnnotationList__computeDominantContribution (
a, lr0_isocore->state->nitems, lookaheads, false);
for (this_isocorep = &t->state_list;
this_isocorep == &t->state_list || *this_isocorep != t->state_list;
this_isocorep = &(*this_isocorep)->nextIsocore)
{
if (!(*this_isocorep)->recomputedAsSuccessor)
break;
if (annotation_lists)
{
for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
a;
++ai, a = a->next)
{
if (work1[ai] != ContributionIndex__none)
{
/* This isocore compatibility test depends on the fact
that, if the dominant contributions are the same for the
two isocores, then merging their lookahead sets will not
produce a state with a different dominant contribution.
*/
ContributionIndex ci =
AnnotationList__computeDominantContribution (
a, lr0_isocore->state->nitems,
(*this_isocorep)->lookaheads, false);
if (ci != ContributionIndex__none && work1[ai] != ci)
break;
}
}
if (!a)
break;
}
else
{
size_t i;
for (i = 0; i < t->nitems; ++i)
{
if (!(*this_isocorep)->lookaheads
|| !(*this_isocorep)->lookaheads[i])
{
if (!bitset_empty_p (lookaheads[i]))
break;
}
/* bitset_equal_p uses the size of the first argument,
so lookaheads[i] must be the second argument. */
else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
lookaheads[i]))
break;
}
if (i == t->nitems)
break;
}
}
}
has_lookaheads = false;
{
size_t i;
for (i = 0; i < lr0_isocore->state->nitems; ++i)
if (!bitset_empty_p (lookaheads[i]))
{
has_lookaheads = true;
break;
}
}
/* Merge with an existing isocore. */
if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
{
bool new_lookaheads = false;
*tp = (*this_isocorep)->state;
/* Merge lookaheads into the state and record whether any of them are
actually new. */
if (has_lookaheads)
{
size_t i;
if (!(*this_isocorep)->lookaheads)
{
(*this_isocorep)->lookaheads =
xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
for (i = 0; i < t->nitems; ++i)
(*this_isocorep)->lookaheads[i] = NULL;
}
for (i = 0; i < t->nitems; ++i)
if (!bitset_empty_p (lookaheads[i]))
{
if (!(*this_isocorep)->lookaheads[i])
(*this_isocorep)->lookaheads[i] =
bitset_create (ntokens, BITSET_FIXED);
bitset_andn (lookaheads[i],
lookaheads[i], (*this_isocorep)->lookaheads[i]);
bitset_or ((*this_isocorep)->lookaheads[i],
lookaheads[i], (*this_isocorep)->lookaheads[i]);
if (!bitset_empty_p (lookaheads[i]))
new_lookaheads = true;
}
}
/* If new lookaheads were merged, propagate those lookaheads to the
successors, possibly splitting them. If *tp is being recomputed for
the first time, this isn't necessary because the main
ielr_split_states loop will handle the successors later. */
if (!(*this_isocorep)->recomputedAsSuccessor)
(*this_isocorep)->recomputedAsSuccessor = true;
else if (new_lookaheads)
{
int i;
/* When merging demands identical lookahead sets, it is impossible to
merge new lookaheads. */
aver (annotation_lists);
for (i = 0; i < (*tp)->transitions->num; ++i)
{
state *t2 = (*tp)->transitions->states[i];
/* At any time, there's at most one state for which we have so
far initially recomputed only some of its successors in the
main ielr_split_states loop. Because we recompute successors
in order, we can just stop at the first such successor. Of
course, *tp might be a state some of whose successors have
been recomputed as successors of other states rather than as
successors of *tp. It's fine if we go ahead and propagate to
some of those. We'll propagate to them again (but stop when
we see nothing changes) and to the others when we reach *tp in
the main ielr_split_states loop later. */
if (!t2->state_list->recomputedAsSuccessor)
break;
AnnotationList__computeLookaheadFilter (
annotation_lists[t2->state_list->lr0Isocore->state->number],
t2->nitems, work2);
ielr_compute_lookaheads (follow_kernel_items, always_follows,
(*this_isocorep), t2, work2,
lookaheads);
/* FIXME: If splitting t2 here, it's possible that lookaheads
that had already propagated from *tp to t2 will be left in t2
after *tp has been removed as t2's predecessor:
- We're going to recompute all lookaheads in phase 4, so these
extra lookaheads won't appear in the final parser table.
- If t2 has just one annotation, then these extra lookaheads
cannot alter the dominating contribution to the associated
inadequacy and thus cannot needlessly prevent a future merge
of some new state with t2. We can be sure of this because:
- The fact that we're splitting t2 now means that some
predecessors (at least one) other than *tp must be
propagating contributions to t2.
- The fact that t2 was merged in the first place means that,
if *tp propagated any contributions, the dominating
contribution must be the same as that from those other
predecessors.
- Thus, if some new state to be merged with t2 in the future
proves to be compatible with the contributions propagated
by the other predecessors, it will also be compatible with
the contributions made by the extra lookaheads left behind
by *tp.
- However, if t2 has more than one annotation and these extra
lookaheads contribute to one of their inadequacies, it's
possible these extra lookaheads may needlessly prevent a
future merge with t2. For example:
- Let's say there's an inadequacy A that makes the split
necessary as follows:
- There's currently just one other predecessor and it
propagates to t2 some contributions to inadequacy A.
- The new lookaheads that we were attempting to propagate
from *tp to t2 made contributions to inadequacy A with a
different dominating contribution than those from that
other predecessor.
- The extra lookaheads either make no contribution to
inadequacy A or have the same dominating contribution as
the contributions from the other predecessor. Either
way, as explained above, they can't prevent a future
merge.
- Let's say there's an inadequacy B that causes the trouble
with future merges as follows:
- The extra lookaheads make contributions to inadequacy B.
- Those extra contributions did not prevent the original
merge to create t2 because the other predecessor
propagates to t2 no contributions to inadequacy B.
- Thus, those extra contributions may prevent a future
merge with t2 even though the merge would be fine if *tp
had not left them behind.
- Is the latter case common enough to worry about?
- Perhaps we should track all predecessors and iterate them
now to recreate t2 without those extra lookaheads. */
ielr_compute_state (follow_kernel_items, always_follows,
annotation_lists, t2, lookaheads,
last_statep, work1, work2,
&(*tp)->transitions->states[i]);
}
}
}
/* Create a new isocore. */
else
{
state_list *old_isocore = *this_isocorep;
(*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
*last_statep = *this_isocorep;
(*last_statep)->state = *tp = state_new_isocore (t);
(*tp)->state_list = *last_statep;
(*last_statep)->recomputedAsSuccessor = true;
(*last_statep)->next = NULL;
(*last_statep)->lookaheads = NULL;
if (has_lookaheads)
{
size_t i;
(*last_statep)->lookaheads =
xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
for (i = 0; i < t->nitems; ++i)
{
if (bitset_empty_p (lookaheads[i]))
(*last_statep)->lookaheads[i] = NULL;
else
{
(*last_statep)->lookaheads[i] =
bitset_create (ntokens, BITSET_FIXED);
bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
}
}
}
(*last_statep)->lr0Isocore = lr0_isocore;
(*last_statep)->nextIsocore = old_isocore;
}
}
/**
* \pre
* - \c follow_kernel_items and \c always_follows were computed by
* \c ielr_compute_auxiliary_tables.
* - Either:
* - <tt>annotation_lists = NULL</tt> and <tt>max_annotations=0</tt>.
* - \c annotation_lists and \c max_annotations were computed by
* \c ielr_compute_annotation_lists.
* \post
* - \c ::states is of size \c ::nstates (which might be greater than
* <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
* inadequacy. \c annotation_lists was used to determine state
* compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
* LR(1) state compatibility test was used.
* - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
* computed in all states. TV_IELR_PHASE4 was pushed while they were
* computed from item lookahead sets.
*/
static void
ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
AnnotationList **annotation_lists,
AnnotationIndex max_annotations)
{
state_list *first_state;
state_list *last_state;
bitsetv lookahead_filter = NULL;
bitsetv lookaheads;
/* Set up state list and some reusable bitsets. */
{
size_t max_nitems = 0;
state_number i;
state_list **nodep = &first_state;
for (i = 0; i < nstates; ++i)
{
*nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
(*nodep)->state = states[i];
(*nodep)->recomputedAsSuccessor = false;
(*nodep)->lookaheads = NULL;
(*nodep)->lr0Isocore = *nodep;
(*nodep)->nextIsocore = *nodep;
nodep = &(*nodep)->next;
if (states[i]->nitems > max_nitems)
max_nitems = states[i]->nitems;
}
*nodep = NULL;
lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
if (!annotation_lists)
bitsetv_ones (lookahead_filter);
lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
}
/* Recompute states. */
{
ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
state_list *this_state;
for (this_state = first_state; this_state; this_state = this_state->next)
{
state *s = this_state->state;
int i;
for (i = 0; i < s->transitions->num; ++i)
{
state *t = s->transitions->states[i];
if (annotation_lists)
AnnotationList__computeLookaheadFilter (
annotation_lists[t->state_list->lr0Isocore->state->number],
t->nitems, lookahead_filter);
ielr_compute_lookaheads (follow_kernel_items, always_follows,
this_state, t, lookahead_filter,
lookaheads);
ielr_compute_state (follow_kernel_items, always_follows,
annotation_lists, t, lookaheads, &last_state,
work, lookahead_filter,
&s->transitions->states[i]);
}
}
free (work);
}
bitsetv_free (lookahead_filter);
bitsetv_free (lookaheads);
/* Store states back in the states array. */
states = xnrealloc (states, nstates, sizeof *states);
{
state_list *node;
for (node = first_state; node; node = node->next)
states[node->state->number] = node->state;
}
/* In the case of canonical LR(1), copy item lookahead sets to reduction
lookahead sets. */
if (!annotation_lists)
{
timevar_push (TV_IELR_PHASE4);
initialize_LA ();
state_list *node;
for (node = first_state; node; node = node->next)
if (!node->state->consistent)
{
size_t i = 0;
item_number *itemset = node->state->items;
size_t r;
for (r = 0; r < node->state->reductions->num; ++r)
{
rule *this_rule = node->state->reductions->rules[r];
bitset lookahead_set =
node->state->reductions->lookahead_tokens[r];
if (item_number_is_rule_number (*this_rule->rhs))
ielr_compute_goto_follow_set (follow_kernel_items,
always_follows, node,
this_rule->lhs, lookahead_set);
else if (node->lookaheads)
{
/* We don't need to start the kernel item search back at
i=0 because both items and reductions are sorted on rule
number. */
while (!item_number_is_rule_number (ritem[itemset[i]])
|| item_number_as_rule_number (ritem[itemset[i]])
!= this_rule->number)
{
++i;
aver (i < node->state->nitems);
}
if (node->lookaheads[i])
bitset_copy (lookahead_set, node->lookaheads[i]);
}
}
}
timevar_pop (TV_IELR_PHASE4);
}
/* Free state list. */
while (first_state)
{
state_list *node = first_state;
if (node->lookaheads)
{
size_t i;
for (i = 0; i < node->state->nitems; ++i)
if (node->lookaheads[i])
bitset_free (node->lookaheads[i]);
free (node->lookaheads);
}
first_state = node->next;
free (node);
}
}
void
ielr (void)
{
LrType lr_type;
/* Examine user options. */
{
char *type = muscle_percent_define_get ("lr.type");
if (0 == strcmp (type, "lalr"))
lr_type = LR_TYPE__LALR;
else if (0 == strcmp (type, "ielr"))
lr_type = LR_TYPE__IELR;
else if (0 == strcmp (type, "canonical-lr"))
lr_type = LR_TYPE__CANONICAL_LR;
else
aver (false);
free (type);
}
/* Phase 0: LALR(1). */
timevar_push (TV_LALR);
if (lr_type == LR_TYPE__CANONICAL_LR)
set_goto_map ();
else
lalr ();
if (lr_type == LR_TYPE__LALR)
{
bitsetv_free (goto_follows);
timevar_pop (TV_LALR);
return;
}
timevar_pop (TV_LALR);
{
bitsetv follow_kernel_items;
bitsetv always_follows;
InadequacyList **inadequacy_lists = NULL;
AnnotationList **annotation_lists = NULL;
struct obstack annotations_obstack;
AnnotationIndex max_annotations = 0;
{
/* Phase 1: Compute Auxiliary Tables. */
state ***predecessors;
timevar_push (TV_IELR_PHASE1);
ielr_compute_auxiliary_tables (
&follow_kernel_items, &always_follows,
lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
timevar_pop (TV_IELR_PHASE1);
/* Phase 2: Compute Annotations. */
timevar_push (TV_IELR_PHASE2);
if (lr_type != LR_TYPE__CANONICAL_LR)
{
obstack_init (&annotations_obstack);
ielr_compute_annotation_lists (follow_kernel_items, always_follows,
predecessors, &max_annotations,
&inadequacy_lists, &annotation_lists,
&annotations_obstack);
{
state_number i;
for (i = 0; i < nstates; ++i)
free (predecessors[i]);
}
free (predecessors);
bitsetv_free (goto_follows);
lalr_free ();
}
timevar_pop (TV_IELR_PHASE2);
}
/* Phase 3: Split States. */
timevar_push (TV_IELR_PHASE3);
{
state_number nstates_lr0 = nstates;
ielr_split_states (follow_kernel_items, always_follows,
annotation_lists, max_annotations);
if (inadequacy_lists)
{
state_number i;
for (i = 0; i < nstates_lr0; ++i)
InadequacyList__delete (inadequacy_lists[i]);
}
}
free (inadequacy_lists);
if (annotation_lists)
obstack_free (&annotations_obstack, NULL);
free (annotation_lists);
bitsetv_free (follow_kernel_items);
bitsetv_free (always_follows);
timevar_pop (TV_IELR_PHASE3);
}
/* Phase 4: Compute Reduction Lookaheads. */
timevar_push (TV_IELR_PHASE4);
free (goto_map);
free (from_state);
free (to_state);
if (lr_type == LR_TYPE__CANONICAL_LR)
{
/* Reduction lookaheads are computed in ielr_split_states above
but are timed as part of phase 4. */
set_goto_map ();
}
else
{
lalr ();
bitsetv_free (goto_follows);
}
timevar_pop (TV_IELR_PHASE4);
}