blob: 9b607a2f0dc485e356d217026582345695dda5ce [file] [log] [blame]
/* Lookahead sensitive state item searches for counterexample generation
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "lssi.h"
#include <gl_linked_list.h>
#include <gl_xlist.h>
#include <stdlib.h>
#include "getargs.h"
#include "nullable.h"
// Lookahead sensitive state item.
typedef struct lssi
{
state_item_number si;
struct lssi *parent;
// this is the precise lookahead set (follow_L from the CupEx paper)
bitset lookahead;
bool free_lookahead;
} lssi;
static lssi *
new_lssi (state_item_number si, lssi *p, bitset l, bool free_l)
{
lssi *res = xmalloc (sizeof *res);
res->si = si;
res->parent = p;
res->lookahead = l;
res->free_lookahead = free_l;
return res;
}
static void
lssi_free (lssi *sn)
{
if (sn == NULL)
return;
if (sn->free_lookahead)
bitset_free (sn->lookahead);
free (sn);
}
static size_t
lssi_hasher (lssi *sn, size_t max)
{
size_t hash = sn->si;
bitset_iterator biter;
symbol_number syn;
BITSET_FOR_EACH (biter, sn->lookahead, syn, 0)
hash += syn;
return hash % max;
}
static bool
lssi_comparator (lssi *s1, lssi *s2)
{
if (s1->si == s2->si)
{
if (s1->lookahead == s2->lookahead)
return true;
return bitset_equal_p (s1->lookahead, s2->lookahead);
}
return false;
}
typedef gl_list_t lssi_list;
static inline bool
append_lssi (lssi *sn, Hash_table *visited, lssi_list queue)
{
if (hash_lookup (visited, sn))
{
sn->free_lookahead = false;
lssi_free (sn);
return false;
}
hash_xinsert (visited, sn);
gl_list_add_last (queue, sn);
return true;
}
#if 0
static void
lssi_print (lssi *l)
{
FILE *out = stderr;
print_state_item (&state_items[l->si], out);
if (l->lookahead)
{
fprintf (out, "FOLLOWL = { ");
bitset_iterator biter;
symbol_number sin;
BITSET_FOR_EACH (biter, l->lookahead, sin, 0)
fprintf (out, "%s, \n", symbols[sin]->tag);
fprintf (out, "}\n");
}
}
#endif
/**
* Compute the set of state-items that can reach the given conflict item via
* a combination of transitions or production steps.
*/
static bitset
eligible_state_items (state_item *target)
{
bitset result = bitset_create (nstate_items, BITSET_FIXED);
state_item_list queue =
gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
(const void **) &target);
while (gl_list_size (queue) > 0)
{
state_item *si = (state_item *) gl_list_get_at (queue, 0);
gl_list_remove_at (queue, 0);
if (bitset_test (result, si - state_items))
continue;
bitset_set (result, si - state_items);
// search all reverse edges.
bitset rsi = si->revs;
bitset_iterator biter;
state_item_number sin;
BITSET_FOR_EACH (biter, rsi, sin, 0)
gl_list_add_last (queue, &state_items[sin]);
}
gl_list_free (queue);
return result;
}
/**
* Compute the shortest lookahead-sensitive path from the start state to
* this conflict. If optimized is true, only consider parser states
* that can reach the conflict state.
*/
state_item_list
shortest_path_from_start (state_item_number target, symbol_number next_sym)
{
bitset eligible = eligible_state_items (&state_items[target]);
Hash_table *visited = hash_initialize (32,
NULL,
(Hash_hasher) lssi_hasher,
(Hash_comparator) lssi_comparator,
(Hash_data_freer) lssi_free);
bitset il = bitset_create (nsyms, BITSET_FIXED);
bitset_set (il, 0);
lssi *init = new_lssi (0, NULL, il, true);
lssi_list queue = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
NULL, true);
append_lssi (init, visited, queue);
// breadth-first search
bool finished = false;
lssi *n;
while (gl_list_size (queue) > 0)
{
n = (lssi *) gl_list_get_at (queue, 0);
gl_list_remove_at (queue, 0);
state_item_number last = n->si;
if (target == last && bitset_test (n->lookahead, next_sym))
{
finished = true;
break;
}
state_item *si = &state_items[last];
// Transitions don't change follow_L
if (si->trans >= 0)
{
if (bitset_test (eligible, si->trans))
{
lssi *next = new_lssi (si->trans, n, n->lookahead, false);
append_lssi (next, visited, queue);
}
}
// For production steps, follow_L is based on the symbol after the
// nonterminal being produced.
// if no such symbol exists, follow_L is unchanged
// if the symbol is a terminal, follow_L only contains that terminal
// if the symbol is not nullable, follow_L is its FIRSTS set
// if the symbol is nullable, follow_L is its FIRSTS set unioned with
// this logic applied to the next symbol in the rule
if (si->prods)
{
// Compute follow_L as above
bitset lookahead = bitset_create (nsyms, BITSET_FIXED);
item_number *pos = si->item + 1;
for (; !item_number_is_rule_number (*pos); ++pos)
{
item_number it = *pos;
if (ISTOKEN (it))
{
bitset_set (lookahead, it);
break;
}
else
{
bitset_union (lookahead, lookahead, FIRSTS (it));
if (!nullable[it - ntokens])
break;
}
}
if (item_number_is_rule_number (*pos))
bitset_union (lookahead, n->lookahead, lookahead);
bool lookahead_used = false;
// Try all possible production steps within this parser state.
bitset_iterator biter;
state_item_number nextSI;
BITSET_FOR_EACH (biter, si->prods, nextSI, 0)
{
if (!bitset_test (eligible, nextSI))
continue;
lssi *next = new_lssi (nextSI, n, lookahead,
!lookahead_used);
lookahead_used = append_lssi (next, visited, queue)
|| lookahead_used;
}
if (!lookahead_used)
bitset_free (lookahead);
}
}
bitset_free (eligible);
if (!finished)
{
gl_list_free (queue);
fputs ("Cannot find shortest path to conflict state.", stderr);
abort ();
}
state_item_list res =
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
for (lssi *sn = n; sn != NULL; sn = sn->parent)
gl_list_add_first (res, &state_items[sn->si]);
hash_free (visited);
gl_list_free (queue);
if (trace_flag & trace_cex)
{
fputs ("REDUCE ITEM PATH:\n", stderr);
gl_list_iterator_t it = gl_list_iterator (res);
const void *sip;
while (gl_list_iterator_next (&it, &sip, NULL))
state_item_print ((state_item *) sip, stderr, "");
}
return res;
}
/**
* Determine if the given terminal is in the given symbol set or can begin
* a nonterminal in the given symbol set.
*/
bool
intersect_symbol (symbol_number sym, bitset syms)
{
if (!syms)
return true;
bitset_iterator biter;
symbol_number sn;
BITSET_FOR_EACH (biter, syms, sn, 0)
{
if (sym == sn)
return true;
if (ISVAR (sn) && bitset_test (FIRSTS (sn), sym))
return true;
}
return false;
}
/**
* Determine if any symbol in ts is in syms
* or can begin a nonterminal syms.
*/
bool
intersect (bitset ts, bitset syms)
{
if (!syms || !ts)
return true;
bitset_iterator biter;
symbol_number sn;
BITSET_FOR_EACH (biter, syms, sn, 0)
{
if (bitset_test (ts, sn))
return true;
if (ISVAR (sn) && !bitset_disjoint_p (ts, FIRSTS (sn)))
return true;
}
return false;
}
/**
* Compute a list of state_items that have a production to n with respect
* to its lookahead
*/
state_item_list
lssi_reverse_production (const state_item *si, bitset lookahead)
{
state_item_list result =
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
if (SI_TRANSITION (si))
return result;
// A production step was made to the current lalr_item.
// Check that the next symbol in the parent lalr_item is
// compatible with the lookahead.
bitset_iterator biter;
state_item_number sin;
BITSET_FOR_EACH (biter, si->revs, sin, 0)
{
state_item *prevsi = &state_items[sin];
if (!production_allowed (prevsi, si))
continue;
bitset prev_lookahead = prevsi->lookahead;
if (item_number_is_rule_number (*(prevsi->item)))
{
// reduce item
// Check that some lookaheads can be preserved.
if (!intersect (prev_lookahead, lookahead))
continue;
}
else
{
// shift item
if (lookahead)
{
// Check that lookahead is compatible with the first
// possible symbols in the rest of the production.
// Alternatively, if the rest of the production is
// nullable, the lookahead must be compatible with
// the lookahead of the corresponding item.
bool applicable = false;
bool nlable = true;
for (item_number *pos = prevsi->item + 1;
!applicable && nlable && item_number_is_symbol_number (*pos);
++pos)
{
symbol_number next_sym = item_number_as_symbol_number (*pos);
if (ISTOKEN (next_sym))
{
applicable = intersect_symbol (next_sym, lookahead);
nlable = false;
}
else
{
applicable = intersect (FIRSTS (next_sym), lookahead);
if (!applicable)
nlable = nullable[next_sym - ntokens];
}
}
if (!applicable && !nlable)
continue;
}
}
gl_list_add_last (result, prevsi);
}
return result;
}