blob: 57b095122c59c27bd53d1fbba6c1f313bf6a4873 [file] [log] [blame]
/* Parser simulator for unifying counterexample search
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "parse-simulation.h"
#include <gl_linked_list.h>
#include <gl_xlist.h>
#include <stdlib.h>
#include "lssi.h"
#include "nullable.h"
struct parse_state
{
// Path of state-items the parser has traversed.
struct si_chunk
{
// Elements newly added in this chunk.
state_item_list contents;
// Properties of the linked list this chunk represents.
const state_item *head_elt;
const state_item *tail_elt;
size_t total_size;
} state_items;
// List of derivations of the symbols.
struct deriv_chunk
{
derivation_list contents;
const derivation *head_elt;
const derivation *tail_elt;
size_t total_size;
} derivs;
struct parse_state *parent;
int reference_count;
// Incremented during productions, decremented during reductions.
int depth;
// Whether the contents of the chunks should be prepended or
// appended to the list the chunks represent.
bool prepend;
// Causes chunk contents to be freed when the reference count is
// one. Used when only the chunk metadata will be needed.
bool free_contents_early;
};
static void
ps_si_prepend (parse_state *ps, const state_item *si)
{
struct si_chunk *sic = &ps->state_items;
gl_list_add_first (sic->contents, si);
sic->head_elt = si;
++sic->total_size;
if (!sic->tail_elt)
sic->tail_elt = si;
}
static void
ps_si_append (parse_state *ps, const state_item *si)
{
struct si_chunk *sic = &ps->state_items;
gl_list_add_last (sic->contents, si);
sic->tail_elt = si;
++sic->total_size;
if (!sic->head_elt)
sic->head_elt = si;
}
static void
ps_derivs_prepend (parse_state *ps, derivation *d)
{
struct deriv_chunk *dc = &ps->derivs;
derivation_list_prepend (dc->contents, d);
dc->head_elt = d;
++dc->total_size;
if (!dc->tail_elt)
dc->tail_elt = d;
}
static void
ps_derivs_append (parse_state *ps, derivation *d)
{
struct deriv_chunk *dc = &ps->derivs;
derivation_list_append (dc->contents, d);
dc->tail_elt = d;
++dc->total_size;
if (!dc->head_elt)
dc->head_elt = d;
}
static int allocs = 0;
static int frees = 0;
static parse_state *
empty_parse_state (void)
{
parse_state *res = xcalloc (1, sizeof *res);
res->state_items.contents
= gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
res->derivs.contents = derivation_list_new ();
++allocs;
return res;
}
parse_state *
new_parse_state (const state_item *si)
{
parse_state *res = empty_parse_state ();
ps_si_append (res, si);
ps_derivs_append (res, derivation_dot ());
return res;
}
static parse_state *
copy_parse_state (bool prepend, parse_state *parent)
{
parse_state *res = xmalloc (sizeof *res);
*res = *parent;
res->state_items.contents
= gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
res->derivs.contents = derivation_list_new ();
res->parent = parent;
res->prepend = prepend;
res->reference_count = 0;
res->free_contents_early = false;
parse_state_retain (parent);
++allocs;
return res;
}
bool
parse_state_derivation_completed (const parse_state *ps)
{
return ps->derivs.total_size == 1;
}
derivation *
parse_state_derivation (const parse_state *ps)
{
return (derivation *) ps->derivs.head_elt;
}
const state_item *
parse_state_head (const parse_state *ps)
{
return ps->state_items.head_elt;
}
const state_item *
parse_state_tail (const parse_state *ps)
{
return ps->state_items.tail_elt;
}
int
parse_state_length (const parse_state *ps)
{
return ps->state_items.total_size;
}
int
parse_state_depth (const parse_state *ps)
{
return ps->depth;
}
void
parse_state_retain (parse_state *ps)
{
++ps->reference_count;
}
void
parse_state_free_contents_early (parse_state *ps)
{
ps->free_contents_early = true;
}
void
free_parse_state (parse_state *original_ps)
{
bool free_contents = true;
parse_state *parent_ps = NULL;
for (parse_state *ps = original_ps; ps && free_contents; ps = parent_ps)
{
--ps->reference_count;
free_contents = (ps->reference_count == 1 && ps->free_contents_early)
|| (ps->reference_count == 0 && !ps->free_contents_early);
// need to keep the parse state around for visited hash set,
// but its contents and parent can be freed
if (free_contents)
{
if (ps->state_items.contents)
gl_list_free (ps->state_items.contents);
if (ps->derivs.contents)
derivation_list_free (ps->derivs.contents);
}
parent_ps = ps->parent;
if (ps->reference_count <= 0)
{
free (ps);
++frees;
}
}
}
size_t
parse_state_hasher (const parse_state *ps, size_t max)
{
const struct si_chunk *sis = &ps->state_items;
return ((state_item *) sis->head_elt - state_items +
(state_item *) sis->tail_elt - state_items + sis->total_size) % max;
}
bool
parse_state_comparator (const parse_state *ps1, const parse_state *ps2)
{
const struct si_chunk *sis1 = &ps1->state_items;
const struct si_chunk *sis2 = &ps2->state_items;
return sis1->head_elt == sis2->head_elt
&& sis1->tail_elt == sis2->tail_elt
&& sis1->total_size == sis2->total_size;
}
void
parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions)
{
// traverse to the root parse_state,
// which will have a list of all completed productions.
const parse_state *root_ps = ps;
while (root_ps->parent)
root_ps = root_ps->parent;
state_item_list sis = root_ps->state_items.contents;
int count = 0;
state_item *last = NULL;
state_item *next = NULL;
for (gl_list_iterator_t it = gl_list_iterator (sis);
state_item_list_next (&it, &next);
)
{
if (last && last->state == next->state)
++count;
last = next;
}
*productions = count;
*shifts = root_ps->state_items.total_size - count;
}
typedef void (*chunk_append_fn) (gl_list_t, const void *);
// A version of gl_list_add_last which has the chunk_append_fn
// signature.
static void
list_add_last (gl_list_t list, const void *elt)
{
gl_list_add_last (list, elt);
}
// takes an array of n gl_lists and flattens them into two list
// based off of the index split
static void
list_flatten_and_split (gl_list_t *list, gl_list_t *rets, int split, int n,
chunk_append_fn append_fn)
{
int ret_index = 0;
int ret_array = 0;
for (int i = 0; i < n; ++i)
{
const void *p = NULL;
gl_list_iterator_t it = gl_list_iterator (list[i]);
while (gl_list_iterator_next (&it, &p, NULL))
if (p)
{
gl_list_t l = (gl_list_t) p;
const void *si = NULL;
gl_list_iterator_t it2 = gl_list_iterator (l);
while (gl_list_iterator_next (&it2, &si, NULL))
{
if (ret_index++ == split)
++ret_array;
if (rets[ret_array])
append_fn (rets[ret_array], si);
}
gl_list_iterator_free (&it2);
}
gl_list_iterator_free (&it);
}
}
static parse_state_list
parse_state_list_new (void)
{
return gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
(gl_listelement_dispose_fn)free_parse_state,
true);
}
static void
parse_state_list_append (parse_state_list pl, parse_state *ps)
{
parse_state_retain (ps);
gl_list_add_last (pl, ps);
}
// Emulates a reduction on a parse state by popping some amount of
// derivations and state_items off of the parse_state and returning
// the result in ret. Returns the derivation of what's popped.
static derivation_list
parser_pop (parse_state *ps, int deriv_index,
int si_index, parse_state *ret)
{
// prepend sis, append sis, prepend derivs, append derivs
gl_list_t chunks[4];
for (int i = 0; i < 4; ++i)
chunks[i] = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
for (parse_state *pn = ps; pn != NULL; pn = pn->parent)
if (pn->prepend)
{
gl_list_add_last (chunks[0], pn->state_items.contents);
gl_list_add_last (chunks[2], pn->derivs.contents);
}
else
{
gl_list_add_first (chunks[1], pn->state_items.contents);
gl_list_add_first (chunks[3], pn->derivs.contents);
}
derivation_list popped_derivs = derivation_list_new ();
gl_list_t ret_chunks[4] = { ret->state_items.contents, NULL,
ret->derivs.contents, popped_derivs
};
list_flatten_and_split (chunks, ret_chunks, si_index, 2,
list_add_last);
list_flatten_and_split (chunks + 2, ret_chunks + 2, deriv_index, 2,
(chunk_append_fn)derivation_list_append);
size_t s_size = gl_list_size (ret->state_items.contents);
ret->state_items.total_size = s_size;
if (s_size > 0)
{
ret->state_items.tail_elt = gl_list_get_at (ret->state_items.contents,
s_size - 1);
ret->state_items.head_elt =
gl_list_get_at (ret->state_items.contents, 0);
}
else
{
ret->state_items.tail_elt = NULL;
ret->state_items.head_elt = NULL;
}
size_t d_size = gl_list_size (ret->derivs.contents);
ret->derivs.total_size = d_size;
if (d_size > 0)
{
ret->derivs.tail_elt = gl_list_get_at (ret->derivs.contents,
d_size - 1);
ret->derivs.head_elt = gl_list_get_at (ret->derivs.contents, 0);
}
else
{
ret->derivs.tail_elt = NULL;
ret->derivs.head_elt = NULL;
}
for (int i = 0; i < 4; ++i)
gl_list_free (chunks[i]);
return popped_derivs;
}
void
parse_state_lists (parse_state *ps, state_item_list *sitems,
derivation_list *derivs)
{
parse_state *temp = empty_parse_state ();
size_t si_size = ps->state_items.total_size;
size_t deriv_size = ps->derivs.total_size;
derivation_list dl = parser_pop (ps, si_size, deriv_size, temp);
*sitems = temp->state_items.contents;
*derivs = temp->derivs.contents;
// prevent the return lists from being freed
temp->state_items.contents = NULL;
temp->derivs.contents = NULL;
free_parse_state (temp);
derivation_list_free (dl);
}
/**
* Compute the parse states that result from taking a transition on
* nullable symbols whenever possible from the given state_item.
*/
static void
nullable_closure (parse_state *ps, state_item *si, parse_state_list state_list)
{
parse_state *current_ps = ps;
state_item_number prev_sin = si - state_items;
for (state_item_number sin = si->trans; sin != -1;
prev_sin = sin, sin = state_items[sin].trans)
{
state_item *psi = &state_items[prev_sin];
symbol_number sp = item_number_as_symbol_number (*psi->item);
if (ISTOKEN (sp) || !nullable[sp - ntokens])
break;
state_item *nsi = &state_items[sin];
current_ps = copy_parse_state (false, current_ps);
ps_si_append (current_ps, nsi);
ps_derivs_append (current_ps,
derivation_new (sp, derivation_list_new (),
state_item_rule (nsi)));
parse_state_list_append (state_list, current_ps);
}
}
parse_state_list
simulate_transition (parse_state *ps)
{
const state_item *si = ps->state_items.tail_elt;
symbol_number sym = item_number_as_symbol_number (*si->item);
// Transition on the same next symbol, taking nullable
// symbols into account.
parse_state_list result = parse_state_list_new ();
state_item_number si_next = si->trans;
// Check for disabled transition, shouldn't happen as any
// state_items that lead to these should be disabled.
if (si_next < 0)
return result;
parse_state *next_ps = copy_parse_state (false, ps);
ps_si_append (next_ps, &state_items[si_next]);
ps_derivs_append (next_ps, derivation_new_leaf (sym));
parse_state_list_append (result, next_ps);
nullable_closure (next_ps, &state_items[si_next], result);
return result;
}
/**
* Determine if the given symbols are equal or their first sets
* intersect.
*/
static bool
compatible (symbol_number sym1, symbol_number sym2)
{
if (sym1 == sym2)
return true;
if (ISTOKEN (sym1) && ISVAR (sym2))
return bitset_test (FIRSTS (sym2), sym1);
else if (ISVAR (sym1) && ISTOKEN (sym2))
return bitset_test (FIRSTS (sym1), sym2);
else if (ISVAR (sym1) && ISVAR (sym2))
return !bitset_disjoint_p (FIRSTS (sym1), FIRSTS (sym2));
else
return false;
}
parse_state_list
simulate_production (parse_state *ps, symbol_number compat_sym)
{
parse_state_list result = parse_state_list_new ();
const state_item *si = parse_state_tail (ps);
if (si->prods)
{
bitset_iterator biter;
state_item_number sin;
BITSET_FOR_EACH (biter, si->prods, sin, 0)
{
// Take production step only if lhs is not nullable and
// if first rhs symbol is compatible with compat_sym
state_item *next = &state_items[sin];
item_number *itm1 = next->item;
if (!compatible (*itm1, compat_sym) || !production_allowed (si, next))
continue;
parse_state *next_ps = copy_parse_state (false, ps);
ps_si_append (next_ps, next);
parse_state_list_append (result, next_ps);
if (next_ps->depth >= 0)
++next_ps->depth;
nullable_closure (next_ps, next, result);
}
}
return result;
}
// simulates a reduction on the given parse state, conflict_item is the
// item associated with ps's conflict. symbol_set is a lookahead set this
// reduction must be compatible with
parse_state_list
simulate_reduction (parse_state *ps, int rule_len, bitset symbol_set)
{
parse_state_list result = parse_state_list_new ();
int s_size = ps->state_items.total_size;
int d_size = ps->derivs.total_size;
if (ps->depth >= 0)
d_size--; // account for dot
parse_state *new_root = empty_parse_state ();
derivation_list popped_derivs =
parser_pop (ps, d_size - rule_len,
s_size - rule_len - 1, new_root);
// update derivation
state_item *si = (state_item *) ps->state_items.tail_elt;
const rule *r = item_rule (si->item);
symbol_number lhs = r->lhs->number;
derivation *deriv = derivation_new (lhs, popped_derivs, state_item_rule (si));
--new_root->depth;
ps_derivs_append (new_root, deriv);
if (s_size != rule_len + 1)
{
state_item *tail = (state_item *) new_root->state_items.tail_elt;
ps_si_append (new_root, &state_items[tail->trans]);
parse_state_list_append (result, new_root);
}
else
{
// The head state_item is a production item, so we need to prepend
// with possible source state-items.
const state_item *head = ps->state_items.head_elt;
state_item_list prev = lssi_reverse_production (head, symbol_set);
// TODO: better understand what causes this case.
if (gl_list_size (prev) == 0)
{
// new_root needs to have an RC of 1 to be freed correctly here.
parse_state_retain (new_root);
free_parse_state (new_root);
}
else
{
state_item *psis = NULL;
for (gl_list_iterator_t it = gl_list_iterator (prev);
state_item_list_next (&it, &psis);
)
{
// Prepend the result from the reverse production.
parse_state *copy = copy_parse_state (true, new_root);
ps_si_prepend (copy, psis);
// Append the left hand side to the end of the parser state
copy = copy_parse_state (false, copy);
struct si_chunk *sis = &copy->state_items;
const state_item *tail = sis->tail_elt;
ps_si_append (copy, &state_items[tail->trans]);
parse_state_list_append (result, copy);
nullable_closure (copy, (state_item *) sis->tail_elt, result);
}
}
gl_list_free (prev);
}
return result;
}
parse_state_list
parser_prepend (parse_state *ps)
{
parse_state_list res = parse_state_list_new ();
const state_item *head = ps->state_items.head_elt;
symbol_number prepend_sym =
item_number_as_symbol_number (*(head->item - 1));
bitset_iterator biter;
state_item_number sin;
BITSET_FOR_EACH (biter, head->revs, sin, 0)
{
parse_state *copy = copy_parse_state (true, ps);
ps_si_prepend (copy, &state_items[sin]);
if (SI_TRANSITION (head))
ps_derivs_prepend (copy, derivation_new_leaf (prepend_sym));
parse_state_list_append (res, copy);
}
return res;
}
void
print_parse_state (parse_state *ps)
{
FILE *out = stderr;
fprintf (out, "(size %zu depth %d rc %d)\n",
ps->state_items.total_size, ps->depth, ps->reference_count);
state_item_print (ps->state_items.head_elt, out, "");
state_item_print (ps->state_items.tail_elt, out, "");
if (ps->derivs.total_size > 0)
derivation_print (ps->derivs.head_elt, out, "");
putc ('\n', out);
}