blob: a64584eec2745396b00dd2573a6e41277ac91e6a [file] [log] [blame]
/*================================================================*/
/*
JavaCup Specification for the JavaCup Specification Language
by Scott Hudson, GVU Center, Georgia Tech, August 1995
This JavaCup specification is used to implement JavaCup itself.
It specifies the parser for the JavaCup specification language.
(It also serves as a reasonable example of what a typical JavaCup
spec looks like).
The specification has the following parts:
Package and import declarations
These serve the same purpose as in a normal Java source file
(and will appear in the generated code for the parser). In this
case we are part of the java_cup package and we import both the
java_cup runtime system and Hashtable from the standard Java
utilities package.
Action code
This section provides code that is included with the class encapsulating
the various pieces of user code embedded in the grammar (i.e., the
semantic actions). This provides a series of helper routines and
data structures that the semantic actions use.
Parser code
This section provides code included in the parser class itself. In
this case we override the default error reporting routines.
Init with and scan with
These sections provide small bits of code that initialize, then
indicate how to invoke the scanner.
Symbols and grammar
These sections declare all the terminal and non terminal symbols
and the types of objects that they will be represented by at runtime,
then indicate the start symbol of the grammar (spec), and finally provide
the grammar itself (with embedded actions).
Operation of the parser
The parser acts primarily by accumulating data structures representing
various parts of the specification. Various small parts (e.g., single
code strings) are stored as static variables of the emit class and
in a few cases as variables declared in the action code section.
Terminals, non terminals, and productions are maintained as collections
accessible via static methods of those classes. In addition, two
symbol tables are kept:
symbols maintains the name to object mapping for all symbols
non_terms maintains a separate mapping containing only the non terms
Several intermediate working structures are also declared in the action
code section. These include: rhs_parts, rhs_pos, and lhs_nt which
build up parts of the current production while it is being parsed.
Author(s)
Scott Hudson, GVU Center, Georgia Tech.
Revisions
v0.9a First released version [SEH] 8/29/95
v0.9b Updated for beta language (throws clauses) [SEH] 11/25/95
*/
/*================================================================*/
package java_cup;
import java_cup.runtime.*;
import java.util.Hashtable;
/*----------------------------------------------------------------*/
action code {:

  /*--- production part labeling ---*/

  /** Return a production part carrying the given label.  A part that is an
   *  action, or a request with no label (null), is handed back unchanged;
   *  otherwise a new symbol_part is built around the same symbol with the
   *  label attached.
   */
  protected production_part add_lab(production_part part, String lab)
    throws internal_error
    {
      boolean needs_label = (lab != null) && !part.is_action();
      if (!needs_label) return part;

      symbol_part unlabeled = (symbol_part)part;
      return new symbol_part(unlabeled.the_symbol(), lab);
    }

  /*--- right hand side accumulation ---*/

  /** largest number of parts (symbols and actions) allowed in one rhs */
  protected final int MAX_RHS = 200;

  /** buffer holding the parts of the right hand side being parsed */
  protected production_part[] rhs_parts = new production_part[MAX_RHS];

  /** number of entries of rhs_parts currently in use */
  protected int rhs_pos = 0;

  /** Begin accumulating a fresh right hand side (old entries are simply
   *  overwritten later; only the fill position is reset).
   */
  protected void new_rhs()
    {
      rhs_pos = 0;
    }

  /** Store one more part at the end of the current right hand side.
   *  @throws java.lang.Exception if the buffer already holds MAX_RHS parts.
   */
  protected void add_rhs_part(production_part part) throws java.lang.Exception
    {
      if (rhs_pos < MAX_RHS)
        {
          rhs_parts[rhs_pos++] = part;
          return;
        }

      throw new Exception("Internal Error: Productions limited to " +
                          MAX_RHS + " symbols and actions");
    }

  /*--- multipart (dotted) names ---*/

  /** dotted name accumulated so far; empty when no name is in progress */
  protected String multipart_name = "";

  /** Add one more segment to the accumulated dotted name, inserting a "."
   *  separator in front of every segment except the first.
   */
  protected void append_multipart(String name)
    {
      String separator = (multipart_name.length() == 0) ? "" : ".";
      multipart_name = multipart_name + separator + name;
    }

  /*--- symbol tables and grammar state ---*/

  /** every declared symbol: name -> production part */
  protected Hashtable symbols = new Hashtable();

  /** non terminals only: name -> non_terminal */
  protected Hashtable non_terms = new Hashtable();

  /** start non terminal of the grammar, null until one is chosen */
  protected non_terminal start_nt = null;

  /** left hand side non terminal of the production being parsed */
  protected non_terminal lhs_nt;

:};
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
parser code {:

  /* The two routines below replace the default error reporting so that
     all messages go through the lexer's emit_error(). */

  /** Report an error the parse cannot continue from: finish parsing, emit
   *  the message, print a final notice on stderr and terminate the JVM
   *  with exit status 1.  The info argument is not used.
   */
  public void report_fatal_error(String message, Object info)
    {
      done_parsing();
      lexer.emit_error(message);
      System.err.println("Can't recover from previous error(s), giving up.");
      System.exit(1);
    }

  /** Report an ordinary error by emitting the message through the lexer.
   *  The info argument is not used.
   */
  public void report_error(String message, Object info)
    {
      lexer.emit_error(message);
    }

:};
/*----------------------------------------------------------------*/
/* initialization code: set up the lexer via lexer.init() */
init with {: lexer.init(); :};
/* scanner invocation: each token comes from lexer.next_token() */
scan with {: return lexer.next_token(); :};
/*----------------------------------------------------------------*/
/* terminals represented at runtime by a plain token (keywords, punctuation) */
terminal java_cup.runtime.token
PACKAGE, IMPORT, CODE, ACTION, PARSER, TERMINAL, NON, INIT, SCAN, WITH,
START, SEMI, COMMA, STAR, DOT, COLON, COLON_COLON_EQUALS, BAR,
DEBUG;
/* terminals represented by a str_token; the actions read their text
   through the str_val field */
terminal java_cup.runtime.str_token ID, CODE_STRING;
/* non terminals represented by a plain symbol; the actions never read a
   value from these */
non terminal java_cup.runtime.symbol
spec, package_spec, import_list, code_part, action_code_part,
parser_code_part, symbol_list, start_spec, production_list,
multipart_id, import_spec, import_id, init_code, scan_code, symbol,
debug_grammar,
type_id, term_name_list, non_term_name_list, production, prod_part_list,
prod_part, new_term_id, new_non_term_id, rhs_list, rhs, empty;
/* non terminals whose RESULT.str_val is set by their actions and read by
   the enclosing productions */
non terminal java_cup.runtime.str_token nt_id, symbol_id, label_id, opt_label;
/*----------------------------------------------------------------*/
/* the grammar's start symbol: a complete specification */
start with spec;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* A complete specification.  The first alternative lists the sections in
   their required order; its leading action seeds the symbol tables before
   any section is parsed.  The second alternative is an error recovery
   path that resumes at the symbol declarations. */
spec ::=
{:
/* declare "error" as a terminal */
symbols.put("error", new symbol_part(terminal.error));
/* declare start non terminal */
non_terms.put("$START", non_terminal.START_nt);
:}
package_spec
import_list
code_part
debug_grammar
init_code
scan_code
symbol_list
start_spec
production_list
|
/* error recovery assuming something went wrong before symbols
and we have TERMINAL or NON TERMINAL to sync on. if we get
an error after that, we recover inside symbol_list or
production_list
*/
error
symbol_list
start_spec
production_list
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional package declaration: PACKAGE dotted.name ; */
package_spec ::=
    PACKAGE multipart_id
    {:
      /* multipart_id has left the dotted name in multipart_name: record
         it as the package name, then clear the accumulator so the next
         multipart name starts from scratch */
      emit.package_name = multipart_name;
      multipart_name = "";
    :}
    SEMI
  |
    empty
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* zero or more import declarations (left recursive list) */
import_list ::=
import_list
import_spec
|
empty
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one import declaration: IMPORT dotted.name ;  or  IMPORT dotted.* ; */
import_spec ::=
    IMPORT import_id
    {:
      /* import_id has left the imported name in multipart_name: push it
         on the list of imports, then clear the accumulator for the next
         multipart name */
      emit.import_list.push(multipart_name);
      multipart_name = "";
    :}
    SEMI
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* user code sections: action code first, then parser code (each of the
   two parts has an empty alternative) */
code_part ::= action_code_part parser_code_part ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional section: ACTION CODE {: ... :} ;
   the text of the code string is stored in emit.action_code */
action_code_part ::=
ACTION CODE CODE_STRING:user_code SEMI
{:
/* save the user included code string */
emit.action_code = user_code.str_val;
:}
|
empty
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional section: PARSER CODE {: ... :} ;
   the text of the code string is stored in emit.parser_code */
parser_code_part ::=
PARSER CODE CODE_STRING:user_code SEMI
{:
/* save the user included code string */
emit.parser_code = user_code.str_val;
:}
|
empty
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional section: INIT WITH {: ... :} ;
   the text of the code string is stored in emit.init_code */
init_code ::=
INIT WITH CODE_STRING:user_code SEMI
{:
/* save the user code */
emit.init_code = user_code.str_val;
:}
|
empty
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional section: SCAN WITH {: ... :} ;
   the text of the code string is stored in emit.scan_code */
scan_code ::=
SCAN WITH CODE_STRING:user_code SEMI
{:
/* save the user code */
emit.scan_code = user_code.str_val;
:}
|
empty
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional section: DEBUG WITH dotted.name ; */
debug_grammar ::=
    DEBUG WITH multipart_id SEMI
    {:
      /* multipart_id has left the dotted procedure name in
         multipart_name: record it, then clear the accumulator for the
         next multipart name */
      emit.debug_grammar = multipart_name;
      multipart_name = "";
    :}
  |
    empty
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more symbol declarations (left recursive list) */
symbol_list ::= symbol_list symbol | symbol;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* One declaration of terminals or of non terminals.  In every alternative
   the action just before the closing SEMI clears multipart_name, so the
   type name accumulated for this declaration does not carry over into the
   next multipart name. */
symbol ::=
    /* TERMINAL type name, name, ... ; */
    TERMINAL type_id term_name_list
    {: multipart_name = ""; :}
    SEMI
  |
    /* NON TERMINAL type name, name, ... ; */
    NON TERMINAL type_id non_term_name_list
    {: multipart_name = ""; :}
    SEMI
  |
    /* error recovery productions -- sync on semicolon */
    TERMINAL error
    {: multipart_name = ""; :}
    SEMI
  |
    NON TERMINAL error
    {: multipart_name = ""; :}
    SEMI
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* comma separated list of one or more new terminal names */
term_name_list ::= term_name_list COMMA new_term_id | new_term_id;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* comma separated list of one or more new non terminal names */
non_term_name_list ::=
non_term_name_list
COMMA
new_non_term_id
|
new_non_term_id
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional start declaration: START WITH name ; */
start_spec ::=
    START WITH nt_id:start_name
    {:
      /* the name must refer to an already declared non terminal */
      String requested_name = start_name.str_val;
      non_terminal declared_start =
        (non_terminal)non_terms.get(requested_name);

      if (declared_start != null)
        {
          /* remember it as the grammar's start non terminal */
          start_nt = declared_start;

          /* build the special start production
               $START ::= start_nt EOF
             using the shared rhs buffer, and leave the buffer reset */
          new_rhs();
          add_rhs_part(new symbol_part(start_nt));
          add_rhs_part(new symbol_part(terminal.EOF));
          emit.start_production =
            new production(non_terminal.START_nt, rhs_parts, rhs_pos);
          new_rhs();
        }
      else
        {
          lexer.emit_error("Start non terminal \"" + requested_name +
                           "\" has not been declared");
        }
    :}
    SEMI
  |
    empty
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more productions (left recursive list) */
production_list ::= production_list production | production;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one production:  lhs ::= rhs | rhs | ... ;  */
production ::=
    nt_id:lhs_id
    {:
      /* resolve the left hand side; lhs_nt stays null when the name is
         not a declared non terminal, which the rhs action checks for */
      lhs_nt = (non_terminal)non_terms.get(lhs_id.str_val);

      /* complain about an undeclared lhs, but only when no other error
         has been counted so far */
      boolean undeclared_lhs = (lhs_nt == null);
      if (undeclared_lhs && lexer.error_count == 0)
        lexer.emit_error("LHS non terminal \"" + lhs_id.str_val +
                         "\" has not been declared");

      /* start accumulating the first right hand side */
      new_rhs();
    :}
    COLON_COLON_EQUALS rhs_list SEMI
  |
    /* error recovery -- report and sync on semicolon */
    error
    {: lexer.emit_error("Syntax Error"); :}
    SEMI
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more alternative right hand sides separated by BAR */
rhs_list ::= rhs_list BAR rhs | rhs;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one right hand side; its parts have been collected in rhs_parts */
rhs ::=
    prod_part_list
    {:
      /* nothing is built when the lhs was undeclared (lhs_nt is null) */
      if (lhs_nt != null)
        {
          /* build the production from the accumulated parts; the object
             is not kept in a local here */
          new production(lhs_nt, rhs_parts, rhs_pos);

          /* with no start non terminal chosen yet, the lhs of this
             production becomes the start non terminal and the special
             start production  $START ::= start_nt EOF  is built for it */
          boolean needs_start = (start_nt == null);
          if (needs_start)
            {
              start_nt = lhs_nt;

              new_rhs();
              add_rhs_part(new symbol_part(start_nt));
              add_rhs_part(new symbol_part(terminal.EOF));
              emit.start_production =
                new production(non_terminal.START_nt, rhs_parts, rhs_pos);
              new_rhs();
            }
        }

      /* whatever happened above, the next rhs starts out empty */
      new_rhs();
    :}
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* zero or more production parts (left recursive list; may be empty) */
prod_part_list ::= prod_part_list prod_part | empty;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one part of a right hand side: a (possibly labeled) symbol or an
   embedded action code string */
prod_part ::=
    symbol_id:symid opt_label:labid
    {:
      /* find the production part registered under this symbol name */
      production_part declared_part =
        (production_part)symbols.get(symid.str_val);

      if (declared_part != null)
        {
          /* append it to the rhs, with the label attached if one was
             given (labid.str_val is null otherwise) */
          add_rhs_part(add_lab(declared_part, labid.str_val));
        }
      else if (lexer.error_count == 0)
        {
          /* undeclared symbol: reported only when no other error has
             been counted so far */
          lexer.emit_error("Symbol \"" + symid.str_val +
                           "\" has not been declared");
        }
    :}
  |
    CODE_STRING:code_str
    {:
      /* an embedded action becomes an action part of the rhs */
      add_rhs_part(new action_part(code_str.str_val));
    :}
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* optional label on a production part ( :name ); RESULT.str_val is the
   label text, or null when no label is present */
opt_label ::=
COLON label_id:labid
{: RESULT.str_val = labid.str_val; :}
|
empty
{: RESULT.str_val = null; :}
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* dotted identifier ( a.b.c ); every ID is handed to append_multipart()
   as it is reduced, so the name is accumulated rather than returned
   through RESULT */
multipart_id ::=
multipart_id DOT ID:another_id
{: append_multipart(another_id.str_val); :}
|
ID:an_id
{: append_multipart(an_id.str_val); :}
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* name in an import: a dotted identifier, optionally ending in ".*"
   (the "*" is appended as one more segment of the accumulated name) */
import_id ::=
multipart_id DOT STAR
{: append_multipart("*"); :}
|
multipart_id
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* type name in a symbol declaration: a dotted identifier (new_term_id and
   new_non_term_id read multipart_name when creating their symbols) */
type_id ::= multipart_id;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* name of a terminal being declared */
new_term_id ::=
    ID:term_id
    {:
      String term_name = term_id.str_val;

      if (symbols.get(term_name) == null)
        {
          /* first declaration of this name: create the terminal, passing
             it the current multipart_name, and register a production
             part for it in the symbols table */
          symbols.put(term_name,
            new symbol_part(new terminal(term_name, multipart_name)));
        }
      else
        {
          /* the name is already taken by an earlier declaration */
          lexer.emit_error("Symbol \"" + term_name +
                           "\" has already been declared");
        }
    :}
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* name of a non terminal being declared */
new_non_term_id ::=
    ID:non_term_id
    {:
      String nt_name = non_term_id.str_val;

      if (symbols.get(nt_name) == null)
        {
          /* first declaration of this name: create the non terminal,
             passing it the current multipart_name */
          non_terminal fresh_nt = new non_terminal(nt_name, multipart_name);

          /* register it in both tables: directly in non_terms, and
             wrapped in a production part in symbols */
          non_terms.put(nt_name, fresh_nt);
          symbols.put(nt_name, new symbol_part(fresh_nt));
        }
      else
        {
          /* the name is already taken by an earlier declaration */
          lexer.emit_error( "Symbol \"" + nt_name +
                            "\" has already been declared");
        }
    :}
  ;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* identifier used where a non terminal name is expected; passes the
   identifier text up in RESULT.str_val */
nt_id ::=
ID:the_id
{: RESULT.str_val = the_id.str_val; :}
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* identifier used where a symbol name is expected in a right hand side;
   passes the identifier text up in RESULT.str_val */
symbol_id ::=
ID:the_id
{: RESULT.str_val = the_id.str_val; :}
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* identifier used as a label on a production part; passes the identifier
   text up in RESULT.str_val */
label_id ::=
ID:the_id
{: RESULT.str_val = the_id.str_val; :}
;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* the empty production, used wherever a section or list may be absent */
empty ::= /* nothing */;
/*----------------------------------------------------------------*/