/*
[The "BSD license"]
Copyright (c) 2005-2011 Terence Parr
All rights reserved.
Grammar conversion to ANTLR v3:
Copyright (c) 2011 Sam Harwell
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** Read in an ANTLR grammar and build an AST. Try not to do
* any actions; just build the tree.
*
* The phases are:
*
* antlr.g (this file)
* assign.types.g
* define.g
* buildnfa.g
* antlr.print.g (optional)
* codegen.g
*
* Terence Parr
* University of San Francisco
* 2005
*/
grammar ANTLR;
options
{
output=AST;
ASTLabelType=GrammarAST;
}
tokens
{
//OPTIONS='options';
//TOKENS='tokens';
LEXER='lexer';
PARSER='parser';
CATCH='catch';
FINALLY='finally';
GRAMMAR='grammar';
PRIVATE='private';
PROTECTED='protected';
PUBLIC='public';
RETURNS='returns';
THROWS='throws';
TREE='tree';
RULE;
PREC_RULE;
RECURSIVE_RULE_REF; // flip recursive RULE_REF to RECURSIVE_RULE_REF in prec rules
BLOCK;
OPTIONAL;
CLOSURE;
POSITIVE_CLOSURE;
SYNPRED;
RANGE;
CHAR_RANGE;
EPSILON;
ALT;
EOR;
EOB;
EOA; // end of alt
ID;
ARG;
ARGLIST;
RET;
LEXER_GRAMMAR;
PARSER_GRAMMAR;
TREE_GRAMMAR;
COMBINED_GRAMMAR;
INITACTION;
FORCED_ACTION; // {{...}} always executed, even during synpreds
LABEL; // $x used in rewrite rules
TEMPLATE;
SCOPE='scope';
IMPORT='import';
GATED_SEMPRED; // {p}? =>
SYN_SEMPRED; // (...) => it's a manually-specified synpred converted to sempred
BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
FRAGMENT='fragment';
DOT;
REWRITES;
}
@lexer::header {
package org.antlr.grammar.v3;
import org.antlr.tool.ErrorManager;
import org.antlr.tool.Grammar;
}
@parser::header {
package org.antlr.grammar.v3;
import org.antlr.tool.ErrorManager;
import org.antlr.tool.Grammar;
import org.antlr.tool.GrammarAST;
import org.antlr.misc.IntSet;
import org.antlr.tool.Rule;
}
@lexer::members {
public boolean hasASTOperator = false;
private String fileName;
public String getFileName() {
return fileName;
}
public void setFileName(String value) {
fileName = value;
}
}
@parser::members {
protected String currentRuleName = null;
protected GrammarAST currentBlockAST = null;
protected boolean atTreeRoot; // are we matching a tree root in tree grammar?
public static ANTLRParser createParser(TokenStream input) {
ANTLRParser parser = new ANTLRParser(input);
parser.adaptor = new grammar_Adaptor(parser);
return parser;
}
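// Illustrative usage sketch (not part of the original source; exact generated
// method and return-type names may differ from what is shown here):
//
//   TokenStream tokens = new CommonTokenStream(new ANTLRLexer(input));
//   ANTLRParser p = ANTLRParser.createParser(tokens);
//   p.setFileName("T.g");
//   Object root = p.grammar_(g).getTree(); // g is the Grammar being populated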
private static class GrammarASTErrorNode extends GrammarAST {
public IntStream input;
public Token start;
public Token stop;
public RecognitionException trappedException;
public GrammarASTErrorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
super(stop);
//Console.Out.WriteLine( "start: " + start + ", stop: " + stop );
if ( stop == null ||
( stop.getTokenIndex() < start.getTokenIndex() &&
stop.getType() != Token.EOF) ) {
// sometimes resync does not consume a token (when LT(1) is in the
// follow set), so stop will be one token to the left of start; adjust.
// Also handle the case where start is the first token and no token
// is consumed during recovery; LT(-1) will return null.
stop = start;
}
this.input = input;
this.start = start;
this.stop = stop;
this.trappedException = e;
}
@Override
public boolean isNil() { return false; }
@Override
public String getText()
{
String badText = null;
if (start instanceof Token) {
int i = ((Token)start).getTokenIndex();
int j = ((Token)stop).getTokenIndex();
if (((Token)stop).getType() == Token.EOF) {
j = ((TokenStream)input).size();
}
badText = ((TokenStream)input).toString(i, j);
} else if (start instanceof Tree) {
badText = ((TreeNodeStream)input).toString(start, stop);
} else {
// users should subclass if they alter the tree type, so this
// fallback is guaranteed to be correct.
badText = "<unknown>";
}
return badText;
}
@Override
public void setText(String value) { }
@Override
public int getType() { return Token.INVALID_TOKEN_TYPE; }
@Override
public void setType(int value) { }
@Override
public String toString()
{
if (trappedException instanceof MissingTokenException)
{
return "<missing type: " +
( (MissingTokenException)trappedException ).getMissingType() +
">";
} else if (trappedException instanceof UnwantedTokenException) {
return "<extraneous: " +
( (UnwantedTokenException)trappedException ).getUnexpectedToken() +
", resync=" + getText() + ">";
} else if (trappedException instanceof MismatchedTokenException) {
return "<mismatched token: " + trappedException.token + ", resync=" + getText() + ">";
} else if (trappedException instanceof NoViableAltException) {
return "<unexpected: " + trappedException.token +
", resync=" + getText() + ">";
}
return "<error: " + getText() + ">";
}
}
static class grammar_Adaptor extends CommonTreeAdaptor {
ANTLRParser _outer;
public grammar_Adaptor(ANTLRParser outer) {
_outer = outer;
}
@Override
public Object create(Token payload) {
GrammarAST t = new GrammarAST( payload );
if (_outer != null)
t.enclosingRuleName = _outer.currentRuleName;
return t;
}
@Override
public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
GrammarAST t = new GrammarASTErrorNode(input, start, stop, e);
if (_outer != null)
t.enclosingRuleName = _outer.currentRuleName;
return t;
}
}
private Grammar grammar;
private int grammarType;
private String fileName;
public Grammar getGrammar() {
return grammar;
}
public void setGrammar(Grammar value) {
grammar = value;
}
public int getGrammarType() {
return grammarType;
}
public void setGrammarType(int value) {
grammarType = value;
}
public String getFileName() {
return fileName;
}
public void setFileName(String value) {
fileName = value;
}
private final int LA(int i) { return input.LA( i ); }
private final Token LT(int k) { return input.LT( k ); }
/*partial void createTreeAdaptor(ref ITreeAdaptor adaptor)
{
adaptor = new grammar_Adaptor(this);
}*/
protected GrammarAST setToBlockWithSet(GrammarAST b) {
/*
* alt = ^(ALT["ALT"] {b} EOA["EOA"])
* prefixWithSynpred( alt )
* return ^(BLOCK["BLOCK"] {alt} EOB["<end-of-block>"])
*/
GrammarAST alt = (GrammarAST)adaptor.create(ALT, "ALT");
adaptor.addChild(alt, b);
adaptor.addChild(alt, adaptor.create(EOA, "<end-of-alt>"));
prefixWithSynPred(alt);
GrammarAST block = (GrammarAST)adaptor.create(BLOCK, b.getToken(), "BLOCK");
adaptor.addChild(block, alt);
adaptor.addChild(alt, adaptor.create(EOB, "<end-of-block>"));
return block;
}
/** Create a copy of the alt and make it into a BLOCK; all actions,
* labels, tree operators, rewrites are removed.
*/
protected GrammarAST createBlockFromDupAlt(GrammarAST alt) {
/*
* ^(BLOCK["BLOCK"] {GrammarAST.dupTreeNoActions(alt)} EOB["<end-of-block>"])
*/
GrammarAST nalt = GrammarAST.dupTreeNoActions(alt, null);
GrammarAST block = (GrammarAST)adaptor.create(BLOCK, alt.getToken(), "BLOCK");
adaptor.addChild( block, nalt );
adaptor.addChild( block, adaptor.create( EOB, "<end-of-block>" ) );
return block;
}
/** Rewrite alt to have a synpred as first element;
* (xxx)=>xxx
* but only if they didn't specify one manually.
*/
protected void prefixWithSynPred( GrammarAST alt ) {
// if they want backtracking and it's not a lexer rule in combined grammar
String autoBacktrack = (String)grammar.getBlockOption( currentBlockAST, "backtrack" );
if ( autoBacktrack == null )
{
autoBacktrack = (String)grammar.getOption( "backtrack" );
}
if ( autoBacktrack != null && autoBacktrack.equals( "true" ) &&
!( grammarType == Grammar.COMBINED &&
Rule.getRuleType(currentRuleName) == Grammar.LEXER) &&
alt.getChild( 0 ).getType() != SYN_SEMPRED )
{
// duplicate alt and make a synpred block around that dup'd alt
GrammarAST synpredBlockAST = createBlockFromDupAlt( alt );
// Create a BACKTRACK_SEMPRED node as if user had typed this in
// Effectively we replace (xxx)=>xxx with {synpredxxx}? xxx
GrammarAST synpredAST = createSynSemPredFromBlock( synpredBlockAST,
BACKTRACK_SEMPRED );
// insert BACKTRACK_SEMPRED as first element of alt
//synpredAST.getLastSibling().setNextSibling( alt.getFirstChild() );
//synpredAST.addChild( alt.getFirstChild() );
//alt.setFirstChild( synpredAST );
GrammarAST[] children = alt.getChildrenAsArray();
adaptor.setChild( alt, 0, synpredAST );
for ( int i = 0; i < children.length; i++ )
{
if ( i < children.length - 1 )
adaptor.setChild( alt, i + 1, children[i] );
else
adaptor.addChild( alt, children[i] );
}
}
}
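// Illustrative example (not from the original source): with backtrack=true,
// an alternative in a user grammar such as
//     s : ID LPAREN args RPAREN | ID ;
// is treated as if each alt were prefixed with a generated predicate,
//     s : (ID LPAREN args RPAREN)=> ID LPAREN args RPAREN | ID ;
// i.e. a BACKTRACK_SEMPRED node is inserted as the first child of the ALT.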
protected GrammarAST createSynSemPredFromBlock( GrammarAST synpredBlockAST, int synpredTokenType ) {
// add the grammar fragment to a list so we can make fake rules for the synpreds later.
String predName = grammar.defineSyntacticPredicate( synpredBlockAST, currentRuleName );
// convert (alpha)=> into {synpredN}? where N is some pred count
// during code gen we convert to function call with templates
String synpredinvoke = predName;
GrammarAST p = (GrammarAST)adaptor.create( synpredTokenType, synpredinvoke );
// track how many decisions have synpreds
grammar.blocksWithSynPreds.add( currentBlockAST );
return p;
}
public static GrammarAST createSimpleRuleAST( String name, GrammarAST block, boolean fragment ) {
TreeAdaptor adaptor = new grammar_Adaptor(null);
GrammarAST modifier = null;
if ( fragment )
{
modifier = (GrammarAST)adaptor.create( FRAGMENT, "fragment" );
}
/*
* EOBAST = block.getLastChild()
* ^(RULE[block,"rule"] ID["name"] {modifier} ARG["ARG"] RET["RET"] SCOPE["scope"] {block} EOR[EOBAST,"<end-of-rule>"])
*/
GrammarAST rule = (GrammarAST)adaptor.create( RULE, block.getToken(), "rule" );
adaptor.addChild( rule, adaptor.create( ID, name ) );
if ( modifier != null )
adaptor.addChild( rule, modifier );
adaptor.addChild( rule, adaptor.create( ARG, "ARG" ) );
adaptor.addChild( rule, adaptor.create( RET, "RET" ) );
adaptor.addChild( rule, adaptor.create( SCOPE, "scope" ) );
adaptor.addChild( rule, block );
adaptor.addChild( rule, adaptor.create( EOR, block.getLastChild().getToken(), "<end-of-rule>" ) );
return rule;
}
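// Illustrative sketch (rule name is just an example):
//   createSimpleRuleAST("synpred1_T", block, true)
// yields a tree shaped roughly like
//   ^(RULE ID FRAGMENT ARG RET SCOPE {block} EOR)
// which is how fake fragment rules are manufactured for syntactic predicates.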
@Override
public void reportError(RecognitionException ex)
{
//Token token = null;
//try
//{
// token = LT( 1 );
//}
//catch ( TokenStreamException tse )
//{
// ErrorManager.internalError( "can't get token???", tse );
//}
Token token = ex.token;
ErrorManager.syntaxError(
ErrorManager.MSG_SYNTAX_ERROR,
grammar,
token,
"antlr: " + ex.toString(),
ex );
}
public void cleanup( GrammarAST root )
{
if ( grammarType == Grammar.LEXER )
{
String filter = (String)grammar.getOption( "filter" );
GrammarAST tokensRuleAST =
grammar.addArtificialMatchTokensRule(
root,
grammar.lexerRuleNamesInCombined,
grammar.getDelegateNames(),
filter != null && filter.equals( "true" ) );
}
}
}
public
grammar_![Grammar g]
@init
{
this.grammar = g;
Map<String, Object> opts;
}
@after
{
cleanup( $tree );
}
: //hdr:headerSpec
( ACTION )?
( cmt=DOC_COMMENT )?
gr=grammarType gid=id {grammar.setName($gid.text);} SEMI
( optionsSpec {opts = $optionsSpec.opts; grammar.setOptions(opts, $optionsSpec.start);}
)?
(ig=delegateGrammars)?
(ts=tokensSpec)?
scopes=attrScopes
(a=actions)?
r=rules
EOF
-> ^($gr $gid $cmt? optionsSpec? $ig? $ts? $scopes? $a? $r)
;
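// Example input matched by grammar_ (illustrative):
//
//   grammar T;
//   options { output=AST; }
//   tokens { IF='if'; }
//   @members { int i; }
//   r : IF ID ;
//
// producing roughly ^(COMBINED_GRAMMAR ID["T"] ^(OPTIONS ...) ^(TOKENS ...)
// ^(AMPERSAND ...) ^(RULE ...)).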
grammarType
: ( 'lexer' gr='grammar' {grammarType=Grammar.LEXER; grammar.type = Grammar.LEXER;} // pure lexer
-> LEXER_GRAMMAR[$gr]
| 'parser' gr='grammar' {grammarType=Grammar.PARSER; grammar.type = Grammar.PARSER;} // pure parser
-> PARSER_GRAMMAR[$gr]
| 'tree' gr='grammar' {grammarType=Grammar.TREE_PARSER; grammar.type = Grammar.TREE_PARSER;} // a tree parser
-> TREE_GRAMMAR[$gr]
| gr='grammar' {grammarType=Grammar.COMBINED; grammar.type = Grammar.COMBINED;} // merged parser/lexer
-> COMBINED_GRAMMAR[$gr]
)
;
actions
: (action)+
;
/** Match stuff like @parser::members {int i;} */
action
: AMPERSAND^ (actionScopeName COLON! COLON!)? id ACTION
;
/** Sometimes the scope names will collide with keywords; allow them as
* ids for action scopes.
*/
actionScopeName
: id
| l='lexer'
-> ID[$l]
| p='parser'
-> ID[$p]
;
optionsSpec returns [Map<String, Object> opts=new HashMap<String, Object>()]
: OPTIONS^ (option[$opts] SEMI!)+ RCURLY!
;
option[Map<String, Object> opts]
: id ASSIGN^ optionValue
{
$opts.put($id.text, $optionValue.value);
}
;
optionValue returns [Object value = null]
: x=id {$value = $x.text;}
| s=STRING_LITERAL {String vs = $s.text;
// remove the quotes:
$value=vs.substring(1,vs.length()-1);}
| c=CHAR_LITERAL {String vs = $c.text;
// remove the quotes:
$value=vs.substring(1,vs.length()-1);}
| i=INT {$value = Integer.parseInt($i.text);}
| ss=STAR {$value = "*";} // used for k=*
-> STRING_LITERAL[$ss]
// | cs:charSet {value = #cs;} // return set AST in this case
;
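// Illustrative: in "options { k=2; output=AST; language='Java'; backtrack=true; }"
// the values come back as Integer(2), "AST" (an id), "Java" (quotes stripped),
// and "true" (an id), respectively.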
delegateGrammars
: 'import'^ delegateGrammar (COMMA! delegateGrammar)* SEMI!
;
delegateGrammar
: lab=id ASSIGN^ g=id {grammar.importGrammar($g.tree, $lab.text);}
| g2=id {grammar.importGrammar($g2.tree,null);}
;
tokensSpec
: TOKENS^
tokenSpec*
RCURLY!
;
tokenSpec
: TOKEN_REF ( ASSIGN^ (STRING_LITERAL|CHAR_LITERAL) )? SEMI!
;
attrScopes
: (attrScope)*
;
attrScope
: 'scope'^ id ruleActions? ACTION
;
rules
: ( rule
)+
;
public
rule
@init
{
GrammarAST eob=null;
CommonToken start = (CommonToken)LT(1);
int startLine = LT(1).getLine();
}
:
( ( d=DOC_COMMENT
)?
( p1='protected' //{modifier=$p1.tree;}
| p2='public' //{modifier=$p2.tree;}
| p3='private' //{modifier=$p3.tree;}
| p4='fragment' //{modifier=$p4.tree;}
)?
ruleName=id
{
currentRuleName=$ruleName.text;
if ( grammarType==Grammar.LEXER && $p4==null )
grammar.lexerRuleNamesInCombined.add(currentRuleName);
}
( BANG )?
( aa=ARG_ACTION )?
( 'returns' rt=ARG_ACTION )?
( throwsSpec )?
( optionsSpec )?
scopes=ruleScopeSpec
(ruleActions)?
COLON
ruleAltList[$optionsSpec.opts]
SEMI
( ex=exceptionGroup )?
-> ^( RULE[$ruleName.start, "rule"]
$ruleName
// the modifier will be 0 or one of the modifiers:
$p1? $p2? $p3? $p4?
^(ARG["ARG"] $aa?)
^(RET["RET"] $rt?)
throwsSpec?
optionsSpec?
$scopes
ruleActions?
ruleAltList
$ex?
EOR[$SEMI,"<end-of-rule>"])
)
{
$tree.setTreeEnclosingRuleNameDeeply(currentRuleName);
((GrammarAST)$tree.getChild(0)).setBlockOptions($optionsSpec.opts);
}
;
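// Illustrative example: a parser rule in a user grammar like
//   r[int x] returns [int y] : A | B ;
// produces roughly
//   ^(RULE ID["r"] ^(ARG ARG_ACTION) ^(RET ARG_ACTION) ^(SCOPE)
//     ^(BLOCK ^(ALT A EOA) ^(ALT B EOA) EOB) EOR)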
ruleActions
: (ruleAction)+
;
/** Match stuff like @init {int i;} */
ruleAction
: AMPERSAND^ id ACTION
;
throwsSpec
: 'throws'^ id ( COMMA! id )*
;
ruleScopeSpec
: ( 'scope' ruleActions? ACTION )?
( 'scope' idList SEMI )*
-> ^(SCOPE[$start,"scope"] ruleActions? ACTION? idList*)
;
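// Illustrative: "scope {int n;} scope Block, Symbols;" collapses to a single
// ^(SCOPE ACTION ID["Block"] ID["Symbols"]) subtree.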
ruleAltList[Map<String, Object> opts]
@init
{
GrammarAST blkRoot = null;
GrammarAST save = currentBlockAST;
}
: ( -> BLOCK[input.LT(-1),"BLOCK"] )
{
blkRoot = (GrammarAST)$tree.getChild(0);
blkRoot.setBlockOptions($opts);
currentBlockAST = blkRoot;
}
( a1=alternative r1=rewrite
{if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a1.tree);}
-> $a1 $r1?
)
( ( OR a2=alternative r2=rewrite
{if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a2.tree);}
-> $ruleAltList $a2 $r2?
)+
|
)
-> ^({blkRoot} $ruleAltList EOB["<end-of-block>"])
;
finally { currentBlockAST = save; }
/** Build #(BLOCK ( #(ALT ...) EOB )+ ) */
block
@init
{
GrammarAST save = currentBlockAST;
}
: ( lp=LPAREN
-> BLOCK[$lp,"BLOCK"]
)
{currentBlockAST = (GrammarAST)$tree.getChild(0);}
(
// The 2nd alt and the optional branch are ambiguous due to the
// linear-approximate LL(2) issue; COLON ACTION is matched
// correctly in the 2nd alt.
(optionsSpec {((GrammarAST)$tree.getChild(0)).setOptions(grammar,$optionsSpec.opts);})?
( ruleActions )?
COLON
| ACTION COLON
)?
a=alternative r=rewrite
{
stream_alternative.add( $r.tree );
if ( LA(1)==OR || (LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR) )
prefixWithSynPred($a.tree);
}
( OR a=alternative r=rewrite
{
stream_alternative.add( $r.tree );
if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR))
prefixWithSynPred($a.tree);
}
)*
rp=RPAREN
-> ^($block optionsSpec? ruleActions? ACTION? alternative+ EOB[$rp,"<end-of-block>"])
;
finally { currentBlockAST = save; }
// ALT and EOA have indexes tracking start/stop of entire alt
alternative
: element+
-> ^(ALT[$start,"ALT"] element+ EOA[input.LT(-1),"<end-of-alt>"])
| // epsilon alt
-> ^(ALT[$start,"ALT"] EPSILON[input.LT(-1),"epsilon"] EOA[input.LT(-1),"<end-of-alt>"])
;
exceptionGroup
: exceptionHandler+ finallyClause?
| finallyClause
;
exceptionHandler
: 'catch'^ ARG_ACTION ACTION
;
finallyClause
: 'finally'^ ACTION
;
element
: elementNoOptionSpec
;
elementNoOptionSpec
@init
{
IntSet elements=null;
}
: ( ( id (ASSIGN^|PLUS_ASSIGN^) (atom|block)
)
( sub=ebnfSuffix[root_0,false]! {root_0 = $sub.tree;}
)?
| a=atom
( sub2=ebnfSuffix[$a.tree,false]! {root_0=$sub2.tree;}
)?
| ebnf
| FORCED_ACTION
| ACTION
| p=SEMPRED ( IMPLIES! {$p.setType(GATED_SEMPRED);} )?
{
grammar.blocksWithSemPreds.add(currentBlockAST);
}
| t3=tree_
)
;
atom
: range (ROOT^|BANG^)?
| (
// grammar.rule but ensure no spaces; "A . B" is not a qualified ref.
// We do this here rather than in the lexer so we can build a tree.
({LT(1).getCharPositionInLine()+LT(1).getText().length()==LT(2).getCharPositionInLine()&&
LT(2).getCharPositionInLine()+1==LT(3).getCharPositionInLine()}? id WILDCARD (terminal|ruleref)) =>
id w=WILDCARD^ (terminal|ruleref) {$w.setType(DOT);}
| terminal
| ruleref
)
| notSet (ROOT^|BANG^)?
;
ruleref
: RULE_REF^ ARG_ACTION? (ROOT^|BANG^)?
;
notSet
: NOT^
( notTerminal
| block
)
;
treeRoot
@init{atTreeRoot=true;}
@after{atTreeRoot=false;}
: id (ASSIGN^|PLUS_ASSIGN^) (atom|block)
| atom
| block
;
tree_
: TREE_BEGIN^
treeRoot element+
RPAREN!
;
/** Matches EBNF blocks (and sets via the block rule) */
ebnf
: block
( QUESTION
-> ^(OPTIONAL[$start,"?"] block)
| STAR
-> ^(CLOSURE[$start,"*"] block)
| PLUS
-> ^(POSITIVE_CLOSURE[$start,"+"] block)
| IMPLIES // syntactic predicate
// ignore for lexer rules in combined
-> {grammarType == Grammar.COMBINED && Rule.getRuleType(currentRuleName) == Grammar.LEXER}? ^(SYNPRED[$start,"=>"] block)
// create manually specified (...)=> predicate; convert to sempred
-> {createSynSemPredFromBlock($block.tree, SYN_SEMPRED)}
| ROOT
-> ^(ROOT block)
| BANG
-> ^(BANG block)
|
-> block
)
;
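// Illustrative examples: "(A|B)?" -> ^(OPTIONAL BLOCK), "(A|B)*" -> ^(CLOSURE BLOCK),
// "(A|B)+" -> ^(POSITIVE_CLOSURE BLOCK); "(A B)=>" becomes a SYN_SEMPRED via
// createSynSemPredFromBlock, except in lexer rules of a combined grammar,
// where the ^(SYNPRED BLOCK) form is kept.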
range
: c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL
-> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
;
terminal
: cl=CHAR_LITERAL^ ( elementOptions[$cl.tree]! )? (ROOT^|BANG^)?
| tr=TOKEN_REF^
( elementOptions[$tr.tree]! )?
( ARG_ACTION )? // Args are only valid for lexer rules
(ROOT^|BANG^)?
| sl=STRING_LITERAL^ ( elementOptions[$sl.tree]! )? (ROOT^|BANG^)?
| wi=WILDCARD (ROOT^|BANG^)?
{
if ( atTreeRoot )
{
ErrorManager.syntaxError(
ErrorManager.MSG_WILDCARD_AS_ROOT,grammar,$wi,null,null);
}
}
;
elementOptions[GrammarAST terminalAST]
: OPEN_ELEMENT_OPTION^ defaultNodeOption[terminalAST] CLOSE_ELEMENT_OPTION!
| OPEN_ELEMENT_OPTION^ elementOption[terminalAST] (SEMI! elementOption[terminalAST])* CLOSE_ELEMENT_OPTION!
;
defaultNodeOption[GrammarAST terminalAST]
: elementOptionId
{terminalAST.setTerminalOption(grammar,Grammar.defaultTokenOption,$elementOptionId.qid);}
;
elementOption[GrammarAST terminalAST]
: id ASSIGN^
( elementOptionId
{terminalAST.setTerminalOption(grammar,$id.text,$elementOptionId.qid);}
| (t=STRING_LITERAL|t=DOUBLE_QUOTE_STRING_LITERAL|t=DOUBLE_ANGLE_STRING_LITERAL)
{terminalAST.setTerminalOption(grammar,$id.text,$t.text);}
)
;
elementOptionId returns [String qid]
@init{StringBuffer buf = new StringBuffer();}
: i=id {buf.append($i.text);} ('.' i=id {buf.append("." + $i.text);})*
{$qid = buf.toString();}
;
ebnfSuffix[GrammarAST elemAST, boolean inRewrite]
@init
{
GrammarAST blkRoot=null;
GrammarAST alt=null;
GrammarAST save = currentBlockAST;
}
@after
{
currentBlockAST = save;
}
: ( -> BLOCK[$elemAST.getToken(), "BLOCK"]
)
{ blkRoot = (GrammarAST)$tree.getChild(0); currentBlockAST = blkRoot; }
( // create alt
-> ^(ALT[$elemAST.getToken(), "ALT"] {$elemAST} EOA["<end-of-alt>"])
)
{
alt = (GrammarAST)$tree.getChild(0);
if ( !inRewrite )
prefixWithSynPred(alt);
}
( QUESTION
-> OPTIONAL[$elemAST.getToken(),"?"]
| STAR
-> CLOSURE[$elemAST.getToken(),"*"]
| PLUS
-> POSITIVE_CLOSURE[$elemAST.getToken(),"+"]
)
-> ^($ebnfSuffix ^({blkRoot} {alt} EOB[$elemAST.getToken(), "<end-of-block>"]))
;
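// Illustrative: for an element ID followed by '?', this yields
//   ^(OPTIONAL ^(BLOCK ^(ALT ID EOA) EOB))
// so a suffixed single element looks like a one-alt subrule block downstream.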
notTerminal
: CHAR_LITERAL
| TOKEN_REF
| STRING_LITERAL
;
idList
: id (COMMA! id)*
;
id
: TOKEN_REF
-> ID[$TOKEN_REF]
| RULE_REF
-> ID[$RULE_REF]
;
// R E W R I T E S Y N T A X
rewrite
: rewrite_with_sempred*
REWRITE rewrite_alternative
-> ^(REWRITES rewrite_with_sempred* ^(REWRITE rewrite_alternative))
|
;
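// Illustrative: "a : ID '=' e=expr -> ^(ASSIGN ID $e) ;" has one unguarded
// rewrite and no {pred}?-gated ones, producing ^(REWRITES ^(REWRITE ^(ALT ...))).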
rewrite_with_sempred
: REWRITE^ SEMPRED rewrite_alternative
;
rewrite_block
: LPAREN
rewrite_alternative
RPAREN
-> ^(BLOCK[$LPAREN,"BLOCK"] rewrite_alternative EOB[$RPAREN,"<end-of-block>"])
;
rewrite_alternative
options{k=1;}
: {grammar.buildTemplate()}? => rewrite_template
| {grammar.buildAST()}? => ( rewrite_element )+
-> {!stream_rewrite_element.hasNext()}? ^(ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"])
-> ^(ALT[LT(1),"ALT"] rewrite_element+ EOA["<end-of-alt>"])
|
-> ^(ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"])
| {grammar.buildAST()}? ETC
;
rewrite_element
: ( t=rewrite_atom
-> $t
)
( subrule=ebnfSuffix[$t.tree,true]
-> $subrule
)?
| rewrite_ebnf
| ( tr=rewrite_tree
-> $tr
)
( subrule=ebnfSuffix[$tr.tree,true]
-> $subrule
)?
;
rewrite_atom
: tr=TOKEN_REF^ elementOptions[$tr.tree]!? ARG_ACTION? // for imaginary nodes
| RULE_REF
| cl=CHAR_LITERAL elementOptions[$cl.tree]!?
| sl=STRING_LITERAL elementOptions[$sl.tree]!?
| DOLLAR! label // reference to a label in a rewrite rule
| ACTION
;
label
: TOKEN_REF -> LABEL[$TOKEN_REF]
| RULE_REF -> LABEL[$RULE_REF]
;
rewrite_ebnf
: b=rewrite_block
( QUESTION
-> ^(OPTIONAL[$b.start,"?"] $b)
| STAR
-> ^(CLOSURE[$b.start,"*"] $b)
| PLUS
-> ^(POSITIVE_CLOSURE[$b.start,"+"] $b)
)
;
rewrite_tree
: TREE_BEGIN^
rewrite_atom rewrite_element*
RPAREN!
;
/** Build a tree for a template rewrite:
^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
where ARGLIST is always present even if no args exist.
ID can be the "template" keyword. If the first child is ACTION then it's
an indirect template ref:
-> foo(a={...}, b={...})
-> ({string-e})(a={...}, b={...}) // e evaluates to template name
-> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
-> {st-expr} // st-expr evaluates to ST
*/
public
rewrite_template
options{k=1;}
: // -> template(a={...},...) "..."
{LT(1).getText().equals("template")}? => // inline
( rewrite_template_head
-> rewrite_template_head
)
( st=DOUBLE_QUOTE_STRING_LITERAL | st=DOUBLE_ANGLE_STRING_LITERAL )
{ adaptor.addChild( $tree.getChild(0), adaptor.create($st) ); }
| // -> foo(a={...}, ...)
rewrite_template_head
| // -> ({expr})(a={...}, ...)
rewrite_indirect_template_head
| // -> {...}
ACTION
;
/** -> foo(a={...}, ...) */
rewrite_template_head
: id lp=LPAREN
rewrite_template_args
RPAREN
-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
;
/** -> ({expr})(a={...}, ...) */
rewrite_indirect_template_head
: lp=LPAREN
ACTION
RPAREN
LPAREN rewrite_template_args RPAREN
-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
;
rewrite_template_args
: rewrite_template_arg (COMMA rewrite_template_arg)*
-> ^(ARGLIST["ARGLIST"] rewrite_template_arg+)
|
-> ARGLIST["ARGLIST"]
;
rewrite_template_arg
: id a=ASSIGN ACTION
-> ^(ARG[$a,"ARG"] id ACTION)
;
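// Illustrative: "-> assign(lhs={$ID.text}, rhs={$e.st})" builds
//   ^(TEMPLATE ID["assign"] ^(ARGLIST ^(ARG ID["lhs"] ACTION) ^(ARG ID["rhs"] ACTION)))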
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// L E X E R
// get rid of warnings:
fragment STRING_LITERAL : ;
fragment FORCED_ACTION : ;
fragment DOC_COMMENT : ;
fragment SEMPRED : ;
WS
: ( ' '
| '\t'
| ('\r')? '\n'
)
{ $channel = HIDDEN; }
;
COMMENT
@init{List<Integer> type = new ArrayList<Integer>() {{ add(0); }};}
: ( SL_COMMENT | ML_COMMENT[type] {$type = type.get(0);} )
{
if ( $type != DOC_COMMENT )
$channel = HIDDEN;
}
;
fragment
SL_COMMENT
: '//'
( (' $ANTLR') => ' $ANTLR ' SRC (('\r')? '\n')? // src directive
| ~('\r'|'\n')* (('\r')? '\n')?
)
;
fragment
ML_COMMENT[List<Integer> type]
: '/*'
{$type.set(0, (input.LA(1) == '*' && input.LA(2) != '/') ? DOC_COMMENT : ML_COMMENT);}
.*
'*/'
;
OPEN_ELEMENT_OPTION
: '<'
;
CLOSE_ELEMENT_OPTION
: '>'
;
AMPERSAND : '@';
COMMA : ',';
QUESTION : '?' ;
TREE_BEGIN : '^(' ;
LPAREN: '(' ;
RPAREN: ')' ;
COLON : ':' ;
STAR: '*' ;
PLUS: '+' ;
ASSIGN : '=' ;
PLUS_ASSIGN : '+=' ;
IMPLIES : '=>' ;
REWRITE : '->' ;
SEMI: ';' ;
ROOT : '^' {hasASTOperator=true;} ;
BANG : '!' {hasASTOperator=true;} ;
OR : '|' ;
WILDCARD : '.' ;
ETC : '...' ;
RANGE : '..' ;
NOT : '~' ;
RCURLY: '}' ;
DOLLAR : '$' ;
STRAY_BRACKET
: ']'
{
ErrorManager.syntaxError(
ErrorManager.MSG_SYNTAX_ERROR,
null,
state.token,
"antlr: dangling ']'? make sure to escape with \\]",
null);
}
;
CHAR_LITERAL
: '\''
( ESC
| ~('\\'|'\'')
)*
'\''
{
StringBuffer s = Grammar.getUnescapedStringFromGrammarStringLiteral($text);
if ( s.length() > 1 )
{
$type = STRING_LITERAL;
}
}
;
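// Illustrative: 'a' stays CHAR_LITERAL, while 'abc' (unescaped length > 1)
// is retyped to STRING_LITERAL by the action above.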
DOUBLE_QUOTE_STRING_LITERAL
@init
{
StringBuilder builder = new StringBuilder();
}
: '"' {builder.append('"');}
( ('\\\"') => '\\' '"' {builder.append('"');}
| '\\' c=~'"' {builder.append("\\" + (char)$c);}
| c=~('\\'|'"') {builder.append((char)$c);}
)*
'"' {builder.append('"');}
{
setText(builder.toString());
}
;
DOUBLE_ANGLE_STRING_LITERAL
: '<<' .* '>>'
;
fragment
ESC
: '\\'
( // due to the way ESC is used, we don't need to handle the following character in different ways
/*'n'
| 'r'
| 't'
| 'b'
| 'f'
| '"'
| '\''
| '\\'
| '>'
| 'u' XDIGIT XDIGIT XDIGIT XDIGIT
|*/ . // unknown, leave as it is
)
;
fragment
DIGIT
: '0'..'9'
;
fragment
XDIGIT
: '0' .. '9'
| 'a' .. 'f'
| 'A' .. 'F'
;
INT
: ('0'..'9')+
;
ARG_ACTION
@init {
List<String> text = new ArrayList<String>() {{ add(null); }};
}
: '['
NESTED_ARG_ACTION[text]
']'
{setText(text.get(0));}
;
fragment
NESTED_ARG_ACTION[List<String> text]
@init {
$text.set(0, "");
StringBuilder builder = new StringBuilder();
}
: ( ('\\]') => '\\' ']' {builder.append("]");}
| '\\' c=~(']') {builder.append("\\" + (char)$c);}
| ACTION_STRING_LITERAL {builder.append($ACTION_STRING_LITERAL.text);}
| ACTION_CHAR_LITERAL {builder.append($ACTION_CHAR_LITERAL.text);}
| c=~('\\'|'"'|'\''|']') {builder.append((char)$c);}
)*
{
$text.set(0, builder.toString());
}
;
ACTION
@init
{
int actionLine = getLine();
int actionColumn = getCharPositionInLine();
}
: NESTED_ACTION
('?' {$type = SEMPRED;})?
{
String action = $text;
int n = 1; // num delimiter chars
if ( action.startsWith("{{") && action.endsWith("}}") )
{
$type = FORCED_ACTION;
n = 2;
}
action = action.substring(n,action.length()-n - ($type==SEMPRED ? 1 : 0));
setText(action);
}
;
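// Illustrative: "{x=1;}" yields ACTION with text "x=1;", "{{x=1;}}" yields
// FORCED_ACTION with text "x=1;", and "{p}?" yields SEMPRED with text "p";
// the delimiters (and the trailing '?') are stripped by the action above.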
fragment
NESTED_ACTION
: '{'
( NESTED_ACTION
| ACTION_CHAR_LITERAL
| ('//' | '/*') => COMMENT
| ACTION_STRING_LITERAL
| ACTION_ESC
| ~('{'|'\''|'"'|'\\'|'}')
)*
'}'
;
fragment
ACTION_CHAR_LITERAL
: '\''
( ACTION_ESC
| ~('\\'|'\'')
)*
'\''
;
fragment
ACTION_STRING_LITERAL
: '"'
( ACTION_ESC
| ~('\\'|'"')
)*
'"'
;
fragment
ACTION_ESC
: '\\\''
| '\\\"'
| '\\' ~('\''|'"')
;
TOKEN_REF
: 'A'..'Z'
( 'a'..'z'|'A'..'Z'|'_'|'0'..'9'
)*
;
TOKENS
: 'tokens' WS_LOOP '{'
;
OPTIONS
: 'options' WS_LOOP '{'
;
// we get a warning here when looking for options '{', but it works right
RULE_REF
@init
{
int t=0;
}
: 'a'..'z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
;
fragment
WS_LOOP
: ( WS
| COMMENT
)*
;
fragment
WS_OPT
: (WS)?
;
/** Reset the file and line information; useful when the grammar
* has been generated so that errors are shown relative to the
* original file like the old C preprocessor used to do.
*/
fragment
SRC
: 'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
{
setFileName($file.text.substring(1,$file.text.length()-1));
input.setLine(Integer.parseInt($line.text) - 1); // -1 because SL_COMMENT will increment the line no. KR
}
;