| /* parse.y - parser for flex input */ |
| |
| %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP |
| %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE |
| %token TOK_TABLES_FILE |
| |
| %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH |
| %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT |
| |
| %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH |
| %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT |
| |
| %left CCL_OP_DIFF CCL_OP_UNION |
| |
| /* |
| *POSIX and AT&T lex place the |
| * precedence of the repeat operator, {}, below that of concatenation. |
| * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended |
| * Regular Expression (ERE) precedence that has the repeat operator |
| * higher than concatenation. This causes ab{3} to yield abbb. |
| * |
| * In order to support the POSIX and AT&T precedence and the flex |
| * precedence we define two token sets for the begin and end tokens of |
| * the repeat operator, '{' and '}'. The lexical scanner chooses |
| * which tokens to return based on whether posix_compat or lex_compat |
| * are specified. Specifying either posix_compat or lex_compat will |
| * cause flex to parse scanner files as per the AT&T and |
| * POSIX-mandated behavior. |
| */ |
| |
| %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX |
| |
| |
| %{ |
| /* Copyright (c) 1990 The Regents of the University of California. */ |
| /* All rights reserved. */ |
| |
| /* This code is derived from software contributed to Berkeley by */ |
| /* Vern Paxson. */ |
| |
| /* The United States Government has rights in this work pursuant */ |
| /* to contract no. DE-AC03-76SF00098 between the United States */ |
| /* Department of Energy and the University of California. */ |
| |
| /* This file is part of flex. */ |
| |
| /* Redistribution and use in source and binary forms, with or without */ |
| /* modification, are permitted provided that the following conditions */ |
| /* are met: */ |
| |
| /* 1. Redistributions of source code must retain the above copyright */ |
| /* notice, this list of conditions and the following disclaimer. */ |
| /* 2. Redistributions in binary form must reproduce the above copyright */ |
| /* notice, this list of conditions and the following disclaimer in the */ |
| /* documentation and/or other materials provided with the distribution. */ |
| |
| /* Neither the name of the University nor the names of its contributors */ |
| /* may be used to endorse or promote products derived from this software */ |
| /* without specific prior written permission. */ |
| |
| /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ |
| /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ |
| /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ |
| /* PURPOSE. */ |
| |
| #include "flexdef.h" |
| #include "tables.h" |
| |
| int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen; |
| int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule; |
| |
| int *scon_stk; |
| int scon_stk_ptr; |
| |
| static int madeany = false; /* whether we've made the '.' character class */ |
| static int ccldot, cclany; |
| int previous_continued_action; /* whether the previous rule's action was '|' */ |
| |
| #define format_warn3(fmt, a1, a2) \ |
| do{ \ |
| char fw3_msg[MAXLINE];\ |
| snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\ |
| lwarn( fw3_msg );\ |
| }while(0) |
| |
| /* Expand a POSIX character class expression. */ |
| #define CCL_EXPR(func) \ |
| do{ \ |
| int c; \ |
| for ( c = 0; c < csize; ++c ) \ |
| if ( isascii(c) && func(c) ) \ |
| ccladd( currccl, c ); \ |
| }while(0) |
| |
| /* negated class */ |
| #define CCL_NEG_EXPR(func) \ |
| do{ \ |
| int c; \ |
| for ( c = 0; c < csize; ++c ) \ |
| if ( !func(c) ) \ |
| ccladd( currccl, c ); \ |
| }while(0) |
| |
| /* While POSIX defines isblank(), it's not ANSI C. */ |
| #define IS_BLANK(c) ((c) == ' ' || (c) == '\t') |
| |
| /* On some over-ambitious machines, such as DEC Alpha's, the default |
| * token type is "long" instead of "int"; this leads to problems with |
| * declaring yylval in flexdef.h. But so far, all the yacc's I've seen |
| * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the |
| * following should ensure that the default token type is "int". |
| */ |
| #define YYSTYPE int |
| |
| %} |
| |
| %% |
| goal : initlex sect1 sect1end sect2 initforrule |
| { /* add default rule */ |
| int def_rule; |
| |
| pat = cclinit(); |
| cclnegate( pat ); |
| |
| def_rule = mkstate( -pat ); |
| |
| /* Remember the number of the default rule so we |
| * don't generate "can't match" warnings for it. |
| */ |
| default_rule = num_rules; |
| |
| finish_rule( def_rule, false, 0, 0, 0); |
| |
| for ( i = 1; i <= lastsc; ++i ) |
| scset[i] = mkbranch( scset[i], def_rule ); |
| |
| if ( spprdflt ) |
| add_action( |
| "YY_FATAL_ERROR( \"flex scanner jammed\" )" ); |
| else |
| add_action( "ECHO" ); |
| |
| add_action( ";\n\tYY_BREAK]]\n" ); |
| } |
| ; |
| |
| initlex : |
| { /* initialize for processing rules */ |
| |
| /* Create default DFA start condition. */ |
| scinstal( "INITIAL", false ); |
| } |
| ; |
| |
| sect1 : sect1 startconddecl namelist1 |
| | sect1 options |
| | |
| | error |
| { synerr( _("unknown error processing section 1") ); } |
| ; |
| |
| sect1end : SECTEND |
| { |
| check_options(); |
| scon_stk = allocate_integer_array( lastsc + 1 ); |
| scon_stk_ptr = 0; |
| } |
| ; |
| |
| startconddecl : SCDECL |
| { xcluflg = false; } |
| |
| | XSCDECL |
| { xcluflg = true; } |
| ; |
| |
| namelist1 : namelist1 NAME |
| { scinstal( nmstr, xcluflg ); } |
| |
| | NAME |
| { scinstal( nmstr, xcluflg ); } |
| |
| | error |
| { synerr( _("bad start condition list") ); } |
| ; |
| |
| options : TOK_OPTION optionlist |
| ; |
| |
| optionlist : optionlist option |
| | |
| ; |
| |
| option : TOK_OUTFILE '=' NAME |
| { |
| outfilename = xstrdup(nmstr); |
| did_outfilename = 1; |
| } |
| | TOK_EXTRA_TYPE '=' NAME |
| { extra_type = xstrdup(nmstr); } |
| | TOK_PREFIX '=' NAME |
| { prefix = xstrdup(nmstr); |
| if (strchr(prefix, '[') || strchr(prefix, ']')) |
| flexerror(_("Prefix must not contain [ or ]")); } |
| | TOK_YYCLASS '=' NAME |
| { yyclass = xstrdup(nmstr); } |
| | TOK_HEADER_FILE '=' NAME |
| { headerfilename = xstrdup(nmstr); } |
| | TOK_TABLES_FILE '=' NAME |
| { tablesext = true; tablesfilename = xstrdup(nmstr); } |
| ; |
| |
| sect2 : sect2 scon initforrule flexrule '\n' |
| { scon_stk_ptr = $2; } |
| | sect2 scon '{' sect2 '}' |
| { scon_stk_ptr = $2; } |
| | |
| ; |
| |
| initforrule : |
| { |
| /* Initialize for a parse of one rule. */ |
| trlcontxt = variable_trail_rule = varlength = false; |
| trailcnt = headcnt = rulelen = 0; |
| current_state_type = STATE_NORMAL; |
| previous_continued_action = continued_action; |
| in_rule = true; |
| |
| new_rule(); |
| } |
| ; |
| |
| flexrule : '^' rule |
| { |
| pat = $2; |
| finish_rule( pat, variable_trail_rule, |
| headcnt, trailcnt , previous_continued_action); |
| |
| if ( scon_stk_ptr > 0 ) |
| { |
| for ( i = 1; i <= scon_stk_ptr; ++i ) |
| scbol[scon_stk[i]] = |
| mkbranch( scbol[scon_stk[i]], |
| pat ); |
| } |
| |
| else |
| { |
| /* Add to all non-exclusive start conditions, |
| * including the default (0) start condition. |
| */ |
| |
| for ( i = 1; i <= lastsc; ++i ) |
| if ( ! scxclu[i] ) |
| scbol[i] = mkbranch( scbol[i], |
| pat ); |
| } |
| |
| if ( ! bol_needed ) |
| { |
| bol_needed = true; |
| |
| if ( performance_report > 1 ) |
| pinpoint_message( |
| "'^' operator results in sub-optimal performance" ); |
| } |
| } |
| |
| | rule |
| { |
| pat = $1; |
| finish_rule( pat, variable_trail_rule, |
| headcnt, trailcnt , previous_continued_action); |
| |
| if ( scon_stk_ptr > 0 ) |
| { |
| for ( i = 1; i <= scon_stk_ptr; ++i ) |
| scset[scon_stk[i]] = |
| mkbranch( scset[scon_stk[i]], |
| pat ); |
| } |
| |
| else |
| { |
| for ( i = 1; i <= lastsc; ++i ) |
| if ( ! scxclu[i] ) |
| scset[i] = |
| mkbranch( scset[i], |
| pat ); |
| } |
| } |
| |
| | EOF_OP |
| { |
| if ( scon_stk_ptr > 0 ) |
| build_eof_action(); |
| |
| else |
| { |
| /* This EOF applies to all start conditions |
| * which don't already have EOF actions. |
| */ |
| for ( i = 1; i <= lastsc; ++i ) |
| if ( ! sceof[i] ) |
| scon_stk[++scon_stk_ptr] = i; |
| |
| if ( scon_stk_ptr == 0 ) |
| lwarn( |
| "all start conditions already have <<EOF>> rules" ); |
| |
| else |
| build_eof_action(); |
| } |
| } |
| |
| | error |
| { synerr( _("unrecognized rule") ); } |
| ; |
| |
| scon_stk_ptr : |
| { $$ = scon_stk_ptr; } |
| ; |
| |
| scon : '<' scon_stk_ptr namelist2 '>' |
| { $$ = $2; } |
| |
| | '<' '*' '>' |
| { |
| $$ = scon_stk_ptr; |
| |
| for ( i = 1; i <= lastsc; ++i ) |
| { |
| int j; |
| |
| for ( j = 1; j <= scon_stk_ptr; ++j ) |
| if ( scon_stk[j] == i ) |
| break; |
| |
| if ( j > scon_stk_ptr ) |
| scon_stk[++scon_stk_ptr] = i; |
| } |
| } |
| |
| | |
| { $$ = scon_stk_ptr; } |
| ; |
| |
| namelist2 : namelist2 ',' sconname |
| |
| | sconname |
| |
| | error |
| { synerr( _("bad start condition list") ); } |
| ; |
| |
| sconname : NAME |
| { |
| if ( (scnum = sclookup( nmstr )) == 0 ) |
| format_pinpoint_message( |
| "undeclared start condition %s", |
| nmstr ); |
| else |
| { |
| for ( i = 1; i <= scon_stk_ptr; ++i ) |
| if ( scon_stk[i] == scnum ) |
| { |
| format_warn( |
| "<%s> specified twice", |
| scname[scnum] ); |
| break; |
| } |
| |
| if ( i > scon_stk_ptr ) |
| scon_stk[++scon_stk_ptr] = scnum; |
| } |
| } |
| ; |
| |
| rule : re2 re |
| { |
| if ( transchar[lastst[$2]] != SYM_EPSILON ) |
| /* Provide final transition \now/ so it |
| * will be marked as a trailing context |
| * state. |
| */ |
| $2 = link_machines( $2, |
| mkstate( SYM_EPSILON ) ); |
| |
| mark_beginning_as_normal( $2 ); |
| current_state_type = STATE_NORMAL; |
| |
| if ( previous_continued_action ) |
| { |
| /* We need to treat this as variable trailing |
| * context so that the backup does not happen |
| * in the action but before the action switch |
| * statement. If the backup happens in the |
| * action, then the rules "falling into" this |
| * one's action will *also* do the backup, |
| * erroneously. |
| */ |
| if ( ! varlength || headcnt != 0 ) |
| lwarn( |
| "trailing context made variable due to preceding '|' action" ); |
| |
| /* Mark as variable. */ |
| varlength = true; |
| headcnt = 0; |
| |
| } |
| |
| if ( lex_compat || (varlength && headcnt == 0) ) |
| { /* variable trailing context rule */ |
| /* Mark the first part of the rule as the |
| * accepting "head" part of a trailing |
| * context rule. |
| * |
| * By the way, we didn't do this at the |
| * beginning of this production because back |
| * then current_state_type was set up for a |
| * trail rule, and add_accept() can create |
| * a new state ... |
| */ |
| add_accept( $1, |
| num_rules | YY_TRAILING_HEAD_MASK ); |
| variable_trail_rule = true; |
| } |
| |
| else |
| trailcnt = rulelen; |
| |
| $$ = link_machines( $1, $2 ); |
| } |
| |
| | re2 re '$' |
| { synerr( _("trailing context used twice") ); } |
| |
| | re '$' |
| { |
| headcnt = 0; |
| trailcnt = 1; |
| rulelen = 1; |
| varlength = false; |
| |
| current_state_type = STATE_TRAILING_CONTEXT; |
| |
| if ( trlcontxt ) |
| { |
| synerr( _("trailing context used twice") ); |
| $$ = mkstate( SYM_EPSILON ); |
| } |
| |
| else if ( previous_continued_action ) |
| { |
| /* See the comment in the rule for "re2 re" |
| * above. |
| */ |
| lwarn( |
| "trailing context made variable due to preceding '|' action" ); |
| |
| varlength = true; |
| } |
| |
| if ( lex_compat || varlength ) |
| { |
| /* Again, see the comment in the rule for |
| * "re2 re" above. |
| */ |
| add_accept( $1, |
| num_rules | YY_TRAILING_HEAD_MASK ); |
| variable_trail_rule = true; |
| } |
| |
| trlcontxt = true; |
| |
| eps = mkstate( SYM_EPSILON ); |
| $$ = link_machines( $1, |
| link_machines( eps, mkstate( '\n' ) ) ); |
| } |
| |
| | re |
| { |
| $$ = $1; |
| |
| if ( trlcontxt ) |
| { |
| if ( lex_compat || (varlength && headcnt == 0) ) |
| /* Both head and trail are |
| * variable-length. |
| */ |
| variable_trail_rule = true; |
| else |
| trailcnt = rulelen; |
| } |
| } |
| ; |
| |
| |
| re : re '|' series |
| { |
| varlength = true; |
| $$ = mkor( $1, $3 ); |
| } |
| |
| | series |
| { $$ = $1; } |
| ; |
| |
| |
| re2 : re '/' |
| { |
| /* This rule is written separately so the |
| * reduction will occur before the trailing |
| * series is parsed. |
| */ |
| |
| if ( trlcontxt ) |
| synerr( _("trailing context used twice") ); |
| else |
| trlcontxt = true; |
| |
| if ( varlength ) |
| /* We hope the trailing context is |
| * fixed-length. |
| */ |
| varlength = false; |
| else |
| headcnt = rulelen; |
| |
| rulelen = 0; |
| |
| current_state_type = STATE_TRAILING_CONTEXT; |
| $$ = $1; |
| } |
| ; |
| |
| series : series singleton |
| { |
| /* This is where concatenation of adjacent patterns |
| * gets done. |
| */ |
| $$ = link_machines( $1, $2 ); |
| } |
| |
| | singleton |
| { $$ = $1; } |
| |
| | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX |
| { |
| varlength = true; |
| |
| if ( $3 > $5 || $3 < 0 ) |
| { |
| synerr( _("bad iteration values") ); |
| $$ = $1; |
| } |
| else |
| { |
| if ( $3 == 0 ) |
| { |
| if ( $5 <= 0 ) |
| { |
| synerr( |
| _("bad iteration values") ); |
| $$ = $1; |
| } |
| else |
| $$ = mkopt( |
| mkrep( $1, 1, $5 ) ); |
| } |
| else |
| $$ = mkrep( $1, $3, $5 ); |
| } |
| } |
| |
| | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX |
| { |
| varlength = true; |
| |
| if ( $3 <= 0 ) |
| { |
| synerr( _("iteration value must be positive") ); |
| $$ = $1; |
| } |
| |
| else |
| $$ = mkrep( $1, $3, INFINITE_REPEAT ); |
| } |
| |
| | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX |
| { |
| /* The series could be something like "(foo)", |
| * in which case we have no idea what its length |
| * is, so we punt here. |
| */ |
| varlength = true; |
| |
| if ( $3 <= 0 ) |
| { |
| synerr( _("iteration value must be positive") |
| ); |
| $$ = $1; |
| } |
| |
| else |
| $$ = link_machines( $1, |
| copysingl( $1, $3 - 1 ) ); |
| } |
| |
| ; |
| |
| singleton : singleton '*' |
| { |
| varlength = true; |
| |
| $$ = mkclos( $1 ); |
| } |
| |
| | singleton '+' |
| { |
| varlength = true; |
| $$ = mkposcl( $1 ); |
| } |
| |
| | singleton '?' |
| { |
| varlength = true; |
| $$ = mkopt( $1 ); |
| } |
| |
| | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX |
| { |
| varlength = true; |
| |
| if ( $3 > $5 || $3 < 0 ) |
| { |
| synerr( _("bad iteration values") ); |
| $$ = $1; |
| } |
| else |
| { |
| if ( $3 == 0 ) |
| { |
| if ( $5 <= 0 ) |
| { |
| synerr( |
| _("bad iteration values") ); |
| $$ = $1; |
| } |
| else |
| $$ = mkopt( |
| mkrep( $1, 1, $5 ) ); |
| } |
| else |
| $$ = mkrep( $1, $3, $5 ); |
| } |
| } |
| |
| | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX |
| { |
| varlength = true; |
| |
| if ( $3 <= 0 ) |
| { |
| synerr( _("iteration value must be positive") ); |
| $$ = $1; |
| } |
| |
| else |
| $$ = mkrep( $1, $3, INFINITE_REPEAT ); |
| } |
| |
| | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX |
| { |
| /* The singleton could be something like "(foo)", |
| * in which case we have no idea what its length |
| * is, so we punt here. |
| */ |
| varlength = true; |
| |
| if ( $3 <= 0 ) |
| { |
| synerr( _("iteration value must be positive") ); |
| $$ = $1; |
| } |
| |
| else |
| $$ = link_machines( $1, |
| copysingl( $1, $3 - 1 ) ); |
| } |
| |
| | '.' |
| { |
| if ( ! madeany ) |
| { |
| /* Create the '.' character class. */ |
| ccldot = cclinit(); |
| ccladd( ccldot, '\n' ); |
| cclnegate( ccldot ); |
| |
| if ( useecs ) |
| mkeccl( ccltbl + cclmap[ccldot], |
| ccllen[ccldot], nextecm, |
| ecgroup, csize, csize ); |
| |
| /* Create the (?s:'.') character class. */ |
| cclany = cclinit(); |
| cclnegate( cclany ); |
| |
| if ( useecs ) |
| mkeccl( ccltbl + cclmap[cclany], |
| ccllen[cclany], nextecm, |
| ecgroup, csize, csize ); |
| |
| madeany = true; |
| } |
| |
| ++rulelen; |
| |
| if (sf_dot_all()) |
| $$ = mkstate( -cclany ); |
| else |
| $$ = mkstate( -ccldot ); |
| } |
| |
| | fullccl |
| { |
| /* Sort characters for fast searching. |
| */ |
| qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp ); |
| |
| if ( useecs ) |
| mkeccl( ccltbl + cclmap[$1], ccllen[$1], |
| nextecm, ecgroup, csize, csize ); |
| |
| ++rulelen; |
| |
| if (ccl_has_nl[$1]) |
| rule_has_nl[num_rules] = true; |
| |
| $$ = mkstate( -$1 ); |
| } |
| |
| | PREVCCL |
| { |
| ++rulelen; |
| |
| if (ccl_has_nl[$1]) |
| rule_has_nl[num_rules] = true; |
| |
| $$ = mkstate( -$1 ); |
| } |
| |
| | '"' string '"' |
| { $$ = $2; } |
| |
| | '(' re ')' |
| { $$ = $2; } |
| |
| | CHAR |
| { |
| ++rulelen; |
| |
| if ($1 == nlch) |
| rule_has_nl[num_rules] = true; |
| |
| if (sf_case_ins() && has_case($1)) |
| /* create an alternation, as in (a|A) */ |
| $$ = mkor (mkstate($1), mkstate(reverse_case($1))); |
| else |
| $$ = mkstate( $1 ); |
| } |
| ; |
| fullccl: |
| fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); } |
| | fullccl CCL_OP_UNION braceccl { $$ = ccl_set_union ($1, $3); } |
| | braceccl |
| ; |
| |
| braceccl: |
| |
| '[' ccl ']' { $$ = $2; } |
| |
| | '[' '^' ccl ']' |
| { |
| cclnegate( $3 ); |
| $$ = $3; |
| } |
| ; |
| |
| ccl : ccl CHAR '-' CHAR |
| { |
| |
| if (sf_case_ins()) |
| { |
| |
| /* If one end of the range has case and the other |
| * does not, or the cases are different, then we're not |
| * sure what range the user is trying to express. |
| * Examples: [@-z] or [S-t] |
| */ |
| if (has_case ($2) != has_case ($4) |
| || (has_case ($2) && (b_islower ($2) != b_islower ($4))) |
| || (has_case ($2) && (b_isupper ($2) != b_isupper ($4)))) |
| format_warn3 ( |
| _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"), |
| $2, $4); |
| |
| /* If the range spans uppercase characters but not |
| * lowercase (or vice-versa), then should we automatically |
| * include lowercase characters in the range? |
| * Example: [@-_] spans [a-z] but not [A-Z] |
| */ |
| else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4)) |
| format_warn3 ( |
| _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"), |
| $2, $4); |
| } |
| |
| if ( $2 > $4 ) |
| synerr( _("negative range in character class") ); |
| |
| else |
| { |
| for ( i = $2; i <= $4; ++i ) |
| ccladd( $1, i ); |
| |
| /* Keep track if this ccl is staying in |
| * alphabetical order. |
| */ |
| cclsorted = cclsorted && ($2 > lastchar); |
| lastchar = $4; |
| |
| /* Do it again for upper/lowercase */ |
| if (sf_case_ins() && has_case($2) && has_case($4)){ |
| $2 = reverse_case ($2); |
| $4 = reverse_case ($4); |
| |
| for ( i = $2; i <= $4; ++i ) |
| ccladd( $1, i ); |
| |
| cclsorted = cclsorted && ($2 > lastchar); |
| lastchar = $4; |
| } |
| |
| } |
| |
| $$ = $1; |
| } |
| |
| | ccl CHAR |
| { |
| ccladd( $1, $2 ); |
| cclsorted = cclsorted && ($2 > lastchar); |
| lastchar = $2; |
| |
| /* Do it again for upper/lowercase */ |
| if (sf_case_ins() && has_case($2)){ |
| $2 = reverse_case ($2); |
| ccladd ($1, $2); |
| |
| cclsorted = cclsorted && ($2 > lastchar); |
| lastchar = $2; |
| } |
| |
| $$ = $1; |
| } |
| |
| | ccl ccl_expr |
| { |
| /* Too hard to properly maintain cclsorted. */ |
| cclsorted = false; |
| $$ = $1; |
| } |
| |
| | |
| { |
| cclsorted = true; |
| lastchar = 0; |
| currccl = $$ = cclinit(); |
| } |
| ; |
| |
| ccl_expr: |
| CCE_ALNUM { CCL_EXPR(isalnum); } |
| | CCE_ALPHA { CCL_EXPR(isalpha); } |
| | CCE_BLANK { CCL_EXPR(IS_BLANK); } |
| | CCE_CNTRL { CCL_EXPR(iscntrl); } |
| | CCE_DIGIT { CCL_EXPR(isdigit); } |
| | CCE_GRAPH { CCL_EXPR(isgraph); } |
| | CCE_LOWER { |
| CCL_EXPR(islower); |
| if (sf_case_ins()) |
| CCL_EXPR(isupper); |
| } |
| | CCE_PRINT { CCL_EXPR(isprint); } |
| | CCE_PUNCT { CCL_EXPR(ispunct); } |
| | CCE_SPACE { CCL_EXPR(isspace); } |
| | CCE_XDIGIT { CCL_EXPR(isxdigit); } |
| | CCE_UPPER { |
| CCL_EXPR(isupper); |
| if (sf_case_ins()) |
| CCL_EXPR(islower); |
| } |
| |
| | CCE_NEG_ALNUM { CCL_NEG_EXPR(isalnum); } |
| | CCE_NEG_ALPHA { CCL_NEG_EXPR(isalpha); } |
| | CCE_NEG_BLANK { CCL_NEG_EXPR(IS_BLANK); } |
| | CCE_NEG_CNTRL { CCL_NEG_EXPR(iscntrl); } |
| | CCE_NEG_DIGIT { CCL_NEG_EXPR(isdigit); } |
| | CCE_NEG_GRAPH { CCL_NEG_EXPR(isgraph); } |
| | CCE_NEG_PRINT { CCL_NEG_EXPR(isprint); } |
| | CCE_NEG_PUNCT { CCL_NEG_EXPR(ispunct); } |
| | CCE_NEG_SPACE { CCL_NEG_EXPR(isspace); } |
| | CCE_NEG_XDIGIT { CCL_NEG_EXPR(isxdigit); } |
| | CCE_NEG_LOWER { |
| if ( sf_case_ins() ) |
| lwarn(_("[:^lower:] is ambiguous in case insensitive scanner")); |
| else |
| CCL_NEG_EXPR(islower); |
| } |
| | CCE_NEG_UPPER { |
| if ( sf_case_ins() ) |
| lwarn(_("[:^upper:] ambiguous in case insensitive scanner")); |
| else |
| CCL_NEG_EXPR(isupper); |
| } |
| ; |
| |
| string : string CHAR |
| { |
| if ( $2 == nlch ) |
| rule_has_nl[num_rules] = true; |
| |
| ++rulelen; |
| |
| if (sf_case_ins() && has_case($2)) |
| $$ = mkor (mkstate($2), mkstate(reverse_case($2))); |
| else |
| $$ = mkstate ($2); |
| |
| $$ = link_machines( $1, $$); |
| } |
| |
| | |
| { $$ = mkstate( SYM_EPSILON ); } |
| ; |
| |
| %% |
| |
| |
| /* build_eof_action - build the "<<EOF>>" action for the active start |
| * conditions |
| */ |
| |
| void build_eof_action(void) |
| { |
| int i; |
| char action_text[MAXLINE]; |
| |
| for ( i = 1; i <= scon_stk_ptr; ++i ) |
| { |
| if ( sceof[scon_stk[i]] ) |
| format_pinpoint_message( |
| "multiple <<EOF>> rules for start condition %s", |
| scname[scon_stk[i]] ); |
| |
| else |
| { |
| sceof[scon_stk[i]] = true; |
| |
| if (previous_continued_action /* && previous action was regular */) |
| add_action("YY_RULE_SETUP\n"); |
| |
| snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n", |
| scname[scon_stk[i]] ); |
| add_action( action_text ); |
| } |
| } |
| |
| line_directive_out(NULL, 1); |
| add_action("[["); |
| |
| /* This isn't a normal rule after all - don't count it as |
| * such, so we don't have any holes in the rule numbering |
| * (which make generating "rule can never match" warnings |
| * more difficult. |
| */ |
| --num_rules; |
| ++num_eof_rules; |
| } |
| |
| |
| /* format_synerr - write out formatted syntax error */ |
| |
| void format_synerr( const char *msg, const char arg[] ) |
| { |
| char errmsg[MAXLINE]; |
| |
| (void) snprintf( errmsg, sizeof(errmsg), msg, arg ); |
| synerr( errmsg ); |
| } |
| |
| |
| /* synerr - report a syntax error */ |
| |
| void synerr( const char *str ) |
| { |
| syntaxerror = true; |
| pinpoint_message( str ); |
| } |
| |
| |
| /* format_warn - write out formatted warning */ |
| |
| void format_warn( const char *msg, const char arg[] ) |
| { |
| char warn_msg[MAXLINE]; |
| |
| snprintf( warn_msg, sizeof(warn_msg), msg, arg ); |
| lwarn( warn_msg ); |
| } |
| |
| |
| /* lwarn - report a warning, unless -w was given */ |
| |
| void lwarn( const char *str ) |
| { |
| line_warning( str, linenum ); |
| } |
| |
| /* format_pinpoint_message - write out a message formatted with one string, |
| * pinpointing its location |
| */ |
| |
| void format_pinpoint_message( const char *msg, const char arg[] ) |
| { |
| char errmsg[MAXLINE]; |
| |
| snprintf( errmsg, sizeof(errmsg), msg, arg ); |
| pinpoint_message( errmsg ); |
| } |
| |
| |
| /* pinpoint_message - write out a message, pinpointing its location */ |
| |
| void pinpoint_message( const char *str ) |
| { |
| line_pinpoint( str, linenum ); |
| } |
| |
| |
| /* line_warning - report a warning at a given line, unless -w was given */ |
| |
| void line_warning( const char *str, int line ) |
| { |
| char warning[MAXLINE]; |
| |
| if ( ! nowarn ) |
| { |
| snprintf( warning, sizeof(warning), "warning, %s", str ); |
| line_pinpoint( warning, line ); |
| } |
| } |
| |
| |
| /* line_pinpoint - write out a message, pinpointing it at the given line */ |
| |
| void line_pinpoint( const char *str, int line ) |
| { |
| fprintf( stderr, "%s:%d: %s\n", infilename, line, str ); |
| } |
| |
| |
| /* yyerror - eat up an error message from the parser; |
| * currently, messages are ignore |
| */ |
| |
| void yyerror( const char *msg ) |
| { |
| (void)msg; |
| } |