| /*============================================================================= |
| Boost.Wave: A Standard compliant C++ preprocessor library |
| |
| Sample: IDL lexer |
| |
| http://www.boost.org/ |
| |
| Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost |
| Software License, Version 1.0. (See accompanying file |
| LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| =============================================================================*/ |
| |
| #include <ctime> |
| #include <cstdlib> |
| #include <cstdio> |
| #include <cstring> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <fcntl.h> |
| |
| #include <boost/config.hpp> |
| |
| #if defined(BOOST_HAS_UNISTD_H) |
| #include <unistd.h> |
| #else |
| #include <io.h> |
| #endif |
| |
| #include <boost/assert.hpp> |
| #include <boost/detail/workaround.hpp> |
| |
| // reuse the token ids and re2c helper functions from the default C++ lexer |
| #include <boost/wave/token_ids.hpp> |
| #include <boost/wave/cpplexer/re2clex/aq.hpp> |
| #include <boost/wave/cpplexer/re2clex/scanner.hpp> |
| #include <boost/wave/cpplexer/cpplexer_exceptions.hpp> |
| |
| #include "idl_re.hpp" |
| |
| #if defined(_MSC_VER) && !defined(__COMO__) |
| #pragma warning (disable: 4101) // 'foo' : unreferenced local variable |
| #pragma warning (disable: 4102) // 'foo' : unreferenced label |
| #endif |
| |
| #define BOOST_WAVE_BSIZE 196608 |
| |
| #define YYCTYPE uchar |
| #define YYCURSOR cursor |
| #define YYLIMIT s->lim |
| #define YYMARKER s->ptr |
| #define YYFILL(n) {cursor = fill(s, cursor);} |
| |
| //#define BOOST_WAVE_RET(i) {s->cur = cursor; return (i);} |
| #define BOOST_WAVE_RET(i) \ |
| { \ |
| s->line += count_backslash_newlines(s, cursor); \ |
| s->cur = cursor; \ |
| return (i); \ |
| } \ |
| /**/ |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| namespace boost { |
| namespace wave { |
| namespace idllexer { |
| namespace re2clex { |
| |
| #define RE2C_ASSERT BOOST_ASSERT |
| |
| int |
| get_one_char(boost::wave::cpplexer::re2clex::Scanner *s) |
| { |
| using namespace boost::wave::cpplexer::re2clex; |
| if (0 != s->act) { |
| RE2C_ASSERT(s->first != 0 && s->last != 0); |
| RE2C_ASSERT(s->first <= s->act && s->act <= s->last); |
| if (s->act < s->last) |
| return *(s->act)++; |
| } |
| return -1; |
| } |
| |
| std::ptrdiff_t |
| rewind_stream (boost::wave::cpplexer::re2clex::Scanner *s, int cnt) |
| { |
| if (0 != s->act) { |
| RE2C_ASSERT(s->first != 0 && s->last != 0); |
| s->act += cnt; |
| RE2C_ASSERT(s->first <= s->act && s->act <= s->last); |
| return s->act - s->first; |
| } |
| return 0; |
| } |
| |
| std::size_t |
| get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner* s) |
| { |
| if (!AQ_EMPTY(s->eol_offsets)) |
| { |
| return s->eol_offsets->queue[s->eol_offsets->head]; |
| } |
| else |
| { |
| return (unsigned int)-1; |
| } |
| } |
| |
| void |
| adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner* s, |
| std::size_t adjustment) |
| { |
| boost::wave::cpplexer::re2clex::aq_queue q; |
| std::size_t i; |
| |
| if (!s->eol_offsets) |
| s->eol_offsets = boost::wave::cpplexer::re2clex::aq_create(); |
| |
| q = s->eol_offsets; |
| |
| if (AQ_EMPTY(q)) |
| return; |
| |
| i = q->head; |
| while (i != q->tail) |
| { |
| if (adjustment > q->queue[i]) |
| q->queue[i] = 0; |
| else |
| q->queue[i] -= adjustment; |
| ++i; |
| if (i == q->max_size) |
| i = 0; |
| } |
| if (adjustment > q->queue[i]) |
| q->queue[i] = 0; |
| else |
| q->queue[i] -= adjustment; |
| } |
| |
| int |
| count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner *s, |
| boost::wave::cpplexer::re2clex::uchar *cursor) |
| { |
| using namespace boost::wave::cpplexer::re2clex; |
| |
| std::size_t diff, offset; |
| int skipped = 0; |
| |
| /* figure out how many backslash-newlines skipped over unknowingly. */ |
| diff = cursor - s->bot; |
| offset = get_first_eol_offset(s); |
| while (offset <= diff && offset != (unsigned int)-1) |
| { |
| skipped++; |
| boost::wave::cpplexer::re2clex::aq_pop(s->eol_offsets); |
| offset = get_first_eol_offset(s); |
| } |
| return skipped; |
| } |
| |
| bool is_backslash( |
| boost::wave::cpplexer::re2clex::uchar *p, |
| boost::wave::cpplexer::re2clex::uchar *end, int &len) |
| { |
| if (*p == '\\') { |
| len = 1; |
| return true; |
| } |
| else if (*p == '?' && *(p+1) == '?' && (p+2 < end && *(p+2) == '/')) { |
| len = 3; |
| return true; |
| } |
| return false; |
| } |
| |
| boost::wave::cpplexer::re2clex::uchar * |
| fill(boost::wave::cpplexer::re2clex::Scanner *s, |
| boost::wave::cpplexer::re2clex::uchar *cursor) |
| { |
| using namespace std; // some systems have memcpy etc. in namespace std |
| using namespace boost::wave::cpplexer::re2clex; |
| |
| if(!s->eof) |
| { |
| uchar* p; |
| std::ptrdiff_t cnt = s->tok - s->bot; |
| if(cnt) |
| { |
| memcpy(s->bot, s->tok, s->lim - s->tok); |
| s->tok = s->bot; |
| s->ptr -= cnt; |
| cursor -= cnt; |
| s->lim -= cnt; |
| adjust_eol_offsets(s, cnt); |
| } |
| |
| if((s->top - s->lim) < BOOST_WAVE_BSIZE) |
| { |
| uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar)); |
| if (buf == 0) |
| { |
| using namespace std; // some systems have printf in std |
| if (0 != s->error_proc) { |
| (*s->error_proc)(s, |
| cpplexer::lexing_exception::unexpected_error, |
| "Out of memory!"); |
| } |
| else |
| printf("Out of memory!\n"); |
| |
| /* get the scanner to stop */ |
| *cursor = 0; |
| return cursor; |
| } |
| |
| memcpy(buf, s->tok, s->lim - s->tok); |
| s->tok = buf; |
| s->ptr = &buf[s->ptr - s->bot]; |
| cursor = &buf[cursor - s->bot]; |
| s->lim = &buf[s->lim - s->bot]; |
| s->top = &s->lim[BOOST_WAVE_BSIZE]; |
| free(s->bot); |
| s->bot = buf; |
| } |
| |
| if (s->act != 0) { |
| cnt = s->last - s->act; |
| if (cnt > BOOST_WAVE_BSIZE) |
| cnt = BOOST_WAVE_BSIZE; |
| memcpy(s->lim, s->act, cnt); |
| s->act += cnt; |
| if (cnt != BOOST_WAVE_BSIZE) |
| { |
| s->eof = &s->lim[cnt]; *(s->eof)++ = '\0'; |
| } |
| } |
| |
| /* backslash-newline erasing time */ |
| |
| /* first scan for backslash-newline and erase them */ |
| for (p = s->lim; p < s->lim + cnt - 2; ++p) |
| { |
| int len = 0; |
| if (is_backslash(p, s->lim + cnt, len)) |
| { |
| if (*(p+len) == '\n') |
| { |
| int offset = len + 1; |
| memmove(p, p + offset, s->lim + cnt - p - offset); |
| cnt -= offset; |
| --p; |
| aq_enqueue(s->eol_offsets, p - s->bot + 1); |
| } |
| else if (*(p+len) == '\r') |
| { |
| if (*(p+len+1) == '\n') |
| { |
| int offset = len + 2; |
| memmove(p, p + offset, s->lim + cnt - p - offset); |
| cnt -= offset; |
| --p; |
| } |
| else |
| { |
| int offset = len + 1; |
| memmove(p, p + offset, s->lim + cnt - p - offset); |
| cnt -= offset; |
| --p; |
| } |
| aq_enqueue(s->eol_offsets, p - s->bot + 1); |
| } |
| } |
| } |
| |
| /* FIXME: the following code should be fixed to recognize correctly the |
| trigraph backslash token */ |
| |
| /* check to see if what we just read ends in a backslash */ |
| if (cnt >= 2) |
| { |
| uchar last = s->lim[cnt-1]; |
| uchar last2 = s->lim[cnt-2]; |
| /* check \ EOB */ |
| if (last == '\\') |
| { |
| int next = get_one_char(s); |
| /* check for \ \n or \ \r or \ \r \n straddling the border */ |
| if (next == '\n') |
| { |
| --cnt; /* chop the final \, we've already read the \n. */ |
| boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, |
| cnt + (s->lim - s->bot)); |
| } |
| else if (next == '\r') |
| { |
| int next2 = get_one_char(s); |
| if (next2 == '\n') |
| { |
| --cnt; /* skip the backslash */ |
| } |
| else |
| { |
| /* rewind one, and skip one char */ |
| rewind_stream(s, -1); |
| --cnt; |
| } |
| boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, |
| cnt + (s->lim - s->bot)); |
| } |
| else if (next != -1) /* -1 means end of file */ |
| { |
| /* next was something else, so rewind the stream */ |
| rewind_stream(s, -1); |
| } |
| } |
| /* check \ \r EOB */ |
| else if (last == '\r' && last2 == '\\') |
| { |
| int next = get_one_char(s); |
| if (next == '\n') |
| { |
| cnt -= 2; /* skip the \ \r */ |
| } |
| else |
| { |
| /* rewind one, and skip two chars */ |
| rewind_stream(s, -1); |
| cnt -= 2; |
| } |
| boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, |
| cnt + (s->lim - s->bot)); |
| } |
| /* check \ \n EOB */ |
| else if (last == '\n' && last2 == '\\') |
| { |
| cnt -= 2; |
| boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, |
| cnt + (s->lim - s->bot)); |
| } |
| } |
| |
| s->lim += cnt; |
| if (s->eof) /* eof needs adjusting if we erased backslash-newlines */ |
| { |
| s->eof = s->lim; |
| *(s->eof)++ = '\0'; |
| } |
| } |
| return cursor; |
| } |
| |
| boost::wave::token_id |
| scan(boost::wave::cpplexer::re2clex::Scanner *s) |
| { |
| using namespace boost::wave::cpplexer::re2clex; |
| |
| uchar *cursor = s->tok = s->cur; |
| |
| /*!re2c |
| re2c:indent:string = " "; |
| any = [\t\v\f\r\n\040-\377]; |
| anyctrl = [\000-\377]; |
| OctalDigit = [0-7]; |
| Digit = [0-9]; |
| HexDigit = [a-fA-F0-9]; |
| ExponentPart = [Ee] [+-]? Digit+; |
| FractionalConstant = (Digit* "." Digit+) | (Digit+ "."); |
| FloatingSuffix = [fF][lL]?|[lL][fF]?; |
| IntegerSuffix = [uU][lL]?|[lL][uU]?; |
| FixedPointSuffix = [dD]; |
| Backslash = [\\]|"??/"; |
| EscapeSequence = Backslash ([abfnrtv?'"] | Backslash | "x" HexDigit+ | OctalDigit OctalDigit? OctalDigit?); |
| HexQuad = HexDigit HexDigit HexDigit HexDigit; |
| UniversalChar = Backslash ("u" HexQuad | "U" HexQuad HexQuad); |
| Newline = "\r\n" | "\n" | "\r"; |
| PPSpace = ([ \t]|("/*"(any\[*]|Newline|("*"+(any\[*/]|Newline)))*"*"+"/"))*; |
| Pound = "#" | "??=" | "%:"; |
| */ |
| |
| /*!re2c |
| "/*" { goto ccomment; } |
| "//" { goto cppcomment; } |
| |
| "TRUE" { BOOST_WAVE_RET(T_TRUE); } |
| "FALSE" { BOOST_WAVE_RET(T_FALSE); } |
| |
| "{" { BOOST_WAVE_RET(T_LEFTBRACE); } |
| "}" { BOOST_WAVE_RET(T_RIGHTBRACE); } |
| "[" { BOOST_WAVE_RET(T_LEFTBRACKET); } |
| "]" { BOOST_WAVE_RET(T_RIGHTBRACKET); } |
| "#" { BOOST_WAVE_RET(T_POUND); } |
| "##" { BOOST_WAVE_RET(T_POUND_POUND); } |
| "(" { BOOST_WAVE_RET(T_LEFTPAREN); } |
| ")" { BOOST_WAVE_RET(T_RIGHTPAREN); } |
| ";" { BOOST_WAVE_RET(T_SEMICOLON); } |
| ":" { BOOST_WAVE_RET(T_COLON); } |
| "?" { BOOST_WAVE_RET(T_QUESTION_MARK); } |
| "." { BOOST_WAVE_RET(T_DOT); } |
| "+" { BOOST_WAVE_RET(T_PLUS); } |
| "-" { BOOST_WAVE_RET(T_MINUS); } |
| "*" { BOOST_WAVE_RET(T_STAR); } |
| "/" { BOOST_WAVE_RET(T_DIVIDE); } |
| "%" { BOOST_WAVE_RET(T_PERCENT); } |
| "^" { BOOST_WAVE_RET(T_XOR); } |
| "&" { BOOST_WAVE_RET(T_AND); } |
| "|" { BOOST_WAVE_RET(T_OR); } |
| "~" { BOOST_WAVE_RET(T_COMPL); } |
| "!" { BOOST_WAVE_RET(T_NOT); } |
| "=" { BOOST_WAVE_RET(T_ASSIGN); } |
| "<" { BOOST_WAVE_RET(T_LESS); } |
| ">" { BOOST_WAVE_RET(T_GREATER); } |
| "<<" { BOOST_WAVE_RET(T_SHIFTLEFT); } |
| ">>" { BOOST_WAVE_RET(T_SHIFTRIGHT); } |
| "==" { BOOST_WAVE_RET(T_EQUAL); } |
| "!=" { BOOST_WAVE_RET(T_NOTEQUAL); } |
| "<=" { BOOST_WAVE_RET(T_LESSEQUAL); } |
| ">=" { BOOST_WAVE_RET(T_GREATEREQUAL); } |
| "&&" { BOOST_WAVE_RET(T_ANDAND); } |
| "||" { BOOST_WAVE_RET(T_OROR); } |
| "++" { BOOST_WAVE_RET(T_PLUSPLUS); } |
| "--" { BOOST_WAVE_RET(T_MINUSMINUS); } |
| "," { BOOST_WAVE_RET(T_COMMA); } |
| |
| ([a-zA-Z_] | UniversalChar) ([a-zA-Z_0-9] | UniversalChar)* |
| { BOOST_WAVE_RET(T_IDENTIFIER); } |
| |
| (("0" [xX] HexDigit+) | ("0" OctalDigit*) | ([1-9] Digit*)) IntegerSuffix? |
| { BOOST_WAVE_RET(T_INTLIT); } |
| |
| ((FractionalConstant ExponentPart?) | (Digit+ ExponentPart)) FloatingSuffix? |
| { BOOST_WAVE_RET(T_FLOATLIT); } |
| |
| (FractionalConstant | Digit+) FixedPointSuffix |
| { BOOST_WAVE_RET(T_FIXEDPOINTLIT); } |
| |
| "L"? (['] (EscapeSequence|any\[\n\r\\']|UniversalChar)+ [']) |
| { BOOST_WAVE_RET(T_CHARLIT); } |
| |
| "L"? (["] (EscapeSequence|any\[\n\r\\"]|UniversalChar)* ["]) |
| { BOOST_WAVE_RET(T_STRINGLIT); } |
| |
| |
| Pound PPSpace "include" PPSpace "<" (any\[\n\r>])+ ">" |
| { BOOST_WAVE_RET(T_PP_HHEADER); } |
| |
| Pound PPSpace "include" PPSpace "\"" (any\[\n\r"])+ "\"" |
| { BOOST_WAVE_RET(T_PP_QHEADER); } |
| |
| Pound PPSpace "include" PPSpace |
| { BOOST_WAVE_RET(T_PP_INCLUDE); } |
| |
| Pound PPSpace "if" { BOOST_WAVE_RET(T_PP_IF); } |
| Pound PPSpace "ifdef" { BOOST_WAVE_RET(T_PP_IFDEF); } |
| Pound PPSpace "ifndef" { BOOST_WAVE_RET(T_PP_IFNDEF); } |
| Pound PPSpace "else" { BOOST_WAVE_RET(T_PP_ELSE); } |
| Pound PPSpace "elif" { BOOST_WAVE_RET(T_PP_ELIF); } |
| Pound PPSpace "endif" { BOOST_WAVE_RET(T_PP_ENDIF); } |
| Pound PPSpace "define" { BOOST_WAVE_RET(T_PP_DEFINE); } |
| Pound PPSpace "undef" { BOOST_WAVE_RET(T_PP_UNDEF); } |
| Pound PPSpace "line" { BOOST_WAVE_RET(T_PP_LINE); } |
| Pound PPSpace "error" { BOOST_WAVE_RET(T_PP_ERROR); } |
| Pound PPSpace "pragma" { BOOST_WAVE_RET(T_PP_PRAGMA); } |
| |
| Pound PPSpace "warning" { BOOST_WAVE_RET(T_PP_WARNING); } |
| |
| [ \t\v\f]+ |
| { BOOST_WAVE_RET(T_SPACE); } |
| |
| Newline |
| { |
| s->line++; |
| BOOST_WAVE_RET(T_NEWLINE); |
| } |
| |
| "\000" |
| { |
| if(cursor != s->eof) |
| { |
| using namespace std; // some systems have printf in std |
| if (0 != s->error_proc) { |
| (*s->error_proc)(s, |
| cpplexer::lexing_exception::generic_lexing_error, |
| "'\\000' in input stream"); |
| } |
| else |
| printf("Error: 0 in file\n"); |
| } |
| BOOST_WAVE_RET(T_EOF); |
| } |
| |
| anyctrl |
| { |
| BOOST_WAVE_RET(TOKEN_FROM_ID(*s->tok, UnknownTokenType)); |
| } |
| */ |
| |
| ccomment: |
| /*!re2c |
| "*/" { BOOST_WAVE_RET(T_CCOMMENT); } |
| Newline |
| { |
| /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF);*/ |
| /*s->tok = cursor; */ |
| s->line += count_backslash_newlines(s, cursor) +1; |
| goto ccomment; |
| } |
| |
| any { goto ccomment; } |
| |
| "\000" |
| { |
| using namespace std; // some systems have printf in std |
| if(cursor == s->eof) |
| { |
| if (s->error_proc) |
| (*s->error_proc)(s, |
| cpplexer::lexing_exception::generic_lexing_warning, |
| "Unterminated comment"); |
| else |
| printf("Error: Unterminated comment\n"); |
| } |
| else |
| { |
| if (s->error_proc) |
| (*s->error_proc)(s, |
| cpplexer::lexing_exception::generic_lexing_error, |
| "'\\000' in input stream"); |
| else |
| printf("Error: 0 in file"); |
| } |
| /* adjust cursor such next call returns T_EOF */ |
| --YYCURSOR; |
| /* the comment is unterminated, but nevertheless its a comment */ |
| BOOST_WAVE_RET(T_CCOMMENT); |
| } |
| |
| anyctrl |
| { |
| if (s->error_proc) |
| (*s->error_proc)(s, |
| cpplexer::lexing_exception::generic_lexing_error, |
| "invalid character in input stream"); |
| else |
| printf("Error: 0 in file"); |
| } |
| |
| */ |
| |
| cppcomment: |
| /*!re2c |
| Newline |
| { |
| /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF); */ |
| /*s->tok = cursor; */ |
| s->line++; |
| BOOST_WAVE_RET(T_CPPCOMMENT); |
| } |
| |
| any { goto cppcomment; } |
| |
| "\000" |
| { |
| using namespace std; // some systems have printf in std |
| if(cursor != s->eof) |
| { |
| if (s->error_proc) |
| (*s->error_proc)(s, |
| cpplexer::lexing_exception::generic_lexing_error, |
| "'\\000' in input stream"); |
| else |
| printf("Error: 0 in file"); |
| } |
| /* adjust cursor such next call returns T_EOF */ |
| --YYCURSOR; |
| /* the comment is unterminated, but nevertheless its a comment */ |
| BOOST_WAVE_RET(T_CPPCOMMENT); |
| } |
| */ |
| |
| } /* end of scan */ |
| |
| #undef RE2C_ASSERT |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| } // namespace re2clex |
| } // namespace idllexer |
| } // namespace wave |
| } // namespace boost |