blob: a2e0b200a2f0cf121c5866a88913930b74bef1e0 [file] [log] [blame]
/*
* *****************************************************************************
*
* Copyright (c) 2018-2019 Gavin D. Howard and contributors.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* *****************************************************************************
*
* The lexer for bc.
*
*/
#if BC_ENABLED
#include <assert.h>
#include <ctype.h>
#include <string.h>
#include <lex.h>
#include <bc.h>
#include <vm.h>
/**
 * Lexes a keyword or a name. The caller has already advanced l->i past the
 * first character, so the text being examined starts at l->buf[l->i - 1].
 *
 * Each entry of bc_lex_kws is tried in order. A keyword only matches when the
 * character right after it is neither alphanumeric nor '_'. On a match, l->t
 * is set to BC_LEX_KW_AUTO plus the entry's index, and l->i is moved past the
 * keyword. If BC_LEX_KW_POSIX() is false for the entry, BC_ERROR_POSIX_KW is
 * reported through bc_lex_verr().
 *
 * When no keyword matches, bc_lex_name() is called and BC_ERROR_POSIX_NAME_LEN
 * is reported if l->str.len - 1 is greater than 1.
 *
 * @param l  The lexer.
 * @return   BC_STATUS_SUCCESS, or the status returned by bc_lex_verr().
 */
static BcStatus bc_lex_identifier(BcLex *l) {

	BcStatus s = BC_STATUS_SUCCESS;
	size_t i;
	const char *buf = l->buf + l->i - 1;

	for (i = 0; i < bc_lex_kws_len; ++i) {

		const BcLexKeyword *kw = bc_lex_kws + i;
		size_t n = BC_LEX_KW_LEN(kw);

		// The <ctype.h> functions are only defined for values representable
		// as unsigned char (or EOF). Plain char may be signed, so a byte
		// >= 0x80 after the keyword must be converted before the call.
		if (!strncmp(buf, kw->name, n) &&
		    !isalnum((unsigned char) buf[n]) && buf[n] != '_')
		{
			l->t = BC_LEX_KW_AUTO + (BcLexType) i;

			if (!BC_LEX_KW_POSIX(kw)) {
				s = bc_lex_verr(l, BC_ERROR_POSIX_KW, kw->name);
				if (BC_ERR(s)) return s;
			}

			// We minus 1 because the index has already been incremented.
			l->i += n - 1;

			return BC_STATUS_SUCCESS;
		}
	}

	bc_lex_name(l);

	// NOTE(review): presumably l->str.len counts a terminating NUL, making
	// this a check for names longer than one character; confirm against
	// bc_lex_name().
	if (BC_ERR(l->str.len - 1 > 1))
		s = bc_lex_verr(l, BC_ERROR_POSIX_NAME_LEN, l->str.v);

	return s;
}
/**
 * Lexes a string literal. On entry l->i indexes the first character after the
 * opening quote. The text up to (not including) the closing quote is stored
 * in l->str via bc_vec_string(), l->i is left just past the closing quote, and
 * l->line is advanced by the number of newlines found inside the string.
 *
 * If the buffer's NUL byte is reached before a closing quote, l->i is set to
 * the index of that NUL and BC_ERROR_PARSE_STRING is reported.
 *
 * @param l  The lexer.
 * @return   BC_STATUS_SUCCESS, or the status returned by bc_lex_err().
 */
static BcStatus bc_lex_string(BcLex *l) {

	const char *text = l->buf;
	size_t end = l->i;
	size_t newlines = 0;

	l->t = BC_LEX_STR;

	// Scan forward to the closing quote, counting newlines on the way.
	while (text[end] != '\0' && text[end] != '"') {
		if (text[end] == '\n') ++newlines;
		++end;
	}

	// Hitting the NUL byte first means the string was never closed.
	if (BC_ERR(text[end] == '\0')) {
		l->i = end;
		return bc_lex_err(l, BC_ERROR_PARSE_STRING);
	}

	bc_vec_string(&l->str, end - l->i, text + l->i);

	// Step over the closing quote.
	l->i = end + 1;
	l->line += newlines;

	return BC_STATUS_SUCCESS;
}
/**
 * Picks between two token types depending on whether the next character in
 * the buffer is '='.
 *
 * @param l        The lexer.
 * @param with     Token type stored in l->t when the next character is '=';
 *                 the '=' is consumed in that case.
 * @param without  Token type stored in l->t otherwise; l->i is not changed.
 */
static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) {

	if (l->buf[l->i] != '=') {
		l->t = without;
		return;
	}

	l->t = with;
	++l->i;
}
/**
 * Lexes a single token. The character at l->buf[l->i] is read and l->i is
 * advanced past it; the switch below then either sets l->t directly or hands
 * off to a helper, peeking at (and sometimes consuming) the following
 * character for multi-character operators.
 *
 * @param l  The lexer.
 * @return   BC_STATUS_SUCCESS, or the status produced by one of the error or
 *           helper routines called for the character.
 */
BcStatus bc_lex_token(BcLex *l) {

	BcStatus status = BC_STATUS_SUCCESS;
	char cur;

	cur = l->buf[l->i];
	l->i += 1;

	// Dispatch on the character that was just consumed.
	switch (cur) {

		// NUL and whitespace are delegated to bc_lex_commonTokens().
		case '\0': case '\n': case '\t': case '\v':
		case '\f': case '\r': case ' ':
			bc_lex_commonTokens(l, cur);
			break;

		// "!=" or a lone "!"; the lone form is reported as a POSIX error.
		case '!':
			bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
			if (l->t == BC_LEX_OP_BOOL_NOT) {
				status = bc_lex_verr(l, BC_ERROR_POSIX_BOOL, "!");
				if (BC_ERR(status)) return status;
			}
			break;

		case '"':
			status = bc_lex_string(l);
			break;

		// '#' is reported as a POSIX error before the line comment is lexed.
		case '#':
			status = bc_lex_err(l, BC_ERROR_POSIX_COMMENT);
			if (BC_ERR(status)) return status;
			bc_lex_lineComment(l);
			break;

		case '%':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
			break;

		// Only "&&" is accepted; a single '&' is an invalid character.
		case '&':
			if (BC_NO_ERR(l->buf[l->i] == '&')) {
				status = bc_lex_verr(l, BC_ERROR_POSIX_BOOL, "&&");
				if (BC_ERR(status)) return status;
				++l->i;
				l->t = BC_LEX_OP_BOOL_AND;
			}
			else status = bc_lex_invalidChar(l, cur);
			break;

#if BC_ENABLE_EXTRA_MATH
		case '$':
			l->t = BC_LEX_OP_TRUNC;
			break;

		case '@':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
			break;
#endif // BC_ENABLE_EXTRA_MATH

		// The token type is BC_LEX_LPAREN offset by the distance of the
		// character from '('.
		case '(': case ')':
			l->t = (BcLexType) (cur - '(' + BC_LEX_LPAREN);
			break;

		case '*':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
			break;

		// "++", "+=", or "+".
		case '+':
			if (l->buf[l->i] == '+') {
				++l->i;
				l->t = BC_LEX_OP_INC;
			}
			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
			break;

		case ',':
			l->t = BC_LEX_COMMA;
			break;

		// "--", "-=", or "-".
		case '-':
			if (l->buf[l->i] == '-') {
				++l->i;
				l->t = BC_LEX_OP_DEC;
			}
			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
			break;

		// A dot followed by a number character begins a number. Otherwise the
		// token becomes BC_LEX_KW_LAST and a POSIX error is reported.
		case '.':
		{
			const char next = l->buf[l->i];

			if (BC_LEX_NUM_CHAR(next, true, false))
				status = bc_lex_number(l, cur);
			else {
				l->t = BC_LEX_KW_LAST;
				status = bc_lex_err(l, BC_ERROR_POSIX_DOT);
			}
			break;
		}

		// "/*" opens a comment; otherwise "/=" or "/".
		case '/':
			if (l->buf[l->i] == '*') status = bc_lex_comment(l);
			else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
			break;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		// Apparently, GNU bc (and maybe others) allows any uppercase letter as
		// a number. When single digits, they act like the ones above. When
		// multi-digit, any letter above the input base is automatically set to
		// the biggest allowable digit in the input base.
		case 'G': case 'H': case 'I': case 'J': case 'K':
		case 'L': case 'M': case 'N': case 'O': case 'P':
		case 'Q': case 'R': case 'S': case 'T': case 'U':
		case 'V': case 'W': case 'X': case 'Y': case 'Z':
			status = bc_lex_number(l, cur);
			break;

		case ';':
			l->t = BC_LEX_SCOLON;
			break;

		// With extra math, "<<" and "<<=" are tried first; then "<=" or "<".
		case '<':
#if BC_ENABLE_EXTRA_MATH
			if (l->buf[l->i] == '<') {
				++l->i;
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
				break;
			}
#endif // BC_ENABLE_EXTRA_MATH
			bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
			break;

		case '=':
			bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
			break;

		// With extra math, ">>" and ">>=" are tried first; then ">=" or ">".
		case '>':
#if BC_ENABLE_EXTRA_MATH
			if (l->buf[l->i] == '>') {
				++l->i;
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
				break;
			}
#endif // BC_ENABLE_EXTRA_MATH
			bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
			break;

		// The token type is BC_LEX_LBRACKET offset by the distance of the
		// character from '['.
		// NOTE(review): presumably the BcLexType enum is ordered so that this
		// lands on the right-bracket token for ']'; confirm in lex.h.
		case '[': case ']':
			l->t = (BcLexType) (cur - '[' + BC_LEX_LBRACKET);
			break;

		// A backslash directly before a newline is lexed as whitespace; any
		// other backslash is an invalid character.
		case '\\':
			if (BC_NO_ERR(l->buf[l->i] == '\n')) {
				++l->i;
				l->t = BC_LEX_WHITESPACE;
			}
			else status = bc_lex_invalidChar(l, cur);
			break;

		case '^':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
			break;

		// Lowercase letters start a keyword or a name.
		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
		case 'y': case 'z':
			status = bc_lex_identifier(l);
			break;

		// The token type is BC_LEX_LBRACE offset by the distance of the
		// character from '{'.
		// NOTE(review): presumably the BcLexType enum is ordered so that this
		// lands on the right-brace token for '}'; confirm in lex.h.
		case '{': case '}':
			l->t = (BcLexType) (cur - '{' + BC_LEX_LBRACE);
			break;

		// Only "||" is accepted; a single '|' is an invalid character.
		case '|':
			if (BC_NO_ERR(l->buf[l->i] == '|')) {
				status = bc_lex_verr(l, BC_ERROR_POSIX_BOOL, "||");
				if (BC_ERR(status)) return status;
				++l->i;
				l->t = BC_LEX_OP_BOOL_OR;
			}
			else status = bc_lex_invalidChar(l, cur);
			break;

		default:
			status = bc_lex_invalidChar(l, cur);
			break;
	}

	return status;
}
#endif // BC_ENABLED