| /* |
| * ***************************************************************************** |
| * |
| * SPDX-License-Identifier: BSD-2-Clause |
| * |
| * Copyright (c) 2018-2021 Gavin D. Howard and contributors. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, this |
| * list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * ***************************************************************************** |
| * |
| * The lexer for dc. |
| * |
| */ |
| |
| #if DC_ENABLED |
| |
| #include <ctype.h> |
| |
| #include <dc.h> |
| #include <vm.h> |
| |
| bool dc_lex_negCommand(BcLex *l) { |
| char c = l->buf[l->i]; |
| return !BC_LEX_NUM_CHAR(c, false, false); |
| } |
| |
| /** |
| * Processes a dc command that needs a register. This is where the |
| * extended-register extension is implemented. |
| * @param l The lexer. |
| */ |
| static void dc_lex_register(BcLex *l) { |
| |
| // If extended register is enabled and the character is whitespace... |
| if (DC_X && isspace(l->buf[l->i - 1])) { |
| |
| char c; |
| |
| // Eat the whitespace. |
| bc_lex_whitespace(l); |
| c = l->buf[l->i]; |
| |
| // Check for a letter or underscore. |
| if (BC_ERR(!isalpha(c) && c != '_')) |
| bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); |
| |
| // Parse a normal identifier. |
| l->i += 1; |
| bc_lex_name(l); |
| } |
| else { |
| |
| // I don't allow newlines because newlines are used for controlling when |
| // execution happens, and allowing newlines would just be complex. |
| if (BC_ERR(l->buf[l->i - 1] == '\n')) |
| bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]); |
| |
| // Set the lexer string and token. |
| bc_vec_popAll(&l->str); |
| bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]); |
| bc_vec_pushByte(&l->str, '\0'); |
| l->t = BC_LEX_NAME; |
| } |
| } |
| |
| /** |
| * Parses a dc string. Since dc's strings need to check for balanced brackets, |
| * we can't just parse bc and dc strings with different start and end |
| * characters. Oh, and dc strings need to check for escaped brackets. |
| * @param l The lexer. |
| */ |
| static void dc_lex_string(BcLex *l) { |
| |
| size_t depth, nls, i; |
| char c; |
| bool got_more; |
| |
| // Set the token and clear the string. |
| l->t = BC_LEX_STR; |
| bc_vec_popAll(&l->str); |
| |
| do { |
| |
| depth = 1; |
| nls = 0; |
| got_more = false; |
| |
| assert(!l->is_stdin || l->buf == vm.buffer.v); |
| |
| // This is the meat. As long as we don't run into the NUL byte, and we |
| // have "depth", which means we haven't completely balanced brackets |
| // yet, we continue eating the string. |
| for (i = l->i; (c = l->buf[i]) && depth; ++i) { |
| |
| // Check for escaped brackets and set the depths as appropriate. |
| if (c == '\\') { |
| c = l->buf[++i]; |
| if (!c) break; |
| } |
| else { |
| depth += (c == '['); |
| depth -= (c == ']'); |
| } |
| |
| // We want to adjust the line in the lexer as necessary. |
| nls += (c == '\n'); |
| |
| if (depth) bc_vec_push(&l->str, &c); |
| } |
| |
| if (BC_ERR(c == '\0' && depth)) { |
| if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l); |
| if (got_more) bc_vec_popAll(&l->str); |
| } |
| |
| } while (got_more && depth); |
| |
| // Obviously, if we didn't balance, that's an error. |
| if (BC_ERR(c == '\0' && depth)) { |
| l->i = i; |
| bc_lex_err(l, BC_ERR_PARSE_STRING); |
| } |
| |
| bc_vec_pushByte(&l->str, '\0'); |
| |
| l->i = i; |
| l->line += nls; |
| } |
| |
| /** |
| * Lexes a dc token. This is the dc implementation of BcLexNext. |
| * @param l The lexer. |
| */ |
| void dc_lex_token(BcLex *l) { |
| |
| char c = l->buf[l->i++], c2; |
| size_t i; |
| |
| // If the last token was a command that needs a register, we need to parse a |
| // register, so do so. |
| for (i = 0; i < dc_lex_regs_len; ++i) { |
| |
| // If the token is a register token, take care of it and return. |
| if (l->last == dc_lex_regs[i]) { |
| dc_lex_register(l); |
| return; |
| } |
| } |
| |
| // These lines are for tokens that easily correspond to one character. We |
| // just set the token. |
| if (c >= '"' && c <= '~' && |
| (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID) |
| { |
| return; |
| } |
| |
| // This is the workhorse of the lexer when more complicated things are |
| // needed. |
| switch (c) { |
| |
| case '\0': |
| case '\n': |
| case '\t': |
| case '\v': |
| case '\f': |
| case '\r': |
| case ' ': |
| { |
| bc_lex_commonTokens(l, c); |
| break; |
| } |
| |
| // We don't have the ! command, so we always expect certain things |
| // after the exclamation point. |
| case '!': |
| { |
| c2 = l->buf[l->i]; |
| |
| if (c2 == '=') l->t = BC_LEX_OP_REL_NE; |
| else if (c2 == '<') l->t = BC_LEX_OP_REL_LE; |
| else if (c2 == '>') l->t = BC_LEX_OP_REL_GE; |
| else bc_lex_invalidChar(l, c); |
| |
| l->i += 1; |
| |
| break; |
| } |
| |
| case '#': |
| { |
| bc_lex_lineComment(l); |
| break; |
| } |
| |
| case '.': |
| { |
| c2 = l->buf[l->i]; |
| |
| // If the character after is a number, this dot is part of a number. |
| // Otherwise, it's the BSD dot (equivalent to last). |
| if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false))) |
| bc_lex_number(l, c); |
| else bc_lex_invalidChar(l, c); |
| |
| break; |
| } |
| |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| case 'A': |
| case 'B': |
| case 'C': |
| case 'D': |
| case 'E': |
| case 'F': |
| { |
| bc_lex_number(l, c); |
| break; |
| } |
| |
| case '[': |
| { |
| dc_lex_string(l); |
| break; |
| } |
| |
| default: |
| { |
| bc_lex_invalidChar(l, c); |
| } |
| } |
| } |
| #endif // DC_ENABLED |