| /* |
| * ***************************************************************************** |
| * |
| * SPDX-License-Identifier: BSD-2-Clause |
| * |
| * Copyright (c) 2018-2021 Gavin D. Howard and contributors. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, this |
| * list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * ***************************************************************************** |
| * |
| * Definitions for program data. |
| * |
| */ |
| |
| #ifndef BC_LANG_H |
| #define BC_LANG_H |
| |
| #include <stdbool.h> |
| |
| // These have to come first to silence a warning on BC_C11 below. |
| #include <status.h> |
| #include <vector.h> |
| #include <num.h> |
| |
| #if BC_C11 |
| #include <assert.h> |
| #endif // BC_C11 |
| |
| /// The instructions for bytecode. |
| typedef enum BcInst |
| { |
| #if BC_ENABLED |
| /// Postfix increment and decrement. Prefix are translated into |
| /// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS. |
| BC_INST_INC = 0, |
| BC_INST_DEC, |
| #endif // BC_ENABLED |
| |
| /// Unary negation. |
| BC_INST_NEG, |
| |
| /// Boolean not. |
| BC_INST_BOOL_NOT, |
| |
| #if BC_ENABLE_EXTRA_MATH |
| /// Truncation operator. |
| BC_INST_TRUNC, |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| /// These should be self-explanatory. |
| BC_INST_POWER, |
| BC_INST_MULTIPLY, |
| BC_INST_DIVIDE, |
| BC_INST_MODULUS, |
| BC_INST_PLUS, |
| BC_INST_MINUS, |
| |
| #if BC_ENABLE_EXTRA_MATH |
| /// Places operator. |
| BC_INST_PLACES, |
| |
| /// Shift operators. |
| BC_INST_LSHIFT, |
| BC_INST_RSHIFT, |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| /// Comparison operators. |
| BC_INST_REL_EQ, |
| BC_INST_REL_LE, |
| BC_INST_REL_GE, |
| BC_INST_REL_NE, |
| BC_INST_REL_LT, |
| BC_INST_REL_GT, |
| |
| /// Boolean or and and. |
| BC_INST_BOOL_OR, |
| BC_INST_BOOL_AND, |
| |
| #if BC_ENABLED |
| /// Same as the normal operators, but assigment. So ^=, *=, /=, etc. |
| BC_INST_ASSIGN_POWER, |
| BC_INST_ASSIGN_MULTIPLY, |
| BC_INST_ASSIGN_DIVIDE, |
| BC_INST_ASSIGN_MODULUS, |
| BC_INST_ASSIGN_PLUS, |
| BC_INST_ASSIGN_MINUS, |
| #if BC_ENABLE_EXTRA_MATH |
| /// Places and shift assignment operators. |
| BC_INST_ASSIGN_PLACES, |
| BC_INST_ASSIGN_LSHIFT, |
| BC_INST_ASSIGN_RSHIFT, |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| /// Normal assignment. |
| BC_INST_ASSIGN, |
| |
| /// bc and dc detect when the value from an assignment is not necessary. |
| /// For example, a plain assignment statement means the value is never used. |
| /// In those cases, we can get lots of performance back by not even creating |
| /// a copy at all. In fact, it saves a copy, a push onto the results stack, |
| /// a pop from the results stack, and a free. Definitely worth it to detect. |
| BC_INST_ASSIGN_POWER_NO_VAL, |
| BC_INST_ASSIGN_MULTIPLY_NO_VAL, |
| BC_INST_ASSIGN_DIVIDE_NO_VAL, |
| BC_INST_ASSIGN_MODULUS_NO_VAL, |
| BC_INST_ASSIGN_PLUS_NO_VAL, |
| BC_INST_ASSIGN_MINUS_NO_VAL, |
| #if BC_ENABLE_EXTRA_MATH |
| /// Same as above. |
| BC_INST_ASSIGN_PLACES_NO_VAL, |
| BC_INST_ASSIGN_LSHIFT_NO_VAL, |
| BC_INST_ASSIGN_RSHIFT_NO_VAL, |
| #endif // BC_ENABLE_EXTRA_MATH |
| #endif // BC_ENABLED |
| |
| /// Normal assignment that pushes no value on the stack. |
| BC_INST_ASSIGN_NO_VAL, |
| |
| /// Push a constant onto the results stack. |
| BC_INST_NUM, |
| |
| /// Push a variable onto the results stack. |
| BC_INST_VAR, |
| |
| /// Push an array element onto the results stack. |
| BC_INST_ARRAY_ELEM, |
| |
| /// Push an array onto the results stack. This is different from pushing an |
| /// array *element* onto the results stack; it pushes a reference to the |
| /// whole array. This is needed in bc for function arguments that are |
| /// arrays. It is also needed for returning the length of an array. |
| BC_INST_ARRAY, |
| |
| /// Push a zero or a one onto the stack. These are special cased because it |
| /// does help performance, particularly for one since inc/dec operators |
| /// use it. |
| BC_INST_ZERO, |
| BC_INST_ONE, |
| |
| #if BC_ENABLED |
| /// Push the last printed value onto the stack. |
| BC_INST_LAST, |
| #endif // BC_ENABLED |
| |
| /// Push the value of any of the globals onto the stack. |
| BC_INST_IBASE, |
| BC_INST_OBASE, |
| BC_INST_SCALE, |
| |
| #if BC_ENABLE_EXTRA_MATH |
| /// Push the value of the seed global onto the stack. |
| BC_INST_SEED, |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| /// These are builtin functions. |
| BC_INST_LENGTH, |
| BC_INST_SCALE_FUNC, |
| BC_INST_SQRT, |
| BC_INST_ABS, |
| |
| #if BC_ENABLE_EXTRA_MATH |
| /// Another builtin function. |
| BC_INST_IRAND, |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| /// Asciify. |
| BC_INST_ASCIIFY, |
| |
| /// Another builtin function. |
| BC_INST_READ, |
| |
| #if BC_ENABLE_EXTRA_MATH |
| /// Another builtin function. |
| BC_INST_RAND, |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| /// Return the max for the various globals. |
| BC_INST_MAXIBASE, |
| BC_INST_MAXOBASE, |
| BC_INST_MAXSCALE, |
| #if BC_ENABLE_EXTRA_MATH |
| /// Return the max value returned by rand(). |
| BC_INST_MAXRAND, |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| /// bc line_length() builtin function. |
| BC_INST_LINE_LENGTH, |
| |
| #if BC_ENABLED |
| |
| /// bc global_stacks() builtin function. |
| BC_INST_GLOBAL_STACKS, |
| |
| #endif // BC_ENABLED |
| |
| /// bc leading_zero() builtin function. |
| BC_INST_LEADING_ZERO, |
| |
| /// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc. |
| /// dc uses this instruction to print, but not pop. That's valid in dc. |
| /// However, in bc, it is *never* valid to print without popping. In bc, |
| /// BC_INST_PRINT_POP is used to indicate when a string should be printed |
| /// because of a print statement or whether it should be printed raw. The |
| /// reason for this is because a print statement handles escaped characters. |
| /// So BC_INST_PRINT_POP is for printing a string from a print statement, |
| /// BC_INST_PRINT_STR is for printing a string by itself. |
| /// |
| /// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints. |
| /// |
| /// Oh, and BC_INST_STR pushes a string onto the results stack. |
| BC_INST_PRINT, |
| BC_INST_PRINT_POP, |
| BC_INST_STR, |
| #if BC_ENABLED |
| BC_INST_PRINT_STR, |
| |
| /// Jumps unconditionally. |
| BC_INST_JUMP, |
| |
| /// Jumps if the top of the results stack is zero (condition failed). It |
| /// turns out that we only want to jump when conditions fail to "skip" code. |
| BC_INST_JUMP_ZERO, |
| |
| /// Call a function. |
| BC_INST_CALL, |
| |
| /// Return the top of the stack to the caller. |
| BC_INST_RET, |
| |
| /// Return 0 to the caller. |
| BC_INST_RET0, |
| |
| /// Special return instruction for void functions. |
| BC_INST_RET_VOID, |
| |
| /// Special halt instruction. |
| BC_INST_HALT, |
| #endif // BC_ENABLED |
| |
| /// Pop an item off of the results stack. |
| BC_INST_POP, |
| |
| /// Swaps the top two items on the results stack. |
| BC_INST_SWAP, |
| |
| /// Modular exponentiation. |
| BC_INST_MODEXP, |
| |
| /// Do divide and modulus at the same time. |
| BC_INST_DIVMOD, |
| |
| /// Turns a number into a string and prints it. |
| BC_INST_PRINT_STREAM, |
| |
| #if DC_ENABLED |
| |
| /// dc's return; it pops an executing string off of the stack. |
| BC_INST_POP_EXEC, |
| |
| /// Unconditionally execute a string. |
| BC_INST_EXECUTE, |
| |
| /// Conditionally execute a string. |
| BC_INST_EXEC_COND, |
| |
| /// Prints each item on the results stack, separated by newlines. |
| BC_INST_PRINT_STACK, |
| |
| /// Pops everything off of the results stack. |
| BC_INST_CLEAR_STACK, |
| |
| /// Pushes the current length of a register stack onto the results stack. |
| BC_INST_REG_STACK_LEN, |
| |
| /// Pushes the current length of the results stack onto the results stack. |
| BC_INST_STACK_LEN, |
| |
| /// Pushes a copy of the item on the top of the results stack onto the |
| /// results stack. |
| BC_INST_DUPLICATE, |
| |
| /// Copies the value in a register and pushes the copy onto the results |
| /// stack. |
| BC_INST_LOAD, |
| |
| /// Pops an item off of a register stack and pushes it onto the results |
| /// stack. |
| BC_INST_PUSH_VAR, |
| |
| /// Pops an item off of the results stack and pushes it onto a register's |
| /// stack. |
| BC_INST_PUSH_TO_VAR, |
| |
| /// Quit. |
| BC_INST_QUIT, |
| |
| /// Quit executing some number of strings. |
| BC_INST_NQUIT, |
| |
| /// Push the depth of the execution stack onto the stack. |
| BC_INST_EXEC_STACK_LEN, |
| |
| #endif // DC_ENABLED |
| |
| /// Invalid instruction. |
| BC_INST_INVALID, |
| |
| } BcInst; |
| |
| #if BC_C11 |
| _Static_assert(BC_INST_INVALID <= UCHAR_MAX, |
| "Too many instructions to fit into an unsigned char"); |
| #endif // BC_C11 |
| |
| /// Used by maps to identify where items are in the array. |
| typedef struct BcId |
| { |
| /// The name of the item. |
| char* name; |
| |
| /// The index into the array where the item is. |
| size_t idx; |
| |
| } BcId; |
| |
| /// The location of a var, array, or array element. |
| typedef struct BcLoc |
| { |
| /// The index of the var or array. |
| size_t loc; |
| |
| /// The index of the array element. Only used for array elements. |
| size_t idx; |
| |
| } BcLoc; |
| |
| /// An entry for a constant. |
| typedef struct BcConst |
| { |
| /// The original string as parsed from the source code. |
| char* val; |
| |
| /// The last base that the constant was parsed in. |
| BcBigDig base; |
| |
| /// The parsed constant. |
| BcNum num; |
| |
| } BcConst; |
| |
| /// A function. This is also used in dc, not just bc. The reason is that strings |
| /// are executed in dc, and they are converted to functions in order to be |
| /// executed. |
| typedef struct BcFunc |
| { |
| /// The bytecode instructions. |
| BcVec code; |
| |
| #if BC_ENABLED |
| |
| /// The labels. This is a vector of indices. The index is the index into |
| /// the bytecode vector where the label is. |
| BcVec labels; |
| |
| /// The autos for the function. The first items are the parameters, and the |
| /// arguments to the parameters must match the types in this vector. |
| BcVec autos; |
| |
| /// The number of parameters the function takes. |
| size_t nparams; |
| |
| #endif // BC_ENABLED |
| |
| /// The strings encountered in the function. |
| BcVec strs; |
| |
| /// The constants encountered in the function. |
| BcVec consts; |
| |
| /// The function's name. |
| const char* name; |
| |
| #if BC_ENABLED |
| /// True if the function is a void function. |
| bool voidfn; |
| #endif // BC_ENABLED |
| |
| } BcFunc; |
| |
| /// Types of results that can be pushed onto the results stack. |
| typedef enum BcResultType |
| { |
| /// Result is a variable. |
| BC_RESULT_VAR, |
| |
| /// Result is an array element. |
| BC_RESULT_ARRAY_ELEM, |
| |
| /// Result is an array. This is only allowed for function arguments or |
| /// returning the length of the array. |
| BC_RESULT_ARRAY, |
| |
| /// Result is a string. |
| BC_RESULT_STR, |
| |
| /// Result is a temporary. This is used for the result of almost all |
| /// expressions. |
| BC_RESULT_TEMP, |
| |
| /// Special casing the two below gave performance improvements. |
| |
| /// Result is a 0. |
| BC_RESULT_ZERO, |
| |
| /// Result is a 1. Useful for inc/dec operators. |
| BC_RESULT_ONE, |
| |
| #if BC_ENABLED |
| |
| /// Result is the special "last" variable. |
| BC_RESULT_LAST, |
| |
| /// Result is the return value of a void function. |
| BC_RESULT_VOID, |
| #endif // BC_ENABLED |
| |
| /// Result is the value of ibase. |
| BC_RESULT_IBASE, |
| |
| /// Result is the value of obase. |
| BC_RESULT_OBASE, |
| |
| /// Result is the value of scale. |
| BC_RESULT_SCALE, |
| |
| #if BC_ENABLE_EXTRA_MATH |
| |
| /// Result is the value of seed. |
| BC_RESULT_SEED, |
| |
| #endif // BC_ENABLE_EXTRA_MATH |
| |
| } BcResultType; |
| |
| /// A union to store data for various result types. |
| typedef union BcResultData |
| { |
| /// A number. Strings are stored here too; they are numbers with |
| /// cap == 0 && num == NULL. The string's index into the strings vector is |
| /// stored in the scale field. But this is only used for strings stored in |
| /// variables. |
| BcNum n; |
| |
| /// A vector. |
| BcVec v; |
| |
| /// A variable, array, or array element reference. This could also be a |
| /// string if a string is not stored in a variable (dc only). |
| BcLoc loc; |
| |
| } BcResultData; |
| |
| /// A tagged union for results. |
| typedef struct BcResult |
| { |
| /// The tag. The type of the result. |
| BcResultType t; |
| |
| /// The data. The data for the result. |
| BcResultData d; |
| |
| } BcResult; |
| |
| /// An instruction pointer. This is how bc knows where in the bytecode vector, |
| /// and which function, the current execution is. |
| typedef struct BcInstPtr |
| { |
| /// The index of the currently executing function in the fns vector. |
| size_t func; |
| |
| /// The index into the bytecode vector of the *next* instruction. |
| size_t idx; |
| |
| /// The length of the results vector when this function started executing. |
| /// This is mostly used for bc where functions should not affect the results |
| /// of their callers. |
| size_t len; |
| |
| } BcInstPtr; |
| |
| /// Types of identifiers. |
| typedef enum BcType |
| { |
| /// Variable. |
| BC_TYPE_VAR, |
| |
| /// Array. |
| BC_TYPE_ARRAY, |
| |
| #if BC_ENABLED |
| |
| /// Array reference. |
| BC_TYPE_REF, |
| |
| #endif // BC_ENABLED |
| |
| } BcType; |
| |
| #if BC_ENABLED |
| /// An auto variable in bc. |
| typedef struct BcAuto |
| { |
| /// The index of the variable in the vars or arrs vectors. |
| size_t idx; |
| |
| /// The type of the variable. |
| BcType type; |
| |
| } BcAuto; |
| #endif // BC_ENABLED |
| |
| /// Forward declaration. |
| struct BcProgram; |
| |
| /** |
| * Initializes a function. |
| * @param f The function to initialize. |
| * @param name The name of the function. The string is assumed to be owned by |
| * some other entity. |
| */ |
| void |
| bc_func_init(BcFunc* f, const char* name); |
| |
| /** |
| * Inserts an auto into the function. |
| * @param f The function to insert into. |
| * @param p The program. This is to search for the variable or array name. |
| * @param name The name of the auto to insert. |
| * @param type The type of the auto. |
| * @param line The line in the source code where the insert happened. This is |
| * solely for error reporting. |
| */ |
| void |
| bc_func_insert(BcFunc* f, struct BcProgram* p, char* name, BcType type, |
| size_t line); |
| |
| /** |
| * Resets a function in preparation for it to be reused. This can happen in bc |
| * because it is a dynamic language and functions can be redefined. |
| * @param f The functio to reset. |
| */ |
| void |
| bc_func_reset(BcFunc* f); |
| |
| #ifndef NDEBUG |
| /** |
| * Frees a function. This is a destructor. This is only used in debug builds |
| * because all functions are freed at exit. We free them in debug builds to |
| * check for memory leaks. |
| * @param func The function to free as a void pointer. |
| */ |
| void |
| bc_func_free(void* func); |
| #endif // NDEBUG |
| |
| /** |
| * Initializes an array, which is the array type in bc and dc source code. Since |
| * variables and arrays are both arrays (see the development manual, |
| * manuals/development.md#execution, for more information), the @a nums |
| * parameter tells bc whether to initialize an array of numbers or an array of |
| * arrays of numbers. If the latter, it does a recursive call with nums set to |
| * true. |
| * @param a The array to initialize. |
| * @param nums True if the array should be for numbers, false if it should be |
| * for vectors. |
| */ |
| void |
| bc_array_init(BcVec* a, bool nums); |
| |
| /** |
| * Copies an array to another array. This is used to do pass arrays to functions |
| * that do not take references to arrays. The arrays are passed entirely by |
| * value, which means that they need to be copied. |
| * @param d The destination array. |
| * @param s The source array. |
| */ |
| void |
| bc_array_copy(BcVec* d, const BcVec* s); |
| |
| /** |
| * Frees a string stored in a function. This is a destructor. |
| * @param string The string to free as a void pointer. |
| */ |
| void |
| bc_string_free(void* string); |
| |
| /** |
| * Frees a constant stored in a function. This is a destructor. |
| * @param constant The constant to free as a void pointer. |
| */ |
| void |
| bc_const_free(void* constant); |
| |
| /** |
| * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by |
| * clearing the BcNum in the union. This is to ensure that bc does not use |
| * uninitialized data. |
| * @param r The result to clear. |
| */ |
| void |
| bc_result_clear(BcResult* r); |
| |
| /** |
| * Copies a result into another. This is done for things like duplicating the |
| * top of the results stack or copying the result of an assignment to put back |
| * on the results stack. |
| * @param d The destination result. |
| * @param src The source result. |
| */ |
| void |
| bc_result_copy(BcResult* d, BcResult* src); |
| |
| /** |
| * Frees a result. This is a destructor. |
| * @param result The result to free as a void pointer. |
| */ |
| void |
| bc_result_free(void* result); |
| |
| /** |
| * Expands an array to @a len. This can happen because in bc, you do not have to |
| * explicitly initialize elements of an array. If you access an element that is |
| * not initialized, the array is expanded to fit it, and all missing elements |
| * are initialized to 0 if they are numbers, or arrays with one element of 0. |
| * This function does that expansion. |
| * @param a The array to expand. |
| * @param len The length to expand to. |
| */ |
| void |
| bc_array_expand(BcVec* a, size_t len); |
| |
| #if BC_ENABLED |
| |
| /** |
| * Returns non-zero if the bytecode instruction i is an assignment instruction. |
| * @param i The instruction to test. |
| * @return Non-zero if i is an assignment instruction, zero otherwise. |
| */ |
| #define BC_INST_IS_ASSIGN(i) \ |
| ((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL) |
| |
| /** |
| * Returns true if the bytecode instruction @a i requires the value to be |
| * returned for use. |
| * @param i The instruction to test. |
| * @return True if @a i requires the value to be returned for use, false |
| * otherwise. |
| */ |
| #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN) |
| |
| #else // BC_ENABLED |
| |
| /** |
| * Returns non-zero if the bytecode instruction i is an assignment instruction. |
| * @param i The instruction to test. |
| * @return Non-zero if i is an assignment instruction, zero otherwise. |
| */ |
| #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL) |
| |
| /** |
| * Returns true if the bytecode instruction @a i requires the value to be |
| * returned for use. |
| * @param i The instruction to test. |
| * @return True if @a i requires the value to be returned for use, false |
| * otherwise. |
| */ |
| #define BC_INST_USE_VAL(i) (false) |
| |
| #endif // BC_ENABLED |
| |
| #if BC_DEBUG_CODE |
| /// Reference to string names for all of the instructions. For debugging. |
| extern const char* bc_inst_names[]; |
| #endif // BC_DEBUG_CODE |
| |
| /// References to the names of the main and read functions. |
| extern const char bc_func_main[]; |
| extern const char bc_func_read[]; |
| |
| #endif // BC_LANG_H |