blob: 71d5d192c22a383dd75e6390c7b8b762aab5dda9 [file] [log] [blame]
/* expr.c - evaluate expression
*
* Copyright 2016 Google Inc.
* Copyright 2013 Daniel Verkamp <daniel@drv.nu>
*
* http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
*
* The web standard is incomplete (precedence grouping missing), see:
* http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
config EXPR
bool "expr"
default n
help
usage: expr ARG1 OPERATOR ARG2...
Evaluate expression and print result. For example, "expr 1 + 2".
The supported operators are (grouped from highest to lowest priority):
( ) : * / % + - != <= < >= > = & |
Each constant and operator must be a separate command line argument.
All operators are infix, meaning they expect a constant (or expression
that resolves to a constant) on each side of the operator. Operators of
the same priority (within each group above) are evaluated left to right.
Parentheses may be used (as separate arguments) to elevate the priority
of expressions.
Calling expr from a command shell requires a lot of \( or '*' escaping
to avoid interpreting shell control characters.
The & and | operators are logical (not bitwise) and may operate on
strings (a blank string is "false"). Comparison operators may also
operate on strings (alphabetical sort).
Constants may be strings or integers. Comparison, logical, and regex
operators may operate on strings (a blank string is "false"), other
operators require integers.
*/
// TODO: int overflow checking
#define FOR_expr
#include "toys.h"
GLOBALS(
char* tok; // current token, not on the stack since recursive calls mutate it
)
// Scalar value. If s != NULL, it's a string, otherwise it's an int.
struct value {
char *s;
long long i;
};
void syntax_error(char *msg, ...) {
if (0) { // detailed message for debugging. TODO: add CFG_ var to enable
va_list va;
va_start(va, msg);
verror_msg(msg, 0, va);
va_end(va);
xexit();
} else
error_exit("syntax error");
}
#define LONG_LONG_MAX_LEN 21
// Get the value as a string.
void get_str(struct value *v, char** ret)
{
if (v->s)
*ret = v->s;
else {
*ret = xmalloc(LONG_LONG_MAX_LEN);
snprintf(*ret, LONG_LONG_MAX_LEN, "%lld", v->i);
}
}
// Get the value as an integer and return 1, or return 0 on error.
int get_int(struct value *v, long long *ret)
{
if (v->s) {
char *endp;
*ret = strtoll(v->s, &endp, 10);
return *endp ? 0 : 1; // If endp points to NUL, all chars were converted
} else {
*ret = v->i;
return 1;
}
}
// Preserve the invariant that v.s is NULL when the value is an integer.
void assign_int(struct value *v, long long i)
{
v->i = i;
v->s = NULL;
}
// Check if v is 0 or the empty string.
static int is_false(struct value *v)
{
if (v->s)
return !*v->s || !strcmp(v->s, "0"); // get_int("0") -> 0
else
return !v->i;
}
// 'ret' is filled with a string capture or int match position.
static void re(char *target, char *pattern, struct value *ret)
{
regex_t pat;
regmatch_t m[2];
xregcomp(&pat, pattern, 0);
if (!regexec(&pat, target, 2, m, 0) && m[0].rm_so == 0) { // match at pos 0
regmatch_t* g1 = &m[1]; // group capture 1
if (pat.re_nsub > 0 && g1->rm_so >= 0) // has capture
ret->s = xmprintf("%.*s", g1->rm_eo - g1->rm_so, target + g1->rm_so);
else
assign_int(ret, m[0].rm_eo);
} else { // no match
if (pat.re_nsub > 0) // has capture
ret->s = "";
else
assign_int(ret, 0);
}
}
// 4 different signatures of operators. S = string, I = int, SI = string or
// int.
enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
// operators grouped by precedence
static struct op_def {
char *tok;
char prec, sig, op; // precedence, signature for type coercion, operator ID
} OPS[] = {
// logical ops, precedence 1 and 2, signature SI_TO_SI
{"|", 1, SI_TO_SI, OR },
{"&", 2, SI_TO_SI, AND },
// comparison ops, precedence 3, signature SI_TO_I
{"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE },
{">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
{"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
// arithmetic ops, precedence 4 and 5, signature I_TO_I
{"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB },
{"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
// regex match, precedence 6, signature S_TO_SI
{":", 6, S_TO_SI, RE },
{NULL, 0, 0, 0}, // sentinel
};
void eval_op(struct op_def *o, struct value *ret, struct value *rhs) {
long long a, b, x = 0; // x = a OP b for ints.
char *s, *t; // string operands
int cmp;
switch (o->sig) {
case SI_TO_SI:
switch (o->op) {
case OR: if (is_false(ret)) *ret = *rhs; break;
case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
}
break;
case SI_TO_I:
if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
cmp = a - b;
} else { // otherwise compare both as strings
get_str(ret, &s);
get_str(rhs, &t);
cmp = strcmp(s, t);
}
switch (o->op) {
case EQ: x = cmp == 0; break;
case NE: x = cmp != 0; break;
case GT: x = cmp > 0; break;
case GTE: x = cmp >= 0; break;
case LT: x = cmp < 0; break;
case LTE: x = cmp <= 0; break;
}
assign_int(ret, x);
break;
case I_TO_I:
if (!get_int(ret, &a) || !get_int(rhs, &b))
error_exit("non-integer argument");
switch (o->op) {
case ADD: x = a + b; break;
case SUB: x = a - b; break;
case MUL: x = a * b; break;
case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break;
}
assign_int(ret, x);
break;
case S_TO_SI: // op == RE
get_str(ret, &s);
get_str(rhs, &t);
re(s, t, ret);
break;
}
}
// Point TT.tok at the next token. It's NULL when there are no more tokens.
void advance() {
TT.tok = *toys.optargs++;
}
// Evalute a compound expression, setting 'ret'.
//
// This function uses the recursive "Precedence Climbing" algorithm:
//
// Clarke, Keith. "The top-down parsing of expressions." University of London.
// Queen Mary College. Department of Computer Science and Statistics, 1986.
//
// http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
//
// Nice explanation and Python implementation:
// http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
static void eval_expr(struct value *ret, int min_prec)
{
if (!TT.tok) syntax_error("Unexpected end of input");
// Evaluate LHS atom, setting 'ret'.
if (!strcmp(TT.tok, "(")) { // parenthesized expression
advance(); // consume (
eval_expr(ret, 1); // We're inside ( ), so min_prec = 1
if (!TT.tok) syntax_error("Expected )");
if (strcmp(TT.tok, ")")) syntax_error("Expected ) but got %s", TT.tok);
advance(); // consume )
} else { // simple literal
ret->s = TT.tok; // all values start as strings
advance();
}
// Evaluate RHS and apply operator until precedence is too low.
struct value rhs;
while (TT.tok) {
struct op_def *o = OPS;
while (o->tok) { // Look up operator
if (!strcmp(TT.tok, o->tok)) break;
o++;
}
if (!o->tok) break; // Not an operator (extra input will fail later)
if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
advance();
eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
}
}
void expr_main(void)
{
struct value ret = {0};
toys.exitval = 2; // if exiting early, indicate error
advance(); // initialize global token
eval_expr(&ret, 1);
if (TT.tok) syntax_error("Unexpected extra input '%s'\n", TT.tok);
if (ret.s) printf("%s\n", ret.s);
else printf("%lld\n", ret.i);
exit(is_false(&ret));
}