Fix type coercion bugs in expr.

All tests pass now; this fixes the 2 remaining failures, including a
segfault.

The structure of the code has changed a lot -- instead of having a tiny
function per operator, we have eval_op() which does common type coercion
and then evaluates the operator.  I tried writing it a couple different
ways, and this was the cleanest.

The OPS table now contains the operator string, precedence level,
signature for type coercion, and operator ID.
diff --git a/tests/expr.test b/tests/expr.test
index 69e4bc4..19db19e 100755
--- a/tests/expr.test
+++ b/tests/expr.test
@@ -5,9 +5,10 @@
 testing "integer" "expr 5" "5\n" "" ""
 testing "integer negative" "expr -5" "-5\n" "" ""
 testing "string" "expr astring" "astring\n" "" ""
-testing "1 + 3" "expr 1 + 3" "4\n" "" ""
+testing "addition" "expr 1 + 3" "4\n" "" ""
 testing "5 + 6 * 3" "expr 5 + 6 \* 3" "23\n" "" ""
 testing "( 5 + 6 ) * 3" "expr \( 5 + 6 \) \* 3" "33\n" "" ""
+testing ">" "expr 3 \> 2" "1\n" "" ""
 testing "* / same priority" "expr 4 \* 3 / 2"  "6\n" "" ""
 testing "/ * same priority" "expr 3 / 2 \* 4" "4\n" "" ""
 testing "& before |" "expr 0 \| 1 \& 0" "0\n" "" ""
@@ -24,6 +25,7 @@
 testing "regex position" "expr ab21xx : '[^0-9]*[0-9]*'" "4\n" "" ""
 testing "regex extraction" "expr ab21xx : '[^0-9]*\([0-9]*\).*'" "21\n" "" ""
 testing "regex no match" "expr ab21xx : x" "0\n" "" ""
+testing "long str" "expr abcdefghijklmnopqrstuvwxyz : '\(.*\)' = a" "0\n" "" ""
 
 # result of ':' regex match can subsequently be used for arithmetic
 testing "string becomes integer" "expr ab21xx : '[^0-9]*\([0-9]*\)' + 3" \
@@ -31,7 +33,7 @@
 
 testing "integer comparison" "expr -3 \< -2" "1\n" "" ""
 testing "string comparison" "expr -3 \< -2s" "0\n" "" ""
-testing "integer expr comparison" "expr 2 - 5 \< -2" "1\n" "" ""
+testing "integer expression comparison" "expr 2 - 5 \< -2" "1\n" "" ""
 # result of arithmetic can subsequently be compared as a string
 testing "string expr comparison" "expr 2 - 5 \< -2s" "0\n" "" ""
 
@@ -52,3 +54,4 @@
 testing "missing outer )" "expr \( 1 + \( 2 \* 3 \); echo \$?" "2\n" "" ""
 testing "bad operator" "expr 1 @ 2; echo \$?" "2\n" "" ""
 testing "adjacent literals" "expr 1 2; echo \$?" "2\n" "" ""
+testing "non-integer argument" "expr 1 + a; echo \$?" "2\n" "" ""
diff --git a/toys/pending/expr.c b/toys/pending/expr.c
index ac7edac..71d5d19 100644
--- a/toys/pending/expr.c
+++ b/toys/pending/expr.c
@@ -1,5 +1,6 @@
 /* expr.c - evaluate expression
  *
+ * Copyright 2016 Google Inc.
  * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
  *
  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
@@ -49,150 +50,12 @@
   char* tok; // current token, not on the stack since recursive calls mutate it
 )
 
-// Scalar value.
-// If s is NULL, the value is an integer (i).
-// If s is not NULL, the value is a string (s).
+// Scalar value.  If s != NULL, it's a string, otherwise it's an int.
 struct value {
   char *s;
   long long i;
 };
 
-// check if v is the integer 0 or the empty string
-static int is_zero(struct value *v)
-{
-  return v->s ? !*v->s : !v->i;
-}
-
-static char *num_to_str(long long num)
-{
-  static char num_buf[21];
-  snprintf(num_buf, sizeof(num_buf), "%lld", num);
-  return num_buf;
-}
-
-static int cmp(struct value *lhs, struct value *rhs)
-{
-  if (lhs->s || rhs->s) {
-    // at least one operand is a string
-    char *ls = lhs->s ? lhs->s : num_to_str(lhs->i);
-    char *rs = rhs->s ? rhs->s : num_to_str(rhs->i);
-    return strcmp(ls, rs);
-  } else return lhs->i - rhs->i;
-}
-
-static void re(struct value *lhs, struct value *rhs)
-{
-  regex_t rp;
-  regmatch_t rm[2];
-
-  xregcomp(&rp, rhs->s, 0);
-  // BUG: lhs->s is NULL when it looks like an integer, causing a segfault.
-  if (!regexec(&rp, lhs->s, 2, rm, 0) && rm[0].rm_so == 0) {
-    if (rp.re_nsub > 0 && rm[1].rm_so >= 0) 
-      lhs->s = xmprintf("%.*s", rm[1].rm_eo - rm[1].rm_so, lhs->s+rm[1].rm_so);
-    else {
-      lhs->i = rm[0].rm_eo;
-      lhs->s = 0;
-    }
-  } else {
-    if (!rp.re_nsub) {
-      lhs->i = 0;
-      lhs->s = 0;
-    } else lhs->s = "";
-  }
-}
-
-static void mod(struct value *lhs, struct value *rhs)
-{
-  if (lhs->s || rhs->s) error_exit("non-integer argument");
-  if (is_zero(rhs)) error_exit("division by zero");
-  lhs->i %= rhs->i;
-}
-
-static void divi(struct value *lhs, struct value *rhs)
-{
-  if (lhs->s || rhs->s) error_exit("non-integer argument");
-  if (is_zero(rhs)) error_exit("division by zero");
-  lhs->i /= rhs->i;
-}
-
-static void mul(struct value *lhs, struct value *rhs)
-{
-  if (lhs->s || rhs->s) error_exit("non-integer argument");
-  lhs->i *= rhs->i;
-}
-
-static void sub(struct value *lhs, struct value *rhs)
-{
-  if (lhs->s || rhs->s) error_exit("non-integer argument");
-  lhs->i -= rhs->i;
-}
-
-static void add(struct value *lhs, struct value *rhs)
-{
-  if (lhs->s || rhs->s) error_exit("non-integer argument");
-  lhs->i += rhs->i;
-}
-
-static void ne(struct value *lhs, struct value *rhs)
-{
-  lhs->i = cmp(lhs, rhs) != 0;
-  lhs->s = NULL;
-}
-
-static void lte(struct value *lhs, struct value *rhs)
-{
-  lhs->i = cmp(lhs, rhs) <= 0;
-  lhs->s = NULL;
-}
-
-static void lt(struct value *lhs, struct value *rhs)
-{
-  lhs->i = cmp(lhs, rhs) < 0;
-  lhs->s = NULL;
-}
-
-static void gte(struct value *lhs, struct value *rhs)
-{
-  lhs->i = cmp(lhs, rhs) >= 0;
-  lhs->s = NULL;
-}
-
-static void gt(struct value *lhs, struct value *rhs)
-{
-  lhs->i = cmp(lhs, rhs) > 0;
-  lhs->s = NULL;
-}
-
-static void eq(struct value *lhs, struct value *rhs)
-{
-  lhs->i = !cmp(lhs, rhs);
-  lhs->s = NULL;
-}
-
-static void and(struct value *lhs, struct value *rhs)
-{
-  if (is_zero(lhs) || is_zero(rhs)) {
-    lhs->i = 0;
-    lhs->s = NULL;
-  }
-}
-
-static void or(struct value *lhs, struct value *rhs)
-{
-  if (is_zero(lhs)) *lhs = *rhs;
-}
-
-// Converts an arg string to a value struct.  Assumes arg != NULL.
-static void parse_value(char* arg, struct value *v)
-{
-  char *endp;
-  v->i = strtoll(arg, &endp, 10);
-  // if non-NULL, there's still stuff left, and it's a string.  Otherwise no
-  // string.
-  v->s = *endp ? arg : NULL;
-}
-
 void syntax_error(char *msg, ...) {
   if (0) { // detailed message for debugging.  TODO: add CFG_ var to enable
     va_list va;
@@ -204,23 +67,149 @@
     error_exit("syntax error");
 }
 
+#define LONG_LONG_MAX_LEN 21
+
+// Get the value as a string.
+void get_str(struct value *v, char** ret)
+{
+  if (v->s)
+    *ret = v->s;
+  else {
+    *ret = xmalloc(LONG_LONG_MAX_LEN);
+    snprintf(*ret, LONG_LONG_MAX_LEN, "%lld", v->i);
+  }
+}
+
+// Get the value as an integer and return 1, or return 0 on error.
+int get_int(struct value *v, long long *ret)
+{
+  if (v->s) {
+    char *endp;
+    *ret = strtoll(v->s, &endp, 10);
+    return *endp ? 0 : 1; // If endp points to NUL, all chars were converted
+  } else {
+    *ret = v->i;
+    return 1;
+  }
+}
+
+// Preserve the invariant that v.s is NULL when the value is an integer.
+void assign_int(struct value *v, long long i)
+{
+  v->i = i;
+  v->s = NULL;
+}
+
+// Check if v is 0 or the empty string.
+static int is_false(struct value *v)
+{
+  if (v->s)
+    return !*v->s || !strcmp(v->s, "0");  // get_int("0") -> 0
+  else
+    return !v->i;
+}
+
+// 'ret' is filled with a string capture or int match position.
+static void re(char *target, char *pattern, struct value *ret)
+{
+  regex_t pat;
+  regmatch_t m[2];
+
+  xregcomp(&pat, pattern, 0);
+  if (!regexec(&pat, target, 2, m, 0) && m[0].rm_so == 0) { // match at pos 0
+    regmatch_t* g1 = &m[1]; // group capture 1
+    if (pat.re_nsub > 0 && g1->rm_so >= 0) // has capture
+      ret->s = xmprintf("%.*s", g1->rm_eo - g1->rm_so, target + g1->rm_so);
+    else
+      assign_int(ret, m[0].rm_eo);
+  } else { // no match
+    if (pat.re_nsub > 0) // has capture
+      ret->s = "";
+    else
+      assign_int(ret, 0);
+  }
+}
+
+// 4 different signatures of operators.  S = string, I = int, SI = string or
+// int.
+enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
+
+enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
+
 // operators grouped by precedence
-static struct op {
+static struct op_def {
   char *tok;
-  char prec;
-  // calculate "lhs op rhs" (e.g. lhs + rhs) and store result in lhs
-  void (*calc)(struct value *lhs, struct value *rhs);
+  char prec, sig, op; // precedence, signature for type coercion, operator ID
 } OPS[] = {
-  {"|", 1, or  },
-  {"&", 2, and },
-  {"=", 3, eq  }, {"==", 3, eq  }, {">",  3, gt  }, {">=", 3, gte },
-  {"<", 3, lt  }, {"<=", 3, lte }, {"!=", 3, ne  },
-  {"+", 4, add }, {"-",  4, sub },
-  {"*", 5, mul }, {"/",  5, divi }, {"%", 5, mod },
-  {":", 6, re  },
-  {"",  0, NULL}, // sentinel
+  // logical ops, precedence 1 and 2, signature SI_TO_SI
+  {"|", 1, SI_TO_SI, OR  },
+  {"&", 2, SI_TO_SI, AND },
+  // comparison ops, precedence 3, signature SI_TO_I
+  {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
+  {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
+  {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE }, 
+  // arithmetic ops, precedence 4 and 5, signature I_TO_I
+  {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
+  {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
+  // regex match, precedence 6, signature S_TO_SI
+  {":", 6, S_TO_SI, RE },
+  {NULL, 0, 0, 0}, // sentinel
 };
 
+void eval_op(struct op_def *o, struct value *ret, struct value *rhs) {
+  long long a, b, x = 0; // x = a OP b for ints.
+  char *s, *t; // string operands
+  int cmp;
+
+  switch (o->sig) {
+
+  case SI_TO_SI:
+    switch (o->op) {
+    case OR:  if (is_false(ret)) *ret = *rhs; break;
+    case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
+    }
+    break;  
+
+  case SI_TO_I:
+    if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
+      cmp = a - b;
+    } else { // otherwise compare both as strings
+      get_str(ret, &s);
+      get_str(rhs, &t);
+      cmp = strcmp(s, t);
+    }
+    switch (o->op) {
+    case EQ:  x = cmp == 0; break;
+    case NE:  x = cmp != 0; break;
+    case GT:  x = cmp >  0; break;
+    case GTE: x = cmp >= 0; break;
+    case LT:  x = cmp <  0; break;
+    case LTE: x = cmp <= 0; break;
+    }
+    assign_int(ret, x);
+    break;
+
+  case I_TO_I:
+    if (!get_int(ret, &a) || !get_int(rhs, &b))
+      error_exit("non-integer argument");
+    switch (o->op) {
+    case ADD: x = a + b; break;
+    case SUB: x = a - b; break;
+    case MUL: x = a * b; break;
+    case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
+    case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
+    }
+    assign_int(ret, x);
+    break;
+
+  case S_TO_SI: // op == RE
+    get_str(ret, &s);
+    get_str(rhs, &t);
+    re(s, t, ret);
+    break;
+  }
+}
+
 // Point TT.tok at the next token.  It's NULL when there are no more tokens.
 void advance() {
   TT.tok = *toys.optargs++;
@@ -243,45 +232,44 @@
 
   // Evaluate LHS atom, setting 'ret'.
   if (!strcmp(TT.tok, "(")) { // parenthesized expression
-    advance(); // consume (
-    eval_expr(ret, 1); // We're inside ( ), so start with min_prec = 1
+    advance();                // consume (
+    eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
     if (!TT.tok)             syntax_error("Expected )");
     if (strcmp(TT.tok, ")")) syntax_error("Expected ) but got %s", TT.tok);
-    advance(); // consume )
-  } else { // simple literal
-    parse_value(TT.tok, ret);
+    advance();                // consume )
+  } else {                    // simple literal
+    ret->s = TT.tok;          // all values start as strings
     advance();
   }
 
   // Evaluate RHS and apply operator until precedence is too low.
   struct value rhs;
   while (TT.tok) {
-    struct op *o = OPS;
-    while (o->calc) {  // Look up the precedence of operator TT.tok
+    struct op_def *o = OPS;
+    while (o->tok) { // Look up operator
       if (!strcmp(TT.tok, o->tok)) break;
       o++;
     }
-    if (!o->calc) break; // Not an operator (extra input will fail later)
+    if (!o->tok) break; // Not an operator (extra input will fail later)
     if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
     advance();
 
     eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
-    o->calc(ret, &rhs); // Apply operator, setting 'ret'.
+    eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
   }
 }
 
 void expr_main(void)
 {
   struct value ret = {0};
-  toys.exitval = 2; // if exiting early, indicate invalid expression
+  toys.exitval = 2; // if exiting early, indicate error
 
   advance(); // initialize global token
   eval_expr(&ret, 1);
-
   if (TT.tok) syntax_error("Unexpected extra input '%s'\n", TT.tok);
 
   if (ret.s) printf("%s\n", ret.s);
   else printf("%lld\n", ret.i);
 
-  exit(is_zero(&ret));
+  exit(is_false(&ret));
 }