dd: iflags, oflags, fix ^C, fix the fundamental loop.

Investigating why the toybox tar tests fail on Android with toybox dd, I
realized I was rewriting a part of dd I'd rewritten before!

This is a re-send of my 2019-02-22 patch, rebased against the current
ToT...

This patch was originally motivated because after suggesting to the author of
https://stackoverflow.com/questions/17157820/access-vdsolinux/54797221#54797221
that he could tell dd to work in bytes rather than blocks, I realized
that our dd doesn't actually support that. But the rewrite of the main
loop is necessary to fix the incorrect output from the dd calls in the
tar test.

Without this patch, `yes | dd bs=65536 count=1 > fweep` basically gives
random output, based on how many bytes the pipe feels like giving you in
your first read. (As far as I know, dd *without* bs= was fine, but I
can't guarantee that that's true, just that I haven't seen it fail.)

Also switch to TAGGED_ARRAY and comma_* for conv rather than add two more
copies of an undesired idiom. It turned out -- contrary to the belief of
cp(1) -- that comma_scan isn't suitable for this because of its magic
handling of "no" prefixes. (It's actually harmless in cp because none
of the --preserve options begin with "no", but some dd options do.) To
this end, comma_remove is a less-magic comma_scan.

I've also changed an `if` to a `while` because other implementations
allow things like `--preserve=mode,mode` or `conv=sync,sync`. (If we
decide this is a bug rather than a feature, we should at least fix the
error message to be clear that we're rejecting the *duplication*, not
the option itself.)

I've also fixed the ^C behavior by simply adding a direct SIGINT handler
rather than trying to be clever inside the read loop (which is why we
weren't handling the SIGINT until the read returned).

I've also removed `strstarteq` and just added the '=' to each literal
when calling regular `strstart`.

Plus basic tests.
diff --git a/lib/commas.c b/lib/commas.c
index 03b2e34..2267684 100644
--- a/lib/commas.c
+++ b/lib/commas.c
@@ -59,7 +59,7 @@
   return start;
 }
 
-// check all instances of opt and "no"opt in optlist, return true if opt
+// Check all instances of opt and "no"opt in optlist, return true if opt
 // found and last instance wasn't no. If clean, remove each instance from list.
 int comma_scan(char *optlist, char *opt, int clean)
 {
@@ -97,3 +97,23 @@
 
   return i;
 }
+
+// Returns true and removes `opt` from `optlist` if present, false otherwise.
+// Doesn't have the magic "no" behavior of comma_scan.
+int comma_remove(char *optlist, char *opt)
+{
+  int optlen = strlen(opt), len, got = 0;
+
+  if (optlist) for (;;) {
+    char *s = comma_iterate(&optlist, &len);
+
+    if (!s) break;
+    if (optlen == len && !strncmp(opt, s, optlen)) {
+      got = 1;
+      if (optlist) memmove(s, optlist, strlen(optlist)+1);
+      else *s = 0;
+    }
+  }
+
+  return got;
+}
diff --git a/lib/lib.h b/lib/lib.h
index b74cfbf..e354a82 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -366,6 +366,7 @@
 char *comma_iterate(char **list, int *len);
 int comma_scan(char *optlist, char *opt, int clean);
 int comma_scanall(char *optlist, char *scanlist);
+int comma_remove(char *optlist, char *opt);
 
 // deflate.c
 
diff --git a/tests/cp.test b/tests/cp.test
index 6c798b4..af59593 100755
--- a/tests/cp.test
+++ b/tests/cp.test
@@ -111,6 +111,9 @@
   "-rw-r--r--\n" "" ""
 rm -rf walrus woot carpenter
 
+# duplicated --preserve= options are fine.
+testing "--preserve=mode,mode" "cp --preserve=mode,mode walrus walrus2" "" "" ""
+
 # cp -r ../source destdir
 # cp -r one/two/three missing
 # cp -r one/two/three two
diff --git a/tests/dd.test b/tests/dd.test
index d2d6033..7d7b794 100644
--- a/tests/dd.test
+++ b/tests/dd.test
@@ -95,3 +95,15 @@
 
 testing "seek stdout" "yes 2> /dev/null | dd bs=8 seek=2 count=1 > out 2> /dev/null && xxd -p out" \
   "00000000000000000000000000000000790a790a790a790a\n" "" ""
+
+# Duplicated options are fine.
+testing "conv=sync,sync" "dd conv=sync,sync $opt | head -n 1" "I WANT\n" "" "I WANT\n"
+
+# _bytes options
+testing "iflag=count_bytes" \
+  "dd if=input count=2 ibs=4096 iflag=count_bytes $opt" "hi" "high" ""
+testing "iflag=skip_bytes" \
+  "dd if=input skip=2 ibs=4096 iflag=skip_bytes $opt" "gh" "high" ""
+testing "oflag=seek_bytes" \
+  "dd if=input of=output seek=2 obs=4096 oflag=seek_bytes status=none && \
+   xxd -p output" "000030313233\n" "0123" ""
diff --git a/toys/pending/dd.c b/toys/pending/dd.c
index 0c447f7..80a7595 100644
--- a/toys/pending/dd.c
+++ b/toys/pending/dd.c
@@ -4,8 +4,6 @@
  * Copyright 2013 Kyungwan Han <asura321@gmail.com>
  *
  * See  http://opengroup.org/onlinepubs/9699919799/utilities/dd.html
- *
- * todo: ctrl-c doesn't work, the read() is restarting.
 
 USE_DD(NEWTOY(dd, 0, TOYFLAG_USR|TOYFLAG_BIN))
 
@@ -13,19 +11,22 @@
   bool "dd"
   default n
   help
-    usage: dd [if=FILE] [of=FILE] [ibs=N] [obs=N] [bs=N] [count=N] [skip=N]
-            [seek=N] [conv=notrunc|noerror|sync|fsync] [status=noxfer|none]
+    usage: dd [if=FILE] [of=FILE] [ibs=N] [obs=N] [iflag=FLAGS] [oflag=FLAGS]
+            [bs=N] [count=N] [seek=N] [skip=N]
+            [conv=notrunc|noerror|sync|fsync] [status=noxfer|none]
 
     Copy/convert files.
 
     if=FILE		Read from FILE instead of stdin
     of=FILE		Write to FILE instead of stdout
     bs=N		Read and write N bytes at a time
-    ibs=N		Read N bytes at a time
-    obs=N		Write N bytes at a time
+    ibs=N		Input block size
+    obs=N		Output block size
     count=N		Copy only N input blocks
     skip=N		Skip N input blocks
     seek=N		Skip N output blocks
+    iflag=FLAGS	Set input flags
+    oflag=FLAGS	Set output flags
     conv=notrunc	Don't truncate output file
     conv=noerror	Continue after read errors
     conv=sync	Pad blocks with zeros
@@ -33,6 +34,12 @@
     status=noxfer	Don't show transfer rate
     status=none	Don't show transfer rate or records in/out
 
+    FLAGS is a comma-separated list of:
+
+    count_bytes	(iflag) interpret count=N in bytes, not blocks
+    seek_bytes	(oflag) interpret seek=N in bytes, not blocks
+    skip_bytes	(iflag) interpret skip=N in bytes, not blocks
+
     Numbers may be suffixed by c (*1), w (*2), b (*512), kD (*1000), k (*1024),
     MD (*1000*1000), M (*1024*1024), GD (*1000*1000*1000) or G (*1024*1024*1024).
 */
@@ -51,12 +58,24 @@
     long sz, count;
     unsigned long long offset;
   } in, out;
+  unsigned conv, iflag, oflag;
 );
 
-#define C_FSYNC   1
-#define C_NOERROR 2
-#define C_NOTRUNC 4
-#define C_SYNC    8
+struct dd_flag {
+  char *name;
+};
+
+static const struct dd_flag dd_conv[] = TAGGED_ARRAY(DD_conv,
+  {"fsync"}, {"noerror"}, {"notrunc"}, {"sync"},
+);
+
+static const struct dd_flag dd_iflag[] = TAGGED_ARRAY(DD_iflag,
+  {"count_bytes"}, {"skip_bytes"},
+);
+
+static const struct dd_flag dd_oflag[] = TAGGED_ARRAY(DD_oflag,
+  {"seek_bytes"},
+);
 
 static void status()
 {
@@ -79,6 +98,12 @@
   }
 }
 
+static void dd_sigint(int sig) {
+  status();
+  toys.exitval = sig|128;
+  xexit();
+}
+
 static void write_out(int all)
 {
   TT.out.bp = TT.out.buff;
@@ -97,18 +122,24 @@
   if (TT.out.count) memmove(TT.out.buff, TT.out.bp, TT.out.count); //move remainder to front
 }
 
-int strstarteq(char **a, char *b)
+static void parse_flags(char *what, char *arg,
+    const struct dd_flag* flags, int flag_count, unsigned *result)
 {
-  char *aa = *a;
+  char *pre = xstrdup(arg);
+  int i;
 
-  return strstart(&aa, b) && *aa == '=' && (*a = aa+1);
+  for (i=0; i<flag_count; ++i) {
+    while (comma_remove(pre, flags[i].name)) *result |= 1<<i;
+  }
+  if (*pre) error_exit("bad %s=%s", what, pre);
+  free(pre);
 }
 
 void dd_main()
 {
   char **args;
   unsigned long long bs = 0;
-  int trunc = O_TRUNC, conv = 0;
+  int trunc = O_TRUNC;
 
   TT.show_xfer = TT.show_records = 1;
   TT.c_count = ULLONG_MAX;
@@ -117,51 +148,46 @@
   for (args = toys.optargs; *args; args++) {
     char *arg = *args;
 
-    if (strstarteq(&arg, "bs")) bs = atolx_range(arg, 1, LONG_MAX);
-    else if (strstarteq(&arg, "ibs")) TT.in.sz = atolx_range(arg, 1, LONG_MAX);
-    else if (strstarteq(&arg, "obs")) TT.out.sz = atolx_range(arg, 1, LONG_MAX);
-    else if (strstarteq(&arg, "count"))
+    if (strstart(&arg, "bs=")) bs = atolx_range(arg, 1, LONG_MAX);
+    else if (strstart(&arg, "ibs=")) TT.in.sz = atolx_range(arg, 1, LONG_MAX);
+    else if (strstart(&arg, "obs=")) TT.out.sz = atolx_range(arg, 1, LONG_MAX);
+    else if (strstart(&arg, "count="))
       TT.c_count = atolx_range(arg, 0, LLONG_MAX);
-    else if (strstarteq(&arg, "if")) TT.in.name = arg;
-    else if (strstarteq(&arg, "of")) TT.out.name = arg;
-    else if (strstarteq(&arg, "seek"))
+    else if (strstart(&arg, "if=")) TT.in.name = arg;
+    else if (strstart(&arg, "of=")) TT.out.name = arg;
+    else if (strstart(&arg, "seek="))
       TT.out.offset = atolx_range(arg, 0, LLONG_MAX);
-    else if (strstarteq(&arg, "skip"))
+    else if (strstart(&arg, "skip="))
       TT.in.offset = atolx_range(arg, 0, LLONG_MAX);
-    else if (strstarteq(&arg, "status")) {
+    else if (strstart(&arg, "status=")) {
       if (!strcmp(arg, "noxfer")) TT.show_xfer = 0;
       else if (!strcmp(arg, "none")) TT.show_xfer = TT.show_records = 0;
       else error_exit("unknown status '%s'", arg);
-    } else if (strstarteq(&arg, "conv")) {
-      char *ss, *convs[] = {"fsync", "noerror", "notrunc", "sync"};
-      int i, len;
-
-      while ((ss = comma_iterate(&arg, &len))) {
-        for (i = 0; i<ARRAY_LEN(convs); i++)
-          if (len == strlen(convs[i]) && !strncmp(ss, convs[i], len)) break;
-        if (i == ARRAY_LEN(convs)) error_exit("bad conv=%.*s", len, ss);
-        conv |= 1<<i;
-      }
-    } else error_exit("bad arg %s", arg);
+    } else if (strstart(&arg, "conv=")) {
+      parse_flags("conv", arg, dd_conv, ARRAY_LEN(dd_conv), &TT.conv);
+      fprintf(stderr, "conv=%x\n", TT.conv);
+    } else if (strstart(&arg, "iflag="))
+      parse_flags("iflag", arg, dd_iflag, ARRAY_LEN(dd_iflag), &TT.iflag);
+    else if (strstart(&arg, "oflag="))
+      parse_flags("oflag", arg, dd_oflag, ARRAY_LEN(dd_oflag), &TT.oflag);
+    else error_exit("bad arg %s", arg);
   }
   if (bs) TT.in.sz = TT.out.sz = bs;
 
-  signal(SIGINT, generic_signal);
+  signal(SIGINT, dd_sigint);
   signal(SIGUSR1, generic_signal);
   gettimeofday(&TT.start, NULL);
 
-  /* for bs=, in/out is done as it is. so only in.sz is enough.
-   * With Single buffer there will be overflow in a read following partial read
-   */
+  // For bs=, in/out is done as it is. so only in.sz is enough.
+  // With Single buffer there will be overflow in a read following partial read.
   TT.in.buff = TT.out.buff = xmalloc(TT.in.sz + (bs ? 0 : TT.out.sz));
   TT.in.bp = TT.out.bp = TT.in.buff;
-  //setup input
+
   if (!TT.in.name) TT.in.name = "stdin";
   else TT.in.fd = xopenro(TT.in.name);
 
-  if (conv&C_NOTRUNC) trunc = 0;
+  if (TT.conv & _DD_conv_notrunc) trunc = 0;
 
-  //setup output
   if (!TT.out.name) {
     TT.out.name = "stdout";
     TT.out.fd = 1;
@@ -170,30 +196,43 @@
 
   // Implement skip=
   if (TT.in.offset) {
-    if (lseek(TT.in.fd, (off_t)(TT.in.offset * TT.in.sz), SEEK_CUR) < 0) {
-      while (TT.in.offset--) {
-        ssize_t n = read(TT.in.fd, TT.in.bp, TT.in.sz);
+    off_t off = TT.in.offset;
+
+    if (!(TT.iflag & _DD_iflag_skip_bytes)) off *= TT.in.sz;
+    if (lseek(TT.in.fd, off, SEEK_CUR) < 0) {
+      while (off > 0) {
+        int chunk = off < TT.in.sz ? off : TT.in.sz;
+        ssize_t n = read(TT.in.fd, TT.in.bp, chunk);
 
         if (n < 0) {
           perror_msg("%s", TT.in.name);
-          if (conv&C_NOERROR) status();
+          if (TT.conv & _DD_conv_noerror) status();
           else return;
         } else if (!n) {
           xprintf("%s: Can't skip\n", TT.in.name);
           return;
         }
+        off -= chunk;
       }
     }
   }
 
-  // seek/truncate as necessary. We handled position zero truncate with
-  // O_TRUNC on open, so output to /dev/null and such doesn't error.
-  if ((bs = TT.out.offset*TT.out.sz)) {
+  // Implement seek= and truncate as necessary. We handled position zero
+  // truncate with O_TRUNC on open, so output to /dev/null and such doesn't
+  // error.
+  bs = TT.out.offset;
+  if (!(TT.oflag & _DD_oflag_seek_bytes)) bs *= TT.out.sz;
+  if (bs) {
     xlseek(TT.out.fd, bs, SEEK_CUR);
     if (trunc && ftruncate(TT.out.fd, bs)) perror_exit("ftruncate");
   }
 
-  while (TT.c_count==ULLONG_MAX || (TT.in_full + TT.in_part) < TT.c_count) {
+  unsigned long long bytes_left = TT.c_count;
+  if (TT.c_count != ULLONG_MAX && !(TT.iflag & _DD_iflag_count_bytes)) {
+    bytes_left *= TT.in.sz;
+  }
+  while (bytes_left) {
+    int chunk = bytes_left < TT.in.sz ? bytes_left : TT.in.sz;
     ssize_t n;
 
     // Show progress and exit on SIGINT or just continue on SIGUSR1.
@@ -204,16 +243,16 @@
     }
 
     TT.in.bp = TT.in.buff + TT.in.count;
-    if (conv&C_SYNC) memset(TT.in.bp, 0, TT.in.sz);
-    if (!(n = read(TT.in.fd, TT.in.bp, TT.in.sz))) break;
-    if (n < 0) { 
+    if (TT.conv & _DD_conv_sync) memset(TT.in.bp, 0, TT.in.sz);
+    if (!(n = read(TT.in.fd, TT.in.bp, chunk))) break;
+    if (n < 0) {
       if (errno == EINTR) continue;
       //read error case.
       perror_msg("%s: read error", TT.in.name);
-      if (!(conv&C_NOERROR)) exit(1);
+      if (!(TT.conv & _DD_conv_noerror)) exit(1);
       status();
       xlseek(TT.in.fd, TT.in.sz, SEEK_CUR);
-      if (!(conv&C_SYNC)) continue;
+      if (!(TT.conv & _DD_conv_sync)) continue;
       // if SYNC, then treat as full block of nuls
       n = TT.in.sz;
     }
@@ -222,9 +261,10 @@
       TT.in.count += n;
     } else {
       TT.in_part++;
-      if (conv&C_SYNC) TT.in.count += TT.in.sz;
+      if (TT.conv & _DD_conv_sync) TT.in.count += TT.in.sz;
       else TT.in.count += n;
     }
+    bytes_left -= n;
 
     TT.out.count = TT.in.count;
     if (bs) {
@@ -239,7 +279,7 @@
     }
   }
   if (TT.out.count) write_out(1); //write any remaining input blocks
-  if ((conv&C_FSYNC) && fsync(TT.out.fd)<0)
+  if ((TT.conv & _DD_conv_fsync) && fsync(TT.out.fd)<0)
     perror_exit("%s: fsync", TT.out.name);
 
   close(TT.in.fd);
diff --git a/toys/posix/cp.c b/toys/posix/cp.c
index 5e7d99e..bba5480 100644
--- a/toys/posix/cp.c
+++ b/toys/posix/cp.c
@@ -377,9 +377,9 @@
   if (CFG_CP_PRESERVE && FLAG(preserve)) {
     char *pre = xstrdup(TT.c.preserve ? TT.c.preserve : "mot"), *s;
 
-    if (comma_scan(pre, "all", 1)) TT.pflags = ~0;
+    if (comma_remove(pre, "all")) TT.pflags = ~0;
     for (i=0; i<ARRAY_LEN(cp_preserve); i++)
-      if (comma_scan(pre, cp_preserve[i].name, 1)) TT.pflags |= 1<<i;
+      while (comma_remove(pre, cp_preserve[i].name)) TT.pflags |= 1<<i;
     if (*pre) {
 
       // Try to interpret as letters, commas won't set anything this doesn't.