sg_format: make '-FFF' bypass mode sense/select, add --mode=MP; trim trailing spaces in dStrHex() and friends; add examples/sg_tst_async

git-svn-id: https://svn.bingwo.ca/repos/sg3_utils/trunk@591 6180dd3e-e324-4e3e-922d-17de1ae2f315
diff --git a/ChangeLog b/ChangeLog
index 0fb180c..2868af9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,8 +2,14 @@
 some description at the top of its ".c" file. All utilities in the main
 directory have their own "man" pages. There is also a sg3_utils man page.
 
-Changelog for sg3_utils-1.40 [20140626] [svn: r590]
+Changelog for sg3_utils-1.40 [20140710] [svn: r591]
   - sg_copy_results: correct response length calculations
+  - sg_format: make '-FFF' bypass mode sense/select
+    - add --mode=MP to supply alternate mode page,
+      default remains read-write error recovery mpage
+  - sg_inq: expand Block limits VPD page output
+  - sg_lib: trim trailing spaces in dStrHex() and friends
+  - examples/sg_tst_async: new Linux sg test utility
 
 Changelog for sg3_utils-1.39 [20140612] [svn: r588]
   - sg_rep_zones: new utility for ZBC REPORT ZONES
diff --git a/README b/README
index 9d7cb8f..afc58cb 100644
--- a/README
+++ b/README
@@ -264,7 +264,7 @@
     sg_sat_chk_power, sg__sat_identify, sg__sat_phy_event,
     sg__sat_set_features, sg_sat_smart_rd_data, sg_simple1, sg_simple2,
     sg_simple3, sg_simple4, sg_simple5, sg_simple16, sg_tst_excl,
-    sg_tst_excl2 and sg_tst_excl3
+    sg_tst_excl2, sg_tst_excl3, sg_tst_context and sg_tst_async
 
 Also in that subdirectory is a script to test sg_persist, an example data
 file for sg_persist (called "transport_ids.txt") and an example data file for
@@ -403,4 +403,4 @@
 
 
 Douglas Gilbert
-12th June 2014
+10th July 2014
diff --git a/debian/changelog b/debian/changelog
index 8a22c00..768dac2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,7 +2,7 @@
 
   * New upstream version
 
- -- Douglas Gilbert <dgilbert@interlog.com>  Fri, 13 Jun 2014 12:00:00 -0400
+ -- Douglas Gilbert <dgilbert@interlog.com>  Thu, 10 Jul 2014 13:00:00 -0400
 
 sg3-utils (1.39-0.1) unstable; urgency=low
 
diff --git a/doc/sg3_utils.8 b/doc/sg3_utils.8
index 84a6676..e783362 100644
--- a/doc/sg3_utils.8
+++ b/doc/sg3_utils.8
@@ -1,4 +1,4 @@
-.TH SG3_UTILS "8" "June 2014" "sg3_utils\-1.40" SG3_UTILS
+.TH SG3_UTILS "8" "July 2014" "sg3_utils\-1.40" SG3_UTILS
 .SH NAME
 sg3_utils \- a package of utilities for sending SCSI commands
 .SH SYNOPSIS
diff --git a/doc/sg_format.8 b/doc/sg_format.8
index 13e0747..a62f5de 100644
--- a/doc/sg_format.8
+++ b/doc/sg_format.8
@@ -1,13 +1,13 @@
-.TH SG_FORMAT "8" "May 2014" "sg3_utils\-1.39" SG3_UTILS
+.TH SG_FORMAT "8" "July 2014" "sg3_utils\-1.40" SG3_UTILS
 .SH NAME
 sg_format \- format, resize or modify protection information of a SCSI disk
 .SH SYNOPSIS
 .B sg_format
 [\fI\-\-cmplst=\fR{0|1}] [\fI\-\-count=COUNT\fR] [\fI\-\-dcrt\fR]
 [\fI\-\-early\fR] [\fI\-\-fmtpinfo=FPI\fR] [\fI\-\-format\fR]
-[\fI\-\-help\fR] [\fI\-\-ip_def\fR] [\fI\-\-long\fR] [\fI\-\-pfu=PFU\fR]
-[\fI\-\-pie=PIE\fR] [\fI\-\-pinfo\fR] [\fI\-\-poll=PT\fR] [\fI\-\-resize\fR]
-[\fI\-\-rto_req\fR] [\fI\-\-security\fR] [\fI\-\-six\fR]
+[\fI\-\-help\fR] [\fI\-\-ip_def\fR] [\fI\-\-long\fR] [\fI\-\-mode=MP\fR]
+[\fI\-\-pfu=PFU\fR] [\fI\-\-pie=PIE\fR] [\fI\-\-pinfo\fR] [\fI\-\-poll=PT\fR]
+[\fI\-\-resize\fR] [\fI\-\-rto_req\fR] [\fI\-\-security\fR] [\fI\-\-six\fR]
 [\fI\-\-size=SIZE\fR] [\fI\-\-verbose\fR] [\fI\-\-version\fR]
 [\fI\-\-wait\fR] \fIDEVICE\fR
 .SH DESCRIPTION
@@ -134,8 +134,15 @@
 This option is required to change the block size of a disk. The user is given
 a 15 second count down to ponder the wisdom of doing this, during which time
 control\-C (amongst other Unix commands) can be used to kill this process
-before it does any damage. See NOTES section for implementation details and
-EXAMPLES section for typical use.
+before it does any damage.
+.br
+When used three times (or more) the preliminary MODE SENSE and SELECT
+commands are bypassed, leaving only the initial INQUIRY and FORMAT UNIT
+commands. This is for emergency use (e.g. when the MODE SENSE/SELECT
+commands are not working) and cannot change the logical block size.
+.br
+See NOTES section for implementation details and EXAMPLES section for typical
+use.
 .TP
 \fB\-h\fR, \fB\-\-help\fR
 print out the usage information then exit.
@@ -160,6 +167,11 @@
 command. The LONGLIST bit is set as required depending other
 parameters (e.g. when '\-\-pie=PIE' is greater than zero).
 .TP
+\fB\-M\fR, \fB\-\-mode\fR=\fIMP\fR
+\fIMP\fR is a mode page number (0 to 62 inclusive) that will be used for
+reading and perhaps changing the device logical block size. The default
+is 1 which is the Read\-Write Error Recovery mode page.
+.TP
 \fB\-P\fR, \fB\-\-pfu\fR=\fIPFU\fR
 sets the "Protection Field Usage" field in the parameter block associated
 with a FORMAT UNIT command to \fIPFU\fR. The default value is 0, the only
@@ -251,7 +263,7 @@
 defects that can be given to the format operation. The GLIST is the grown
 list which starts in the format process as CLIST+DLIST and can "grow" later
 due to automatic reallocation (see the ARRE and AWRE bits in the
-read\-write error recovery mode page (see sdparm(8))) and use of the
+Read\-Write Error Recovery mode page (see sdparm(8))) and use of the
 SCSI REASSIGN BLOCKS command (see sg_reassign(8)).
 .PP
 The CMPLST bit (controlled by the \fI\-\-cmplst=\fR0|1 option) determines
diff --git a/doc/sg_inq.8 b/doc/sg_inq.8
index 504298e..c9d3262 100644
--- a/doc/sg_inq.8
+++ b/doc/sg_inq.8
@@ -1,4 +1,4 @@
-.TH SG_INQ "8" "May 2014" "sg3_utils\-1.39" SG3_UTILS
+.TH SG_INQ "8" "July 2014" "sg3_utils\-1.40" SG3_UTILS
 .SH NAME
 sg_inq \- issue SCSI INQUIRY command and/or decode its response
 .SH SYNOPSIS
diff --git a/examples/README b/examples/README
index b26d8da..0a17fb6 100644
--- a/examples/README
+++ b/examples/README
@@ -16,4 +16,4 @@
 gcc/g++ compiler of 4.7.3 vintage or later will be required.
 
 Douglas Gilbert
-21st October 2013
+10th July 2014
diff --git a/examples/sg_tst_async.cpp b/examples/sg_tst_async.cpp
index ee834c5..c69446d 100644
--- a/examples/sg_tst_async.cpp
+++ b/examples/sg_tst_async.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 Douglas Gilbert.
+ * Copyright (c) 2014 Douglas Gilbert.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,115 +28,157 @@
 
 #include <iostream>
 #include <vector>
+#include <map>
+#include <list>
 #include <system_error>
 #include <thread>
 #include <mutex>
 #include <chrono>
+#include <atomic>
 
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <poll.h>
 #include <errno.h>
 #include <ctype.h>
+#include <time.h>
+#define __STDC_FORMAT_MACROS 1
+#include <inttypes.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include "sg_lib.h"
 #include "sg_io_linux.h"
 
-static const char * version_str = "1.00 20140614";
+static const char * version_str = "1.00 20140710";
 static const char * util_name = "sg_tst_async";
 
 /* This is a test program for checking the async usage of the Linux sg
- * driver.
- * multiple threads and can be run as multiple processes and attempts
- * to "break" O_EXCL. The strategy is to open a device O_EXCL|O_NONBLOCK
- * and do a double increment on a LB then close it. Prior to the first
- * increment, the value is checked for even or odd. Assuming the count
- * starts as an even (typically 0) then it should remain even. Odd instances
- * are counted and reported at the end of the program, after all threads
- * have completed.
+ * driver. Each thread opens 1 file descriptor to the sg device and then
+ * starts up to 16 commands while checking with the poll() system call for
+ * the completion of those commands. Each command has a unique "pack_id"
+ * which is a sequence starting at 1. Either TEST UNIT READY, READ(16)
+ * or WRITE(16) commands are issued.
  *
  * This is C++ code with some things from C++11 (e.g. threads) and was
  * only just able to compile (when some things were reverted) with gcc/g++
  * version 4.7.3 found in Ubuntu 13.04 . C++11 "feature complete" support
- * was not available until g++ version 4.8.1 and that is only currently
- * found in Fedora 19 .
+ * was not available until g++ version 4.8.1 . It should build okay on
+ * recent distributions.
  *
  * The build uses various object files from the <sg3_utils>/lib directory
  * which is assumed to be a sibling of this examples directory. Those
  * object files in the lib directory can be built with:
  *   cd <sg3_utils> ; ./configure ; cd lib; make
- * Then to build sg_tst_excl concatenate the next 3 lines:
+ * Then to build sg_tst_async concatenate the next 3 lines:
  *   g++ -Wall -std=c++11 -pthread -I ../include ../lib/sg_lib.o
- *     ../lib/sg_lib_data.o ../lib/sg_io_linux.o -o sg_tst_excl
- *     sg_tst_excl.cpp
+ *     ../lib/sg_lib_data.o ../lib/sg_io_linux.o -o sg_tst_async
+ *     sg_tst_async.cpp
+ * or use the C++ Makefile in this examples directory:
+ *   make -f Makefile.cplus sg_tst_async
  *
- * Currently this utility is Linux only and assumes the SG_IO v3 interface
- * which is supported by sg and block devices (but not bsg devices which
- * require the SG_IO v4 interface). This restriction is relaxed in the
- * sg_tst_excl2 variant of this utility.
+ * Currently this utility is Linux only and uses the sg driver. The bsg
+ * driver is known to be broken (it doesn't match responses to the
+ * correct file descriptor that requested them) so this utility won't
+ * be extended to bsg until that is fixed.
  *
- * BEWARE: this utility modifies a logical block (default LBA 1000) on the
- * given device.
+ * BEWARE: this utility will modify a logical block (default LBA 1000) on the
+ * given device when the '-W' option is given.
  *
  */
 
 using namespace std;
 using namespace std::chrono;
 
-#define DEF_NUM_PER_THREAD 200
+#define DEF_NUM_PER_THREAD 1000
 #define DEF_NUM_THREADS 4
-#define DEF_WAIT_MS 0          /* 0: yield; -1: don't wait; -2: sleep(0) */
+#define DEF_WAIT_MS 10          /* 0: yield; -1: don't wait; -2: sleep(0) */
+#define DEF_TIMEOUT_MS 20000    /* 20 seconds */
+#define DEF_LB_SZ 512
+#define DEF_BLOCKING 0
+#define DEF_DIRECT 0
+#define DEF_NO_XFER 0
+
+#define Q_PER_FD 16
+
+#ifndef SG_FLAG_Q_AT_TAIL
+#define SG_FLAG_Q_AT_TAIL 0x10
+#endif
+#ifndef SG_FLAG_Q_AT_HEAD
+#define SG_FLAG_Q_AT_HEAD 0x20
+#endif
 
 
 #define DEF_LBA 1000
 
 #define EBUFF_SZ 256
 
-static mutex odd_count_mutex;
 static mutex console_mutex;
-static unsigned int odd_count;
-static unsigned int ebusy_count;
-static unsigned int eagain_count;
+static atomic<int> async_starts(0);
+static atomic<int> async_finishes(0);
+static atomic<int> ebusy_count(0);
+static atomic<int> eagain_count(0);
+static atomic<int> uniq_pack_id(1);
+
+static int page_size = 4096;   /* rough guess, will ask sysconf() */
+
+enum command2execute {SCSI_TUR, SCSI_READ16, SCSI_WRITE16};
+enum blkQDiscipline {BQ_DEFAULT, BQ_AT_HEAD, BQ_AT_TAIL};
+
+struct opts_t {
+    const char * dev_name;
+    bool direct;
+    int num_per_thread;
+    bool block;
+    uint64_t lba;
+    int lb_sz;
+    bool no_xfer;
+    int verbose;
+    int wait_ms;
+    command2execute c2e;
+    blkQDiscipline bqd;
+};
 
 
 static void
 usage(void)
 {
-    printf("Usage: %s [-b] [-f] [-h] [-l <lba>] [-n <n_per_thr>] "
-           "[-t <num_thrs>]\n"
-           "                   [-V] [-w <wait_ms>] [-x] [-xx] "
-           "<sg_disk_device>\n", util_name);
+    printf("Usage: %s [-d] [-f] [-h] [-l <lba>] [-n <n_per_thr>] [-N]\n"
+           "                    [-q 0|1] [-R] [-s <lb_sz>] [-t <num_thrs>] "
+           "[-T]\n"
+           "                    [-v] [-V] [-w <wait_ms>] [-W] "
+           "<sg_disk_device>\n",
+           util_name);
     printf("  where\n");
-    printf("    -b                block on open (def: O_NONBLOCK)\n");
-    printf("    -f                force: any SCSI disk (def: only "
-           "scsi_debug)\n");
-    printf("                      WARNING: <lba> written to\n");
+    printf("    -d                do direct_io (def: indirect)\n");
+    printf("    -f                force: any sg device (def: only scsi_debug "
+           "owned)\n");
+    printf("                      WARNING: <lba> written to if '-W' given\n");
     printf("    -h                print this usage message then exit\n");
-    printf("    -l <lba>          logical block to increment (def: %u)\n",
+    printf("    -l <lba>          logical block to access (def: %u)\n",
            DEF_LBA);
-    printf("    -n <n_per_thr>    number of loops per thread "
+    printf("    -n <n_per_thr>    number of commands per thread "
            "(def: %d)\n", DEF_NUM_PER_THREAD);
+    printf("    -N                no data xfer (def: xfer on READ and "
+           "WRITE)\n");
+    printf("    -q 0|1            0: blk q_at_head; 1: q_at_tail\n");
+    printf("    -s <lb_sz>        logical block size (def: 512)\n");
+    printf("    -R                do READs (def: TUR)\n");
     printf("    -t <num_thrs>     number of threads (def: %d)\n",
            DEF_NUM_THREADS);
+    printf("    -T                do TEST UNIT READYs (default is TURs)\n");
+    printf("    -v                increase verbosity\n");
     printf("    -V                print version number then exit\n");
-    printf("    -w <wait_ms>      >0: sleep_for(<wait_ms>); =0: "
-           "yield(); -1: no\n"
-           "                      wait; -2: sleep(0)  (def: %d)\n",
-           DEF_WAIT_MS);
-    printf("    -x                don't use O_EXCL on first thread "
-           "(def: use\n"
-           "                      O_EXCL on all threads)\n"
-           "    -xx               don't use O_EXCL on any thread\n\n");
-    printf("Test O_EXCL open flag with Linux sg driver. Each open/close "
-           "cycle with the\nO_EXCL flag does a double increment on "
-           "lba (using its first 4 bytes).\nEach increment uses a READ_16, "
-           "READ_16, increment, WRITE_16 cycle. The two\nREAD_16s are "
-           "launched asynchronously. Note that '-xx' will run test\n"
-           "without any O_EXCL flags.\n");
+    printf("    -w <wait_ms>      >0: poll(<wait_ms>); =0: poll(0); (def: "
+           "%d)\n", DEF_WAIT_MS);
+    printf("    -W                do WRITEs (def: TUR)\n\n");
+    printf("Multiple threads do READ(16), WRITE(16) or TEST UNIT READY "
+           "(TUR) SCSI\ncommands. Each thread has its own file descriptor "
+           "and queues up to\n16 commands. One block is transferred by "
+           "each READ and WRITE; zeros\nare written.\n");
 }
 
 
@@ -146,9 +188,10 @@
 #define WRITE16_REPLY_LEN 512
 #define WRITE16_CMD_LEN 16
 
+/* Returns 0 if command injected okay, else -1 */
 static int
-start_sg3_cmd(int sg_fd, int tur0_rd1_wr2, int pack_id, unsigned int lba,
-              unsigned char * lbp, int xfer_bytes)
+start_sg3_cmd(int sg_fd, command2execute cmd2exe, int pack_id, uint64_t lba,
+              unsigned char * lbp, int xfer_bytes, int flags)
 {
     struct sg_io_hdr pt;
     unsigned char turCmdBlk[TUR_CMD_LEN] = {0, 0, 0, 0, 0, 0};
@@ -160,15 +203,21 @@
     const char * np;
 
     memset(&pt, 0, sizeof(pt));
-    switch (tur0_rd1_wr2) {
-    case 0:
+    switch (cmd2exe) {
+    case SCSI_TUR:
         np = "TEST UNIT READY";
         pt.cmdp = turCmdBlk;
         pt.cmd_len = sizeof(turCmdBlk);
         pt.dxfer_direction = SG_DXFER_NONE;
         break;
-    case 1:
+    case SCSI_READ16:
         np = "READ(16)";
+        if (lba > 0xffffffff) {
+            r16CmdBlk[2] = (lba >> 56) & 0xff;
+            r16CmdBlk[3] = (lba >> 48) & 0xff;
+            r16CmdBlk[4] = (lba >> 40) & 0xff;
+            r16CmdBlk[5] = (lba >> 32) & 0xff;
+        }
         r16CmdBlk[6] = (lba >> 24) & 0xff;
         r16CmdBlk[7] = (lba >> 16) & 0xff;
         r16CmdBlk[8] = (lba >> 8) & 0xff;
@@ -179,8 +228,14 @@
         pt.dxferp = lbp;
         pt.dxfer_len = xfer_bytes;
         break;
-    case 2:
+    case SCSI_WRITE16:
         np = "WRITE(16)";
+        if (lba > 0xffffffff) {
+            w16CmdBlk[2] = (lba >> 56) & 0xff;
+            w16CmdBlk[3] = (lba >> 48) & 0xff;
+            w16CmdBlk[4] = (lba >> 40) & 0xff;
+            w16CmdBlk[5] = (lba >> 32) & 0xff;
+        }
         w16CmdBlk[6] = (lba >> 24) & 0xff;
         w16CmdBlk[7] = (lba >> 16) & 0xff;
         w16CmdBlk[8] = (lba >> 8) & 0xff;
@@ -191,87 +246,50 @@
         pt.dxferp = lbp;
         pt.dxfer_len = xfer_bytes;
         break;
-    default:
-        console_mutex.lock();
-        cerr << __func__ << ": unknown tur0_rd1_wr2=" << tur0_rd1_wr2 << endl;
-        console_mutex.unlock();
-        return -99;
     }
     pt.interface_id = 'S';
     pt.mx_sb_len = sizeof(sense_buffer);
     pt.sbp = sense_buffer;      /* ignored .... */
-    pt.timeout = 20000;     /* 20000 millisecs == 20 seconds */
+    pt.timeout = DEF_TIMEOUT_MS;
     pt.pack_id = pack_id;
+    pt.flags = flags;
 
-    // queue up two READ_16s to same LBA
     if (write(sg_fd, &pt, sizeof(pt)) < 0) {
         console_mutex.lock();
         cerr << __func__ << ": " << np << " pack_id=" << pack_id;
         perror(" write(sg)");
         console_mutex.unlock();
-        close(sg_fd);
         return -1;
     }
     return 0;
 }
 
 static int
-finish_sg3_cmd(int sg_fd, int tur0_rd1_wr2, int & pack_id, unsigned int lba,
-              unsigned char * lbp, int xfer_bytes, int wait_ms, int & eagains)
+finish_sg3_cmd(int sg_fd, command2execute cmd2exe, int & pack_id, int wait_ms,
+               unsigned int & eagains)
 {
     int ok, res;
     struct sg_io_hdr pt;
-    unsigned char turCmdBlk[TUR_CMD_LEN] = {0, 0, 0, 0, 0, 0};
-    unsigned char r16CmdBlk[READ16_CMD_LEN] =
-                {0x88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
-    unsigned char w16CmdBlk[WRITE16_CMD_LEN] =
-                {0x8a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
     unsigned char sense_buffer[64];
-    const char * np;
+    const char * np = NULL;
 
     memset(&pt, 0, sizeof(pt));
-    switch (tur0_rd1_wr2) {
-    case 0:
+    switch (cmd2exe) {
+    case SCSI_TUR:
         np = "TEST UNIT READY";
-        pt.cmdp = turCmdBlk;
-        pt.cmd_len = sizeof(turCmdBlk);
-        pt.dxfer_direction = SG_DXFER_NONE;
         break;
-    case 1:
+    case SCSI_READ16:
         np = "READ(16)";
-        r16CmdBlk[6] = (lba >> 24) & 0xff;
-        r16CmdBlk[7] = (lba >> 16) & 0xff;
-        r16CmdBlk[8] = (lba >> 8) & 0xff;
-        r16CmdBlk[9] = lba & 0xff;
-        pt.cmdp = r16CmdBlk;
-        pt.cmd_len = sizeof(r16CmdBlk);
-        pt.dxfer_direction = SG_DXFER_FROM_DEV;
-        pt.dxferp = lbp;
-        pt.dxfer_len = xfer_bytes;
         break;
-    case 2:
+    case SCSI_WRITE16:
         np = "WRITE(16)";
-        w16CmdBlk[6] = (lba >> 24) & 0xff;
-        w16CmdBlk[7] = (lba >> 16) & 0xff;
-        w16CmdBlk[8] = (lba >> 8) & 0xff;
-        w16CmdBlk[9] = lba & 0xff;
-        pt.cmdp = w16CmdBlk;
-        pt.cmd_len = sizeof(w16CmdBlk);
-        pt.dxfer_direction = SG_DXFER_TO_DEV;
-        pt.dxferp = lbp;
-        pt.dxfer_len = xfer_bytes;
         break;
-    default:
-        console_mutex.lock();
-        cerr << __func__ << ": unknown tur0_rd1_wr2=" << tur0_rd1_wr2 << endl;
-        console_mutex.unlock();
-        return -99;
     }
     pt.interface_id = 'S';
     pt.mx_sb_len = sizeof(sense_buffer);
     pt.sbp = sense_buffer;
-    pt.timeout = 20000;     /* 20000 millisecs == 20 seconds */
-    pt.pack_id = pack_id;
+    pt.timeout = DEF_TIMEOUT_MS;
+    pt.pack_id = 0;
 
     while (((res = read(sg_fd, &pt, sizeof(pt))) < 0) &&
            (EAGAIN == errno)) {
@@ -287,10 +305,10 @@
         console_mutex.lock();
         perror("do_rd_inc_wr_twice: read(sg, READ_16)");
         console_mutex.unlock();
-        close(sg_fd);
         return -1;
     }
     /* now for the error processing */
+    pack_id = pt.pack_id;
     ok = 0;
     switch (sg_err_category3(&pt)) {
     case SG_LIB_CAT_CLEAN:
@@ -299,7 +317,7 @@
     case SG_LIB_CAT_RECOVERED:
         console_mutex.lock();
         fprintf(stderr, "%s: Recovered error on %s, continuing\n",
-		__func__, np);
+                __func__, np);
         console_mutex.unlock();
         ok = 1;
         break;
@@ -312,230 +330,195 @@
     return ok ? 0 : -1;
 }
 
-/* Opens dev_name and spins if busy (i.e. gets EBUSY), sleeping for
- * wait_ms milliseconds if wait_ms is positive.
- * Reads lba (twice) and treats the first 4 bytes as an int (SCSI endian),
- * increments it and writes it back. Repeats so that happens twice. Then
- * closes dev_name. If an error occurs returns -1 else returns 0 if
- * first int read from lba is even otherwise returns 1. */
-static int
-do_rd_inc_wr_twice(const char * dev_name, unsigned int lba, int block,
-                   int excl, int wait_ms, int id, unsigned int & ebusy,
-                   unsigned int & eagains)
+/* Should have page alignment if direct_io chosen */
+static unsigned char *
+get_aligned_heap(int bytes_at_least)
 {
-    int k, sg_fd, ok, res;
-    int odd = 0;
-    unsigned int u = 0;
-    struct sg_io_hdr pt, pt2;
-    unsigned char r16CmdBlk [READ16_CMD_LEN] =
-                {0x88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
-    unsigned char w16CmdBlk [WRITE16_CMD_LEN] =
-                {0x8a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
-    unsigned char sense_buffer[64];
-    unsigned char lb[READ16_REPLY_LEN];
-    char ebuff[EBUFF_SZ];
+    int n;
+    void * wp;
+
+    if (bytes_at_least < page_size)
+        n = page_size;
+    else
+        n = bytes_at_least;
+#if 1
+    int err = posix_memalign(&wp, page_size, n);
+    if (err) {
+        console_mutex.lock();
+        fprintf(stderr, "posix_memalign: error [%d] out of memory?\n", err);
+        console_mutex.unlock();
+        return NULL;
+    }
+    memset(wp, 0, n);
+    return (unsigned char *)wp;
+#else
+    if (n == page_size) {
+        wp = calloc(page_size, 1);
+        memset(wp, 0, n);
+        return (unsigned char *)wp;
+    } else {
+        console_mutex.lock();
+        fprintf(stderr, "get_aligned_heap: too fiddly to align, choose "
+                "smaller lb_sz\n");
+        console_mutex.unlock();
+        return NULL;
+    }
+#endif
+}
+
+static void
+work_thread(int id, struct opts_t * op)
+{
+    int thr_async_starts = 0;
+    int thr_async_finishes = 0;
+    unsigned int thr_eagain_count = 0;
+    int k, res, sg_fd, num_outstanding, do_inc, num, pack_id, sg_flags;
     int open_flags = O_RDWR;
+    char ebuff[EBUFF_SZ];
+    unsigned char * lbp;
+    const char * err = NULL;
+    struct pollfd  pfd;
+    list<unsigned char *> free_lst;
+    map<int, unsigned char *> pi_map;
 
-    r16CmdBlk[6] = w16CmdBlk[6] = (lba >> 24) & 0xff;
-    r16CmdBlk[7] = w16CmdBlk[7] = (lba >> 16) & 0xff;
-    r16CmdBlk[8] = w16CmdBlk[8] = (lba >> 8) & 0xff;
-    r16CmdBlk[9] = w16CmdBlk[9] = lba & 0xff;
-    if (! block)
+    if (op->verbose) {
+        console_mutex.lock();
+        cerr << "Enter work_thread id=" << id << endl;
+        console_mutex.unlock();
+    }
+    if (! op->block)
         open_flags |= O_NONBLOCK;
-    if (excl)
-        open_flags |= O_EXCL;
 
-    while (((sg_fd = open(dev_name, open_flags)) < 0) &&
-           (EBUSY == errno)) {
-        ++ebusy;
-        if (wait_ms > 0)
-            this_thread::sleep_for(milliseconds{wait_ms});
-        else if (0 == wait_ms)
-            this_thread::yield();
-        else if (-2 == wait_ms)
-            sleep(0);                   // process yield ??
-    }
+    sg_fd = open(op->dev_name, open_flags);
     if (sg_fd < 0) {
-        snprintf(ebuff, EBUFF_SZ,
-                 "do_rd_inc_wr_twice: error opening file: %s", dev_name);
+        snprintf(ebuff, EBUFF_SZ, "%s: id=%d, error opening file: %s",
+                 __func__, id, op->dev_name);
+        console_mutex.lock();
         perror(ebuff);
-        return -1;
+        console_mutex.unlock();
+        return;
+    }
+    pfd.fd = sg_fd;
+    pfd.events = POLLIN;
+    sg_flags = 0;
+    if (BQ_AT_TAIL == op->bqd)
+        sg_flags |= SG_FLAG_Q_AT_TAIL;
+    else if (BQ_AT_HEAD == op->bqd)
+        sg_flags |= SG_FLAG_Q_AT_HEAD;
+    if (op->direct)
+        sg_flags |= SG_FLAG_DIRECT_IO;
+    if (op->no_xfer)
+        sg_flags |= SG_FLAG_NO_DXFER;
+    if (op->verbose > 1) {
+        console_mutex.lock();
+        fprintf(stderr, "sg_flags=0x%x, %s cmd\n", sg_flags,
+                ((SCSI_TUR != op->c2e) ? "TUR": "IO"));
+        console_mutex.unlock();
     }
 
-    for (k = 0; k < 2; ++k) {
-        /* Prepare READ_16 command */
-        memset(&pt, 0, sizeof(pt));
-        pt.interface_id = 'S';
-        pt.cmd_len = sizeof(r16CmdBlk);
-        pt.mx_sb_len = sizeof(sense_buffer);
-        pt.dxfer_direction = SG_DXFER_FROM_DEV;
-        pt.dxfer_len = READ16_REPLY_LEN;
-        pt.dxferp = lb;
-        pt.cmdp = r16CmdBlk;
-        pt.sbp = sense_buffer;
-        pt.timeout = 20000;     /* 20000 millisecs == 20 seconds */
-        pt.pack_id = id;
-
-        // queue up two READ_16s to same LBA
-        if (write(sg_fd, &pt, sizeof(pt)) < 0) {
-            console_mutex.lock();
-            perror("do_rd_inc_wr_twice: write(sg, READ_16)");
-            console_mutex.unlock();
-            close(sg_fd);
-            return -1;
-        }
-        pt2 = pt;
-        if (write(sg_fd, &pt2, sizeof(pt2)) < 0) {
-            console_mutex.lock();
-            perror("do_rd_inc_wr_twice: write(sg, READ_16) 2");
-            console_mutex.unlock();
-            close(sg_fd);
-            return -1;
-        }
-
-        while (((res = read(sg_fd, &pt, sizeof(pt))) < 0) &&
-               (EAGAIN == errno)) {
-            ++eagains;
-            if (wait_ms > 0)
-                this_thread::sleep_for(milliseconds{wait_ms});
-            else if (0 == wait_ms)
-                this_thread::yield();
-            else if (-2 == wait_ms)
-                sleep(0);                   // process yield ??
-        }
-        if (res < 0) {
-            console_mutex.lock();
-            perror("do_rd_inc_wr_twice: read(sg, READ_16)");
-            console_mutex.unlock();
-            close(sg_fd);
-            return -1;
-        }
-        /* now for the error processing */
-        ok = 0;
-        switch (sg_err_category3(&pt)) {
-        case SG_LIB_CAT_CLEAN:
-            ok = 1;
-            break;
-        case SG_LIB_CAT_RECOVERED:
-            console_mutex.lock();
-            fprintf(stderr, "Recovered error on READ_16, continuing\n");
-            console_mutex.unlock();
-            ok = 1;
-            break;
-        default: /* won't bother decoding other categories */
-            console_mutex.lock();
-            sg_chk_n_print3("READ_16 command error", &pt, 1);
-            console_mutex.unlock();
-            break;
-        }
-        if (ok) {
-            while (((res = read(sg_fd, &pt2, sizeof(pt2))) < 0) &&
-                   (EAGAIN == errno)) {
-                ++eagains;
-                if (wait_ms > 0)
-                    this_thread::sleep_for(milliseconds{wait_ms});
-                else if (0 == wait_ms)
-                    this_thread::yield();
-                else if (-2 == wait_ms)
-                    sleep(0);                   // process yield ??
+    num = op->num_per_thread;
+    for (k = 0, num_outstanding = 0; (k < num) || num_outstanding;
+         k = do_inc ? k + 1 : k) {
+        do_inc = 0;
+        if ((num_outstanding < Q_PER_FD) && (k < num)) {
+            do_inc = 1;
+            pack_id = uniq_pack_id.fetch_add(1);
+            if (SCSI_TUR != op->c2e) {
+                if (free_lst.empty()) {
+                    lbp = get_aligned_heap(op->lb_sz);
+                    if (NULL == lbp) {
+                        err = "out of memory";
+                        break;
+                    }
+                } else {
+                    lbp = free_lst.back();
+                    free_lst.pop_back();
+                }
+            } else
+                lbp = NULL;
+            if (start_sg3_cmd(sg_fd, op->c2e, pack_id, op->lba, lbp,
+                              op->lb_sz, sg_flags)) {
+                err = "start_sg3_cmd() failed";
+                break;
             }
+            ++thr_async_starts;
+            ++num_outstanding;
+            pi_map[pack_id] = lbp;
+            /* check if any responses, don't wait */
+            res = poll(&pfd, 1, 0);
             if (res < 0) {
-                console_mutex.lock();
-                perror("do_rd_inc_wr_twice: read(sg, READ_16) 2");
-                console_mutex.unlock();
-                close(sg_fd);
-                return -1;
+                err = "poll(0) failed";
+                break;
             }
-            pt = pt2;
-            /* now for the error processing */
-            ok = 0;
-            switch (sg_err_category3(&pt)) {
-            case SG_LIB_CAT_CLEAN:
-                ok = 1;
-                break;
-            case SG_LIB_CAT_RECOVERED:
-                console_mutex.lock();
-                fprintf(stderr, "Recovered error on READ_16, continuing 2\n");
-                console_mutex.unlock();
-                ok = 1;
-                break;
-            default: /* won't bother decoding other categories */
-                console_mutex.lock();
-                sg_chk_n_print3("READ_16 command error 2", &pt, 1);
-                console_mutex.unlock();
+        } else {
+            /* check if any responses, wait as requested */
+            res = poll(&pfd, 1, ((op->wait_ms > 0) ? op->wait_ms : 0));
+            if (res < 0) {
+                err = "poll(wait_ms) failed";
                 break;
             }
         }
-        if (! ok) {
-            close(sg_fd);
-            return -1;
-        }
+        if (0 == res)
+            continue;
+        while (res-- > 0) {
+            if (finish_sg3_cmd(sg_fd, op->c2e, pack_id, op->wait_ms,
+                               thr_eagain_count)) {
+                err = "finish_sg3_cmd() failed";
+                break;
+            }
+            ++thr_async_finishes;
+            --num_outstanding;
+            auto p = pi_map.find(pack_id);
 
-        u = (lb[0] << 24) + (lb[1] << 16) + (lb[2] << 8) + lb[3];
-        if (0 == k)
-            odd = (1 == (u % 2));
-        ++u;
-        lb[0] = (u >> 24) & 0xff;
-        lb[1] = (u >> 16) & 0xff;
-        lb[2] = (u >> 8) & 0xff;
-        lb[3] = u & 0xff;
-
-        if (wait_ms > 0)       /* allow daylight for bad things ... */
-            this_thread::sleep_for(milliseconds{wait_ms});
-        else if (0 == wait_ms)
-            this_thread::yield();
-        else if (-2 == wait_ms)
-            sleep(0);                   // process yield ??
-
-        /* Prepare WRITE_16 command */
-        memset(&pt, 0, sizeof(pt));
-        pt.interface_id = 'S';
-        pt.cmd_len = sizeof(w16CmdBlk);
-        pt.mx_sb_len = sizeof(sense_buffer);
-        pt.dxfer_direction = SG_DXFER_TO_DEV;
-        pt.dxfer_len = WRITE16_REPLY_LEN;
-        pt.dxferp = lb;
-        pt.cmdp = w16CmdBlk;
-        pt.sbp = sense_buffer;
-        pt.timeout = 20000;     /* 20000 millisecs == 20 seconds */
-        pt.pack_id = id;
-
-        if (ioctl(sg_fd, SG_IO, &pt) < 0) {
-            console_mutex.lock();
-            perror("do_rd_inc_wr_twice: WRITE_16 SG_IO ioctl error");
-            console_mutex.unlock();
-            close(sg_fd);
-            return -1;
-        }
-        /* now for the error processing */
-        ok = 0;
-        switch (sg_err_category3(&pt)) {
-        case SG_LIB_CAT_CLEAN:
-            ok = 1;
-            break;
-        case SG_LIB_CAT_RECOVERED:
-            console_mutex.lock();
-            fprintf(stderr, "Recovered error on WRITE_16, continuing\n");
-            console_mutex.unlock();
-            ok = 1;
-            break;
-        default: /* won't bother decoding other categories */
-            console_mutex.lock();
-            sg_chk_n_print3("WRITE_16 command error", &pt, 1);
-            console_mutex.unlock();
-            break;
-        }
-        if (! ok) {
-            close(sg_fd);
-            return -1;
+            if (p == pi_map.end()) {
+                snprintf(ebuff, sizeof(ebuff), "pack_id=%d from "
+                         "finish_sg3_cmd() not found\n", pack_id);
+                err = ebuff;
+                break;
+            } else {
+                lbp = p->second;
+                pi_map.erase(p);
+                if (lbp)
+                    free_lst.push_front(lbp);
+            }
         }
     }
     close(sg_fd);
-    return odd;
+    if (err || (k < num) || (op->verbose > 0)) {
+        console_mutex.lock();
+        if (k < num) {
+            cerr << "thread id=" << id << " FAILed at iteration: " << k;
+            if (err)
+                cerr << " Reason: " << err << endl;
+            else
+                cerr << endl;
+        } else {
+            if (err)
+                cerr << "thread id=" << id << " FAILed on last, " <<
+                        "Reason: " << err << endl;
+            else
+                cerr << "thread id=" << id << " normal exit" << '\n';
+        }
+        console_mutex.unlock();
+    }
+    k = pi_map.size();
+    if (k > 0) {
+        console_mutex.lock();
+            cerr << "thread id=" << id << " Still " << k << " elements " <<
+                    "in pack_id map on exit" << endl;
+        console_mutex.unlock();
+    }
+    while (! free_lst.empty()) {
+        lbp = free_lst.back();
+        free_lst.pop_back();
+        if (lbp)
+            free(lbp);
+    }
+    async_starts += thr_async_starts;
+    async_finishes += thr_async_finishes;
+    eagain_count += thr_eagain_count;
 }
 
-
-
 #define INQ_REPLY_LEN 96
 #define INQ_CMD_LEN 6
 
@@ -543,8 +526,7 @@
  * in b (up to m_blen bytes). Does not use O_EXCL flag. Returns 0 on success,
  * else -1 . */
 static int
-do_inquiry_prod_id(const char * dev_name, int block, int wait_ms,
-                   unsigned int & ebusys, char * b, int b_mlen)
+do_inquiry_prod_id(const char * dev_name, int block, char * b, int b_mlen)
 {
     int sg_fd, ok, ret;
     struct sg_io_hdr pt;
@@ -557,16 +539,7 @@
 
     if (! block)
         open_flags |= O_NONBLOCK;
-    while (((sg_fd = open(dev_name, open_flags)) < 0) &&
-           (EBUSY == errno)) {
-        ++ebusys;
-        if (wait_ms > 0)
-            this_thread::sleep_for(milliseconds{wait_ms});
-        else if (0 == wait_ms)
-            this_thread::yield();
-        else if (-2 == wait_ms)
-            sleep(0);                   // process yield ??
-    }
+    sg_fd = open(dev_name, open_flags);
     if (sg_fd < 0) {
         snprintf(ebuff, EBUFF_SZ,
                  "do_inquiry_prod_id: error opening file: %s", dev_name);
@@ -627,59 +600,37 @@
     return ret;
 }
 
-static void
-work_thread(const char * dev_name, unsigned int lba, int id, int block,
-            int excl, int num, int wait_ms)
-{
-    unsigned int thr_odd_count = 0;
-    unsigned int thr_ebusy_count = 0;
-    unsigned int thr_eagain_count = 0;
-    int k, res;
-
-    console_mutex.lock();
-    cerr << "Enter work_thread id=" << id << " excl=" << excl << " block="
-         << block << endl;
-    console_mutex.unlock();
-    for (k = 0; k < num; ++k) {
-        res = do_rd_inc_wr_twice(dev_name, lba, block, excl, wait_ms, k,
-                                 thr_ebusy_count, thr_eagain_count);
-        if (res < 0)
-            break;
-        if (res)
-            ++thr_odd_count;
-    }
-    console_mutex.lock();
-    if (k < num)
-        cerr << "thread id=" << id << " FAILed at iteration: " << k << '\n';
-    else
-        cerr << "thread id=" << id << " normal exit" << '\n';
-    console_mutex.unlock();
-
-    odd_count_mutex.lock();
-    odd_count += thr_odd_count;
-    ebusy_count += thr_ebusy_count;
-    eagain_count += thr_eagain_count;
-    odd_count_mutex.unlock();
-}
-
 
 int
 main(int argc, char * argv[])
 {
-    int k, res;
-    int block = 0;
+    int k, n, res;
     int force = 0;
-    unsigned int lba = DEF_LBA;
-    int num_per_thread = DEF_NUM_PER_THREAD;
+    int64_t ll;
+    unsigned int inq_ebusy_count = 0;
     int num_threads = DEF_NUM_THREADS;
-    int wait_ms = DEF_WAIT_MS;
-    int no_o_excl = 0;
-    char * dev_name = NULL;
     char b[64];
+    struct timespec start_tm, end_tm;
+    struct opts_t opts;
+    struct opts_t * op;
+
+    op = &opts;
+    op->dev_name = NULL;
+    op->direct = !! DEF_DIRECT;
+    op->lba = DEF_LBA;
+    op->lb_sz = DEF_LB_SZ;
+    op->num_per_thread = DEF_NUM_PER_THREAD;
+    op->no_xfer = !! DEF_NO_XFER;
+    op->verbose = 0;
+    op->wait_ms = DEF_WAIT_MS;
+    op->c2e = SCSI_TUR;
+    op->bqd = BQ_DEFAULT;
+    op->block = !! DEF_BLOCKING;
+    page_size = sysconf(_SC_PAGESIZE);
 
     for (k = 1; k < argc; ++k) {
-        if (0 == memcmp("-b", argv[k], 2))
-            ++block;
+        if (0 == memcmp("-d", argv[k], 2))
+            op->direct = true;
         else if (0 == memcmp("-f", argv[k], 2))
             ++force;
         else if (0 == memcmp("-h", argv[k], 2)) {
@@ -687,100 +638,130 @@
             return 0;
         } else if (0 == memcmp("-l", argv[k], 2)) {
             ++k;
-            if ((k < argc) && isdigit(*argv[k]))
-                lba = (unsigned int)atoi(argv[k]);
-            else
+            if ((k < argc) && isdigit(*argv[k])) {
+                ll = sg_get_llnum(argv[k]);
+                if (-1 == ll) {
+                    fprintf(stderr, "could not decode lba\n");
+                    return 1;
+                } else
+                    op->lba = (uint64_t)ll;
+            } else
                 break;
         } else if (0 == memcmp("-n", argv[k], 2)) {
             ++k;
             if ((k < argc) && isdigit(*argv[k]))
-                num_per_thread = atoi(argv[k]);
+                op->num_per_thread = atoi(argv[k]);
             else
                 break;
+        } else if (0 == memcmp("-N", argv[k], 2))
+            op->no_xfer = true;
+        else if (0 == memcmp("-q", argv[k], 2)) {
+            ++k;
+            if ((k < argc) && isdigit(*argv[k])) {
+                n = atoi(argv[k]);
+                if (0 == n)
+                    op->bqd = BQ_AT_HEAD;
+                else if (1 == n)
+                    op->bqd = BQ_AT_TAIL;
+            }
+        } else if (0 == memcmp("-R", argv[k], 2))
+            op->c2e = SCSI_READ16;
+        else if (0 == memcmp("-s", argv[k], 2)) {
+            ++k;
+            if ((k < argc) && isdigit(*argv[k])) {
+                op->lb_sz = atoi(argv[k]);
+                if (op->lb_sz < 256) {
+                    cerr << "Strange lb_sz, using 256" << endl;
+                    op->lb_sz = 256;
+                }
+            } else
+                break;
         } else if (0 == memcmp("-t", argv[k], 2)) {
             ++k;
             if ((k < argc) && isdigit(*argv[k]))
                 num_threads = atoi(argv[k]);
             else
                 break;
-        } else if (0 == memcmp("-V", argv[k], 2)) {
+        } else if (0 == memcmp("-T", argv[k], 2))
+            op->c2e = SCSI_TUR;
+        else if (0 == memcmp("-vvvv", argv[k], 5))
+            op->verbose += 4;
+        else if (0 == memcmp("-vvv", argv[k], 4))
+            op->verbose += 3;
+        else if (0 == memcmp("-vv", argv[k], 3))
+            op->verbose += 2;
+        else if (0 == memcmp("-v", argv[k], 2))
+            ++op->verbose;
+        else if (0 == memcmp("-V", argv[k], 2)) {
             printf("%s version: %s\n", util_name, version_str);
             return 0;
         } else if (0 == memcmp("-w", argv[k], 2)) {
             ++k;
             if ((k < argc) && (isdigit(*argv[k]) || ('-' == *argv[k]))) {
                 if ('-' == *argv[k])
-                    wait_ms = - atoi(argv[k] + 1);
+                    op->wait_ms = - atoi(argv[k] + 1);
                 else
-                    wait_ms = atoi(argv[k]);
+                    op->wait_ms = atoi(argv[k]);
             } else
                 break;
-        } else if (0 == memcmp("-xxx", argv[k], 4))
-            no_o_excl += 3;
-        else if (0 == memcmp("-xx", argv[k], 3))
-            no_o_excl += 2;
-        else if (0 == memcmp("-x", argv[k], 2))
-            ++no_o_excl;
+        } else if (0 == memcmp("-W", argv[k], 2))
+            op->c2e = SCSI_WRITE16;
         else if (*argv[k] == '-') {
             printf("Unrecognized switch: %s\n", argv[k]);
-            dev_name = NULL;
+            op->dev_name = NULL;
             break;
         }
-        else if (! dev_name)
-            dev_name = argv[k];
+        else if (! op->dev_name)
+            op->dev_name = argv[k];
         else {
             printf("too many arguments\n");
-            dev_name = 0;
+            op->dev_name = NULL;
             break;
         }
     }
-    if (0 == dev_name) {
+    if (0 == op->dev_name) {
         usage();
         return 1;
     }
     try {
         struct stat a_stat;
 
-        if (stat(dev_name, &a_stat) < 0) {
+        if (stat(op->dev_name, &a_stat) < 0) {
             perror("stat() on dev_name failed");
             return 1;
         }
         if (! S_ISCHR(a_stat.st_mode)) {
             fprintf(stderr, "%s should be a sg device which is a char "
-                    "device. %s\n", dev_name, dev_name);
+                    "device. %s\n", op->dev_name, op->dev_name);
             fprintf(stderr, "is not a char device and damage could be done "
                     "if it is a BLOCK\ndevice, exiting ...\n");
             return 1;
         }
         if (! force) {
-            res = do_inquiry_prod_id(dev_name, block, wait_ms, ebusy_count,
-                                     b, sizeof(b));
+            res = do_inquiry_prod_id(op->dev_name, op->block, b, sizeof(b));
             if (res) {
-                fprintf(stderr, "INQUIRY failed on %s\n", dev_name);
+                fprintf(stderr, "INQUIRY failed on %s\n", op->dev_name);
                 return 1;
             }
             // For safety, since <lba> written to, only permit scsi_debug
             // devices. Bypass this with '-f' option.
             if (0 != memcmp("scsi_debug", b, 10)) {
-                fprintf(stderr, "Since this utility writes to LBA %d, only "
-                        "devices with scsi_debug\nproduct ID accepted.\n",
-                        lba);
+                fprintf(stderr, "Since this utility writes to LBA 0x%" PRIx64
+                        ", only devices with scsi_debug\n"
+                        "product ID accepted\n", op->lba);
                 return 2;
             }
+            ebusy_count += inq_ebusy_count;
         }
+        start_tm.tv_sec = 0;
+        start_tm.tv_nsec = 0;
+        if (clock_gettime(CLOCK_MONOTONIC, &start_tm) < 0)
+            perror("clock_gettime failed");
 
         vector<thread *> vt;
 
         for (k = 0; k < num_threads; ++k) {
-            int excl = 1;
-
-            if (no_o_excl > 1)
-                excl = 0;
-            else if ((0 == k) && (1 == no_o_excl))
-                excl = 0;
-
-            thread * tp = new thread {work_thread, dev_name, lba, k, block,
-                                      excl, num_per_thread, wait_ms};
+            thread * tp = new thread {work_thread, k, op};
             vt.push_back(tp);
         }
 
@@ -791,13 +772,35 @@
         for (k = 0; k < (int)vt.size(); ++k)
             delete vt[k];
 
-        if (no_o_excl)
-            cout << "Odd count: " << odd_count << endl;
-        else
-            cout << "Expecting odd count of 0, got " << odd_count << endl;
-        cout << "Number of EBUSYs: " << ebusy_count << endl;
-        cout << "Number of EAGAINs: " << eagain_count << endl;
+        n = uniq_pack_id.load() - 1;
+        if ((n > 0) && (0 == clock_gettime(CLOCK_MONOTONIC, &end_tm))) {
+            struct timespec res_tm;
+            double a, b;
 
+            res_tm.tv_sec = end_tm.tv_sec - start_tm.tv_sec;
+            res_tm.tv_nsec = end_tm.tv_nsec - start_tm.tv_nsec;
+            if (res_tm.tv_nsec < 0) {
+                --res_tm.tv_sec;
+                res_tm.tv_nsec += 1000000000;
+            }
+            a = res_tm.tv_sec;
+            a += (0.000001 * (res_tm.tv_nsec / 1000));
+            b = (double)n;
+            if (a > 0.000001) {
+                printf("Time to complete %d commands was %d.%06d seconds\n",
+                       n, (int)res_tm.tv_sec, (int)(res_tm.tv_nsec / 1000));
+                cout << "Implies " << (b / a) << " IOPS" << endl;
+            }
+        }
+
+        if (op->verbose) {
+            cout << "Number of async_starts: " << async_starts.load() << endl;
+            cout << "Number of async_finishes: " << async_finishes.load() <<
+                    endl;
+            cout << "Last pack_id: " << n << endl;
+            cout << "Number of EBUSYs: " << ebusy_count.load() << endl;
+            cout << "Number of EAGAINs: " << eagain_count.load() << endl;
+        }
     }
     catch(system_error& e)  {
         cerr << "got a system_error exception: " << e.what() << '\n';
diff --git a/examples/sg_tst_excl.cpp b/examples/sg_tst_excl.cpp
index b19d4a2..a4a8512 100644
--- a/examples/sg_tst_excl.cpp
+++ b/examples/sg_tst_excl.cpp
@@ -46,7 +46,7 @@
 #include "sg_lib.h"
 #include "sg_io_linux.h"
 
-static const char * version_str = "1.07 20131110";
+static const char * version_str = "1.08 20140710";
 static const char * util_name = "sg_tst_excl";
 
 /* This is a test program for checking O_EXCL on open() works. It uses
@@ -72,6 +72,8 @@
  *   g++ -Wall -std=c++11 -pthread -I ../include ../lib/sg_lib.o
  *     ../lib/sg_lib_data.o ../lib/sg_io_linux.o -o sg_tst_excl
  *     sg_tst_excl.cpp
+ * or use the C++ Makefile in that directory:
+ *   make -f Makefile.cplus sg_tst_excl
  *
  * Currently this utility is Linux only and assumes the SG_IO v3 interface
  * which is supported by sg and block devices (but not bsg devices which
diff --git a/examples/sgq_dd.c b/examples/sgq_dd.c
index 7ed18a1..55b03c3 100644
--- a/examples/sgq_dd.c
+++ b/examples/sgq_dd.c
@@ -9,11 +9,11 @@
 #include <errno.h>
 #include <limits.h>
 #include <signal.h>
+#include <poll.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
-#include <sys/poll.h>
 #include <linux/major.h>
 #include <sys/time.h>
 typedef unsigned char u_char;   /* horrible, for scsi.h */
diff --git a/lib/sg_lib.c b/lib/sg_lib.c
index a1020ec..0ab201e 100644
--- a/lib/sg_lib.c
+++ b/lib/sg_lib.c
@@ -1555,6 +1555,18 @@
     return errstr;
 }
 
+static void
+trimTrailingSpaces(char * b)
+{
+    int k;
+
+    for (k = ((int)strlen(b) - 1); k >= 0; --k) {
+        if (' ' != b[k])
+            break;
+    }
+    if ('\0' != b[k + 1])
+        b[k + 1] = '\0';
+}
 
 /* Note the ASCII-hex output goes to stdout. [Most other output from functions
  * in this file go to sg_warnings_strm (default stderr).]
@@ -1582,9 +1594,9 @@
     if (0 == no_ascii)  /* address at left and ASCII at right */
         formatstr = "%.76s\n";
     else if (no_ascii > 0)
-        formatstr = "%.58s\n";
+        formatstr = "%s\n";     /* was: "%.58s\n" */
     else /* negative: no address at left and no ASCII at right */
-        formatstr = "%.48s\n";
+        formatstr = "%s\n";     /* was: "%.48s\n"; */
     memset(buff, ' ', 80);
     buff[80] = '\0';
     if (no_ascii < 0) {
@@ -1598,6 +1610,7 @@
                         (int)(unsigned char)c);
             buff[bpos + 2] = ' ';
             if ((k > 0) && (0 == ((k + 1) % 16))) {
+                trimTrailingSpaces(buff);
                 fprintf(fp, formatstr, buff);
                 bpos = bpstart;
                 memset(buff, ' ', 80);
@@ -1606,6 +1619,7 @@
         }
         if (bpos > bpstart) {
             buff[bpos + 2] = '\0';
+            trimTrailingSpaces(buff);
             fprintf(fp, "%s\n", buff);
         }
         return;
@@ -1629,6 +1643,8 @@
             buff[cpos++] = c;
         }
         if (cpos > (cpstart + 15)) {
+            if (no_ascii)
+                trimTrailingSpaces(buff);
             fprintf(fp, formatstr, buff);
             bpos = bpstart;
             cpos = cpstart;
@@ -1640,6 +1656,8 @@
     }
     if (cpos > cpstart) {
         buff[cpos] = '\0';
+        if (no_ascii)
+            trimTrailingSpaces(buff);
         fprintf(fp, "%s\n", buff);
     }
 }
@@ -1671,8 +1689,11 @@
     char buff[122];
     int bpstart, bpos, k, n;
 
-    if (len <= 0)
+    if (len <= 0) {
+        if (b_len > 0)
+            b[0] = '\0';
         return;
+    }
     if (0 != format) {
         ;       /* do nothing different for now */
     }
@@ -1697,7 +1718,8 @@
                     (int)(unsigned char)c);
         buff[bpos + 2] = ' ';
         if ((k > 0) && (0 == ((k + 1) % 16))) {
-            n += my_snprintf(b + n, b_len - n, "%.*s\n", bpstart + 48, buff);
+            trimTrailingSpaces(buff);
+            n += my_snprintf(b + n, b_len - n, "%s\n", buff);
             if (n >= (b_len - 1))
                 return;
             bpos = bpstart;
@@ -1707,8 +1729,10 @@
         } else
             bpos += 3;
     }
-    if (bpos > bpstart)
-        n += my_snprintf(b + n, b_len - n, "%.*s\n", bpstart + 48, buff);
+    if (bpos > bpstart) {
+        trimTrailingSpaces(buff);
+        n += my_snprintf(b + n, b_len - n, "%s\n", buff);
+    }
     return;
 }
 
diff --git a/lib/sg_lib_data.c b/lib/sg_lib_data.c
index d4002b0..199c480 100644
--- a/lib/sg_lib_data.c
+++ b/lib/sg_lib_data.c
@@ -17,7 +17,7 @@
 #endif
 
 
-const char * sg_lib_version_str = "2.01 20140521";  /* spc4r37, sbc4r02 */
+const char * sg_lib_version_str = "2.02 20140708";  /* spc4r37, sbc4r02 */
 
 #ifdef SG_SCSI_STRINGS
 struct sg_lib_value_name_t sg_lib_normal_opcodes[] = {
diff --git a/sg3_utils.spec b/sg3_utils.spec
index 7585246..5f5d1a8 100644
--- a/sg3_utils.spec
+++ b/sg3_utils.spec
@@ -79,7 +79,7 @@
 %{_libdir}/*.la
 
 %changelog
-* Fri Jun 13 2014 - dgilbert at interlog dot com
+* Thu Jul 10 2014 - dgilbert at interlog dot com
 - track t10 changes
   * sg3_utils-1.40
 
diff --git a/src/sg_format.c b/src/sg_format.c
index e84846e..2d4effa 100644
--- a/src/sg_format.c
+++ b/src/sg_format.c
@@ -13,23 +13,8 @@
  *   the Free Software Foundation; either version 2, or (at your option)
  *   any later version.
  *
- * http://www.t10.org/scsi-3.htm
- * http://www.tldp.org/HOWTO/SCSI-Generic-HOWTO
- *
- *
- *  List of some (older) disk manufacturers' block counts.
- *  These are not needed in newer disks which will automatically use
- *  the manufacturers' recommended block count if a count of -1 is given.
- *      Inquiry         Block Count (@512 byte blocks)
- *      ST150150N       8388315
- *      IBM_DCHS04F     8888543
- *      IBM_DGHS09Y     17916240
- *      ST336704FC      71132960
- *      ST318304FC      35145034  (Factory spec is 35885167 sectors)
- *      ST336605FC      ???
- *      ST336753FC      71132960  (Factory spec is 71687372 sectors)
- *  and a newer one:
- *      ST33000650SS    5860533168 (3 TB SAS disk)
+ * See http://www.t10.org for relevant standards and drafts. The most recent
+ * draft is SBC-4 revision 2.
  */
 
 #include <stdio.h>
@@ -47,13 +32,10 @@
 #include "sg_cmds_basic.h"
 #include "sg_cmds_extra.h"
 
-static const char * version_str = "1.26 20140516";
+static const char * version_str = "1.28 20140704";
 
-#define RW_ERROR_RECOVERY_PAGE 1  /* every disk should have one */
-#define FORMAT_DEV_PAGE 3         /* Format Device Mode Page [now obsolete] */
-#define CONTROL_MODE_PAGE 0xa     /* alternative page all devices have?? */
 
-#define THIS_MPAGE_EXISTS RW_ERROR_RECOVERY_PAGE
+#define RW_ERROR_RECOVERY_PAGE 1  /* can give alternate with --mode=MP */
 
 #define SHORT_TIMEOUT           20   /* 20 seconds unless --wait given */
 #define FORMAT_TIMEOUT          (20 * 3600)       /* 20 hours ! */
@@ -88,6 +70,7 @@
         {"help", no_argument, 0, 'h'},
         {"ip_def", no_argument, 0, 'I'},
         {"long", no_argument, 0, 'l'},
+        {"mode", required_argument, 0, 'M'},
         {"pinfo", no_argument, 0, 'p'},
         {"pfu", required_argument, 0, 'P'},
         {"pie", required_argument, 0, 'q'},
@@ -111,19 +94,19 @@
                "[--early]\n"
                "                 [--fmtpinfo=FPI] [--format] [--help] "
                "[--ip_def] [--long]\n"
-               "                 [--pfu=PFU] [--pie=PIE] [--pinfo] "
-               "[--poll=PT] [--resize]\n"
-               "                 [--rto_req] [--security] [--six] "
-               "[--size=SIZE] [--verbose]\n"
-               "                 [--version] [--wait] DEVICE\n"
+               "                 [--mode=MP] [--pfu=PFU] [--pie=PIE] "
+               "[--pinfo] [--poll=PT]\n"
+               "                 [--resize] [--rto_req] [--security] "
+               "[--six] [--size=SIZE]\n"
+               "                 [--verbose] [--version] [--wait] DEVICE\n"
                "  where:\n"
                "    --cmplst=0|1\n"
                "      -C 0|1        sets CMPLST bit in format cdb "
                "(default: 1)\n"
-               "    --count=COUNT|-c COUNT    number of blocks to "
-               "report after format or\n"
-               "                              resize. With format "
-               "defaults to same as current\n"
+               "    --count=COUNT|-c COUNT    number of blocks to report "
+               "after format or\n"
+               "                              resize. Format default is "
+               "same as current\n"
                "    --dcrt|-D       disable certification (doesn't "
                "verify media)\n"
                "    --early|-e      exit once format started (user can "
@@ -132,12 +115,14 @@
                "(default: 0)\n"
                "    --format|-F     format unit (default: report current "
                "count and size)\n"
+               "                    use thrice for FORMAT UNIT command "
+               "only\n"
                "    --help|-h       prints out this usage message\n"
                "    --ip_def|-I     initialization pattern: default\n"
                "    --long|-l       allow for 64 bit lbas (default: assume "
                "32 bit lbas)\n"
-               "    --pfu=PFU|-P PFU    Protection Field Usage value "
-               "(default: 0)\n"
+               "    --mode=MP|-M MP     mode page (def: 1 -> RW error recovery mpage)\n"
+               "    --pfu=PFU|-P PFU    Protection Field Usage value (default: 0)\n"
                "    --pie=PIE|-q PIE    Protection Information Exponent "
                "(default: 0)\n"
                "    --pinfo|-p      set upper bit of FMTPINFO field\n"
@@ -397,7 +382,7 @@
 int
 main(int argc, char **argv)
 {
-        const int mode_page = THIS_MPAGE_EXISTS;        /* hopefully */
+        int mode_page = RW_ERROR_RECOVERY_PAGE;
         int fd, res, calc_len, bd_len, dev_specific_param;
         int offset, j, bd_blk_len, prob, len;
         uint64_t ull;
@@ -431,7 +416,7 @@
                 int option_index = 0;
                 int c;
 
-                c = getopt_long(argc, argv, "c:C:Def:FhIlpP:q:rRs:SvVwx:6",
+                c = getopt_long(argc, argv, "c:C:Def:FhIlM:pP:q:rRs:SvVwx:6",
                                 long_options, &option_index);
                 if (c == -1)
                         break;
@@ -473,7 +458,7 @@
                         }
                         break;
                 case 'F':
-                        format = 1;
+                        ++format;
                         break;
                 case 'h':
                         usage();
@@ -485,6 +470,14 @@
                         long_lba = 1;
                         do_rcap16 = 1;
                         break;
+                case 'M':
+                        mode_page = sg_get_num(optarg);
+                        if ((mode_page < 0) || ( mode_page > 62)) {
+                                fprintf(stderr, "bad argument to '--mode', "
+                                        "accepts 0 to 62 inclusive\n");
+                                return SG_LIB_SYNTAX_ERROR;
+                        }
+                        break;
                 case 'p':
                         pinfo = 1;
                         break;
@@ -603,6 +596,9 @@
                 return SG_LIB_FILE_ERROR;
         }
 
+        if (format > 2)
+                goto format_only;
+
         if (sg_simple_inquiry(fd, &inq_out, 1, verbose)) {
                 fprintf(stderr, "%s doesn't respond to a SCSI INQUIRY\n",
                         device_name);
@@ -835,7 +831,8 @@
                 goto out;
         }
 
-        if (format)
+        if (format) {
+format_only:
 #if 1
                 printf("\nA FORMAT will commence in 15 seconds\n");
                 printf("    ALL data on %s will be DESTROYED\n", device_name);
@@ -861,6 +858,7 @@
 #else
                 fprintf(stderr, "FORMAT ignored, testing\n");
 #endif
+        }
 
 out:
         res = sg_cmds_close_device(fd);
diff --git a/src/sg_inq.c b/src/sg_inq.c
index 3d83ba4..b9591a5 100644
--- a/src/sg_inq.c
+++ b/src/sg_inq.c
@@ -41,7 +41,7 @@
 #include "sg_cmds_basic.h"
 #include "sg_pt.h"
 
-static const char * version_str = "1.39 20140527";    /* SPC-4 rev 37 */
+static const char * version_str = "1.40 20140704";    /* SPC-4 rev 37 */
 
 /* INQUIRY notes:
  * It is recommended that the initial allocation length given to a
@@ -2274,11 +2274,15 @@
             (buff[16] << 8) + buff[17]);
 }
 
+/* VPD_BLOCK_LIMITS sbc */
+/* Sequential access device characteristics,  ssc+smc */
+/* OSD information, osd */
 static void
 decode_b0_vpd(unsigned char * buff, int len, int do_hex)
 {
-    int pdt;
+    int pdt, m;
     unsigned int u;
+    uint64_t mwsl;
 
     if (do_hex) {
         dStrHex((const char *)buff, len, (1 == do_hex) ? 0 : -1);
@@ -2325,6 +2329,27 @@
                     (buff[33] << 16) | (buff[34] << 8) | buff[35];
                 printf("  Unmap granularity alignment: %u\n", u);
             }
+            if (len > 43) {     /* added in sbc3r26 */
+                mwsl = 0;
+                for (m = 0; m < 8; ++m) {
+                    if (m > 0)
+                        mwsl <<= 8;
+                    mwsl |= buff[36 + m];
+                }
+                printf("  Maximum write same length: 0x%" PRIx64 " blocks\n",
+                       mwsl);
+            }
+            if (len > 55) {     /* added in sbc4r02 */
+                u = ((unsigned int)buff[44] << 24) | (buff[45] << 16) |
+                    (buff[46] << 8) | buff[47];
+                printf("  Maximum atomic transfer length: %u\n", u);
+                u = ((unsigned int)buff[48] << 24) | (buff[49] << 16) |
+                    (buff[50] << 8) | buff[51];
+                printf("  Atomic alignment: %u\n", u);
+                u = ((unsigned int)buff[52] << 24) | (buff[53] << 16) |
+                    (buff[54] << 8) | buff[55];
+                printf("  Atomic transfer length granularity: %u\n", u);
+            }
             break;
         case PDT_TAPE: case PDT_MCHANGER:
             printf("  WORM=%d\n", !!(buff[4] & 0x1));
diff --git a/src/sg_modes.c b/src/sg_modes.c
index 70335fb..289af99 100644
--- a/src/sg_modes.c
+++ b/src/sg_modes.c
@@ -15,6 +15,7 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdarg.h>
 #include <string.h>
 #include <ctype.h>
 #include <getopt.h>
@@ -25,7 +26,7 @@
 #include "sg_lib.h"
 #include "sg_cmds_basic.h"
 
-static const char * version_str = "1.43 20140514";
+static const char * version_str = "1.44 20140708";
 
 #define DEF_ALLOC_LEN (1024 * 4)
 #define DEF_6_ALLOC_LEN 252
@@ -84,6 +85,28 @@
     int opt_new;
 };
 
+
+#ifdef __GNUC__
+static int pr2serr(const char * fmt, ...)
+        __attribute__ ((format (printf, 1, 2)));
+#else
+static int pr2serr(const char * fmt, ...);
+#endif
+
+
+static int
+pr2serr(const char * fmt, ...)
+{
+    va_list args;
+    int n;
+
+    va_start(args, fmt);
+    n = vfprintf(stderr, fmt, args);
+    va_end(args);
+    return n;
+}
+
+
 static void
 usage()
 {
@@ -212,7 +235,7 @@
         case 'c':
             n = sg_get_num(optarg);
             if ((n < 0) || (n > 3)) {
-                fprintf(stderr, "bad argument to '--control='\n");
+                pr2serr("bad argument to '--control='\n");
                 usage();
                 return SG_LIB_SYNTAX_ERROR;
             }
@@ -246,7 +269,7 @@
         case 'm':
             n = sg_get_num(optarg);
             if ((n < 0) || (n > 65535)) {
-                fprintf(stderr, "bad argument to '--maxlen='\n");
+                pr2serr("bad argument to '--maxlen='\n");
                 usage();
                 return SG_LIB_SYNTAX_ERROR;
             }
@@ -261,15 +284,14 @@
             cp = strchr(optarg, ',');
             n = sg_get_num_nomult(optarg);
             if ((n < 0) || (n > 63)) {
-                fprintf(stderr, "Bad argument to '--page='\n");
+                pr2serr("Bad argument to '--page='\n");
                 usage();
                 return SG_LIB_SYNTAX_ERROR;
             }
             if (cp) {
                 nn = sg_get_num_nomult(cp + 1);
                 if ((nn < 0) || (nn > 255)) {
-                    fprintf(stderr, "Bad second value in argument to "
-                            "'--page='\n");
+                    pr2serr("Bad second value in argument to '--page='\n");
                     usage();
                     return SG_LIB_SYNTAX_ERROR;
                 }
@@ -295,7 +317,7 @@
             ++op->do_version;
             break;
         default:
-            fprintf(stderr, "unrecognised option code %c [0x%x]\n", c, c);
+            pr2serr("unrecognised option code %c [0x%x]\n", c, c);
             if (op->do_help)
                 break;
             usage();
@@ -309,7 +331,7 @@
         }
         if (optind < argc) {
             for (; optind < argc; ++optind)
-                fprintf(stderr, "Unexpected extra argument: %s\n",
+                pr2serr("Unexpected extra argument: %s\n",
                         argv[optind]);
             usage();
             return SG_LIB_SYNTAX_ERROR;
@@ -395,7 +417,7 @@
             if (0 == strncmp("c=", cp, 2)) {
                 num = sscanf(cp + 2, "%x", &u);
                 if ((1 != num) || (u > 3)) {
-                    fprintf(stderr, "Bad page control after 'c=' option\n");
+                    pr2serr("Bad page control after 'c=' option\n");
                     usage_old();
                     return SG_LIB_SYNTAX_ERROR;
                 }
@@ -403,7 +425,7 @@
             } else if (0 == strncmp("m=", cp, 2)) {
                 num = sscanf(cp + 2, "%d", &n);
                 if ((1 != num) || (n < 0) || (n > 65535)) {
-                    fprintf(stderr, "Bad argument after 'm=' option\n");
+                    pr2serr("Bad argument after 'm=' option\n");
                     usage_old();
                     return SG_LIB_SYNTAX_ERROR;
                 }
@@ -412,16 +434,14 @@
                 if (NULL == strchr(cp + 2, ',')) {
                     num = sscanf(cp + 2, "%x", &u);
                     if ((1 != num) || (u > 63)) {
-                        fprintf(stderr, "Bad page code value after 'p=' "
-                                "option\n");
+                        pr2serr("Bad page code value after 'p=' option\n");
                         usage_old();
                         return SG_LIB_SYNTAX_ERROR;
                     }
                     op->pg_code = u;
                 } else if (2 == sscanf(cp + 2, "%x,%x", &u, &uu)) {
                     if (uu > 255) {
-                        fprintf(stderr, "Bad sub page code value after 'p=' "
-                                "option\n");
+                        pr2serr("Bad subpage code value after 'p=' option\n");
                         usage_old();
                         return SG_LIB_SYNTAX_ERROR;
                     }
@@ -429,16 +449,15 @@
                     op->subpg_code = uu;
                     op->subpg_code_set = 1;
                 } else {
-                    fprintf(stderr, "Bad page code, subpage code sequence "
-                            "after 'p=' option\n");
+                    pr2serr("Bad page code, subpage code sequence after 'p=' "
+                            "option\n");
                     usage_old();
                     return SG_LIB_SYNTAX_ERROR;
                 }
             } else if (0 == strncmp("subp=", cp, 5)) {
                 num = sscanf(cp + 5, "%x", &u);
                 if ((1 != num) || (u > 255)) {
-                    fprintf(stderr, "Bad sub page code after 'subp=' "
-                            "option\n");
+                    pr2serr("Bad sub page code after 'subp=' option\n");
                     usage_old();
                     return SG_LIB_SYNTAX_ERROR;
                 }
@@ -449,15 +468,15 @@
             } else if (0 == strncmp("-old", cp, 4))
                 ;
             else if (jmp_out) {
-                fprintf(stderr, "Unrecognized option: %s\n", cp);
+                pr2serr("Unrecognized option: %s\n", cp);
                 usage_old();
                 return SG_LIB_SYNTAX_ERROR;
             }
         } else if (0 == op->device_name)
             op->device_name = cp;
         else {
-            fprintf(stderr, "too many arguments, got: %s, not expecting: "
-                    "%s\n", op->device_name, cp);
+            pr2serr("too many arguments, got: %s, not expecting: %s\n",
+                    op->device_name, cp);
             usage_old();
             return SG_LIB_SYNTAX_ERROR;
         }
@@ -868,22 +887,22 @@
             res = sg_ll_mode_sense6(sg_fd, 0, 0, k, 0, rbuf, mresp_len,
                                     1, op->do_verbose);
             if (SG_LIB_CAT_INVALID_OP == res) {
-                fprintf(stderr, ">>>>>> try again without the '-6' "
-                        "switch for a 10 byte MODE SENSE command\n");
+                pr2serr(">>>>>> try again without the '-6' switch for a 10 "
+                        "byte MODE SENSE command\n");
                 return res;
             } else if (SG_LIB_CAT_NOT_READY == res) {
-                fprintf(stderr, "MODE SENSE (6) failed, device not ready\n");
+                pr2serr("MODE SENSE (6) failed, device not ready\n");
                 return res;
             }
         } else {
             res = sg_ll_mode_sense10(sg_fd, 0, 0, 0, k, 0, rbuf, mresp_len,
                                      1, op->do_verbose);
             if (SG_LIB_CAT_INVALID_OP == res) {
-                fprintf(stderr, ">>>>>> try again with a '-6' "
-                        "switch for a 6 byte MODE SENSE command\n");
+                pr2serr(">>>>>> try again with a '-6' switch for a 6 byte "
+                        "MODE SENSE command\n");
                 return res;
             } else if (SG_LIB_CAT_NOT_READY == res) {
-                fprintf(stderr, "MODE SENSE (10) failed, device not ready\n");
+                pr2serr("MODE SENSE (10) failed, device not ready\n");
                 return res;
             }
         }
@@ -911,8 +930,8 @@
             char b[80];
 
             sg_get_category_sense_str(res, sizeof(b), b, op->do_verbose - 1);
-            fprintf(stderr, "MODE SENSE (%s) failed: %s\n",
-                    (op->do_six ? "6" : "10"), b);
+            pr2serr("MODE SENSE (%s) failed: %s\n", (op->do_six ? "6" : "10"),
+                    b);
         }
     }
     return res;
@@ -958,7 +977,7 @@
         return 0;
     }
     if (op->do_version) {
-        fprintf(stderr, "Version string: %s\n", version_str);
+        pr2serr("Version string: %s\n", version_str);
         return 0;
     }
 
@@ -978,34 +997,31 @@
             }
             return 0;
         }
-        fprintf(stderr, "No DEVICE argument given\n");
+        pr2serr("No DEVICE argument given\n");
         usage_for(op);
         return SG_LIB_SYNTAX_ERROR;
     }
 
     if (op->do_examine && (op->pg_code >= 0)) {
-        fprintf(stderr, "can't give '-e' and a page number\n");
+        pr2serr("can't give '-e' and a page number\n");
         return SG_LIB_SYNTAX_ERROR;
     }
 
     if ((op->do_six) && (op->do_llbaa)) {
-        fprintf(stderr, "LLBAA not defined for MODE SENSE 6, try "
-                "without '-L'\n");
+        pr2serr("LLBAA not defined for MODE SENSE 6, try without '-L'\n");
         return SG_LIB_SYNTAX_ERROR;
     }
     if (op->maxlen > 0) {
         if (op->do_six && (op->maxlen > 255)) {
-            fprintf(stderr, "For Mode Sense (6) maxlen cannot exceed "
-                    "255\n");
+            pr2serr("For Mode Sense (6) maxlen cannot exceed 255\n");
             return SG_LIB_SYNTAX_ERROR;
         }
         if (op->maxlen > DEF_ALLOC_LEN) {
             malloc_rsp_buff = (unsigned char *)malloc(op->maxlen);
             if (NULL == malloc_rsp_buff) {
-                fprintf(stderr, "Unable to malloc maxlen=%d bytes\n",
-                        op->maxlen);
+                pr2serr("Unable to malloc maxlen=%d bytes\n", op->maxlen);
                 return SG_LIB_SYNTAX_ERROR;
-            }
+        }
             rsp_buff = malloc_rsp_buff;
         } else
             rsp_buff = def_rsp_buff;
@@ -1027,16 +1043,15 @@
 
     if ((sg_fd = sg_cmds_open_device(op->device_name, 1 /* ro */,
                                      op->do_verbose)) < 0) {
-        fprintf(stderr, "error opening file: %s: %s\n",
-                op->device_name, safe_strerror(-sg_fd));
+        pr2serr("error opening file: %s: %s\n", op->device_name,
+                safe_strerror(-sg_fd));
         if (malloc_rsp_buff)
             free(malloc_rsp_buff);
         return SG_LIB_FILE_ERROR;
     }
 
     if (sg_simple_inquiry(sg_fd, &inq_out, 1, op->do_verbose)) {
-        fprintf(stderr, "%s doesn't respond to a SCSI INQUIRY\n",
-                op->device_name);
+        pr2serr("%s doesn't respond to a SCSI INQUIRY\n", op->device_name);
         ret = SG_LIB_CAT_OTHER;
         goto finish;
     }
@@ -1068,11 +1083,9 @@
     if (op->do_raw > 1) {
         if (op->do_all) {
             if (op->opt_new)
-                fprintf(stderr, "'-R' requires a specific (sub)page, not "
-                        "all\n");
+                pr2serr("'-R' requires a specific (sub)page, not all\n");
             else
-                fprintf(stderr, "'-r' requires a specific (sub)page, not "
-                        "all\n");
+                pr2serr("'-r' requires a specific (sub)page, not all\n");
             usage_for(op);
             ret = SG_LIB_SYNTAX_ERROR;
             goto finish;
@@ -1085,30 +1098,30 @@
                                 op->pg_code, op->subpg_code, rsp_buff,
                                 rsp_buff_size, 1, op->do_verbose);
         if (SG_LIB_CAT_INVALID_OP == res)
-            fprintf(stderr, ">>>>>> try again without the '-6' "
-                    "switch for a 10 byte MODE SENSE command\n");
+            pr2serr(">>>>>> try again without the '-6' switch for a 10 byte "
+                    "MODE SENSE command\n");
     } else {
         res = sg_ll_mode_sense10(sg_fd, op->do_llbaa, op->do_dbd,
                                  op->page_control, op->pg_code,
                                  op->subpg_code, rsp_buff, rsp_buff_size,
                                  1, op->do_verbose);
         if (SG_LIB_CAT_INVALID_OP == res)
-            fprintf(stderr, ">>>>>> try again with a '-6' "
-                    "switch for a 6 byte MODE SENSE command\n");
+            pr2serr(">>>>>> try again with a '-6' switch for a 6 byte MODE "
+                    "SENSE command\n");
     }
     if (SG_LIB_CAT_ILLEGAL_REQ == res) {
         if (op->subpg_code > 0)
-            fprintf(stderr, "invalid field in cdb (perhaps subpages "
-                    "not supported)\n");
+            pr2serr("invalid field in cdb (perhaps subpages not "
+                    "supported)\n");
         else if (op->page_control > 0)
-            fprintf(stderr, "invalid field in cdb (perhaps "
-                    "page control (PC) not supported)\n");
+            pr2serr("invalid field in cdb (perhaps page control (PC) not "
+                    "supported)\n");
         else
-            fprintf(stderr, "invalid field in cdb (perhaps "
-                "page 0x%x not supported)\n", op->pg_code);
+            pr2serr("invalid field in cdb (perhaps page 0x%x not "
+                    "supported)\n", op->pg_code);
     } else if (res) {
         sg_get_category_sense_str(res, sizeof(b), b, op->do_verbose);
-        fprintf(stderr, "%s\n", b);
+        pr2serr("%s\n", b);
     }
     ret = res;
     if (0 == res) {
@@ -1125,8 +1138,8 @@
                     (0 == rsp_buff[5]) && (0 == rsp_buff[6])) {
                     rsp_buff[1] = num;
                     rsp_buff[0] = 0;
-                    fprintf(stderr, ">>> msense(10) but resp[0]=%d and "
-                            "not msense(6) response so fix length\n", num);
+                    pr2serr(">>> msense(10) but resp[0]=%d and not msense(6) "
+                            "response so fix length\n", num);
                 } else
                     resp_mode6 = 1;
             }
@@ -1156,8 +1169,7 @@
             longlba = rsp_buff[4] & 1;
         }
         if ((bd_len + headerlen) > md_len) {
-            fprintf(stderr, "Invalid block descriptor length=%d, ignore\n",
-                    bd_len);
+            pr2serr("Invalid block descriptor length=%d, ignore\n", bd_len);
             bd_len = 0;
         }
         if (op->do_raw) {
@@ -1230,8 +1242,8 @@
         for (k = 0; md_len > 0; ++k) { /* got mode page(s) */
             if ((k > 0) && (! op->do_all) &&
                 (SPG_CODE_ALL != op->subpg_code)) {
-                fprintf(stderr, "Unexpectedly received extra mode page "
-                                "responses, ignore\n");
+                pr2serr("Unexpectedly received extra mode page responses, "
+                        "ignore\n");
                 break;
             }
             uc = *ucp;
@@ -1241,10 +1253,9 @@
             if (0x0 == page_num) {
                 ++num_ua_pages;
                 if((num_ua_pages > 3) && (md_len > 0xa00)) {
-                    fprintf(stderr, ">>> Seen 3 unit attention pages "
-                            "(only one should be at end)\n     and mpage "
-                            "length=%d, looks malformed, try '-f' option\n",
-                            md_len);
+                    pr2serr(">>> Seen 3 unit attention pages (only one "
+                            "should be at end)\n     and mpage length=%d, "
+                            "looks malformed, try '-f' option\n", md_len);
                     break;
                 }
             }
@@ -1281,8 +1292,8 @@
             num = (len > md_len) ? md_len : len;
             if ((k > 0) && (num > 256)) {
                 num = 256;
-                fprintf(stderr, ">>> page length (%d) > 256 bytes, unlikely "
-                                "trim\n    Try '-f' option\n", len);
+                pr2serr(">>> page length (%d) > 256 bytes, unlikely trim\n"
+                        "    Try '-f' option\n", len);
             }
             dStrHex((const char *)ucp, num , 1);
             ucp += len;
diff --git a/src/sg_vpd.c b/src/sg_vpd.c
index c363dc5..508d6bd 100644
--- a/src/sg_vpd.c
+++ b/src/sg_vpd.c
@@ -33,8 +33,7 @@
 
 */
 
-static const char * version_str = "0.87 20140529";  /* spc4r37 + sbc4r01 */
-        /* And with sbc3r35, vale Mark Evans */
+static const char * version_str = "0.88 20140704";  /* spc4r37 + sbc4r02 */
 
 void svpd_enumerate_vendor(int vp_num);
 int svpd_count_vendor_vpds(int num_vpd, int vp_num);
diff --git a/utils/tst_sg_lib.c b/utils/tst_sg_lib.c
index 1c4fa7b..188cf03 100644
--- a/utils/tst_sg_lib.c
+++ b/utils/tst_sg_lib.c
@@ -119,6 +119,7 @@
     int verbose = 0;
     int ret = 0;
     char b[2048];
+    char bb[128];
 
     while (1) {
         int option_index = 0;
@@ -275,6 +276,8 @@
             dStrHex(b, k, 0);
             dStrHex(b, k, 1);
             dStrHex(b, k, -1);
+            dStrHexStr(b, k, "dStrHexStr:^", 0, sizeof(bb), bb);
+            printf("%s", bb);
             printf("\n");
         }
     }