Add test cases for 16 bit PCMPxSTRx variants. See #293754.
(Eliot Moss, moss@cs.umass.edu)
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12389 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/none/tests/amd64/Makefile.am b/none/tests/amd64/Makefile.am
index a9f6e54..ac69bff 100644
--- a/none/tests/amd64/Makefile.am
+++ b/none/tests/amd64/Makefile.am
@@ -55,8 +55,12 @@
nibz_bennee_mmap.vgtest \
pcmpstr64.stderr.exp pcmpstr64.stdout.exp \
pcmpstr64.vgtest \
+ pcmpstr64w.stderr.exp pcmpstr64w.stdout.exp \
+ pcmpstr64w.vgtest \
pcmpxstrx64.stderr.exp pcmpxstrx64.stdout.exp \
pcmpxstrx64.vgtest \
+ pcmpxstrx64w.stderr.exp pcmpxstrx64w.stdout.exp \
+ pcmpxstrx64w.vgtest \
rcl-amd64.vgtest rcl-amd64.stdout.exp rcl-amd64.stderr.exp \
redundantRexW.vgtest redundantRexW.stdout.exp \
redundantRexW.stderr.exp \
@@ -92,7 +96,9 @@
check_PROGRAMS += lzcnt64
endif
if BUILD_SSE42_TESTS
- check_PROGRAMS += pcmpstr64 pcmpxstrx64 sse4-64 crc32 aes
+ check_PROGRAMS += \
+ pcmpstr64 pcmpxstrx64 sse4-64 crc32 aes \
+ pcmpstr64w pcmpxstrx64w
endif
# DDD: these need to be made to work on Darwin like the x86/ ones were.
diff --git a/none/tests/amd64/pcmpstr64w.c b/none/tests/amd64/pcmpstr64w.c
new file mode 100644
index 0000000..7f408fc
--- /dev/null
+++ b/none/tests/amd64/pcmpstr64w.c
@@ -0,0 +1,1269 @@
+
+/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
+ pcmpistri to drive it. Does not check the e-vs-i or i-vs-m
+ aspect. */
+
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned char UChar;
+typedef unsigned short UShort;
+typedef unsigned long long int ULong;
+typedef UChar Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+//typedef unsigned char V128[16];
+typedef
+ union {
+ UChar uChar[16];
+ UShort uShort[8];
+ UInt uInt[4];
+ UInt w32[4];
+ }
+ V128;
+
+#define SHIFT_O 11
+#define SHIFT_S 7
+#define SHIFT_Z 6
+#define SHIFT_A 4
+#define SHIFT_C 0
+#define SHIFT_P 2
+
+#define MASK_O (1ULL << SHIFT_O)
+#define MASK_S (1ULL << SHIFT_S)
+#define MASK_Z (1ULL << SHIFT_Z)
+#define MASK_A (1ULL << SHIFT_A)
+#define MASK_C (1ULL << SHIFT_C)
+#define MASK_P (1ULL << SHIFT_P)
+
+
+UInt clz32 ( UInt x ) // Count the leading zero bits of x by binary search; gives 32 when x == 0.
+{
+ Int y, m, n;
+ y = -(x >> 16); // zero iff the top 16 bits of x are all zero
+ m = (y >> 16) & 16; // 16 when the top half is nonzero, else 0 (relies on arithmetic >> of a negative Int)
+ n = 16 - m; // running count: 16 if the top half was empty, else 0
+ x = x >> m; // move the half that holds the leading 1 into bits 15..0
+ y = x - 0x100; // negative iff bits 15..8 are all zero
+ m = (y >> 16) & 8; // 8 in that case, else 0
+ n = n + m;
+ x = x << m; // drop the 8 zeros just counted
+ y = x - 0x1000; // negative iff bits 15..12 are all zero
+ m = (y >> 16) & 4;
+ n = n + m;
+ x = x << m;
+ y = x - 0x4000; // negative iff bits 15..14 are both zero
+ m = (y >> 16) & 2;
+ n = n + m;
+ x = x << m;
+ y = x >> 14; // y is now the top two remaining bits: 0, 1, 2 or 3
+ m = y & ~(y >> 1); // maps 0,1,2,3 to 0,1,2,2
+ return n + 2 - m; // the last two positions add 2, 1 or 0 zeros
+}
+
+UInt ctz32 ( UInt x ) // Count the trailing zero bits of x; gives 32 when x == 0.
+{
+ return 32 - clz32((~x) & (x-1)); // (~x) & (x-1) is a mask of exactly the zero bits below the lowest set bit
+}
+
+void expand ( V128* dst, char* summary ) // Fill *dst from a 16-character hex string, one digit per byte.
+{
+ Int i;
+ assert( strlen(summary) == 16 ); // exactly one hex digit for each of the 16 bytes
+ for (i = 0; i < 16; i++) {
+ UChar xx = 0;
+ UChar x = summary[15-i]; // the rightmost character of the string becomes byte 0
+ if (x >= '0' && x <= '9') { xx = x - '0'; }
+ else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
+ else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
+ else assert(0); // any non-hex character aborts the test
+
+ assert(xx < 16);
+ xx = (xx << 4) | xx; // repeat the nibble in both halves: 'a' becomes byte 0xaa, '0' becomes 0x00
+ assert(xx < 256);
+ dst->uChar[i] = xx;
+ }
+}
+
+void try_istri ( char* which, // two-character label printed with the result
+ UInt(*h_fn)(V128*,V128*), // first implementation to call; its result is printed first
+ UInt(*s_fn)(V128*,V128*), // second implementation to call; its result is printed second
+ char* summL, char* summR ) // 16-hex-digit operand strings, turned into vectors by expand()
+{
+ assert(strlen(which) == 2);
+ V128 argL, argR;
+ expand(&argL, summL);
+ expand(&argR, summR);
+ UInt h_res = h_fn(&argL, &argR);
+ UInt s_res = s_fn(&argL, &argR);
+ printf("istri %s %s %s -> %08x %08x %s\n", // one line per case; "!!!!" is appended when the two results differ
+ which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
+}
+
+UInt zmask_from_V128 ( V128* arg ) // Build a mask whose bit i is 1 iff 16-bit element i of *arg is zero.
+{
+ UInt i, res = 0;
+ for (i = 0; i < 8; i++) { // eight 16-bit elements per vector
+ res |= ((arg->uShort[i] == 0) ? 1 : 0) << i;
+ }
+ return res; // only bits 7:0 can ever be set
+}
+
+//////////////////////////////////////////////////////////
+// //
+// GENERAL //
+// //
+//////////////////////////////////////////////////////////
+
+
+/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
+ basically), generate an I- or M-format output value, also the new
+ OSZACP flags. */
+static
+void PCMPxSTRx_WRK_gen_output_fmt_I_wide ( /*OUT*/V128* resV, // receives the new ECX value in w32[0]; w32[1..3] are zeroed
+ /*OUT*/UInt* resOSZACP, // receives the new flag bits, placed using the MASK_*/SHIFT_* positions
+ UInt intRes1, // per-element comparison result, one bit per 16-bit element
+ UInt zmaskL, UInt zmaskR, // zero-element masks of the two operands; only tested against 0 here
+ UInt validL, // valid-element mask of the left operand; used only for polarity 3
+ UInt pol, UInt idx ) // pol: 2-bit polarity selector; idx: 0 = lowest set bit, 1 = highest set bit
+{
+ assert((pol >> 2) == 0); // pol must fit in two bits
+ assert((idx >> 1) == 0); // idx must be 0 or 1
+
+ UInt intRes2 = 0;
+ switch (pol) {
+ case 0: intRes2 = intRes1; break; // pol +
+ case 1: intRes2 = ~intRes1; break; // pol -
+ case 2: intRes2 = intRes1; break; // pol m+
+ case 3: intRes2 = intRes1 ^ validL; break; // pol m-
+ }
+ intRes2 &= 0xFF; // keep one bit per 16-bit element; discards the high bits set by ~ or ^
+
+ // generate I-format output (an index in ECX)
+ // generate ecx value
+ UInt newECX = 0;
+ if (idx) {
+ // index of ms-1-bit
+ newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2)); // 8 is returned when no bit is set
+ } else {
+ // index of ls-1-bit
+ newECX = intRes2 == 0 ? 8 : ctz32(intRes2); // 8 is returned when no bit is set
+ }
+
+ resV->w32[0] = newECX;
+ resV->w32[1] = 0;
+ resV->w32[2] = 0;
+ resV->w32[3] = 0;
+
+ // generate new flags, common to all ISTRI and ISTRM cases
+ *resOSZACP // A, P are zero
+ = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
+ | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
+ | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
+ | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
+}
+
+/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
+ variants on 16-bit characters.
+
+ For xSTRI variants, the new ECX value is placed in the 32 bits
+ pointed to by *resV, and the top 96 bits are zeroed. For xSTRM
+ variants, the result is a 128 bit value and is placed at *resV in
+ the obvious way.
+
+ For all variants, the new OSZACP value is placed at *resOSZACP.
+
+ argLV and argRV are the vector args. The caller must prepare an
+ 8-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
+ must be 1 for each zero 16-bit element of the respective arg. For ESTRx
+ variants this is derived from the explicit length indication, and
+ must be 0 in all places except at the bit index corresponding to
+ the valid length (0 .. 8). If the valid length is 8 then the
+ mask must be all zeroes. In all cases, bits 31:8 must be zero.
+
+ imm8 is the original immediate from the instruction. isxSTRM
+ indicates whether this is an xSTRM or xSTRI variant, which controls
+ how much of *resV is written.
+
+ If the given imm8 case can be handled, the return value is True.
+ If not, False is returned, and neither *resV nor *resOSZACP is
+ altered.
+*/
+
+Bool pcmpXstrX_WRK_wide ( /*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ V128* argLV, V128* argRV,
+ UInt zmaskL, UInt zmaskR,
+ UInt imm8, Bool isxSTRM ) // NOTE(review): isxSTRM is never read below; every branch produces I-format output
+{
+ assert(imm8 < 0x80); // bit 7 of imm8 must be clear
+ assert((zmaskL >> 8) == 0); // masks carry one bit per 16-bit element, so at most 8 bits
+ assert((zmaskR >> 8) == 0);
+
+ /* Explicitly reject any imm8 values that haven't been validated,
+ even if they would probably work. Life is too short to have
+ unvalidated cases in the code base. */
+ switch (imm8) {
+ case 0x01:
+ case 0x03: case 0x09: case 0x0B: case 0x0D: case 0x13:
+ case 0x1B: case 0x39: case 0x3B: case 0x45: case 0x4B:
+ break;
+ default:
+ return False; // nothing has been written to the outputs at this point
+ }
+
+ UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
+ UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
+ UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
+ UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
+
+ /*----------------------------------------*/
+ /*-- strcmp on wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 2/*equal each, aka strcmp*/
+ && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
+ Int i;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolResII = 0;
+ for (i = 7; i >= 0; i--) { // walk downwards so that element 0 ends up in bit 0
+ UShort cL = argL[i];
+ UShort cR = argR[i];
+ boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
+ }
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+
+ // do invalidation, common to all equal-each cases
+ UInt intRes1
+ = (boolResII & validL & validR) // if both valid, use cmpres
+ | (~ (validL | validR)); // if both invalid, force 1
+ // else force 0
+ intRes1 &= 0xFF;
+
+ // generate I-format output
+ PCMPxSTRx_WRK_gen_output_fmt_I_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- set membership on wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 0/*equal any, aka find chars in a set*/
+ && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
+ /* argL: the string, argR: charset */
+ UInt si, ci;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+
+ for (si = 0; si < 8; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string.
+ break;
+ UInt m = 0;
+ for (ci = 0; ci < 8; ci++) {
+ if ((validR & (1 << ci)) == 0) break; // ran off the end of the charset
+ if (argR[ci] == argL[si]) { m = 1; break; } // string element si is in the set
+ }
+ boolRes |= (m << si);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFF;
+
+ // generate I-format output
+ PCMPxSTRx_WRK_gen_output_fmt_I_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- substring search on wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 3/*equal ordered, aka substring search*/
+ && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
+
+ /* argL: haystack, argR: needle */
+ UInt ni, hi;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (hi = 0; hi < 8; hi++) {
+ if ((validL & (1 << hi)) == 0)
+ // run off the end of the haystack
+ break;
+ UInt m = 1; // assume a match at offset hi until a mismatch is seen
+ for (ni = 0; ni < 8; ni++) {
+ if ((validR & (1 << ni)) == 0) break; // end of the needle: m keeps its current value
+ UInt i = ni + hi;
+ if (i >= 8) break; // needle extends past element 7: the part that fits still counts as a match
+ if (argL[i] != argR[ni]) { m = 0; break; }
+ }
+ boolRes |= (m << hi);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFF;
+
+ // generate I-format output
+ PCMPxSTRx_WRK_gen_output_fmt_I_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- ranges, unsigned wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 1/*ranges*/
+ && fmt == 1/*uw*/) {
+
+ /* argL: string, argR: range-pairs */
+ UInt ri, si;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (si = 0; si < 8; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string
+ break;
+ UInt m = 0;
+ for (ri = 0; ri < 8; ri += 2) { // argR holds (low, high) pairs in consecutive elements
+ if ((validR & (3 << ri)) != (3 << ri)) break; // stop at the first pair that is not fully valid
+ if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { // inclusive, unsigned comparison
+ m = 1; break;
+ }
+ }
+ boolRes |= (m << si);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFF;
+
+ // generate I-format output
+ PCMPxSTRx_WRK_gen_output_fmt_I_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx
+ );
+
+ return True;
+ }
+
+ return False; // no branch above matched this agg/fmt combination
+}
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_4B //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_4B ( V128* argL, V128* argR ) // Execute pcmpistri $0x4B via inline asm and pack its flags and index.
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // presumably keeps the pushfq below clear of data held just under rsp -- TODO confirm
+ "movdqu 0(%2), %%xmm2" "\n\t" // xmm2 <- copy of *argL
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x4B, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+UInt s_pcmpistri_4B ( V128* argLU, V128* argRU ) // Compute the imm8 0x4B result with the C model pcmpXstrX_WRK_wide.
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU), // masks mark the zero 16-bit elements of each operand
+ zmask_from_V128(argRU),
+ 0x4B, False/*!isSTRM*/
+ );
+ assert(ok); // the model must accept this imm8
+ resECX = resV.uInt[0]; // the model stores the new ECX value in the low 32 bits
+ return (resOSZACP << 16) | resECX; // same layout as h_pcmpistri_4B: flags above, index below
+}
+
+void istri_4B ( void ) // Run every operand pair below through both 0x4B implementations via try_istri.
+{
+ char* wot = "4B"; // label printed on each output line
+ UInt(*h)(V128*,V128*) = h_pcmpistri_4B; // inline-asm implementation
+ UInt(*s)(V128*,V128*) = s_pcmpistri_4B; // C-model implementation
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
+}
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_3B //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_3B ( V128* argL, V128* argR ) // Execute pcmpistri $0x3B via inline asm and pack its flags and index.
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // presumably keeps the pushfq below clear of data held just under rsp -- TODO confirm
+ "movdqu 0(%2), %%xmm2" "\n\t" // xmm2 <- copy of *argL
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x3B, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+UInt s_pcmpistri_3B ( V128* argLU, V128* argRU ) // Compute the imm8 0x3B result with the C model pcmpXstrX_WRK_wide.
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU), // masks mark the zero 16-bit elements of each operand
+ zmask_from_V128(argRU),
+ 0x3B, False/*!isSTRM*/
+ );
+ assert(ok); // the model must accept this imm8
+ resECX = resV.uInt[0]; // the model stores the new ECX value in the low 32 bits
+ return (resOSZACP << 16) | resECX; // same layout as h_pcmpistri_3B: flags above, index below
+}
+
+void istri_3B ( void ) // Run every operand pair below through both 0x3B implementations via try_istri.
+{
+ char* wot = "3B"; // label printed on each output line
+ UInt(*h)(V128*,V128*) = h_pcmpistri_3B; // inline-asm implementation
+ UInt(*s)(V128*,V128*) = s_pcmpistri_3B; // C-model implementation
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
+}
+
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_0D //
+// //
+//////////////////////////////////////////////////////////
+
+__attribute__((noinline))
+UInt h_pcmpistri_0D ( V128* argL, V128* argR ) // Execute pcmpistri $0x0D via inline asm and pack its flags and index.
+{
+ V128 block[2]; // V128 only guarantees 4-byte alignment, so the loads below must be unaligned ones
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res = 0, flags = 0;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // step well below the locals before pushfq writes to the stack
+ "movdqu 0(%2), %%xmm2" "\n\t" // movdqu, not movdqa: block[] need not be 16-byte aligned
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x0D, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+UInt s_pcmpistri_0D ( V128* argLU, V128* argRU ) // Compute the imm8 0x0D result with the C model pcmpXstrX_WRK_wide.
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU), // masks mark the zero 16-bit elements of each operand
+ zmask_from_V128(argRU),
+ 0x0D, False/*!isSTRM*/
+ );
+ assert(ok); // the model must accept this imm8
+ resECX = resV.uInt[0]; // the model stores the new ECX value in the low 32 bits
+ return (resOSZACP << 16) | resECX; // same layout as h_pcmpistri_0D: flags above, index below
+}
+
+void istri_0D ( void ) // Run every operand pair below through both 0x0D implementations via try_istri.
+{
+ char* wot = "0D"; // label printed on each output line
+ UInt(*h)(V128*,V128*) = h_pcmpistri_0D; // inline-asm implementation
+ UInt(*s)(V128*,V128*) = s_pcmpistri_0D; // C-model implementation
+
+ try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");
+
+ try_istri(wot,h,s, "11111111abcdef11", "00abcdef00abcdef");
+
+ try_istri(wot,h,s, "11111111abcdef11", "0000000000abcdef");
+ try_istri(wot,h,s, "1111111111abcdef", "0000000000abcdef");
+ try_istri(wot,h,s, "111111111111abcd", "0000000000abcdef");
+
+ try_istri(wot,h,s, "1111abcd11abcd11", "000000000000abcd");
+
+ try_istri(wot,h,s, "11abcd1111abcd11", "000000000000abcd");
+ try_istri(wot,h,s, "abcd111111abcd11", "000000000000abcd");
+ try_istri(wot,h,s, "cd11111111abcd11", "000000000000abcd");
+
+ try_istri(wot,h,s, "01abcd11abcd1111", "000000000000abcd");
+ try_istri(wot,h,s, "00abcd11abcd1111", "000000000000abcd");
+ try_istri(wot,h,s, "0000cd11abcd1111", "000000000000abcd");
+
+ try_istri(wot,h,s, "00abcd1100abcd11", "000000000000abcd");
+ try_istri(wot,h,s, "00abcd110000cd11", "000000000000abcd");
+
+ try_istri(wot,h,s, "1111111111111234", "0000000000000000");
+ try_istri(wot,h,s, "1111111111111234", "0000000000000011");
+ try_istri(wot,h,s, "1111111111111234", "0000000000001111");
+
+ try_istri(wot,h,s, "1111111111111234", "1111111111111234");
+ try_istri(wot,h,s, "0a11111111111111", "000000000000000a");
+ try_istri(wot,h,s, "0b11111111111111", "000000000000000a");
+}
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_09 //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_09 ( V128* argL, V128* argR ) // Execute pcmpistri $0x09 via inline asm and pack its flags and index.
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // presumably keeps the pushfq below clear of data held just under rsp -- TODO confirm
+ "movdqu 0(%2), %%xmm2" "\n\t" // xmm2 <- copy of *argL
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x09, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+UInt s_pcmpistri_09 ( V128* argLU, V128* argRU ) // Compute the imm8 0x09 result with the C model pcmpXstrX_WRK_wide.
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU), // masks mark the zero 16-bit elements of each operand
+ zmask_from_V128(argRU),
+ 0x09, False/*!isSTRM*/
+ );
+ assert(ok); // the model must accept this imm8
+ resECX = resV.uInt[0]; // the model stores the new ECX value in the low 32 bits
+ return (resOSZACP << 16) | resECX; // same layout as h_pcmpistri_09: flags above, index below
+}
+
+void istri_09 ( void ) // Run every operand pair below through both 0x09 implementations via try_istri.
+{
+ char* wot = "09"; // label printed on each output line
+ UInt(*h)(V128*,V128*) = h_pcmpistri_09; // inline-asm implementation
+ UInt(*s)(V128*,V128*) = s_pcmpistri_09; // C-model implementation
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
+}
+
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_1B //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_1B ( V128* argL, V128* argR ) // Execute pcmpistri $0x1B via inline asm and pack its flags and index.
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // presumably keeps the pushfq below clear of data held just under rsp -- TODO confirm
+ "movdqu 0(%2), %%xmm2" "\n\t" // xmm2 <- copy of *argL
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x1B, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+UInt s_pcmpistri_1B ( V128* argLU, V128* argRU ) // Compute the imm8 0x1B result with the C model pcmpXstrX_WRK_wide.
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU), // masks mark the zero 16-bit elements of each operand
+ zmask_from_V128(argRU),
+ 0x1B, False/*!isSTRM*/
+ );
+ assert(ok); // the model must accept this imm8
+ resECX = resV.uInt[0]; // the model stores the new ECX value in the low 32 bits
+ return (resOSZACP << 16) | resECX; // same layout as h_pcmpistri_1B: flags above, index below
+}
+
+void istri_1B ( void ) // Run every operand pair below through both 0x1B implementations via try_istri.
+{
+ char* wot = "1B"; // label printed on each output line
+ UInt(*h)(V128*,V128*) = h_pcmpistri_1B; // inline-asm implementation
+ UInt(*s)(V128*,V128*) = s_pcmpistri_1B; // C-model implementation
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
+ try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa");
+ try_istri(wot,h,s, "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa");
+
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "8000000000000000", "aaaaaaaa00aaaaaa");
+ try_istri(wot,h,s, "0000000000000001", "aaaaaaaa00aaaaaa");
+
+ try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
+}
+
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_03 //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_03 ( V128* argL, V128* argR ) // Execute pcmpistri $0x03 via inline asm and pack its flags and index.
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // presumably keeps the pushfq below clear of data held just under rsp -- TODO confirm
+ "movdqu 0(%2), %%xmm2" "\n\t" // xmm2 <- copy of *argL
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x03, %%xmm2, %%xmm11" "\n\t"
+//"pcmpistrm $0x03, %%xmm2, %%xmm11" "\n\t"
+//"movd %%xmm0, %%ecx" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+UInt s_pcmpistri_03 ( V128* argLU, V128* argRU ) // Compute the imm8 0x03 result with the C model pcmpXstrX_WRK_wide.
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU), // masks mark the zero 16-bit elements of each operand
+ zmask_from_V128(argRU),
+ 0x03, False/*!isSTRM*/
+ );
+ assert(ok); // the model must accept this imm8
+ resECX = resV.uInt[0]; // the model stores the new ECX value in the low 32 bits
+ return (resOSZACP << 16) | resECX; // same layout as h_pcmpistri_03: flags above, index below
+}
+
+void istri_03 ( void ) // Run every operand pair below through both 0x03 implementations via try_istri.
+{
+ char* wot = "03"; // label printed on each output line
+ UInt(*h)(V128*,V128*) = h_pcmpistri_03; // inline-asm implementation
+ UInt(*s)(V128*,V128*) = s_pcmpistri_03; // C-model implementation
+
+ try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
+ try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
+ try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
+
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
+
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
+ try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
+ try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
+
+ try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
+
+ try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
+ try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
+}
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_13 //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_13 ( V128* argL, V128* argR ) // Execute pcmpistri $0x13 via inline asm and pack its flags and index.
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // presumably keeps the pushfq below clear of data held just under rsp -- TODO confirm
+ "movdqu 0(%2), %%xmm2" "\n\t" // xmm2 <- copy of *argL
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x13, %%xmm2, %%xmm11" "\n\t"
+//"pcmpistrm $0x13, %%xmm2, %%xmm11" "\n\t"
+//"movd %%xmm0, %%ecx" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+UInt s_pcmpistri_13 ( V128* argLU, V128* argRU ) // Compute the imm8 0x13 result with the C model pcmpXstrX_WRK_wide.
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK_wide( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU), // masks mark the zero 16-bit elements of each operand
+ zmask_from_V128(argRU),
+ 0x13, False/*!isSTRM*/
+ );
+ assert(ok); // the model must accept this imm8
+ resECX = resV.uInt[0]; // the model stores the new ECX value in the low 32 bits
+ return (resOSZACP << 16) | resECX; // same layout as h_pcmpistri_13: flags above, index below
+}
+
+void istri_13 ( void ) // Run every operand pair below through both 0x13 implementations via try_istri.
+{
+ char* wot = "13"; // label printed on each output line
+ UInt(*h)(V128*,V128*) = h_pcmpistri_13; // inline-asm implementation
+ UInt(*s)(V128*,V128*) = s_pcmpistri_13; // C-model implementation
+
+ try_istri(wot,h,s, "aacdacbdaacdaacd", "00000000000000aa");
+ try_istri(wot,h,s, "aabbaabbaabbaabb", "00000000000000bb");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "000000000000aabb");
+ try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
+
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "00bbccddaabbccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaa00ccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaabb00dd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaabbcc00", "00000000aabbccdd");
+
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbccdd");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aa00ccdd");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabb00dd");
+ try_istri(wot,h,s, "aabbccddaabbccdd", "00000000aabbcc00");
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
+ try_istri(wot,h,s, "0000aabbaabbaabb", "000000000000bbbb");
+ try_istri(wot,h,s, "0000ccddaabbccdd", "00000000bbaabbaa");
+
+ try_istri(wot,h,s, "0000ccddaabbccdd", "000000bbaabbaa00");
+
+ try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
+ try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
+}
+
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_45 //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_45 ( V128* argL, V128* argR ) // Execute pcmpistri $0x45 via inline asm and pack its flags and index.
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128)); // stage both operands back to back so one base register reaches them
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" // presumably keeps the pushfq below clear of data held just under rsp -- TODO confirm
+ "movdqu 0(%2), %%xmm2" "\n\t" // xmm2 <- copy of *argL
+ "movdqu 16(%2), %%xmm11" "\n\t" // xmm11 <- copy of *argR
+ "pcmpistri $0x45, %%xmm2, %%xmm11" "\n\t"
+//"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t"
+//"movd %%xmm0, %%ecx" "\n\t"
+ "pushfq" "\n\t" // capture rflags ...
+ "popq %%rdx" "\n\t" // ... into rdx
+ "movq %%rcx, %0" "\n\t" // res <- rcx
+ "movq %%rdx, %1" "\n\t" // flags <- captured rflags
+ "addq $1024, %%rsp" "\n\t" // undo the subq above
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF); // 0x8D5 selects the O,S,Z,A,P,C bit positions; low 16 bits carry rcx
+}
+
+/* Software side of the 0x45 comparison: asks pcmpXstrX_WRK_wide (defined
+   elsewhere in this file) for the result of control byte 0x45 on the two
+   operands, and returns the flags word it reports shifted left by 16,
+   OR'd with element 0 of the vector it reports. */
+UInt s_pcmpistri_45 ( V128* argLU, V128* argRU )
+{
+   UInt flagBits;
+   V128 simOut;
+   Bool worked = pcmpXstrX_WRK_wide(
+                    &simOut, &flagBits,
+                    argLU, argRU,
+                    zmask_from_V128(argLU), zmask_from_V128(argRU),
+                    0x45,
+                    False /* isSTRM argument: not the STRM form */
+                 );
+   /* The worker must accept this control byte. */
+   assert(worked);
+   return simOut.uInt[0] | (flagBits << 16);
+}
+
+/* Feeds every 0x45 operand pair, in order, to try_istri together with the
+   hardware and software helpers for that control byte.  The pairs appear
+   in the same order as the "istri 45" lines of pcmpstr64w.stdout.exp. */
+void istri_45 ( void )
+{
+   /* { left summary, right summary }, 16 hex characters each */
+   static char* const cases[][2] = {
+      { "aaaabbbbccccdddd", "000000000000bbcc" },
+      { "aaaabbbbccccdddd", "000000000000ccbb" },
+      { "baaabbbbccccdddd", "000000000000ccbb" },
+      { "baaabbbbccccdddc", "000000000000ccbb" },
+
+      { "bbbbbbbbbbbbbbbb", "000000000000ccbb" },
+      { "bbbbbbbb00bbbbbb", "000000000000ccbb" },
+      { "bbbbbbbbbbbb00bb", "000000000000ccbb" },
+      { "bbbbbbbbbbbbbb00", "000000000000ccbb" },
+      { "0000000000000000", "000000000000ccbb" },
+
+      { "0000000000000000", "0000000000000000" },
+
+      { "bbbbbbbbbbbbbbbb", "000000000000ccbb" },
+      { "bbbbbbbbbbbbbbbb", "00000000000000bb" },
+      { "bb44bb44bb44bb44", "000000006622ccbb" },
+
+      { "bb44bb44bb44bb44", "000000000022ccbb" },
+      { "bb44bb44bb44bb44", "000000000000ccbb" },
+      { "bb44bb44bb44bb44", "00000000000000bb" },
+
+      { "0011223344556677", "0000997755442211" },
+      { "1122334455667711", "0000997755442211" },
+
+      { "0011223344556677", "0000aa8866553322" },
+      { "1122334455667711", "0000aa8866553322" }
+   };
+   const UInt nCases = sizeof(cases) / sizeof(cases[0]);
+   char* tag = "45";
+   UInt  k;
+
+   for (k = 0; k < nCases; k++)
+      try_istri(tag, h_pcmpistri_45, s_pcmpistri_45,
+                cases[k][0], cases[k][1]);
+}
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_01 //
+// //
+//////////////////////////////////////////////////////////
+/* Hardware side of the 0x01 comparison: runs a real "pcmpistri $0x01" on
+   copies of the two operands and packs the outcome into one UInt --
+   (rflags & 0x8D5) << 16 in the upper part, the low 16 bits of %rcx in
+   the lower part.  0x8D5 selects bits 11, 7, 6, 4, 2 and 0, the
+   positions named by SHIFT_O/S/Z/A/P/C at the top of this file. */
+UInt h_pcmpistri_01 ( V128* argL, V128* argR )
+{
+ V128 block[2]; /* [0] = copy of *argL, [1] = copy of *argR, both reached via %2 */
+ memcpy(&block[0], argL, sizeof(V128));
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" /* move %rsp down before pushfq stores to the stack;
+    presumably keeps clear of compiler data below %rsp -- TODO confirm */
+ "movdqu 0(%2), %%xmm2" "\n\t" /* xmm2 <- block[0] */
+ "movdqu 16(%2), %%xmm11" "\n\t" /* xmm11 <- block[1] */
+ "pcmpistri $0x01, %%xmm2, %%xmm11" "\n\t"
+//"pcmpistrm $0x01, %%xmm2, %%xmm11" "\n\t"
+//"movd %%xmm0, %%ecx" "\n\t"
+ "pushfq" "\n\t" /* push rflags ... */
+ "popq %%rdx" "\n\t" /* ... and pop them into %rdx */
+ "movq %%rcx, %0" "\n\t" /* res <- %rcx */
+ "movq %%rdx, %1" "\n\t" /* flags <- %rdx */
+ "addq $1024, %%rsp" "\n\t" /* undo the subq above */
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
+}
+
+/* Software side of the 0x01 comparison: asks pcmpXstrX_WRK_wide (defined
+   elsewhere in this file) for the result of control byte 0x01 on the two
+   operands, and returns the flags word it reports shifted left by 16,
+   OR'd with element 0 of the vector it reports. */
+UInt s_pcmpistri_01 ( V128* argLU, V128* argRU )
+{
+   UInt flagBits;
+   V128 simOut;
+   Bool worked = pcmpXstrX_WRK_wide(
+                    &simOut, &flagBits,
+                    argLU, argRU,
+                    zmask_from_V128(argLU), zmask_from_V128(argRU),
+                    0x01,
+                    False /* isSTRM argument: not the STRM form */
+                 );
+   /* The worker must accept this control byte. */
+   assert(worked);
+   return simOut.uInt[0] | (flagBits << 16);
+}
+
+/* Feeds every 0x01 operand pair, in order, to try_istri together with the
+   hardware and software helpers for that control byte.  The pairs appear
+   in the same order as the "istri 01" lines of pcmpstr64w.stdout.exp. */
+void istri_01 ( void )
+{
+   /* { left summary, right summary }, 16 hex characters each */
+   static char* const cases[][2] = {
+      { "aacdacbdaacdaacd", "00000000000000aa" },
+      { "aabbaabbaabbaabb", "00000000000000bb" },
+      { "aabbccddaabbccdd", "000000000000aabb" },
+      { "abcdabc0abcdabcd", "000000000000abcd" },
+
+      { "aabbccddaabbccdd", "00000000aabbccdd" },
+      { "00bbccddaabbccdd", "00000000aabbccdd" },
+      { "aabbccddaa00ccdd", "00000000aabbccdd" },
+      { "aabbccddaabb00dd", "00000000aabbccdd" },
+      { "aabbccddaabbcc00", "00000000aabbccdd" },
+
+      { "aabbccddaabbccdd", "00000000aabbccdd" },
+      { "aabbccddaabbccdd", "00000000aa00ccdd" },
+      { "aabbccddaabbccdd", "00000000aabb00dd" },
+      { "aabbccddaabbccdd", "00000000aabbcc00" },
+
+      { "0000000000000000", "0000000000000000" },
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+
+      { "0000abcdabcdabcd", "000000000000abcd" },
+      { "0000abcdabcdabcd", "000000000000dcba" },
+      { "0000aabbaabbaabb", "000000000000bbbb" },
+      { "0000ccddaabbccdd", "00000000bbaabbaa" },
+
+      { "0000ccddaabbccdd", "000000bbaabbaa00" },
+
+      { "0ddc0ffeebadf00d", "00000000cafebabe" },
+      { "0ddc0ffeebadfeed", "00000000cafebabe" }
+   };
+   const UInt nCases = sizeof(cases) / sizeof(cases[0]);
+   char* tag = "01";
+   UInt  k;
+
+   for (k = 0; k < nCases; k++)
+      try_istri(tag, h_pcmpistri_01, s_pcmpistri_01,
+                cases[k][0], cases[k][1]);
+}
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_39 //
+// //
+//////////////////////////////////////////////////////////
+/* Hardware side of the 0x39 comparison: runs a real "pcmpistri $0x39" on
+   copies of the two operands and packs the outcome into one UInt --
+   (rflags & 0x8D5) << 16 in the upper part, the low 16 bits of %rcx in
+   the lower part.  0x8D5 selects bits 11, 7, 6, 4, 2 and 0, the
+   positions named by SHIFT_O/S/Z/A/P/C at the top of this file. */
+UInt h_pcmpistri_39 ( V128* argL, V128* argR )
+{
+ V128 block[2]; /* [0] = copy of *argL, [1] = copy of *argR, both reached via %2 */
+ memcpy(&block[0], argL, sizeof(V128));
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t" /* move %rsp down before pushfq stores to the stack;
+    presumably keeps clear of compiler data below %rsp -- TODO confirm */
+ "movdqu 0(%2), %%xmm2" "\n\t" /* xmm2 <- block[0] */
+ "movdqu 16(%2), %%xmm11" "\n\t" /* xmm11 <- block[1] */
+ "pcmpistri $0x39, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t" /* push rflags ... */
+ "popq %%rdx" "\n\t" /* ... and pop them into %rdx */
+ "movq %%rcx, %0" "\n\t" /* res <- %rcx */
+ "movq %%rdx, %1" "\n\t" /* flags <- %rdx */
+ "addq $1024, %%rsp" "\n\t" /* undo the subq above */
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
+}
+
+/* Software side of the 0x39 comparison: asks pcmpXstrX_WRK_wide (defined
+   elsewhere in this file) for the result of control byte 0x39 on the two
+   operands, and returns the flags word it reports shifted left by 16,
+   OR'd with element 0 of the vector it reports. */
+UInt s_pcmpistri_39 ( V128* argLU, V128* argRU )
+{
+   UInt flagBits;
+   V128 simOut;
+   Bool worked = pcmpXstrX_WRK_wide(
+                    &simOut, &flagBits,
+                    argLU, argRU,
+                    zmask_from_V128(argLU), zmask_from_V128(argRU),
+                    0x39,
+                    False /* isSTRM argument: not the STRM form */
+                 );
+   /* The worker must accept this control byte. */
+   assert(worked);
+   return simOut.uInt[0] | (flagBits << 16);
+}
+
+/* Feeds every 0x39 operand pair, in order, to try_istri together with the
+   hardware and software helpers for that control byte.  The pairs appear
+   in the same order as the "istri 39" lines of pcmpstr64w.stdout.exp. */
+void istri_39 ( void )
+{
+   /* { left summary, right summary }, 16 hex characters each */
+   static char* const cases[][2] = {
+      { "0000000000000000", "0000000000000000" },
+
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa" },
+
+      { "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa" },
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa" },
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a" },
+
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+
+      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa" },
+      { "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa" },
+
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
+
+      { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa" },
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa" },
+      { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaa00aa" },
+
+      { "aaaaaaaa00aaaaaa", "aaaaaaaaaaaaaaaa" },
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa00aa" },
+      { "aaaaaaaa00aaaaaa", "aaaaaaaaaaaa00aa" },
+
+      { "aaaaaaaaaaaa00aa", "aaaaaaaaaaaaaaaa" },
+      { "aaaaaaaaaaaaaaaa", "aaaaaaaa00aaaaaa" },
+      { "aaaaaaaaaaaa00aa", "aaaaaaaa00aaaaaa" },
+
+      { "0000000000000000", "aaaaaaaa00aaaaaa" },
+      { "8000000000000000", "aaaaaaaa00aaaaaa" },
+      { "0000000000000001", "aaaaaaaa00aaaaaa" },
+
+      { "0000000000000000", "aaaaaaaaaaaaaaaa" },
+      { "aaaaaaaaaaaaaaaa", "0000000000000000" }
+   };
+   const UInt nCases = sizeof(cases) / sizeof(cases[0]);
+   char* tag = "39";
+   UInt  k;
+
+   for (k = 0; k < nCases; k++)
+      try_istri(tag, h_pcmpistri_39, s_pcmpistri_39,
+                cases[k][0], cases[k][1]);
+}
+
+
+
+//////////////////////////////////////////////////////////
+// //
+// main //
+// //
+//////////////////////////////////////////////////////////
+/* Runs every control-byte group once, in a fixed order.  The sequence
+   (4B, 3B, 09, 1B, 03, 0D, 13, 45, 01, 39) is the order in which the
+   groups appear in pcmpstr64w.stdout.exp, so reordering these calls
+   would no longer match that expected-output file.  Always returns 0. */
+int main ( void )
+{
+ istri_4B();
+ istri_3B();
+ istri_09();
+ istri_1B();
+ istri_03();
+ istri_0D();
+ istri_13();
+ istri_45();
+ istri_01();
+ istri_39();
+ return 0;
+}
diff --git a/none/tests/amd64/pcmpstr64w.stderr.exp b/none/tests/amd64/pcmpstr64w.stderr.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/none/tests/amd64/pcmpstr64w.stderr.exp
diff --git a/none/tests/amd64/pcmpstr64w.stdout.exp b/none/tests/amd64/pcmpstr64w.stdout.exp
new file mode 100644
index 0000000..358b82e
--- /dev/null
+++ b/none/tests/amd64/pcmpstr64w.stdout.exp
@@ -0,0 +1,256 @@
+istri 4B 0000000000000000 0000000000000000 -> 08c10007 08c10007
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010007 08010007
+istri 4B aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010007 08010007
+istri 4B aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 08010007 08010007
+istri 4B aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 08010007 08010007
+istri 4B aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 08010007 08010007
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 08010007 08010007
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 00010007 00010007
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010007 08010007
+istri 4B baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010006 08010006
+istri 4B b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010006 08010006
+istri 4B b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010005 08010005
+istri 4B b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010005 08010005
+istri 4B b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 08010005 08010005
+istri 4B b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 08010005 08010005
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010007 08010007
+istri 4B aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 08410000 08410000
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 08810000 08810000
+istri 4B aaaaaaaaaaaa00aa aaaaaaaaaaaa00aa -> 08c10007 08c10007
+istri 4B aaaaaaaa00aaaaaa aaaaaaaaaaaaaaaa -> 08410002 08410002
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 08810000 08810000
+istri 4B aaaaaaaa00aaaaaa aaaaaaaaaaaa00aa -> 08c10007 08c10007
+istri 4B aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 08410000 08410000
+istri 4B aaaaaaaaaaaaaaaa aaaaaaaa00aaaaaa -> 08810002 08810002
+istri 4B aaaaaaaaaaaa00aa aaaaaaaa00aaaaaa -> 08c10007 08c10007
+istri 4B 0000000000000000 aaaaaaaa00aaaaaa -> 00c10007 00c10007
+istri 4B 8000000000000000 aaaaaaaa00aaaaaa -> 00c10007 00c10007
+istri 4B 0000000000000001 aaaaaaaa00aaaaaa -> 00c10007 00c10007
+istri 4B 0000000000000000 aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 4B aaaaaaaaaaaaaaaa 0000000000000000 -> 00800008 00800008
+istri 3B 0000000000000000 0000000000000000 -> 08c10000 08c10000
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 3B aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010005 00010005
+istri 3B aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 00010003 00010003
+istri 3B aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 00010001 00010001
+istri 3B aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 00010005 00010005
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 00010003 00010003
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 08010000 08010000
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 3B baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010007 00010007
+istri 3B b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010007 00010007
+istri 3B b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006
+istri 3B b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006
+istri 3B b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 00010001 00010001
+istri 3B b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 00010001 00010001
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 3B aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 00810001 00810001
+istri 3B aaaaaaaaaaaa00aa aaaaaaaaaaaa00aa -> 00c10001 00c10001
+istri 3B aaaaaaaa00aaaaaa aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 00810001 00810001
+istri 3B aaaaaaaa00aaaaaa aaaaaaaaaaaa00aa -> 00c10001 00c10001
+istri 3B aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 3B aaaaaaaaaaaaaaaa aaaaaaaa00aaaaaa -> 00810003 00810003
+istri 3B aaaaaaaaaaaa00aa aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 3B 0000000000000000 aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 3B 8000000000000000 aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 3B 0000000000000001 aaaaaaaa00aaaaaa -> 08c10000 08c10000
+istri 3B 0000000000000000 aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 3B aaaaaaaaaaaaaaaa 0000000000000000 -> 08810000 08810000
+istri 09 0000000000000000 0000000000000000 -> 08c10000 08c10000
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 08010000 08010000
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 08010000 08010000
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 00010001 00010001
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 08010000 08010000
+istri 09 b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 08010000 08010000
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 09 aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 08410000 08410000
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 08810000 08810000
+istri 09 aaaaaaaaaaaa00aa aaaaaaaaaaaa00aa -> 08c10000 08c10000
+istri 09 aaaaaaaa00aaaaaa aaaaaaaaaaaaaaaa -> 08410000 08410000
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 08810000 08810000
+istri 09 aaaaaaaa00aaaaaa aaaaaaaaaaaa00aa -> 08c10000 08c10000
+istri 09 aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 08410000 08410000
+istri 09 aaaaaaaaaaaaaaaa aaaaaaaa00aaaaaa -> 08810000 08810000
+istri 09 aaaaaaaaaaaa00aa aaaaaaaa00aaaaaa -> 08c10000 08c10000
+istri 09 0000000000000000 aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 09 8000000000000000 aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 09 0000000000000001 aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 09 0000000000000000 aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 09 aaaaaaaaaaaaaaaa 0000000000000000 -> 00800008 00800008
+istri 1B 0000000000000000 0000000000000000 -> 00c00008 00c00008
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 1B aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010005 00010005
+istri 1B aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 00010003 00010003
+istri 1B aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 00010001 00010001
+istri 1B aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 00010005 00010005
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 00010003 00010003
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 08010000 08010000
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 1B baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010007 00010007
+istri 1B b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010007 00010007
+istri 1B b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006
+istri 1B b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006
+istri 1B b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 00010001 00010001
+istri 1B b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 00010001 00010001
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 1B aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 00410001 00410001
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 00810001 00810001
+istri 1B aaaaaaaaaaaa00aa aaaaaaaaaaaa00aa -> 00c00008 00c00008
+istri 1B aaaaaaaa00aaaaaa aaaaaaaaaaaaaaaa -> 00410003 00410003
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 00810001 00810001
+istri 1B aaaaaaaa00aaaaaa aaaaaaaaaaaa00aa -> 00c10001 00c10001
+istri 1B aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 00410001 00410001
+istri 1B aaaaaaaaaaaaaaaa aaaaaaaa00aaaaaa -> 00810003 00810003
+istri 1B aaaaaaaaaaaa00aa aaaaaaaa00aaaaaa -> 00c10001 00c10001
+istri 1B 0000000000000000 aaaaaaaa00aaaaaa -> 08c10000 08c10000
+istri 1B 8000000000000000 aaaaaaaa00aaaaaa -> 08c10000 08c10000
+istri 1B 0000000000000001 aaaaaaaa00aaaaaa -> 08c10000 08c10000
+istri 1B 0000000000000000 aaaaaaaaaaaaaaaa -> 08410000 08410000
+istri 1B aaaaaaaaaaaaaaaa 0000000000000000 -> 08810000 08810000
+istri 03 aacdacbdaacdaacd 00000000000000aa -> 00810001 00810001
+istri 03 aabbaabbaabbaabb 00000000000000bb -> 08810000 08810000
+istri 03 aabbccddaabbccdd 000000000000aabb -> 00810002 00810002
+istri 03 abcdabc0abcdabcd 000000000000abcd -> 08810000 08810000
+istri 03 aabbccddaabbccdd 00000000aabbccdd -> 08810000 08810000
+istri 03 00bbccddaabbccdd 00000000aabbccdd -> 08c10000 08c10000
+istri 03 aabbccddaa00ccdd 00000000aabbccdd -> 08c10000 08c10000
+istri 03 aabbccddaabb00dd 00000000aabbccdd -> 08c10000 08c10000
+istri 03 aabbccddaabbcc00 00000000aabbccdd -> 00c00008 00c00008
+istri 03 aabbccddaabbccdd 00000000aabbccdd -> 08810000 08810000
+istri 03 aabbccddaabbccdd 00000000aa00ccdd -> 08810000 08810000
+istri 03 aabbccddaabbccdd 00000000aabb00dd -> 08810000 08810000
+istri 03 aabbccddaabbccdd 00000000aabbcc00 -> 00800008 00800008
+istri 03 0000000000000000 0000000000000000 -> 00c00008 00c00008
+istri 03 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 03 0000abcdabcdabcd 000000000000abcd -> 08c10000 08c10000
+istri 03 0000abcdabcdabcd 000000000000dcba -> 00c00008 00c00008
+istri 03 0000aabbaabbaabb 000000000000bbbb -> 08c10000 08c10000
+istri 03 0000ccddaabbccdd 00000000bbaabbaa -> 00c10002 00c10002
+istri 03 0000ccddaabbccdd 000000bbaabbaa00 -> 00c00008 00c00008
+istri 03 0ddc0ffeebadf00d 00000000cafebabe -> 00810004 00810004
+istri 03 0ddc0ffeebadfeed 00000000cafebabe -> 00810001 00810001
+istri 0D 11111111abcdef11 0000000000abcdef -> 00810001 00810001
+istri 0D 11111111abcdef11 00abcdef00abcdef -> 00810001 00810001
+istri 0D 11111111abcdef11 0000000000abcdef -> 00810001 00810001
+istri 0D 1111111111abcdef 0000000000abcdef -> 08810000 08810000
+istri 0D 111111111111abcd 0000000000abcdef -> 00800008 00800008
+istri 0D 1111abcd11abcd11 000000000000abcd -> 00810001 00810001
+istri 0D 11abcd1111abcd11 000000000000abcd -> 00810001 00810001
+istri 0D abcd111111abcd11 000000000000abcd -> 00810001 00810001
+istri 0D cd11111111abcd11 000000000000abcd -> 00810001 00810001
+istri 0D 01abcd11abcd1111 000000000000abcd -> 00810002 00810002
+istri 0D 00abcd11abcd1111 000000000000abcd -> 00c10002 00c10002
+istri 0D 0000cd11abcd1111 000000000000abcd -> 00c10002 00c10002
+istri 0D 00abcd1100abcd11 000000000000abcd -> 00c10001 00c10001
+istri 0D 00abcd110000cd11 000000000000abcd -> 00c00008 00c00008
+istri 0D 1111111111111234 0000000000000000 -> 08810000 08810000
+istri 0D 1111111111111234 0000000000000011 -> 00810002 00810002
+istri 0D 1111111111111234 0000000000001111 -> 00810002 00810002
+istri 0D 1111111111111234 1111111111111234 -> 08010000 08010000
+istri 0D 0a11111111111111 000000000000000a -> 00810007 00810007
+istri 0D 0b11111111111111 000000000000000a -> 00800008 00800008
+istri 13 aacdacbdaacdaacd 00000000000000aa -> 08810000 08810000
+istri 13 aabbaabbaabbaabb 00000000000000bb -> 00810001 00810001
+istri 13 aabbccddaabbccdd 000000000000aabb -> 08810000 08810000
+istri 13 abcdabc0abcdabcd 000000000000abcd -> 00810004 00810004
+istri 13 aabbccddaabbccdd 00000000aabbccdd -> 00800008 00800008
+istri 13 00bbccddaabbccdd 00000000aabbccdd -> 00c10007 00c10007
+istri 13 aabbccddaa00ccdd 00000000aabbccdd -> 00c10002 00c10002
+istri 13 aabbccddaabb00dd 00000000aabbccdd -> 00c10001 00c10001
+istri 13 aabbccddaabbcc00 00000000aabbccdd -> 08c10000 08c10000
+istri 13 aabbccddaabbccdd 00000000aabbccdd -> 00800008 00800008
+istri 13 aabbccddaabbccdd 00000000aa00ccdd -> 00810002 00810002
+istri 13 aabbccddaabbccdd 00000000aabb00dd -> 00810001 00810001
+istri 13 aabbccddaabbccdd 00000000aabbcc00 -> 08810000 08810000
+istri 13 0000000000000000 0000000000000000 -> 08c10000 08c10000
+istri 13 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 13 0000abcdabcdabcd 000000000000abcd -> 00c10006 00c10006
+istri 13 0000abcdabcdabcd 000000000000dcba -> 08c10000 08c10000
+istri 13 0000aabbaabbaabb 000000000000bbbb -> 00c10001 00c10001
+istri 13 0000ccddaabbccdd 00000000bbaabbaa -> 08c10000 08c10000
+istri 13 0000ccddaabbccdd 000000bbaabbaa00 -> 08c10000 08c10000
+istri 13 0ddc0ffeebadf00d 00000000cafebabe -> 08810000 08810000
+istri 13 0ddc0ffeebadfeed 00000000cafebabe -> 08810000 08810000
+istri 45 aaaabbbbccccdddd 000000000000bbcc -> 00800008 00800008
+istri 45 aaaabbbbccccdddd 000000000000ccbb -> 00810005 00810005
+istri 45 baaabbbbccccdddd 000000000000ccbb -> 00810005 00810005
+istri 45 baaabbbbccccdddc 000000000000ccbb -> 00810005 00810005
+istri 45 bbbbbbbbbbbbbbbb 000000000000ccbb -> 08810007 08810007
+istri 45 bbbbbbbb00bbbbbb 000000000000ccbb -> 08c10002 08c10002
+istri 45 bbbbbbbbbbbb00bb 000000000000ccbb -> 08c10000 08c10000
+istri 45 bbbbbbbbbbbbbb00 000000000000ccbb -> 00c00008 00c00008
+istri 45 0000000000000000 000000000000ccbb -> 00c00008 00c00008
+istri 45 0000000000000000 0000000000000000 -> 00c00008 00c00008
+istri 45 bbbbbbbbbbbbbbbb 000000000000ccbb -> 08810007 08810007
+istri 45 bbbbbbbbbbbbbbbb 00000000000000bb -> 00800008 00800008
+istri 45 bb44bb44bb44bb44 000000006622ccbb -> 08810007 08810007
+istri 45 bb44bb44bb44bb44 000000000022ccbb -> 00810007 00810007
+istri 45 bb44bb44bb44bb44 000000000000ccbb -> 00810007 00810007
+istri 45 bb44bb44bb44bb44 00000000000000bb -> 00800008 00800008
+istri 45 0011223344556677 0000997755442211 -> 08c10006 08c10006
+istri 45 1122334455667711 0000997755442211 -> 08810007 08810007
+istri 45 0011223344556677 0000aa8866553322 -> 00c10005 00c10005
+istri 45 1122334455667711 0000aa8866553322 -> 00810006 00810006
+istri 01 aacdacbdaacdaacd 00000000000000aa -> 00810001 00810001
+istri 01 aabbaabbaabbaabb 00000000000000bb -> 08810000 08810000
+istri 01 aabbccddaabbccdd 000000000000aabb -> 00810002 00810002
+istri 01 abcdabc0abcdabcd 000000000000abcd -> 08810000 08810000
+istri 01 aabbccddaabbccdd 00000000aabbccdd -> 08810000 08810000
+istri 01 00bbccddaabbccdd 00000000aabbccdd -> 08c10000 08c10000
+istri 01 aabbccddaa00ccdd 00000000aabbccdd -> 08c10000 08c10000
+istri 01 aabbccddaabb00dd 00000000aabbccdd -> 08c10000 08c10000
+istri 01 aabbccddaabbcc00 00000000aabbccdd -> 00c00008 00c00008
+istri 01 aabbccddaabbccdd 00000000aabbccdd -> 08810000 08810000
+istri 01 aabbccddaabbccdd 00000000aa00ccdd -> 08810000 08810000
+istri 01 aabbccddaabbccdd 00000000aabb00dd -> 08810000 08810000
+istri 01 aabbccddaabbccdd 00000000aabbcc00 -> 00800008 00800008
+istri 01 0000000000000000 0000000000000000 -> 00c00008 00c00008
+istri 01 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 01 0000abcdabcdabcd 000000000000abcd -> 08c10000 08c10000
+istri 01 0000abcdabcdabcd 000000000000dcba -> 00c00008 00c00008
+istri 01 0000aabbaabbaabb 000000000000bbbb -> 08c10000 08c10000
+istri 01 0000ccddaabbccdd 00000000bbaabbaa -> 00c10002 00c10002
+istri 01 0000ccddaabbccdd 000000bbaabbaa00 -> 00c00008 00c00008
+istri 01 0ddc0ffeebadf00d 00000000cafebabe -> 00810004 00810004
+istri 01 0ddc0ffeebadfeed 00000000cafebabe -> 00810001 00810001
+istri 39 0000000000000000 0000000000000000 -> 08c10000 08c10000
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 39 aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010005 00010005
+istri 39 aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 00010003 00010003
+istri 39 aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 00010001 00010001
+istri 39 aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 00010005 00010005
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 00010003 00010003
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 08010000 08010000
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 39 baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010007 00010007
+istri 39 b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010007 00010007
+istri 39 b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006
+istri 39 b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006
+istri 39 b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 00010001 00010001
+istri 39 b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 00010001 00010001
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000008 00000008
+istri 39 aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 00810001 00810001
+istri 39 aaaaaaaaaaaa00aa aaaaaaaaaaaa00aa -> 00c10001 00c10001
+istri 39 aaaaaaaa00aaaaaa aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaaaaaa00aa -> 00810001 00810001
+istri 39 aaaaaaaa00aaaaaa aaaaaaaaaaaa00aa -> 00c10001 00c10001
+istri 39 aaaaaaaaaaaa00aa aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 39 aaaaaaaaaaaaaaaa aaaaaaaa00aaaaaa -> 00810003 00810003
+istri 39 aaaaaaaaaaaa00aa aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 39 0000000000000000 aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 39 8000000000000000 aaaaaaaa00aaaaaa -> 00c10003 00c10003
+istri 39 0000000000000001 aaaaaaaa00aaaaaa -> 08c10000 08c10000
+istri 39 0000000000000000 aaaaaaaaaaaaaaaa -> 00400008 00400008
+istri 39 aaaaaaaaaaaaaaaa 0000000000000000 -> 08810000 08810000
diff --git a/none/tests/amd64/pcmpstr64w.vgtest b/none/tests/amd64/pcmpstr64w.vgtest
new file mode 100644
index 0000000..d088a43
--- /dev/null
+++ b/none/tests/amd64/pcmpstr64w.vgtest
@@ -0,0 +1,3 @@
+prog: pcmpstr64w
+prereq: ../../../tests/x86_amd64_features amd64-sse42
+vgopts: -q
diff --git a/none/tests/amd64/pcmpxstrx64w.c b/none/tests/amd64/pcmpxstrx64w.c
new file mode 100644
index 0000000..f44b9e2
--- /dev/null
+++ b/none/tests/amd64/pcmpxstrx64w.c
@@ -0,0 +1,335 @@
+
+/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}. Does not
+ check the core arithmetic in any detail. This file checks the 16-bit
+ character versions (w is for wide) */
+
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+typedef unsigned char V128[16];
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned char UChar;
+typedef unsigned long long int ULong;
+typedef UChar Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+/* Prints the 16 bytes of *vec to stdout as 32 lowercase hex digits,
+   starting with byte 15 and ending with byte 0; emits no separator and
+   no trailing newline. */
+void show_V128 ( V128* vec )
+{
+   const UChar* bytes = &(*vec)[0];
+   Int idx = 16;
+   while (idx-- > 0)
+      printf("%02x", (UInt)bytes[idx]);
+}
+
+/* Builds a 16-byte vector from a 16-character hex summary.  Each hex
+   digit d (either case) becomes the byte 0xdd; the last character of
+   the summary is stored in byte 0 and the first in byte 15.  Asserts if
+   the summary is not exactly 16 characters long or contains a character
+   that is not a hex digit. */
+void expand ( V128* dst, char* summary )
+{
+   Int pos;
+   assert( strlen(summary) == 16 );
+   for (pos = 15; pos >= 0; pos--) {
+      UChar ch  = summary[pos];
+      UChar nib = 0;
+      if      (ch >= '0' && ch <= '9') nib = ch - '0';
+      else if (ch >= 'a' && ch <= 'f') nib = 10 + (ch - 'a');
+      else if (ch >= 'A' && ch <= 'F') nib = 10 + (ch - 'A');
+      else assert(0);
+
+      assert(nib < 16);
+      /* replicate the nibble into both halves of the byte */
+      (*dst)[15 - pos] = (UChar)((nib << 4) | nib);
+   }
+}
+
+void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
+{
+ V128 argL, argR;
+ expand( &argL, summL );
+ expand( &argR, summR );
+ printf("\n");
+ printf("rdx %016llx argL ", rdxIN);
+ show_V128(&argL);
+ printf(" rax %016llx argR ", raxIN);
+ show_V128(&argR);
+ printf("\n");
+
+ ULong block[ 2/*in:argL*/ // 0 0
+ + 2/*in:argR*/ // 2 16
+ + 1/*in:rdx*/ // 4 32
+ + 1/*in:rax*/ // 5 40
+ + 2/*inout:xmm0*/ // 6 48
+ + 1/*inout:rcx*/ // 8 64
+ + 1/*out:rflags*/ ]; // 9 72
+ assert(sizeof(block) == 80);
+
+ UChar* blockC = (UChar*)&block[0];
+
+ /* ---------------- ISTRI_4B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistri $0x4B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istri $0x4B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ISTRI_0B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistri $0x0B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istri $0x0B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ISTRM_4B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistrm $0x4B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istrm $0x4B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ISTRM_0B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistrm $0x0B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istrm $0x0B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRI_4B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestri $0x4B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estri $0x4B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRI_0B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestri $0x0B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estri $0x0B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRM_4B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestrm $0x4B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estrm $0x4B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRM_0B ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestrm $0x0B, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estrm $0x0B: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+
+
+
+}
+
+int main ( void )
+{
+ /* One row per one_test call, in call order: left string, rdx, right string, rax. */
+ static const struct { char* l; long long d; char* r; long long a; } tv[] = {
+  { "aaaaaaaaaaaaaaaa",   0, "aaaaaaaa00aaaaaa",   0 },
+  { "0000000000000000",   0, "aaaaaaaa00aaaaaa",   0 },
+  { "aaaaaaaaaaaaaaaa",   0, "aaaaaaaaaaaaaaaa",   0 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",   0 },
+  { "aaaaaaaaaaaaaaaa",   0, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  15 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  16 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  17 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  -6 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa", -15 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa", -16 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa", -17 },
+  { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa",  15, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa",  16, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa",  17, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa",  -5, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa",   6 },
+  { "aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa",   6 },
+ };
+ unsigned long k;
+ for (k = 0; k < sizeof(tv) / sizeof(tv[0]); k++)
+  one_test(tv[k].l, tv[k].d, tv[k].r, tv[k].a);
+ return 0;
+}
diff --git a/none/tests/amd64/pcmpxstrx64w.stderr.exp b/none/tests/amd64/pcmpxstrx64w.stderr.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/none/tests/amd64/pcmpxstrx64w.stderr.exp
diff --git a/none/tests/amd64/pcmpxstrx64w.stdout.exp b/none/tests/amd64/pcmpxstrx64w.stdout.exp
new file mode 100644
index 0000000..d19ebdd
--- /dev/null
+++ b/none/tests/amd64/pcmpxstrx64w.stdout.exp
@@ -0,0 +1,210 @@
+
+rdx 0000000000000000 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000000 argR aaaaaaaaaaaaaaaa0000aaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550002 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 00000000000000000000ffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 00000000000000000000000000000007 rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000008c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1
+ estrm $0x4B: xmm0 ffffffffffffffffffffffffffffffff rcx 5555555555555555 flags 000008c1
+ estrm $0x0B: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 000008c1
+
+rdx 0000000000000000 argL 00000000000000000000000000000000 rax 0000000000000000 argR aaaaaaaaaaaaaaaa0000aaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000000c1
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 000000c1
+ istrm $0x4B: xmm0 ffffffffffffffffffff000000000000 rcx 5555555555555555 flags 000000c1
+ istrm $0x0B: xmm0 000000000000000000000000000000f8 rcx 5555555555555555 flags 000000c1
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000008c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1
+ estrm $0x4B: xmm0 ffffffffffffffffffffffffffffffff rcx 5555555555555555 flags 000008c1
+ estrm $0x0B: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 000008c1
+
+rdx 0000000000000000 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000000 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000008c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1
+ estrm $0x4B: xmm0 ffffffffffffffffffffffffffffffff rcx 5555555555555555 flags 000008c1
+ estrm $0x0B: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 000008c1
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000000 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000000c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550005 flags 000000c1
+ estrm $0x4B: xmm0 ffffffffffff00000000000000000000 rcx 5555555555555555 flags 000000c1
+ estrm $0x0B: xmm0 000000000000000000000000000000e0 rcx 5555555555555555 flags 000000c1
+
+rdx 0000000000000000 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000000c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550006 flags 000000c1
+ estrm $0x4B: xmm0 ffffffff000000000000000000000000 rcx 5555555555555555 flags 000000c1
+ estrm $0x0B: xmm0 000000000000000000000000000000c0 rcx 5555555555555555 flags 000000c1
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000008c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1
+ estrm $0x4B: xmm0 ffffffff00000000ffffffffffffffff rcx 5555555555555555 flags 000008c1
+ estrm $0x0B: xmm0 000000000000000000000000000000cf rcx 5555555555555555 flags 000008c1
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 000000000000000f argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000010 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000011 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax fffffffffffffffa argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000008c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1
+ estrm $0x4B: xmm0 ffffffff00000000ffffffffffffffff rcx 5555555555555555 flags 000008c1
+ estrm $0x0B: xmm0 000000000000000000000000000000cf rcx 5555555555555555 flags 000008c1
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax fffffffffffffff1 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax fffffffffffffff0 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax ffffffffffffffef argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+
+rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000008c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1
+ estrm $0x4B: xmm0 ffffffff00000000ffffffffffffffff rcx 5555555555555555 flags 000008c1
+ estrm $0x0B: xmm0 000000000000000000000000000000cf rcx 5555555555555555 flags 000008c1
+
+rdx 000000000000000f argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000841
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000841
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000841
+
+rdx 0000000000000010 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000841
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000841
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000841
+
+rdx 0000000000000011 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000881
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000881
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000841
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000841
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000841
+
+rdx fffffffffffffffb argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000801
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000801
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000801
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000008c1
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1
+ estrm $0x4B: xmm0 ffffffff00000000ffffffffffffffff rcx 5555555555555555 flags 000008c1
+ estrm $0x0B: xmm0 000000000000000000000000000000cf rcx 5555555555555555 flags 000008c1
+
+rdx fffffffffffffff1 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000801
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000801
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000801
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000841
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000841
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000841
+
+rdx fffffffffffffff0 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000801
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000801
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000801
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000841
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000841
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000841
+
+rdx ffffffffffffffef argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ istri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000801
+ istri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801
+ istrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000801
+ istrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000801
+ estri $0x4B: xmm0 55555555555555555555555555555555 rcx 5555555555550003 flags 00000841
+ estri $0x0B: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841
+ estrm $0x4B: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000841
+ estrm $0x0B: xmm0 0000000000000000000000000000000f rcx 5555555555555555 flags 00000841
diff --git a/none/tests/amd64/pcmpxstrx64w.vgtest b/none/tests/amd64/pcmpxstrx64w.vgtest
new file mode 100644
index 0000000..4b49c51
--- /dev/null
+++ b/none/tests/amd64/pcmpxstrx64w.vgtest
@@ -0,0 +1,3 @@
+prog: pcmpxstrx64w
+prereq: ../../../tests/x86_amd64_features amd64-sse42
+vgopts: -q