Improve accuracy of definedness tracking through the x86 PMOVMSKB and
BSF instructions, as the lack of it causes false positives (VEX side).
Fixes #308627. Combined efforts of Patrick J. LoPresti
<lopresti@gmail.com> and me.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2559 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_amd64_defs.h b/priv/guest_amd64_defs.h
index bbcc95f..f687d90 100644
--- a/priv/guest_amd64_defs.h
+++ b/priv/guest_amd64_defs.h
@@ -141,8 +141,6 @@
extern ULong amd64g_calculate_mmx_pmaddwd ( ULong, ULong );
extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong );
-extern ULong amd64g_calculate_mmx_pmovmskb ( ULong );
-extern ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
extern ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi );
diff --git a/priv/guest_amd64_helpers.c b/priv/guest_amd64_helpers.c
index 5f2c6c5..8cc11ad 100644
--- a/priv/guest_amd64_helpers.c
+++ b/priv/guest_amd64_helpers.c
@@ -2998,21 +2998,6 @@
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
-{
- ULong r = 0;
- if (xx & (1ULL << (64-1))) r |= (1<<7);
- if (xx & (1ULL << (56-1))) r |= (1<<6);
- if (xx & (1ULL << (48-1))) r |= (1<<5);
- if (xx & (1ULL << (40-1))) r |= (1<<4);
- if (xx & (1ULL << (32-1))) r |= (1<<3);
- if (xx & (1ULL << (24-1))) r |= (1<<2);
- if (xx & (1ULL << (16-1))) r |= (1<<1);
- if (xx & (1ULL << ( 8-1))) r |= (1<<0);
- return r;
-}
-
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
UInt t = 0;
@@ -3029,14 +3014,6 @@
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
-{
- ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
- ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
- return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
-}
-
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
{
UShort t, min;
diff --git a/priv/guest_amd64_toIR.c b/priv/guest_amd64_toIR.c
index 7474802..84378f8 100644
--- a/priv/guest_amd64_toIR.c
+++ b/priv/guest_amd64_toIR.c
@@ -7789,11 +7789,15 @@
/* First, widen src to 64 bits if it is not already. */
assign( src64, widenUto64(mkexpr(src)) );
- /* Generate an 8-bit expression which is zero iff the
- original is zero, and nonzero otherwise */
+ /* Generate an 8-bit expression which is zero iff the original is
+ zero, and nonzero otherwise. Ask for a CmpNE version which, if
+ instrumented by Memcheck, is instrumented expensively, since
+ this may be used on the output of a preceding movmskb insn,
+ which has been known to be partially defined, and in need of
+ careful handling. */
assign( src8,
unop(Iop_1Uto8,
- binop(Iop_CmpNE64,
+ binop(Iop_ExpCmpNE64,
mkexpr(src64), mkU64(0))) );
/* Flags: Z is 1 iff source value is zero. All others
@@ -10277,14 +10281,15 @@
UInt rG = gregOfRexRM(pfx,modrm);
IRTemp t0 = newTemp(Ity_I64);
IRTemp t1 = newTemp(Ity_I64);
- IRTemp t5 = newTemp(Ity_I64);
+ IRTemp t5 = newTemp(Ity_I32);
assign(t0, getXMMRegLane64(rE, 0));
assign(t1, getXMMRegLane64(rE, 1));
- assign(t5, mkIRExprCCall( Ity_I64, 0/*regparms*/,
- "amd64g_calculate_sse_pmovmskb",
- &amd64g_calculate_sse_pmovmskb,
- mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
- putIReg32(rG, unop(Iop_64to32,mkexpr(t5)));
+ assign(t5,
+ unop(Iop_16Uto32,
+ binop(Iop_8HLto16,
+ unop(Iop_GetMSBs8x8, mkexpr(t1)),
+ unop(Iop_GetMSBs8x8, mkexpr(t0)))));
+ putIReg32(rG, mkexpr(t5));
DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
nameIReg32(rG));
delta += 1;
@@ -13443,7 +13448,7 @@
}
/* ***--- this is an MMX class insn introduced in SSE1 ---*** */
/* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
- mmx(G), turn them into a byte, and put zero-extend of it in
+ mmx(E), turn them into a byte, and put zero-extend of it in
ireg(G). */
if (haveNo66noF2noF3(pfx)
&& (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
@@ -13451,14 +13456,10 @@
if (epartIsReg(modrm)) {
do_MMX_preamble();
t0 = newTemp(Ity_I64);
- t1 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I32);
assign(t0, getMMXReg(eregLO3ofRM(modrm)));
- assign(t1, mkIRExprCCall(
- Ity_I64, 0/*regparms*/,
- "amd64g_calculate_mmx_pmovmskb",
- &amd64g_calculate_mmx_pmovmskb,
- mkIRExprVec_1(mkexpr(t0))));
- putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1)));
+ assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
+ putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
nameIReg32(gregOfRexRM(pfx,modrm)));
delta += 1;
diff --git a/priv/guest_x86_defs.h b/priv/guest_x86_defs.h
index af83cb7..a47040a 100644
--- a/priv/guest_x86_defs.h
+++ b/priv/guest_x86_defs.h
@@ -134,8 +134,6 @@
extern ULong x86g_calculate_mmx_pmaddwd ( ULong, ULong );
extern ULong x86g_calculate_mmx_psadbw ( ULong, ULong );
-extern UInt x86g_calculate_mmx_pmovmskb ( ULong );
-extern UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
/* --- DIRTY HELPERS --- */
diff --git a/priv/guest_x86_helpers.c b/priv/guest_x86_helpers.c
index 4676276..35938c9 100644
--- a/priv/guest_x86_helpers.c
+++ b/priv/guest_x86_helpers.c
@@ -2514,21 +2514,6 @@
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-UInt x86g_calculate_mmx_pmovmskb ( ULong xx )
-{
- UInt r = 0;
- if (xx & (1ULL << (64-1))) r |= (1<<7);
- if (xx & (1ULL << (56-1))) r |= (1<<6);
- if (xx & (1ULL << (48-1))) r |= (1<<5);
- if (xx & (1ULL << (40-1))) r |= (1<<4);
- if (xx & (1ULL << (32-1))) r |= (1<<3);
- if (xx & (1ULL << (24-1))) r |= (1<<2);
- if (xx & (1ULL << (16-1))) r |= (1<<1);
- if (xx & (1ULL << ( 8-1))) r |= (1<<0);
- return r;
-}
-
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
UInt t = 0;
@@ -2544,14 +2529,6 @@
return (ULong)t;
}
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
-{
- UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi );
- UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo );
- return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
-}
-
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides. ---*/
diff --git a/priv/guest_x86_toIR.c b/priv/guest_x86_toIR.c
index e98762d..1a17d89 100644
--- a/priv/guest_x86_toIR.c
+++ b/priv/guest_x86_toIR.c
@@ -716,6 +716,7 @@
|| op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
|| op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
|| op8 == Iop_CasCmpNE8
+ || op8 == Iop_ExpCmpNE8
|| op8 == Iop_Not8);
adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
return adj + op8;
@@ -6385,10 +6386,14 @@
( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
nameIReg(sz, gregOfRM(modrm)));
- /* Generate an 8-bit expression which is zero iff the
- original is zero, and nonzero otherwise */
+ /* Generate an 8-bit expression which is zero iff the original is
+ zero, and nonzero otherwise. Ask for a CmpNE version which, if
+ instrumented by Memcheck, is instrumented expensively, since
+ this may be used on the output of a preceding movmskb insn,
+ which has been known to be partially defined, and in need of
+ careful handling. */
assign( src8,
- unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8),
+ unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_ExpCmpNE8),
mkexpr(src), mkU(ty,0))) );
/* Flags: Z is 1 iff source value is zero. All others
@@ -9051,7 +9056,7 @@
/* ***--- this is an MMX class insn introduced in SSE1 ---*** */
/* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
- mmx(G), turn them into a byte, and put zero-extend of it in
+ mmx(E), turn them into a byte, and put zero-extend of it in
ireg(G). */
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
modrm = insn[2];
@@ -9060,11 +9065,7 @@
t0 = newTemp(Ity_I64);
t1 = newTemp(Ity_I32);
assign(t0, getMMXReg(eregOfRM(modrm)));
- assign(t1, mkIRExprCCall(
- Ity_I32, 0/*regparms*/,
- "x86g_calculate_mmx_pmovmskb",
- &x86g_calculate_mmx_pmovmskb,
- mkIRExprVec_1(mkexpr(t0))));
+ assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
putIReg(4, gregOfRM(modrm), mkexpr(t1));
DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
nameIReg(4,gregOfRM(modrm)));
@@ -10903,11 +10904,9 @@
goto decode_success;
}
- /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in
- xmm(G), turn them into a byte, and put zero-extend of it in
- ireg(G). Doing this directly is just too cumbersome; give up
- therefore and call a helper. */
- /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
+ /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
+ in xmm(E), turn them into a byte, and put zero-extend of it in
+ ireg(G). */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
modrm = insn[2];
if (epartIsReg(modrm)) {
@@ -10916,11 +10915,11 @@
assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
t5 = newTemp(Ity_I32);
- assign(t5, mkIRExprCCall(
- Ity_I32, 0/*regparms*/,
- "x86g_calculate_sse_pmovmskb",
- &x86g_calculate_sse_pmovmskb,
- mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
+ assign(t5,
+ unop(Iop_16Uto32,
+ binop(Iop_8HLto16,
+ unop(Iop_GetMSBs8x8, mkexpr(t1)),
+ unop(Iop_GetMSBs8x8, mkexpr(t0)))));
putIReg(4, gregOfRM(modrm), mkexpr(t5));
DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
nameIReg(4,gregOfRM(modrm)));
diff --git a/priv/host_amd64_isel.c b/priv/host_amd64_isel.c
index 1296390..98e90f7 100644
--- a/priv/host_amd64_isel.c
+++ b/priv/host_amd64_isel.c
@@ -791,7 +791,7 @@
This should handle expressions of 64, 32, 16 and 8-bit type. All
results are returned in a 64-bit register. For 32-, 16- and 8-bit
- expressions, the upper 32/16/24 bits are arbitrary, so you should
+ expressions, the upper 32/48/56 bits are arbitrary, so you should
mask or sign extend partial values if necessary.
*/
@@ -1586,6 +1586,25 @@
/* These are no-ops. */
return iselIntExpr_R(env, e->Iex.Unop.arg);
+ case Iop_GetMSBs8x8: {
+ /* Note: the following assumes the helper is of
+ signature
+ UInt fn ( ULong ), and is not a regparm fn.
+ */
+ HReg dst = newVRegI(env);
+ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ fn = (HWord)h_generic_calc_GetMSBs8x8;
+ addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
+ /* MovxLQ is not exactly the right thing here. We just
+ need to get the bottom 8 bits of RAX into dst, and zero
+ out everything else. Assuming that the helper returns
+ a UInt with the top 24 bits zeroed out, it'll do,
+ though. */
+ addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
+ return dst;
+ }
+
default:
break;
}
@@ -2223,13 +2242,15 @@
|| e->Iex.Binop.op == Iop_CmpLE64S
|| e->Iex.Binop.op == Iop_CmpLE64U
|| e->Iex.Binop.op == Iop_CasCmpEQ64
- || e->Iex.Binop.op == Iop_CasCmpNE64)) {
+ || e->Iex.Binop.op == Iop_CasCmpNE64
+ || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
switch (e->Iex.Binop.op) {
case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
- case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ;
+ case Iop_CmpNE64:
+ case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
case Iop_CmpLT64S: return Acc_L;
case Iop_CmpLT64U: return Acc_B;
case Iop_CmpLE64S: return Acc_LE;
diff --git a/priv/host_generic_simd64.c b/priv/host_generic_simd64.c
index b70ce88..fdc9eed 100644
--- a/priv/host_generic_simd64.c
+++ b/priv/host_generic_simd64.c
@@ -1169,6 +1169,20 @@
);
}
+UInt h_generic_calc_GetMSBs8x8 ( ULong xx )
+{
+ UInt r = 0;
+ if (xx & (1ULL << (64-1))) r |= (1<<7);
+ if (xx & (1ULL << (56-1))) r |= (1<<6);
+ if (xx & (1ULL << (48-1))) r |= (1<<5);
+ if (xx & (1ULL << (40-1))) r |= (1<<4);
+ if (xx & (1ULL << (32-1))) r |= (1<<3);
+ if (xx & (1ULL << (24-1))) r |= (1<<2);
+ if (xx & (1ULL << (16-1))) r |= (1<<1);
+ if (xx & (1ULL << ( 8-1))) r |= (1<<0);
+ return r;
+}
+
/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */
/* Tuple/select functions for 16x2 vectors. */
diff --git a/priv/host_generic_simd64.h b/priv/host_generic_simd64.h
index 0858583..deef944 100644
--- a/priv/host_generic_simd64.h
+++ b/priv/host_generic_simd64.h
@@ -123,6 +123,8 @@
extern ULong h_generic_calc_Min16Sx4 ( ULong, ULong );
extern ULong h_generic_calc_Min8Ux8 ( ULong, ULong );
+extern UInt h_generic_calc_GetMSBs8x8 ( ULong );
+
/* 32-bit SIMD HELPERS */
extern UInt h_generic_calc_Add16x2 ( UInt, UInt );
diff --git a/priv/host_x86_isel.c b/priv/host_x86_isel.c
index d342d92..5513d71 100644
--- a/priv/host_x86_isel.c
+++ b/priv/host_x86_isel.c
@@ -1293,6 +1293,23 @@
/* These are no-ops. */
return iselIntExpr_R(env, e->Iex.Unop.arg);
+ case Iop_GetMSBs8x8: {
+ /* Note: the following assumes the helper is of
+ signature
+ UInt fn ( ULong ), and is not a regparm fn.
+ */
+ HReg xLo, xHi;
+ HReg dst = newVRegI(env);
+ HWord fn = (HWord)h_generic_calc_GetMSBs8x8;
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
+ addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
+ add_to_esp(env, 2*4);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
+ return dst;
+ }
+
default:
break;
}
@@ -1840,7 +1857,8 @@
&& (e->Iex.Binop.op == Iop_CmpEQ16
|| e->Iex.Binop.op == Iop_CmpNE16
|| e->Iex.Binop.op == Iop_CasCmpEQ16
- || e->Iex.Binop.op == Iop_CasCmpNE16)) {
+ || e->Iex.Binop.op == Iop_CasCmpNE16
+ || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
HReg r = newVRegI(env);
@@ -1848,9 +1866,12 @@
addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
switch (e->Iex.Binop.op) {
- case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
- case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
- default: vpanic("iselCondCode(x86): CmpXX16");
+ case Iop_CmpEQ16: case Iop_CasCmpEQ16:
+ return Xcc_Z;
+ case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
+ return Xcc_NZ;
+ default:
+ vpanic("iselCondCode(x86): CmpXX16");
}
}
@@ -1882,13 +1903,15 @@
|| e->Iex.Binop.op == Iop_CmpLE32S
|| e->Iex.Binop.op == Iop_CmpLE32U
|| e->Iex.Binop.op == Iop_CasCmpEQ32
- || e->Iex.Binop.op == Iop_CasCmpNE32)) {
+ || e->Iex.Binop.op == Iop_CasCmpNE32
+ || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
switch (e->Iex.Binop.op) {
case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
- case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ;
+ case Iop_CmpNE32:
+ case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
case Iop_CmpLT32S: return Xcc_L;
case Iop_CmpLT32U: return Xcc_B;
case Iop_CmpLE32S: return Xcc_LE;
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index 99b2266..b356f60 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -147,6 +147,8 @@
str = "CasCmpEQ"; base = Iop_CasCmpEQ8; break;
case Iop_CasCmpNE8 ... Iop_CasCmpNE64:
str = "CasCmpNE"; base = Iop_CasCmpNE8; break;
+ case Iop_ExpCmpNE8 ... Iop_ExpCmpNE64:
+ str = "ExpCmpNE"; base = Iop_ExpCmpNE8; break;
case Iop_Not8 ... Iop_Not64:
str = "Not"; base = Iop_Not8; break;
/* other cases must explicitly "return;" */
@@ -581,6 +583,7 @@
case Iop_Reverse64_16x4: vex_printf("Reverse64_16x4"); return;
case Iop_Reverse64_32x2: vex_printf("Reverse64_32x2"); return;
case Iop_Abs32Fx2: vex_printf("Abs32Fx2"); return;
+ case Iop_GetMSBs8x8: vex_printf("GetMSBs8x8"); return;
case Iop_CmpNEZ32x2: vex_printf("CmpNEZ32x2"); return;
case Iop_CmpNEZ16x4: vex_printf("CmpNEZ16x4"); return;
@@ -2271,18 +2274,18 @@
UNARY(Ity_I64, Ity_I64);
case Iop_CmpEQ8: case Iop_CmpNE8:
- case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
+ case Iop_CasCmpEQ8: case Iop_CasCmpNE8: case Iop_ExpCmpNE8:
COMPARISON(Ity_I8);
case Iop_CmpEQ16: case Iop_CmpNE16:
- case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
+ case Iop_CasCmpEQ16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
COMPARISON(Ity_I16);
case Iop_CmpEQ32: case Iop_CmpNE32:
- case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
+ case Iop_CasCmpEQ32: case Iop_CasCmpNE32: case Iop_ExpCmpNE32:
case Iop_CmpLT32S: case Iop_CmpLE32S:
case Iop_CmpLT32U: case Iop_CmpLE32U:
COMPARISON(Ity_I32);
case Iop_CmpEQ64: case Iop_CmpNE64:
- case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
+ case Iop_CasCmpEQ64: case Iop_CasCmpNE64: case Iop_ExpCmpNE64:
case Iop_CmpLT64S: case Iop_CmpLE64S:
case Iop_CmpLT64U: case Iop_CmpLE64U:
COMPARISON(Ity_I64);
@@ -2296,6 +2299,7 @@
case Iop_Left16: UNARY(Ity_I16,Ity_I16);
case Iop_CmpwNEZ32: case Iop_Left32: UNARY(Ity_I32,Ity_I32);
case Iop_CmpwNEZ64: case Iop_Left64: UNARY(Ity_I64,Ity_I64);
+ case Iop_GetMSBs8x8: UNARY(Ity_I64, Ity_I8);
case Iop_MullU8: case Iop_MullS8:
BINARY(Ity_I8,Ity_I8, Ity_I16);
diff --git a/priv/ir_opt.c b/priv/ir_opt.c
index b7e3d9a..1537df6 100644
--- a/priv/ir_opt.c
+++ b/priv/ir_opt.c
@@ -1836,16 +1836,22 @@
/* -- CmpNE -- */
case Iop_CmpNE8:
+ case Iop_CasCmpNE8:
+ case Iop_ExpCmpNE8:
e2 = IRExpr_Const(IRConst_U1(toBool(
((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
!= (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
break;
case Iop_CmpNE32:
+ case Iop_CasCmpNE32:
+ case Iop_ExpCmpNE32:
e2 = IRExpr_Const(IRConst_U1(toBool(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
!= e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
break;
case Iop_CmpNE64:
+ case Iop_CasCmpNE64:
+ case Iop_ExpCmpNE64:
e2 = IRExpr_Const(IRConst_U1(toBool(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
!= e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index f399bb2..99eaaaf 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -437,6 +437,10 @@
Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
+ /* Exactly like CmpNE8/16/32/64, but carrying the additional
      hint that these need expensive definedness tracking. */
+ Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64,
+
/* -- Ordering not important after here. -- */
/* Widening multiplies */
@@ -991,6 +995,10 @@
is undefined. */
Iop_Perm8x8,
+ /* MISC CONVERSION -- get high bits of each byte lane, a la
+ x86/amd64 pmovmskb */
+ Iop_GetMSBs8x8, /* I64 -> I8 */
+
/* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
See floating-point equiwalents for details. */
Iop_Recip32x2, Iop_Rsqrte32x2,