Change how FXSAVE and FXRSTOR are done, so as to avoid pushing the XMM
register contents themselves through the helper functions. This
avoids the false positives reported in #291310.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2949 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_amd64_defs.h b/priv/guest_amd64_defs.h
index 008638e..c321127 100644
--- a/priv/guest_amd64_defs.h
+++ b/priv/guest_amd64_defs.h
@@ -170,8 +170,10 @@
extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
-extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord );
-extern VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State*, HWord );
+extern void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM
+ ( VexGuestAMD64State*, HWord );
+extern VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM
+ ( VexGuestAMD64State*, HWord );
extern ULong amd64g_dirtyhelper_RDTSC ( void );
extern void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st );
diff --git a/priv/guest_amd64_helpers.c b/priv/guest_amd64_helpers.c
index 6ca67e9..4ed9fdd 100644
--- a/priv/guest_amd64_helpers.c
+++ b/priv/guest_amd64_helpers.c
@@ -1723,7 +1723,8 @@
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* NOTE: only handles 32-bit format (no REX.W on the insn) */
-void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
+void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
+ HWord addr )
{
/* Derived from values obtained from
vendor_id : AuthenticAMD
@@ -1738,7 +1739,6 @@
Fpu_State tmp;
UShort* addrS = (UShort*)addr;
UChar* addrC = (UChar*)addr;
- U128* xmm = (U128*)(addr + 160);
UInt mxcsr;
UShort fp_tags;
UInt summary_tags;
@@ -1803,76 +1803,30 @@
}
/* That's the first 160 bytes of the image done. Now only %xmm0
- .. %xmm15 remain to be copied. If the host is big-endian, these
- need to be byte-swapped. */
- vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src) \
- do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
- _dst[2] = _src[2]; _dst[3] = _src[3]; } \
- while (0)
-
- COPY_U128( xmm[0], gst->guest_YMM0 );
- COPY_U128( xmm[1], gst->guest_YMM1 );
- COPY_U128( xmm[2], gst->guest_YMM2 );
- COPY_U128( xmm[3], gst->guest_YMM3 );
- COPY_U128( xmm[4], gst->guest_YMM4 );
- COPY_U128( xmm[5], gst->guest_YMM5 );
- COPY_U128( xmm[6], gst->guest_YMM6 );
- COPY_U128( xmm[7], gst->guest_YMM7 );
- COPY_U128( xmm[8], gst->guest_YMM8 );
- COPY_U128( xmm[9], gst->guest_YMM9 );
- COPY_U128( xmm[10], gst->guest_YMM10 );
- COPY_U128( xmm[11], gst->guest_YMM11 );
- COPY_U128( xmm[12], gst->guest_YMM12 );
- COPY_U128( xmm[13], gst->guest_YMM13 );
- COPY_U128( xmm[14], gst->guest_YMM14 );
- COPY_U128( xmm[15], gst->guest_YMM15 );
-
-# undef COPY_U128
+ .. %xmm15 remain to be copied, and we let the generated IR do
+ that, so as to make Memcheck's definedness flow for the non-XMM
+ parts independent from that of all the other control and
+ status words in the structure. This avoids the false positives
+ shown in #291310. */
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
-VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
+VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
+ HWord addr )
{
Fpu_State tmp;
VexEmNote warnX87 = EmNote_NONE;
VexEmNote warnXMM = EmNote_NONE;
UShort* addrS = (UShort*)addr;
UChar* addrC = (UChar*)addr;
- U128* xmm = (U128*)(addr + 160);
UShort fp_tags;
Int r, stno, i;
- /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
- to be byte-swapped. */
- vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src) \
- do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
- _dst[2] = _src[2]; _dst[3] = _src[3]; } \
- while (0)
-
- COPY_U128( gst->guest_YMM0, xmm[0] );
- COPY_U128( gst->guest_YMM1, xmm[1] );
- COPY_U128( gst->guest_YMM2, xmm[2] );
- COPY_U128( gst->guest_YMM3, xmm[3] );
- COPY_U128( gst->guest_YMM4, xmm[4] );
- COPY_U128( gst->guest_YMM5, xmm[5] );
- COPY_U128( gst->guest_YMM6, xmm[6] );
- COPY_U128( gst->guest_YMM7, xmm[7] );
- COPY_U128( gst->guest_YMM8, xmm[8] );
- COPY_U128( gst->guest_YMM9, xmm[9] );
- COPY_U128( gst->guest_YMM10, xmm[10] );
- COPY_U128( gst->guest_YMM11, xmm[11] );
- COPY_U128( gst->guest_YMM12, xmm[12] );
- COPY_U128( gst->guest_YMM13, xmm[13] );
- COPY_U128( gst->guest_YMM14, xmm[14] );
- COPY_U128( gst->guest_YMM15, xmm[15] );
-
-# undef COPY_U128
+ /* Don't restore %xmm0 .. %xmm15, for the same reasons that
+ amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them. See
+ comment in that function for details. */
/* Copy the x87 registers out of the image, into a temporary
Fpu_State struct. */
diff --git a/priv/guest_amd64_toIR.c b/priv/guest_amd64_toIR.c
index 78df883..0f85eca 100644
--- a/priv/guest_amd64_toIR.c
+++ b/priv/guest_amd64_toIR.c
@@ -13575,11 +13575,12 @@
DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
/* Uses dirty helper:
- void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */
+ void amd64g_do_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
+ ULong ) */
d = unsafeIRDirty_0_N (
0/*regparms*/,
- "amd64g_dirtyhelper_FXSAVE",
- &amd64g_dirtyhelper_FXSAVE,
+ "amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM",
+ &amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM,
mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
);
@@ -13589,7 +13590,7 @@
d->mSize = 464; /* according to recent Intel docs */
/* declare we're reading guest state */
- d->nFxState = 7;
+ d->nFxState = 6;
vex_bzero(&d->fxState, sizeof(d->fxState));
d->fxState[0].fx = Ifx_Read;
@@ -13613,24 +13614,25 @@
d->fxState[4].size = sizeof(ULong);
d->fxState[5].fx = Ifx_Read;
- d->fxState[5].offset = OFFB_YMM0;
- d->fxState[5].size = sizeof(U128);
- /* plus 15 more of the above, spaced out in YMM sized steps */
- d->fxState[5].nRepeats = 15;
- d->fxState[5].repeatLen = sizeof(U256);
+ d->fxState[5].offset = OFFB_SSEROUND;
+ d->fxState[5].size = sizeof(ULong);
- d->fxState[6].fx = Ifx_Read;
- d->fxState[6].offset = OFFB_SSEROUND;
- d->fxState[6].size = sizeof(ULong);
-
- /* Be paranoid ... this assertion tries to ensure the 16 %ymm
- images are packed back-to-back. If not, the settings for
- d->fxState[5] are wrong. */
- vassert(32 == sizeof(U256));
- vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
-
+ /* Call the helper. This creates all parts of the in-memory
+ image except for the XMM[0..15] array, which we do
+ separately, in order that any undefinedness in the XMM
+ registers is tracked separately by Memcheck and does not
+ "infect" the in-memory shadow for the other parts of the
+ image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
+ SSEROUND). */
stmt( IRStmt_Dirty(d) );
+ /* And now the XMMs themselves. */
+ UInt xmm;
+ for (xmm = 0; xmm < 16; xmm++) {
+ storeLE( binop(Iop_Add64, mkexpr(addr), mkU64(160 + xmm * 16)),
+ getXMMReg(xmm) );
+ }
+
goto decode_success;
}
/* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
@@ -13650,14 +13652,15 @@
DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
/* Uses dirty helper:
- VexEmNote amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong )
+ VexEmNote amd64g_do_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
+ ULong )
NOTE:
- the VexEmNote value is simply ignored
+ the VexEmNote value is simply ignored
*/
d = unsafeIRDirty_0_N (
0/*regparms*/,
- "amd64g_dirtyhelper_FXRSTOR",
- &amd64g_dirtyhelper_FXRSTOR,
+ "amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM",
+ &amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM,
mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
);
@@ -13667,7 +13670,7 @@
d->mSize = 464; /* according to recent Intel docs */
/* declare we're writing guest state */
- d->nFxState = 7;
+ d->nFxState = 6;
vex_bzero(&d->fxState, sizeof(d->fxState));
d->fxState[0].fx = Ifx_Write;
@@ -13691,24 +13694,26 @@
d->fxState[4].size = sizeof(ULong);
d->fxState[5].fx = Ifx_Write;
- d->fxState[5].offset = OFFB_YMM0;
- d->fxState[5].size = sizeof(U128);
- /* plus 15 more of the above, spaced out in YMM sized steps */
- d->fxState[5].nRepeats = 15;
- d->fxState[5].repeatLen = sizeof(U256);
+ d->fxState[5].offset = OFFB_SSEROUND;
+ d->fxState[5].size = sizeof(ULong);
- d->fxState[6].fx = Ifx_Write;
- d->fxState[6].offset = OFFB_SSEROUND;
- d->fxState[6].size = sizeof(ULong);
-
- /* Be paranoid ... this assertion tries to ensure the 16 %ymm
- images are packed back-to-back. If not, the settings for
- d->fxState[5] are wrong. */
- vassert(32 == sizeof(U256));
- vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
-
+ /* Call the helper. This reads all parts of the in-memory
+ image except for the XMM[0..15] array, which we do
+ separately, in order that any undefinedness in the XMM
+ registers is tracked separately by Memcheck and does not
+ "infect" the in-guest-state shadow for the other parts of the
+ image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
+ SSEROUND). */
stmt( IRStmt_Dirty(d) );
+ /* And now the XMMs themselves. */
+ UInt xmm;
+ for (xmm = 0; xmm < 16; xmm++) {
+ putXMMReg(xmm, loadLE(Ity_V128,
+ binop(Iop_Add64, mkexpr(addr),
+ mkU64(160 + xmm * 16))));
+ }
+
goto decode_success;
}
break;