Update the PPC front end and back end to fix assertion failures
resulting from the change of arity of Iop_{Add,Sub,Mul}32Fx4 introduced
in r2809, in which those IROps acquired a rounding-mode (first) argument.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2853 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_ppc_toIR.c b/priv/guest_ppc_toIR.c
index 281b3f8..ebf0388 100644
--- a/priv/guest_ppc_toIR.c
+++ b/priv/guest_ppc_toIR.c
@@ -69,6 +69,12 @@
unconditional calls and returns (bl, blr). They should also be
emitted for conditional calls and returns, but we don't have a
way to express that right now. Ah well.
+
+ - Uses of Iop_{Add,Sub,Mul}32Fx4: the backend (host_ppc_isel.c)
+ cannot honour the rounding mode, because the Altivec insns it
+ emits always round to nearest. This means Valgrind will compute
+ incorrect results for uses of these IROps when the rounding mode
+ (first) arg is not mkU32(Irrm_NEAREST).
*/
/* "Special" instructions.
@@ -12981,17 +12987,23 @@
switch (opc2) {
case 0x100: // xvaddsp (VSX Vector Add Single-Precision)
DIP("xvaddsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
- putVSReg( XT, binop(Iop_Add32Fx4, getVSReg( XA ), getVSReg( XB )) );
+ // WARNING: BOGUS! The backend ignores rm on Iop_Add32Fx4
+ putVSReg( XT, triop(Iop_Add32Fx4, rm,
+ getVSReg( XA ), getVSReg( XB )) );
break;
case 0x140: // xvmulsp (VSX Vector Multiply Single-Precision)
DIP("xvmulsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
- putVSReg( XT, binop(Iop_Mul32Fx4, getVSReg( XA ), getVSReg( XB )) );
+ // WARNING: BOGUS! The backend ignores rm on Iop_Mul32Fx4
+ putVSReg( XT, triop(Iop_Mul32Fx4, rm,
+ getVSReg( XA ), getVSReg( XB )) );
break;
case 0x120: // xvsubsp (VSX Vector Subtract Single-Precision)
DIP("xvsubsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
- putVSReg( XT, binop(Iop_Sub32Fx4, getVSReg( XA ), getVSReg( XB )) );
+ // WARNING: BOGUS! The backend ignores rm on Iop_Sub32Fx4
+ putVSReg( XT, triop(Iop_Sub32Fx4, rm,
+ getVSReg( XA ), getVSReg( XB )) );
break;
case 0x160: // xvdivsp (VSX Vector Divide Single-Precision)
@@ -17775,23 +17787,29 @@
return False;
}
+ IRTemp rm = newTemp(Ity_I32);
+ assign(rm, get_IR_roundingmode());
+
opc2 = IFIELD( theInstr, 0, 6 );
switch (opc2) {
case 0x2E: // vmaddfp (Multiply Add FP, AV p177)
DIP("vmaddfp v%d,v%d,v%d,v%d\n",
vD_addr, vA_addr, vC_addr, vB_addr);
putVReg( vD_addr,
- binop(Iop_Add32Fx4, mkexpr(vB),
- binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) );
+ triop(Iop_Add32Fx4, mkU32(Irrm_NEAREST),
+ mkexpr(vB),
+ triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
+ mkexpr(vA), mkexpr(vC))) );
return True;
case 0x2F: { // vnmsubfp (Negative Multiply-Subtract FP, AV p215)
DIP("vnmsubfp v%d,v%d,v%d,v%d\n",
vD_addr, vA_addr, vC_addr, vB_addr);
putVReg( vD_addr,
- binop(Iop_Sub32Fx4,
+ triop(Iop_Sub32Fx4, mkU32(Irrm_NEAREST),
mkexpr(vB),
- binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) );
+ triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
+ mkexpr(vA), mkexpr(vC))) );
return True;
}
@@ -17803,12 +17821,14 @@
switch (opc2) {
case 0x00A: // vaddfp (Add FP, AV p137)
DIP("vaddfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_Add32Fx4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, triop(Iop_Add32Fx4,
+ mkU32(Irrm_NEAREST), mkexpr(vA), mkexpr(vB)) );
return True;
case 0x04A: // vsubfp (Subtract FP, AV p261)
DIP("vsubfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_Sub32Fx4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, triop(Iop_Sub32Fx4,
+ mkU32(Irrm_NEAREST), mkexpr(vA), mkexpr(vB)) );
return True;
case 0x40A: // vmaxfp (Maximum FP, AV p178)
@@ -17925,8 +17945,9 @@
binop(Iop_CmpLE32Fx4, mkexpr(vA), mkexpr(vB))) );
assign( lt, unop(Iop_NotV128,
binop(Iop_CmpGE32Fx4, mkexpr(vA),
- binop(Iop_Sub32Fx4, mkexpr(zeros),
- mkexpr(vB)))) );
+ triop(Iop_Sub32Fx4, mkU32(Irrm_NEAREST),
+ mkexpr(zeros),
+ mkexpr(vB)))) );
// finally, just shift gt,lt to correct position
assign( vD, binop(Iop_ShlN32x4,
@@ -17987,7 +18008,7 @@
switch (opc2) {
case 0x30A: // vcfux (Convert from Unsigned Fixed-Point W, AV p156)
DIP("vcfux v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
- putVReg( vD_addr, binop(Iop_Mul32Fx4,
+ putVReg( vD_addr, triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
unop(Iop_I32UtoFx4, mkexpr(vB)),
mkexpr(vInvScale)) );
return True;
@@ -17995,7 +18016,7 @@
case 0x34A: // vcfsx (Convert from Signed Fixed-Point W, AV p155)
DIP("vcfsx v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
- putVReg( vD_addr, binop(Iop_Mul32Fx4,
+ putVReg( vD_addr, triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
unop(Iop_I32StoFx4, mkexpr(vB)),
mkexpr(vInvScale)) );
return True;
@@ -18004,14 +18025,16 @@
DIP("vctuxs v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
putVReg( vD_addr,
unop(Iop_QFtoI32Ux4_RZ,
- binop(Iop_Mul32Fx4, mkexpr(vB), mkexpr(vScale))) );
+ triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
+ mkexpr(vB), mkexpr(vScale))) );
return True;
case 0x3CA: // vctsxs (Convert to Signed Fixed-Point W Saturate, AV p171)
DIP("vctsxs v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
putVReg( vD_addr,
unop(Iop_QFtoI32Sx4_RZ,
- binop(Iop_Mul32Fx4, mkexpr(vB), mkexpr(vScale))) );
+ triop(Iop_Mul32Fx4, mkU32(Irrm_NEAREST),
+ mkexpr(vB), mkexpr(vScale))) );
return True;
default:
diff --git a/priv/host_ppc_isel.c b/priv/host_ppc_isel.c
index 6e1bfe6..850a9e6 100644
--- a/priv/host_ppc_isel.c
+++ b/priv/host_ppc_isel.c
@@ -4929,11 +4929,8 @@
}
}
- case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4;
- case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4;
case Iop_Max32Fx4: fpop = Pavfp_MAXF; goto do_32Fx4;
case Iop_Min32Fx4: fpop = Pavfp_MINF; goto do_32Fx4;
- case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4;
case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
@@ -5213,6 +5210,25 @@
return dst;
}
+ case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm;
+ case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm;
+ case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
+ do_32Fx4_with_rm:
+ {
+ HReg argL = iselVecExpr(env, triop->arg2);
+ HReg argR = iselVecExpr(env, triop->arg3);
+ HReg dst = newVRegV(env);
+ /* FIXME: setting the RM here is bogus, in the sense that
+ Altivec ignores FPSCR.RM, at least for some FP operations,
+ so doing so is pointless for these insns. The result is only
+ really correct when the RM is known, at JIT time, to be
+ Irrm_NEAREST, since -- at least for Altivec FP add/sub/mul
+ -- the emitted insn is hardwired to round to nearest. */
+ set_FPU_rounding_mode(env, triop->arg1);
+ addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
+ return dst;
+ }
+
default:
break;
} /* switch (e->Iex.Triop.op) */