arm64: implement:
suqadd, usqadd (scalar)
suqadd, usqadd (vector)
git-svn-id: svn://svn.valgrind.org/vex/trunk@2928 8f6e269a-dfd6-0310-a8e1-e2731360e62c
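For reference: SUQADD and USQADD accumulate a value of one signedness into a
register of the other signedness, saturating to the destination's range and
setting the sticky QC flag on saturation.  Per lane, SUQADD computes
Vd = SignedSat(signed(Vd) + unsigned(Vn)) and USQADD computes
Vd = UnsignedSat(unsigned(Vd) + signed(Vn)).  A minimal single-lane reference
model in C for 8-bit lanes (illustration only; these helper names are not
part of the patch):

    #include <stdint.h>

    /* SUQADD, one byte lane: signed accumulator, unsigned addend,
       saturate to the signed range.  Returns the raw lane bits. */
    static uint8_t ref_suqadd8 ( uint8_t d, uint8_t n )
    {
       int32_t sum = (int32_t)(int8_t)d + (int32_t)n;
       if (sum > 127) sum = 127;   /* saturated; QC would be set */
       /* sum cannot go below -128, since d >= -128 and n >= 0 */
       return (uint8_t)sum;
    }

    /* USQADD, one byte lane: unsigned accumulator, signed addend,
       saturate to the unsigned range. */
    static uint8_t ref_usqadd8 ( uint8_t d, uint8_t n )
    {
       int32_t sum = (int32_t)d + (int32_t)(int8_t)n;
       if (sum > 255) sum = 255;   /* saturated; QC would be set */
       if (sum < 0)   sum = 0;     /* saturated; QC would be set */
       return (uint8_t)sum;
    }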
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index a49b7b9..837abdd 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -621,6 +621,22 @@
return ops[size];
}
+static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
+ Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
+static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
+ const IROp ops[4]
+ = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
+ Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
+ vassert(size < 4);
+ return ops[size];
+}
+
static IROp mkVecSUB ( UInt size ) {
const IROp ops[4]
= { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
@@ -8137,6 +8153,31 @@
UInt dd = INSN(4,0);
vassert(size < 4);
+ if (opcode == BITS5(0,0,0,1,1)) {
+ /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
+ /* -------- 1,xx,00011: USQADD std4_std4 -------- */
+ /* These are a bit tricky (to say the least). See comments on
+ the vector variants (in dis_AdvSIMD_two_reg_misc) below for
+ details. */
+ Bool isUSQADD = bitU == 1;
+ IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
+ : mkVecQADDEXTUSSATSS(size);
+ IROp nop = mkVecADD(size);
+ IRTemp argL = newTempV128();
+ IRTemp argR = newTempV128();
+ assign(argL, getQReg128(nn));
+ assign(argR, getQReg128(dd));
+ IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
+ size, binop(qop, mkexpr(argL), mkexpr(argR)));
+ IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
+ size, binop(nop, mkexpr(argL), mkexpr(argR)));
+ putQReg128(dd, mkexpr(qres));
+ updateQCFLAGwithDifference(qres, nres);
+ const HChar arr = "bhsd"[size];
+ DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
+ return True;
+ }
+
if (opcode == BITS5(0,0,1,1,1)) {
/* -------- 0,xx,00111 SQABS std4_std4 -------- */
/* -------- 1,xx,00111 SQNEG std4_std4 -------- */
@@ -9747,6 +9788,39 @@
return True;
}
+ if (opcode == BITS5(0,0,0,1,1)) {
+ /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
+ /* -------- 1,xx,00011: USQADD std7_std7 -------- */
+ if (bitQ == 0 && size == X11) return False; // implied 1d case
+ Bool isUSQADD = bitU == 1;
+ /* This is switched (in the US vs SU sense) deliberately.
+ SUQADD corresponds to the ExtUSsatSS variants and
+ USQADD corresponds to the ExtSUsatUU variants.
See libvex_ir.h for more details. */
+ IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
+ : mkVecQADDEXTUSSATSS(size);
+ IROp nop = mkVecADD(size);
+ IRTemp argL = newTempV128();
+ IRTemp argR = newTempV128();
+ IRTemp qres = newTempV128();
+ IRTemp nres = newTempV128();
+ /* Because the two arguments to the addition are implicitly
extended differently (one signedly, the other unsignedly), it is
important to present them to the primop in the correct order. */
+ assign(argL, getQReg128(nn));
+ assign(argR, getQReg128(dd));
+ assign(qres, math_MAYBE_ZERO_HI64_fromE(
+ bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
+ assign(nres, math_MAYBE_ZERO_HI64_fromE(
+ bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
+ putQReg128(dd, mkexpr(qres));
+ updateQCFLAGwithDifference(qres, nres);
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
if (opcode == BITS5(0,0,1,0,0)) {
/* -------- 0,xx,00100: CLS std6_std6 -------- */
/* -------- 1,xx,00100: CLZ std6_std6 -------- */
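A note on the QC handling visible in both hunks above: the front end computes
the saturating result (qres) and, in parallel, a plain wrapping add (nres),
then hands both to updateQCFLAGwithDifference, so the sticky QC bit becomes
set exactly when some lane saturated.  The idea, sketched in plain C (a model
of the concept only, not the actual helper, which operates on IR temporaries):

    #include <stdint.h>

    typedef struct { uint8_t b[16]; } V128;

    /* Any lane where the saturating and wrapping results disagree
       means saturation occurred, so OR the difference into the
       sticky flag. */
    static void qc_update_model ( V128* qcflag, V128 qres, V128 nres )
    {
       for (int i = 0; i < 16; i++)
          qcflag->b[i] |= (uint8_t)(qres.b[i] ^ nres.b[i]);
    }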
diff --git a/priv/host_arm64_defs.c b/priv/host_arm64_defs.c
index b977d19..df9b427 100644
--- a/priv/host_arm64_defs.c
+++ b/priv/host_arm64_defs.c
@@ -702,6 +702,22 @@
}
}
+static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
+ /*OUT*/const HChar** ar,
+ ARM64VecModifyOp op ) {
+ switch (op) {
+ case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d"; return;
+ case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s"; return;
+ case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h"; return;
+ case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
+ case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d"; return;
+ case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s"; return;
+ case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h"; return;
+ case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
+ default: vpanic("showARM64VecModifyOp");
+ }
+}
+
static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
/*OUT*/const HChar** ar, ARM64VecUnaryOp op )
{
@@ -1117,6 +1133,14 @@
i->ARM64in.VBinV.argR = argR;
return i;
}
+ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
+ ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+ i->tag = ARM64in_VModifyV;
+ i->ARM64in.VModifyV.op = op;
+ i->ARM64in.VModifyV.mod = mod;
+ i->ARM64in.VModifyV.arg = arg;
+ return i;
+}
ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
i->tag = ARM64in_VUnaryV;
@@ -1639,6 +1663,17 @@
vex_printf(".%s", ar);
return;
}
+ case ARM64in_VModifyV: {
+ const HChar* nm = "??";
+ const HChar* ar = "??";
+ showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
+ vex_printf("%s ", nm);
+ ppHRegARM64(i->ARM64in.VModifyV.mod);
+ vex_printf(".%s, ", ar);
+ ppHRegARM64(i->ARM64in.VModifyV.arg);
+ vex_printf(".%s", ar);
+ return;
+ }
case ARM64in_VUnaryV: {
const HChar* nm = "??";
const HChar* ar = "??";
@@ -2000,6 +2035,11 @@
addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
return;
+ case ARM64in_VModifyV:
+ addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
+ addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
+ addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
+ return;
case ARM64in_VUnaryV:
addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
@@ -2214,6 +2254,10 @@
i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
return;
+ case ARM64in_VModifyV:
+ i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
+ i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
+ return;
case ARM64in_VUnaryV:
i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
@@ -4493,6 +4537,43 @@
}
goto done;
}
+ case ARM64in_VModifyV: {
+ /* 31 23 20 15 9 4
+ 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
+ 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
+ */
+ UInt vD = qregNo(i->ARM64in.VModifyV.mod);
+ UInt vN = qregNo(i->ARM64in.VModifyV.arg);
+ switch (i->ARM64in.VModifyV.op) {
+ case ARM64vecmo_SUQADD64x2:
+ *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
+ break;
+ case ARM64vecmo_SUQADD32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
+ break;
+ case ARM64vecmo_SUQADD16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
+ break;
+ case ARM64vecmo_SUQADD8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
+ break;
+ case ARM64vecmo_USQADD64x2:
+ *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
+ break;
+ case ARM64vecmo_USQADD32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
+ break;
+ case ARM64vecmo_USQADD16x8:
+ *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
+ break;
+ case ARM64vecmo_USQADD8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
+ break;
+ default:
+ goto bad;
+ }
+ goto done;
+ }
case ARM64in_VUnaryV: {
/* 31 23 20 15 9 4
010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
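The emitted words above can be cross-checked by hand.  This standalone sketch
restates the X_3_8_5_6_5_5 field packer (the real one lives elsewhere in
host_arm64_defs.c; this copy is for illustration only) and assembles
suqadd v1.4s, v2.4s from the fields given in the VModifyV case:

    #include <stdio.h>
    #include <stdint.h>

    /* Pack six fields of widths 3, 8, 5, 6, 5, 5 bits, MSB first. */
    static uint32_t pack_3_8_5_6_5_5 ( uint32_t f1, uint32_t f2,
                                       uint32_t f3, uint32_t f4,
                                       uint32_t f5, uint32_t f6 )
    {
       return (f1 << 29) | (f2 << 21) | (f3 << 16)
              | (f4 << 10) | (f5 << 5) | f6;
    }

    int main ( void )
    {
       /* suqadd v1.4s, v2.4s:
          010 01110101 00000 001110 n=2 d=1 */
       uint32_t insn = pack_3_8_5_6_5_5(0x2, 0x75, 0x00, 0x0E, 2, 1);
       printf("%08x\n", insn);   /* prints 4ea03841 */
       return 0;
    }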
diff --git a/priv/host_arm64_defs.h b/priv/host_arm64_defs.h
index 9755b52..e100b0f 100644
--- a/priv/host_arm64_defs.h
+++ b/priv/host_arm64_defs.h
@@ -386,6 +386,16 @@
typedef
enum {
+ ARM64vecmo_SUQADD64x2=335, ARM64vecmo_SUQADD32x4,
+ ARM64vecmo_SUQADD16x8, ARM64vecmo_SUQADD8x16,
+ ARM64vecmo_USQADD64x2, ARM64vecmo_USQADD32x4,
+ ARM64vecmo_USQADD16x8, ARM64vecmo_USQADD8x16,
+ ARM64vecmo_INVALID
+ }
+ ARM64VecModifyOp;
+
+typedef
+ enum {
ARM64vecu_FNEG64x2=300, ARM64vecu_FNEG32x4,
ARM64vecu_FABS64x2, ARM64vecu_FABS32x4,
ARM64vecu_NOT,
@@ -482,6 +492,7 @@
ARM64in_FPSR,
/* ARM64in_V*V: vector ops on vector registers */
ARM64in_VBinV,
+ ARM64in_VModifyV,
ARM64in_VUnaryV,
ARM64in_VNarrowV,
ARM64in_VShiftImmV,
@@ -746,6 +757,13 @@
HReg argL;
HReg argR;
} VBinV;
+ /* binary vector operation on vector registers, in which the
destination register is also a source (read-modify-write) */
+ struct {
+ ARM64VecModifyOp op;
+ HReg mod;
+ HReg arg;
+ } VModifyV;
/* unary vector operation on vector registers */
struct {
ARM64VecUnaryOp op;
@@ -871,6 +889,7 @@
extern ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg );
extern ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg );
extern ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op, HReg, HReg, HReg );
+extern ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp, HReg, HReg );
extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg );
extern ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op, UInt dszBlg2,
HReg dst, HReg src );
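Usage-wise, the new constructor takes the modified register first, then the
read-only argument.  A thin wrapper makes the read-modify-write contract
explicit (a sketch only; this wrapper is not part of the patch):

    /* Build 'suqadd mod.4s, arg.4s'.  'mod' is the accumulator: it is
       read as well as written, which is why getRegUsage marks it with
       both HRmRead and HRmWrite. */
    static ARM64Instr* mk_suqadd_4s ( HReg mod, HReg arg )
    {
       return ARM64Instr_VModifyV(ARM64vecmo_SUQADD32x4, mod, arg);
    }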
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index 0787419..70c8073 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -2296,6 +2296,7 @@
addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
return res;
}
+ /* -- Cases where we can generate a simple three-reg instruction. -- */
case Iop_AndV128:
case Iop_OrV128:
case Iop_XorV128:
@@ -2471,6 +2472,40 @@
}
return res;
}
+ /* -- These exist only as 2-operand instructions, in which the
destination register is also a source, so we first copy the accumulator
argument (argR) into a fresh register and modify that copy. -- */
+ case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
+ case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
+ case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
+ case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
+ {
+ HReg res = newVRegV(env);
+ HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
+ ARM64VecModifyOp op = ARM64vecmo_INVALID;
+ switch (e->Iex.Binop.op) {
/* In the following 8 cases the US - SU switching is intended.
See the comments in libvex_ir.h for details, and also the
ARM64 front end, where these primops are generated. */
+ case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
+ case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
+ case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
+ case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
+ case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
+ case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
+ case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
+ case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
+ default: vassert(0);
+ }
+ /* The order of the operands matters: although this is basically
addition, the two operands are extended differently (one signedly,
the other unsignedly), so they must be placed in the correct
registers of the instruction. */
+ addInstr(env, ARM64Instr_VMov(16, res, argR));
+ addInstr(env, ARM64Instr_VModifyV(op, res, argL));
+ return res;
+ }
+ /* -- Shifts by an immediate. -- */
case Iop_ShrN64x2: case Iop_ShrN32x4:
case Iop_ShrN16x8: case Iop_ShrN8x16:
case Iop_SarN64x2: case Iop_SarN32x4:
@@ -2574,7 +2609,7 @@
/* else fall out; this is unhandled */
break;
}
-
+ /* -- Saturating narrowing by an immediate -- */
/* uu */
case Iop_QandQShrNnarrow16Uto8Ux8:
case Iop_QandQShrNnarrow32Uto16Ux4:
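For a concrete picture of what the VModifyV selection above produces:

    /* Given  t3 = QAddExtUSsatSS32x4(t1, t2)  and assuming (purely
       illustrative) register assignments t1 -> q28, t2 -> q29,
       t3 -> q30, the two instructions emitted are, in effect:

          mov    q30, q29         // copy argR, the accumulator
          suqadd v30.4s, v28.4s   // q30 := SignedSat(S(q30) + U(q28))

       i.e. the accumulator is placed in the destination register
       first, then the saturating add modifies it in place. */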
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index c305c22..c8f90fe 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -740,6 +740,16 @@
case Iop_QAdd32Sx4: vex_printf("QAdd32Sx4"); return;
case Iop_QAdd64Ux2: vex_printf("QAdd64Ux2"); return;
case Iop_QAdd64Sx2: vex_printf("QAdd64Sx2"); return;
+
+ case Iop_QAddExtUSsatSS8x16: vex_printf("QAddExtUSsatSS8x16"); return;
+ case Iop_QAddExtUSsatSS16x8: vex_printf("QAddExtUSsatSS16x8"); return;
+ case Iop_QAddExtUSsatSS32x4: vex_printf("QAddExtUSsatSS32x4"); return;
+ case Iop_QAddExtUSsatSS64x2: vex_printf("QAddExtUSsatSS64x2"); return;
+ case Iop_QAddExtSUsatUU8x16: vex_printf("QAddExtSUsatUU8x16"); return;
+ case Iop_QAddExtSUsatUU16x8: vex_printf("QAddExtSUsatUU16x8"); return;
+ case Iop_QAddExtSUsatUU32x4: vex_printf("QAddExtSUsatUU32x4"); return;
+ case Iop_QAddExtSUsatUU64x2: vex_printf("QAddExtSUsatUU64x2"); return;
+
case Iop_PwAdd8x16: vex_printf("PwAdd8x16"); return;
case Iop_PwAdd16x8: vex_printf("PwAdd16x8"); return;
case Iop_PwAdd32x4: vex_printf("PwAdd32x4"); return;
@@ -2892,6 +2902,10 @@
case Iop_QAdd32Ux4: case Iop_QAdd64Ux2:
case Iop_QAdd8Sx16: case Iop_QAdd16Sx8:
case Iop_QAdd32Sx4: case Iop_QAdd64Sx2:
+ case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
+ case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
+ case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
+ case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
case Iop_PwAdd8x16: case Iop_PwAdd16x8: case Iop_PwAdd32x4:
case Iop_Sub8x16: case Iop_Sub16x8:
case Iop_Sub32x4: case Iop_Sub64x2:
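With the typeOfPrimop entries above, the new primops are ordinary
V128 x V128 -> V128 binops and can be built like any other vector add,
e.g. (a sketch using the front end's own helpers):

    IRTemp argL = newTempV128();   /* lanes widened unsignedly by the op */
    IRTemp argR = newTempV128();   /* lanes widened signedly by the op   */
    IRExpr* e = binop(Iop_QAddExtUSsatSS32x4, mkexpr(argL), mkexpr(argR));
    /* typeOfIRExpr reports Ity_V128 for e */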
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index 9f66681..bc2fa46 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -1397,10 +1397,20 @@
/* MISC (vector integer cmp != 0) */
Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
- /* ADDITION (normal / unsigned sat / signed sat) */
- Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2,
- Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
- Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
+ /* ADDITION (normal / U->U sat / S->S sat) */
+ Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2,
+ Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
+ Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
+
+ /* ADDITION, ARM64-specific saturating variants. */
+ /* Unsigned widen left arg, signed widen right arg, add, saturate S->S.
+ This corresponds to SUQADD. */
+ Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
+ Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2,
+ /* Signed widen left arg, unsigned widen right arg, add, saturate U->U.
+ This corresponds to USQADD. */
+ Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
+ Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2,
/* SUBTRACTION (normal / unsigned sat / signed sat) */
Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2,
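A worked example for the 8-bit SUQADD lanes helps fix the naming: with a Vd
lane of 0x70 (signed 112) and a Vn lane of 0xC8 (unsigned 200), the widened
sum is 312, which saturates to 127 (0x7F) and sets QC; with Vd = 0x80
(signed -128) and Vn = 0xFF (unsigned 255) the sum is exactly 127, in range,
so no saturation occurs.  The Ext letters record how the two arguments are
widened (U for the left arg, S for the right in QAddExtUSsatSS), while the
satSS suffix follows the accumulator's signedness - hence the deliberate
US/SU swap, relative to the SUQADD/USQADD mnemonics, that the front-end
comments point out.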