arm64: route all whole-vector shift/rotate/slice operations
through Iop_SliceV128, so that it gets some test coverage. Implement
Iop_SliceV128 in the arm64 back end (host_arm64_isel.c).
git-svn-id: svn://svn.valgrind.org/vex/trunk@2940 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index d0db663..735373e 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -6844,11 +6844,9 @@
if (imm4 == 0) {
assign(res, mkexpr(sLo));
} else {
- vassert(imm4 <= 15);
- assign(res,
- binop(Iop_OrV128,
- binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
- binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
+ vassert(imm4 >= 1 && imm4 <= 15);
+ assign(res, triop(Iop_SliceV128,
+ mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
}
putQReg128(dd, mkexpr(res));
DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
@@ -6857,10 +6855,12 @@
if (imm4 == 0) {
assign(res, mkexpr(sLo));
} else {
- assign(res,
- binop(Iop_ShrV128,
- binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
- mkU8(8 * imm4)));
+ vassert(imm4 >= 1 && imm4 <= 7);
+ IRTemp hi64lo64 = newTempV128();
+ assign(hi64lo64, binop(Iop_InterleaveLO64x2,
+ mkexpr(sHi), mkexpr(sLo)));
+ assign(res, triop(Iop_SliceV128,
+ mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
}
putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
@@ -7015,8 +7015,15 @@
IRTemp preR = newTempV128();
IRTemp res = newTempV128();
if (bitQ == 0 && !isZIP1) {
- assign(preL, binop(Iop_ShlV128, getQReg128(mm), mkU8(32)));
- assign(preR, binop(Iop_ShlV128, getQReg128(nn), mkU8(32)));
+ IRTemp z128 = newTempV128();
+ assign(z128, mkV128(0x0000));
+ // preL = Vm shifted left 32 bits
+ // preR = Vn shifted left 32 bits
+ assign(preL, triop(Iop_SliceV128,
+ getQReg128(mm), mkexpr(z128), mkU8(12)));
+ assign(preR, triop(Iop_SliceV128,
+ getQReg128(nn), mkexpr(z128), mkU8(12)));
+
} else {
assign(preL, getQReg128(mm));
assign(preR, getQReg128(nn));
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index 884d2c7..233c275 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -2753,47 +2753,51 @@
break;
}
- case Iop_ShlV128:
- case Iop_ShrV128: {
- Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
- /* This is tricky. Generate an EXT instruction with zeroes in
- the high operand (shift right) or low operand (shift left).
- Note that we can only slice in the EXT instruction at a byte
- level of granularity, so the shift amount needs careful
- checking. */
- IRExpr* argL = e->Iex.Binop.arg1;
- IRExpr* argR = e->Iex.Binop.arg2;
- if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
- UInt amt = argR->Iex.Const.con->Ico.U8;
- Bool amtOK = False;
- switch (amt) {
- case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
- case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
- case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
- amtOK = True; break;
- }
- /* We could also deal with amt==0 by copying the source to
- the destination, but there's no need for that so far. */
- if (amtOK) {
- HReg src = iselV128Expr(env, argL);
- HReg srcZ = newVRegV(env);
- addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
- UInt immB = amt / 8;
- vassert(immB >= 1 && immB <= 15);
- HReg dst = newVRegV(env);
- if (isSHR) {
- addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
- immB));
- } else {
- addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
- 16 - immB));
- }
- return dst;
- }
- }
- /* else fall out; this is unhandled */
- break;
- }
+ // JRS 01 Sept 2014: these are tested and believed to be correct,
+ // but they are no longer used by the front end, hence commented
+ // out. They are replaced by Iop_SliceV128, which is more general
+ // and in many cases leads to better code overall.
+ //case Iop_ShlV128:
+ //case Iop_ShrV128: {
+ // Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
+ // /* This is tricky. Generate an EXT instruction with zeroes in
+ // the high operand (shift right) or low operand (shift left).
+ // Note that we can only slice in the EXT instruction at a byte
+ // level of granularity, so the shift amount needs careful
+ // checking. */
+ // IRExpr* argL = e->Iex.Binop.arg1;
+ // IRExpr* argR = e->Iex.Binop.arg2;
+ // if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
+ // UInt amt = argR->Iex.Const.con->Ico.U8;
+ // Bool amtOK = False;
+ // switch (amt) {
+ // case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
+ // case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
+ // case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
+ // amtOK = True; break;
+ // }
+ // /* We could also deal with amt==0 by copying the source to
+ // the destination, but there's no need for that so far. */
+ // if (amtOK) {
+ // HReg src = iselV128Expr(env, argL);
+ // HReg srcZ = newVRegV(env);
+ // addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
+ // UInt immB = amt / 8;
+ // vassert(immB >= 1 && immB <= 15);
+ // HReg dst = newVRegV(env);
+ // if (isSHR) {
+ // addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
+ // immB));
+ // } else {
+ // addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
+ // 16 - immB));
+ // }
+ // return dst;
+ // }
+ // }
+ // /* else fall out; this is unhandled */
+ // break;
+ //}
case Iop_PolynomialMull8x8:
case Iop_Mull32Ux2:
@@ -2857,7 +2861,30 @@
addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
return dst;
}
- }
+
+ if (triop->op == Iop_SliceV128) {
+ /* Note that, unlike the (now commented-out) ShlV128/ShrV128 cases,
+ the shift amount here is in bytes, not bits. */
+ IRExpr* argHi = triop->arg1;
+ IRExpr* argLo = triop->arg2;
+ IRExpr* argAmt = triop->arg3;
+ if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
+ UInt amt = argAmt->Iex.Const.con->Ico.U8;
+ Bool amtOK = amt >= 1 && amt <= 15;
+ /* We could also deal with amt==0 by copying argLo to
+ the destination, but there's no need for that so far. */
+ if (amtOK) {
+ HReg srcHi = iselV128Expr(env, argHi);
+ HReg srcLo = iselV128Expr(env, argLo);
+ HReg dst = newVRegV(env);
+ addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
+ return dst;
+ }
+ }
+ /* else fall out; this is unhandled */
+ }
+
+ } /* if (e->tag == Iex_Triop) */
v128_expr_bad:
ppIRExpr(e);