* add a previously missing m-reg check for some LD/ST vector cases
* implement
LD1/ST1 (multiple 1-elem structs to/from 2 regs)
LD1/ST1 (multiple 1-elem structs to/from 3 regs)
LD1/ST1 (multiple 1-elem structs to/from 4 regs)
LD1R (single structure, replicate)
LD2R (single structure, replicate)
LD3R (single structure, replicate)
LD4R (single structure, replicate)
LD1/ST1 (single structure, to/from one lane)
LD2/ST2 (single structure, to/from one lane)
LD3/ST3 (single structure, to/from one lane)
LD4/ST4 (single structure, to/from one lane)
I believe this completes the implementation of the load and store
instructions for AArch64 (ARMv8).
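
For example, forms like the following should now be accepted
(illustrative examples only, not taken from any test suite):

   ld1  {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
   ld3r {v4.4s, v5.4s, v6.4s}, [x1]
   st2  {v7.h, v8.h}[3], [x2], x3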
git-svn-id: svn://svn.valgrind.org/vex/trunk@2979 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index 8021a48..7f22f13 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -5606,17 +5606,17 @@
/* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
/* 31 29 26 22 21 20 15 11 9 4
- 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
- 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
+ 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
+ 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
- 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
- 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
+ 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
+ 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
- 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
- 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
+ 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
+ 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
- 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
- 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
+ 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
+ 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
T = defined by Q and sz in the normal way
step = if m == 11111 then transfer-size else Xm
@@ -5642,6 +5642,12 @@
case BITS4(0,1,1,1): nRegs = 1; break;
default: break;
}
+
+ /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
+ If we see it, set nRegs to 0 so as to cause the next conditional
+ to fail. */
+ if (!isPX && mm != 0)
+ nRegs = 0;
if (nRegs == 1 /* .1d is allowed */
|| (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
@@ -5744,7 +5750,6 @@
binop(Iop_Add64, mkexpr(tTA),
mkU64(1 * step)))));
/* fallthru */
-
case 1:
assign(i0, MAYBE_WIDEN_FROM_64(
loadLE(loadTy,
@@ -5813,145 +5818,448 @@
/* else fall through */
}
+ /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
+ /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
+ /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
+ /* 31 29 26 22 21 20 15 11 9 4
+
+ 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
+ 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
+
+ 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
+ 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
+
+ 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
+ 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
+
+ T = defined by Q and sz in the normal way
+ step = if m == 11111 then transfer-size else Xm
+ xx = case L of 1 -> LD ; 0 -> ST
+ */
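+   /* Worked example (a sketch, not from any test suite): 0x4C402C00
+      has q=1 L=1 opc=0010 sz=11 n=0 t=0, and so decodes as
+      ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]. */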
+ if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
+ && INSN(21,21) == 0) {
+      UInt bitQ  = INSN(30,30);
+ Bool isPX = INSN(23,23) == 1;
+ Bool isLD = INSN(22,22) == 1;
+ UInt mm = INSN(20,16);
+ UInt opc = INSN(15,12);
+ UInt sz = INSN(11,10);
+ UInt nn = INSN(9,5);
+ UInt tt = INSN(4,0);
+ Bool isQ = bitQ == 1;
+ UInt nRegs = 0;
+ switch (opc) {
+ case BITS4(0,0,1,0): nRegs = 4; break;
+ case BITS4(0,1,1,0): nRegs = 3; break;
+ case BITS4(1,0,1,0): nRegs = 2; break;
+ default: break;
+ }
+
+ /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
+ If we see it, set nRegs to 0 so as to cause the next conditional
+ to fail. */
+ if (!isPX && mm != 0)
+ nRegs = 0;
+
+ if (nRegs >= 2 && nRegs <= 4) {
+
+ UInt xferSzB = (isQ ? 16 : 8) * nRegs;
+
+ /* Generate the transfer address (TA) and if necessary the
+ writeback address (WB) */
+ IRTemp tTA = newTemp(Ity_I64);
+ assign(tTA, getIReg64orSP(nn));
+ if (nn == 31) { /* FIXME generate stack alignment check */ }
+ IRTemp tWB = IRTemp_INVALID;
+ if (isPX) {
+ tWB = newTemp(Ity_I64);
+ assign(tWB, binop(Iop_Add64,
+ mkexpr(tTA),
+ mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
+ : getIReg64orZR(mm)));
+ }
+
+ /* -- BEGIN generate the transfers -- */
+
+ IRTemp u0, u1, u2, u3;
+ u0 = u1 = u2 = u3 = IRTemp_INVALID;
+ switch (nRegs) {
+ case 4: u3 = newTempV128(); /* fallthru */
+ case 3: u2 = newTempV128(); /* fallthru */
+ case 2: u1 = newTempV128();
+ u0 = newTempV128(); break;
+ default: vassert(0);
+ }
+
+ /* -- Multiple 128 or 64 bit stores -- */
+ if (!isLD) {
+ switch (nRegs) {
+ case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
+ case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
+ case 2: assign(u1, getQReg128((tt+1) % 32));
+ assign(u0, getQReg128((tt+0) % 32)); break;
+ default: vassert(0);
+ }
+# define MAYBE_NARROW_TO_64(_expr) \
+ (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
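+            /* Note: the non-Q (64-bit) variants transfer only the low
+               halves of the source registers, hence the narrowing to
+               a 64-bit store above. */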
+ UInt step = isQ ? 16 : 8;
+ switch (nRegs) {
+ case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
+ MAYBE_NARROW_TO_64(mkexpr(u3)) );
+ /* fallthru */
+ case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
+ MAYBE_NARROW_TO_64(mkexpr(u2)) );
+ /* fallthru */
+ case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
+ MAYBE_NARROW_TO_64(mkexpr(u1)) );
+ storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
+ MAYBE_NARROW_TO_64(mkexpr(u0)) );
+ break;
+ default: vassert(0);
+ }
+# undef MAYBE_NARROW_TO_64
+ }
+
+ /* -- Multiple 128 or 64 bit loads -- */
+ else /* isLD */ {
+ UInt step = isQ ? 16 : 8;
+ IRType loadTy = isQ ? Ity_V128 : Ity_I64;
+# define MAYBE_WIDEN_FROM_64(_expr) \
+ (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
+ switch (nRegs) {
+ case 4:
+ assign(u3, MAYBE_WIDEN_FROM_64(
+ loadLE(loadTy,
+ binop(Iop_Add64, mkexpr(tTA),
+ mkU64(3 * step)))));
+ /* fallthru */
+ case 3:
+ assign(u2, MAYBE_WIDEN_FROM_64(
+ loadLE(loadTy,
+ binop(Iop_Add64, mkexpr(tTA),
+ mkU64(2 * step)))));
+ /* fallthru */
+ case 2:
+ assign(u1, MAYBE_WIDEN_FROM_64(
+ loadLE(loadTy,
+ binop(Iop_Add64, mkexpr(tTA),
+ mkU64(1 * step)))));
+ assign(u0, MAYBE_WIDEN_FROM_64(
+ loadLE(loadTy,
+ binop(Iop_Add64, mkexpr(tTA),
+ mkU64(0 * step)))));
+ break;
+ default:
+ vassert(0);
+ }
+# undef MAYBE_WIDEN_FROM_64
+ switch (nRegs) {
+ case 4: putQReg128( (tt+3) % 32,
+ math_MAYBE_ZERO_HI64(bitQ, u3));
+ /* fallthru */
+ case 3: putQReg128( (tt+2) % 32,
+ math_MAYBE_ZERO_HI64(bitQ, u2));
+ /* fallthru */
+ case 2: putQReg128( (tt+1) % 32,
+ math_MAYBE_ZERO_HI64(bitQ, u1));
+ putQReg128( (tt+0) % 32,
+ math_MAYBE_ZERO_HI64(bitQ, u0));
+ break;
+ default: vassert(0);
+ }
+ }
+
+ /* -- END generate the transfers -- */
+
+ /* Do the writeback, if necessary */
+ if (isPX) {
+ putIReg64orSP(nn, mkexpr(tWB));
+ }
+
+ HChar pxStr[20];
+ pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
+ if (isPX) {
+ if (mm == BITS5(1,1,1,1,1))
+ vex_sprintf(pxStr, ", #%u", xferSzB);
+ else
+ vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
+ }
+ const HChar* arr = nameArr_Q_SZ(bitQ, sz);
+ DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
+ isLD ? "ld" : "st",
+ (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
+ pxStr);
+
+ return True;
+ }
+ /* else fall through */
+ }
+
/* ---------- LD1R (single structure, replicate) ---------- */
+ /* ---------- LD2R (single structure, replicate) ---------- */
+ /* ---------- LD3R (single structure, replicate) ---------- */
+ /* ---------- LD4R (single structure, replicate) ---------- */
/* 31 29 22 20 15 11 9 4
- 0q 001 1010 10 00000 110 0 sz n t LD1R Vt.T, [Xn|SP]
- 0q 001 1011 10 m 110 0 sz n t LD1R Vt.T, [Xn|SP], #sz (m=11111)
- , Xm (m!=11111)
+ 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
+ 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
+
+ 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
+ 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
+
+ 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
+ 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
+
+ 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
+ 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
+
+ step = if m == 11111 then transfer-size else Xm
*/
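+   /* Worked example (a sketch, not from any test suite): 0x4D40C800
+      has q=1, insn[21]=0 and insn[13]=0 (so LD1R), sz=10, n=0, t=0,
+      and so decodes as ld1r {v0.4s}, [x0]. */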
if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
- && INSN(22,21) == BITS2(1,0) && INSN(15,12) == BITS4(1,1,0,0)) {
- UInt bitQ = INSN(30,30);
- Bool isPX = INSN(23,23) == 1;
- UInt mm = INSN(20,16);
- UInt sz = INSN(11,10);
- UInt nn = INSN(9,5);
- UInt tt = INSN(4,0);
- IRType ty = integerIRTypeOfSize(1 << sz);
- IRTemp tEA = newTemp(Ity_I64);
- assign(tEA, getIReg64orSP(nn));
- if (nn == 31) { /* FIXME generate stack alignment check */ }
- IRTemp loaded = newTemp(ty);
- assign(loaded, loadLE(ty, mkexpr(tEA)));
- IRTemp dupd = math_DUP_TO_V128(loaded, ty);
- putQReg128(tt, math_MAYBE_ZERO_HI64(bitQ, dupd));
- const HChar* arr = nameArr_Q_SZ(bitQ, sz);
- /* Deal with the writeback, if any. */
- if (!isPX && mm == BITS5(0,0,0,0,0)) {
- /* No writeback. */
- DIP("ld1r v%u.%s, [%s]\n", tt, arr, nameIReg64orSP(nn));
- return True;
- }
- if (isPX) {
- putIReg64orSP(nn, binop(Iop_Add64, mkexpr(tEA),
- mm == BITS5(1,1,1,1,1) ? mkU64(1 << sz)
- : getIReg64orZR(mm)));
- if (mm == BITS5(1,1,1,1,1)) {
- DIP("ld1r v%u.%s, [%s], %s\n", tt, arr,
- nameIReg64orSP(nn), nameIReg64orZR(mm));
- } else {
- DIP("ld1r v%u.%s, [%s], #%u\n", tt, arr,
- nameIReg64orSP(nn), 1 << sz);
+ && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
+ && INSN(12,12) == 0) {
+ UInt bitQ = INSN(30,30);
+ Bool isPX = INSN(23,23) == 1;
+ UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
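+      /* Per the table above, insn[13] selects between the 110 and 111
+         opcode rows (LD1R/LD2R vs LD3R/LD4R) and insn[21] between the
+         10 and 11 rows, so together they give nRegs - 1. */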
+ UInt mm = INSN(20,16);
+ UInt sz = INSN(11,10);
+ UInt nn = INSN(9,5);
+ UInt tt = INSN(4,0);
+
+ /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
+ if (isPX || mm == 0) {
+
+ IRType ty = integerIRTypeOfSize(1 << sz);
+
+ UInt laneSzB = 1 << sz;
+ UInt xferSzB = laneSzB * nRegs;
+
+ /* Generate the transfer address (TA) and if necessary the
+ writeback address (WB) */
+ IRTemp tTA = newTemp(Ity_I64);
+ assign(tTA, getIReg64orSP(nn));
+ if (nn == 31) { /* FIXME generate stack alignment check */ }
+ IRTemp tWB = IRTemp_INVALID;
+ if (isPX) {
+ tWB = newTemp(Ity_I64);
+ assign(tWB, binop(Iop_Add64,
+ mkexpr(tTA),
+ mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
+ : getIReg64orZR(mm)));
}
+
+ /* Do the writeback, if necessary */
+ if (isPX) {
+ putIReg64orSP(nn, mkexpr(tWB));
+ }
+
+ IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
+ e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
+ switch (nRegs) {
+ case 4:
+ e3 = newTemp(ty);
+ assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
+ mkU64(3 * laneSzB))));
+ v3 = math_DUP_TO_V128(e3, ty);
+ putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
+ /* fallthrough */
+ case 3:
+ e2 = newTemp(ty);
+ assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
+ mkU64(2 * laneSzB))));
+ v2 = math_DUP_TO_V128(e2, ty);
+ putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
+ /* fallthrough */
+ case 2:
+ e1 = newTemp(ty);
+ assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
+ mkU64(1 * laneSzB))));
+ v1 = math_DUP_TO_V128(e1, ty);
+ putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
+ /* fallthrough */
+ case 1:
+ e0 = newTemp(ty);
+ assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
+ mkU64(0 * laneSzB))));
+ v0 = math_DUP_TO_V128(e0, ty);
+ putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
+ break;
+ default:
+ vassert(0);
+ }
+
+ HChar pxStr[20];
+ pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
+ if (isPX) {
+ if (mm == BITS5(1,1,1,1,1))
+ vex_sprintf(pxStr, ", #%u", xferSzB);
+ else
+ vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
+ }
+ const HChar* arr = nameArr_Q_SZ(bitQ, sz);
+ DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
+ nRegs,
+ (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
+ pxStr);
+
return True;
}
- return False;
+ /* else fall through */
}
- /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, no offset) -------- */
- /* Only a very few cases. */
- /* 31 23
- 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
- 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
- */
- if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
- || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
- ) {
- Bool isLD = INSN(22,22) == 1;
- UInt rN = INSN(9,5);
- UInt vT = INSN(4,0);
- IRTemp tEA = newTemp(Ity_I64);
- const HChar* name = "16b";
- assign(tEA, getIReg64orSP(rN));
- if (rN == 31) { /* FIXME generate stack alignment check */ }
- IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
- IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
- if (isLD) {
- putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
- putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
- } else {
- storeLE(tEA_0, getQReg128((vT+0) % 32));
- storeLE(tEA_16, getQReg128((vT+1) % 32));
- }
- DIP("%s {v%u.%s, v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
- (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
- return True;
- }
+ /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
+ /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
+ /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
+ /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
+ /* 31 29 22 21 20 15 11 9 4
+ 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
+ 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
- /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, post index) -------- */
- /* Only a very few cases. */
- /* 31 23
- 0100 1100 1101 1111 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
- 0100 1100 1001 1111 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
- */
- if ( (insn & 0xFFFFFC00) == 0x4CDFA000 // LD1
- || (insn & 0xFFFFFC00) == 0x4C9FA000 // ST1
- ) {
- Bool isLD = INSN(22,22) == 1;
- UInt rN = INSN(9,5);
- UInt vT = INSN(4,0);
- IRTemp tEA = newTemp(Ity_I64);
- const HChar* name = "16b";
- assign(tEA, getIReg64orSP(rN));
- if (rN == 31) { /* FIXME generate stack alignment check */ }
- IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
- IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
- if (isLD) {
- putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
- putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
- } else {
- storeLE(tEA_0, getQReg128((vT+0) % 32));
- storeLE(tEA_16, getQReg128((vT+1) % 32));
- }
- putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
- DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
- (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
- return True;
- }
+ 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
+ 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
- /* -------- LD1/ST1 (multi 1-elem structs, 3 regs, no offset) -------- */
- /* Only a very few cases. */
- /* 31 23
- 0100 1100 0100 0000 0110 00 n t LD1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
- 0100 1100 0000 0000 0110 00 n t ST1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
+ 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
+ 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
+
+ 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
+ 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
+
+ step = if m == 11111 then transfer-size else Xm
+ op = case L of 1 -> LD ; 0 -> ST
+
+      laneSzB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
+ 01:b:b:b0 -> 2, bbb
+ 10:b:b:00 -> 4, bb
+ 10:b:0:01 -> 8, b
*/
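+   /* Worked example (a sketch, not from any test suite): 0x0D205800
+      has q=0, L=0, insn[21]=1, insn[13]=0 (so 2 registers), xx=01,
+      S=1, sz=10, n=0, t=0, and so decodes as
+      st2 {v0.h, v1.h}[3], [x0]. */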
- if ( (insn & 0xFFFFFC00) == 0x4C406000 // LD1
- || (insn & 0xFFFFFC00) == 0x4C006000 // ST1
- ) {
- Bool isLD = INSN(22,22) == 1;
- UInt rN = INSN(9,5);
- UInt vT = INSN(4,0);
- IRTemp tEA = newTemp(Ity_I64);
- const HChar* name = "16b";
- assign(tEA, getIReg64orSP(rN));
- if (rN == 31) { /* FIXME generate stack alignment check */ }
- IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
- IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
- IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
- if (isLD) {
- putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
- putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
- putQReg128((vT+2) % 32, loadLE(Ity_V128, tEA_32));
- } else {
- storeLE(tEA_0, getQReg128((vT+0) % 32));
- storeLE(tEA_16, getQReg128((vT+1) % 32));
- storeLE(tEA_32, getQReg128((vT+2) % 32));
+ if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
+ UInt bitQ = INSN(30,30);
+ Bool isPX = INSN(23,23) == 1;
+ Bool isLD = INSN(22,22) == 1;
+ UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
+ UInt mm = INSN(20,16);
+ UInt xx = INSN(15,14);
+ UInt bitS = INSN(12,12);
+ UInt sz = INSN(11,10);
+ UInt nn = INSN(9,5);
+ UInt tt = INSN(4,0);
+
+ Bool valid = True;
+
+ /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
+ if (!isPX && mm != 0)
+ valid = False;
+
+ UInt laneSzB = 0; /* invalid */
+ UInt ix = 16; /* invalid */
+
+ UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
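+      /* This packs the four selector fields into a single value, so
+         that the laneSzB/ix table in the comment above reduces to one
+         switch. */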
+ switch (xx_q_S_sz) {
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ case 0x04: case 0x05: case 0x06: case 0x07:
+ case 0x08: case 0x09: case 0x0A: case 0x0B:
+ case 0x0C: case 0x0D: case 0x0E: case 0x0F:
+ laneSzB = 1; ix = xx_q_S_sz & 0xF;
+ break;
+ case 0x10: case 0x12: case 0x14: case 0x16:
+ case 0x18: case 0x1A: case 0x1C: case 0x1E:
+ laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
+ break;
+ case 0x20: case 0x24: case 0x28: case 0x2C:
+ laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
+ break;
+ case 0x21: case 0x29:
+ laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
+ break;
+ default:
+ break;
}
- DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #32\n",
- isLD ? "ld1" : "st1",
- (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
- nameIReg64orSP(rN));
- return True;
+
+ if (valid && laneSzB != 0) {
+
+ IRType ty = integerIRTypeOfSize(laneSzB);
+ UInt xferSzB = laneSzB * nRegs;
+
+ /* Generate the transfer address (TA) and if necessary the
+ writeback address (WB) */
+ IRTemp tTA = newTemp(Ity_I64);
+ assign(tTA, getIReg64orSP(nn));
+ if (nn == 31) { /* FIXME generate stack alignment check */ }
+ IRTemp tWB = IRTemp_INVALID;
+ if (isPX) {
+ tWB = newTemp(Ity_I64);
+ assign(tWB, binop(Iop_Add64,
+ mkexpr(tTA),
+ mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
+ : getIReg64orZR(mm)));
+ }
+
+ /* Do the writeback, if necessary */
+ if (isPX) {
+ putIReg64orSP(nn, mkexpr(tWB));
+ }
+
+ switch (nRegs) {
+ case 4: {
+ IRExpr* addr
+ = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
+ if (isLD) {
+ putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
+ } else {
+ storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
+ }
+ /* fallthrough */
+ }
+ case 3: {
+ IRExpr* addr
+ = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
+ if (isLD) {
+ putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
+ } else {
+ storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
+ }
+ /* fallthrough */
+ }
+ case 2: {
+ IRExpr* addr
+ = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
+ if (isLD) {
+ putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
+ } else {
+ storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
+ }
+ /* fallthrough */
+ }
+ case 1: {
+ IRExpr* addr
+ = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
+ if (isLD) {
+ putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
+ } else {
+ storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
+ }
+ break;
+ }
+ default:
+ vassert(0);
+ }
+
+ HChar pxStr[20];
+ pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
+ if (isPX) {
+ if (mm == BITS5(1,1,1,1,1))
+ vex_sprintf(pxStr, ", #%u", xferSzB);
+ else
+ vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
+ }
+ const HChar* arr = nameArr_Q_SZ(bitQ, sz);
+ DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
+ isLD ? "ld" : "st", nRegs,
+ (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
+ ix, nameIReg64orSP(nn), pxStr);
+
+ return True;
+ }
+ /* else fall through */
}
/* ------------------ LD{,A}X{R,RH,RB} ------------------ */