[sve2] Implement eorbt and eortb
Implement the SVE2 interleaving exclusive OR instructions eorbt and eortb in
the simulator, and add disassembler and simulator tests for them.
Change-Id: Id0f7a4b8ff6e7ff0f18f9ecebc93ad1ce87bfb35
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index 400a6cd..e60492b 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -2702,6 +2702,17 @@
return dst;
}
+LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,  // Rotates src by `index` lanes of format vform (see the ext call below).
+ LogicVRegister dst,  // Passed to ext as its destination.
+ const LogicVRegister& src,  // Vector to rotate; supplied to ext as both source operands.
+ int index) {  // Rotation amount in lanes; a negative value has the lane count added to it.
+ if (index < 0) index += LaneCountFromFormat(vform);  // Map a negative amount to its non-negative equivalent.
+ VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));  // After wrapping, the amount must be below the lane count.
+ index *= LaneSizeInBytesFromFormat(vform);  // Convert from lanes to bytes, since ext is called in byte format.
+ return ext(kFormatVnB, dst, src, src, index);  // NOTE(review): presumably extracts from byte `index` of src:src, i.e. a rotation - confirm against ext.
+}
+
+
template <typename T>
LogicVRegister Simulator::fadda(VectorFormat vform,
LogicVRegister acc,
@@ -3027,7 +3038,6 @@
return sel(vform, dst, pg, src, dst);
}
-
LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
LogicVRegister dst,
const SimPRegister& pg,
@@ -3037,6 +3047,16 @@
return sel(vform, dst, pg, src, zero);
}
+LogicVRegister Simulator::mov_alternating(VectorFormat vform,  // Copies every second lane of src into dst, beginning at lane start_at.
+ LogicVRegister dst,  // Written only at the selected lanes; returned to the caller.
+ const LogicVRegister& src,  // Source of the copied lanes.
+ int start_at) {  // First lane to copy: 0 or 1.
+ VIXL_ASSERT((start_at == 0) || (start_at == 1));
+ for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {  // Step of two skips the lanes in between.
+ dst.SetUint(vform, i, src.Uint(vform, i));  // Lane i of src goes to lane i of dst.
+ }
+ return dst;
+}
LogicPRegister Simulator::mov_merging(LogicPRegister dst,
const LogicPRegister& pg,
@@ -3044,7 +3064,6 @@
return sel(dst, pg, src, dst);
}
-
LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src) {
@@ -3052,7 +3071,6 @@
return sel(dst, pg, src, pfalse(all_false));
}
-
LogicVRegister Simulator::movi(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 904a5f5..e9bd0d2 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -2261,13 +2261,12 @@
}
void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
SimVRegister& zd = ReadVRegister(instr->GetRd());
- USE(zd);
SimVRegister& zm = ReadVRegister(instr->GetRm());
- USE(zm);
SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister result;
USE(zn);
-
switch (form_hash_) {
case Hash("bdep_z_zz"):
VIXL_UNIMPLEMENTED();
@@ -2279,10 +2278,14 @@
VIXL_UNIMPLEMENTED();
break;
case Hash("eorbt_z_zz"):
- VIXL_UNIMPLEMENTED();
+ rotate_elements_right(vform, result, zm, 1);
+ SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
+ mov_alternating(vform, zd, result, 0);
break;
case Hash("eortb_z_zz"):
- VIXL_UNIMPLEMENTED();
+ rotate_elements_right(vform, result, zm, -1);
+ SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
+ mov_alternating(vform, zd, result, 1);
break;
case Hash("mul_z_zz"):
VIXL_UNIMPLEMENTED();
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index d7dc79f..f6eff42 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -3271,6 +3271,10 @@
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
+ LogicVRegister rotate_elements_right(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int index);
template <typename T>
LogicVRegister fcadd(VectorFormat vform,
LogicVRegister dst,
@@ -3348,6 +3352,10 @@
LogicVRegister dst,
const SimPRegister& pg,
const LogicVRegister& src);
+ LogicVRegister mov_alternating(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int start_at);
LogicPRegister mov_merging(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src);
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index f7c6111..6d8f85f 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -7214,6 +7214,21 @@
CLEANUP();
}
+TEST(sve2_eorbt_eortb) {  // Checks the disassembly text of eorbt and eortb for B, D, H and S lane sizes.
+ SETUP();
+
+ COMPARE(eorbt(z3.VnB(), z10.VnB(), z8.VnB()), "eorbt z3.b, z10.b, z8.b");
+ COMPARE(eorbt(z3.VnD(), z10.VnD(), z8.VnD()), "eorbt z3.d, z10.d, z8.d");
+ COMPARE(eorbt(z3.VnH(), z10.VnH(), z8.VnH()), "eorbt z3.h, z10.h, z8.h");
+ COMPARE(eorbt(z3.VnS(), z10.VnS(), z8.VnS()), "eorbt z3.s, z10.s, z8.s");
+ COMPARE(eortb(z21.VnB(), z21.VnB(), z15.VnB()), "eortb z21.b, z21.b, z15.b");  // The eortb cases reuse z21 as both destination and first source.
+ COMPARE(eortb(z21.VnD(), z21.VnD(), z15.VnD()), "eortb z21.d, z21.d, z15.d");
+ COMPARE(eortb(z21.VnH(), z21.VnH(), z15.VnH()), "eortb z21.h, z21.h, z15.h");
+ COMPARE(eortb(z21.VnS(), z21.VnS(), z15.VnS()), "eortb z21.s, z21.s, z15.s");
+
+ CLEANUP();
+}
+
TEST(sve2_all_instructions) {
// TODO: split these instructions into more logical groups.
SETUP();
@@ -7294,22 +7309,6 @@
// z2.s, <const>");
// COMPARE_PREFIX(eor3(z10.VnD(), z10.VnD(), z24.VnD(), int Zk.VnD()), "eor3
// <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D");
- // COMPARE_PREFIX(eorbt(z3.VnB(), z10.VnB(), z8.VnB()), "eorbt z3.b, z10.b,
- // z8.b");
- // COMPARE_PREFIX(eorbt(z3.VnD(), z10.VnD(), z8.VnD()), "eorbt z3.d, z10.d,
- // z8.d");
- // COMPARE_PREFIX(eorbt(z3.VnH(), z10.VnH(), z8.VnH()), "eorbt z3.h, z10.h,
- // z8.h");
- // COMPARE_PREFIX(eorbt(z3.VnS(), z10.VnS(), z8.VnS()), "eorbt z3.s, z10.s,
- // z8.s");
- // COMPARE_PREFIX(eortb(z21.VnB(), z21.VnB(), z15.VnB()), "eortb z21.b, z21.b,
- // z15.b");
- // COMPARE_PREFIX(eortb(z21.VnD(), z21.VnD(), z15.VnD()), "eortb z21.d, z21.d,
- // z15.d");
- // COMPARE_PREFIX(eortb(z21.VnH(), z21.VnH(), z15.VnH()), "eortb z21.h, z21.h,
- // z15.h");
- // COMPARE_PREFIX(eortb(z21.VnS(), z21.VnS(), z15.VnS()), "eortb z21.s, z21.s,
- // z15.s");
// COMPARE_PREFIX(ext(z13.VnB(), z11.VnB(), z12.VnB()), "ext z13.b, { z15.b,
// z16.b }, #<imm>");
// COMPARE_PREFIX(faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()), "faddp
diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc
index 0782b77..e6988e9 100644
--- a/test/aarch64/test-simulator-sve2-aarch64.cc
+++ b/test/aarch64/test-simulator-sve2-aarch64.cc
@@ -1889,5 +1889,149 @@
}
}
+TEST_SVE(sve2_eorbt_eortb) {  // Emits 50 raw eorbt/eortb encodings, then compares a hash of the machine state with a per-vector-length expected value.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);  // One instruction slot per dci word below (50 in total).
+ __ dci(0x451892b8); // eorbt z24.b, z21.b, z24.b
+ // vl128 state = 0xc3f2b082
+ __ dci(0x455893ba); // eorbt z26.h, z29.h, z24.h
+ // vl128 state = 0xc7421198
+ __ dci(0x455892f8); // eorbt z24.h, z23.h, z24.h
+ // vl128 state = 0x4e155b96
+ __ dci(0x455092bc); // eorbt z28.h, z21.h, z16.h
+ // vl128 state = 0x09393ad0
+ __ dci(0x455893be); // eorbt z30.h, z29.h, z24.h
+ // vl128 state = 0x6d660844
+ __ dci(0x4558922e); // eorbt z14.h, z17.h, z24.h
+ // vl128 state = 0x84f1ff20
+ __ dci(0x45d892aa); // eorbt z10.d, z21.d, z24.d
+ // vl128 state = 0x568612d4
+ __ dci(0x454892a8); // eorbt z8.h, z21.h, z8.h
+ // vl128 state = 0x699a3e24
+ __ dci(0x45c890ac); // eorbt z12.d, z5.d, z8.d
+ // vl128 state = 0x17bb6d9b
+ __ dci(0x45c990ed); // eorbt z13.d, z7.d, z9.d
+ // vl128 state = 0xee5be73f
+ __ dci(0x45c892fd); // eorbt z29.d, z23.d, z8.d
+ // vl128 state = 0x141c47ed
+ __ dci(0x45c892f9); // eorbt z25.d, z23.d, z8.d
+ // vl128 state = 0xc3259593
+ __ dci(0x45c892f8); // eorbt z24.d, z23.d, z8.d
+ // vl128 state = 0x3bca0bcc
+ __ dci(0x45c892e8); // eorbt z8.d, z23.d, z8.d
+ // vl128 state = 0x4714ab64
+ __ dci(0x454a92ea); // eorbt z10.h, z23.h, z10.h
+ // vl128 state = 0x51360c73
+ __ dci(0x454092e2); // eorbt z2.h, z23.h, z0.h
+ // vl128 state = 0xe33859fe
+ __ dci(0x454092f2); // eorbt z18.h, z23.h, z0.h
+ // vl128 state = 0xa0d81168
+ __ dci(0x4550927a); // eorbt z26.h, z19.h, z16.h
+ // vl128 state = 0xe4983274
+ __ dci(0x4551923b); // eorbt z27.h, z17.h, z17.h
+ // vl128 state = 0x8e89eab7
+ __ dci(0x45d3923f); // eorbt z31.d, z17.d, z19.d
+ // vl128 state = 0x472bd288
+ __ dci(0x4553921d); // eorbt z29.h, z16.h, z19.h
+ // vl128 state = 0x61090ed4
+ __ dci(0x4553932d); // eorbt z13.h, z25.h, z19.h
+ // vl128 state = 0x3ef228eb
+ __ dci(0x4513912c); // eorbt z12.b, z9.b, z19.b
+ // vl128 state = 0x96d4505c
+ __ dci(0x4551912d); // eorbt z13.h, z9.h, z17.h
+ // vl128 state = 0x1c32baef
+ __ dci(0x45119029); // eorbt z9.b, z1.b, z17.b
+ // vl128 state = 0xa138f554
+ __ dci(0x45149028); // eorbt z8.b, z1.b, z20.b
+ // vl128 state = 0xf0681d9a
+ __ dci(0x459490aa); // eorbt z10.s, z5.s, z20.s
+ // vl128 state = 0xbd4b30f5
+ __ dci(0x458590a8); // eorbt z8.s, z5.s, z5.s
+ // vl128 state = 0x45c5b437
+ __ dci(0x4585948c); // eortb z12.s, z4.s, z5.s
+ // vl128 state = 0x22f90a7b
+ __ dci(0x45cd949c); // eortb z28.d, z4.d, z13.d
+ // vl128 state = 0x5e4584ca
+ __ dci(0x4589949d); // eortb z29.s, z4.s, z9.s
+ // vl128 state = 0x65ac913e
+ __ dci(0x458990ad); // eorbt z13.s, z5.s, z9.s
+ // vl128 state = 0x4f13d973
+ __ dci(0x459b90ac); // eorbt z12.s, z5.s, z27.s
+ // vl128 state = 0xd13bb801
+ __ dci(0x45db90ee); // eorbt z14.d, z7.d, z27.d
+ // vl128 state = 0xf24115d0
+ __ dci(0x45db916f); // eorbt z15.d, z11.d, z27.d
+ // vl128 state = 0x04f38375
+ __ dci(0x45db95e7); // eortb z7.d, z15.d, z27.d
+ // vl128 state = 0xe1046ae5
+ __ dci(0x45db94a3); // eortb z3.d, z5.d, z27.d
+ // vl128 state = 0xaaeae67e
+ __ dci(0x45dd94a1); // eortb z1.d, z5.d, z29.d
+ // vl128 state = 0xd67f6823
+ __ dci(0x45dd94b1); // eortb z17.d, z5.d, z29.d
+ // vl128 state = 0xf172245b
+ __ dci(0x45dd90f3); // eorbt z19.d, z7.d, z29.d
+ // vl128 state = 0xc99195b8
+ __ dci(0x458d90e3); // eorbt z3.s, z7.s, z13.s
+ // vl128 state = 0xe1a146cf
+ __ dci(0x458994e2); // eortb z2.s, z7.s, z9.s
+ // vl128 state = 0x8038f273
+ __ dci(0x458b94a3); // eortb z3.s, z5.s, z11.s
+ // vl128 state = 0x50bda372
+ __ dci(0x459b9481); // eortb z1.s, z4.s, z27.s
+ // vl128 state = 0xe8d53012
+ __ dci(0x455b9485); // eortb z5.h, z4.h, z27.h
+ // vl128 state = 0xdba33ea5
+ __ dci(0x454b9087); // eorbt z7.h, z4.h, z11.h
+ // vl128 state = 0xff7f1815
+ __ dci(0x45499003); // eorbt z3.h, z0.h, z9.h
+ // vl128 state = 0x5d6e0104
+ __ dci(0x454d9022); // eorbt z2.h, z1.h, z13.h
+ // vl128 state = 0xe9161cfe
+ __ dci(0x45099026); // eorbt z6.b, z1.b, z9.b
+ // vl128 state = 0x48126fb9
+ __ dci(0x454b9024); // eorbt z4.h, z1.h, z11.h
+ // vl128 state = 0x53cbfc46
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);  // NOTE(review): presumably emits code that writes the final hash to `state` - confirm against the helper.
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));  // Load the 32-bit value at &state into w0 for the check below.
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {  // 16 expected values; the first equals the last vl128 state noted above.
+ 0x53cbfc46,
+ 0x0f81a01e,
+ 0xf97c4e96,
+ 0x745e9ed6,
+ 0x4487a0a1,
+ 0x7ad79509,
+ 0x53577280,
+ 0x1e589717,
+ 0xaaa96af0,
+ 0x4f2b0884,
+ 0x24d2cd1c,
+ 0x4d89438d,
+ 0x9b327a12,
+ 0xeabfd558,
+ 0xb63e33f1,
+ 0xebd7d9ca,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);  // Entry chosen by GetSVELaneCount(kQRegSize) - 1; presumably one entry per vector length in Q-register-sized steps - confirm.
+ }
+}
+
} // namespace aarch64
} // namespace vixl