| // Copyright 2016, VIXL authors |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // * Redistributions of source code must retain the above copyright notice, |
| // this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // * Neither the name of ARM Limited nor the names of its contributors may be |
| // used to endorse or promote products derived from this software without |
| // specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND |
| // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE |
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include <cfloat> |
| #include <cmath> |
| #include <cstdio> |
| #include <cstdlib> |
| #include <cstring> |
| |
| #include "test-runner.h" |
| #include "test-utils-aarch64.h" |
| |
| #include "aarch64/cpu-aarch64.h" |
| #include "aarch64/debugger-aarch64.h" |
| #include "aarch64/disasm-aarch64.h" |
| #include "aarch64/macro-assembler-aarch64.h" |
| #include "aarch64/simulator-aarch64.h" |
| |
| namespace vixl { |
| namespace aarch64 { |
| // Trace tests can only work with the simulator. |
| #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 |
| |
// Shorthand that makes each emitted instruction read like a line of assembly:
// `__ add(...)` expands to `masm->add(...)`, so a pointer named `masm` must be
// in scope wherever `__` is used.
| #define __ masm-> |
// Declares a test through TEST_, prefixing the given name with TRACE_.
// NOTE(review): TEST_ is not defined in this file; presumably it comes from
// test-runner.h -- confirm.
| #define TEST(name) TEST_(TRACE_##name) |
| |
// Emits a fixed sequence of general-purpose (integer, system, load/store and
// branch) instructions into `masm`.
//
// masm: the assembler that receives the instructions; the code is appended to
//       its current buffer position.
//
// The instructions are listed in alphabetical order of mnemonic, followed by a
// short branch sequence. The order and the exact operands are the payload of
// this function, so do not reorder or "tidy" the list.
//
// Memory operands use x0 as the base for offset addressing and x1 as the base
// for post-/pre-indexed addressing (which writes x1 back).
// NOTE(review): nothing here initialises x0 or x1; presumably the caller points
// them at valid memory before this code is executed -- confirm at call sites.
| static void GenerateTestSequenceBase(MacroAssembler* masm) { |
// The scope is sized to everything left in the buffer, with kMaximumSize.
// NOTE(review): presumably kMaximumSize makes that size an upper bound rather
// than an exact requirement -- see ExactAssemblyScope.
| ExactAssemblyScope guard(masm, |
| masm->GetBuffer()->GetRemainingBytes(), |
| ExactAssemblyScope::kMaximumSize); |
| |
| __ adc(w3, w4, w5); |
| __ adc(x6, x7, x8); |
| __ adcs(w9, w10, w11); |
| __ adcs(x12, x13, x14); |
| __ add(w15, w16, w17); |
| __ add(x18, x19, x20); |
| __ adds(w21, w22, w23); |
| __ adds(x24, x25, x26); |
| __ and_(w27, w28, w29); |
| __ and_(x2, x3, x4); |
| __ ands(w5, w6, w7); |
| __ ands(x8, x9, x10); |
| __ asr(w11, w12, 0); |
| __ asr(x13, x14, 1); |
| __ asrv(w15, w16, w17); |
| __ asrv(x18, x19, x20); |
| __ bfm(w21, w22, 5, 6); |
| __ bfm(x23, x24, 7, 8); |
| __ bic(w25, w26, w27); |
| __ bic(x28, x29, x2); |
| __ bics(w3, w4, w5); |
| __ bics(x6, x7, x8); |
| __ ccmn(w9, w10, NoFlag, al); |
| __ ccmn(w9, w10, NoFlag, eq); |
| __ ccmn(w9, w10, NoFlag, ne); |
| __ ccmn(x11, x12, CFlag, al); |
| __ ccmn(x11, x12, CFlag, cc); |
| __ ccmn(x11, x12, CFlag, cs); |
| __ ccmp(w13, w14, VFlag, al); |
| __ ccmp(w13, w14, VFlag, hi); |
| __ ccmp(w13, w14, VFlag, ls); |
| __ ccmp(x15, x16, CVFlag, al); |
| __ ccmp(x15, x16, CVFlag, eq); |
| __ ccmp(x15, x16, CVFlag, ne); |
| __ cinc(w17, w18, cc); |
| __ cinc(w17, w18, cs); |
| __ cinc(x19, x20, hi); |
| __ cinc(x19, x20, ls); |
| __ cinv(w21, w22, eq); |
| __ cinv(w21, w22, ne); |
| __ cinv(x23, x24, cc); |
| __ cinv(x23, x24, cs); |
| __ clrex(); |
| __ cls(w25, w26); |
| __ cls(x27, x28); |
| __ clz(w29, w2); |
| __ clz(x3, x4); |
| __ cmn(w5, w6); |
| __ cmn(x7, x8); |
| __ cmp(w9, w10); |
| __ cmp(x11, x12); |
| __ cneg(w13, w14, hi); |
| __ cneg(w13, w14, ls); |
| __ cneg(x15, x16, eq); |
| __ cneg(x15, x16, ne); |
| __ crc32b(w17, w18, w19); |
| __ crc32cb(w20, w21, w22); |
| __ crc32ch(w23, w24, w25); |
| __ crc32cw(w26, w27, w28); |
| __ crc32h(w4, w5, w6); |
| __ crc32w(w7, w8, w9); |
| __ csel(w13, w14, w15, cc); |
| __ csel(w13, w14, w15, cs); |
| __ csel(x16, x17, x18, hi); |
| __ csel(x16, x17, x18, ls); |
| __ cset(w19, eq); |
| __ cset(w19, ne); |
| __ cset(x20, cc); |
| __ cset(x20, cs); |
| __ csetm(w21, hi); |
| __ csetm(w21, ls); |
| __ csetm(x22, eq); |
| __ csetm(x22, ne); |
| __ csinc(w23, w24, w25, cc); |
| __ csinc(w23, w24, w25, cs); |
| __ csinc(x26, x27, x28, hi); |
| __ csinc(x26, x27, x28, ls); |
| __ csinv(w29, w2, w3, eq); |
| __ csinv(w29, w2, w3, ne); |
| __ csinv(x4, x5, x6, cc); |
| __ csinv(x4, x5, x6, cs); |
| __ csneg(w7, w8, w9, hi); |
| __ csneg(w7, w8, w9, ls); |
| __ csneg(x10, x11, x12, eq); |
| __ csneg(x10, x11, x12, ne); |
| __ dc(CVAC, x0); |
| __ dmb(InnerShareable, BarrierAll); |
| __ dsb(InnerShareable, BarrierAll); |
| __ eon(w13, w14, w15); |
| __ eon(x16, x17, x18); |
| __ eor(w19, w20, w21); |
| __ eor(x22, x23, x24); |
| __ extr(w25, w26, w27, 9); |
| __ extr(x28, x29, x2, 10); |
| __ hint(NOP); |
| __ ic(IVAU, x0); |
| __ isb(); |
// Loads: base x0 for offset forms, base x1 for post-/pre-indexed forms.
| __ ldar(w3, MemOperand(x0)); |
| __ ldar(x4, MemOperand(x0)); |
| __ ldarb(w5, MemOperand(x0)); |
| __ ldarb(x6, MemOperand(x0)); |
| __ ldarh(w7, MemOperand(x0)); |
| __ ldarh(x8, MemOperand(x0)); |
| __ ldaxp(w9, w10, MemOperand(x0)); |
| __ ldaxp(x11, x12, MemOperand(x0)); |
| __ ldaxr(w13, MemOperand(x0)); |
| __ ldaxr(x14, MemOperand(x0)); |
| __ ldaxrb(w15, MemOperand(x0)); |
| __ ldaxrb(x16, MemOperand(x0)); |
| __ ldaxrh(w17, MemOperand(x0)); |
| __ ldaxrh(x18, MemOperand(x0)); |
| __ ldnp(w19, w20, MemOperand(x0)); |
| __ ldnp(x21, x22, MemOperand(x0)); |
| __ ldp(w23, w24, MemOperand(x0)); |
| __ ldp(w23, w24, MemOperand(x1, 8, PostIndex)); |
| __ ldp(w23, w24, MemOperand(x1, 8, PreIndex)); |
| __ ldp(x25, x26, MemOperand(x0)); |
| __ ldp(x25, x26, MemOperand(x1, 16, PostIndex)); |
| __ ldp(x25, x26, MemOperand(x1, 16, PreIndex)); |
| __ ldpsw(x27, x28, MemOperand(x0)); |
| __ ldpsw(x27, x28, MemOperand(x1, 8, PostIndex)); |
| __ ldpsw(x27, x28, MemOperand(x1, 8, PreIndex)); |
| __ ldr(w29, MemOperand(x0)); |
| __ ldr(w29, MemOperand(x1, 4, PostIndex)); |
| __ ldr(w29, MemOperand(x1, 4, PreIndex)); |
| __ ldr(x2, MemOperand(x0)); |
| __ ldr(x2, MemOperand(x1, 8, PostIndex)); |
| __ ldr(x2, MemOperand(x1, 8, PreIndex)); |
| __ ldrb(w3, MemOperand(x0)); |
| __ ldrb(w3, MemOperand(x1, 1, PostIndex)); |
| __ ldrb(w3, MemOperand(x1, 1, PreIndex)); |
| __ ldrb(x4, MemOperand(x0)); |
| __ ldrb(x4, MemOperand(x1, 1, PostIndex)); |
| __ ldrb(x4, MemOperand(x1, 1, PreIndex)); |
| __ ldrh(w5, MemOperand(x0)); |
| __ ldrh(w5, MemOperand(x1, 2, PostIndex)); |
| __ ldrh(w5, MemOperand(x1, 2, PreIndex)); |
| __ ldrh(x6, MemOperand(x0)); |
| __ ldrh(x6, MemOperand(x1, 2, PostIndex)); |
| __ ldrh(x6, MemOperand(x1, 2, PreIndex)); |
| __ ldrsb(w7, MemOperand(x0)); |
| __ ldrsb(w7, MemOperand(x1, 1, PostIndex)); |
| __ ldrsb(w7, MemOperand(x1, 1, PreIndex)); |
| __ ldrsb(x8, MemOperand(x0)); |
| __ ldrsb(x8, MemOperand(x1, 1, PostIndex)); |
| __ ldrsb(x8, MemOperand(x1, 1, PreIndex)); |
| __ ldrsh(w9, MemOperand(x0)); |
| __ ldrsh(w9, MemOperand(x1, 2, PostIndex)); |
| __ ldrsh(w9, MemOperand(x1, 2, PreIndex)); |
| __ ldrsh(x10, MemOperand(x0)); |
| __ ldrsh(x10, MemOperand(x1, 2, PostIndex)); |
| __ ldrsh(x10, MemOperand(x1, 2, PreIndex)); |
| __ ldrsw(x11, MemOperand(x0)); |
| __ ldrsw(x11, MemOperand(x1, 4, PostIndex)); |
| __ ldrsw(x11, MemOperand(x1, 4, PreIndex)); |
| __ ldur(w12, MemOperand(x0, 7)); |
| __ ldur(x13, MemOperand(x0, 15)); |
| __ ldurb(w14, MemOperand(x0, 1)); |
| __ ldurb(x15, MemOperand(x0, 1)); |
| __ ldurh(w16, MemOperand(x0, 3)); |
| __ ldurh(x17, MemOperand(x0, 3)); |
| __ ldursb(w18, MemOperand(x0, 1)); |
| __ ldursb(x19, MemOperand(x0, 1)); |
| __ ldursh(w20, MemOperand(x0, 3)); |
| __ ldursh(x21, MemOperand(x0, 3)); |
| __ ldursw(x22, MemOperand(x0, 7)); |
| __ ldxp(w23, w24, MemOperand(x0)); |
| __ ldxp(x25, x26, MemOperand(x0)); |
| __ ldxr(w27, MemOperand(x0)); |
| __ ldxr(x28, MemOperand(x0)); |
| __ ldxrb(w29, MemOperand(x0)); |
| __ ldxrb(x2, MemOperand(x0)); |
| __ ldxrh(w3, MemOperand(x0)); |
| __ ldxrh(x4, MemOperand(x0)); |
| __ lsl(w5, w6, 2); |
| __ lsl(x7, x8, 3); |
| __ lslv(w9, w10, w11); |
| __ lslv(x12, x13, x14); |
| __ lsr(w15, w16, 4); |
| __ lsr(x17, x18, 5); |
| __ lsrv(w19, w20, w21); |
| __ lsrv(x22, x23, x24); |
| __ madd(w25, w26, w27, w28); |
| __ madd(x29, x2, x3, x4); |
| __ mneg(w5, w6, w7); |
| __ mneg(x8, x9, x10); |
| __ mov(w11, w12); |
| __ mov(x13, x14); |
| __ movk(w15, 130); |
| __ movk(x16, 131); |
| __ movn(w17, 132); |
| __ movn(x18, 133); |
| __ movz(w19, 134); |
| __ movz(x20, 135); |
| __ msub(w22, w23, w24, w25); |
| __ msub(x26, x27, x28, x29); |
| __ mul(w2, w3, w4); |
| __ mul(x5, x6, x7); |
| __ mvn(w8, w9); |
| __ mvn(x10, x11); |
| __ neg(w12, w13); |
| __ neg(x14, x15); |
| __ negs(w16, w17); |
| __ negs(x18, x19); |
| __ ngc(w20, w21); |
| __ ngc(x22, x23); |
| __ ngcs(w24, w25); |
| __ ngcs(x26, x27); |
| __ nop(); |
| __ orn(w28, w29, w2); |
| __ orn(x3, x4, x5); |
| __ orr(w6, w7, w8); |
| __ orr(x9, x10, x11); |
| __ prfm(PLDL1KEEP, MemOperand(x0, 4)); |
| __ prfum(PLDL1KEEP, MemOperand(x0, 1)); |
| __ rbit(w12, w13); |
| __ rbit(x14, x15); |
| __ rev(w16, w17); |
| __ rev(x18, x19); |
| __ rev16(w20, w21); |
| __ rev16(x22, x23); |
| __ rev32(x24, x25); |
| __ rorv(w26, w27, w28); |
| __ rorv(x29, x2, x3); |
| __ sbc(w4, w5, w6); |
| __ sbc(x7, x8, x9); |
| __ sbcs(w10, w11, w12); |
| __ sbcs(x13, x14, x15); |
| __ sbfiz(w16, w17, 2, 3); |
| __ sbfiz(x18, x19, 4, 5); |
| __ sbfx(w22, w23, 6, 7); |
| __ sbfx(x24, x25, 8, 9); |
| __ sdiv(w26, w27, w28); |
| __ sdiv(x29, x2, x3); |
| __ smulh(x12, x13, x14); |
// Stores: same addressing convention as the loads (x0 offset, x1 indexed).
| __ stlr(w18, MemOperand(x0)); |
| __ stlr(x19, MemOperand(x0)); |
| __ stlrb(w20, MemOperand(x0)); |
| __ stlrb(x21, MemOperand(x0)); |
| __ stlrh(w22, MemOperand(x0)); |
| __ stlrh(x23, MemOperand(x0)); |
| __ stlxp(w24, w25, w26, MemOperand(x0)); |
| __ stlxp(x27, x28, x29, MemOperand(x0)); |
| __ stlxr(w2, w3, MemOperand(x0)); |
| __ stlxr(x4, x5, MemOperand(x0)); |
| __ stlxrb(w6, w7, MemOperand(x0)); |
| __ stlxrb(x8, x9, MemOperand(x0)); |
| __ stlxrh(w10, w11, MemOperand(x0)); |
| __ stlxrh(x12, x13, MemOperand(x0)); |
| __ stnp(w14, w15, MemOperand(x0)); |
| __ stnp(x16, x17, MemOperand(x0)); |
| __ stp(w18, w19, MemOperand(x0)); |
| __ stp(w18, w19, MemOperand(x1, 8, PostIndex)); |
| __ stp(w18, w19, MemOperand(x1, 8, PreIndex)); |
| __ stp(x20, x21, MemOperand(x0)); |
| __ stp(x20, x21, MemOperand(x1, 16, PostIndex)); |
| __ stp(x20, x21, MemOperand(x1, 16, PreIndex)); |
| __ str(w22, MemOperand(x0)); |
| __ str(w22, MemOperand(x1, 4, PostIndex)); |
| __ str(w22, MemOperand(x1, 4, PreIndex)); |
| __ str(x23, MemOperand(x0)); |
| __ str(x23, MemOperand(x1, 8, PostIndex)); |
| __ str(x23, MemOperand(x1, 8, PreIndex)); |
| __ strb(w24, MemOperand(x0)); |
| __ strb(w24, MemOperand(x1, 1, PostIndex)); |
| __ strb(w24, MemOperand(x1, 1, PreIndex)); |
| __ strb(x25, MemOperand(x0)); |
| __ strb(x25, MemOperand(x1, 1, PostIndex)); |
| __ strb(x25, MemOperand(x1, 1, PreIndex)); |
| __ strh(w26, MemOperand(x0)); |
| __ strh(w26, MemOperand(x1, 2, PostIndex)); |
| __ strh(w26, MemOperand(x1, 2, PreIndex)); |
| __ strh(x27, MemOperand(x0)); |
| __ strh(x27, MemOperand(x1, 2, PostIndex)); |
| __ strh(x27, MemOperand(x1, 2, PreIndex)); |
| __ stur(w28, MemOperand(x0, 7)); |
| __ stur(x29, MemOperand(x0, 15)); |
| __ sturb(w2, MemOperand(x0, 1)); |
| __ sturb(x3, MemOperand(x0, 1)); |
| __ sturh(w4, MemOperand(x0, 3)); |
| __ sturh(x5, MemOperand(x0, 3)); |
| __ stxp(w6, w7, w8, MemOperand(x0)); |
| __ stxp(x9, x10, x11, MemOperand(x0)); |
| __ stxr(w12, w13, MemOperand(x0)); |
| __ stxr(x14, x15, MemOperand(x0)); |
| __ stxrb(w16, w17, MemOperand(x0)); |
| __ stxrb(x18, x19, MemOperand(x0)); |
| __ stxrh(w20, w21, MemOperand(x0)); |
| __ stxrh(x22, x23, MemOperand(x0)); |
| __ sub(w24, w25, w26); |
| __ sub(x27, x28, x29); |
| __ subs(w2, w3, w4); |
| __ subs(x5, x6, x7); |
| __ sxtb(w8, w9); |
| __ sxtb(x10, x11); |
| __ sxth(w12, w13); |
| __ sxth(x14, x15); |
| __ sxtw(w16, w17); |
| __ sxtw(x18, x19); |
| __ tst(w20, w21); |
| __ tst(x22, x23); |
| __ ubfiz(w24, w25, 10, 11); |
| __ ubfiz(x26, x27, 12, 13); |
| __ ubfm(w28, w29, 14, 15); |
| __ ubfm(x2, x3, 1, 2); |
| __ ubfx(w4, w5, 3, 4); |
| __ ubfx(x6, x7, 5, 6); |
| __ udiv(w8, w9, w10); |
| __ udiv(x11, x12, x13); |
| __ umulh(x22, x23, x24); |
| __ uxtb(w28, w29); |
| __ uxtb(x2, x3); |
| __ uxth(w4, w5); |
| __ uxth(x6, x7); |
| __ uxtw(w8, w9); |
| __ uxtw(x10, x11); |
| |
| // Branch tests. |
| { |
| Label end; |
| // Branch to the next instruction. |
| __ b(&end); |
| __ bind(&end); |
| } |
| { |
| Label loop, end; |
// x3 - x3 is zero, so this leaves x3 == 0 with the Z flag set: `ne` is false
// the first time the conditional branch below is reached.
| __ subs(x3, x3, x3); |
| __ bind(&loop); |
| // Not-taken branch (the first time). |
| // Taken branch (the second time). |
| __ b(&end, ne); |
// x3 (zero) differs from 1, so `ne` holds when execution returns to `loop`,
// which makes the sequence terminate after exactly one backwards branch.
| __ cmp(x3, 1); |
| // Backwards branch. |
| __ b(&loop); |
| __ bind(&end); |
| } |
| } |
| |
| |
// Emits a fixed sequence of scalar floating-point instructions into `masm`.
//
// masm: the assembler that receives the instructions; the code is appended to
//       its current buffer position.
//
// The list runs in alphabetical order of mnemonic (fabd ... fsub, then scvtf
// and ucvtf) and covers the s and d register forms. The fcvt lines also use h
// registers, and the conversions and fmov also move values to or from w/x
// registers. No instruction in this sequence accesses memory. The order and
// the exact operands are the payload of this function, so do not reorder or
// "tidy" the list.
| static void GenerateTestSequenceFP(MacroAssembler* masm) { |
// The scope is sized to everything left in the buffer, with kMaximumSize.
// NOTE(review): presumably kMaximumSize makes that size an upper bound rather
// than an exact requirement -- see ExactAssemblyScope.
| ExactAssemblyScope guard(masm, |
| masm->GetBuffer()->GetRemainingBytes(), |
| ExactAssemblyScope::kMaximumSize); |
| |
| // Scalar floating point instructions. |
| __ fabd(d13, d2, d19); |
| __ fabd(s8, s10, s30); |
| __ fabs(d1, d1); |
| __ fabs(s25, s7); |
| __ facge(d1, d23, d16); |
| __ facge(s4, s17, s1); |
| __ facgt(d2, d21, d24); |
| __ facgt(s12, s26, s12); |
| __ fadd(d13, d11, d22); |
| __ fadd(s27, s19, s8); |
| __ fccmp(d6, d10, NoFlag, hs); |
| __ fccmp(s29, s20, NZVFlag, ne); |
| __ fccmpe(d10, d2, NZCFlag, al); |
| __ fccmpe(s3, s3, NZVFlag, pl); |
| __ fcmeq(d19, d8, d10); |
| __ fcmeq(d0, d18, 0.0); |
| __ fcmeq(s1, s4, s30); |
| __ fcmeq(s22, s29, 0.0); |
| __ fcmge(d27, d18, d1); |
| __ fcmge(d31, d28, 0.0); |
| __ fcmge(s31, s19, s9); |
| __ fcmge(s1, s25, 0.0); |
| __ fcmgt(d18, d1, d15); |
| __ fcmgt(d3, d31, 0.0); |
| __ fcmgt(s11, s25, s2); |
| __ fcmgt(s17, s16, 0.0); |
| __ fcmle(d24, d17, 0.0); |
| __ fcmle(s11, s8, 0.0); |
| __ fcmlt(d5, d31, 0.0); |
| __ fcmlt(s18, s23, 0.0); |
| __ fcmp(d10, d24); |
| __ fcmp(d13, 0.0); |
| __ fcmp(s18, s6); |
| __ fcmp(s16, 0.0); |
| __ fcmpe(d9, d17); |
| __ fcmpe(d29, 0.0); |
| __ fcmpe(s16, s17); |
| __ fcmpe(s22, 0.0); |
| __ fcsel(d10, d14, d19, gt); |
| __ fcsel(s22, s18, s2, ge); |
| __ fcvt(d4, h24); |
| __ fcvt(d11, s2); |
| __ fcvt(h8, d9); |
| __ fcvt(h12, s1); |
| __ fcvt(s12, d31); |
| __ fcvt(s27, h25); |
| __ fcvtas(d28, d16); |
| __ fcvtas(s3, s5); |
| __ fcvtas(w18, d31); |
| __ fcvtas(w29, s24); |
| __ fcvtas(x9, d1); |
| __ fcvtas(x30, s2); |
| __ fcvtau(d14, d0); |
| __ fcvtau(s31, s14); |
| __ fcvtau(w16, d2); |
| __ fcvtau(w18, s0); |
| __ fcvtau(x26, d7); |
| __ fcvtau(x25, s19); |
| __ fcvtms(d30, d25); |
| __ fcvtms(s12, s15); |
| __ fcvtms(w9, d7); |
| __ fcvtms(w19, s6); |
| __ fcvtms(x6, d6); |
| __ fcvtms(x22, s7); |
| __ fcvtmu(d27, d0); |
| __ fcvtmu(s8, s22); |
| __ fcvtmu(w29, d19); |
| __ fcvtmu(w26, s0); |
| __ fcvtmu(x13, d5); |
| __ fcvtmu(x5, s18); |
| __ fcvtns(d30, d15); |
| __ fcvtns(s10, s11); |
| __ fcvtns(w21, d15); |
| __ fcvtns(w18, s10); |
| __ fcvtns(x8, d17); |
| __ fcvtns(x17, s12); |
| __ fcvtnu(d0, d21); |
| __ fcvtnu(s6, s25); |
| __ fcvtnu(w29, d11); |
| __ fcvtnu(w25, s31); |
| __ fcvtnu(x30, d11); |
| __ fcvtnu(x27, s18); |
| __ fcvtps(d11, d22); |
| __ fcvtps(s29, s20); |
| __ fcvtps(w15, d25); |
| __ fcvtps(w16, s7); |
| __ fcvtps(x13, d20); |
| __ fcvtps(x3, s23); |
| __ fcvtpu(d24, d1); |
| __ fcvtpu(s14, s24); |
| __ fcvtpu(w26, d29); |
| __ fcvtpu(wzr, s26); |
| __ fcvtpu(x27, d6); |
| __ fcvtpu(x29, s14); |
| __ fcvtxn(s12, d12); |
| __ fcvtzs(d15, d0); |
| __ fcvtzs(d13, d4, 42); |
| __ fcvtzs(s8, s11); |
| __ fcvtzs(s31, s6, 25); |
| __ fcvtzs(w6, d9); |
| __ fcvtzs(w25, d10, 20); |
| __ fcvtzs(w9, s1); |
| __ fcvtzs(w17, s29, 30); |
| __ fcvtzs(x19, d2); |
| __ fcvtzs(x22, d14, 1); |
| __ fcvtzs(x14, s20); |
| __ fcvtzs(x3, s30, 33); |
| __ fcvtzu(d28, d15); |
| __ fcvtzu(d0, d4, 3); |
| __ fcvtzu(s2, s5); |
| __ fcvtzu(s4, s0, 30); |
| __ fcvtzu(w11, d4); |
| __ fcvtzu(w7, d24, 32); |
| __ fcvtzu(w18, s24); |
| __ fcvtzu(w14, s27, 4); |
| __ fcvtzu(x22, d11); |
| __ fcvtzu(x8, d27, 52); |
| __ fcvtzu(x7, s20); |
| __ fcvtzu(x22, s7, 44); |
| __ fdiv(d6, d14, d15); |
| __ fdiv(s26, s5, s25); |
| __ fmadd(d18, d26, d12, d30); |
| __ fmadd(s13, s9, s28, s4); |
| __ fmax(d12, d5, d5); |
| __ fmax(s12, s28, s6); |
| __ fmaxnm(d28, d4, d2); |
| __ fmaxnm(s6, s10, s8); |
| __ fmin(d20, d20, d18); |
| __ fmin(s7, s13, s16); |
| __ fminnm(d19, d14, d30); |
| __ fminnm(s0, s1, s1); |
| __ fmov(d13, d6); |
| __ fmov(d2, x17); |
| __ fmov(d8, -2.5000); |
| __ fmov(s5, s3); |
| __ fmov(s25, w20); |
| __ fmov(s21, 2.8750f); |
| __ fmov(w18, s24); |
| __ fmov(x18, d2); |
| __ fmsub(d20, d30, d3, d19); |
| __ fmsub(s5, s19, s4, s12); |
| __ fmul(d30, d27, d23); |
| __ fmul(s25, s17, s15); |
| __ fmulx(d4, d17, d1); |
| __ fmulx(s14, s25, s4); |
| __ fneg(d15, d0); |
| __ fneg(s14, s15); |
| __ fnmadd(d0, d16, d22, d31); |
| __ fnmadd(s0, s18, s26, s18); |
| __ fnmsub(d19, d12, d15, d21); |
| __ fnmsub(s29, s0, s11, s26); |
| __ fnmul(d31, d19, d1); |
| __ fnmul(s18, s3, s17); |
| __ frecpe(d7, d21); |
| __ frecpe(s29, s17); |
| __ frecps(d11, d26, d17); |
| __ frecps(s18, s27, s1); |
| __ frecpx(d15, d18); |
| __ frecpx(s5, s10); |
| __ frinta(d16, d30); |
| __ frinta(s1, s22); |
| __ frinti(d19, d29); |
| __ frinti(s14, s21); |
| __ frintm(d20, d30); |
| __ frintm(s1, s16); |
| __ frintn(d30, d1); |
| __ frintn(s24, s10); |
| __ frintp(d4, d20); |
| __ frintp(s13, s3); |
| __ frintx(d13, d20); |
| __ frintx(s17, s7); |
| __ frintz(d0, d8); |
| __ frintz(s15, s29); |
| __ frsqrte(d21, d10); |
| __ frsqrte(s17, s25); |
| __ frsqrts(d4, d29, d17); |
| __ frsqrts(s14, s3, s24); |
| __ fsqrt(d14, d17); |
| __ fsqrt(s4, s14); |
| __ fsub(d13, d19, d7); |
| __ fsub(s3, s21, s27); |
| __ scvtf(d31, d16); |
| __ scvtf(d26, d31, 24); |
| __ scvtf(d6, w16); |
| __ scvtf(d5, w20, 6); |
| __ scvtf(d16, x8); |
| __ scvtf(d15, x8, 10); |
| __ scvtf(s7, s4); |
| __ scvtf(s8, s15, 14); |
| __ scvtf(s29, w10); |
| __ scvtf(s15, w21, 11); |
| __ scvtf(s27, x26); |
| __ scvtf(s26, x12, 38); |
| __ ucvtf(d0, d9); |
| __ ucvtf(d5, d22, 47); |
| __ ucvtf(d30, w27); |
| __ ucvtf(d3, w19, 1); |
| __ ucvtf(d28, x21); |
| __ ucvtf(d27, x30, 35); |
| __ ucvtf(s11, s5); |
| __ ucvtf(s0, s23, 14); |
| __ ucvtf(s20, w19); |
| __ ucvtf(s21, w22, 18); |
| __ ucvtf(s6, x13); |
| __ ucvtf(s7, x2, 21); |
| } |
| |
| |
| static void GenerateTestSequenceNEON(MacroAssembler* masm) { |
| ExactAssemblyScope guard(masm, |
| masm->GetBuffer()->GetRemainingBytes(), |
| ExactAssemblyScope::kMaximumSize); |
| |
| // NEON integer instructions. |
| __ abs(d19, d0); |
| __ abs(v16.V16B(), v11.V16B()); |
| __ abs(v0.V2D(), v31.V2D()); |
| __ abs(v27.V2S(), v25.V2S()); |
| __ abs(v21.V4H(), v27.V4H()); |
| __ abs(v16.V4S(), v1.V4S()); |
| __ abs(v31.V8B(), v5.V8B()); |
| __ abs(v29.V8H(), v13.V8H()); |
| __ add(d10, d5, d17); |
| __ add(v31.V16B(), v15.V16B(), v23.V16B()); |
| __ add(v10.V2D(), v31.V2D(), v14.V2D()); |
| __ add(v15.V2S(), v14.V2S(), v19.V2S()); |
| __ add(v27.V4H(), v23.V4H(), v17.V4H()); |
| __ add(v25.V4S(), v28.V4S(), v29.V4S()); |
| __ add(v13.V8B(), v7.V8B(), v18.V8B()); |
| __ add(v4.V8H(), v2.V8H(), v1.V8H()); |
| __ addhn(v10.V2S(), v14.V2D(), v15.V2D()); |
| __ addhn(v10.V4H(), v30.V4S(), v26.V4S()); |
| __ addhn(v31.V8B(), v12.V8H(), v22.V8H()); |
| __ addhn2(v16.V16B(), v21.V8H(), v20.V8H()); |
| __ addhn2(v0.V4S(), v2.V2D(), v17.V2D()); |
| __ addhn2(v31.V8H(), v7.V4S(), v17.V4S()); |
| __ addp(d14, v19.V2D()); |
| __ addp(v3.V16B(), v8.V16B(), v28.V16B()); |
| __ addp(v8.V2D(), v5.V2D(), v17.V2D()); |
| __ addp(v22.V2S(), v30.V2S(), v26.V2S()); |
| __ addp(v29.V4H(), v24.V4H(), v14.V4H()); |
| __ addp(v30.V4S(), v26.V4S(), v24.V4S()); |
| __ addp(v12.V8B(), v26.V8B(), v7.V8B()); |
| __ addp(v17.V8H(), v8.V8H(), v12.V8H()); |
| __ addv(b27, v23.V16B()); |
| __ addv(b12, v20.V8B()); |
| __ addv(h27, v30.V4H()); |
| __ addv(h19, v14.V8H()); |
| __ addv(s14, v27.V4S()); |
| __ and_(v10.V16B(), v8.V16B(), v27.V16B()); |
| __ and_(v5.V8B(), v1.V8B(), v16.V8B()); |
| __ bic(v26.V16B(), v3.V16B(), v24.V16B()); |
| __ bic(v7.V2S(), 0xe4, 16); |
| __ bic(v28.V4H(), 0x23, 8); |
| __ bic(v29.V4S(), 0xac); |
| __ bic(v12.V8B(), v31.V8B(), v21.V8B()); |
| __ bic(v18.V8H(), 0x98); |
| __ bif(v12.V16B(), v26.V16B(), v8.V16B()); |
| __ bif(v2.V8B(), v23.V8B(), v27.V8B()); |
| __ bit(v8.V16B(), v3.V16B(), v13.V16B()); |
| __ bit(v5.V8B(), v5.V8B(), v23.V8B()); |
| __ bsl(v9.V16B(), v31.V16B(), v23.V16B()); |
| __ bsl(v14.V8B(), v7.V8B(), v3.V8B()); |
| __ cls(v29.V16B(), v5.V16B()); |
| __ cls(v21.V2S(), v0.V2S()); |
| __ cls(v1.V4H(), v12.V4H()); |
| __ cls(v27.V4S(), v10.V4S()); |
| __ cls(v19.V8B(), v4.V8B()); |
| __ cls(v15.V8H(), v14.V8H()); |
| __ clz(v1.V16B(), v4.V16B()); |
| __ clz(v27.V2S(), v17.V2S()); |
| __ clz(v9.V4H(), v9.V4H()); |
| __ clz(v31.V4S(), v15.V4S()); |
| __ clz(v14.V8B(), v19.V8B()); |
| __ clz(v6.V8H(), v11.V8H()); |
| __ cmeq(d18, d5, d29); |
| __ cmeq(d14, d31, 0); |
| __ cmeq(v19.V16B(), v3.V16B(), v22.V16B()); |
| __ cmeq(v15.V16B(), v9.V16B(), 0); |
| __ cmeq(v12.V2D(), v16.V2D(), v10.V2D()); |
| __ cmeq(v8.V2D(), v22.V2D(), 0); |
| __ cmeq(v2.V2S(), v3.V2S(), v9.V2S()); |
| __ cmeq(v16.V2S(), v25.V2S(), 0); |
| __ cmeq(v6.V4H(), v23.V4H(), v20.V4H()); |
| __ cmeq(v16.V4H(), v13.V4H(), 0); |
| __ cmeq(v21.V4S(), v17.V4S(), v2.V4S()); |
| __ cmeq(v6.V4S(), v25.V4S(), 0); |
| __ cmeq(v16.V8B(), v13.V8B(), v2.V8B()); |
| __ cmeq(v21.V8B(), v16.V8B(), 0); |
| __ cmeq(v20.V8H(), v7.V8H(), v25.V8H()); |
| __ cmeq(v26.V8H(), v8.V8H(), 0); |
| __ cmge(d16, d13, d31); |
| __ cmge(d25, d24, 0); |
| __ cmge(v17.V16B(), v19.V16B(), v17.V16B()); |
| __ cmge(v22.V16B(), v30.V16B(), 0); |
| __ cmge(v28.V2D(), v20.V2D(), v26.V2D()); |
| __ cmge(v6.V2D(), v23.V2D(), 0); |
| __ cmge(v25.V2S(), v22.V2S(), v3.V2S()); |
| __ cmge(v21.V2S(), v11.V2S(), 0); |
| __ cmge(v16.V4H(), v3.V4H(), v12.V4H()); |
| __ cmge(v23.V4H(), v9.V4H(), 0); |
| __ cmge(v7.V4S(), v2.V4S(), v11.V4S()); |
| __ cmge(v0.V4S(), v22.V4S(), 0); |
| __ cmge(v10.V8B(), v30.V8B(), v9.V8B()); |
| __ cmge(v21.V8B(), v8.V8B(), 0); |
| __ cmge(v2.V8H(), v7.V8H(), v26.V8H()); |
| __ cmge(v19.V8H(), v10.V8H(), 0); |
| __ cmgt(d6, d13, d1); |
| __ cmgt(d30, d24, 0); |
| __ cmgt(v20.V16B(), v25.V16B(), v27.V16B()); |
| __ cmgt(v0.V16B(), v25.V16B(), 0); |
| __ cmgt(v22.V2D(), v25.V2D(), v1.V2D()); |
| __ cmgt(v16.V2D(), v16.V2D(), 0); |
| __ cmgt(v5.V2S(), v9.V2S(), v15.V2S()); |
| __ cmgt(v12.V2S(), v18.V2S(), 0); |
| __ cmgt(v28.V4H(), v18.V4H(), v11.V4H()); |
| __ cmgt(v22.V4H(), v3.V4H(), 0); |
| __ cmgt(v5.V4S(), v11.V4S(), v27.V4S()); |
| __ cmgt(v13.V4S(), v20.V4S(), 0); |
| __ cmgt(v27.V8B(), v31.V8B(), v7.V8B()); |
| __ cmgt(v5.V8B(), v0.V8B(), 0); |
| __ cmgt(v22.V8H(), v28.V8H(), v13.V8H()); |
| __ cmgt(v6.V8H(), v2.V8H(), 0); |
| __ cmhi(d21, d8, d22); |
| __ cmhi(v18.V16B(), v19.V16B(), v19.V16B()); |
| __ cmhi(v7.V2D(), v0.V2D(), v21.V2D()); |
| __ cmhi(v15.V2S(), v19.V2S(), v0.V2S()); |
| __ cmhi(v31.V4H(), v7.V4H(), v12.V4H()); |
| __ cmhi(v9.V4S(), v16.V4S(), v22.V4S()); |
| __ cmhi(v7.V8B(), v24.V8B(), v28.V8B()); |
| __ cmhi(v11.V8H(), v10.V8H(), v25.V8H()); |
| __ cmhs(d1, d12, d17); |
| __ cmhs(v21.V16B(), v25.V16B(), v30.V16B()); |
| __ cmhs(v8.V2D(), v2.V2D(), v26.V2D()); |
| __ cmhs(v1.V2S(), v22.V2S(), v29.V2S()); |
| __ cmhs(v26.V4H(), v30.V4H(), v30.V4H()); |
| __ cmhs(v19.V4S(), v20.V4S(), v16.V4S()); |
| __ cmhs(v1.V8B(), v3.V8B(), v26.V8B()); |
| __ cmhs(v20.V8H(), v28.V8H(), v8.V8H()); |
| __ cmle(d30, d24, 0); |
| __ cmle(v0.V16B(), v3.V16B(), 0); |
| __ cmle(v2.V2D(), v30.V2D(), 0); |
| __ cmle(v7.V2S(), v10.V2S(), 0); |
| __ cmle(v9.V4H(), v31.V4H(), 0); |
| __ cmle(v9.V4S(), v18.V4S(), 0); |
| __ cmle(v21.V8B(), v31.V8B(), 0); |
| __ cmle(v29.V8H(), v21.V8H(), 0); |
| __ cmlt(d25, d23, 0); |
| __ cmlt(v7.V16B(), v21.V16B(), 0); |
| __ cmlt(v7.V2D(), v30.V2D(), 0); |
| __ cmlt(v25.V2S(), v28.V2S(), 0); |
| __ cmlt(v0.V4H(), v11.V4H(), 0); |
| __ cmlt(v24.V4S(), v5.V4S(), 0); |
| __ cmlt(v26.V8B(), v11.V8B(), 0); |
| __ cmlt(v1.V8H(), v21.V8H(), 0); |
| __ cmtst(d28, d23, d30); |
| __ cmtst(v26.V16B(), v6.V16B(), v31.V16B()); |
| __ cmtst(v1.V2D(), v21.V2D(), v4.V2D()); |
| __ cmtst(v27.V2S(), v26.V2S(), v20.V2S()); |
| __ cmtst(v26.V4H(), v0.V4H(), v18.V4H()); |
| __ cmtst(v25.V4S(), v16.V4S(), v4.V4S()); |
| __ cmtst(v11.V8B(), v10.V8B(), v9.V8B()); |
| __ cmtst(v0.V8H(), v2.V8H(), v1.V8H()); |
| __ cnt(v25.V16B(), v15.V16B()); |
| __ cnt(v28.V8B(), v6.V8B()); |
| __ dup(v6.V16B(), v7.B(), 7); |
| __ dup(v9.V16B(), w20); |
| __ dup(v12.V2D(), v13.D(), 1); |
| __ dup(v9.V2D(), xzr); |
| __ dup(v4.V2S(), v26.S(), 2); |
| __ dup(v3.V2S(), w12); |
| __ dup(v22.V4H(), v5.H(), 7); |
| __ dup(v16.V4H(), w25); |
| __ dup(v20.V4S(), v10.S(), 2); |
| __ dup(v10.V4S(), w7); |
| __ dup(v30.V8B(), v30.B(), 2); |
| __ dup(v31.V8B(), w15); |
| __ dup(v28.V8H(), v17.H(), 4); |
| __ dup(v2.V8H(), w3); |
| __ eor(v29.V16B(), v25.V16B(), v3.V16B()); |
| __ eor(v3.V8B(), v16.V8B(), v28.V8B()); |
| __ ext(v1.V16B(), v26.V16B(), v6.V16B(), 1); |
| __ ext(v2.V8B(), v30.V8B(), v1.V8B(), 1); |
| __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); |
| __ ld1(v23.V16B(), |
| v24.V16B(), |
| v25.V16B(), |
| v26.V16B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld1(v5.V16B(), |
| v6.V16B(), |
| v7.V16B(), |
| v8.V16B(), |
| MemOperand(x1, 64, PostIndex)); |
| __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), MemOperand(x0)); |
| __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x1, 48, PostIndex)); |
| __ ld1(v17.V16B(), v18.V16B(), MemOperand(x0)); |
| __ ld1(v20.V16B(), v21.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v28.V16B(), v29.V16B(), MemOperand(x1, 32, PostIndex)); |
| __ ld1(v29.V16B(), MemOperand(x0)); |
| __ ld1(v21.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v4.V16B(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), MemOperand(x0)); |
| __ ld1(v17.V1D(), |
| v18.V1D(), |
| v19.V1D(), |
| v20.V1D(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld1(v28.V1D(), |
| v29.V1D(), |
| v30.V1D(), |
| v31.V1D(), |
| MemOperand(x1, 32, PostIndex)); |
| __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), MemOperand(x0)); |
| __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), MemOperand(x1, 24, PostIndex)); |
| __ ld1(v29.V1D(), v30.V1D(), MemOperand(x0)); |
| __ ld1(v31.V1D(), v0.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v3.V1D(), v4.V1D(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v28.V1D(), MemOperand(x0)); |
| __ ld1(v11.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v29.V1D(), MemOperand(x1, 8, PostIndex)); |
| __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), MemOperand(x0)); |
| __ ld1(v8.V2D(), |
| v9.V2D(), |
| v10.V2D(), |
| v11.V2D(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld1(v14.V2D(), |
| v15.V2D(), |
| v16.V2D(), |
| v17.V2D(), |
| MemOperand(x1, 64, PostIndex)); |
| __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x0)); |
| __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x1, 48, PostIndex)); |
| __ ld1(v18.V2D(), v19.V2D(), MemOperand(x0)); |
| __ ld1(v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v17.V2D(), v18.V2D(), MemOperand(x1, 32, PostIndex)); |
| __ ld1(v5.V2D(), MemOperand(x0)); |
| __ ld1(v6.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v15.V2D(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x0)); |
| __ ld1(v24.V2S(), |
| v25.V2S(), |
| v26.V2S(), |
| v27.V2S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld1(v27.V2S(), |
| v28.V2S(), |
| v29.V2S(), |
| v30.V2S(), |
| MemOperand(x1, 32, PostIndex)); |
| __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x0)); |
| __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x1, 24, PostIndex)); |
| __ ld1(v0.V2S(), v1.V2S(), MemOperand(x0)); |
| __ ld1(v13.V2S(), v14.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v3.V2S(), v4.V2S(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v26.V2S(), MemOperand(x0)); |
| __ ld1(v0.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v11.V2S(), MemOperand(x1, 8, PostIndex)); |
| __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); |
| __ ld1(v24.V4H(), |
| v25.V4H(), |
| v26.V4H(), |
| v27.V4H(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); |
| __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), MemOperand(x0)); |
| __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 24, PostIndex)); |
| __ ld1(v3.V4H(), v4.V4H(), MemOperand(x0)); |
| __ ld1(v3.V4H(), v4.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v23.V4H(), v24.V4H(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v26.V4H(), MemOperand(x0)); |
| __ ld1(v1.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v14.V4H(), MemOperand(x1, 8, PostIndex)); |
| __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), MemOperand(x0)); |
| __ ld1(v28.V4S(), |
| v29.V4S(), |
| v30.V4S(), |
| v31.V4S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), MemOperand(x1, 64, PostIndex)); |
| __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0)); |
| __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x1, 48, PostIndex)); |
| __ ld1(v20.V4S(), v21.V4S(), MemOperand(x0)); |
| __ ld1(v30.V4S(), v31.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v11.V4S(), v12.V4S(), MemOperand(x1, 32, PostIndex)); |
| __ ld1(v15.V4S(), MemOperand(x0)); |
| __ ld1(v12.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v0.V4S(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), MemOperand(x0)); |
| __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v9.V8B(), |
| v10.V8B(), |
| v11.V8B(), |
| v12.V8B(), |
| MemOperand(x1, 32, PostIndex)); |
| __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), MemOperand(x0)); |
| __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex)); |
| __ ld1(v10.V8B(), v11.V8B(), MemOperand(x0)); |
| __ ld1(v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v27.V8B(), v28.V8B(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v31.V8B(), MemOperand(x0)); |
| __ ld1(v10.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v28.V8B(), MemOperand(x1, 8, PostIndex)); |
| __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0)); |
| __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v10.V8H(), |
| v11.V8H(), |
| v12.V8H(), |
| v13.V8H(), |
| MemOperand(x1, 64, PostIndex)); |
| __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0)); |
| __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), MemOperand(x1, 48, PostIndex)); |
| __ ld1(v4.V8H(), v5.V8H(), MemOperand(x0)); |
| __ ld1(v21.V8H(), v22.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex)); |
| __ ld1(v9.V8H(), MemOperand(x0)); |
| __ ld1(v27.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1(v26.V8H(), MemOperand(x1, 16, PostIndex)); |
| __ ld1(v19.B(), 1, MemOperand(x0)); |
| __ ld1(v12.B(), 3, MemOperand(x1, x2, PostIndex)); |
| __ ld1(v27.B(), 12, MemOperand(x1, 1, PostIndex)); |
| __ ld1(v10.D(), 1, MemOperand(x0)); |
| __ ld1(v26.D(), 1, MemOperand(x1, x2, PostIndex)); |
| __ ld1(v7.D(), 1, MemOperand(x1, 8, PostIndex)); |
| __ ld1(v19.H(), 5, MemOperand(x0)); |
| __ ld1(v10.H(), 1, MemOperand(x1, x2, PostIndex)); |
| __ ld1(v5.H(), 4, MemOperand(x1, 2, PostIndex)); |
| __ ld1(v21.S(), 2, MemOperand(x0)); |
| __ ld1(v13.S(), 2, MemOperand(x1, x2, PostIndex)); |
| __ ld1(v1.S(), 2, MemOperand(x1, 4, PostIndex)); |
| __ ld1r(v2.V16B(), MemOperand(x0)); |
| __ ld1r(v2.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v22.V16B(), MemOperand(x1, 1, PostIndex)); |
| __ ld1r(v25.V1D(), MemOperand(x0)); |
| __ ld1r(v9.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v23.V1D(), MemOperand(x1, 8, PostIndex)); |
| __ ld1r(v19.V2D(), MemOperand(x0)); |
| __ ld1r(v21.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v30.V2D(), MemOperand(x1, 8, PostIndex)); |
| __ ld1r(v24.V2S(), MemOperand(x0)); |
| __ ld1r(v26.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v28.V2S(), MemOperand(x1, 4, PostIndex)); |
| __ ld1r(v19.V4H(), MemOperand(x0)); |
| __ ld1r(v1.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v21.V4H(), MemOperand(x1, 2, PostIndex)); |
| __ ld1r(v15.V4S(), MemOperand(x0)); |
| __ ld1r(v21.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v23.V4S(), MemOperand(x1, 4, PostIndex)); |
| __ ld1r(v26.V8B(), MemOperand(x0)); |
| __ ld1r(v14.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v19.V8B(), MemOperand(x1, 1, PostIndex)); |
| __ ld1r(v13.V8H(), MemOperand(x0)); |
| __ ld1r(v30.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld1r(v27.V8H(), MemOperand(x1, 2, PostIndex)); |
| __ ld2(v21.V16B(), v22.V16B(), MemOperand(x0)); |
| __ ld2(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld2(v12.V16B(), v13.V16B(), MemOperand(x1, 32, PostIndex)); |
| __ ld2(v14.V2D(), v15.V2D(), MemOperand(x0)); |
| __ ld2(v0.V2D(), v1.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld2(v12.V2D(), v13.V2D(), MemOperand(x1, 32, PostIndex)); |
| __ ld2(v27.V2S(), v28.V2S(), MemOperand(x0)); |
| __ ld2(v2.V2S(), v3.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld2(v12.V2S(), v13.V2S(), MemOperand(x1, 16, PostIndex)); |
| __ ld2(v9.V4H(), v10.V4H(), MemOperand(x0)); |
| __ ld2(v23.V4H(), v24.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld2(v1.V4H(), v2.V4H(), MemOperand(x1, 16, PostIndex)); |
| __ ld2(v20.V4S(), v21.V4S(), MemOperand(x0)); |
| __ ld2(v10.V4S(), v11.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld2(v24.V4S(), v25.V4S(), MemOperand(x1, 32, PostIndex)); |
| __ ld2(v17.V8B(), v18.V8B(), MemOperand(x0)); |
| __ ld2(v13.V8B(), v14.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld2(v7.V8B(), v8.V8B(), MemOperand(x1, 16, PostIndex)); |
| __ ld2(v30.V8H(), v31.V8H(), MemOperand(x0)); |
| __ ld2(v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld2(v13.V8H(), v14.V8H(), MemOperand(x1, 32, PostIndex)); |
| __ ld2(v5.B(), v6.B(), 12, MemOperand(x0)); |
| __ ld2(v16.B(), v17.B(), 7, MemOperand(x1, x2, PostIndex)); |
| __ ld2(v29.B(), v30.B(), 2, MemOperand(x1, 2, PostIndex)); |
| __ ld2(v11.D(), v12.D(), 1, MemOperand(x0)); |
| __ ld2(v26.D(), v27.D(), 0, MemOperand(x1, x2, PostIndex)); |
| __ ld2(v25.D(), v26.D(), 0, MemOperand(x1, 16, PostIndex)); |
| __ ld2(v18.H(), v19.H(), 7, MemOperand(x0)); |
| __ ld2(v17.H(), v18.H(), 5, MemOperand(x1, x2, PostIndex)); |
| __ ld2(v30.H(), v31.H(), 2, MemOperand(x1, 4, PostIndex)); |
| __ ld2(v29.S(), v30.S(), 3, MemOperand(x0)); |
| __ ld2(v28.S(), v29.S(), 0, MemOperand(x1, x2, PostIndex)); |
| __ ld2(v6.S(), v7.S(), 1, MemOperand(x1, 8, PostIndex)); |
| __ ld2r(v26.V16B(), v27.V16B(), MemOperand(x0)); |
| __ ld2r(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v5.V16B(), v6.V16B(), MemOperand(x1, 2, PostIndex)); |
| __ ld2r(v26.V1D(), v27.V1D(), MemOperand(x0)); |
| __ ld2r(v14.V1D(), v15.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v23.V1D(), v24.V1D(), MemOperand(x1, 16, PostIndex)); |
| __ ld2r(v11.V2D(), v12.V2D(), MemOperand(x0)); |
| __ ld2r(v29.V2D(), v30.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v15.V2D(), v16.V2D(), MemOperand(x1, 16, PostIndex)); |
| __ ld2r(v26.V2S(), v27.V2S(), MemOperand(x0)); |
| __ ld2r(v22.V2S(), v23.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v2.V2S(), v3.V2S(), MemOperand(x1, 8, PostIndex)); |
| __ ld2r(v2.V4H(), v3.V4H(), MemOperand(x0)); |
| __ ld2r(v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v6.V4H(), v7.V4H(), MemOperand(x1, 4, PostIndex)); |
| __ ld2r(v7.V4S(), v8.V4S(), MemOperand(x0)); |
| __ ld2r(v19.V4S(), v20.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v21.V4S(), v22.V4S(), MemOperand(x1, 8, PostIndex)); |
| __ ld2r(v26.V8B(), v27.V8B(), MemOperand(x0)); |
| __ ld2r(v20.V8B(), v21.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v11.V8B(), v12.V8B(), MemOperand(x1, 2, PostIndex)); |
| __ ld2r(v12.V8H(), v13.V8H(), MemOperand(x0)); |
| __ ld2r(v6.V8H(), v7.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld2r(v25.V8H(), v26.V8H(), MemOperand(x1, 4, PostIndex)); |
| __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x0)); |
| __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x1, 48, PostIndex)); |
| __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), MemOperand(x0)); |
| __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), MemOperand(x1, 48, PostIndex)); |
| __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x0)); |
| __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), MemOperand(x1, 24, PostIndex)); |
| __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), MemOperand(x0)); |
| __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 24, PostIndex)); |
| __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0)); |
| __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x1, 48, PostIndex)); |
| __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x0)); |
| __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex)); |
| __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), MemOperand(x0)); |
| __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), MemOperand(x1, 48, PostIndex)); |
| __ ld3(v21.B(), v22.B(), v23.B(), 11, MemOperand(x0)); |
| __ ld3(v5.B(), v6.B(), v7.B(), 9, MemOperand(x1, x2, PostIndex)); |
| __ ld3(v23.B(), v24.B(), v25.B(), 0, MemOperand(x1, 3, PostIndex)); |
| __ ld3(v16.D(), v17.D(), v18.D(), 0, MemOperand(x0)); |
| __ ld3(v30.D(), v31.D(), v0.D(), 0, MemOperand(x1, x2, PostIndex)); |
| __ ld3(v28.D(), v29.D(), v30.D(), 1, MemOperand(x1, 24, PostIndex)); |
| __ ld3(v13.H(), v14.H(), v15.H(), 2, MemOperand(x0)); |
| __ ld3(v22.H(), v23.H(), v24.H(), 7, MemOperand(x1, x2, PostIndex)); |
| __ ld3(v14.H(), v15.H(), v16.H(), 3, MemOperand(x1, 6, PostIndex)); |
| __ ld3(v22.S(), v23.S(), v24.S(), 3, MemOperand(x0)); |
| __ ld3(v30.S(), v31.S(), v0.S(), 2, MemOperand(x1, x2, PostIndex)); |
| __ ld3(v12.S(), v13.S(), v14.S(), 1, MemOperand(x1, 12, PostIndex)); |
| __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x0)); |
| __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x1, 3, PostIndex)); |
| __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), MemOperand(x0)); |
| __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), MemOperand(x1, 24, PostIndex)); |
| __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x0)); |
| __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), MemOperand(x1, 24, PostIndex)); |
| __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), MemOperand(x0)); |
| __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, 12, PostIndex)); |
| __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), MemOperand(x0)); |
| __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 6, PostIndex)); |
| __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), MemOperand(x0)); |
| __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, 12, PostIndex)); |
| __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x0)); |
| __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), MemOperand(x1, 3, PostIndex)); |
| __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0)); |
| __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), MemOperand(x1, 6, PostIndex)); |
| __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), MemOperand(x0)); |
| __ ld4(v2.V16B(), |
| v3.V16B(), |
| v4.V16B(), |
| v5.V16B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4(v5.V16B(), |
| v6.V16B(), |
| v7.V16B(), |
| v8.V16B(), |
| MemOperand(x1, 64, PostIndex)); |
| __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x0)); |
| __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ ld4(v29.V2D(), |
| v30.V2D(), |
| v31.V2D(), |
| v0.V2D(), |
| MemOperand(x1, 64, PostIndex)); |
| __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), MemOperand(x0)); |
| __ ld4(v24.V2S(), |
| v25.V2S(), |
| v26.V2S(), |
| v27.V2S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), MemOperand(x1, 32, PostIndex)); |
| __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); |
| __ ld4(v23.V4H(), |
| v24.V4H(), |
| v25.V4H(), |
| v26.V4H(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 32, PostIndex)); |
| __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), MemOperand(x0)); |
| __ ld4(v28.V4S(), |
| v29.V4S(), |
| v30.V4S(), |
| v31.V4S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4(v29.V4S(), |
| v30.V4S(), |
| v31.V4S(), |
| v0.V4S(), |
| MemOperand(x1, 64, PostIndex)); |
| __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), MemOperand(x0)); |
| __ ld4(v27.V8B(), |
| v28.V8B(), |
| v29.V8B(), |
| v30.V8B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, 32, PostIndex)); |
| __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0)); |
| __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ ld4(v20.V8H(), |
| v21.V8H(), |
| v22.V8H(), |
| v23.V8H(), |
| MemOperand(x1, 64, PostIndex)); |
| __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, MemOperand(x0)); |
| __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, MemOperand(x1, x2, PostIndex)); |
| __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, MemOperand(x1, 4, PostIndex)); |
| __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x0)); |
| __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex)); |
| __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, MemOperand(x1, 32, PostIndex)); |
| __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, MemOperand(x0)); |
| __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, MemOperand(x1, x2, PostIndex)); |
| __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, MemOperand(x1, 8, PostIndex)); |
| __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, MemOperand(x0)); |
| __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, MemOperand(x1, x2, PostIndex)); |
| __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, MemOperand(x1, 16, PostIndex)); |
| __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x0)); |
| __ ld4r(v13.V16B(), |
| v14.V16B(), |
| v15.V16B(), |
| v16.V16B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v9.V16B(), |
| v10.V16B(), |
| v11.V16B(), |
| v12.V16B(), |
| MemOperand(x1, 4, PostIndex)); |
| __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), MemOperand(x0)); |
| __ ld4r(v4.V1D(), |
| v5.V1D(), |
| v6.V1D(), |
| v7.V1D(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v26.V1D(), |
| v27.V1D(), |
| v28.V1D(), |
| v29.V1D(), |
| MemOperand(x1, 32, PostIndex)); |
| __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x0)); |
| __ ld4r(v28.V2D(), |
| v29.V2D(), |
| v30.V2D(), |
| v31.V2D(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v15.V2D(), |
| v16.V2D(), |
| v17.V2D(), |
| v18.V2D(), |
| MemOperand(x1, 32, PostIndex)); |
| __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x0)); |
| __ ld4r(v28.V2S(), |
| v29.V2S(), |
| v30.V2S(), |
| v31.V2S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v11.V2S(), |
| v12.V2S(), |
| v13.V2S(), |
| v14.V2S(), |
| MemOperand(x1, 16, PostIndex)); |
| __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), MemOperand(x0)); |
| __ ld4r(v22.V4H(), |
| v23.V4H(), |
| v24.V4H(), |
| v25.V4H(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v20.V4H(), |
| v21.V4H(), |
| v22.V4H(), |
| v23.V4H(), |
| MemOperand(x1, 8, PostIndex)); |
| __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x0)); |
| __ ld4r(v25.V4S(), |
| v26.V4S(), |
| v27.V4S(), |
| v28.V4S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v23.V4S(), |
| v24.V4S(), |
| v25.V4S(), |
| v26.V4S(), |
| MemOperand(x1, 16, PostIndex)); |
| __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), MemOperand(x0)); |
| __ ld4r(v27.V8B(), |
| v28.V8B(), |
| v29.V8B(), |
| v30.V8B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v29.V8B(), |
| v30.V8B(), |
| v31.V8B(), |
| v0.V8B(), |
| MemOperand(x1, 4, PostIndex)); |
| __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x0)); |
| __ ld4r(v25.V8H(), |
| v26.V8H(), |
| v27.V8H(), |
| v28.V8H(), |
| MemOperand(x1, x2, PostIndex)); |
| __ ld4r(v22.V8H(), |
| v23.V8H(), |
| v24.V8H(), |
| v25.V8H(), |
| MemOperand(x1, 8, PostIndex)); |
| __ mla(v29.V16B(), v7.V16B(), v26.V16B()); |
| __ mla(v6.V2S(), v4.V2S(), v14.V2S()); |
| __ mla(v9.V2S(), v11.V2S(), v0.S(), 2); |
| __ mla(v5.V4H(), v17.V4H(), v25.V4H()); |
| __ mla(v24.V4H(), v7.V4H(), v11.H(), 3); |
| __ mla(v12.V4S(), v3.V4S(), v4.V4S()); |
| __ mla(v10.V4S(), v7.V4S(), v7.S(), 3); |
| __ mla(v3.V8B(), v16.V8B(), v9.V8B()); |
| __ mla(v19.V8H(), v22.V8H(), v18.V8H()); |
| __ mla(v6.V8H(), v2.V8H(), v0.H(), 0); |
| __ mls(v23.V16B(), v10.V16B(), v11.V16B()); |
| __ mls(v14.V2S(), v31.V2S(), v22.V2S()); |
| __ mls(v28.V2S(), v13.V2S(), v1.S(), 3); |
| __ mls(v2.V4H(), v19.V4H(), v13.V4H()); |
| __ mls(v18.V4H(), v15.V4H(), v12.H(), 6); |
| __ mls(v6.V4S(), v11.V4S(), v16.V4S()); |
| __ mls(v23.V4S(), v16.V4S(), v10.S(), 2); |
| __ mls(v26.V8B(), v13.V8B(), v23.V8B()); |
| __ mls(v10.V8H(), v10.V8H(), v12.V8H()); |
| __ mls(v14.V8H(), v0.V8H(), v14.H(), 7); |
| __ mov(b22, v1.B(), 3); |
| __ mov(d7, v13.D(), 1); |
| __ mov(h26, v21.H(), 2); |
| __ mov(s26, v19.S(), 0); |
| __ mov(v26.V16B(), v11.V16B()); |
| __ mov(v20.V8B(), v0.V8B()); |
| __ mov(v19.B(), 13, v6.B(), 4); |
| __ mov(v4.B(), 13, w19); |
| __ mov(v11.D(), 1, v8.D(), 0); |
| __ mov(v3.D(), 0, x30); |
| __ mov(v29.H(), 4, v11.H(), 7); |
| __ mov(v2.H(), 6, w6); |
| __ mov(v22.S(), 0, v5.S(), 2); |
| __ mov(v24.S(), 3, w8); |
| __ mov(w18, v1.S(), 3); |
| __ mov(x28, v21.D(), 0); |
| __ movi(d24, 0xffff0000ffffff); |
| __ movi(v29.V16B(), 0x80); |
| __ movi(v12.V2D(), 0xffff00ff00ffff00); |
| __ movi(v12.V2S(), 0xec, LSL, 24); |
| __ movi(v10.V2S(), 0x4c, MSL, 16); |
| __ movi(v26.V4H(), 0xc0, LSL); |
| __ movi(v24.V4S(), 0x98, LSL, 16); |
| __ movi(v1.V4S(), 0xde, MSL, 16); |
| __ movi(v21.V8B(), 0x4d); |
| __ movi(v29.V8H(), 0x69, LSL); |
| __ mul(v1.V16B(), v15.V16B(), v17.V16B()); |
| __ mul(v21.V2S(), v19.V2S(), v29.V2S()); |
| __ mul(v19.V2S(), v5.V2S(), v3.S(), 0); |
| __ mul(v29.V4H(), v11.V4H(), v2.V4H()); |
| __ mul(v2.V4H(), v7.V4H(), v0.H(), 0); |
| __ mul(v25.V4S(), v26.V4S(), v16.V4S()); |
| __ mul(v26.V4S(), v6.V4S(), v15.S(), 2); |
| __ mul(v11.V8B(), v15.V8B(), v31.V8B()); |
| __ mul(v20.V8H(), v31.V8H(), v15.V8H()); |
| __ mul(v29.V8H(), v5.V8H(), v9.H(), 4); |
| __ mvn(v13.V16B(), v21.V16B()); |
| __ mvn(v28.V8B(), v19.V8B()); |
| __ mvni(v25.V2S(), 0xb8, LSL, 8); |
| __ mvni(v17.V2S(), 0x6c, MSL, 16); |
| __ mvni(v29.V4H(), 0x48, LSL); |
| __ mvni(v20.V4S(), 0x7a, LSL, 16); |
| __ mvni(v0.V4S(), 0x1e, MSL, 8); |
| __ mvni(v31.V8H(), 0x3e, LSL); |
| __ neg(d25, d11); |
| __ neg(v4.V16B(), v9.V16B()); |
| __ neg(v11.V2D(), v25.V2D()); |
| __ neg(v7.V2S(), v18.V2S()); |
| __ neg(v7.V4H(), v15.V4H()); |
| __ neg(v17.V4S(), v18.V4S()); |
| __ neg(v20.V8B(), v17.V8B()); |
| __ neg(v0.V8H(), v11.V8H()); |
| __ orn(v13.V16B(), v11.V16B(), v31.V16B()); |
| __ orn(v22.V8B(), v16.V8B(), v22.V8B()); |
| __ orr(v17.V16B(), v17.V16B(), v23.V16B()); |
| __ orr(v8.V2S(), 0xe3); |
| __ orr(v11.V4H(), 0x97, 8); |
| __ orr(v7.V4S(), 0xab); |
| __ orr(v8.V8B(), v4.V8B(), v3.V8B()); |
| __ orr(v31.V8H(), 0xb0, 8); |
| __ pmul(v11.V16B(), v18.V16B(), v23.V16B()); |
| __ pmul(v8.V8B(), v24.V8B(), v5.V8B()); |
| __ pmull(v24.V8H(), v18.V8B(), v22.V8B()); |
| __ pmull2(v13.V8H(), v3.V16B(), v21.V16B()); |
| __ raddhn(v22.V2S(), v10.V2D(), v21.V2D()); |
| __ raddhn(v5.V4H(), v13.V4S(), v13.V4S()); |
| __ raddhn(v10.V8B(), v17.V8H(), v26.V8H()); |
| __ raddhn2(v9.V16B(), v29.V8H(), v13.V8H()); |
| __ raddhn2(v27.V4S(), v23.V2D(), v26.V2D()); |
| __ raddhn2(v0.V8H(), v29.V4S(), v7.V4S()); |
| __ rbit(v22.V16B(), v15.V16B()); |
| __ rbit(v30.V8B(), v3.V8B()); |
| __ rev16(v31.V16B(), v27.V16B()); |
| __ rev16(v12.V8B(), v26.V8B()); |
| __ rev32(v5.V16B(), v4.V16B()); |
| __ rev32(v16.V4H(), v26.V4H()); |
| __ rev32(v20.V8B(), v3.V8B()); |
| __ rev32(v20.V8H(), v28.V8H()); |
| __ rev64(v9.V16B(), v19.V16B()); |
| __ rev64(v5.V2S(), v16.V2S()); |
| __ rev64(v7.V4H(), v31.V4H()); |
| __ rev64(v15.V4S(), v26.V4S()); |
| __ rev64(v25.V8B(), v9.V8B()); |
| __ rev64(v11.V8H(), v5.V8H()); |
| __ rshrn(v18.V2S(), v13.V2D(), 1); |
| __ rshrn(v25.V4H(), v30.V4S(), 2); |
| __ rshrn(v13.V8B(), v9.V8H(), 8); |
| __ rshrn2(v3.V16B(), v6.V8H(), 8); |
| __ rshrn2(v0.V4S(), v29.V2D(), 25); |
| __ rshrn2(v27.V8H(), v26.V4S(), 15); |
| __ rsubhn(v15.V2S(), v25.V2D(), v4.V2D()); |
| __ rsubhn(v23.V4H(), v9.V4S(), v3.V4S()); |
| __ rsubhn(v6.V8B(), v30.V8H(), v24.V8H()); |
| __ rsubhn2(v4.V16B(), v24.V8H(), v20.V8H()); |
| __ rsubhn2(v1.V4S(), v23.V2D(), v22.V2D()); |
| __ rsubhn2(v19.V8H(), v2.V4S(), v20.V4S()); |
| __ saba(v28.V16B(), v9.V16B(), v25.V16B()); |
| __ saba(v9.V2S(), v28.V2S(), v20.V2S()); |
| __ saba(v17.V4H(), v22.V4H(), v22.V4H()); |
| __ saba(v29.V4S(), v5.V4S(), v27.V4S()); |
| __ saba(v20.V8B(), v21.V8B(), v18.V8B()); |
| __ saba(v27.V8H(), v17.V8H(), v30.V8H()); |
| __ sabal(v20.V2D(), v13.V2S(), v7.V2S()); |
| __ sabal(v4.V4S(), v12.V4H(), v4.V4H()); |
| __ sabal(v23.V8H(), v24.V8B(), v20.V8B()); |
| __ sabal2(v26.V2D(), v21.V4S(), v18.V4S()); |
| __ sabal2(v27.V4S(), v28.V8H(), v8.V8H()); |
| __ sabal2(v12.V8H(), v16.V16B(), v21.V16B()); |
| __ sabd(v0.V16B(), v15.V16B(), v13.V16B()); |
| __ sabd(v15.V2S(), v7.V2S(), v30.V2S()); |
| __ sabd(v17.V4H(), v17.V4H(), v12.V4H()); |
| __ sabd(v7.V4S(), v4.V4S(), v22.V4S()); |
| __ sabd(v23.V8B(), v3.V8B(), v26.V8B()); |
| __ sabd(v20.V8H(), v28.V8H(), v5.V8H()); |
| __ sabdl(v27.V2D(), v22.V2S(), v20.V2S()); |
| __ sabdl(v31.V4S(), v20.V4H(), v23.V4H()); |
| __ sabdl(v0.V8H(), v20.V8B(), v27.V8B()); |
| __ sabdl2(v31.V2D(), v11.V4S(), v3.V4S()); |
| __ sabdl2(v26.V4S(), v11.V8H(), v27.V8H()); |
| __ sabdl2(v6.V8H(), v8.V16B(), v18.V16B()); |
| __ sadalp(v8.V1D(), v26.V2S()); |
| __ sadalp(v12.V2D(), v26.V4S()); |
| __ sadalp(v12.V2S(), v26.V4H()); |
| __ sadalp(v4.V4H(), v1.V8B()); |
| __ sadalp(v15.V4S(), v17.V8H()); |
| __ sadalp(v21.V8H(), v25.V16B()); |
| __ saddl(v5.V2D(), v10.V2S(), v14.V2S()); |
| __ saddl(v18.V4S(), v3.V4H(), v15.V4H()); |
| __ saddl(v15.V8H(), v2.V8B(), v23.V8B()); |
| __ saddl2(v16.V2D(), v16.V4S(), v27.V4S()); |
| __ saddl2(v6.V4S(), v24.V8H(), v0.V8H()); |
| __ saddl2(v7.V8H(), v20.V16B(), v28.V16B()); |
| __ saddlp(v10.V1D(), v25.V2S()); |
| __ saddlp(v15.V2D(), v16.V4S()); |
| __ saddlp(v18.V2S(), v10.V4H()); |
| __ saddlp(v29.V4H(), v26.V8B()); |
| __ saddlp(v10.V4S(), v1.V8H()); |
| __ saddlp(v0.V8H(), v21.V16B()); |
| __ saddlv(d12, v7.V4S()); |
| __ saddlv(h14, v28.V16B()); |
| __ saddlv(h30, v30.V8B()); |
| __ saddlv(s27, v3.V4H()); |
| __ saddlv(s16, v16.V8H()); |
| __ saddw(v24.V2D(), v11.V2D(), v18.V2S()); |
| __ saddw(v13.V4S(), v12.V4S(), v6.V4H()); |
| __ saddw(v19.V8H(), v19.V8H(), v7.V8B()); |
| __ saddw2(v27.V2D(), v9.V2D(), v26.V4S()); |
| __ saddw2(v19.V4S(), v23.V4S(), v21.V8H()); |
| __ saddw2(v15.V8H(), v25.V8H(), v30.V16B()); |
| __ shadd(v7.V16B(), v4.V16B(), v9.V16B()); |
| __ shadd(v29.V2S(), v25.V2S(), v24.V2S()); |
| __ shadd(v31.V4H(), v10.V4H(), v13.V4H()); |
| __ shadd(v21.V4S(), v16.V4S(), v8.V4S()); |
| __ shadd(v14.V8B(), v29.V8B(), v22.V8B()); |
| __ shadd(v19.V8H(), v24.V8H(), v20.V8H()); |
| __ shl(d22, d25, 23); |
| __ shl(v5.V16B(), v17.V16B(), 7); |
| __ shl(v2.V2D(), v4.V2D(), 21); |
| __ shl(v4.V2S(), v3.V2S(), 26); |
| __ shl(v3.V4H(), v28.V4H(), 8); |
| __ shl(v4.V4S(), v31.V4S(), 24); |
| __ shl(v18.V8B(), v16.V8B(), 2); |
| __ shl(v0.V8H(), v11.V8H(), 3); |
| __ shll(v5.V2D(), v24.V2S(), 32); |
| __ shll(v26.V4S(), v20.V4H(), 16); |
| __ shll(v5.V8H(), v9.V8B(), 8); |
| __ shll2(v21.V2D(), v28.V4S(), 32); |
| __ shll2(v22.V4S(), v1.V8H(), 16); |
| __ shll2(v30.V8H(), v25.V16B(), 8); |
| __ shrn(v5.V2S(), v1.V2D(), 28); |
| __ shrn(v29.V4H(), v18.V4S(), 7); |
| __ shrn(v17.V8B(), v29.V8H(), 2); |
| __ shrn2(v5.V16B(), v30.V8H(), 3); |
| __ shrn2(v24.V4S(), v1.V2D(), 1); |
| __ shrn2(v5.V8H(), v14.V4S(), 16); |
| __ shsub(v30.V16B(), v22.V16B(), v23.V16B()); |
| __ shsub(v22.V2S(), v27.V2S(), v25.V2S()); |
| __ shsub(v13.V4H(), v22.V4H(), v1.V4H()); |
| __ shsub(v10.V4S(), v8.V4S(), v23.V4S()); |
| __ shsub(v6.V8B(), v9.V8B(), v31.V8B()); |
| __ shsub(v8.V8H(), v31.V8H(), v8.V8H()); |
| __ sli(d19, d29, 20); |
| __ sli(v9.V16B(), v24.V16B(), 0); |
| __ sli(v22.V2D(), v9.V2D(), 10); |
| __ sli(v11.V2S(), v27.V2S(), 20); |
| __ sli(v16.V4H(), v15.V4H(), 5); |
| __ sli(v8.V4S(), v8.V4S(), 25); |
| __ sli(v10.V8B(), v30.V8B(), 0); |
| __ sli(v7.V8H(), v28.V8H(), 6); |
| __ smax(v18.V16B(), v8.V16B(), v1.V16B()); |
| __ smax(v30.V2S(), v5.V2S(), v1.V2S()); |
| __ smax(v17.V4H(), v25.V4H(), v19.V4H()); |
| __ smax(v1.V4S(), v24.V4S(), v31.V4S()); |
| __ smax(v17.V8B(), v24.V8B(), v24.V8B()); |
| __ smax(v11.V8H(), v26.V8H(), v10.V8H()); |
| __ smaxp(v12.V16B(), v14.V16B(), v7.V16B()); |
| __ smaxp(v31.V2S(), v24.V2S(), v6.V2S()); |
| __ smaxp(v10.V4H(), v29.V4H(), v10.V4H()); |
| __ smaxp(v18.V4S(), v11.V4S(), v7.V4S()); |
| __ smaxp(v21.V8B(), v0.V8B(), v18.V8B()); |
| __ smaxp(v26.V8H(), v8.V8H(), v15.V8H()); |
| __ smaxv(b4, v5.V16B()); |
| __ smaxv(b23, v0.V8B()); |
| __ smaxv(h6, v0.V4H()); |
| __ smaxv(h24, v8.V8H()); |
| __ smaxv(s3, v16.V4S()); |
| __ smin(v24.V16B(), v8.V16B(), v18.V16B()); |
| __ smin(v29.V2S(), v8.V2S(), v23.V2S()); |
| __ smin(v6.V4H(), v11.V4H(), v21.V4H()); |
| __ smin(v24.V4S(), v23.V4S(), v15.V4S()); |
| __ smin(v8.V8B(), v16.V8B(), v4.V8B()); |
| __ smin(v12.V8H(), v1.V8H(), v10.V8H()); |
| __ sminp(v13.V16B(), v18.V16B(), v28.V16B()); |
| __ sminp(v22.V2S(), v28.V2S(), v16.V2S()); |
| __ sminp(v15.V4H(), v12.V4H(), v5.V4H()); |
| __ sminp(v15.V4S(), v17.V4S(), v8.V4S()); |
| __ sminp(v21.V8B(), v2.V8B(), v6.V8B()); |
| __ sminp(v21.V8H(), v12.V8H(), v6.V8H()); |
| __ sminv(b8, v6.V16B()); |
| __ sminv(b6, v18.V8B()); |
| __ sminv(h20, v1.V4H()); |
| __ sminv(h7, v17.V8H()); |
| __ sminv(s21, v4.V4S()); |
| __ smlal(v24.V2D(), v14.V2S(), v21.V2S()); |
| __ smlal(v31.V2D(), v3.V2S(), v14.S(), 2); |
| __ smlal(v7.V4S(), v20.V4H(), v21.V4H()); |
| __ smlal(v19.V4S(), v16.V4H(), v9.H(), 3); |
| __ smlal(v29.V8H(), v14.V8B(), v1.V8B()); |
| __ smlal2(v30.V2D(), v26.V4S(), v16.V4S()); |
| __ smlal2(v31.V2D(), v30.V4S(), v1.S(), 0); |
| __ smlal2(v17.V4S(), v6.V8H(), v3.V8H()); |
| __ smlal2(v11.V4S(), v31.V8H(), v5.H(), 7); |
| __ smlal2(v30.V8H(), v16.V16B(), v29.V16B()); |
| __ smlsl(v1.V2D(), v20.V2S(), v17.V2S()); |
| __ smlsl(v29.V2D(), v12.V2S(), v5.S(), 3); |
| __ smlsl(v0.V4S(), v26.V4H(), v1.V4H()); |
| __ smlsl(v3.V4S(), v5.V4H(), v6.H(), 5); |
| __ smlsl(v4.V8H(), v0.V8B(), v26.V8B()); |
| __ smlsl2(v14.V2D(), v14.V4S(), v5.V4S()); |
| __ smlsl2(v15.V2D(), v5.V4S(), v0.S(), 1); |
| __ smlsl2(v29.V4S(), v17.V8H(), v31.V8H()); |
| __ smlsl2(v6.V4S(), v15.V8H(), v9.H(), 6); |
| __ smlsl2(v30.V8H(), v15.V16B(), v15.V16B()); |
| __ smov(w21, v6.B(), 3); |
| __ smov(w13, v26.H(), 7); |
| __ smov(x24, v16.B(), 7); |
| __ smov(x7, v4.H(), 3); |
| __ smov(x29, v7.S(), 1); |
| __ smull(v4.V2D(), v29.V2S(), v17.V2S()); |
| __ smull(v30.V2D(), v21.V2S(), v6.S(), 2); |
| __ smull(v23.V4S(), v5.V4H(), v23.V4H()); |
| __ smull(v8.V4S(), v9.V4H(), v2.H(), 1); |
| __ smull(v31.V8H(), v17.V8B(), v1.V8B()); |
| __ smull2(v3.V2D(), v3.V4S(), v23.V4S()); |
| __ smull2(v15.V2D(), v29.V4S(), v6.S(), 1); |
| __ smull2(v19.V4S(), v20.V8H(), v30.V8H()); |
| __ smull2(v6.V4S(), v10.V8H(), v7.H(), 4); |
| __ smull2(v25.V8H(), v8.V16B(), v27.V16B()); |
| __ sqabs(b3, b15); |
| __ sqabs(d14, d9); |
| __ sqabs(h31, h28); |
| __ sqabs(s8, s0); |
| __ sqabs(v14.V16B(), v7.V16B()); |
| __ sqabs(v23.V2D(), v19.V2D()); |
| __ sqabs(v10.V2S(), v24.V2S()); |
| __ sqabs(v31.V4H(), v19.V4H()); |
| __ sqabs(v23.V4S(), v0.V4S()); |
| __ sqabs(v29.V8B(), v23.V8B()); |
| __ sqabs(v17.V8H(), v21.V8H()); |
| __ sqadd(b9, b23, b13); |
| __ sqadd(d2, d25, d26); |
| __ sqadd(h7, h29, h25); |
| __ sqadd(s11, s7, s24); |
| __ sqadd(v20.V16B(), v16.V16B(), v29.V16B()); |
| __ sqadd(v23.V2D(), v30.V2D(), v28.V2D()); |
| __ sqadd(v8.V2S(), v19.V2S(), v2.V2S()); |
| __ sqadd(v20.V4H(), v12.V4H(), v31.V4H()); |
| __ sqadd(v14.V4S(), v15.V4S(), v17.V4S()); |
| __ sqadd(v2.V8B(), v29.V8B(), v13.V8B()); |
| __ sqadd(v7.V8H(), v19.V8H(), v14.V8H()); |
| __ sqdmlal(d15, s5, s30); |
| __ sqdmlal(d24, s10, v2.S(), 3); |
| __ sqdmlal(s9, h19, h8); |
| __ sqdmlal(s14, h1, v12.H(), 3); |
| __ sqdmlal(v30.V2D(), v5.V2S(), v31.V2S()); |
| __ sqdmlal(v25.V2D(), v14.V2S(), v10.S(), 1); |
| __ sqdmlal(v19.V4S(), v17.V4H(), v16.V4H()); |
| __ sqdmlal(v8.V4S(), v5.V4H(), v8.H(), 1); |
| __ sqdmlal2(v1.V2D(), v23.V4S(), v3.V4S()); |
| __ sqdmlal2(v19.V2D(), v0.V4S(), v9.S(), 0); |
| __ sqdmlal2(v26.V4S(), v22.V8H(), v11.V8H()); |
| __ sqdmlal2(v6.V4S(), v28.V8H(), v13.H(), 4); |
| __ sqdmlsl(d10, s29, s20); |
| __ sqdmlsl(d10, s9, v10.S(), 1); |
| __ sqdmlsl(s30, h9, h24); |
| __ sqdmlsl(s13, h24, v6.H(), 1); |
| __ sqdmlsl(v27.V2D(), v10.V2S(), v20.V2S()); |
| __ sqdmlsl(v23.V2D(), v23.V2S(), v3.S(), 3); |
| __ sqdmlsl(v7.V4S(), v17.V4H(), v29.V4H()); |
| __ sqdmlsl(v22.V4S(), v21.V4H(), v3.H(), 4); |
| __ sqdmlsl2(v12.V2D(), v7.V4S(), v22.V4S()); |
| __ sqdmlsl2(v20.V2D(), v25.V4S(), v8.S(), 0); |
| __ sqdmlsl2(v25.V4S(), v26.V8H(), v18.V8H()); |
| __ sqdmlsl2(v25.V4S(), v19.V8H(), v5.H(), 0); |
| __ sqdmulh(h17, h27, h12); |
| __ sqdmulh(h16, h5, v11.H(), 0); |
| __ sqdmulh(s1, s19, s16); |
| __ sqdmulh(s1, s16, v2.S(), 0); |
| __ sqdmulh(v28.V2S(), v1.V2S(), v8.V2S()); |
| __ sqdmulh(v28.V2S(), v8.V2S(), v3.S(), 0); |
| __ sqdmulh(v11.V4H(), v25.V4H(), v5.V4H()); |
| __ sqdmulh(v30.V4H(), v14.V4H(), v8.H(), 5); |
| __ sqdmulh(v25.V4S(), v21.V4S(), v13.V4S()); |
| __ sqdmulh(v23.V4S(), v2.V4S(), v10.S(), 3); |
| __ sqdmulh(v26.V8H(), v5.V8H(), v23.V8H()); |
| __ sqdmulh(v4.V8H(), v22.V8H(), v4.H(), 3); |
| __ sqdmull(d25, s2, s26); |
| __ sqdmull(d30, s14, v5.S(), 1); |
| __ sqdmull(s29, h18, h11); |
| __ sqdmull(s11, h13, v7.H(), 6); |
| __ sqdmull(v23.V2D(), v9.V2S(), v8.V2S()); |
| __ sqdmull(v18.V2D(), v29.V2S(), v4.S(), 1); |
| __ sqdmull(v17.V4S(), v24.V4H(), v7.V4H()); |
| __ sqdmull(v8.V4S(), v15.V4H(), v5.H(), 1); |
| __ sqdmull2(v28.V2D(), v14.V4S(), v2.V4S()); |
| __ sqdmull2(v1.V2D(), v24.V4S(), v13.S(), 2); |
| __ sqdmull2(v11.V4S(), v17.V8H(), v31.V8H()); |
| __ sqdmull2(v1.V4S(), v20.V8H(), v11.H(), 3); |
| __ sqneg(b2, b0); |
| __ sqneg(d24, d2); |
| __ sqneg(h29, h3); |
| __ sqneg(s4, s9); |
| __ sqneg(v14.V16B(), v29.V16B()); |
| __ sqneg(v30.V2D(), v12.V2D()); |
| __ sqneg(v28.V2S(), v26.V2S()); |
| __ sqneg(v4.V4H(), v4.V4H()); |
| __ sqneg(v9.V4S(), v8.V4S()); |
| __ sqneg(v20.V8B(), v20.V8B()); |
| __ sqneg(v27.V8H(), v10.V8H()); |
| __ sqrdmulh(h7, h24, h0); |
| __ sqrdmulh(h14, h3, v4.H(), 6); |
| __ sqrdmulh(s27, s19, s24); |
| __ sqrdmulh(s31, s21, v4.S(), 0); |
| __ sqrdmulh(v18.V2S(), v25.V2S(), v1.V2S()); |
| __ sqrdmulh(v22.V2S(), v5.V2S(), v13.S(), 0); |
| __ sqrdmulh(v22.V4H(), v24.V4H(), v9.V4H()); |
| __ sqrdmulh(v13.V4H(), v2.V4H(), v12.H(), 6); |
| __ sqrdmulh(v9.V4S(), v27.V4S(), v2.V4S()); |
| __ sqrdmulh(v3.V4S(), v23.V4S(), v7.S(), 1); |
| __ sqrdmulh(v2.V8H(), v0.V8H(), v7.V8H()); |
| __ sqrdmulh(v16.V8H(), v9.V8H(), v8.H(), 2); |
| __ sqrshl(b8, b21, b13); |
| __ sqrshl(d29, d7, d20); |
| __ sqrshl(h28, h14, h10); |
| __ sqrshl(s26, s18, s2); |
| __ sqrshl(v18.V16B(), v31.V16B(), v26.V16B()); |
| __ sqrshl(v28.V2D(), v4.V2D(), v0.V2D()); |
| __ sqrshl(v3.V2S(), v6.V2S(), v0.V2S()); |
| __ sqrshl(v1.V4H(), v18.V4H(), v22.V4H()); |
| __ sqrshl(v16.V4S(), v25.V4S(), v7.V4S()); |
| __ sqrshl(v0.V8B(), v21.V8B(), v5.V8B()); |
| __ sqrshl(v30.V8H(), v19.V8H(), v8.V8H()); |
| __ sqrshrn(b6, h21, 4); |
| __ sqrshrn(h14, s17, 11); |
| __ sqrshrn(s25, d27, 10); |
| __ sqrshrn(v6.V2S(), v13.V2D(), 18); |
| __ sqrshrn(v5.V4H(), v9.V4S(), 15); |
| __ sqrshrn(v19.V8B(), v12.V8H(), 1); |
| __ sqrshrn2(v19.V16B(), v21.V8H(), 7); |
| __ sqrshrn2(v29.V4S(), v24.V2D(), 13); |
| __ sqrshrn2(v12.V8H(), v2.V4S(), 10); |
| __ sqrshrun(b16, h9, 5); |
| __ sqrshrun(h3, s24, 15); |
| __ sqrshrun(s16, d18, 8); |
| __ sqrshrun(v28.V2S(), v23.V2D(), 8); |
| __ sqrshrun(v31.V4H(), v25.V4S(), 10); |
| __ sqrshrun(v19.V8B(), v23.V8H(), 2); |
| __ sqrshrun2(v24.V16B(), v0.V8H(), 8); |
| __ sqrshrun2(v22.V4S(), v1.V2D(), 23); |
| __ sqrshrun2(v28.V8H(), v21.V4S(), 13); |
| __ sqshl(b6, b21, b8); |
| __ sqshl(b11, b26, 2); |
| __ sqshl(d29, d0, d4); |
| __ sqshl(d21, d7, 35); |
| __ sqshl(h20, h25, h17); |
| __ sqshl(h20, h0, 8); |
| __ sqshl(s29, s13, s4); |
| __ sqshl(s10, s11, 20); |
| __ sqshl(v8.V16B(), v18.V16B(), v28.V16B()); |
| __ sqshl(v29.V16B(), v29.V16B(), 2); |
| __ sqshl(v8.V2D(), v31.V2D(), v16.V2D()); |
| __ sqshl(v7.V2D(), v14.V2D(), 37); |
| __ sqshl(v0.V2S(), v26.V2S(), v7.V2S()); |
| __ sqshl(v5.V2S(), v11.V2S(), 19); |
| __ sqshl(v11.V4H(), v30.V4H(), v0.V4H()); |
| __ sqshl(v1.V4H(), v18.V4H(), 7); |
| __ sqshl(v22.V4S(), v3.V4S(), v30.V4S()); |
| __ sqshl(v16.V4S(), v15.V4S(), 28); |
| __ sqshl(v6.V8B(), v28.V8B(), v25.V8B()); |
| __ sqshl(v0.V8B(), v15.V8B(), 0); |
| __ sqshl(v6.V8H(), v16.V8H(), v30.V8H()); |
| __ sqshl(v3.V8H(), v20.V8H(), 14); |
| __ sqshlu(b13, b14, 6); |
| __ sqshlu(d0, d16, 44); |
| __ sqshlu(h5, h29, 15); |
| __ sqshlu(s29, s8, 13); |
| __ sqshlu(v27.V16B(), v20.V16B(), 2); |
| __ sqshlu(v24.V2D(), v12.V2D(), 11); |
| __ sqshlu(v12.V2S(), v19.V2S(), 22); |
| __ sqshlu(v8.V4H(), v12.V4H(), 11); |
| __ sqshlu(v18.V4S(), v3.V4S(), 8); |
| __ sqshlu(v3.V8B(), v10.V8B(), 1); |
| __ sqshlu(v30.V8H(), v24.V8H(), 4); |
| __ sqshrn(b1, h28, 1); |
| __ sqshrn(h31, s7, 10); |
| __ sqshrn(s4, d10, 24); |
| __ sqshrn(v10.V2S(), v1.V2D(), 29); |
| __ sqshrn(v3.V4H(), v13.V4S(), 14); |
| __ sqshrn(v27.V8B(), v6.V8H(), 7); |
| __ sqshrn2(v14.V16B(), v23.V8H(), 1); |
| __ sqshrn2(v25.V4S(), v22.V2D(), 27); |
| __ sqshrn2(v31.V8H(), v12.V4S(), 10); |
| __ sqshrun(b9, h0, 1); |
| __ sqshrun(h11, s6, 7); |
| __ sqshrun(s13, d12, 13); |
| __ sqshrun(v10.V2S(), v30.V2D(), 1); |
| __ sqshrun(v31.V4H(), v3.V4S(), 11); |
| __ sqshrun(v28.V8B(), v30.V8H(), 8); |
| __ sqshrun2(v16.V16B(), v27.V8H(), 3); |
| __ sqshrun2(v27.V4S(), v14.V2D(), 18); |
| __ sqshrun2(v23.V8H(), v14.V4S(), 1); |
| __ sqsub(b19, b29, b11); |
| __ sqsub(d21, d31, d6); |
| __ sqsub(h18, h10, h19); |
| __ sqsub(s6, s5, s0); |
| __ sqsub(v21.V16B(), v22.V16B(), v0.V16B()); |
| __ sqsub(v22.V2D(), v10.V2D(), v17.V2D()); |
| __ sqsub(v8.V2S(), v21.V2S(), v2.V2S()); |
| __ sqsub(v18.V4H(), v25.V4H(), v27.V4H()); |
| __ sqsub(v13.V4S(), v3.V4S(), v6.V4S()); |
| __ sqsub(v28.V8B(), v29.V8B(), v16.V8B()); |
| __ sqsub(v17.V8H(), v6.V8H(), v10.V8H()); |
| __ sqxtn(b27, h26); |
| __ sqxtn(h17, s11); |
| __ sqxtn(s22, d31); |
| __ sqxtn(v26.V2S(), v5.V2D()); |
| __ sqxtn(v13.V4H(), v7.V4S()); |
| __ sqxtn(v19.V8B(), v19.V8H()); |
| __ sqxtn2(v19.V16B(), v3.V8H()); |
| __ sqxtn2(v23.V4S(), v1.V2D()); |
| __ sqxtn2(v13.V8H(), v3.V4S()); |
| __ sqxtun(b26, h9); |
| __ sqxtun(h19, s12); |
| __ sqxtun(s3, d6); |
| __ sqxtun(v29.V2S(), v26.V2D()); |
| __ sqxtun(v26.V4H(), v10.V4S()); |
| __ sqxtun(v7.V8B(), v29.V8H()); |
| __ sqxtun2(v21.V16B(), v14.V8H()); |
| __ sqxtun2(v24.V4S(), v15.V2D()); |
| __ sqxtun2(v30.V8H(), v1.V4S()); |
| __ srhadd(v21.V16B(), v17.V16B(), v15.V16B()); |
| __ srhadd(v28.V2S(), v21.V2S(), v29.V2S()); |
| __ srhadd(v9.V4H(), v1.V4H(), v30.V4H()); |
| __ srhadd(v24.V4S(), v0.V4S(), v2.V4S()); |
| __ srhadd(v6.V8B(), v17.V8B(), v15.V8B()); |
| __ srhadd(v5.V8H(), v7.V8H(), v21.V8H()); |
| __ sri(d14, d14, 49); |
| __ sri(v23.V16B(), v8.V16B(), 4); |
| __ sri(v20.V2D(), v13.V2D(), 20); |
| __ sri(v16.V2S(), v2.V2S(), 24); |
| __ sri(v5.V4H(), v23.V4H(), 11); |
| __ sri(v27.V4S(), v15.V4S(), 23); |
| __ sri(v19.V8B(), v29.V8B(), 4); |
| __ sri(v7.V8H(), v29.V8H(), 3); |
| __ srshl(d2, d9, d26); |
| __ srshl(v29.V16B(), v17.V16B(), v11.V16B()); |
| __ srshl(v8.V2D(), v15.V2D(), v4.V2D()); |
| __ srshl(v25.V2S(), v17.V2S(), v8.V2S()); |
| __ srshl(v19.V4H(), v7.V4H(), v7.V4H()); |
| __ srshl(v13.V4S(), v2.V4S(), v17.V4S()); |
| __ srshl(v22.V8B(), v6.V8B(), v21.V8B()); |
| __ srshl(v10.V8H(), v17.V8H(), v4.V8H()); |
| __ srshr(d21, d18, 45); |
| __ srshr(v3.V16B(), v11.V16B(), 7); |
| __ srshr(v21.V2D(), v26.V2D(), 53); |
| __ srshr(v11.V2S(), v5.V2S(), 28); |
| __ srshr(v7.V4H(), v18.V4H(), 12); |
| __ srshr(v7.V4S(), v3.V4S(), 30); |
| __ srshr(v14.V8B(), v2.V8B(), 6); |
| __ srshr(v21.V8H(), v20.V8H(), 3); |
| __ srsra(d21, d30, 63); |
| __ srsra(v27.V16B(), v30.V16B(), 6); |
| __ srsra(v20.V2D(), v12.V2D(), 27); |
| __ srsra(v0.V2S(), v17.V2S(), 5); |
| __ srsra(v14.V4H(), v16.V4H(), 15); |
| __ srsra(v18.V4S(), v3.V4S(), 20); |
| __ srsra(v21.V8B(), v1.V8B(), 1); |
| __ srsra(v31.V8H(), v25.V8H(), 2); |
| __ sshl(d1, d13, d9); |
| __ sshl(v17.V16B(), v31.V16B(), v15.V16B()); |
| __ sshl(v13.V2D(), v16.V2D(), v0.V2D()); |
| __ sshl(v0.V2S(), v7.V2S(), v22.V2S()); |
| __ sshl(v23.V4H(), v19.V4H(), v4.V4H()); |
| __ sshl(v5.V4S(), v5.V4S(), v11.V4S()); |
| __ sshl(v23.V8B(), v27.V8B(), v7.V8B()); |
| __ sshl(v29.V8H(), v10.V8H(), v5.V8H()); |
| __ sshll(v0.V2D(), v2.V2S(), 23); |
| __ sshll(v11.V4S(), v8.V4H(), 8); |
| __ sshll(v4.V8H(), v29.V8B(), 1); |
| __ sshll2(v10.V2D(), v4.V4S(), 14); |
| __ sshll2(v26.V4S(), v31.V8H(), 6); |
| __ sshll2(v3.V8H(), v26.V16B(), 4); |
| __ sshr(d19, d21, 20); |
| __ sshr(v15.V16B(), v23.V16B(), 5); |
| __ sshr(v17.V2D(), v14.V2D(), 38); |
| __ sshr(v3.V2S(), v29.V2S(), 23); |
| __ sshr(v23.V4H(), v27.V4H(), 4); |
| __ sshr(v28.V4S(), v3.V4S(), 4); |
| __ sshr(v14.V8B(), v2.V8B(), 6); |
| __ sshr(v3.V8H(), v8.V8H(), 6); |
| __ ssra(d12, d28, 44); |
| __ ssra(v29.V16B(), v31.V16B(), 4); |
| __ ssra(v3.V2D(), v0.V2D(), 24); |
| __ ssra(v14.V2S(), v28.V2S(), 6); |
| __ ssra(v18.V4H(), v8.V4H(), 7); |
| __ ssra(v31.V4S(), v14.V4S(), 24); |
| __ ssra(v28.V8B(), v26.V8B(), 5); |
| __ ssra(v9.V8H(), v9.V8H(), 14); |
| __ ssubl(v13.V2D(), v14.V2S(), v3.V2S()); |
| __ ssubl(v5.V4S(), v16.V4H(), v8.V4H()); |
| __ ssubl(v0.V8H(), v28.V8B(), v6.V8B()); |
| __ ssubl2(v5.V2D(), v13.V4S(), v25.V4S()); |
| __ ssubl2(v3.V4S(), v15.V8H(), v17.V8H()); |
| __ ssubl2(v15.V8H(), v15.V16B(), v14.V16B()); |
| __ ssubw(v25.V2D(), v23.V2D(), v26.V2S()); |
| __ ssubw(v21.V4S(), v18.V4S(), v24.V4H()); |
| __ ssubw(v30.V8H(), v22.V8H(), v3.V8B()); |
| __ ssubw2(v16.V2D(), v24.V2D(), v28.V4S()); |
| __ ssubw2(v31.V4S(), v11.V4S(), v15.V8H()); |
| __ ssubw2(v4.V8H(), v8.V8H(), v16.V16B()); |
| __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); |
| __ st1(v10.V16B(), |
| v11.V16B(), |
| v12.V16B(), |
| v13.V16B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st1(v27.V16B(), |
| v28.V16B(), |
| v29.V16B(), |
| v30.V16B(), |
| MemOperand(x1, 64, PostIndex)); |
| __ st1(v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x0)); |
| __ st1(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v9.V16B(), v10.V16B(), v11.V16B(), MemOperand(x1, 48, PostIndex)); |
| __ st1(v7.V16B(), v8.V16B(), MemOperand(x0)); |
| __ st1(v26.V16B(), v27.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v22.V16B(), v23.V16B(), MemOperand(x1, 32, PostIndex)); |
| __ st1(v23.V16B(), MemOperand(x0)); |
| __ st1(v28.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v2.V16B(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), MemOperand(x0)); |
| __ st1(v12.V1D(), |
| v13.V1D(), |
| v14.V1D(), |
| v15.V1D(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st1(v30.V1D(), |
| v31.V1D(), |
| v0.V1D(), |
| v1.V1D(), |
| MemOperand(x1, 32, PostIndex)); |
| __ st1(v16.V1D(), v17.V1D(), v18.V1D(), MemOperand(x0)); |
| __ st1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v14.V1D(), v15.V1D(), v16.V1D(), MemOperand(x1, 24, PostIndex)); |
| __ st1(v18.V1D(), v19.V1D(), MemOperand(x0)); |
| __ st1(v5.V1D(), v6.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v2.V1D(), v3.V1D(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v4.V1D(), MemOperand(x0)); |
| __ st1(v27.V1D(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v23.V1D(), MemOperand(x1, 8, PostIndex)); |
| __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), MemOperand(x0)); |
| __ st1(v22.V2D(), |
| v23.V2D(), |
| v24.V2D(), |
| v25.V2D(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st1(v28.V2D(), |
| v29.V2D(), |
| v30.V2D(), |
| v31.V2D(), |
| MemOperand(x1, 64, PostIndex)); |
| __ st1(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); |
| __ st1(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v22.V2D(), v23.V2D(), v24.V2D(), MemOperand(x1, 48, PostIndex)); |
| __ st1(v21.V2D(), v22.V2D(), MemOperand(x0)); |
| __ st1(v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v27.V2D(), v28.V2D(), MemOperand(x1, 32, PostIndex)); |
| __ st1(v21.V2D(), MemOperand(x0)); |
| __ st1(v29.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v20.V2D(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x0)); |
| __ st1(v8.V2S(), |
| v9.V2S(), |
| v10.V2S(), |
| v11.V2S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st1(v15.V2S(), |
| v16.V2S(), |
| v17.V2S(), |
| v18.V2S(), |
| MemOperand(x1, 32, PostIndex)); |
| __ st1(v2.V2S(), v3.V2S(), v4.V2S(), MemOperand(x0)); |
| __ st1(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x1, 24, PostIndex)); |
| __ st1(v28.V2S(), v29.V2S(), MemOperand(x0)); |
| __ st1(v29.V2S(), v30.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v23.V2S(), v24.V2S(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v6.V2S(), MemOperand(x0)); |
| __ st1(v11.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v17.V2S(), MemOperand(x1, 8, PostIndex)); |
| __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x0)); |
| __ st1(v9.V4H(), |
| v10.V4H(), |
| v11.V4H(), |
| v12.V4H(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st1(v25.V4H(), |
| v26.V4H(), |
| v27.V4H(), |
| v28.V4H(), |
| MemOperand(x1, 32, PostIndex)); |
| __ st1(v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x0)); |
| __ st1(v10.V4H(), v11.V4H(), v12.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v12.V4H(), v13.V4H(), v14.V4H(), MemOperand(x1, 24, PostIndex)); |
| __ st1(v13.V4H(), v14.V4H(), MemOperand(x0)); |
| __ st1(v15.V4H(), v16.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v21.V4H(), v22.V4H(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v16.V4H(), MemOperand(x0)); |
| __ st1(v8.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v30.V4H(), MemOperand(x1, 8, PostIndex)); |
| __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0)); |
| __ st1(v25.V4S(), |
| v26.V4S(), |
| v27.V4S(), |
| v28.V4S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 64, PostIndex)); |
| __ st1(v31.V4S(), v0.V4S(), v1.V4S(), MemOperand(x0)); |
| __ st1(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 48, PostIndex)); |
| __ st1(v17.V4S(), v18.V4S(), MemOperand(x0)); |
| __ st1(v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v1.V4S(), v2.V4S(), MemOperand(x1, 32, PostIndex)); |
| __ st1(v26.V4S(), MemOperand(x0)); |
| __ st1(v15.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v13.V4S(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); |
| __ st1(v10.V8B(), |
| v11.V8B(), |
| v12.V8B(), |
| v13.V8B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st1(v15.V8B(), |
| v16.V8B(), |
| v17.V8B(), |
| v18.V8B(), |
| MemOperand(x1, 32, PostIndex)); |
| __ st1(v19.V8B(), v20.V8B(), v21.V8B(), MemOperand(x0)); |
| __ st1(v31.V8B(), v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v9.V8B(), v10.V8B(), v11.V8B(), MemOperand(x1, 24, PostIndex)); |
| __ st1(v12.V8B(), v13.V8B(), MemOperand(x0)); |
| __ st1(v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v0.V8B(), v1.V8B(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v16.V8B(), MemOperand(x0)); |
| __ st1(v25.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v31.V8B(), MemOperand(x1, 8, PostIndex)); |
| __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x0)); |
| __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v26.V8H(), |
| v27.V8H(), |
| v28.V8H(), |
| v29.V8H(), |
| MemOperand(x1, 64, PostIndex)); |
| __ st1(v10.V8H(), v11.V8H(), v12.V8H(), MemOperand(x0)); |
| __ st1(v21.V8H(), v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex)); |
| __ st1(v26.V8H(), v27.V8H(), MemOperand(x0)); |
| __ st1(v24.V8H(), v25.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v17.V8H(), v18.V8H(), MemOperand(x1, 32, PostIndex)); |
| __ st1(v29.V8H(), MemOperand(x0)); |
| __ st1(v19.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ st1(v23.V8H(), MemOperand(x1, 16, PostIndex)); |
| __ st1(v19.B(), 15, MemOperand(x0)); |
| __ st1(v25.B(), 9, MemOperand(x1, x2, PostIndex)); |
| __ st1(v4.B(), 8, MemOperand(x1, 1, PostIndex)); |
| __ st1(v13.D(), 0, MemOperand(x0)); |
| __ st1(v30.D(), 0, MemOperand(x1, x2, PostIndex)); |
| __ st1(v3.D(), 0, MemOperand(x1, 8, PostIndex)); |
| __ st1(v22.H(), 0, MemOperand(x0)); |
| __ st1(v31.H(), 7, MemOperand(x1, x2, PostIndex)); |
| __ st1(v23.H(), 3, MemOperand(x1, 2, PostIndex)); |
| __ st1(v0.S(), 0, MemOperand(x0)); |
| __ st1(v11.S(), 3, MemOperand(x1, x2, PostIndex)); |
| __ st1(v24.S(), 3, MemOperand(x1, 4, PostIndex)); |
| __ st2(v7.V16B(), v8.V16B(), MemOperand(x0)); |
| __ st2(v5.V16B(), v6.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ st2(v18.V16B(), v19.V16B(), MemOperand(x1, 32, PostIndex)); |
| __ st2(v14.V2D(), v15.V2D(), MemOperand(x0)); |
| __ st2(v7.V2D(), v8.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ st2(v24.V2D(), v25.V2D(), MemOperand(x1, 32, PostIndex)); |
| __ st2(v22.V2S(), v23.V2S(), MemOperand(x0)); |
| __ st2(v4.V2S(), v5.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ st2(v2.V2S(), v3.V2S(), MemOperand(x1, 16, PostIndex)); |
| __ st2(v23.V4H(), v24.V4H(), MemOperand(x0)); |
| __ st2(v8.V4H(), v9.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ st2(v7.V4H(), v8.V4H(), MemOperand(x1, 16, PostIndex)); |
| __ st2(v17.V4S(), v18.V4S(), MemOperand(x0)); |
| __ st2(v6.V4S(), v7.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ st2(v26.V4S(), v27.V4S(), MemOperand(x1, 32, PostIndex)); |
| __ st2(v31.V8B(), v0.V8B(), MemOperand(x0)); |
| __ st2(v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ st2(v21.V8B(), v22.V8B(), MemOperand(x1, 16, PostIndex)); |
| __ st2(v7.V8H(), v8.V8H(), MemOperand(x0)); |
| __ st2(v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ st2(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex)); |
| __ st2(v8.B(), v9.B(), 15, MemOperand(x0)); |
| __ st2(v8.B(), v9.B(), 15, MemOperand(x1, x2, PostIndex)); |
| __ st2(v7.B(), v8.B(), 4, MemOperand(x1, 2, PostIndex)); |
| __ st2(v25.D(), v26.D(), 0, MemOperand(x0)); |
| __ st2(v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex)); |
| __ st2(v3.D(), v4.D(), 1, MemOperand(x1, 16, PostIndex)); |
| __ st2(v4.H(), v5.H(), 3, MemOperand(x0)); |
| __ st2(v0.H(), v1.H(), 5, MemOperand(x1, x2, PostIndex)); |
| __ st2(v22.H(), v23.H(), 2, MemOperand(x1, 4, PostIndex)); |
| __ st2(v14.S(), v15.S(), 3, MemOperand(x0)); |
| __ st2(v23.S(), v24.S(), 3, MemOperand(x1, x2, PostIndex)); |
| __ st2(v0.S(), v1.S(), 2, MemOperand(x1, 8, PostIndex)); |
| __ st3(v26.V16B(), v27.V16B(), v28.V16B(), MemOperand(x0)); |
| __ st3(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex)); |
| __ st3(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, 48, PostIndex)); |
| __ st3(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); |
| __ st3(v23.V2D(), v24.V2D(), v25.V2D(), MemOperand(x1, x2, PostIndex)); |
| __ st3(v10.V2D(), v11.V2D(), v12.V2D(), MemOperand(x1, 48, PostIndex)); |
| __ st3(v9.V2S(), v10.V2S(), v11.V2S(), MemOperand(x0)); |
| __ st3(v13.V2S(), v14.V2S(), v15.V2S(), MemOperand(x1, x2, PostIndex)); |
| __ st3(v22.V2S(), v23.V2S(), v24.V2S(), MemOperand(x1, 24, PostIndex)); |
| __ st3(v31.V4H(), v0.V4H(), v1.V4H(), MemOperand(x0)); |
| __ st3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex)); |
| __ st3(v19.V4H(), v20.V4H(), v21.V4H(), MemOperand(x1, 24, PostIndex)); |
| __ st3(v18.V4S(), v19.V4S(), v20.V4S(), MemOperand(x0)); |
| __ st3(v25.V4S(), v26.V4S(), v27.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ st3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x1, 48, PostIndex)); |
| __ st3(v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); |
| __ st3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x1, x2, PostIndex)); |
| __ st3(v30.V8B(), v31.V8B(), v0.V8B(), MemOperand(x1, 24, PostIndex)); |
| __ st3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x0)); |
| __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, x2, PostIndex)); |
| __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex)); |
| __ st3(v31.B(), v0.B(), v1.B(), 10, MemOperand(x0)); |
| __ st3(v4.B(), v5.B(), v6.B(), 5, MemOperand(x1, x2, PostIndex)); |
| __ st3(v5.B(), v6.B(), v7.B(), 1, MemOperand(x1, 3, PostIndex)); |
| __ st3(v5.D(), v6.D(), v7.D(), 0, MemOperand(x0)); |
| __ st3(v6.D(), v7.D(), v8.D(), 0, MemOperand(x1, x2, PostIndex)); |
| __ st3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x1, 24, PostIndex)); |
| __ st3(v31.H(), v0.H(), v1.H(), 2, MemOperand(x0)); |
| __ st3(v14.H(), v15.H(), v16.H(), 5, MemOperand(x1, x2, PostIndex)); |
| __ st3(v21.H(), v22.H(), v23.H(), 6, MemOperand(x1, 6, PostIndex)); |
| __ st3(v21.S(), v22.S(), v23.S(), 0, MemOperand(x0)); |
| __ st3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x1, x2, PostIndex)); |
| __ st3(v15.S(), v16.S(), v17.S(), 0, MemOperand(x1, 12, PostIndex)); |
| __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), MemOperand(x0)); |
| __ st4(v24.V16B(), |
| v25.V16B(), |
| v26.V16B(), |
| v27.V16B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st4(v15.V16B(), |
| v16.V16B(), |
| v17.V16B(), |
| v18.V16B(), |
| MemOperand(x1, 64, PostIndex)); |
| __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); |
| __ st4(v17.V2D(), |
| v18.V2D(), |
| v19.V2D(), |
| v20.V2D(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st4(v9.V2D(), |
| v10.V2D(), |
| v11.V2D(), |
| v12.V2D(), |
| MemOperand(x1, 64, PostIndex)); |
| __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), MemOperand(x0)); |
| __ st4(v15.V2S(), |
| v16.V2S(), |
| v17.V2S(), |
| v18.V2S(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st4(v24.V2S(), |
| v25.V2S(), |
| v26.V2S(), |
| v27.V2S(), |
| MemOperand(x1, 32, PostIndex)); |
| __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), MemOperand(x0)); |
| __ st4(v18.V4H(), |
| v19.V4H(), |
| v20.V4H(), |
| v21.V4H(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); |
| __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), MemOperand(x0)); |
| __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), MemOperand(x1, x2, PostIndex)); |
| __ st4(v15.V4S(), |
| v16.V4S(), |
| v17.V4S(), |
| v18.V4S(), |
| MemOperand(x1, 64, PostIndex)); |
| __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); |
| __ st4(v25.V8B(), |
| v26.V8B(), |
| v27.V8B(), |
| v28.V8B(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st4(v19.V8B(), |
| v20.V8B(), |
| v21.V8B(), |
| v22.V8B(), |
| MemOperand(x1, 32, PostIndex)); |
| __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), MemOperand(x0)); |
| __ st4(v15.V8H(), |
| v16.V8H(), |
| v17.V8H(), |
| v18.V8H(), |
| MemOperand(x1, x2, PostIndex)); |
| __ st4(v31.V8H(), |
| v0.V8H(), |
| v1.V8H(), |
| v2.V8H(), |
| MemOperand(x1, 64, PostIndex)); |
| __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, MemOperand(x0)); |
| __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, MemOperand(x1, x2, PostIndex)); |
| __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, MemOperand(x1, 4, PostIndex)); |
| __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, MemOperand(x0)); |
| __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, MemOperand(x1, x2, PostIndex)); |
| __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, MemOperand(x1, 32, PostIndex)); |
| __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, MemOperand(x0)); |
| __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, MemOperand(x1, x2, PostIndex)); |
| __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, MemOperand(x1, 8, PostIndex)); |
| __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, MemOperand(x0)); |
| __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, MemOperand(x1, x2, PostIndex)); |
| __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, MemOperand(x1, 16, PostIndex)); |
| __ sub(d12, d17, d2); |
| __ sub(v20.V16B(), v24.V16B(), v8.V16B()); |
| __ sub(v8.V2D(), v29.V2D(), v5.V2D()); |
| __ sub(v2.V2S(), v28.V2S(), v24.V2S()); |
| __ sub(v24.V4H(), v10.V4H(), v4.V4H()); |
| __ sub(v28.V4S(), v4.V4S(), v17.V4S()); |
| __ sub(v16.V8B(), v27.V8B(), v2.V8B()); |
| __ sub(v20.V8H(), v10.V8H(), v13.V8H()); |
| __ subhn(v5.V2S(), v14.V2D(), v13.V2D()); |
| __ subhn(v10.V4H(), v5.V4S(), v8.V4S()); |
| __ subhn(v6.V8B(), v10.V8H(), v22.V8H()); |
| __ subhn2(v11.V16B(), v6.V8H(), v9.V8H()); |
| __ subhn2(v25.V4S(), v18.V2D(), v24.V2D()); |
| __ subhn2(v20.V8H(), v21.V4S(), v1.V4S()); |
| __ suqadd(b25, b11); |
| __ suqadd(d13, d1); |
| __ suqadd(h0, h9); |
| __ suqadd(s22, s8); |
| __ suqadd(v24.V16B(), v27.V16B()); |
| __ suqadd(v26.V2D(), v14.V2D()); |
| __ suqadd(v7.V2S(), v10.V2S()); |
| __ suqadd(v25.V4H(), v12.V4H()); |
| __ suqadd(v4.V4S(), v3.V4S()); |
| __ suqadd(v14.V8B(), v18.V8B()); |
| __ suqadd(v31.V8H(), v8.V8H()); |
| __ sxtl(v16.V2D(), v20.V2S()); |
| __ sxtl(v27.V4S(), v28.V4H()); |
| __ sxtl(v0.V8H(), v22.V8B()); |
| __ sxtl2(v6.V2D(), v7.V4S()); |
| __ sxtl2(v9.V4S(), v27.V8H()); |
| __ sxtl2(v16.V8H(), v16.V16B()); |
| __ tbl(v25.V16B(), |
| v17.V16B(), |
| v18.V16B(), |
| v19.V16B(), |
| v20.V16B(), |
| v22.V16B()); |
| __ tbl(v28.V16B(), v13.V16B(), v14.V16B(), v15.V16B(), v4.V16B()); |
| __ tbl(v3.V16B(), v0.V16B(), v1.V16B(), v2.V16B()); |
| __ tbl(v20.V16B(), v15.V16B(), v4.V16B()); |
| __ tbl(v7.V8B(), v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), v20.V8B()); |
| __ tbl(v8.V8B(), v1.V16B(), v2.V16B(), v3.V16B(), v31.V8B()); |
| __ tbl(v8.V8B(), v25.V16B(), v26.V16B(), v16.V8B()); |
| __ tbl(v11.V8B(), v19.V16B(), v30.V8B()); |
| __ tbx(v25.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v5.V16B()); |
| __ tbx(v21.V16B(), v29.V16B(), v30.V16B(), v31.V16B(), v24.V16B()); |
| __ tbx(v6.V16B(), v16.V16B(), v17.V16B(), v1.V16B()); |
| __ tbx(v13.V16B(), v3.V16B(), v20.V16B()); |
| __ tbx(v24.V8B(), v29.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v9.V8B()); |
| __ tbx(v17.V8B(), v9.V16B(), v10.V16B(), v11.V16B(), v26.V8B()); |
| __ tbx(v5.V8B(), v3.V16B(), v4.V16B(), v21.V8B()); |
| __ tbx(v16.V8B(), v11.V16B(), v29.V8B()); |
| __ trn1(v19.V16B(), v24.V16B(), v12.V16B()); |
| __ trn1(v2.V2D(), v7.V2D(), v10.V2D()); |
| __ trn1(v22.V2S(), v0.V2S(), v21.V2S()); |
| __ trn1(v12.V4H(), v15.V4H(), v20.V4H()); |
| __ trn1(v30.V4S(), v17.V4S(), v9.V4S()); |
| __ trn1(v12.V8B(), v19.V8B(), v29.V8B()); |
| __ trn1(v23.V8H(), v8.V8H(), v9.V8H()); |
| __ trn2(v28.V16B(), v30.V16B(), v25.V16B()); |
| __ trn2(v7.V2D(), v27.V2D(), v7.V2D()); |
| __ trn2(v30.V2S(), v16.V2S(), v19.V2S()); |
| __ trn2(v24.V4H(), v6.V4H(), v25.V4H()); |
| __ trn2(v2.V4S(), v19.V4S(), v11.V4S()); |
| __ trn2(v25.V8B(), v27.V8B(), v18.V8B()); |
| __ trn2(v12.V8H(), v4.V8H(), v15.V8H()); |
| __ uaba(v31.V16B(), v12.V16B(), v28.V16B()); |
| __ uaba(v18.V2S(), v5.V2S(), v14.V2S()); |
| __ uaba(v9.V4H(), v20.V4H(), v21.V4H()); |
| __ uaba(v6.V4S(), v20.V4S(), v2.V4S()); |
| __ uaba(v16.V8B(), v12.V8B(), v5.V8B()); |
| __ uaba(v15.V8H(), v26.V8H(), v30.V8H()); |
| __ uabal(v10.V2D(), v18.V2S(), v15.V2S()); |
| __ uabal(v30.V4S(), v19.V4H(), v7.V4H()); |
| __ uabal(v4.V8H(), v27.V8B(), v0.V8B()); |
| __ uabal2(v19.V2D(), v12.V4S(), v2.V4S()); |
| __ uabal2(v26.V4S(), v5.V8H(), v12.V8H()); |
| __ uabal2(v19.V8H(), v20.V16B(), v28.V16B()); |
| __ uabd(v18.V16B(), v4.V16B(), v21.V16B()); |
| __ uabd(v30.V2S(), v21.V2S(), v16.V2S()); |
| __ uabd(v8.V4H(), v28.V4H(), v25.V4H()); |
| __ uabd(v28.V4S(), v12.V4S(), v21.V4S()); |
| __ uabd(v19.V8B(), v16.V8B(), v28.V8B()); |
| __ uabd(v9.V8H(), v12.V8H(), v29.V8H()); |
| __ uabdl(v26.V2D(), v0.V2S(), v8.V2S()); |
| __ uabdl(v29.V4S(), v31.V4H(), v25.V4H()); |
| __ uabdl(v27.V8H(), v29.V8B(), v14.V8B()); |
| __ uabdl2(v20.V2D(), v20.V4S(), v8.V4S()); |
| __ uabdl2(v22.V4S(), v15.V8H(), v18.V8H()); |
| __ uabdl2(v9.V8H(), v18.V16B(), v23.V16B()); |
| __ uadalp(v9.V1D(), v15.V2S()); |
| __ uadalp(v14.V2D(), v12.V4S()); |
| __ uadalp(v28.V2S(), v12.V4H()); |
| __ uadalp(v0.V4H(), v17.V8B()); |
| __ uadalp(v1.V4S(), v29.V8H()); |
| __ uadalp(v15.V8H(), v22.V16B()); |
| __ uaddl(v1.V2D(), v20.V2S(), v27.V2S()); |
| __ uaddl(v31.V4S(), v25.V4H(), v5.V4H()); |
| __ uaddl(v12.V8H(), v3.V8B(), v3.V8B()); |
| __ uaddl2(v5.V2D(), v23.V4S(), v6.V4S()); |
| __ uaddl2(v1.V4S(), v5.V8H(), v25.V8H()); |
| __ uaddl2(v22.V8H(), v30.V16B(), v28.V16B()); |
| __ uaddlp(v7.V1D(), v9.V2S()); |
| __ uaddlp(v26.V2D(), v4.V4S()); |
| __ uaddlp(v28.V2S(), v1.V4H()); |
| __ uaddlp(v20.V4H(), v31.V8B()); |
| __ uaddlp(v16.V4S(), v17.V8H()); |
| __ uaddlp(v6.V8H(), v2.V16B()); |
| __ uaddlv(d28, v22.V4S()); |
| __ uaddlv(h0, v19.V16B()); |
| __ uaddlv(h30, v30.V8B()); |
| __ uaddlv(s24, v18.V4H()); |
| __ uaddlv(s10, v0.V8H()); |
| __ uaddw(v9.V2D(), v17.V2D(), v14.V2S()); |
| __ uaddw(v9.V4S(), v25.V4S(), v3.V4H()); |
| __ uaddw(v18.V8H(), v1.V8H(), v0.V8B()); |
| __ uaddw2(v18.V2D(), v5.V2D(), v6.V4S()); |
| __ uaddw2(v17.V4S(), v15.V4S(), v11.V8H()); |
| __ uaddw2(v29.V8H(), v11.V8H(), v7.V16B()); |
| __ uhadd(v13.V16B(), v9.V16B(), v3.V16B()); |
| __ uhadd(v17.V2S(), v25.V2S(), v24.V2S()); |
| __ uhadd(v25.V4H(), v23.V4H(), v13.V4H()); |
| __ uhadd(v0.V4S(), v20.V4S(), v16.V4S()); |
| __ uhadd(v5.V8B(), v5.V8B(), v25.V8B()); |
| __ uhadd(v3.V8H(), v29.V8H(), v18.V8H()); |
| __ uhsub(v1.V16B(), v22.V16B(), v13.V16B()); |
| __ uhsub(v14.V2S(), v30.V2S(), v30.V2S()); |
| __ uhsub(v29.V4H(), v14.V4H(), v17.V4H()); |
| __ uhsub(v26.V4S(), v5.V4S(), v18.V4S()); |
| __ uhsub(v3.V8B(), v7.V8B(), v12.V8B()); |
| __ uhsub(v25.V8H(), v21.V8H(), v5.V8H()); |
| __ umax(v28.V16B(), v12.V16B(), v6.V16B()); |
| __ umax(v20.V2S(), v19.V2S(), v26.V2S()); |
| __ umax(v0.V4H(), v31.V4H(), v18.V4H()); |
| __ umax(v6.V4S(), v21.V4S(), v28.V4S()); |
| __ umax(v0.V8B(), v2.V8B(), v20.V8B()); |
| __ umax(v4.V8H(), v11.V8H(), v22.V8H()); |
| __ umaxp(v1.V16B(), v6.V16B(), v29.V16B()); |
| __ umaxp(v19.V2S(), v17.V2S(), v27.V2S()); |
| __ umaxp(v21.V4H(), v16.V4H(), v7.V4H()); |
| __ umaxp(v9.V4S(), v20.V4S(), v29.V4S()); |
| __ umaxp(v13.V8B(), v1.V8B(), v16.V8B()); |
| __ umaxp(v19.V8H(), v23.V8H(), v26.V8H()); |
| __ umaxv(b17, v30.V16B()); |
| __ umaxv(b23, v12.V8B()); |
| __ umaxv(h31, v15.V4H()); |
| __ umaxv(h15, v25.V8H()); |
| __ umaxv(s18, v21.V4S()); |
| __ umin(v22.V16B(), v0.V16B(), v18.V16B()); |
| __ umin(v1.V2S(), v21.V2S(), v16.V2S()); |
| __ umin(v17.V4H(), v4.V4H(), v25.V4H()); |
| __ umin(v24.V4S(), v26.V4S(), v13.V4S()); |
| __ umin(v20.V8B(), v1.V8B(), v5.V8B()); |
| __ umin(v26.V8H(), v25.V8H(), v23.V8H()); |
| __ uminp(v5.V16B(), v1.V16B(), v23.V16B()); |
| __ uminp(v7.V2S(), v26.V2S(), v30.V2S()); |
| __ uminp(v9.V4H(), v5.V4H(), v25.V4H()); |
| __ uminp(v23.V4S(), v10.V4S(), v1.V4S()); |
| __ uminp(v4.V8B(), v29.V8B(), v14.V8B()); |
| __ uminp(v21.V8H(), v0.V8H(), v14.V8H()); |
| __ uminv(b0, v17.V16B()); |
| __ uminv(b0, v31.V8B()); |
| __ uminv(h24, v0.V4H()); |
| __ uminv(h29, v14.V8H()); |
| __ uminv(s30, v3.V4S()); |
| __ umlal(v11.V2D(), v11.V2S(), v24.V2S()); |
| __ umlal(v30.V2D(), v16.V2S(), v11.S(), 3); |
| __ umlal(v0.V4S(), v9.V4H(), v26.V4H()); |
| __ umlal(v20.V4S(), v24.V4H(), v12.H(), 4); |
| __ umlal(v16.V8H(), v21.V8B(), v6.V8B()); |
| __ umlal2(v17.V2D(), v19.V4S(), v23.V4S()); |
| __ umlal2(v5.V2D(), v30.V4S(), v8.S(), 0); |
| __ umlal2(v16.V4S(), v8.V8H(), v15.V8H()); |
| __ umlal2(v15.V4S(), v26.V8H(), v1.H(), 5); |
| __ umlal2(v30.V8H(), v1.V16B(), v17.V16B()); |
| __ umlsl(v18.V2D(), v19.V2S(), v28.V2S()); |
| __ umlsl(v7.V2D(), v7.V2S(), v8.S(), 0); |
| __ umlsl(v24.V4S(), v8.V4H(), v4.V4H()); |
| __ umlsl(v18.V4S(), v22.V4H(), v12.H(), 4); |
| __ umlsl(v28.V8H(), v14.V8B(), v20.V8B()); |
| __ umlsl2(v11.V2D(), v0.V4S(), v9.V4S()); |
| __ umlsl2(v26.V2D(), v16.V4S(), v9.S(), 2); |
| __ umlsl2(v3.V4S(), v11.V8H(), v9.V8H()); |
| __ umlsl2(v10.V4S(), v25.V8H(), v9.H(), 4); |
| __ umlsl2(v24.V8H(), v16.V16B(), v28.V16B()); |
| __ umov(x30, v25.D(), 1); |
| __ umull(v12.V2D(), v10.V2S(), v29.V2S()); |
| __ umull(v22.V2D(), v30.V2S(), v5.S(), 3); |
| __ umull(v7.V4S(), v0.V4H(), v25.V4H()); |
| __ umull(v11.V4S(), v13.V4H(), v3.H(), 2); |
| __ umull(v25.V8H(), v16.V8B(), v10.V8B()); |
| __ umull2(v17.V2D(), v3.V4S(), v26.V4S()); |
| __ umull2(v26.V2D(), v11.V4S(), v2.S(), 3); |
| __ umull2(v12.V4S(), v17.V8H(), v23.V8H()); |
| __ umull2(v4.V4S(), v31.V8H(), v1.H(), 2); |
| __ umull2(v5.V8H(), v12.V16B(), v17.V16B()); |
| __ uqadd(b30, b4, b28); |
| __ uqadd(d27, d20, d16); |
| __ uqadd(h7, h14, h28); |
| __ uqadd(s28, s17, s4); |
| __ uqadd(v19.V16B(), v22.V16B(), v21.V16B()); |
| __ uqadd(v16.V2D(), v4.V2D(), v11.V2D()); |
| __ uqadd(v20.V2S(), v14.V2S(), v4.V2S()); |
| __ uqadd(v5.V4H(), v0.V4H(), v16.V4H()); |
| __ uqadd(v21.V4S(), v31.V4S(), v9.V4S()); |
| __ uqadd(v23.V8B(), v24.V8B(), v3.V8B()); |
| __ uqadd(v17.V8H(), v27.V8H(), v11.V8H()); |
| __ uqrshl(b10, b22, b10); |
| __ uqrshl(d29, d5, d11); |
| __ uqrshl(h27, h24, h30); |
| __ uqrshl(s10, s13, s8); |
| __ uqrshl(v9.V16B(), v18.V16B(), v14.V16B()); |
| __ uqrshl(v24.V2D(), v15.V2D(), v17.V2D()); |
| __ uqrshl(v4.V2S(), v14.V2S(), v27.V2S()); |
| __ uqrshl(v15.V4H(), v5.V4H(), v8.V4H()); |
| __ uqrshl(v21.V4S(), v29.V4S(), v0.V4S()); |
| __ uqrshl(v16.V8B(), v24.V8B(), v9.V8B()); |
| __ uqrshl(v2.V8H(), v0.V8H(), v15.V8H()); |
| __ uqrshrn(b11, h26, 4); |
| __ uqrshrn(h7, s30, 5); |
| __ uqrshrn(s10, d8, 21); |
| __ uqrshrn(v15.V2S(), v6.V2D(), 11); |
| __ uqrshrn(v5.V4H(), v26.V4S(), 12); |
| __ uqrshrn(v28.V8B(), v25.V8H(), 5); |
| __ uqrshrn2(v25.V16B(), v30.V8H(), 2); |
| __ uqrshrn2(v21.V4S(), v14.V2D(), 32); |
| __ uqrshrn2(v13.V8H(), v7.V4S(), 2); |
| __ uqshl(b13, b0, b23); |
| __ uqshl(b9, b17, 4); |
| __ uqshl(d23, d6, d4); |
| __ uqshl(d8, d11, 44); |
| __ uqshl(h19, h13, h15); |
| __ uqshl(h25, h26, 6); |
| __ uqshl(s4, s24, s10); |
| __ uqshl(s19, s14, 1); |
| __ uqshl(v14.V16B(), v30.V16B(), v25.V16B()); |
| __ uqshl(v6.V16B(), v10.V16B(), 5); |
| __ uqshl(v18.V2D(), v8.V2D(), v7.V2D()); |
| __ uqshl(v25.V2D(), v14.V2D(), 18); |
| __ uqshl(v25.V2S(), v16.V2S(), v23.V2S()); |
| __ uqshl(v13.V2S(), v15.V2S(), 31); |
| __ uqshl(v28.V4H(), v24.V4H(), v15.V4H()); |
| __ uqshl(v4.V4H(), v17.V4H(), 1); |
| __ uqshl(v9.V4S(), v31.V4S(), v23.V4S()); |
| __ uqshl(v18.V4S(), v28.V4S(), 31); |
| __ uqshl(v31.V8B(), v21.V8B(), v15.V8B()); |
| __ uqshl(v6.V8B(), v21.V8B(), 1); |
| __ uqshl(v28.V8H(), v2.V8H(), v17.V8H()); |
| __ uqshl(v24.V8H(), v8.V8H(), 14); |
| __ uqshrn(b21, h27, 7); |
| __ uqshrn(h28, s26, 11); |
| __ uqshrn(s13, d31, 17); |
| __ uqshrn(v21.V2S(), v16.V2D(), 8); |
| __ uqshrn(v24.V4H(), v24.V4S(), 2); |
| __ uqshrn(v5.V8B(), v1.V8H(), 8); |
| __ uqshrn2(v16.V16B(), v29.V8H(), 6); |
| __ uqshrn2(v2.V4S(), v6.V2D(), 1); |
| __ uqshrn2(v16.V8H(), v10.V4S(), 14); |
| __ uqsub(b28, b20, b26); |
| __ uqsub(d0, d7, d10); |
| __ uqsub(h26, h24, h7); |
| __ uqsub(s23, s23, s16); |
| __ uqsub(v14.V16B(), v16.V16B(), v24.V16B()); |
| __ uqsub(v11.V2D(), v17.V2D(), v6.V2D()); |
| __ uqsub(v10.V2S(), v10.V2S(), v8.V2S()); |
| __ uqsub(v9.V4H(), v15.V4H(), v12.V4H()); |
| __ uqsub(v23.V4S(), v18.V4S(), v7.V4S()); |
| __ uqsub(v9.V8B(), v19.V8B(), v17.V8B()); |
| __ uqsub(v20.V8H(), v2.V8H(), v6.V8H()); |
| __ uqxtn(b29, h19); |
| __ uqxtn(h0, s13); |
| __ uqxtn(s26, d22); |
| __ uqxtn(v5.V2S(), v31.V2D()); |
| __ uqxtn(v30.V4H(), v19.V4S()); |
| __ uqxtn(v15.V8B(), v2.V8H()); |
| __ uqxtn2(v29.V16B(), v3.V8H()); |
| __ uqxtn2(v13.V4S(), v17.V2D()); |
| __ uqxtn2(v28.V8H(), v11.V4S()); |
| __ urecpe(v23.V2S(), v15.V2S()); |
| __ urecpe(v27.V4S(), v7.V4S()); |
| __ urhadd(v2.V16B(), v15.V16B(), v27.V16B()); |
| __ urhadd(v15.V2S(), v1.V2S(), v18.V2S()); |
| __ urhadd(v17.V4H(), v4.V4H(), v26.V4H()); |
| __ urhadd(v2.V4S(), v27.V4S(), v14.V4S()); |
| __ urhadd(v5.V8B(), v17.V8B(), v14.V8B()); |
| __ urhadd(v30.V8H(), v2.V8H(), v25.V8H()); |
| __ urshl(d4, d28, d30); |
| __ urshl(v13.V16B(), v31.V16B(), v19.V16B()); |
| __ urshl(v14.V2D(), v23.V2D(), v21.V2D()); |
| __ urshl(v10.V2S(), v7.V2S(), v8.V2S()); |
| __ urshl(v15.V4H(), v21.V4H(), v28.V4H()); |
| __ urshl(v30.V4S(), v8.V4S(), v23.V4S()); |
| __ urshl(v31.V8B(), v20.V8B(), v5.V8B()); |
| __ urshl(v30.V8H(), v27.V8H(), v30.V8H()); |
| __ urshr(d4, d13, 49); |
| __ urshr(v2.V16B(), v20.V16B(), 1); |
| __ urshr(v13.V2D(), v11.V2D(), 51); |
| __ urshr(v21.V2S(), v31.V2S(), 10); |
| __ urshr(v21.V4H(), v17.V4H(), 11); |
| __ urshr(v4.V4S(), v22.V4S(), 1); |
| __ urshr(v0.V8B(), v1.V8B(), 7); |
| __ urshr(v13.V8H(), v20.V8H(), 1); |
| __ ursqrte(v20.V2S(), v16.V2S()); |
| __ ursqrte(v28.V4S(), v8.V4S()); |
| __ ursra(d27, d16, 45); |
| __ ursra(v18.V16B(), v17.V16B(), 3); |
| __ ursra(v26.V2D(), v28.V2D(), 58); |
| __ ursra(v8.V2S(), v22.V2S(), 31); |
| __ ursra(v31.V4H(), v4.V4H(), 7); |
| __ ursra(v31.V4S(), v15.V4S(), 2); |
| __ ursra(v3.V8B(), v1.V8B(), 5); |
| __ ursra(v18.V8H(), v14.V8H(), 13); |
| __ ushl(d31, d0, d16); |
| __ ushl(v0.V16B(), v6.V16B(), v2.V16B()); |
| __ ushl(v18.V2D(), v1.V2D(), v18.V2D()); |
| __ ushl(v27.V2S(), v7.V2S(), v29.V2S()); |
| __ ushl(v14.V4H(), v14.V4H(), v13.V4H()); |
| __ ushl(v22.V4S(), v4.V4S(), v9.V4S()); |
| __ ushl(v23.V8B(), v22.V8B(), v27.V8B()); |
| __ ushl(v21.V8H(), v25.V8H(), v8.V8H()); |
| __ ushll(v11.V2D(), v0.V2S(), 21); |
| __ ushll(v2.V4S(), v17.V4H(), 8); |
| __ ushll(v11.V8H(), v14.V8B(), 1); |
| __ ushll2(v8.V2D(), v29.V4S(), 7); |
| __ ushll2(v29.V4S(), v9.V8H(), 2); |
| __ ushll2(v5.V8H(), v24.V16B(), 6); |
| __ ushr(d28, d27, 53); |
| __ ushr(v1.V16B(), v9.V16B(), 7); |
| __ ushr(v2.V2D(), v24.V2D(), 43); |
| __ ushr(v30.V2S(), v25.V2S(), 11); |
| __ ushr(v10.V4H(), v26.V4H(), 12); |
| __ ushr(v4.V4S(), v5.V4S(), 30); |
| __ ushr(v30.V8B(), v2.V8B(), 1); |
| __ ushr(v6.V8H(), v12.V8H(), 2); |
| __ usqadd(b19, b5); |
| __ usqadd(d9, d2); |
| __ usqadd(h2, h16); |
| __ usqadd(s16, s3); |
| __ usqadd(v31.V16B(), v29.V16B()); |
| __ usqadd(v8.V2D(), v10.V2D()); |
| __ usqadd(v18.V2S(), v9.V2S()); |
| __ usqadd(v24.V4H(), v14.V4H()); |
| __ usqadd(v10.V4S(), v30.V4S()); |
| __ usqadd(v16.V8B(), v20.V8B()); |
| __ usqadd(v12.V8H(), v16.V8H()); |
| __ usra(d28, d27, 37); |
| __ usra(v5.V16B(), v22.V16B(), 5); |
| __ usra(v2.V2D(), v19.V2D(), 33); |
| __ usra(v0.V2S(), v0.V2S(), 21); |
| __ usra(v7.V4H(), v6.V4H(), 12); |
| __ usra(v4.V4S(), v17.V4S(), 9); |
| __ usra(v9.V8B(), v12.V8B(), 7); |
| __ usra(v3.V8H(), v27.V8H(), 14); |
| __ usubl(v29.V2D(), v12.V2S(), v30.V2S()); |
| __ usubl(v29.V4S(), v28.V4H(), v6.V4H()); |
| __ usubl(v12.V8H(), v4.V8B(), v14.V8B()); |
| __ usubl2(v1.V2D(), v24.V4S(), v17.V4S()); |
| __ usubl2(v4.V4S(), v1.V8H(), v3.V8H()); |
| __ usubl2(v23.V8H(), v4.V16B(), v7.V16B()); |
| __ usubw(v9.V2D(), v20.V2D(), v30.V2S()); |
| __ usubw(v20.V4S(), v16.V4S(), v23.V4H()); |
| __ usubw(v25.V8H(), v8.V8H(), v29.V8B()); |
| __ usubw2(v18.V2D(), v29.V2D(), v6.V4S()); |
| __ usubw2(v6.V4S(), v6.V4S(), v20.V8H()); |
| __ usubw2(v18.V8H(), v4.V8H(), v16.V16B()); |
| __ uxtl(v27.V2D(), v21.V2S()); |
| __ uxtl(v0.V4S(), v31.V4H()); |
| __ uxtl(v27.V8H(), v10.V8B()); |
| __ uxtl2(v6.V2D(), v16.V4S()); |
| __ uxtl2(v22.V4S(), v20.V8H()); |
| __ uxtl2(v20.V8H(), v21.V16B()); |
| __ uzp1(v30.V16B(), v9.V16B(), v17.V16B()); |
| __ uzp1(v7.V2D(), v26.V2D(), v28.V2D()); |
| __ uzp1(v26.V2S(), v16.V2S(), v22.V2S()); |
| __ uzp1(v14.V4H(), v19.V4H(), v6.V4H()); |
| __ uzp1(v17.V4S(), v23.V4S(), v30.V4S()); |
| __ uzp1(v28.V8B(), v27.V8B(), v13.V8B()); |
| __ uzp1(v17.V8H(), v1.V8H(), v12.V8H()); |
| __ uzp2(v8.V16B(), v18.V16B(), v26.V16B()); |
| __ uzp2(v21.V2D(), v22.V2D(), v24.V2D()); |
| __ uzp2(v20.V2S(), v21.V2S(), v2.V2S()); |
| __ uzp2(v16.V4H(), v31.V4H(), v6.V4H()); |
| __ uzp2(v25.V4S(), v11.V4S(), v8.V4S()); |
| __ uzp2(v31.V8B(), v31.V8B(), v13.V8B()); |
| __ uzp2(v8.V8H(), v17.V8H(), v1.V8H()); |
| __ xtn(v17.V2S(), v26.V2D()); |
| __ xtn(v3.V4H(), v0.V4S()); |
| __ xtn(v18.V8B(), v8.V8H()); |
| __ xtn2(v0.V16B(), v0.V8H()); |
| __ xtn2(v15.V4S(), v4.V2D()); |
| __ xtn2(v31.V8H(), v18.V4S()); |
| __ zip1(v22.V16B(), v9.V16B(), v6.V16B()); |
| __ zip1(v23.V2D(), v11.V2D(), v2.V2D()); |
| __ zip1(v26.V2S(), v16.V2S(), v9.V2S()); |
| __ zip1(v1.V4H(), v9.V4H(), v7.V4H()); |
| __ zip1(v0.V4S(), v30.V4S(), v20.V4S()); |
| __ zip1(v30.V8B(), v17.V8B(), v15.V8B()); |
| __ zip1(v17.V8H(), v8.V8H(), v2.V8H()); |
| __ zip2(v23.V16B(), v10.V16B(), v11.V16B()); |
| __ zip2(v30.V2D(), v6.V2D(), v14.V2D()); |
| __ zip2(v9.V2S(), v10.V2S(), v21.V2S()); |
| __ zip2(v8.V4H(), v24.V4H(), v29.V4H()); |
| __ zip2(v0.V4S(), v21.V4S(), v23.V4S()); |
| __ zip2(v25.V8B(), v23.V8B(), v30.V8B()); |
| __ zip2(v7.V8H(), v10.V8H(), v30.V8H()); |
| } // NOLINT(readability/fn_size) |
| |
| |
// Emits a fixed, straight-line sequence of NEON floating-point instructions
// into `masm`.
//
// Each mnemonic is emitted once per operand form listed below (vector
// arrangements such as 2D/2S/4S, scalar d/s registers, forms with a trailing
// index, and forms with an immediate). Register numbers and immediates are
// fixed values.
// NOTE(review): TraceTestHelper runs this sequence in the simulator and diffs
// the resulting trace against a reference file, so presumably any change to
// the instructions, their operands or their order requires regenerating the
// reference traces -- confirm before editing.
| static void GenerateTestSequenceNEONFP(MacroAssembler* masm) { |
// The scope is opened over every byte still available in the buffer, with
// kMaximumSize. Presumably this lets the `__` calls below emit any amount of
// code up to that limit -- confirm against ExactAssemblyScope.
| ExactAssemblyScope guard(masm, |
| masm->GetBuffer()->GetRemainingBytes(), |
| ExactAssemblyScope::kMaximumSize); |
| |
| // NEON floating point instructions. |
| __ fabd(v3.V2D(), v25.V2D(), v8.V2D()); |
| __ fabd(v14.V2S(), v27.V2S(), v11.V2S()); |
| __ fabd(v9.V4S(), v22.V4S(), v18.V4S()); |
| __ fabs(v1.V2D(), v29.V2D()); |
| __ fabs(v6.V2S(), v21.V2S()); |
| __ fabs(v12.V4S(), v25.V4S()); |
| __ facge(v18.V2D(), v5.V2D(), v0.V2D()); |
| __ facge(v15.V2S(), v11.V2S(), v6.V2S()); |
| __ facge(v30.V4S(), v10.V4S(), v25.V4S()); |
| __ facgt(v28.V2D(), v16.V2D(), v31.V2D()); |
| __ facgt(v15.V2S(), v1.V2S(), v4.V2S()); |
| __ facgt(v22.V4S(), v3.V4S(), v10.V4S()); |
| __ fadd(v7.V2D(), v10.V2D(), v24.V2D()); |
| __ fadd(v10.V2S(), v23.V2S(), v7.V2S()); |
| __ fadd(v16.V4S(), v22.V4S(), v11.V4S()); |
| __ faddp(d27, v28.V2D()); |
| __ faddp(s20, v23.V2S()); |
| __ faddp(v21.V2D(), v4.V2D(), v11.V2D()); |
| __ faddp(v31.V2S(), v26.V2S(), v1.V2S()); |
| __ faddp(v13.V4S(), v27.V4S(), v28.V4S()); |
// fcm* comparisons: register-register forms and forms taking a literal 0.0.
// fcmle and fcmlt appear only in the 0.0 form here.
| __ fcmeq(v17.V2D(), v13.V2D(), v20.V2D()); |
| __ fcmeq(v24.V2D(), v16.V2D(), 0.0); |
| __ fcmeq(v26.V2S(), v17.V2S(), v10.V2S()); |
| __ fcmeq(v24.V2S(), v4.V2S(), 0.0); |
| __ fcmeq(v8.V4S(), v4.V4S(), v14.V4S()); |
| __ fcmeq(v26.V4S(), v25.V4S(), 0.0); |
| __ fcmge(v27.V2D(), v0.V2D(), v0.V2D()); |
| __ fcmge(v22.V2D(), v30.V2D(), 0.0); |
| __ fcmge(v7.V2S(), v21.V2S(), v25.V2S()); |
| __ fcmge(v15.V2S(), v15.V2S(), 0.0); |
| __ fcmge(v29.V4S(), v4.V4S(), v27.V4S()); |
| __ fcmge(v22.V4S(), v21.V4S(), 0.0); |
| __ fcmgt(v1.V2D(), v26.V2D(), v15.V2D()); |
| __ fcmgt(v15.V2D(), v23.V2D(), 0.0); |
| __ fcmgt(v21.V2S(), v16.V2S(), v6.V2S()); |
| __ fcmgt(v1.V2S(), v13.V2S(), 0.0); |
| __ fcmgt(v14.V4S(), v0.V4S(), v25.V4S()); |
| __ fcmgt(v13.V4S(), v8.V4S(), 0.0); |
| __ fcmle(v4.V2D(), v6.V2D(), 0.0); |
| __ fcmle(v24.V2S(), v31.V2S(), 0.0); |
| __ fcmle(v8.V4S(), v23.V4S(), 0.0); |
| __ fcmlt(v7.V2D(), v3.V2D(), 0.0); |
| __ fcmlt(v15.V2S(), v21.V2S(), 0.0); |
| __ fcmlt(v1.V4S(), v2.V4S(), 0.0); |
// fcvt* family. fcvtzs and fcvtzu are also emitted with an extra integer
// operand.
| __ fcvtas(v6.V2D(), v8.V2D()); |
| __ fcvtas(v1.V2S(), v9.V2S()); |
| __ fcvtas(v8.V4S(), v19.V4S()); |
| __ fcvtau(v5.V2D(), v31.V2D()); |
| __ fcvtau(v28.V2S(), v29.V2S()); |
| __ fcvtau(v11.V4S(), v26.V4S()); |
| __ fcvtl(v8.V2D(), v25.V2S()); |
| __ fcvtl(v27.V4S(), v14.V4H()); |
| __ fcvtl2(v1.V2D(), v6.V4S()); |
| __ fcvtl2(v24.V4S(), v9.V8H()); |
| __ fcvtms(v9.V2D(), v24.V2D()); |
| __ fcvtms(v7.V2S(), v11.V2S()); |
| __ fcvtms(v23.V4S(), v21.V4S()); |
| __ fcvtmu(v13.V2D(), v1.V2D()); |
| __ fcvtmu(v26.V2S(), v12.V2S()); |
| __ fcvtmu(v21.V4S(), v21.V4S()); |
| __ fcvtn(v11.V2S(), v1.V2D()); |
| __ fcvtn(v8.V4H(), v2.V4S()); |
| __ fcvtn2(v24.V4S(), v29.V2D()); |
| __ fcvtn2(v4.V8H(), v10.V4S()); |
| __ fcvtns(v25.V2D(), v10.V2D()); |
| __ fcvtns(v4.V2S(), v8.V2S()); |
| __ fcvtns(v29.V4S(), v27.V4S()); |
| __ fcvtnu(v18.V2D(), v27.V2D()); |
| __ fcvtnu(v11.V2S(), v14.V2S()); |
| __ fcvtnu(v27.V4S(), v21.V4S()); |
| __ fcvtps(v23.V2D(), v5.V2D()); |
| __ fcvtps(v24.V2S(), v15.V2S()); |
| __ fcvtps(v5.V4S(), v19.V4S()); |
| __ fcvtpu(v3.V2D(), v21.V2D()); |
| __ fcvtpu(v3.V2S(), v21.V2S()); |
| __ fcvtpu(v0.V4S(), v7.V4S()); |
| __ fcvtxn(v29.V2S(), v11.V2D()); |
| __ fcvtxn2(v31.V4S(), v25.V2D()); |
| __ fcvtzs(v19.V2D(), v17.V2D()); |
| __ fcvtzs(v12.V2D(), v24.V2D(), 64); |
| __ fcvtzs(v9.V2S(), v2.V2S()); |
| __ fcvtzs(v5.V2S(), v20.V2S(), 29); |
| __ fcvtzs(v21.V4S(), v25.V4S()); |
| __ fcvtzs(v26.V4S(), v1.V4S(), 6); |
| __ fcvtzu(v13.V2D(), v25.V2D()); |
| __ fcvtzu(v28.V2D(), v13.V2D(), 32); |
| __ fcvtzu(v26.V2S(), v6.V2S()); |
| __ fcvtzu(v9.V2S(), v10.V2S(), 15); |
| __ fcvtzu(v30.V4S(), v6.V4S()); |
| __ fcvtzu(v19.V4S(), v22.V4S(), 18); |
| __ fdiv(v15.V2D(), v8.V2D(), v15.V2D()); |
| __ fdiv(v12.V2S(), v9.V2S(), v26.V2S()); |
| __ fdiv(v19.V4S(), v22.V4S(), v19.V4S()); |
// fmax*/fmin* family, including the ...nm, ...p and ...v suffixed mnemonics.
| __ fmax(v19.V2D(), v7.V2D(), v8.V2D()); |
| __ fmax(v25.V2S(), v12.V2S(), v29.V2S()); |
| __ fmax(v6.V4S(), v15.V4S(), v5.V4S()); |
| __ fmaxnm(v16.V2D(), v8.V2D(), v20.V2D()); |
| __ fmaxnm(v15.V2S(), v26.V2S(), v25.V2S()); |
| __ fmaxnm(v23.V4S(), v14.V4S(), v16.V4S()); |
| __ fmaxnmp(d6, v19.V2D()); |
| __ fmaxnmp(s27, v26.V2S()); |
| __ fmaxnmp(v8.V2D(), v12.V2D(), v23.V2D()); |
| __ fmaxnmp(v13.V2S(), v25.V2S(), v22.V2S()); |
| __ fmaxnmp(v15.V4S(), v11.V4S(), v17.V4S()); |
| __ fmaxnmv(s27, v19.V4S()); |
| __ fmaxp(d20, v14.V2D()); |
| __ fmaxp(s18, v2.V2S()); |
| __ fmaxp(v9.V2D(), v23.V2D(), v31.V2D()); |
| __ fmaxp(v7.V2S(), v22.V2S(), v31.V2S()); |
| __ fmaxp(v18.V4S(), v7.V4S(), v29.V4S()); |
| __ fmaxv(s31, v29.V4S()); |
| __ fmin(v2.V2D(), v5.V2D(), v2.V2D()); |
| __ fmin(v31.V2S(), v17.V2S(), v10.V2S()); |
| __ fmin(v10.V4S(), v4.V4S(), v16.V4S()); |
| __ fminnm(v21.V2D(), v6.V2D(), v5.V2D()); |
| __ fminnm(v22.V2S(), v18.V2S(), v14.V2S()); |
| __ fminnm(v25.V4S(), v31.V4S(), v3.V4S()); |
| __ fminnmp(d9, v1.V2D()); |
| __ fminnmp(s21, v20.V2S()); |
| __ fminnmp(v16.V2D(), v21.V2D(), v19.V2D()); |
| __ fminnmp(v16.V2S(), v31.V2S(), v25.V2S()); |
| __ fminnmp(v26.V4S(), v16.V4S(), v15.V4S()); |
| __ fminnmv(s3, v4.V4S()); |
| __ fminp(d24, v26.V2D()); |
| __ fminp(s7, v17.V2S()); |
| __ fminp(v23.V2D(), v19.V2D(), v3.V2D()); |
| __ fminp(v29.V2S(), v21.V2S(), v9.V2S()); |
| __ fminp(v0.V4S(), v24.V4S(), v21.V4S()); |
| __ fminv(s25, v8.V4S()); |
// fmla/fmls: three-register vector forms, plus scalar and vector forms whose
// last register operand is a `.D()`/`.S()` view followed by an index.
| __ fmla(d23, d0, v9.D(), 1); |
| __ fmla(s23, s15, v7.S(), 0); |
| __ fmla(v17.V2D(), v11.V2D(), v6.V2D()); |
| __ fmla(v30.V2D(), v30.V2D(), v11.D(), 0); |
| __ fmla(v19.V2S(), v12.V2S(), v6.V2S()); |
| __ fmla(v24.V2S(), v17.V2S(), v9.S(), 0); |
| __ fmla(v16.V4S(), v11.V4S(), v11.V4S()); |
| __ fmla(v27.V4S(), v23.V4S(), v9.S(), 2); |
| __ fmls(d27, d30, v6.D(), 0); |
| __ fmls(s21, s16, v2.S(), 0); |
| __ fmls(v5.V2D(), v19.V2D(), v21.V2D()); |
| __ fmls(v18.V2D(), v30.V2D(), v12.D(), 0); |
| __ fmls(v5.V2S(), v16.V2S(), v7.V2S()); |
| __ fmls(v3.V2S(), v18.V2S(), v11.S(), 1); |
| __ fmls(v27.V4S(), v5.V4S(), v30.V4S()); |
| __ fmls(v26.V4S(), v20.V4S(), v4.S(), 3); |
// fmov: floating-point immediates into vectors, and moves between an X
// register and index 1 of a `.D()` view (in both directions).
| __ fmov(v14.V2D(), -0.34375); |
| __ fmov(v26.V2S(), 0.90625f); |
| __ fmov(v31.V4S(), -5.0000f); |
| __ fmov(v28.D(), 1, x25); |
| __ fmov(x18, v2.D(), 1); |
// fmul/fmulx: same operand forms as fmla/fmls above.
| __ fmul(d12, d4, v1.D(), 1); |
| __ fmul(s30, s1, v15.S(), 3); |
| __ fmul(v25.V2D(), v0.V2D(), v21.V2D()); |
| __ fmul(v10.V2D(), v24.V2D(), v10.D(), 1); |
| __ fmul(v7.V2S(), v24.V2S(), v16.V2S()); |
| __ fmul(v1.V2S(), v16.V2S(), v4.S(), 2); |
| __ fmul(v5.V4S(), v28.V4S(), v25.V4S()); |
| __ fmul(v11.V4S(), v3.V4S(), v8.S(), 0); |
| __ fmulx(d28, d9, v3.D(), 1); |
| __ fmulx(s25, s21, v15.S(), 1); |
| __ fmulx(v31.V2D(), v28.V2D(), v8.V2D()); |
| __ fmulx(v3.V2D(), v21.V2D(), v6.D(), 0); |
| __ fmulx(v9.V2S(), v1.V2S(), v0.V2S()); |
| __ fmulx(v16.V2S(), v27.V2S(), v6.S(), 0); |
| __ fmulx(v2.V4S(), v4.V4S(), v5.V4S()); |
| __ fmulx(v18.V4S(), v7.V4S(), v4.S(), 0); |
| __ fneg(v1.V2D(), v25.V2D()); |
| __ fneg(v14.V2S(), v31.V2S()); |
| __ fneg(v5.V4S(), v4.V4S()); |
| __ frecpe(v18.V2D(), v12.V2D()); |
| __ frecpe(v10.V2S(), v22.V2S()); |
| __ frecpe(v5.V4S(), v6.V4S()); |
| __ frecps(v22.V2D(), v7.V2D(), v26.V2D()); |
| __ frecps(v31.V2S(), v27.V2S(), v2.V2S()); |
| __ frecps(v18.V4S(), v6.V4S(), v27.V4S()); |
// frint* family (a, i, m, n, p, x, z), each in 2D, 2S and 4S arrangements.
| __ frinta(v26.V2D(), v13.V2D()); |
| __ frinta(v15.V2S(), v26.V2S()); |
| __ frinta(v13.V4S(), v16.V4S()); |
| __ frinti(v9.V2D(), v12.V2D()); |
| __ frinti(v5.V2S(), v19.V2S()); |
| __ frinti(v15.V4S(), v11.V4S()); |
| __ frintm(v17.V2D(), v29.V2D()); |
| __ frintm(v30.V2S(), v11.V2S()); |
| __ frintm(v1.V4S(), v20.V4S()); |
| __ frintn(v24.V2D(), v6.V2D()); |
| __ frintn(v12.V2S(), v17.V2S()); |
| __ frintn(v29.V4S(), v11.V4S()); |
| __ frintp(v10.V2D(), v7.V2D()); |
| __ frintp(v12.V2S(), v18.V2S()); |
| __ frintp(v26.V4S(), v31.V4S()); |
| __ frintx(v24.V2D(), v13.V2D()); |
| __ frintx(v7.V2S(), v9.V2S()); |
| __ frintx(v18.V4S(), v21.V4S()); |
| __ frintz(v19.V2D(), v25.V2D()); |
| __ frintz(v15.V2S(), v8.V2S()); |
| __ frintz(v20.V4S(), v3.V4S()); |
| __ frsqrte(v23.V2D(), v5.V2D()); |
| __ frsqrte(v9.V2S(), v7.V2S()); |
| __ frsqrte(v3.V4S(), v9.V4S()); |
| __ frsqrts(v25.V2D(), v28.V2D(), v15.V2D()); |
| __ frsqrts(v9.V2S(), v26.V2S(), v10.V2S()); |
| __ frsqrts(v5.V4S(), v1.V4S(), v10.V4S()); |
| __ fsqrt(v6.V2D(), v18.V2D()); |
| __ fsqrt(v6.V2S(), v18.V2S()); |
| __ fsqrt(v0.V4S(), v31.V4S()); |
| __ fsub(v31.V2D(), v30.V2D(), v31.V2D()); |
| __ fsub(v11.V2S(), v8.V2S(), v6.V2S()); |
| __ fsub(v16.V4S(), v0.V4S(), v31.V4S()); |
// scvtf/ucvtf, each with and without an extra integer operand.
| __ scvtf(v25.V2D(), v31.V2D()); |
| __ scvtf(v10.V2D(), v13.V2D(), 45); |
| __ scvtf(v10.V2S(), v15.V2S()); |
| __ scvtf(v18.V2S(), v4.V2S(), 27); |
| __ scvtf(v17.V4S(), v5.V4S()); |
| __ scvtf(v11.V4S(), v25.V4S(), 24); |
| __ ucvtf(v9.V2D(), v3.V2D()); |
| __ ucvtf(v26.V2D(), v30.V2D(), 46); |
| __ ucvtf(v11.V2S(), v4.V2S()); |
| __ ucvtf(v29.V2S(), v3.V2S(), 25); |
| __ ucvtf(v22.V4S(), v23.V4S()); |
| __ ucvtf(v18.V4S(), v9.V4S(), 25); |
| } |
| |
| |
| // Masks, in place, the run-dependent addresses in the trace file at path |
| // `trace`. |
| // |
| // Every entry of `patterns` is applied as its own `sed` substitution, executed |
| // through system(). The substitutions carry no `g` flag, so each one rewrites |
| // at most its first match per line. A formatting error, an over-long command |
| // or a non-zero exit status aborts via VIXL_CHECK. |
| static void MaskAddresses(const char* trace) { |
|   // Hexadecimal expressions of the form `\xab` do not work out-of-the box with |
|   // BSD `sed`. So we use ANSI-C quoting to have the regular expressions below |
|   // work both on Linux and BSD (and macOS). |
| #define HEX(val) "\\x" #val |
| #ifdef __APPLE__ |
| #define MAYBE_ANSI_C_QUOTE "$" |
| #define ESCAPE(c) "\\\\" #c |
|   const char* sed_options = "-i \"\" -E"; |
| #else |
| #define MAYBE_ANSI_C_QUOTE |
| #define ESCAPE(c) "\\" #c |
|   const char* sed_options = "--in-place --regexp-extended"; |
| #endif |
|   // Optional terminal colour escape sequence (ESC `[` ... `m`), so that the |
|   // same patterns match both plain and coloured traces. |
| #define COLOUR "(" HEX(1b) ESCAPE([) "[01];([0-9][0-9])?m)?" |
|   struct { |
|     const char* search; |
|     const char* replace; |
|   } patterns[] = |
|       {// Mask registers that hold addresses that change from run to run. |
|        {"((x0|x1|x2|sp): " COLOUR "0x)[0-9a-f]{16}", |
|         ESCAPE(1) "~~~~~~~~~~~~~~~~"}, |
|        // Mask accessed memory addresses. |
|        {"((<-|->) " COLOUR "0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, |
|        // Mask instruction addresses. |
|        {"^0x[0-9a-f]{16}", "0x~~~~~~~~~~~~~~~~"}, |
|        // Mask branch targets. |
|        {"(Branch" COLOUR " to 0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, |
|        // Mask the hexadecimal value (of any length) that follows "addr 0x". |
|        {"addr 0x[0-9a-f]+", "addr 0x~~~~~~~~~~~~~~~~"}}; |
|   const size_t patterns_length = sizeof(patterns) / sizeof(patterns[0]); |
|   // Rewrite `trace`, masking addresses and other values that legitimately vary |
|   // from run to run. |
|   char command[1024]; |
|   for (size_t i = 0; i < patterns_length; i++) { |
|     int length = snprintf(command, |
|                           sizeof(command), |
|                           "sed %s " MAYBE_ANSI_C_QUOTE "'s/%s/%s/' '%s'", |
|                           sed_options, |
|                           patterns[i].search, |
|                           patterns[i].replace, |
|                           trace); |
|     // snprintf() returns a negative value on error, otherwise the length the |
|     // full command needs; reject both failure and truncation explicitly. |
|     VIXL_CHECK(length >= 0); |
|     VIXL_CHECK(static_cast<size_t>(length) < sizeof(command)); |
|     VIXL_CHECK(system(command) == 0); |
|   } |
|   // The helper macros are only meaningful inside this function; do not let |
|   // them leak into the rest of the translation unit. |
| #undef COLOUR |
| #undef ESCAPE |
| #undef MAYBE_ANSI_C_QUOTE |
| #undef HEX |
| } |
| |
| |
| // Runs the generated Base, FP, NEON and NEON-FP instruction sequences in the |
| // simulator with the given trace settings, masks run-dependent addresses in |
| // the resulting trace, and compares it against `ref_file`. |
| // |
| // coloured_trace:   forwarded to Simulator::SetColouredTrace(). |
| // trace_parameters: forwarded to Simulator::SetTraceParameters(). |
| // ref_file:         path of the reference trace passed to `diff -u`. |
| // |
| // When Test::generate_test_trace() is true, the masked trace is copied to |
| // stdout instead of being compared. Any failure aborts via VIXL_CHECK. |
| static void TraceTestHelper(bool coloured_trace, |
|                             TraceParameters trace_parameters, |
|                             const char* ref_file) { |
|   MacroAssembler masm(12 * KBytes); |
| |
|   // Create the temporary trace file. mkstemp() returns -1 and fdopen() returns |
|   // NULL on failure; check both so that the simulator is never handed an |
|   // unusable stream. |
|   char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX"; |
|   int trace_fd = mkstemp(trace_stream_filename); |
|   VIXL_CHECK(trace_fd != -1); |
|   FILE* trace_stream = fdopen(trace_fd, "w"); |
|   VIXL_CHECK(trace_stream != NULL); |
| |
|   Decoder decoder; |
|   Simulator simulator(&decoder, trace_stream); |
|   simulator.SetColouredTrace(coloured_trace); |
|   simulator.SetTraceParameters(trace_parameters); |
|   simulator.SilenceExclusiveAccessWarning(); |
| |
|   // Set up a scratch buffer so we can test loads and stores. |
|   const int kScratchSize = 64 * KBytes; |
|   const int kScratchGuardSize = 128; |
|   char scratch_buffer[kScratchSize + kScratchGuardSize]; |
|   for (size_t i = 0; i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0])); |
|        i++) { |
|     scratch_buffer[i] = i & 0xff; |
|   } |
|   // Used for offset addressing. |
|   simulator.WriteRegister(0, scratch_buffer); |
|   // Used for pre-/post-index addressing. |
|   simulator.WriteRegister(1, scratch_buffer); |
| |
|   const int kPostIndexRegisterStep = 13;  // Arbitrary interesting value. |
|   // Used for post-index offsets. |
|   simulator.WriteRegister(2, kPostIndexRegisterStep); |
| |
|   // Initialize the other registers with unique values. |
|   uint64_t initial_base_u64 = 0x0100001000100101; |
|   for (unsigned i = 3; i < kNumberOfRegisters; i++) { |
|     if (i == kLinkRegCode) continue; |
|     if (i == kZeroRegCode) continue; |
|     // NoRegLog suppresses the log now, but the registers will still be logged |
|     // before the first instruction is executed since they have been written but |
|     // not printed. |
|     simulator.WriteRegister(i, initial_base_u64 * i, Simulator::NoRegLog); |
|   } |
|   float initial_base_f32 = 1.2345f; |
|   // NOTE(review): the `f` suffix makes this a float literal widened to double. |
|   // It is left untouched because it determines the initial V register |
|   // contents; confirm against the reference traces before changing it. |
|   double initial_base_f64 = 1.3456f; |
|   for (unsigned i = 0; i < kNumberOfVRegisters; i++) { |
|     // Try to initialise V registers with reasonable FP values. |
|     uint64_t low = (DoubleToRawbits(initial_base_f64 * i) & ~kSRegMask) | |
|                    FloatToRawbits(initial_base_f32 * i); |
|     uint64_t high = low ^ 0x0005555500555555; |
|     LogicVRegister reg(simulator.ReadVRegister(i)); |
|     reg.SetUint(kFormat2D, 0, low); |
|     reg.SetUint(kFormat2D, 1, high); |
|   } |
| |
|   GenerateTestSequenceBase(&masm); |
|   GenerateTestSequenceFP(&masm); |
|   GenerateTestSequenceNEON(&masm); |
|   GenerateTestSequenceNEONFP(&masm); |
|   masm.Ret(); |
|   masm.FinalizeCode(); |
| |
|   simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); |
| |
|   // Close the stream before `sed` and `diff` operate on the file by name. |
|   fclose(trace_stream); |
|   MaskAddresses(trace_stream_filename); |
| |
|   bool trace_matched_reference; |
|   if (Test::generate_test_trace()) { |
|     // Copy trace_stream to stdout. |
|     trace_stream = fopen(trace_stream_filename, "r"); |
|     // VIXL_CHECK rather than VIXL_ASSERT, so that the stream is validated even |
|     // in builds where assertions are disabled. |
|     VIXL_CHECK(trace_stream != NULL); |
|     int c; |
|     while ((c = getc(trace_stream)) != EOF) { |
|       putc(c, stdout); |
|     } |
|     fclose(trace_stream); |
|     trace_matched_reference = true; |
|   } else { |
|     // Check trace_stream against ref_file. |
|     char command[1024]; |
|     size_t length = snprintf(command, |
|                              sizeof(command), |
|                              "diff -u %s %s", |
|                              ref_file, |
|                              trace_stream_filename); |
|     VIXL_CHECK(length < sizeof(command)); |
|     trace_matched_reference = (system(command) == 0); |
|   } |
| |
|   uint64_t offset_base = simulator.ReadRegister<uint64_t>(0); |
|   uint64_t index_base = simulator.ReadRegister<uint64_t>(1); |
| |
|   // Clean up before checking the result; VIXL_CHECK aborts. |
|   remove(trace_stream_filename); |
| |
|   VIXL_CHECK(trace_matched_reference); |
|   // x0 and x1 both started at scratch_buffer; after the run x1 must not be |
|   // below x0, nor more than kScratchSize above it. |
|   VIXL_CHECK(index_base >= offset_base); |
|   VIXL_CHECK((index_base - offset_base) <= kScratchSize); |
| } |
| |
| |
// Expands to the path of reference trace `name` under
// test/test-trace-reference/. The path is formed by adjacent string-literal
// concatenation, so `name` must itself be a string literal.
| #define REF(name) "test/test-trace-reference/" name |
| |
// Uncoloured traces: each test calls TraceTestHelper with colour disabled, one
// TraceParameters value, and the reference file of the same name.
| // Test individual options. |
| TEST(disasm) { TraceTestHelper(false, LOG_DISASM, REF("log-disasm")); } |
| TEST(regs) { TraceTestHelper(false, LOG_REGS, REF("log-regs")); } |
| TEST(vregs) { TraceTestHelper(false, LOG_VREGS, REF("log-vregs")); } |
| TEST(sysregs) { TraceTestHelper(false, LOG_SYSREGS, REF("log-sysregs")); } |
| TEST(write) { TraceTestHelper(false, LOG_WRITE, REF("log-write")); } |
// NOTE(review): `branch` passes LOG_WRITE, the same flag as `write` above,
// while comparing against its own "log-branch" reference. This looks like a
// copy-paste slip (a branch-specific flag was presumably intended) -- confirm,
// and regenerate the reference file if the flag is changed.
| TEST(branch) { TraceTestHelper(false, LOG_WRITE, REF("log-branch")); } |
| |
| // Test standard combinations. |
| TEST(none) { TraceTestHelper(false, LOG_NONE, REF("log-none")); } |
| TEST(state) { TraceTestHelper(false, LOG_STATE, REF("log-state")); } |
| TEST(all) { TraceTestHelper(false, LOG_ALL, REF("log-all")); } |
| |
| |
// Coloured traces: each test calls TraceTestHelper with colour enabled, one
// TraceParameters value, and the matching "-colour" reference file.
| // Test individual options (with colour). |
| TEST(disasm_colour) { |
| TraceTestHelper(true, LOG_DISASM, REF("log-disasm-colour")); |
| } |
| TEST(regs_colour) { TraceTestHelper(true, LOG_REGS, REF("log-regs-colour")); } |
| TEST(vregs_colour) { |
| TraceTestHelper(true, LOG_VREGS, REF("log-vregs-colour")); |
| } |
| TEST(sysregs_colour) { |
| TraceTestHelper(true, LOG_SYSREGS, REF("log-sysregs-colour")); |
| } |
| TEST(write_colour) { |
| TraceTestHelper(true, LOG_WRITE, REF("log-write-colour")); |
| } |
// NOTE(review): `branch_colour` passes LOG_WRITE, the same flag as
// `write_colour` above, while comparing against its own "log-branch-colour"
// reference. This looks like a copy-paste slip (a branch-specific flag was
// presumably intended) -- confirm, and regenerate the reference file if the
// flag is changed.
| TEST(branch_colour) { |
| TraceTestHelper(true, LOG_WRITE, REF("log-branch-colour")); |
| } |
| |
| // Test standard combinations (with colour). |
| TEST(none_colour) { TraceTestHelper(true, LOG_NONE, REF("log-none-colour")); } |
| TEST(state_colour) { |
| TraceTestHelper(true, LOG_STATE, REF("log-state-colour")); |
| } |
| TEST(all_colour) { TraceTestHelper(true, LOG_ALL, REF("log-all-colour")); } |
| |
| |
| #endif // VIXL_INCLUDE_SIMULATOR_AARCH64 |
| } // namespace aarch64 |
| } // namespace vixl |