| // push_v_regs: open a 320-byte stack frame and spill q8-q15 and x8-x30 into it. |
| // Slot offsets from the new sp (same layout a chain of pre-decrement pushes gives): |
| //   288: q8,q9    256: q10,q11    224: q12,q13    192: q14,q15 |
| //   176: x8,x9    160: x10,x11    144: x12,x13    128: x14,x15 |
| //   112: x16,x17   96: x18,x19     80: x20,x21     64: x22,x23 |
| //    48: x24,x25   32: x26,x27     16: x28,x29      0: x30,x29 |
| // x29 is deliberately written to two slots so the frame stays a multiple of 16 bytes. |
| // Every use must be matched by pop_v_regs. |
| .macro push_v_regs |
| sub sp, sp, #320 |
| stp q8, q9, [sp, #288] |
| stp q10, q11, [sp, #256] |
| stp q12, q13, [sp, #224] |
| stp q14, q15, [sp, #192] |
| stp x8, x9, [sp, #176] |
| stp x10, x11, [sp, #160] |
| stp x12, x13, [sp, #144] |
| stp x14, x15, [sp, #128] |
| stp x16, x17, [sp, #112] |
| stp x18, x19, [sp, #96] |
| stp x20, x21, [sp, #80] |
| stp x22, x23, [sp, #64] |
| stp x24, x25, [sp, #48] |
| stp x26, x27, [sp, #32] |
| stp x28, x29, [sp, #16] |
| stp x30, x29, [sp] |
| .endm |
| |
| // pop_v_regs: reload x8-x30 and q8-q15 from the 320-byte frame written by |
| // push_v_regs, then release the frame. |
| // x29 is loaded twice (slots at sp+8 and sp+24); the second load is the one that sticks. |
| .macro pop_v_regs |
| ldp x30, x29, [sp] |
| ldp x28, x29, [sp, #16] |
| ldp x26, x27, [sp, #32] |
| ldp x24, x25, [sp, #48] |
| ldp x22, x23, [sp, #64] |
| ldp x20, x21, [sp, #80] |
| ldp x18, x19, [sp, #96] |
| ldp x16, x17, [sp, #112] |
| ldp x14, x15, [sp, #128] |
| ldp x12, x13, [sp, #144] |
| ldp x10, x11, [sp, #160] |
| ldp x8, x9, [sp, #176] |
| ldp q14, q15, [sp, #192] |
| ldp q12, q13, [sp, #224] |
| ldp q10, q11, [sp, #256] |
| ldp q8, q9, [sp, #288] |
| add sp, sp, #320 |
| .endm |
| |
| |
| .text |
| .p2align 2 |
| .global ixheaacd_post_twid_overlap_add_armv8 |
| |
| // ixheaacd_post_twid_overlap_add_armv8 |
| // AArch64 port of an ARMv7 routine; the retained commented-out lines show the |
| // ARMv7 instructions that each sequence replaces. |
| // Register use at entry, as read by the code that follows (roles inferred from |
| // usage only -- TODO confirm against the C prototype): |
| //   x0 : base address for the 16-bit (halfword) output stores |
| //   x1 : pointer to 32-bit input words; read forwards and, later, backwards |
| //   x2 : table pointer; advanced by 7500 bytes before its first load |
| //   x3 : element count; scaled into byte offsets for x1 and x6 |
| //   x4 : pointer to a 32-bit buffer that is read and then overwritten |
| //   x5 : shift amount; kept in derived forms in x16, x25, x26 and v16 |
| //   x6 : pointer moved to byte offset (x3*4 >> 1) - 4 and then read backwards |
| //   x7 : output stride in halfwords; kept in x17 |
| ixheaacd_post_twid_overlap_add_armv8: |
| |
| // STMFD sp!, {x4-x12} |
| // Spill registers with the push_v_regs macro defined at the top of the file. |
| push_v_regs |
| //stp x19, x20,[sp,#-16]! |
| //VPUSH {d8 - d15} |
| |
| //LDR w4, [sp, #100] |
| //sxtw x4,w4 |
| //LDR w5, [sp, #104] |
| //sxtw x5,w5 |
| //LDR w6, [sp, #108] |
| //sxtw x6,w6 |
| // Keep copies of x5 and x7: both registers are reused as temporaries below. |
| MOV x16, x5 |
| MOV x17, x7 |
| // x6 += ((x3 << 2) >> 1) - 4 |
| LSL x9, x3, #2 |
| ASR x9, x9, #1 |
| ADD x6, x6, x9 |
| SUB x6, x6, #4 |
| |
| // x2 += 7500 (constant comes from the literal pool). |
| // NOTE(review): the meaning of the 7500-byte table offset is not visible here. |
| LDR w8, =7500 |
| sxtw x8, w8 |
| ADD x2, x2, x8 |
| |
| |
| |
| // v18 = 50 in each 16-bit lane (multiplier used by the NEON stages). |
| movi v18.4h, #50 |
| // x9 = 15 - x5 |
| sub x20, x5, #15 |
| neg x9, x20 |
| // v20 = 0x8000 in each 32-bit lane (added before the arithmetic >> 16 steps). |
| movi v20.4s, #0x80, LSL #8 |
| // v16 = x5 (shift amount) in each 32-bit lane, for the register-form sQshL. |
| dup v16.4s, w5 |
| // x25 = x5 - 16, sign-extended from 32 bits. |
| SUB x5, x5, #16 |
| //STR w5, [sp, #116] |
| MOV w25, w5 |
| sxtw x25, w25 |
| // w26 = low 32 bits of 1 << (15 - x5); added before the shift by x25 later on. |
| MOV x8, #1 |
| LSL x8, x8, x9 |
| //STR w8, [sp, #120] |
| MOV w26, w8 |
| |
| //sxtw x8,w8 |
| |
| |
| |
| // ARM_PROLOGUE: scalar stage. Processes the first pair of input words with |
| // general-purpose registers, writes one 32-bit word to [x4] and two halfwords |
| // to the output, then sets up the pointers used by the NEON stages. |
| ARM_PROLOGUE: |
| |
| |
| // x8, x9 = two consecutive 32-bit words from x1 (sign-extended); x1 += 8. |
| LDR w8, [x1], #4 |
| sxtw x8, w8 |
| LDR w9, [x1], #4 |
| sxtw x9, w9 |
| |
| // w10 = one 32-bit word from x2 holding two 16-bit values; x2 += 4. |
| LDR w10, [x2], #4 |
| sxtw x10, w10 |
| |
| // x19 = low halfword (sign-extended), w10 = high halfword (arithmetic >> 16). |
| AND w19, w10, 0xFFFF |
| sxth x19, w19 |
| ASR w10, w10, #16 |
| // SMULWT x11, x8, x10 |
| // |
| // SMULWB x12, x9, x10 |
| // SMULWB x5, x8, x10 |
| // SMLAWT x7, x9, x10, x5 |
| |
| // Each SMULL + ASR #16 pair below is a 32x16 multiply keeping bits [47:16]: |
| //   x11 = (x8 * hi) >> 16      x12 = (x9 * lo) >> 16 |
| //   x5  = (x8 * lo) >> 16      x7  = ((x9 * hi) >> 16) + x5 |
| SMULL x11, w8, w10 |
| ASR x11, x11, #16 |
| SMULL x12, w9, w19 |
| ASR x12, x12, #16 |
| SMULL x5, w8, w19 |
| ASR x5, x5, #16 |
| SMULL x7, w9, w10 |
| ASR x7, x7, #16 |
| ADD x7, x7, x5 |
| |
| // x8 = x12 - x11; x5 = -x7 (MVN + ADD #1 is a two's-complement negate). |
| SUB x8, x12, x11 |
| MVN x5, x7 |
| ADD x5, x5, #1 |
| |
| |
| // Cross terms with the constants +50 and -50: |
| //   x10 = (w5 * 50) >> 16      x11 = (w8 * -50) >> 16 |
| MOV x9, #50 |
| MOV x12, #-50 |
| AND w19, w9, 0xFFFF |
| sxth x19, w19 |
| SMULL x10, w5, w19 |
| ASR x10, x10, #16 |
| AND w19, w12, 0xFFFF |
| sxth x19, w19 |
| SMULL x11, w8, w19 |
| ASR x11, x11, #16 |
| |
| ADD x8, x8, x10 |
| ADD x5, x5, x11 |
| |
| //LDR w11, [sp, #104] |
| // x11 = low 16 bits of the shift amount saved in x16, sign-extended. |
| MOV w11, w16 |
| sxth x11, w11 |
| // w10 = 32-bit word from x6 holding two 16-bit values; x6 then steps back 32 bytes. |
| LDR w10, [x6], #-32 |
| sxtw x10, w10 |
| |
| // x19 = low halfword (signed), w20 = high halfword (signed). |
| AND w19, w10, 0xFFFF |
| sxth x19, w19 |
| ASR w20, w10, #16 |
| |
| //SMULWB x7, x8, x10 |
| // x7 = (x8 * lo) >> 16; then x8 is negated and x12 = (-x8_old * hi) >> 16. |
| SMULL x7, w8, w19 |
| ASR x7, x7, #16 |
| MVN x8, x8 |
| ADD x8, x8, #1 |
| //SMULWT x12, x8, x10 |
| SMULL x12, w8, w20 |
| ASR x12, x12, #16 |
| |
| // A negative shift amount is handled separately at NEXT. |
| CMP x11, #0 |
| BLT NEXT |
| |
| // Non-negative shift: x9 = 16 - shift. |
| SUB x9, x11, #16 |
| negs x9, x9 |
| |
| |
| |
| |
| // LDR w8, [sp, #120] |
| //sxtw x8,w8 |
| // w5 = saturating_add(w5, w26) >> (16 - shift); the saturating add is done in a |
| // NEON lane because there is no scalar general-purpose saturating add. |
| MOV v1.s[0], w26 |
| MOV v2.s[0], w5 |
| |
| //sQADD w5, w5, w8 |
| //ASR w5, w5, w9 |
| |
| SQADD v2.2s, v2.2s, v1.2s |
| MOV w5, v2.s[0] |
| ASR w5, w5, w9 |
| |
| // Saturating left shift of x7 by `shift`, decided from x8 = x7 >> (31 - shift): |
| //   x8 == 0 or x8 == -1 -> EQ: the shifted value fits, so x7 <<= shift |
| //   x8 <  -1            -> LT: x7 = 0x80000000 |
| //   x8 >   0            -> GT: x7 = 0x7fffffff |
| // ADDS sets the flags for x8 >= 0; for negative x8 the CMN re-tests x8 + 1. |
| // LDR and csel leave the flags untouched, so all three csel see the same flags. |
| SUB x9, x11, #31 |
| negs x9, x9 |
| ASR x20, x7, x9 |
| //MOV x8, x20 |
| ADDS x8, x20, #0 |
| BGE NEXT2 |
| CMN x8, #1 |
| NEXT2: |
| LDR x20, =0x80000000 |
| csel x7, x20, x7, LT |
| LDR x20, =0x7fffffff |
| csel x7, x20, x7, GT |
| LSL x20, x7, x11 |
| csel x7, x20, x7, EQ |
| |
| // Same saturating left shift, applied to x12. |
| SUB x9, x11, #31 |
| negs x9, x9 |
| ASR x20, x12, x9 |
| //MOV x8, x20 |
| ADDS x8, x20, #0 |
| BGE NEXT3 |
| CMN x8, #1 |
| NEXT3: |
| LDR x20, =0x80000000 |
| csel x12, x20, x12, LT |
| LDR x20, =0x7fffffff |
| csel x12, x20, x12, GT |
| LSL x20, x12, x11 |
| csel x12, x20, x12, EQ |
| |
| B NEXT1 |
| // NEXT: negative shift amount. w11 = -shift, and everything is shifted right: |
| //   w5 = saturating_add(w5 >> w11, 0x8000) >> 16;  w7 >>= w11;  w12 >>= w11 |
| NEXT: |
| MVN w11, w11 |
| ADD w11, w11, #1 |
| ASR w5, w5, w11 |
| MOV w8, #0x8000 |
| |
| MOV v1.s[0], w8 |
| MOV v2.s[0], w5 |
| |
| //QADD x5, x5, x8 |
| |
| SQADD v2.2s, v2.2s, v1.2s |
| MOV w5, v2.s[0] |
| |
| ASR w5, w5, #16 |
| ASR w7, w7, w11 |
| ASR w12, w12, w11 |
| |
| // NEXT1: both shift paths join here. |
| NEXT1: |
| // w9 = previous word at [x4]; w8 = 0x8000 rounding constant. |
| LDR w9, [x4] |
| sxtw x9, w9 |
| MOV w8, #0x8000 |
| //sxtw x8,w8 |
| |
| // Overwrite that word with w5; x4 += 4. (x5 is reloaded by the UXTH below.) |
| STR w5, [x4], #4 |
| sxtw x5, w5 |
| |
| |
| // Split w10 again, this time zero-extended: w5 = high halfword, w10 = low halfword. |
| ROR w20, w10, #16 |
| //UXTH x5, x10, ROR #16 |
| UXTH w5, w20 |
| UXTH w10, w10 |
| |
| |
| // v2 = {w10, w5} (ZIP1 of the two dups); v8 = saturating narrow of v2 * w9 (signed 32x32 -> 64). |
| dup v0.2s, w9 |
| dup v2.2s, w10 |
| dup v3.2s, w5 |
| //VZIP.32 D2, D3 |
| ZIP1 v28.2s, v2.2s, v3.2s |
| ZIP2 v3.2s, v2.2s, v3.2s |
| MOV v2.8b, v28.8b |
| sMULL v0.2d, v2.2s, v0.2s |
| Sqxtn v8.2s, v0.2d |
| |
| |
| // v0 = {w12, w7} after the ZIP1 below. |
| dup v0.2s, w12 |
| dup v1.2s, w7 |
| |
| //VZIP.32 D0, D1 |
| |
| ZIP1 v28.2s, v0.2s, v1.2s |
| ZIP2 v1.2s, v0.2s, v1.2s |
| MOV v0.8b, v28.8b |
| |
| // v8 = sat(v0 - v8) |
| SQSUB v8.2s, v0.2s , v8.2s |
| |
| |
| // v8 = (sat(sat(v8 << 2) + 0x8000)) >> 16 (arithmetic), per 32-bit lane. |
| sQshL v8.2s, v8.2s, #2 |
| dup v0.2s, w8 |
| SQADD v8.2s, v8.2s , v0.2s |
| sshR v8.2s, v8.2s, #16 |
| |
| |
| |
| // x10 = 2 * saved x7: the output stride in bytes for halfword stores. |
| MOV x7, x17 |
| //sxtw x7,w7 |
| LSL x10, x7, #1 |
| |
| // x5 = (low16 of (x3 >> 1)) * (low16 of x10), both taken as signed 16-bit values. |
| ASR x5, x3, #1 |
| //SMULBB x5, x10, x5 |
| AND w5, w5, 0xFFFF |
| sxth x5, w5 |
| AND w19, w10, 0xFFFF |
| sxth x19, w19 |
| SMULL x5, w19, w5 |
| |
| // x5 = x0 + that offset (written upwards); x0 = x5 - stride (written downwards); |
| // x9 = -x10 is the downward step. |
| ADD x5, x5, x0 |
| SUB x0, x5, x10 |
| MVN x9, x10 |
| ADD x9, x9, #1 |
| |
| // Store the low halfword of lane 1 at [x0] and of lane 0 at [x5], then step |
| // x0 down and x5 up by one stride. |
| ST1 {v8.h}[2], [x0], x9 |
| ST1 {v8.h}[0], [x5], x10 |
| |
| |
| // x8 = forward read pointer (current x1); x1 = x1 + 4*x3 - 40 becomes the |
| // backward read pointer. |
| MOV x8, x1 |
| LSL x12, x3, #2 |
| |
| ADD x1, x1, x12 |
| |
| SUB x1, x1, #40 |
| |
| // x12 = -32: step applied to the backward pointers after each 32-byte NEON load. |
| MOV x12, #-32 |
| |
| |
| |
| PROLOGUE_NEON: |
| |
| ASR x3, x3, #2 |
| SUB x3, x3, #4 |
| ASR x3, x3, #2 |
| SUB x3, x3, #2 |
| |
| LD2 { v0.4s, v1.4s}, [x1] |
| MOV v2.16b, v1.16b |
| ADD x1, x1, x12 |
| |
| //VUZP.16 D0, D1 |
| UZP1 v28.8h, v0.8h, v0.8h |
| UZP2 v29.8h, v0.8h, v0.8h |
| MOV v0.d[0], v28.d[0] |
| MOV v0.d[1], v29.d[0] |
| |
| //VUZP.16 D2, D3 |
| |
| UZP1 v28.8h, v2.8h, v2.8h |
| UZP2 v29.8h, v2.8h, v2.8h |
| MOV v2.d[0], v28.d[0] |
| MOV v2.d[1], v29.d[0] |
| |
| |
| //rev64 v0.8h, v0.8h |
| rev64 v0.8h, v0.8h |
| MOV v1.d[0], v0.d[1] |
| rev64 v2.8h, v2.8h |
| MOV v3.d[0], v2.d[1] |
| LD2 {v8.4h, v9.4h}, [x2] |
| ADD x2, x2, #16 |
| |
| LD2 { v4.4s, v5.4s}, [x8] |
| MOV v6.16b, v5.16b |
| ADD x8, x8, #32 |
| uMULL v30.4s, v0.4h, v9.4h |
| |
| // VUZP.16 D4, D5 |
| |
| UZP1 v28.8h, v4.8h, v4.8h |
| UZP2 v29.8h, v4.8h, v4.8h |
| MOV v4.d[0], v28.d[0] |
| MOV v5.d[0], v29.d[0] |
| |
| uMULL v28.4s, v2.4h, v8.4h |
| |
| // VUZP.16 D6, D7 |
| UZP1 v26.8h, v6.8h, v6.8h |
| UZP2 v27.8h, v6.8h, v6.8h |
| MOV v6.d[0], v26.d[0] |
| MOV v7.d[0], v27.d[0] |
| |
| uMULL v26.4s, v0.4h, v8.4h |
| |
| |
| uMULL v24.4s, v2.4h, v9.4h |
| |
| LD2 { v10.4s, v11.4s}, [x6] |
| MOV v12.16b, v11.16b |
| ADD x6, x6, x12 |
| ushR v30.4s, v30.4s, #16 |
| |
| //VUZP.16 D10, D11 |
| |
| UZP1 v22.8h, v10.8h, v10.8h |
| UZP2 v23.8h, v10.8h, v10.8h |
| MOV v10.d[0], v22.d[0] |
| MOV v10.d[1], v23.d[0] |
| |
| ushR v28.4s, v28.4s, #16 |
| |
| //VUZP.16 D12, D13 |
| |
| UZP1 v22.8h, v12.8h, v12.8h |
| UZP2 v23.8h, v12.8h, v12.8h |
| MOV v12.d[0], v22.d[0] |
| MOV v12.d[1], v23.d[0] |
| |
| sMLAL v30.4s, v1.4h, v9.4h |
| |
| rev64 v10.8h, v10.8h |
| MOV v11.d[0], v10.d[1] |
| sMLAL v28.4s, v3.4h, v8.4h |
| |
| rev64 v12.8h, v12.8h |
| MOV v13.d[0], v12.d[1] |
| ushR v26.4s, v26.4s, #16 |
| |
| |
| ushR v24.4s, v24.4s, #16 |
| |
| sMLAL v26.4s, v1.4h, v8.4h |
| sMLAL v24.4s, v3.4h, v9.4h |
| |
| |
| |
| ADD v30.4s, v30.4s , v28.4s |
| NEG v30.4s, v30.4s |
| |
| uMULL v22.4s, v4.4h, v8.4h |
| |
| SUB v28.4s, v24.4s , v26.4s |
| |
| |
| mov v26.16b, v30.16b |
| mov v24.16b, v28.16b |
| |
| // VUZP.16 D24, D25 |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| |
| // VUZP.16 D26, D27 |
| |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| uMULL v2.4s, v24.4h, v18.4h |
| |
| uMULL v0.4s, v26.4h, v18.4h |
| |
| ushR v22.4s, v22.4s, #16 |
| sMLAL v22.4s, v5.4h, v8.4h |
| |
| ushR v2.4s, v2.4s, #16 |
| ushR v0.4s, v0.4s, #16 |
| sMLAL v2.4s, v25.4h, v18.4h |
| sMLAL v0.4s, v27.4h, v18.4h |
| |
| uMULL v24.4s, v4.4h, v9.4h |
| uMULL v26.4s, v6.4h, v8.4h |
| |
| NEG v2.4s, v2.4s |
| ADD v28.4s, v28.4s , v0.4s |
| ADD v30.4s, v30.4s , v2.4s |
| |
| uMULL v0.4s, v6.4h, v9.4h |
| sshR v24.4s, v24.4s, #16 |
| sMLAL v24.4s, v5.4h, v9.4h |
| sshR v26.4s, v26.4s, #16 |
| sshR v0.4s, v0.4s, #16 |
| sMLAL v26.4s, v7.4h, v8.4h |
| sMLAL v0.4s, v7.4h, v9.4h |
| |
| |
| |
| |
| ADD v22.4s, v22.4s , v0.4s |
| NEG v22.4s, v22.4s |
| SUB v24.4s, v26.4s , v24.4s |
| |
| |
| |
| //LDR w11, [sp, #120] |
| //sxtw x11,w11 |
| MOV w11, w26 |
| dup v14.4s, w11 |
| SQADD v28.4s, v28.4s , v14.4s |
| //LDR w11, [sp, #116] |
| MOV w11, w25 |
| //sxtw x11,w11 |
| dup v0.4s, w11 |
| sQshL v28.4s, v28.4s, v0.4s |
| |
| mov v0.16b, v22.16b |
| mov v14.16b, v24.16b |
| |
| |
| // VUZP.16 D24, D25 |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| |
| // VUZP.16 D22, D23 |
| |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| uMULL v8.4s, v24.4h, v18.4h |
| uMULL v26.4s, v22.4h, v18.4h |
| |
| NEG v2.4s, v30.4s |
| // VUZP.16 D30, D31 |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| // VUZP.16 D2, D3 |
| |
| UZP1 v19.8h, v2.8h, v2.8h |
| UZP2 v21.8h, v2.8h, v2.8h |
| MOV v2.d[0], v19.d[0] |
| MOV v3.d[0], v21.d[0] |
| |
| uMULL v4.4s, v30.4h, v12.4h |
| |
| uMULL v6.4s, v2.4h, v13.4h |
| |
| ushR v8.4s, v8.4s, #16 |
| ushR v26.4s, v26.4s, #16 |
| |
| sMLAL v8.4s, v25.4h, v18.4h |
| sMLAL v26.4s, v23.4h, v18.4h |
| |
| ushR v4.4s, v4.4s, #16 |
| ushR v6.4s, v6.4s, #16 |
| |
| MOV v19.d[0], v30.d[1] |
| |
| sMLAL v4.4s, v19.4h, v12.4h |
| sMLAL v6.4s, v3.4h, v13.4h |
| |
| NEG v8.4s, v8.4s |
| ADD v14.4s, v14.4s , v26.4s |
| ADD v0.4s, v0.4s , v8.4s |
| |
| //LDR w11, [sp, #120] |
| //sxtw x11,w11 |
| MOV w11, w26 |
| dup v8.4s, w11 |
| SQADD v0.4s, v0.4s , v8.4s |
| //LDR w11, [sp, #116] |
| //sxtw x11,w11 |
| MOV w11, w25 |
| dup v26.4s, w11 |
| sQshL v0.4s, v0.4s, v26.4s |
| |
| mov v26.16b, v28.16b |
| |
| LD2 { v28.4s, v29.4s}, [x4] |
| MOV v30.16b, v29.16b |
| MOV v29.d[0], v28.d[1] |
| // VZIP.32 Q13, Q0 |
| |
| ZIP1 v19.4s, v26.4s, v0.4s |
| ZIP2 v0.4s, v26.4s, v0.4s |
| MOV v26.16b, v19.16b |
| |
| ST1 { v26.4s}, [x4], #16 |
| ST1 { v0.4s}, [x4], #16 |
| |
| movi v1.2s, #0 |
| //VADDL.S16 Q0, D13, D1 |
| |
| SADDL v0.4s, v13.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| sMULL v26.2d, v28.2s, v0.2s |
| Sqxtn v8.2s, v26.2d |
| sMULL v26.2d, v29.2s, v1.2s |
| Sqxtn v9.2s, v26.2d |
| MOV v8.d[1], v9.d[0] |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D12, D1 |
| SADDL v0.4s, v12.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| sMULL v24.2d, v28.2s, v0.2s |
| Sqxtn v26.2s, v24.2d |
| sMULL v24.2d, v29.2s, v1.2s |
| Sqxtn v27.2s, v24.2d |
| MOV v26.d[1], v27.d[0] |
| |
| sQshL v4.4s, v4.4s, v16.4s |
| sQshL v6.4s, v6.4s, v16.4s |
| |
| SQSUB v4.4s, v4.4s , v8.4s |
| SQSUB v6.4s, v6.4s , v26.4s |
| |
| NEG v26.4s, v14.4s |
| // VUZP.16 D14, D15 |
| |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| // VUZP.16 D26, D27 |
| |
| |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D10, D1 |
| SADDL v0.4s, v10.4h, v1.4h |
| MOV v1.d[0], v0.d[0] |
| sMULL v22.2d, v30.2s, v0.2s |
| Sqxtn v24.2s, v22.2d |
| sMULL2 v22.2d, v30.4s, v0.4s |
| Sqxtn v25.2s, v22.2d |
| MOV v24.d[1], v25.d[0] |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D11, D1 |
| SADDL v0.4s, v11.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v8.2d, v30.2s, v0.2s |
| Sqxtn v22.2s, v8.2d |
| sMULL2 v8.2d, v30.4s, v0.4s |
| Sqxtn v23.2s, v8.2d |
| MOV v22.d[1], v23.d[0] |
| uMULL v8.4s, v26.4h, v11.4h |
| uMULL v30.4s, v14.4h, v10.4h |
| |
| LD2 { v0.4s, v1.4s}, [x1] |
| MOV v2.16b, v1.16b |
| ADD x1, x1, x12 |
| |
| // VUZP.16 D0, D1 |
| |
| UZP1 v19.8h, v0.8h, v0.8h |
| UZP2 v21.8h, v0.8h, v0.8h |
| MOV v0.d[0], v19.d[0] |
| MOV v0.d[1], v21.d[0] |
| |
| // VUZP.16 D2, D3 |
| |
| UZP1 v19.8h, v2.8h, v2.8h |
| UZP2 v21.8h, v2.8h, v2.8h |
| MOV v2.d[0], v19.d[0] |
| MOV v2.d[1], v21.d[0] |
| |
| ushR v8.4s, v8.4s, #16 |
| |
| rev64 v0.8h, v0.8h |
| MOV v1.d[0], v0.d[1] |
| ushR v30.4s, v30.4s, #16 |
| |
| rev64 v2.8h, v2.8h |
| MOV v3.d[0], v2.d[1] |
| sMLAL v8.4s, v27.4h, v11.4h |
| |
| sMLAL v30.4s, v15.4h, v10.4h |
| |
| LD2 { v10.4s, v11.4s}, [x6] |
| ADD x6, x6, x12 |
| MOV v12.16b, v11.16b |
| sQshL v4.4s, v4.4s, #2 |
| |
| // VUZP.16 D10, D11 |
| |
| UZP1 v19.8h, v10.8h, v10.8h |
| UZP2 v21.8h, v10.8h, v10.8h |
| MOV v10.d[0], v19.d[0] |
| MOV v10.d[1], v21.d[0] |
| |
| sQshL v6.4s, v6.4s, #2 |
| |
| // VUZP.16 D12, D13 |
| |
| UZP1 v19.8h, v12.8h, v12.8h |
| UZP2 v21.8h, v12.8h, v12.8h |
| MOV v12.d[0], v19.d[0] |
| MOV v12.d[1], v21.d[0] |
| |
| SQADD v14.4s, v4.4s , v20.4s |
| |
| rev64 v10.8h, v10.8h |
| MOV v11.d[0], v10.d[1] |
| SQADD v6.4s, v6.4s , v20.4s |
| |
| rev64 v12.8h, v12.8h |
| MOV v13.d[0], v12.d[1] |
| sshR v14.4s, v14.4s, #16 |
| |
| // VUZP.16 D14, D15 |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| sshR v6.4s, v6.4s, #16 |
| |
| // VUZP.16 D6, D7 |
| |
| UZP1 v19.8h, v6.8h, v6.8h |
| UZP2 v21.8h, v6.8h, v6.8h |
| MOV v6.d[0], v19.d[0] |
| MOV v7.d[0], v21.d[0] |
| |
| mov v15.8b, v6.8b |
| sQshL v8.4s, v8.4s, v16.4s |
| |
| LD2 { v4.4s, v5.4s}, [x8] |
| ADD x8, x8, #32 |
| MOV v6.16b, v5.16b |
| sQshL v30.4s, v30.4s, v16.4s |
| |
| // VUZP.16 D4, D5 |
| |
| UZP1 v19.8h, v4.8h, v4.8h |
| UZP2 v21.8h, v4.8h, v4.8h |
| MOV v4.d[0], v19.d[0] |
| MOV v5.d[0], v21.d[0] |
| |
| SQSUB v8.4s, v8.4s , v24.4s |
| |
| // VUZP.16 D6, D7 |
| |
| UZP1 v19.8h, v6.8h, v6.8h |
| UZP2 v21.8h, v6.8h, v6.8h |
| MOV v6.d[0], v19.d[0] |
| MOV v7.d[0], v21.d[0] |
| |
| SQSUB v22.4s, v30.4s , v22.4s |
| |
| sQshL v30.4s, v8.4s, #2 |
| |
| LD2 {v8.4h, v9.4h}, [x2] |
| ADD x2, x2, #16 |
| sQshL v22.4s, v22.4s, #2 |
| |
| SQADD v30.4s, v30.4s , v20.4s |
| SQADD v22.4s, v22.4s , v20.4s |
| |
| sshR v30.4s, v30.4s, #16 |
| |
| // VUZP.16 D30, D31 |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| sshR v22.4s, v22.4s, #16 |
| |
| |
| // VUZP.16 D22, D23 |
| |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| |
| mov v23.8b, v30.8b |
| |
| CORE_LOOP: |
| ST1 {v14.h}[0], [x0] |
| ADD x0, x0, x9 |
| uMULL v30.4s, v0.4h, v9.4h |
| |
| ST1 {v22.h}[0], [x0] |
| ADD x0, x0, x9 |
| uMULL v28.4s, v2.4h, v8.4h |
| |
| ST1 {v14.h}[1], [x0] |
| ADD x0, x0, x9 |
| uMULL v26.4s, v0.4h, v8.4h |
| |
| ST1 {v22.h}[1], [x0] |
| ADD x0, x0, x9 |
| uMULL v24.4s, v2.4h, v9.4h |
| |
| ST1 {v14.h}[2], [x0] |
| ADD x0, x0, x9 |
| ushR v30.4s, v30.4s, #16 |
| |
| ST1 {v22.h}[2], [x0] |
| ADD x0, x0, x9 |
| ushR v28.4s, v28.4s, #16 |
| |
| ST1 {v14.h}[3], [x0] |
| ADD x0, x0, x9 |
| sMLAL v30.4s, v1.4h, v9.4h |
| |
| ST1 {v22.h}[3], [x0] |
| ADD x0, x0, x9 |
| sMLAL v28.4s, v3.4h, v8.4h |
| |
| ST1 {v15.h}[0], [x5] |
| ADD x5, x5, x10 |
| ushR v26.4s, v26.4s, #16 |
| |
| ST1 {v23.h}[0], [x5] |
| ADD x5, x5, x10 |
| ushR v24.4s, v24.4s, #16 |
| |
| ST1 {v15.h}[1], [x5] |
| ADD x5, x5, x10 |
| sMLAL v26.4s, v1.4h, v8.4h |
| |
| ST1 {v23.h}[1], [x5] |
| ADD x5, x5, x10 |
| sMLAL v24.4s, v3.4h, v9.4h |
| |
| ST1 {v15.h}[2], [x5] |
| ADD x5, x5, x10 |
| ADD v30.4s, v30.4s , v28.4s |
| |
| ST1 {v23.h}[2], [x5] |
| ADD x5, x5, x10 |
| NEG v30.4s, v30.4s |
| |
| ST1 {v15.h}[3], [x5] |
| ADD x5, x5, x10 |
| |
| ST1 {v23.h}[3], [x5] |
| ADD x5, x5, x10 |
| SUB v28.4s, v24.4s , v26.4s |
| |
| |
| mov v26.16b, v30.16b |
| uMULL v22.4s, v4.4h, v8.4h |
| |
| mov v24.16b, v28.16b |
| |
| // VUZP.16 D24, D25 |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| |
| // VUZP.16 D26, D27 |
| |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| uMULL v2.4s, v24.4h, v18.4h |
| uMULL v0.4s, v26.4h, v18.4h |
| |
| ushR v22.4s, v22.4s, #16 |
| sMLAL v22.4s, v5.4h, v8.4h |
| |
| ushR v2.4s, v2.4s, #16 |
| ushR v0.4s, v0.4s, #16 |
| sMLAL v2.4s, v25.4h, v18.4h |
| sMLAL v0.4s, v27.4h, v18.4h |
| |
| uMULL v24.4s, v4.4h, v9.4h |
| uMULL v26.4s, v6.4h, v8.4h |
| |
| NEG v2.4s, v2.4s |
| ADD v28.4s, v28.4s , v0.4s |
| ADD v30.4s, v30.4s , v2.4s |
| |
| uMULL v0.4s, v6.4h, v9.4h |
| sshR v24.4s, v24.4s, #16 |
| sMLAL v24.4s, v5.4h, v9.4h |
| sshR v26.4s, v26.4s, #16 |
| sshR v0.4s, v0.4s, #16 |
| sMLAL v26.4s, v7.4h, v8.4h |
| sMLAL v0.4s, v7.4h, v9.4h |
| |
| |
| |
| ADD v22.4s, v22.4s , v0.4s |
| |
| NEG v22.4s, v22.4s |
| SUB v24.4s, v26.4s , v24.4s |
| |
| |
| //LDR w11, [sp, #120] |
| //sxtw x11,w11 |
| MOV w11, w26 |
| dup v14.4s, w11 |
| SQADD v28.4s, v28.4s , v14.4s |
| //LDR w11, [sp, #116] |
| //sxtw x11,w11 |
| MOV w11, w25 |
| dup v0.4s, w11 |
| sQshL v28.4s, v28.4s, v0.4s |
| |
| |
| mov v0.16b, v22.16b |
| mov v14.16b, v24.16b |
| |
| // VUZP.16 D24, D25 |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| |
| // VUZP.16 D22, D23 |
| |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| uMULL v8.4s, v24.4h, v18.4h |
| uMULL v26.4s, v22.4h, v18.4h |
| |
| NEG v2.4s, v30.4s |
| |
| // VUZP.16 D30, D31 |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| |
| // VUZP.16 D2, D3 |
| |
| UZP1 v19.8h, v2.8h, v2.8h |
| UZP2 v21.8h, v2.8h, v2.8h |
| MOV v2.d[0], v19.d[0] |
| MOV v3.d[0], v21.d[0] |
| |
| uMULL v4.4s, v30.4h, v12.4h |
| uMULL v6.4s, v2.4h, v13.4h |
| |
| ushR v8.4s, v8.4s, #16 |
| ushR v26.4s, v26.4s, #16 |
| |
| sMLAL v8.4s, v25.4h, v18.4h |
| sMLAL v26.4s, v23.4h, v18.4h |
| |
| ushR v4.4s, v4.4s, #16 |
| ushR v6.4s, v6.4s, #16 |
| |
| MOV v19.d[0], v30.d[1] |
| |
| sMLAL v4.4s, v19.4h, v12.4h |
| sMLAL v6.4s, v3.4h, v13.4h |
| |
| NEG v8.4s, v8.4s |
| ADD v14.4s, v14.4s , v26.4s |
| ADD v0.4s, v0.4s , v8.4s |
| |
| |
| |
| //LDR w11, [sp, #120] |
| //sxtw x11,w11 |
| MOV w11, w26 |
| dup v8.4s, w11 |
| SQADD v0.4s, v0.4s , v8.4s |
| //LDR w11, [sp, #116] |
| //sxtw x11,w11 |
| MOV w11, w25 |
| dup v26.4s, w11 |
| sQshL v0.4s, v0.4s, v26.4s |
| mov v26.16b, v28.16b |
| |
| LD2 { v28.4s, v29.4s}, [x4] |
| MOV v30.16b, v29.16b |
| MOV v29.d[0], v28.d[1] |
| // VZIP.32 Q13, Q0 |
| |
| ZIP1 v19.4s, v26.4s, v0.4s |
| ZIP2 v0.4s, v26.4s, v0.4s |
| MOV v26.16b, v19.16b |
| |
| ST1 { v26.4s}, [x4] |
| ADD x4, x4, #16 |
| ST1 { v0.4s}, [x4] |
| ADD x4, x4, #16 |
| |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D13, D1 |
| SADDL v0.4s, v13.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v26.2d, v28.2s, v0.2s |
| Sqxtn v8.2s, v26.2d |
| sMULL v26.2d, v29.2s, v1.2s |
| Sqxtn v9.2s, v26.2d |
| MOV v8.d[1], v9.d[0] |
| movi v1.2s, #0 |
| //VADDL.S16 Q0, D12, D1 |
| SADDL v0.4s, v12.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v24.2d, v28.2s, v0.2s |
| Sqxtn v26.2s, v24.2d |
| sMULL v24.2d, v29.2s, v1.2s |
| Sqxtn v27.2s, v24.2d |
| MOV v26.d[1], v27.d[0] |
| sQshL v4.4s, v4.4s, v16.4s |
| sQshL v6.4s, v6.4s, v16.4s |
| |
| |
| |
| SQSUB v4.4s, v4.4s , v8.4s |
| SQSUB v6.4s, v6.4s , v26.4s |
| |
| NEG v26.4s, v14.4s |
| // VUZP.16 D26, D27 |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| movi v1.2s, #0 |
| //VADDL.S16 Q0, D10, D1 |
| SADDL v0.4s, v10.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v22.2d, v30.2s, v0.2s |
| Sqxtn v24.2s, v22.2d |
| sMULL2 v22.2d, v30.4s, v0.4s |
| Sqxtn v25.2s, v22.2d |
| MOV v24.d[1], v25.d[0] |
| movi v1.2s, #0 |
| //VADDL.S16 Q0, D11, D1 |
| SADDL v0.4s, v11.4h, v1.4h |
| |
| sMULL v8.2d, v30.2s, v0.2s |
| Sqxtn v22.2s, v8.2d |
| sMULL2 v8.2d, v30.4s, v0.4s |
| Sqxtn v23.2s, v8.2d |
| MOV v22.d[1], v23.d[0] |
| |
| // VUZP.16 D14, D15 |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| uMULL v8.4s, v26.4h, v11.4h |
| uMULL v30.4s, v14.4h, v10.4h |
| |
| |
| LD2 { v0.4s, v1.4s}, [x1] |
| MOV v2.16b, v1.16b |
| ADD X1, X1, x12 |
| |
| // VUZP.16 D0, D1 |
| UZP1 v19.8h, v0.8h, v0.8h |
| UZP2 v21.8h, v0.8h, v0.8h |
| MOV v0.d[0], v19.d[0] |
| MOV v0.d[1], v21.d[0] |
| |
| // VUZP.16 D2, D3 |
| |
| UZP1 v19.8h, v2.8h, v2.8h |
| UZP2 v21.8h, v2.8h, v2.8h |
| MOV v2.d[0], v19.d[0] |
| MOV v2.d[1], v21.d[0] |
| |
| ushR v8.4s, v8.4s, #16 |
| |
| rev64 v0.8h, v0.8h |
| MOV v1.d[0], v0.d[1] |
| ushR v30.4s, v30.4s, #16 |
| |
| rev64 v2.8h, v2.8h |
| MOV v3.d[0], v2.d[1] |
| sMLAL v8.4s, v27.4h, v11.4h |
| |
| sMLAL v30.4s, v15.4h, v10.4h |
| |
| LD2 { v10.4s, v11.4s}, [x6] |
| add X6, x6, x12 |
| MOV v12.16b, v11.16b |
| sQshL v4.4s, v4.4s, #2 |
| |
| //VUZP.16 D10, D11 |
| |
| UZP1 v19.8h, v10.8h, v10.8h |
| UZP2 v21.8h, v10.8h, v10.8h |
| MOV v10.d[0], v19.d[0] |
| MOV v10.d[1], v21.d[0] |
| |
| sQshL v6.4s, v6.4s, #2 |
| |
| // VUZP.16 D12, D13 |
| |
| UZP1 v19.8h, v12.8h, v12.8h |
| UZP2 v21.8h, v12.8h, v12.8h |
| MOV v12.d[0], v19.d[0] |
| MOV v12.d[1], v21.d[0] |
| |
| |
| SQADD v14.4s, v4.4s , v20.4s |
| |
| rev64 v10.8h, v10.8h |
| MOV v11.d[0], v10.d[1] |
| SQADD v6.4s, v6.4s , v20.4s |
| |
| rev64 v12.8h, v12.8h |
| MOV v13.d[0], v12.d[1] |
| sshR v14.4s, v14.4s, #16 |
| |
| // VUZP.16 D14, D15 |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| |
| sshR v6.4s, v6.4s, #16 |
| |
| // VUZP.16 D6, D7 |
| |
| UZP1 v19.8h, v6.8h, v6.8h |
| UZP2 v21.8h, v6.8h, v6.8h |
| MOV v6.d[0], v19.d[0] |
| MOV v7.d[0], v21.d[0] |
| |
| |
| mov v15.8b, v6.8b |
| sQshL v8.4s, v8.4s, v16.4s |
| |
| LD2 { v4.4s, v5.4s}, [x8] |
| ADD x8, x8, #32 |
| MOV v6.16b, v5.16b |
| |
| sQshL v30.4s, v30.4s, v16.4s |
| |
| // VUZP.16 D4, D5 |
| |
| UZP1 v19.8h, v4.8h, v4.8h |
| UZP2 v21.8h, v4.8h, v4.8h |
| MOV v4.d[0], v19.d[0] |
| MOV v5.d[0], v21.d[0] |
| |
| |
| SQSUB v8.4s, v8.4s , v24.4s |
| |
| // VUZP.16 D6, D7 |
| |
| UZP1 v19.8h, v6.8h, v6.8h |
| UZP2 v21.8h, v6.8h, v6.8h |
| MOV v6.d[0], v19.d[0] |
| MOV v7.d[0], v21.d[0] |
| |
| |
| SQSUB v22.4s, v30.4s , v22.4s |
| |
| sQshL v30.4s, v8.4s, #2 |
| |
| LD2 {v8.4h, v9.4h}, [x2] |
| ADD x2, x2, #16 |
| sQshL v22.4s, v22.4s, #2 |
| |
| SQADD v30.4s, v30.4s , v20.4s |
| SQADD v22.4s, v22.4s , v20.4s |
| |
| sshR v30.4s, v30.4s, #16 |
| |
| // VUZP.16 D30, D31 |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| |
| sshR v22.4s, v22.4s, #16 |
| |
| |
| // VUZP.16 D22, D23 |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| |
| mov v23.8b, v30.8b |
| |
| SUBS x3, x3, #1 |
| BNE CORE_LOOP |
| |
| |
| |
| |
| |
| EPILOGUE: |
| |
| ST1 {v14.h}[0], [x0] |
| ADD x0, x0, x9 |
| uMULL v30.4s, v0.4h, v9.4h |
| |
| ST1 {v22.h}[0], [x0] |
| ADD x0, x0, x9 |
| uMULL v28.4s, v2.4h, v8.4h |
| |
| ST1 {v14.h}[1], [x0] |
| ADD x0, x0, x9 |
| uMULL v26.4s, v0.4h, v8.4h |
| |
| ST1 {v22.h}[1], [x0] |
| ADD x0, x0, x9 |
| uMULL v24.4s, v2.4h, v9.4h |
| |
| ST1 {v14.h}[2], [x0] |
| ADD x0, x0, x9 |
| ushR v30.4s, v30.4s, #16 |
| |
| ST1 {v22.h}[2], [x0] |
| ADD x0, x0, x9 |
| ushR v28.4s, v28.4s, #16 |
| |
| ST1 {v14.h}[3], [x0] |
| ADD x0, x0, x9 |
| sMLAL v30.4s, v1.4h, v9.4h |
| |
| ST1 {v22.h}[3], [x0] |
| ADD x0, x0, x9 |
| sMLAL v28.4s, v3.4h, v8.4h |
| |
| ST1 {v15.h}[0], [x5] |
| ADD x5, x5, x10 |
| ushR v26.4s, v26.4s, #16 |
| |
| ST1 {v23.h}[0], [x5] |
| ADD x5, x5, x10 |
| ushR v24.4s, v24.4s, #16 |
| |
| ST1 {v15.h}[1], [x5] |
| ADD x5, x5, x10 |
| sMLAL v26.4s, v1.4h, v8.4h |
| |
| ST1 {v23.h}[1], [x5] |
| ADD x5, x5, x10 |
| sMLAL v24.4s, v3.4h, v9.4h |
| |
| ST1 {v15.h}[2], [x5] |
| ADD x5, x5, x10 |
| ADD v30.4s, v30.4s , v28.4s |
| |
| ST1 {v23.h}[2], [x5] |
| ADD x5, x5, x10 |
| NEG v30.4s, v30.4s |
| |
| ST1 {v15.h}[3], [x5] |
| ADD x5, x5, x10 |
| |
| |
| ST1 {v23.h}[3], [x5] |
| ADD x5, x5, x10 |
| SUB v28.4s, v24.4s , v26.4s |
| |
| |
| uMULL v22.4s, v4.4h, v8.4h |
| mov v26.16b, v30.16b |
| mov v24.16b, v28.16b |
| |
| mov v26.16b, v30.16b |
| mov v24.16b, v28.16b |
| |
| //VUZP.16 D26, D27 |
| |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| // VUZP.16 D24, D25 |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| uMULL v2.4s, v24.4h, v18.4h |
| uMULL v0.4s, v26.4h, v18.4h |
| |
| ushR v22.4s, v22.4s, #16 |
| sMLAL v22.4s, v5.4h, v8.4h |
| |
| ushR v2.4s, v2.4s, #16 |
| ushR v0.4s, v0.4s, #16 |
| sMLAL v2.4s, v25.4h, v18.4h |
| sMLAL v0.4s, v27.4h, v18.4h |
| |
| uMULL v24.4s, v4.4h, v9.4h |
| uMULL v26.4s, v6.4h, v8.4h |
| |
| NEG v2.4s, v2.4s |
| ADD v28.4s, v28.4s , v0.4s |
| ADD v30.4s, v30.4s , v2.4s |
| |
| uMULL v0.4s, v6.4h, v9.4h |
| sshR v24.4s, v24.4s, #16 |
| sMLAL v24.4s, v5.4h, v9.4h |
| sshR v26.4s, v26.4s, #16 |
| sshR v0.4s, v0.4s, #16 |
| sMLAL v26.4s, v7.4h, v8.4h |
| sMLAL v0.4s, v7.4h, v9.4h |
| |
| |
| |
| |
| |
| ADD v22.4s, v22.4s , v0.4s |
| NEG v22.4s, v22.4s |
| SUB v24.4s, v26.4s , v24.4s |
| |
| |
| |
| |
| //LDR w11, [sp, #120] |
| //sxtw x11,w11 |
| MOV w11, w26 |
| dup v14.4s, w11 |
| SQADD v28.4s, v28.4s , v14.4s |
| //LDR w11, [sp, #116] |
| //sxtw x11,w11 |
| MOV w11, w25 |
| dup v0.4s, w11 |
| sQshL v28.4s, v28.4s, v0.4s |
| |
| |
| mov v0.16b, v22.16b |
| mov v14.16b, v24.16b |
| |
| |
| // VUZP.16 D22, D23 |
| |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| // VUZP.16 D24, D25 |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| uMULL v8.4s, v24.4h, v18.4h |
| uMULL v26.4s, v22.4h, v18.4h |
| |
| NEG v2.4s, v30.4s |
| |
| // VUZP.16 D30, D31 |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| // VUZP.16 D2, D3 |
| |
| UZP1 v19.8h, v2.8h, v2.8h |
| UZP2 v21.8h, v2.8h, v2.8h |
| MOV v2.d[0], v19.d[0] |
| MOV v3.d[0], v21.d[0] |
| |
| uMULL v4.4s, v30.4h, v12.4h |
| uMULL v6.4s, v2.4h, v13.4h |
| |
| ushR v8.4s, v8.4s, #16 |
| ushR v26.4s, v26.4s, #16 |
| |
| sMLAL v8.4s, v25.4h, v18.4h |
| sMLAL v26.4s, v23.4h, v18.4h |
| |
| ushR v4.4s, v4.4s, #16 |
| ushR v6.4s, v6.4s, #16 |
| |
| MOV v19.d[0], v30.d[1] |
| |
| sMLAL v4.4s, v19.4h, v12.4h |
| sMLAL v6.4s, v3.4h, v13.4h |
| |
| NEG v8.4s, v8.4s |
| ADD v14.4s, v14.4s , v26.4s |
| ADD v0.4s, v0.4s , v8.4s |
| |
| //LDR w11, [sp, #120] |
| //sxtw x11,w11 |
| MOV w11, w26 |
| dup v8.4s, w11 |
| SQADD v0.4s, v0.4s , v8.4s |
| //LDR w11, [sp, #116] |
| //sxtw x11,w11 |
| MOV w11, w25 |
| dup v26.4s, w11 |
| sQshL v0.4s, v0.4s, v26.4s |
| |
| |
| mov v26.16b, v28.16b |
| |
| LD2 { v28.4s, v29.4s}, [x4] |
| MOV v30.16b, v29.16b |
| MOV v29.d[0], v28.d[1] |
| // VZIP.32 Q13, Q0 |
| |
| ZIP1 v19.4s, v26.4s, v0.4s |
| ZIP2 v0.4s, v26.4s, v0.4s |
| MOV v26.16b, v19.16b |
| |
| ST1 { v26.4s}, [x4], #16 |
| ST1 { v0.4s}, [x4], #16 |
| |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D13, D1 |
| SADDL v0.4s, v13.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v26.2d, v28.2s, v0.2s |
| Sqxtn v8.2s, v26.2d |
| sMULL v26.2d, v29.2s, v1.2s |
| Sqxtn v9.2s, v26.2d |
| MOV v8.d[1], v9.d[0] |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D12, D1 |
| SADDL v0.4s, v12.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v24.2d, v28.2s, v0.2s |
| Sqxtn v26.2s, v24.2d |
| sMULL v24.2d, v29.2s, v1.2s |
| Sqxtn v27.2s, v24.2d |
| MOV v26.d[1], v27.d[0] |
| |
| sQshL v4.4s, v4.4s, v16.4s |
| sQshL v6.4s, v6.4s, v16.4s |
| |
| SQSUB v4.4s, v4.4s , v8.4s |
| SQSUB v6.4s, v6.4s , v26.4s |
| |
| NEG v26.4s, v14.4s |
| // VUZP.16 D14, D15 |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| |
| // VUZP.16 D26, D27 |
| |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| |
| movi v1.2s, #0 |
| //VADDL.S16 Q0, D10, D1 |
| SADDL v0.4s, v10.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v22.2d, v30.2s, v0.2s |
| Sqxtn v24.2s, v22.2d |
| sMULL2 v22.2d, v30.4s, v0.4s |
| Sqxtn v25.2s, v22.2d |
| MOV v24.d[1], v25.d[0] |
| movi v1.2s, #0 |
| //VADDL.S16 Q0, D11, D1 |
| SADDL v0.4s, v11.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v8.2d, v30.2s, v0.2s |
| Sqxtn v22.2s, v8.2d |
| sMULL2 v8.2d, v30.4s, v0.4s |
| Sqxtn v23.2s, v8.2d |
| MOV v22.d[1], v23.d[0] |
| |
| uMULL v8.4s, v26.4h, v11.4h |
| uMULL v30.4s, v14.4h, v10.4h |
| |
| ushR v8.4s, v8.4s, #16 |
| |
| ushR v30.4s, v30.4s, #16 |
| |
| sMLAL v8.4s, v27.4h, v11.4h |
| |
| sMLAL v30.4s, v15.4h, v10.4h |
| |
| sQshL v4.4s, v4.4s, #2 |
| |
| sQshL v6.4s, v6.4s, #2 |
| |
| SQADD v14.4s, v4.4s , v20.4s |
| |
| SQADD v6.4s, v6.4s , v20.4s |
| |
| sshR v14.4s, v14.4s, #16 |
| |
| // VUZP.16 D14, D15 |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| sshR v6.4s, v6.4s, #16 |
| |
| // VUZP.16 D6, D7 |
| |
| UZP1 v19.8h, v6.8h, v6.8h |
| UZP2 v21.8h, v6.8h, v6.8h |
| MOV v6.d[0], v19.d[0] |
| MOV v7.d[0], v21.d[0] |
| |
| mov v15.8b, v6.8b |
| sQshL v8.4s, v8.4s, v16.4s |
| |
| sQshL v30.4s, v30.4s, v16.4s |
| |
| SQSUB v8.4s, v8.4s , v24.4s |
| |
| SQSUB v22.4s, v30.4s , v22.4s |
| |
| sQshL v30.4s, v8.4s, #2 |
| |
| sQshL v22.4s, v22.4s, #2 |
| |
| SQADD v30.4s, v30.4s , v20.4s |
| SQADD v22.4s, v22.4s , v20.4s |
| |
| sshR v30.4s, v30.4s, #16 |
| |
| //VUZP.16 D30, D31 |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| sshR v22.4s, v22.4s, #16 |
| |
| // VUZP.16 D22, D23 |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| mov v23.8b, v30.8b |
| |
| |
| |
| |
| ST1 {v14.h}[0], [x0] |
| ADD x0, x0, x9 |
| ST1 {v22.h}[0], [x0] |
| ADD x0, x0, x9 |
| ST1 {v14.h}[1], [x0] |
| ADD x0, x0, x9 |
| ST1 {v22.h}[1], [x0] |
| ADD x0, x0, x9 |
| ST1 {v14.h}[2], [x0] |
| ADD x0, x0, x9 |
| ST1 {v22.h}[2], [x0] |
| ADD x0, x0, x9 |
| ST1 {v14.h}[3], [x0] |
| ADD x0, x0, x9 |
| ST1 {v22.h}[3], [x0] |
| ADD x0, x0, x9 |
| ST1 {v15.h}[0], [x5] |
| ADD x5, x5, x10 |
| ST1 {v23.h}[0], [x5] |
| ADD x5, x5, x10 |
| ST1 {v15.h}[1], [x5] |
| ADD x5, x5, x10 |
| ST1 {v23.h}[1], [x5] |
| ADD x5, x5, x10 |
| ST1 {v15.h}[2], [x5] |
| ADD x5, x5, x10 |
| ST1 {v23.h}[2], [x5] |
| ADD x5, x5, x10 |
| ST1 {v15.h}[3], [x5] |
| ADD x5, x5, x10 |
| ST1 {v23.h}[3], [x5] |
| ADD x5, x5, x10 |
| |
| ARM_EPILOGUE: |
| |
| ARM_LOOP: |
| |
| LD2 { v0.4s, v1.4s}, [x1] |
| MOV v2.16b, v1.16b |
| |
| // Tail section (labels ARM_EPILOGUE / ARM_LOOP): branch-free pass that ends in pop_v_regs + ret. VUZP.16 D0, D1 equivalent: low halfword of each 32-bit lane of v0 -> v0.d[0], high halfword -> v0.d[1] (v19 / v21 are scratch) |
| UZP1 v19.8h, v0.8h, v0.8h |
| UZP2 v21.8h, v0.8h, v0.8h |
| MOV v0.d[0], v19.d[0] |
| MOV v0.d[1], v21.d[0] |
| |
| // VUZP.16 D2, D3 equivalent: same halfword split for v2, the copy of v1 (odd-indexed words of the LD2 from x1); rev64 below then reverses the halfword order and v1 / v3 receive the high-halfword parts |
| UZP1 v19.8h, v2.8h, v2.8h |
| UZP2 v21.8h, v2.8h, v2.8h |
| MOV v2.d[0], v19.d[0] |
| MOV v2.d[1], v21.d[0] |
| |
| |
| rev64 v0.8h, v0.8h |
| MOV v1.d[0], v0.d[1] |
| rev64 v2.8h, v2.8h |
| MOV v3.d[0], v2.d[1] |
| |
| LD2 {v8.4h, v9.4h}, [x2] |
| ADD x2, x2, #16 |
| |
| LD2 {v4.2s, v5.2s}, [x8] |
| ADD x8, x8, #16 |
| MOV v6.16b, v5.16b |
| movi v5.2s, #0x00000000 |
| movi v7.2s, #0x00000000 |
| |
| LD1 {v5.s}[0], [x8], #4 |
| LD1 {v7.s}[0], [x8] |
| |
| MOV x12, #16 |
| MOV v4.d[1], v5.d[0] |
| MOV v6.d[1], v7.d[0] |
| // VUZP.16 D4, D5 equivalent: v4 holds words 0, 2, 4 read from x8 plus a zero lane; low halfwords -> v4.d[0], high halfwords -> v5.d[0]. NOTE(review): MOV x12, #16 above is overwritten by MOV x12, #-4 before x12 is read |
| |
| UZP1 v19.8h, v4.8h, v4.8h |
| UZP2 v21.8h, v4.8h, v4.8h |
| MOV v4.d[0], v19.d[0] |
| MOV v5.d[0], v21.d[0] |
| |
| // VUZP.16 D6, D7 equivalent: v6 holds words 1, 3, 5 read from x8 plus a zero lane; low halfwords -> v6.d[0], high halfwords -> v7.d[0] |
| |
| UZP1 v19.8h, v6.8h, v6.8h |
| UZP2 v21.8h, v6.8h, v6.8h |
| MOV v6.d[0], v19.d[0] |
| MOV v7.d[0], v21.d[0] |
| |
| ADD x6, x6, #16 |
| |
| MOV x12, #-4 |
| LD2 {v11.2s, v12.2s}, [x6] |
| ADD x6, x6, x12 |
| MOV v13.16b, v12.16b |
| |
| |
| movi v10.2s, #0x00000000 |
| |
| LD1 {v12.s}[1], [x6] |
| ADD x6, x6, x12 |
| LD1 {v10.s}[1], [x6] |
| ADD x6, x6, x12 |
| LD1 {v12.s}[0], [x6] |
| ADD x6, x6, x12 |
| |
| MOV v10.d[1], v11.d[0] |
| MOV v12.d[1], v13.d[0] |
| |
| // VUZP.16 D10, D11 equivalent: halfword split of v10 (lane 0 zero, lane 1 loaded singly while x6 steps back by 4, lanes 2-3 from v11 of the LD2); both halves stay in v10 until the rev64 below |
| |
| UZP1 v19.8h, v10.8h, v10.8h |
| UZP2 v21.8h, v10.8h, v10.8h |
| MOV v10.d[0], v19.d[0] |
| MOV v10.d[1], v21.d[0] |
| |
| // VUZP.16 D12, D13 equivalent: same split for v12 (lanes 0-1 loaded singly, lanes 2-3 from v13, the saved odd-indexed words of the LD2). The uMULL / ushR #16 / sMLAL groups further down add (low-halfword product >> 16) to the high-halfword product |
| |
| UZP1 v19.8h, v12.8h, v12.8h |
| UZP2 v21.8h, v12.8h, v12.8h |
| MOV v12.d[0], v19.d[0] |
| MOV v12.d[1], v21.d[0] |
| |
| |
| rev64 v10.8h, v10.8h |
| MOV v11.d[0], v10.d[1] |
| rev64 v12.8h, v12.8h |
| MOV v13.d[0], v12.d[1] |
| |
| uMULL v30.4s, v0.4h, v9.4h |
| uMULL v28.4s, v2.4h, v8.4h |
| uMULL v26.4s, v0.4h, v8.4h |
| uMULL v24.4s, v2.4h, v9.4h |
| |
| ushR v30.4s, v30.4s, #16 |
| ushR v28.4s, v28.4s, #16 |
| |
| sMLAL v30.4s, v1.4h, v9.4h |
| sMLAL v28.4s, v3.4h, v8.4h |
| |
| ushR v26.4s, v26.4s, #16 |
| ushR v24.4s, v24.4s, #16 |
| |
| sMLAL v26.4s, v1.4h, v8.4h |
| sMLAL v24.4s, v3.4h, v9.4h |
| |
| ADD v30.4s, v30.4s , v28.4s |
| NEG v30.4s, v30.4s |
| |
| uMULL v22.4s, v4.4h, v8.4h |
| |
| SUB v28.4s, v24.4s , v26.4s |
| |
| |
| mov v26.16b, v30.16b |
| mov v24.16b, v28.16b |
| |
| // VUZP.16 D26, D27 equivalent: split the copy of v30 into low halfwords (v26.d[0]) and high halfwords (v27.d[0]) for the multiply by v18 below |
| |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| // VUZP.16 D24, D25 equivalent: split the copy of v28 into low halfwords (v24.d[0]) and high halfwords (v25.d[0]) for the multiply by v18 below |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| uMULL v2.4s, v24.4h, v18.4h |
| uMULL v0.4s, v26.4h, v18.4h |
| |
| ushR v22.4s, v22.4s, #16 |
| sMLAL v22.4s, v5.4h, v8.4h |
| |
| ushR v2.4s, v2.4s, #16 |
| ushR v0.4s, v0.4s, #16 |
| sMLAL v2.4s, v25.4h, v18.4h |
| sMLAL v0.4s, v27.4h, v18.4h |
| |
| uMULL v24.4s, v4.4h, v9.4h |
| uMULL v26.4s, v6.4h, v8.4h |
| |
| NEG v2.4s, v2.4s |
| ADD v28.4s, v28.4s , v0.4s |
| ADD v30.4s, v30.4s , v2.4s |
| |
| uMULL v0.4s, v6.4h, v9.4h |
| sshR v24.4s, v24.4s, #16 |
| sMLAL v24.4s, v5.4h, v9.4h |
| sshR v26.4s, v26.4s, #16 |
| sshR v0.4s, v0.4s, #16 |
| sMLAL v26.4s, v7.4h, v8.4h |
| sMLAL v0.4s, v7.4h, v9.4h |
| |
| ADD v22.4s, v22.4s , v0.4s |
| NEG v22.4s, v22.4s |
| SUB v24.4s, v26.4s , v24.4s |
| |
| // NOTE(review): v24, v26 and v0 above are uMULL products shifted with sshR, while every other uMULL product in this section is shifted with ushR - confirm this is intended |
| // was LDR w11, [sp, #120] + sxtw x11,w11: the value is now taken from w26 and broadcast into v14 as the addend of the SQADD on v28 |
| MOV w11, w26 |
| dup v14.4s, w11 |
| SQADD v28.4s, v28.4s , v14.4s |
| // was LDR w11, [sp, #116]: the value is now taken from w25 |
| // was sxtw x11,w11: only w11 is read by the dup; v0 becomes the per-lane shift count of the sQshL on v28 |
| MOV w11, w25 |
| dup v0.4s, w11 |
| sQshL v28.4s, v28.4s, v0.4s |
| |
| mov v0.16b, v22.16b |
| mov v14.16b, v24.16b |
| |
| // VUZP.16 D22, D23 equivalent: split v22 (full copy saved in v0 just above) into low halfwords (v22.d[0]) and high halfwords (v23.d[0]) for the multiply by v18 |
| |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| // VUZP.16 D24, D25 equivalent: split v24 (full copy saved in v14 just above) into low halfwords (v24.d[0]) and high halfwords (v25.d[0]) for the multiply by v18 |
| |
| UZP1 v19.8h, v24.8h, v24.8h |
| UZP2 v21.8h, v24.8h, v24.8h |
| MOV v24.d[0], v19.d[0] |
| MOV v25.d[0], v21.d[0] |
| |
| uMULL v8.4s, v24.4h, v18.4h |
| uMULL v26.4s, v22.4h, v18.4h |
| |
| NEG v2.4s, v30.4s |
| // VUZP.16 D30, D31 equivalent: split v30 into low halfwords (v30.d[0]) and high halfwords (v30.d[1]); the high part is moved to v19 right before its sMLAL |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| // VUZP.16 D2, D3 equivalent: split v2 (the negated v30 computed above) into low halfwords (v2.d[0]) and high halfwords (v3.d[0]) |
| |
| UZP1 v19.8h, v2.8h, v2.8h |
| UZP2 v21.8h, v2.8h, v2.8h |
| MOV v2.d[0], v19.d[0] |
| MOV v3.d[0], v21.d[0] |
| |
| uMULL v4.4s, v30.4h, v12.4h |
| uMULL v6.4s, v2.4h, v13.4h |
| |
| ushR v8.4s, v8.4s, #16 |
| ushR v26.4s, v26.4s, #16 |
| |
| sMLAL v8.4s, v25.4h, v18.4h |
| sMLAL v26.4s, v23.4h, v18.4h |
| |
| ushR v4.4s, v4.4s, #16 |
| ushR v6.4s, v6.4s, #16 |
| |
| MOV v19.d[0], v30.d[1] |
| |
| sMLAL v4.4s, v19.4h, v12.4h |
| sMLAL v6.4s, v3.4h, v13.4h |
| |
| NEG v8.4s, v8.4s |
| ADD v14.4s, v14.4s , v26.4s |
| ADD v0.4s, v0.4s , v8.4s |
| |
| // was LDR w11, [sp, #120]: the value is now taken from w26 |
| // was sxtw x11,w11: only w11 is read by the dup; v8 is the addend of the SQADD on v0 |
| MOV w11, w26 |
| dup v8.4s, w11 |
| SQADD v0.4s, v0.4s , v8.4s |
| // was LDR w11, [sp, #116]: the value is now taken from w25 |
| // was sxtw x11,w11: only w11 is read by the dup; v26 is the per-lane shift count of the sQshL on v0 |
| MOV w11, w25 |
| dup v26.4s, w11 |
| sQshL v0.4s, v0.4s, v26.4s |
| |
| mov v26.16b, v28.16b |
| |
| MOV x6, x4 |
| |
| LD1 {v28.2s, v29.2s}, [x4], #16 |
| movi v19.2s, #0x00000000 |
| LD1 {v30.s}[0], [x4], #4 |
| LD1 {v30.s}[1], [x4], #4 |
| LD1 {v19.s}[0], [x4], #4 |
| |
| MOV v28.d[1], v29.d[0] |
| MOV v30.d[1], v19.d[0] |
| |
| // VUZP.32 Q14, Q15 equivalent on the seven words just read through x4 (eighth lane zero): even-indexed words -> v28 (upper pair copied to v29), odd-indexed words -> v30. The seven ST1 lanes after it write v26 / v0 alternately through x6, which holds the x4 value from before the reads |
| |
| UZP1 v19.4s, v28.4s, v30.4s |
| UZP2 v30.4s, v28.4s, v30.4s |
| MOV v28.16b, v19.16b |
| MOV v29.d[0], v28.d[1] |
| |
| ST1 {v26.s}[0], [x6], #4 |
| ST1 {v0.s}[0], [x6], #4 |
| ST1 {v26.s}[1], [x6], #4 |
| ST1 {v0.s}[1], [x6], #4 |
| ST1 {v26.s}[2], [x6], #4 |
| ST1 {v0.s}[2], [x6], #4 |
| ST1 {v26.s}[3], [x6], #4 |
| |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D13, D1 equivalent: adding the zeroed v1 sign-extends v13.4h to 32 bits in v0; the upper pair is copied to v1 for the second sMULL. Each 64-bit product is narrowed with saturation (Sqxtn) |
| SADDL v0.4s, v13.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v26.2d, v28.2s, v0.2s |
| Sqxtn v8.2s, v26.2d |
| sMULL v26.2d, v29.2s, v1.2s |
| Sqxtn v9.2s, v26.2d |
| MOV v8.d[1], v9.d[0] |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D12, D1 equivalent: sign-extends v12.4h to 32 bits in v0 (upper pair copied to v1); the products with v28 / v29 are narrowed into v26 |
| SADDL v0.4s, v12.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v24.2d, v28.2s, v0.2s |
| Sqxtn v26.2s, v24.2d |
| sMULL v24.2d, v29.2s, v1.2s |
| Sqxtn v27.2s, v24.2d |
| MOV v26.d[1], v27.d[0] |
| |
| sQshL v4.4s, v4.4s, v16.4s |
| sQshL v6.4s, v6.4s, v16.4s |
| |
| SQSUB v4.4s, v4.4s , v8.4s |
| SQSUB v6.4s, v6.4s , v26.4s |
| |
| NEG v26.4s, v14.4s |
| // VUZP.16 D14, D15 equivalent: split v14 into low halfwords (v14.d[0]) and high halfwords (v15.d[0]) |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| // VUZP.16 D26, D27 equivalent: split v26 (the negated v14 computed above) into low halfwords (v26.d[0]) and high halfwords (v27.d[0]) |
| |
| UZP1 v19.8h, v26.8h, v26.8h |
| UZP2 v21.8h, v26.8h, v26.8h |
| MOV v26.d[0], v19.d[0] |
| MOV v27.d[0], v21.d[0] |
| |
| |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D10, D1 equivalent: sign-extends v10.4h to 32 bits in v0; here sMULL / sMULL2 read both halves of v0 directly, so the copy into v1 is not read before v1 is cleared again |
| SADDL v0.4s, v10.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v22.2d, v30.2s, v0.2s |
| Sqxtn v24.2s, v22.2d |
| sMULL2 v22.2d, v30.4s, v0.4s |
| Sqxtn v25.2s, v22.2d |
| MOV v24.d[1], v25.d[0] |
| |
| movi v1.2s, #0 |
| // VADDL.S16 Q0, D11, D1 equivalent: sign-extends v11.4h to 32 bits in v0; sMULL / sMULL2 again read v0 directly and the narrowed products land in v22 |
| SADDL v0.4s, v11.4h, v1.4h |
| MOV v1.d[0], v0.d[1] |
| |
| sMULL v8.2d, v30.2s, v0.2s |
| Sqxtn v22.2s, v8.2d |
| sMULL2 v8.2d, v30.4s, v0.4s |
| Sqxtn v23.2s, v8.2d |
| MOV v22.d[1], v23.d[0] |
| |
| uMULL v8.4s, v26.4h, v11.4h |
| uMULL v30.4s, v14.4h, v10.4h |
| |
| ushR v8.4s, v8.4s, #16 |
| |
| ushR v30.4s, v30.4s, #16 |
| |
| sMLAL v8.4s, v27.4h, v11.4h |
| |
| sMLAL v30.4s, v15.4h, v10.4h |
| |
| sQshL v4.4s, v4.4s, #2 |
| |
| sQshL v6.4s, v6.4s, #2 |
| |
| SQADD v14.4s, v4.4s , v20.4s |
| |
| SQADD v6.4s, v6.4s , v20.4s |
| |
| sshR v14.4s, v14.4s, #16 |
| |
| // VUZP.16 D14, D15 equivalent: after the shift right by 16 the low halfword of each lane is gathered in v14.d[0]; the v15.d[0] written here is replaced by mov v15.8b, v6.8b below |
| |
| UZP1 v19.8h, v14.8h, v14.8h |
| UZP2 v21.8h, v14.8h, v14.8h |
| MOV v14.d[0], v19.d[0] |
| MOV v15.d[0], v21.d[0] |
| |
| sshR v6.4s, v6.4s, #16 |
| |
| // VUZP.16 D6, D7 equivalent: low halfword of each lane of v6 gathered in v6.d[0] (then copied to v15), high halfwords in v7.d[0] |
| |
| UZP1 v19.8h, v6.8h, v6.8h |
| UZP2 v21.8h, v6.8h, v6.8h |
| MOV v6.d[0], v19.d[0] |
| MOV v7.d[0], v21.d[0] |
| |
| mov v15.8b, v6.8b |
| sQshL v8.4s, v8.4s, v16.4s |
| |
| sQshL v30.4s, v30.4s, v16.4s |
| |
| SQSUB v8.4s, v8.4s , v24.4s |
| |
| SQSUB v22.4s, v30.4s , v22.4s |
| |
| sQshL v30.4s, v8.4s, #2 |
| |
| sQshL v22.4s, v22.4s, #2 |
| |
| SQADD v30.4s, v30.4s , v20.4s |
| SQADD v22.4s, v22.4s , v20.4s |
| |
| sshR v30.4s, v30.4s, #16 |
| |
| // VUZP.16 D30, D31 equivalent: low halfword of each lane of v30 gathered in v30.d[0] (copied to v23 below), high halfwords in v30.d[1] |
| |
| UZP1 v19.8h, v30.8h, v30.8h |
| UZP2 v21.8h, v30.8h, v30.8h |
| MOV v30.d[0], v19.d[0] |
| MOV v30.d[1], v21.d[0] |
| |
| sshR v22.4s, v22.4s, #16 |
| |
| // VUZP.16 D22, D23 equivalent: low halfword of each lane of v22 gathered in v22.d[0]; the v23.d[0] written here is replaced by mov v23.8b, v30.8b. The stores after that write seven halfwords alternating v14 / v22 through x0 (step x9) and seven alternating v15 / v23 through x5 (step x10) |
| |
| UZP1 v19.8h, v22.8h, v22.8h |
| UZP2 v21.8h, v22.8h, v22.8h |
| MOV v22.d[0], v19.d[0] |
| MOV v23.d[0], v21.d[0] |
| |
| mov v23.8b, v30.8b |
| |
| |
| |
| |
| ST1 {v14.h}[0], [x0] |
| ADD x0, x0, x9 |
| ST1 {v22.h}[0], [x0] |
| ADD x0, x0, x9 |
| ST1 {v14.h}[1], [x0] |
| ADD x0, x0, x9 |
| ST1 {v22.h}[1], [x0] |
| ADD x0, x0, x9 |
| ST1 {v14.h}[2], [x0] |
| ADD x0, x0, x9 |
| ST1 {v22.h}[2], [x0] |
| ADD x0, x0, x9 |
| ST1 {v14.h}[3], [x0] |
| ADD x0, x0, x9 |
| |
| ST1 {v15.h}[0], [x5] |
| ADD x5, x5, x10 |
| ST1 {v23.h}[0], [x5] |
| ADD x5, x5, x10 |
| ST1 {v15.h}[1], [x5] |
| ADD x5, x5, x10 |
| ST1 {v23.h}[1], [x5] |
| ADD x5, x5, x10 |
| ST1 {v15.h}[2], [x5] |
| ADD x5, x5, x10 |
| ST1 {v23.h}[2], [x5] |
| ADD x5, x5, x10 |
| ST1 {v15.h}[3], [x5] |
| ADD x5, x5, x10 |
| |
| // was VPOP {d8 - d15}: ARMv7 epilogue kept for reference only |
| // was LDMFD sp!, {x4-x12}: ARMv7 epilogue kept for reference only |
| // disabled ldp x19, x20,[sp],#16: pop_v_regs below reloads x8-x30 and q8-q15 that push_v_regs stored at entry |
| pop_v_regs |
| ret |
| // was BX x14: ARMv7 return kept for reference; the ret above returns to the caller |