| .macro push_v_regs |
| stp d8, d9, [sp, #-16]! |
| stp d10, d11, [sp, #-16]! |
| stp d12, d13, [sp, #-16]! |
| stp d14, d15, [sp, #-16]! |
| stp X8, X9, [sp, #-16]! |
| stp X10, X11, [sp, #-16]! |
| stp X12, X13, [sp, #-16]! |
| stp X14, X15, [sp, #-16]! |
| stp X16, X17, [sp, #-16]! |
| stp X29, X30, [sp, #-16]! |
| .endm |
| .macro pop_v_regs |
| ldp X29, X30, [sp], #16 |
| ldp X16, X17, [sp], #16 |
| ldp X14, X15, [sp], #16 |
| ldp X12, X13, [sp], #16 |
| ldp X10, X11, [sp], #16 |
| ldp X8, X9, [sp], #16 |
| ldp d14, d15, [sp], #16 |
| ldp d12, d13, [sp], #16 |
| ldp d10, d11, [sp], #16 |
| ldp d8, d9, [sp], #16 |
| .endm |
| |
| .text |
| .p2align 2 |
| .global ixheaacd_sbr_qmfanal32_winadds |
| |
| ixheaacd_sbr_qmfanal32_winadds: // PROC |
| |
| // STMFD sp!, {x4-x12, x14} |
| push_v_regs |
| stp x19, x20, [sp, #-16]! |
| //VPUSH {D8 - D15} |
| //LDR w5, [SP, #108] //filterStates |
| //sxtw x5,w5 |
| //LDR w6, [SP, #112] //timeIn |
| //sxtw x6,w6 |
| //LDR w7, [SP, #116] //stride |
| //sxtw x7,w7 |
| |
| LSL x9, x7, #1 |
| |
| |
| MOV x20, x4 |
| ADD x5, x5, #64 |
| MOV w10, #3 |
| |
| //ADD x5, x5, #56 |
| //MOV x10, #1 |
| ////SUB x6, x6, x9 |
| //CMP x7, #1 |
| //MOV x11, #-8 |
| //BGT LOOP_SKIP_ODD |
| |
| LOOP: |
| LDRSH w4 , [x6] |
| ADD x6, x6, x9 |
| LDRSH w8 , [x6] |
| ADD x6, x6, x9 |
| LDRSH w11 , [x6] |
| ADD x6, x6, x9 |
| LDRSH w12 , [x6] |
| ADD x6, x6, x9 |
| |
| STRH w4 , [x5 , #-2]! |
| STRH w8 , [x5 , #-2]! |
| STRH w11 , [x5 , #-2]! |
| STRH w12 , [x5 , #-2]! |
| |
| LDRSH w4 , [x6] |
| ADD x6, x6, x9 |
| LDRSH w8 , [x6] |
| ADD x6, x6, x9 |
| LDRSH w11 , [x6] |
| ADD x6, x6, x9 |
| LDRSH w12 , [x6] |
| ADD x6, x6, x9 |
| |
| STRH w4 , [x5 , #-2]! |
| STRH w8 , [x5 , #-2]! |
| STRH w11 , [x5 , #-2]! |
| STRH w12 , [x5 , #-2]! |
| SUBS w10, w10, #1 |
| |
| BPL LOOP |
| |
| |
| //LOOP: |
| // LD1 {v0.4h} , [x6], #8 |
| // LD1 {v1.4h} , [x6], #8 |
| // |
| // REV64 v4.4h , v0.4h |
| // REV64 v5.4h , v1.4h |
| // |
| // ST1 {v4.4h} , [x5] , x11 |
| // ST1 {v5.4h} , [x5] , x11 |
| // |
| // LD1 {v2.4h} , [x6], #8 |
| // LD1 {v3.4h} , [x6], #8 |
| // |
| // REV64 v6.4h , v2.4h |
| // REV64 v7.4h , v3.4h |
| // |
| // ST1 {v6.4h} , [x5] , x11 |
| // ST1 {v7.4h} , [x5] , x11 |
| // |
| // SUBS x10, x10, #1 |
| // BPL LOOP |
| // B SKIP_LOOP |
| // |
| //LOOP_SKIP_ODD: |
| // LD2 {v0.4h , v1.4h} , [x6], #16 |
| // LD2 {v2.4h , v3.4h} , [x6], #16 |
| // |
| // REV64 v1.4h , v0.4h |
| // REV64 v3.4h , v2.4h |
| // |
| // ST1 {v1.4h} , [x5], x11 |
| // ST1 {v3.4h} , [x5], x11 |
| // |
| // LD2 {v4.4h , v5.4h} , [x6], #16 |
| // LD2 {v6.4h , v7.4h} , [x6], #16 |
| // |
| // |
| // REV64 v5.4h , v4.4h |
| // REV64 v7.4h , v6.4h |
| // |
| // ST1 {v5.4h} , [x5], x11 |
| // ST1 {v7.4h} , [x5], x11 |
| // |
| // SUBS x10, x10, #1 |
| // BPL LOOP_SKIP_ODD |
| |
| SKIP_LOOP: |
| |
| //LDR w4, [SP, #104] //winAdd |
| // sxtw x4,w4 |
| |
| MOV x4, x20 |
| MOV x5, #8 |
| LD1 {v0.4h}, [x0], #8 |
| MOV x6, #64 |
| |
| LSL x6, x6, #1 |
| LD2 {v1.4h, v2.4h}, [x2], #16 |
| MOV x7, #244 |
| |
| MOV x9, x0 |
| ADD x0, x0, #120 |
| |
| MOV x11, x4 |
| LD1 {v2.4h}, [x0], x6 |
| ADD x11, x11, #128 |
| |
| |
| |
| |
| MOV x10, x2 |
| ADD x2, x2, #240 |
| |
| sMULL v30.4s, v0.4h, v1.4h |
| LD2 {v3.4h, v4.4h}, [x2], #16 |
| ADD x2, x2, #240 |
| |
| |
| LD1 {v4.4h}, [x0], x6 |
| sMLAL v30.4s, v2.4h, v3.4h |
| |
| LD2 {v5.4h, v6.4h}, [x2], #16 |
| |
| |
| ADD x2, x2, #240 |
| LD1 {v6.4h}, [x0], x6 |
| sMLAL v30.4s, v4.4h, v5.4h |
| |
| LD2 {v7.4h, v8.4h}, [x2], #16 |
| |
| |
| ADD x2, x2, #240 |
| LD1 {v8.4h}, [x0], x6 |
| sMLAL v30.4s, v6.4h, v7.4h |
| |
| MOV x0, x9 |
| LD2 {v9.4h, v10.4h}, [x2], #16 |
| |
| |
| ADD x2, x2, #240 |
| LD1 {v10.4h}, [x1], #8 |
| sMLAL v30.4s, v8.4h, v9.4h |
| |
| |
| |
| MOV x9, x1 |
| LD2 {v11.4h, v12.4h}, [x3], #16 |
| ADD x1, x1, #120 |
| |
| |
| MOV x2, x10 |
| LD1 {v12.4h}, [x1], x6 |
| MOV x10, x3 |
| |
| ADD x3, x3, #240 |
| LD2 {v13.4h, v14.4h}, [x3], #16 |
| ADD x3, x3, #240 |
| |
| |
| LD2 {v15.4h, v16.4h}, [x3], #16 |
| |
| LD1 {v14.4h}, [x1], x6 |
| ADD x3, x3, #240 |
| |
| |
| |
| LD1 {v16.4h}, [x1], x6 |
| SUB x5, x5, #1 |
| |
| LD2 {v17.4h, v18.4h}, [x3], #16 |
| |
| |
| ADD x3, x3, #240 |
| LD1 {v18.4h}, [x1], x6 |
| |
| MOV x1, x9 |
| LD2 {v19.4h, v20.4h}, [x3], #16 |
| |
| ADD x3, x3, #240 |
| |
| MOV x3, x10 |
| |
| |
| LOOP_1: |
| |
| |
| LD1 {v0.4h}, [x0], #8 |
| |
| MOV x9, x0 |
| LD2 {v1.4h, v2.4h}, [x2], #16 |
| ADD x0, x0, #120 |
| |
| MOV x10, x2 |
| ST1 { v30.4s}, [x4], #16 |
| ADD x2, x2, #240 |
| |
| |
| sMULL v30.4s, v10.4h, v11.4h |
| LD1 {v2.4h}, [x0], x6 |
| sMLAL v30.4s, v12.4h, v13.4h |
| |
| sMLAL v30.4s, v14.4h, v15.4h |
| LD2 {v3.4h, v4.4h}, [x2], #16 |
| sMLAL v30.4s, v16.4h, v17.4h |
| |
| sMLAL v30.4s, v18.4h, v19.4h |
| LD1 {v4.4h}, [x0], x6 |
| ADD x2, x2, #240 |
| |
| ST1 { v30.4s}, [x11], #16 |
| |
| |
| sMULL v30.4s, v0.4h, v1.4h |
| LD2 {v5.4h, v6.4h}, [x2], #16 |
| sMLAL v30.4s, v2.4h, v3.4h |
| |
| |
| |
| ADD x2, x2, #240 |
| LD1 {v6.4h}, [x0], x6 |
| sMLAL v30.4s, v4.4h, v5.4h |
| |
| LD2 {v7.4h, v8.4h}, [x2], #16 |
| |
| |
| ADD x2, x2, #240 |
| LD1 {v8.4h}, [x0], x6 |
| sMLAL v30.4s, v6.4h, v7.4h |
| |
| MOV x0, x9 |
| LD2 {v9.4h, v10.4h}, [x2], #16 |
| |
| |
| |
| ADD x2, x2, #240 |
| LD1 {v10.4h}, [x1], #8 |
| MOV x2, x10 |
| |
| MOV x9, x1 |
| LD2 {v11.4h, v12.4h}, [x3], #16 |
| ADD x1, x1, #120 |
| |
| |
| sMLAL v30.4s, v8.4h, v9.4h |
| LD1 {v12.4h}, [x1], x6 |
| MOV x10, x3 |
| |
| |
| ADD x3, x3, #240 |
| LD2 {v13.4h, v14.4h}, [x3], #16 |
| ADD x3, x3, #240 |
| |
| |
| |
| LD1 {v14.4h}, [x1], x6 |
| LD2 {v15.4h, v16.4h}, [x3], #16 |
| ADD x3, x3, #240 |
| |
| |
| LD1 {v16.4h}, [x1], x6 |
| LD2 {v17.4h, v18.4h}, [x3], #16 |
| ADD x3, x3, #240 |
| |
| |
| LD1 {v18.4h}, [x1], x6 |
| SUBS x5, x5, #1 |
| |
| MOV x1, x9 |
| LD2 {v19.4h, v20.4h}, [x3], #16 |
| |
| ADD x3, x3, #240 |
| |
| MOV x3, x10 |
| |
| BGT LOOP_1 |
| |
| ST1 { v30.4s}, [x4], #16 |
| sMULL v30.4s, v10.4h, v11.4h |
| sMLAL v30.4s, v12.4h, v13.4h |
| |
| sMLAL v30.4s, v14.4h, v15.4h |
| sMLAL v30.4s, v16.4h, v17.4h |
| sMLAL v30.4s, v18.4h, v19.4h |
| |
| ST1 { v30.4s}, [x11], #16 |
| |
| //VPOP {D8 - D15} |
| // LDMFD sp!, {x4-x12, x15} |
| ldp x19, x20, [sp], #16 |
| pop_v_regs |
| ret |
| // ENDP |