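// push_v_regs: spill the callee-saved SIMD registers d8-d15 (required by
// AAPCS64) together with x8-x17 and the frame/link registers x29/x30 before
// they are clobbered below.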
.macro push_v_regs
stp d8, d9, [sp, #-16]!
stp d10, d11, [sp, #-16]!
stp d12, d13, [sp, #-16]!
stp d14, d15, [sp, #-16]!
stp x8, x9, [sp, #-16]!
stp x10, x11, [sp, #-16]!
stp x12, x13, [sp, #-16]!
stp x14, x15, [sp, #-16]!
stp x16, x17, [sp, #-16]!
stp x29, x30, [sp, #-16]!
.endm
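// pop_v_regs: restore the registers saved by push_v_regs, in reverse order.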
.macro pop_v_regs
ldp x29, x30, [sp], #16
ldp x16, x17, [sp], #16
ldp x14, x15, [sp], #16
ldp x12, x13, [sp], #16
ldp x10, x11, [sp], #16
ldp x8, x9, [sp], #16
ldp d14, d15, [sp], #16
ldp d12, d13, [sp], #16
ldp d10, d11, [sp], #16
ldp d8, d9, [sp], #16
.endm
.text
.p2align 2
.global ixheaacd_sbr_qmfanal32_winadds
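// QMF analysis window-add for the 32-band SBR filterbank.
// Register usage (inferred from the code and the commented-out stack loads
// kept below, so treat the names as descriptive rather than authoritative):
//   x0-x3 : pointers to the two 16-bit input blocks and the two interleaved
//           16-bit window-coefficient tables
//   x4    : 32-bit output buffer (winAdd)
//   x5    : filterStates (16-bit delay line)
//   x6    : timeIn (16-bit input samples)
//   x7    : stride between consecutive input samples, in halfwords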
ixheaacd_sbr_qmfanal32_winadds: // PROC
// STMFD sp!, {x4-x12, x14}
push_v_regs
stp x19, x20, [sp, #-16]!
//VPUSH {D8 - D15}
//LDR w5, [SP, #108] //filterStates
//sxtw x5,w5
//LDR w6, [SP, #112] //timeIn
//sxtw x6,w6
//LDR w7, [SP, #116] //stride
//sxtw x7,w7
LSL x9, x7, #1
MOV x20, x4
ADD x5, x5, #64
MOV w10, #3
//ADD x5, x5, #56
//MOV x10, #1
////SUB x6, x6, x9
//CMP x7, #1
//MOV x11, #-8
//BGT LOOP_SKIP_ODD
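// Copy 32 input samples into filterStates[0..31].  Each pass reads 8
// halfwords from timeIn, stepping x6 by 2*stride bytes, and stores them with
// pre-decrementing STRH starting at filterStates+64, so the block ends up in
// reverse order.  w10 = 3 with BPL gives 4 passes (32 samples in total).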
LOOP:
LDRSH w4 , [x6]
ADD x6, x6, x9
LDRSH w8 , [x6]
ADD x6, x6, x9
LDRSH w11 , [x6]
ADD x6, x6, x9
LDRSH w12 , [x6]
ADD x6, x6, x9
STRH w4 , [x5 , #-2]!
STRH w8 , [x5 , #-2]!
STRH w11 , [x5 , #-2]!
STRH w12 , [x5 , #-2]!
LDRSH w4 , [x6]
ADD x6, x6, x9
LDRSH w8 , [x6]
ADD x6, x6, x9
LDRSH w11 , [x6]
ADD x6, x6, x9
LDRSH w12 , [x6]
ADD x6, x6, x9
STRH w4 , [x5 , #-2]!
STRH w8 , [x5 , #-2]!
STRH w11 , [x5 , #-2]!
STRH w12 , [x5 , #-2]!
SUBS w10, w10, #1
BPL LOOP
//LOOP:
// LD1 {v0.4h} , [x6], #8
// LD1 {v1.4h} , [x6], #8
//
// REV64 v4.4h , v0.4h
// REV64 v5.4h , v1.4h
//
// ST1 {v4.4h} , [x5] , x11
// ST1 {v5.4h} , [x5] , x11
//
// LD1 {v2.4h} , [x6], #8
// LD1 {v3.4h} , [x6], #8
//
// REV64 v6.4h , v2.4h
// REV64 v7.4h , v3.4h
//
// ST1 {v6.4h} , [x5] , x11
// ST1 {v7.4h} , [x5] , x11
//
// SUBS x10, x10, #1
// BPL LOOP
// B SKIP_LOOP
//
//LOOP_SKIP_ODD:
// LD2 {v0.4h , v1.4h} , [x6], #16
// LD2 {v2.4h , v3.4h} , [x6], #16
//
// REV64 v1.4h , v0.4h
// REV64 v3.4h , v2.4h
//
// ST1 {v1.4h} , [x5], x11
// ST1 {v3.4h} , [x5], x11
//
// LD2 {v4.4h , v5.4h} , [x6], #16
// LD2 {v6.4h , v7.4h} , [x6], #16
//
//
// REV64 v5.4h , v4.4h
// REV64 v7.4h , v6.4h
//
// ST1 {v5.4h} , [x5], x11
// ST1 {v7.4h} , [x5], x11
//
// SUBS x10, x10, #1
// BPL LOOP_SKIP_ODD
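// Window-add stage.  Each group of four 32-bit outputs is a five-term
// multiply-accumulate (SMULL followed by four SMLALs) of 16-bit inputs
// against de-interleaved 16-bit coefficients: successive input taps are
// 128 bytes apart (LD1 post-indexed by x6 = 128) and successive coefficient
// groups 256 bytes apart, with LD2 keeping only the even-indexed halfwords.
// Results from the x0/x2 pair go to x4 (winAdd), results from the x1/x3 pair
// to x11 = winAdd + 128.  The code below peels the first group so that LOOP_1
// can overlap the loads for the next group with the current accumulations.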
SKIP_LOOP:
//LDR w4, [SP, #104] //winAdd
// sxtw x4,w4
MOV x4, x20
MOV x5, #8
LD1 {v0.4h}, [x0], #8
MOV x6, #64
LSL x6, x6, #1
LD2 {v1.4h, v2.4h}, [x2], #16
MOV x7, #244
MOV x9, x0
ADD x0, x0, #120
MOV x11, x4
LD1 {v2.4h}, [x0], x6
ADD x11, x11, #128
MOV x10, x2
ADD x2, x2, #240
sMULL v30.4s, v0.4h, v1.4h
LD2 {v3.4h, v4.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v4.4h}, [x0], x6
sMLAL v30.4s, v2.4h, v3.4h
LD2 {v5.4h, v6.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v6.4h}, [x0], x6
sMLAL v30.4s, v4.4h, v5.4h
LD2 {v7.4h, v8.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v8.4h}, [x0], x6
sMLAL v30.4s, v6.4h, v7.4h
MOV x0, x9
LD2 {v9.4h, v10.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v10.4h}, [x1], #8
sMLAL v30.4s, v8.4h, v9.4h
MOV x9, x1
LD2 {v11.4h, v12.4h}, [x3], #16
ADD x1, x1, #120
MOV x2, x10
LD1 {v12.4h}, [x1], x6
MOV x10, x3
ADD x3, x3, #240
LD2 {v13.4h, v14.4h}, [x3], #16
ADD x3, x3, #240
LD2 {v15.4h, v16.4h}, [x3], #16
LD1 {v14.4h}, [x1], x6
ADD x3, x3, #240
LD1 {v16.4h}, [x1], x6
SUB x5, x5, #1
LD2 {v17.4h, v18.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v18.4h}, [x1], x6
MOV x1, x9
LD2 {v19.4h, v20.4h}, [x3], #16
ADD x3, x3, #240
MOV x3, x10
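// Software-pipelined main loop (7 iterations, x5 counts 7..1): store the
// previously accumulated x0/x2 result to x4, compute and store the pending
// x1/x3 result to x11, then start the next x0/x2 accumulation while the
// loads for the following group are issued.  The last two result vectors
// are stored after the loop.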
LOOP_1:
LD1 {v0.4h}, [x0], #8
MOV x9, x0
LD2 {v1.4h, v2.4h}, [x2], #16
ADD x0, x0, #120
MOV x10, x2
ST1 { v30.4s}, [x4], #16
ADD x2, x2, #240
sMULL v30.4s, v10.4h, v11.4h
LD1 {v2.4h}, [x0], x6
sMLAL v30.4s, v12.4h, v13.4h
sMLAL v30.4s, v14.4h, v15.4h
LD2 {v3.4h, v4.4h}, [x2], #16
sMLAL v30.4s, v16.4h, v17.4h
sMLAL v30.4s, v18.4h, v19.4h
LD1 {v4.4h}, [x0], x6
ADD x2, x2, #240
ST1 { v30.4s}, [x11], #16
sMULL v30.4s, v0.4h, v1.4h
LD2 {v5.4h, v6.4h}, [x2], #16
sMLAL v30.4s, v2.4h, v3.4h
ADD x2, x2, #240
LD1 {v6.4h}, [x0], x6
sMLAL v30.4s, v4.4h, v5.4h
LD2 {v7.4h, v8.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v8.4h}, [x0], x6
sMLAL v30.4s, v6.4h, v7.4h
MOV x0, x9
LD2 {v9.4h, v10.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v10.4h}, [x1], #8
MOV x2, x10
MOV x9, x1
LD2 {v11.4h, v12.4h}, [x3], #16
ADD x1, x1, #120
sMLAL v30.4s, v8.4h, v9.4h
LD1 {v12.4h}, [x1], x6
MOV x10, x3
ADD x3, x3, #240
LD2 {v13.4h, v14.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v14.4h}, [x1], x6
LD2 {v15.4h, v16.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v16.4h}, [x1], x6
LD2 {v17.4h, v18.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v18.4h}, [x1], x6
SUBS x5, x5, #1
MOV x1, x9
LD2 {v19.4h, v20.4h}, [x3], #16
ADD x3, x3, #240
MOV x3, x10
BGT LOOP_1
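// Drain the pipeline: store the final x0/x2 result, compute and store the
// final x1/x3 result, then restore the saved registers and return.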
ST1 { v30.4s}, [x4], #16
sMULL v30.4s, v10.4h, v11.4h
sMLAL v30.4s, v12.4h, v13.4h
sMLAL v30.4s, v14.4h, v15.4h
sMLAL v30.4s, v16.4h, v17.4h
sMLAL v30.4s, v18.4h, v19.4h
ST1 { v30.4s}, [x11], #16
//VPOP {D8 - D15}
// LDMFD sp!, {x4-x12, x15}
ldp x19, x20, [sp], #16
pop_v_regs
ret
// ENDP