| ///****************************************************************************** |
| // * |
| // * Copyright (C) 2018 The Android Open Source Project |
| // * |
| // * Licensed under the Apache License, Version 2.0 (the "License"); |
| // * you may not use this file except in compliance with the License. |
| // * You may obtain a copy of the License at: |
| // * |
| // * http://www.apache.org/licenses/LICENSE-2.0 |
| // * |
| // * Unless required by applicable law or agreed to in writing, software |
| // * distributed under the License is distributed on an "AS IS" BASIS, |
| // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // * See the License for the specific language governing permissions and |
| // * limitations under the License. |
| // * |
| // ***************************************************************************** |
| // * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| //*/ |
| |
| |
// push_v_regs: open a 160-byte stack frame and spill q8-q15 and x21-x24.
// Frame layout, relative to sp after the macro has run:
//   [sp, #0]    x23, x24
//   [sp, #16]   x21, x22
//   [sp, #32]   q14, q15
//   [sp, #64]   q12, q13
//   [sp, #96]   q10, q11
//   [sp, #128]  q8,  q9
// sp is lowered by 160 bytes (a multiple of 16). pop_v_regs is the inverse.
| .macro push_v_regs |
| sub sp, sp, #160 |
| stp x23, x24, [sp] |
| stp x21, x22, [sp, #16] |
| stp q14, q15, [sp, #32] |
| stp q12, q13, [sp, #64] |
| stp q10, q11, [sp, #96] |
| stp q8, q9, [sp, #128] |
| .endm |
// pop_v_regs: reload x21-x24 and q8-q15 from the 160-byte frame written by
// push_v_regs, then release the frame. Slots read, relative to sp on entry:
//   [sp, #0]    x23, x24
//   [sp, #16]   x21, x22
//   [sp, #32]   q14, q15
//   [sp, #64]   q12, q13
//   [sp, #96]   q10, q11
//   [sp, #128]  q8,  q9
// sp is raised by 160 bytes once every register has been reloaded.
| .macro pop_v_regs |
| ldp x23, x24, [sp] |
| ldp x21, x22, [sp, #16] |
| ldp q14, q15, [sp, #32] |
| ldp q12, q13, [sp, #64] |
| ldp q10, q11, [sp, #96] |
| ldp q8, q9, [sp, #128] |
| add sp, sp, #160 |
| .endm |
| |
// swp reg1, reg2: exchange the contents of two general-purpose registers.
// x16 is used as the scratch register and is clobbered; on exit it holds the
// original value of \reg1. Neither operand may be x16 itself, otherwise the
// first value is overwritten before it is copied back.
// NOTE(review): this macro is not invoked anywhere in the visible part of
// this file.
| .macro swp reg1, reg2 |
| MOV X16, \reg1 |
| MOV \reg1, \reg2 |
| MOV \reg2, x16 |
| .endm |
// ---------------------------------------------------------------------------
// ixheaacd_sbr_qmfsyn64_winadd                    (AArch64 NEON, GNU as syntax)
//
// Writes 64 signed 16-bit results. Every result is a rounded, saturating,
// scaled sum of ten signed 16x16->32-bit products (SMLAL) gathered from four
// streams of 16-bit data.
//
// Registers read on entry (AAPCS64 passes the first six integer arguments in
// x0-x5; the C prototype is not visible here, so names/types are not stated):
//   x0  16-bit stream, tapped at byte offsets 0, 512, 1024, 1536, 2048
//   x1  16-bit stream; 256 bytes are added first, then tapped with the same
//       512-byte spacing as x0
//   x2  16-bit stream, tapped at byte offsets 0, 256, 512, 768, 1024 and
//       multiplied with the x0 taps; a second view starting 128 bytes further
//       on (kept in x12) is multiplied with the x1 taps
//       (presumably the window table, going by the function name - confirm)
//   x3  output pointer, one halfword stored per result
//   w4  shift count, applied per lane with SSHL/SQSHL (register form)
//   x5  output stride in halfwords (doubled below to get a byte stride)
//
// For result n = 0..63, with all indices in halfwords:
//   acc  = rnd + sum(j=0..4) x0[n + 256*j]       * x2[n + 128*j]
//              + sum(j=0..4) x1[128 + n + 256*j] * x2[64 + n + 128*j]
//   x3[n * x5] = low 16 bits of ( SQSHL(acc, w4) >> 16 )
//   rnd  = SSHL(0x8000, -w4)
//
// Working registers:
//   x6        remaining-groups counter (one group = 4 results)
//   x8 / x9   tap strides in bytes: 512 for x0/x1, 256 for x2/x12
//   x10, x11  hold the "next group" pointer while a pointer walks its taps
//   v20       rnd in every lane, used to re-seed the accumulator
//   v22       shift count in every lane
//   v26       32-bit accumulator for the group in flight
//   v28       finished group: four 16-bit results in h[0..3]
//
// Clobbers: x0-x3, x5-x12, v0-v7, v16-v20, v22, v26, v28-v30, NZCV flags.
// v8-v15 are used as operand registers and are preserved by
// push_v_regs / pop_v_regs, which also use 160 bytes of stack.
//
// NOTE(review): x21-x24 are spilled and restored by those macros although no
// instruction in this routine references them.
// NOTE(review): every UZP2 below writes v29, but v29 is never read.
// NOTE(review): x5 is consumed as a full 64-bit register. If the caller
// passes the stride as a 32-bit integer, AAPCS64 leaves bits 63:32 of x5
// unspecified - confirm against the prototype.
// ---------------------------------------------------------------------------
| .text |
| .global ixheaacd_sbr_qmfsyn64_winadd |
| |
| ixheaacd_sbr_qmfsyn64_winadd: |
| |
| push_v_regs |
| |
| |
| |
// Set-up, interleaved with the first loads of group 0.
// v30 = 0x8000 per lane, v22 = shift per lane, x12 = copy of the x2 base.
// x10/x11 remember where group 1 starts (base + 8 bytes) before x0/x2 are
// moved on to their second tap (+504 / +248 on top of the +8 post-increment).
| MOV w7, #0x8000 |
| LD1 {v0.4h}, [x0], #8 |
| MOV x12, x2 |
| |
| dup v30.4s, w7 |
| LD1 {v1.4h}, [x2], #8 |
| dup v22.4s, w4 |
| |
| MOV x10, x0 |
| MOV x11, x2 |
| ADD x0, x0, #504 |
| ADD x2, x2, #248 |
| |
// v20 = 0x8000 shifted by the negated shift count, so that the later SQSHL
// by v22 brings the rounding term back to about bit 15 (half an LSB of the
// final >>16 result).
| NEG v28.4s, v22.4s |
| sshL v20.4s, v30.4s, v28.4s |
// x12 += 128 bytes (second view of the x2 stream); x1 += 256 bytes.
// x6 = 16 groups of 4 results; x9 = 256 and x8 = 512 are the tap strides.
// NOTE(review): x9 = 256 is computed twice with the same value.
| MOV x6, #64 |
| LSL x6, x6, #1 |
| ADD x12, x12, x6 |
| MOV x7, #128 |
| LSL x9, x7, #1 |
| ADD x1, x1, x9 |
| MOV x6, #16 |
| MOV x7, #128 |
| LSL x9, x7, #1 |
| MOV x7, #256 |
| LSL x8, x7, #1 |
| |
// Output stride: halfwords -> bytes.
| LSL x5, x5, #1 |
// Group 0: seed the accumulator with the rounding term and accumulate the
// five x0*x2 tap products as their operands arrive.
| LD1 {v2.4h}, [x0], x8 |
| mov v26.16b, v20.16b |
| |
| |
| sMLAL v26.4s, v0.4h, v1.4h |
| LD1 {v3.4h}, [x2], x9 |
| |
| LD1 {v4.4h}, [x0], x8 |
| sMLAL v26.4s, v2.4h, v3.4h |
| |
| LD1 {v5.4h}, [x2], x9 |
| |
| LD1 {v6.4h}, [x0], x8 |
| sMLAL v26.4s, v5.4h, v4.4h |
| |
| LD1 {v7.4h}, [x2], x9 |
| |
| LD1 {v8.4h}, [x0], x8 |
| sMLAL v26.4s, v7.4h, v6.4h |
| |
| LD1 {v9.4h}, [x2], x9 |
// Rewind x0/x2 to the start of the next group.
| MOV x0, x10 |
| |
| |
| MOV x2, x11 |
// Group 0, second half: the five x1*x12 tap products, walked the same way
// (x10/x11 now save the next-group positions of x1/x12).
| LD1 {v10.4h}, [x1], #8 |
| sMLAL v26.4s, v9.4h, v8.4h |
| |
| MOV x10, x1 |
| LD1 {v11.4h}, [x12], #8 |
| ADD x1, x1, #504 |
| |
| |
| |
| MOV x11, x12 |
| LD1 {v12.4h}, [x1], x8 |
| ADD x12, x12, #248 |
| |
| sMLAL v26.4s, v10.4h, v11.4h |
| LD1 {v13.4h}, [x12], x9 |
| |
| LD1 {v14.4h}, [x1], x8 |
| sMLAL v26.4s, v12.4h, v13.4h |
| |
| LD1 {v15.4h}, [x12], x9 |
| |
| LD1 {v16.4h}, [x1], x8 |
| sMLAL v26.4s, v15.4h, v14.4h |
| |
| LD1 {v17.4h}, [x12], x9 |
| |
| LD1 {v18.4h}, [x1], x8 |
| sMLAL v26.4s, v17.4h, v16.4h |
| |
| LD1 {v19.4h}, [x12], x9 |
| |
// Last product of group 0; the loads for group 1 start immediately.
| sMLAL v26.4s, v19.4h, v18.4h |
| LD1 {v0.4h}, [x0], #8 |
| MOV x12, x11 |
| |
| MOV x1, x10 |
| LD1 {v1.4h}, [x2], #8 |
| MOV x10, x0 |
| |
// Scale group 0: saturating shift by the shift count ...
| sQshL v26.4s, v26.4s, v22.4s |
| |
| ADD x0, x0, #504 |
| |
| MOV x11, x2 |
| LD1 {v2.4h}, [x0], x8 |
| ADD x2, x2, #248 |
| |
// ... then keep the upper 16 bits of every 32-bit lane.
| sshR v28.4s, v26.4s, #16 |
| LD1 {v3.4h}, [x2], x9 |
| |
| |
// UZP1 gathers the low halfword of each 32-bit lane into v28.h[0..3].
// Group 0 is now pending in v28; the accumulator is re-seeded for group 1.
| UZP2 v29.8h, v28.8h, v28.8h |
| UZP1 v28.8h, v28.8h, v28.8h |
| mov v26.16b, v20.16b |
| |
| |
| |
| |
// Finish loading all twenty operand vectors (v0-v19) of group 1.
| LD1 {v4.4h}, [x0], x8 |
| LD1 {v5.4h}, [x2], x9 |
| |
| LD1 {v6.4h}, [x0], x8 |
| LD1 {v7.4h}, [x2], x9 |
| |
| LD1 {v8.4h}, [x0], x8 |
| LD1 {v9.4h}, [x2], x9 |
| MOV x0, x10 |
| |
| |
| MOV x2, x11 |
| LD1 {v10.4h}, [x1], #8 |
| |
| MOV x10, x1 |
| LD1 {v11.4h}, [x12], #8 |
| ADD x1, x1, #504 |
| |
| |
| MOV x11, x12 |
| LD1 {v12.4h}, [x1], x8 |
| ADD x12, x12, #248 |
| |
| |
| LD1 {v13.4h}, [x12], x9 |
| |
| LD1 {v14.4h}, [x1], x8 |
| LD1 {v15.4h}, [x12], x9 |
| |
| LD1 {v16.4h}, [x1], x8 |
| LD1 {v17.4h}, [x12], x9 |
| |
| LD1 {v18.4h}, [x1], x8 |
// x6: 16 -> 14. The loop below then runs 7 times (SUBS by 2 until x6 reaches
// 0). Each pass completes two groups; together with group 0 above and the
// final group after the loop that makes 16 groups = 64 results.
| SUB x6, x6, #2 |
| LD1 {v19.4h}, [x12], x9 |
| MOV x1, x10 |
| |
| MOV x12, x11 |
| |
// ---------------------------------------------------------------------------
// Software-pipelined main loop. State at the top of every pass:
//   v28     = finished but not yet stored results of the previous group
//   v0-v19  = complete operand set of the next group
//   v26     = rounding seed
// ---------------------------------------------------------------------------
| LOOP_1: |
| |
// Phase 1: accumulate the preloaded group while storing the pending results
// (one halfword per ST1, stepping x3 by the byte stride in x5). Each operand
// register is refilled for the following group as soon as it has been used.
| sMLAL v26.4s, v0.4h, v1.4h |
| ST1 {v28.h}[0], [x3], x5 |
| |
| sMLAL v26.4s, v2.4h, v3.4h |
| LD1 {v0.4h}, [x0], #8 |
| sMLAL v26.4s, v5.4h, v4.4h |
| |
| sMLAL v26.4s, v7.4h, v6.4h |
| ST1 {v28.h}[1], [x3], x5 |
| |
| |
| MOV x10, x0 |
| LD1 {v1.4h}, [x2], #8 |
| ADD x0, x0, #504 |
| |
| sMLAL v26.4s, v9.4h, v8.4h |
| ST1 {v28.h}[2], [x3], x5 |
| |
| sMLAL v26.4s, v10.4h, v11.4h |
| ST1 {v28.h}[3], [x3], x5 |
| |
| MOV x11, x2 |
| LD1 {v2.4h}, [x0], x8 |
| ADD x2, x2, #248 |
| |
| sMLAL v26.4s, v12.4h, v13.4h |
| LD1 {v3.4h}, [x2], x9 |
| sMLAL v26.4s, v15.4h, v14.4h |
| |
| sMLAL v26.4s, v17.4h, v16.4h |
| LD1 {v4.4h}, [x0], x8 |
| sMLAL v26.4s, v19.4h, v18.4h |
| |
| LD1 {v5.4h}, [x2], x9 |
| |
| LD1 {v6.4h}, [x0], x8 |
// Scale and narrow the group just accumulated into v28, then re-seed v26.
| sQshL v26.4s, v26.4s, v22.4s |
| |
| sshR v28.4s, v26.4s, #16 |
| LD1 {v7.4h}, [x2], x9 |
| mov v26.16b, v20.16b |
| |
| |
| UZP2 v29.8h, v28.8h, v28.8h |
| UZP1 v28.8h, v28.8h, v28.8h |
// Phase 2: second group of this pass, accumulated as its operands arrive.
| sMLAL v26.4s, v0.4h, v1.4h |
| |
| sMLAL v26.4s, v2.4h, v3.4h |
| LD1 {v8.4h}, [x0], x8 |
| sMLAL v26.4s, v5.4h, v4.4h |
| |
| sMLAL v26.4s, v7.4h, v6.4h |
| LD1 {v9.4h}, [x2], x9 |
| |
| |
| LD1 {v10.4h}, [x1], #8 |
| sMLAL v26.4s, v9.4h, v8.4h |
| |
| MOV x2, x11 |
| LD1 {v11.4h}, [x12], #8 |
| MOV x0, x10 |
| |
| MOV x10, x1 |
| |
| ADD x1, x1, #504 |
| |
| MOV x11, x12 |
| LD1 {v12.4h}, [x1], x8 |
| ADD x12, x12, #248 |
| |
| LD1 {v13.4h}, [x12], x9 |
| sMLAL v26.4s, v10.4h, v11.4h |
| |
| LD1 {v14.4h}, [x1], x8 |
| sMLAL v26.4s, v12.4h, v13.4h |
| |
| LD1 {v15.4h}, [x12], x9 |
| |
| LD1 {v16.4h}, [x1], x8 |
| sMLAL v26.4s, v15.4h, v14.4h |
| |
| LD1 {v17.4h}, [x12], x9 |
| |
| LD1 {v18.4h}, [x1], x8 |
| sMLAL v26.4s, v17.4h, v16.4h |
| |
| LD1 {v19.4h}, [x12], x9 |
| MOV x1, x10 |
| |
// Last product of the second group; store the first group's results (still
// in v28) while the second group is being scaled in v26.
| sMLAL v26.4s, v19.4h, v18.4h |
| ST1 {v28.h}[0], [x3], x5 |
| |
| MOV x12, x11 |
| LD1 {v0.4h}, [x0], #8 |
| |
| LD1 {v1.4h}, [x2], #8 |
| sQshL v26.4s, v26.4s, v22.4s |
| |
| |
| ST1 {v28.h}[1], [x3], x5 |
| MOV x10, x0 |
| |
| ST1 {v28.h}[2], [x3], x5 |
| ADD x0, x0, #504 |
| |
| ST1 {v28.h}[3], [x3], x5 |
| MOV x11, x2 |
| |
// v28 may be overwritten only now, after its four lanes have been stored.
| sshR v28.4s, v26.4s, #16 |
| LD1 {v2.4h}, [x0], x8 |
| ADD x2, x2, #248 |
| |
// Phase 3: preload the complete operand set for the next pass (or for the
// tail after the loop).
| LD1 {v3.4h}, [x2], x9 |
| LD1 {v4.4h}, [x0], x8 |
| LD1 {v5.4h}, [x2], x9 |
| LD1 {v6.4h}, [x0], x8 |
| LD1 {v7.4h}, [x2], x9 |
| LD1 {v8.4h}, [x0], x8 |
| LD1 {v9.4h}, [x2], x9 |
| |
| UZP2 v29.8h, v28.8h, v28.8h |
| UZP1 v28.8h, v28.8h, v28.8h |
| mov v26.16b, v20.16b |
| |
| |
| |
| |
| MOV x0, x10 |
| LD1 {v10.4h}, [x1], #8 |
| MOV x2, x11 |
| |
| MOV x10, x1 |
| LD1 {v11.4h}, [x12], #8 |
| ADD x1, x1, #504 |
| |
| |
| MOV x11, x12 |
| LD1 {v12.4h}, [x1], x8 |
| ADD x12, x12, #248 |
| |
| |
| LD1 {v13.4h}, [x12], x9 |
| |
| LD1 {v14.4h}, [x1], x8 |
| LD1 {v15.4h}, [x12], x9 |
| |
| LD1 {v16.4h}, [x1], x8 |
| LD1 {v17.4h}, [x12], x9 |
| |
// Two groups done in this pass; the flags set here feed the BGT below (the
// instructions in between do not modify NZCV).
| SUBS x6, x6, #2 |
| LD1 {v18.4h}, [x1], x8 |
| |
| MOV x1, x10 |
| LD1 {v19.4h}, [x12], x9 |
| |
| MOV x12, x11 |
| |
| |
| BGT LOOP_1 |
| |
// Tail: accumulate the last preloaded group while storing the results that
// are still pending in v28. No further loads are issued.
| sMLAL v26.4s, v0.4h, v1.4h |
| ST1 {v28.h}[0], [x3], x5 |
| sMLAL v26.4s, v2.4h, v3.4h |
| |
| sMLAL v26.4s, v5.4h, v4.4h |
| ST1 {v28.h}[1], [x3], x5 |
| sMLAL v26.4s, v7.4h, v6.4h |
| |
| sMLAL v26.4s, v9.4h, v8.4h |
| ST1 {v28.h}[2], [x3], x5 |
| sMLAL v26.4s, v10.4h, v11.4h |
| |
| sMLAL v26.4s, v12.4h, v13.4h |
| ST1 {v28.h}[3], [x3], x5 |
| sMLAL v26.4s, v15.4h, v14.4h |
| |
| |
| |
| sMLAL v26.4s, v17.4h, v16.4h |
| |
| sMLAL v26.4s, v19.4h, v18.4h |
| |
// Scale, narrow and store the final four results.
| sQshL v26.4s, v26.4s, v22.4s |
| |
| sshR v28.4s, v26.4s, #16 |
| |
| UZP2 v29.8h, v28.8h, v28.8h |
| UZP1 v28.8h, v28.8h, v28.8h |
| |
| |
| ST1 {v28.h}[0], [x3], x5 |
| ST1 {v28.h}[1], [x3], x5 |
| ST1 {v28.h}[2], [x3], x5 |
| ST1 {v28.h}[3], [x3], x5 |
| |
| |
// Restore the spilled registers and return; no result register is written.
| pop_v_regs |
| ret |
| |