///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
// *//
| |
| |
// push_v_regs
//
// Spills q8-q15, x8-x17, x29 and x30 into a 224-byte stack frame.
// 224 is a multiple of 16, so sp keeps whatever 16-byte alignment it had.
// This is more than AAPCS64 asks a callee to preserve (x8-x17 are
// caller-saved, and only the low 64 bits of v8-v15 are callee-saved).
//
// Frame layout after the macro, relative to the new sp:
//     [sp, #0]    x29, x30
//     [sp, #16]   x16, x17
//     [sp, #32]   x14, x15
//     [sp, #48]   x12, x13
//     [sp, #64]   x10, x11
//     [sp, #80]   x8,  x9
//     [sp, #96]   q14, q15
//     [sp, #128]  q12, q13
//     [sp, #160]  q10, q11
//     [sp, #192]  q8,  q9
.macro push_v_regs
    sub     sp, sp, #224                // reserve the whole frame up front
    stp     q8,  q9,  [sp, #192]
    stp     q10, q11, [sp, #160]
    stp     q12, q13, [sp, #128]
    stp     q14, q15, [sp, #96]
    stp     x8,  x9,  [sp, #80]
    stp     x10, x11, [sp, #64]
    stp     x12, x13, [sp, #48]
    stp     x14, x15, [sp, #32]
    stp     x16, x17, [sp, #16]
    stp     x29, x30, [sp]
.endm
// pop_v_regs
//
// Reloads x29, x30, x8-x17 and q8-q15 from the 224-byte frame written by
// push_v_regs, then releases the frame. sp must hold the value it had
// immediately after push_v_regs.
//
// Frame layout expected at sp:
//     [sp, #0]    x29, x30
//     [sp, #16]   x16, x17
//     [sp, #32]   x14, x15
//     [sp, #48]   x12, x13
//     [sp, #64]   x10, x11
//     [sp, #80]   x8,  x9
//     [sp, #96]   q14, q15
//     [sp, #128]  q12, q13
//     [sp, #160]  q10, q11
//     [sp, #192]  q8,  q9
.macro pop_v_regs
    ldp     x29, x30, [sp]
    ldp     x16, x17, [sp, #16]
    ldp     x14, x15, [sp, #32]
    ldp     x12, x13, [sp, #48]
    ldp     x10, x11, [sp, #64]
    ldp     x8,  x9,  [sp, #80]
    ldp     q14, q15, [sp, #96]
    ldp     q12, q13, [sp, #128]
    ldp     q10, q11, [sp, #160]
    ldp     q8,  q9,  [sp, #192]
    add     sp, sp, #224                // drop the whole frame in one step
.endm
.text
.global ixheaacd_neg_shift_spec_armv8

// ---------------------------------------------------------------------------
// ixheaacd_neg_shift_spec_armv8
//
// Walks 448 32-bit words of the input buffer from index 447 down to index 0.
// For every word w it computes
//     t = sat32(-w)                               (SQNEG)
//     t = sat32(t shifted by the count in W2)     (SQSHL, register form)
//     t = sat32(t + 0x8000)                       (SQADD, rounds the >> 16)
//     h = low 16 bits of (t >> 16), arithmetic    (SSHR + UZP1)
// and stores h as one halfword at X1, then advances X1 by 2 * X3 bytes.
// The first halfword written therefore comes from input word 447.
//
// In:    X0 = address of input word 0 (448 words are read)
//        X1 = address of the first output halfword
//        W2 = shift count, broadcast to all lanes (SQSHL reads the signed
//             low byte of each lane: positive = left, negative = right)
//        X3 = output stride in halfwords
// Out:   448 halfwords written through X1
// Clobb: X1, X4-X8, V0, V6, V20-V25, V27, V29-V31, NZCV
//
// Leaf routine. Every register written here is caller-saved under AAPCS64
// and the stack is never touched, so no prologue/epilogue is needed.
//
// NOTE(review): X3 is read as a full 64-bit register (LSL X7, X3, #1).
// AAPCS64 leaves the bits above an integer argument's declared width
// unspecified, so if the C prototype declares this parameter narrower than
// 64 bits an explicit sign/zero extension may be needed here - TODO confirm
// against the prototype.
// ---------------------------------------------------------------------------
ixheaacd_neg_shift_spec_armv8:
    MOV     X5, #448                    // X5 = input words not yet loaded
    SUB     X6, X5, #1
    LSL     X6, X6, #2
    ADD     X6, X6, X0                  // X6 = &in[447]
    MOV     X8, #-16                    // post-index step: one vector downwards
    SUB     X6, X6, #12                 // X6 = &in[444]; first vector = in[444..447]
    LSL     X7, X3, #1                  // X7 = output stride in bytes
    DUP     V31.4S, W2                  // per-lane shift count
    MOV     W4, #0x8000
    DUP     V30.4S, W4                  // rounding bias applied before >> 16

    // Pipeline prime: fully process vector A, load and negate vector B.
    LD1     {V0.4S}, [X6], X8
    SQNEG   V0.4S, V0.4S

    LD1     {V6.4S}, [X6], X8
    SQSHL   V25.4S, V0.4S, V31.4S
    SQADD   V24.4S, V25.4S, V30.4S
    SSHR    V23.4S, V24.4S, #16
    REV64   V23.4S, V23.4S
    SUB     X5, X5, #8                  // 8 words loaded so far

    // REV64 + UZP1 leave the results of source words 3,2,1,0 in
    // V27.H[2], H[3], H[0], H[1]; storing in that lane order emits the
    // highest-address word first.
    UZP1    V27.8H, V23.8H, V23.8H
    SQNEG   V29.4S, V6.4S

    // Each iteration stores 8 halfwords and loads the next 8 words; the
    // stores of one vector are interleaved with the arithmetic of the next.
.Lneg_shift_loop:

    ST1     {V27.H}[2], [X1], X7
    SQSHL   V22.4S, V29.4S, V31.4S
    LD1     {V0.4S}, [X6], X8
    ST1     {V27.H}[3], [X1], X7
    SQADD   V21.4S, V22.4S, V30.4S
    ST1     {V27.H}[0], [X1], X7
    SQNEG   V0.4S, V0.4S
    ST1     {V27.H}[1], [X1], X7
    SSHR    V20.4S, V21.4S, #16
    REV64   V20.4S, V20.4S
    SUBS    X5, X5, #8                  // sets the flags tested by BGT below


    UZP1    V27.8H, V20.8H, V20.8H
    SQSHL   V25.4S, V0.4S, V31.4S
    ST1     {V27.H}[2], [X1], X7
    LD1     {V6.4S}, [X6], X8
    SQADD   V24.4S, V25.4S, V30.4S
    ST1     {V27.H}[3], [X1], X7
    SSHR    V23.4S, V24.4S, #16
    ST1     {V27.H}[0], [X1], X7
    REV64   V23.4S, V23.4S
    ST1     {V27.H}[1], [X1], X7


    UZP1    V27.8H, V23.8H, V23.8H
    SQNEG   V29.4S, V6.4S               // vector ops do not alter NZCV

    BGT     .Lneg_shift_loop            // loop while words remain to be loaded

    // Pipeline drain: store the vector already packed in V27, then finish
    // and store the last vector (in[0..3], held negated in V29).
    ST1     {V27.H}[2], [X1], X7
    SQSHL   V22.4S, V29.4S, V31.4S
    ST1     {V27.H}[3], [X1], X7
    ST1     {V27.H}[0], [X1], X7
    SQADD   V21.4S, V22.4S, V30.4S
    ST1     {V27.H}[1], [X1], X7
    SSHR    V20.4S, V21.4S, #16

    REV64   V20.4S, V20.4S

    UZP1    V27.8H, V20.8H, V20.8H

    ST1     {V27.H}[2], [X1], X7
    ST1     {V27.H}[3], [X1], X7
    ST1     {V27.H}[0], [X1], X7
    ST1     {V27.H}[1], [X1], X7
    RET