@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@  File:     R4R8First_v7.s
@
@  Content:  Radix8First and Radix4First functions in ARMv7 NEON assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
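@
@  Note: the summary below is a reading of the NEON code in this file, offered
@  as orientation only.  Both routines take (buf, num) in r0/r1: buf points to
@  interleaved re/im 32-bit words and num is the number of blocks transformed
@  in place.  Radix8First handles 16 words (8 complex values) per block,
@  Radix4First 8 words (4 complex values).
@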
        .section .text
        .global  Radix8First

Radix8First:
        stmdb     sp!, {r4 - r11, lr}
        ldr       r3, SQRT1_2                @ scaled 1/sqrt(2) twiddle constant (see SQRT1_2 below)
        cmp       r1, #0                     @ r1 = num, the number of 16-word blocks
        VDUP.I32  Q15, r3                    @ broadcast the twiddle to all four lanes of Q15
        beq       Radix8First_END            @ nothing to do for num == 0
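@ Each pass of the loop below loads one 16-word block, forms the radix-4
@ butterflies of its first and second halves (see the r*/i* comments), applies
@ the >>1 scaling with VSHR and the 1/sqrt(2) twiddle held in Q15 with
@ VQDMULH, then stores the recombined 16 words back over the same block.
@ This is a descriptive reading of the instructions below, not the original
@ C reference.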
Radix8First_LOOP:
        VLD1.I32  {d0, d1, d2, d3}, [r0]!    @ load buf[0..7]
        VLD1.I32  {d8, d9, d10, d11}, [r0]!  @ load buf[8..15]
        VADD.S32  d4, d0, d1                 @ r0 = buf[0] + buf[2]@i0 = buf[1] + buf[3]@
        VSUB.S32  d5, d0, d1                 @ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@
        VSUB.S32  d7, d2, d3                 @ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@
        VADD.S32  d6, d2, d3                 @ r3 = buf[4] + buf[6]@i3 = buf[5] + buf[7]@
        VREV64.I32 d7, d7
        VADD.S32  Q0, Q2, Q3                 @ r4 = (r0 + r2)@i4 = (i0 + i2)@i6 = (i1 + r3)@r7 = (r1 + i3)
        VSUB.S32  Q1, Q2, Q3                 @ r5 = (r0 - r2)@i5 = (i0 - i2)@r6 = (r1 - i3)@i7 = (i1 - r3)@
        VREV64.I32 d3, d3
        VADD.S32  d4, d8, d9                 @ r0 = buf[ 8] + buf[10]@i0 = buf[ 9] + buf[11]@
        VSUB.S32  d7, d10, d11               @ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@
        VADD.S32  d6, d10, d11               @ r2 = buf[12] + buf[14]@i2 = buf[13] + buf[15]@
        VREV64.I32 d7, d7
        VSUB.S32  d5, d8, d9                 @ r3 = buf[ 8] - buf[10]@i3 = buf[ 9] - buf[11]@
        VTRN.32   d1, d3
        VADD.S32  Q4, Q2, Q3                 @ t0 = (r0 + r2) >> 1@t1 = (i0 + i2) >> 1@i0 = i1 + r3@r2 = r1 + i3@
        VSUB.S32  Q5, Q2, Q3                 @ t2 = (r0 - r2) >> 1@t3 = (i0 - i2) >> 1@r0 = r1 - i3@i2 = i1 - r3@
        VREV64.I32 d3, d3
        VSHR.S32  d8, d8, #1
        VSHR.S32  Q0, Q0, #1
        VREV64.I32 d10, d10
        VTRN.32   d11, d9
        VSHR.S32  Q1, Q1, #1
        VSHR.S32  d10, d10, #1
        VREV64.I32 d9, d9
        sub       r0, r0, #0x40              @ step back 64 bytes (16 words) so the stores below overwrite this block
        VADD.S32  d12, d0, d8
        VSUB.S32  d16, d0, d8
        VADD.S32  d14, d2, d10
        VSUB.S32  d18, d2, d10
        VSUB.S32  d4, d11, d9
        VADD.S32  d5, d11, d9
        VREV64.I32 d18, d18
        VQDMULH.S32 Q3, Q2, Q15              @ Q3 = Q2 * 1/(2*sqrt(2)): the 1/sqrt(2) twiddle with the >>1 scaling folded in
        VTRN.32   d14, d18
        VTRN.32   d6, d7
        VREV64.I32 d18, d18
        VSUB.S32  d15, d3, d6
        VREV64.I32 d7, d7
        VADD.S32  d19, d3, d6
        VADD.S32  d13, d1, d7
        VSUB.S32  d17, d1, d7
        VREV64.I32 d17, d17
        VTRN.32   d13, d17
        VREV64.I32 d17, d17
        subs      r1, r1, #1                 @ num--
        VST1.I32  {d12, d13, d14, d15}, [r0]!  @ store transformed buf[0..7]
        VST1.I32  {d16, d17, d18, d19}, [r0]!  @ store transformed buf[8..15]
        bne       Radix8First_LOOP
Radix8First_END:
        ldmia     sp!, {r4 - r11, pc}
SQRT1_2:
        .word     0x2d413ccd                 @ sqrt(2)/4 in Q31 (~0.3536): the 1/sqrt(2) twiddle with the >>1 scaling folded in
@ENDP  @ |Radix8First|
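@
@ Reference sketch, derived here from the NEON code below (r0..r7 are the same
@ C-style temporaries used in the inline comments, not ARM registers): for
@ each 8-word block, Radix4First computes
@
@     r0 = buf[0] + buf[2];   r1 = buf[1] + buf[3];
@     r2 = buf[0] - buf[2];   r3 = buf[1] - buf[3];
@     r4 = buf[4] + buf[6];   r5 = buf[5] + buf[7];
@     r6 = buf[4] - buf[6];   r7 = buf[5] - buf[7];
@
@     buf[0] = r0 + r4;       buf[1] = r1 + r5;
@     buf[2] = r2 + r7;       buf[3] = r3 - r6;
@     buf[4] = r0 - r4;       buf[5] = r1 - r5;
@     buf[6] = r2 - r7;       buf[7] = r3 + r6;
@
@ and then advances buf by 8 words.
@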
        .section .text
        .global  Radix4First

Radix4First:
        stmdb     sp!, {r4 - r11, lr}
        cmp       r1, #0                     @ r1 = num, the number of 8-word blocks
        beq       Radix4First_END            @ nothing to do for num == 0
Radix4First_LOOP:
        VLD1.I32  {d0, d1, d2, d3}, [r0]     @ load buf[0..7]; the store below writes back in place
        VADD.S32  d4, d0, d1                 @ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
        VSUB.S32  d5, d0, d1                 @ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
        VSUB.S32  d7, d2, d3                 @ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
        VADD.S32  d6, d2, d3                 @ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@
        VREV64.I32 d7, d7
        VADD.S32  Q4, Q2, Q3                 @ {r0+r4, r1+r5, r2+r7, r3+r6}
        VSUB.S32  Q5, Q2, Q3                 @ {r0-r4, r1-r5, r2-r7, r3-r6}
        VREV64.I32 d11, d11
        VTRN.32   d9, d11
        subs      r1, r1, #1                 @ num--
        VREV64.I32 d11, d11
        VST1.I32  {d8, d9, d10, d11}, [r0]!  @ write the transformed block back and advance buf
        bne       Radix4First_LOOP
Radix4First_END:
        ldmia     sp!, {r4 - r11, pc}
@ENDP  @ |Radix4First|
        .end