/* blob: e37cab86b68dd98b9d5a285d2c5b6c1ba0f142cd [file] [log] [blame] */
/* Intel SIMD MMX implementation of Viterbi ACS butterflies
for 256-state (k=9) convolutional code
Copyright 2004 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits);
*/
# These are offsets into struct v29, defined in viterbi29.h
# They are hard-coded here and must be kept in step with that header by hand.
# Each one names a pointer-sized field that the routine below loads on entry
# and writes back on normal exit.
.set DP,512		# vp->dp: decision pointer (advanced by 256 bytes per bit)
.set OLDMETRICS,516	# vp->old_metrics: path metrics read during a bit
.set NEWMETRICS,520	# vp->new_metrics: path metrics written during a bit
.text
# Mettab29_1 and Mettab29_2 are the branch metric tables indexed by the
# butterfly macro; they are not defined in the visible part of this file.
.global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2
.type update_viterbi29_blk_mmx,@function
.align 16
# MMX (64-bit SIMD) version
# requires Pentium-MMX, Pentium-II or better
#-----------------------------------------------------------------------
# update_viterbi29_blk_mmx(struct v29 *vp, unsigned char *syms, int nbits)
#
# Runs nbits steps of add-compare-select over all 256 states, consuming
# two symbol bytes from syms per step.
#
# Syntax: GNU as, AT&T operand order (source, destination).
# ABI:    32-bit x86, all arguments on the stack:
#           8(%ebp)  = vp
#           12(%ebp) = syms   (this slot is reused as the running pointer)
#           16(%ebp) = nbits  (this slot is reused as the remaining count)
# Out:    eax = 0 on the normal path, -1 when vp is NULL.
#         NOTE(review): the C prototype in the file header declares the
#         function void; confirm whether callers look at eax.
#         On the normal path the old/new metric pointers and the decision
#         pointer are written back into *vp.
# Saved:  ebp, esi, edi, edx, ebx are pushed and restored.
# Clobb:  eax, flags, mm0-mm7 (emms is executed before the normal return).
# Loop registers:
#         esi -> old metrics, edi -> new metrics, edx -> decisions,
#         eax = first symbol * 128, ebx = second symbol * 128.
#-----------------------------------------------------------------------
update_viterbi29_blk_mmx:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%edx
	pushl	%ebx

	movl	8(%ebp),%edx		# edx = vp
	testl	%edx,%edx
	jnz	0f
	# NULL vp: return -1. The $ is required; a bare -1 is a memory
	# operand in AT&T syntax and would load from absolute address -1.
	movl	$-1,%eax
	jmp	err			# no MMX state touched yet, so no emms needed

0:	movl	OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl	NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl	DP(%edx),%edx		# edx -> decisions

	# ---- per-bit loop -------------------------------------------------
1:	movl	16(%ebp),%eax		# eax = nbits remaining
	decl	%eax
	jl	2f			# passed zero, all bits processed
	movl	%eax,16(%ebp)		# keep the count in the argument slot

	movl	12(%ebp),%ebx		# ebx = syms
	movw	(%ebx),%ax		# ax = second symbol : first symbol
	addl	$2,%ebx
	movl	%ebx,12(%ebp)		# advance syms past the pair just read
	movb	%ah,%bl			# bl = second symbol
	andl	$255,%eax		# eax = first symbol, zero-extended
	andl	$255,%ebx		# ebx = second symbol, zero-extended

	# shift into first array index dimension slot:
	# each symbol value selects a 128-byte row of its metric table
	shll	$7,%eax
	shll	$7,%ebx

	# each invocation of this macro will do 8 butterflies in parallel
	# In:    eax, ebx = row offsets into Mettab29_1 / Mettab29_2
	#        esi, edi, edx as described in the function header
	# Clobb: mm0-mm7
.MACRO butterfly GROUP
	# Compute branch metrics: per byte, (tab1 + tab2 + 1) / 2 masked to 4 bits
	movq	(Mettab29_1+8*\GROUP)(%eax),%mm3
	movq	fifteens,%mm0
	paddb	(Mettab29_2+8*\GROUP)(%ebx),%mm3
	paddb	ones,%mm3		# emulate pavgb - this may not be necessary
	psrlq	$1,%mm3			# shifts the whole quadword, not each byte
	pand	%mm0,%mm3		# keep the low 4 bits of every byte

	movq	(8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq	((8*\GROUP)+128)(%esi),%mm2	# Incoming path metric, high bit = 1
	movq	%mm6,%mm1
	movq	%mm2,%mm7
	paddb	%mm3,%mm6
	paddb	%mm3,%mm2
	pxor	%mm0,%mm3		# invert branch metric (xor with 15)
	paddb	%mm3,%mm7		# path metric for inverted symbols
	paddb	%mm3,%mm1
	# live registers 1 2 6 7

	# Compare mm6 and mm7; mm1 and mm2
	# The byte difference is tested for signed greater-than-zero
	pxor	%mm3,%mm3
	movq	%mm6,%mm4
	movq	%mm1,%mm5
	psubb	%mm7,%mm4		# mm4 = mm6 - mm7
	psubb	%mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb	%mm3,%mm4		# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb	%mm3,%mm5		# mm5 = second set of decisions
	# live registers 1 2 4 5 6 7

	# select survivors: decision ? high-bit-1 candidate : high-bit-0 candidate
	movq	%mm4,%mm0
	pand	%mm4,%mm7
	movq	%mm5,%mm3
	pand	%mm5,%mm2
	pandn	%mm6,%mm0
	pandn	%mm1,%mm3
	por	%mm0,%mm7		# mm7 = first set of survivors
	por	%mm3,%mm2		# mm2 = second set of survivors
	# live registers 2 4 5 7

	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq	%mm4,%mm3
	movq	%mm7,%mm0
	punpckhbw	%mm5,%mm4	# decisions, high halves interleaved
	punpcklbw	%mm5,%mm3	# decisions, low halves interleaved
	punpcklbw	%mm2,%mm7	# new metrics, low halves interleaved
	punpckhbw	%mm2,%mm0	# new metrics, high halves interleaved
	movq	%mm4,(16*\GROUP+8)(%edx)
	movq	%mm3,(16*\GROUP)(%edx)
	movq	%mm7,(16*\GROUP)(%edi)
	movq	%mm0,(16*\GROUP+8)(%edi)
.endm

	# invoke macro 16 times for a total of 128 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3
	butterfly GROUP=4
	butterfly GROUP=5
	butterfly GROUP=6
	butterfly GROUP=7
	butterfly GROUP=8
	butterfly GROUP=9
	butterfly GROUP=10
	butterfly GROUP=11
	butterfly GROUP=12
	butterfly GROUP=13
	butterfly GROUP=14
	butterfly GROUP=15

	addl	$256,%edx		# bump decision pointer

	# swap metrics: the metrics just written become the next old metrics
	movl	%esi,%eax
	movl	%edi,%esi
	movl	%eax,%edi
	jmp	1b

	# ---- normal exit --------------------------------------------------
2:	emms				# leave MMX state before returning to the caller
	movl	8(%ebp),%ebx		# ebx = vp
	# stash metric pointers
	movl	%esi,OLDMETRICS(%ebx)
	movl	%edi,NEWMETRICS(%ebx)
	movl	%edx,DP(%ebx)		# stash incremented value of vp->dp
	xorl	%eax,%eax		# return 0

	# ---- common epilogue (eax already holds the return value) ---------
err:	popl	%ebx
	popl	%edx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret
# Per-byte constants read by the butterfly macro.
# Each one is 8 bytes long and starts on an 8-byte boundary.
	.data

	.align	8
fifteens:				# 15 in every byte: 4-bit mask / inverter
	.fill	8, 1, 15

	.align	8
ones:					# 1 in every byte: rounding term added before the shift
	.fill	8, 1, 1