| /*
|
| Copyright (C) 1996-1997 Id Software, Inc.
|
|
|
| This program is free software; you can redistribute it and/or
|
| modify it under the terms of the GNU General Public License
|
| as published by the Free Software Foundation; either version 2
|
| of the License, or (at your option) any later version.
|
|
|
| This program is distributed in the hope that it will be useful,
|
| but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
| See the GNU General Public License for more details.
|
|
|
| You should have received a copy of the GNU General Public License
|
| along with this program; if not, write to the Free Software
|
| Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
| */
|
| // |
| // d_draw.s |
| // x86 assembly-language horizontal 8-bpp span-drawing code. |
| // |
| |
| #include "asm_i386.h" |
| #include "quakeasm.h" |
| #include "asm_draw.h" |
| #include "d_ifacea.h" |
| |
| #if id386 |
| |
| //---------------------------------------------------------------------- |
| // 8-bpp horizontal span drawing code for polygons, with no transparency. |
| // |
| // Assumes there is at least one span in pspans, and that every span |
| // contains at least one pixel |
| //---------------------------------------------------------------------- |
| |
| .text |
| |
| // out-of-line, rarely-needed clamping code |
| |
| LClampHigh0: |
| movl C(bbextents),%esi |
| jmp LClampReentry0 |
| LClampHighOrLow0: |
| jg LClampHigh0 |
| xorl %esi,%esi |
| jmp LClampReentry0 |
| |
| LClampHigh1: |
| movl C(bbextentt),%edx |
| jmp LClampReentry1 |
| LClampHighOrLow1: |
| jg LClampHigh1 |
| xorl %edx,%edx |
| jmp LClampReentry1 |
| |
| LClampLow2: |
| movl $2048,%ebp |
| jmp LClampReentry2 |
| LClampHigh2: |
| movl C(bbextents),%ebp |
| jmp LClampReentry2 |
| |
| LClampLow3: |
| movl $2048,%ecx |
| jmp LClampReentry3 |
| LClampHigh3: |
| movl C(bbextentt),%ecx |
| jmp LClampReentry3 |
| |
| LClampLow4: |
| movl $2048,%eax |
| jmp LClampReentry4 |
| LClampHigh4: |
| movl C(bbextents),%eax |
| jmp LClampReentry4 |
| |
| LClampLow5: |
| movl $2048,%ebx |
| jmp LClampReentry5 |
| LClampHigh5: |
| movl C(bbextentt),%ebx |
| jmp LClampReentry5 |
| |
| |
| #define pspans 4+16 |
| |
| .align 4 |
| .globl C(D_DrawSpans8) |
| C(D_DrawSpans8): |
| pushl %ebp // preserve caller's stack frame |
| pushl %edi |
| pushl %esi // preserve register variables |
| pushl %ebx |
| |
| // |
| // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock |
| // and span list pointers |
| // |
| // TODO: any overlap from rearranging? |
| flds C(d_sdivzstepu) |
| fmuls fp_8 |
| movl C(cacheblock),%edx |
| flds C(d_tdivzstepu) |
| fmuls fp_8 |
| movl pspans(%esp),%ebx // point to the first span descriptor |
| flds C(d_zistepu) |
| fmuls fp_8 |
| movl %edx,pbase // pbase = cacheblock |
| fstps zi8stepu |
| fstps tdivz8stepu |
| fstps sdivz8stepu |
| |
| LSpanLoop: |
| // |
| // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the |
| // initial s and t values |
| // |
| // FIXME: pipeline FILD? |
| fildl espan_t_v(%ebx) |
| fildl espan_t_u(%ebx) |
| |
| fld %st(1) // dv | du | dv |
| fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv |
| fld %st(1) // du | dv*d_sdivzstepv | du | dv |
| fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv |
| fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv |
| fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu | |
| // dv*d_sdivzstepv | du | dv |
| fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu | |
| // dv*d_sdivzstepv | du | dv |
| faddp %st(0),%st(2) // du*d_tdivzstepu | |
| // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv |
| fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | |
| // du*d_tdivzstepu | du | dv |
| fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv | |
| // du*d_tdivzstepu | du | dv |
| fmuls C(d_tdivzstepv) // dv*d_tdivzstepv | |
| // du*d_sdivzstepu + dv*d_sdivzstepv | |
| // du*d_tdivzstepu | du | dv |
| fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | |
| // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv |
| fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv + |
| // du*d_sdivzstepu; stays in %st(2) at end |
| fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du | |
| // s/z |
| fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv | |
| // du*d_tdivzstepu | du | s/z |
| fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv | |
| // du*d_tdivzstepu | du | s/z |
| faddp %st(0),%st(2) // dv*d_zistepv | |
| // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z |
| fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu | |
| // dv*d_zistepv | s/z |
| fmuls C(d_zistepu) // du*d_zistepu | |
| // dv*d_tdivzstepv + du*d_tdivzstepu | |
| // dv*d_zistepv | s/z |
| fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu | |
| // du*d_zistepu | dv*d_zistepv | s/z |
| fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv + |
| // du*d_tdivzstepu; stays in %st(1) at end |
| fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z |
| faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z |
| |
| flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z |
| fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z |
| fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv + |
| // du*d_zistepu; stays in %st(0) at end |
| // 1/z | fp_64k | t/z | s/z |
| // |
| // calculate and clamp s & t |
| // |
| fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z |
| |
| // |
| // point %edi to the first pixel in the span |
| // |
| movl C(d_viewbuffer),%ecx |
| movl espan_t_v(%ebx),%eax |
| movl %ebx,pspantemp // preserve spans pointer |
| |
| movl C(tadjust),%edx |
| movl C(sadjust),%esi |
| movl C(d_scantable)(,%eax,4),%edi // v * screenwidth |
| addl %ecx,%edi |
| movl espan_t_u(%ebx),%ecx |
| addl %ecx,%edi // pdest = &pdestspan[scans->u]; |
| movl espan_t_count(%ebx),%ecx |
| |
| // |
| // now start the FDIV for the end of the span |
| // |
| cmpl $8,%ecx |
| ja LSetupNotLast1 |
| |
| decl %ecx |
| jz LCleanup1 // if only one pixel, no need to start an FDIV |
| movl %ecx,spancountminus1 |
| |
| // finish up the s and t calcs |
| fxch %st(1) // z*64k | 1/z | t/z | s/z |
| |
| fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z |
| fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z |
| fxch %st(1) // z*64k | s | 1/z | t/z | s/z |
| fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z |
| fxch %st(1) // s | t | 1/z | t/z | s/z |
| fistpl s // 1/z | t | t/z | s/z |
| fistpl t // 1/z | t/z | s/z |
| |
| fildl spancountminus1 |
| |
| flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1 |
| flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1 |
| fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1 |
| fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 |
| fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 |
| fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 |
| fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 | |
| // C(d_tdivzstepu)*scm1 |
| fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 | |
| // C(d_tdivzstepu)*scm1 |
| faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 |
| fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 |
| faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 |
| faddp %st(0),%st(3) |
| |
| flds fp_64k |
| fdiv %st(1),%st(0) // this is what we've gone to all this trouble to |
| // overlap |
| jmp LFDIVInFlight1 |
| |
| LCleanup1: |
| // finish up the s and t calcs |
| fxch %st(1) // z*64k | 1/z | t/z | s/z |
| |
| fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z |
| fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z |
| fxch %st(1) // z*64k | s | 1/z | t/z | s/z |
| fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z |
| fxch %st(1) // s | t | 1/z | t/z | s/z |
| fistpl s // 1/z | t | t/z | s/z |
| fistpl t // 1/z | t/z | s/z |
| jmp LFDIVInFlight1 |
| |
| .align 4 |
| LSetupNotLast1: |
| // finish up the s and t calcs |
| fxch %st(1) // z*64k | 1/z | t/z | s/z |
| |
| fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z |
| fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z |
| fxch %st(1) // z*64k | s | 1/z | t/z | s/z |
| fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z |
| fxch %st(1) // s | t | 1/z | t/z | s/z |
| fistpl s // 1/z | t | t/z | s/z |
| fistpl t // 1/z | t/z | s/z |
| |
| fadds zi8stepu |
| fxch %st(2) |
| fadds sdivz8stepu |
| fxch %st(2) |
| flds tdivz8stepu |
| faddp %st(0),%st(2) |
| flds fp_64k |
| fdiv %st(1),%st(0) // z = 1/1/z |
| // this is what we've gone to all this trouble to |
| // overlap |
| LFDIVInFlight1: |
| |
| addl s,%esi |
| addl t,%edx |
| movl C(bbextents),%ebx |
| movl C(bbextentt),%ebp |
| cmpl %ebx,%esi |
| ja LClampHighOrLow0 |
| LClampReentry0: |
| movl %esi,s |
| movl pbase,%ebx |
| shll $16,%esi |
| cmpl %ebp,%edx |
| movl %esi,sfracf |
| ja LClampHighOrLow1 |
| LClampReentry1: |
| movl %edx,t |
| movl s,%esi // sfrac = scans->sfrac; |
| shll $16,%edx |
| movl t,%eax // tfrac = scans->tfrac; |
| sarl $16,%esi |
| movl %edx,tfracf |
| |
| // |
| // calculate the texture starting address |
| // |
| sarl $16,%eax |
| movl C(cachewidth),%edx |
| imull %edx,%eax // (tfrac >> 16) * cachewidth |
| addl %ebx,%esi |
| addl %eax,%esi // psource = pbase + (sfrac >> 16) + |
| // ((tfrac >> 16) * cachewidth); |
| |
| // |
| // determine whether last span or not |
| // |
| cmpl $8,%ecx |
| jna LLastSegment |
| |
| // |
| // not the last segment; do full 8-wide segment |
| // |
| LNotLastSegment: |
| |
| // |
| // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to |
| // get there |
| // |
| |
| // pick up after the FDIV that was left in flight previously |
| |
| fld %st(0) // duplicate it |
| fmul %st(4),%st(0) // s = s/z * z |
| fxch %st(1) |
| fmul %st(3),%st(0) // t = t/z * z |
| fxch %st(1) |
| fistpl snext |
| fistpl tnext |
| movl snext,%eax |
| movl tnext,%edx |
| |
| movb (%esi),%bl // get first source texel |
| subl $8,%ecx // count off this segments' pixels |
| movl C(sadjust),%ebp |
| movl %ecx,counttemp // remember count of remaining pixels |
| |
| movl C(tadjust),%ecx |
| movb %bl,(%edi) // store first dest pixel |
| |
| addl %eax,%ebp |
| addl %edx,%ecx |
| |
| movl C(bbextents),%eax |
| movl C(bbextentt),%edx |
| |
| cmpl $2048,%ebp |
| jl LClampLow2 |
| cmpl %eax,%ebp |
| ja LClampHigh2 |
| LClampReentry2: |
| |
| cmpl $2048,%ecx |
| jl LClampLow3 |
| cmpl %edx,%ecx |
| ja LClampHigh3 |
| LClampReentry3: |
| |
| movl %ebp,snext |
| movl %ecx,tnext |
| |
| subl s,%ebp |
| subl t,%ecx |
| |
| // |
| // set up advancetable |
| // |
| movl %ecx,%eax |
| movl %ebp,%edx |
| sarl $19,%eax // tstep >>= 16; |
| jz LZero |
| sarl $19,%edx // sstep >>= 16; |
| movl C(cachewidth),%ebx |
| imull %ebx,%eax |
| jmp LSetUp1 |
| |
| LZero: |
| sarl $19,%edx // sstep >>= 16; |
| movl C(cachewidth),%ebx |
| |
| LSetUp1: |
| |
| addl %edx,%eax // add in sstep |
| // (tstep >> 16) * cachewidth + (sstep >> 16); |
| movl tfracf,%edx |
| movl %eax,advancetable+4 // advance base in t |
| addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth + |
| // (sstep >> 16); |
| shll $13,%ebp // left-justify sstep fractional part |
| movl sfracf,%ebx |
| shll $13,%ecx // left-justify tstep fractional part |
| movl %eax,advancetable // advance extra in t |
| |
| movl %ecx,tstep |
| addl %ecx,%edx // advance tfrac fractional part by tstep frac |
| |
| sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none) |
| addl %ebp,%ebx // advance sfrac fractional part by sstep frac |
| adcl advancetable+4(,%ecx,4),%esi // point to next source texel |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| movb (%esi),%al |
| addl %ebp,%ebx |
| movb %al,1(%edi) |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| movb %al,2(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| movb %al,3(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| |
| // |
| // start FDIV for end of next segment in flight, so it can overlap |
| // |
| movl counttemp,%ecx |
| cmpl $8,%ecx // more than one segment after this? |
| ja LSetupNotLast2 // yes |
| |
| decl %ecx |
| jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV |
| movl %ecx,spancountminus1 |
| fildl spancountminus1 |
| |
| flds C(d_zistepu) // C(d_zistepu) | spancountminus1 |
| fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1 |
| flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 |
| fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 |
| fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1 |
| faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1 |
| fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1 |
| fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 |
| fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 |
| faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 |
| flds fp_64k // 64k | C(d_sdivzstepu)*scm1 |
| fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k |
| faddp %st(0),%st(4) // 64k |
| |
| fdiv %st(1),%st(0) // this is what we've gone to all this trouble to |
| // overlap |
| jmp LFDIVInFlight2 |
| |
| .align 4 |
| LSetupNotLast2: |
| fadds zi8stepu |
| fxch %st(2) |
| fadds sdivz8stepu |
| fxch %st(2) |
| flds tdivz8stepu |
| faddp %st(0),%st(2) |
| flds fp_64k |
| fdiv %st(1),%st(0) // z = 1/1/z |
| // this is what we've gone to all this trouble to |
| // overlap |
| LFDIVInFlight2: |
| movl %ecx,counttemp |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| movb %al,4(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| movb %al,5(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| movb %al,6(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| addl $8,%edi |
| movl %edx,tfracf |
| movl snext,%edx |
| movl %ebx,sfracf |
| movl tnext,%ebx |
| movl %edx,s |
| movl %ebx,t |
| |
| movl counttemp,%ecx // retrieve count |
| |
| // |
| // determine whether last span or not |
| // |
| cmpl $8,%ecx // are there multiple segments remaining? |
| movb %al,-1(%edi) |
| ja LNotLastSegment // yes |
| |
| // |
| // last segment of scan |
| // |
| LLastSegment: |
| |
| // |
| // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to |
| // get there. The number of pixels left is variable, and we want to land on the |
| // last pixel, not step one past it, so we can't run into arithmetic problems |
| // |
| testl %ecx,%ecx |
| jz LNoSteps // just draw the last pixel and we're done |
| |
| // pick up after the FDIV that was left in flight previously |
| |
| |
| fld %st(0) // duplicate it |
| fmul %st(4),%st(0) // s = s/z * z |
| fxch %st(1) |
| fmul %st(3),%st(0) // t = t/z * z |
| fxch %st(1) |
| fistpl snext |
| fistpl tnext |
| |
| movb (%esi),%al // load first texel in segment |
| movl C(tadjust),%ebx |
| movb %al,(%edi) // store first pixel in segment |
| movl C(sadjust),%eax |
| |
| addl snext,%eax |
| addl tnext,%ebx |
| |
| movl C(bbextents),%ebp |
| movl C(bbextentt),%edx |
| |
| cmpl $2048,%eax |
| jl LClampLow4 |
| cmpl %ebp,%eax |
| ja LClampHigh4 |
| LClampReentry4: |
| movl %eax,snext |
| |
| cmpl $2048,%ebx |
| jl LClampLow5 |
| cmpl %edx,%ebx |
| ja LClampHigh5 |
| LClampReentry5: |
| |
| cmpl $1,%ecx // don't bother |
| je LOnlyOneStep // if two pixels in segment, there's only one step, |
| // of the segment length |
| subl s,%eax |
| subl t,%ebx |
| |
| addl %eax,%eax // convert to 15.17 format so multiply by 1.31 |
| addl %ebx,%ebx // reciprocal yields 16.48 |
| |
| imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1) |
| movl %edx,%ebp |
| |
| movl %ebx,%eax |
| imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1) |
| |
| LSetEntryvec: |
| // |
| // set up advancetable |
| // |
| movl entryvec_table(,%ecx,4),%ebx |
| movl %edx,%eax |
| movl %ebx,jumptemp // entry point into code for RET later |
| movl %ebp,%ecx |
| sarl $16,%edx // tstep >>= 16; |
| movl C(cachewidth),%ebx |
| sarl $16,%ecx // sstep >>= 16; |
| imull %ebx,%edx |
| |
| addl %ecx,%edx // add in sstep |
| // (tstep >> 16) * cachewidth + (sstep >> 16); |
| movl tfracf,%ecx |
| movl %edx,advancetable+4 // advance base in t |
| addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth + |
| // (sstep >> 16); |
| shll $16,%ebp // left-justify sstep fractional part |
| movl sfracf,%ebx |
| shll $16,%eax // left-justify tstep fractional part |
| movl %edx,advancetable // advance extra in t |
| |
| movl %eax,tstep |
| movl %ecx,%edx |
| addl %eax,%edx |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| jmp *jumptemp // jump to the number-of-pixels handler |
| |
| //---------------------------------------- |
| |
| LNoSteps: |
| movb (%esi),%al // load first texel in segment |
| subl $7,%edi // adjust for hardwired offset |
| jmp LEndSpan |
| |
| |
| LOnlyOneStep: |
| subl s,%eax |
| subl t,%ebx |
| movl %eax,%ebp |
| movl %ebx,%edx |
| jmp LSetEntryvec |
| |
| //---------------------------------------- |
| |
| .globl Entry2_8 |
| Entry2_8: |
| subl $6,%edi // adjust for hardwired offsets |
| movb (%esi),%al |
| jmp LLEntry2_8 |
| |
| //---------------------------------------- |
| |
| .globl Entry3_8 |
| Entry3_8: |
| subl $5,%edi // adjust for hardwired offsets |
| addl %eax,%edx |
| movb (%esi),%al |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| adcl advancetable+4(,%ecx,4),%esi |
| jmp LLEntry3_8 |
| |
| //---------------------------------------- |
| |
| .globl Entry4_8 |
| Entry4_8: |
| subl $4,%edi // adjust for hardwired offsets |
| addl %eax,%edx |
| movb (%esi),%al |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| jmp LLEntry4_8 |
| |
| //---------------------------------------- |
| |
| .globl Entry5_8 |
| Entry5_8: |
| subl $3,%edi // adjust for hardwired offsets |
| addl %eax,%edx |
| movb (%esi),%al |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| jmp LLEntry5_8 |
| |
| //---------------------------------------- |
| |
| .globl Entry6_8 |
| Entry6_8: |
| subl $2,%edi // adjust for hardwired offsets |
| addl %eax,%edx |
| movb (%esi),%al |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| jmp LLEntry6_8 |
| |
| //---------------------------------------- |
| |
| .globl Entry7_8 |
| Entry7_8: |
| decl %edi // adjust for hardwired offsets |
| addl %eax,%edx |
| movb (%esi),%al |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| jmp LLEntry7_8 |
| |
| //---------------------------------------- |
| |
| .globl Entry8_8 |
| Entry8_8: |
| addl %eax,%edx |
| movb (%esi),%al |
| sbbl %ecx,%ecx |
| addl %ebp,%ebx |
| adcl advancetable+4(,%ecx,4),%esi |
| |
| addl tstep,%edx |
| sbbl %ecx,%ecx |
| movb %al,1(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| LLEntry7_8: |
| sbbl %ecx,%ecx |
| movb %al,2(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| LLEntry6_8: |
| sbbl %ecx,%ecx |
| movb %al,3(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| LLEntry5_8: |
| sbbl %ecx,%ecx |
| movb %al,4(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| addl tstep,%edx |
| LLEntry4_8: |
| sbbl %ecx,%ecx |
| movb %al,5(%edi) |
| addl %ebp,%ebx |
| movb (%esi),%al |
| adcl advancetable+4(,%ecx,4),%esi |
| LLEntry3_8: |
| movb %al,6(%edi) |
| movb (%esi),%al |
| LLEntry2_8: |
| |
| LEndSpan: |
| |
| // |
| // clear s/z, t/z, 1/z from FP stack |
| // |
| fstp %st(0) |
| fstp %st(0) |
| fstp %st(0) |
| |
| movl pspantemp,%ebx // restore spans pointer |
| movl espan_t_pnext(%ebx),%ebx // point to next span |
| testl %ebx,%ebx // any more spans? |
| movb %al,7(%edi) |
| jnz LSpanLoop // more spans |
| |
| popl %ebx // restore register variables |
| popl %esi |
| popl %edi |
| popl %ebp // restore the caller's stack frame |
| ret |
| |
| //---------------------------------------------------------------------- |
| // 8-bpp horizontal span z drawing codefor polygons, with no transparency. |
| // |
| // Assumes there is at least one span in pzspans, and that every span |
| // contains at least one pixel |
| //---------------------------------------------------------------------- |
| |
| .text |
| |
| // z-clamp on a non-negative gradient span |
| LClamp: |
| movl $0x40000000,%edx |
| xorl %ebx,%ebx |
| fstp %st(0) |
| jmp LZDraw |
| |
| // z-clamp on a negative gradient span |
| LClampNeg: |
| movl $0x40000000,%edx |
| xorl %ebx,%ebx |
| fstp %st(0) |
| jmp LZDrawNeg |
| |
| |
| #define pzspans 4+16 |
| |
| .globl C(D_DrawZSpans) |
| C(D_DrawZSpans): |
| pushl %ebp // preserve caller's stack frame |
| pushl %edi |
| pushl %esi // preserve register variables |
| pushl %ebx |
| |
| flds C(d_zistepu) |
| movl C(d_zistepu),%eax |
| movl pzspans(%esp),%esi |
| testl %eax,%eax |
| jz LFNegSpan |
| |
| fmuls Float2ToThe31nd |
| fistpl izistep // note: we are relying on FP exceptions being turned |
| // off here to avoid range problems |
| movl izistep,%ebx // remains loaded for all spans |
| |
| LFSpanLoop: |
| // set up the initial 1/z value |
| fildl espan_t_v(%esi) |
| fildl espan_t_u(%esi) |
| movl espan_t_v(%esi),%ecx |
| movl C(d_pzbuffer),%edi |
| fmuls C(d_zistepu) |
| fxch %st(1) |
| fmuls C(d_zistepv) |
| fxch %st(1) |
| fadds C(d_ziorigin) |
| imull C(d_zrowbytes),%ecx |
| faddp %st(0),%st(1) |
| |
| // clamp if z is nearer than 2 (1/z > 0.5) |
| fcoms float_point5 |
| addl %ecx,%edi |
| movl espan_t_u(%esi),%edx |
| addl %edx,%edx // word count |
| movl espan_t_count(%esi),%ecx |
| addl %edx,%edi // pdest = &pdestspan[scans->u]; |
| pushl %esi // preserve spans pointer |
| fnstsw %ax |
| testb $0x45,%ah |
| jz LClamp |
| |
| fmuls Float2ToThe31nd |
| fistpl izi // note: we are relying on FP exceptions being turned |
| // off here to avoid problems when the span is closer |
| // than 1/(2**31) |
| movl izi,%edx |
| |
| // at this point: |
| // %ebx = izistep |
| // %ecx = count |
| // %edx = izi |
| // %edi = pdest |
| |
| LZDraw: |
| |
| // do a single pixel up front, if necessary to dword align the destination |
| testl $2,%edi |
| jz LFMiddle |
| movl %edx,%eax |
| addl %ebx,%edx |
| shrl $16,%eax |
| decl %ecx |
| movw %ax,(%edi) |
| addl $2,%edi |
| |
| // do middle a pair of aligned dwords at a time |
| LFMiddle: |
| pushl %ecx |
| shrl $1,%ecx // count / 2 |
| jz LFLast // no aligned dwords to do |
| shrl $1,%ecx // (count / 2) / 2 |
| jnc LFMiddleLoop // even number of aligned dwords to do |
| |
| movl %edx,%eax |
| addl %ebx,%edx |
| shrl $16,%eax |
| movl %edx,%esi |
| addl %ebx,%edx |
| andl $0xFFFF0000,%esi |
| orl %esi,%eax |
| movl %eax,(%edi) |
| addl $4,%edi |
| andl %ecx,%ecx |
| jz LFLast |
| |
| LFMiddleLoop: |
| movl %edx,%eax |
| addl %ebx,%edx |
| shrl $16,%eax |
| movl %edx,%esi |
| addl %ebx,%edx |
| andl $0xFFFF0000,%esi |
| orl %esi,%eax |
| movl %edx,%ebp |
| movl %eax,(%edi) |
| addl %ebx,%edx |
| shrl $16,%ebp |
| movl %edx,%esi |
| addl %ebx,%edx |
| andl $0xFFFF0000,%esi |
| orl %esi,%ebp |
| movl %ebp,4(%edi) // FIXME: eliminate register contention |
| addl $8,%edi |
| |
| decl %ecx |
| jnz LFMiddleLoop |
| |
| LFLast: |
| popl %ecx // retrieve count |
| popl %esi // retrieve span pointer |
| |
| // do the last, unaligned pixel, if there is one |
| andl $1,%ecx // is there an odd pixel left to do? |
| jz LFSpanDone // no |
| shrl $16,%edx |
| movw %dx,(%edi) // do the final pixel's z |
| |
| LFSpanDone: |
| movl espan_t_pnext(%esi),%esi |
| testl %esi,%esi |
| jnz LFSpanLoop |
| |
| jmp LFDone |
| |
| LFNegSpan: |
| fmuls FloatMinus2ToThe31nd |
| fistpl izistep // note: we are relying on FP exceptions being turned |
| // off here to avoid range problems |
| movl izistep,%ebx // remains loaded for all spans |
| |
| LFNegSpanLoop: |
| // set up the initial 1/z value |
| fildl espan_t_v(%esi) |
| fildl espan_t_u(%esi) |
| movl espan_t_v(%esi),%ecx |
| movl C(d_pzbuffer),%edi |
| fmuls C(d_zistepu) |
| fxch %st(1) |
| fmuls C(d_zistepv) |
| fxch %st(1) |
| fadds C(d_ziorigin) |
| imull C(d_zrowbytes),%ecx |
| faddp %st(0),%st(1) |
| |
| // clamp if z is nearer than 2 (1/z > 0.5) |
| fcoms float_point5 |
| addl %ecx,%edi |
| movl espan_t_u(%esi),%edx |
| addl %edx,%edx // word count |
| movl espan_t_count(%esi),%ecx |
| addl %edx,%edi // pdest = &pdestspan[scans->u]; |
| pushl %esi // preserve spans pointer |
| fnstsw %ax |
| testb $0x45,%ah |
| jz LClampNeg |
| |
| fmuls Float2ToThe31nd |
| fistpl izi // note: we are relying on FP exceptions being turned |
| // off here to avoid problems when the span is closer |
| // than 1/(2**31) |
| movl izi,%edx |
| |
| // at this point: |
| // %ebx = izistep |
| // %ecx = count |
| // %edx = izi |
| // %edi = pdest |
| |
| LZDrawNeg: |
| |
| // do a single pixel up front, if necessary to dword align the destination |
| testl $2,%edi |
| jz LFNegMiddle |
| movl %edx,%eax |
| subl %ebx,%edx |
| shrl $16,%eax |
| decl %ecx |
| movw %ax,(%edi) |
| addl $2,%edi |
| |
| // do middle a pair of aligned dwords at a time |
| LFNegMiddle: |
| pushl %ecx |
| shrl $1,%ecx // count / 2 |
| jz LFNegLast // no aligned dwords to do |
| shrl $1,%ecx // (count / 2) / 2 |
| jnc LFNegMiddleLoop // even number of aligned dwords to do |
| |
| movl %edx,%eax |
| subl %ebx,%edx |
| shrl $16,%eax |
| movl %edx,%esi |
| subl %ebx,%edx |
| andl $0xFFFF0000,%esi |
| orl %esi,%eax |
| movl %eax,(%edi) |
| addl $4,%edi |
| andl %ecx,%ecx |
| jz LFNegLast |
| |
| LFNegMiddleLoop: |
| movl %edx,%eax |
| subl %ebx,%edx |
| shrl $16,%eax |
| movl %edx,%esi |
| subl %ebx,%edx |
| andl $0xFFFF0000,%esi |
| orl %esi,%eax |
| movl %edx,%ebp |
| movl %eax,(%edi) |
| subl %ebx,%edx |
| shrl $16,%ebp |
| movl %edx,%esi |
| subl %ebx,%edx |
| andl $0xFFFF0000,%esi |
| orl %esi,%ebp |
| movl %ebp,4(%edi) // FIXME: eliminate register contention |
| addl $8,%edi |
| |
| decl %ecx |
| jnz LFNegMiddleLoop |
| |
| LFNegLast: |
| popl %ecx // retrieve count |
| popl %esi // retrieve span pointer |
| |
| // do the last, unaligned pixel, if there is one |
| andl $1,%ecx // is there an odd pixel left to do? |
| jz LFNegSpanDone // no |
| shrl $16,%edx |
| movw %dx,(%edi) // do the final pixel's z |
| |
| LFNegSpanDone: |
| movl espan_t_pnext(%esi),%esi |
| testl %esi,%esi |
| jnz LFNegSpanLoop |
| |
| LFDone: |
| popl %ebx // restore register variables |
| popl %esi |
| popl %edi |
| popl %ebp // restore the caller's stack frame |
| ret |
| |
| #endif // id386 |