| /*
|
| Copyright (C) 1996-1997 Id Software, Inc.
|
|
|
| This program is free software; you can redistribute it and/or
|
| modify it under the terms of the GNU General Public License
|
| as published by the Free Software Foundation; either version 2
|
| of the License, or (at your option) any later version.
|
|
|
| This program is distributed in the hope that it will be useful,
|
| but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
| See the GNU General Public License for more details.
|
|
|
| You should have received a copy of the GNU General Public License
|
| along with this program; if not, write to the Free Software
|
| Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
| */
|
| // |
| // d_polysa.s |
| // x86 assembly-language polygon model drawing code |
| // |
| |
| #include "asm_i386.h" |
| #include "quakeasm.h" |
| #include "asm_draw.h" |
| #include "d_ifacea.h" |
| |
| #if id386 |
| |
| // !!! if this is changed, it must be changed in d_polyse.c too !!! |
| #define DPS_MAXSPANS MAXHEIGHT+1 |
| // 1 extra for spanpackage that marks end |
| |
| //#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size) |
| #define SPAN_SIZE (1024+1+1+1)*32 |
| |
| |
| .data |
| |
| .align 4 |
| p10_minus_p20: .single 0 |
| p01_minus_p21: .single 0 |
| temp0: .single 0 |
| temp1: .single 0 |
| Ltemp: .single 0 |
| |
| aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5 |
| .long LDraw4, LDraw3, LDraw2, LDraw1 |
| |
| lzistepx: .long 0 |
| |
| |
| .text |
| |
| #ifndef NeXT |
| .extern C(D_PolysetSetEdgeTable) |
| .extern C(D_RasterizeAliasPolySmooth) |
| #endif |
| |
| //---------------------------------------------------------------------- |
| // affine triangle gradient calculation code |
| //---------------------------------------------------------------------- |
| |
| #define skinwidth 4+0 |
| |
| .globl C(D_PolysetCalcGradients) |
| C(D_PolysetCalcGradients): |
| |
| // p00_minus_p20 = r_p0[0] - r_p2[0]; |
| // p01_minus_p21 = r_p0[1] - r_p2[1]; |
| // p10_minus_p20 = r_p1[0] - r_p2[0]; |
| // p11_minus_p21 = r_p1[1] - r_p2[1]; |
| // |
| // xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 - |
| // p00_minus_p20 * p11_minus_p21); |
| // |
| // ystepdenominv = -xstepdenominv; |
| |
| fildl C(r_p0)+0 // r_p0[0] |
| fildl C(r_p2)+0 // r_p2[0] | r_p0[0] |
| fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0] |
| fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0] |
| fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0] |
| fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] | |
| // r_p2[0] | r_p0[0] |
| fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] | |
| // r_p2[0] | r_p0[0] |
| fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] | |
| // r_p2[0] | r_p0[0] |
| fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] | |
| // r_p2[0] | r_p0[0] |
| fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] | |
| // r_p1[1] | r_p2[0] | r_p0[0] |
| fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] | |
| // r_p1[1] | r_p2[0] | p10_minus_p20 |
| fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] | |
| // p00_minus_p20 | p10_minus_p20 |
| fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 | |
| // p00_minus_p20 | p10_minus_p20 |
| fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 | |
| // p00_minus_p20 | p10_minus_p20 |
| fxch %st(1) // p01_minus_p21 | p11_minus_p21 | |
| // p00_minus_p20 | p10_minus_p20 |
| flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 | |
| // p00_minus_p20 | p10_minus_p20 |
| fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 | |
| // p00_minus_p20 | d_xdenom |
| fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 | |
| // p00_minus_p20 | d_xdenom |
| fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv |
| fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| |
| //// ceil () for light so positive steps are exaggerated, negative steps |
| //// diminished, pushing us away from underflow toward overflow. Underflow is |
| //// very visible, overflow is very unlikely, because of ambient lighting |
| // t0 = r_p0[4] - r_p2[4]; |
| // t1 = r_p1[4] - r_p2[4]; |
| |
| fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| |
| // r_lstepx = (int) |
| // ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv); |
| // r_lstepy = (int) |
| // ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv); |
| |
| fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | |
| // t0*p11_minus_p21 | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | |
| // t0*p11_minus_p21 | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | |
| // t1*p01_minus_p21 | t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | |
| // t1*p00_minus_p20 | t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fld %st(2) // xstepdenominv | |
| // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmuls float_minus_1 // ystepdenominv | |
| // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* |
| // xstepdenominv | |
| // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // (t1*p01_minus_p21 - t0*p11_minus_p21)* |
| // xstepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | |
| // (t1*p01_minus_p21 - t0*p11_minus_p21)* |
| // xstepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fldcw ceil_cw |
| fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fldcw single_cw |
| |
| // t0 = r_p0[2] - r_p2[2]; |
| // t1 = r_p1[2] - r_p2[2]; |
| |
| fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| |
| // r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * |
| // xstepdenominv); |
| // r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * |
| // ystepdenominv); |
| |
| fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
| fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | |
| // t0*p11_minus_p21 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | |
| // t0*p11_minus_p21 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | |
| // t1*p01_minus_p21 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | |
| // t1*p00_minus_p20 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* |
| // xstepdenominv | |
| // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | |
| // (t1*p01_minus_p21 - t0*p11_minus_p21)* |
| // xstepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| |
| // t0 = r_p0[3] - r_p2[3]; |
| // t1 = r_p1[3] - r_p2[3]; |
| |
| fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| |
| // r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * |
| // xstepdenominv); |
| // r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * |
| // ystepdenominv); |
| |
| fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | |
| // t0*p11_minus_p21 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | |
| // t0*p11_minus_p21 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | |
| // t1*p01_minus_p21 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | |
| // t1*p00_minus_p20 | t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* |
| // xstepdenominv | |
| // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)* |
| // ystepdenominv | |
| // (t1*p01_minus_p21 - t0*p11_minus_p21)* |
| // xstepdenominv | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| |
| // t0 = r_p0[5] - r_p2[5]; |
| // t1 = r_p1[5] - r_p2[5]; |
| |
| fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // p11_minus_p21 |
| fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | p11_minus_p21 |
| fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| |
| // r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * |
| // xstepdenominv); |
| // r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * |
| // ystepdenominv); |
| |
| fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | p11_minus_p21 |
| fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | t0*p11_minus_p21 |
| fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | t0*p11_minus_p21 |
| fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv | |
| // p00_minus_p20 | t0*p11_minus_p21 |
| fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | |
| // t0*p11_minus_p21 |
| fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv | |
| // xstepdenominv | p00_minus_p20 | |
| // t0*p11_minus_p21 |
| fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // t0*p11_minus_p21 |
| fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | |
| // ystepdenominv | xstepdenominv | p00_minus_p20 | |
| // t0*p11_minus_p21 |
| fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 | |
| // ystepdenominv | xstepdenominv | |
| // t1*p00_minus_p20 | t0*p11_minus_p21 |
| fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 | |
| // ystepdenominv | xstepdenominv | |
| // t1*p00_minus_p20 | t0*p10_minus_p20 |
| fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // ystepdenominv | xstepdenominv | |
| // t1*p00_minus_p20 | t0*p10_minus_p20 |
| fxch %st(3) // t1*p00_minus_p20 | ystepdenominv | |
| // xstepdenominv | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // t0*p10_minus_p20 |
| fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // t1*p00_minus_p20 - t0*p10_minus_p20 |
| fxch %st(1) // xstepdenominv | ystepdenominv | |
| // t1*p01_minus_p21 - t0*p11_minus_p21 | |
| // t1*p00_minus_p20 - t0*p10_minus_p20 |
| fmulp %st(0),%st(2) // ystepdenominv | |
| // (t1*p01_minus_p21 - t0*p11_minus_p21) * |
| // xstepdenominv | |
| // t1*p00_minus_p20 - t0*p10_minus_p20 |
| fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) * |
| // xstepdenominv | |
| // (t1*p00_minus_p20 - t0*p10_minus_p20) * |
| // ystepdenominv |
| fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) * |
| // ystepdenominv |
| fistpl C(r_zistepy) |
| |
| // a_sstepxfrac = r_sstepx << 16; |
| // a_tstepxfrac = r_tstepx << 16; |
| // |
| // a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) + |
| // (r_sstepx >> 16); |
| |
| movl C(r_sstepx),%eax |
| movl C(r_tstepx),%edx |
| shll $16,%eax |
| shll $16,%edx |
| movl %eax,C(a_sstepxfrac) |
| movl %edx,C(a_tstepxfrac) |
| |
| movl C(r_sstepx),%ecx |
| movl C(r_tstepx),%eax |
| sarl $16,%ecx |
| sarl $16,%eax |
| imull skinwidth(%esp) |
| addl %ecx,%eax |
| movl %eax,C(a_ststepxwhole) |
| |
| ret |
| |
| |
| //---------------------------------------------------------------------- |
| // recursive subdivision affine triangle drawing code |
| // |
| // not C-callable because of stdcall return |
| //---------------------------------------------------------------------- |
| |
| #define lp1 4+16 |
| #define lp2 8+16 |
| #define lp3 12+16 |
| |
| .globl C(D_PolysetRecursiveTriangle) |
| C(D_PolysetRecursiveTriangle): |
| pushl %ebp // preserve caller stack frame pointer |
| pushl %esi // preserve register variables |
| pushl %edi |
| pushl %ebx |
| |
| // int *temp; |
| // int d; |
| // int new[6]; |
| // int i; |
| // int z; |
| // short *zbuf; |
| movl lp2(%esp),%esi |
| movl lp1(%esp),%ebx |
| movl lp3(%esp),%edi |
| |
| // d = lp2[0] - lp1[0]; |
| // if (d < -1 || d > 1) |
| // goto split; |
| movl 0(%esi),%eax |
| |
| movl 0(%ebx),%edx |
| movl 4(%esi),%ebp |
| |
| subl %edx,%eax |
| movl 4(%ebx),%ecx |
| |
| subl %ecx,%ebp |
| incl %eax |
| |
| cmpl $2,%eax |
| ja LSplit |
| |
| // d = lp2[1] - lp1[1]; |
| // if (d < -1 || d > 1) |
| // goto split; |
| movl 0(%edi),%eax |
| incl %ebp |
| |
| cmpl $2,%ebp |
| ja LSplit |
| |
| // d = lp3[0] - lp2[0]; |
| // if (d < -1 || d > 1) |
| // goto split2; |
| movl 0(%esi),%edx |
| movl 4(%edi),%ebp |
| |
| subl %edx,%eax |
| movl 4(%esi),%ecx |
| |
| subl %ecx,%ebp |
| incl %eax |
| |
| cmpl $2,%eax |
| ja LSplit2 |
| |
| // d = lp3[1] - lp2[1]; |
| // if (d < -1 || d > 1) |
| // goto split2; |
| movl 0(%ebx),%eax |
| incl %ebp |
| |
| cmpl $2,%ebp |
| ja LSplit2 |
| |
| // d = lp1[0] - lp3[0]; |
| // if (d < -1 || d > 1) |
| // goto split3; |
| movl 0(%edi),%edx |
| movl 4(%ebx),%ebp |
| |
| subl %edx,%eax |
| movl 4(%edi),%ecx |
| |
| subl %ecx,%ebp |
| incl %eax |
| |
| incl %ebp |
| movl %ebx,%edx |
| |
| cmpl $2,%eax |
| ja LSplit3 |
| |
| // d = lp1[1] - lp3[1]; |
| // if (d < -1 || d > 1) |
| // { |
| //split3: |
| // temp = lp1; |
| // lp3 = lp2; |
| // lp1 = lp3; |
| // lp2 = temp; |
| // goto split; |
| // } |
| // |
| // return; // entire tri is filled |
| // |
| cmpl $2,%ebp |
| jna LDone |
| |
| LSplit3: |
| movl %edi,%ebx |
| movl %esi,%edi |
| movl %edx,%esi |
| jmp LSplit |
| |
| //split2: |
| LSplit2: |
| |
| // temp = lp1; |
| // lp1 = lp2; |
| // lp2 = lp3; |
| // lp3 = temp; |
| movl %ebx,%eax |
| movl %esi,%ebx |
| movl %edi,%esi |
| movl %eax,%edi |
| |
| //split: |
| LSplit: |
| |
| subl $24,%esp // allocate space for a new vertex |
| |
| //// split this edge |
| // new[0] = (lp1[0] + lp2[0]) >> 1; |
| // new[1] = (lp1[1] + lp2[1]) >> 1; |
| // new[2] = (lp1[2] + lp2[2]) >> 1; |
| // new[3] = (lp1[3] + lp2[3]) >> 1; |
| // new[5] = (lp1[5] + lp2[5]) >> 1; |
| movl 8(%ebx),%eax |
| |
| movl 8(%esi),%edx |
| movl 12(%ebx),%ecx |
| |
| addl %edx,%eax |
| movl 12(%esi),%edx |
| |
| sarl $1,%eax |
| addl %edx,%ecx |
| |
| movl %eax,8(%esp) |
| movl 20(%ebx),%eax |
| |
| sarl $1,%ecx |
| movl 20(%esi),%edx |
| |
| movl %ecx,12(%esp) |
| addl %edx,%eax |
| |
| movl 0(%ebx),%ecx |
| movl 0(%esi),%edx |
| |
| sarl $1,%eax |
| addl %ecx,%edx |
| |
| movl %eax,20(%esp) |
| movl 4(%ebx),%eax |
| |
| sarl $1,%edx |
| movl 4(%esi),%ebp |
| |
| movl %edx,0(%esp) |
| addl %eax,%ebp |
| |
| sarl $1,%ebp |
| movl %ebp,4(%esp) |
| |
| //// draw the point if splitting a leading edge |
| // if (lp2[1] > lp1[1]) |
| // goto nodraw; |
| cmpl %eax,4(%esi) |
| jg LNoDraw |
| |
| // if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0])) |
| // goto nodraw; |
| movl 0(%esi),%edx |
| jnz LDraw |
| |
| cmpl %ecx,%edx |
| jl LNoDraw |
| |
| LDraw: |
| |
| // z = new[5] >> 16; |
| movl 20(%esp),%edx |
| movl 4(%esp),%ecx |
| |
| sarl $16,%edx |
| movl 0(%esp),%ebp |
| |
| // zbuf = zspantable[new[1]] + new[0]; |
| movl C(zspantable)(,%ecx,4),%eax |
| |
| // if (z >= *zbuf) |
| // { |
| cmpw (%eax,%ebp,2),%dx |
| jnge LNoDraw |
| |
| // int pix; |
| // |
| // *zbuf = z; |
| movw %dx,(%eax,%ebp,2) |
| |
| // pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]]; |
| movl 12(%esp),%eax |
| |
| sarl $16,%eax |
| movl 8(%esp),%edx |
| |
| sarl $16,%edx |
| subl %ecx,%ecx |
| |
| movl C(skintable)(,%eax,4),%eax |
| movl 4(%esp),%ebp |
| |
| movb (%eax,%edx,),%cl |
| movl C(d_pcolormap),%edx |
| |
| movb (%edx,%ecx,),%dl |
| movl 0(%esp),%ecx |
| |
| // d_viewbuffer[d_scantable[new[1]] + new[0]] = pix; |
| movl C(d_scantable)(,%ebp,4),%eax |
| addl %eax,%ecx |
| movl C(d_viewbuffer),%eax |
| movb %dl,(%eax,%ecx,1) |
| |
| // } |
| // |
| //nodraw: |
| LNoDraw: |
| |
| //// recursively continue |
| // D_PolysetRecursiveTriangle (lp3, lp1, new); |
| pushl %esp |
| pushl %ebx |
| pushl %edi |
| call C(D_PolysetRecursiveTriangle) |
| |
| // D_PolysetRecursiveTriangle (lp3, new, lp2); |
| movl %esp,%ebx |
| pushl %esi |
| pushl %ebx |
| pushl %edi |
| call C(D_PolysetRecursiveTriangle) |
| addl $24,%esp |
| |
| LDone: |
| popl %ebx // restore register variables |
| popl %edi |
| popl %esi |
| popl %ebp // restore caller stack frame pointer |
| ret $12 |
| |
| |
| //---------------------------------------------------------------------- |
| // 8-bpp horizontal span drawing code for affine polygons, with smooth |
| // shading and no transparency |
| //---------------------------------------------------------------------- |
| |
| #define pspans 4+8 |
| |
| .globl C(D_PolysetAff8Start) |
| C(D_PolysetAff8Start): |
| |
| .globl C(D_PolysetDrawSpans8) |
| C(D_PolysetDrawSpans8): |
| pushl %esi // preserve register variables |
| pushl %ebx |
| |
| movl pspans(%esp),%esi // point to the first span descriptor |
| movl C(r_zistepx),%ecx |
| |
| pushl %ebp // preserve caller's stack frame |
| pushl %edi |
| |
| rorl $16,%ecx // put high 16 bits of 1/z step in low word |
| movl spanpackage_t_count(%esi),%edx |
| |
| movl %ecx,lzistepx |
| |
| LSpanLoop: |
| |
| // lcount = d_aspancount - pspanpackage->count; |
| // |
| // errorterm += erroradjustup; |
| // if (errorterm >= 0) |
| // { |
| // d_aspancount += d_countextrastep; |
| // errorterm -= erroradjustdown; |
| // } |
| // else |
| // { |
| // d_aspancount += ubasestep; |
| // } |
| movl C(d_aspancount),%eax |
| subl %edx,%eax |
| |
| movl C(erroradjustup),%edx |
| movl C(errorterm),%ebx |
| addl %edx,%ebx |
| js LNoTurnover |
| |
| movl C(erroradjustdown),%edx |
| movl C(d_countextrastep),%edi |
| subl %edx,%ebx |
| movl C(d_aspancount),%ebp |
| movl %ebx,C(errorterm) |
| addl %edi,%ebp |
| movl %ebp,C(d_aspancount) |
| jmp LRightEdgeStepped |
| |
| LNoTurnover: |
| movl C(d_aspancount),%edi |
| movl C(ubasestep),%edx |
| movl %ebx,C(errorterm) |
| addl %edx,%edi |
| movl %edi,C(d_aspancount) |
| |
| LRightEdgeStepped: |
| cmpl $1,%eax |
| |
| jl LNextSpan |
| jz LExactlyOneLong |
| |
| // |
| // set up advancetable |
| // |
| movl C(a_ststepxwhole),%ecx |
| movl C(r_affinetridesc)+atd_skinwidth,%edx |
| |
| movl %ecx,advancetable+4 // advance base in t |
| addl %edx,%ecx |
| |
| movl %ecx,advancetable // advance extra in t |
| movl C(a_tstepxfrac),%ecx |
| |
| movw C(r_lstepx),%cx |
| movl %eax,%edx // count |
| |
| movl %ecx,tstep |
| addl $7,%edx |
| |
| shrl $3,%edx // count of full and partial loops |
| movl spanpackage_t_sfrac(%esi),%ebx |
| |
| movw %dx,%bx |
| movl spanpackage_t_pz(%esi),%ecx |
| |
| negl %eax |
| |
| movl spanpackage_t_pdest(%esi),%edi |
| andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1 |
| |
| subl %eax,%edi // compensate for hardwired offsets |
| subl %eax,%ecx |
| |
| subl %eax,%ecx |
| movl spanpackage_t_tfrac(%esi),%edx |
| |
| movw spanpackage_t_light(%esi),%dx |
| movl spanpackage_t_zi(%esi),%ebp |
| |
| rorl $16,%ebp // put high 16 bits of 1/z in low word |
| pushl %esi |
| |
| movl spanpackage_t_ptex(%esi),%esi |
| jmp aff8entryvec_table(,%eax,4) |
| |
| // %bx = count of full and partial loops |
| // %ebx high word = sfrac |
| // %ecx = pz |
| // %dx = light |
| // %edx high word = tfrac |
| // %esi = ptex |
| // %edi = pdest |
| // %ebp = 1/z |
| // tstep low word = C(r_lstepx) |
| // tstep high word = C(a_tstepxfrac) |
| // C(a_sstepxfrac) low word = 0 |
| // C(a_sstepxfrac) high word = C(a_sstepxfrac) |
| |
| LDrawLoop: |
| |
| // FIXME: do we need to clamp light? We may need at least a buffer bit to |
| // keep it from poking into tfrac and causing problems |
| |
| LDraw8: |
| cmpw (%ecx),%bp |
| jl Lp1 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch8: |
| movb %al,(%edi) |
| Lp1: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| LDraw7: |
| cmpw 2(%ecx),%bp |
| jl Lp2 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,2(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch7: |
| movb %al,1(%edi) |
| Lp2: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| LDraw6: |
| cmpw 4(%ecx),%bp |
| jl Lp3 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,4(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch6: |
| movb %al,2(%edi) |
| Lp3: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| LDraw5: |
| cmpw 6(%ecx),%bp |
| jl Lp4 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,6(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch5: |
| movb %al,3(%edi) |
| Lp4: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| LDraw4: |
| cmpw 8(%ecx),%bp |
| jl Lp5 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,8(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch4: |
| movb %al,4(%edi) |
| Lp5: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| LDraw3: |
| cmpw 10(%ecx),%bp |
| jl Lp6 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,10(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch3: |
| movb %al,5(%edi) |
| Lp6: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| LDraw2: |
| cmpw 12(%ecx),%bp |
| jl Lp7 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,12(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch2: |
| movb %al,6(%edi) |
| Lp7: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| LDraw1: |
| cmpw 14(%ecx),%bp |
| jl Lp8 |
| xorl %eax,%eax |
| movb %dh,%ah |
| movb (%esi),%al |
| movw %bp,14(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch1: |
| movb %al,7(%edi) |
| Lp8: |
| addl tstep,%edx |
| sbbl %eax,%eax |
| addl lzistepx,%ebp |
| adcl $0,%ebp |
| addl C(a_sstepxfrac),%ebx |
| adcl advancetable+4(,%eax,4),%esi |
| |
| addl $8,%edi |
| addl $16,%ecx |
| |
| decw %bx |
| jnz LDrawLoop |
| |
| popl %esi // restore spans pointer |
| LNextSpan: |
| addl $(spanpackage_t_size),%esi // point to next span |
| LNextSpanESISet: |
| movl spanpackage_t_count(%esi),%edx |
| cmpl $-999999,%edx // any more spans? |
| jnz LSpanLoop // yes |
| |
| popl %edi |
| popl %ebp // restore the caller's stack frame |
| popl %ebx // restore register variables |
| popl %esi |
| ret |
| |
| |
| // draw a one-long span |
| |
| LExactlyOneLong: |
| |
| movl spanpackage_t_pz(%esi),%ecx |
| movl spanpackage_t_zi(%esi),%ebp |
| |
| rorl $16,%ebp // put high 16 bits of 1/z in low word |
| movl spanpackage_t_ptex(%esi),%ebx |
| |
| cmpw (%ecx),%bp |
| jl LNextSpan |
| xorl %eax,%eax |
| movl spanpackage_t_pdest(%esi),%edi |
| movb spanpackage_t_light+1(%esi),%ah |
| addl $(spanpackage_t_size),%esi // point to next span |
| movb (%ebx),%al |
| movw %bp,(%ecx) |
| movb 0x12345678(%eax),%al |
| LPatch9: |
| movb %al,(%edi) |
| |
| jmp LNextSpanESISet |
| |
| .globl C(D_PolysetAff8End) |
| C(D_PolysetAff8End): |
| |
| |
| #define pcolormap 4 |
| |
| .globl C(D_Aff8Patch) |
| C(D_Aff8Patch): |
| movl pcolormap(%esp),%eax |
| movl %eax,LPatch1-4 |
| movl %eax,LPatch2-4 |
| movl %eax,LPatch3-4 |
| movl %eax,LPatch4-4 |
| movl %eax,LPatch5-4 |
| movl %eax,LPatch6-4 |
| movl %eax,LPatch7-4 |
| movl %eax,LPatch8-4 |
| movl %eax,LPatch9-4 |
| |
| ret |
| |
| |
| //---------------------------------------------------------------------- |
| // Alias model polygon dispatching code, combined with subdivided affine |
| // triangle drawing code |
| //---------------------------------------------------------------------- |
| |
| .globl C(D_PolysetDraw) |
| C(D_PolysetDraw): |
| |
| // spanpackage_t spans[DPS_MAXSPANS + 1 + |
| // ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1]; |
| // // one extra because of cache line pretouching |
| // |
| // a_spans = (spanpackage_t *) |
| // (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1)); |
| subl $(SPAN_SIZE),%esp |
| movl %esp,%eax |
| addl $(CACHE_SIZE - 1),%eax |
| andl $(~(CACHE_SIZE - 1)),%eax |
| movl %eax,C(a_spans) |
| |
| // if (r_affinetridesc.drawtype) |
| // D_DrawSubdiv (); |
| // else |
| // D_DrawNonSubdiv (); |
| movl C(r_affinetridesc)+atd_drawtype,%eax |
| testl %eax,%eax |
| jz C(D_DrawNonSubdiv) |
| |
| pushl %ebp // preserve caller stack frame pointer |
| |
| // lnumtriangles = r_affinetridesc.numtriangles; |
| movl C(r_affinetridesc)+atd_numtriangles,%ebp |
| |
| pushl %esi // preserve register variables |
| shll $4,%ebp |
| |
| pushl %ebx |
| // ptri = r_affinetridesc.ptriangles; |
| movl C(r_affinetridesc)+atd_ptriangles,%ebx |
| |
| pushl %edi |
| |
| // mtriangle_t *ptri; |
| // finalvert_t *pfv, *index0, *index1, *index2; |
| // int i; |
| // int lnumtriangles; |
| // int s0, s1, s2; |
| |
| // pfv = r_affinetridesc.pfinalverts; |
| movl C(r_affinetridesc)+atd_pfinalverts,%edi |
| |
| // for (i=0 ; i<lnumtriangles ; i++) |
| // { |
| |
| Llooptop: |
| |
| // index0 = pfv + ptri[i].vertindex[0]; |
| // index1 = pfv + ptri[i].vertindex[1]; |
| // index2 = pfv + ptri[i].vertindex[2]; |
| movl mtri_vertindex-16+0(%ebx,%ebp,),%ecx |
| movl mtri_vertindex-16+4(%ebx,%ebp,),%esi |
| |
| shll $(fv_shift),%ecx |
| movl mtri_vertindex-16+8(%ebx,%ebp,),%edx |
| |
| shll $(fv_shift),%esi |
| addl %edi,%ecx |
| |
| shll $(fv_shift),%edx |
| addl %edi,%esi |
| |
| addl %edi,%edx |
| |
| // if (((index0->v[1]-index1->v[1]) * |
| // (index0->v[0]-index2->v[0]) - |
| // (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0) |
| // { |
| // continue; |
| // } |
| // |
| // d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00]; |
| fildl fv_v+4(%ecx) // i0v1 |
| fildl fv_v+4(%esi) // i1v1 | i0v1 |
| fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1 |
| fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1 |
| fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1 |
| fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1 |
| fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1 |
| fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1 |
| fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1 |
| fildl fv_v+4(%edx) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 |
| fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 |
| fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 |
| fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 |
| fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1 |
| fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1 |
| movl fv_v+16(%ecx),%eax |
| andl $0xFF00,%eax |
| fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1 |
| addl C(acolormap),%eax |
| fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1) |
| movl %eax,C(d_pcolormap) |
| fstps Ltemp |
| movl Ltemp,%eax |
| subl $0x80000001,%eax |
| jc Lskip |
| |
| // if (ptri[i].facesfront) |
| // { |
| // D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v); |
| movl mtri_facesfront-16(%ebx,%ebp,),%eax |
| testl %eax,%eax |
| jz Lfacesback |
| |
| pushl %edx |
| pushl %esi |
| pushl %ecx |
| call C(D_PolysetRecursiveTriangle) |
| |
| subl $16,%ebp |
| jnz Llooptop |
| jmp Ldone2 |
| |
| // } |
| // else |
| // { |
| Lfacesback: |
| |
| // s0 = index0->v[2]; |
| // s1 = index1->v[2]; |
| // s2 = index2->v[2]; |
| movl fv_v+8(%ecx),%eax |
| pushl %eax |
| movl fv_v+8(%esi),%eax |
| pushl %eax |
| movl fv_v+8(%edx),%eax |
| pushl %eax |
| pushl %ecx |
| pushl %edx |
| |
| // if (index0->flags & ALIAS_ONSEAM) |
| // index0->v[2] += r_affinetridesc.seamfixupX16; |
| movl C(r_affinetridesc)+atd_seamfixupX16,%eax |
| testl $(ALIAS_ONSEAM),fv_flags(%ecx) |
| jz Lp11 |
| addl %eax,fv_v+8(%ecx) |
| Lp11: |
| |
| // if (index1->flags & ALIAS_ONSEAM) |
| // index1->v[2] += r_affinetridesc.seamfixupX16; |
| testl $(ALIAS_ONSEAM),fv_flags(%esi) |
| jz Lp12 |
| addl %eax,fv_v+8(%esi) |
| Lp12: |
| |
| // if (index2->flags & ALIAS_ONSEAM) |
| // index2->v[2] += r_affinetridesc.seamfixupX16; |
| testl $(ALIAS_ONSEAM),fv_flags(%edx) |
| jz Lp13 |
| addl %eax,fv_v+8(%edx) |
| Lp13: |
| |
| // D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v); |
| pushl %edx |
| pushl %esi |
| pushl %ecx |
| call C(D_PolysetRecursiveTriangle) |
| |
| // index0->v[2] = s0; |
| // index1->v[2] = s1; |
| // index2->v[2] = s2; |
| popl %edx |
| popl %ecx |
| popl %eax |
| movl %eax,fv_v+8(%edx) |
| popl %eax |
| movl %eax,fv_v+8(%esi) |
| popl %eax |
| movl %eax,fv_v+8(%ecx) |
| |
| // } |
| // } |
| Lskip: |
| subl $16,%ebp |
| jnz Llooptop |
| |
| Ldone2: |
| popl %edi // restore the caller's stack frame |
| popl %ebx |
| popl %esi // restore register variables |
| popl %ebp |
| |
| addl $(SPAN_SIZE),%esp |
| |
| ret |
| |
| |
| //---------------------------------------------------------------------- |
| // Alias model triangle left-edge scanning code |
| //---------------------------------------------------------------------- |
| |
| #define height 4+16 |
| |
| .globl C(D_PolysetScanLeftEdge) |
| C(D_PolysetScanLeftEdge): |
| pushl %ebp // preserve caller stack frame pointer |
| pushl %esi // preserve register variables |
| pushl %edi |
| pushl %ebx |
| |
| movl height(%esp),%eax |
| movl C(d_sfrac),%ecx |
| andl $0xFFFF,%eax |
| movl C(d_ptex),%ebx |
| orl %eax,%ecx |
| movl C(d_pedgespanpackage),%esi |
| movl C(d_tfrac),%edx |
| movl C(d_light),%edi |
| movl C(d_zi),%ebp |
| |
| // %eax: scratch |
| // %ebx: d_ptex |
| // %ecx: d_sfrac in high word, count in low word |
| // %edx: d_tfrac |
| // %esi: d_pedgespanpackage, errorterm, scratch alternately |
| // %edi: d_light |
| // %ebp: d_zi |
| |
| // do |
| // { |
| |
| LScanLoop: |
| |
| // d_pedgespanpackage->ptex = ptex; |
| // d_pedgespanpackage->pdest = d_pdest; |
| // d_pedgespanpackage->pz = d_pz; |
| // d_pedgespanpackage->count = d_aspancount; |
| // d_pedgespanpackage->light = d_light; |
| // d_pedgespanpackage->zi = d_zi; |
| // d_pedgespanpackage->sfrac = d_sfrac << 16; |
| // d_pedgespanpackage->tfrac = d_tfrac << 16; |
| movl %ebx,spanpackage_t_ptex(%esi) |
| movl C(d_pdest),%eax |
| movl %eax,spanpackage_t_pdest(%esi) |
| movl C(d_pz),%eax |
| movl %eax,spanpackage_t_pz(%esi) |
| movl C(d_aspancount),%eax |
| movl %eax,spanpackage_t_count(%esi) |
| movl %edi,spanpackage_t_light(%esi) |
| movl %ebp,spanpackage_t_zi(%esi) |
| movl %ecx,spanpackage_t_sfrac(%esi) |
| movl %edx,spanpackage_t_tfrac(%esi) |
| |
| // pretouch the next cache line |
| movb spanpackage_t_size(%esi),%al |
| |
| // d_pedgespanpackage++; |
| addl $(spanpackage_t_size),%esi |
| movl C(erroradjustup),%eax |
| movl %esi,C(d_pedgespanpackage) |
| |
| // errorterm += erroradjustup; |
| movl C(errorterm),%esi |
| addl %eax,%esi |
| movl C(d_pdest),%eax |
| |
| // if (errorterm >= 0) |
| // { |
| js LNoLeftEdgeTurnover |
| |
| // errorterm -= erroradjustdown; |
| // d_pdest += d_pdestextrastep; |
| subl C(erroradjustdown),%esi |
| addl C(d_pdestextrastep),%eax |
| movl %esi,C(errorterm) |
| movl %eax,C(d_pdest) |
| |
| // d_pz += d_pzextrastep; |
| // d_aspancount += d_countextrastep; |
| // d_ptex += d_ptexextrastep; |
| // d_sfrac += d_sfracextrastep; |
| // d_ptex += d_sfrac >> 16; |
| // d_sfrac &= 0xFFFF; |
| // d_tfrac += d_tfracextrastep; |
| movl C(d_pz),%eax |
| movl C(d_aspancount),%esi |
| addl C(d_pzextrastep),%eax |
| addl C(d_sfracextrastep),%ecx |
| adcl C(d_ptexextrastep),%ebx |
| addl C(d_countextrastep),%esi |
| movl %eax,C(d_pz) |
| movl C(d_tfracextrastep),%eax |
| movl %esi,C(d_aspancount) |
| addl %eax,%edx |
| |
| // if (d_tfrac & 0x10000) |
| // { |
| jnc LSkip1 |
| |
| // d_ptex += r_affinetridesc.skinwidth; |
| // d_tfrac &= 0xFFFF; |
| addl C(r_affinetridesc)+atd_skinwidth,%ebx |
| |
| // } |
| |
| LSkip1: |
| |
| // d_light += d_lightextrastep; |
| // d_zi += d_ziextrastep; |
| addl C(d_lightextrastep),%edi |
| addl C(d_ziextrastep),%ebp |
| |
| // } |
| movl C(d_pedgespanpackage),%esi |
| decl %ecx |
| testl $0xFFFF,%ecx |
| jnz LScanLoop |
| |
| popl %ebx |
| popl %edi |
| popl %esi |
| popl %ebp |
| ret |
| |
| // else |
| // { |
| |
| LNoLeftEdgeTurnover: |
| movl %esi,C(errorterm) |
| |
| // d_pdest += d_pdestbasestep; |
| addl C(d_pdestbasestep),%eax |
| movl %eax,C(d_pdest) |
| |
| // d_pz += d_pzbasestep; |
| // d_aspancount += ubasestep; |
| // d_ptex += d_ptexbasestep; |
| // d_sfrac += d_sfracbasestep; |
| // d_ptex += d_sfrac >> 16; |
| // d_sfrac &= 0xFFFF; |
| movl C(d_pz),%eax |
| movl C(d_aspancount),%esi |
| addl C(d_pzbasestep),%eax |
| addl C(d_sfracbasestep),%ecx |
| adcl C(d_ptexbasestep),%ebx |
| addl C(ubasestep),%esi |
| movl %eax,C(d_pz) |
| movl %esi,C(d_aspancount) |
| |
| // d_tfrac += d_tfracbasestep; |
| movl C(d_tfracbasestep),%esi |
| addl %esi,%edx |
| |
| // if (d_tfrac & 0x10000) |
| // { |
| jnc LSkip2 |
| |
| // d_ptex += r_affinetridesc.skinwidth; |
| // d_tfrac &= 0xFFFF; |
| addl C(r_affinetridesc)+atd_skinwidth,%ebx |
| |
| // } |
| |
| LSkip2: |
| |
| // d_light += d_lightbasestep; |
| // d_zi += d_zibasestep; |
| addl C(d_lightbasestep),%edi |
| addl C(d_zibasestep),%ebp |
| |
| // } |
| // } while (--height); |
| movl C(d_pedgespanpackage),%esi |
| decl %ecx |
| testl $0xFFFF,%ecx |
| jnz LScanLoop |
| |
| popl %ebx |
| popl %edi |
| popl %esi |
| popl %ebp |
| ret |
| |
| |
| //---------------------------------------------------------------------- |
| // Alias model vertex drawing code |
| //---------------------------------------------------------------------- |
| |
| #define fv 4+8 |
| #define numverts 8+8 |
| |
| .globl C(D_PolysetDrawFinalVerts) |
| C(D_PolysetDrawFinalVerts): |
| pushl %ebp // preserve caller stack frame pointer |
| pushl %ebx |
| |
| // int i, z; |
| // short *zbuf; |
| |
| movl numverts(%esp),%ecx |
| movl fv(%esp),%ebx |
| |
| pushl %esi // preserve register variables |
| pushl %edi |
| |
| LFVLoop: |
| |
| // for (i=0 ; i<numverts ; i++, fv++) |
| // { |
| // // valid triangle coordinates for filling can include the bottom and |
| // // right clip edges, due to the fill rule; these shouldn't be drawn |
| // if ((fv->v[0] < r_refdef.vrectright) && |
| // (fv->v[1] < r_refdef.vrectbottom)) |
| // { |
| movl fv_v+0(%ebx),%eax |
| movl C(r_refdef)+rd_vrectright,%edx |
| cmpl %edx,%eax |
| jge LNextVert |
| movl fv_v+4(%ebx),%esi |
| movl C(r_refdef)+rd_vrectbottom,%edx |
| cmpl %edx,%esi |
| jge LNextVert |
| |
| // zbuf = zspantable[fv->v[1]] + fv->v[0]; |
| movl C(zspantable)(,%esi,4),%edi |
| |
| // z = fv->v[5]>>16; |
| movl fv_v+20(%ebx),%edx |
| shrl $16,%edx |
| |
| // if (z >= *zbuf) |
| // { |
| // int pix; |
| cmpw (%edi,%eax,2),%dx |
| jl LNextVert |
| |
| // *zbuf = z; |
| movw %dx,(%edi,%eax,2) |
| |
| // pix = skintable[fv->v[3]>>16][fv->v[2]>>16]; |
| movl fv_v+12(%ebx),%edi |
| shrl $16,%edi |
| movl C(skintable)(,%edi,4),%edi |
| movl fv_v+8(%ebx),%edx |
| shrl $16,%edx |
| movb (%edi,%edx),%dl |
| |
| // pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)]; |
| movl fv_v+16(%ebx),%edi |
| andl $0xFF00,%edi |
| andl $0x00FF,%edx |
| addl %edx,%edi |
| movl C(acolormap),%edx |
| movb (%edx,%edi,1),%dl |
| |
| // d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix; |
| movl C(d_scantable)(,%esi,4),%edi |
| movl C(d_viewbuffer),%esi |
| addl %eax,%edi |
| movb %dl,(%esi,%edi) |
| |
| // } |
| // } |
| // } |
| LNextVert: |
| addl $(fv_size),%ebx |
| decl %ecx |
| jnz LFVLoop |
| |
| popl %edi |
| popl %esi |
| popl %ebx |
| popl %ebp |
| ret |
| |
| |
| //---------------------------------------------------------------------- |
| // Alias model non-subdivided polygon dispatching code |
| // |
| // not C-callable because of stack buffer cleanup |
| //---------------------------------------------------------------------- |
| |
| .globl C(D_DrawNonSubdiv) |
| C(D_DrawNonSubdiv): |
| pushl %ebp // preserve caller stack frame pointer |
| movl C(r_affinetridesc)+atd_numtriangles,%ebp |
| pushl %ebx |
| shll $(mtri_shift),%ebp |
| pushl %esi // preserve register variables |
| movl C(r_affinetridesc)+atd_ptriangles,%esi |
| pushl %edi |
| |
| // mtriangle_t *ptri; |
| // finalvert_t *pfv, *index0, *index1, *index2; |
| // int i; |
| // int lnumtriangles; |
| |
| // pfv = r_affinetridesc.pfinalverts; |
| // ptri = r_affinetridesc.ptriangles; |
| // lnumtriangles = r_affinetridesc.numtriangles; |
| |
| LNDLoop: |
| |
| // for (i=0 ; i<lnumtriangles ; i++, ptri++) |
| // { |
| // index0 = pfv + ptri->vertindex[0]; |
| // index1 = pfv + ptri->vertindex[1]; |
| // index2 = pfv + ptri->vertindex[2]; |
| movl C(r_affinetridesc)+atd_pfinalverts,%edi |
| movl mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx |
| shll $(fv_shift),%ecx |
| movl mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx |
| shll $(fv_shift),%edx |
| movl mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx |
| shll $(fv_shift),%ebx |
| addl %edi,%ecx |
| addl %edi,%edx |
| addl %edi,%ebx |
| |
| // d_xdenom = (index0->v[1]-index1->v[1]) * |
| // (index0->v[0]-index2->v[0]) - |
| // (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]); |
| movl fv_v+4(%ecx),%eax |
| movl fv_v+0(%ecx),%esi |
| subl fv_v+4(%edx),%eax |
| subl fv_v+0(%ebx),%esi |
| imull %esi,%eax |
| movl fv_v+0(%ecx),%esi |
| movl fv_v+4(%ecx),%edi |
| subl fv_v+0(%edx),%esi |
| subl fv_v+4(%ebx),%edi |
| imull %esi,%edi |
| subl %edi,%eax |
| |
| // if (d_xdenom >= 0) |
| // { |
| // continue; |
| jns LNextTri |
| |
| // } |
| |
| movl %eax,C(d_xdenom) |
| fildl C(d_xdenom) |
| |
| // r_p0[0] = index0->v[0]; // u |
| // r_p0[1] = index0->v[1]; // v |
| // r_p0[2] = index0->v[2]; // s |
| // r_p0[3] = index0->v[3]; // t |
| // r_p0[4] = index0->v[4]; // light |
| // r_p0[5] = index0->v[5]; // iz |
| movl fv_v+0(%ecx),%eax |
| movl fv_v+4(%ecx),%esi |
| movl %eax,C(r_p0)+0 |
| movl %esi,C(r_p0)+4 |
| movl fv_v+8(%ecx),%eax |
| movl fv_v+12(%ecx),%esi |
| movl %eax,C(r_p0)+8 |
| movl %esi,C(r_p0)+12 |
| movl fv_v+16(%ecx),%eax |
| movl fv_v+20(%ecx),%esi |
| movl %eax,C(r_p0)+16 |
| movl %esi,C(r_p0)+20 |
| |
| fdivrs float_1 |
| |
| // r_p1[0] = index1->v[0]; |
| // r_p1[1] = index1->v[1]; |
| // r_p1[2] = index1->v[2]; |
| // r_p1[3] = index1->v[3]; |
| // r_p1[4] = index1->v[4]; |
| // r_p1[5] = index1->v[5]; |
| movl fv_v+0(%edx),%eax |
| movl fv_v+4(%edx),%esi |
| movl %eax,C(r_p1)+0 |
| movl %esi,C(r_p1)+4 |
| movl fv_v+8(%edx),%eax |
| movl fv_v+12(%edx),%esi |
| movl %eax,C(r_p1)+8 |
| movl %esi,C(r_p1)+12 |
| movl fv_v+16(%edx),%eax |
| movl fv_v+20(%edx),%esi |
| movl %eax,C(r_p1)+16 |
| movl %esi,C(r_p1)+20 |
| |
| // r_p2[0] = index2->v[0]; |
| // r_p2[1] = index2->v[1]; |
| // r_p2[2] = index2->v[2]; |
| // r_p2[3] = index2->v[3]; |
| // r_p2[4] = index2->v[4]; |
| // r_p2[5] = index2->v[5]; |
| movl fv_v+0(%ebx),%eax |
| movl fv_v+4(%ebx),%esi |
| movl %eax,C(r_p2)+0 |
| movl %esi,C(r_p2)+4 |
| movl fv_v+8(%ebx),%eax |
| movl fv_v+12(%ebx),%esi |
| movl %eax,C(r_p2)+8 |
| movl %esi,C(r_p2)+12 |
| movl fv_v+16(%ebx),%eax |
| movl fv_v+20(%ebx),%esi |
| movl %eax,C(r_p2)+16 |
| movl C(r_affinetridesc)+atd_ptriangles,%edi |
| movl %esi,C(r_p2)+20 |
| movl mtri_facesfront-mtri_size(%edi,%ebp,1),%eax |
| |
| // if (!ptri->facesfront) |
| // { |
| testl %eax,%eax |
| jnz LFacesFront |
| |
| // if (index0->flags & ALIAS_ONSEAM) |
| // r_p0[2] += r_affinetridesc.seamfixupX16; |
| movl fv_flags(%ecx),%eax |
| movl fv_flags(%edx),%esi |
| movl fv_flags(%ebx),%edi |
| testl $(ALIAS_ONSEAM),%eax |
| movl C(r_affinetridesc)+atd_seamfixupX16,%eax |
| jz LOnseamDone0 |
| addl %eax,C(r_p0)+8 |
| LOnseamDone0: |
| |
| // if (index1->flags & ALIAS_ONSEAM) |
| // r_p1[2] += r_affinetridesc.seamfixupX16; |
| testl $(ALIAS_ONSEAM),%esi |
| jz LOnseamDone1 |
| addl %eax,C(r_p1)+8 |
| LOnseamDone1: |
| |
| // if (index2->flags & ALIAS_ONSEAM) |
| // r_p2[2] += r_affinetridesc.seamfixupX16; |
| testl $(ALIAS_ONSEAM),%edi |
| jz LOnseamDone2 |
| addl %eax,C(r_p2)+8 |
| LOnseamDone2: |
| |
| // } |
| |
| LFacesFront: |
| |
| fstps C(d_xdenom) |
| |
| // D_PolysetSetEdgeTable (); |
| // D_RasterizeAliasPolySmooth (); |
| call C(D_PolysetSetEdgeTable) |
| call C(D_RasterizeAliasPolySmooth) |
| |
| LNextTri: |
| movl C(r_affinetridesc)+atd_ptriangles,%esi |
| subl $16,%ebp |
| jnz LNDLoop |
| // } |
| |
| popl %edi |
| popl %esi |
| popl %ebx |
| popl %ebp |
| |
| addl $(SPAN_SIZE),%esp |
| |
| ret |
| |
| |
| #endif // id386 |
| |