| Fix PIC issues in mmx routines |
| |
| --- video/mmxflags_asm.S |
| +++ video/mmxflags_asm.S |
| @@ -1,11 +1,6 @@ |
| |
| #if defined(i386) && defined(USE_MMX) |
| |
| -.data |
| - .align 16 |
| - .type flags,@object |
| -flags: .long 0 |
| - |
| .text |
| .align 4 |
| .globl cpu_flags |
| @@ -40,16 +35,13 @@ cpu_flags: |
| xorl %ecx,%eax |
| je cpu_flags.L1 |
| |
| - pusha |
| + pushl %ebx |
| |
| movl $1,%eax |
| cpuid |
| + movl %edx,%eax |
| |
| - movl %edx,flags |
| - |
| - popa |
| - |
| - movl flags,%eax |
| + popl %ebx |
| |
| cpu_flags.L1: |
| ret |
| --- video/mmxidct_asm.S |
| +++ video/mmxidct_asm.S |
| @@ -31,11 +31,6 @@ preSC: .short 16384,22725,21407,19266, |
| x0005000200010001: |
| .long 0x00010001,0x00050002 |
| .align 8 |
| - .type x0040000000000000,@object |
| - .size x0040000000000000,8 |
| -x0040000000000000: |
| - .long 0, 0x00400000 |
| - .align 8 |
| .type x5a825a825a825a82,@object |
| .size x5a825a825a825a82,8 |
| x5a825a825a825a82: |
| @@ -80,8 +75,21 @@ scratch7: |
| x0: |
| .long 0,0 |
| .align 8 |
| + |
| .text |
| .align 4 |
| + |
| +#ifdef __PIC__ |
| +# undef __i686 /* gcc define gets in our way */ |
| +# define MUNG(sym) sym ## @GOTOFF(%ebx) |
| +# define INIT_PIC() \ |
| + call __i686.get_pc_thunk.bx ; \ |
| + addl $_GLOBAL_OFFSET_TABLE_, %ebx |
| +#else |
| +# define MUNG(sym) sym |
| +# define INIT_PIC() |
| +#endif |
| + |
| .globl IDCT_mmx |
| .type IDCT_mmx,@function |
| IDCT_mmx: |
| @@ -92,8 +100,9 @@ IDCT_mmx: |
| pushl %edx |
| pushl %esi |
| pushl %edi |
| + INIT_PIC() |
| movl 8(%ebp),%esi /* source matrix */ |
| - leal preSC, %ecx |
| + leal MUNG(preSC), %ecx |
| /* column 0: even part |
| * use V4, V12, V0, V8 to produce V22..V25 |
| */ |
| @@ -109,7 +118,7 @@ IDCT_mmx: |
| movq %mm1, %mm2 /* added 11/1/96 */ |
| pmulhw 8*8(%esi),%mm5 /* V8 */ |
| psubsw %mm0, %mm1 /* V16 */ |
| - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ |
| + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */ |
| paddsw %mm0, %mm2 /* V17 */ |
| movq %mm2, %mm0 /* duplicate V17 */ |
| psraw $1, %mm2 /* t75=t82 */ |
| @@ -150,7 +159,7 @@ IDCT_mmx: |
| paddsw %mm0, %mm3 /* V29 ; free mm0 */ |
| movq %mm7, %mm1 /* duplicate V26 */ |
| psraw $1, %mm3 /* t91=t94 */ |
| - pmulhw x539f539f539f539f,%mm7 /* V33 */ |
| + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */ |
| psraw $1, %mm1 /* t96 */ |
| movq %mm5, %mm0 /* duplicate V2 */ |
| psraw $2, %mm4 /* t85=t87 */ |
| @@ -158,15 +167,15 @@ IDCT_mmx: |
| psubsw %mm4, %mm0 /* V28 ; free mm4 */ |
| movq %mm0, %mm2 /* duplicate V28 */ |
| psraw $1, %mm5 /* t90=t93 */ |
| - pmulhw x4546454645464546,%mm0 /* V35 */ |
| + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */ |
| psraw $1, %mm2 /* t97 */ |
| movq %mm5, %mm4 /* duplicate t90=t93 */ |
| psubsw %mm2, %mm1 /* V32 ; free mm2 */ |
| - pmulhw x61f861f861f861f8,%mm1 /* V36 */ |
| + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */ |
| psllw $1, %mm7 /* t107 */ |
| paddsw %mm3, %mm5 /* V31 */ |
| psubsw %mm3, %mm4 /* V30 ; free mm3 */ |
| - pmulhw x5a825a825a825a82,%mm4 /* V34 */ |
| + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */ |
| nop |
| psubsw %mm1, %mm0 /* V38 */ |
| psubsw %mm7, %mm1 /* V37 ; free mm7 */ |
| @@ -233,7 +242,7 @@ IDCT_mmx: |
| psubsw %mm7, %mm1 /* V50 */ |
| pmulhw 8*9(%esi), %mm5 /* V9 */ |
| paddsw %mm7, %mm2 /* V51 */ |
| - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ |
| + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */ |
| movq %mm2, %mm6 /* duplicate V51 */ |
| psraw $1, %mm2 /* t138=t144 */ |
| movq %mm3, %mm4 /* duplicate V1 */ |
| @@ -274,11 +283,11 @@ IDCT_mmx: |
| * even more by doing the correction step in a later stage when the number |
| * is actually multiplied by 16 |
| */ |
| - paddw x0005000200010001, %mm4 |
| + paddw MUNG(x0005000200010001), %mm4 |
| psubsw %mm6, %mm3 /* V60 ; free mm6 */ |
| psraw $1, %mm0 /* t154=t156 */ |
| movq %mm3, %mm1 /* duplicate V60 */ |
| - pmulhw x539f539f539f539f, %mm1 /* V67 */ |
| + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */ |
| movq %mm5, %mm6 /* duplicate V3 */ |
| psraw $2, %mm4 /* t148=t150 */ |
| paddsw %mm4, %mm5 /* V61 */ |
| @@ -287,13 +296,13 @@ IDCT_mmx: |
| psllw $1, %mm1 /* t169 */ |
| paddsw %mm0, %mm5 /* V65 -> result */ |
| psubsw %mm0, %mm4 /* V64 ; free mm0 */ |
| - pmulhw x5a825a825a825a82, %mm4 /* V68 */ |
| + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */ |
| psraw $1, %mm3 /* t158 */ |
| psubsw %mm6, %mm3 /* V66 */ |
| movq %mm5, %mm2 /* duplicate V65 */ |
| - pmulhw x61f861f861f861f8, %mm3 /* V70 */ |
| + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */ |
| psllw $1, %mm6 /* t165 */ |
| - pmulhw x4546454645464546, %mm6 /* V69 */ |
| + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */ |
| psraw $1, %mm2 /* t172 */ |
| /* moved from next block */ |
| movq 8*5(%esi), %mm0 /* V56 */ |
| @@ -418,7 +427,7 @@ IDCT_mmx: |
| * movq 8*13(%esi), %mm4 tmt13 |
| */ |
| psubsw %mm4, %mm3 /* V134 */ |
| - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ |
| + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */ |
| movq 8*9(%esi), %mm6 /* tmt9 */ |
| paddsw %mm4, %mm5 /* V135 ; mm4 free */ |
| movq %mm0, %mm4 /* duplicate tmt1 */ |
| @@ -447,17 +456,17 @@ IDCT_mmx: |
| psubsw %mm7, %mm0 /* V144 */ |
| movq %mm0, %mm3 /* duplicate V144 */ |
| paddsw %mm7, %mm2 /* V147 ; free mm7 */ |
| - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ |
| + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */ |
| movq %mm1, %mm7 /* duplicate tmt3 */ |
| paddsw %mm5, %mm7 /* V145 */ |
| psubsw %mm5, %mm1 /* V146 ; free mm5 */ |
| psubsw %mm1, %mm3 /* V150 */ |
| movq %mm7, %mm5 /* duplicate V145 */ |
| - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ |
| + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */ |
| psubsw %mm2, %mm5 /* V148 */ |
| - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ |
| + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */ |
| psllw $2, %mm0 /* t311 */ |
| - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ |
| + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */ |
| paddsw %mm2, %mm7 /* V149 ; free mm2 */ |
| psllw $1, %mm1 /* t313 */ |
| nop /* without the nop - freeze here for one clock */ |
| @@ -483,7 +492,7 @@ IDCT_mmx: |
| paddsw %mm3, %mm6 /* V164 ; free mm3 */ |
| movq %mm4, %mm3 /* duplicate V142 */ |
| psubsw %mm5, %mm4 /* V165 ; free mm5 */ |
| - movq %mm2, scratch7 /* out7 */ |
| + movq %mm2, MUNG(scratch7) /* out7 */ |
| psraw $4, %mm6 |
| psraw $4, %mm4 |
| paddsw %mm5, %mm3 /* V162 */ |
| @@ -494,11 +503,11 @@ IDCT_mmx: |
| */ |
| movq %mm6, 8*9(%esi) /* out9 */ |
| paddsw %mm1, %mm0 /* V161 */ |
| - movq %mm3, scratch5 /* out5 */ |
| + movq %mm3, MUNG(scratch5) /* out5 */ |
| psubsw %mm1, %mm5 /* V166 ; free mm1 */ |
| movq %mm4, 8*11(%esi) /* out11 */ |
| psraw $4, %mm5 |
| - movq %mm0, scratch3 /* out3 */ |
| + movq %mm0, MUNG(scratch3) /* out3 */ |
| movq %mm2, %mm4 /* duplicate V140 */ |
| movq %mm5, 8*13(%esi) /* out13 */ |
| paddsw %mm7, %mm2 /* V160 */ |
| @@ -508,7 +517,7 @@ IDCT_mmx: |
| /* moved from the next block */ |
| movq 8*3(%esi), %mm7 |
| psraw $4, %mm4 |
| - movq %mm2, scratch1 /* out1 */ |
| + movq %mm2, MUNG(scratch1) /* out1 */ |
| /* moved from the next block */ |
| movq %mm0, %mm1 |
| movq %mm4, 8*15(%esi) /* out15 */ |
| @@ -565,15 +574,15 @@ IDCT_mmx: |
| paddsw %mm4, %mm3 /* V113 ; free mm4 */ |
| movq %mm0, %mm4 /* duplicate V110 */ |
| paddsw %mm1, %mm2 /* V111 */ |
| - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ |
| + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */ |
| psubsw %mm1, %mm5 /* V112 ; free mm1 */ |
| psubsw %mm5, %mm4 /* V116 */ |
| movq %mm2, %mm1 /* duplicate V111 */ |
| - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ |
| + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */ |
| psubsw %mm3, %mm2 /* V114 */ |
| - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ |
| + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */ |
| paddsw %mm3, %mm1 /* V115 ; free mm3 */ |
| - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ |
| + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */ |
| psllw $2, %mm0 /* t266 */ |
| movq %mm1, (%esi) /* save V115 */ |
| psllw $1, %mm5 /* t268 */ |
| @@ -591,7 +600,7 @@ IDCT_mmx: |
| movq %mm6, %mm3 /* duplicate tmt4 */ |
| psubsw %mm0, %mm6 /* V100 */ |
| paddsw %mm0, %mm3 /* V101 ; free mm0 */ |
| - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ |
| + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */ |
| movq %mm7, %mm5 /* duplicate tmt0 */ |
| movq 8*8(%esi), %mm1 /* tmt8 */ |
| paddsw %mm1, %mm7 /* V103 */ |
| @@ -625,10 +634,10 @@ IDCT_mmx: |
| movq 8*2(%esi), %mm3 /* V123 */ |
| paddsw %mm4, %mm7 /* out0 */ |
| /* moved up from next block */ |
| - movq scratch3, %mm0 |
| + movq MUNG(scratch3), %mm0 |
| psraw $4, %mm7 |
| /* moved up from next block */ |
| - movq scratch5, %mm6 |
| + movq MUNG(scratch5), %mm6 |
| psubsw %mm4, %mm1 /* out14 ; free mm4 */ |
| paddsw %mm3, %mm5 /* out2 */ |
| psraw $4, %mm1 |
| @@ -639,7 +648,7 @@ IDCT_mmx: |
| movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ |
| psraw $4, %mm2 |
| /* moved up to the prev block */ |
| - movq scratch7, %mm4 |
| + movq MUNG(scratch7), %mm4 |
| /* moved up to the prev block */ |
| psraw $4, %mm0 |
| movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ |
| @@ -647,13 +656,13 @@ IDCT_mmx: |
| psraw $4, %mm6 |
| /* move back the data to its correct place |
| * moved up to the prev block |
| - * movq scratch3, %mm0 |
| - * movq scratch5, %mm6 |
| - * movq scratch7, %mm4 |
| + * movq MUNG(scratch3), %mm0 |
| + * movq MUNG(scratch5), %mm6 |
| + * movq MUNG(scratch7), %mm4 |
| * psraw $4, %mm0 |
| * psraw $4, %mm6 |
| */ |
| - movq scratch1, %mm1 |
| + movq MUNG(scratch1), %mm1 |
| psraw $4, %mm4 |
| movq %mm0, 8*3(%esi) /* out3 */ |
| psraw $4, %mm1 |
| @@ -671,6 +680,15 @@ IDCT_mmx: |
| .Lfe1: |
| .size IDCT_mmx,.Lfe1-IDCT_mmx |
| |
| +#ifdef __PIC__ |
| + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits |
| +.globl __i686.get_pc_thunk.bx |
| + .hidden __i686.get_pc_thunk.bx |
| + .type __i686.get_pc_thunk.bx,@function |
| + __i686.get_pc_thunk.bx: |
| + movl (%esp), %ebx |
| + ret |
| +#endif |
| |
| #endif /* i386 && USE_MMX */ |
| |