blob: 13c4a92e1399451b4c35dcde1f68867ed04eef2f [file] [log] [blame]
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2006 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
/* This is the software implementation of the YUV video overlay support */
/* This code was derived from code carrying the following copyright notices:
* Copyright (c) 1995 The Regents of the University of California.
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without written agreement is
* hereby granted, provided that the above copyright notice and the following
* two paragraphs appear in all copies of this software.
*
* IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
* OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
* CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
* Copyright (c) 1995 Erik Corry
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without written agreement is
* hereby granted, provided that the above copyright notice and the following
* two paragraphs appear in all copies of this software.
*
* IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
* SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
* THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
* BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
* UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
* Portions of this software Copyright (c) 1995 Brown University.
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without written agreement
* is hereby granted, provided that the above copyright notice and the
* following two paragraphs appear in all copies of this software.
*
* IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
* OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
* UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
* BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
* SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*/
#include "SDL_video.h"
#include "SDL_cpuinfo.h"
#include "SDL_stretch_c.h"
#include "SDL_yuvfuncs.h"
#include "SDL_yuv_sw_c.h"
/* The functions used to manipulate software video overlays.
 * SDL_CreateYUV_SW() installs this table as overlay->hwfuncs.
 * The initializers are positional, so their order (lock, unlock, display,
 * free) has to match the member order of struct private_yuvhwfuncs, which
 * is declared outside this file.
 */
static struct private_yuvhwfuncs sw_yuvfuncs = {
SDL_LockYUV_SW,
SDL_UnlockYUV_SW,
SDL_DisplayYUV_SW,
SDL_FreeYUV_SW
};
/* Private per-overlay state of the software YUV implementation:
 * pixel storage, RGB conversion lookup tables and the selected converters.
 * Allocated by SDL_CreateYUV_SW() and released by SDL_FreeYUV_SW().
 */
struct private_yuvhwdata {
/* Scratch RGB surface, created on first use by SDL_DisplayYUV_SW() when the
   source is clipped or the scale is neither 1:1 nor 2:1; NULL until then */
SDL_Surface *stretch;
/* Surface the overlay is converted onto (as passed to SDL_CreateYUV_SW) */
SDL_Surface *display;
/* One allocation of width*height*2 bytes holding all the YUV data */
Uint8 *pixels;
/* Four consecutive 256-entry tables: Cr->R, Cr->G, Cb->G and Cb->B offsets */
int *colortab;
/* Three consecutive 768-entry tables (R, G, B) mapping a channel value to
   its bits in a display pixel; the middle 256 entries hold the real range,
   the outer entries repeat the end values so no clamping is needed */
Uint32 *rgb_2_pix;
/* Converter used for 1:1 output; 'mod' is the number of pixels to skip at
   the end of each output row */
void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod );
/* Converter used when the destination is exactly twice the source size */
void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod );
/* These are just so we don't have to allocate them separately:
   overlay->pitches and overlay->pixels point at these arrays */
Uint16 pitches[3];
Uint8 *planes[3];
};
/* The colorspace conversion functions */
/* MMX converters for planar YUV at 1:1 scale.  They are only declared when
 * building with GCC 3 or later for i386, with optimization enabled and
 * SDL_ASSEMBLY_ROUTINES set; their definitions are not in this file.
 * SDL_CreateYUV_SW() selects them only for the 565 and 0x00RRGGBB layouts.
 */
#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod );
extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod );
#endif
/*
 * Convert planar YUV (separate lum, cr and cb planes, one chroma sample per
 * 2x2 block of luminance samples) to 16-bit pixels at 1:1 scale.
 *
 * colortab  - the four 256-entry chroma offset tables
 * rgb_2_pix - the three 768-entry channel-to-pixel tables
 * out       - first destination pixel
 * rows/cols - size of the luminance plane
 * mod       - pixels to skip at the end of every output row
 *
 * Two rows are converted per pass so each chroma sample is looked up once.
 */
static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned short *dst_top = (unsigned short *) out;
    unsigned short *dst_bot = dst_top + cols + mod;
    unsigned char *lum_bot = lum + cols;
    const int pairs_per_row = cols / 2;
    /* After a pass each output pointer sits at the end of its row and has
       to hop over the padding, the other row and that row's padding. */
    const int dst_skip = mod + (cols + mod);
    int passes_left;
    int pairs_left;

    for ( passes_left = rows / 2; passes_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this block's chroma;
               they are indexed directly by luminance. */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            ++cr;
            ++cb;

            /* Upper row of the 2x2 block */
            L = lum[0];
            dst_top[0] = (unsigned short)(r_lut[L] | g_lut[L] | b_lut[L]);
            L = lum[1];
            dst_top[1] = (unsigned short)(r_lut[L] | g_lut[L] | b_lut[L]);
            lum += 2;
            dst_top += 2;

            /* Lower row of the 2x2 block */
            L = lum_bot[0];
            dst_bot[0] = (unsigned short)(r_lut[L] | g_lut[L] | b_lut[L]);
            L = lum_bot[1];
            dst_bot[1] = (unsigned short)(r_lut[L] | g_lut[L] | b_lut[L]);
            lum_bot += 2;
            dst_bot += 2;
        }

        /* Every pointer is now at the start of the next line, but that
           line was handled by its partner: move on one more line. */
        lum += cols;
        lum_bot += cols;
        dst_top += dst_skip;
        dst_bot += dst_skip;
    }
}
/*
 * Convert planar YUV (separate lum, cr and cb planes, one chroma sample per
 * 2x2 block of luminance samples) to packed 24-bit pixels at 1:1 scale.
 *
 * Each pixel is stored as three bytes, least significant byte of the
 * looked-up value first.  'mod' is the number of pixels (not bytes) to
 * skip at the end of every output row.
 */
static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned char *dst_top = out;
    unsigned char *dst_bot = dst_top + cols*3 + mod*3;
    unsigned char *lum_bot = lum + cols;
    const int pairs_per_row = cols / 2;
    /* Bytes from the end of one converted row to the start of the row
       two lines further down. */
    const int dst_skip = (mod + (cols + mod)) * 3;
    unsigned int value;
    int passes_left;
    int pairs_left;
    int k;

    for ( passes_left = rows / 2; passes_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this block's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            ++cr;
            ++cb;

            /* Upper row of the 2x2 block */
            for ( k = 0; k < 2; ++k ) {
                L = *lum++;
                value = (r_lut[L] | g_lut[L] | b_lut[L]);
                *dst_top++ = (value      ) & 0xFF;
                *dst_top++ = (value >>  8) & 0xFF;
                *dst_top++ = (value >> 16) & 0xFF;
            }

            /* Lower row of the 2x2 block */
            for ( k = 0; k < 2; ++k ) {
                L = *lum_bot++;
                value = (r_lut[L] | g_lut[L] | b_lut[L]);
                *dst_bot++ = (value      ) & 0xFF;
                *dst_bot++ = (value >>  8) & 0xFF;
                *dst_bot++ = (value >> 16) & 0xFF;
            }
        }

        /* Skip the line the partner pointer has just converted */
        lum += cols;
        lum_bot += cols;
        dst_top += dst_skip;
        dst_bot += dst_skip;
    }
}
/*
 * Convert planar YUV (separate lum, cr and cb planes, one chroma sample per
 * 2x2 block of luminance samples) to 32-bit pixels at 1:1 scale.
 *
 * 'mod' is the number of pixels to skip at the end of every output row.
 * Two rows are converted per pass so each chroma sample is looked up once.
 */
static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int *dst_top = (unsigned int *) out;
    unsigned int *dst_bot = dst_top + cols + mod;
    unsigned char *lum_bot = lum + cols;
    const int pairs_per_row = cols / 2;
    /* Pixels from the end of one converted row to the start of the row
       two lines further down. */
    const int dst_skip = mod + (cols + mod);
    int passes_left;
    int pairs_left;

    for ( passes_left = rows / 2; passes_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this block's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            ++cr;
            ++cb;

            /* Upper row of the 2x2 block */
            L = lum[0];
            dst_top[0] = (r_lut[L] | g_lut[L] | b_lut[L]);
            L = lum[1];
            dst_top[1] = (r_lut[L] | g_lut[L] | b_lut[L]);
            lum += 2;
            dst_top += 2;

            /* Lower row of the 2x2 block */
            L = lum_bot[0];
            dst_bot[0] = (r_lut[L] | g_lut[L] | b_lut[L]);
            L = lum_bot[1];
            dst_bot[1] = (r_lut[L] | g_lut[L] | b_lut[L]);
            lum_bot += 2;
            dst_bot += 2;
        }

        /* Skip the line the partner pointer has just converted */
        lum += cols;
        lum_bot += cols;
        dst_top += dst_skip;
        dst_bot += dst_skip;
    }
}
/*
 * Convert planar YUV to 16-bit pixels at twice the source size.
 *
 * This relies on a trick: for 16-bit displays the rgb_2_pix entries carry
 * the pixel value in both the lower and the upper 16 bits, so storing one
 * 32-bit word writes two identical neighbouring pixels and the horizontal
 * doubling comes (almost) for free.  The same word is also stored one
 * output row further down for the vertical doubling.
 *
 * 'mod' is the number of 16-bit pixels to skip at the end of every output
 * row; it is halved here because the output is addressed in 32-bit words.
 */
static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    /* Length of one output row in 32-bit words, padding included */
    const int stride = cols+(mod/2);
    unsigned int *dst_a = (unsigned int *) out;
    unsigned int *dst_b = dst_a + 2*stride;
    unsigned char *lum_b = lum + cols;
    const int pairs_per_row = cols / 2;
    /* Words from the end of the written part of a row to the start of the
       row four output lines further down. */
    const int dst_skip = (stride * 3) + (mod/2);
    unsigned int pixel;
    int passes_left;
    int pairs_left;

    for ( passes_left = rows / 2; passes_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this block's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            ++cr;
            ++cb;

            /* First source row of the block */
            L = *lum++;
            pixel = (r_lut[L] | g_lut[L] | b_lut[L]);
            dst_a[stride] = pixel;
            dst_a[0] = pixel;
            dst_a++;
            L = *lum++;
            pixel = (r_lut[L] | g_lut[L] | b_lut[L]);
            dst_a[stride] = pixel;
            dst_a[0] = pixel;
            dst_a++;

            /* Second source row of the block */
            L = *lum_b++;
            pixel = (r_lut[L] | g_lut[L] | b_lut[L]);
            dst_b[stride] = pixel;
            dst_b[0] = pixel;
            dst_b++;
            L = *lum_b++;
            pixel = (r_lut[L] | g_lut[L] | b_lut[L]);
            dst_b[stride] = pixel;
            dst_b[0] = pixel;
            dst_b++;
        }

        /* Skip the source line the partner pointer has just converted */
        lum += cols;
        lum_b += cols;
        dst_a += dst_skip;
        dst_b += dst_skip;
    }
}
/*
 * Convert planar YUV to packed 24-bit pixels at twice the source size.
 *
 * Every source pixel becomes a 2x2 block of identical output pixels, each
 * stored as three bytes with the least significant byte of the looked-up
 * value first.  'mod' is the number of pixels (not bytes) to skip at the
 * end of every output row.
 */
static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    /* Length of one output row in bytes, padding included */
    const int stride = (cols*2 + mod) * 3;
    unsigned char *dst_a = out;
    unsigned char *dst_b = dst_a + 2*stride;
    unsigned char *lum_b = lum + cols;
    const int pairs_per_row = cols / 2;
    /* Bytes from the end of the written part of a row to the start of the
       row four output lines further down. */
    const int dst_skip = stride*3 + mod*3;
    unsigned int value;
    unsigned char byte;
    int passes_left;
    int pairs_left;
    int k, ch;

    for ( passes_left = rows / 2; passes_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this block's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            ++cr;
            ++cb;

            /* First source row of the block */
            for ( k = 0; k < 2; ++k ) {
                L = *lum++;
                value = (r_lut[L] | g_lut[L] | b_lut[L]);
                for ( ch = 0; ch < 3; ++ch ) {
                    byte = (value >> (8*ch)) & 0xFF;
                    dst_a[ch] = byte;
                    dst_a[3+ch] = byte;
                    dst_a[stride+ch] = byte;
                    dst_a[stride+3+ch] = byte;
                }
                dst_a += 2*3;
            }

            /* Second source row of the block */
            for ( k = 0; k < 2; ++k ) {
                L = *lum_b++;
                value = (r_lut[L] | g_lut[L] | b_lut[L]);
                for ( ch = 0; ch < 3; ++ch ) {
                    byte = (value >> (8*ch)) & 0xFF;
                    dst_b[ch] = byte;
                    dst_b[3+ch] = byte;
                    dst_b[stride+ch] = byte;
                    dst_b[stride+3+ch] = byte;
                }
                dst_b += 2*3;
            }
        }

        /* Skip the source line the partner pointer has just converted */
        lum += cols;
        lum_b += cols;
        dst_a += dst_skip;
        dst_b += dst_skip;
    }
}
/*
 * Convert planar YUV to 32-bit pixels at twice the source size.
 *
 * Every source pixel becomes a 2x2 block of identical output pixels.
 * 'mod' is the number of pixels to skip at the end of every output row.
 */
static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    /* Length of one output row in pixels, padding included */
    const int stride = cols*2+mod;
    unsigned int *dst_a = (unsigned int *) out;
    unsigned int *dst_b = dst_a + 2*stride;
    unsigned char *lum_b = lum + cols;
    const int pairs_per_row = cols / 2;
    /* Pixels from the end of the written part of a row to the start of
       the row four output lines further down. */
    const int dst_skip = (stride * 3) + mod;
    unsigned int pixel;
    int passes_left;
    int pairs_left;
    int k;

    for ( passes_left = rows / 2; passes_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this block's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            ++cr;
            ++cb;

            /* First source row of the block */
            for ( k = 0; k < 2; ++k ) {
                L = *lum++;
                pixel = (r_lut[L] | g_lut[L] | b_lut[L]);
                dst_a[0] = pixel;
                dst_a[1] = pixel;
                dst_a[stride] = pixel;
                dst_a[stride+1] = pixel;
                dst_a += 2;
            }

            /* Second source row of the block */
            for ( k = 0; k < 2; ++k ) {
                L = *lum_b++;
                pixel = (r_lut[L] | g_lut[L] | b_lut[L]);
                dst_b[0] = pixel;
                dst_b[1] = pixel;
                dst_b[stride] = pixel;
                dst_b[stride+1] = pixel;
                dst_b += 2;
            }
        }

        /* Skip the source line the partner pointer has just converted */
        lum += cols;
        lum_b += cols;
        dst_a += dst_skip;
        dst_b += dst_skip;
    }
}
/*
 * Convert packed YUV (two luminance samples sharing one Cr and one Cb
 * sample in every four bytes) to 16-bit pixels at 1:1 scale.
 *
 * lum, cr and cb all point into the same packed buffer: luminance samples
 * are two bytes apart and chroma samples four bytes apart.  'mod' is the
 * number of pixels to skip at the end of every output row.
 */
static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned short *dst = (unsigned short *) out;
    const int pairs_per_row = cols / 2;
    int rows_left;
    int pairs_left;

    for ( rows_left = rows; rows_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this pair's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            cr += 4;
            cb += 4;

            L = lum[0];
            dst[0] = (unsigned short)(r_lut[L] | g_lut[L] | b_lut[L]);
            L = lum[2];
            dst[1] = (unsigned short)(r_lut[L] | g_lut[L] | b_lut[L]);
            lum += 4;
            dst += 2;
        }
        dst += mod;
    }
}
/*
 * Convert packed YUV (two luminance samples sharing one Cr and one Cb
 * sample in every four bytes) to packed 24-bit pixels at 1:1 scale.
 *
 * Each pixel is stored as three bytes, least significant byte of the
 * looked-up value first.  'mod' is the number of pixels (not bytes) to
 * skip at the end of every output row.
 */
static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned char *dst = (unsigned char *) out;
    const int pairs_per_row = cols / 2;
    /* Row padding expressed in bytes */
    const int pad_bytes = mod * 3;
    unsigned int value;
    int rows_left;
    int pairs_left;
    int k;

    for ( rows_left = rows; rows_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this pair's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            cr += 4;
            cb += 4;

            for ( k = 0; k < 2; ++k ) {
                L = *lum;
                lum += 2;
                value = (r_lut[L] | g_lut[L] | b_lut[L]);
                *dst++ = (value      ) & 0xFF;
                *dst++ = (value >>  8) & 0xFF;
                *dst++ = (value >> 16) & 0xFF;
            }
        }
        dst += pad_bytes;
    }
}
/*
 * Convert packed YUV (two luminance samples sharing one Cr and one Cb
 * sample in every four bytes) to 32-bit pixels at 1:1 scale.
 *
 * lum, cr and cb all point into the same packed buffer: luminance samples
 * are two bytes apart and chroma samples four bytes apart.  'mod' is the
 * number of pixels to skip at the end of every output row.
 */
static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int *dst = (unsigned int *) out;
    const int pairs_per_row = cols / 2;
    int rows_left;
    int pairs_left;

    for ( rows_left = rows; rows_left-- != 0; ) {
        for ( pairs_left = pairs_per_row; pairs_left-- != 0; ) {
            /* Table slices already shifted by this pair's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            cr += 4;
            cb += 4;

            L = lum[0];
            dst[0] = (r_lut[L] | g_lut[L] | b_lut[L]);
            L = lum[2];
            dst[1] = (r_lut[L] | g_lut[L] | b_lut[L]);
            lum += 4;
            dst += 2;
        }
        dst += mod;
    }
}
/*
 * Convert packed YUV (two luminance samples sharing one Cr and one Cb
 * sample in every four bytes) to 16-bit pixels at twice the source size.
 *
 * This relies on a trick: for 16-bit displays the rgb_2_pix entries carry
 * the pixel value in both the lower and the upper 16 bits, so storing one
 * 32-bit word writes two identical neighbouring pixels and the horizontal
 * doubling comes (almost) for free.  The same word is also stored one
 * output row further down for the vertical doubling.
 *
 * 'mod' is the number of 16-bit pixels to skip at the end of every output
 * row; it is halved here because the output is addressed in 32-bit words.
 */
static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int* row = (unsigned int*) out;
    /* Length of one output row in 32-bit words, padding included */
    const int next_row = cols+(mod/2);
    /* Each source row fills two output rows.  After the inner loop 'row'
       is at the end of the written part of the first one, so it has to
       move past that row's padding and the whole second row.  Adding only
       next_row here would skip the padding once instead of twice and
       shear the image whenever mod is not zero. */
    const int row_skip = next_row + (mod/2);
    int x, y;
    int cols_2 = cols / 2;

    y = rows;
    while( y-- )
    {
        x = cols_2;
        while( x-- )
        {
            /* Table slices already shifted by this pair's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            cr += 4; cb += 4;

            L = *lum; lum += 2;
            row[0] = row[next_row] = (r_lut[L] | g_lut[L] | b_lut[L]);
            row++;

            L = *lum; lum += 2;
            row[0] = row[next_row] = (r_lut[L] | g_lut[L] | b_lut[L]);
            row++;
        }
        row += row_skip;
    }
}
/*
 * Convert packed YUV (two luminance samples sharing one Cr and one Cb
 * sample in every four bytes) to packed 24-bit pixels at twice the source
 * size.
 *
 * Every source pixel becomes a 2x2 block of identical output pixels, each
 * stored as three bytes with the least significant byte of the looked-up
 * value first.  'mod' is the number of pixels (not bytes) to skip at the
 * end of every output row.
 */
static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int value;
    unsigned char byte;
    unsigned char* row = out;
    /* Length of one output row in bytes, padding included */
    const int next_row = (cols*2 + mod) * 3;
    /* Each source row fills two output rows.  After the inner loop 'row'
       is at the end of the written part of the first one, so it has to
       move past that row's padding and the whole second row.  Adding only
       next_row here would skip the padding once instead of twice and
       shear the image whenever mod is not zero. */
    const int row_skip = next_row + mod*3;
    int x, y;
    int k, ch;
    int cols_2 = cols / 2;

    y = rows;
    while( y-- )
    {
        x = cols_2;
        while( x-- )
        {
            /* Table slices already shifted by this pair's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            cr += 4; cb += 4;

            /* The two luminance samples that share this chroma pair */
            for ( k = 0; k < 2; ++k ) {
                L = *lum; lum += 2;
                value = (r_lut[L] | g_lut[L] | b_lut[L]);
                for ( ch = 0; ch < 3; ++ch ) {
                    byte = (value >> (8*ch)) & 0xFF;
                    row[ch] = row[3+ch] =
                        row[next_row+ch] = row[next_row+3+ch] = byte;
                }
                row += 2*3;
            }
        }
        row += row_skip;
    }
}
/*
 * Convert packed YUV (two luminance samples sharing one Cr and one Cb
 * sample in every four bytes) to 32-bit pixels at twice the source size.
 *
 * Every source pixel becomes a 2x2 block of identical output pixels.
 * 'mod' is the number of pixels to skip at the end of every output row.
 */
static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int* row = (unsigned int*) out;
    /* Length of one output row in pixels, padding included */
    const int next_row = cols*2+mod;
    /* Each source row fills two output rows.  After the inner loop 'row'
       is at the end of the written part of the first one, so it has to
       move past that row's padding and the whole second row.  Adding only
       next_row here would skip the padding once instead of twice and
       shear the image whenever mod is not zero. */
    const int row_skip = next_row + mod;
    unsigned int pixel;
    int x, y;
    int k;
    int cols_2 = cols / 2;

    y = rows;
    while( y-- )
    {
        x = cols_2;
        while( x-- )
        {
            /* Table slices already shifted by this pair's chroma */
            const Uint32 *r_lut = rgb_2_pix + (0*768+256 + colortab[ *cr + 0*256 ]);
            const Uint32 *g_lut = rgb_2_pix + (1*768+256 + colortab[ *cr + 1*256 ]
                                                        + colortab[ *cb + 2*256 ]);
            const Uint32 *b_lut = rgb_2_pix + (2*768+256 + colortab[ *cb + 3*256 ]);
            int L;

            cr += 4; cb += 4;

            /* The two luminance samples that share this chroma pair */
            for ( k = 0; k < 2; ++k ) {
                L = *lum; lum += 2;
                pixel = (r_lut[L] | g_lut[L] | b_lut[L]);
                row[0] = row[1] = row[next_row] = row[next_row+1] = pixel;
                row += 2;
            }
        }
        row += row_skip;
    }
}
/*
 * Count the 1 bits in a Uint32.
 * Examines one bit per iteration; not meant for hot paths.
 */
static int number_of_bits_set( Uint32 a )
{
    int count = 0;

    for ( ; a != 0; a >>= 1 ) {
        count += (int)(a & 1);
    }
    return count;
}
/*
 * Count the 0 bits below the lowest 1 bit of a Uint32.
 * Returns the full width of Uint32 in bits (assuming 8-bit chars) when no
 * bit is set.  Examines one bit per iteration; not meant for hot paths.
 */
static int free_bits_at_bottom( Uint32 a )
{
    int zeros = 0;

    if ( a == 0 ) {
        return (int)(sizeof(Uint32) * 8);
    }
    while ( (a & 1) == 0 ) {
        a >>= 1;
        ++zeros;
    }
    return zeros;
}
/*
 * Create a YUV overlay that is converted to RGB in software.
 *
 * width, height - size of the overlay in pixels
 * format        - one of the SDL_*_OVERLAY formats handled below
 * display       - surface the overlay will be drawn onto; must use
 *                 2, 3 or 4 bytes per pixel
 *
 * Returns the new overlay, or NULL with the SDL error set when the display
 * depth or format is unsupported, the dimensions are unusable, or memory
 * runs out.  The lookup tables are built for the channel masks that
 * 'display' has at the time of the call.
 */
SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
{
    SDL_Overlay *overlay;
    struct private_yuvhwdata *swdata;
    int *Cr_r_tab;
    int *Cr_g_tab;
    int *Cb_g_tab;
    int *Cb_b_tab;
    Uint32 *r_2_pix_alloc;
    Uint32 *g_2_pix_alloc;
    Uint32 *b_2_pix_alloc;
    int i;
    int CR, CB;
    Uint32 Rmask, Gmask, Bmask;
    int Rloss, Gloss, Bloss;
    int Rshift, Gshift, Bshift;
    int max_width = 0;

    /* Only RGB packed pixel conversion supported */
    if ( (display->format->BytesPerPixel != 2) &&
         (display->format->BytesPerPixel != 3) &&
         (display->format->BytesPerPixel != 4) ) {
        SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
        return(NULL);
    }

    /* Verify that we support the format, and find the widest overlay whose
       first pitch still fits the Uint16 pitches[] entries: the pitch is
       the width for the planar formats and twice the width for the packed
       ones. */
    switch (format) {
        case SDL_YV12_OVERLAY:
        case SDL_IYUV_OVERLAY:
            max_width = 0xFFFF;
            break;
        case SDL_YUY2_OVERLAY:
        case SDL_UYVY_OVERLAY:
        case SDL_YVYU_OVERLAY:
            max_width = 0xFFFF / 2;
            break;
        default:
            SDL_SetError("Unsupported YUV format");
            return(NULL);
    }

    /* Reject empty or negative sizes, widths whose pitch would be
       truncated, and sizes for which width*height*2 (the pixel buffer size,
       computed as an int; a 32-bit int is assumed) would overflow. */
    if ( (width <= 0) || (height <= 0) || (width > max_width) ||
         (height > 0x7FFFFFFF / (width * 2)) ) {
        SDL_SetError("Invalid YUV overlay dimensions");
        return(NULL);
    }

    /* Create the overlay structure */
    overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
    if ( overlay == NULL ) {
        SDL_OutOfMemory();
        return(NULL);
    }
    SDL_memset(overlay, 0, (sizeof *overlay));

    /* Fill in the basic members */
    overlay->format = format;
    overlay->w = width;
    overlay->h = height;

    /* Set up the YUV surface function structure */
    overlay->hwfuncs = &sw_yuvfuncs;

    /* Create the pixel data and lookup tables */
    swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
    overlay->hwdata = swdata;
    if ( swdata == NULL ) {
        SDL_OutOfMemory();
        SDL_FreeYUVOverlay(overlay);
        return(NULL);
    }
    swdata->stretch = NULL;
    swdata->display = display;
    swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
    swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
    swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
    if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
        /* All three pointers are set (possibly to NULL) at this point, so
           the free callback can release whichever ones succeeded. */
        SDL_OutOfMemory();
        SDL_FreeYUVOverlay(overlay);
        return(NULL);
    }

    /* Only derive the table pointers once the allocations are known good */
    Cr_r_tab = &swdata->colortab[0*256];
    Cr_g_tab = &swdata->colortab[1*256];
    Cb_g_tab = &swdata->colortab[2*256];
    Cb_b_tab = &swdata->colortab[3*256];
    r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
    g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
    b_2_pix_alloc = &swdata->rgb_2_pix[2*768];

    /* Generate the tables for the display surface */
    for (i=0; i<256; i++) {
        /* Gamma correction (luminescence table) and chroma correction
           would be done here.  See the Berkeley mpeg_play sources.
        */
        CB = CR = (i-128);
        Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
        Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
        Cb_g_tab[i] = (int) (-(0.114/0.331) * CB);
        Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
    }

    /*
     * Set up entries 0-255 in rgb-to-pixel value tables.
     * The bit counts depend only on the masks, so the (slow) helpers are
     * called once per channel instead of once per table entry.
     */
    Rmask = display->format->Rmask;
    Gmask = display->format->Gmask;
    Bmask = display->format->Bmask;
    Rloss = 8 - number_of_bits_set(Rmask);
    Gloss = 8 - number_of_bits_set(Gmask);
    Bloss = 8 - number_of_bits_set(Bmask);
    Rshift = free_bits_at_bottom(Rmask);
    Gshift = free_bits_at_bottom(Gmask);
    Bshift = free_bits_at_bottom(Bmask);
    for ( i=0; i<256; ++i ) {
        r_2_pix_alloc[i+256] = i >> Rloss;
        r_2_pix_alloc[i+256] <<= Rshift;
        g_2_pix_alloc[i+256] = i >> Gloss;
        g_2_pix_alloc[i+256] <<= Gshift;
        b_2_pix_alloc[i+256] = i >> Bloss;
        b_2_pix_alloc[i+256] <<= Bshift;
    }

    /*
     * If we have 16-bit output depth, then we double the value
     * in the top word. This means that we can write out both
     * pixels in the pixel doubling mode with one op. It is
     * harmless in the normal case as storing a 32-bit value
     * through a short pointer will lose the top bits anyway.
     */
    if( display->format->BytesPerPixel == 2 ) {
        for ( i=0; i<256; ++i ) {
            r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
            g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
            b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
        }
    }

    /*
     * Spread out the values we have to the rest of the array so that
     * we do not need to check for overflow.
     */
    for ( i=0; i<256; ++i ) {
        r_2_pix_alloc[i] = r_2_pix_alloc[256];
        r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
        g_2_pix_alloc[i] = g_2_pix_alloc[256];
        g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
        b_2_pix_alloc[i] = b_2_pix_alloc[256];
        b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
    }

    /* You have chosen wisely... */
    switch (format) {
        case SDL_YV12_OVERLAY:
        case SDL_IYUV_OVERLAY:
        if ( display->format->BytesPerPixel == 2 ) {
#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
            /* inline assembly functions */
            if ( SDL_HasMMX() && (Rmask == 0xF800) &&
                                 (Gmask == 0x07E0) &&
                                 (Bmask == 0x001F) &&
                                 (width & 15) == 0) {
/*printf("Using MMX 16-bit 565 dither\n");*/
                swdata->Display1X = Color565DitherYV12MMX1X;
            } else {
/*printf("Using C 16-bit dither\n");*/
                swdata->Display1X = Color16DitherYV12Mod1X;
            }
#else
            swdata->Display1X = Color16DitherYV12Mod1X;
#endif
            swdata->Display2X = Color16DitherYV12Mod2X;
        }
        if ( display->format->BytesPerPixel == 3 ) {
            swdata->Display1X = Color24DitherYV12Mod1X;
            swdata->Display2X = Color24DitherYV12Mod2X;
        }
        if ( display->format->BytesPerPixel == 4 ) {
#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
            /* inline assembly functions */
            if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
                                 (Gmask == 0x0000FF00) &&
                                 (Bmask == 0x000000FF) &&
                                 (width & 15) == 0) {
/*printf("Using MMX 32-bit dither\n");*/
                swdata->Display1X = ColorRGBDitherYV12MMX1X;
            } else {
/*printf("Using C 32-bit dither\n");*/
                swdata->Display1X = Color32DitherYV12Mod1X;
            }
#else
            swdata->Display1X = Color32DitherYV12Mod1X;
#endif
            swdata->Display2X = Color32DitherYV12Mod2X;
        }
        break;
        case SDL_YUY2_OVERLAY:
        case SDL_UYVY_OVERLAY:
        case SDL_YVYU_OVERLAY:
        if ( display->format->BytesPerPixel == 2 ) {
            swdata->Display1X = Color16DitherYUY2Mod1X;
            swdata->Display2X = Color16DitherYUY2Mod2X;
        }
        if ( display->format->BytesPerPixel == 3 ) {
            swdata->Display1X = Color24DitherYUY2Mod1X;
            swdata->Display2X = Color24DitherYUY2Mod2X;
        }
        if ( display->format->BytesPerPixel == 4 ) {
            swdata->Display1X = Color32DitherYUY2Mod1X;
            swdata->Display2X = Color32DitherYUY2Mod2X;
        }
        break;
        default:
        /* We should never get here (caught above) */
        break;
    }

    /* Find the pitch and offset values for the overlay */
    overlay->pitches = swdata->pitches;
    overlay->pixels = swdata->planes;
    switch (format) {
        case SDL_YV12_OVERLAY:
        case SDL_IYUV_OVERLAY:
        /* Full-size luminance plane followed by two chroma planes of half
           the width and half the height */
        overlay->pitches[0] = overlay->w;
        overlay->pitches[1] = overlay->pitches[0] / 2;
        overlay->pitches[2] = overlay->pitches[0] / 2;
        overlay->pixels[0] = swdata->pixels;
        overlay->pixels[1] = overlay->pixels[0] +
                             overlay->pitches[0] * overlay->h;
        overlay->pixels[2] = overlay->pixels[1] +
                             overlay->pitches[1] * overlay->h / 2;
        overlay->planes = 3;
        break;
        case SDL_YUY2_OVERLAY:
        case SDL_UYVY_OVERLAY:
        case SDL_YVYU_OVERLAY:
        /* One packed plane, two bytes per pixel */
        overlay->pitches[0] = overlay->w*2;
        overlay->pixels[0] = swdata->pixels;
        overlay->planes = 1;
        break;
        default:
        /* We should never get here (caught above) */
        break;
    }

    /* We're all done.. */
    return(overlay);
}
/*
 * Lock callback of the software overlay.
 * Nothing is done here; the call always reports success (0).
 */
int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
{
    return 0;
}
/*
 * Unlock callback of the software overlay.
 * Counterpart of SDL_LockYUV_SW(); there is nothing to undo.
 */
void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
{
    /* intentionally empty */
}
/*
 * Convert the overlay to RGB and show it in rectangle 'dst' of the display
 * surface the overlay was created for.
 *
 * Three paths exist:
 *  - src covers the whole overlay and dst has the same size: convert
 *    straight onto the display with the 1X converter;
 *  - src covers the whole overlay and dst is exactly twice as large:
 *    convert straight onto the display with the 2X converter;
 *  - anything else (clipped source or arbitrary scale): convert 1:1 into a
 *    scratch surface and let SDL_SoftStretch() copy it to the display.
 *
 * Returns 0 on success, -1 if the scratch surface cannot be created, the
 * overlay format is unknown, or the target surface cannot be locked.
 *
 * NOTE(review): the row padding handed to the converters is computed in
 * whole pixels (pitch / BytesPerPixel), so a pitch that is not a multiple
 * of the pixel size is presumably not handled exactly - confirm.
 */
int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
{
    struct private_yuvhwdata *swdata = overlay->hwdata;
    SDL_Surface *display;
    Uint8 *lum, *Cr, *Cb;
    Uint8 *dstp;
    int mod;
    /* The source rectangle has been clipped.  Using a scratch surface is
       easier than adding clipped source support to all the blitters, plus
       that would slow them down in the general unclipped case. */
    const int clipped = ( src->x || src->y ||
                          src->w < overlay->w || src->h < overlay->h );
    const int same_size = ( (src->w == dst->w) && (src->h == dst->h) );
    const int doubled = ( (dst->w == 2*src->w) && (dst->h == 2*src->h) );
    const int scale_2x = ( !clipped && !same_size && doubled );
    const int stretch = ( clipped || (!same_size && !doubled) );

    /* Create the scratch surface the first time it is needed */
    if ( stretch && ! swdata->stretch ) {
        SDL_Surface *screen = swdata->display;

        swdata->stretch = SDL_CreateRGBSurface(
            SDL_SWSURFACE,
            overlay->w, overlay->h,
            screen->format->BitsPerPixel,
            screen->format->Rmask,
            screen->format->Gmask,
            screen->format->Bmask, 0);
        if ( ! swdata->stretch ) {
            return(-1);
        }
    }
    display = stretch ? swdata->stretch : swdata->display;

    /* Locate the first luminance, Cr and Cb sample for this layout */
    switch (overlay->format) {
        case SDL_YV12_OVERLAY:
            lum = overlay->pixels[0];
            Cr =  overlay->pixels[1];
            Cb =  overlay->pixels[2];
            break;
        case SDL_IYUV_OVERLAY:
            lum = overlay->pixels[0];
            Cr =  overlay->pixels[2];
            Cb =  overlay->pixels[1];
            break;
        case SDL_YUY2_OVERLAY:
            lum = overlay->pixels[0];
            Cr = lum + 3;
            Cb = lum + 1;
            break;
        case SDL_UYVY_OVERLAY:
            lum = overlay->pixels[0]+1;
            Cr = lum + 1;
            Cb = lum - 1;
            break;
        case SDL_YVYU_OVERLAY:
            lum = overlay->pixels[0];
            Cr = lum + 1;
            Cb = lum + 3;
            break;
        default:
            SDL_SetError("Unsupported YUV format in blit");
            return(-1);
    }

    if ( SDL_MUSTLOCK(display) && (SDL_LockSurface(display) < 0) ) {
        return(-1);
    }

    /* The scratch surface is filled from its origin; the real display is
       written at the top-left corner of dst. */
    if ( stretch ) {
        dstp = (Uint8 *)swdata->stretch->pixels;
    } else {
        dstp = (Uint8 *)display->pixels
            + dst->x * display->format->BytesPerPixel
            + dst->y * display->pitch;
    }

    /* mod = pixels left over in a target row after the converted ones */
    if ( scale_2x ) {
        mod = (display->pitch / display->format->BytesPerPixel)
            - (overlay->w * 2);
        swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
                          lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
    } else {
        mod = (display->pitch / display->format->BytesPerPixel)
            - overlay->w;
        swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
                          lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
    }

    if ( SDL_MUSTLOCK(display) ) {
        SDL_UnlockSurface(display);
    }

    /* From here on only the real display surface is touched */
    display = swdata->display;
    if ( stretch ) {
        SDL_SoftStretch(swdata->stretch, src, display, dst);
    }
    SDL_UpdateRects(display, 1, dst);

    return(0);
}
/*
 * Release everything SDL_CreateYUV_SW() attached to the overlay: the
 * scratch surface (if one was ever created), the pixel buffer, both lookup
 * tables and the private structure itself.  The overlay structure is not
 * freed here.
 *
 * overlay->hwdata is reset to NULL afterwards so that a second call, or
 * any later look at the overlay, cannot touch freed memory.
 */
void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
{
    struct private_yuvhwdata *swdata;

    swdata = overlay->hwdata;
    if ( swdata ) {
        if ( swdata->stretch ) {
            SDL_FreeSurface(swdata->stretch);
        }
        /* Freeing a NULL pointer is a no-op, so members that were never
           allocated need no special casing. */
        SDL_free(swdata->pixels);
        SDL_free(swdata->colortab);
        SDL_free(swdata->rgb_2_pix);
        SDL_free(swdata);
        overlay->hwdata = NULL;
    }
}