blob: 5ed853fc5fd978ae7da4605cb7d42fddf4d3ceda [file] [log] [blame]
/*
* Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include <vis_proto.h>
#include "java2d_Mlib.h"
/*#define USE_TWO_BC_TABLES*/ /* a little more precise, but slow on Ultra-III */
/***************************************************************/
/*
 * MUL_16x16(lhs, rhs): evaluates both vis_fmul8sux16 and vis_fmul8ulx16 on
 * the same operand pair and adds the two results with vis_fpadd16.
 * NOTE(review): this is presumably the usual VIS idiom for a partitioned
 * 16x16 multiply; confirm the exact scaling against the VIS manual.
 * Each argument is expanded twice, so arguments must be side-effect free.
 */
#define MUL_16x16(lhs, rhs)                     \
    vis_fpadd16(                                \
        vis_fmul8sux16((lhs), (rhs)),           \
        vis_fmul8ulx16((lhs), (rhs)))
#define BILINEAR \
xf = vis_fand(xf, mask7fff); \
yf = vis_fand(yf, mask7fff); \
xr = vis_fpsub32(mask7fff, xf); \
yf0 = vis_fmul8x16au(mask80, vis_read_hi(yf)); \
yf1 = vis_fmul8x16au(mask80, vis_read_lo(yf)); \
\
a0 = vis_fmul8x16au(vis_read_hi(a01), vis_read_hi(xr)); \
a1 = vis_fmul8x16au(vis_read_lo(a01), vis_read_hi(xf)); \
a2 = vis_fmul8x16au(vis_read_hi(a23), vis_read_hi(xr)); \
a3 = vis_fmul8x16au(vis_read_lo(a23), vis_read_hi(xf)); \
a0 = vis_fpadd16(a0, a1); \
a2 = vis_fpadd16(a2, a3); \
a2 = vis_fpsub16(a2, a0); \
a2 = MUL_16x16(a2, yf0); \
a0 = vis_fmul8x16(mask40, a0); \
a0 = vis_fpadd16(a0, a2); \
a0 = vis_fpadd16(a0, d_rnd); \
\
b0 = vis_fmul8x16au(vis_read_hi(b01), vis_read_lo(xr)); \
b1 = vis_fmul8x16au(vis_read_lo(b01), vis_read_lo(xf)); \
b2 = vis_fmul8x16au(vis_read_hi(b23), vis_read_lo(xr)); \
b3 = vis_fmul8x16au(vis_read_lo(b23), vis_read_lo(xf)); \
b0 = vis_fpadd16(b0, b1); \
b2 = vis_fpadd16(b2, b3); \
b2 = vis_fpsub16(b2, b0); \
b2 = MUL_16x16(b2, yf1); \
b0 = vis_fmul8x16(mask40, b0); \
b0 = vis_fpadd16(b0, b2); \
b0 = vis_fpadd16(b0, d_rnd); \
\
xf = vis_fpadd32(xf, dx); \
yf = vis_fpadd32(yf, dy)
/*
 * Bilinearly blends numpix pixels in place using VIS partitioned arithmetic.
 *
 * Buffer usage as implemented here: every output pixel consumes two
 * consecutive mlib_d64 values (four jint samples) from pRGB, so the inputs
 * of output pixel i start at pRGB[4*i]; the blended pixel is written back
 * to pRGB[i].  Reads always run ahead of writes.
 *
 *   pRGB     sample buffer, read and overwritten in place.  It is accessed
 *            through mlib_d64 pointers, so it is assumed to be 8-byte
 *            aligned (NOTE(review): confirm against callers).
 *   numpix   number of output pixels to produce.
 *   xfract   horizontal fraction of the first pixel.
 *   dxfract  horizontal fraction step per pixel.
 *   yfract   vertical fraction of the first pixel.
 *   dyfract  vertical fraction step per pixel.
 *
 * Pixels are processed two at a time through the BILINEAR macro; an odd
 * trailing pixel is handled separately after the loop.
 */
void
vis_BilinearBlend(jint *pRGB, jint numpix,
                  jint xfract, jint dxfract,
                  jint yfract, jint dyfract)
{
    mlib_d64 *p_src = (void*)pRGB;
    mlib_f32 *p_dst = (void*)pRGB;
    mlib_d64 a01, a23, a0, a1, a2, a3;
    mlib_d64 b01, b23, b0, b1, b2, b3;
    mlib_d64 xf, xr, dx, yf, yf0, yf1, dy;
    mlib_d64 mask7fff, d_rnd;
    mlib_f32 mask80, mask40;
    mlib_s32 i;

    /* NOTE(review): presumably selects the scale used by vis_fpack16. */
    vis_write_gsr(2 << 3);

    /*
     * Each 64-bit fraction register carries two 32-bit lanes: the value for
     * the first pixel of a pair and the value for the second one, both
     * pre-shifted right by one (BILINEAR masks them to 31 bits).
     */
    xf = vis_to_double(xfract >> 1, (xfract + dxfract) >> 1);
    yf = vis_to_double(yfract >> 1, (yfract + dyfract) >> 1);

    /*
     * The lanes hold halved fractions, so adding the unhalved step once per
     * iteration moves each lane forward by two pixels.
     */
    dx = vis_to_double_dup(dxfract);
    dy = vis_to_double_dup(dyfract);

    mask7fff = vis_to_double_dup(0x7fffffff);
    d_rnd = vis_to_double_dup(0x00100010);
    mask80 = vis_to_float(0x80808080);
    mask40 = vis_to_float(0x40404040);

#pragma pipeloop(0)
    for (i = 0; i < numpix/2; i++) {
        /* Four mlib_d64 in (two per pixel), one mlib_d64 out (two pixels). */
        a01 = p_src[0];
        a23 = p_src[1];
        b01 = p_src[2];
        b23 = p_src[3];
        p_src += 4;

        BILINEAR;

        ((mlib_d64*)p_dst)[0] = vis_fpack16_pair(a0, b0);
        p_dst += 2;
    }

    if (numpix & 1) {
        a01 = p_src[0];
        a23 = p_src[1];
        /*
         * BILINEAR always evaluates the "b" lane as well.  Its result is
         * discarded here, but b01/b23 would otherwise be read while still
         * uninitialized when numpix == 1, so give them defined values
         * instead of reading past the input for samples that do not exist.
         */
        b01 = a01;
        b23 = a23;

        BILINEAR;

        p_dst[0] = vis_fpack16(a0);
    }
}
/***************************************************************/
/* Set to 1 at the end of init_vis_bicubic_table(); checked by vis_BicubicBlend(). */
static jboolean vis_bicubic_table_inited = 0;
/*
 * Bicubic weight table filled by init_vis_bicubic_table(), which writes each
 * of the 257 entries as four mlib_s16 values.  With t = i/256 and r() the
 * kernel documented in that function, entry i holds
 *     { r(1 + t), r(t), r(1 - t), r(2 - t) }
 * each multiplied by 16384 and converted to mlib_s16.
 */
static mlib_d64 vis_bicubic_coeff[256 + 1];
#ifdef USE_TWO_BC_TABLES
/*
 * Optional second table, compiled only when USE_TWO_BC_TABLES is defined.
 * init_vis_bicubic_table() stores one weight replicated into all four
 * mlib_s16 slots of an entry, multiplied by 2 * 16384 and clamped to 32767:
 * entry i holds r(i/256) and entry 256 + i holds r(1 + i/256),
 * for i = 0..256.
 */
static mlib_d64 vis_bicubic_coeff2[512 + 1];
#endif
/*
 * Fills vis_bicubic_coeff (and vis_bicubic_coeff2 when USE_TWO_BC_TABLES is
 * defined) with fixed-point bicubic kernel weights for the parameter 'A',
 * then sets vis_bicubic_table_inited.
 *
 * The tables are addressed as arrays of mlib_s16, four per table entry.
 * For every step idx in 0..256 (t = idx/256) the inner-lobe weight r(t) and
 * the outer-lobe weight r(1 + t) are computed, scaled by 16384 and stored
 * both at entry idx and, mirrored, at entry 256 - idx.
 *
 * REMIND: The following formulas are designed to give smooth
 * results when 'A' is -0.5 or -1.0.
 */
static void
init_vis_bicubic_table(jdouble A)
{
    mlib_s16 *coeff = (void*)vis_bicubic_coeff;
#ifdef USE_TWO_BC_TABLES
    mlib_s16 *coeff2 = (void*)vis_bicubic_coeff2;
#endif
    int idx;

    for (idx = 0; idx <= 256; idx++) {
        int mirror = 256 - idx;
        mlib_d64 t = idx*(1.0/256.0);
        mlib_d64 w;
        mlib_s16 q;

        /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
        w = ((A+2)*t - (A+3))*t*t + 1;
        w *= 16384;
        q = (mlib_s16)w;
        coeff[4*mirror + 2] = q;
        coeff[4*idx + 1] = q;
#ifdef USE_TWO_BC_TABLES
        /* Doubled precision for the second table, clamped to the s16 max. */
        w *= 2;
        if (w >= 32767) w = 32767;
        q = (mlib_s16)w;
        coeff2[4*idx + 3] = q;
        coeff2[4*idx + 2] = q;
        coeff2[4*idx + 1] = q;
        coeff2[4*idx] = q;
#endif

        /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
        t += 1.0;
        w = ((A*t - 5*A)*t + 8*A)*t - 4*A;
        w *= 16384;
        q = (mlib_s16)w;
        coeff[4*mirror + 3] = q;
        coeff[4*idx] = q;
#ifdef USE_TWO_BC_TABLES
        w *= 2;
        if (w >= 32767) w = 32767;
        q = (mlib_s16)w;
        coeff2[4*idx + 1027] = q;
        coeff2[4*idx + 1026] = q;
        coeff2[4*idx + 1025] = q;
        coeff2[4*idx + 1024] = q;
#endif
    }

    vis_bicubic_table_inited = 1;
}
/***************************************************************/
/*
 * MUL_BC_COEFF(pix0, pix1, wpair): weights pix0 via vis_fmul8x16au and pix1
 * via vis_fmul8x16al, both against the same wpair, and adds the two
 * products with vis_fpadd16.
 * NOTE(review): "au"/"al" presumably select the upper/lower 16-bit weight
 * of wpair; confirm against the VIS manual.
 * wpair is expanded twice, so it must be free of side effects.
 */
#define MUL_BC_COEFF(pix0, pix1, wpair)         \
    vis_fpadd16(                                \
        vis_fmul8x16au((pix0), (wpair)),        \
        vis_fmul8x16al((pix1), (wpair)))
/*
 * SAT(val, max): branch-free upper clamp, leaves val = (val > max) ? max : val.
 * val must be a modifiable lvalue; both arguments are expanded more than
 * once.  The ">> 31" step relies on an arithmetic right shift of a 32-bit
 * signed value to turn the sign into an all-ones / all-zeros mask.
 */
#define SAT(val, max)                                                   \
    do {                                                                \
        (val) -= (max);           /* excess over max; > 0 only if too big */ \
        (val) &= ((val) >> 31);   /* keep negatives, zero out the rest */ \
        (val) += (max);           /* shift back: result never exceeds max */ \
    } while (0)
/*
 * Bicubically blends numpix pixels in place using VIS partitioned arithmetic.
 *
 * Buffer usage as implemented here: every output pixel consumes eight
 * consecutive mlib_d64 values (sixteen jint samples, handled as four rows
 * of four) from pRGB, and the result is written back to pRGB[i].
 *
 *   pRGB     sample buffer, read and overwritten in place; accessed through
 *            an mlib_d64 pointer (NOTE(review): assumes 8-byte alignment).
 *   numpix   number of output pixels to produce.
 *   xfract   horizontal fraction of the first pixel; its top 8 bits index
 *            the weight table.
 *   dxfract  horizontal fraction step per pixel.
 *   yfract   vertical fraction of the first pixel, used like xfract.
 *   dyfract  vertical fraction step per pixel.
 *
 * The weight table is built on first use with A = -0.5.  After packing, the
 * colour components are clamped so that none exceeds the alpha component
 * (NOTE(review): presumably because pixels are alpha-premultiplied).
 */
void
vis_BicubicBlend(jint *pRGB, jint numpix,
                 jint xfract, jint dxfract,
                 jint yfract, jint dyfract)
{
    mlib_d64 *samples = (void*)pRGB;
    union {
        jint     theInt;
        mlib_f32 theF32;
    } packed;
    mlib_d64 q0, q1, q2, q3, q4, q5, q6, q7;
    mlib_d64 row0, row1, row2, row3, acc;
    mlib_d64 wx, wy, wy0, wy1, wy2, wy3;
    mlib_d64 rounding;
    mlib_f32 mask80;
    mlib_s32 n;

    if (!vis_bicubic_table_inited) {
        init_vis_bicubic_table(-0.5);
    }

    /* The GSR setting and rounding constant depend on the table variant. */
#ifdef USE_TWO_BC_TABLES
    vis_write_gsr(2 << 3);
    rounding = vis_to_double_dup(0x000f000f);
#else
    vis_write_gsr(4 << 3);
    rounding = vis_to_double_dup(0x00030003);
#endif
    mask80 = vis_to_float(0x80808080);

#pragma pipeloop(0)
    for (n = 0; n < numpix; n++) {
        /* Table indices come from the top 8 bits of the current fractions. */
        jint xidx = URShift(xfract, 32-8);
        jint yidx = URShift(yfract, 32-8);
        int argb;
        int alpha, red, green, blue;

        xfract += dxfract;
        yfract += dyfract;

        wx = vis_bicubic_coeff[xidx];

        q0 = samples[0];
        q1 = samples[1];
        q2 = samples[2];
        q3 = samples[3];
        q4 = samples[4];
        q5 = samples[5];
        q6 = samples[6];
        q7 = samples[7];
        samples += 8;

        /*
         * Horizontal pass: each row is two mlib_d64 (four samples); the
         * first pair is weighted by the high half of wx, the second pair
         * by the low half, and the two partial sums are added.
         */
        row0 = vis_fpadd16(
            MUL_BC_COEFF(vis_read_hi(q0), vis_read_lo(q0), vis_read_hi(wx)),
            MUL_BC_COEFF(vis_read_hi(q1), vis_read_lo(q1), vis_read_lo(wx)));
        row1 = vis_fpadd16(
            MUL_BC_COEFF(vis_read_hi(q2), vis_read_lo(q2), vis_read_hi(wx)),
            MUL_BC_COEFF(vis_read_hi(q3), vis_read_lo(q3), vis_read_lo(wx)));
        row2 = vis_fpadd16(
            MUL_BC_COEFF(vis_read_hi(q4), vis_read_lo(q4), vis_read_hi(wx)),
            MUL_BC_COEFF(vis_read_hi(q5), vis_read_lo(q5), vis_read_lo(wx)));
        row3 = vis_fpadd16(
            MUL_BC_COEFF(vis_read_hi(q6), vis_read_lo(q6), vis_read_hi(wx)),
            MUL_BC_COEFF(vis_read_hi(q7), vis_read_lo(q7), vis_read_lo(wx)));

        /* Vertical weights, one per row. */
#ifdef USE_TWO_BC_TABLES
        wy0 = vis_bicubic_coeff2[256 + yidx];
        wy1 = vis_bicubic_coeff2[yidx];
        wy2 = vis_bicubic_coeff2[256 - yidx];
        wy3 = vis_bicubic_coeff2[512 - yidx];
#else
        wy = vis_bicubic_coeff[yidx];
        wy0 = vis_fmul8x16au(mask80, vis_read_hi(wy));
        wy1 = vis_fmul8x16al(mask80, vis_read_hi(wy));
        wy2 = vis_fmul8x16au(mask80, vis_read_lo(wy));
        wy3 = vis_fmul8x16al(mask80, vis_read_lo(wy));
#endif

        /* Vertical pass: weight the rows, add rounding, sum and pack. */
        row0 = MUL_16x16(row0, wy0);
        row1 = MUL_16x16(row1, wy1);
        row2 = MUL_16x16(row2, wy2);
        row3 = MUL_16x16(row3, wy3);

        row0 = vis_fpadd16(row0, rounding);
        acc = vis_fpadd16(vis_fpadd16(row0, row1), vis_fpadd16(row2, row3));

        packed.theF32 = vis_fpack16(acc);

        /* Unpack, clamp each colour component to alpha, and store. */
        argb  = packed.theInt;
        alpha = (argb >> 24) & 0xff;
        red   = (argb >> 16) & 0xff;
        green = (argb >>  8) & 0xff;
        blue  = (argb      ) & 0xff;

        SAT(red, alpha);
        SAT(green, alpha);
        SAT(blue, alpha);

        *pRGB++ = ((alpha << 24) | (red << 16) | (green << 8) | (blue));
    }
}
/***************************************************************/