blob: 0b77679d235fc415e4022b52118d7d2a81d28e9d [file] [log] [blame]
/*
* Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* FUNCTION
* Image affine transformation with Bicubic filtering
* SYNOPSIS
* mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
* mlib_s32 *rightEdges,
* mlib_s32 *xStarts,
* mlib_s32 *yStarts,
* mlib_s32 *sides,
* mlib_u8 *dstData,
* mlib_u8 **lineAddr,
* mlib_s32 dstYStride,
* mlib_s32 is_affine,
* mlib_s32 srcYStride,
* mlib_filter filter)
*
*
* ARGUMENTS
* leftEdges array[dstHeight] of xLeft coordinates
* rightEdges array[dstHeight] of xRight coordinates
* xStarts array[dstHeight] of xStart * 65536 coordinates
* yStarts array[dstHeight] of yStart * 65536 coordinates
* sides output array[4]. sides[0] is yStart, sides[1] is yFinish,
* sides[2] is dx * 65536, sides[3] is dy * 65536
* dstData pointer to the first pixel on (yStart - 1) line
* lineAddr array[srcHeight] of pointers to the first pixel on
* the corresponding lines
* dstYStride stride of destination image
* is_affine indicator (Affine - GridWarp)
* srcYStride stride of source image
* filter type of resampling filter
*
* DESCRIPTION
* The functions step along the lines from xLeft to xRight and apply
* the bicubic filtering.
*
*/
#include "mlib_ImageAffine.h"
/* Sample type processed by every function in this file. */
#define DTYPE mlib_u8
/* Builds a function name from a channel suffix:
   FUN_NAME(1ch) expands to mlib_ImageAffine_u8_1ch_bc. */
#define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bc
/* Not referenced directly in this file. NOTE(review): presumably consumed by
   the FILTER_SHIFT / FILTER_MASK definitions in mlib_ImageAffine.h - confirm. */
#define FILTER_BITS 8
/***************************************************************/
#ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
/* Replace any earlier FILTER_ELEM_BITS definition for the float-table build.
   NOTE(review): presumably log2 of the byte size of one table entry
   (4 coefficients of mlib_f32 = 16 bytes) - confirm against the header. */
#undef FILTER_ELEM_BITS
#define FILTER_ELEM_BITS 4
/*
 * SAT8(DST): convert the floating-point accumulator to an 8-bit result and
 * store it in DST.
 *
 * Both variants capture two names from the expansion site:
 *   val0 - the accumulator to convert;
 *   sat  - the bias subtracted before conversion (every function in this
 *          file initialises it to 0x7F800000).
 * The biased value is converted to mlib_s32, its top byte is extracted with
 * ">> 24", and bit 7 is flipped with "^ 0x80".
 *
 * MLIB_USE_FTOI_CLAMPING variant: no explicit range check is made and val0
 * is left unmodified. NOTE(review): presumably this relies on the target's
 * float-to-int conversion saturating out-of-range values - confirm.
 *
 * Default variant: values at or beyond the mlib_s32 limits are clamped to
 * MLIB_U8_MAX / MLIB_U8_MIN explicitly, and val0 is overwritten with the
 * biased value. The statements are wrapped in do { } while (0) so that the
 * macro behaves as exactly one statement, e.g. as the body of an unbraced
 * if/else.
 */
#ifdef MLIB_USE_FTOI_CLAMPING
#define SAT8(DST) \
DST = ((mlib_s32)(val0 - sat) >> 24) ^ 0x80
#else
#define SAT8(DST) \
do { \
  val0 -= sat; \
  if (val0 >= MLIB_S32_MAX) \
    DST = MLIB_U8_MAX; \
  else if (val0 <= MLIB_S32_MIN) \
    DST = MLIB_U8_MIN; \
  else \
    DST = ((mlib_s32)val0 >> 24) ^ 0x80; \
} while (0)
#endif /* MLIB_USE_FTOI_CLAMPING */
/***************************************************************/
/*
 * Bicubic affine resampling of a 1-channel mlib_u8 image (SPARC build,
 * floating-point arithmetic).
 *
 * For every destination row j in [yStart, yFinish] the destination pixels
 * are written up to and including the one at dstData + xRight. Each output
 * value is a 4x4 weighted sum of source samples: four horizontal 4-tap sums
 * (c0..c3, weights xf0..xf3) combined vertically with weights yf0..yf3,
 * then stored through SAT8.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below (filter, lineAddr, dstData, srcYStride, yStart, yFinish,
 *        X, Y, dX, dY, ...) - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
/* Bias read by SAT8; 0x7F800000 == 0x80000000 - 0x00800000. */
mlib_d64 sat = (mlib_d64) 0x7F800000;
const mlib_f32 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = mlib_filters_u8f_bc;
}
else {
mlib_filters_table = mlib_filters_u8f_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_d64 xf0, xf1, xf2, xf3;
mlib_d64 yf0, yf1, yf2, yf3;
mlib_d64 c0, c1, c2, c3, val0;
mlib_s32 filterpos;
mlib_f32 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(1);
/* Address of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + xRight;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1 so that offsets 0..3 (and the four rows
   starting at ySrc) cover the 4x4 neighbourhood. */
xSrc = (X >> MLIB_SHIFT) - 1;
ySrc = (Y >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[1];
s2 = srcPixelPtr[2];
s3 = srcPixelPtr[3];
/* Software-pipelined main loop: handles every pixel before dstLineEnd.
   Each iteration finishes the current pixel while loading the coefficients
   and first source row of the next one. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
/* Step to the next pixel's source position; the sums below still use the
   samples and coefficients loaded for the current pixel. */
X += dX;
Y += dY;
/* Horizontal pass over the four source rows. mlib_U82D64 is an external
   table indexed by the u8 sample (presumably a u8 -> mlib_d64 conversion). */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
/* Y coefficients for the NEXT pixel. */
filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Convert val0 (using sat) and store the current pixel. */
SAT8(dstPixelPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X >> MLIB_SHIFT) - 1;
ySrc = (Y >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[1];
s2 = srcPixelPtr[2];
s3 = srcPixelPtr[3];
}
/* Epilogue: last pixel of the row, same computation without look-ahead. */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
SAT8(dstPixelPtr[0]);
}
return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * Bicubic affine resampling of a 2-channel interleaved mlib_u8 image (SPARC
 * build, floating-point arithmetic).
 *
 * For every destination row the two channels are processed one after the
 * other (loop over k): each pass restarts from the row's X/Y, reads source
 * samples 2 bytes apart and writes every second destination byte. Each
 * output value is a 4x4 weighted sum (horizontal sums c0..c3 with xf0..xf3,
 * combined vertically with yf0..yf3) stored through SAT8.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
/* Bias read by SAT8; 0x7F800000 == 0x80000000 - 0x00800000. */
mlib_d64 sat = (mlib_d64) 0x7F800000;
const mlib_f32 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = mlib_filters_u8f_bc;
}
else {
mlib_filters_table = mlib_filters_u8f_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_d64 xf0, xf1, xf2, xf3;
mlib_d64 yf0, yf1, yf2, yf3;
mlib_d64 c0, c1, c2, c3, val0;
mlib_s32 filterpos, k;
mlib_f32 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(2);
/* Address of channel 0 of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + 2 * xRight;
/* One full pass over the row per channel. */
for (k = 0; k < 2; k++) {
/* Private copies so that every channel starts from the same X/Y. */
mlib_s32 X1 = X;
mlib_s32 Y1 = Y;
DTYPE *dPtr = dstPixelPtr + k;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1; horizontal neighbours of channel k
   sit at byte offsets 0, 2, 4 and 6. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[2];
s2 = srcPixelPtr[4];
s3 = srcPixelPtr[6];
/* Software-pipelined main loop: finishes the current pixel while loading
   the coefficients and first source row of the next one. The pixel whose
   channel 0 is at dstLineEnd is left for the epilogue. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
/* Step to the next pixel's source position. */
X1 += dX;
Y1 += dY;
/* Horizontal pass over the four source rows. mlib_U82D64 is an external
   table indexed by the u8 sample (presumably a u8 -> mlib_d64 conversion). */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
/* Y coefficients for the NEXT pixel. */
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Convert val0 (using sat) and store the current sample. */
SAT8(dPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[2];
s2 = srcPixelPtr[4];
s3 = srcPixelPtr[6];
}
/* Epilogue: last pixel of the row for channel k, without look-ahead. */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
SAT8(dPtr[0]);
}
}
return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * Bicubic affine resampling of a 3-channel interleaved mlib_u8 image (SPARC
 * build, floating-point arithmetic).
 *
 * For every destination row the three channels are processed one after the
 * other (loop over k): each pass restarts from the row's X/Y, reads source
 * samples 3 bytes apart and writes every third destination byte. Each
 * output value is a 4x4 weighted sum (horizontal sums c0..c3 with xf0..xf3,
 * combined vertically with yf0..yf3) stored through SAT8.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
/* Bias read by SAT8; 0x7F800000 == 0x80000000 - 0x00800000. */
mlib_d64 sat = (mlib_d64) 0x7F800000;
const mlib_f32 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = mlib_filters_u8f_bc;
}
else {
mlib_filters_table = mlib_filters_u8f_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_d64 xf0, xf1, xf2, xf3;
mlib_d64 yf0, yf1, yf2, yf3;
mlib_d64 c0, c1, c2, c3, val0;
mlib_s32 filterpos, k;
mlib_f32 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(3);
/* Address of channel 0 of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + 3 * xRight;
/* One full pass over the row per channel. */
for (k = 0; k < 3; k++) {
/* Private copies so that every channel starts from the same X/Y. */
mlib_s32 X1 = X;
mlib_s32 Y1 = Y;
DTYPE *dPtr = dstPixelPtr + k;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1; horizontal neighbours of channel k
   sit at byte offsets 0, 3, 6 and 9. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[3];
s2 = srcPixelPtr[6];
s3 = srcPixelPtr[9];
/* Software-pipelined main loop: finishes the current pixel while loading
   the coefficients and first source row of the next one. The pixel whose
   channel 0 is at dstLineEnd is left for the epilogue. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
/* Step to the next pixel's source position. */
X1 += dX;
Y1 += dY;
/* Horizontal pass over the four source rows. mlib_U82D64 is an external
   table indexed by the u8 sample (presumably a u8 -> mlib_d64 conversion). */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
/* Y coefficients for the NEXT pixel. */
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Convert val0 (using sat) and store the current sample. */
SAT8(dPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[3];
s2 = srcPixelPtr[6];
s3 = srcPixelPtr[9];
}
/* Epilogue: last pixel of the row for channel k, without look-ahead. */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
SAT8(dPtr[0]);
}
}
return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * Bicubic affine resampling of a 4-channel interleaved mlib_u8 image (SPARC
 * build, floating-point arithmetic).
 *
 * For every destination row the four channels are processed one after the
 * other (loop over k): each pass restarts from the row's X/Y, reads source
 * samples 4 bytes apart and writes every fourth destination byte. Each
 * output value is a 4x4 weighted sum (horizontal sums c0..c3 with xf0..xf3,
 * combined vertically with yf0..yf3) stored through SAT8.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
/* Bias read by SAT8; 0x7F800000 == 0x80000000 - 0x00800000. */
mlib_d64 sat = (mlib_d64) 0x7F800000;
const mlib_f32 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = mlib_filters_u8f_bc;
}
else {
mlib_filters_table = mlib_filters_u8f_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_d64 xf0, xf1, xf2, xf3;
mlib_d64 yf0, yf1, yf2, yf3;
mlib_d64 c0, c1, c2, c3, val0;
mlib_s32 filterpos, k;
mlib_f32 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(4);
/* Address of channel 0 of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + 4 * xRight;
/* One full pass over the row per channel. */
for (k = 0; k < 4; k++) {
/* Private copies so that every channel starts from the same X/Y. */
mlib_s32 X1 = X;
mlib_s32 Y1 = Y;
DTYPE *dPtr = dstPixelPtr + k;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1; horizontal neighbours of channel k
   sit at byte offsets 0, 4, 8 and 12. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[4];
s2 = srcPixelPtr[8];
s3 = srcPixelPtr[12];
/* Software-pipelined main loop: finishes the current pixel while loading
   the coefficients and first source row of the next one. The pixel whose
   channel 0 is at dstLineEnd is left for the epilogue. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
/* Step to the next pixel's source position. */
X1 += dX;
Y1 += dY;
/* Horizontal pass over the four source rows. mlib_U82D64 is an external
   table indexed by the u8 sample (presumably a u8 -> mlib_d64 conversion). */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
/* Y coefficients for the NEXT pixel. */
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Convert val0 (using sat) and store the current sample. */
SAT8(dPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[4];
s2 = srcPixelPtr[8];
s3 = srcPixelPtr[12];
}
/* Epilogue: last pixel of the row for channel k, without look-ahead. */
c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
SAT8(dPtr[0]);
}
}
return MLIB_SUCCESS;
}
#else /* for x86, using integer multiplies is faster */
/* Right shift applied to each horizontal 4-tap sum (c0..c3). */
#define SHIFT_X 12
/* Rounding term added before the SHIFT_X shift; currently 0, i.e. the
   horizontal sums are truncated. The half-unit alternative is kept in the
   trailing comment. */
#define ROUND_X 0 /* (1 << (SHIFT_X - 1)) */
/* Right shift applied after the vertical pass; SHIFT_X + SHIFT_Y == 28.
   NOTE(review): presumably each s16 coefficient carries a 2^14 scale, hence
   14 + 14 - confirm against the coefficient tables. */
#define SHIFT_Y (14 + 14 - SHIFT_X)
/* Half of 2^SHIFT_Y, added before the final shift to round to nearest. */
#define ROUND_Y (1 << (SHIFT_Y - 1))
/***************************************************************/
/*
 * S32_TO_U8_SAT(DST): store the integer accumulator val0 (captured from the
 * expansion site) in DST, clamped to [MLIB_U8_MIN, MLIB_U8_MAX].
 *
 * Test for the presence of any "1" bit in bits 8 to 31 of val0. If present,
 * then val0 is either negative or > 255. If over/underflows of 8 bits are
 * uncommon, then this technique can be a win, since only a single test,
 * rather than two, is necessary to determine if clamping is needed. On the
 * other hand, if over/underflows are common, it adds an extra test.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be followed by ';' anywhere a statement is allowed,
 * including as the "then" branch of an unbraced if/else.
 */
#define S32_TO_U8_SAT(DST) \
do { \
  if (val0 & 0xffffff00) { \
    if (val0 < MLIB_U8_MIN) \
      DST = MLIB_U8_MIN; \
    else \
      DST = MLIB_U8_MAX; \
  } else { \
    DST = (mlib_u8)val0; \
  } \
} while (0)
/***************************************************************/
/*
 * Bicubic affine resampling of a 1-channel mlib_u8 image (non-SPARC build,
 * integer arithmetic).
 *
 * For every destination row j in [yStart, yFinish] the destination pixels
 * are written up to and including the one at dstData + xRight. Each output
 * value is a 4x4 weighted sum of source samples: four horizontal 4-tap sums
 * (c0..c3, weights xf0..xf3) reduced by ">> SHIFT_X", combined vertically
 * with weights yf0..yf3, rounded with ROUND_Y, reduced by ">> SHIFT_Y" and
 * clamped by S32_TO_U8_SAT.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below (filter, lineAddr, dstData, srcYStride, yStart, yFinish,
 *        X, Y, dX, dY, ...) - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
const mlib_s16 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
}
else {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_s32 xf0, xf1, xf2, xf3;
mlib_s32 yf0, yf1, yf2, yf3;
mlib_s32 c0, c1, c2, c3, val0;
mlib_s32 filterpos;
mlib_s16 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(1);
/* Address of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + xRight;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1 so that offsets 0..3 (and the four rows
   starting at ySrc) cover the 4x4 neighbourhood. */
xSrc = (X >> MLIB_SHIFT) - 1;
ySrc = (Y >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[1];
s2 = srcPixelPtr[2];
s3 = srcPixelPtr[3];
/* Software-pipelined main loop: handles every pixel before dstLineEnd.
   Each iteration finishes the current pixel while loading the coefficients
   and first source row of the next one. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
/* Step to the next pixel's source position; the sums below still use the
   samples and coefficients loaded for the current pixel. */
X += dX;
Y += dY;
/* Horizontal pass over the four source rows. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
/* Y coefficients for the NEXT pixel. */
filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Clamp val0 to the u8 range and store the current pixel. */
S32_TO_U8_SAT(dstPixelPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X >> MLIB_SHIFT) - 1;
ySrc = (Y >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[1];
s2 = srcPixelPtr[2];
s3 = srcPixelPtr[3];
}
/* Epilogue: last pixel of the row, same computation without look-ahead. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
S32_TO_U8_SAT(dstPixelPtr[0]);
}
return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * Bicubic affine resampling of a 2-channel interleaved mlib_u8 image
 * (non-SPARC build, integer arithmetic).
 *
 * For every destination row the two channels are processed one after the
 * other (loop over k): each pass restarts from the row's X/Y, reads source
 * samples 2 bytes apart and writes every second destination byte. Each
 * output value is a 4x4 weighted sum: horizontal sums c0..c3 reduced by
 * ">> SHIFT_X", combined vertically, rounded with ROUND_Y, reduced by
 * ">> SHIFT_Y" and clamped by S32_TO_U8_SAT.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
const mlib_s16 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
}
else {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_s32 xf0, xf1, xf2, xf3;
mlib_s32 yf0, yf1, yf2, yf3;
mlib_s32 c0, c1, c2, c3, val0;
mlib_s32 filterpos, k;
mlib_s16 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(2);
/* Address of channel 0 of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + 2 * xRight;
/* One full pass over the row per channel. */
for (k = 0; k < 2; k++) {
/* Private copies so that every channel starts from the same X/Y. */
mlib_s32 X1 = X;
mlib_s32 Y1 = Y;
DTYPE *dPtr = dstPixelPtr + k;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1; horizontal neighbours of channel k
   sit at byte offsets 0, 2, 4 and 6. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[2];
s2 = srcPixelPtr[4];
s3 = srcPixelPtr[6];
/* Software-pipelined main loop: finishes the current pixel while loading
   the coefficients and first source row of the next one. The pixel whose
   channel 0 is at dstLineEnd is left for the epilogue. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
/* Step to the next pixel's source position. */
X1 += dX;
Y1 += dY;
/* Horizontal pass over the four source rows. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
/* Y coefficients for the NEXT pixel. */
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Clamp val0 to the u8 range and store the current sample. */
S32_TO_U8_SAT(dPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[2];
s2 = srcPixelPtr[4];
s3 = srcPixelPtr[6];
}
/* Epilogue: last pixel of the row for channel k, without look-ahead. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
S32_TO_U8_SAT(dPtr[0]);
}
}
return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * Bicubic affine resampling of a 3-channel interleaved mlib_u8 image
 * (non-SPARC build, integer arithmetic).
 *
 * For every destination row the three channels are processed one after the
 * other (loop over k): each pass restarts from the row's X/Y, reads source
 * samples 3 bytes apart and writes every third destination byte. Each
 * output value is a 4x4 weighted sum: horizontal sums c0..c3 reduced by
 * ">> SHIFT_X", combined vertically, rounded with ROUND_Y, reduced by
 * ">> SHIFT_Y" and clamped by S32_TO_U8_SAT.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
const mlib_s16 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
}
else {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_s32 xf0, xf1, xf2, xf3;
mlib_s32 yf0, yf1, yf2, yf3;
mlib_s32 c0, c1, c2, c3, val0;
mlib_s32 filterpos, k;
mlib_s16 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(3);
/* Address of channel 0 of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + 3 * xRight;
/* One full pass over the row per channel. */
for (k = 0; k < 3; k++) {
/* Private copies so that every channel starts from the same X/Y. */
mlib_s32 X1 = X;
mlib_s32 Y1 = Y;
DTYPE *dPtr = dstPixelPtr + k;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1; horizontal neighbours of channel k
   sit at byte offsets 0, 3, 6 and 9. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[3];
s2 = srcPixelPtr[6];
s3 = srcPixelPtr[9];
/* Software-pipelined main loop: finishes the current pixel while loading
   the coefficients and first source row of the next one. The pixel whose
   channel 0 is at dstLineEnd is left for the epilogue. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
/* Step to the next pixel's source position. */
X1 += dX;
Y1 += dY;
/* Horizontal pass over the four source rows. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
/* Y coefficients for the NEXT pixel. */
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Clamp val0 to the u8 range and store the current sample. */
S32_TO_U8_SAT(dPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[3];
s2 = srcPixelPtr[6];
s3 = srcPixelPtr[9];
}
/* Epilogue: last pixel of the row for channel k, without look-ahead. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
S32_TO_U8_SAT(dPtr[0]);
}
}
return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * Bicubic affine resampling of a 4-channel interleaved mlib_u8 image
 * (non-SPARC build, integer arithmetic).
 *
 * For every destination row the four channels are processed one after the
 * other (loop over k): each pass restarts from the row's X/Y, reads source
 * samples 4 bytes apart and writes every fourth destination byte. Each
 * output value is a 4x4 weighted sum: horizontal sums c0..c3 reduced by
 * ">> SHIFT_X", combined vertically, rounded with ROUND_Y, reduced by
 * ">> SHIFT_Y" and clamped by S32_TO_U8_SAT.
 *
 * param  transform state. It is not referenced by name in this body;
 *        NOTE(review): presumably DECLAREVAR_BC() unpacks it into the locals
 *        used below - confirm in mlib_ImageAffine.h.
 *
 * The only return statement visible here yields MLIB_SUCCESS.
 */
mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
{
DECLAREVAR_BC();
DTYPE *dstLineEnd;
const mlib_s16 *mlib_filters_table;
/* Any filter other than MLIB_BICUBIC selects the "bc2" coefficient table. */
if (filter == MLIB_BICUBIC) {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
}
else {
mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
}
/* One iteration per destination row. */
for (j = yStart; j <= yFinish; j++) {
mlib_s32 xf0, xf1, xf2, xf3;
mlib_s32 yf0, yf1, yf2, yf3;
mlib_s32 c0, c1, c2, c3, val0;
mlib_s32 filterpos, k;
mlib_s16 *fptr;
mlib_u8 s0, s1, s2, s3;
/* CLIP is defined elsewhere. NOTE(review): presumably it loads this row's
   X, Y, xRight and dstPixelPtr and may skip an empty row - confirm. */
CLIP(4);
/* Address of channel 0 of the last destination pixel of this row. */
dstLineEnd = (DTYPE *) dstData + 4 * xRight;
/* One full pass over the row per channel. */
for (k = 0; k < 4; k++) {
/* Private copies so that every channel starts from the same X/Y. */
mlib_s32 X1 = X;
mlib_s32 Y1 = Y;
DTYPE *dPtr = dstPixelPtr + k;
/* Prologue: coefficients and first source row for the first pixel.
   filterpos is applied as a BYTE offset into the coefficient table. */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Source coordinates shifted by -1; horizontal neighbours of channel k
   sit at byte offsets 0, 4, 8 and 12. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[4];
s2 = srcPixelPtr[8];
s3 = srcPixelPtr[12];
/* Software-pipelined main loop: finishes the current pixel while loading
   the coefficients and first source row of the next one. The pixel whose
   channel 0 is at dstLineEnd is left for the epilogue. */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
/* Step to the next pixel's source position. */
X1 += dX;
Y1 += dY;
/* Horizontal pass over the four source rows. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
/* X coefficients for the NEXT pixel (xf* are no longer needed). */
filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
xf0 = fptr[0];
xf1 = fptr[1];
xf2 = fptr[2];
xf3 = fptr[3];
/* Vertical pass for the current pixel; must precede the yf* reload. */
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
/* Y coefficients for the NEXT pixel. */
filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
yf0 = fptr[0];
yf1 = fptr[1];
yf2 = fptr[2];
yf3 = fptr[3];
/* Clamp val0 to the u8 range and store the current sample. */
S32_TO_U8_SAT(dPtr[0]);
/* First source row of the NEXT pixel's neighbourhood. */
xSrc = (X1 >> MLIB_SHIFT) - 1;
ySrc = (Y1 >> MLIB_SHIFT) - 1;
srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
s0 = srcPixelPtr[0];
s1 = srcPixelPtr[4];
s2 = srcPixelPtr[8];
s3 = srcPixelPtr[12];
}
/* Epilogue: last pixel of the row for channel k, without look-ahead. */
c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
S32_TO_U8_SAT(dPtr[0]);
}
}
return MLIB_SUCCESS;
}
#endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
/***************************************************************/