blob: f10cbe61eb2dba8890c6bf601d1cc97381dc6f07 [file] [log] [blame]
/*
* Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* FUNCTION
* Internal functions for mlib_ImageConv2x2 on U8/S16/U16 types:
* "nw" variants for the MLIB_EDGE_DST_NO_WRITE edge condition and
* "ext" variants that replicate source edge pixels.
*/
#include "mlib_image.h"
#include "mlib_ImageConv.h"
#include "mlib_c_ImageConv.h"
/***************************************************************/
/*
 * TYPE_64BIT is the type used in this file to move two adjacent mlib_s32
 * values with one assignment. When i386 is defined a struct of two
 * mlib_s32 members is used instead of mlib_d64.
 */
#ifdef i386 /* do not copy by mlib_d64 data type for x86 */
typedef struct {
mlib_s32 int0, int1;
} two_int;
#define TYPE_64BIT two_int
#else /* i386 */
#define TYPE_64BIT mlib_d64
#endif /* i386 ( do not copy by mlib_d64 data type for x86 ) */
/***************************************************************/
/*
 * LOAD_KERNEL_INTO_DOUBLE: divides the caller's scalef by 2**scalef_expon
 * (in steps of 2**30 while the exponent exceeds 30, so the int shift stays
 * in range) and stores scalef * kern[0..3] in k0..k3.
 * Expects scalef, scalef_expon, kern and k0..k3 to be in scope where it is
 * used and modifies scalef and scalef_expon. The expansion is a sequence of
 * statements that is not wrapped in do { } while (0).
 */
#define LOAD_KERNEL_INTO_DOUBLE() \
while (scalef_expon > 30) { \
scalef /= (1 << 30); \
scalef_expon -= 30; \
} \
\
scalef /= (1 << scalef_expon); \
\
/* keep kernel in regs */ \
k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \
k3 = scalef * kern[3]
/***************************************************************/
/*
 * GET_SRC_DST_PARAMETERS(type): fills the caller's locals from the images
 * named src and dst. hgt, wid and nchannel are read from src only; sll and
 * dll are the src/dst strides divided by sizeof(type); adr_src and adr_dst
 * are the data pointers cast to type*.
 */
#define GET_SRC_DST_PARAMETERS(type) \
hgt = mlib_ImageGetHeight(src); \
wid = mlib_ImageGetWidth(src); \
nchannel = mlib_ImageGetChannels(src); \
sll = mlib_ImageGetStride(src) / sizeof(type); \
dll = mlib_ImageGetStride(dst) / sizeof(type); \
adr_src = (type *)mlib_ImageGetData(src); \
adr_dst = (type *)mlib_ImageGetData(dst)
/***************************************************************/
/*
 * CLAMP_S32(x): converts x to mlib_s32, saturating at MLIB_S32_MIN and
 * MLIB_S32_MAX. When MLIB_USE_FTOI_CLAMPING is defined it is a plain cast.
 * The saturating form evaluates x more than once.
 */
#ifndef MLIB_USE_FTOI_CLAMPING
#define CLAMP_S32(x) \
(((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : \
(((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))
#else
#define CLAMP_S32(x) ((mlib_s32)(x))
#endif /* MLIB_USE_FTOI_CLAMPING */
/***************************************************************/
/*
 * STORE2(res0, res1): masks both results with DTYPE_MASK, casts them to
 * DTYPE and writes them to dp[0] and dp[chan1] (dp and chan1 come from the
 * enclosing scope). On little-endian builds with long long support the two
 * arguments are stored in swapped order, because callers there pass the
 * upper half of a 64-bit word as res0 and the lower half as res1.
 */
#if defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG)
/* NB: Explicit cast to DTYPE is necessary to avoid warning from Microsoft VC compiler.
And we need to explicitly define cast behavior if source exceeds destination range.
(it is undefined according to C99 spec). We use mask here because this macro is typically
used to extract bit regions. */
#define STORE2(res0, res1) \
dp[0 ] = (DTYPE) ((res1) & DTYPE_MASK); \
dp[chan1] = (DTYPE) ((res0) & DTYPE_MASK)
#else
#define STORE2(res0, res1) \
dp[0 ] = (DTYPE) ((res0) & DTYPE_MASK); \
dp[chan1] = (DTYPE) ((res1) & DTYPE_MASK)
#endif /* defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG) */
/***************************************************************/
/*
 * LOAD_BUFF(buff): copies the source samples sp[0] and sp[chan1] into
 * buff[i] and buff[i + 1] (sp, chan1 and i come from the enclosing scope).
 * Without _NO_LONGLONG both samples are packed into a single mlib_s64
 * store; the halves are ordered according to _LITTLE_ENDIAN, intended so
 * that sp[0] still lands in buff[i].
 */
#ifdef _NO_LONGLONG
#define LOAD_BUFF(buff) \
buff[i ] = sp[0]; \
buff[i + 1] = sp[chan1]
#else /* _NO_LONGLONG */
#ifdef _LITTLE_ENDIAN
#define LOAD_BUFF(buff) \
*(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | ((mlib_s64)sp[0] & 0xffffffff)
#else /* _LITTLE_ENDIAN */
#define LOAD_BUFF(buff) \
*(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | ((mlib_s64)sp[chan1] & 0xffffffff)
#endif /* _LITTLE_ENDIAN */
#endif /* _NO_LONGLONG */
/***************************************************************/
/*
 * Overlay of one TYPE_64BIT value (d64) with two mlib_s32 members
 * (i32s.i0, i32s.i1); used to load or store a pair of row-buffer entries
 * with one assignment and then access them individually.
 */
typedef union {
TYPE_64BIT d64;
struct {
mlib_s32 i0, i1;
} i32s;
} d64_2x32;
/***************************************************************/
/* D_KER: subtracted from width and height by the "nw" functions and added
   to the width by the "ext" functions. */
#define D_KER 1
/* BUFF_LINE: largest even-rounded row length served from the on-stack
   buffer; longer rows are allocated with mlib_malloc. */
#define BUFF_LINE 256
/***************************************************************/
/* XOR_80(x): toggles bit 0x80 of the lvalue x in place. */
#define XOR_80(x) x ^= 0x80
/*
 * mlib_ImageXor80_aa - XOR every byte of a wid x hgt byte region with 0x80.
 *
 *   dl   first byte of the first row
 *   wid  number of bytes processed in each row
 *   hgt  number of rows
 *   str  distance in bytes between the starts of consecutive rows
 *
 * When wid == str the rows are contiguous, so the region is handled as one
 * long row. Each row is processed byte-wise up to the first 8-byte aligned
 * address, then 8 bytes per iteration, then byte-wise for what is left.
 */
void mlib_ImageXor80_aa(mlib_u8  *dl,
                        mlib_s32 wid,
                        mlib_s32 hgt,
                        mlib_s32 str)
{
  mlib_u8 *dp, *dend;
#ifdef _NO_LONGLONG
  mlib_u32 cadd = 0x80808080;
#else /* _NO_LONGLONG */
  mlib_u64 cadd = MLIB_U64_CONST(0x8080808080808080);
#endif /* _NO_LONGLONG */
  mlib_s32 j;

  if (wid == str) {
    wid *= hgt;
    hgt = 1;
  }

  for (j = 0; j < hgt; j++) {
    dend = dl + wid;

    /* leading bytes up to the first 8-byte boundary */
    for (dp = dl; ((mlib_addr)dp & 7) && (dp < dend); dp++) XOR_80(dp[0]);

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    /*
     * Test the number of bytes left rather than comparing against
     * "dend - 8": for rows shorter than 8 bytes that expression would
     * form a pointer in front of the row, which is undefined.
     */
    for (; (dend - dp) >= 8; dp += 8) {
#ifdef _NO_LONGLONG
      *((mlib_s32*)dp) ^= cadd;
      *((mlib_s32*)dp+1) ^= cadd;
#else /* _NO_LONGLONG */
      *((mlib_u64*)dp) ^= cadd;
#endif /* _NO_LONGLONG */
    }

    /* trailing bytes */
    for (; (dp < dend); dp++) XOR_80(dp[0]);

    dl += str;
  }
}
/***************************************************************/
/*
 * mlib_ImageXor80 - XOR the bytes of selected channels with 0x80.
 *
 *   dl     first byte of the first row
 *   wid    number of pixels per row
 *   hgt    number of rows
 *   str    distance in bytes between the starts of consecutive rows
 *   nchan  number of interleaved channels per pixel
 *   cmask  channel mask; bit (nchan - 1 - c) selects channel c
 */
void mlib_ImageXor80(mlib_u8  *dl,
                     mlib_s32 wid,
                     mlib_s32 hgt,
                     mlib_s32 str,
                     mlib_s32 nchan,
                     mlib_s32 cmask)
{
  mlib_s32 row, ch, x;

  for (row = 0; row < hgt; row++, dl += str) {
    for (ch = 0; ch < nchan; ch++) {
      mlib_u8 *chan_base;

      /* leave unselected channels untouched */
      if ((cmask & (1 << (nchan - 1 - ch))) == 0) continue;

      chan_base = dl + ch;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (x = 0; x < wid; x++) {
        XOR_80(chan_base[x*nchan]);
      }
    }
  }
}
/***************************************************************/
/* Element type and STORE2 mask for the functions that follow: DTYPE is the
   pixel type of src/dst, DTYPE_MASK keeps the low 16 bits of a result. */
#define DTYPE mlib_s16
#define DTYPE_MASK 0xffff
/*
 * mlib_c_conv2x2nw_s16 - 2x2 convolution of mlib_s16 data that leaves the
 * last column and the last row of dst unwritten.
 *
 *   dst           destination image (data pointer and stride are read)
 *   src           source image; width, height and channel count come from it
 *   kern          four integer kernel coefficients, kern[0..3]
 *   scalef_expon  coefficients are multiplied by 65536 / 2**scalef_expon
 *   cmask         channel mask; bit (nchannel - 1 - c) enables channel c
 *
 * Returns MLIB_FAILURE when the row buffers cannot be allocated and
 * MLIB_SUCCESS otherwise.
 *
 * For every enabled channel and every (x, y) with x < width - 1 and
 * y < height - 1 the value
 *   k0*src(x,y) + k1*src(x+1,y) + k2*src(x,y+1) + k3*src(x+1,y+1)
 * is saturated to mlib_s32 and its upper 16 bits are stored.
 *
 * Three source rows are kept as mlib_s32 in buff0/buff1/buff2 (sample x is
 * stored at index x - 1); buffo holds the row of 32-bit results.
 */
mlib_status mlib_c_conv2x2nw_s16(mlib_image       *dst,
                                 const mlib_image *src,
                                 const mlib_s32   *kern,
                                 mlib_s32         scalef_expon,
                                 mlib_s32         cmask)
{
  mlib_d64 buff_arr[2*BUFF_LINE];
  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
  DTYPE    *adr_src, *sl, *sp, *sl1;
  DTYPE    *adr_dst, *dl, *dp;
  mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
  mlib_d64 p00, p01, p02,
           p10, p11, p12;
  mlib_s32 wid, hgt, sll, dll, wid1;
  mlib_s32 nchannel, chan1, chan2;
  mlib_s32 i, j, c;

  LOAD_KERNEL_INTO_DOUBLE();
  GET_SRC_DST_PARAMETERS(DTYPE);

  /*
   * A source smaller than 2x2 produces no output at all; return before the
   * priming loop below would read a second row that does not exist.
   */
  if (wid <= D_KER || hgt <= D_KER) return MLIB_SUCCESS;

  /* even length so every row buffer holds whole 64-bit pairs */
  wid1 = (wid + 1) &~ 1;

  if (wid1 > BUFF_LINE) {
    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);

    if (pbuff == NULL) return MLIB_FAILURE;
  }

  buffo = pbuff;
  buff0 = buffo + wid1;
  buff1 = buff0 + wid1;
  buff2 = buff1 + wid1;

  chan1 = nchannel;
  chan2 = chan1 + chan1;

  /* from here on wid/hgt are the numbers of output columns/rows */
  wid -= D_KER;
  hgt -= D_KER;

  for (c = 0; c < nchannel; c++) {
    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;

    sl = adr_src + c;
    dl = adr_dst + c;

    sl1 = sl + sll;

    /* prime buff0/buff1 with source rows 0 and 1 */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (i = 0; i < wid + D_KER; i++) {
      buff0[i - 1] = (mlib_s32)sl[i*chan1];
      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
    }

    /*
     * Row fetched ahead into buff2: source row 2 when it exists, otherwise
     * the last row again (the fetched data is then never used).
     */
    sl = (hgt > 1) ? sl1 + sll : sl1;

    for (j = 0; j < hgt; j++) {
      sp = sl;
      dp = dl;

      buff2[-1] = (mlib_s32)sp[0];
      sp += chan1;

      p02 = buff0[-1];
      p12 = buff1[-1];

      /* two outputs per iteration */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = 0; i <= (wid - 2); i += 2) {
#ifdef _NO_LONGLONG
        mlib_s32 o64_1, o64_2;
#else /* _NO_LONGLONG */
        mlib_s64 o64;
#endif /* _NO_LONGLONG */
        d64_2x32 sd0, sd1, dd;

        p00 = p02; p10 = p12;

        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
        p01 = (mlib_d64)sd0.i32s.i0;
        p02 = (mlib_d64)sd0.i32s.i1;
        p11 = (mlib_d64)sd1.i32s.i0;
        p12 = (mlib_d64)sd1.i32s.i1;

        LOAD_BUFF(buff2);

        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3);
        *(TYPE_64BIT*)(buffo + i) = dd.d64;

#ifdef _NO_LONGLONG
        o64_1 = buffo[i];
        o64_2 = buffo[i+1];
        STORE2(o64_1 >> 16, o64_2 >> 16);
#else /* _NO_LONGLONG */
        o64 = *(mlib_s64*)(buffo + i);
        STORE2(o64 >> 48, o64 >> 16);
#endif /* _NO_LONGLONG */

        sp += chan2;
        dp += chan2;
      }

      /* leftover output when the output width is odd */
      for (; i < wid; i++) {
        p00 = buff0[i - 1]; p10 = buff1[i - 1];
        p01 = buff0[i];     p11 = buff1[i];

        buff2[i] = (mlib_s32)sp[0];

        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
        dp[0] = buffo[i] >> 16;

        sp += chan1;
        dp += chan1;
      }

      /* advance the look-ahead row only while another source row exists */
      if (j < hgt - 2) sl += sll;
      dl += dll;

      /* rotate the three source row buffers */
      buffT = buff0;
      buff0 = buff1;
      buff1 = buff2;
      buff2 = buffT;
    }
  }

  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);

  return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * mlib_c_conv2x2ext_s16 - 2x2 convolution of mlib_s16 data that writes a
 * full wid x hgt output (wid/hgt are read from src), replicating the last
 * source column and/or row where dx_r / dy_b indicate they are missing.
 *
 * dst           destination image (data pointer and stride are read)
 * src           source image (width, height, channels, stride, data)
 * dx_l, dy_t    not referenced in this function
 * dx_r          taken off the number of buffered source columns; when
 *               non-zero the last buffered sample is a copy of its left
 *               neighbour
 * dy_b          limits how far the source row pointer advances: it never
 *               moves past row (hgt - dy_b)
 * kern          four integer kernel coefficients, kern[0..3]
 * scalef_expon  coefficients are multiplied by 65536 / 2**scalef_expon
 * cmask         channel mask; bit (nchannel - 1 - c) enables channel c
 *
 * Returns MLIB_FAILURE when the row buffers cannot be allocated and
 * MLIB_SUCCESS otherwise.
 */
mlib_status mlib_c_conv2x2ext_s16(mlib_image *dst,
const mlib_image *src,
mlib_s32 dx_l,
mlib_s32 dx_r,
mlib_s32 dy_t,
mlib_s32 dy_b,
const mlib_s32 *kern,
mlib_s32 scalef_expon,
mlib_s32 cmask)
{
mlib_d64 buff_arr[2*BUFF_LINE];
mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
DTYPE *adr_src, *sl, *sp, *sl1;
DTYPE *adr_dst, *dl, *dp;
mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
mlib_d64 p00, p01, p02,
p10, p11, p12;
mlib_s32 wid, hgt, sll, dll, wid1;
mlib_s32 nchannel, chan1, chan2;
mlib_s32 i, j, c, swid;
/* scale the kernel into k0..k3 and read the image geometry */
LOAD_KERNEL_INTO_DOUBLE();
GET_SRC_DST_PARAMETERS(DTYPE);
/* samples needed per row: one more than the output width */
swid = wid + D_KER;
/* even length so every row buffer holds whole 64-bit pairs */
wid1 = (swid + 1) &~ 1;
/* four row buffers of wid1 entries; use the heap when the stack array is too small */
if (wid1 > BUFF_LINE) {
pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
if (pbuff == NULL) return MLIB_FAILURE;
}
/* buffo: 32-bit results; buff0/buff1/buff2: three source rows */
buffo = pbuff;
buff0 = buffo + wid1;
buff1 = buff0 + wid1;
buff2 = buff1 + wid1;
/* swid now counts only the columns copied from the source during priming */
swid -= dx_r;
chan1 = nchannel;
chan2 = chan1 + chan1;
for (c = 0; c < nchannel; c++) {
/* skip channels that are not selected by cmask */
if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
sl = adr_src + c;
dl = adr_dst + c;
/* second source row, or the first row again when there is none */
if ((hgt - dy_b) > 0) sl1 = sl + sll;
else sl1 = sl;
/* prime buff0/buff1 with the first two rows; sample i is stored at index i - 1 */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (i = 0; i < swid; i++) {
buff0[i - 1] = (mlib_s32)sl[i*chan1];
buff1[i - 1] = (mlib_s32)sl1[i*chan1];
}
/* replicate the last copied sample into the missing right column */
if (dx_r != 0) {
buff0[swid - 1] = buff0[swid - 2];
buff1[swid - 1] = buff1[swid - 2];
}
/* row fetched ahead into buff2: the third row, or sl1 again when there is none */
if ((hgt - dy_b) > 1) sl = sl1 + sll;
else sl = sl1;
/* one output row per iteration: buff0/buff1 are filtered while buff2 is filled from sl */
for (j = 0; j < hgt; j++) {
sp = sl;
dp = dl;
buff2[-1] = (mlib_s32)sp[0];
sp += chan1;
p02 = buff0[-1];
p12 = buff1[-1];
/* two outputs per iteration */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (i = 0; i <= (wid - 2); i += 2) {
#ifdef _NO_LONGLONG
mlib_s32 o64_1, o64_2;
#else /* _NO_LONGLONG */
mlib_s64 o64;
#endif /* _NO_LONGLONG */
d64_2x32 sd0, sd1, dd;
/* right-hand samples of the previous pair become the left-hand ones */
p00 = p02; p10 = p12;
sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
p01 = (mlib_d64)sd0.i32s.i0;
p02 = (mlib_d64)sd0.i32s.i1;
p11 = (mlib_d64)sd1.i32s.i0;
p12 = (mlib_d64)sd1.i32s.i1;
/* NOTE(review): reads wid + 1 samples per row regardless of dx_r; with dx_r != 0
   the extra sample is overwritten after the loops - confirm the caller's row
   always has that sample readable */
LOAD_BUFF(buff2);
dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3);
*(TYPE_64BIT*)(buffo + i) = dd.d64;
/* store the upper 16 bits of each 32-bit result */
#ifdef _NO_LONGLONG
o64_1 = buffo[i];
o64_2 = buffo[i+1];
STORE2(o64_1 >> 16, o64_2 >> 16);
#else /* _NO_LONGLONG */
o64 = *(mlib_s64*)(buffo + i);
STORE2(o64 >> 48, o64 >> 16);
#endif /* _NO_LONGLONG */
sp += chan2;
dp += chan2;
}
/* leftover output when wid is odd */
for (; i < wid; i++) {
p00 = buff0[i - 1]; p10 = buff1[i - 1];
p01 = buff0[i]; p11 = buff1[i];
buff2[i] = (mlib_s32)sp[0];
buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
dp[0] = buffo[i] >> 16;
sp += chan1;
dp += chan1;
}
/* replicate the right column of the row just fetched */
if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
/* advance the source only while further rows remain; afterwards the same row is reused */
if (j < hgt - dy_b - 2) sl += sll;
dl += dll;
/* rotate the three source row buffers */
buffT = buff0;
buff0 = buff1;
buff1 = buff2;
buff2 = buffT;
}
}
if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
return MLIB_SUCCESS;
}
/***************************************************************/
/* Switch the pixel type to mlib_u16 for the functions that follow;
   DTYPE_MASK keeps its earlier value 0xffff. */
#undef DTYPE
#define DTYPE mlib_u16
/*
 * mlib_c_conv2x2nw_u16 - 2x2 convolution of mlib_u16 data that leaves the
 * last column and the last row of dst unwritten.
 *
 *   dst           destination image (data pointer and stride are read)
 *   src           source image; width, height and channel count come from it
 *   kern          four integer kernel coefficients, kern[0..3]
 *   scalef_expon  coefficients are multiplied by 65536 / 2**scalef_expon
 *   cmask         channel mask; bit (nchannel - 1 - c) enables channel c
 *
 * Returns MLIB_FAILURE when the row buffers cannot be allocated and
 * MLIB_SUCCESS otherwise.
 *
 * For every enabled channel and every (x, y) with x < width - 1 and
 * y < height - 1 the weighted sum of the 2x2 neighbourhood is formed.
 * doff (0x7FFF8000) is subtracted before the signed 32-bit clamp and bit 31
 * is flipped afterwards; the net effect is the sum plus 0x8000 saturated
 * to the unsigned 32-bit range, of which the upper 16 bits are stored.
 *
 * Three source rows are kept as mlib_s32 in buff0/buff1/buff2 (sample x is
 * stored at index x - 1); buffo holds the row of 32-bit results.
 */
mlib_status mlib_c_conv2x2nw_u16(mlib_image       *dst,
                                 const mlib_image *src,
                                 const mlib_s32   *kern,
                                 mlib_s32         scalef_expon,
                                 mlib_s32         cmask)
{
  mlib_d64 buff_arr[2*BUFF_LINE];
  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
  DTYPE    *adr_src, *sl, *sp, *sl1;
  DTYPE    *adr_dst, *dl, *dp;
  mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
  mlib_d64 p00, p01, p02,
           p10, p11, p12;
  mlib_s32 wid, hgt, sll, dll, wid1;
  mlib_s32 nchannel, chan1, chan2;
  mlib_s32 i, j, c;
  mlib_d64 doff = 0x7FFF8000;

  LOAD_KERNEL_INTO_DOUBLE();
  GET_SRC_DST_PARAMETERS(DTYPE);

  /*
   * A source smaller than 2x2 produces no output at all; return before the
   * priming loop below would read a second row that does not exist.
   */
  if (wid <= D_KER || hgt <= D_KER) return MLIB_SUCCESS;

  /* even length so every row buffer holds whole 64-bit pairs */
  wid1 = (wid + 1) &~ 1;

  if (wid1 > BUFF_LINE) {
    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);

    if (pbuff == NULL) return MLIB_FAILURE;
  }

  buffo = pbuff;
  buff0 = buffo + wid1;
  buff1 = buff0 + wid1;
  buff2 = buff1 + wid1;

  chan1 = nchannel;
  chan2 = chan1 + chan1;

  /* from here on wid/hgt are the numbers of output columns/rows */
  wid -= D_KER;
  hgt -= D_KER;

  for (c = 0; c < nchannel; c++) {
    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;

    sl = adr_src + c;
    dl = adr_dst + c;

    sl1 = sl + sll;

    /* prime buff0/buff1 with source rows 0 and 1 */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (i = 0; i < wid + D_KER; i++) {
      buff0[i - 1] = (mlib_s32)sl[i*chan1];
      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
    }

    /*
     * Row fetched ahead into buff2: source row 2 when it exists, otherwise
     * the last row again (the fetched data is then never used).
     */
    sl = (hgt > 1) ? sl1 + sll : sl1;

    for (j = 0; j < hgt; j++) {
      sp = sl;
      dp = dl;

      buff2[-1] = (mlib_s32)sp[0];
      sp += chan1;

      p02 = buff0[-1];
      p12 = buff1[-1];

      /* two outputs per iteration */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = 0; i <= (wid - 2); i += 2) {
#ifdef _NO_LONGLONG
        mlib_s32 o64_1, o64_2;
#else /* _NO_LONGLONG */
        mlib_s64 o64;
#endif /* _NO_LONGLONG */
        d64_2x32 sd0, sd1, dd;

        p00 = p02; p10 = p12;

        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
        p01 = (mlib_d64)sd0.i32s.i0;
        p02 = (mlib_d64)sd0.i32s.i1;
        p11 = (mlib_d64)sd1.i32s.i0;
        p12 = (mlib_d64)sd1.i32s.i1;

        LOAD_BUFF(buff2);

        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff);
        *(TYPE_64BIT*)(buffo + i) = dd.d64;

        /* flip bit 31 of both results, then store their upper 16 bits */
#ifdef _NO_LONGLONG
        o64_1 = buffo[i];
        o64_2 = buffo[i+1];
        o64_1 = o64_1 ^ 0x80000000U;
        o64_2 = o64_2 ^ 0x80000000U;
        STORE2(o64_1 >> 16, o64_2 >> 16);
#else /* _NO_LONGLONG */
        o64 = *(mlib_s64*)(buffo + i);
        o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000);
        STORE2(o64 >> 48, o64 >> 16);
#endif /* _NO_LONGLONG */

        sp += chan2;
        dp += chan2;
      }

      /* leftover output when the output width is odd */
      for (; i < wid; i++) {
        p00 = buff0[i - 1]; p10 = buff1[i - 1];
        p01 = buff0[i];     p11 = buff1[i];

        buff2[i] = (mlib_s32)sp[0];

        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
        dp[0] = (buffo[i] >> 16) ^ 0x8000;

        sp += chan1;
        dp += chan1;
      }

      /* advance the look-ahead row only while another source row exists */
      if (j < hgt - 2) sl += sll;
      dl += dll;

      /* rotate the three source row buffers */
      buffT = buff0;
      buff0 = buff1;
      buff1 = buff2;
      buff2 = buffT;
    }
  }

  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);

  return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * mlib_c_conv2x2ext_u16 - 2x2 convolution of mlib_u16 data that writes a
 * full wid x hgt output (wid/hgt are read from src), replicating the last
 * source column and/or row where dx_r / dy_b indicate they are missing.
 *
 * dst           destination image (data pointer and stride are read)
 * src           source image (width, height, channels, stride, data)
 * dx_l, dy_t    not referenced in this function
 * dx_r          taken off the number of buffered source columns; when
 *               non-zero the last buffered sample is a copy of its left
 *               neighbour
 * dy_b          limits how far the source row pointer advances: it never
 *               moves past row (hgt - dy_b)
 * kern          four integer kernel coefficients, kern[0..3]
 * scalef_expon  coefficients are multiplied by 65536 / 2**scalef_expon
 * cmask         channel mask; bit (nchannel - 1 - c) enables channel c
 *
 * Returns MLIB_FAILURE when the row buffers cannot be allocated and
 * MLIB_SUCCESS otherwise.
 *
 * doff (0x7FFF8000) is subtracted before the signed 32-bit clamp and bit 31
 * is flipped afterwards; the net effect is the weighted sum plus 0x8000
 * saturated to the unsigned 32-bit range, of which the upper 16 bits are
 * stored.
 */
mlib_status mlib_c_conv2x2ext_u16(mlib_image *dst,
const mlib_image *src,
mlib_s32 dx_l,
mlib_s32 dx_r,
mlib_s32 dy_t,
mlib_s32 dy_b,
const mlib_s32 *kern,
mlib_s32 scalef_expon,
mlib_s32 cmask)
{
mlib_d64 buff_arr[2*BUFF_LINE];
mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
DTYPE *adr_src, *sl, *sp, *sl1;
DTYPE *adr_dst, *dl, *dp;
mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
mlib_d64 p00, p01, p02,
p10, p11, p12;
mlib_s32 wid, hgt, sll, dll, wid1;
mlib_s32 nchannel, chan1, chan2;
mlib_s32 i, j, c, swid;
mlib_d64 doff = 0x7FFF8000;
/* scale the kernel into k0..k3 and read the image geometry */
LOAD_KERNEL_INTO_DOUBLE();
GET_SRC_DST_PARAMETERS(DTYPE);
/* samples needed per row: one more than the output width */
swid = wid + D_KER;
/* even length so every row buffer holds whole 64-bit pairs */
wid1 = (swid + 1) &~ 1;
/* four row buffers of wid1 entries; use the heap when the stack array is too small */
if (wid1 > BUFF_LINE) {
pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
if (pbuff == NULL) return MLIB_FAILURE;
}
/* buffo: 32-bit results; buff0/buff1/buff2: three source rows */
buffo = pbuff;
buff0 = buffo + wid1;
buff1 = buff0 + wid1;
buff2 = buff1 + wid1;
/* swid now counts only the columns copied from the source during priming */
swid -= dx_r;
chan1 = nchannel;
chan2 = chan1 + chan1;
for (c = 0; c < nchannel; c++) {
/* skip channels that are not selected by cmask */
if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
sl = adr_src + c;
dl = adr_dst + c;
/* second source row, or the first row again when there is none */
if ((hgt - dy_b) > 0) sl1 = sl + sll;
else sl1 = sl;
/* prime buff0/buff1 with the first two rows; sample i is stored at index i - 1 */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (i = 0; i < swid; i++) {
buff0[i - 1] = (mlib_s32)sl[i*chan1];
buff1[i - 1] = (mlib_s32)sl1[i*chan1];
}
/* replicate the last copied sample into the missing right column */
if (dx_r != 0) {
buff0[swid - 1] = buff0[swid - 2];
buff1[swid - 1] = buff1[swid - 2];
}
/* row fetched ahead into buff2: the third row, or sl1 again when there is none */
if ((hgt - dy_b) > 1) sl = sl1 + sll;
else sl = sl1;
/* one output row per iteration: buff0/buff1 are filtered while buff2 is filled from sl */
for (j = 0; j < hgt; j++) {
sp = sl;
dp = dl;
buff2[-1] = (mlib_s32)sp[0];
sp += chan1;
p02 = buff0[-1];
p12 = buff1[-1];
/* two outputs per iteration */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (i = 0; i <= (wid - 2); i += 2) {
#ifdef _NO_LONGLONG
mlib_s32 o64_1, o64_2;
#else /* _NO_LONGLONG */
mlib_s64 o64;
#endif /* _NO_LONGLONG */
d64_2x32 sd0, sd1, dd;
/* right-hand samples of the previous pair become the left-hand ones */
p00 = p02; p10 = p12;
sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
p01 = (mlib_d64)sd0.i32s.i0;
p02 = (mlib_d64)sd0.i32s.i1;
p11 = (mlib_d64)sd1.i32s.i0;
p12 = (mlib_d64)sd1.i32s.i1;
/* NOTE(review): reads wid + 1 samples per row regardless of dx_r; with dx_r != 0
   the extra sample is overwritten after the loops - confirm the caller's row
   always has that sample readable */
LOAD_BUFF(buff2);
dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff);
*(TYPE_64BIT*)(buffo + i) = dd.d64;
/* flip bit 31 of both results, then store their upper 16 bits */
#ifdef _NO_LONGLONG
o64_1 = buffo[i];
o64_2 = buffo[i+1];
o64_1 = o64_1 ^ 0x80000000U;
o64_2 = o64_2 ^ 0x80000000U;
STORE2(o64_1 >> 16, o64_2 >> 16);
#else /* _NO_LONGLONG */
o64 = *(mlib_s64*)(buffo + i);
o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000);
STORE2(o64 >> 48, o64 >> 16);
#endif /* _NO_LONGLONG */
sp += chan2;
dp += chan2;
}
/* leftover output when wid is odd */
for (; i < wid; i++) {
p00 = buff0[i - 1]; p10 = buff1[i - 1];
p01 = buff0[i]; p11 = buff1[i];
buff2[i] = (mlib_s32)sp[0];
buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
dp[0] = (buffo[i] >> 16) ^ 0x8000;
sp += chan1;
dp += chan1;
}
/* replicate the right column of the row just fetched */
if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
/* advance the source only while further rows remain; afterwards the same row is reused */
if (j < hgt - dy_b - 2) sl += sll;
dl += dll;
/* rotate the three source row buffers */
buffT = buff0;
buff0 = buff1;
buff1 = buff2;
buff2 = buffT;
}
}
if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * Switch the pixel type to mlib_u8 for the functions that follow and narrow
 * the STORE2 mask to one byte, so the value cast to DTYPE is always within
 * the range of the 8-bit destination (the stored bytes are the same as with
 * the 16-bit mask).
 */
#undef DTYPE
#define DTYPE mlib_u8
#undef DTYPE_MASK
#define DTYPE_MASK 0xff
/*
 * mlib_c_conv2x2nw_u8 - 2x2 convolution of mlib_u8 data that leaves the
 * last column and the last row of dst unwritten.
 *
 *   dst           destination image (data pointer and stride are read)
 *   src           source image; width, height and channel count come from it
 *   kern          four integer kernel coefficients, kern[0..3]
 *   scalef_expon  coefficients are multiplied by 2**24 / 2**scalef_expon
 *   cmask         channel mask; bit (nchannel - 1 - c) enables channel c
 *
 * Returns MLIB_FAILURE when the row buffers cannot be allocated and
 * MLIB_SUCCESS otherwise.
 *
 * For every enabled channel and every (x, y) with x < width - 1 and
 * y < height - 1 the weighted sum of the 2x2 neighbourhood is formed,
 * 2**31 is subtracted, the result is saturated to mlib_s32 and its top
 * byte is stored. That byte differs from the final value by bit 0x80,
 * which the mlib_ImageXor80 / mlib_ImageXor80_aa pass at the end flips
 * for all written pixels.
 *
 * Three source rows are kept as mlib_s32 in buff0/buff1/buff2 (sample x is
 * stored at index x - 1); buffo holds the row of 32-bit results.
 */
mlib_status mlib_c_conv2x2nw_u8(mlib_image       *dst,
                                const mlib_image *src,
                                const mlib_s32   *kern,
                                mlib_s32         scalef_expon,
                                mlib_s32         cmask)
{
  mlib_d64 buff_arr[2*BUFF_LINE];
  mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
  DTYPE    *adr_src, *sl, *sp, *sl1;
  DTYPE    *adr_dst, *dl, *dp;
  mlib_d64 k0, k1, k2, k3, scalef = (1 << 24);
  mlib_d64 p00, p01, p02,
           p10, p11, p12;
  mlib_s32 wid, hgt, sll, dll, wid1;
  mlib_s32 nchannel, chan1, chan2;
  mlib_s32 i, j, c;

  LOAD_KERNEL_INTO_DOUBLE();
  GET_SRC_DST_PARAMETERS(DTYPE);

  /*
   * A source smaller than 2x2 produces no output at all; return before the
   * priming loop below would read a second row that does not exist.
   */
  if (wid <= D_KER || hgt <= D_KER) return MLIB_SUCCESS;

  /* even length so every row buffer holds whole 64-bit pairs */
  wid1 = (wid + 1) &~ 1;

  if (wid1 > BUFF_LINE) {
    pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);

    if (pbuff == NULL) return MLIB_FAILURE;
  }

  buffo = pbuff;
  buff0 = buffo + wid1;
  buff1 = buff0 + wid1;
  buff2 = buff1 + wid1;

  chan1 = nchannel;
  chan2 = chan1 + chan1;

  /* from here on wid/hgt are the numbers of output columns/rows */
  wid -= D_KER;
  hgt -= D_KER;

  for (c = 0; c < nchannel; c++) {
    if (!(cmask & (1 << (nchannel - 1 - c)))) continue;

    sl = adr_src + c;
    dl = adr_dst + c;

    sl1 = sl + sll;

    /* prime buff0/buff1 with source rows 0 and 1 */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (i = 0; i < wid + D_KER; i++) {
      buff0[i - 1] = (mlib_s32)sl[i*chan1];
      buff1[i - 1] = (mlib_s32)sl1[i*chan1];
    }

    /*
     * Row fetched ahead into buff2: source row 2 when it exists, otherwise
     * the last row again (the fetched data is then never used).
     */
    sl = (hgt > 1) ? sl1 + sll : sl1;

    for (j = 0; j < hgt; j++) {
      sp = sl;
      dp = dl;

      buff2[-1] = (mlib_s32)sp[0];
      sp += chan1;

      p02 = buff0[-1];
      p12 = buff1[-1];

      /* two outputs per iteration */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = 0; i <= (wid - 2); i += 2) {
#ifdef _NO_LONGLONG
        mlib_s32 o64_1, o64_2;
#else /* _NO_LONGLONG */
        mlib_s64 o64;
#endif /* _NO_LONGLONG */
        d64_2x32 sd0, sd1, dd;

        p00 = p02; p10 = p12;

        sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
        sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
        p01 = (mlib_d64)sd0.i32s.i0;
        p02 = (mlib_d64)sd0.i32s.i1;
        p11 = (mlib_d64)sd1.i32s.i0;
        p12 = (mlib_d64)sd1.i32s.i1;

        LOAD_BUFF(buff2);

        dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
        dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31));
        *(TYPE_64BIT*)(buffo + i) = dd.d64;

        /* store the top byte of each 32-bit result */
#ifdef _NO_LONGLONG
        o64_1 = buffo[i];
        o64_2 = buffo[i+1];
        STORE2(o64_1 >> 24, o64_2 >> 24);
#else /* _NO_LONGLONG */
        o64 = *(mlib_s64*)(buffo + i);
        STORE2(o64 >> 56, o64 >> 24);
#endif /* _NO_LONGLONG */

        sp += chan2;
        dp += chan2;
      }

      /* leftover output when the output width is odd */
      for (; i < wid; i++) {
        p00 = buff0[i - 1]; p10 = buff1[i - 1];
        p01 = buff0[i];     p11 = buff1[i];

        buff2[i] = (mlib_s32)sp[0];

        buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
        dp[0] = (buffo[i] >> 24);

        sp += chan1;
        dp += chan1;
      }

      /* advance the look-ahead row only while another source row exists */
      if (j < hgt - 2) sl += sll;
      dl += dll;

      /* rotate the three source row buffers */
      buffT = buff0;
      buff0 = buff1;
      buff1 = buff2;
      buff2 = buffT;
    }
  }

  /* flip bit 0x80 of every written byte: per channel when only some
     channels are selected, over whole rows otherwise */
  {
    mlib_s32 amask = (1 << nchannel) - 1;

    if ((cmask & amask) != amask) {
      mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
    } else {
      mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
    }
  }

  if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);

  return MLIB_SUCCESS;
}
/***************************************************************/
/*
 * mlib_c_conv2x2ext_u8 - 2x2 convolution of mlib_u8 data that writes a
 * full wid x hgt output (wid/hgt are read from src), replicating the last
 * source column and/or row where dx_r / dy_b indicate they are missing.
 *
 * dst           destination image (data pointer and stride are read)
 * src           source image (width, height, channels, stride, data)
 * dx_l, dy_t    not referenced in this function
 * dx_r          taken off the number of buffered source columns; when
 *               non-zero the last buffered sample is a copy of its left
 *               neighbour
 * dy_b          limits how far the source row pointer advances: it never
 *               moves past row (hgt - dy_b)
 * kern          four integer kernel coefficients, kern[0..3]
 * scalef_expon  coefficients are multiplied by 2**24 / 2**scalef_expon
 * cmask         channel mask; bit (nchannel - 1 - c) enables channel c
 *
 * Returns MLIB_FAILURE when the row buffers cannot be allocated and
 * MLIB_SUCCESS otherwise.
 *
 * The main loops subtract 2**31 before the signed 32-bit clamp and store
 * the top byte of the result; the mlib_ImageXor80 / mlib_ImageXor80_aa pass
 * at the end flips bit 0x80 of every written byte.
 */
mlib_status mlib_c_conv2x2ext_u8(mlib_image *dst,
const mlib_image *src,
mlib_s32 dx_l,
mlib_s32 dx_r,
mlib_s32 dy_t,
mlib_s32 dy_b,
const mlib_s32 *kern,
mlib_s32 scalef_expon,
mlib_s32 cmask)
{
/* NOTE(review): sized 4*BUFF_LINE here although only 4*wid1 mlib_s32 entries with
   wid1 <= BUFF_LINE are taken from it, i.e. 2*BUFF_LINE mlib_d64 would cover the use below */
mlib_d64 buff_arr[4*BUFF_LINE];
mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
DTYPE *adr_src, *sl, *sp, *sl1;
DTYPE *adr_dst, *dl, *dp;
mlib_d64 k0, k1, k2, k3, scalef = (1 << 24);
mlib_d64 p00, p01, p02,
p10, p11, p12;
mlib_s32 wid, hgt, sll, dll, wid1;
mlib_s32 nchannel, chan1, chan2;
mlib_s32 i, j, c, swid;
/* scale the kernel into k0..k3 and read the image geometry */
LOAD_KERNEL_INTO_DOUBLE();
GET_SRC_DST_PARAMETERS(DTYPE);
/* samples needed per row: one more than the output width */
swid = wid + D_KER;
/* even length so every row buffer holds whole 64-bit pairs */
wid1 = (swid + 1) &~ 1;
/* four row buffers of wid1 entries; use the heap when the stack array is too small */
if (wid1 > BUFF_LINE) {
pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
if (pbuff == NULL) return MLIB_FAILURE;
}
/* buffo: 32-bit results; buff0/buff1/buff2: three source rows */
buffo = pbuff;
buff0 = buffo + wid1;
buff1 = buff0 + wid1;
buff2 = buff1 + wid1;
chan1 = nchannel;
chan2 = chan1 + chan1;
/* swid now counts only the columns copied from the source during priming */
swid -= dx_r;
for (c = 0; c < nchannel; c++) {
/* skip channels that are not selected by cmask */
if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
sl = adr_src + c;
dl = adr_dst + c;
/* second source row, or the first row again when there is none */
if ((hgt - dy_b) > 0) sl1 = sl + sll;
else sl1 = sl;
/* prime buff0/buff1 with the first two rows; sample i is stored at index i - 1 */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (i = 0; i < swid; i++) {
buff0[i - 1] = (mlib_s32)sl[i*chan1];
buff1[i - 1] = (mlib_s32)sl1[i*chan1];
}
/* replicate the last copied sample into the missing right column */
if (dx_r != 0) {
buff0[swid - 1] = buff0[swid - 2];
buff1[swid - 1] = buff1[swid - 2];
}
/* row fetched ahead into buff2: the third row, or sl1 again when there is none */
if ((hgt - dy_b) > 1) sl = sl1 + sll;
else sl = sl1;
/* one output row per iteration: buff0/buff1 are filtered while buff2 is filled from sl */
for (j = 0; j < hgt; j++) {
sp = sl;
dp = dl;
buff2[-1] = (mlib_s32)sp[0];
sp += chan1;
p02 = buff0[-1];
p12 = buff1[-1];
/* two outputs per iteration */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
for (i = 0; i <= (wid - 2); i += 2) {
#ifdef _NO_LONGLONG
mlib_s32 o64_1, o64_2;
#else /* _NO_LONGLONG */
mlib_s64 o64;
#endif /* _NO_LONGLONG */
d64_2x32 sd0, sd1, dd;
/* right-hand samples of the previous pair become the left-hand ones */
p00 = p02; p10 = p12;
sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
p01 = (mlib_d64)sd0.i32s.i0;
p02 = (mlib_d64)sd0.i32s.i1;
p11 = (mlib_d64)sd1.i32s.i0;
p12 = (mlib_d64)sd1.i32s.i1;
/* NOTE(review): reads wid + 1 samples per row regardless of dx_r; with dx_r != 0
   the extra sample is overwritten after the loops - confirm the caller's row
   always has that sample readable */
LOAD_BUFF(buff2);
dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31));
*(TYPE_64BIT*)(buffo + i) = dd.d64;
/* store the top byte of each 32-bit result */
#ifdef _NO_LONGLONG
o64_1 = buffo[i];
o64_2 = buffo[i+1];
STORE2(o64_1 >> 24, o64_2 >> 24);
#else /* _NO_LONGLONG */
o64 = *(mlib_s64*)(buffo + i);
STORE2(o64 >> 56, o64 >> 24);
#endif /* _NO_LONGLONG */
sp += chan2;
dp += chan2;
}
/* leftover output when wid is odd */
for (; i < wid; i++) {
p00 = buff0[i - 1]; p10 = buff1[i - 1];
p01 = buff0[i]; p11 = buff1[i];
buff2[i] = (mlib_s32)sp[0];
buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
dp[0] = (buffo[i] >> 24);
sp += chan1;
dp += chan1;
}
/* replicate the right column of the row just fetched */
if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
/* advance the source only while further rows remain; afterwards the same row is reused */
if (j < hgt - dy_b - 2) sl += sll;
dl += dll;
/* rotate the three source row buffers */
buffT = buff0;
buff0 = buff1;
buff1 = buff2;
buff2 = buffT;
}
}
/* flip bit 0x80 of every written byte: per channel when only some
   channels are selected, over whole rows otherwise */
{
mlib_s32 amask = (1 << nchannel) - 1;
if ((cmask & amask) != amask) {
mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
} else {
mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
}
}
if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
return MLIB_SUCCESS;
}
/***************************************************************/