| /* |
| * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| |
| |
| /* |
| * The functions step along the lines from xLeft to xRight and apply |
| * the bicubic filtering. |
| * |
| */ |
| |
| #include "vis_proto.h" |
| #include "mlib_ImageAffine.h" |
| #include "mlib_v_ImageFilters.h" |
| |
| /*************************************************************** |
|  * File-local settings for the signed 16-bit bicubic kernels. |
|  *   DTYPE       - element type used for destination pointers below. |
|  *   FILTER_BITS - NOTE(review): FILTER_SHIFT / FILTER_MASK used by the |
|  *                 macros below are not defined in this view; presumably |
|  *                 an included header derives them from this value - |
|  *                 confirm. |
|  ***************************************************************/ |
| #define DTYPE mlib_s16 |
| |
| #define FILTER_BITS 9 |
| |
| /*************************************************************** |
|  * sPtr is a shorthand for srcPixelPtr (declared outside this view, |
|  * presumably by DECLAREVAR_BC - confirm). |
|  ***************************************************************/ |
| #define sPtr srcPixelPtr |
| |
| /*************************************************************** |
|  * NEXT_PIXEL_1BC_S16() |
|  * Derives the integer source coordinates from the fixed-point X and Y |
|  * (shifted right by MLIB_SHIFT), each minus 1, and points sPtr at that |
|  * mlib_s16 element of source row ySrc. The loaders below read four |
|  * consecutive rows starting at sPtr, so the -1 places the 4x4 window |
|  * one row above and one column left of the truncated position. |
|  * Writes: xSrc, ySrc, sPtr.  Reads: X, Y, lineAddr. |
|  ***************************************************************/ |
| #define NEXT_PIXEL_1BC_S16() \ |
| xSrc = (X >> MLIB_SHIFT)-1; \ |
| ySrc = (Y >> MLIB_SHIFT)-1; \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc |
| |
| /*************************************************************** |
|  * LOAD_BC_S16_1CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) |
|  * Loads everything needed to filter one 1-channel pixel: |
|  *   - row0..row3: for each of four consecutive source rows, sets the |
|  *     alignment from sPtr, reads the two 8-byte words at and after |
|  *     sPtr rounded down to 8 bytes, and combines them with |
|  *     vis_faligndata (i.e. the 8 bytes starting at sPtr); |
|  *   - yFilter0..3: four doubles at byte offset filterposy*4 in |
|  *     mlib_filters_s16_4; |
|  *   - xFilter: one double at byte offset filterposx in |
|  *     mlib_filters_s16. |
|  * Afterwards X and Y are advanced by dX and dY. |
|  * Side effects: sPtr is left advanced by 3*srcYStride; data0, data1, |
|  * dpSrc, filterposx, filterposy and the alignment state are clobbered. |
|  ***************************************************************/ |
| #define LOAD_BC_S16_1CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| row0 = vis_faligndata(data0, data1); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| row1 = vis_faligndata(data0, data1); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| row2 = vis_faligndata(data0, data1); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| row3 = vis_faligndata(data0, data1); \ |
| filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| yFilter0 = yPtr[0]; \ |
| yFilter1 = yPtr[1]; \ |
| yFilter2 = yPtr[2]; \ |
| yFilter3 = yPtr[3]; \ |
| filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ |
| xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ |
| X += dX; \ |
| Y += dY |
| |
| /*************************************************************** |
|  * RESULT_1BC_S16_1PIXEL() |
|  * Filters the single pixel whose rows and coefficients are currently |
|  * held in row0..row3, yFilter0..3 and xFilter: |
|  *   1. each row is multiplied by its y filter (fmul8sux16 + fmul8ulx16 |
|  *      partial products added with fpadd16) and the four products are |
|  *      accumulated into sum; |
|  *   2. sum is multiplied by xFilter the same way; |
|  *   3. the four 16-bit lanes are added horizontally: hi+lo halves with |
|  *      fpadd16s, widened by fmuld8sux16 with f_x01000100, then the two |
|  *      32-bit halves added; |
|  *   4. the 32-bit total is written to the low half of d1 (the high half |
|  *      of d1 is left as it was) and packed into res. |
|  * Callers in this file store the pixel with vis_st_u16(res, ...). |
|  * Does not load anything and does not touch X, Y or sPtr. |
|  ***************************************************************/ |
| #define RESULT_1BC_S16_1PIXEL() \ |
| u0 = vis_fmul8sux16(row0, yFilter0); \ |
| u1 = vis_fmul8ulx16(row0, yFilter0); \ |
| u2 = vis_fmul8sux16(row1, yFilter1); \ |
| v0 = vis_fpadd16(u0, u1); \ |
| u3 = vis_fmul8ulx16(row1, yFilter1); \ |
| u0 = vis_fmul8sux16(row2, yFilter2); \ |
| v1 = vis_fpadd16(u2, u3); \ |
| u1 = vis_fmul8ulx16(row2, yFilter2); \ |
| sum = vis_fpadd16(v0, v1); \ |
| u2 = vis_fmul8sux16(row3, yFilter3); \ |
| v2 = vis_fpadd16(u0, u1); \ |
| u3 = vis_fmul8ulx16(row3, yFilter3); \ |
| sum = vis_fpadd16(sum, v2); \ |
| v3 = vis_fpadd16(u2, u3); \ |
| sum = vis_fpadd16(sum, v3); \ |
| d00 = vis_fmul8sux16(sum, xFilter); \ |
| d10 = vis_fmul8ulx16(sum, xFilter); \ |
| d0 = vis_fpadd16(d00, d10); \ |
| p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \ |
| d0 = vis_fmuld8sux16(f_x01000100, p0); \ |
| d1 = vis_write_lo(d1, vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0))); \ |
| res = vis_fpackfix_pair(d1, d1) |
| |
| /*************************************************************** |
|  * BC_S16_1CH(ind, mlib_filters_s16, mlib_filters_s16_4) |
|  * One stage of the software-pipelined 1-channel loop. The statements |
|  * of three activities are interleaved, so their order matters: |
|  *   - compute: the pixel currently held in row0..row3 / yFilter0..3 / |
|  *     xFilter is filtered vertically and multiplied by xFilter; the |
|  *     four 16-bit lane products (not yet summed horizontally) are |
|  *     stored in d0<ind> (d00..d03), for FADD_1BC_S16 to reduce; |
|  *   - load: rows and filter coefficients of the next pixel are read |
|  *     from sPtr; each rowN / filter variable is overwritten only after |
|  *     its last use by the compute part; |
|  *   - advance: X and Y are stepped by dX and dY (the filter positions |
|  *     are taken from the values before the step) and sPtr is set from |
|  *     the stepped X and Y for the pixel after the one just loaded. |
|  * ind is token-pasted onto "d0" to select the output variable. |
|  * Clobbers u0..u3, v0..v3, sum, d0, d1, data0, data1, dpSrc, xSrc, |
|  * ySrc, filterposx, filterposy and the alignment state. |
|  ***************************************************************/ |
| #define BC_S16_1CH(ind, mlib_filters_s16, mlib_filters_s16_4) \ |
| u0 = vis_fmul8sux16(row0, yFilter0); \ |
| u1 = vis_fmul8ulx16(row0, yFilter0); \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| u2 = vis_fmul8sux16(row1, yFilter1); \ |
| v0 = vis_fpadd16(u0, u1); \ |
| data0 = dpSrc[0]; \ |
| filterposy = (Y >> FILTER_SHIFT); \ |
| u3 = vis_fmul8ulx16(row1, yFilter1); \ |
| data1 = dpSrc[1]; \ |
| row0 = vis_faligndata(data0, data1); \ |
| filterposx = (X >> FILTER_SHIFT); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| u0 = vis_fmul8sux16(row2, yFilter2); \ |
| v1 = vis_fpadd16(u2, u3); \ |
| data0 = dpSrc[0]; \ |
| u1 = vis_fmul8ulx16(row2, yFilter2); \ |
| sum = vis_fpadd16(v0, v1); \ |
| X += dX; \ |
| data1 = dpSrc[1]; \ |
| row1 = vis_faligndata(data0, data1); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| u2 = vis_fmul8sux16(row3, yFilter3); \ |
| v2 = vis_fpadd16(u0, u1); \ |
| Y += dY; \ |
| xSrc = (X >> MLIB_SHIFT)-1; \ |
| data0 = dpSrc[0]; \ |
| u3 = vis_fmul8ulx16(row3, yFilter3); \ |
| sum = vis_fpadd16(sum, v2); \ |
| ySrc = (Y >> MLIB_SHIFT)-1; \ |
| data1 = dpSrc[1]; \ |
| filterposy &= FILTER_MASK; \ |
| row2 = vis_faligndata(data0, data1); \ |
| sPtr += srcYStride; \ |
| filterposx &= FILTER_MASK; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| v3 = vis_fpadd16(u2, u3); \ |
| data1 = dpSrc[1]; \ |
| row3 = vis_faligndata(data0, data1); \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| yFilter0 = yPtr[0]; \ |
| sum = vis_fpadd16(sum, v3); \ |
| yFilter1 = yPtr[1]; \ |
| d0 = vis_fmul8sux16(sum, xFilter); \ |
| yFilter2 = yPtr[2]; \ |
| d1 = vis_fmul8ulx16(sum, xFilter); \ |
| yFilter3 = yPtr[3]; \ |
| xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ |
| d0##ind = vis_fpadd16(d0, d1); \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc |
| |
| /*************************************************************** |
|  * FADD_1BC_S16() |
|  * Finishes four pixels produced by BC_S16_1CH(0..3): for each of |
|  * d00..d03 the four 16-bit lanes are added horizontally (hi+lo halves |
|  * with fpadd16s, widened with fmuld8sux16 by f_x01000100, then the two |
|  * 32-bit halves added). The four 32-bit totals are paired into d0/d1 |
|  * and packed into res, which the caller stores as one 8-byte word |
|  * (four mlib_s16 pixels, d00's pixel first). |
|  * Clobbers p0..p3 and d0..d3. |
|  ***************************************************************/ |
| #define FADD_1BC_S16() \ |
| p0 = vis_fpadd16s(vis_read_hi(d00), vis_read_lo(d00)); \ |
| p1 = vis_fpadd16s(vis_read_hi(d01), vis_read_lo(d01)); \ |
| p2 = vis_fpadd16s(vis_read_hi(d02), vis_read_lo(d02)); \ |
| p3 = vis_fpadd16s(vis_read_hi(d03), vis_read_lo(d03)); \ |
| d0 = vis_fmuld8sux16(f_x01000100, p0); \ |
| d1 = vis_fmuld8sux16(f_x01000100, p1); \ |
| d2 = vis_fmuld8sux16(f_x01000100, p2); \ |
| d3 = vis_fmuld8sux16(f_x01000100, p3); \ |
| d0 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0)), \ |
| vis_fpadd32s(vis_read_hi(d1), vis_read_lo(d1))); \ |
| d1 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d2), vis_read_lo(d2)), \ |
| vis_fpadd32s(vis_read_hi(d3), vis_read_lo(d3))); \ |
| res = vis_fpackfix_pair(d0, d1) |
| |
| /*************************************************************** |
|  * mlib_ImageAffine_s16_1ch_bc |
|  * Bicubic affine kernel for 1-channel signed 16-bit images. |
|  * |
|  * param - affine parameter block. Its fields are unpacked into locals |
|  *         (filter, srcYStride, yStart, yFinish, lineAddr, ...) by |
|  *         DECLAREVAR_BC(), which is defined outside this view. |
|  * Returns MLIB_SUCCESS (the only return in the function). |
|  * |
|  * The filter tables are chosen by the filter mode: the *_bc tables for |
|  * MLIB_BICUBIC, the *_bc2 tables otherwise. For every destination row |
|  * j in [yStart, yFinish] the span xLeft..xRight is processed in three |
|  * phases: |
|  *   1. single pixels until dstPixelPtr is 8-byte aligned; |
|  *   2. if at least 10 pixels remain, a software-pipelined section that |
|  *      stores four pixels per 8-byte store and ends with two single |
|  *      pixels (10 pixels plus 4 per loop iteration); |
|  *   3. the remaining pixels one at a time. |
|  ***************************************************************/ |
| mlib_status mlib_ImageAffine_s16_1ch_bc (mlib_affine_param *param) |
| { |
| DECLAREVAR_BC(); |
| mlib_s32 filterposx, filterposy; |
| mlib_d64 data0, data1; |
| mlib_d64 sum; |
| mlib_d64 row0, row1, row2, row3; |
| mlib_f32 p0, p1, p2, p3; |
| mlib_d64 xFilter, yFilter0, yFilter1, yFilter2, yFilter3; |
| mlib_d64 v0, v1, v2, v3; |
| mlib_d64 u0, u1, u2, u3; |
| mlib_d64 d0, d1, d2, d3; |
| mlib_d64 d00, d10, d01, d02, d03; |
| mlib_d64 *yPtr; |
| mlib_d64 *dpSrc; |
| mlib_s32 align, cols, i; |
| mlib_d64 res; |
| mlib_f32 f_x01000100 = vis_to_float(0x01000100); |
| const mlib_s16 *mlib_filters_table ; |
| const mlib_s16 *mlib_filters_table_4; |
| |
| if (filter == MLIB_BICUBIC) { |
| mlib_filters_table = mlib_filters_s16_bc; |
| mlib_filters_table_4 = mlib_filters_s16_bc_4; |
| } else { |
| mlib_filters_table = mlib_filters_s16_bc2; |
| mlib_filters_table_4 = mlib_filters_s16_bc2_4; |
| } |
| |
| srcYStride >>= 1; /* stride is added to an mlib_s16 pointer below; presumably converts bytes to elements - confirm */ |
| |
| for (j = yStart; j <= yFinish; j++) { |
| |
| vis_write_gsr(10 << 3); /* NOTE(review): presumably sets the GSR scale factor used by the pack instructions - confirm against the VIS manual */ |
| |
| CLIP(1); /* defined outside this view; presumably sets xLeft, xRight, X, Y and dstPixelPtr for row j - confirm */ |
| |
| cols = xRight - xLeft + 1; |
| align = (8 - ((mlib_addr)dstPixelPtr) & 7) & 7; /* parses as ((8 - addr) & 7) & 7: bytes up to the next 8-byte boundary */ |
| align >>= 1; /* bytes -> mlib_s16 pixels */ |
| align = (cols < align)? cols : align; |
| |
| for (i = 0; i < align; i++) { |
| NEXT_PIXEL_1BC_S16(); |
| LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| RESULT_1BC_S16_1PIXEL(); |
| vis_st_u16(res, dstPixelPtr++); |
| } |
| |
| if (i <= cols - 10) { /* pipelined section needs at least 10 remaining pixels */ |
| |
| NEXT_PIXEL_1BC_S16(); |
| LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| |
| NEXT_PIXEL_1BC_S16(); |
| |
| BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4); |
| |
| FADD_1BC_S16(); |
| |
| BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4); |
| |
| #pragma pipeloop(0) |
| for (; i <= cols - 14; i += 4) { |
| *(mlib_d64*)dstPixelPtr = res; /* store the four pixels finished by the previous FADD */ |
| FADD_1BC_S16(); |
| BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4); |
| dstPixelPtr += 4; |
| } |
| |
| *(mlib_d64*)dstPixelPtr = res; /* drain the pipeline: two groups of four ... */ |
| dstPixelPtr += 4; |
| FADD_1BC_S16(); |
| *(mlib_d64*)dstPixelPtr = res; |
| dstPixelPtr += 4; |
| |
| RESULT_1BC_S16_1PIXEL(); /* ... the pixel already loaded by the last BC_S16_1CH ... */ |
| vis_st_u16(res, dstPixelPtr++); |
| |
| LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); /* ... and the pixel sPtr was left pointing at */ |
| RESULT_1BC_S16_1PIXEL(); |
| vis_st_u16(res, dstPixelPtr++); |
| i += 10; |
| } |
| |
| for (; i < cols; i++) { |
| NEXT_PIXEL_1BC_S16(); |
| LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| RESULT_1BC_S16_1PIXEL(); |
| vis_st_u16(res, dstPixelPtr++); |
| } |
| } |
| |
| return MLIB_SUCCESS; |
| } |
| |
| /*************************************************************** |
|  * NEXT_PIXEL_2BC_S16() |
|  * 2-channel counterpart of NEXT_PIXEL_1BC_S16: derives xSrc and ySrc |
|  * from the fixed-point X and Y (each shifted right by MLIB_SHIFT, |
|  * minus 1) and points sPtr at element xSrc*2 of source row ySrc, |
|  * i.e. two mlib_s16 values per pixel. |
|  * Writes: xSrc, ySrc, sPtr.  Reads: X, Y, lineAddr. |
|  ***************************************************************/ |
| #define NEXT_PIXEL_2BC_S16() \ |
| xSrc = (X >> MLIB_SHIFT)-1; \ |
| ySrc = (Y >> MLIB_SHIFT)-1; \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1) |
| |
| /*************************************************************** |
|  * LOAD_BC_S16_2CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) |
|  * Loads everything needed to filter one 2-channel pixel: |
|  *   - rowN0 / rowN1 (N = 0..3): for each of four consecutive source |
|  *     rows, three 8-byte words are read starting at sPtr rounded down |
|  *     to 8 bytes and combined pairwise with vis_faligndata, giving the |
|  *     16 bytes that start at sPtr; |
|  *   - yFilter0..3: four doubles at byte offset filterposy*4 in |
|  *     mlib_filters_s16_4; |
|  *   - xFilter: one double at byte offset filterposx in |
|  *     mlib_filters_s16. |
|  * Afterwards X and Y are advanced by dX and dY. |
|  * Side effects: sPtr is left advanced by 3*srcYStride; data0..data2, |
|  * dpSrc, filterposx, filterposy and the alignment state are clobbered. |
|  ***************************************************************/ |
| #define LOAD_BC_S16_2CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| row00 = vis_faligndata(data0, data1); \ |
| row01 = vis_faligndata(data1, data2); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| row10 = vis_faligndata(data0, data1); \ |
| row11 = vis_faligndata(data1, data2); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| row20 = vis_faligndata(data0, data1); \ |
| row21 = vis_faligndata(data1, data2); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| row30 = vis_faligndata(data0, data1); \ |
| row31 = vis_faligndata(data1, data2); \ |
| filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| yFilter0 = yPtr[0]; \ |
| yFilter1 = yPtr[1]; \ |
| yFilter2 = yPtr[2]; \ |
| yFilter3 = yPtr[3]; \ |
| filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ |
| xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ |
| X += dX; \ |
| Y += dY |
| |
| /*************************************************************** |
|  * RESULT_2BC_S16_1PIXEL() |
|  * Filters the single 2-channel pixel currently held in rowN0 / rowN1, |
|  * yFilter0..3 and xFilter: |
|  *   1. both halves of every row are multiplied by the row's y filter |
|  *      (fmul8sux16 + fmul8ulx16 partial products added with fpadd16) |
|  *      and accumulated into sum0 (rowN0) and sum1 (rowN1); |
|  *   2. xFilter is rearranged with vis_fpmerge into xFilter0 / xFilter1 |
|  *      (presumably each 16-bit x coefficient duplicated for the two |
|  *      interleaved channels - confirm); |
|  *   3. sum0 * xFilter0 and sum1 * xFilter1 are added, the hi and lo |
|  *      halves are added with fpadd16s, widened with fmuld8sux16 by |
|  *      f_x01000100 and packed. |
|  * res = vis_fpackfix_pair(d0, d0), so both 32-bit halves of res hold |
|  * the same packed pixel. |
|  * Does not load anything and does not touch X, Y or sPtr. |
|  ***************************************************************/ |
| #define RESULT_2BC_S16_1PIXEL() \ |
| u00 = vis_fmul8sux16(row00, yFilter0); \ |
| dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \ |
| u01 = vis_fmul8ulx16(row00, yFilter0); \ |
| dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \ |
| u10 = vis_fmul8sux16(row01, yFilter0); \ |
| dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \ |
| u11 = vis_fmul8ulx16(row01, yFilter0); \ |
| dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \ |
| u20 = vis_fmul8sux16(row10, yFilter1); \ |
| v00 = vis_fpadd16(u00, u01); \ |
| u21 = vis_fmul8ulx16(row10, yFilter1); \ |
| v01 = vis_fpadd16(u10, u11); \ |
| u00 = vis_fmul8sux16(row11, yFilter1); \ |
| xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \ |
| u01 = vis_fmul8ulx16(row11, yFilter1); \ |
| u10 = vis_fmul8sux16(row20, yFilter2); \ |
| u11 = vis_fmul8ulx16(row20, yFilter2); \ |
| v10 = vis_fpadd16(u20, u21); \ |
| sum0 = vis_fpadd16(v00, v10); \ |
| u20 = vis_fmul8sux16(row21, yFilter2); \ |
| v11 = vis_fpadd16(u00, u01); \ |
| u21 = vis_fmul8ulx16(row21, yFilter2); \ |
| xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \ |
| u00 = vis_fmul8sux16(row30, yFilter3); \ |
| v20 = vis_fpadd16(u10, u11); \ |
| sum1 = vis_fpadd16(v01, v11); \ |
| u01 = vis_fmul8ulx16(row30, yFilter3); \ |
| sum0 = vis_fpadd16(sum0, v20); \ |
| v21 = vis_fpadd16(u20, u21); \ |
| u10 = vis_fmul8sux16(row31, yFilter3); \ |
| v30 = vis_fpadd16(u00, u01); \ |
| sum1 = vis_fpadd16(sum1, v21); \ |
| u11 = vis_fmul8ulx16(row31, yFilter3); \ |
| sum0 = vis_fpadd16(sum0, v30); \ |
| v31 = vis_fpadd16(u10, u11); \ |
| sum1 = vis_fpadd16(sum1, v31); \ |
| d00 = vis_fmul8sux16(sum0, xFilter0); \ |
| d10 = vis_fmul8ulx16(sum0, xFilter0); \ |
| d20 = vis_fmul8sux16(sum1, xFilter1); \ |
| d30 = vis_fmul8ulx16(sum1, xFilter1); \ |
| d0 = vis_fpadd16(d00, d10); \ |
| d1 = vis_fpadd16(d20, d30); \ |
| d0 = vis_fpadd16(d0, d1); \ |
| p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \ |
| d0 = vis_fmuld8sux16(f_x01000100, p0); \ |
| res = vis_fpackfix_pair(d0, d0) |
| |
| /*************************************************************** |
|  * BC_S16_2CH(ind, mlib_filters_s16, mlib_filters_s16_4) |
|  * One stage of the software-pipelined 2-channel loop. The statements |
|  * of three activities are interleaved, so their order matters: |
|  *   - compute: the pixel currently held in rowN0 / rowN1, yFilter0..3 |
|  *     and xFilter is filtered vertically into sum0 / sum1 and |
|  *     multiplied by xFilter0 / xFilter1 (built from xFilter with |
|  *     vis_fpmerge); the two products are left in d0<ind> and d1<ind> |
|  *     (d00/d10 or d01/d11) for FADD_2BC_S16 to reduce; |
|  *   - load: rows and filter coefficients of the next pixel are read |
|  *     from sPtr; each variable is overwritten only after its last use |
|  *     by the compute part; |
|  *   - advance: X and Y are stepped by dX and dY (the filter positions |
|  *     are taken from the values before the step) and sPtr is set from |
|  *     the stepped X and Y for the pixel after the one just loaded. |
|  * ind is token-pasted onto "d0" and "d1" to select the outputs. |
|  * Clobbers the u and v temporaries, sum0, sum1, d0..d3, dr, dr1, |
|  * data0..data2, dpSrc, xSrc, ySrc, filterposx, filterposy and the |
|  * alignment state. |
|  ***************************************************************/ |
| #define BC_S16_2CH(ind, mlib_filters_s16, mlib_filters_s16_4) \ |
| u00 = vis_fmul8sux16(row00, yFilter0); \ |
| dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \ |
| u01 = vis_fmul8ulx16(row00, yFilter0); \ |
| dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \ |
| u10 = vis_fmul8sux16(row01, yFilter0); \ |
| dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \ |
| u11 = vis_fmul8ulx16(row01, yFilter0); \ |
| dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| u20 = vis_fmul8sux16(row10, yFilter1); \ |
| v00 = vis_fpadd16(u00, u01); \ |
| u21 = vis_fmul8ulx16(row10, yFilter1); \ |
| data0 = dpSrc[0]; \ |
| filterposy = (Y >> FILTER_SHIFT); \ |
| v01 = vis_fpadd16(u10, u11); \ |
| data1 = dpSrc[1]; \ |
| u00 = vis_fmul8sux16(row11, yFilter1); \ |
| xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \ |
| data2 = dpSrc[2]; \ |
| u01 = vis_fmul8ulx16(row11, yFilter1); \ |
| row00 = vis_faligndata(data0, data1); \ |
| u10 = vis_fmul8sux16(row20, yFilter2); \ |
| row01 = vis_faligndata(data1, data2); \ |
| filterposx = (X >> FILTER_SHIFT); \ |
| sPtr += srcYStride; \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| u11 = vis_fmul8ulx16(row20, yFilter2); \ |
| v10 = vis_fpadd16(u20, u21); \ |
| data0 = dpSrc[0]; \ |
| sum0 = vis_fpadd16(v00, v10); \ |
| X += dX; \ |
| data1 = dpSrc[1]; \ |
| u20 = vis_fmul8sux16(row21, yFilter2); \ |
| v11 = vis_fpadd16(u00, u01); \ |
| data2 = dpSrc[2]; \ |
| row10 = vis_faligndata(data0, data1); \ |
| u21 = vis_fmul8ulx16(row21, yFilter2); \ |
| row11 = vis_faligndata(data1, data2); \ |
| sPtr += srcYStride; \ |
| xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| u00 = vis_fmul8sux16(row30, yFilter3); \ |
| v20 = vis_fpadd16(u10, u11); \ |
| Y += dY; \ |
| xSrc = (X >> MLIB_SHIFT)-1; \ |
| sum1 = vis_fpadd16(v01, v11); \ |
| data0 = dpSrc[0]; \ |
| u01 = vis_fmul8ulx16(row30, yFilter3); \ |
| sum0 = vis_fpadd16(sum0, v20); \ |
| ySrc = (Y >> MLIB_SHIFT)-1; \ |
| data1 = dpSrc[1]; \ |
| v21 = vis_fpadd16(u20, u21); \ |
| u10 = vis_fmul8sux16(row31, yFilter3); \ |
| data2 = dpSrc[2]; \ |
| v30 = vis_fpadd16(u00, u01); \ |
| filterposy &= FILTER_MASK; \ |
| row20 = vis_faligndata(data0, data1); \ |
| sum1 = vis_fpadd16(sum1, v21); \ |
| u11 = vis_fmul8ulx16(row31, yFilter3); \ |
| row21 = vis_faligndata(data1, data2); \ |
| sPtr += srcYStride; \ |
| filterposx &= FILTER_MASK; \ |
| v31 = vis_fpadd16(u10, u11); \ |
| vis_alignaddr(sPtr, 0); \ |
| dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \ |
| data0 = dpSrc[0]; \ |
| sum0 = vis_fpadd16(sum0, v30); \ |
| data1 = dpSrc[1]; \ |
| sum1 = vis_fpadd16(sum1, v31); \ |
| data2 = dpSrc[2]; \ |
| row30 = vis_faligndata(data0, data1); \ |
| d0 = vis_fmul8sux16(sum0, xFilter0); \ |
| row31 = vis_faligndata(data1, data2); \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| d1 = vis_fmul8ulx16(sum0, xFilter0); \ |
| yFilter0 = yPtr[0]; \ |
| d2 = vis_fmul8sux16(sum1, xFilter1); \ |
| yFilter1 = yPtr[1]; \ |
| d3 = vis_fmul8ulx16(sum1, xFilter1); \ |
| d0##ind = vis_fpadd16(d0, d1); \ |
| yFilter2 = yPtr[2]; \ |
| yFilter3 = yPtr[3]; \ |
| d1##ind = vis_fpadd16(d2, d3); \ |
| xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1) |
| |
| /*************************************************************** |
|  * FADD_2BC_S16() |
|  * Finishes the two pixels produced by BC_S16_2CH(0) and BC_S16_2CH(1): |
|  * d00+d10 (first pixel) and d01+d11 (second pixel) are formed, the hi |
|  * and lo halves of each are added with fpadd16s, widened with |
|  * fmuld8sux16 by f_x01000100 and packed together into res (first pixel |
|  * from d0, second from d1). |
|  * Clobbers d0, d1, d2, p0, p1. |
|  ***************************************************************/ |
| #define FADD_2BC_S16() \ |
| d0 = vis_fpadd16(d00, d10); \ |
| d2 = vis_fpadd16(d01, d11); \ |
| p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \ |
| p1 = vis_fpadd16s(vis_read_hi(d2), vis_read_lo(d2)); \ |
| d0 = vis_fmuld8sux16(f_x01000100, p0); \ |
| d1 = vis_fmuld8sux16(f_x01000100, p1); \ |
| res = vis_fpackfix_pair(d0, d1) |
| |
| /*************************************************************** |
|  * mlib_ImageAffine_s16_2ch_bc |
|  * Bicubic affine kernel for 2-channel signed 16-bit images. |
|  * |
|  * param - affine parameter block. Its fields are unpacked into locals |
|  *         (filter, srcYStride, yStart, yFinish, lineAddr, dstData, ...) |
|  *         by DECLAREVAR_BC(), which is defined outside this view. |
|  * Returns MLIB_SUCCESS (the only return in the function). |
|  * |
|  * The filter tables are chosen by the filter mode: the *_bc tables for |
|  * MLIB_BICUBIC, the *_bc2 tables otherwise. For every destination row |
|  * j in [yStart, yFinish] the span is consumed by up to four sections, |
|  * each entered only if enough pixels remain: |
|  *   - 6 or more: software-pipelined section (6 pixels plus 2 per loop |
|  *     iteration); |
|  *   - 4 or more: a short pipelined section for exactly 4 pixels; |
|  *   - 2 or more: two independently filtered pixels; |
|  *   - 1: a final single pixel, clipped to the end of the line. |
|  * |
|  * Stores do not require an aligned destination: dp is the aligned |
|  * address returned by vis_alignaddr(dstPixelPtr, 0), each 8-byte result |
|  * is rotated with vis_faligndata(res, res) using the alignment derived |
|  * from 8 - dstPixelPtr, and written with two partial stores - mask for |
|  * the current word and ~mask for the following one. |
|  ***************************************************************/ |
| mlib_status mlib_ImageAffine_s16_2ch_bc (mlib_affine_param *param) |
| { |
| DECLAREVAR_BC(); |
| DTYPE *dstLineEnd; |
| mlib_s32 filterposx, filterposy; |
| mlib_d64 data0, data1, data2; |
| mlib_d64 sum0, sum1; |
| mlib_d64 row00, row10, row20, row30; |
| mlib_d64 row01, row11, row21, row31; |
| mlib_f32 p0, p1; |
| mlib_d64 xFilter, xFilter0, xFilter1; |
| mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; |
| mlib_d64 v00, v01, v10, v11, v20, v21, v30, v31; |
| mlib_d64 u00, u01, u10, u11, u20, u21; |
| mlib_d64 d0, d1, d2, d3; |
| mlib_d64 d00, d10, d20, d30, d01, d11; |
| mlib_d64 *yPtr; |
| mlib_d64 *dp, *dpSrc; |
| mlib_s32 cols, i, mask, emask; |
| mlib_d64 res, res1; |
| mlib_d64 dr, dr1; |
| mlib_f32 f_x01000100 = vis_to_float(0x01000100); |
| const mlib_s16 *mlib_filters_table ; |
| const mlib_s16 *mlib_filters_table_4; |
| |
| if (filter == MLIB_BICUBIC) { |
| mlib_filters_table = mlib_filters_s16_bc; |
| mlib_filters_table_4 = mlib_filters_s16_bc_4; |
| } else { |
| mlib_filters_table = mlib_filters_s16_bc2; |
| mlib_filters_table_4 = mlib_filters_s16_bc2_4; |
| } |
| |
| srcYStride >>= 1; /* stride is added to an mlib_s16 pointer below; presumably converts bytes to elements - confirm */ |
| |
| for (j = yStart; j <= yFinish; j++) { |
| |
| vis_write_gsr(10 << 3); /* NOTE(review): presumably sets the GSR scale factor used by the pack instructions - confirm against the VIS manual */ |
| |
| CLIP(2); /* defined outside this view; presumably sets xLeft, xRight, X, Y and dstPixelPtr for row j - confirm */ |
| dstLineEnd = (DTYPE*)dstData + 2 * xRight; |
| |
| cols = xRight - xLeft + 1; |
| dp = vis_alignaddr(dstPixelPtr, 0); |
| dstLineEnd += 1; /* now the second channel of pixel xRight, i.e. the last element written on this row */ |
| mask = vis_edge16(dstPixelPtr, dstLineEnd); |
| i = 0; |
| |
| if (i <= cols - 6) { /* pipelined section needs at least 6 pixels */ |
| |
| NEXT_PIXEL_2BC_S16(); |
| LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| |
| NEXT_PIXEL_2BC_S16(); |
| |
| BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); |
| |
| FADD_2BC_S16(); |
| |
| BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); |
| |
| #pragma pipeloop(0) |
| for (; i <= cols-8; i += 2) { |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); /* BC_S16_2CH changed the alignment; restore the one used for stores */ |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| FADD_2BC_S16(); |
| BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); |
| } |
| |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); /* drain the pipeline: two pairs already computed ... */ |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| |
| FADD_2BC_S16(); |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| |
| RESULT_2BC_S16_1PIXEL(); /* ... then the pixel already loaded by the last BC_S16_2CH ... */ |
| res1 = res; |
| |
| LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); /* ... and the pixel sPtr was left pointing at */ |
| RESULT_2BC_S16_1PIXEL(); |
| res = vis_write_hi(res, vis_read_hi(res1)); /* high half: earlier pixel, low half: later pixel */ |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| |
| i += 6; |
| } |
| |
| if (i <= cols - 4) { /* same scheme without the loop: exactly 4 pixels */ |
| NEXT_PIXEL_2BC_S16(); |
| LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| |
| NEXT_PIXEL_2BC_S16(); |
| |
| BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); |
| BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); |
| |
| FADD_2BC_S16(); |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| |
| RESULT_2BC_S16_1PIXEL(); |
| res1 = res; |
| |
| LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| RESULT_2BC_S16_1PIXEL(); |
| res = vis_write_hi(res, vis_read_hi(res1)); |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| |
| i += 4; |
| } |
| |
| if (i <= cols - 2) { /* two pixels, each loaded and filtered on its own */ |
| NEXT_PIXEL_2BC_S16(); |
| LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| RESULT_2BC_S16_1PIXEL(); |
| res1 = res; |
| |
| NEXT_PIXEL_2BC_S16(); |
| LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| RESULT_2BC_S16_1PIXEL(); |
| res = vis_write_hi(res, vis_read_hi(res1)); |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| |
| i += 2; |
| } |
| |
| if (i < cols) { /* one pixel left: clip both partial stores to dstLineEnd */ |
| NEXT_PIXEL_2BC_S16(); |
| LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); |
| RESULT_2BC_S16_1PIXEL(); |
| vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); |
| res = vis_faligndata(res, res); |
| emask = vis_edge16(dp, dstLineEnd); |
| vis_pst_16(res, dp++, mask & emask); |
| |
| if ((mlib_s16*)dp <= dstLineEnd) { |
| mask = vis_edge16(dp, dstLineEnd); |
| vis_pst_16(res, dp, mask); |
| } |
| } |
| } |
| |
| return MLIB_SUCCESS; |
| } |
| |
| /*************************************************************** |
|  * NEXT_PIXEL_3BC_S16() |
|  * 3-channel counterpart of NEXT_PIXEL_1BC_S16: derives xSrc and ySrc |
|  * from the fixed-point X and Y (each shifted right by MLIB_SHIFT, |
|  * minus 1) and points sPtr at element xSrc*3 of source row ySrc, |
|  * i.e. three mlib_s16 values per pixel. |
|  * Writes: xSrc, ySrc, sPtr.  Reads: X, Y, lineAddr. |
|  ***************************************************************/ |
| #define NEXT_PIXEL_3BC_S16() \ |
| xSrc = (X >> MLIB_SHIFT)-1; \ |
| ySrc = (Y >> MLIB_SHIFT)-1; \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3) |
| |
| /*************************************************************** |
|  * LOAD_BC_S16_3CH_1PIXEL(mlib_filters_s16_3, mlib_filters_s16_4) |
|  * Loads everything needed to filter one 3-channel pixel: |
|  *   - rowN0 / rowN1 / rowN2 (N = 0..3): for each of four consecutive |
|  *     source rows, four 8-byte words are read through the pointer |
|  *     returned by vis_alignaddr(sPtr, 0) and combined pairwise with |
|  *     vis_faligndata, giving the 24 bytes that start at sPtr; |
|  *   - yFilter0..3: four doubles at byte offset filterposy*4 in |
|  *     mlib_filters_s16_4; |
|  *   - xFilter0..2: three doubles at byte offset filterposx*3 in |
|  *     mlib_filters_s16_3. |
|  * Afterwards X and Y are advanced by dX and dY. |
|  * Side effects: sPtr is left advanced by 3*srcYStride; data0..data3, |
|  * dpSrc, xPtr, yPtr, filterposx, filterposy and the alignment state |
|  * are clobbered. |
|  ***************************************************************/ |
| #define LOAD_BC_S16_3CH_1PIXEL(mlib_filters_s16_3, mlib_filters_s16_4) \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| row00 = vis_faligndata(data0, data1); \ |
| row01 = vis_faligndata(data1, data2); \ |
| row02 = vis_faligndata(data2, data3); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| row10 = vis_faligndata(data0, data1); \ |
| row11 = vis_faligndata(data1, data2); \ |
| row12 = vis_faligndata(data2, data3); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| row20 = vis_faligndata(data0, data1); \ |
| row21 = vis_faligndata(data1, data2); \ |
| row22 = vis_faligndata(data2, data3); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| row30 = vis_faligndata(data0, data1); \ |
| row31 = vis_faligndata(data1, data2); \ |
| row32 = vis_faligndata(data2, data3); \ |
| filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| yFilter0 = yPtr[0]; \ |
| yFilter1 = yPtr[1]; \ |
| yFilter2 = yPtr[2]; \ |
| yFilter3 = yPtr[3]; \ |
| filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ |
| xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \ |
| xFilter0 = xPtr[0]; \ |
| xFilter1 = xPtr[1]; \ |
| xFilter2 = xPtr[2]; \ |
| X += dX; \ |
| Y += dY |
| |
| /*************************************************************** |
|  * STORE_BC_S16_3CH_1PIXEL() |
|  * Writes one 3-channel pixel: copies the first three mlib_s16 elements |
|  * of f0.t (the element view of the packed result f0.d produced by |
|  * RESULT_3BC_S16_1PIXEL / FADD_3BC_S16) to dstPixelPtr[0..2] and |
|  * advances dstPixelPtr by 3. f0.t[3] is not stored. |
|  ***************************************************************/ |
| #define STORE_BC_S16_3CH_1PIXEL() \ |
| dstPixelPtr[0] = f0.t[0]; \ |
| dstPixelPtr[1] = f0.t[1]; \ |
| dstPixelPtr[2] = f0.t[2]; \ |
| dstPixelPtr += 3 |
| |
| /*************************************************************** |
|  * RESULT_3BC_S16_1PIXEL() |
|  * Filters the single 3-channel pixel currently held in rowN0..rowN2, |
|  * yFilter0..3 and xFilter0..2: |
|  *   1. the three 8-byte pieces of every row are multiplied by the |
|  *      row's y filter (fmul8sux16 + fmul8ulx16 partial products added |
|  *      with fpadd16) and accumulated into sum0, sum1, sum2; |
|  *   2. sum0..2 are multiplied by xFilter0..2 giving d0, d1, d2; |
|  *   3. d0..d2 are folded into one word: d3 and d4 are taken from the |
|  *      d0:d1 and d1:d2 pairs with alignments 6 and 2, added in, and |
|  *      d2 rotated by the still-active alignment 2 is added last; |
|  *   4. both halves of the folded word are widened with fmuld8sux16 by |
|  *      f_x01000100 and packed into f0.d. |
|  * STORE_BC_S16_3CH_1PIXEL writes f0.t[0..2] as the pixel. |
|  * Changes the alignment state (left at 2); does not touch X, Y, sPtr. |
|  ***************************************************************/ |
| #define RESULT_3BC_S16_1PIXEL() \ |
| u00 = vis_fmul8sux16(row00, yFilter0); \ |
| u01 = vis_fmul8ulx16(row00, yFilter0); \ |
| u10 = vis_fmul8sux16(row01, yFilter0); \ |
| u11 = vis_fmul8ulx16(row01, yFilter0); \ |
| v00 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row02, yFilter0); \ |
| v01 = vis_fpadd16(u10, u11); \ |
| u21 = vis_fmul8ulx16(row02, yFilter0); \ |
| u00 = vis_fmul8sux16(row10, yFilter1); \ |
| u01 = vis_fmul8ulx16(row10, yFilter1); \ |
| v02 = vis_fpadd16(u20, u21); \ |
| u10 = vis_fmul8sux16(row11, yFilter1); \ |
| u11 = vis_fmul8ulx16(row11, yFilter1); \ |
| v10 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row12, yFilter1); \ |
| u21 = vis_fmul8ulx16(row12, yFilter1); \ |
| u00 = vis_fmul8sux16(row20, yFilter2); \ |
| v11 = vis_fpadd16(u10, u11); \ |
| u01 = vis_fmul8ulx16(row20, yFilter2); \ |
| v12 = vis_fpadd16(u20, u21); \ |
| u10 = vis_fmul8sux16(row21, yFilter2); \ |
| u11 = vis_fmul8ulx16(row21, yFilter2); \ |
| v20 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row22, yFilter2); \ |
| sum0 = vis_fpadd16(v00, v10); \ |
| u21 = vis_fmul8ulx16(row22, yFilter2); \ |
| u00 = vis_fmul8sux16(row30, yFilter3); \ |
| u01 = vis_fmul8ulx16(row30, yFilter3); \ |
| v21 = vis_fpadd16(u10, u11); \ |
| sum1 = vis_fpadd16(v01, v11); \ |
| u10 = vis_fmul8sux16(row31, yFilter3); \ |
| sum2 = vis_fpadd16(v02, v12); \ |
| v22 = vis_fpadd16(u20, u21); \ |
| u11 = vis_fmul8ulx16(row31, yFilter3); \ |
| sum0 = vis_fpadd16(sum0, v20); \ |
| u20 = vis_fmul8sux16(row32, yFilter3); \ |
| v30 = vis_fpadd16(u00, u01); \ |
| sum1 = vis_fpadd16(sum1, v21); \ |
| u21 = vis_fmul8ulx16(row32, yFilter3); \ |
| v31 = vis_fpadd16(u10, u11); \ |
| sum2 = vis_fpadd16(sum2, v22); \ |
| v32 = vis_fpadd16(u20, u21); \ |
| sum0 = vis_fpadd16(sum0, v30); \ |
| row30 = vis_faligndata(data0, data1); /* NOTE(review): row30 is not read again in this macro; looks like a dead store left over from BC_S16_3CH - confirm before removing */ \ |
| v00 = vis_fmul8sux16(sum0, xFilter0); \ |
| sum1 = vis_fpadd16(sum1, v31); \ |
| sum2 = vis_fpadd16(sum2, v32); \ |
| v01 = vis_fmul8ulx16(sum0, xFilter0); \ |
| v10 = vis_fmul8sux16(sum1, xFilter1); \ |
| v11 = vis_fmul8ulx16(sum1, xFilter1); \ |
| d0 = vis_fpadd16(v00, v01); \ |
| v20 = vis_fmul8sux16(sum2, xFilter2); \ |
| v21 = vis_fmul8ulx16(sum2, xFilter2); \ |
| d1 = vis_fpadd16(v10, v11); \ |
| d2 = vis_fpadd16(v20, v21); \ |
| vis_alignaddr((void*)6, 0); \ |
| d3 = vis_faligndata(d0, d1); \ |
| vis_alignaddr((void*)2, 0); \ |
| d4 = vis_faligndata(d1, d2); \ |
| d0 = vis_fpadd16(d0, d3); \ |
| d2 = vis_fpadd16(d2, d4); \ |
| d1 = vis_faligndata(d2, d2); \ |
| d0 = vis_fpadd16(d0, d1); \ |
| d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ |
| d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ |
| f0.d = vis_fpackfix_pair(d2, d3) |
| |
| /*************************************************************** |
|  * BC_S16_3CH(mlib_filters_s16_3, mlib_filters_s16_4) |
|  * One stage of the software-pipelined 3-channel loop. The statements |
|  * of three activities are interleaved, so their order matters: |
|  *   - compute: the pixel currently held in rowN0..rowN2, yFilter0..3 |
|  *     and xFilter0..2 is filtered vertically into sum0..2 and |
|  *     multiplied by xFilter0..2; the products are left in d0, d1, d2 |
|  *     for FADD_3BC_S16 to fold and pack (there is no "ind" argument, |
|  *     so FADD_3BC_S16 has to run before the next BC_S16_3CH); |
|  *   - load: rows and filter coefficients of the next pixel are read |
|  *     from sPtr; each variable is overwritten only after its last use |
|  *     by the compute part; |
|  *   - advance: X and Y are stepped by dX and dY (the filter positions |
|  *     are taken from the values before the step) and sPtr is set from |
|  *     the stepped X and Y for the pixel after the one just loaded. |
|  * Clobbers the u and v temporaries, sum0..2, d0..d2, data0..data3, |
|  * dpSrc, xPtr, yPtr, xSrc, ySrc, filterposx, filterposy and the |
|  * alignment state. |
|  ***************************************************************/ |
| #define BC_S16_3CH(mlib_filters_s16_3, mlib_filters_s16_4) \ |
| u00 = vis_fmul8sux16(row00, yFilter0); \ |
| u01 = vis_fmul8ulx16(row00, yFilter0); \ |
| u10 = vis_fmul8sux16(row01, yFilter0); \ |
| u11 = vis_fmul8ulx16(row01, yFilter0); \ |
| v00 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row02, yFilter0); \ |
| v01 = vis_fpadd16(u10, u11); \ |
| u21 = vis_fmul8ulx16(row02, yFilter0); \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| u00 = vis_fmul8sux16(row10, yFilter1); \ |
| u01 = vis_fmul8ulx16(row10, yFilter1); \ |
| data0 = dpSrc[0]; \ |
| filterposy = (Y >> FILTER_SHIFT); \ |
| v02 = vis_fpadd16(u20, u21); \ |
| data1 = dpSrc[1]; \ |
| u10 = vis_fmul8sux16(row11, yFilter1); \ |
| data2 = dpSrc[2]; \ |
| u11 = vis_fmul8ulx16(row11, yFilter1); \ |
| v10 = vis_fpadd16(u00, u01); \ |
| data3 = dpSrc[3]; \ |
| u20 = vis_fmul8sux16(row12, yFilter1); \ |
| row00 = vis_faligndata(data0, data1); \ |
| u21 = vis_fmul8ulx16(row12, yFilter1); \ |
| row01 = vis_faligndata(data1, data2); \ |
| u00 = vis_fmul8sux16(row20, yFilter2); \ |
| row02 = vis_faligndata(data2, data3); \ |
| filterposx = (X >> FILTER_SHIFT); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| v11 = vis_fpadd16(u10, u11); \ |
| u01 = vis_fmul8ulx16(row20, yFilter2); \ |
| v12 = vis_fpadd16(u20, u21); \ |
| data0 = dpSrc[0]; \ |
| u10 = vis_fmul8sux16(row21, yFilter2); \ |
| X += dX; \ |
| data1 = dpSrc[1]; \ |
| u11 = vis_fmul8ulx16(row21, yFilter2); \ |
| v20 = vis_fpadd16(u00, u01); \ |
| data2 = dpSrc[2]; \ |
| u20 = vis_fmul8sux16(row22, yFilter2); \ |
| sum0 = vis_fpadd16(v00, v10); \ |
| data3 = dpSrc[3]; \ |
| row10 = vis_faligndata(data0, data1); \ |
| u21 = vis_fmul8ulx16(row22, yFilter2); \ |
| row11 = vis_faligndata(data1, data2); \ |
| u00 = vis_fmul8sux16(row30, yFilter3); \ |
| row12 = vis_faligndata(data2, data3); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| u01 = vis_fmul8ulx16(row30, yFilter3); \ |
| v21 = vis_fpadd16(u10, u11); \ |
| Y += dY; \ |
| xSrc = (X >> MLIB_SHIFT)-1; \ |
| sum1 = vis_fpadd16(v01, v11); \ |
| data0 = dpSrc[0]; \ |
| u10 = vis_fmul8sux16(row31, yFilter3); \ |
| sum2 = vis_fpadd16(v02, v12); \ |
| ySrc = (Y >> MLIB_SHIFT)-1; \ |
| data1 = dpSrc[1]; \ |
| v22 = vis_fpadd16(u20, u21); \ |
| u11 = vis_fmul8ulx16(row31, yFilter3); \ |
| data2 = dpSrc[2]; \ |
| sum0 = vis_fpadd16(sum0, v20); \ |
| u20 = vis_fmul8sux16(row32, yFilter3); \ |
| data3 = dpSrc[3]; \ |
| v30 = vis_fpadd16(u00, u01); \ |
| filterposy &= FILTER_MASK; \ |
| row20 = vis_faligndata(data0, data1); \ |
| sum1 = vis_fpadd16(sum1, v21); \ |
| u21 = vis_fmul8ulx16(row32, yFilter3); \ |
| row21 = vis_faligndata(data1, data2); \ |
| row22 = vis_faligndata(data2, data3); \ |
| sPtr += srcYStride; \ |
| filterposx &= FILTER_MASK; \ |
| v31 = vis_fpadd16(u10, u11); \ |
| dpSrc = vis_alignaddr(sPtr, 0); \ |
| data0 = dpSrc[0]; \ |
| sum2 = vis_fpadd16(sum2, v22); \ |
| data1 = dpSrc[1]; \ |
| v32 = vis_fpadd16(u20, u21); \ |
| data2 = dpSrc[2]; \ |
| sum0 = vis_fpadd16(sum0, v30); \ |
| data3 = dpSrc[3]; \ |
| row30 = vis_faligndata(data0, data1); \ |
| v00 = vis_fmul8sux16(sum0, xFilter0); \ |
| row31 = vis_faligndata(data1, data2); \ |
| row32 = vis_faligndata(data2, data3); \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| sum1 = vis_fpadd16(sum1, v31); \ |
| yFilter0 = yPtr[0]; \ |
| sum2 = vis_fpadd16(sum2, v32); \ |
| v01 = vis_fmul8ulx16(sum0, xFilter0); \ |
| yFilter1 = yPtr[1]; \ |
| v10 = vis_fmul8sux16(sum1, xFilter1); \ |
| yFilter2 = yPtr[2]; \ |
| v11 = vis_fmul8ulx16(sum1, xFilter1); \ |
| d0 = vis_fpadd16(v00, v01); \ |
| yFilter3 = yPtr[3]; \ |
| xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \ |
| v20 = vis_fmul8sux16(sum2, xFilter2); \ |
| xFilter0 = xPtr[0]; \ |
| v21 = vis_fmul8ulx16(sum2, xFilter2); \ |
| d1 = vis_fpadd16(v10, v11); \ |
| xFilter1 = xPtr[1]; \ |
| d2 = vis_fpadd16(v20, v21); \ |
| xFilter2 = xPtr[2]; \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3) |
| |
| /*************************************************************** |
| * FADD_3BC_S16() |
| * |
| * Reduction step for one 3-channel pixel. Works entirely on variables |
| * of the enclosing function: reads d0, d1, d2 (as left by BC_S16_3CH), |
| * uses d3 and d4 as scratch, overwrites d0, d1, d2 and stores the |
| * packed result in f0.d. It also reads f_x01000100 and changes the |
| * alignment state through vis_alignaddr(), so any vis_faligndata() |
| * issued afterwards has to establish its own alignment first. |
| * |
| * NOTE(review): assuming vis_faligndata(a, b) returns the 8 bytes that |
| * start at the current alignment offset inside the 16-byte pair a:b, |
| * and that d0:d1:d2 hold twelve 16-bit lanes ordered as four horizontal |
| * taps of three channels each, the three fpadd16 steps leave the |
| * per-channel sum of the four taps in the first three lanes of d0 and |
| * an unrelated value in the fourth lane - confirm against the VIS |
| * manual and the layout of the 3-channel X filter table. |
| ***************************************************************/ |
| #define FADD_3BC_S16() \ |
| vis_alignaddr((void*)6, 0); /* alignment taken from the constant 6 */ \ |
| d3 = vis_faligndata(d0, d1); \ |
| vis_alignaddr((void*)2, 0); /* alignment taken from the constant 2; not changed again below */ \ |
| d4 = vis_faligndata(d1, d2); \ |
| d0 = vis_fpadd16(d0, d3); \ |
| d2 = vis_fpadd16(d2, d4); \ |
| d1 = vis_faligndata(d2, d2); /* d2 realigned against itself */ \ |
| d0 = vis_fpadd16(d0, d1); \ |
| d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); /* from the vis_read_hi() half of d0 */ \ |
| d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); /* from the vis_read_lo() half of d0 */ \ |
| f0.d = vis_fpackfix_pair(d2, d3) |
| |
| /*************************************************************** |
| * mlib_ImageAffine_s16_3ch_bc |
| * |
| * Bicubic-filtered affine inner loop for 3-channel mlib_s16 images. |
| * Iterates j over yStart..yFinish and, for every row, produces |
| * cols = xRight - xLeft + 1 destination pixels. |
| * |
| * param - affine parameter block. It is not touched directly here; |
| * presumably DECLAREVAR_BC() (defined outside this view) |
| * unpacks it into filter, srcYStride, j, yStart, yFinish, |
| * xLeft, xRight, X, Y, dX, dY, lineAddr, ... - TODO confirm. |
| * |
| * Returns MLIB_SUCCESS unconditionally. |
| * |
| * Rows with at least four pixels go through a software-pipelined path |
| * built from BC_S16_3CH / FADD_3BC_S16 / STORE_BC_S16_3CH_1PIXEL; any |
| * remaining pixels (and rows shorter than four pixels) are handled one |
| * at a time by the plain LOAD / RESULT / STORE sequence. |
| * NOTE(review): the 3-channel LOAD, RESULT and STORE macros are defined |
| * outside this view; the stage comments below assume they mirror the |
| * 4-channel macros of this file - confirm. |
| ***************************************************************/ |
| mlib_status mlib_ImageAffine_s16_3ch_bc (mlib_affine_param *param) |
| { |
| DECLAREVAR_BC(); /* shared locals; macro defined outside this view */ |
| mlib_s32 filterposx, filterposy; /* byte-offset indices into the filter tables */ |
| mlib_d64 data0, data1, data2, data3; /* raw aligned 8-byte source loads */ |
| mlib_d64 sum0, sum1, sum2; /* vertical-pass accumulators */ |
| mlib_d64 row00, row10, row20, row30; /* rowRK: source row R, 8-byte group K */ |
| mlib_d64 row01, row11, row21, row31; |
| mlib_d64 row02, row12, row22, row32; |
| mlib_d64 xFilter0, xFilter1, xFilter2; /* horizontal coefficients */ |
| mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; /* vertical coefficients, one per source row */ |
| mlib_d64 v00, v01, v02, v10, v11, v12, v20, v21, v22, v30, v31, v32; |
| mlib_d64 u00, u01, u10, u11, u20, u21; /* partial products of the fmul8sux16/fmul8ulx16 pairs */ |
| mlib_d64 d0, d1, d2, d3, d4; /* horizontal products and scratch for FADD_3BC_S16 */ |
| mlib_d64 *yPtr, *xPtr; |
| mlib_d64 *dpSrc; |
| mlib_s32 cols, i; |
| mlib_f32 f_x01000100 = vis_to_float(0x01000100); /* constant multiplier used by FADD_3BC_S16 */ |
| union { |
| mlib_s16 t[4]; |
| mlib_d64 d; |
| } f0; /* packed result: written as f0.d, t[] gives per-lane access */ |
| const mlib_s16 *mlib_filters_table_3; |
| const mlib_s16 *mlib_filters_table_4; |
| /* Coefficient tables: mlib_filters_s16_bc_3 and mlib_filters_s16_bc_4 |
| * for MLIB_BICUBIC, the bc2 variants for every other value of filter. */ |
| if (filter == MLIB_BICUBIC) { |
| mlib_filters_table_3 = mlib_filters_s16_bc_3; |
| mlib_filters_table_4 = mlib_filters_s16_bc_4; |
| } else { |
| mlib_filters_table_3 = mlib_filters_s16_bc2_3; |
| mlib_filters_table_4 = mlib_filters_s16_bc2_4; |
| } |
| /* Halve the stride; the visible BC_S16_3CH code adds it to the |
| * mlib_s16 pointer sPtr (presumably it arrives in bytes - confirm). */ |
| srcYStride >>= 1; |
| |
| for (j = yStart; j <= yFinish; j++) { |
| /* GSR is rewritten for every row. NOTE(review): 10 << 3 presumably |
| * selects a pack scale of 10 with a zero alignment offset - confirm. */ |
| vis_write_gsr(10 << 3); |
| /* CLIP is defined outside this view; presumably it sets up xLeft, |
| * xRight, X, Y and the destination pointer for row j - confirm. */ |
| CLIP(3); |
| |
| cols = xRight - xLeft + 1; |
| |
| i = 0; |
| /* The pipelined path issues exactly four stores outside its loop, |
| * so it is only entered when the row has at least four pixels. */ |
| if (i <= cols - 4) { |
| /* Prologue: fill the pipeline before the first store. */ |
| NEXT_PIXEL_3BC_S16(); |
| LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4); |
| |
| NEXT_PIXEL_3BC_S16(); |
| |
| BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4); |
| FADD_3BC_S16(); |
| |
| BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4); |
| /* Steady state: one store, one reduction and one filter stage per |
| * iteration; runs cols - 4 times. */ |
| #pragma pipeloop(0) |
| for (; i < cols-4; i++) { |
| STORE_BC_S16_3CH_1PIXEL(); |
| |
| FADD_3BC_S16(); |
| BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4); |
| } |
| /* Epilogue: drain the pipeline with the remaining four stores. */ |
| STORE_BC_S16_3CH_1PIXEL(); |
| |
| FADD_3BC_S16(); |
| STORE_BC_S16_3CH_1PIXEL(); |
| |
| RESULT_3BC_S16_1PIXEL(); |
| STORE_BC_S16_3CH_1PIXEL(); |
| |
| LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4); |
| RESULT_3BC_S16_1PIXEL(); |
| STORE_BC_S16_3CH_1PIXEL(); |
| i += 4; |
| } |
| /* Non-pipelined path: rows shorter than four pixels (i is still 0). */ |
| for (; i < cols; i++) { |
| NEXT_PIXEL_3BC_S16(); |
| LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4); |
| RESULT_3BC_S16_1PIXEL(); |
| STORE_BC_S16_3CH_1PIXEL(); |
| } |
| } |
| |
| return MLIB_SUCCESS; |
| } |
| |
| /*************************************************************** |
| * NEXT_PIXEL_4BC_S16() |
| * |
| * Derives the source address for the current fixed-point position. |
| * Reads X, Y and lineAddr from the enclosing function and writes xSrc, |
| * ySrc and sPtr (sPtr is an alias for srcPixelPtr). X and Y themselves |
| * are not modified. |
| * |
| * xSrc and ySrc are X and Y shifted right by MLIB_SHIFT, minus one. |
| * sPtr then points into row lineAddr[ySrc] at element xSrc << 2, i.e. |
| * four mlib_s16 elements per pixel. |
| ***************************************************************/ |
| #define NEXT_PIXEL_4BC_S16() \ |
| xSrc = (X >> MLIB_SHIFT)-1; \ |
| ySrc = (Y >> MLIB_SHIFT)-1; \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2) |
| |
| /*************************************************************** |
| * LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) |
| * |
| * Loads everything RESULT_4BC_S16_1PIXEL needs for one pixel and then |
| * steps the position to the next pixel. |
| * |
| * mlib_filters_s16_4 - base of the coefficient table; used for both |
| * the Y and the X coefficients, addressed by |
| * the byte offsets filterposy*4 / filterposx*4. |
| * |
| * Reads (enclosing scope): sPtr, srcYStride, X, Y, dX, dY. |
| * Writes: dpSrc, data0..data4, row00..row33, filterposy, filterposx, |
| * yPtr, xPtr, yFilter0..3, xFilter0..3. |
| * Side effects: sPtr is advanced by srcYStride three times (it ends on |
| * the fourth source row), X += dX, Y += dY, and the alignment state is |
| * set by vis_alignaddr(). |
| * |
| * For each of four consecutive source rows the macro takes the address |
| * returned by vis_alignaddr(sPtr, 0), loads five mlib_d64 words from it |
| * and combines neighbouring words with vis_faligndata() into |
| * rowR0..rowR3. NOTE(review): presumably this extracts the 32 bytes |
| * starting at the possibly unaligned sPtr, i.e. four 4-channel pixels |
| * per row - confirm against the VIS manual. |
| * FILTER_SHIFT and FILTER_MASK are defined outside this view. |
| ***************************************************************/ |
| #define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* source row 0 */ \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| data4 = dpSrc[4]; \ |
| row00 = vis_faligndata(data0, data1); \ |
| row01 = vis_faligndata(data1, data2); \ |
| row02 = vis_faligndata(data2, data3); \ |
| row03 = vis_faligndata(data3, data4); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* source row 1 */ \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| data4 = dpSrc[4]; \ |
| row10 = vis_faligndata(data0, data1); \ |
| row11 = vis_faligndata(data1, data2); \ |
| row12 = vis_faligndata(data2, data3); \ |
| row13 = vis_faligndata(data3, data4); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* source row 2 */ \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| data4 = dpSrc[4]; \ |
| row20 = vis_faligndata(data0, data1); \ |
| row21 = vis_faligndata(data1, data2); \ |
| row22 = vis_faligndata(data2, data3); \ |
| row23 = vis_faligndata(data3, data4); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* source row 3 */ \ |
| data0 = dpSrc[0]; \ |
| data1 = dpSrc[1]; \ |
| data2 = dpSrc[2]; \ |
| data3 = dpSrc[3]; \ |
| data4 = dpSrc[4]; \ |
| row30 = vis_faligndata(data0, data1); \ |
| row31 = vis_faligndata(data1, data2); \ |
| row32 = vis_faligndata(data2, data3); \ |
| row33 = vis_faligndata(data3, data4); \ |
| filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; /* taken from Y before it is advanced */ \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| yFilter0 = yPtr[0]; \ |
| yFilter1 = yPtr[1]; \ |
| yFilter2 = yPtr[2]; \ |
| yFilter3 = yPtr[3]; \ |
| filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; /* taken from X before it is advanced */ \ |
| xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ |
| xFilter0 = xPtr[0]; \ |
| xFilter1 = xPtr[1]; \ |
| xFilter2 = xPtr[2]; \ |
| xFilter3 = xPtr[3]; \ |
| X += dX; /* step to the next destination pixel */ \ |
| Y += dY |
| |
| /*************************************************************** |
| * RESULT_4BC_S16_1PIXEL() |
| * |
| * Computes one packed 4-channel result from data that is already |
| * loaded; it performs no memory loads and does not touch X, Y or sPtr. |
| * |
| * Reads (enclosing scope): row00..row33, yFilter0..3, xFilter0..3, |
| * f_x01000100. |
| * Writes: u00..u31, v00..v33, sum0..sum3, d0..d3 (all scratch) and |
| * res, which receives the result of vis_fpackfix_pair(). |
| * |
| * Structure, as written below: |
| * 1. vertical pass - sumK = row0K*yFilter0 + row1K*yFilter1 + |
| * row2K*yFilter2 + row3K*yFilter3 for K = 0..3; |
| * 2. horizontal pass - dK = sumK * xFilterK, then d0 = d0+d1+d2+d3; |
| * 3. the two halves of d0 go through vis_fmuld8sux16() with |
| * f_x01000100 and are packed into res. |
| * Every product is formed as fmul8sux16(a, b) + fmul8ulx16(a, b). |
| * NOTE(review): presumably this pair is the VIS idiom for a partitioned |
| * 16x16 multiply, step 3 widens the lanes so that fpackfix applies the |
| * GSR scale with saturation, and each 16-bit lane carries one channel - |
| * confirm against the VIS manual. |
| ***************************************************************/ |
| #define RESULT_4BC_S16_1PIXEL() \ |
| u00 = vis_fmul8sux16(row00, yFilter0); /* vertical pass, source row 0 */ \ |
| u01 = vis_fmul8ulx16(row00, yFilter0); \ |
| u10 = vis_fmul8sux16(row01, yFilter0); \ |
| u11 = vis_fmul8ulx16(row01, yFilter0); \ |
| v00 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row02, yFilter0); \ |
| v01 = vis_fpadd16(u10, u11); \ |
| u21 = vis_fmul8ulx16(row02, yFilter0); \ |
| u30 = vis_fmul8sux16(row03, yFilter0); \ |
| u31 = vis_fmul8ulx16(row03, yFilter0); \ |
| v02 = vis_fpadd16(u20, u21); \ |
| u00 = vis_fmul8sux16(row10, yFilter1); /* source row 1 */ \ |
| u01 = vis_fmul8ulx16(row10, yFilter1); \ |
| v03 = vis_fpadd16(u30, u31); \ |
| u10 = vis_fmul8sux16(row11, yFilter1); \ |
| u11 = vis_fmul8ulx16(row11, yFilter1); \ |
| v10 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row12, yFilter1); \ |
| v11 = vis_fpadd16(u10, u11); \ |
| u21 = vis_fmul8ulx16(row12, yFilter1); \ |
| u30 = vis_fmul8sux16(row13, yFilter1); \ |
| u31 = vis_fmul8ulx16(row13, yFilter1); \ |
| u00 = vis_fmul8sux16(row20, yFilter2); /* source row 2 */ \ |
| v12 = vis_fpadd16(u20, u21); \ |
| u01 = vis_fmul8ulx16(row20, yFilter2); \ |
| v13 = vis_fpadd16(u30, u31); \ |
| u10 = vis_fmul8sux16(row21, yFilter2); \ |
| u11 = vis_fmul8ulx16(row21, yFilter2); \ |
| v20 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row22, yFilter2); \ |
| sum0 = vis_fpadd16(v00, v10); \ |
| u21 = vis_fmul8ulx16(row22, yFilter2); \ |
| u30 = vis_fmul8sux16(row23, yFilter2); \ |
| u31 = vis_fmul8ulx16(row23, yFilter2); \ |
| u00 = vis_fmul8sux16(row30, yFilter3); /* source row 3 */ \ |
| u01 = vis_fmul8ulx16(row30, yFilter3); \ |
| v21 = vis_fpadd16(u10, u11); \ |
| sum1 = vis_fpadd16(v01, v11); \ |
| u10 = vis_fmul8sux16(row31, yFilter3); \ |
| sum2 = vis_fpadd16(v02, v12); \ |
| sum3 = vis_fpadd16(v03, v13); \ |
| v22 = vis_fpadd16(u20, u21); \ |
| u11 = vis_fmul8ulx16(row31, yFilter3); \ |
| sum0 = vis_fpadd16(sum0, v20); \ |
| u20 = vis_fmul8sux16(row32, yFilter3); \ |
| u21 = vis_fmul8ulx16(row32, yFilter3); \ |
| v23 = vis_fpadd16(u30, u31); \ |
| v30 = vis_fpadd16(u00, u01); \ |
| sum1 = vis_fpadd16(sum1, v21); \ |
| u30 = vis_fmul8sux16(row33, yFilter3); \ |
| u31 = vis_fmul8ulx16(row33, yFilter3); \ |
| v31 = vis_fpadd16(u10, u11); \ |
| sum2 = vis_fpadd16(sum2, v22); \ |
| sum3 = vis_fpadd16(sum3, v23); \ |
| v32 = vis_fpadd16(u20, u21); \ |
| sum0 = vis_fpadd16(sum0, v30); \ |
| v33 = vis_fpadd16(u30, u31); \ |
| v00 = vis_fmul8sux16(sum0, xFilter0); /* horizontal pass starts; v00.. are reused as scratch */ \ |
| sum1 = vis_fpadd16(sum1, v31); \ |
| sum2 = vis_fpadd16(sum2, v32); \ |
| v01 = vis_fmul8ulx16(sum0, xFilter0); \ |
| v10 = vis_fmul8sux16(sum1, xFilter1); \ |
| sum3 = vis_fpadd16(sum3, v33); \ |
| v11 = vis_fmul8ulx16(sum1, xFilter1); \ |
| d0 = vis_fpadd16(v00, v01); \ |
| v20 = vis_fmul8sux16(sum2, xFilter2); \ |
| v21 = vis_fmul8ulx16(sum2, xFilter2); \ |
| d1 = vis_fpadd16(v10, v11); \ |
| v30 = vis_fmul8sux16(sum3, xFilter3); \ |
| v31 = vis_fmul8ulx16(sum3, xFilter3); \ |
| d2 = vis_fpadd16(v20, v21); \ |
| d3 = vis_fpadd16(v30, v31); \ |
| d0 = vis_fpadd16(d0, d1); /* d0 = d0 + d1 + d2 + d3 over the next three adds */ \ |
| d2 = vis_fpadd16(d2, d3); \ |
| d0 = vis_fpadd16(d0, d2); \ |
| d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); /* d2, d3 reused as scratch for the pack */ \ |
| d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ |
| res = vis_fpackfix_pair(d2, d3) |
| |
| /*************************************************************** |
| * BC_S16_4CH(mlib_filters_s16_4) |
| * |
| * Software-pipelined stage for the 4-channel loop. In one pass it |
| * - finishes the filter products of the CURRENT pixel from the rows |
| * and coefficients that are already loaded, leaving them in d0..d3 |
| * (the final adds and the pack are done by FADD_4BC_S16), |
| * - reloads row00..row33 from sPtr, i.e. for the NEXT pixel, |
| * - loads the coefficients that belong to that next pixel, |
| * - advances X and Y and points sPtr at the pixel after that. |
| * |
| * mlib_filters_s16_4 - base of the coefficient table, used for both |
| * the Y and the X coefficients. |
| * |
| * Reads (enclosing scope): row00..row33, yFilter0..3, xFilter0..3, |
| * sPtr, srcYStride, X, Y, dX, dY, lineAddr. |
| * Writes: u*, v*, sum0..3, d0..d3, dpSrc, data0..4, row00..row33, |
| * filterposy, filterposx, yPtr, xPtr, yFilter0..3, xFilter0..3, |
| * xSrc, ySrc, sPtr, X, Y; the alignment state is set by |
| * vis_alignaddr(). |
| * |
| * Statement order is significant: arithmetic and loads are interleaved |
| * by hand, and every rowRK / yFilterR / xFilterK is overwritten only |
| * after its last use for the current pixel. filterposy and filterposx |
| * are sampled before X and Y are advanced, so they describe the pixel |
| * whose rows are being loaded, while xSrc / ySrc (and the final sPtr) |
| * are computed after the advance. |
| * FILTER_SHIFT and FILTER_MASK are defined outside this view. |
| ***************************************************************/ |
| #define BC_S16_4CH(mlib_filters_s16_4) \ |
| u00 = vis_fmul8sux16(row00, yFilter0); /* current pixel: vertical pass, source row 0 */ \ |
| u01 = vis_fmul8ulx16(row00, yFilter0); \ |
| u10 = vis_fmul8sux16(row01, yFilter0); \ |
| u11 = vis_fmul8ulx16(row01, yFilter0); \ |
| v00 = vis_fpadd16(u00, u01); \ |
| u20 = vis_fmul8sux16(row02, yFilter0); \ |
| v01 = vis_fpadd16(u10, u11); \ |
| u21 = vis_fmul8ulx16(row02, yFilter0); \ |
| u30 = vis_fmul8sux16(row03, yFilter0); \ |
| u31 = vis_fmul8ulx16(row03, yFilter0); \ |
| v02 = vis_fpadd16(u20, u21); \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* next pixel: start loading source row 0 */ \ |
| u00 = vis_fmul8sux16(row10, yFilter1); \ |
| u01 = vis_fmul8ulx16(row10, yFilter1); \ |
| data0 = dpSrc[0]; \ |
| filterposy = (Y >> FILTER_SHIFT); /* sampled before Y += dY; masked further down */ \ |
| v03 = vis_fpadd16(u30, u31); \ |
| data1 = dpSrc[1]; \ |
| u10 = vis_fmul8sux16(row11, yFilter1); \ |
| data2 = dpSrc[2]; \ |
| u11 = vis_fmul8ulx16(row11, yFilter1); \ |
| v10 = vis_fpadd16(u00, u01); \ |
| data3 = dpSrc[3]; \ |
| u20 = vis_fmul8sux16(row12, yFilter1); \ |
| v11 = vis_fpadd16(u10, u11); \ |
| data4 = dpSrc[4]; \ |
| u21 = vis_fmul8ulx16(row12, yFilter1); \ |
| row00 = vis_faligndata(data0, data1); /* row0x is free: its products were formed above */ \ |
| u30 = vis_fmul8sux16(row13, yFilter1); \ |
| row01 = vis_faligndata(data1, data2); \ |
| u31 = vis_fmul8ulx16(row13, yFilter1); \ |
| row02 = vis_faligndata(data2, data3); \ |
| u00 = vis_fmul8sux16(row20, yFilter2); \ |
| row03 = vis_faligndata(data3, data4); \ |
| filterposx = (X >> FILTER_SHIFT); /* sampled before X += dX; masked further down */ \ |
| sPtr += srcYStride; \ |
| v12 = vis_fpadd16(u20, u21); \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* next pixel: source row 1 */ \ |
| u01 = vis_fmul8ulx16(row20, yFilter2); \ |
| v13 = vis_fpadd16(u30, u31); \ |
| data0 = dpSrc[0]; \ |
| u10 = vis_fmul8sux16(row21, yFilter2); \ |
| X += dX; \ |
| data1 = dpSrc[1]; \ |
| u11 = vis_fmul8ulx16(row21, yFilter2); \ |
| v20 = vis_fpadd16(u00, u01); \ |
| data2 = dpSrc[2]; \ |
| u20 = vis_fmul8sux16(row22, yFilter2); \ |
| sum0 = vis_fpadd16(v00, v10); \ |
| data3 = dpSrc[3]; \ |
| u21 = vis_fmul8ulx16(row22, yFilter2); \ |
| data4 = dpSrc[4]; \ |
| row10 = vis_faligndata(data0, data1); \ |
| u30 = vis_fmul8sux16(row23, yFilter2); \ |
| row11 = vis_faligndata(data1, data2); \ |
| u31 = vis_fmul8ulx16(row23, yFilter2); \ |
| row12 = vis_faligndata(data2, data3); \ |
| u00 = vis_fmul8sux16(row30, yFilter3); \ |
| row13 = vis_faligndata(data3, data4); \ |
| sPtr += srcYStride; \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* next pixel: source row 2 */ \ |
| u01 = vis_fmul8ulx16(row30, yFilter3); \ |
| v21 = vis_fpadd16(u10, u11); \ |
| Y += dY; \ |
| xSrc = (X >> MLIB_SHIFT)-1; /* computed from the already advanced X */ \ |
| sum1 = vis_fpadd16(v01, v11); \ |
| data0 = dpSrc[0]; \ |
| u10 = vis_fmul8sux16(row31, yFilter3); \ |
| sum2 = vis_fpadd16(v02, v12); \ |
| sum3 = vis_fpadd16(v03, v13); \ |
| ySrc = (Y >> MLIB_SHIFT)-1; /* computed from the already advanced Y */ \ |
| data1 = dpSrc[1]; \ |
| v22 = vis_fpadd16(u20, u21); \ |
| u11 = vis_fmul8ulx16(row31, yFilter3); \ |
| data2 = dpSrc[2]; \ |
| sum0 = vis_fpadd16(sum0, v20); \ |
| u20 = vis_fmul8sux16(row32, yFilter3); \ |
| data3 = dpSrc[3]; \ |
| u21 = vis_fmul8ulx16(row32, yFilter3); \ |
| v23 = vis_fpadd16(u30, u31); \ |
| data4 = dpSrc[4]; \ |
| v30 = vis_fpadd16(u00, u01); \ |
| filterposy &= FILTER_MASK; \ |
| row20 = vis_faligndata(data0, data1); \ |
| sum1 = vis_fpadd16(sum1, v21); \ |
| u30 = vis_fmul8sux16(row33, yFilter3); \ |
| row21 = vis_faligndata(data1, data2); \ |
| u31 = vis_fmul8ulx16(row33, yFilter3); /* last use of the current row3x and yFilter values */ \ |
| row22 = vis_faligndata(data2, data3); \ |
| row23 = vis_faligndata(data3, data4); \ |
| sPtr += srcYStride; \ |
| filterposx &= FILTER_MASK; \ |
| v31 = vis_fpadd16(u10, u11); \ |
| dpSrc = vis_alignaddr(sPtr, 0); /* next pixel: source row 3 */ \ |
| data0 = dpSrc[0]; \ |
| sum2 = vis_fpadd16(sum2, v22); \ |
| sum3 = vis_fpadd16(sum3, v23); \ |
| data1 = dpSrc[1]; \ |
| v32 = vis_fpadd16(u20, u21); \ |
| data2 = dpSrc[2]; \ |
| sum0 = vis_fpadd16(sum0, v30); \ |
| data3 = dpSrc[3]; \ |
| v33 = vis_fpadd16(u30, u31); \ |
| data4 = dpSrc[4]; \ |
| row30 = vis_faligndata(data0, data1); \ |
| v00 = vis_fmul8sux16(sum0, xFilter0); /* current pixel: horizontal pass starts */ \ |
| row31 = vis_faligndata(data1, data2); \ |
| row32 = vis_faligndata(data2, data3); \ |
| row33 = vis_faligndata(data3, data4); \ |
| yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ |
| sum1 = vis_fpadd16(sum1, v31); \ |
| yFilter0 = yPtr[0]; /* Y coefficients for the next pixel */ \ |
| sum2 = vis_fpadd16(sum2, v32); \ |
| v01 = vis_fmul8ulx16(sum0, xFilter0); \ |
| yFilter1 = yPtr[1]; \ |
| v10 = vis_fmul8sux16(sum1, xFilter1); \ |
| sum3 = vis_fpadd16(sum3, v33); \ |
| yFilter2 = yPtr[2]; \ |
| v11 = vis_fmul8ulx16(sum1, xFilter1); \ |
| d0 = vis_fpadd16(v00, v01); \ |
| yFilter3 = yPtr[3]; \ |
| xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ |
| v20 = vis_fmul8sux16(sum2, xFilter2); \ |
| xFilter0 = xPtr[0]; /* each xFilterK is replaced only after its two products */ \ |
| v21 = vis_fmul8ulx16(sum2, xFilter2); \ |
| d1 = vis_fpadd16(v10, v11); \ |
| xFilter1 = xPtr[1]; \ |
| v30 = vis_fmul8sux16(sum3, xFilter3); \ |
| v31 = vis_fmul8ulx16(sum3, xFilter3); \ |
| d2 = vis_fpadd16(v20, v21); \ |
| xFilter2 = xPtr[2]; \ |
| d3 = vis_fpadd16(v30, v31); \ |
| xFilter3 = xPtr[3]; \ |
| sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2) |
| |
| /*************************************************************** |
| * FADD_4BC_S16() |
| * |
| * Final reduction for one 4-channel pixel: adds the four horizontal |
| * products d0..d3 (as left by BC_S16_4CH) lane by lane and packs the |
| * sum into res. These are the same last six statements that |
| * RESULT_4BC_S16_1PIXEL ends with. |
| * |
| * Reads (enclosing scope): d0, d1, d2, d3, f_x01000100. |
| * Writes: res; d0, d2 and d3 are overwritten as scratch. |
| * NOTE(review): the fmuld8sux16 / fpackfix_pair pair presumably widens |
| * the 16-bit sums and packs them back using the scale held in GSR - |
| * confirm against the VIS manual. |
| ***************************************************************/ |
| #define FADD_4BC_S16() \ |
| d0 = vis_fpadd16(d0, d1); \ |
| d2 = vis_fpadd16(d2, d3); \ |
| d0 = vis_fpadd16(d0, d2); /* d0 = d0 + d1 + d2 + d3 */ \ |
| d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ |
| d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ |
| res = vis_fpackfix_pair(d2, d3) |
| |
| /*************************************************************** |
| * mlib_ImageAffine_s16_4ch_bc |
| * |
| * Bicubic-filtered affine inner loop for 4-channel mlib_s16 images. |
| * Iterates j over yStart..yFinish and, for every row, produces |
| * cols = xRight - xLeft + 1 destination pixels of four mlib_s16 each. |
| * |
| * param - affine parameter block. It is not touched directly here; |
| * presumably DECLAREVAR_BC() (defined outside this view) |
| * unpacks it into filter, srcYStride, j, yStart, yFinish, |
| * xLeft, xRight, X, Y, dX, dY, lineAddr, dstData, |
| * dstPixelPtr, ... - TODO confirm. |
| * |
| * Returns MLIB_SUCCESS unconditionally. |
| * |
| * Pipeline of the main path (rows with at least four pixels). At the |
| * top of loop iteration i: |
| * res holds the packed result of pixel i, |
| * d0..d3 hold the products of pixel i+1, |
| * rowRK / filters hold the inputs of pixel i+2, |
| * sPtr points at the source of pixel i+3. |
| * The loop runs cols-4 times; the epilogue then stores pixels cols-4 |
| * and cols-3 from the pipeline, computes pixel cols-2 from the rows |
| * that are already loaded, and loads and computes pixel cols-1. |
| * |
| * Every store rotates res with vis_faligndata(res, res) using gsrd as |
| * alignment and writes it to two consecutive mlib_d64 slots under |
| * mask and ~mask. NOTE(review): presumably vis_pst_16 is a partial |
| * store selecting 16-bit lanes by mask, which together lets one pixel |
| * straddle an 8-byte boundary of the destination - confirm. |
| ***************************************************************/ |
| mlib_status mlib_ImageAffine_s16_4ch_bc (mlib_affine_param *param) |
| { |
| DECLAREVAR_BC(); /* shared locals; macro defined outside this view */ |
| DTYPE *dstLineEnd; /* last mlib_s16 element written in the current row */ |
| mlib_s32 filterposx, filterposy; /* byte-offset indices into the filter table */ |
| mlib_d64 data0, data1, data2, data3, data4; /* raw aligned 8-byte source loads */ |
| mlib_d64 sum0, sum1, sum2, sum3; /* vertical-pass accumulators */ |
| mlib_d64 row00, row10, row20, row30; /* rowRK: source row R, 8-byte group K */ |
| mlib_d64 row01, row11, row21, row31; |
| mlib_d64 row02, row12, row22, row32; |
| mlib_d64 row03, row13, row23, row33; |
| mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3; /* horizontal coefficients */ |
| mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; /* vertical coefficients, one per source row */ |
| mlib_d64 v00, v01, v02, v03, v10, v11, v12, v13; |
| mlib_d64 v20, v21, v22, v23, v30, v31, v32, v33; |
| mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31; /* partial products of the fmul8sux16/fmul8ulx16 pairs */ |
| mlib_d64 d0, d1, d2, d3; /* horizontal products, one per xFilterK */ |
| mlib_d64 *yPtr, *xPtr; |
| mlib_d64 *dp, *dpSrc; /* dp: destination store cursor; dpSrc: source load base */ |
| mlib_s32 cols, i, mask, gsrd; |
| mlib_d64 res; /* packed result of one pixel */ |
| mlib_f32 f_x01000100 = vis_to_float(0x01000100); /* constant multiplier used before the pack */ |
| const mlib_s16 *mlib_filters_table_4; |
| /* Coefficient table: mlib_filters_s16_bc_4 for MLIB_BICUBIC, |
| * mlib_filters_s16_bc2_4 for every other value of filter. */ |
| if (filter == MLIB_BICUBIC) { |
| mlib_filters_table_4 = mlib_filters_s16_bc_4; |
| } else { |
| mlib_filters_table_4 = mlib_filters_s16_bc2_4; |
| } |
| /* Halve the stride; the macros add it to the mlib_s16 pointer sPtr |
| * (presumably it arrives in bytes - confirm). */ |
| srcYStride >>= 1; |
| |
| for (j = yStart; j <= yFinish; j++) { |
| /* GSR is rewritten for every row. NOTE(review): 10 << 3 presumably |
| * selects a pack scale of 10 with a zero alignment offset - confirm. */ |
| vis_write_gsr(10 << 3); |
| /* CLIP is defined outside this view; presumably it sets up xLeft, |
| * xRight, X, Y and dstPixelPtr for row j - confirm. */ |
| CLIP(4); |
| dstLineEnd = (DTYPE*)dstData + 4 * xRight; /* first element of the last pixel */ |
| |
| cols = xRight - xLeft + 1; |
| dp = vis_alignaddr(dstPixelPtr, 0); /* store cursor; presumably dstPixelPtr rounded down to 8 bytes */ |
| dstLineEnd += 3; /* now the last element of the last pixel */ |
| mask = vis_edge16(dstPixelPtr, dstLineEnd); |
| gsrd = ((8 - (mlib_addr)dstPixelPtr) & 7); /* bytes from dstPixelPtr up to the next 8-byte boundary, 0 if aligned */ |
| |
| i = 0; |
| /* The pipelined path issues exactly four stores outside its loop, |
| * so it is only entered when the row has at least four pixels. */ |
| if (i <= cols - 4) { |
| /* Prologue: load pixel 0, then run two filter stages. */ |
| NEXT_PIXEL_4BC_S16(); |
| LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4); |
| |
| NEXT_PIXEL_4BC_S16(); |
| |
| BC_S16_4CH(mlib_filters_table_4); |
| FADD_4BC_S16(); |
| |
| BC_S16_4CH(mlib_filters_table_4); |
| /* Steady state: store pixel i, reduce pixel i+1, filter pixel i+2. */ |
| #pragma pipeloop(0) |
| for (; i < cols-4; i++) { |
| vis_alignaddr((void *)gsrd, 0); /* BC_S16_4CH changed the alignment; set it for the store */ |
| res = vis_faligndata(res, res); |
| |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| |
| FADD_4BC_S16(); |
| BC_S16_4CH(mlib_filters_table_4); |
| } |
| /* Epilogue: pixel cols-4 is already in res. */ |
| vis_alignaddr((void *)gsrd, 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| /* Pixel cols-3: its products are waiting in d0..d3. */ |
| FADD_4BC_S16(); |
| vis_alignaddr((void *)gsrd, 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| /* Pixel cols-2: rows and filters were loaded by the last BC_S16_4CH. */ |
| RESULT_4BC_S16_1PIXEL(); |
| vis_alignaddr((void *)gsrd, 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| /* Pixel cols-1: sPtr was prepared by the last BC_S16_4CH. */ |
| LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4); |
| RESULT_4BC_S16_1PIXEL(); |
| vis_alignaddr((void *)gsrd, 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| i += 4; |
| } |
| /* Non-pipelined path: rows shorter than four pixels (i is still 0). */ |
| #pragma pipeloop(0) |
| for (; i < cols; i++) { |
| NEXT_PIXEL_4BC_S16(); |
| LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4); |
| RESULT_4BC_S16_1PIXEL(); |
| vis_alignaddr((void *)gsrd, 0); |
| res = vis_faligndata(res, res); |
| vis_pst_16(res, dp++, mask); |
| vis_pst_16(res, dp, ~mask); |
| } |
| } |
| |
| return MLIB_SUCCESS; |
| } |
| |
| /***************************************************************/ |