| /* |
| * Copyright (C) 2009 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /*------------------------------------------------------------------------------ |
| |
| Table of contents |
| |
| 1. Include headers |
| 2. External compiler flags |
| 3. Module defines |
| 4. Local function prototypes |
| 5. Functions |
| |
| ------------------------------------------------------------------------------*/ |
| |
| /*------------------------------------------------------------------------------ |
| 1. Include headers |
| ------------------------------------------------------------------------------*/ |
| |
| #include "basetype.h" |
| #include "h264bsd_reconstruct.h" |
| #include "h264bsd_macroblock_layer.h" |
| #include "h264bsd_image.h" |
| #include "h264bsd_util.h" |
| |
| #ifdef H264DEC_OMXDL |
| #include "omxtypes.h" |
| #include "omxVC.h" |
| #include "armVC.h" |
| #endif /* H264DEC_OMXDL */ |
| |
| #define UNUSED(x) (void)(x) |
| |
| /*------------------------------------------------------------------------------ |
| 2. External compiler flags |
| -------------------------------------------------------------------------------- |
| |
| -------------------------------------------------------------------------------- |
| 3. Module defines |
| ------------------------------------------------------------------------------*/ |
| |
| /* Switch off the following Lint messages for this file: |
| * Info 701: Shift left of signed quantity (int) |
| * Info 702: Shift right of signed quantity (int) |
| */ |
| /*lint -e701 -e702 */ |
| |
| /* Luma fractional-sample positions |
| * |
| * G a b c H |
| * d e f g |
| * h i j k m |
| * n p q r |
| * M s N |
| * |
| * G, H, M and N are integer sample positions |
| * a-s are fractional samples that need to be interpolated. |
| */ |
| #ifndef H264DEC_OMXDL |
| static const u32 lumaFracPos[4][4] = { |
| /* G d h n a e i p b f j q c g k r */ |
| {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}}; |
| #endif /* H264DEC_OMXDL */ |
| |
| /* clipping table, defined in h264bsd_intra_prediction.c */ |
| extern const u8 h264bsdClip[]; |
| |
| /*------------------------------------------------------------------------------ |
| 4. Local function prototypes |
| ------------------------------------------------------------------------------*/ |
| |
| #ifndef H264DEC_OMXDL |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateChromaHor |
| |
| Functional description: |
| This function performs chroma interpolation in horizontal direction. |
| Overfilling is done only if needed. Reference image (pRef) is |
| read at correct position and the predicted part is written to |
| macroblock's chrominance (predPartChroma) |
| Inputs: |
| pRef pointer to reference frame Cb top-left corner |
| x0 integer x-coordinate for prediction |
| y0 integer y-coordinate for prediction |
| width width of the reference frame chrominance in pixels |
| height height of the reference frame chrominance in pixels |
| xFrac horizontal fraction for prediction in 1/8 pixels |
| chromaPartWidth width of the predicted part in pixels |
| chromaPartHeight height of the predicted part in pixels |
| Outputs: |
| predPartChroma pointer where predicted part is written |
| |
| ------------------------------------------------------------------------------*/ |
| #ifndef H264DEC_ARM11 |
| void h264bsdInterpolateChromaHor( |
| u8 *pRef, |
| u8 *predPartChroma, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 xFrac, |
| u32 chromaPartWidth, |
| u32 chromaPartHeight) |
| { |
| |
| /* Variables */ |
| |
| u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val; |
| u8 *ptrA, *cbr; |
| u32 comp; |
| u8 block[9*8*2]; |
| |
| /* Code */ |
| |
| ASSERT(predPartChroma); |
| ASSERT(chromaPartWidth); |
| ASSERT(chromaPartHeight); |
| ASSERT(xFrac < 8); |
| ASSERT(pRef); |
| |
| if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || |
| (y0 < 0) || ((u32)y0+chromaPartHeight > height)) |
| { |
| h264bsdFillBlock(pRef, block, x0, y0, width, height, |
| chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1); |
| pRef += width * height; |
| h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight, |
| x0, y0, width, height, chromaPartWidth + 1, |
| chromaPartHeight, chromaPartWidth + 1); |
| |
| pRef = block; |
| x0 = 0; |
| y0 = 0; |
| width = chromaPartWidth+1; |
| height = chromaPartHeight; |
| } |
| |
| val = 8 - xFrac; |
| |
| for (comp = 0; comp <= 1; comp++) |
| { |
| |
| ptrA = pRef + (comp * height + (u32)y0) * width + x0; |
| cbr = predPartChroma + comp * 8 * 8; |
| |
| /* 2x2 pels per iteration |
| * bilinear horizontal interpolation */ |
| for (y = (chromaPartHeight >> 1); y; y--) |
| { |
| for (x = (chromaPartWidth >> 1); x; x--) |
| { |
| tmp1 = ptrA[width]; |
| tmp2 = *ptrA++; |
| tmp3 = ptrA[width]; |
| tmp4 = *ptrA++; |
| c = ((val * tmp1 + xFrac * tmp3) << 3) + 32; |
| c >>= 6; |
| cbr[8] = (u8)c; |
| c = ((val * tmp2 + xFrac * tmp4) << 3) + 32; |
| c >>= 6; |
| *cbr++ = (u8)c; |
| tmp1 = ptrA[width]; |
| tmp2 = *ptrA; |
| c = ((val * tmp3 + xFrac * tmp1) << 3) + 32; |
| c >>= 6; |
| cbr[8] = (u8)c; |
| c = ((val * tmp4 + xFrac * tmp2) << 3) + 32; |
| c >>= 6; |
| *cbr++ = (u8)c; |
| } |
| cbr += 2*8 - chromaPartWidth; |
| ptrA += 2*width - chromaPartWidth; |
| } |
| } |
| |
| } |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateChromaVer |
| |
| Functional description: |
| This function performs chroma interpolation in vertical direction. |
| Overfilling is done only if needed. Reference image (pRef) is |
| read at correct position and the predicted part is written to |
| macroblock's chrominance (predPartChroma) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateChromaVer( |
| u8 *pRef, |
| u8 *predPartChroma, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 yFrac, |
| u32 chromaPartWidth, |
| u32 chromaPartHeight) |
| { |
| |
| /* Variables */ |
| |
| u32 x, y, tmp1, tmp2, tmp3, c, val; |
| u8 *ptrA, *cbr; |
| u32 comp; |
| u8 block[9*8*2]; |
| |
| /* Code */ |
| |
| ASSERT(predPartChroma); |
| ASSERT(chromaPartWidth); |
| ASSERT(chromaPartHeight); |
| ASSERT(yFrac < 8); |
| ASSERT(pRef); |
| |
| if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) || |
| (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) |
| { |
| h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth, |
| chromaPartHeight + 1, chromaPartWidth); |
| pRef += width * height; |
| h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1), |
| x0, y0, width, height, chromaPartWidth, |
| chromaPartHeight + 1, chromaPartWidth); |
| |
| pRef = block; |
| x0 = 0; |
| y0 = 0; |
| width = chromaPartWidth; |
| height = chromaPartHeight+1; |
| } |
| |
| val = 8 - yFrac; |
| |
| for (comp = 0; comp <= 1; comp++) |
| { |
| |
| ptrA = pRef + (comp * height + (u32)y0) * width + x0; |
| cbr = predPartChroma + comp * 8 * 8; |
| |
| /* 2x2 pels per iteration |
| * bilinear vertical interpolation */ |
| for (y = (chromaPartHeight >> 1); y; y--) |
| { |
| for (x = (chromaPartWidth >> 1); x; x--) |
| { |
| tmp3 = ptrA[width*2]; |
| tmp2 = ptrA[width]; |
| tmp1 = *ptrA++; |
| c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; |
| c >>= 6; |
| cbr[8] = (u8)c; |
| c = ((val * tmp1 + yFrac * tmp2) << 3) + 32; |
| c >>= 6; |
| *cbr++ = (u8)c; |
| tmp3 = ptrA[width*2]; |
| tmp2 = ptrA[width]; |
| tmp1 = *ptrA++; |
| c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; |
| c >>= 6; |
| cbr[8] = (u8)c; |
| c = ((val * tmp1 + yFrac * tmp2) << 3) + 32; |
| c >>= 6; |
| *cbr++ = (u8)c; |
| } |
| cbr += 2*8 - chromaPartWidth; |
| ptrA += 2*width - chromaPartWidth; |
| } |
| } |
| |
| } |
| #endif |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateChromaHorVer |
| |
| Functional description: |
| This function performs chroma interpolation in horizontal and |
| vertical direction. Overfilling is done only if needed. Reference |
| image (ref) is read at correct position and the predicted part |
| is written to macroblock's chrominance (predPartChroma) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateChromaHorVer( |
| u8 *ref, |
| u8 *predPartChroma, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 xFrac, |
| u32 yFrac, |
| u32 chromaPartWidth, |
| u32 chromaPartHeight) |
| { |
| u8 block[9*9*2]; |
| u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32; |
| u32 comp; |
| u8 *ptrA, *cbr; |
| |
| /* Code */ |
| |
| ASSERT(predPartChroma); |
| ASSERT(chromaPartWidth); |
| ASSERT(chromaPartHeight); |
| ASSERT(xFrac < 8); |
| ASSERT(yFrac < 8); |
| ASSERT(ref); |
| |
| if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || |
| (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) |
| { |
| h264bsdFillBlock(ref, block, x0, y0, width, height, |
| chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1); |
| ref += width * height; |
| h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1), |
| x0, y0, width, height, chromaPartWidth + 1, |
| chromaPartHeight + 1, chromaPartWidth + 1); |
| |
| ref = block; |
| x0 = 0; |
| y0 = 0; |
| width = chromaPartWidth+1; |
| height = chromaPartHeight+1; |
| } |
| |
| valX = 8 - xFrac; |
| valY = 8 - yFrac; |
| |
| for (comp = 0; comp <= 1; comp++) |
| { |
| |
| ptrA = ref + (comp * height + (u32)y0) * width + x0; |
| cbr = predPartChroma + comp * 8 * 8; |
| |
| /* 2x2 pels per iteration |
| * bilinear vertical and horizontal interpolation */ |
| for (y = (chromaPartHeight >> 1); y; y--) |
| { |
| tmp1 = *ptrA; |
| tmp3 = ptrA[width]; |
| tmp5 = ptrA[width*2]; |
| tmp1 *= valY; |
| tmp1 += tmp3 * yFrac; |
| tmp3 *= valY; |
| tmp3 += tmp5 * yFrac; |
| for (x = (chromaPartWidth >> 1); x; x--) |
| { |
| tmp2 = *++ptrA; |
| tmp4 = ptrA[width]; |
| tmp6 = ptrA[width*2]; |
| tmp2 *= valY; |
| tmp2 += tmp4 * yFrac; |
| tmp4 *= valY; |
| tmp4 += tmp6 * yFrac; |
| tmp1 = tmp1 * valX + plus32; |
| tmp3 = tmp3 * valX + plus32; |
| tmp1 += tmp2 * xFrac; |
| tmp1 >>= 6; |
| tmp3 += tmp4 * xFrac; |
| tmp3 >>= 6; |
| cbr[8] = (u8)tmp3; |
| *cbr++ = (u8)tmp1; |
| |
| tmp1 = *++ptrA; |
| tmp3 = ptrA[width]; |
| tmp5 = ptrA[width*2]; |
| tmp1 *= valY; |
| tmp1 += tmp3 * yFrac; |
| tmp3 *= valY; |
| tmp3 += tmp5 * yFrac; |
| tmp2 = tmp2 * valX + plus32; |
| tmp4 = tmp4 * valX + plus32; |
| tmp2 += tmp1 * xFrac; |
| tmp2 >>= 6; |
| tmp4 += tmp3 * xFrac; |
| tmp4 >>= 6; |
| cbr[8] = (u8)tmp4; |
| *cbr++ = (u8)tmp2; |
| } |
| cbr += 2*8 - chromaPartWidth; |
| ptrA += 2*width - chromaPartWidth; |
| } |
| } |
| |
| } |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: PredictChroma |
| |
| Functional description: |
| Top level chroma prediction function that calls the appropriate |
| interpolation function. The output is written to macroblock array. |
| |
| ------------------------------------------------------------------------------*/ |
| |
| static void PredictChroma( |
| u8 *mbPartChroma, |
| u32 xAL, |
| u32 yAL, |
| u32 partWidth, |
| u32 partHeight, |
| mv_t *mv, |
| image_t *refPic) |
| { |
| |
| /* Variables */ |
| |
| u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight; |
| i32 xInt, yInt; |
| u8 *ref; |
| |
| /* Code */ |
| |
| ASSERT(mv); |
| ASSERT(refPic); |
| ASSERT(refPic->data); |
| ASSERT(refPic->width); |
| ASSERT(refPic->height); |
| |
| width = 8 * refPic->width; |
| height = 8 * refPic->height; |
| |
| xInt = (xAL >> 1) + (mv->hor >> 3); |
| yInt = (yAL >> 1) + (mv->ver >> 3); |
| xFrac = mv->hor & 0x7; |
| yFrac = mv->ver & 0x7; |
| |
| chromaPartWidth = partWidth >> 1; |
| chromaPartHeight = partHeight >> 1; |
| ref = refPic->data + 256 * refPic->width * refPic->height; |
| |
| if (xFrac && yFrac) |
| { |
| h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width, |
| height, xFrac, yFrac, chromaPartWidth, chromaPartHeight); |
| } |
| else if (xFrac) |
| { |
| h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width, |
| height, xFrac, chromaPartWidth, chromaPartHeight); |
| } |
| else if (yFrac) |
| { |
| h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width, |
| height, yFrac, chromaPartWidth, chromaPartHeight); |
| } |
| else |
| { |
| h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height, |
| chromaPartWidth, chromaPartHeight, 8); |
| ref += width * height; |
| h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height, |
| chromaPartWidth, chromaPartHeight, 8); |
| } |
| |
| } |
| |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateVerHalf |
| |
| Functional description: |
| Function to perform vertical interpolation of pixel position 'h' |
| for a block. Overfilling is done only if needed. Reference |
| image (ref) is read at correct position and the predicted part |
| is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| #ifndef H264DEC_ARM11 |
| void h264bsdInterpolateVerHalf( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight) |
| { |
| u32 p1[21*21/4+1]; |
| u32 i, j; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| u8 *ptrC, *ptrV; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth > width) || |
| (y0 < 0) || ((u32)y0+partHeight+5 > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth, partHeight+5, partWidth); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth; |
| } |
| |
| ref += (u32)y0 * width + (u32)x0; |
| |
| ptrC = ref + width; |
| ptrV = ptrC + 5*width; |
| |
| /* 4 pixels per iteration, interpolate using 5 vertical samples */ |
| for (i = (partHeight >> 2); i; i--) |
| { |
| /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ |
| for (j = partWidth; j; j--) |
| { |
| tmp4 = ptrV[-(i32)width*2]; |
| tmp5 = ptrV[-(i32)width]; |
| tmp1 = ptrV[width]; |
| tmp2 = ptrV[width*2]; |
| tmp6 = *ptrV++; |
| |
| tmp7 = tmp4 + tmp1; |
| tmp2 -= (tmp7 << 2); |
| tmp2 -= tmp7; |
| tmp2 += 16; |
| tmp7 = tmp5 + tmp6; |
| tmp3 = ptrC[width*2]; |
| tmp2 += (tmp7 << 4); |
| tmp2 += (tmp7 << 2); |
| tmp2 += tmp3; |
| tmp2 = clp[tmp2>>5]; |
| tmp1 += 16; |
| mb[48] = (u8)tmp2; |
| |
| tmp7 = tmp3 + tmp6; |
| tmp1 -= (tmp7 << 2); |
| tmp1 -= tmp7; |
| tmp7 = tmp4 + tmp5; |
| tmp2 = ptrC[width]; |
| tmp1 += (tmp7 << 4); |
| tmp1 += (tmp7 << 2); |
| tmp1 += tmp2; |
| tmp1 = clp[tmp1>>5]; |
| tmp6 += 16; |
| mb[32] = (u8)tmp1; |
| |
| tmp7 = tmp2 + tmp5; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp7 = tmp4 + tmp3; |
| tmp1 = *ptrC; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp6 += tmp1; |
| tmp6 = clp[tmp6>>5]; |
| tmp5 += 16; |
| mb[16] = (u8)tmp6; |
| |
| tmp1 += tmp4; |
| tmp5 -= (tmp1 << 2); |
| tmp5 -= tmp1; |
| tmp3 += tmp2; |
| tmp6 = ptrC[-(i32)width]; |
| tmp5 += (tmp3 << 4); |
| tmp5 += (tmp3 << 2); |
| tmp5 += tmp6; |
| tmp5 = clp[tmp5>>5]; |
| *mb++ = (u8)tmp5; |
| ptrC++; |
| } |
| ptrC += 4*width - partWidth; |
| ptrV += 4*width - partWidth; |
| mb += 4*16 - partWidth; |
| } |
| |
| } |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateVerQuarter |
| |
| Functional description: |
| Function to perform vertical interpolation of pixel position 'd' |
| or 'n' for a block. Overfilling is done only if needed. Reference |
| image (ref) is read at correct position and the predicted part |
| is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateVerQuarter( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight, |
| u32 verOffset) /* 0 for pixel d, 1 for pixel n */ |
| { |
| u32 p1[21*21/4+1]; |
| u32 i, j; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| u8 *ptrC, *ptrV, *ptrInt; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth > width) || |
| (y0 < 0) || ((u32)y0+partHeight+5 > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth, partHeight+5, partWidth); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth; |
| } |
| |
| ref += (u32)y0 * width + (u32)x0; |
| |
| ptrC = ref + width; |
| ptrV = ptrC + 5*width; |
| |
| /* Pointer to integer sample position, either M or R */ |
| ptrInt = ptrC + (2+verOffset)*width; |
| |
| /* 4 pixels per iteration |
| * interpolate using 5 vertical samples and average between |
| * interpolated value and integer sample value */ |
| for (i = (partHeight >> 2); i; i--) |
| { |
| /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ |
| for (j = partWidth; j; j--) |
| { |
| tmp4 = ptrV[-(i32)width*2]; |
| tmp5 = ptrV[-(i32)width]; |
| tmp1 = ptrV[width]; |
| tmp2 = ptrV[width*2]; |
| tmp6 = *ptrV++; |
| |
| tmp7 = tmp4 + tmp1; |
| tmp2 -= (tmp7 << 2); |
| tmp2 -= tmp7; |
| tmp2 += 16; |
| tmp7 = tmp5 + tmp6; |
| tmp3 = ptrC[width*2]; |
| tmp2 += (tmp7 << 4); |
| tmp2 += (tmp7 << 2); |
| tmp2 += tmp3; |
| tmp2 = clp[tmp2>>5]; |
| tmp7 = ptrInt[width*2]; |
| tmp1 += 16; |
| tmp2++; |
| mb[48] = (u8)((tmp2 + tmp7) >> 1); |
| |
| tmp7 = tmp3 + tmp6; |
| tmp1 -= (tmp7 << 2); |
| tmp1 -= tmp7; |
| tmp7 = tmp4 + tmp5; |
| tmp2 = ptrC[width]; |
| tmp1 += (tmp7 << 4); |
| tmp1 += (tmp7 << 2); |
| tmp1 += tmp2; |
| tmp1 = clp[tmp1>>5]; |
| tmp7 = ptrInt[width]; |
| tmp6 += 16; |
| tmp1++; |
| mb[32] = (u8)((tmp1 + tmp7) >> 1); |
| |
| tmp7 = tmp2 + tmp5; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp7 = tmp4 + tmp3; |
| tmp1 = *ptrC; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp6 += tmp1; |
| tmp6 = clp[tmp6>>5]; |
| tmp7 = *ptrInt; |
| tmp5 += 16; |
| tmp6++; |
| mb[16] = (u8)((tmp6 + tmp7) >> 1); |
| |
| tmp1 += tmp4; |
| tmp5 -= (tmp1 << 2); |
| tmp5 -= tmp1; |
| tmp3 += tmp2; |
| tmp6 = ptrC[-(i32)width]; |
| tmp5 += (tmp3 << 4); |
| tmp5 += (tmp3 << 2); |
| tmp5 += tmp6; |
| tmp5 = clp[tmp5>>5]; |
| tmp7 = ptrInt[-(i32)width]; |
| tmp5++; |
| *mb++ = (u8)((tmp5 + tmp7) >> 1); |
| ptrC++; |
| ptrInt++; |
| } |
| ptrC += 4*width - partWidth; |
| ptrV += 4*width - partWidth; |
| ptrInt += 4*width - partWidth; |
| mb += 4*16 - partWidth; |
| } |
| |
| } |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateHorHalf |
| |
| Functional description: |
| Function to perform horizontal interpolation of pixel position 'b' |
| for a block. Overfilling is done only if needed. Reference |
| image (ref) is read at correct position and the predicted part |
| is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateHorHalf( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight) |
| { |
| u32 p1[21*21/4+1]; |
| u8 *ptrJ; |
| u32 x, y; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| ASSERT((partWidth&0x3) == 0); |
| ASSERT((partHeight&0x3) == 0); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || |
| (y0 < 0) || ((u32)y0+partHeight > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth+5, partHeight, partWidth+5); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth + 5; |
| } |
| |
| ref += (u32)y0 * width + (u32)x0; |
| |
| ptrJ = ref + 5; |
| |
| for (y = partHeight; y; y--) |
| { |
| tmp6 = *(ptrJ - 5); |
| tmp5 = *(ptrJ - 4); |
| tmp4 = *(ptrJ - 3); |
| tmp3 = *(ptrJ - 2); |
| tmp2 = *(ptrJ - 1); |
| |
| /* calculate 4 pels per iteration */ |
| for (x = (partWidth >> 2); x; x--) |
| { |
| /* First pixel */ |
| tmp6 += 16; |
| tmp7 = tmp3 + tmp4; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp7 = tmp2 + tmp5; |
| tmp1 = *ptrJ++; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp6 += tmp1; |
| tmp6 = clp[tmp6>>5]; |
| /* Second pixel */ |
| tmp5 += 16; |
| tmp7 = tmp2 + tmp3; |
| *mb++ = (u8)tmp6; |
| tmp5 += (tmp7 << 4); |
| tmp5 += (tmp7 << 2); |
| tmp7 = tmp1 + tmp4; |
| tmp6 = *ptrJ++; |
| tmp5 -= (tmp7 << 2); |
| tmp5 -= tmp7; |
| tmp5 += tmp6; |
| tmp5 = clp[tmp5>>5]; |
| /* Third pixel */ |
| tmp4 += 16; |
| tmp7 = tmp1 + tmp2; |
| *mb++ = (u8)tmp5; |
| tmp4 += (tmp7 << 4); |
| tmp4 += (tmp7 << 2); |
| tmp7 = tmp6 + tmp3; |
| tmp5 = *ptrJ++; |
| tmp4 -= (tmp7 << 2); |
| tmp4 -= tmp7; |
| tmp4 += tmp5; |
| tmp4 = clp[tmp4>>5]; |
| /* Fourth pixel */ |
| tmp3 += 16; |
| tmp7 = tmp6 + tmp1; |
| *mb++ = (u8)tmp4; |
| tmp3 += (tmp7 << 4); |
| tmp3 += (tmp7 << 2); |
| tmp7 = tmp5 + tmp2; |
| tmp4 = *ptrJ++; |
| tmp3 -= (tmp7 << 2); |
| tmp3 -= tmp7; |
| tmp3 += tmp4; |
| tmp3 = clp[tmp3>>5]; |
| tmp7 = tmp4; |
| tmp4 = tmp6; |
| tmp6 = tmp2; |
| tmp2 = tmp7; |
| *mb++ = (u8)tmp3; |
| tmp3 = tmp5; |
| tmp5 = tmp1; |
| } |
| ptrJ += width - partWidth; |
| mb += 16 - partWidth; |
| } |
| |
| } |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateHorQuarter |
| |
| Functional description: |
| Function to perform horizontal interpolation of pixel position 'a' |
| or 'c' for a block. Overfilling is done only if needed. Reference |
| image (ref) is read at correct position and the predicted part |
| is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateHorQuarter( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight, |
| u32 horOffset) /* 0 for pixel a, 1 for pixel c */ |
| { |
| u32 p1[21*21/4+1]; |
| u8 *ptrJ; |
| u32 x, y; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || |
| (y0 < 0) || ((u32)y0+partHeight > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth+5, partHeight, partWidth+5); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth + 5; |
| } |
| |
| ref += (u32)y0 * width + (u32)x0; |
| |
| ptrJ = ref + 5; |
| |
| for (y = partHeight; y; y--) |
| { |
| tmp6 = *(ptrJ - 5); |
| tmp5 = *(ptrJ - 4); |
| tmp4 = *(ptrJ - 3); |
| tmp3 = *(ptrJ - 2); |
| tmp2 = *(ptrJ - 1); |
| |
| /* calculate 4 pels per iteration */ |
| for (x = (partWidth >> 2); x; x--) |
| { |
| /* First pixel */ |
| tmp6 += 16; |
| tmp7 = tmp3 + tmp4; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp7 = tmp2 + tmp5; |
| tmp1 = *ptrJ++; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp6 += tmp1; |
| tmp6 = clp[tmp6>>5]; |
| tmp5 += 16; |
| if (!horOffset) |
| tmp6 += tmp4; |
| else |
| tmp6 += tmp3; |
| *mb++ = (u8)((tmp6 + 1) >> 1); |
| /* Second pixel */ |
| tmp7 = tmp2 + tmp3; |
| tmp5 += (tmp7 << 4); |
| tmp5 += (tmp7 << 2); |
| tmp7 = tmp1 + tmp4; |
| tmp6 = *ptrJ++; |
| tmp5 -= (tmp7 << 2); |
| tmp5 -= tmp7; |
| tmp5 += tmp6; |
| tmp5 = clp[tmp5>>5]; |
| tmp4 += 16; |
| if (!horOffset) |
| tmp5 += tmp3; |
| else |
| tmp5 += tmp2; |
| *mb++ = (u8)((tmp5 + 1) >> 1); |
| /* Third pixel */ |
| tmp7 = tmp1 + tmp2; |
| tmp4 += (tmp7 << 4); |
| tmp4 += (tmp7 << 2); |
| tmp7 = tmp6 + tmp3; |
| tmp5 = *ptrJ++; |
| tmp4 -= (tmp7 << 2); |
| tmp4 -= tmp7; |
| tmp4 += tmp5; |
| tmp4 = clp[tmp4>>5]; |
| tmp3 += 16; |
| if (!horOffset) |
| tmp4 += tmp2; |
| else |
| tmp4 += tmp1; |
| *mb++ = (u8)((tmp4 + 1) >> 1); |
| /* Fourth pixel */ |
| tmp7 = tmp6 + tmp1; |
| tmp3 += (tmp7 << 4); |
| tmp3 += (tmp7 << 2); |
| tmp7 = tmp5 + tmp2; |
| tmp4 = *ptrJ++; |
| tmp3 -= (tmp7 << 2); |
| tmp3 -= tmp7; |
| tmp3 += tmp4; |
| tmp3 = clp[tmp3>>5]; |
| if (!horOffset) |
| tmp3 += tmp1; |
| else |
| tmp3 += tmp6; |
| *mb++ = (u8)((tmp3 + 1) >> 1); |
| tmp3 = tmp5; |
| tmp5 = tmp1; |
| tmp7 = tmp4; |
| tmp4 = tmp6; |
| tmp6 = tmp2; |
| tmp2 = tmp7; |
| } |
| ptrJ += width - partWidth; |
| mb += 16 - partWidth; |
| } |
| |
| } |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateHorVerQuarter |
| |
| Functional description: |
| Function to perform horizontal and vertical interpolation of pixel |
| position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only |
| if needed. Reference image (ref) is read at correct position and |
| the predicted part is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateHorVerQuarter( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight, |
| u32 horVerOffset) /* 0 for pixel e, 1 for pixel g, |
| 2 for pixel p, 3 for pixel r */ |
| { |
| u32 p1[21*21/4+1]; |
| u8 *ptrC, *ptrJ, *ptrV; |
| u32 x, y; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || |
| (y0 < 0) || ((u32)y0+partHeight+5 > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth+5, partHeight+5, partWidth+5); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth+5; |
| } |
| |
| /* Ref points to G + (-2, -2) */ |
| ref += (u32)y0 * width + (u32)x0; |
| |
| /* ptrJ points to either J or Q, depending on vertical offset */ |
| ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5; |
| |
| /* ptrC points to either C or D, depending on horizontal offset */ |
| ptrC = ref + width + 2 + (horVerOffset & 0x1); |
| |
| for (y = partHeight; y; y--) |
| { |
| tmp6 = *(ptrJ - 5); |
| tmp5 = *(ptrJ - 4); |
| tmp4 = *(ptrJ - 3); |
| tmp3 = *(ptrJ - 2); |
| tmp2 = *(ptrJ - 1); |
| |
| /* Horizontal interpolation, calculate 4 pels per iteration */ |
| for (x = (partWidth >> 2); x; x--) |
| { |
| /* First pixel */ |
| tmp6 += 16; |
| tmp7 = tmp3 + tmp4; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp7 = tmp2 + tmp5; |
| tmp1 = *ptrJ++; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp6 += tmp1; |
| tmp6 = clp[tmp6>>5]; |
| /* Second pixel */ |
| tmp5 += 16; |
| tmp7 = tmp2 + tmp3; |
| *mb++ = (u8)tmp6; |
| tmp5 += (tmp7 << 4); |
| tmp5 += (tmp7 << 2); |
| tmp7 = tmp1 + tmp4; |
| tmp6 = *ptrJ++; |
| tmp5 -= (tmp7 << 2); |
| tmp5 -= tmp7; |
| tmp5 += tmp6; |
| tmp5 = clp[tmp5>>5]; |
| /* Third pixel */ |
| tmp4 += 16; |
| tmp7 = tmp1 + tmp2; |
| *mb++ = (u8)tmp5; |
| tmp4 += (tmp7 << 4); |
| tmp4 += (tmp7 << 2); |
| tmp7 = tmp6 + tmp3; |
| tmp5 = *ptrJ++; |
| tmp4 -= (tmp7 << 2); |
| tmp4 -= tmp7; |
| tmp4 += tmp5; |
| tmp4 = clp[tmp4>>5]; |
| /* Fourth pixel */ |
| tmp3 += 16; |
| tmp7 = tmp6 + tmp1; |
| *mb++ = (u8)tmp4; |
| tmp3 += (tmp7 << 4); |
| tmp3 += (tmp7 << 2); |
| tmp7 = tmp5 + tmp2; |
| tmp4 = *ptrJ++; |
| tmp3 -= (tmp7 << 2); |
| tmp3 -= tmp7; |
| tmp3 += tmp4; |
| tmp3 = clp[tmp3>>5]; |
| tmp7 = tmp4; |
| tmp4 = tmp6; |
| tmp6 = tmp2; |
| tmp2 = tmp7; |
| *mb++ = (u8)tmp3; |
| tmp3 = tmp5; |
| tmp5 = tmp1; |
| } |
| ptrJ += width - partWidth; |
| mb += 16 - partWidth; |
| } |
| |
| mb -= 16*partHeight; |
| ptrV = ptrC + 5*width; |
| |
| for (y = (partHeight >> 2); y; y--) |
| { |
| /* Vertical interpolation and averaging, 4 pels per iteration */ |
| for (x = partWidth; x; x--) |
| { |
| tmp4 = ptrV[-(i32)width*2]; |
| tmp5 = ptrV[-(i32)width]; |
| tmp1 = ptrV[width]; |
| tmp2 = ptrV[width*2]; |
| tmp6 = *ptrV++; |
| |
| tmp7 = tmp4 + tmp1; |
| tmp2 -= (tmp7 << 2); |
| tmp2 -= tmp7; |
| tmp2 += 16; |
| tmp7 = tmp5 + tmp6; |
| tmp3 = ptrC[width*2]; |
| tmp2 += (tmp7 << 4); |
| tmp2 += (tmp7 << 2); |
| tmp2 += tmp3; |
| tmp7 = clp[tmp2>>5]; |
| tmp2 = mb[48]; |
| tmp1 += 16; |
| tmp7++; |
| mb[48] = (u8)((tmp2 + tmp7) >> 1); |
| |
| tmp7 = tmp3 + tmp6; |
| tmp1 -= (tmp7 << 2); |
| tmp1 -= tmp7; |
| tmp7 = tmp4 + tmp5; |
| tmp2 = ptrC[width]; |
| tmp1 += (tmp7 << 4); |
| tmp1 += (tmp7 << 2); |
| tmp1 += tmp2; |
| tmp7 = clp[tmp1>>5]; |
| tmp1 = mb[32]; |
| tmp6 += 16; |
| tmp7++; |
| mb[32] = (u8)((tmp1 + tmp7) >> 1); |
| |
| tmp1 = *ptrC; |
| tmp7 = tmp2 + tmp5; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp7 = tmp4 + tmp3; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp6 += tmp1; |
| tmp7 = clp[tmp6>>5]; |
| tmp6 = mb[16]; |
| tmp5 += 16; |
| tmp7++; |
| mb[16] = (u8)((tmp6 + tmp7) >> 1); |
| |
| tmp6 = ptrC[-(i32)width]; |
| tmp1 += tmp4; |
| tmp5 -= (tmp1 << 2); |
| tmp5 -= tmp1; |
| tmp3 += tmp2; |
| tmp5 += (tmp3 << 4); |
| tmp5 += (tmp3 << 2); |
| tmp5 += tmp6; |
| tmp7 = clp[tmp5>>5]; |
| tmp5 = *mb; |
| tmp7++; |
| *mb++ = (u8)((tmp5 + tmp7) >> 1); |
| ptrC++; |
| |
| } |
| ptrC += 4*width - partWidth; |
| ptrV += 4*width - partWidth; |
| mb += 4*16 - partWidth; |
| } |
| |
| } |
| #endif |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateMidHalf |
| |
| Functional description: |
| Function to perform horizontal and vertical interpolation of pixel |
| position 'j' for a block. Overfilling is done only if needed. |
| Reference image (ref) is read at correct position and the predicted |
| part is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateMidHalf( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight) |
| { |
| u32 p1[21*21/4+1]; |
| u32 x, y; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| i32 *ptrC, *ptrV, *b1; |
| u8 *ptrJ; |
| i32 table[21*16]; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || |
| (y0 < 0) || ((u32)y0+partHeight+5 > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth+5, partHeight+5, partWidth+5); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth+5; |
| } |
| |
| ref += (u32)y0 * width + (u32)x0; |
| |
| b1 = table; |
| ptrJ = ref + 5; |
| |
| /* First step: calculate intermediate values for |
| * horizontal interpolation */ |
| for (y = partHeight + 5; y; y--) |
| { |
| tmp6 = *(ptrJ - 5); |
| tmp5 = *(ptrJ - 4); |
| tmp4 = *(ptrJ - 3); |
| tmp3 = *(ptrJ - 2); |
| tmp2 = *(ptrJ - 1); |
| |
| /* 4 pels per iteration */ |
| for (x = (partWidth >> 2); x; x--) |
| { |
| /* First pixel */ |
| tmp7 = tmp3 + tmp4; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp7 = tmp2 + tmp5; |
| tmp1 = *ptrJ++; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp6 += tmp1; |
| *b1++ = tmp6; |
| /* Second pixel */ |
| tmp7 = tmp2 + tmp3; |
| tmp5 += (tmp7 << 4); |
| tmp5 += (tmp7 << 2); |
| tmp7 = tmp1 + tmp4; |
| tmp6 = *ptrJ++; |
| tmp5 -= (tmp7 << 2); |
| tmp5 -= tmp7; |
| tmp5 += tmp6; |
| *b1++ = tmp5; |
| /* Third pixel */ |
| tmp7 = tmp1 + tmp2; |
| tmp4 += (tmp7 << 4); |
| tmp4 += (tmp7 << 2); |
| tmp7 = tmp6 + tmp3; |
| tmp5 = *ptrJ++; |
| tmp4 -= (tmp7 << 2); |
| tmp4 -= tmp7; |
| tmp4 += tmp5; |
| *b1++ = tmp4; |
| /* Fourth pixel */ |
| tmp7 = tmp6 + tmp1; |
| tmp3 += (tmp7 << 4); |
| tmp3 += (tmp7 << 2); |
| tmp7 = tmp5 + tmp2; |
| tmp4 = *ptrJ++; |
| tmp3 -= (tmp7 << 2); |
| tmp3 -= tmp7; |
| tmp3 += tmp4; |
| *b1++ = tmp3; |
| tmp7 = tmp4; |
| tmp4 = tmp6; |
| tmp6 = tmp2; |
| tmp2 = tmp7; |
| tmp3 = tmp5; |
| tmp5 = tmp1; |
| } |
| ptrJ += width - partWidth; |
| } |
| |
| /* Second step: calculate vertical interpolation */ |
| ptrC = table + partWidth; |
| ptrV = ptrC + 5*partWidth; |
| for (y = (partHeight >> 2); y; y--) |
| { |
| /* 4 pels per iteration */ |
| for (x = partWidth; x; x--) |
| { |
| tmp4 = ptrV[-(i32)partWidth*2]; |
| tmp5 = ptrV[-(i32)partWidth]; |
| tmp1 = ptrV[partWidth]; |
| tmp2 = ptrV[partWidth*2]; |
| tmp6 = *ptrV++; |
| |
| tmp7 = tmp4 + tmp1; |
| tmp2 -= (tmp7 << 2); |
| tmp2 -= tmp7; |
| tmp2 += 512; |
| tmp7 = tmp5 + tmp6; |
| tmp3 = ptrC[partWidth*2]; |
| tmp2 += (tmp7 << 4); |
| tmp2 += (tmp7 << 2); |
| tmp2 += tmp3; |
| tmp7 = clp[tmp2>>10]; |
| tmp1 += 512; |
| mb[48] = (u8)tmp7; |
| |
| tmp7 = tmp3 + tmp6; |
| tmp1 -= (tmp7 << 2); |
| tmp1 -= tmp7; |
| tmp7 = tmp4 + tmp5; |
| tmp2 = ptrC[partWidth]; |
| tmp1 += (tmp7 << 4); |
| tmp1 += (tmp7 << 2); |
| tmp1 += tmp2; |
| tmp7 = clp[tmp1>>10]; |
| tmp6 += 512; |
| mb[32] = (u8)tmp7; |
| |
| tmp1 = *ptrC; |
| tmp7 = tmp2 + tmp5; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp7 = tmp4 + tmp3; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp6 += tmp1; |
| tmp7 = clp[tmp6>>10]; |
| tmp5 += 512; |
| mb[16] = (u8)tmp7; |
| |
| tmp6 = ptrC[-(i32)partWidth]; |
| tmp1 += tmp4; |
| tmp5 -= (tmp1 << 2); |
| tmp5 -= tmp1; |
| tmp3 += tmp2; |
| tmp5 += (tmp3 << 4); |
| tmp5 += (tmp3 << 2); |
| tmp5 += tmp6; |
| tmp7 = clp[tmp5>>10]; |
| *mb++ = (u8)tmp7; |
| ptrC++; |
| } |
| mb += 4*16 - partWidth; |
| ptrC += 3*partWidth; |
| ptrV += 3*partWidth; |
| } |
| |
| } |
| |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateMidVerQuarter |
| |
| Functional description: |
| Function to perform horizontal and vertical interpolation of pixel |
| position 'f' or 'q' for a block. Overfilling is done only if needed. |
| Reference image (ref) is read at correct position and the predicted |
| part is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateMidVerQuarter( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight, |
| u32 verOffset) /* 0 for pixel f, 1 for pixel q */ |
| { |
| u32 p1[21*21/4+1]; |
| u32 x, y; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| i32 *ptrC, *ptrV, *ptrInt, *b1; |
| u8 *ptrJ; |
| i32 table[21*16]; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || |
| (y0 < 0) || ((u32)y0+partHeight+5 > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth+5, partHeight+5, partWidth+5); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth+5; |
| } |
| |
| ref += (u32)y0 * width + (u32)x0; |
| |
| b1 = table; |
| ptrJ = ref + 5; |
| |
| /* First step: calculate intermediate values for |
| * horizontal interpolation */ |
| for (y = partHeight + 5; y; y--) |
| { |
| tmp6 = *(ptrJ - 5); |
| tmp5 = *(ptrJ - 4); |
| tmp4 = *(ptrJ - 3); |
| tmp3 = *(ptrJ - 2); |
| tmp2 = *(ptrJ - 1); |
| for (x = (partWidth >> 2); x; x--) |
| { |
| /* First pixel */ |
| tmp7 = tmp3 + tmp4; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp7 = tmp2 + tmp5; |
| tmp1 = *ptrJ++; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp6 += tmp1; |
| *b1++ = tmp6; |
| /* Second pixel */ |
| tmp7 = tmp2 + tmp3; |
| tmp5 += (tmp7 << 4); |
| tmp5 += (tmp7 << 2); |
| tmp7 = tmp1 + tmp4; |
| tmp6 = *ptrJ++; |
| tmp5 -= (tmp7 << 2); |
| tmp5 -= tmp7; |
| tmp5 += tmp6; |
| *b1++ = tmp5; |
| /* Third pixel */ |
| tmp7 = tmp1 + tmp2; |
| tmp4 += (tmp7 << 4); |
| tmp4 += (tmp7 << 2); |
| tmp7 = tmp6 + tmp3; |
| tmp5 = *ptrJ++; |
| tmp4 -= (tmp7 << 2); |
| tmp4 -= tmp7; |
| tmp4 += tmp5; |
| *b1++ = tmp4; |
| /* Fourth pixel */ |
| tmp7 = tmp6 + tmp1; |
| tmp3 += (tmp7 << 4); |
| tmp3 += (tmp7 << 2); |
| tmp7 = tmp5 + tmp2; |
| tmp4 = *ptrJ++; |
| tmp3 -= (tmp7 << 2); |
| tmp3 -= tmp7; |
| tmp3 += tmp4; |
| *b1++ = tmp3; |
| tmp7 = tmp4; |
| tmp4 = tmp6; |
| tmp6 = tmp2; |
| tmp2 = tmp7; |
| tmp3 = tmp5; |
| tmp5 = tmp1; |
| } |
| ptrJ += width - partWidth; |
| } |
| |
| /* Second step: calculate vertical interpolation and average */ |
| ptrC = table + partWidth; |
| ptrV = ptrC + 5*partWidth; |
| /* Pointer to integer sample position, either M or R */ |
| ptrInt = ptrC + (2+verOffset)*partWidth; |
| for (y = (partHeight >> 2); y; y--) |
| { |
| for (x = partWidth; x; x--) |
| { |
| tmp4 = ptrV[-(i32)partWidth*2]; |
| tmp5 = ptrV[-(i32)partWidth]; |
| tmp1 = ptrV[partWidth]; |
| tmp2 = ptrV[partWidth*2]; |
| tmp6 = *ptrV++; |
| |
| tmp7 = tmp4 + tmp1; |
| tmp2 -= (tmp7 << 2); |
| tmp2 -= tmp7; |
| tmp2 += 512; |
| tmp7 = tmp5 + tmp6; |
| tmp3 = ptrC[partWidth*2]; |
| tmp2 += (tmp7 << 4); |
| tmp2 += (tmp7 << 2); |
| tmp7 = ptrInt[partWidth*2]; |
| tmp2 += tmp3; |
| tmp2 = clp[tmp2>>10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7>>5]; |
| tmp1 += 512; |
| tmp2++; |
| mb[48] = (u8)((tmp7 + tmp2) >> 1); |
| |
| tmp7 = tmp3 + tmp6; |
| tmp1 -= (tmp7 << 2); |
| tmp1 -= tmp7; |
| tmp7 = tmp4 + tmp5; |
| tmp2 = ptrC[partWidth]; |
| tmp1 += (tmp7 << 4); |
| tmp1 += (tmp7 << 2); |
| tmp7 = ptrInt[partWidth]; |
| tmp1 += tmp2; |
| tmp1 = clp[tmp1>>10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7>>5]; |
| tmp6 += 512; |
| tmp1++; |
| mb[32] = (u8)((tmp7 + tmp1) >> 1); |
| |
| tmp1 = *ptrC; |
| tmp7 = tmp2 + tmp5; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp7 = tmp4 + tmp3; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp7 = *ptrInt; |
| tmp6 += tmp1; |
| tmp6 = clp[tmp6>>10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7>>5]; |
| tmp5 += 512; |
| tmp6++; |
| mb[16] = (u8)((tmp7 + tmp6) >> 1); |
| |
| tmp6 = ptrC[-(i32)partWidth]; |
| tmp1 += tmp4; |
| tmp5 -= (tmp1 << 2); |
| tmp5 -= tmp1; |
| tmp3 += tmp2; |
| tmp5 += (tmp3 << 4); |
| tmp5 += (tmp3 << 2); |
| tmp7 = ptrInt[-(i32)partWidth]; |
| tmp5 += tmp6; |
| tmp5 = clp[tmp5>>10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7>>5]; |
| tmp5++; |
| *mb++ = (u8)((tmp7 + tmp5) >> 1); |
| ptrC++; |
| ptrInt++; |
| } |
| mb += 4*16 - partWidth; |
| ptrC += 3*partWidth; |
| ptrV += 3*partWidth; |
| ptrInt += 3*partWidth; |
| } |
| |
| } |
| |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdInterpolateMidHorQuarter |
| |
| Functional description: |
| Function to perform horizontal and vertical interpolation of pixel |
| position 'i' or 'k' for a block. Overfilling is done only if needed. |
| Reference image (ref) is read at correct position and the predicted |
| part is written to macroblock array (mb) |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdInterpolateMidHorQuarter( |
| u8 *ref, |
| u8 *mb, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 partWidth, |
| u32 partHeight, |
| u32 horOffset) /* 0 for pixel i, 1 for pixel k */ |
| { |
| u32 p1[21*21/4+1]; |
| u32 x, y; |
| i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| i32 *ptrJ, *ptrInt, *h1; |
| u8 *ptrC, *ptrV; |
| i32 table[21*16]; |
| i32 tableWidth = (i32)partWidth+5; |
| const u8 *clp = h264bsdClip + 512; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(mb); |
| |
| if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || |
| (y0 < 0) || ((u32)y0+partHeight+5 > height)) |
| { |
| h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, |
| partWidth+5, partHeight+5, partWidth+5); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = (u8*)p1; |
| width = partWidth+5; |
| } |
| |
| ref += (u32)y0 * width + (u32)x0; |
| |
| h1 = table + tableWidth; |
| ptrC = ref + width; |
| ptrV = ptrC + 5*width; |
| |
| /* First step: calculate intermediate values for |
| * vertical interpolation */ |
| for (y = (partHeight >> 2); y; y--) |
| { |
| for (x = (u32)tableWidth; x; x--) |
| { |
| tmp4 = ptrV[-(i32)width*2]; |
| tmp5 = ptrV[-(i32)width]; |
| tmp1 = ptrV[width]; |
| tmp2 = ptrV[width*2]; |
| tmp6 = *ptrV++; |
| |
| tmp7 = tmp4 + tmp1; |
| tmp2 -= (tmp7 << 2); |
| tmp2 -= tmp7; |
| tmp7 = tmp5 + tmp6; |
| tmp3 = ptrC[width*2]; |
| tmp2 += (tmp7 << 4); |
| tmp2 += (tmp7 << 2); |
| tmp2 += tmp3; |
| h1[tableWidth*2] = tmp2; |
| |
| tmp7 = tmp3 + tmp6; |
| tmp1 -= (tmp7 << 2); |
| tmp1 -= tmp7; |
| tmp7 = tmp4 + tmp5; |
| tmp2 = ptrC[width]; |
| tmp1 += (tmp7 << 4); |
| tmp1 += (tmp7 << 2); |
| tmp1 += tmp2; |
| h1[tableWidth] = tmp1; |
| |
| tmp1 = *ptrC; |
| tmp7 = tmp2 + tmp5; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp7 = tmp4 + tmp3; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp6 += tmp1; |
| *h1 = tmp6; |
| |
| tmp6 = ptrC[-(i32)width]; |
| tmp1 += tmp4; |
| tmp5 -= (tmp1 << 2); |
| tmp5 -= tmp1; |
| tmp3 += tmp2; |
| tmp5 += (tmp3 << 4); |
| tmp5 += (tmp3 << 2); |
| tmp5 += tmp6; |
| h1[-tableWidth] = tmp5; |
| h1++; |
| ptrC++; |
| } |
| ptrC += 4*width - partWidth - 5; |
| ptrV += 4*width - partWidth - 5; |
| h1 += 3*tableWidth; |
| } |
| |
| /* Second step: calculate horizontal interpolation and average */ |
| ptrJ = table + 5; |
| /* Pointer to integer sample position, either G or H */ |
| ptrInt = table + 2 + horOffset; |
| for (y = partHeight; y; y--) |
| { |
| tmp6 = *(ptrJ - 5); |
| tmp5 = *(ptrJ - 4); |
| tmp4 = *(ptrJ - 3); |
| tmp3 = *(ptrJ - 2); |
| tmp2 = *(ptrJ - 1); |
| for (x = (partWidth>>2); x; x--) |
| { |
| /* First pixel */ |
| tmp6 += 512; |
| tmp7 = tmp3 + tmp4; |
| tmp6 += (tmp7 << 4); |
| tmp6 += (tmp7 << 2); |
| tmp7 = tmp2 + tmp5; |
| tmp1 = *ptrJ++; |
| tmp6 -= (tmp7 << 2); |
| tmp6 -= tmp7; |
| tmp7 = *ptrInt++; |
| tmp6 += tmp1; |
| tmp6 = clp[tmp6 >> 10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7 >> 5]; |
| tmp5 += 512; |
| tmp6++; |
| *mb++ = (u8)((tmp6 + tmp7) >> 1); |
| /* Second pixel */ |
| tmp7 = tmp2 + tmp3; |
| tmp5 += (tmp7 << 4); |
| tmp5 += (tmp7 << 2); |
| tmp7 = tmp1 + tmp4; |
| tmp6 = *ptrJ++; |
| tmp5 -= (tmp7 << 2); |
| tmp5 -= tmp7; |
| tmp7 = *ptrInt++; |
| tmp5 += tmp6; |
| tmp5 = clp[tmp5 >> 10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7 >> 5]; |
| tmp4 += 512; |
| tmp5++; |
| *mb++ = (u8)((tmp5 + tmp7) >> 1); |
| /* Third pixel */ |
| tmp7 = tmp1 + tmp2; |
| tmp4 += (tmp7 << 4); |
| tmp4 += (tmp7 << 2); |
| tmp7 = tmp6 + tmp3; |
| tmp5 = *ptrJ++; |
| tmp4 -= (tmp7 << 2); |
| tmp4 -= tmp7; |
| tmp7 = *ptrInt++; |
| tmp4 += tmp5; |
| tmp4 = clp[tmp4 >> 10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7 >> 5]; |
| tmp3 += 512; |
| tmp4++; |
| *mb++ = (u8)((tmp4 + tmp7) >> 1); |
| /* Fourth pixel */ |
| tmp7 = tmp6 + tmp1; |
| tmp3 += (tmp7 << 4); |
| tmp3 += (tmp7 << 2); |
| tmp7 = tmp5 + tmp2; |
| tmp4 = *ptrJ++; |
| tmp3 -= (tmp7 << 2); |
| tmp3 -= tmp7; |
| tmp7 = *ptrInt++; |
| tmp3 += tmp4; |
| tmp3 = clp[tmp3 >> 10]; |
| tmp7 += 16; |
| tmp7 = clp[tmp7 >> 5]; |
| tmp3++; |
| *mb++ = (u8)((tmp3 + tmp7) >> 1); |
| tmp3 = tmp5; |
| tmp5 = tmp1; |
| tmp7 = tmp4; |
| tmp4 = tmp6; |
| tmp6 = tmp2; |
| tmp2 = tmp7; |
| } |
| ptrJ += 5; |
| ptrInt += 5; |
| mb += 16 - partWidth; |
| } |
| |
| } |
| |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdPredictSamples |
| |
| Functional description: |
| This function reconstructs a prediction for a macroblock partition. |
| The prediction is either copied or interpolated using the reference |
| frame and the motion vector. Both luminance and chrominance parts are |
| predicted. The prediction is stored in given macroblock array (data). |
| Inputs: |
| data pointer to macroblock array (384 bytes) for output |
| mv pointer to motion vector used for prediction |
| refPic pointer to reference picture structure |
| xA x-coordinate for current macroblock |
| yA y-coordinate for current macroblock |
| partX x-offset for partition in macroblock |
| partY y-offset for partition in macroblock |
| partWidth width of partition |
| partHeight height of partition |
| Outputs: |
| data macroblock array (16x16+8x8+8x8) where predicted |
| partition is stored at correct position |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdPredictSamples( |
| u8 *data, |
| mv_t *mv, |
| image_t *refPic, |
| u32 xA, |
| u32 yA, |
| u32 partX, |
| u32 partY, |
| u32 partWidth, |
| u32 partHeight) |
| |
| { |
| |
| /* Variables */ |
| |
| u32 xFrac, yFrac, width, height; |
| i32 xInt, yInt; |
| u8 *lumaPartData; |
| |
| /* Code */ |
| |
| ASSERT(data); |
| ASSERT(mv); |
| ASSERT(partWidth); |
| ASSERT(partHeight); |
| ASSERT(refPic); |
| ASSERT(refPic->data); |
| ASSERT(refPic->width); |
| ASSERT(refPic->height); |
| |
| /* luma */ |
| lumaPartData = data + 16*partY + partX; |
| |
| xFrac = mv->hor & 0x3; |
| yFrac = mv->ver & 0x3; |
| |
| width = 16 * refPic->width; |
| height = 16 * refPic->height; |
| |
| xInt = (i32)xA + (i32)partX + (mv->hor >> 2); |
| yInt = (i32)yA + (i32)partY + (mv->ver >> 2); |
| |
| ASSERT(lumaFracPos[xFrac][yFrac] < 16); |
| |
| switch (lumaFracPos[xFrac][yFrac]) |
| { |
| case 0: /* G */ |
| h264bsdFillBlock(refPic->data, lumaPartData, |
| xInt,yInt,width,height,partWidth,partHeight,16); |
| break; |
| case 1: /* d */ |
| h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, |
| xInt, yInt-2, width, height, partWidth, partHeight, 0); |
| break; |
| case 2: /* h */ |
| h264bsdInterpolateVerHalf(refPic->data, lumaPartData, |
| xInt, yInt-2, width, height, partWidth, partHeight); |
| break; |
| case 3: /* n */ |
| h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, |
| xInt, yInt-2, width, height, partWidth, partHeight, 1); |
| break; |
| case 4: /* a */ |
| h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt, width, height, partWidth, partHeight, 0); |
| break; |
| case 5: /* e */ |
| h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 0); |
| break; |
| case 6: /* i */ |
| h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 0); |
| break; |
| case 7: /* p */ |
| h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 2); |
| break; |
| case 8: /* b */ |
| h264bsdInterpolateHorHalf(refPic->data, lumaPartData, |
| xInt-2, yInt, width, height, partWidth, partHeight); |
| break; |
| case 9: /* f */ |
| h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 0); |
| break; |
| case 10: /* j */ |
| h264bsdInterpolateMidHalf(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight); |
| break; |
| case 11: /* q */ |
| h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 1); |
| break; |
| case 12: /* c */ |
| h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt, width, height, partWidth, partHeight, 1); |
| break; |
| case 13: /* g */ |
| h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 1); |
| break; |
| case 14: /* k */ |
| h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 1); |
| break; |
| default: /* case 15, r */ |
| h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, |
| xInt-2, yInt-2, width, height, partWidth, partHeight, 3); |
| break; |
| } |
| |
| /* chroma */ |
| PredictChroma( |
| data + 16*16 + (partY>>1)*8 + (partX>>1), |
| xA + partX, |
| yA + partY, |
| partWidth, |
| partHeight, |
| mv, |
| refPic); |
| |
| } |
| |
| #else /* H264DEC_OMXDL */ |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdPredictSamples |
| |
| Functional description: |
| This function reconstructs a prediction for a macroblock partition. |
| The prediction is either copied or interpolated using the reference |
| frame and the motion vector. Both luminance and chrominance parts are |
| predicted. The prediction is stored in given macroblock array (data). |
| Inputs: |
| data pointer to macroblock array (384 bytes) for output |
| mv pointer to motion vector used for prediction |
| refPic pointer to reference picture structure |
| xA x-coordinate for current macroblock |
| yA y-coordinate for current macroblock |
| partX x-offset for partition in macroblock |
| partY y-offset for partition in macroblock |
| partWidth width of partition |
| partHeight height of partition |
| Outputs: |
| data macroblock array (16x16+8x8+8x8) where predicted |
| partition is stored at correct position |
| |
| ------------------------------------------------------------------------------*/ |
| |
| /*lint -e{550} Symbol 'res' not accessed */ |
| void h264bsdPredictSamples( |
| u8 *data, |
| mv_t *mv, |
| image_t *refPic, |
| u32 colAndRow, |
| u32 part, |
| u8 *pFill) |
| |
| { |
| |
| /* Variables */ |
| |
| u32 xFrac, yFrac; |
| u32 width, height; |
| i32 xInt, yInt, x0, y0; |
| u8 *partData, *ref; |
| OMXSize roi; |
| u32 fillWidth; |
| u32 fillHeight; |
| OMXResult res; |
| u32 xA, yA; |
| u32 partX, partY; |
| u32 partWidth, partHeight; |
| |
| /* Code */ |
| |
| ASSERT(data); |
| ASSERT(mv); |
| ASSERT(refPic); |
| ASSERT(refPic->data); |
| ASSERT(refPic->width); |
| ASSERT(refPic->height); |
| |
| xA = (colAndRow & 0xFFFF0000) >> 16; |
| yA = (colAndRow & 0x0000FFFF); |
| |
| partX = (part & 0xFF000000) >> 24; |
| partY = (part & 0x00FF0000) >> 16; |
| partWidth = (part & 0x0000FF00) >> 8; |
| partHeight = (part & 0x000000FF); |
| |
| ASSERT(partWidth); |
| ASSERT(partHeight); |
| |
| /* luma */ |
| partData = data + 16*partY + partX; |
| |
| xFrac = mv->hor & 0x3; |
| yFrac = mv->ver & 0x3; |
| |
| width = 16 * refPic->width; |
| height = 16 * refPic->height; |
| |
| xInt = (i32)xA + (i32)partX + (mv->hor >> 2); |
| yInt = (i32)yA + (i32)partY + (mv->ver >> 2); |
| |
| x0 = (xFrac) ? xInt-2 : xInt; |
| y0 = (yFrac) ? yInt-2 : yInt; |
| |
| if (xFrac) |
| { |
| if (partWidth == 16) |
| fillWidth = 32; |
| else |
| fillWidth = 16; |
| } |
| else |
| fillWidth = (partWidth*2); |
| if (yFrac) |
| fillHeight = partHeight+5; |
| else |
| fillHeight = partHeight; |
| |
| |
| if ((x0 < 0) || ((u32)x0+fillWidth > width) || |
| (y0 < 0) || ((u32)y0+fillHeight > height)) |
| { |
| h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height, |
| fillWidth, fillHeight, fillWidth); |
| |
| x0 = 0; |
| y0 = 0; |
| ref = pFill; |
| width = fillWidth; |
| if (yFrac) |
| ref += 2*width; |
| if (xFrac) |
| ref += 2; |
| } |
| else |
| { |
| /*lint --e(737) Loss of sign */ |
| ref = refPic->data + yInt*width + xInt; |
| } |
| /* Luma interpolation */ |
| roi.width = (i32)partWidth; |
| roi.height = (i32)partHeight; |
| |
| res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16, |
| (i32)xFrac, (i32)yFrac, roi); |
| ASSERT(res == 0); |
| |
| /* Chroma */ |
| width = 8 * refPic->width; |
| height = 8 * refPic->height; |
| |
| x0 = ((xA + partX) >> 1) + (mv->hor >> 3); |
| y0 = ((yA + partY) >> 1) + (mv->ver >> 3); |
| xFrac = mv->hor & 0x7; |
| yFrac = mv->ver & 0x7; |
| |
| ref = refPic->data + 256 * refPic->width * refPic->height; |
| |
| roi.width = (i32)(partWidth >> 1); |
| fillWidth = ((partWidth >> 1) + 8) & ~0x7; |
| roi.height = (i32)(partHeight >> 1); |
| fillHeight = (partHeight >> 1) + 1; |
| |
| if ((x0 < 0) || ((u32)x0+fillWidth > width) || |
| (y0 < 0) || ((u32)y0+fillHeight > height)) |
| { |
| h264bsdFillBlock(ref, pFill, x0, y0, width, height, |
| fillWidth, fillHeight, fillWidth); |
| ref += width * height; |
| h264bsdFillBlock(ref, pFill + fillWidth*fillHeight, |
| x0, y0, width, height, fillWidth, |
| fillHeight, fillWidth); |
| |
| ref = pFill; |
| x0 = 0; |
| y0 = 0; |
| width = fillWidth; |
| height = fillHeight; |
| } |
| |
| partData = data + 16*16 + (partY>>1)*8 + (partX>>1); |
| |
| /* Chroma interpolation */ |
| /*lint --e(737) Loss of sign */ |
| ref += y0 * width + x0; |
| res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, |
| (u32)roi.width, (u32)roi.height, xFrac, yFrac); |
| ASSERT(res == 0); |
| partData += 8 * 8; |
| ref += height * width; |
| res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, |
| (u32)roi.width, (u32)roi.height, xFrac, yFrac); |
| ASSERT(res == 0); |
| |
| } |
| |
| #endif /* H264DEC_OMXDL */ |
| |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: FillRow1 |
| |
| Functional description: |
| This function gets a row of reference pels in a 'normal' case when no |
| overfilling is necessary. |
| |
| ------------------------------------------------------------------------------*/ |
| |
| static void FillRow1( |
| u8 *ref, |
| u8 *fill, |
| i32 left, |
| i32 center, |
| i32 right) |
| { |
| UNUSED(left); |
| UNUSED(right); |
| ASSERT(ref); |
| ASSERT(fill); |
| |
| H264SwDecMemcpy(fill, ref, (u32)center); |
| |
| /*lint -e(715) */ |
| } |
| |
| |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdFillRow7 |
| |
| Functional description: |
| This function gets a row of reference pels when horizontal coordinate |
| is partly negative or partly greater than reference picture width |
| (overfilling some pels on left and/or right edge). |
| Inputs: |
| ref pointer to reference samples |
| left amount of pixels to overfill on left-edge |
| center amount of pixels to copy |
| right amount of pixels to overfill on right-edge |
| Outputs: |
| fill pointer where samples are stored |
| |
| ------------------------------------------------------------------------------*/ |
| #ifndef H264DEC_NEON |
| void h264bsdFillRow7( |
| u8 *ref, |
| u8 *fill, |
| i32 left, |
| i32 center, |
| i32 right) |
| { |
| u8 tmp; |
| |
| ASSERT(ref); |
| ASSERT(fill); |
| |
| if (left) |
| tmp = *ref; |
| |
| for ( ; left; left--) |
| /*lint -esym(644,tmp) tmp is initialized if used */ |
| *fill++ = tmp; |
| |
| for ( ; center; center--) |
| *fill++ = *ref++; |
| |
| if (right) |
| tmp = ref[-1]; |
| |
| for ( ; right; right--) |
| /*lint -esym(644,tmp) tmp is initialized if used */ |
| *fill++ = tmp; |
| } |
| #endif |
| /*------------------------------------------------------------------------------ |
| |
| Function: h264bsdFillBlock |
| |
| Functional description: |
| This function gets a block of reference pels. It determines whether |
| overfilling is needed or not and repeatedly calls an appropriate |
| function (by using a function pointer) that fills one row the block. |
| Inputs: |
| ref pointer to reference frame |
| x0 x-coordinate for block |
| y0 y-coordinate for block |
| width width of reference frame |
| height height of reference frame |
| blockWidth width of block |
| blockHeight height of block |
| fillScanLength length of a line in output array (pixels) |
| Outputs: |
| fill pointer to array where output block is written |
| |
| ------------------------------------------------------------------------------*/ |
| |
| void h264bsdFillBlock( |
| u8 *ref, |
| u8 *fill, |
| i32 x0, |
| i32 y0, |
| u32 width, |
| u32 height, |
| u32 blockWidth, |
| u32 blockHeight, |
| u32 fillScanLength) |
| |
| { |
| |
| /* Variables */ |
| |
| i32 xstop, ystop; |
| void (*fp)(u8*, u8*, i32, i32, i32); |
| i32 left, x, right; |
| i32 top, y, bottom; |
| |
| /* Code */ |
| |
| ASSERT(ref); |
| ASSERT(fill); |
| ASSERT(width); |
| ASSERT(height); |
| ASSERT(fill); |
| ASSERT(blockWidth); |
| ASSERT(blockHeight); |
| |
| xstop = x0 + (i32)blockWidth; |
| ystop = y0 + (i32)blockHeight; |
| |
| /* Choose correct function whether overfilling on left-edge or right-edge |
| * is needed or not */ |
| if (x0 >= 0 && xstop <= (i32)width) |
| fp = FillRow1; |
| else |
| fp = h264bsdFillRow7; |
| |
| if (ystop < 0) |
| y0 = -(i32)blockHeight; |
| |
| if (xstop < 0) |
| x0 = -(i32)blockWidth; |
| |
| if (y0 > (i32)height) |
| y0 = (i32)height; |
| |
| if (x0 > (i32)width) |
| x0 = (i32)width; |
| |
| xstop = x0 + (i32)blockWidth; |
| ystop = y0 + (i32)blockHeight; |
| |
| if (x0 > 0) |
| ref += x0; |
| |
| if (y0 > 0) |
| ref += y0 * (i32)width; |
| |
| left = x0 < 0 ? -x0 : 0; |
| right = xstop > (i32)width ? xstop - (i32)width : 0; |
| x = (i32)blockWidth - left - right; |
| |
| top = y0 < 0 ? -y0 : 0; |
| bottom = ystop > (i32)height ? ystop - (i32)height : 0; |
| y = (i32)blockHeight - top - bottom; |
| |
| /* Top-overfilling */ |
| for ( ; top; top-- ) |
| { |
| (*fp)(ref, fill, left, x, right); |
| fill += fillScanLength; |
| } |
| |
| /* Lines inside reference image */ |
| for ( ; y; y-- ) |
| { |
| (*fp)(ref, fill, left, x, right); |
| ref += width; |
| fill += fillScanLength; |
| } |
| |
| ref -= width; |
| |
| /* Bottom-overfilling */ |
| for ( ; bottom; bottom-- ) |
| { |
| (*fp)(ref, fill, left, x, right); |
| fill += fillScanLength; |
| } |
| } |
| |
| /*lint +e701 +e702 */ |
| |
| |