| /* ------------------------------------------------------------------ |
| * Copyright (C) 2008 PacketVideo |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
| * express or implied. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * ------------------------------------------------------------------- |
| */ |
| #include "avcenc_lib.h" |
| #include "oscl_base_macros.h" |
| /* 3/29/01 fast half-pel search based on neighboring guess */ |
| /* value ranging from 0 to 4, high complexity (more accurate) to |
| low complexity (less accurate) */ |
| #define HP_DISTANCE_TH 5 // 2 /* half-pel distance threshold */ |
| |
| #define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/ |
| |
/*
 * Distance between a guessed half-pel position and candidate position k,
 * indexed as distance_tab[hp_guess][k]. Only the disabled FindHalfPelMB()
 * search in this file reads it, comparing entries against HP_DISTANCE_TH.
 */
static const int distance_tab[9][9] =
{
    /* hp_guess 0 */ { 0, 1, 1, 1, 1, 1, 1, 1, 1 },
    /* hp_guess 1 */ { 1, 0, 1, 2, 3, 4, 3, 2, 1 },
    /* hp_guess 2 */ { 1, 0, 0, 0, 1, 2, 3, 2, 1 },
    /* hp_guess 3 */ { 1, 2, 1, 0, 1, 2, 3, 4, 3 },
    /* hp_guess 4 */ { 1, 2, 1, 0, 0, 0, 1, 2, 3 },
    /* hp_guess 5 */ { 1, 4, 3, 2, 1, 0, 1, 2, 3 },
    /* hp_guess 6 */ { 1, 2, 3, 2, 1, 0, 0, 0, 1 },
    /* hp_guess 7 */ { 1, 2, 3, 4, 3, 2, 1, 0, 1 },
    /* hp_guess 8 */ { 1, 0, 1, 2, 3, 2, 1, 0, 0 }
};
| |
| |
| /*===================================================================== |
| Function: AVCFindHalfPelMB |
| Date: 10/31/2007 |
| Purpose: Find half pel resolution MV surrounding the full-pel MV |
| =====================================================================*/ |
| |
| void AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand, |
| int xpos, int ypos, int hp_guess, int cmvx, int cmvy) |
| { |
| AVCPictureData *currPic = encvid->common->currPic; |
| int lx = currPic->pitch; |
| int d, dmin; |
| uint8* cand; |
| int lambda_motion = encvid->lambda_motion; |
| uint8 *mvbits = encvid->mvbits; |
| int mvcost; |
| /* list of candidate to go through for half-pel search*/ |
| uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions |
| uint8** hpel_cand = (uint8**) encvid->hpel_cand; /* half-pel position */ |
| uint8** qpel_cand; |
| int xh[9] = {0, 0, 2, 2, 2, 0, -2, -2, -2}; |
| int yh[9] = {0, -2, -2, 0, 2, 2, 2, 0, -2}; |
| int xq[8] = {0, 1, 1, 1, 0, -1, -1, -1}; |
| int yq[8] = { -1, -1, 0, 1, 1, 1, 0, -1}; |
| int h, hmin, q, qmin; |
| |
| OSCL_UNUSED_ARG(xpos); |
| OSCL_UNUSED_ARG(ypos); |
| OSCL_UNUSED_ARG(hp_guess); |
| |
| GenerateSubPelPred(subpel_pred, ncand, lx); |
| |
| cur = encvid->currYMB; // pre-load current original MB |
| |
| cand = hpel_cand[0]; |
| |
| // find cost for the current full-pel position |
| dmin = SATD_MB(cand, cur, 65535); // get Hadamaard transform SAD |
| mvcost = MV_COST_S(lambda_motion, mot->x, mot->y, cmvx, cmvy); |
| dmin += mvcost; |
| hmin = 0; |
| |
| /* find half-pel */ |
| for (h = 1; h < 9; h++) |
| { |
| d = SATD_MB(hpel_cand[h], cur, dmin); |
| mvcost = MV_COST_S(lambda_motion, mot->x + xh[h], mot->y + yh[h], cmvx, cmvy); |
| d += mvcost; |
| |
| if (d < dmin) |
| { |
| dmin = d; |
| hmin = h; |
| } |
| } |
| |
| mot->sad = dmin; |
| mot->x += xh[hmin]; |
| mot->y += yh[hmin]; |
| encvid->best_hpel_pos = hmin; |
| |
| /*** search for quarter-pel ****/ |
| qpel_cand = encvid->qpel_cand[hmin]; |
| encvid->best_qpel_pos = qmin = -1; |
| |
| for (q = 0; q < 8; q++) |
| { |
| d = SATD_MB(qpel_cand[q], cur, dmin); |
| mvcost = MV_COST_S(lambda_motion, mot->x + xq[q], mot->y + yq[q], cmvx, cmvy); |
| d += mvcost; |
| if (d < dmin) |
| { |
| dmin = d; |
| qmin = q; |
| } |
| } |
| |
| if (qmin != -1) |
| { |
| mot->sad = dmin; |
| mot->x += xq[qmin]; |
| mot->y += yq[qmin]; |
| encvid->best_qpel_pos = qmin; |
| } |
| |
| return ; |
| } |
| |
#if 0
/*
 * Disabled legacy half-pel search, kept for reference only (never compiled).
 *
 * Walks the half-pel positions around the full-pel MV in *mot, skipping
 * positions that the range check rules out or whose distance_tab entry for
 * the current guess is not below HP_DISTANCE_TH. After the first pass over
 * the even positions, if a better position was found the walk restarts
 * once over the odd positions with hp_guess set to the winner.
 */
void FindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand,
                   int xpos, int ypos, int hp_guess, int cmvx, int cmvy)
{
    /* hp_mem = ULong *vertArray;  20x17 */
    /*          ULong *horzArray;  20x16 */
    /*          ULong *diagArray;  20x17 */
    AVCPictureData *currPic = encvid->common->currPic;
    int lx = currPic->pitch;
    int width = currPic->width;     /* padding */
    int height = currPic->height;
    int range = encvid->rateCtrl->mvRange;
    int (**SAD_MB_HalfPel)(uint8*, uint8*, int, void*) =
        encvid->functionPointer->SAD_MB_HalfPel;
    void *extra_info = encvid->sad_extra_info;
    int lambda_motion = encvid->lambda_motion;
    uint8 *mvbits = encvid->mvbits;     /* presumably referenced by MV_COST_S */
    int mvcost;

    int in_range[9] = {0, 1, 1, 1, 1, 1, 1, 1, 1}; /* 3/29/01 */
    /* step applied to (xh, yh) and to ncand after visiting position k */
    int next_hp_pos[9][2] = {{0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2}, {0, -1}};
    int next_ncand[9] = {0, 1, lx, lx, 0, -1, -1, -lx, -lx};

    int best_cost, cost;
    int xh, yh, best_xh, best_yh;
    int k, best_k = 0;
    int full_x, full_y, low_x, low_y;

    cur = encvid->currYMB; /* pre-load current original MB */

    /**************** check range ***************************/
    /* 3/29/01 */
    full_x = xpos + (mot[0].x >> 2);
    full_y = ypos + (mot[0].y >> 2);
    low_x = xpos - range;
    low_y = ypos - range;

    if (full_x <= -15 || full_x == low_x)
    {
        in_range[1] = in_range[7] = in_range[8] = 0;
    }
    else if (full_x >= width - 1)
    {
        in_range[3] = in_range[4] = in_range[5] = 0;
    }

    if (full_y <= -15 || full_y == low_y)
    {
        in_range[1] = in_range[2] = in_range[3] = 0;
    }
    else if (full_y >= height - 1)
    {
        in_range[5] = in_range[6] = in_range[7] = 0;
    }

    best_xh = 0;
    best_yh = 0;
    best_cost = mot->sad;

    /* initial position */
    xh = 0;
    yh = -1;
    ncand -= lx;

    for (k = 2; k <= 8; k += 2)
    {
        if (distance_tab[hp_guess][k] < HP_DISTANCE_TH && in_range[k])
        {
            cost = SAD_MB_HalfPel[((yh & 1) << 1) + (xh & 1)](ncand, cur, (best_cost << 16) | lx, extra_info);
            mvcost = MV_COST_S(lambda_motion, mot[0].x + (xh << 1), mot[0].y + (yh << 1), cmvx, cmvy);
            cost += mvcost;

            if (cost < best_cost)
            {
                best_cost = cost;
                best_xh = xh;
                best_yh = yh;
                best_k = k;
            }
        }

        xh += next_hp_pos[k][0];
        yh += next_hp_pos[k][1];
        ncand += next_ncand[k];

        /* end of the even pass: if something improved, restart so that the
           loop increment lands on k = 1 and the odd positions are visited */
        if (k == 8 && (best_xh != 0 || best_yh != 0))
        {
            k = -1;
            hp_guess = best_k;
        }
    }

    mot->sad = best_cost;
    mot->x += (best_xh << 1);
    mot->y += (best_yh << 1);

    return;
}
#endif
| |
| /** This function generates sub-pel prediction around the full-pel candidate. |
| Each sub-pel position array is 20 pixel wide (for word-alignment) and 17 pixel tall. */ |
| /** The sub-pel position is labeled in spiral manner from the center. */ |
| |
| void GenerateSubPelPred(uint8* subpel_pred, uint8 *ncand, int lx) |
| { |
| /* let's do straightforward way first */ |
| uint8 *ref; |
| uint8 *dst, *dst2, *dst3, *dst4, *dst5, *src; |
| uint8 tmp8; |
| int32 tmp32; |
| int16 tmp_horz[18*22], *dst_16, *src_16; |
| int i, j; |
| |
| /* first copy full-pel to the first array */ |
| ref = ncand - 3 - lx - (lx << 1); /* move back (-3,-3) */ |
| dst = subpel_pred; |
| |
| dst -= 4; /* offset */ |
| for (j = 0; j < 22; j++) /* 24x22 */ |
| { |
| i = 6; |
| while (i > 0) |
| { |
| tmp32 = *ref++; |
| tmp8 = *ref++; |
| tmp32 |= (tmp8 << 8); |
| tmp8 = *ref++; |
| tmp32 |= (tmp8 << 16); |
| tmp8 = *ref++; |
| tmp32 |= (tmp8 << 24); |
| *((uint32*)(dst += 4)) = tmp32; |
| i--; |
| } |
| ref += (lx - 24); |
| } |
| |
| /* from the first array, we do horizontal interp */ |
| ref = subpel_pred + 2; |
| dst_16 = tmp_horz; /* 17 x 22 */ |
| |
| for (j = -2; j < 0; j++) |
| { |
| for (i = 0; i < 16; i++) |
| { |
| *dst_16++ = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| ref++; |
| } |
| /* do the 17th column here */ |
| *dst_16 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| dst_16 += 2; /* stride for tmp_horz is 18 */ |
| ref += 8; /* stride for ref is 24 */ |
| } |
| |
| dst = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* go to the 14th array 17x18*/ |
| |
| for (i = 0; i < 16; i++) |
| { |
| tmp32 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| *dst_16++ = tmp32; |
| ref++; |
| tmp32 = (tmp32 + 16) >> 5; |
| *dst++ = AVC_CLIP(tmp32); |
| } |
| /* do the 17th column here */ |
| tmp32 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| *dst_16 = tmp32; |
| tmp32 = (tmp32 + 16) >> 5; |
| *dst = AVC_CLIP(tmp32); |
| |
| dst += 8; /* stride for dst is 24 */ |
| dst_16 += 2; /* stride for tmp_horz is 18 */ |
| ref += 8; /* stride for ref is 24 */ |
| |
| dst3 = subpel_pred + V0Q_H1Q * SUBPEL_PRED_BLK_SIZE; /* 3rd array 17x16 */ |
| dst4 = subpel_pred + V0Q_H3Q * SUBPEL_PRED_BLK_SIZE; /* 7th array 17x16 */ |
| |
| for (j = 0; j < 16; j++) |
| { |
| for (i = 0; i < 16; i++) |
| { |
| tmp32 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| *dst_16++ = tmp32; |
| ref++; |
| tmp32 = (tmp32 + 16) >> 5; |
| tmp32 = AVC_CLIP(tmp32); |
| *dst++ = tmp32; |
| *dst3++ = (tmp32 + ref[-1] + 1) >> 1; |
| *dst4++ = (tmp32 + ref[0] + 1) >> 1; |
| } |
| /* do the 17th column here */ |
| tmp32 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| *dst_16 = tmp32; |
| tmp32 = (tmp32 + 16) >> 5; |
| tmp32 = AVC_CLIP(tmp32); |
| *dst = tmp32; |
| *dst3 = (tmp32 + ref[0] + 1) >> 1; |
| *dst4 = (tmp32 + ref[1] + 1) >> 1; |
| |
| dst += 8; /* stride for dst is 24 */ |
| dst3 += 8; |
| dst4 += 8; |
| dst_16 += 2; /* stride for tmp_horz is 18 */ |
| ref += 8; /* stride for ref is 24 */ |
| } |
| |
| for (i = 0; i < 16; i++) |
| { |
| tmp32 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| *dst_16++ = tmp32; |
| ref++; |
| tmp32 = (tmp32 + 16) >> 5; |
| *dst++ = AVC_CLIP(tmp32); |
| } |
| /* do the 17th column here */ |
| tmp32 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| *dst_16 = tmp32; |
| tmp32 = (tmp32 + 16) >> 5; |
| *dst = AVC_CLIP(tmp32); |
| |
| dst += 8; /* stride for dst is 24 */ |
| dst_16 += 2; /* stride for tmp_horz is 18 */ |
| ref += 8; /* stride for ref is 24 */ |
| |
| for (j = 17; j < 19; j++) |
| { |
| for (i = 0; i < 16; i++) |
| { |
| *dst_16++ = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| ref++; |
| } |
| /* do the 17th column here */ |
| *dst_16 = ref[-2] + ref[3] - 5 * (ref[-1] + ref[2]) + 20 * (ref[0] + ref[1]); |
| dst_16 += 2; /* stride for tmp_horz is 18 */ |
| ref += 8; /* stride for ref is 24 */ |
| } |
| |
| /* Do vertical filtering and vertical cross */ |
| src_16 = tmp_horz; /* 17 x 22 */ |
| src = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 14th array 17x18 */ |
| dst = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 12th array 17x17*/ |
| dst3 = subpel_pred + V1Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 15th array 17x17 */ |
| dst4 = subpel_pred + V3Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 13th array 17x17 */ |
| |
| dst -= 24; // offset |
| dst3 -= 24; |
| dst4 -= 24; |
| for (i = 0; i < 17; i++) |
| { |
| for (j = 0; j < 17; j++) |
| { |
| tmp32 = src_16[0] + src_16[18*5] - 5 * (src_16[18] + src_16[18*4]) + 20 * (src_16[18*2] + src_16[18*3]); |
| tmp32 = (tmp32 + 512) >> 10; |
| tmp32 = AVC_CLIP(tmp32); |
| *(dst += 24) = tmp32; |
| *(dst3 += 24) = (tmp32 + *src + 1) >> 1; |
| *(dst4 += 24) = (tmp32 + *(src += 24) + 1) >> 1; |
| src_16 += 18; |
| } |
| src_16 -= ((18 * 17) - 1); |
| dst -= ((24 * 17) - 1); |
| dst3 -= ((24 * 17) - 1); |
| dst4 -= ((24 * 17) - 1); |
| src -= ((24 * 17) - 1); |
| } |
| |
| /* do vertical interpolation */ |
| ref = subpel_pred + 2; |
| dst = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 10th array 18x17 */ |
| dst -= 24; // offset |
| src = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 12th array 17x17 */ |
| src -= 24; // offset |
| dst4 = subpel_pred + V2Q_H1Q * SUBPEL_PRED_BLK_SIZE; /* 11th array 17x17 */ |
| dst4 -= 24; // offset |
| |
| for (j = 0; j < 17; j++) |
| { |
| tmp32 = ref[0] + ref[24*5] - 5 * (ref[24] + ref[24*4]) + 20 * (ref[24*2] + ref[24*3]); |
| ref += 24; |
| tmp32 = (tmp32 + 16) >> 5; |
| tmp32 = AVC_CLIP(tmp32); |
| *(dst += 24) = tmp32; |
| *(dst4 += 24) = (tmp32 + *(src += 24) + 1) >> 1; |
| } |
| dst -= ((24 * 17) - 1); |
| dst4 -= ((24 * 17) - 1); |
| ref -= ((24 * 17) - 1); |
| src -= ((24 * 17) - 1); // 12th |
| |
| dst2 = subpel_pred + V1Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 5th array 16x17 */ |
| dst2 -= 24; //offset |
| dst3 = subpel_pred + V3Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 1st array 16x17 */ |
| dst3 -= 24; //offset |
| dst5 = subpel_pred + V2Q_H3Q * SUBPEL_PRED_BLK_SIZE; /* 9th array 17x17 */ |
| dst5 -= 24; //offset |
| |
| for (i = 0; i < 16; i++) |
| { |
| for (j = 0; j < 17; j++) |
| { |
| tmp32 = ref[0] + ref[24*5] - 5 * (ref[24] + ref[24*4]) + 20 * (ref[24*2] + ref[24*3]); |
| ref += 24; |
| tmp32 = (tmp32 + 16) >> 5; |
| tmp32 = AVC_CLIP(tmp32); |
| *(dst += 24) = tmp32; // 10th |
| *(dst2 += 24) = (tmp32 + ref[24] + 1) >> 1; // 5th |
| *(dst3 += 24) = (tmp32 + ref[24*2] + 1) >> 1; // 1st |
| *(dst4 += 24) = (tmp32 + *(src += 24) + 1) >> 1; // 11th |
| *(dst5 += 24) = (tmp32 + src[-1] + 1) >> 1; // 9th |
| } |
| |
| dst -= ((24 * 17) - 1); |
| dst2 -= ((24 * 17) - 1); |
| dst3 -= ((24 * 17) - 1); |
| dst4 -= ((24 * 17) - 1); |
| dst5 -= ((24 * 17) - 1); |
| ref -= ((24 * 17) - 1); |
| src -= ((24 * 17) - 1); |
| } |
| |
| src--; |
| for (j = 0; j < 17; j++) |
| { |
| tmp32 = ref[0] + ref[24*5] - 5 * (ref[24] + ref[24*4]) + 20 * (ref[24*2] + ref[24*3]); |
| ref += 24; |
| tmp32 = (tmp32 + 16) >> 5; |
| tmp32 = AVC_CLIP(tmp32); |
| *(dst += 24) = tmp32; |
| *(dst5 += 24) = (tmp32 + *(src += 24) + 1) >> 1; |
| } |
| |
| /* now diagonal direction */ |
| ref = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; // 14th |
| src = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; // 10th |
| dst = subpel_pred + V1Q_H1Q * SUBPEL_PRED_BLK_SIZE; // 4th |
| dst2 = subpel_pred + V1Q_H3Q * SUBPEL_PRED_BLK_SIZE; // 6th |
| dst3 = subpel_pred + V3Q_H1Q * SUBPEL_PRED_BLK_SIZE; // 2th |
| dst4 = subpel_pred + V3Q_H3Q * SUBPEL_PRED_BLK_SIZE; // 8th |
| |
| for (j = 0; j < 17; j++) |
| { |
| for (i = 0; i < 17; i++) |
| { |
| *dst3++ = (ref[24] + *src + 1) >> 1; |
| *dst2++ = (*ref + src[1] + 1) >> 1; |
| *dst4++ = (ref[24] + src[1] + 1) >> 1; |
| *dst++ = (*ref++ + *src++ + 1) >> 1; |
| } |
| dst += 7; |
| dst2 += 7; |
| dst3 += 7; |
| dst4 += 7; |
| ref += 7; |
| src += 7; |
| } |
| |
| return ; |
| } |
| |
| /* assuming cand always has a pitch of 24 */ |
| int SATD_MB(uint8 *cand, uint8 *cur, int dmin) |
| { |
| int cost; |
| int j, k; |
| int16 res[256], *pres; // residue |
| int m0, m1, m2, m3; |
| |
| // calculate SATD |
| pres = res; |
| // horizontal transform |
| for (j = 0; j < 16; j++) |
| { |
| k = 4; |
| while (k > 0) |
| { |
| m0 = cur[0] - cand[0]; |
| m3 = cur[3] - cand[3]; |
| m0 += m3; |
| m3 = m0 - (m3 << 1); |
| m1 = cur[1] - cand[1]; |
| m2 = cur[2] - cand[2]; |
| m1 += m2; |
| m2 = m1 - (m2 << 1); |
| pres[0] = m0 + m1; |
| pres[2] = m0 - m1; |
| pres[1] = m2 + m3; |
| pres[3] = m3 - m2; |
| |
| cur += 4; |
| pres += 4; |
| cand += 4; |
| k--; |
| } |
| cand += 8; |
| } |
| /* vertical transform */ |
| cost = 0; |
| for (j = 0; j < 4; j++) |
| { |
| pres = res + (j << 6); |
| k = 16; |
| while (k > 0) |
| { |
| m0 = pres[0]; |
| m3 = pres[3<<4]; |
| m0 += m3; |
| m3 = m0 - (m3 << 1); |
| m1 = pres[1<<4]; |
| m2 = pres[2<<4]; |
| m1 += m2; |
| m2 = m1 - (m2 << 1); |
| |
| pres[0] = m0 = m0 + m1; |
| cost += ((m0 > 0) ? m0 : -m0); |
| m1 = m0 - (m1 << 1); |
| cost += ((m1 > 0) ? m1 : -m1); |
| m3 = m2 + m3; |
| cost += ((m3 > 0) ? m3 : -m3); |
| m2 = m3 - (m2 << 1); |
| cost += ((m2 > 0) ? m2 : -m2); |
| |
| pres++; |
| k--; |
| } |
| if ((cost >> 1) > dmin) /* early drop out */ |
| { |
| return (cost >> 1); |
| } |
| } |
| |
| return (cost >> 1); |
| } |
| |
| |
| |
| |
| |