| /* ------------------------------------------------------------------ |
| * Copyright (C) 2008 PacketVideo |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
| * express or implied. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * ------------------------------------------------------------------- |
| */ |
| /* |
| ------------------------------------------------------------------------------ |
| INPUT AND OUTPUT DEFINITIONS |
| |
| Inputs: |
| [input_variable_name] = [description of the input to module, its type |
| definition, and length (when applicable)] |
| |
| Local Stores/Buffers/Pointers Needed: |
| [local_store_name] = [description of the local store, its type |
| definition, and length (when applicable)] |
| [local_buffer_name] = [description of the local buffer, its type |
| definition, and length (when applicable)] |
| [local_ptr_name] = [description of the local pointer, its type |
| definition, and length (when applicable)] |
| |
| Global Stores/Buffers/Pointers Needed: |
| [global_store_name] = [description of the global store, its type |
| definition, and length (when applicable)] |
| [global_buffer_name] = [description of the global buffer, its type |
| definition, and length (when applicable)] |
| [global_ptr_name] = [description of the global pointer, its type |
| definition, and length (when applicable)] |
| |
| Outputs: |
| [return_variable_name] = [description of data/pointer returned |
| by module, its type definition, and length |
| (when applicable)] |
| |
| Pointers and Buffers Modified: |
| [variable_bfr_ptr] points to the [describe where the |
| variable_bfr_ptr points to, its type definition, and length |
| (when applicable)] |
| [variable_bfr] contents are [describe the new contents of |
| variable_bfr] |
| |
| Local Stores Modified: |
| [local_store_name] = [describe new contents, its type |
| definition, and length (when applicable)] |
| |
| Global Stores Modified: |
| [global_store_name] = [describe new contents, its type |
| definition, and length (when applicable)] |
| |
| ------------------------------------------------------------------------------ |
| FUNCTION DESCRIPTION |
| |
| ------------------------------------------------------------------------------ |
| REQUIREMENTS |
| |
| ------------------------------------------------------------------------------ |
| REFERENCES |
| |
| ------------------------------------------------------------------------------ |
| PSEUDO-CODE |
| |
| ------------------------------------------------------------------------------ |
| RESOURCES USED |
| When the code is written for a specific target processor, |
| the resources used should be documented below. |
| |
| STACK USAGE: [stack count for this module] + [variable to represent |
| stack usage for each subroutine called] |
| |
| where: [stack usage variable] = stack usage for [subroutine |
| name] (see [filename].ext) |
| |
| DATA MEMORY USED: x words |
| |
| PROGRAM MEMORY USED: x words |
| |
| CLOCK CYCLES: [cycle count equation for this module] + [variable |
| used to represent cycle count for each subroutine |
| called] |
| |
| where: [cycle count variable] = cycle count for [subroutine |
| name] (see [filename].ext) |
| |
| ------------------------------------------------------------------------------ |
| */ |
| |
| /*---------------------------------------------------------------------------- |
| ; INCLUDES |
| ----------------------------------------------------------------------------*/ |
| #include "mp4dec_lib.h" |
| #include "idct.h" |
| #include "motion_comp.h" |
| |
| #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA |
| #include "osclconfig_compiler_warnings.h" |
| /*---------------------------------------------------------------------------- |
| ; MACROS |
| ; Define module specific macros here |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; DEFINES |
| ; Include all pre-processor statements here. Include conditional |
| ; compile variables also. |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; LOCAL FUNCTION DEFINITIONS |
| ; Function Prototype declaration |
| ----------------------------------------------------------------------------*/ |
| /* private prototypes */ |
| static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width); |
| static void idctrow_intra(int16 *blk, PIXEL *, int width); |
| static void idctcol(int16 *blk); |
| |
| #ifdef FAST_IDCT |
| // mapping from nz_coefs to functions to be used |
| |
| |
| // ARM4 does not allow global data when they are not constant hence |
| // an array of function pointers cannot be considered as array of constants |
| // (actual addresses are only known when the dll is loaded). |
| // So instead of arrays of function pointers, we'll store here |
| // arrays of rows or columns and then call the idct function |
| // corresponding to such the row/column number: |
| |
| |
| static void (*const idctcolVCA[10][4])(int16*) = |
| { |
| {&idctcol1, &idctcol0, &idctcol0, &idctcol0}, |
| {&idctcol1, &idctcol1, &idctcol0, &idctcol0}, |
| {&idctcol2, &idctcol1, &idctcol0, &idctcol0}, |
| {&idctcol3, &idctcol1, &idctcol0, &idctcol0}, |
| {&idctcol3, &idctcol2, &idctcol0, &idctcol0}, |
| {&idctcol3, &idctcol2, &idctcol1, &idctcol0}, |
| {&idctcol3, &idctcol2, &idctcol1, &idctcol1}, |
| {&idctcol3, &idctcol2, &idctcol2, &idctcol1}, |
| {&idctcol3, &idctcol3, &idctcol2, &idctcol1}, |
| {&idctcol4, &idctcol3, &idctcol2, &idctcol1} |
| }; |
| |
| |
| static void (*const idctrowVCA[10])(int16*, uint8*, uint8*, int) = |
| { |
| &idctrow1, |
| &idctrow2, |
| &idctrow2, |
| &idctrow2, |
| &idctrow2, |
| &idctrow3, |
| &idctrow4, |
| &idctrow4, |
| &idctrow4, |
| &idctrow4 |
| }; |
| |
| |
| static void (*const idctcolVCA2[16])(int16*) = |
| { |
| &idctcol0, &idctcol4, &idctcol3, &idctcol4, |
| &idctcol2, &idctcol4, &idctcol3, &idctcol4, |
| &idctcol1, &idctcol4, &idctcol3, &idctcol4, |
| &idctcol2, &idctcol4, &idctcol3, &idctcol4 |
| }; |
| |
| static void (*const idctrowVCA2[8])(int16*, uint8*, uint8*, int) = |
| { |
| &idctrow1, &idctrow4, &idctrow3, &idctrow4, |
| &idctrow2, &idctrow4, &idctrow3, &idctrow4 |
| }; |
| |
| static void (*const idctrowVCA_intra[10])(int16*, PIXEL *, int) = |
| { |
| &idctrow1_intra, |
| &idctrow2_intra, |
| &idctrow2_intra, |
| &idctrow2_intra, |
| &idctrow2_intra, |
| &idctrow3_intra, |
| &idctrow4_intra, |
| &idctrow4_intra, |
| &idctrow4_intra, |
| &idctrow4_intra |
| }; |
| |
| static void (*const idctrowVCA2_intra[8])(int16*, PIXEL *, int) = |
| { |
| &idctrow1_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra, |
| &idctrow2_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra |
| }; |
| #endif |
| |
| /*---------------------------------------------------------------------------- |
| ; LOCAL STORE/BUFFER/POINTER DEFINITIONS |
| ; Variable declaration - defined here and used outside this module |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; EXTERNAL FUNCTION REFERENCES |
| ; Declare functions defined elsewhere and referenced in this module |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES |
| ; Declare variables used in this module but defined elsewhere |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; FUNCTION CODE |
| ----------------------------------------------------------------------------*/ |
| void MBlockIDCT(VideoDecData *video) |
| { |
| Vop *currVop = video->currVop; |
| MacroBlock *mblock = video->mblock; |
| PIXEL *c_comp; |
| PIXEL *cu_comp; |
| PIXEL *cv_comp; |
| int x_pos = video->mbnum_col; |
| int y_pos = video->mbnum_row; |
| int width, width_uv; |
| int32 offset; |
| width = video->width; |
| width_uv = width >> 1; |
| offset = (int32)(y_pos << 4) * width + (x_pos << 4); |
| |
| c_comp = currVop->yChan + offset; |
| cu_comp = currVop->uChan + (offset >> 2) + (x_pos << 2); |
| cv_comp = currVop->vChan + (offset >> 2) + (x_pos << 2); |
| |
| BlockIDCT_intra(mblock, c_comp, 0, width); |
| BlockIDCT_intra(mblock, c_comp + 8, 1, width); |
| BlockIDCT_intra(mblock, c_comp + (width << 3), 2, width); |
| BlockIDCT_intra(mblock, c_comp + (width << 3) + 8, 3, width); |
| BlockIDCT_intra(mblock, cu_comp, 4, width_uv); |
| BlockIDCT_intra(mblock, cv_comp, 5, width_uv); |
| } |
| |
| |
| void BlockIDCT_intra( |
| MacroBlock *mblock, PIXEL *c_comp, int comp, int width) |
| { |
| /*---------------------------------------------------------------------------- |
| ; Define all local variables |
| ----------------------------------------------------------------------------*/ |
| int16 *coeff_in = mblock->block[comp]; |
| #ifdef INTEGER_IDCT |
| #ifdef FAST_IDCT /* VCA IDCT using nzcoefs and bitmaps*/ |
| int i, bmapr; |
| int nz_coefs = mblock->no_coeff[comp]; |
| uint8 *bitmapcol = mblock->bitmapcol[comp]; |
| uint8 bitmaprow = mblock->bitmaprow[comp]; |
| |
| /*---------------------------------------------------------------------------- |
| ; Function body here |
| ----------------------------------------------------------------------------*/ |
| if (nz_coefs <= 10) |
| { |
| bmapr = (nz_coefs - 1); |
| |
| (*(idctcolVCA[bmapr]))(coeff_in); |
| (*(idctcolVCA[bmapr][1]))(coeff_in + 1); |
| (*(idctcolVCA[bmapr][2]))(coeff_in + 2); |
| (*(idctcolVCA[bmapr][3]))(coeff_in + 3); |
| |
| (*idctrowVCA_intra[nz_coefs-1])(coeff_in, c_comp, width); |
| } |
| else |
| { |
| i = 8; |
| while (i--) |
| { |
| bmapr = (int)bitmapcol[i]; |
| if (bmapr) |
| { |
| if ((bmapr&0xf) == 0) /* 07/18/01 */ |
| { |
| (*(idctcolVCA2[bmapr>>4]))(coeff_in + i); |
| } |
| else |
| { |
| idctcol(coeff_in + i); |
| } |
| } |
| } |
| if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0) |
| { |
| bitmaprow >>= 4; |
| (*(idctrowVCA2_intra[(int)bitmaprow]))(coeff_in, c_comp, width); |
| } |
| else |
| { |
| idctrow_intra(coeff_in, c_comp, width); |
| } |
| } |
| #else |
| void idct_intra(int *block, uint8 *comp, int width); |
| idct_intra(coeff_in, c_comp, width); |
| #endif |
| #else |
| void idctref_intra(int *block, uint8 *comp, int width); |
| idctref_intra(coeff_in, c_comp, width); |
| #endif |
| |
| |
| /*---------------------------------------------------------------------------- |
| ; Return nothing or data or data pointer |
| ----------------------------------------------------------------------------*/ |
| return; |
| } |
| |
| /* 08/04/05, no residue, just copy from pred to output */ |
| void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width) |
| { |
| /* copy 4 bytes at a time */ |
| width -= 4; |
| *((uint32*)dst) = *((uint32*)pred); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| *((uint32*)(dst += width)) = *((uint32*)(pred += 12)); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| *((uint32*)(dst += width)) = *((uint32*)(pred += 12)); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| *((uint32*)(dst += width)) = *((uint32*)(pred += 12)); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| *((uint32*)(dst += width)) = *((uint32*)(pred += 12)); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| *((uint32*)(dst += width)) = *((uint32*)(pred += 12)); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| *((uint32*)(dst += width)) = *((uint32*)(pred += 12)); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| *((uint32*)(dst += width)) = *((uint32*)(pred += 12)); |
| *((uint32*)(dst += 4)) = *((uint32*)(pred += 4)); |
| |
| return ; |
| } |
| #if 0 |
| /* 08/04/05, main function for IDCT+MC */ |
| void MBlockIDCTAdd( |
| VideoDecData *video, |
| int nz_coefs[] |
| ) |
| { |
| /*---------------------------------------------------------------------------- |
| ; Define all local variables |
| ----------------------------------------------------------------------------*/ |
| Vop *currVop = video->currVop; |
| MacroBlock *mblock = video->mblock; |
| PIXEL *c_comp; |
| PIXEL *cu_comp; |
| PIXEL *cv_comp; |
| uint8 *pred_block; |
| int x_pos = video->mbnum_col; |
| int y_pos = video->mbnum_row; |
| int width; |
| int32 offset; |
| width = video->width; |
| offset = (int32)(y_pos << 4) * width + (x_pos << 4); |
| |
| c_comp = currVop->yChan + offset; |
| cu_comp = currVop->uChan + (offset >> 2) + (x_pos << 2); |
| cv_comp = currVop->vChan + (offset >> 2) + (x_pos << 2); |
| |
| pred_block = mblock->pred_block; |
| |
| if (nz_coefs[0]) |
| { |
| BlockIDCT(c_comp, pred_block, mblock->block[0], width, nz_coefs[0], |
| mblock->bitmapcol[0], mblock->bitmaprow[0]); |
| } |
| |
| if (nz_coefs[1]) |
| { |
| BlockIDCT(c_comp + 8, pred_block + 8, mblock->block[1], width, nz_coefs[1], |
| mblock->bitmapcol[1], mblock->bitmaprow[1]); |
| } |
| |
| if (nz_coefs[2]) |
| { |
| BlockIDCT(c_comp + (width << 3), pred_block + 128, mblock->block[2], width, nz_coefs[2], |
| mblock->bitmapcol[2], mblock->bitmaprow[2]); |
| } |
| |
| if (nz_coefs[3]) |
| { |
| BlockIDCT(c_comp + (width << 3) + 8, pred_block + 136, mblock->block[3], width, nz_coefs[3], |
| mblock->bitmapcol[3], mblock->bitmaprow[3]); |
| } |
| |
| if (nz_coefs[4]) |
| { |
| BlockIDCT(cu_comp, pred_block + 256, mblock->block[4], width >> 1, nz_coefs[4], |
| mblock->bitmapcol[4], mblock->bitmaprow[4]); |
| } |
| |
| if (nz_coefs[5]) |
| { |
| BlockIDCT(cv_comp, pred_block + 264, mblock->block[5], width >> 1, nz_coefs[5], |
| mblock->bitmapcol[5], mblock->bitmaprow[5]); |
| } |
| |
| return ; |
| } |
| #endif |
| |
| /* 08/04/05 compute IDCT and add prediction at the end */ |
| void BlockIDCT( |
| uint8 *dst, /* destination */ |
| uint8 *pred, /* prediction block, pitch 16 */ |
| int16 *coeff_in, /* DCT data, size 64 */ |
| int width, /* width of dst */ |
| int nz_coefs, |
| uint8 *bitmapcol, |
| uint8 bitmaprow |
| ) |
| { |
| #ifdef INTEGER_IDCT |
| #ifdef FAST_IDCT /* VCA IDCT using nzcoefs and bitmaps*/ |
| int i, bmapr; |
| /*---------------------------------------------------------------------------- |
| ; Function body here |
| ----------------------------------------------------------------------------*/ |
| if (nz_coefs <= 10) |
| { |
| bmapr = (nz_coefs - 1); |
| (*(idctcolVCA[bmapr]))(coeff_in); |
| (*(idctcolVCA[bmapr][1]))(coeff_in + 1); |
| (*(idctcolVCA[bmapr][2]))(coeff_in + 2); |
| (*(idctcolVCA[bmapr][3]))(coeff_in + 3); |
| |
| (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width); |
| return ; |
| } |
| else |
| { |
| i = 8; |
| |
| while (i--) |
| { |
| bmapr = (int)bitmapcol[i]; |
| if (bmapr) |
| { |
| if ((bmapr&0xf) == 0) /* 07/18/01 */ |
| { |
| (*(idctcolVCA2[bmapr>>4]))(coeff_in + i); |
| } |
| else |
| { |
| idctcol(coeff_in + i); |
| } |
| } |
| } |
| if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0) |
| { |
| (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width); |
| } |
| else |
| { |
| idctrow(coeff_in, pred, dst, width); |
| } |
| return ; |
| } |
| #else // FAST_IDCT |
| void idct(int *block, uint8 *pred, uint8 *dst, int width); |
| idct(coeff_in, pred, dst, width); |
| return; |
| #endif // FAST_IDCT |
| #else // INTEGER_IDCT |
| void idctref(int *block, uint8 *pred, uint8 *dst, int width); |
| idctref(coeff_in, pred, dst, width); |
| return; |
| #endif // INTEGER_IDCT |
| |
| } |
| /*---------------------------------------------------------------------------- |
| ; End Function: BlockIDCT |
| ----------------------------------------------------------------------------*/ |
| |
| |
| /****************************************************************************/ |
| |
| /* |
| ------------------------------------------------------------------------------ |
| FUNCTION NAME: idctrow |
| ------------------------------------------------------------------------------ |
| INPUT AND OUTPUT DEFINITIONS FOR idctrow |
| |
| Inputs: |
| [input_variable_name] = [description of the input to module, its type |
| definition, and length (when applicable)] |
| |
| Local Stores/Buffers/Pointers Needed: |
| [local_store_name] = [description of the local store, its type |
| definition, and length (when applicable)] |
| [local_buffer_name] = [description of the local buffer, its type |
| definition, and length (when applicable)] |
| [local_ptr_name] = [description of the local pointer, its type |
| definition, and length (when applicable)] |
| |
| Global Stores/Buffers/Pointers Needed: |
| [global_store_name] = [description of the global store, its type |
| definition, and length (when applicable)] |
| [global_buffer_name] = [description of the global buffer, its type |
| definition, and length (when applicable)] |
| [global_ptr_name] = [description of the global pointer, its type |
| definition, and length (when applicable)] |
| |
| Outputs: |
| [return_variable_name] = [description of data/pointer returned |
| by module, its type definition, and length |
| (when applicable)] |
| |
| Pointers and Buffers Modified: |
| [variable_bfr_ptr] points to the [describe where the |
| variable_bfr_ptr points to, its type definition, and length |
| (when applicable)] |
| [variable_bfr] contents are [describe the new contents of |
| variable_bfr] |
| |
| Local Stores Modified: |
| [local_store_name] = [describe new contents, its type |
| definition, and length (when applicable)] |
| |
| Global Stores Modified: |
| [global_store_name] = [describe new contents, its type |
| definition, and length (when applicable)] |
| |
| ------------------------------------------------------------------------------ |
| FUNCTION DESCRIPTION FOR idctrow |
| |
| ------------------------------------------------------------------------------ |
| REQUIREMENTS FOR idctrow |
| |
| ------------------------------------------------------------------------------ |
| REFERENCES FOR idctrow |
| |
| ------------------------------------------------------------------------------ |
| PSEUDO-CODE FOR idctrow |
| |
| ------------------------------------------------------------------------------ |
| RESOURCES USED FOR idctrow |
| When the code is written for a specific target processor, |
| the resources used should be documented below. |
| |
| STACK USAGE: [stack count for this module] + [variable to represent |
| stack usage for each subroutine called] |
| |
| where: [stack usage variable] = stack usage for [subroutine |
| name] (see [filename].ext) |
| |
| DATA MEMORY USED: x words |
| |
| PROGRAM MEMORY USED: x words |
| |
| CLOCK CYCLES: [cycle count equation for this module] + [variable |
| used to represent cycle count for each subroutine |
| called] |
| |
| where: [cycle count variable] = cycle count for [subroutine |
| name] (see [filename].ext) |
| |
| ------------------------------------------------------------------------------ |
| */ |
| |
| /*---------------------------------------------------------------------------- |
| ; Function Code FOR idctrow |
| ----------------------------------------------------------------------------*/ |
| #if 1 |
| void idctrow( |
| int16 *blk, uint8 *pred, uint8 *dst, int width |
| ) |
| { |
| /*---------------------------------------------------------------------------- |
| ; Define all local variables |
| ----------------------------------------------------------------------------*/ |
| int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; |
| int i = 8; |
| uint32 pred_word, dst_word; |
| int res, res2; |
| |
| /*---------------------------------------------------------------------------- |
| ; Function body here |
| ----------------------------------------------------------------------------*/ |
| /* row (horizontal) IDCT |
| * |
| * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- * |
| * ( k + - ) * l ) l=0 8 2 |
| * |
| * where: c[0] = 128 c[1..7] = 128*sqrt(2) */ |
| |
| /* preset the offset, such that we can take advantage pre-offset addressing mode */ |
| width -= 4; |
| dst -= width; |
| pred -= 12; |
| blk -= 8; |
| |
| while (i--) |
| { |
| x1 = (int32)blk[12] << 8; |
| blk[12] = 0; |
| x2 = blk[14]; |
| blk[14] = 0; |
| x3 = blk[10]; |
| blk[10] = 0; |
| x4 = blk[9]; |
| blk[9] = 0; |
| x5 = blk[15]; |
| blk[15] = 0; |
| x6 = blk[13]; |
| blk[13] = 0; |
| x7 = blk[11]; |
| blk[11] = 0; |
| x0 = ((*(blk += 8)) << 8) + 8192; |
| blk[0] = 0; /* for proper rounding in the fourth stage */ |
| |
| /* first stage */ |
| x8 = W7 * (x4 + x5) + 4; |
| x4 = (x8 + (W1 - W7) * x4) >> 3; |
| x5 = (x8 - (W1 + W7) * x5) >> 3; |
| x8 = W3 * (x6 + x7) + 4; |
| x6 = (x8 - (W3 - W5) * x6) >> 3; |
| x7 = (x8 - (W3 + W5) * x7) >> 3; |
| |
| /* second stage */ |
| x8 = x0 + x1; |
| x0 -= x1; |
| x1 = W6 * (x3 + x2) + 4; |
| x2 = (x1 - (W2 + W6) * x2) >> 3; |
| x3 = (x1 + (W2 - W6) * x3) >> 3; |
| x1 = x4 + x6; |
| x4 -= x6; |
| x6 = x5 + x7; |
| x5 -= x7; |
| |
| /* third stage */ |
| x7 = x8 + x3; |
| x8 -= x3; |
| x3 = x0 + x2; |
| x0 -= x2; |
| x2 = (181 * (x4 + x5) + 128) >> 8; |
| x4 = (181 * (x4 - x5) + 128) >> 8; |
| |
| /* fourth stage */ |
| pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */ |
| |
| res = (x7 + x1) >> 14; |
| ADD_AND_CLIP1(res); |
| res2 = (x3 + x2) >> 14; |
| ADD_AND_CLIP2(res2); |
| dst_word = (res2 << 8) | res; |
| res = (x0 + x4) >> 14; |
| ADD_AND_CLIP3(res); |
| dst_word |= (res << 16); |
| res = (x8 + x6) >> 14; |
| ADD_AND_CLIP4(res); |
| dst_word |= (res << 24); |
| *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */ |
| |
| pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */ |
| |
| res = (x8 - x6) >> 14; |
| ADD_AND_CLIP1(res); |
| res2 = (x0 - x4) >> 14; |
| ADD_AND_CLIP2(res2); |
| dst_word = (res2 << 8) | res; |
| res = (x3 - x2) >> 14; |
| ADD_AND_CLIP3(res); |
| dst_word |= (res << 16); |
| res = (x7 - x1) >> 14; |
| ADD_AND_CLIP4(res); |
| dst_word |= (res << 24); |
| *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */ |
| } |
| /*---------------------------------------------------------------------------- |
| ; Return nothing or data or data pointer |
| ----------------------------------------------------------------------------*/ |
| return; |
| } |
| #endif |
| |
| void idctrow_intra( |
| int16 *blk, PIXEL *comp, int width |
| ) |
| { |
| /*---------------------------------------------------------------------------- |
| ; Define all local variables |
| ----------------------------------------------------------------------------*/ |
| int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp; |
| int i = 8; |
| int offset = width; |
| int32 word; |
| |
| /*---------------------------------------------------------------------------- |
| ; Function body here |
| ----------------------------------------------------------------------------*/ |
| /* row (horizontal) IDCT |
| * |
| * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- * |
| * ( k + - ) * l ) l=0 8 2 |
| * |
| * where: c[0] = 128 c[1..7] = 128*sqrt(2) */ |
| while (i--) |
| { |
| x1 = (int32)blk[4] << 8; |
| blk[4] = 0; |
| x2 = blk[6]; |
| blk[6] = 0; |
| x3 = blk[2]; |
| blk[2] = 0; |
| x4 = blk[1]; |
| blk[1] = 0; |
| x5 = blk[7]; |
| blk[7] = 0; |
| x6 = blk[5]; |
| blk[5] = 0; |
| x7 = blk[3]; |
| blk[3] = 0; |
| #ifndef FAST_IDCT |
| /* shortcut */ /* covered by idctrow1 01/9/2001 */ |
| if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) |
| { |
| blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6; |
| return; |
| } |
| #endif |
| x0 = ((int32)blk[0] << 8) + 8192; |
| blk[0] = 0; /* for proper rounding in the fourth stage */ |
| |
| /* first stage */ |
| x8 = W7 * (x4 + x5) + 4; |
| x4 = (x8 + (W1 - W7) * x4) >> 3; |
| x5 = (x8 - (W1 + W7) * x5) >> 3; |
| x8 = W3 * (x6 + x7) + 4; |
| x6 = (x8 - (W3 - W5) * x6) >> 3; |
| x7 = (x8 - (W3 + W5) * x7) >> 3; |
| |
| /* second stage */ |
| x8 = x0 + x1; |
| x0 -= x1; |
| x1 = W6 * (x3 + x2) + 4; |
| x2 = (x1 - (W2 + W6) * x2) >> 3; |
| x3 = (x1 + (W2 - W6) * x3) >> 3; |
| x1 = x4 + x6; |
| x4 -= x6; |
| x6 = x5 + x7; |
| x5 -= x7; |
| |
| /* third stage */ |
| x7 = x8 + x3; |
| x8 -= x3; |
| x3 = x0 + x2; |
| x0 -= x2; |
| x2 = (181 * (x4 + x5) + 128) >> 8; |
| x4 = (181 * (x4 - x5) + 128) >> 8; |
| |
| /* fourth stage */ |
| word = ((x7 + x1) >> 14); |
| CLIP_RESULT(word) |
| |
| temp = ((x3 + x2) >> 14); |
| CLIP_RESULT(temp) |
| word = word | (temp << 8); |
| |
| temp = ((x0 + x4) >> 14); |
| CLIP_RESULT(temp) |
| word = word | (temp << 16); |
| |
| temp = ((x8 + x6) >> 14); |
| CLIP_RESULT(temp) |
| word = word | (temp << 24); |
| *((int32*)(comp)) = word; |
| |
| word = ((x8 - x6) >> 14); |
| CLIP_RESULT(word) |
| |
| temp = ((x0 - x4) >> 14); |
| CLIP_RESULT(temp) |
| word = word | (temp << 8); |
| |
| temp = ((x3 - x2) >> 14); |
| CLIP_RESULT(temp) |
| word = word | (temp << 16); |
| |
| temp = ((x7 - x1) >> 14); |
| CLIP_RESULT(temp) |
| word = word | (temp << 24); |
| *((int32*)(comp + 4)) = word; |
| comp += offset; |
| |
| blk += B_SIZE; |
| } |
| /*---------------------------------------------------------------------------- |
| ; Return nothing or data or data pointer |
| ----------------------------------------------------------------------------*/ |
| return; |
| } |
| |
| #if 0 |
/*
 * Alternative implementation of idctrow (row IDCT + add prediction),
 * currently compiled out by the surrounding #if 0.
 *
 * It loads the coefficients two at a time through uint32 pointers and does
 * the clipping inline.  Trailing comments of the form "x1 = x4 + x6" name
 * the corresponding statement of the straightforward idctrow version; the
 * variables holding those values differ here.
 *
 *   blk   - 64 coefficients, 8 per row
 *   pred  - prediction block, row pitch 16 bytes
 *   dst   - destination, row pitch `width` bytes
 *   width - row pitch of dst
 */
void idctrow(
    int16 *blk, uint8 *pred, uint8 *dst, int width
)
{
    /*----------------------------------------------------------------------------
    ; Define all local variables
    ----------------------------------------------------------------------------*/
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9;
    int i = 8;

    /*----------------------------------------------------------------------------
    ; Function body here
    ----------------------------------------------------------------------------*/
    /* row (horizontal) IDCT
     *
     *           7                       pi         1
     * dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l )
     *          l=0                      8          2
     *
     * where: c[0]    = 128
     *        c[1..7] = 128*sqrt(2)
     */

    /* preset the offset, such that we can take advantage pre-offset addressing mode */
    width -= 4;
    dst -= width;
    pred -= 12;
    blk -= 8;

    while (i--)
    {
        x2 = *((uint32*)(blk + 14)); /* load blk[6]=x2, blk[7]=x5 to [x5|x2] */
        x1 = *((uint32*)(blk + 12)); /* load blk[4]=x1, blk[5]=x6 to [x6|x1] */
        x3 = *((uint32*)(blk + 10)); /* load blk[2]=x3, blk[3]=x7 to [x7|x3] */
        x0 = *((uint32*)(blk += 8)); /* load blk[0]=x0, blk[1]=x4 to [x4|x0] */

        /* upper halves of the loaded words: the odd-indexed coefficients */
        x5 = x2 >> 16;
        x6 = x1 >> 16;
        x7 = x3 >> 16;
        x4 = x0 >> 16;

        x8 = W7 * (x4 + x5) + 4;
        x9 = W3 * (x6 + x7) + 4;
        x4 = (x8 + W1mW7 * x4); /* to be >> 3 */
        x5 = (x8 - W1pW7 * x5); /* to be >> 3 */
        x6 = (x9 + W5mW3 * x6); /* to be >> 3 */
        x7 = (x9 + mW3mW5 * x7); /* to be >> 3 */

        x4 >>= 3;
        x5 >>= 3;
        x6 = x4 + (x6 >> 3); /* x1 = x4 + x6 */
        x7 = x5 + (x7 >> 3); /* x6 = x5 + x7 */
        x4 = (x4 << 1) - x6; /* x4-=x6 */
        x5 = (x5 << 1) - x7; /* x5-=x7 */

        /*** x2 = (181 * (x4 + x5) + 128) >> 8;***/
        /*** x4 = (181 * (x4 - x5) + 128) >> 8;***/
        x5 += x4;
        x4 = (x4 << 1) - x5;
        x5 = 181 * x5 + 128; /* to be >> 8 */
        x4 = 181 * x4 + 128; /* to be >> 8 */

        /*** lower 4 ****/
        x0 += 32;
        /* may not needed if we use in-line SMULxy */
        x1 <<= 16; /* to be >> 8 */
        x2 <<= 16;
        x3 <<= 16;
        x0 <<= 16; /* to be >> 8 */
        x2 >>= 16;
        x3 >>= 16;

        x8 = x3 + x2;
        x8 = W6 * x8 + 4; /* x8 = W6 * (x3 + x2) + 4; */
        x1 >>= 8;
        x1 += (x0 >> 8); /* x8 = x1+x0*/
        x0 = (x0 >> 7) - x1; /* x0-=x1 */
        x2 = (x8 + mW2mW6 * x2); /* to be >> 3; */
        x3 = (x8 + W2mW6 * x3); /* to be >> 3; */

        x3 = x1 + (x3 >> 3); /* x7 = x8 + x3 */
        x1 = (x1 << 1) - x3; /* x8-=x3 */
        x2 = x0 + (x2 >> 3); /* x3 = x0 + x2 */
        x0 = (x0 << 1) - x2; /* x0-=x2 */

        /** final stage **/
        x6 = x3 + x6; /* (x7+x1)>>14 */
        x5 = x2 + (x5 >> 8);/* (x3+x2)>>14 */
        x4 = x0 + (x4 >> 8);/* (x0+x4)>>14 */
        x7 = x1 + x7; /* (x8+x6)>>14 */
        x1 = (x1 << 1) - x7;/* (x8-x6)>>14 */
        x0 = (x0 << 1) - x4;/* (x0-x4)>>14 */
        x2 = (x2 << 1) - x5;/* (x3-x2)>>14 */
        x3 = (x3 << 1) - x6;/* (x7-x1)>>14 */

        /* ---- first four pixels of the row: add prediction, clip, pack ---- */
        x8 = *((uint32*)(pred += 12)); /* read 4 bytes from pred */


        x9 = x8 & 0xFF;
        x6 = x9 + (x6 >> 14); /* compensate first byte */
        x9 = (x8 >> 8) & 0xFF;
        x5 = x9 + (x5 >> 14); /* compensate second byte */
        x9 = (x8 >> 16) & 0xFF;
        x4 = x9 + (x4 >> 14); /* compensate third byte */
        x9 = (x8 >> 24) & 0xFF;
        x7 = x9 + (x7 >> 14); /* compensate fourth byte */

        if ((x7 >> 8)) x7 = ~(x7 >> 31); /* x7 is clipped to 0 or FFFFFFFF */
        x8 = ~(x4 >> 8); /* check range of x4 */
        if (x8 != -1)
        {
            x8 = ((uint32)x8) >> 24; /* 255 or 0 */
            x4 = x8 | (x7 << 8);
        }
        else
        {
            x4 = x4 | (x7 << 8); /* x7 x4 */
        }

        x8 = ~(x5 >> 8); /* check range of x5 */
        if (x8 != -1)
        {
            x8 = ((uint32)x8) >> 24; /* 255 or 0 */
            x5 = x8 | (x4 << 8);
        }
        else
        {
            x5 = x5 | (x4 << 8); /* x7 x4 x5 */
        }

        x8 = ~(x6 >> 8); /* check range of x6 */
        if (x8 != -1)
        {
            x8 = ((uint32)x8) >> 24; /* 255 or 0 */
            x6 = x8 | (x5 << 8);
        }
        else
        {
            x6 = x6 | (x5 << 8); /* x7 x4 x5 x6 */
        }
        *((uint32*)(dst += width)) = x6; /* save 4 bytes to dst */


        /* ---- last four pixels of the row: add prediction, clip, pack ---- */
        x8 = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
        x9 = x8 & 0xFF;
        x1 = x9 + (x1 >> 14); /* compensate first byte */
        x9 = (x8 >> 8) & 0xFF;
        x0 = x9 + (x0 >> 14); /* compensate second byte */
        x9 = (x8 >> 16) & 0xFF;
        x2 = x9 + (x2 >> 14); /* compensate third byte */
        x9 = (x8 >> 24) & 0xFF;
        x3 = x9 + (x3 >> 14); /* compensate fourth byte */

        if ((x3 >> 8)) x3 = ~(x3 >> 31); /* x3 is clipped to 0 or FFFFFFFF */
        x8 = ~(x2 >> 8); /* check range of x2 */
        if (x8 != -1)
        {
            x8 = ((uint32)x8) >> 24; /* 255 or 0 */
            x2 = x8 | (x3 << 8);
        }
        else
        {
            x2 = x2 | (x3 << 8); /* x3 x2 */
        }

        x8 = ~(x0 >> 8); /* check range of x0 */
        if (x8 != -1)
        {
            x8 = ((uint32)x8) >> 24; /* 255 or 0 */
            x0 = x8 | (x2 << 8);
        }
        else
        {
            x0 = x0 | (x2 << 8); /* x3 x2 x0 */
        }

        x8 = ~(x1 >> 8); /* check range of x1 */
        if (x8 != -1)
        {
            x8 = ((uint32)x8) >> 24; /* 255 or 0 */
            x1 = x8 | (x0 << 8);
        }
        else
        {
            x1 = x1 | (x0 << 8); /* x3 x2 x0 x1 */
        }
        *((uint32*)(dst += 4)) = x1; /* save 4 bytes to dst */
    }

    return ;
}
| #endif |
| /*---------------------------------------------------------------------------- |
| ; End Function: idctrow |
| ----------------------------------------------------------------------------*/ |
| |
| |
| /****************************************************************************/ |
| |
| /* |
| ------------------------------------------------------------------------------ |
| FUNCTION NAME: idctcol |
| ------------------------------------------------------------------------------ |
| INPUT AND OUTPUT DEFINITIONS FOR idctcol |
| |
| Inputs: |
| [input_variable_name] = [description of the input to module, its type |
| definition, and length (when applicable)] |
| |
| Local Stores/Buffers/Pointers Needed: |
| [local_store_name] = [description of the local store, its type |
| definition, and length (when applicable)] |
| [local_buffer_name] = [description of the local buffer, its type |
| definition, and length (when applicable)] |
| [local_ptr_name] = [description of the local pointer, its type |
| definition, and length (when applicable)] |
| |
| Global Stores/Buffers/Pointers Needed: |
| [global_store_name] = [description of the global store, its type |
| definition, and length (when applicable)] |
| [global_buffer_name] = [description of the global buffer, its type |
| definition, and length (when applicable)] |
| [global_ptr_name] = [description of the global pointer, its type |
| definition, and length (when applicable)] |
| |
| Outputs: |
| [return_variable_name] = [description of data/pointer returned |
| by module, its type definition, and length |
| (when applicable)] |
| |
| Pointers and Buffers Modified: |
| [variable_bfr_ptr] points to the [describe where the |
| variable_bfr_ptr points to, its type definition, and length |
| (when applicable)] |
| [variable_bfr] contents are [describe the new contents of |
| variable_bfr] |
| |
| Local Stores Modified: |
| [local_store_name] = [describe new contents, its type |
| definition, and length (when applicable)] |
| |
| Global Stores Modified: |
| [global_store_name] = [describe new contents, its type |
| definition, and length (when applicable)] |
| |
| ------------------------------------------------------------------------------ |
| FUNCTION DESCRIPTION FOR idctcol |
| |
| ------------------------------------------------------------------------------ |
| REQUIREMENTS FOR idctcol |
| |
| ------------------------------------------------------------------------------ |
| REFERENCES FOR idctcol |
| |
| ------------------------------------------------------------------------------ |
| PSEUDO-CODE FOR idctcol |
| |
| ------------------------------------------------------------------------------ |
| RESOURCES USED FOR idctcol |
| When the code is written for a specific target processor, the |
| resources used should be documented below. |
| |
| STACK USAGE: [stack count for this module] + [variable to represent |
| stack usage for each subroutine called] |
| |
| where: [stack usage variable] = stack usage for [subroutine |
| name] (see [filename].ext) |
| |
| DATA MEMORY USED: x words |
| |
| PROGRAM MEMORY USED: x words |
| |
| CLOCK CYCLES: [cycle count equation for this module] + [variable |
| used to represent cycle count for each subroutine |
| called] |
| |
| where: [cycle count variable] = cycle count for [subroutine |
| name] (see [filename].ext) |
| |
| ------------------------------------------------------------------------------ |
| */ |
| |
| /*---------------------------------------------------------------------------- |
| ; Function Code FOR idctcol |
| ----------------------------------------------------------------------------*/ |
| void idctcol( |
| int16 *blk |
| ) |
| { |
| /*---------------------------------------------------------------------------- |
| ; Define all local variables |
| ----------------------------------------------------------------------------*/ |
| int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; |
| |
| /*---------------------------------------------------------------------------- |
| ; Function body here |
| ----------------------------------------------------------------------------*/ |
| /* column (vertical) IDCT |
| * |
| * 7 pi 1 dst[8*k] = sum c[l] * src[8*l] * |
| * cos( -- * ( k + - ) * l ) l=0 8 2 |
| * |
| * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */ |
| x1 = (int32)blk[32] << 11; |
| x2 = blk[48]; |
| x3 = blk[16]; |
| x4 = blk[8]; |
| x5 = blk[56]; |
| x6 = blk[40]; |
| x7 = blk[24]; |
| #ifndef FAST_IDCT |
| /* shortcut */ /* covered by idctcolumn1 01/9/2001 */ |
| if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) |
| { |
| blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56] |
| = blk[0] << 3; |
| return; |
| } |
| #endif |
| |
| x0 = ((int32)blk[0] << 11) + 128; |
| |
| /* first stage */ |
| x8 = W7 * (x4 + x5); |
| x4 = x8 + (W1 - W7) * x4; |
| x5 = x8 - (W1 + W7) * x5; |
| x8 = W3 * (x6 + x7); |
| x6 = x8 - (W3 - W5) * x6; |
| x7 = x8 - (W3 + W5) * x7; |
| |
| /* second stage */ |
| x8 = x0 + x1; |
| x0 -= x1; |
| x1 = W6 * (x3 + x2); |
| x2 = x1 - (W2 + W6) * x2; |
| x3 = x1 + (W2 - W6) * x3; |
| x1 = x4 + x6; |
| x4 -= x6; |
| x6 = x5 + x7; |
| x5 -= x7; |
| |
| /* third stage */ |
| x7 = x8 + x3; |
| x8 -= x3; |
| x3 = x0 + x2; |
| x0 -= x2; |
| x2 = (181 * (x4 + x5) + 128) >> 8; |
| x4 = (181 * (x4 - x5) + 128) >> 8; |
| |
| /* fourth stage */ |
| blk[0] = (x7 + x1) >> 8; |
| blk[8] = (x3 + x2) >> 8; |
| blk[16] = (x0 + x4) >> 8; |
| blk[24] = (x8 + x6) >> 8; |
| blk[32] = (x8 - x6) >> 8; |
| blk[40] = (x0 - x4) >> 8; |
| blk[48] = (x3 - x2) >> 8; |
| blk[56] = (x7 - x1) >> 8; |
| /*---------------------------------------------------------------------------- |
| ; Return nothing or data or data pointer |
| ----------------------------------------------------------------------------*/ |
| return; |
| } |
| /*---------------------------------------------------------------------------- |
| ; End Function: idctcol |
| ----------------------------------------------------------------------------*/ |
| |