blob: 008081007ec1c59dd5e531cbada95cbe7d671f2a [file] [log] [blame]
/*
** Copyright 2003-2010, VisualOn, Inc.
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
/*******************************************************************************
File: transform.c
Content: MDCT Transform functions
*******************************************************************************/
#include "basic_op.h"
#include "psy_const.h"
#include "transform.h"
#include "aac_rom.h"
/* Number of samples between the edge of a long frame and the centred short window. */
#define LS_TRANS ((FRAME_LEN_LONG-FRAME_LEN_SHORT)/2) /* 448 */
#define SQRT1_2 0x5a82799a /* sqrt(1/2) in Q31 */
/*
 * swap2(p0, p1): exchange the two consecutive ints that start at lvalue p0
 * with the two consecutive ints that start at lvalue p1.
 *
 * The caller must have int temporaries named `t` and `t1` in scope.
 * The expansion is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe under an unbraced if/else); each argument is
 * evaluated more than once, so it must not have side effects.
 */
#define swap2(p0,p1) \
    do { \
        t = (p0); t1 = *(&(p0)+1); \
        (p0) = (p1); *(&(p0)+1) = *(&(p1)+1); \
        (p1) = t; *(&(p1)+1) = t1; \
    } while (0)
/*********************************************************************************
*
* function name: Shuffle
* description:  In-place reordering of two-int entries ahead of the FFT.
*               The index table has two zero-terminated sections:
*                 1) (a, b) index pairs - four entry swaps per pair
*                 2) single indices     - one swap between the two halves;
*                    index 0 is always processed first, before the table
*                    is consulted
* parameters:   buf    - data to reorder; the second half starts at buf + num
*               num    - offset (in ints) from the first half to the second
*               bitTab - index table as described above
*
**********************************************************************************/
static void Shuffle(int *buf, int num, const unsigned char* bitTab)
{
    int *lower = buf;
    int *upper = buf + num;
    int a, b;
    int t, t1;      /* scratch storage used by swap2() */

    /* Section 1: index pairs, terminated by a zero first index. */
    for (;;) {
        a = *bitTab++;
        if (a == 0) {
            break;
        }
        b = *bitTab++;

        swap2(lower[4*a],     lower[4*b]);
        swap2(lower[4*a + 2], upper[4*b]);
        swap2(upper[4*a],     lower[4*b + 2]);
        swap2(upper[4*a + 2], upper[4*b + 2]);
    }

    /* Section 2: a is 0 here, so entry 0 is exchanged before any table read. */
    do {
        swap2(lower[4*a + 2], upper[4*a]);
        a = *bitTab++;
    } while (a != 0);
}
#if !defined(ARMV5E) && !defined(ARMV7Neon)
/*****************************************************************************
*
* function name: Radix4First
* description:  First FFT stage: an in-place 4-point butterfly on each group
*               of 8 ints (four interleaved value pairs), without scaling
* parameters:   buf - data, updated in place
*               num - number of 8-int groups to process
*
**********************************************************************************/
static void Radix4First(int *buf, int num)
{
    int *pt;

    for (pt = buf; num != 0; --num, pt += 8) {
        /* sums and differences of points 0/1 and of points 2/3 */
        const int sumRe01 = pt[0] + pt[2];
        const int sumIm01 = pt[1] + pt[3];
        const int difRe01 = pt[0] - pt[2];
        const int difIm01 = pt[1] - pt[3];
        const int sumRe23 = pt[4] + pt[6];
        const int sumIm23 = pt[5] + pt[7];
        const int difRe23 = pt[4] - pt[6];
        const int difIm23 = pt[5] - pt[7];

        /* every input has been read; store the four outputs */
        pt[0] = sumRe01 + sumRe23;
        pt[1] = sumIm01 + sumIm23;
        pt[2] = difRe01 + difIm23;
        pt[3] = difIm01 - difRe23;
        pt[4] = sumRe01 - sumRe23;
        pt[5] = sumIm01 - sumIm23;
        pt[6] = difRe01 - difIm23;
        pt[7] = difIm01 + difRe23;
    }
}
/*****************************************************************************
*
* function name: Radix8First
* description:  First FFT stage: an in-place 8-point butterfly on each group
*               of 16 ints (eight interleaved value pairs). Intermediate
*               sums are halved (>> 1) and the odd outputs are rotated with
*               SQRT1_2 through MULHIGH.
* parameters:   buf - data, updated in place
*               num - number of 16-int groups to process
*
**********************************************************************************/
static void Radix8First(int *buf, int num)
{
    int *pt;

    for (pt = buf; num != 0; --num, pt += 16) {
        /* ---- lower half: ints 0..7 ---- */
        const int aSumRe = pt[0] + pt[2];
        const int aSumIm = pt[1] + pt[3];
        const int aDifRe = pt[0] - pt[2];
        const int aDifIm = pt[1] - pt[3];
        const int bSumRe = pt[4] + pt[6];
        const int bSumIm = pt[5] + pt[7];
        const int bDifRe = pt[4] - pt[6];
        const int bDifIm = pt[5] - pt[7];

        const int lowSumRe   = (aSumRe + bSumRe) >> 1;
        const int lowSumIm   = (aSumIm + bSumIm) >> 1;
        const int lowDifRe   = (aSumRe - bSumRe) >> 1;
        const int lowDifIm   = (aSumIm - bSumIm) >> 1;
        const int lowCrossRe = (aDifRe - bDifIm) >> 1;
        const int lowCrossIm = (aDifIm + bDifRe) >> 1;
        const int lowAntiRe  = (aDifRe + bDifIm) >> 1;
        const int lowAntiIm  = (aDifIm - bDifRe) >> 1;

        /* ---- upper half: ints 8..15 ---- */
        const int cSumRe = pt[ 8] + pt[10];
        const int cSumIm = pt[ 9] + pt[11];
        const int cDifRe = pt[ 8] - pt[10];
        const int cDifIm = pt[ 9] - pt[11];
        const int dSumRe = pt[12] + pt[14];
        const int dSumIm = pt[13] + pt[15];
        const int dDifRe = pt[12] - pt[14];
        const int dDifIm = pt[13] - pt[15];

        const int highSumRe = (cSumRe + dSumRe) >> 1;
        const int highSumIm = (cSumIm + dSumIm) >> 1;
        const int highDifRe = (cSumRe - dSumRe) >> 1;
        const int highDifIm = (cSumIm - dSumIm) >> 1;

        /* upper-half cross terms, then scaled by sqrt(1/2) */
        const int uRe = cDifRe - dDifIm;
        const int uIm = cDifIm + dDifRe;
        const int vRe = cDifRe + dDifIm;
        const int vIm = cDifIm - dDifRe;

        const int rotUdif = MULHIGH(SQRT1_2, uRe - uIm);
        const int rotUsum = MULHIGH(SQRT1_2, uRe + uIm);
        const int rotVdif = MULHIGH(SQRT1_2, vRe - vIm);
        const int rotVsum = MULHIGH(SQRT1_2, vRe + vIm);

        /* every input has been read; store the eight outputs */
        pt[ 0] = lowSumRe + highSumRe;
        pt[ 1] = lowSumIm + highSumIm;
        pt[ 2] = lowAntiRe + rotVsum;
        pt[ 3] = lowAntiIm - rotVdif;
        pt[ 4] = lowDifRe + highDifIm;
        pt[ 5] = lowDifIm - highDifRe;
        pt[ 6] = lowCrossRe - rotUdif;
        pt[ 7] = lowCrossIm - rotUsum;
        pt[ 8] = lowSumRe - highSumRe;
        pt[ 9] = lowSumIm - highSumIm;
        pt[10] = lowAntiRe - rotVsum;
        pt[11] = lowAntiIm + rotVdif;
        pt[12] = lowDifRe - highDifIm;
        pt[13] = lowDifIm + highDifRe;
        pt[14] = lowCrossRe + rotUdif;
        pt[15] = lowCrossIm + rotUsum;
    }
}
/*****************************************************************************
*
* function name: Radix4FFT
* description: Radix 4 point fft core function. Runs the remaining radix-4
*              passes in place on buf, which holds interleaved pairs of ints.
* parameters:  buf     - data buffer, updated in place
*              num     - the first pass processes num/4 groups; the count is
*                        divided by 4 after every pass and the function
*                        returns once it reaches 0
*              bgn     - butterflies per group in the first pass; multiplied
*                        by 4 after every pass
*              twidTab - twiddle table; each butterfly consumes 6 ints
*                        (three cos/sin pairs), each pass consumes 6*bgn ints
*
**********************************************************************************/
static void Radix4FFT(int *buf, int num, int bgn, int *twidTab)
{
int r0, r1, r2, r3;
int r4, r5, r6, r7;
int t0, t1;
int sinx, cosx;
int i, j, step;
int *xptr, *csptr;
/* one iteration per FFT pass */
for (num >>= 2; num != 0; num >>= 2)
{
/* distance, in ints, between the four inputs of one butterfly */
step = 2*bgn;
xptr = buf;
/* one iteration per group of bgn butterflies */
for (i = num; i != 0; i--)
{
/* every group restarts at the beginning of this pass's twiddles */
csptr = twidTab;
for (j = bgn; j != 0; j--)
{
/* input a: taken as is (scaled by >> 2 further down) */
r0 = xptr[0];
r1 = xptr[1];
xptr += step;
/* input b: rotated by the first cos/sin pair */
t0 = xptr[0];
t1 = xptr[1];
cosx = csptr[0];
sinx = csptr[1];
r2 = MULHIGH(cosx, t0) + MULHIGH(sinx, t1); /* cos*br + sin*bi */
r3 = MULHIGH(cosx, t1) - MULHIGH(sinx, t0); /* cos*bi - sin*br */
xptr += step;
/* a/4 -/+ rotated b */
t0 = r0 >> 2;
t1 = r1 >> 2;
r0 = t0 - r2;
r1 = t1 - r3;
r2 = t0 + r2;
r3 = t1 + r3;
/* input c: rotated by the second cos/sin pair */
t0 = xptr[0];
t1 = xptr[1];
cosx = csptr[2];
sinx = csptr[3];
r4 = MULHIGH(cosx, t0) + MULHIGH(sinx, t1); /* cos*cr + sin*ci */
r5 = MULHIGH(cosx, t1) - MULHIGH(sinx, t0); /* cos*ci - sin*cr */
xptr += step;
/* input d: rotated by the third cos/sin pair */
t0 = xptr[0];
t1 = xptr[1];
cosx = csptr[4];
sinx = csptr[5];
r6 = MULHIGH(cosx, t0) + MULHIGH(sinx, t1); /* cos*dr + sin*di */
r7 = MULHIGH(cosx, t1) - MULHIGH(sinx, t0); /* cos*di - sin*dr */
csptr += 6;
/* combine rotated c and d */
t0 = r4;
t1 = r5;
r4 = t0 + r6;
r5 = r7 - t1;
r6 = t0 - r6;
r7 = r7 + t1;
/* xptr is at input d: store the results walking back to input a */
xptr[0] = r0 + r5;
xptr[1] = r1 + r6;
xptr -= step;
xptr[0] = r2 - r4;
xptr[1] = r3 - r7;
xptr -= step;
xptr[0] = r0 - r5;
xptr[1] = r1 - r6;
xptr -= step;
xptr[0] = r2 + r4;
xptr[1] = r3 + r7;
/* next butterfly of this group starts one pair further on */
xptr += 2;
}
/* skip the b, c and d blocks of the group just finished */
xptr += 3*step;
}
/* advance past the 6*bgn twiddle ints used by this pass */
twidTab += 3*step;
bgn <<= 2;
}
}
/*********************************************************************************
*
* function name: PreMDCT
* description:  Rotation applied in place before the FFT. Works from both ends
*               of the buffer towards the middle, two ints from each end per
*               iteration, consuming four table ints (cos/sin for the head,
*               cos/sin for the tail) each time.
* parameters:   buf0  - data, updated in place
*               num   - number of ints in buf0; num/4 iterations are run
*               csptr - cos/sin table, read sequentially
*
**********************************************************************************/
static void PreMDCT(int *buf0, int num, const int *csptr)
{
    int *head = buf0;
    int *tail = buf0 + num - 1;
    int left;

    for (left = num >> 2; left != 0; --left, head += 2, tail -= 2, csptr += 4) {
        const int cosHead = csptr[0];
        const int sinHead = csptr[1];
        const int cosTail = csptr[2];
        const int sinTail = csptr[3];

        /* Each rotation pairs one int from the head with one from the tail. */
        const int headRe = head[0];
        const int tailIm = head[1];
        const int tailRe = tail[-1];
        const int headIm = tail[0];

        head[0]  = MULHIGH(cosHead, headRe) + MULHIGH(sinHead, headIm);
        head[1]  = MULHIGH(cosHead, headIm) - MULHIGH(sinHead, headRe);
        tail[0]  = MULHIGH(cosTail, tailIm) - MULHIGH(sinTail, tailRe);
        tail[-1] = MULHIGH(cosTail, tailRe) + MULHIGH(sinTail, tailIm);
    }
}
/*********************************************************************************
*
* function name: PostMDCT
* description:  Rotation applied in place after the FFT. Works from both ends
*               of the buffer towards the middle, two ints from each end per
*               iteration, consuming four table ints (cos/sin for the head
*               pair, cos/sin for the tail pair) each time.
* parameters:   buf0  - data, updated in place
*               num   - number of ints in buf0; num/4 iterations are run
*               csptr - cos/sin table, read sequentially
*
**********************************************************************************/
static void PostMDCT(int *buf0, int num, const int *csptr)
{
    int *head = buf0;
    int *tail = buf0 + num - 1;
    int left;

    for (left = num >> 2; left != 0; --left, head += 2, tail -= 2, csptr += 4) {
        const int cosHead = csptr[0];
        const int sinHead = csptr[1];
        const int cosTail = csptr[2];
        const int sinTail = csptr[3];

        /* Here each pair is taken whole: (head[0], head[1]) and (tail[-1], tail[0]). */
        const int headRe = head[0];
        const int headIm = head[1];
        const int tailIm = tail[0];
        const int tailRe = tail[-1];

        /* The two results of each rotation land at opposite ends of the buffer. */
        head[0]  = MULHIGH(cosHead, headRe) + MULHIGH(sinHead, headIm);
        tail[0]  = MULHIGH(sinHead, headRe) - MULHIGH(cosHead, headIm);
        head[1]  = MULHIGH(sinTail, tailRe) - MULHIGH(cosTail, tailIm);
        tail[-1] = MULHIGH(cosTail, tailRe) + MULHIGH(sinTail, tailIm);
    }
}
#else
/*
 * ARMV5E / ARMV7Neon builds: the portable C implementations in the other
 * branch of this #if are compiled out, so these five routines are only
 * declared here and must be provided by another object file
 * (presumably hand-written assembly - confirm against the build files).
 * The signatures match the C versions, except that they have external linkage.
 */
void Radix4First(int *buf, int num);
void Radix8First(int *buf, int num);
void Radix4FFT(int *buf, int num, int bgn, int *twidTab);
void PreMDCT(int *buf0, int num, const int *csptr);
void PostMDCT(int *buf0, int num, const int *csptr);
#endif
/**********************************************************************************
*
* function name: Mdct_Long
* description:  the long block mdct (also used for long_start and long_stop
*               blocks): pre-rotation over 1024 ints, reordering, a radix-8
*               first stage and radix-4 passes over the 512-wide halves,
*               then post-rotation. buf is transformed in place.
*
**********************************************************************************/
void Mdct_Long(int *buf)
{
    const int mdctLen   = 1024;
    const int halfLen   = mdctLen >> 1;     /* 512 */
    const int fftGroups = halfLen >> 3;     /* 64  */
    const int *rotTab   = cossintab + 128;  /* long-block part of the cos/sin table */

    PreMDCT(buf, mdctLen, rotTab);
    Shuffle(buf, halfLen, bitrevTab + 17);
    Radix8First(buf, fftGroups);
    Radix4FFT(buf, fftGroups, 8, (int *)twidTab512);
    PostMDCT(buf, mdctLen, rotTab);
}
/**********************************************************************************
*
* function name: Mdct_Short
* description:  the short block mdct: pre-rotation over 128 ints, reordering,
*               a radix-4 first stage and radix-4 passes over the 64-wide
*               halves, then post-rotation. buf is transformed in place.
*
**********************************************************************************/
void Mdct_Short(int *buf)
{
    const int mdctLen   = 128;
    const int halfLen   = mdctLen >> 1;     /* 64 */
    const int fftGroups = halfLen >> 2;     /* 16 */

    PreMDCT(buf, mdctLen, cossintab);
    Shuffle(buf, halfLen, bitrevTab);
    Radix4First(buf, fftGroups);
    Radix4FFT(buf, fftGroups, 4, (int *)twidTab64);
    PostMDCT(buf, mdctLen, cossintab);
}
/*****************************************************************************
*
* function name: shiftMdctDelayBuffer
* description:  the mdct delay buffer has a size of 1600,
*               so the calculation of LONG,STOP must be split in two
*               passes with 1024 samples and a mid shift,
*               the SHORT transforms can be completed in the delay buffer,
*               and afterwards a shift.
*               This routine performs that shift: the samples after the first
*               FRAME_LEN_LONG are moved to the front, then FRAME_LEN_LONG new
*               samples are appended from timeSignal.
*
**********************************************************************************/
static void shiftMdctDelayBuffer(Word16 *mdctDelayBuffer, /*! start of mdct delay buffer */
                                 Word16 *timeSignal,      /*! pointer to new time signal samples, interleaved */
                                 Word16 chIncrement       /*! number of channels */
                                 )
{
    Word32 done;
    Word32 k;
    Word16 *dst = mdctDelayBuffer;
    Word16 *src = mdctDelayBuffer + FRAME_LEN_LONG;

    /* Move the tail of the buffer to the front, eight samples per step. */
    for (done = 0; done < BLOCK_SWITCHING_OFFSET-FRAME_LEN_LONG; done += 8) {
        for (k = 0; k < 8; k++) {
            dst[k] = src[k];
        }
        dst += 8;
        src += 8;
    }

    /* Append one frame of new samples, stepping over the other channels. */
    dst = mdctDelayBuffer + BLOCK_SWITCHING_OFFSET-FRAME_LEN_LONG;
    src = timeSignal;
    for (done = 0; done < FRAME_LEN_LONG; done += 8) {
        for (k = 0; k < 8; k++) {
            *dst++ = *src;
            src += chIncrement;
        }
    }
}
/*****************************************************************************
*
* function name: getScalefactorOfShortVectorStride
* description:  Calculate max possible scale factor for input vector of shorts.
*               The absolute values of all visited samples are OR-ed together
*               and the result is passed to norm_s (presumably the left-shift
*               headroom of a 16-bit value - confirm in basic_op.h).
* returns:      norm_s of the OR-ed magnitudes, or 15 when every visited
*               sample is zero
*
**********************************************************************************/
static Word16 getScalefactorOfShortVectorStride(const Word16 *vector, /*!< Pointer to input vector */
                                                Word16 len,           /*!< Length of input vector */
                                                Word16 stride)        /*!< Stride of input vector */
{
    Word16 magnitudeBits = 0;
    Word16 magnitude;
    Word16 idx = 0;

    while (idx < len) {
        magnitude = abs_s(vector[idx*stride]);
        magnitudeBits |= magnitude;
        idx++;
    }

    if (magnitudeBits == 0) {
        return 15;
    }
    return norm_s(magnitudeBits);
}
/*****************************************************************************
*
* function name: Transform_Real
* description: Windows the delay buffer contents according to blockType,
*              runs the MDCT (Mdct_Long once, or Mdct_Short TRANS_FAC times)
*              in place on realOut, and shifts one frame of new samples from
*              timeSignal into the delay buffer.
* parameters:  mdctDelayBuffer - delay buffer, read and then updated
*              timeSignal      - new input samples, read with stride chIncrement
*              chIncrement     - distance between two samples of this channel
*              realOut         - receives the windowed data and then the spectrum
*              mdctScale       - receives the scale value of the result
*              blockType       - LONG_WINDOW, START_WINDOW, STOP_WINDOW or
*                                SHORT_WINDOW; any other value does nothing
*                                (the switch has no default case) and leaves
*                                *mdctScale untouched
* returns:     nothing (void)
*
**********************************************************************************/
void Transform_Real(Word16 *mdctDelayBuffer,
Word16 *timeSignal,
Word16 chIncrement,
Word32 *realOut,
Word16 *mdctScale,
Word16 blockType
)
{
Word32 i,w;
Word32 timeSignalSample;
Word32 ws1,ws2;
Word16 *dctIn0, *dctIn1;
Word32 *outData0, *outData1;
Word32 *winPtr;
Word32 delayBufferSf,timeSignalSf,minSf;
switch(blockType){
case LONG_WINDOW:
/*
The transform reads BLOCK_SWITCHING_OFFSET (1600) delay buffer samples plus
448 new timeSignal samples; take the smaller of the two scale factors so that
every sample can be shifted up by the same amount for better precision
*/
delayBufferSf = getScalefactorOfShortVectorStride(mdctDelayBuffer,BLOCK_SWITCHING_OFFSET,1);
timeSignalSf = getScalefactorOfShortVectorStride(timeSignal,2*FRAME_LEN_LONG-BLOCK_SWITCHING_OFFSET,chIncrement);
minSf = min(delayBufferSf,timeSignalSf);
minSf = min(minSf,14);
/* dctIn0 walks forward from the start, dctIn1 backward from sample FRAME_LEN_LONG-1 */
dctIn0 = mdctDelayBuffer;
dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
outData0 = realOut + FRAME_LEN_LONG/2;
/* add windows and pre add for mdct to last buffer*/
/* each window word is split into two coefficients: upper 16 bits and lower 16 bits */
winPtr = (int *)LongWindowKBD;
for(i=0;i<FRAME_LEN_LONG/2;i++){
timeSignalSample = (*dctIn0++) << minSf;
ws1 = timeSignalSample * (*winPtr >> 16);
timeSignalSample = (*dctIn1--) << minSf;
ws2 = timeSignalSample * (*winPtr & 0xffff);
winPtr ++;
/* shift 2 to avoid overflow next */
*outData0++ = (ws1 >> 2) - (ws2 >> 2);
}
/* bring in the new frame before windowing the second half */
shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
/* add windows and pre add for mdct to new buffer*/
dctIn0 = mdctDelayBuffer;
dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
/* the lower half of realOut is filled downwards */
outData0 = realOut + FRAME_LEN_LONG/2 - 1;
winPtr = (int *)LongWindowKBD;
for(i=0;i<FRAME_LEN_LONG/2;i++){
timeSignalSample = (*dctIn0++) << minSf;
ws1 = timeSignalSample * (*winPtr & 0xffff);
timeSignalSample = (*dctIn1--) << minSf;
ws2 = timeSignalSample * (*winPtr >> 16);
winPtr++;
/* shift 2 to avoid overflow next */
*outData0-- = -((ws1 >> 2) + (ws2 >> 2));
}
Mdct_Long(realOut);
/* update scale factor */
minSf = 14 - minSf;
*mdctScale=minSf;
break;
case START_WINDOW:
/*
The transform reads BLOCK_SWITCHING_OFFSET (1600) delay buffer samples and no
new timeSignal samples; the scale factor is taken from the delay buffer alone
*/
minSf = getScalefactorOfShortVectorStride(mdctDelayBuffer,BLOCK_SWITCHING_OFFSET,1);
minSf = min(minSf,14);
dctIn0 = mdctDelayBuffer;
dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
outData0 = realOut + FRAME_LEN_LONG/2;
winPtr = (int *)LongWindowKBD;
/* add windows and pre add for mdct to last buffer*/
for(i=0;i<FRAME_LEN_LONG/2;i++){
timeSignalSample = (*dctIn0++) << minSf;
ws1 = timeSignalSample * (*winPtr >> 16);
timeSignalSample = (*dctIn1--) << minSf;
ws2 = timeSignalSample * (*winPtr & 0xffff);
winPtr ++;
*outData0++ = (ws1 >> 2) - (ws2 >> 2); /* shift 2 to avoid overflow next */
}
shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
/* first LS_TRANS samples: no window multiply, only negation and a fixed shift of 15 - 2 + minSf */
outData0 = realOut + FRAME_LEN_LONG/2 - 1;
for(i=0;i<LS_TRANS;i++){
*outData0-- = -mdctDelayBuffer[i] << (15 - 2 + minSf);
}
/* add windows and pre add for mdct to new buffer*/
/* the short window is applied to the FRAME_LEN_SHORT samples in the middle */
dctIn0 = mdctDelayBuffer + LS_TRANS;
dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1 - LS_TRANS;
outData0 = realOut + FRAME_LEN_LONG/2 - 1 -LS_TRANS;
winPtr = (int *)ShortWindowSine;
for(i=0;i<FRAME_LEN_SHORT/2;i++){
timeSignalSample= (*dctIn0++) << minSf;
ws1 = timeSignalSample * (*winPtr & 0xffff);
timeSignalSample= (*dctIn1--) << minSf;
ws2 = timeSignalSample * (*winPtr >> 16);
winPtr++;
*outData0-- = -((ws1 >> 2) + (ws2 >> 2)); /* shift 2 to avoid overflow next */
}
Mdct_Long(realOut);
/* update scale factor */
minSf = 14 - minSf;
*mdctScale= minSf;
break;
case STOP_WINDOW:
/*
The transform reads BLOCK_SWITCHING_OFFSET-LS_TRANS (1600-448) delay buffer
samples plus 448 new timeSignal samples; take the smaller of the two scale factors
*/
delayBufferSf = getScalefactorOfShortVectorStride(mdctDelayBuffer+LS_TRANS,BLOCK_SWITCHING_OFFSET-LS_TRANS,1);
timeSignalSf = getScalefactorOfShortVectorStride(timeSignal,2*FRAME_LEN_LONG-BLOCK_SWITCHING_OFFSET,chIncrement);
minSf = min(delayBufferSf,timeSignalSf);
/* note: this case caps the shift at 13, the other long cases cap it at 14 */
minSf = min(minSf,13);
/* last LS_TRANS samples of the old frame: no window multiply, only negation and a fixed shift */
outData0 = realOut + FRAME_LEN_LONG/2;
dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
for(i=0;i<LS_TRANS;i++){
*outData0++ = -(*dctIn1--) << (15 - 2 + minSf);
}
/* add windows and pre add for mdct to last buffer*/
dctIn0 = mdctDelayBuffer + LS_TRANS;
dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1 - LS_TRANS;
outData0 = realOut + FRAME_LEN_LONG/2 + LS_TRANS;
winPtr = (int *)ShortWindowSine;
for(i=0;i<FRAME_LEN_SHORT/2;i++){
timeSignalSample = (*dctIn0++) << minSf;
ws1 = timeSignalSample * (*winPtr >> 16);
timeSignalSample= (*dctIn1--) << minSf;
ws2 = timeSignalSample * (*winPtr & 0xffff);
winPtr++;
*outData0++ = (ws1 >> 2) - (ws2 >> 2); /* shift 2 to avoid overflow next */
}
shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
/* add windows and pre add for mdct to new buffer*/
dctIn0 = mdctDelayBuffer;
dctIn1 = mdctDelayBuffer + FRAME_LEN_LONG - 1;
outData0 = realOut + FRAME_LEN_LONG/2 - 1;
winPtr = (int *)LongWindowKBD;
for(i=0;i<FRAME_LEN_LONG/2;i++){
timeSignalSample= (*dctIn0++) << minSf;
ws1 = timeSignalSample *(*winPtr & 0xffff);
timeSignalSample= (*dctIn1--) << minSf;
ws2 = timeSignalSample * (*winPtr >> 16);
*outData0-- = -((ws1 >> 2) + (ws2 >> 2)); /* shift 2 to avoid overflow next */
winPtr++;
}
Mdct_Long(realOut);
minSf = 14 - minSf;
*mdctScale= minSf; /* update scale factor */
break;
case SHORT_WINDOW:
/*
The scale factor is taken from 9*FRAME_LEN_SHORT delay buffer samples starting
at TRANSFORM_OFFSET_SHORT; no new timeSignal samples are read before the transforms
*/
minSf = getScalefactorOfShortVectorStride(mdctDelayBuffer+TRANSFORM_OFFSET_SHORT,9*FRAME_LEN_SHORT,1);
minSf = min(minSf,10);
/* one short transform per window; consecutive windows start FRAME_LEN_SHORT samples apart */
for(w=0;w<TRANS_FAC;w++){
dctIn0 = mdctDelayBuffer+w*FRAME_LEN_SHORT+TRANSFORM_OFFSET_SHORT;
dctIn1 = mdctDelayBuffer+w*FRAME_LEN_SHORT+TRANSFORM_OFFSET_SHORT + FRAME_LEN_SHORT-1;
/* outData0 fills the upper half upwards, outData1 the lower half downwards */
outData0 = realOut + FRAME_LEN_SHORT/2;
outData1 = realOut + FRAME_LEN_SHORT/2 - 1;
winPtr = (int *)ShortWindowSine;
for(i=0;i<FRAME_LEN_SHORT/2;i++){
timeSignalSample= *dctIn0 << minSf;
ws1 = timeSignalSample * (*winPtr >> 16);
timeSignalSample= *dctIn1 << minSf;
ws2 = timeSignalSample * (*winPtr & 0xffff);
*outData0++ = (ws1 >> 2) - (ws2 >> 2); /* shift 2 to avoid overflow next */
/* same positions one short frame later, with the window halves exchanged */
timeSignalSample= *(dctIn0 + FRAME_LEN_SHORT) << minSf;
ws1 = timeSignalSample * (*winPtr & 0xffff);
timeSignalSample= *(dctIn1 + FRAME_LEN_SHORT) << minSf;
ws2 = timeSignalSample * (*winPtr >> 16);
*outData1-- = -((ws1 >> 2) + (ws2 >> 2)); /* shift 2 to avoid overflow next */
winPtr++;
dctIn0++;
dctIn1--;
}
Mdct_Short(realOut);
/* the next window's spectrum goes directly after this one */
realOut += FRAME_LEN_SHORT;
}
minSf = 11 - minSf;
*mdctScale = minSf; /* update scale factor */
/* the delay buffer is shifted only after all short transforms have read it */
shiftMdctDelayBuffer(mdctDelayBuffer,timeSignal,chIncrement);
break;
}
}