/* srec/clib/swicms.c - platform/external/srec.git - Git at Google */

 /*---------------------------------------------------------------------------*
  *  swicms.c                                                                 *
  *                                                                           *
  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
  *                                                                           *
  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
  *  you may not use this file except in compliance with the License.         *
  *                                                                           *
  *  You may obtain a copy of the License at                                  *
  *      http://www.apache.org/licenses/LICENSE-2.0                           *
  *                                                                           *
  *  Unless required by applicable law or agreed to in writing, software      *
  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  *  See the License for the specific language governing permissions and      *
  *  limitations under the License.                                           *
  *                                                                           *
  *---------------------------------------------------------------------------*/

 #include <string.h>
 #include"swicms.h"
 #include"srec_sizes.h"
 #include"prelib.h"

 #include "passert.h"
 #include "ESR_Session.h"
 #include "ESR_SessionType.h"
 #include "IntArrayList.h"
 #include "portable.h"

 /*
  * printf_vector(HEAD, FMT, PTR, NN): debug helper. Formats HEAD, then PTR
  * itself cast to int (printed with " %x"), then PTR[0] .. PTR[NN-1] (each with
  * FMT) into a local 256-element LCHAR buffer and passes the buffer to
  * PLogMessage().
  *
  * NOTE(review): every sprintf() here is unbounded, so HEAD plus NN formatted
  * elements must fit in 256 characters. HEAD and the finished buffer are both
  * used as format strings, so neither may contain a stray '%'. The expansion
  * is a bare { } block that declares its own local `i`.
  */
 #define printf_vector(HEAD, FMT, PTR, NN) { int i; LCHAR buffer[256]; sprintf(buffer, HEAD); sprintf(buffer + LSTRLEN(buffer), " %x", (int)PTR); for (i=0; i<(NN); ++i) sprintf(buffer + LSTRLEN(buffer), FMT, PTR[i]); PLogMessage(buffer); }

 /* Cross-utterance CMN calculation:
    We try to normalize the speech frames before they get to the recognizer.
    The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
    We collect these speech frames during recognition. At the end of
    recognition we exclude the silence frames from the collected data, and
    generate a new channel average based on the previous average and the new
    data, using an exponential decay formula.

    In-utterance CMN calculation:
    A new short-term average mechanism was introduced, with faster update,
    to improve recognition on the very first recognition after init or reset.
    We wait for a minimum number of new data frames to apply this. We also
    disable the fast updater after some frames, because we assume the
    cross-utterance estimator to be more reliable, particularly in its
    ability to exclude silence frames from the calculation.
 */

 /* Default settings for cross-utterance CMS (installed by swicms_init()). */
 #define SWICMS_FORGET_FACTOR_DEFAULT        400 /* effective frames of history */
 #define SWICMS_SBINDEX_DEFAULT              100 /* percent weight of speech vs. background average; 100 = speech frames only */
 /* #define SWICMS_CACHE_RESOLUTION_DEFAULT  see swicms.h */
 /* #define SWICMS_CACHE_SIZE_DEFAULT        see swicms.h */

 /* Default settings for in-utterance CMS. */
 /* Sentinel: in-utterance CMS is skipped while forget_factor2 equals this value (any large number). */
 #define SWICMS_INUTT_FORGET_FACTOR2_DISABLE 65535
 /* In-utterance CMS is therefore disabled unless the session overrides forget_factor2. */
 #define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT SWICMS_INUTT_FORGET_FACTOR2_DISABLE
 /* Stop the in-utterance update once the cross-utterance estimate becomes more reliable. */
 /* NOTE(review): swicms_init() assigns the literals 200 and 10 rather than the two macros below. */
 #define SWICMS_INUTT_DISABLE_AFTER_FRAMES   200
 /* Wait this many accumulated frames while the estimate is still poor. */
 #define SWICMS_INUTT_ENABLE_AFTER_FRAMES    10

 /**
  * Logging Stuff
  */
 #define LOG_LEVEL 2
 #define MODULE_NAME L("swicms.c")
 //static const char* MTAG = MODULE_NAME;

 /* RCS revision tag. The always-false conditional references rcsid itself;
  * presumably this keeps compilers from warning about an unused static. */
 static const char *rcsid = 0 ? (const char *) &rcsid :
                            "$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $";

 /* Debug switch: loaded from the session property CREC.Frontend.swicms.debug
  * in swicms_init(); gates the vector dumps in swicms_update() and
  * swicms_lda_process(). */
 static ESR_BOOL SWICMS_DEBUG = ESR_FALSE;

 /* these are good values from cmn/tmn files */
 /* Default channel mean (cmn) copied into swicms->cmn by swicms_init() when
  * the sample rate is 8000. */
 static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
   {
     158, 141,  99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
   };

 /* Default channel mean (cmn) used by swicms_init() for every other sample
  * rate (11025 is assumed when no session exists). */
 static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
   {
     163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
   };

 /* Default target mean (tmn) copied into swicms->tmn for a sample rate of 8000. */
 static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
   {
     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
   };

 /* Default target mean (tmn) for every other sample rate; the values are
  * currently identical to gswicms_tmn1_8. */
 static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
   {
     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
   };

 /**
  * Optionally overrides parvalue[] with an integer-list session property.
  *
  * When the session does not contain parname, or the lookup reports
  * ESR_NO_MATCH_ERROR, parvalue is left untouched so that the defaults the
  * caller already stored there stay in effect.
  *
  * @param parname  Name of the session property to look up.
  * @param parvalue Destination for reqSize values.
  * @param reqSize  Exact number of entries the property must hold.
  * @return ESR_SUCCESS when the property was absent or was copied;
  *         ESR_FATAL_ERROR on a read error or a length mismatch; otherwise
  *         the code returned by the failing session/list call.
  */
 static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
 {
   size_t i, size;
   ESR_ReturnCode rc;
   ESR_BOOL exists;
   IntArrayList* intList = 0;

   CHKLOG(rc, ESR_SessionContains(parname, &exists));
   if (!exists)
     return ESR_SUCCESS;

   rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
   if (rc == ESR_NO_MATCH_ERROR) {
     /* no match will revert to default data already in static array */
     return ESR_SUCCESS;
   }
   if (rc != ESR_SUCCESS) {
     PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
     return ESR_FATAL_ERROR;
   }

   CHKLOG(rc, IntArrayListGetSize(intList, &size));
   if (size != reqSize) {
     /* rc is ESR_SUCCESS here, so report the two lengths instead of rc;
        the casts make the size_t arguments match the %d conversions. */
     PLogError(L("Error reading %s from session, expected len %d, got %d"),
               parname, (int) reqSize, (int) size);
     return ESR_FATAL_ERROR;
   }

   /* size == reqSize, so this also covers the single-value case */
   for (i = 0; i < size; ++i) {
     CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
   }
   return ESR_SUCCESS;
  CLEANUP:
   return rc;
 }

 /**
  * Puts a swicms_norm_info into its initial state.
  *
  * Built-in defaults are installed first; the cmn/tmn tables are chosen by
  * sample rate (the 8 kHz tables for 8000, the 11 kHz tables otherwise, and
  * 11025 is assumed when no ESR session exists). When a session exists, the
  * CREC.Frontend.swicms.* properties then override those defaults.
  * The LDA-domain vectors are zeroed, is_valid is cleared and _prep is
  * detached; swicms_lda_process() fills all of them in again.
  *
  * @param swicms State to initialise.
  * @return 0 on success, otherwise the ESR_ReturnCode of the failing session
  *         access.
  */
 int swicms_init(swicms_norm_info* swicms)
 {
   ESR_ReturnCode    rc = ESR_SUCCESS;
   size_t            i;
   ESR_BOOL          exists = ESR_FALSE, sessionExists;
   size_t            sample_rate;
   const imeldata*   default_cmn;
   const imeldata*   default_tmn;

   /* defaults */
   swicms->sbindex          = SWICMS_SBINDEX_DEFAULT;
   swicms->cached_num_frames = 0;
   swicms->forget_factor    = SWICMS_FORGET_FACTOR_DEFAULT;
   swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
   swicms->num_frames_in_cmn = 0;
   /* swicms_get_cmn()/swicms_set_cmn() test _prep against NULL, and the
      lda_* vectors are invalidated below, so no transform may stay attached. */
   swicms->_prep = NULL;

   CHKLOG(rc, ESR_SessionExists(&sessionExists));

   if (sessionExists)
   {  /* We'll assume this rate is valid or someone else will be complaining.   SteveR */
     rc = ESR_SessionGetSize_t ( L ( "CREC.Frontend.samplerate" ), &sample_rate );

     if ( rc != ESR_SUCCESS )
       return ( rc );
   }
   else
     sample_rate = 11025;

   /* init the data structures by copying the static data so that we can have a copy if we need to reset */
   if ( sample_rate == 8000 )
   {
     default_cmn = gswicms_cmn1_8;
     default_tmn = gswicms_tmn1_8;
   }
   else
   {
     default_cmn = gswicms_cmn1_11;
     default_tmn = gswicms_tmn1_11;
   }
   for ( i = 0; i < MAX_CHAN_DIM; i++ )
   {
     swicms->cmn [i] = default_cmn [i];
     swicms->tmn [i] = default_tmn [i];
     swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
     swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
   }

   if (sessionExists)
   {
     const LCHAR* parname = L("CREC.Frontend.swicms.debug");
     CHKLOG(rc, ESR_SessionContains(parname, &exists));
     if (exists) {
       rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
       if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
         PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
         return rc;
       }
     }

     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.forget_factor"),
                            &swicms->forget_factor, 1);
     if(rc != ESR_SUCCESS) return rc;

     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.sbindex"),
                            &swicms->sbindex, 1);
     if(rc != ESR_SUCCESS) return rc;

     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn"),
                            &swicms->cmn[0], MAX_CHAN_DIM);
     if(rc != ESR_SUCCESS) return rc;

     /* the rate-specific property is read last and therefore wins */
     if ( sample_rate == 8000 )
       rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn8"), &swicms->cmn[0], MAX_CHAN_DIM);
     else
       rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn11"), &swicms->cmn[0], MAX_CHAN_DIM);
     if(rc != ESR_SUCCESS) return rc;

     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.tmn"),
                            &swicms->tmn[0], MAX_CHAN_DIM);
     if(rc != ESR_SUCCESS) return rc;
   }

   swicms->is_valid = 0;
   for (i = 0; i < MAX_CHAN_DIM; i++)
     swicms->adjust[i] = 255;

 #ifdef SREC_ENGINE_VERBOSE_LOGGING
   PLogMessage("swicms->forget_factor    = %d\n", swicms->forget_factor);
   PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
   PLogMessage("swicms->sbindex          = %d\n", swicms->sbindex);
 #endif

   /* in-utt cms parameters */
   swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
   swicms->inutt.disable_after  = 200;
   swicms->inutt.enable_after   = 10;    /* in-utt is less reliable       */
   swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
   swicms->inutt.num_frames_since_bou = 0;
   swicms->inutt.num_frames_in_accum = 0;
   for(i=0; i<MAX_CHAN_DIM; i++) swicms->inutt.accum[i] = 0;

   if (sessionExists) {
     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
                           &swicms->inutt.forget_factor2, 1);
     if(rc != ESR_SUCCESS) return rc;

     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
                           &swicms->inutt.disable_after, 1);
     if(rc != ESR_SUCCESS) return rc;

     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
                           &swicms->inutt.enable_after, 1);
     if(rc != ESR_SUCCESS) return rc;

     /* we need to estimate the in-utt cmn from speech frames only! so let's
        make sure to skip some frames before collecting data.
        This step is best effort: a failed lookup keeps the default of 20
        instead of acting on a stale `exists` or on an unread windback value. */
     exists = ESR_FALSE;
     if (ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists) == ESR_SUCCESS
         && exists) {
       ESR_BOOL do_skip_even_frames = ESR_TRUE;
       int windback = 0;

       /* optional property; ESR_TRUE is used when it cannot be read */
       ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
       if (ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &windback) == ESR_SUCCESS) {
         if (do_skip_even_frames)
           windback /= 2;
         swicms->inutt.num_bou_frames_to_skip = windback - 5; /* ensure spch frames only */
       }
     }
   }

   return 0;
  CLEANUP:
   return rc;
 }


 /**
  * Writes the current channel mean as a comma-separated list of
  * MAX_CHAN_DIM decimal integers.
  *
  * When an LDA transform is attached (_prep != NULL) the values are lda_cmn
  * passed through inverse_transform_frame(); otherwise the raw cmn values
  * are reported.
  *
  * @param swicms     Normalisation state to read.
  * @param cmn_params Output buffer receiving the NUL-terminated list.
  * @param len        In: capacity of cmn_params in characters. Out: set to
  *                   the recommended buffer size only when
  *                   ESR_BUFFER_OVERFLOW is returned; untouched otherwise.
  * @return ESR_SUCCESS, ESR_BUFFER_OVERFLOW when the buffer is too small, or
  *         ESR_INVALID_ARGUMENT when any pointer is NULL.
  */
 ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t* len )
 {
   int dim_count;
   size_t used = 0;
   imeldata temp[MAX_CHAN_DIM];
   /* worst case for one field: ',' plus "-2147483648" is 12 characters */
   const size_t INT_LENGTH = 12;

   if ( swicms == NULL || cmn_params == NULL || len == NULL )
     return ESR_INVALID_ARGUMENT;

   if (  swicms->_prep != NULL )	/* lda exists give them transformed lda. */
   {
     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
       temp [dim_count] = swicms->lda_cmn [dim_count];
     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
   }
   else	/* lda does not exist give them raw cmn values */
   {
     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
       temp [dim_count] = swicms->cmn [dim_count];
   }

   for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
   {
     /* Check BEFORE writing: one field plus its terminating NUL needs up to
        INT_LENGTH + 1 characters, so never write unless that much is left. */
     if ( used + INT_LENGTH >= *len )
     {
       *len = MAX_CHAN_DIM * (INT_LENGTH + 2) * sizeof(LCHAR);
       return ESR_BUFFER_OVERFLOW;
     }
     used += (size_t) sprintf( cmn_params + used, dim_count==0 ? "%d" : ",%d", temp [dim_count] );
   }

   return ESR_SUCCESS;
 }


 /**
  * Replaces the channel mean with values parsed from a string.
  *
  * cmn_params must hold exactly MAX_CHAN_DIM comma-separated decimal numbers
  * in the range 0..255, with no other characters. Nothing is stored unless
  * the whole string is valid. When an LDA transform is attached (_prep !=
  * NULL), lda_cmn and adjust are recomputed from the new values; otherwise
  * swicms_lda_process() does that later. num_frames_in_cmn is reset to 0
  * whether or not the new values were accepted.
  *
  * @param swicms     Normalisation state to update.
  * @param cmn_params NUL-terminated parameter string.
  * @return ESR_SUCCESS, ESR_INVALID_ARGUMENT for a NULL or malformed
  *         argument, or ESR_OUT_OF_MEMORY.
  */
 ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
 {
   ESR_ReturnCode    set_status;
   int               length_of_params;
   int               dim_count;
   int               got_word;
   int               current_position;
   char              *copy_of_params;
   char              *parsed_strings [MAX_CHAN_DIM];
   int               temp_cmn [MAX_CHAN_DIM];
   long              parsed_value;

   if ( swicms == NULL || cmn_params == NULL )
     return ESR_INVALID_ARGUMENT;

   length_of_params = strlen ( cmn_params ) + 1;
   copy_of_params = (char*)MALLOC ( length_of_params, NULL );

   if ( copy_of_params != NULL )
   {
     set_status = ESR_SUCCESS;
     memcpy ( copy_of_params, cmn_params, length_of_params );
     dim_count = 0;
     current_position = 0;
     got_word = 0;
     parsed_strings [dim_count] = copy_of_params + current_position;

     /* Split the copy in place: each ',' becomes a NUL and parsed_strings[]
        records where each number starts. The scan position can never run
        past the terminating NUL, so no extra end-of-buffer test is needed. */
     while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
     {
       switch ( *( copy_of_params + current_position ) )
       {
         case '\0':
           if ( got_word == 1 )
           {
             if ( dim_count == ( MAX_CHAN_DIM - 1 ) )
               dim_count++;	/* last value complete: ends the loop */
             else
             {
               PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
               set_status = ESR_INVALID_ARGUMENT;
             }
           }
           else
           {
             PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
             set_status = ESR_INVALID_ARGUMENT;
           }
           break;

         case ',':
           if ( ( got_word == 1 ) && ( dim_count < ( MAX_CHAN_DIM - 1 ) ) )
           {
             dim_count++;
             *( copy_of_params + current_position) = '\0';
             current_position++;
             parsed_strings [dim_count] = copy_of_params + current_position;
             got_word = 0;
           }
           else
           {
             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
             set_status = ESR_INVALID_ARGUMENT;
           }
           break;

         case '0':
         case '1':
         case '2':
         case '3':
         case '4':
         case '5':
         case '6':
         case '7':
         case '8':
         case '9':
           got_word = 1;
           current_position++;
           break;

         default:
           /* the %c conversion needs the offending character as its argument */
           PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n",
                       *( copy_of_params + current_position ) );
           set_status = ESR_INVALID_ARGUMENT;
           break;
       }
     }
     if ( set_status == ESR_SUCCESS )
     {
       for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
       {
         /* strtol saturates on overflow (atoi is undefined there), so an
            over-long digit string is simply rejected by the range test */
         parsed_value = strtol ( parsed_strings [dim_count], NULL, 10 );

         if ( ( parsed_value < 0 ) || ( parsed_value > 255 ) )
         {
           PLogError ( "Channel Normalization : Param %d Out Of Range Must Be 0 To 255\n", dim_count );
           set_status = ESR_INVALID_ARGUMENT;
           break;
         }
         temp_cmn [dim_count] = (int) parsed_value;
       }
       if ( set_status == ESR_SUCCESS )
       {
         for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
           swicms->cmn [dim_count] = temp_cmn [dim_count];
         if ( swicms->_prep != NULL )	/* Set now if NULL it will automatically be set on first utterance */
         {
           /* Mirror swicms_lda_process(): load the new cmn into lda_cmn
              BEFORE transforming it, then refresh the applied adjustment. */
           for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
             swicms->lda_cmn [dim_count] = swicms->cmn [dim_count];
           linear_transform_frame(swicms->_prep, swicms->lda_cmn, 1 /*do_shift*/);
           for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
             swicms->adjust [dim_count] = swicms->lda_tmn [dim_count] - swicms->lda_cmn [dim_count];
         }
       }
     }
     FREE ( copy_of_params );
   }
   else
   {
     PLogError ( "Channel Normalization Out Of Memory Error\n" );
     set_status = ESR_OUT_OF_MEMORY;
   }
   swicms->num_frames_in_cmn = 0;
   return ( set_status );
 }


 /**
  * Adds one frame to the per-section running sums of the frame cache.
  *
  * Frames are grouped into sections of cache_resolution consecutive frames;
  * a section is cleared when its first frame arrives. Once
  * SWICMS_CACHE_SIZE_DEFAULT sections are in use, further frames are ignored
  * and cached_num_frames stops growing.
  *
  * @param swicms Normalisation state owning the cache.
  * @param frame  Frame of MAX_CHAN_DIM values to accumulate.
  * @param dimen  Frame dimension; asserted to equal MAX_CHAN_DIM.
  * @return Always 0.
  */
 int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
 {
   int section;
   int k;
   imeldata *sums;

   ASSERT(dimen == MAX_CHAN_DIM);

   section = swicms->cached_num_frames / swicms->cache_resolution;
   if (section >= SWICMS_CACHE_SIZE_DEFAULT)
     return 0;  /* cache full: drop the frame */

   sums = swicms->cached_sections[section];

   /* first frame of a section starts from a clean slate */
   if (swicms->cached_num_frames % swicms->cache_resolution == 0)
   {
     for (k = 0; k < MAX_CHAN_DIM; k++)
       sums[k] = 0;
   }

   for (k = 0; k < MAX_CHAN_DIM; k++)
     sums[k] += frame[k];

   swicms->cached_num_frames++;
   return 0;
 }

 /**
  * Normalises one frame: oframe[k] = MAKEBYTE(iframe[k] + adjust[k]).
  *
  * Before that, when in-utterance CMS is active (forget_factor2 differs from
  * SWICMS_INUTT_FORGET_FACTOR2_DISABLE), the frame may be added to the
  * in-utterance accumulator and adjust[] may be re-estimated from it.
  *
  * @param swicms Normalisation state (inutt counters, accum and adjust are
  *               updated).
  * @param oframe Output frame.
  * @param iframe Input frame.
  * @param dimen  Frame dimension; asserted to equal MAX_CHAN_DIM.
  * @return Always 0.
  */
 int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
     imeldata* oframe,
     imeldata* iframe, int dimen)
 {
   int k;

   ASSERT(dimen == MAX_CHAN_DIM);

   if (swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE)
   {
     /* still collecting (cross-utt has not taken over) and already past the
        leading silence frames? */
     int collecting = swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after;
     int past_silence = swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip;

     if (collecting && past_silence)
     {
       swicms->inutt.num_frames_in_accum++;
       for (k = 0; k < dimen; k++)
         swicms->inutt.accum[k] += iframe[k];

       /* only trust the estimate once enough frames were accumulated */
       if (swicms->inutt.num_frames_in_accum > swicms->inutt.enable_after)
       {
         /* the divisor is the same for every dimension */
         imeldata total_weight = ( swicms->inutt.forget_factor2
                                   + swicms->inutt.num_frames_in_accum );

         for (k = 0; k < dimen; k++)
         {
           /* rounded weighted average of the old lda_cmn and the new accum */
           imeldata blended = (swicms->lda_cmn[k] * swicms->inutt.forget_factor2
                               + swicms->inutt.accum[k] + total_weight / 2) / total_weight;
           swicms->adjust[k] = swicms->lda_tmn[k] - blended;
         }
       }
     }
     swicms->inutt.num_frames_since_bou++;
   }

   for (k = 0; k < dimen; k++)
     oframe[k] = MAKEBYTE(iframe[k] + swicms->adjust[k]);

   return 0;
 }

 /**
  * End-of-utterance update of the cross-utterance channel mean.
  *
  * The cached section sums are split into a speech part (sections between
  * speech_start and speech_end) and a background part (all other complete
  * sections). Their per-dimension averages are mixed according to sbindex
  * (percent weight of speech) and folded into lda_cmn with forget_factor as
  * the weight of the history; adjust[] is refreshed from the result. The
  * frame cache and the in-utterance begin-of-utterance counter are reset on
  * every call.
  *
  * @param swicms       Normalisation state.
  * @param speech_start First speech frame.
  * @param speech_end   Frame ending the speech region; 0 or MAXframeID mean
  *                     that no usable bound is available.
  * @return 1 when the utterance is rejected (no frames or bounds, or a mean
  *         too far from the target); 0 otherwise, including the case where
  *         the bounds leave no complete section to average.
  */
 int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
 {
   int i, j;
   asr_int32_t avg[MAX_CHAN_DIM];
   int ff;
   int nn, speech_nn, backgr_nn;
   int have_speech, have_backgr;
   int num_frames = swicms->cached_num_frames;
   int cache_start, cache_end, backgr_cache_end;
   int sbindex = swicms->sbindex;

   /* init for utterance */
   swicms->inutt.num_frames_since_bou = 0;
   swicms->cached_num_frames = 0;

   if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
   {
     if (speech_end != 0 || speech_start != 0)
       PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
                 speech_start, speech_end, num_frames);
     return 1;
   }

   /* convert frame positions to indices of complete cache sections */
   cache_start = speech_start / swicms->cache_resolution;
   cache_end = ((speech_end < num_frames) ? speech_end : num_frames) / swicms->cache_resolution;
   backgr_cache_end = num_frames / swicms->cache_resolution;

   speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
   backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;

   /* Validate the bounds BEFORE touching the cache, so inconsistent bounds
      can never index outside cached_sections[]. */
   if (cache_start < 0 || speech_nn < 0 || backgr_nn < 0
       || (speech_nn == 0 && backgr_nn == 0))
     return 0;

   /* Decide the fallback once, from the real counts, so it is applied to
      every dimension and not just the first one. */
   have_speech = (speech_nn > 0);
   have_backgr = (backgr_nn > 0);

   for (i = 0; i < MAX_CHAN_DIM; i++)
   {
     asr_int32_t speech_mean = 0;
     asr_int32_t backgr_mean = 0;

     for (j = cache_start; j < cache_end; j++)
       speech_mean += swicms->cached_sections[j][i];
     for (j = 0; j < cache_start; j++)
       backgr_mean += swicms->cached_sections[j][i];
     for (j = cache_end; j < backgr_cache_end; j++)
       backgr_mean += swicms->cached_sections[j][i];

     if (have_speech)
       speech_mean /= speech_nn;
     if (have_backgr)
       backgr_mean /= backgr_nn;
     /* a missing class borrows the average of the other one */
     if (!have_speech)
       speech_mean = backgr_mean;
     if (!have_backgr)
       backgr_mean = speech_mean;

     avg[i] = (sbindex * speech_mean + (100 - sbindex) * backgr_mean + 50) / 100;
   }
   /* a missing class also borrows the frame count of the other one */
   if (!have_speech)
     speech_nn = backgr_nn;
   if (!have_backgr)
     backgr_nn = speech_nn;
   nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;

   for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
   {
     ff += (swicms->lda_tmn[i] - avg[i]);
   }
   ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
   if (ff > 5)
   {
     PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
     return 1;
   }
   ff = swicms->forget_factor;
   if (ff < 9999)
   {
     /* rounded weighted average: ff frames of history vs. nn new frames */
     for (i = 0; i < MAX_CHAN_DIM; i++)
     {
       swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2)  / (ff + nn);
       swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
     }
   }

   if (SWICMS_DEBUG)
   {
     imeldata temp[MAX_CHAN_DIM];
     PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);

     for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
     /* use this dump, to put back into CREC.Frontend.swicms.cmn */
     printf_vector("swicms.cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
   }
   swicms->num_frames_in_cmn += nn;
   return 0;
 }

 /**
  * Brings the target and channel means into the LDA domain.
  *
  * tmn and cmn are copied to lda_tmn and lda_cmn, both copies are passed
  * through linear_transform_frame() with prep, and adjust[] is set to their
  * difference. The state is then marked valid and prep is remembered in
  * _prep. With SWICMS_DEBUG set, cmn, lda_cmn and the inverse-transformed
  * lda_cmn are logged.
  *
  * @param swicms Normalisation state to update.
  * @param prep   Transform handed to linear_transform_frame().
  * @return Always 0.
  */
 int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
 {
   int d;

   for (d = 0; d < MAX_CHAN_DIM; d++)
   {
     swicms->lda_tmn[d] = swicms->tmn[d];
     swicms->lda_cmn[d] = swicms->cmn[d];
   }
   linear_transform_frame(prep, swicms->lda_tmn, 1 /*do_shift*/);
   linear_transform_frame(prep, swicms->lda_cmn, 1 /*do_shift*/);

   d = 0;
   while (d < MAX_CHAN_DIM)
   {
     swicms->adjust[d] = swicms->lda_tmn[d] - swicms->lda_cmn[d];
     d++;
   }

   swicms->is_valid = 1;
   swicms->_prep = prep;

   if (!SWICMS_DEBUG)
     return 0;

   {
     /* round trip: the logged values should match swicms->cmn */
     imeldata restored[MAX_CHAN_DIM];

     printf_vector("swicms->cmn     ", " %d", swicms->cmn,     MAX_CHAN_DIM);
     printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);

     for (d = 0; d < MAX_CHAN_DIM; d++)
       restored[d] = swicms->lda_cmn[d];
     inverse_transform_frame( swicms->_prep, restored, 1 /*do_shift*/);
     printf_vector("swicms->cmn(r)  ", " %d", restored, MAX_CHAN_DIM);
   }
   return 0;
 }
	/*---------------------------------------------------------------------------*
	 * swicms.c *
	 * *
	 * Copyright 2007, 2008 Nuance Communciations, Inc. *
	 * *
	 * Licensed under the Apache License, Version 2.0 (the 'License'); *
	 * you may not use this file except in compliance with the License. *
	 * *
	 * You may obtain a copy of the License at *
	 * http://www.apache.org/licenses/LICENSE-2.0 *
	 * *
	 * Unless required by applicable law or agreed to in writing, software *
	 * distributed under the License is distributed on an 'AS IS' BASIS, *
	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
	 * See the License for the specific language governing permissions and *
	 * limitations under the License. *
	 * *
	 *---------------------------------------------------------------------------*/

	#include <string.h>
	#include"swicms.h"
	#include"srec_sizes.h"
	#include"prelib.h"

	#include "passert.h"
	#include "ESR_Session.h"
	#include "ESR_SessionType.h"
	#include "IntArrayList.h"
	#include "portable.h"

	/*
	 * printf_vector(HEAD, FMT, PTR, NN): debug helper. Formats HEAD, then PTR
	 * itself cast to int (printed with " %x"), then PTR[0] .. PTR[NN-1] (each
	 * with FMT) into a local 256-element LCHAR buffer and passes the buffer to
	 * PLogMessage().
	 *
	 * NOTE(review): every sprintf() here is unbounded, so HEAD plus NN formatted
	 * elements must fit in 256 characters. HEAD and the finished buffer are both
	 * used as format strings, so neither may contain a stray '%'.
	 */
	#define printf_vector(HEAD, FMT, PTR, NN) { int i; LCHAR buffer[256]; sprintf(buffer, HEAD); sprintf(buffer + LSTRLEN(buffer), " %x", (int)PTR); for (i=0; i<(NN); ++i) sprintf(buffer + LSTRLEN(buffer), FMT, PTR[i]); PLogMessage(buffer); }

	/* Cross-utterance CMN calculation:
	We try to normalize the speech frames before they get to the recognizer.
	The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
	We collect these speech frames during recognition. At the end of
	recognition we exclude the silence frames from the collected data, and
	generate a new channel average based on the previous average and the new
	data, using an exponential decay formula.

	In-utterance CMN calculation:
	A new short-term average mechanism was introduced, with faster update,
	to improve recognition on the very first recognition after init or reset.
	We wait for a minimum number of new data frames to apply this. We also
	disable the fast updater after some frames, because we assume the
	cross-utterance estimator to be more reliable, particularly in its
	ability to exclude silence frames from the calculation.
	*/

	/* Default settings for cross-utterance CMS (installed by swicms_init()). */
	#define SWICMS_FORGET_FACTOR_DEFAULT 400 /* effective frames of history */
	#define SWICMS_SBINDEX_DEFAULT 100 /* use speech frames only */
	/* #define SWICMS_CACHE_RESOLUTION_DEFAULT see swicms.h */
	/* #define SWICMS_CACHE_SIZE_DEFAULT see swicms.h */

	/* Default settings for in-utterance CMS. */
	/* Sentinel value: swicms_init() stores it as the default forget_factor2 (any large number). */
	#define SWICMS_INUTT_FORGET_FACTOR2_DISABLE 65535
	#define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT SWICMS_INUTT_FORGET_FACTOR2_DISABLE
	/* Stop the in-utterance update once the cross-utterance estimate becomes more reliable. */
	/* NOTE(review): swicms_init() assigns the literals 200 and 10 rather than the two macros below. */
	#define SWICMS_INUTT_DISABLE_AFTER_FRAMES 200
	/* Wait this many accumulated frames while the estimate is still poor. */
	#define SWICMS_INUTT_ENABLE_AFTER_FRAMES 10

	/**
	* Logging Stuff
	*/
	#define LOG_LEVEL 2
	#define MODULE_NAME L("swicms.c")
	//static const char* MTAG = MODULE_NAME;

	/* RCS revision tag. The pointer declarators are required: rcsid must be a
	 * `const char *` to hold the string literal. The always-false conditional
	 * merely references rcsid itself. */
	static const char *rcsid = 0 ? (const char *) &rcsid :
	"$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $";

	static ESR_BOOL SWICMS_DEBUG = ESR_FALSE;

	/* these are good values from cmn/tmn files */
	/* Default channel mean (cmn) used by swicms_init() for a sample rate of 8000. */
	static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
	{
	158, 141, 99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
	};

	/* Default channel mean (cmn) used by swicms_init() for every other sample rate. */
	static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
	{
	163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
	};

	/* Default target mean (tmn) used by swicms_init() for a sample rate of 8000. */
	static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
	{
	108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
	};

	/* Default target mean (tmn) for every other sample rate; values are
	 * currently identical to gswicms_tmn1_8. */
	static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
	{
	108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
	127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
	};

	/**
	 * Optionally overrides parvalue[] with an integer-list session property.
	 *
	 * When the session does not contain parname, or the lookup reports
	 * ESR_NO_MATCH_ERROR, parvalue is left untouched so that the defaults the
	 * caller already stored there stay in effect.
	 *
	 * @param parname  Name of the session property to look up.
	 * @param parvalue Destination for reqSize values.
	 * @param reqSize  Exact number of entries the property must hold.
	 * @return ESR_SUCCESS when the property was absent or was copied;
	 *         ESR_FATAL_ERROR on a read error or a length mismatch; otherwise
	 *         the code returned by the failing session/list call.
	 */
	static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
	{
	  size_t i, size;
	  ESR_ReturnCode rc;
	  ESR_BOOL exists;
	  IntArrayList* intList = 0;

	  CHKLOG(rc, ESR_SessionContains(parname, &exists));
	  if (!exists)
	    return ESR_SUCCESS;

	  rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
	  if (rc == ESR_NO_MATCH_ERROR) {
	    /* no match will revert to default data already in static array */
	    return ESR_SUCCESS;
	  }
	  if (rc != ESR_SUCCESS) {
	    PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
	    return ESR_FATAL_ERROR;
	  }

	  CHKLOG(rc, IntArrayListGetSize(intList, &size));
	  if (size != reqSize) {
	    /* rc is ESR_SUCCESS here, so report the two lengths instead of rc;
	       the casts make the size_t arguments match the %d conversions. */
	    PLogError(L("Error reading %s from session, expected len %d, got %d"),
	              parname, (int) reqSize, (int) size);
	    return ESR_FATAL_ERROR;
	  }

	  /* size == reqSize, so this also covers the single-value case */
	  for (i = 0; i < size; ++i) {
	    CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
	  }
	  return ESR_SUCCESS;
	 CLEANUP:
	  return rc;
	}

	/**
	 * Puts a swicms_norm_info into its initial state.
	 *
	 * Built-in defaults are installed first; the cmn/tmn tables are chosen by
	 * sample rate (the 8 kHz tables for 8000, the 11 kHz tables otherwise, and
	 * 11025 is assumed when no ESR session exists). When a session exists, the
	 * CREC.Frontend.swicms.* properties then override those defaults.
	 * The LDA-domain vectors are zeroed, is_valid is cleared and _prep is
	 * detached; swicms_lda_process() is expected to fill them in again.
	 *
	 * @param swicms State to initialise.
	 * @return 0 on success, otherwise the ESR_ReturnCode of the failing session
	 *         access.
	 */
	int swicms_init(swicms_norm_info* swicms)
	{
	  ESR_ReturnCode    rc = ESR_SUCCESS;
	  size_t            i;
	  ESR_BOOL          exists = ESR_FALSE, sessionExists;
	  size_t            sample_rate;
	  const imeldata*   default_cmn;
	  const imeldata*   default_tmn;

	  /* defaults */
	  swicms->sbindex = SWICMS_SBINDEX_DEFAULT;
	  swicms->cached_num_frames = 0;
	  swicms->forget_factor = SWICMS_FORGET_FACTOR_DEFAULT;
	  swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
	  swicms->num_frames_in_cmn = 0;
	  /* the lda_* vectors are invalidated below, so no transform may stay
	     attached to them */
	  swicms->_prep = NULL;

	  CHKLOG(rc, ESR_SessionExists(&sessionExists));

	  if (sessionExists)
	  { /* We'll assume this rate is valid or someone else will be complaining. SteveR */
	    rc = ESR_SessionGetSize_t ( L ( "CREC.Frontend.samplerate" ), &sample_rate );

	    if ( rc != ESR_SUCCESS )
	      return ( rc );
	  }
	  else
	    sample_rate = 11025;

	  /* init the data structures by copying the static data so that we can have a copy if we need to reset */
	  if ( sample_rate == 8000 )
	  {
	    default_cmn = gswicms_cmn1_8;
	    default_tmn = gswicms_tmn1_8;
	  }
	  else
	  {
	    default_cmn = gswicms_cmn1_11;
	    default_tmn = gswicms_tmn1_11;
	  }
	  for ( i = 0; i < MAX_CHAN_DIM; i++ )
	  {
	    swicms->cmn [i] = default_cmn [i];
	    swicms->tmn [i] = default_tmn [i];
	    swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
	    swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
	  }

	  if (sessionExists)
	  {
	    const LCHAR* parname = L("CREC.Frontend.swicms.debug");
	    CHKLOG(rc, ESR_SessionContains(parname, &exists));
	    if (exists) {
	      rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
	      if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
	        PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
	        return rc;
	      }
	    }

	    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.forget_factor"),
	                           &swicms->forget_factor, 1);
	    if(rc != ESR_SUCCESS) return rc;

	    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.sbindex"),
	                           &swicms->sbindex, 1);
	    if(rc != ESR_SUCCESS) return rc;

	    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn"),
	                           &swicms->cmn[0], MAX_CHAN_DIM);
	    if(rc != ESR_SUCCESS) return rc;

	    /* the rate-specific property is read last and therefore wins */
	    if ( sample_rate == 8000 )
	      rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn8"), &swicms->cmn[0], MAX_CHAN_DIM);
	    else
	      rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn11"), &swicms->cmn[0], MAX_CHAN_DIM);
	    if(rc != ESR_SUCCESS) return rc;

	    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.tmn"),
	                           &swicms->tmn[0], MAX_CHAN_DIM);
	    if(rc != ESR_SUCCESS) return rc;
	  }

	  swicms->is_valid = 0;
	  for (i = 0; i < MAX_CHAN_DIM; i++)
	    swicms->adjust[i] = 255;

	#ifdef SREC_ENGINE_VERBOSE_LOGGING
	  PLogMessage("swicms->forget_factor = %d\n", swicms->forget_factor);
	  PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
	  PLogMessage("swicms->sbindex = %d\n", swicms->sbindex);
	#endif

	  /* in-utt cms parameters */
	  swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
	  swicms->inutt.disable_after = 200;
	  swicms->inutt.enable_after = 10; /* in-utt is less reliable */
	  swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
	  swicms->inutt.num_frames_since_bou = 0;
	  swicms->inutt.num_frames_in_accum = 0;
	  for(i=0; i<MAX_CHAN_DIM; i++) swicms->inutt.accum[i] = 0;

	  if (sessionExists) {
	    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
	                          &swicms->inutt.forget_factor2, 1);
	    if(rc != ESR_SUCCESS) return rc;

	    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
	                          &swicms->inutt.disable_after, 1);
	    if(rc != ESR_SUCCESS) return rc;

	    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
	                          &swicms->inutt.enable_after, 1);
	    if(rc != ESR_SUCCESS) return rc;

	    /* we need to estimate the in-utt cmn from speech frames only! so let's
	       make sure to skip some frames before collecting data.
	       This step is best effort: a failed lookup keeps the default of 20
	       instead of acting on a stale `exists` or on an unread windback value. */
	    exists = ESR_FALSE;
	    if (ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists) == ESR_SUCCESS
	        && exists) {
	      ESR_BOOL do_skip_even_frames = ESR_TRUE;
	      int windback = 0;

	      /* optional property; ESR_TRUE is used when it cannot be read */
	      ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
	      if (ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &windback) == ESR_SUCCESS) {
	        if (do_skip_even_frames)
	          windback /= 2;
	        swicms->inutt.num_bou_frames_to_skip = windback - 5; /* ensure spch frames only */
	      }
	    }
	  }

	  return 0;
	 CLEANUP:
	  return rc;
	}


	/*
	 * Serialize the current channel-mean vector as a comma separated list of
	 * decimal integers ("v0,v1,...") into cmn_params.
	 *
	 * When an LDA preprocessor has been attached (swicms->_prep != NULL) the
	 * values come from lda_cmn passed through inverse_transform_frame();
	 * otherwise the raw cmn values are written.
	 *
	 *   swicms      normalization state to read from
	 *   cmn_params  output buffer
	 *   len         in: capacity of cmn_params; out (on overflow only): the
	 *               suggested capacity, MAX_CHAN_DIM * (INT_LENGTH + 2) *
	 *               sizeof(LCHAR)
	 *
	 * Returns ESR_SUCCESS, ESR_BUFFER_OVERFLOW when the buffer does not leave
	 * INT_LENGTH characters of headroom for the next value, or
	 * ESR_INVALID_ARGUMENT when any pointer argument is NULL.
	 */
	ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t *len )
	{
	  int dim_count;
	  int i;
	  imeldata temp[MAX_CHAN_DIM];
	  const size_t INT_LENGTH = 12;

	  if ( swicms == NULL || cmn_params == NULL || len == NULL )
	    return ESR_INVALID_ARGUMENT;

	  if ( swicms->_prep != NULL ) /* lda exists give them transformed lda. */
	  {
	    for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
	      temp [dim_count] = swicms->lda_cmn [dim_count];
	    inverse_transform_frame( swicms->_prep, temp, 1 /* do_shift */ );
	  }
	  else /* lda does not exist give them raw cmn values */
	  {
	    for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
	      temp [dim_count] = swicms->cmn [dim_count];
	  }

	  for ( dim_count = 0, i = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
	  {
	    /* Check the headroom BEFORE writing: a value plus its separator needs
	       at most INT_LENGTH characters, and the terminating NUL one more. */
	    if ( (size_t) i + INT_LENGTH >= *len )
	    {
	      *len = MAX_CHAN_DIM * ( INT_LENGTH + 2 ) * sizeof ( LCHAR );
	      return ESR_BUFFER_OVERFLOW;
	    }
	    i += sprintf( cmn_params + i, dim_count == 0 ? "%d" : ",%d", (int) temp [dim_count] );
	  }

	  return ESR_SUCCESS;
	}


	/*
	 * Replace the channel-mean vector from a textual parameter list.
	 *
	 * cmn_params must hold exactly MAX_CHAN_DIM non-empty fields of decimal
	 * digits separated by single commas, each in the range 0..255.  The
	 * string is parsed on a private copy, so the caller's buffer is never
	 * modified.  Nothing is stored in swicms->cmn unless every field is valid.
	 *
	 * If an LDA preprocessor is already attached (swicms->_prep != NULL) the
	 * new means are copied into lda_cmn, transformed, and the adjust vector is
	 * recomputed, the same sequence swicms_lda_process() performs.  If no
	 * preprocessor is attached only cmn is updated here.
	 *
	 * num_frames_in_cmn is reset to 0 whether or not parsing succeeds.
	 *
	 * Returns ESR_SUCCESS, ESR_INVALID_ARGUMENT for NULL arguments or a
	 * malformed / out of range list, or ESR_OUT_OF_MEMORY when the working
	 * copy cannot be allocated.
	 */
	ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
	{
	  ESR_ReturnCode set_status;
	  size_t length_of_params;
	  size_t current_position;
	  int dim_count;
	  int got_word;
	  char *copy_of_params;
	  char *parsed_strings [MAX_CHAN_DIM];
	  int temp_cmn [MAX_CHAN_DIM];

	  /* Invalid call: reject before touching any state. */
	  if ( swicms == NULL || cmn_params == NULL )
	    return ( ESR_INVALID_ARGUMENT );

	  length_of_params = strlen ( cmn_params ) + 1;
	  copy_of_params = (char*)MALLOC ( length_of_params, NULL );

	  if ( copy_of_params != NULL )
	  {
	    set_status = ESR_SUCCESS;
	    memcpy ( copy_of_params, cmn_params, length_of_params );
	    dim_count = 0;
	    current_position = 0;
	    got_word = 0;
	    parsed_strings [dim_count] = copy_of_params + current_position;

	    /* Split the copy in place: each ',' becomes a NUL and the start of
	       every field is remembered in parsed_strings. */
	    while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
	    {
	      const char current_char = copy_of_params [current_position];

	      switch ( current_char )
	      {
	        case '\0':
	          if ( ( got_word == 1 ) && ( dim_count == ( MAX_CHAN_DIM - 1 ) ) )
	          {
	            dim_count++;    /* last field complete, terminates the loop */
	          }
	          else if ( got_word == 1 )
	          {
	            PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
	            set_status = ESR_INVALID_ARGUMENT;
	          }
	          else
	          {
	            PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
	            set_status = ESR_INVALID_ARGUMENT;
	          }
	          break;

	        case ',':
	          if ( ( got_word == 1 ) && ( dim_count < ( MAX_CHAN_DIM - 1 ) ) )
	          {
	            dim_count++;
	            copy_of_params [current_position] = '\0';
	            current_position++;

	            if ( current_position == length_of_params )
	            {
	              PLogError ( "Channel Normalization : Delimiter At End Of Param String\n" );
	              set_status = ESR_INVALID_ARGUMENT;
	            }
	            parsed_strings [dim_count] = copy_of_params + current_position;
	            got_word = 0;
	          }
	          else
	          {
	            /* Empty field or more than MAX_CHAN_DIM fields. */
	            PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
	            set_status = ESR_INVALID_ARGUMENT;
	          }
	          break;

	        case '0':
	        case '1':
	        case '2':
	        case '3':
	        case '4':
	        case '5':
	        case '6':
	        case '7':
	        case '8':
	        case '9':
	          got_word = 1;
	          current_position++;

	          if ( current_position == length_of_params )
	          {
	            PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
	            set_status = ESR_INVALID_ARGUMENT;
	          }
	          break;

	        default:
	          /* The format has a %c conversion, so the offending character
	             must be supplied as an argument. */
	          PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n", current_char );
	          set_status = ESR_INVALID_ARGUMENT;
	          break;
	      }
	    }

	    /* Convert the fields.  They are known to contain digits only, so the
	       value is accumulated by hand and abandoned as soon as it exceeds
	       255; this cannot overflow however long the digit string is. */
	    for ( dim_count = 0; ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ); dim_count++ )
	    {
	      const char *digit = parsed_strings [dim_count];
	      int value = 0;

	      while ( ( *digit != '\0' ) && ( value <= 255 ) )
	      {
	        value = value * 10 + ( *digit - '0' );
	        digit++;
	      }

	      if ( value > 255 )
	        set_status = ESR_INVALID_ARGUMENT;
	      else
	        temp_cmn [dim_count] = value;
	    }

	    if ( set_status == ESR_SUCCESS )
	    {
	      for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
	        swicms->cmn [dim_count] = temp_cmn [dim_count];

	      if ( swicms->_prep != NULL ) /* Set now if NULL it will automatically be set on first utterance */
	      {
	        /* linear_transform_frame() works in place, so lda_cmn has to be
	           seeded with the new raw means first; otherwise the previous,
	           already transformed, values would be transformed again. */
	        for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
	          swicms->lda_cmn [dim_count] = swicms->cmn [dim_count];
	        linear_transform_frame ( swicms->_prep, swicms->lda_cmn, 1 /* do_shift */ );

	        /* Keep adjust consistent with the new lda_cmn. */
	        for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
	          swicms->adjust [dim_count] = swicms->lda_tmn [dim_count] - swicms->lda_cmn [dim_count];
	      }
	    }
	    FREE ( copy_of_params );
	  }
	  else
	  {
	    PLogError ( "Channel Normalization Out Of Memory Error\n" );
	    set_status = ESR_OUT_OF_MEMORY;
	  }
	  swicms->num_frames_in_cmn = 0;
	  return ( set_status );
	}


	/*
	 * Accumulate one frame into the per-utterance cache.
	 *
	 * Frames are summed into sections of cache_resolution consecutive frames;
	 * the section index is cached_num_frames / cache_resolution.  A section is
	 * zeroed when its first frame arrives.  Once SWICMS_CACHE_SIZE_DEFAULT
	 * sections are full, further frames are ignored and cached_num_frames
	 * stops advancing.
	 *
	 *   swicms  normalization state holding the cache
	 *   frame   MAX_CHAN_DIM values to add
	 *   dimen   must equal MAX_CHAN_DIM (checked with ASSERT only)
	 *
	 * Always returns 0.
	 */
	int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
	{
	  int section;
	  int k;

	  ASSERT(dimen == MAX_CHAN_DIM);
	  section = swicms->cached_num_frames / swicms->cache_resolution;
	  if (section < SWICMS_CACHE_SIZE_DEFAULT)
	  {
	    /* First frame of a section: start its sums from zero. */
	    if (swicms->cached_num_frames % swicms->cache_resolution == 0)
	    {
	      for (k = 0; k < MAX_CHAN_DIM; k++)
	        swicms->cached_sections[section][k] = 0;
	    }
	    /* Add the frame element by element (values, not addresses). */
	    for (k = 0; k < MAX_CHAN_DIM; k++)
	      swicms->cached_sections[section][k] += frame[k];
	    swicms->cached_num_frames++;
	  }

	  return 0;
	}

	/*
	 * Normalize one frame: oframe[k] = MAKEBYTE(iframe[k] + adjust[k]).
	 *
	 * Before the adjustment is applied, the in-utterance estimator may refine
	 * swicms->adjust.  That happens only while all of these hold:
	 *   - in-utt is enabled (forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE)
	 *   - fewer than inutt.disable_after frames have been accumulated
	 *   - at least inutt.num_bou_frames_to_skip frames have been seen since
	 *     num_frames_since_bou was last reset
	 * and adjust itself is rewritten only once more than inutt.enable_after
	 * frames are in the accumulator.
	 *
	 * dimen must equal MAX_CHAN_DIM (checked with ASSERT only).
	 * Always returns 0.
	 */
	int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
	                                          imeldata* oframe,
	                                          imeldata* iframe, int dimen)
	{
	  int k;

	  ASSERT(dimen == MAX_CHAN_DIM);

	  if (swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE)
	  {
	    /* Still collecting, and past the leading frames that are skipped. */
	    if (swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after
	        && swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip)
	    {
	      swicms->inutt.num_frames_in_accum++;
	      for (k = 0; k < dimen; k++)
	        swicms->inutt.accum[k] += iframe[k];

	      /* Enough frames gathered: fold the running sum into the adjustment. */
	      if (swicms->inutt.num_frames_in_accum > swicms->inutt.enable_after)
	      {
	        const imeldata weight_total = swicms->inutt.forget_factor2
	                                      + swicms->inutt.num_frames_in_accum;

	        for (k = 0; k < dimen; k++)
	        {
	          /* rounded weighted average of the old lda_cmn and the new accum */
	          const imeldata blended_mean =
	            (swicms->lda_cmn[k] * swicms->inutt.forget_factor2
	             + swicms->inutt.accum[k] + weight_total / 2) / weight_total;

	          swicms->adjust[k] = swicms->lda_tmn[k] - blended_mean;
	        }
	      }
	    }
	    swicms->inutt.num_frames_since_bou++;
	  }

	  for (k = 0; k < dimen; k++)
	    oframe[k] = MAKEBYTE(iframe[k] + swicms->adjust[k]);

	  return 0;
	}

	/*
	 * End-of-utterance update of the cross-utterance channel mean.
	 *
	 * Uses the frames cached by swicms_cache_frame().  Cache sections inside
	 * [speech_start, speech_end) are averaged as "speech", the remaining
	 * cached sections as "background"; the two are blended with weight
	 * sbindex (percent given to speech).  The blend is then merged into
	 * lda_cmn with weight forget_factor, and adjust is recomputed as
	 * lda_tmn - lda_cmn.  A forget_factor of 9999 or more leaves lda_cmn and
	 * adjust untouched.
	 *
	 * The frame cache and the in-utterance frame counter are reset on every
	 * call, whatever the outcome.
	 *
	 * Returns 1 when the update is rejected (no cached frames, empty or
	 * open-ended speech bounds, or an utterance mean more than 5 below the
	 * target mean on average); returns 0 otherwise, including the case where
	 * no complete cache section is available and nothing is changed.
	 */
	int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
	{
	  int i, j;
	  asr_int32_t speech_avg[MAX_CHAN_DIM], backgr_avg[MAX_CHAN_DIM], avg[MAX_CHAN_DIM];
	  int ff;
	  int nn, speech_nn, backgr_nn;
	  int speech_only, backgr_only;
	  int num_frames = swicms->cached_num_frames;
	  int cache_start, cache_end, backgr_cache_end;
	  int sbindex = swicms->sbindex;

	  /* init for utterance */
	  swicms->inutt.num_frames_since_bou = 0;
	  swicms->cached_num_frames = 0;

	  /* Convert the frame bounds into cache-section indices (rounded down). */
	  cache_start = speech_start;
	  cache_start -= (cache_start % swicms->cache_resolution);
	  cache_start /= swicms->cache_resolution;

	  if (speech_end == MAXframeID)
	  {
	    cache_end = SWICMS_CACHE_SIZE_DEFAULT;
	  }
	  else
	  {
	    if (speech_end < num_frames)
	      cache_end = speech_end;
	    else
	      cache_end = num_frames;
	    cache_end -= (cache_end % swicms->cache_resolution);
	    cache_end /= swicms->cache_resolution;
	  }

	  if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
	  {
	    if (speech_end != 0 || speech_start != 0)
	      PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
	                speech_start, speech_end, num_frames);
	    return 1;
	  }

	  backgr_cache_end = (num_frames - num_frames % swicms->cache_resolution) / swicms->cache_resolution;

	  speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
	  backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;

	  /* Nothing usable (or inconsistent bounds): bail out before any cache
	     section is indexed. */
	  if (speech_nn < 0 || backgr_nn < 0 || (speech_nn == 0 && backgr_nn == 0))
	    return 0;

	  /* Decide ONCE which averages exist.  The frame counts must not be
	     altered while the dimensions are being processed, otherwise only
	     dimension 0 would get the fallback and the rest would average an
	     empty sum. */
	  backgr_only = (speech_nn == 0);
	  speech_only = (backgr_nn == 0);

	  for (i = 0; i < MAX_CHAN_DIM; i++)
	  {
	    speech_avg[i] = 0;
	    backgr_avg[i] = 0;
	    for (j = cache_start; j < cache_end; j++)
	      speech_avg[i] += swicms->cached_sections[j][i];
	    for (j = 0; j < cache_start; j++)
	      backgr_avg[i] += swicms->cached_sections[j][i];
	    for (j = cache_end; j < backgr_cache_end; j++)
	      backgr_avg[i] += swicms->cached_sections[j][i];

	    if (backgr_only)
	    {
	      backgr_avg[i] /= backgr_nn;
	      speech_avg[i] = backgr_avg[i];
	    }
	    else if (speech_only)
	    {
	      speech_avg[i] /= speech_nn;
	      backgr_avg[i] = speech_avg[i];
	    }
	    else
	    {
	      speech_avg[i] /= speech_nn;
	      backgr_avg[i] /= backgr_nn;
	    }

	    avg[i] = (sbindex * speech_avg[i] + (100 - sbindex) * backgr_avg[i] + 50) / 100;
	  }

	  /* A missing class borrows the frame count of the one that exists. */
	  if (backgr_only)
	    speech_nn = backgr_nn;
	  else if (speech_only)
	    backgr_nn = speech_nn;
	  nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;

	  for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
	  {
	    ff += (swicms->lda_tmn[i] - avg[i]);
	  }
	  ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
	  if (ff > 5)
	  {
	    PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
	    return 1;
	  }

	  ff = swicms->forget_factor;
	  if (ff < 9999)
	  {
	    for (i = 0; i < MAX_CHAN_DIM; i++)
	    {
	      /* rounded weighted average of the old mean (weight ff) and this
	         utterance's mean (weight nn) */
	      swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2) / (ff + nn);
	      swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
	    }
	  }

	  if (SWICMS_DEBUG)
	  {
	    imeldata temp[MAX_CHAN_DIM];
	    PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);

	    for (i = 0; i < MAX_CHAN_DIM; i++)
	      temp[i] = swicms->lda_cmn[i];
	    inverse_transform_frame( swicms->_prep, temp, 1 /* do_shift */ );
	    /* use this dump, to put back into CREC.Frontend.swicms.cmn */
	    printf_vector("swicms.cmn(r) ", " %d", temp, MAX_CHAN_DIM);
	  }

	  swicms->num_frames_in_cmn += nn;
	  return 0;
	}

	/*
	 * Attach an LDA preprocessor and derive the transformed-domain vectors.
	 *
	 * Copies tmn and cmn into lda_tmn and lda_cmn, runs each copy through
	 * linear_transform_frame() in place, and sets
	 * adjust[i] = lda_tmn[i] - lda_cmn[i].  Afterwards the state is marked
	 * valid (is_valid = 1) and prep is remembered in swicms->_prep.
	 *
	 * With SWICMS_DEBUG set, cmn, lda_cmn and the inverse transform of
	 * lda_cmn are logged.
	 *
	 * Always returns 0.
	 */
	int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
	{
	  int i;

	  for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_tmn[i] = swicms->tmn[i];
	  for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_cmn[i] = swicms->cmn[i];
	  linear_transform_frame(prep, swicms->lda_tmn, 1 /* do_shift */);
	  linear_transform_frame(prep, swicms->lda_cmn, 1 /* do_shift */);

	  for (i = 0; i < MAX_CHAN_DIM; i++)
	  {
	    swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
	  }

	  swicms->is_valid = 1;
	  swicms->_prep = prep;

	  if (SWICMS_DEBUG)
	  {
	    imeldata temp[MAX_CHAN_DIM];
	    printf_vector("swicms->cmn ", " %d", swicms->cmn, MAX_CHAN_DIM);
	    printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);

	    /* Round trip: map lda_cmn back through the inverse transform so it
	       can be compared with the raw cmn dump above. */
	    for (i = 0; i < MAX_CHAN_DIM; i++) temp[i] = swicms->lda_cmn[i];
	    inverse_transform_frame( swicms->_prep, temp, 1 /* do_shift */);
	    printf_vector("swicms->cmn(r) ", " %d", temp, MAX_CHAN_DIM);
	  }
	  return 0;
	}