srec/include/swicms.h - platform/external/srec - Git at Google

 /*---------------------------------------------------------------------------*
  *  swicms.h                                                                 *
  *                                                                           *
  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
  *                                                                           *
  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
  *  you may not use this file except in compliance with the License.         *
  *                                                                           *
  *  You may obtain a copy of the License at                                  *
  *      http://www.apache.org/licenses/LICENSE-2.0                           *
  *                                                                           *
  *  Unless required by applicable law or agreed to in writing, software      *
  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  *  See the License for the specific language governing permissions and      *
  *  limitations under the License.                                           *
  *                                                                           *
  *---------------------------------------------------------------------------*/

 #ifndef __SWICMS_H__
 #define __SWICMS_H__

 #include"all_defs.h"
 #include"sizes.h"
 #include"fronttyp.h"
 #include"pre_desc.h"

 /* Non-zero declares the swicms debug helpers; with 0 they compile to nothing. */
 #define DEBUG_SWICMS 0

 /* Frame-cache sizing. */
 #define MAX_CACHED_FRAMES 800
 #define SWICMS_CACHE_RESOLUTION_DEFAULT 8
 /* equals #frames/resolution (800 / 8) */
 #define SWICMS_CACHE_SIZE_DEFAULT 100

 /**
  * Thin wrapper around a single MAX_CHAN_DIM-element imeldata vector.
  * Debugger convenience only: cast a raw array to view it, e.g. (imelvec*)tmn.
  */
 typedef struct {
   imeldata vec[MAX_CHAN_DIM];
 } imelvec;

 /**
  * Does channel normalization without using fine recognition segmentation.  It remembers the
  * frames of speech and uses that as a channel mean for the next utterance.  A forget_factor
  * is used to weigh the new speech mean estimate with an older one.
  *
  * All per-channel vectors below hold MAX_CHAN_DIM imeldata elements.
  */
 typedef struct
 {
   imeldata tmn [MAX_CHAN_DIM];                 /* target mean */
   imeldata cmn [MAX_CHAN_DIM];                 /* channel mean */

   imeldata lda_tmn [MAX_CHAN_DIM];                 /* target mean (presumably the LDA-domain counterpart of tmn - confirm) */
   imeldata lda_cmn [MAX_CHAN_DIM];                 /* channel mean (presumably the LDA-domain counterpart of cmn - confirm) */

   imeldata adjust[MAX_CHAN_DIM]; /* target less channel */

   int is_valid;                /* NOTE(review): looks like a "state is usable" flag; where it is set is not visible here */
   int forget_factor;           /* in frames, mass of cmn average */
   int sbindex;                 /* speech to background index
         100 -> use only speech to calculate CMN
         000 -> use only background to calculate CMN
         050 -> use half/half ..
         all numbers in between are acceptable */

   int num_frames_in_cmn; /* num frames used to estimate cmn (or lda_cmn) */

   /* for in-utterance channel normalization */
   struct {
     int forget_factor2;     /* cmn is given this weight to start off */
     int disable_after;      /* we disable in-utt cms after this many frames */
     int enable_after;       /* we enable in-utt cms after this many frames */
     int num_bou_frames_to_skip;   /* don't start accum 'til this many frames */
     int num_frames_since_bou;     /* counter for above, bou=begin-of-utt     */
     int num_frames_in_accum;      /* number of frames in accum */
     imeldata accum[MAX_CHAN_DIM]; /* accumulates frames of the current utt */
   } inutt;

   int cached_num_frames;       /* we cache frames, until recognition is done
         and can calculate speech mean from these */
   int cache_resolution;        /* we'll avg this many frames per section */
   /* SWICMS_CACHE_SIZE_DEFAULT sections, each one MAX_CHAN_DIM-element vector */
   imeldata cached_sections[SWICMS_CACHE_SIZE_DEFAULT][MAX_CHAN_DIM];
   /* pointer to a preprocessed object; ownership is not visible in this header */
   /*const*/ preprocessed* _prep;
 }
 swicms_norm_info;

 /*
  * Entry points of the swicms module.  Only the signatures are visible in
  * this header; behaviour must be confirmed against the implementation file.
  */
 int swicms_init(swicms_norm_info *swicms);
 int swicms_cache_frame(swicms_norm_info *swicms, imeldata *frame, int dimen);
 int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
                                           imeldata *oframe,
                                           imeldata *iframe,
                                           int dimen);
 int swicms_lda_process(swicms_norm_info *swicms, preprocessed *prep);

 int swicms_update(swicms_norm_info *swicms,
                   int speech_start_frame,
                   int speech_end_frame);

 /* Set / get the cmn parameters as an LCHAR string (string format not shown here). */
 ESR_ReturnCode swicms_set_cmn(swicms_norm_info *swicms, const LCHAR *new_cmn_params);
 ESR_ReturnCode swicms_get_cmn(swicms_norm_info *swicms, LCHAR *cmn_params, size_t *len);

 /*
  * Debug helpers.  When DEBUG_SWICMS is non-zero the real functions are
  * declared; otherwise both names become no-op stubs.  The stubs never
  * evaluate their arguments and expand to ((void)0) - the same form assert()
  * takes under NDEBUG - so a call stays a well-formed expression statement
  * wherever it appears (e.g. as the sole body of an if/else or in a comma
  * expression) instead of expanding to nothing.
  */
 #if DEBUG_SWICMS
 int swicms_compare(swicms_norm_info* swicms, imeldata* imelda_adjust);
 int swicms_dump_stats(swicms_norm_info* swicms);
 #else
 #define swicms_compare(swicms,ia) ((void)0)
 #define swicms_dump_stats(swicms) ((void)0)
 #endif

 #endif
	/*---------------------------------------------------------------------------
	* swicms.h *
	* *
	* Copyright 2007, 2008 Nuance Communciations, Inc. *
	* *
	* Licensed under the Apache License, Version 2.0 (the 'License'); *
	* you may not use this file except in compliance with the License. *
	* *
	* You may obtain a copy of the License at *
	* http://www.apache.org/licenses/LICENSE-2.0 *
	* *
	* Unless required by applicable law or agreed to in writing, software *
	* distributed under the License is distributed on an 'AS IS' BASIS, *
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
	* See the License for the specific language governing permissions and *
	* limitations under the License. *
	* *
	---------------------------------------------------------------------------*/

	#ifndef __SWICMS_H__
	#define __SWICMS_H__

	#include"all_defs.h"
	#include"sizes.h"
	#include"fronttyp.h"
	#include"pre_desc.h"

	/* Non-zero declares the swicms debug helpers; with 0 they compile to nothing. */
	#define DEBUG_SWICMS 0

	/* Frame-cache sizing. */
	#define MAX_CACHED_FRAMES 800
	#define SWICMS_CACHE_RESOLUTION_DEFAULT 8
	/* equals #frames/resolution (800 / 8) */
	#define SWICMS_CACHE_SIZE_DEFAULT 100

	/**
	 * Thin wrapper around a single MAX_CHAN_DIM-element imeldata vector.
	 * Debugger convenience only: cast a raw array to view it, e.g. (imelvec*)tmn.
	 */
	typedef struct {
	  imeldata vec[MAX_CHAN_DIM];
	} imelvec;

	/**
	* Does channel normalization without using fine recognition segmenation. It remembers the
	* frames of speech and uses that as a channel mean for the next utterance. A forget_factor
	* is used to weigh the new speech mean estimate with an older one.
	*/
	typedef struct
	{
	imeldata tmn [MAX_CHAN_DIM]; /* target mean */
	imeldata cmn [MAX_CHAN_DIM]; /* channel mean */

	imeldata lda_tmn [MAX_CHAN_DIM]; /* target mean */
	imeldata lda_cmn [MAX_CHAN_DIM]; /* channel mean */

	imeldata adjust[MAX_CHAN_DIM]; /* target less channel */

	int is_valid;
	int forget_factor; /* in frames, mass of cmn average */
	int sbindex; /* speech to background index
	100 -> use only speech to calculate CMN
	000 -> use only background to calculate CMN
	050 -> use half/half ..
	all numbers in between are acceptable */

	int num_frames_in_cmn; /* num frames used to estimate cmn (or lda_cmn) */

	/* for in-utterance channel normalization */
	struct {
	int forget_factor2; /* cmn is given this weight to start off */
	int disable_after; /* we disable in-utt cms after this many fr*/
	int enable_after; /* we enable in-utt cms after this many fr*/
	int num_bou_frames_to_skip; /* don't start accum 'til this many frames */
	int num_frames_since_bou; /* counter for above, bou=begin-of-utt */
	int num_frames_in_accum; /* number of frames in accum */
	imeldata accum[MAX_CHAN_DIM]; /* accumulates frames of the current utt */
	} inutt;

	int cached_num_frames; /* we cache frames, until recognition is done
	and can calculate speech mean from these */
	int cache_resolution; /* we'll avg this many frames per section */
	imeldata cached_sections[SWICMS_CACHE_SIZE_DEFAULT][MAX_CHAN_DIM];
	/const/ preprocessed* _prep;
	}
	swicms_norm_info;

	/*
	 * Entry points of the swicms module.  Only the signatures are visible in
	 * this header; behaviour must be confirmed against the implementation file.
	 * Every function takes the normalization state by pointer.
	 */
	int swicms_init(swicms_norm_info* swicms);
	int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen);
	int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
	    imeldata* oframe, imeldata* iframe,
	    int dimen);
	int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep);

	int swicms_update(swicms_norm_info* swicms, int speech_start_frame, int speech_end_frame);

	/* Set / get the cmn parameters as an LCHAR string (string format not shown here). */
	ESR_ReturnCode swicms_set_cmn(swicms_norm_info *swicms, const LCHAR *new_cmn_params );
	ESR_ReturnCode swicms_get_cmn(swicms_norm_info *swicms, LCHAR *cmn_params, size_t* len );

	/*
	 * Debug helpers.  When DEBUG_SWICMS is non-zero the real functions are
	 * declared; otherwise both names become no-op stubs.  The stubs never
	 * evaluate their arguments and expand to ((void)0) - the same form assert()
	 * takes under NDEBUG - so a call stays a well-formed expression statement
	 * wherever it appears instead of expanding to nothing.
	 */
	#if DEBUG_SWICMS
	int swicms_compare(swicms_norm_info* swicms, imeldata* imelda_adjust);
	int swicms_dump_stats(swicms_norm_info* swicms);
	#else
	#define swicms_compare(swicms,ia) ((void)0)
	#define swicms_dump_stats(swicms) ((void)0)
	#endif

	#endif