/* srec/include/pre_desc.h - platform/external/srec - Git at Google */

 /*---------------------------------------------------------------------------*
  *  pre_desc.h  *
  *                                                                           *
  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
  *                                                                           *
  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
  *  you may not use this file except in compliance with the License.         *
  *                                                                           *
  *  You may obtain a copy of the License at                                  *
  *      http://www.apache.org/licenses/LICENSE-2.0                           *
  *                                                                           *
  *  Unless required by applicable law or agreed to in writing, software      *
  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  *  See the License for the specific language governing permissions and      *
  *  limitations under the License.                                           *
  *                                                                           *
  *---------------------------------------------------------------------------*/


 #ifndef _h_pre_desc_
 #define _h_pre_desc_

 /* Optional RCS revision string for this header. It is compiled in only when
  * SET_RCSID is defined. The object is static, so every translation unit that
  * includes this header with SET_RCSID defined gets its own private copy. */
 #ifdef SET_RCSID
 static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $";
 #endif


 #include "all_defs.h"
 #include "hmm_type.h"
 #include "specnorm.h"
 #ifndef _RTT
 #include "duk_io.h"
 #endif

 /* Compile-time switch. When non-zero, the SNR members of endpoint_info and
  * the spectral-inverse members of preprocessed are compiled in. It is fixed
  * at 0 here, so those members are left out. */
 #define DO_SUBTRACTED_SEGMENTATION  0

 /* Processing option flags. NONE is defined here only if no earlier header
  * has already defined it. The remaining values are distinct powers of two.
  * NOTE(review): presumably they are OR-ed together into
  * preprocessed.post_proc -- confirm against the callers. */
 #ifndef NONE
 #define NONE   0
 #endif
 #define SCALE   1 /* Scaling the channels */
 #define LIN_TRAN  2 /* Linear Transformation */
 #define VFR   4 /* Variable frame rate */
 #define USE_MULTAB      8 /* Set up multable distance calculations */

 /**
  * Mul-table data (the original source labels this "mul-table data types").
  *
  * @todo the meaning of the individual members is not documented in this
  *       header; the notes below are limited to what the declaration shows.
  */
 typedef struct {
     unsigned short sigma;
     int num;
     short *pdf;  /* NOTE(review): presumably refers to `num` values -- confirm */
 } mul_table;

 /**
  * Lookup data for the mul-table ("multable") distance calculation.
  * An instance is embedded in preprocessed as the member `mul`.
  *
  * @todo document the layout of the index and distance tables
  */
 typedef struct {
     unsigned short num_dev8_index;  /* NOTE(review): presumably the length of dev8_index -- confirm */
     unsigned char *dev8_index;
     unsigned short *wt_index;
     short *gauss_dist_table;
     short **dist_ptr;
     prdata multable_factor;           /* Euclidean to multable */
     prdata multable_factor_gaussian;  /* Euclidean to multable */
     prdata grand_mod_cov;             /* modulus of the grand covariance */
     prdata grand_mod_cov_gaussian;    /* modulus of the grand covariance */
 } mul_table_info;

 /**
  * Description of a log-add lookup table.
  * preprocessed holds two of these: `add` and `fbadd` (the latter for ENC).
  *
  * @todo document the table contents and the units of the scale members
  */
 typedef struct {
     const prdata *table;   /* read-only table data */
     prdata add_log_limit;
     prdata scale;          /* X: scale to the log function */
     prdata inv_scale;
     float logscale;        /* Y: scale to the log function */
 } logadd_table_info;

 /**
  * Accumulator set; pattern_info embeds one as the member `imelda_acc`.
  *
  * NOTE(review): the member names suggest between-class and within-class
  * statistics with their means -- confirm against the accumulation code.
  *
  * @todo document the dimensions of the arrays
  */
 typedef struct {
     unsigned long num;
     accdata **between;
     accdata *bmean;
     accdata **within;
     accdata *wmean;
 } transform_info;

 /**
  * Segmentation parameters.
  * An instance is embedded in preprocessed as the member `end`.
  *
  * The last two members exist only when DO_SUBTRACTED_SEGMENTATION is
  * non-zero, so the structure size depends on that macro.
  *
  * @todo document the members that carry no comment
  */
 typedef struct {
     int rel_low;
     int rel_high;
     int gap_period;
     int click_period;
     int breath_period;
     int extend_annotation;
     int param;
     int min_initial_quiet_frames;  /* number of silence frames needed before input */
     int min_annotation_frames;     /* minimum overall length */
     int max_annotation_frames;     /* maximum overall length */
     int delete_leading_segments;   /* number of segments to delete; 0 = no action */
     int leading_segment_accept_if_not_found;  /* do not reject the segmentation if not found */
     int leading_segment_min_frames;  /* remove unless shorter */
     int leading_segment_max_frames;  /* remove unless exceeded */
     int leading_segment_min_silence_gap_frames;  /* remove if good silence gap to next segment */
     int beep_size;       /* X201 beep filter */
     int beep_threshold;  /* X201 beep filter */
     int min_segment_rel_c0;  /* any segment whose peak c0 is < max - min_segment_rel_c0 gets deleted */

 #if DO_SUBTRACTED_SEGMENTATION
     int snr_holdoff;         /* ignore the first n frames when estimating speech level for the SNR measure */
     int min_acceptable_snr;  /* for an acceptable segmentation */
 #endif
 } endpoint_info;


 /**
  * Processed speech data: the output of the front end.
  *
  * Holds the current frame vectors, the transformation matrices, the lookup
  * tables, the ENC state, the score-scaling parameters and the segmentation
  * parameters.
  *
  * Two groups of members are optional at compile time:
  *   - PARTIAL_DISTANCE_APPROX adds the partial-distance members;
  *   - DO_SUBTRACTED_SEGMENTATION adds the spectral-inverse members.
  * The layout therefore depends on both macros, and every translation unit
  * that shares this structure must see the same values for them.
  *
  * @todo document the members that carry no comment
  */
 typedef struct {
     int ref_count;  /* reference count */

     /* Pattern vector section */
     int dim;        /* dimension of the frame vector */
     int use_dim;    /* dimension used for recognition */
     int whole_dim;  /* reduced feature use; unused set to 127 (0) on model construction */
     int use_from;   /* first channel used for recognition */
     featdata *last_frame;  /* last frame processed in the frame buffer */
     imeldata *seq;         /* current valid frame */
     imeldata *seq_unnorm;  /* current valid frame, for whole-word models */
     prdata seq_sq_sum;               /* sum of the squares of the frames */
     prdata seq_sq_sum_whole;         /* sum of the squares of the frames, for whole-word */
     prdata seq_unnorm_sq_sum_whole;  /* sum of the squares of the frames, for whole-word */
     int voicing_status;  /* voicing code */
     int post_proc;       /* post-processing functions */
     imeldata *offset;    /* offset vector with transformation */
     imeldata **matrix;   /* linear transformation matrix */
     int imel_shift;      /* Imelda scale factor (in shifts) */
     covdata **imelda;    /* linear transformation matrix, PMC or RN */
     imeldata **invmat;   /* inverse transformation matrix */
     int inv_shift;       /* inverse Imelda scale factor (in shifts) */
     covdata **inverse;   /* inverse linear transformation matrix, PMC or RN */
 #if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */
     int partial_distance_calc_dim;  /* number of params to calculate the distance over, before approximating if beyond threshold */
     scodata partial_distance_threshold;
     prdata partial_distance_calc_threshold;
     prdata partial_distance_offset;
     prdata global_distance_over_n_params;
     int global_model_means[MAX_DIMEN];
     prdata partial_mean_sq_sum;
     prdata partial_seq_sq_sum;
     prdata partial_seq_unnorm_sq_sum;
 #endif
     imeldata *chan_offset;
     /* Channel normalization etc. */

     /* Tables */
     prdata exp_wt[MAX_WTS];  /* exp lookup table for the weights */
     mul_table_info mul;      /* mul-table */
     logadd_table_info add;   /* logadd table */

     /* ENC */
     booldata is_setup_for_noise;
     booldata do_whole_enc;   /* enables ENC */
     booldata do_sub_enc;     /* enables ENC */
     booldata enc_count;
     booldata ambient_valid;  /* ambient estimates are valid */
     imeldata **pmc_fixmat;   /* ENC matrix */
     imeldata **pmc_fixinv;   /* inverse ENC matrix */
     covdata **pmc_matrix;    /* ENC matrix in float */
     covdata **pmc_inverse;   /* inverse ENC matrix in float */
     int pmc_matshift;        /* scaling */
     int pmc_invshift;        /* scaling */
     imeldata *ambient_mean;         /* ambient mean vector */
     imeldata *ambient_prof;         /* ambient estimates, pseudo space */
     imeldata *ambient_prof_unnorm;  /* ambient estimates, unnormalised */
     logadd_table_info fbadd;        /* logadd table for ENC */
 #if DO_SUBTRACTED_SEGMENTATION
     int mel_dim;
     covdata **spec_inverse;
     imeldata **spec_fixinv;
     int spec_invshift;
     int *cep_offset;
 #endif

     /* Parameters */
     prdata mix_score_scale;   /* mixture score scaling constant */
     prdata uni_score_scale;   /* unimodal score scaling constant */
     prdata uni_score_offset;  /* unimodal score offset constant */
     prdata imelda_scale;      /* Imelda grand variance */

     /* Endpoint data */
     endpoint_info end;
 } preprocessed;

 /**
  * Pairs a preprocessed data structure with Imelda accumulation state.
  *
  * `prep` is held by pointer; the remaining members are stored by value and,
  * per the original note, cannot be cloned.
  */
 typedef struct {
     preprocessed *prep;  /* the preprocessed data structure */

     /* The members below cannot be cloned. */
     booldata do_imelda;  /* alignment-based accumulation */
     transform_info imelda_acc;
 } pattern_info;

 #endif /* _h_pre_desc_ */
	/*---------------------------------------------------------------------------*
	 * pre_desc.h *
	 * *
	 * Copyright 2007, 2008 Nuance Communciations, Inc. *
	 * *
	 * Licensed under the Apache License, Version 2.0 (the 'License'); *
	 * you may not use this file except in compliance with the License. *
	 * *
	 * You may obtain a copy of the License at *
	 * http://www.apache.org/licenses/LICENSE-2.0 *
	 * *
	 * Unless required by applicable law or agreed to in writing, software *
	 * distributed under the License is distributed on an 'AS IS' BASIS, *
	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
	 * See the License for the specific language governing permissions and *
	 * limitations under the License. *
	 * *
	 *---------------------------------------------------------------------------*/



	#ifndef _h_pre_desc_
	#define _h_pre_desc_

	/* Optional RCS revision string for this header. It is compiled in only when
	 * SET_RCSID is defined. The object is static, so every translation unit that
	 * includes this header with SET_RCSID defined gets its own private copy. */
	#ifdef SET_RCSID
	static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $";
	#endif


	#include "all_defs.h"
	#include "hmm_type.h"
	#include "specnorm.h"
	#ifndef _RTT
	#include "duk_io.h"
	#endif

	/* Compile-time switch. When non-zero, the SNR members of endpoint_info and
	 * the spectral-inverse members of preprocessed are compiled in. It is fixed
	 * at 0 here, so those members are left out. */
	#define DO_SUBTRACTED_SEGMENTATION 0

	/* Processing option flags. NONE is defined here only if no earlier header
	 * has already defined it. The remaining values are distinct powers of two.
	 * NOTE(review): presumably they are OR-ed together into
	 * preprocessed.post_proc -- confirm against the callers. */
	#ifndef NONE
	#define NONE 0
	#endif
	#define SCALE 1 /* Scaling the channels */
	#define LIN_TRAN 2 /* Linear Transformation */
	#define VFR 4 /* Variable frame rate */
	#define USE_MULTAB 8 /* Set up multable distance calculations */

	/**
	 * Mul-table data (the original source labels this "mul-table data types").
	 *
	 * @todo the meaning of the individual members is not documented in this
	 *       header; the notes below are limited to what the declaration shows.
	 */
	typedef struct {
	    unsigned short sigma;
	    int num;
	    short *pdf;  /* NOTE(review): presumably refers to `num` values -- confirm */
	} mul_table;

	/**
	 * Lookup data for the mul-table ("multable") distance calculation.
	 * An instance is embedded in preprocessed as the member `mul`.
	 *
	 * @todo document the layout of the index and distance tables
	 */
	typedef struct {
	    unsigned short num_dev8_index;  /* NOTE(review): presumably the length of dev8_index -- confirm */
	    unsigned char *dev8_index;
	    unsigned short *wt_index;
	    short *gauss_dist_table;
	    short **dist_ptr;
	    prdata multable_factor;           /* Euclidean to multable */
	    prdata multable_factor_gaussian;  /* Euclidean to multable */
	    prdata grand_mod_cov;             /* modulus of the grand covariance */
	    prdata grand_mod_cov_gaussian;    /* modulus of the grand covariance */
	} mul_table_info;

	/**
	 * Description of a log-add lookup table.
	 * preprocessed holds two of these: `add` and `fbadd` (the latter for ENC).
	 *
	 * @todo document the table contents and the units of the scale members
	 */
	typedef struct {
	    const prdata *table;   /* read-only table data */
	    prdata add_log_limit;
	    prdata scale;          /* X: scale to the log function */
	    prdata inv_scale;
	    float logscale;        /* Y: scale to the log function */
	} logadd_table_info;

	/**
	 * Accumulator set; pattern_info embeds one as the member `imelda_acc`.
	 *
	 * NOTE(review): the member names suggest between-class and within-class
	 * statistics with their means -- confirm against the accumulation code.
	 *
	 * @todo document the dimensions of the arrays
	 */
	typedef struct {
	    unsigned long num;
	    accdata **between;
	    accdata *bmean;
	    accdata **within;
	    accdata *wmean;
	} transform_info;

	/**
	 * Segmentation parameters.
	 * An instance is embedded in preprocessed as the member `end`.
	 *
	 * The last two members exist only when DO_SUBTRACTED_SEGMENTATION is
	 * non-zero, so the structure size depends on that macro.
	 *
	 * @todo document the members that carry no comment
	 */
	typedef struct {
	    int rel_low;
	    int rel_high;
	    int gap_period;
	    int click_period;
	    int breath_period;
	    int extend_annotation;
	    int param;
	    int min_initial_quiet_frames;  /* number of silence frames needed before input */
	    int min_annotation_frames;     /* minimum overall length */
	    int max_annotation_frames;     /* maximum overall length */
	    int delete_leading_segments;   /* number of segments to delete; 0 = no action */
	    int leading_segment_accept_if_not_found;  /* do not reject the segmentation if not found */
	    int leading_segment_min_frames;  /* remove unless shorter */
	    int leading_segment_max_frames;  /* remove unless exceeded */
	    int leading_segment_min_silence_gap_frames;  /* remove if good silence gap to next segment */
	    int beep_size;       /* X201 beep filter */
	    int beep_threshold;  /* X201 beep filter */
	    int min_segment_rel_c0;  /* any segment whose peak c0 is < max - min_segment_rel_c0 gets deleted */

	#if DO_SUBTRACTED_SEGMENTATION
	    int snr_holdoff;         /* ignore the first n frames when estimating speech level for the SNR measure */
	    int min_acceptable_snr;  /* for an acceptable segmentation */
	#endif
	} endpoint_info;


	/**
	* @todo document
	*/
	typedef struct
	{ /* processed speech data/front end output */
	int ref_count; /* reference counts */
	/* Pattern vector section */
	int dim; /* dimension of frame vector */
	int use_dim; /* dimension used for recognition */
	int whole_dim; /* reduced feature use. Set unused to 127 (0) on model construction */
	int use_from; /* first channel used for recognition */
	featdata last_frame; / last frame processed in frame buffer */
	imeldata seq; / current valid frame */
	imeldata seq_unnorm; / current valid frame, for whole-word models */
	prdata seq_sq_sum; /* sum of the squared of frames */
	prdata seq_sq_sum_whole; /* sum of the squared of frames, for wholeword */
	prdata seq_unnorm_sq_sum_whole; /* sum of the squared of frames, for wholeword */
	int voicing_status; /* voicing code */
	int post_proc; /* post processing functions */
	imeldata offset; / offset vector with transformation */
	imeldata *matrix; / linear transformation matrix */
	int imel_shift; /* Imelda scale factor (in shifts) */
	covdata *imelda; / linear transformation matrix, PMC or RN */
	imeldata *invmat; / inverse transformation matrix */
	int inv_shift; /* inverse Imelda scale factor (in shifts) */
	covdata *inverse; / inverse linear transformation matrix, PMC or RN */
	#if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */
	int partial_distance_calc_dim; /* number of params to calc distance over, before approximating if beyond threshold */
	scodata partial_distance_threshold;
	prdata partial_distance_calc_threshold;
	prdata partial_distance_offset;
	prdata global_distance_over_n_params;
	int global_model_means[MAX_DIMEN];
	prdata partial_mean_sq_sum;
	prdata partial_seq_sq_sum;
	prdata partial_seq_unnorm_sq_sum;
	#endif
	imeldata *chan_offset;
	/* Channel Normalization etc */

	/* Tables */
	prdata exp_wt[MAX_WTS]; /* weights exp lookup table */
	mul_table_info mul; /* Mul-table */
	logadd_table_info add; /* logadd-table */
	/* ENC */
	booldata is_setup_for_noise;
	booldata do_whole_enc; /* to enable ENC */
	booldata do_sub_enc; /* to enable ENC */
	booldata enc_count;
	booldata ambient_valid; /* ambient estimates valid */
	imeldata *pmc_fixmat; / ENC matrix */
	imeldata *pmc_fixinv; / inverse ENC matrix */
	covdata *pmc_matrix; / ENC matrix in float */
	covdata *pmc_inverse; / inverse ENC matrix in float */
	int pmc_matshift; /* scaling */
	int pmc_invshift; /* scaling */
	imeldata ambient_mean; / ambient mean vector */
	imeldata ambient_prof; / ambient estimates, pseudo space */
	imeldata ambient_prof_unnorm; / ambient estimates, unnormalised */
	logadd_table_info fbadd; /* logadd-table for ENC */
	#if DO_SUBTRACTED_SEGMENTATION
	int mel_dim;
	covdata **spec_inverse;
	imeldata **spec_fixinv;
	int spec_invshift;
	int *cep_offset;
	#endif
	/* Parameters */
	prdata mix_score_scale; /* Mixture score scaling constant */
	prdata uni_score_scale; /* Unimodal score scaling constant */
	prdata uni_score_offset; /* Unimodal score offset constant */
	prdata imelda_scale; /* Imelda grand variance */
	/* Endpoint data */
	endpoint_info end;

	}
	preprocessed;

	/**
	* @todo document
	*/
	typedef struct
	{
	preprocessed prep; / The preprocessed data structure */
	/* The following stuff cannot be cloned */
	booldata do_imelda; /* Alignment based accumulation */
	transform_info imelda_acc;
	}
	pattern_info;

	#endif /* _h_pre_desc_ */