blob: bdfb7f8678724533437e5b5e5e5eb603d1833a4d [file] [log] [blame]
/*---------------------------------------------------------------------------*
* pre_desc.h *
* *
* Copyright 2007, 2008 Nuance Communications, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
#ifndef _h_pre_desc_
#define _h_pre_desc_
/* Optional RCS identification string for this header. It is compiled in only
 * when SET_RCSID is defined; because it is static, every translation unit
 * that includes this header with SET_RCSID defined gets its own copy. */
#ifdef SET_RCSID
static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $";
#endif
#include "all_defs.h"
#include "hmm_type.h"
#include "specnorm.h"
#ifndef _RTT
#include "duk_io.h"
#endif
/* Compile-time switch tested with #if further down in this header: when it is
 * non-zero, extra SNR fields are compiled into endpoint_info and extra
 * spectral fields into preprocessed. It is switched off here. */
#define DO_SUBTRACTED_SEGMENTATION 0
/* NONE is defined here only if no earlier header has already defined it. */
#ifndef NONE
#define NONE 0
#endif
/* The four values below are distinct powers of two, so they can be OR-ed
 * together into a bit mask, with NONE (0) meaning no bit set.
 * NOTE(review): presumably these are the flags held in preprocessed.post_proc
 * -- confirm against the code that reads that field. */
#define SCALE 1 /* Scaling the channels */
#define LIN_TRAN 2 /* Linear Transformation */
#define VFR 4 /* Variable frame rate */
#define USE_MULTAB 8 /* Set up multable distance calculations */
/**
 * A single mul-table record: a sigma value, a count and a pointer to short
 * table data.
 *
 * NOTE(review): this header does not show how the fields are filled or used;
 * the per-field notes are assumptions drawn from the names and must be
 * confirmed against the code that builds the table.
 */
typedef struct
{ /* mul-table data types */
unsigned short sigma; /* presumably the deviation this table belongs to -- TODO confirm */
int num; /* presumably the number of entries reachable through pdf -- TODO confirm */
short *pdf; /* table data; allocation and ownership are not visible here */
}
mul_table;
/**
 * Lookup tables and scaling constants for the mul-table distance calculation.
 * An instance is embedded in preprocessed as the member "mul".
 *
 * NOTE(review): array lengths and ownership of the pointer members are not
 * visible in this header; notes marked "presumably" are unconfirmed.
 */
typedef struct
{
unsigned short num_dev8_index; /* presumably the number of entries in dev8_index -- TODO confirm */
unsigned char *dev8_index; /* byte-sized index table */
unsigned short *wt_index; /* weight index table */
short *gauss_dist_table; /* Gaussian distance table data */
short **dist_ptr; /* array of pointers to short data; presumably rows inside gauss_dist_table -- TODO confirm */
prdata multable_factor; /* euclidean to multable */
prdata multable_factor_gaussian; /* euclidean to multable, Gaussian variant (original comment was a duplicate of the line above) */
prdata grand_mod_cov; /* grand covariance modulus */
prdata grand_mod_cov_gaussian; /* grand covariance modulus, Gaussian variant (original comment was a duplicate of the line above) */
}
mul_table_info;
/**
 * Description of a log-add lookup table: a pointer to the table plus the
 * limit and scale factors that go with it. preprocessed embeds two of these,
 * "add" (logadd-table) and "fbadd" (logadd-table for ENC).
 */
typedef struct
{
const prdata *table; /* the lookup table; not modifiable through this pointer */
prdata add_log_limit; /* NOTE(review): presumably the largest difference the table covers -- confirm */
prdata scale; /* X - scale to log function */
prdata inv_scale; /* NOTE(review): presumably the inverse of scale -- confirm */
float logscale; /* Y - scale to log function */
}
logadd_table_info;
/**
 * Accumulator set made of a counter, two accdata matrices (pointer to
 * pointer) and two accdata vectors. pattern_info embeds one as "imelda_acc".
 *
 * NOTE(review): the names suggest between-class and within-class statistics
 * with their means, gathered to estimate a linear transform; nothing in this
 * header confirms that, nor the dimensions or ownership of the arrays.
 */
typedef struct
{
unsigned long num; /* presumably the number of items accumulated so far -- TODO confirm */
accdata **between; /* presumably the between-class accumulation matrix -- TODO confirm */
accdata *bmean; /* presumably the mean vector paired with "between" -- TODO confirm */
accdata **within; /* presumably the within-class accumulation matrix -- TODO confirm */
accdata *wmean; /* presumably the mean vector paired with "within" -- TODO confirm */
}
transform_info;
/**
 * Segmentation (endpointing) parameters. An instance is embedded in
 * preprocessed as the member "end".
 *
 * All members are plain ints. Several are counted in frames, as stated in
 * their comments. The two SNR members exist only when
 * DO_SUBTRACTED_SEGMENTATION is non-zero, so the size of this structure
 * depends on that switch.
 *
 * NOTE(review): rel_low, rel_high, gap_period, click_period, breath_period,
 * extend_annotation and param are not described anywhere in this header;
 * their units and meaning must be taken from the code that uses them.
 */
typedef struct
{ /* Segmentation parameters */
int rel_low;
int rel_high;
int gap_period;
int click_period;
int breath_period;
int extend_annotation;
int param;
int min_initial_quiet_frames; /* num silence frames needed before input */
int min_annotation_frames; /* minimum overall length */
int max_annotation_frames; /* maximum overall length */
int delete_leading_segments; /* num segments to delete. 0=no action */
int leading_segment_accept_if_not_found; /* Do not reject segmentation if not found */
int leading_segment_min_frames; /* remove unless shorter */
int leading_segment_max_frames; /* remove unless exceeded */
int leading_segment_min_silence_gap_frames;/* remove if good silence gap to next segment */
int beep_size; /*X201 beep filter */
int beep_threshold; /*X201 beep filter */
int min_segment_rel_c0; /* Any segment gets deleted whose peak c0 is < max - min_segment_rel_c0 */
#if DO_SUBTRACTED_SEGMENTATION
int snr_holdoff; /* Ignore first n frames when estimating speech level for SNR measure */
int min_acceptable_snr; /* for an acceptable segmentation */
#endif
}
endpoint_info;
/**
 * Processed speech data / front end output.
 *
 * The structure groups, in this order: a reference count, the pattern vector
 * section (dimensions, current frame pointers, sums of squares and the linear
 * transformation matrices), optional partial-distance approximation data,
 * the lookup tables, the ENC section, optional spectral data, the score
 * scaling parameters and the endpoint parameters.
 *
 * Its layout depends on two preprocessor conditions: PARTIAL_DISTANCE_APPROX
 * and DO_SUBTRACTED_SEGMENTATION. Every translation unit that shares a
 * preprocessed object has to be compiled with the same settings for both.
 *
 * NOTE(review): ownership and lengths of the pointer members, and who
 * maintains ref_count, are not visible in this header.
 */
typedef struct
{ /* processed speech data/front end output */
int ref_count; /* reference counts */
/* Pattern vector section */
int dim; /* dimension of frame vector */
int use_dim; /* dimension used for recognition */
int whole_dim; /* reduced feature use. Set unused to 127 (0) on model construction */
int use_from; /* first channel used for recognition */
featdata *last_frame; /* last frame processed in frame buffer */
imeldata *seq; /* current valid frame */
imeldata *seq_unnorm; /* current valid frame, for whole-word models */
prdata seq_sq_sum; /* sum of the squared of frames */
prdata seq_sq_sum_whole; /* sum of the squared of frames, for wholeword */
prdata seq_unnorm_sq_sum_whole; /* sum of the squared of frames, for wholeword; presumably taken from seq_unnorm (original comment duplicated the line above) -- TODO confirm */
int voicing_status; /* voicing code */
int post_proc; /* post processing functions; presumably a mask of SCALE, LIN_TRAN, VFR, USE_MULTAB -- TODO confirm */
imeldata *offset; /* offset vector with transformation */
imeldata **matrix; /* linear transformation matrix */
int imel_shift; /* Imelda scale factor (in shifts) */
covdata **imelda; /* linear transformation matrix, PMC or RN */
imeldata **invmat; /* inverse transformation matrix */
int inv_shift; /* inverse Imelda scale factor (in shifts) */
covdata **inverse; /* inverse linear transformation matrix, PMC or RN */
#if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */
int partial_distance_calc_dim; /* number of params to calc distance over, before approximating if beyond threshold */
scodata partial_distance_threshold;
prdata partial_distance_calc_threshold;
prdata partial_distance_offset;
prdata global_distance_over_n_params;
int global_model_means[MAX_DIMEN]; /* fixed-size array, one slot per possible dimension up to MAX_DIMEN */
prdata partial_mean_sq_sum;
prdata partial_seq_sq_sum;
prdata partial_seq_unnorm_sq_sum;
#endif
imeldata *chan_offset;
/* Channel Normalization etc */
/* Tables */
prdata exp_wt[MAX_WTS]; /* weights exp lookup table */
mul_table_info mul; /* Mul-table */
logadd_table_info add; /* logadd-table */
/* ENC */
booldata is_setup_for_noise;
booldata do_whole_enc; /* to enable ENC */
booldata do_sub_enc; /* to enable ENC */
booldata enc_count; /* NOTE(review): named like a counter but typed booldata -- confirm the intended type */
booldata ambient_valid; /* ambient estimates valid */
imeldata **pmc_fixmat; /* ENC matrix */
imeldata **pmc_fixinv; /* inverse ENC matrix */
covdata **pmc_matrix; /* ENC matrix in float */
covdata **pmc_inverse; /* inverse ENC matrix in float */
int pmc_matshift; /* scaling */
int pmc_invshift; /* scaling */
imeldata *ambient_mean; /* ambient mean vector */
imeldata *ambient_prof; /* ambient estimates, pseudo space */
imeldata *ambient_prof_unnorm; /* ambient estimates, unnormalised */
logadd_table_info fbadd; /* logadd-table for ENC */
#if DO_SUBTRACTED_SEGMENTATION
int mel_dim;
covdata **spec_inverse;
imeldata **spec_fixinv;
int spec_invshift;
int *cep_offset;
#endif
/* Parameters */
prdata mix_score_scale; /* Mixture score scaling constant */
prdata uni_score_scale; /* Unimodal score scaling constant */
prdata uni_score_offset; /* Unimodal score offset constant */
prdata imelda_scale; /* Imelda grand variance */
/* Endpoint data */
endpoint_info end;
}
preprocessed;
/**
 * Pairs a pointer to a preprocessed structure with accumulation state that
 * belongs to this pattern_info alone.
 *
 * The preprocessed data is referenced through a pointer and carries its own
 * ref_count member. The members after the "cannot be cloned" marker are held
 * by value.
 *
 * NOTE(review): whether prep is actually shared between several pattern_info
 * objects, and who releases it, is not visible in this header.
 */
typedef struct
{
preprocessed *prep; /* The preprocessed data structure */
/* The following stuff cannot be cloned */
booldata do_imelda; /* Alignment based accumulation */
transform_info imelda_acc; /* accumulators; presumably filled only while do_imelda is set -- TODO confirm */
}
pattern_info;
#endif /* _h_pre_desc_ */