| /*---------------------------------------------------------------------------* |
| * swicms.h * |
| * * |
| * Copyright 2007, 2008 Nuance Communications, Inc. * |
| * * |
| * Licensed under the Apache License, Version 2.0 (the 'License'); * |
| * you may not use this file except in compliance with the License. * |
| * * |
| * You may obtain a copy of the License at * |
| * http://www.apache.org/licenses/LICENSE-2.0 * |
| * * |
| * Unless required by applicable law or agreed to in writing, software * |
| * distributed under the License is distributed on an 'AS IS' BASIS, * |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * |
| * See the License for the specific language governing permissions and * |
| * limitations under the License. * |
| * * |
| *---------------------------------------------------------------------------*/ |
| |
| #ifndef __SWICMS_H__ |
| #define __SWICMS_H__ |
| |
| #include"all_defs.h" |
| #include"sizes.h" |
| #include"fronttyp.h" |
| #include"pre_desc.h" |
| |
/* Set to 1 to compile in the swicms_compare() / swicms_dump_stats()
 * diagnostics declared near the end of this header; with 0 they are
 * replaced by no-op macros. */
#define DEBUG_SWICMS 0

/* Frame-cache sizing. Cached frames are averaged into sections of
 * SWICMS_CACHE_RESOLUTION_DEFAULT frames each (by default), so the number
 * of sections is #frames / resolution. It is derived from the two values
 * it depends on rather than repeated as a literal; it still evaluates
 * to 100. */
#define MAX_CACHED_FRAMES 800
#define SWICMS_CACHE_RESOLUTION_DEFAULT 8
#define SWICMS_CACHE_SIZE_DEFAULT \
    (MAX_CACHED_FRAMES / SWICMS_CACHE_RESOLUTION_DEFAULT)
| |
| /** |
| * This is used for casting in debugger, just type (imelvec*)tmn. |
| */ |
| typedef struct |
| { |
| imeldata vec[MAX_CHAN_DIM]; |
| } |
| imelvec; |
| |
| /** |
| * Does channel normalization without using fine recognition segmenation. It remembers the |
| * frames of speech and uses that as a channel mean for the next utterance. A forget_factor |
| * is used to weigh the new speech mean estimate with an older one. |
| */ |
| typedef struct |
| { |
| imeldata tmn [MAX_CHAN_DIM]; /* target mean */ |
| imeldata cmn [MAX_CHAN_DIM]; /* channel mean */ |
| |
| imeldata lda_tmn [MAX_CHAN_DIM]; /* target mean */ |
| imeldata lda_cmn [MAX_CHAN_DIM]; /* channel mean */ |
| |
| imeldata adjust[MAX_CHAN_DIM]; /* target less channel */ |
| |
| int is_valid; |
| int forget_factor; /* in frames, mass of cmn average */ |
| int sbindex; /* speech to background index |
| 100 -> use only speech to calculate CMN |
| 000 -> use only background to calculate CMN |
| 050 -> use half/half .. |
| all numbers in between are acceptable */ |
| |
| int num_frames_in_cmn; /* num frames used to estimate cmn (or lda_cmn) */ |
| |
| /* for in-utterance channel normalization */ |
| struct { |
| int forget_factor2; /* cmn is given this weight to start off */ |
| int disable_after; /* we disable in-utt cms after this many fr*/ |
| int enable_after; /* we enable in-utt cms after this many fr*/ |
| int num_bou_frames_to_skip; /* don't start accum 'til this many frames */ |
| int num_frames_since_bou; /* counter for above, bou=begin-of-utt */ |
| int num_frames_in_accum; /* number of frames in accum */ |
| imeldata accum[MAX_CHAN_DIM]; /* accumulates frames of the current utt */ |
| } inutt; |
| |
| int cached_num_frames; /* we cache frames, until recognition is done |
| and can calculate speech mean from these */ |
| int cache_resolution; /* we'll avg this many frames per section */ |
| imeldata cached_sections[SWICMS_CACHE_SIZE_DEFAULT][MAX_CHAN_DIM]; |
| /*const*/ preprocessed* _prep; |
| } |
| swicms_norm_info; |
| |
| int swicms_init(swicms_norm_info* swicms); |
| int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen); |
| int apply_channel_normalization_in_swicms(swicms_norm_info *swicms, |
| imeldata* oframe, imeldata* iframe, |
| int dimen); |
| int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep); |
| |
| int swicms_update(swicms_norm_info* swicms, int speech_start_frame, int speech_end_frame); |
| |
| ESR_ReturnCode swicms_set_cmn(swicms_norm_info *swicms, const LCHAR *new_cmn_params ); |
| ESR_ReturnCode swicms_get_cmn(swicms_norm_info *swicms, LCHAR *cmn_params, size_t* len ); |
| |
/*
 * Diagnostics. When DEBUG_SWICMS is non-zero these are real functions
 * (defined elsewhere); otherwise they are no-op macros whose arguments
 * are discarded without being evaluated.
 *
 * The no-op form expands to ((void)0) rather than to nothing, so a call
 * is always a well-formed expression statement (safe in unbraced
 * if/else bodies and inside comma expressions) instead of leaving a
 * stray empty statement behind.
 */
#if DEBUG_SWICMS
int swicms_compare(swicms_norm_info *swicms, imeldata *imelda_adjust);
int swicms_dump_stats(swicms_norm_info *swicms);
#else
#define swicms_compare(swicms, ia) ((void)0)
#define swicms_dump_stats(swicms)  ((void)0)
#endif
| |
| #endif |
| |