blob: e9e9037abb2aabb055d363946f6ae22c71acfa1d [file] [log] [blame]
/*---------------------------------------------------------------------------*
* srec.c *
* *
* Copyright 2007, 2008 Nuance Communciations, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "pstdio.h"
#include "passert.h"
#include "portable.h"
#include "hmm_desc.h"
#include "utteranc.h"
#include "hmmlib.h"
#include "srec_sizes.h"
#include "search_network.h"
#include "srec_context.h"
#include "srec.h"
#include "srec_stats.h"
#include "srec_debug.h"
#include "srec_tokens.h"
#include "word_lattice.h"
#include "swimodel.h"
#if USE_COMP_STATS
#include "comp_stats.h"
#endif
#include "c42mul.h"
#ifdef SET_RCSID
static const char *rcsid = 0 ? (const char *) &rcsid :
"$Id: srec.c,v 1.39.4.31 2008/06/23 17:20:39 dahan Exp $";
#endif
#define SILENCE_MODEL_INDEX 0
#define PRUNE_TIGHTEN 0.9 /*if we run out of room in the state arrays,
keep multiplying pruning thresh by this amount
until there is room */
/*--------------------------------------------------------------------------*
* *
* protos *
* *
*--------------------------------------------------------------------------*/
static void reset_cost_offsets(multi_srec* rec, frameID current_search_frame,
costdata current_best_cost);
static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta,
costdata *pcurrent_best_cost,
costdata *precomputed_model_scores);
/*--------------------------------------------------------------------------*
* *
* utils *
* *
*--------------------------------------------------------------------------*/
static void dump_core(char *msg)
{
PLogError ( msg );
ASSERT(0);
}
static altword_token* reprune_altword_token_batch(srec* rec,
altword_token* batch,
bigcostdata costlimit)
{
altword_token *awtoken, *next_awtoken;
altword_token **awtokenp;
/* look for previous invalidate, see below */
if (batch->costbasis == MAXcostdata / 2)
{ /* was costdelta // costlimit */
free_altword_token(rec, batch);
return AWTNULL;
}
/* a flag to check whether we already pruned this batch would be nice */
/* first prune the CDR of the list, ie everything but the head */
awtokenp = &batch->next_token;
for (awtoken = batch->next_token; awtoken != AWTNULL; awtoken = next_awtoken)
{
next_awtoken = awtoken->next_token;
if ((bigcostdata)batch->costbasis + awtoken->costdelta > costlimit)
{
(*awtokenp) = awtoken->next_token;
awtoken->refcount = 1; /* to make sure it frees */
free_altword_token(rec, awtoken);
}
else
awtokenp = &awtoken->next_token;
}
/* now see about the CAR of the list, ie the head */
if ((bigcostdata)(batch->costbasis) + batch->costdelta < costlimit)
{
/* head of list survives pruning => no problem */
}
else if (batch->next_token != AWTNULL)
{
/* head of list does not survive => since we can't change the pointer
we copy a survivor into it and free that original */
awtoken = batch->next_token;
batch->costdelta = awtoken->costdelta;
batch->word = awtoken->word;
batch->word_backtrace = awtoken->word_backtrace;
/*ASSERT( batch->refcount == awtoken->refcount); */
/* batch->refcount = awtoken->refcount; */
batch->next_token = awtoken->next_token;
awtoken->refcount = 1; /* to make sure it frees */
free_altword_token(rec, awtoken);
}
else
{
/* head of list does not survive, nothing survives, so invalidate it */
batch->costbasis = MAXcostdata / 2; /* was costdelta */
free_altword_token(rec, batch);
batch = AWTNULL;
}
return batch;
}
static void reprune_altword_tokens(srec* rec)
{
stokenID i, j;
fsmarc_token* stoken;
fsmnode_token* ftoken;
bigcostdata current_prune_delta = rec->prune_delta;
altword_token* awtoken;
/* first clear the costbases */
for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
{
stoken = &rec->fsmarc_token_array[i];
for (j = 0; j < stoken->num_hmm_states; j++)
if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL)
awtoken->costbasis = MAXcostdata;
}
for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
{
ftoken = &rec->fsmnode_token_array[i];
if ((awtoken = ftoken->aword_backtrace) != AWTNULL)
awtoken->costbasis = MAXcostdata;
}
/* costbases for altword attached to stoken */
for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
{
stoken = &rec->fsmarc_token_array[i];
for (j = 0; j < stoken->num_hmm_states; j++)
if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL)
if (awtoken->costbasis > stoken->cost[j])
awtoken->costbasis = stoken->cost[j];
}
/* costbases for altword attached to ftokens */
for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
{
ftoken = &rec->fsmnode_token_array[i];
if ((awtoken = ftoken->aword_backtrace) != AWTNULL)
if (awtoken->costbasis > ftoken->cost)
awtoken->costbasis = ftoken->cost;
}
/* now reprune */
while (rec->altword_token_freelist_len < rec->altword_token_array_size / 4
|| rec->altword_token_freelist_len < 2*rec->word_priority_q->max_in_q)
{
SREC_STATS_INC_AWTOKEN_REPRUNES(1);
current_prune_delta = (costdata)(PRUNE_TIGHTEN * PRUNE_TIGHTEN * current_prune_delta);
for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
{
stoken = &rec->fsmarc_token_array[i];
for (j = 0; j < stoken->num_hmm_states; j++)
{
if (stoken->aword_backtrace[j] != AWTNULL)
{
stoken->aword_backtrace[j] =
reprune_altword_token_batch(rec, stoken->aword_backtrace[j],
current_prune_delta);
}
}
}
for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
{
ftoken = &rec->fsmnode_token_array[i];
if (ftoken->aword_backtrace != AWTNULL)
{
ftoken->aword_backtrace =
reprune_altword_token_batch(rec, ftoken->aword_backtrace,
current_prune_delta);
}
}
ASSERT(current_prune_delta > 0);
}
}
#define refcopy_altwords(rEc, aWtOkEn) (aWtOkEn?(aWtOkEn->refcount++,aWtOkEn):aWtOkEn)
static altword_token* copy_altwords(srec* rec, altword_token* list1, costdata delta)
{
altword_token *q2, dummy, *last_q2 = &dummy;
costdata q2_costdelta;
/* first check for space */
#if BUILD & BUILD_DEBUG
int num = 0;
for (num = 0, q2 = list1; q2 != AWTNULL; q2 = q2->next_token)
num++;
if (num > rec->altword_token_freelist_len)
{
printf("warning: mid-copy reprune_altword_tokens()\n");
ASSERT(0);
reprune_altword_tokens(rec);
}
#endif
/* now do the copy */
for (; list1 != AWTNULL; list1 = list1->next_token)
{
ASSERT(list1->refcount >= 1);
q2_costdelta = list1->costdelta + delta;
ASSERT(list1->costdelta != MAXcostdata);
if (q2_costdelta > rec->prune_delta)
continue;
q2 = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
if (!q2) /* this should never happen */
break;
last_q2->next_token = q2;
q2->costdelta = q2_costdelta;
q2->word = list1->word;
q2->word_backtrace = list1->word_backtrace;
last_q2 = q2;
}
last_q2->next_token = AWTNULL;
return dummy.next_token;
}
#if 1
/* sizewise_altwords just makes sure the list of altwords is no longer than
the number of possible word ends. Any tokens beyond that length will get
ignored later anyways, so we may as well kill them here.
This also is related to the anticipated repruning. This code currently
makes use of calloc/free and qsort, but can easily be rewritten to just
to a linear in-place sort, linear looking for the 10 best score should not
take too long. This is on the todo list!
*/
int altword_token_ptr_cmp(const void* a, const void* b)
{
const altword_token** A = (const altword_token**)a, **B = (const altword_token**)b;
if ((*A)->costdelta > (*B)->costdelta) return 1;
else if ((*A)->costdelta < (*B)->costdelta) return -1;
else return 0;
}
static altword_token* sizewise_altwords(srec* rec, altword_token* awtoken_head)
{
#define SIZEWISE_THRESH (rec->word_priority_q->max_in_q)
#define SIZEWISE_TARGET (rec->word_priority_q->max_in_q*4/5)
int i, num;
altword_token *awtoken, **list;
if( SIZEWISE_TARGET == 0) {
free_altword_token(rec, awtoken_head);
return NULL;
}
num = count_altword_token(rec, awtoken_head);
/* if the linked list is shorter than max_in_q we're fine */
if (num <= SIZEWISE_THRESH)
return awtoken_head;
else
{
list = (altword_token**)CALLOC(num, sizeof(altword_token*), L("search.srec.altword_tokens"));
ASSERT(awtoken_head->refcount == 1);
for (i = 0, awtoken = awtoken_head; i < num; i++, awtoken = awtoken->next_token)
list[i] = awtoken;
qsort(list, num, sizeof(altword_token*), altword_token_ptr_cmp);
for (i = 0; i < SIZEWISE_TARGET; i++)
list[i]->next_token = list[i+1];
if(i>0) list[i-1]->next_token = AWTNULL;
for (; i < num; i++)
{
list[i]->refcount = 1; /* make sure it frees */
free_altword_token(rec, list[i]);
}
awtoken_head = list[0];
awtoken_head->refcount = 1;
FREE(list);
return awtoken_head;
}
}
#else
#define sizewise_altwords(ReC,hEad) hEad
#endif
/*--------------------------------------------------------------------------*
* *
* acoustic scoring utils *
* *
*--------------------------------------------------------------------------*/
#define DO_COMPUTE_MODEL 0
#define DO_NOT_COMPUTE_MODEL MAXcostdata
static asr_uint16_t best_uint16(asr_uint16_t* p, int n)
{
asr_uint16_t rv = p[0];
for (;--n > 0;p++) if (rv > *p) rv = *p;
return rv;
}
static int compute_model_scores(costdata *current_model_scores, const SWIModel *acoustic_models,
pattern_info *pattern, frameID current_search_frame)
{
int i;
int num_models_computed = 0;
for (i = 0; i < acoustic_models->num_hmmstates; i++)
{
if (current_model_scores[i] == DO_COMPUTE_MODEL)
{
scodata score = mixture_diagonal_gaussian_swimodel(pattern->prep,
&acoustic_models->hmmstates[i], acoustic_models->num_dims);
ASSERT(score <= 0 && "model score out of range");
current_model_scores[i] = (costdata) - score;
num_models_computed++;
}
}
return num_models_computed;
}
/*precompute all needed models to be used by next frame of search*/
static int find_which_models_to_compute(srec *rec, const SWIModel *acoustic_models)
{
int i;
modelID model_index;
stokenID current_token_index;
ftokenID current_ftoken_index;
fsmarc_token *current_token;
fsmnode_token *current_ftoken;
costdata *current_model_scores;
/* arcID arc_index; */
arcID fsm_arc_index;
HMMInfo* hmm_info;
FSMnode* fsm_node;
FSMarc* fsm_arc;
/*use the current_model_scores array both to tell the model computing stuff
what models to compute and to get the scores back. This is a bit ugly, but
saves having another array to allocate*/
/* this belongs elsewhere at initialization,
eg. where we'll associate search to acoustic models
*/
rec->avg_state_durations = acoustic_models->avg_state_durations;
current_model_scores = rec->current_model_scores;
for (model_index = 0; model_index < acoustic_models->num_hmmstates; model_index++)
{
current_model_scores[model_index] = DO_NOT_COMPUTE_MODEL;
}
current_token_index = rec->active_fsmarc_tokens;
while (current_token_index != MAXstokenID)
{
current_token = &(rec->fsmarc_token_array[current_token_index]);
/*need to compute all models needed within this HMM*/
fsm_arc = &rec->context->FSMarc_list[ current_token->FSMarc_index];
hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];
/*handle all states that are alive in this hmm*/
for (i = 0;i < hmm_info->num_states;i++)
{
if ((current_token->cost[i] != MAXcostdata) ||
((i > 0) && current_token->cost[i-1] != MAXcostdata))
{
model_index = hmm_info->state_indices[i];
current_model_scores[model_index] = DO_COMPUTE_MODEL;
}
}
current_token_index = current_token->next_token_index;
}
/*for each active FSM node, find models which can come from node*/
current_ftoken_index = rec->active_fsmnode_tokens;
while (current_ftoken_index != MAXftokenID)
{
current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]);
fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index];
fsm_arc = NULL;
for (fsm_arc_index = fsm_node->un_ptr.first_next_arc; fsm_arc_index != MAXarcID;
fsm_arc_index = fsm_arc->linkl_next_arc) {
fsm_arc = rec->context->FSMarc_list+fsm_arc_index;
if (fsm_arc->ilabel != MAXlabelID)
{
hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];
if (hmm_info->num_states > 0)
{
/* we should build in here a check that this arc has reasonable weight */
/* if(fsm_arc->cost < rec->prune_delta) */
current_model_scores[hmm_info->state_indices[0]] = DO_COMPUTE_MODEL;
}
}
}
current_ftoken_index = current_ftoken->next_token_index;
}
/*compute the scores in a batch - this allows the model computing code to
chunk it up however it wants*/
return 0;
}
/*--------------------------------------------------------------------------*
* *
* pruning utils *
* *
*--------------------------------------------------------------------------*/
/*this is called at the end of the search*/
static int prune_new_tokens(srec *rec, costdata current_prune_thresh)
{
int i;
nodeID num_deleted;
stokenID token_index;
fsmarc_token *token;
stokenID next_token_index;
stokenID *list_head_pointer;
char any_alive;
num_deleted = 0;
list_head_pointer = &(rec->active_fsmarc_tokens);
for (token_index = rec->active_fsmarc_tokens; token_index != MAXstokenID;
token_index = next_token_index)
{
token = &(rec->fsmarc_token_array[token_index]);
next_token_index = token->next_token_index;
any_alive = 0;
for (i = 0;i < token->num_hmm_states;i++)
{
if (token->cost[i] < current_prune_thresh)
{
any_alive = 1;
}
}
if (!any_alive)
{ /*everything pruned so recylce the token*/
*list_head_pointer = next_token_index;
rec->best_token_for_arc[rec->fsmarc_token_array[token_index].FSMarc_index] = MAXstokenID;
free_fsmarc_token(rec, token_index);
num_deleted++;
rec->num_new_states--;
}
else
{
list_head_pointer = &token->next_token_index;
}
}
return num_deleted;
}
static void reprune_word_tokens_if_necessary(srec *rec)
{
word_token* wtoken;
wtokenID wtoken_index = rec->word_token_freelist;
wtokenID num_free_wtokens = 0;
for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
{
wtoken = &rec->word_token_array[ wtoken_index];
num_free_wtokens++;
}
if (num_free_wtokens < 2*rec->word_priority_q->max_in_q)
reprune_word_tokens(rec, 0);
}
/*this is called when we run out of room in the state token arrays and need to make more room -
it only prunes in theh new time slice - we could also look at pruning the previous slice if needed*/
static costdata reprune_new_states(srec *rec, costdata current_best_cost, costdata current_prune_delta)
{
int num_deleted;
/*first check to see if current pruning thresholds make enough room
(because of better max)*/
prune_new_tokens(rec, current_best_cost + current_prune_delta);
ASSERT(((float)current_best_cost) + current_prune_delta < (float)SHRT_MAX);
while ((rec->num_new_states >= rec->max_new_states - 1)
|| rec->fsmarc_token_freelist == MAXstokenID)
{
SREC_STATS_INC_STOKEN_REPRUNES(1);
current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);
if (current_prune_delta <= 1)
{
/*this seems like an unlikely case, but if we can't prune enough to make room, give up
on the utterance (better than being stuck here forever!)*/
/*FIX replace with crec abort mechanism*/
PLogError ("reprune_new_states: can't seem to prune enough - best cost %d num_new_states %d\n",
current_best_cost, rec->num_new_states);
print_fsmarc_token_list(rec, rec->active_fsmarc_tokens, "CANNOT PRUNE");
dump_core("reprune died\n");
}
num_deleted = prune_new_tokens(rec, current_best_cost + current_prune_delta);
ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);
}
return current_prune_delta;
}
static void prune_fsmnode_tokens(srec *rec, costdata current_prune_thresh, ftokenID not_this_one)
{
nodeID num_deleted;
ftokenID token_index;
fsmnode_token *token;
ftokenID next_token_index;
ftokenID *list_head_pointer;
num_deleted = 0;
token_index = rec->active_fsmnode_tokens;
token = &(rec->fsmnode_token_array[token_index]);
list_head_pointer = &(rec->active_fsmnode_tokens);
while (token_index != MAXftokenID)
{
next_token_index = token->next_token_index;
if( token_index!=not_this_one && token->cost >= current_prune_thresh)
{
/*pruned so recycle the token*/
*list_head_pointer = next_token_index;
rec->best_token_for_node[token->FSMnode_index] = MAXftokenID;
free_fsmnode_token(rec, token_index);
num_deleted++;
}
else
{
list_head_pointer = &token->next_token_index;
}
token_index = next_token_index;
token = &(rec->fsmnode_token_array[token_index]);
}
}
/*this is called when we run out of room in the state token arrays and need to make more room -
it only prunes in theh new time slice - we could also look at pruning the previous slice if needed*/
static costdata reprune_fsmnode_tokens(srec *rec, costdata current_best_cost, costdata current_prune_delta,
ftokenID not_this_one)
{
/*first check to see if current pruning thresholds make enough
room (because of better max)*/
prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);
ASSERT((float)current_best_cost + (float)current_prune_delta < (float)SHRT_MAX);
while (rec->fsmnode_token_freelist == MAXftokenID)
{
SREC_STATS_INC_FTOKEN_REPRUNES(1);
current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);
if (current_prune_delta <= 1)
{
/*this seems like an unlikely case, but if we can't prune enough to make room, give up
on the utterance (better than being stuck here forever!)*/
/*FIX replace with crec abort mechanism*/
PLogError ("reprune_fsmnode_tokens: can't seem to prune enough - best cost %d\n",
current_best_cost);
print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "CANNOT PRUNE: ");
dump_core("reprune_fsmnode_tokens() died\n");
}
prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);
ASSERT((float)current_best_cost + (float)current_prune_delta < (float)USHRT_MAX);
}
return current_prune_delta;
}
void reset_best_cost_to_zero(srec* rec, costdata current_best_cost)
{
fsmarc_token* stoken;
stokenID token_index;
short i, num_states;
/*do the state tokens*/
for (token_index = rec->active_fsmarc_tokens;
token_index != MAXstokenID;
token_index = stoken->next_token_index)
{
stoken = &rec->fsmarc_token_array[ token_index];
num_states = stoken->num_hmm_states;
for (i = 0; i < num_states; i++)
{
if (stoken->cost[i] < MAXcostdata)
{
ASSERT(stoken->cost[i] >= current_best_cost);
stoken->cost[i] = (costdata)(stoken->cost[i] - (costdata) current_best_cost);
}
}
}
}
static void reset_cost_offsets(multi_srec* rec, frameID current_frame,
costdata current_best_cost)
{
rec->cost_offset_for_frame[current_frame] = current_best_cost;
if (current_frame == 0)
rec->accumulated_cost_offset[current_frame] = current_best_cost;
else
rec->accumulated_cost_offset[current_frame] = rec->accumulated_cost_offset[current_frame-1] + current_best_cost;
}
/*--------------------------------------------------------------------------*
* *
* word_token functions *
* *
*--------------------------------------------------------------------------*/
/*this function allocates a new word token and remembers it in a list in the srec structure (to be used for backtrace later on*/
static wtokenID create_word_token(srec *rec)
{
wtokenID word_token_index;
word_token *word_token;
word_token_index = get_free_word_token(rec, NULL_IF_NO_TOKENS);
if (0 && word_token_index == MAXwtokenID)
{
/*FIX - use crec error handling*/
dump_core("create_word_token: cannot allocate word token - we need"
" to figure out a word pruning strategy when this happens!\n");
}
word_token = &(rec->word_token_array[word_token_index]);
return word_token_index;
}
/* gets rid of fsmnode which trace back to this word since
the word is not goingto make it ono the word lattice */
static int block_fsmnodes_per_backtrace(srec *rec, wtokenID wtoken_id)
{
int num_ftokens_blocked = 0;
ftokenID current_token_index = rec->active_fsmnode_tokens;
while (current_token_index != MAXftokenID) {
fsmnode_token *token = &(rec->fsmnode_token_array[current_token_index]);
if (token->word_backtrace == wtoken_id) {
num_ftokens_blocked++;
token->cost = MAXcostdata;
}
current_token_index = token->next_token_index;
}
return num_ftokens_blocked;
}
/* processing a word boundary,
current_token is the fsmnode_token to the left of the boundary
cost is the cost through this frame
pcurrent_word_threshold is the worst score for words on the prio.q
returns the word_token index just created
*/
static wtokenID srec_process_word_boundary_nbest(srec* rec,
nodeID end_node,
wordID word,
wtokenID word_backtrace,
costdata cost,
costdata* pcurrent_word_threshold,
int *any_nodes_blocked)
{
wtokenID wtoken_index;
wtokenID return_wtoken_index;
wtokenID token_id_to_remove;
word_token* wtoken;
if (word_backtrace != MAXwtokenID)
{
word_token* btoken = &rec->word_token_array[word_backtrace];
if (btoken->end_time >= rec->current_search_frame)
{
SREC_STATS_INC_BAD_BACKTRACES();
return MAXwtokenID;
}
}
/*make new word token*/
wtoken_index = create_word_token(rec);
//ASSERT(wtoken_index != MAXwtokenID);
if (wtoken_index == MAXwtokenID)
{
/* we could have called reprune_word_tokens() here, but we instead called it
at the beginning of do_epsilon_updates() to avoid complications of
re-pruning word tokens too deep in the search update */
return_wtoken_index = MAXwtokenID;
return return_wtoken_index;
}
wtoken = &(rec->word_token_array[wtoken_index]);
wtoken->word = word;
wtoken->_word_end_time = 0; // new
wtoken->end_time = rec->current_search_frame;
wtoken->end_node = end_node;
wtoken->backtrace = word_backtrace;
wtoken->cost = cost;
token_id_to_remove = add_word_token_to_priority_q(rec->word_priority_q, wtoken_index, rec->word_token_array);
if (token_id_to_remove == wtoken_index)
return_wtoken_index = MAXwtokenID;
else
{
/* the word was truly added to the priority q, so we must
get the new worst word on that list */
*pcurrent_word_threshold = get_priority_q_threshold(rec->word_priority_q, rec->word_token_array);
return_wtoken_index = wtoken_index;
}
if (token_id_to_remove != MAXwtokenID)
{
/* Jean: must allow for this word_token to be recycled */
/* ok, the word won't we maintained, so there's no point to
continue to search this path (as headed by this fsmarc_token) */
*any_nodes_blocked += block_fsmnodes_per_backtrace( rec, token_id_to_remove);
/* we killed the fsmnode token associated with the word being removed.
But, we didn't kill it's word backtrace, so there may be word tokens
in the backtrace which cannot connect. But we can't really kill
the whole backtrace since it might be shared with other
active states, Mike's idea is to add a counter on the
word_tokens, which counts how many active paths are using
this word_token ... TODO */
/* print_word_token(rec, token_id_to_remove, "srec_process_word_boundary killed wtoken "); */
free_word_token(rec, token_id_to_remove);
}
return return_wtoken_index;
}
ftokenID* srec_get_parent_for_active_fsmnode(srec* rec, ftokenID ftoken_index)
{
ftokenID* parent_ftoken_index = &rec->active_fsmnode_tokens;
while( (*parent_ftoken_index) != MAXftokenID) {
fsmnode_token* parent_ftoken = &rec->fsmnode_token_array[ *parent_ftoken_index];
if( *parent_ftoken_index == ftoken_index)
return parent_ftoken_index;
parent_ftoken_index = &parent_ftoken->next_token_index;
}
return NULL;
}
/*---------------------------------------------------------------------------*
* *
* updates *
* *
*---------------------------------------------------------------------------*/
/*handles epsilon transitions (used for word boundaries). Epsilons come from active
fsmnode tokens and maximize into new FSMnode tokens (finds the best path to an FSM
node).
When we hit an
epsilon, create a word token, put it in the path, and remember it in a
list of all word tokens*/
static int do_epsilon_updates(srec *rec, costdata prune_delta,
costdata best_cost)
{
fsmnode_token *new_ftoken;
fsmnode_token *current_ftoken;
wtokenID wtoken_index;
FSMnode* fsm_node;
FSMarc* fsm_arc;
costdata cost, cost_with_wtw;
ftokenID new_ftoken_index;
ftokenID current_ftoken_index;
costdata current_word_threshold;
arcID fsm_arc_index;
wordID word_with_wtw;
int num_fsm_nodes_updated=0, num_fsm_nodes_blocked, num_fsm_nodes_blocked2;
int num_wtokens_maybe_homonyms;
costdata current_prune_delta;
costdata current_prune_thresh;
altword_token* awtoken;
// printf("FRAME %d\n", rec->current_search_frame);
// print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "BEFORE UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
current_word_threshold = MAXcostdata;
current_prune_delta = prune_delta;
current_prune_thresh = best_cost + current_prune_delta;
current_ftoken_index = rec->active_fsmnode_tokens;
while (current_ftoken_index != MAXftokenID)
{
// print_fsmnode_token(rec, current_ftoken_index, "processing ... ");
current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]);
cost = current_ftoken->cost; /*get last state of token*/
fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index];
fsm_arc = NULL;
/* Jean: see below too, let's remember the wtoken_index we create in
case we need to re-use it. All N epsilon updates, and all M
M outgoing arcs can share, cuz this is the same arriving arc@frame */
wtoken_index = MAXwtokenID;
if( cost >= current_prune_thresh) {
ftokenID* parent_ftoken_index;
// the srec_get_parent_for_active_fsmnode() functions can be
// gotten rid of if we use a doubly-linked list (fwd/bwd ptrs)
parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, current_ftoken_index);
if(!parent_ftoken_index) {
PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, current_ftoken_index);
print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
exit(1);
}
*parent_ftoken_index = current_ftoken->next_token_index;
// effectively release this fsmnode_token and go to next one
rec->best_token_for_node[ current_ftoken->FSMnode_index] = MAXftokenID;
free_fsmnode_token( rec, current_ftoken_index);
current_ftoken_index = *parent_ftoken_index;
continue;
}
num_fsm_nodes_updated++;
num_fsm_nodes_blocked = 0;
num_wtokens_maybe_homonyms = 0;
for (fsm_arc_index = fsm_node->un_ptr.first_next_arc;
fsm_arc_index != MAXarcID;
fsm_arc_index = fsm_arc->linkl_next_arc)
{
fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
/* consider only epsilon transitions */
if (fsm_arc->ilabel >= EPSILON_OFFSET)
continue;
/* can't loop to yourself on epsilon! */
ASSERT(fsm_arc->to_node != current_ftoken->FSMnode_index);
cost_with_wtw = current_ftoken->cost + fsm_arc->cost; /* WTW */
word_with_wtw = current_ftoken->word;
if(fsm_arc->olabel != WORD_EPSILON_LABEL)
word_with_wtw = fsm_arc->olabel ;
// we should compare cost_with_wtw but let's let the priority_q
// do the pruning
if (cost>=current_prune_thresh || fsm_arc->cost>=current_prune_thresh)
continue;
/*if word boundary, see if it crosses the word end threshold*/
/* no room on the word priority_q, so not worth pursuing */
if (fsm_arc->ilabel == WORD_BOUNDARY && cost_with_wtw >= current_word_threshold) {
continue; // goto NEXT_FTOKEN;
}
new_ftoken = NULL;
new_ftoken_index = rec->best_token_for_node[ fsm_arc->to_node];
if(new_ftoken_index != MAXftokenID)
new_ftoken = &rec->fsmnode_token_array[ new_ftoken_index];
if( new_ftoken && (current_ftoken->cost+fsm_arc->cost)<new_ftoken->cost) {
/* clobber it */
} else if(new_ftoken) {
/* merge it */
} else if(rec->fsmnode_token_freelist == MAXftokenID) {
/* create it? maybe */
current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, current_ftoken_index);
current_prune_thresh = best_cost + current_prune_delta;
}
if (fsm_arc->ilabel == WORD_BOUNDARY) {
/* 20030920, for sure the backtrace will change! */
// token->word_backtrace = MAXwtokenID;
wtoken_index = srec_process_word_boundary_nbest(rec,
current_ftoken->FSMnode_index,
word_with_wtw,
current_ftoken->word_backtrace,
cost_with_wtw,
&current_word_threshold,
&num_fsm_nodes_blocked);
if (wtoken_index != MAXwtokenID) {
WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array+wtoken_index),
rec->word_token_array[wtoken_index].end_time - current_ftoken->silence_duration);
}
if( fsm_arc->olabel!=WORD_EPSILON_LABEL && wtoken_index != MAXwtokenID) {
num_wtokens_maybe_homonyms++;
if( num_wtokens_maybe_homonyms>1)
WORD_TOKEN_SET_HOMONYM( (rec->word_token_array+wtoken_index), 1);
}
/* now drop alternative words, note the use of current_token
because token is on the other side already */
if (current_ftoken->aword_backtrace != AWTNULL) {
awtoken = current_ftoken->aword_backtrace;
for (; awtoken != AWTNULL; awtoken = awtoken->next_token) {
wtokenID wti;
wti = srec_process_word_boundary_nbest(rec,
current_ftoken->FSMnode_index,
awtoken->word,
awtoken->word_backtrace,
cost_with_wtw + awtoken->costdelta,
&current_word_threshold,
&num_fsm_nodes_blocked2);
}
/* if we don't free the altwords here, i had thought that
updates from stateN would make the altwords grow and grow,
but by that time all the fsmnodes are brand new */
/* leaving them alive allows them to propagate to state0 thru
other epsilons (eg non .wb) to new nodes but we don't
use such arcs.
the .wb would not get dropped again 'cuz we check
for that in wtoken_index above.
this is quite complex and the case for dropping word_tokens
from the node AFTER the .wb can be made
ie. would need a re-write of do_epsilon_updates() */
if( current_ftoken->aword_backtrace != AWTNULL)
free_altword_token_batch(rec, current_ftoken->aword_backtrace);
current_ftoken->aword_backtrace = AWTNULL;
/*print_fsmnode_token(rec, token-rec->fsmnode_token_array, "123a");*/
}
if( wtoken_index != MAXwtokenID) {
if( new_ftoken == NULL) {
/* create token for the other side */
new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS);
// this should not happen because of the above check near
// fsmnode_token_freelist == MAXftokenID
ASSERT(new_ftoken_index != MAXftokenID);
new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
new_ftoken->word_backtrace = wtoken_index;
new_ftoken->cost = cost_with_wtw;
new_ftoken->word = WORD_EPSILON_LABEL;
new_ftoken->FSMnode_index = fsm_arc->to_node;
new_ftoken->aword_backtrace = AWTNULL;
new_ftoken->next_token_index = current_ftoken->next_token_index;
current_ftoken->next_token_index = new_ftoken_index;
rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index;
} else if(new_ftoken && cost_with_wtw<new_ftoken->cost) {
/* update token on the other side */
ftokenID *parent_ftoken_index;
new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
new_ftoken->cost = cost_with_wtw;
new_ftoken->word_backtrace = wtoken_index;
new_ftoken->word = WORD_EPSILON_LABEL;
// unchanged token->FSMnode_index = fsm_arc->to_node;
// because this token was updated, we need to reprocess it, right after
parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index);
if(!parent_ftoken_index) {
PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, new_ftoken_index);
print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
exit(1);
}
*parent_ftoken_index = new_ftoken->next_token_index;
new_ftoken->next_token_index = current_ftoken->next_token_index;
current_ftoken->next_token_index = new_ftoken_index;
rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index;
/* new_ftoken->aword_backtrace must be null, alts here were
processed and dropped in srec_process_word_boundary_nbest() */
if(new_ftoken->aword_backtrace != AWTNULL) {
PLogError( ("Error: internal search error near %s %d\n"), __FILE__, __LINE__);
continue;
}
} else {
/* token on other side is same or better, just leave it */
}
}
}
else if(fsm_arc->ilabel == EPSILON_LABEL) {
if( new_ftoken == NULL) {
/* create token for the other side */
new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS);
// this should not happen because of the above check near
// fsmnode_token_freelist == MAXftokenID
ASSERT(new_ftoken_index != MAXftokenID);
new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
new_ftoken->word_backtrace = current_ftoken->word_backtrace;
new_ftoken->cost = cost_with_wtw;
new_ftoken->word = word_with_wtw;
new_ftoken->FSMnode_index = fsm_arc->to_node;
new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace);
new_ftoken->next_token_index = current_ftoken->next_token_index;
current_ftoken->next_token_index = new_ftoken_index;
rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index;
} else if(new_ftoken && cost_with_wtw<new_ftoken->cost) {
/* update token on the other side */
ftokenID *parent_ftoken_index;
new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
new_ftoken->cost = cost_with_wtw;
new_ftoken->word_backtrace = current_ftoken->word_backtrace;
new_ftoken->word = word_with_wtw;
/* here we are giving up the path and alternatives that existed at
this node, which is not great! The new (better) top choice
coming in and it's alternatives are propagated instead.
TODO: merge the alternative lists and the previous top choice
*/
if(new_ftoken->aword_backtrace!=AWTNULL)
free_altword_token_batch( rec, new_ftoken->aword_backtrace);
new_ftoken->aword_backtrace = AWTNULL;
new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace);
// unchanged token->FSMnode_index = fsm_arc->to_node;
// because this token was updated, we need to re-process it, right after
parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index);
if(!parent_ftoken_index) {
PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, new_ftoken_index);
print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
exit(1);
}
*parent_ftoken_index = new_ftoken->next_token_index;
new_ftoken->next_token_index = current_ftoken->next_token_index;
current_ftoken->next_token_index = new_ftoken_index;
rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index;
} else {
/* token on other side is same or better, just leave it */
/* todo: maybe merge alternative lists? */
}
}
} /* loop over arcs */
ASSERT( current_ftoken->cost != MAXcostdata);
if( num_fsm_nodes_blocked) {
/* we do not want to propagate fsm node tokens for paths that have
just been killed on the basis of no space for word propagation */
prune_fsmnode_tokens(rec, MAXcostdata/2, current_ftoken_index);
}
// NEXT_FTOKEN:
current_ftoken_index = current_ftoken->next_token_index;
}
// print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "AFTER UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
sanity_check_altwords(rec, rec->altword_token_freelist);
return num_fsm_nodes_updated;
}
static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta,
costdata *pcurrent_best_cost,
costdata *precomputed_model_scores)
{
stokenID current_token_index;
fsmarc_token *current_token;
costdata current_best_cost;
costdata current_prune_thresh;
costdata current_prune_delta;
costdata model_cost;
asr_int16_t any_alive;
HMMInfo *hmm_info;
modelID model_index;
asr_int16_t internal_state, end_state;
arcID fsm_arc_index;
FSMarc* fsm_arc;
costdata prev_cost;
costdata self_loop_cost;
current_best_cost = *pcurrent_best_cost;
current_prune_delta = *pcurrent_prune_delta;
current_prune_thresh = current_best_cost + current_prune_delta;
ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);
/* best_token_for_arc must be reset here, cuz the same array might have
been used by another gender. Alternatively we could have let each
recog use it's own array thereby save cpu at expense of memory */
for (fsm_arc_index = 0; fsm_arc_index < rec->context->num_arcs; fsm_arc_index++)
rec->best_token_for_arc[fsm_arc_index] = MAXstokenID;
current_token_index = rec->active_fsmarc_tokens;
while (current_token_index != MAXstokenID)
{
current_token = &(rec->fsmarc_token_array[current_token_index]);
fsm_arc_index = current_token->FSMarc_index;
fsm_arc = &rec->context->FSMarc_list[fsm_arc_index];
/* best_token_for_arc must be set here, cuz it was reset above */
rec->best_token_for_arc[fsm_arc_index] = current_token_index;
hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel];
any_alive = 0;
end_state = current_token->num_hmm_states - 1;
for (internal_state = end_state; internal_state >= 0; internal_state--)
{
model_index = hmm_info->state_indices[internal_state];
model_cost = precomputed_model_scores[model_index];
/*better to come from previous or self?*/
if (internal_state > 0)
{
prev_cost = current_token->cost[internal_state-1];
/* a duration model can be applied here,
by changing the prev_cost according to some function of
the current_token->duration[internal_state-1] rec->avg_state_durations[ prev_model_index] */
if (prev_cost < current_prune_thresh)
{
modelID prev_model_index;
prev_cost = (costdata)(prev_cost + (costdata) model_cost);
/* apply duration model for "next" transition, note that it's nice
to access the duration model (avg_state_durations) somehwere
other than the acoustic models which could be far away in memory
arrive penalty would be applied here too if it was reqd */
prev_model_index = hmm_info->state_indices[internal_state-1];
prev_cost = (costdata)(prev_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[ prev_model_index],
current_token->duration[internal_state-1]));
}
}
else
{
prev_cost = MAXcostdata;
}
self_loop_cost = current_token->cost[internal_state];
/* a duration model can be applied here,
by changing the self_loop_cost according to some function of
the current_token->duration[internal_state] rec->avg_state_durations[ prev_model_index] */
if (self_loop_cost < current_prune_thresh)
{
self_loop_cost = (costdata)(self_loop_cost + (costdata) model_cost);
/* apply duration model for "loop" transition */
self_loop_cost = (costdata)(self_loop_cost + (costdata) duration_penalty_loop(rec->avg_state_durations[ model_index],
current_token->duration[internal_state]));
}
if (prev_cost < self_loop_cost)
{
current_token->cost[internal_state] = prev_cost;
current_token->word_backtrace[internal_state] = current_token->word_backtrace[internal_state-1];
current_token->word[internal_state] = current_token->word[internal_state-1];
current_token->duration[internal_state] = 1;
if (current_token->word[internal_state-1] != MAXwordID)
{
if (current_token->aword_backtrace[internal_state] != AWTNULL)
free_altword_token_batch(rec,
current_token->aword_backtrace[internal_state]);
current_token->aword_backtrace[internal_state] = refcopy_altwords(rec, current_token->aword_backtrace[internal_state-1]);
/*print_fsmarc_token(rec, current_token_index, "123c");*/
}
else
{
/* if there's no top choice, there shouldn't be alternatives! */
ASSERT(current_token->aword_backtrace[internal_state] == AWTNULL);
ASSERT(current_token->aword_backtrace[internal_state-1] == AWTNULL);
}
}
else
{
current_token->cost[internal_state] = self_loop_cost;
current_token->duration[internal_state]++;
}
if (current_token->cost[internal_state] < current_prune_thresh)
{
any_alive = 1;
if (current_token->cost[internal_state] < current_best_cost)
{
current_best_cost = current_token->cost[internal_state];
current_prune_thresh = current_best_cost + current_prune_delta;
}
}
}
current_token_index = current_token->next_token_index;
}
*pcurrent_best_cost = current_best_cost;
*pcurrent_prune_delta = current_prune_delta;
}
static int GetNumArcsArrivingClip2(srec_context* context, FSMnode* fsm_node)
{
arcID fpa = fsm_node->first_prev_arc;
FSMarc* arc;
if (fpa == MAXarcID)
return 0;
arc = &context->FSMarc_list[fpa];
if (arc->linkl_prev_arc == MAXarcID)
return 1;
else
return 2;
}
static int update_from_hmms_to_fsmnodes(srec *rec, costdata prune_delta, costdata best_cost)
{
stokenID current_token_index;
fsmarc_token *current_token;
int end_state;
costdata end_cost;
costdata current_prune_thresh;
costdata current_prune_delta; /*may get tighter to keep num fsmnodes under control*/
// vFSMarc vfsm_arc;
FSMarc* fsm_arc;
FSMnode* fsm_node;
// vFSMnode vfsm_node;
arcID fsm_arc_index;
nodeID to_node_index;
ftokenID new_ftoken_index;
fsmnode_token *ftoken;
modelID end_model_index;
labelID ilabel;
short end_cost_equality_hack;
HMMInfo* hmm_info;
altword_token *awtoken, *q;
int num_fsmnode_updates = 0;
current_prune_delta = prune_delta;
current_prune_thresh = best_cost + current_prune_delta;
current_token_index = rec->active_fsmarc_tokens;
for (ilabel = 0; ilabel < NODE_INFO_NUMS; ilabel++)
{
rec->current_best_ftoken_cost[ilabel] = MAXcostdata / 2;
rec->current_best_ftoken_index[ilabel] = MAXftokenID;
}
sanity_check_altwords(rec, rec->altword_token_freelist);
while (current_token_index != MAXstokenID)
{
current_token = &(rec->fsmarc_token_array[current_token_index]);
/*propagate from end of state token to new FSM node*/
end_state = (char) current_token->num_hmm_states - 1;
ASSERT((current_token->aword_backtrace[end_state] == AWTNULL)
|| (current_token->word[end_state] != MAXwordID));
end_cost = current_token->cost[end_state];
/* anticipated repruning: make sure there is enough space before
beginning complex computation */
if (rec->word_priority_q->max_in_q>1 && rec->altword_token_freelist_len < 3*rec->word_priority_q->max_in_q)
reprune_altword_tokens(rec);
if (end_cost < current_prune_thresh)
{
num_fsmnode_updates++;
fsm_arc_index = current_token->FSMarc_index;
fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel];
end_model_index = hmm_info->state_indices[end_state];
end_cost = (costdata)(end_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[end_model_index],
current_token->duration[end_state]));
to_node_index = fsm_arc->to_node;
new_ftoken_index = rec->best_token_for_node[to_node_index];
if (new_ftoken_index == MAXftokenID)
{
/*we need to make sure there is room in the new_states array
and there are free state tokens*/
if (rec->fsmnode_token_freelist == MAXftokenID)
{
/*make sure there is room for another FSMnode token
- if not, prune until there is room*/
current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, MAXftokenID);
current_prune_thresh = best_cost + current_prune_delta;
}
/*because of the above check, this should always succeed*/
new_ftoken_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS);
ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
ftoken->FSMnode_index = to_node_index;
ftoken->next_token_index = rec->active_fsmnode_tokens;
ftoken->cost = end_cost;
ftoken->word_backtrace = current_token->word_backtrace[end_state];
ftoken->word = current_token->word[end_state];
if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word)
{
ftoken->silence_duration = current_token->duration[end_state];
}
else
{
ftoken->silence_duration = 0;
}
if (ftoken->word != MAXwordID)
{
arcID narr;
fsm_node = &rec->context->FSMnode_list[ fsm_arc->to_node];
/* when there is only one arc arriving, a refcopy is good enough
and saves memory */
narr = (arcID) GetNumArcsArrivingClip2(rec->context, fsm_node);
if (narr > 1)
ftoken->aword_backtrace = copy_altwords(rec, current_token->aword_backtrace[end_state], 0);
else
ftoken->aword_backtrace = refcopy_altwords(rec, current_token->aword_backtrace[end_state]);
}
else
{
/* if there's no top choice, there shouldn't be alternatives! */
ASSERT(current_token->aword_backtrace[end_state] == AWTNULL);
ftoken->aword_backtrace = AWTNULL;
}
rec->active_fsmnode_tokens = new_ftoken_index;
rec->best_token_for_node[to_node_index] = new_ftoken_index;
}
else /* a token already exists, use it! */
{
ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
ASSERT( ((current_token->word[end_state] == MAXwordID) && (ftoken->word == MAXwordID))
|| ((current_token->word[end_state] != MAXwordID) && (ftoken->word != MAXwordID)) );
/* this is a hack for preferring the shorter of the backtrace words
when scores are equal, used to prefer longer pau2 word */
end_cost_equality_hack = 0;
if (end_cost == ftoken->cost)
{
if (current_token->word_backtrace[end_state] != ftoken->word_backtrace
&& current_token->word_backtrace[end_state] != MAXwtokenID)
{
frameID ct_end_time = MAXframeID, et_end_time = 0;
if (current_token->word_backtrace[end_state] != MAXwtokenID)
ct_end_time = rec->word_token_array[current_token->word_backtrace[end_state]].end_time;
if (ftoken->word_backtrace != MAXwtokenID)
et_end_time = rec->word_token_array[ftoken->word_backtrace].end_time;
if (ct_end_time < et_end_time)
end_cost_equality_hack = 1;
}
}
if (end_cost < ftoken->cost || end_cost_equality_hack)
{
/* new one coming in is better, so push the current state down */
/* ftoken info goes into awtoken */
if (ftoken->word != MAXwordID)
{
/* copy_altwords() */
awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
if (awtoken != AWTNULL)
{
awtoken->costdelta = ftoken->cost - end_cost;
awtoken->word_backtrace = ftoken->word_backtrace;
awtoken->word = ftoken->word;
/* ensure full ownership! */
q = ftoken->aword_backtrace;
if (q != AWTNULL && q->refcount > 1)
{
awtoken->next_token = copy_altwords(rec, ftoken->aword_backtrace, ftoken->cost - end_cost);
free_altword_token_batch(rec, ftoken->aword_backtrace);
/* reversed order above here !! */
}
else
{
awtoken->next_token = ftoken->aword_backtrace;
count_altword_token( rec, awtoken);
for (q = awtoken->next_token; q; q = q->next_token)
q->costdelta += ftoken->cost - end_cost;
}
ftoken->aword_backtrace = awtoken;
ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
if( (q=ftoken->aword_backtrace)!=AWTNULL) {
for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ;
q->next_token = copy_altwords(rec, current_token->aword_backtrace[end_state], 0);
ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
/* awtoken->costbasis = &ftoken->cost; */
ftoken->aword_backtrace->refcount = 1;
}
}
}
else
{
/* if there's no top choice, there shouldn't be alternatives! */
ASSERT(ftoken->aword_backtrace == AWTNULL);
}
/* and stoken info goes into ftoken */
ftoken->cost = end_cost;
ftoken->word_backtrace = current_token->word_backtrace[end_state];
ftoken->word = current_token->word[end_state];
if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word)
{
ftoken->silence_duration = current_token->duration[end_state];
}
else
{
ftoken->silence_duration = 0;
}
}
else
{
/* new arc arriving is worse */
/* print_fsmarc_token(rec, current_token_index, "new_arc_arriving worse");
print_fsmnode_token(rec, new_ftoken_index, "new_arc_arriving tonode");*/
/* append it to the alt list */
/* stoken info goes into the awtoken, ftoken unchanged */
if (ftoken->word != MAXwordID)
{
/* copy_altwords() */
awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
if (awtoken != AWTNULL)
{
awtoken->costdelta = end_cost - ftoken->cost;
awtoken->word = current_token->word[end_state];
awtoken->word_backtrace = current_token->word_backtrace[end_state];
if (current_token->aword_backtrace[end_state] != AWTNULL)
awtoken->next_token = copy_altwords(rec,
current_token->aword_backtrace[end_state],
awtoken->costdelta);
else
awtoken->next_token = AWTNULL;
/* ensure full ownership!, this is new here! */
q = ftoken->aword_backtrace;
if (q != AWTNULL && q->refcount > 1)
{
q = copy_altwords(rec, ftoken->aword_backtrace, 0);
free_altword_token_batch(rec, ftoken->aword_backtrace);
ftoken->aword_backtrace = q;
}
}
if (ftoken->aword_backtrace)
{
for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ;
q->next_token = awtoken;
}
else
{
ftoken->aword_backtrace = awtoken;
}
if (ftoken->aword_backtrace!=AWTNULL) {
ftoken->aword_backtrace->refcount = 1;
ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
}
}
}
/*print_fsmnode_token(rec, new_ftoken_index, "123e reused-token ");*/
}
ilabel = rec->context->FSMnode_info_list[ ftoken->FSMnode_index];
ASSERT(ilabel < NODE_INFO_NUMS);
if (ftoken->cost < rec->current_best_ftoken_cost[ilabel])
{
rec->current_best_ftoken_cost[ilabel] = ftoken->cost;
rec->current_best_ftoken_index[ilabel] = new_ftoken_index;
}
if (ftoken->cost < rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN])
{
rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN] = ftoken->cost;
rec->current_best_ftoken_index[NODE_INFO_UNKNOWN] = new_ftoken_index;
}
ASSERT(ftoken->word != MAXwordID || ftoken->aword_backtrace == AWTNULL);
}
current_token_index = current_token->next_token_index;
}
sanity_check_altwords(rec, rec->altword_token_freelist);
return num_fsmnode_updates;
}
static int update_from_current_fsm_nodes_into_new_HMMs(srec* rec,
costdata *pcurrent_prune_delta,
costdata *pcurrent_best_cost,
costdata *precomputed_model_scores)
{
costdata prev_cost;
FSMnode* fsm_node;
FSMarc* fsm_arc;
arcID fsm_arc_index;
HMMInfo *hmm_info;
modelID model_index;
fsmarc_token *token;
stokenID new_token_index = MAXstokenID;
costdata cost;
costdata current_prune_thresh;
costdata current_prune_delta = *pcurrent_prune_delta;
costdata current_best_cost = *pcurrent_best_cost;
ftokenID ftoken_index;
ftokenID old_ftoken_index;
fsmnode_token *fsmnode_token;
int num_fsm_nodes_updated = 0;
costdata orig_prune_delta;
ftoken_index = rec->active_fsmnode_tokens;
current_prune_thresh = *pcurrent_best_cost + *pcurrent_prune_delta;
orig_prune_delta = *pcurrent_prune_delta;
sanity_check_altwords(rec, rec->altword_token_freelist);
while (ftoken_index != MAXftokenID)
{
fsmnode_token = &rec->fsmnode_token_array[ftoken_index];
prev_cost = fsmnode_token->cost; /*get last state of token*/
if (fsmnode_token->FSMnode_index == rec->context->end_node)
{
prev_cost = MAXcostdata;
}
if (prev_cost < current_prune_thresh)
{
num_fsm_nodes_updated++;
fsm_node = &rec->context->FSMnode_list[fsmnode_token->FSMnode_index];
/* loop over arcs leaving this fsm_node */
for (fsm_arc_index = fsm_node->un_ptr.first_next_arc;
fsm_arc_index != MAXarcID;
fsm_arc_index = fsm_arc->linkl_next_arc)
{
labelID ilabel;
wordID olabel;
nodeID nextnode;
fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
ilabel = fsm_arc->ilabel;
olabel = fsm_arc->olabel;
nextnode = fsm_arc->to_node;
if (ilabel >= EPSILON_OFFSET)
{
/*so, not an epsilon arc*/
hmm_info = &rec->context->hmm_info_for_ilabel[ilabel];
model_index = hmm_info->state_indices[0];
cost = prev_cost + precomputed_model_scores[model_index];
cost = (costdata)(cost + (costdata) fsm_arc->cost);
if (cost < current_prune_thresh)
{
/*new node to keep*/
/* look for the fsmarc_token* token, into which to maximize, else create new one */
if (rec->best_token_for_arc[fsm_arc_index] == MAXstokenID)
{
/*make sure there is room for another state token - if not, prune
until there is room*/
/*we need to make sure there is room in the new_states array and
there are free state tokens*/
if (rec->fsmarc_token_freelist == MAXstokenID)
{
current_prune_delta = reprune_new_states(rec, current_best_cost, current_prune_delta);
}
/* because of the above check, this should always succeed */
new_token_index = setup_free_fsmarc_token(rec, fsm_arc, fsm_arc_index, EXIT_IF_NO_TOKENS);
token = &(rec->fsmarc_token_array[new_token_index]);
token->next_token_index = rec->active_fsmarc_tokens;
rec->active_fsmarc_tokens = new_token_index;
rec->num_new_states++;
rec->best_token_for_arc[fsm_arc_index] = new_token_index;
token->cost[0] = MAXcostdata;
}
else
{
new_token_index = rec->best_token_for_arc[fsm_arc_index];
token = &(rec->fsmarc_token_array[ new_token_index]);
}
if (cost < token->cost[0])
{
token->cost[0] = cost;
token->duration[0] = 1;
token->word_backtrace[0] = fsmnode_token->word_backtrace;
if (token->aword_backtrace[0] != AWTNULL)
free_altword_token_batch(rec, token->aword_backtrace[0]);
token->aword_backtrace[0] = AWTNULL;
token->aword_backtrace[0] = refcopy_altwords(rec, fsmnode_token->aword_backtrace);
if (olabel != WORD_EPSILON_LABEL)
{
token->word[0] = olabel;
//ASSERT(token->aword_backtrace[0] == AWTNULL);
}
else
{
token->word[0] = fsmnode_token->word;
}
ASSERT(token->word[0] != MAXwordID
|| token->aword_backtrace[0] == AWTNULL);
if (cost < current_best_cost)
{
current_best_cost = cost;
current_prune_delta = orig_prune_delta; /*if we have a new best cost, the prune delta could go back up*/
current_prune_thresh = cost + current_prune_delta;
ASSERT((float)cost + (float)current_prune_delta < (float)USHRT_MAX);
}
}
}
}
}
}
rec->best_token_for_node[fsmnode_token->FSMnode_index] = MAXftokenID; /*done with this node - remove it from the array*/
old_ftoken_index = ftoken_index;
ftoken_index = fsmnode_token->next_token_index;
free_fsmnode_token(rec, old_ftoken_index); /*done with this node - free the token*/
rec->active_fsmnode_tokens = ftoken_index; /*needed for sanity_check_altwords*/
}
/*done with all the tokens, set active tokens to NULL*/
rec->active_fsmnode_tokens = MAXftokenID;
sanity_check_altwords(rec, rec->altword_token_freelist);
*pcurrent_best_cost = current_best_cost;
*pcurrent_prune_delta = current_prune_delta;
return num_fsm_nodes_updated;
}
#if USE_COMP_STATS
void start_front_end_clock(void)
{
if (!comp_stats)
comp_stats = init_comp_stats();
start_cs_clock(&comp_stats->front_end);
}
void stop_front_end_clock(void)
{
end_cs_clock(&comp_stats->front_end, 1);
}
#endif
/*---------------------------------------------------------------------------*
* *
* begin and end *
* *
*---------------------------------------------------------------------------*/
/*gets things started for the viterbi search - sets up things for frame 0*/
int srec_begin(srec *rec, int begin_syn_node)
{
FSMnode* fsm_node;
fsmnode_token *token;
stokenID new_token_index;
nodeID node_index;
arcID arc_index;
if (!rec || !rec->context)
{
log_report("Error: bad inputs to srec_begin()\n");
return 1;
}
if (!rec->context->whether_prepared)
{
log_report("srec_begin: Grammar not prepared. Compiling!\n");
FST_PrepareContext(rec->context);
if (!rec->context->whether_prepared)
{
PLogError("ESR_INVALID_STATE: Grammar can not be compiled (FST_PrepareContext failed)");
return ESR_INVALID_STATE ;
}
}
#if USE_COMP_STATS
if (comp_stats == NULL)
comp_stats = init_comp_stats();
#endif
/*initialize token storage - not clear we really need this - as long as they
are managed correctly, we should be able to do this on startup - not each utt*/
initialize_free_fsmarc_tokens(rec);
initialize_free_word_tokens(rec);
initialize_free_fsmnode_tokens(rec);
initialize_word_lattice(rec->word_lattice);
initialize_free_altword_tokens(rec);
if (rec->context->num_nodes > rec->max_fsm_nodes)
{
log_report("Error: srec_begin failing due to too many grammar nodes\n");
return 1;
}
for (node_index = 0;node_index < rec->context->num_nodes;node_index++)
{
rec->best_token_for_node[node_index] = MAXftokenID;
}
if (rec->context->num_arcs > rec->max_fsm_arcs)
{
log_report("Error: srec_begin failing due to too many grammar arcs\n");
return 1;
}
for (arc_index = 0;arc_index < rec->context->num_arcs;arc_index++)
{
rec->best_token_for_arc[arc_index] = MAXstokenID;
}
rec->srec_ended = 0;
rec->num_new_states = 0;
rec->current_best_cost = 0;
rec->current_prune_delta = rec->prune_delta;
/*need help from johan - does ths FSM only have one start node?
Which one is it? assume just one and it is node 0*/
fsm_node = &rec->context->FSMnode_list[ rec->context->start_node];
node_index = (nodeID) rec->context->start_node;
/* node_index is still 0 at this point */
/*now we just need to setup an initial fsmnode token (for begin FSM node) and then do epsilon updates*/
rec->active_fsmarc_tokens = MAXstokenID;
new_token_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS);
token = &(rec->fsmnode_token_array[new_token_index]);
token->word_backtrace = MAXwtokenID; /* real value set below*/
token->cost = 0;
token->word = MAXwordID;
token->FSMnode_index = node_index;
token->next_token_index = MAXftokenID;
token->aword_backtrace = AWTNULL;
rec->best_token_for_node[node_index] = new_token_index;
rec->active_fsmnode_tokens = new_token_index;
rec->current_search_frame = 0;
do_epsilon_updates(rec, rec->prune_delta, 0);
return 0;
}
void srec_force_the_end(srec* rec, frameID end_frame, wordID end_word)
{
srec_word_lattice* wl = rec->word_lattice;
wtokenID wtoken_index, tmp;
frameID frame;
wtoken_index = wl->words_for_frame[end_frame];
if (wtoken_index == MAXwtokenID)
{
for (frame = end_frame - 1; frame > 20; frame--)
{
if (wl->words_for_frame[frame] != MAXwtokenID)
{
word_token* wtoken;
wl->words_for_frame[end_frame] = wl->words_for_frame[frame];
wl->words_for_frame[frame] = MAXwtokenID;
for (tmp = wl->words_for_frame[end_frame]; tmp != MAXwtokenID;
tmp = wtoken->next_token_index)
{
wtoken = &rec->word_token_array[tmp];
wtoken->end_time = frame;
wtoken->word = end_word;
wtoken->end_node = rec->context->end_node;
}
#ifdef _WIN32
PLogError(L("Forced an end path at end frame %d/%d)\n"), frame, end_frame);
#endif
break;
}
}
}
}
/* when there are no more frames of input, this functions
kills all paths not ending at the end node and
creates a word linked list even though there is no WORD_BOUNDARY ilabel */
void srec_no_more_frames(srec* rec)
{
#if USE_COMP_STATS
frameID end_frame = rec->current_search_frame;
#endif
nodeID end_node;
fsmnode_token* ftoken;
ftokenID current_token_index;
costdata current_word_threshold = MAXcostdata;
wtokenID word_token_index;
int any_nodes_blocked = 0;
altword_token* awtoken;
/* this is just for sanity checking, to find out what the state was
at the end of input */
srec_check_end_of_speech_end(rec);
if (rec->srec_ended) return;
rec->srec_ended = 1;
#if USE_COMP_STATS
comp_stats->total_time += (float)(end_frame / 50.0f);
dump_comp_stats(comp_stats, PSTDOUT);
#endif
end_node = rec->context->end_node;
/*remove all word paths from the priority_q which do not end at end_node
to make space for those being added below */
remove_non_end_word_from_q(rec, rec->word_priority_q, rec->word_token_array,
end_node);
if (rec->current_search_frame == 0)
return;
rec->accumulated_cost_offset[ rec->current_search_frame] =
rec->accumulated_cost_offset[ rec->current_search_frame-1];
rec->cost_offset_for_frame[ rec->current_search_frame] = 0;
/* watch out if using the best_token_for_node[] array here
is it valid? not if multiple recognizers, maybe we
should remember best_token_for_end_node separately */
current_token_index = rec->active_fsmnode_tokens;
while (current_token_index != MAXftokenID)
{
ftoken = &rec->fsmnode_token_array[current_token_index];
if (ftoken->FSMnode_index == end_node)
{
/* print_fsmnode_token(rec, current_token_index, "fsmnode_token at end_node "); */
word_token_index = srec_process_word_boundary_nbest(rec,
ftoken->FSMnode_index,
ftoken->word,
ftoken->word_backtrace,
ftoken->cost, &current_word_threshold, &any_nodes_blocked);
if (word_token_index != MAXwtokenID)
{
WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array+word_token_index),
rec->word_token_array[word_token_index].end_time - ftoken->silence_duration);
}
/* now also dump alternatives at this last frame, sep19'03 fixed */
awtoken = ftoken->aword_backtrace;
for (; awtoken != AWTNULL; awtoken = awtoken->next_token)
{
srec_process_word_boundary_nbest(rec,
ftoken->FSMnode_index,
awtoken->word,
awtoken->word_backtrace,
ftoken->cost + awtoken->costdelta,
&current_word_threshold,
&any_nodes_blocked);
}
}
current_token_index = ftoken->next_token_index;
}
/* we clobber the word_lattice at the last frame that was created
in do_epsilon_updates() */
word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);
if (FST_IsVoiceEnrollment(rec->context) && word_token_index == MAXwtokenID)
{
srec_force_the_end(rec, rec->current_search_frame, rec->context->end_silence_word);
}
/* find the current_best_cost for this recognizer ... at the end node,
it will be used to decide which recognizer wins! */
rec->current_best_cost = lattice_best_cost_to_frame(rec->word_lattice,
rec->word_token_array,
rec->current_search_frame);
}
void srec_terminate(srec* rec)
{
frameID ifr;
stokenID stoken_index, next_stoken_index;
fsmarc_token* stoken;
ftokenID ftoken_index, next_ftoken_index;
fsmnode_token* ftoken;
wtokenID wtoken_index, next_wtoken_index;
word_token* wtoken;
/* release all state tokens */
for (stoken_index = rec->active_fsmarc_tokens; stoken_index != MAXstokenID;
stoken_index = next_stoken_index)
{
stoken = &rec->fsmarc_token_array[ stoken_index];
next_stoken_index = stoken->next_token_index;
free_fsmarc_token(rec, stoken_index);
}
rec->active_fsmarc_tokens = MAXstokenID;
/* release all fsmnode tokens */
for (ftoken_index = rec->active_fsmnode_tokens; ftoken_index != MAXftokenID;
ftoken_index = next_ftoken_index)
{
ftoken = &rec->fsmnode_token_array[ ftoken_index];
next_ftoken_index = ftoken->next_token_index;
free_fsmnode_token(rec, ftoken_index);
}
rec->active_fsmnode_tokens = MAXftokenID;
/* release all word tokens */
for (ifr = 0; ifr < rec->current_search_frame; ifr++)
{
for (wtoken_index = rec->word_lattice->words_for_frame[ifr];
wtoken_index != MAXwtokenID; wtoken_index = next_wtoken_index)
{
wtoken = &rec->word_token_array[wtoken_index];
next_wtoken_index = wtoken->next_token_index;
free_word_token(rec, wtoken_index);
}
rec->word_lattice->words_for_frame[ifr] = MAXwtokenID;
}
rec->current_model_scores[SILENCE_MODEL_INDEX] = DO_NOT_COMPUTE_MODEL;
rec->current_best_cost = MAXcostdata;
rec->srec_ended = 1;
}
/*------------------------------------------------------------------------*
* *
* main work of the viterbi search *
* *
*------------------------------------------------------------------------*/
/*with new update to FSM node scheme, the sequence of operation is:
for each frame:
1. Handle all internal HMM updates based on new frame observations. This is
done in place with the current list of HMM tokens.
2. For each current active FSM node (from previous frame), activate update
into state 0 (either for existing HMM tokens or for new HMM tokens) by going
through an observation frame (so, only go from an FSM node to a new HMM
token if the first observation frame gets a score above the current pruning
threshold). FSM nodes are freed as this is done. So, no FSMnode tokens are left
at the end of this.
3. Prune. Note that the best score will have already been established for
this frame (so therefore the pruning threshold will not change).
4. reset best cost to 0 (to keep scores in range). We can do this here since we already know the best score.
5. For end hmm states which are above the pruning threshold, create new
FSMnode_tokens.
6. update epsilons, including word boundary arcs (which put words onto the word lattice).
epsilon updates go from FSM node to FSM node.
repeat for next frame based on new FSM nodes and current HMMs
*/
void srec_viterbi_part1(srec *rec,
const SWIModel *acoustic_models,
pattern_info *pattern,
costdata silence_model_cost);
void srec_viterbi_part2(srec *rec);
int multi_srec_viterbi(multi_srec *recm,
srec_eos_detector_parms* eosd,
pattern_info *pattern,
utterance_info* utt_not_used)
{
EOSrc eosrc1 = SPEECH_ENDED, eosrc2 = SPEECH_ENDED;
#if DO_ALLOW_MULTIPLE_MODELS
ASSERT(recm->num_activated_recs == recm->num_swimodels);
if (recm->num_activated_recs == 1)
{
#endif
srec* rec1 = &recm->rec[0];
#if USE_COMP_STATS
start_cs_clock1(&comp_stats->overall_search);
#endif
if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
return 1;
srec_viterbi_part1(&recm->rec[0], recm->swimodel[0], pattern, DO_NOT_COMPUTE_MODEL);
reset_best_cost_to_zero(rec1, rec1->current_best_cost);
reset_cost_offsets(recm, rec1->current_search_frame, rec1->current_best_cost);
rec1->current_prune_delta = rec1->prune_delta;
rec1->current_best_cost = 0;
srec_viterbi_part2(&recm->rec[0]);
eosrc1 = srec_check_end_of_speech(eosd, &recm->rec[0]);
#if USE_COMP_STATS
end_cs_clock1(&comp_stats->overall_search, 1);
#endif
SREC_STATS_UPDATE(&recm->rec[0]);
recm->eos_status = eosrc1;
#if DO_ALLOW_MULTIPLE_MODELS
}
else if (recm->num_activated_recs == 2)
{
srec* rec1 = &recm->rec[0];
srec* rec2 = &recm->rec[1];
const SWIModel* acoustic_models1 = recm->swimodel[0];
const SWIModel* acoustic_models2 = recm->swimodel[1];
costdata diff;
costdata current_best_cost;
ASSERT(rec1->prune_delta == rec2->prune_delta);
/* in part 1 we need to operate by adjusting the prune delta, 'cuz we want
to operate on scores after consumption of a frame */
if ((rec1->current_best_cost > MAXcostdata / 2 && !rec1->srec_ended) ||
(rec2->current_best_cost > MAXcostdata / 2 && !rec2->srec_ended))
{
printf("hey %d %d\n", rec1->current_best_cost, rec2->current_best_cost);
}
/* figure out the prune_delta for the different genders, we
want that pruning should be joint (i.e. prune male and
female relative to overall best). Before part1 we don't
yet know the overall best, so we use the gender score gap
from the last frame, and make the prune the worse gender
accordingly more aggressive */
if (!rec2->srec_ended && rec1->current_best_cost < rec2->current_best_cost)
{
diff = rec2->current_best_cost - rec1->current_best_cost;
if (rec2->current_search_frame >= (rec2->word_lattice->max_frames - 1))
{
return 1;
}
if (diff > rec2->prune_delta)
{
srec_terminate(rec2);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
PLogMessage("T: terminate_viterbi(rec2) @%d", rec2->current_search_frame);
#endif
}
else
rec2->current_prune_delta = rec2->prune_delta - diff;
rec1->current_prune_delta = rec1->prune_delta;
}
else if (!rec1->srec_ended)
{
if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
{
return 1;
}
diff = rec1->current_best_cost - rec2->current_best_cost;
if (diff > rec1->prune_delta)
{
srec_terminate(rec1);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
PLogMessage("T: terminate_viterbi(rec1) @%d", rec1->current_search_frame);
#endif
}
else
rec1->current_prune_delta = rec1->prune_delta - diff;
rec2->current_prune_delta = rec2->prune_delta;
}
/* now run part1 for each gender */
if (!rec1->srec_ended)
{
srec_viterbi_part1(rec1, acoustic_models1, pattern, DO_NOT_COMPUTE_MODEL);
SREC_STATS_UPDATE(rec1);
}
if (!rec2->srec_ended)
{
srec_viterbi_part1(rec2, acoustic_models2, pattern, rec1->current_model_scores[SILENCE_MODEL_INDEX]);
SREC_STATS_UPDATE(rec2);
}
/* now adjust score offsets, score offsets are shared across genders */
if (rec1->current_best_cost <= rec2->current_best_cost)
{
/* am1 is winning, prune 2 harder */
current_best_cost = rec1->current_best_cost;
reset_cost_offsets(recm, rec1->current_search_frame, current_best_cost);
}
else
{
/* am2 is winning, prune 1 harder */
current_best_cost = rec2->current_best_cost;
reset_cost_offsets(recm, rec2->current_search_frame, current_best_cost);
}
/* jean: some cleanup needed here */
/** best_token_for_arc = rec1->best_token_for_arc;
rec1->best_token_for_arc = 0; **/
if (!rec1->srec_ended)
{
reset_best_cost_to_zero(rec1, current_best_cost);
rec1->current_best_cost = (costdata)(rec1->current_best_cost - (costdata) current_best_cost);
srec_viterbi_part2(rec1);
if (rec1->active_fsmnode_tokens == MAXftokenID)
srec_terminate(rec1);
if (!rec1->srec_ended)
eosrc1 = srec_check_end_of_speech(eosd, rec1);
}
/** rec1->best_token_for_arc = best_token_for_arc;
best_token_for_arc = rec2->best_token_for_arc;
rec2->best_token_for_arc = 0; **/
if (!rec2->srec_ended)
{
reset_best_cost_to_zero(rec2, current_best_cost);
rec2->current_best_cost = (costdata)(rec2->current_best_cost - (costdata) current_best_cost);
srec_viterbi_part2(rec2);
if (rec2->active_fsmnode_tokens == MAXftokenID)
srec_terminate(rec2);
if (!rec2->srec_ended)
eosrc2 = srec_check_end_of_speech(eosd, rec2);
}
/** rec2->best_token_for_arc = best_token_for_arc; **/
SREC_STATS_UPDATE(rec1);
SREC_STATS_UPDATE(rec2);
recm->eos_status = eosrc1;
if (rec1->current_best_cost > rec2->current_best_cost)
recm->eos_status = eosrc2;
}
#endif
return 0;
}
void srec_viterbi_part1(srec *rec,
const SWIModel *acoustic_models,
pattern_info *pattern,
costdata silence_model_cost)
{
costdata current_best_cost;
/* costdata current_prune_thresh; */
costdata current_prune_delta;
/* the score difference for pruning - can get adjusted below if
pruning gets tighted to keep array sizes in check*/
costdata *current_model_scores;
int num_models_computed;
nodeID num_fsm_nodes_updated;
#if USE_COMP_STATS
start_cs_clock(&comp_stats->models);
#endif
/*first go ahead and compute scores for all models which are needed by the search at this point*/
find_which_models_to_compute(rec, acoustic_models);
/* communication happens via rec->current_model_scores */
#define SCORE_FIRST_SILENCE_ONLY
#ifdef SCORE_FIRST_SILENCE_ONLY
if (silence_model_cost != DO_NOT_COMPUTE_MODEL)
rec->current_model_scores[SILENCE_MODEL_INDEX] = silence_model_cost;
#endif
num_models_computed = compute_model_scores(rec->current_model_scores, acoustic_models, pattern, rec->current_search_frame);
rec->best_model_cost_for_frame[rec->current_search_frame] = best_uint16(rec->current_model_scores, acoustic_models->num_hmmstates);
#if USE_COMP_STATS
end_cs_clock(&comp_stats->models, num_models_computed);
start_cs_clock(&comp_stats->internal_hmm);
#endif
/*get some things out of the rec structure to make things a bit faster*/
current_model_scores = rec->current_model_scores;
/*update search to next frame*/
current_best_cost = MAXcostdata - ((costdata)2) * rec->prune_delta; /*to avoid overflows, must clean up later */
/* current_prune_thresh = MAXcostdata; */
current_prune_delta = rec->current_prune_delta;
/* srec_stats_update(rec, "(...0) "); */
/*------------------------------------------------------------------------*
1. Handle all internal HMM updates based on new frame observations. This is
done in place with the current list of HMM tokens.
*------------------------------------------------------------------------*/
update_internal_hmm_states(rec, &current_prune_delta, &current_best_cost, current_model_scores);
/* check_if_any_token_better_than_best_cost(rec, rec->active_fsmarc_tokens, current_best_cost, "after update into new");*/
#if USE_COMP_STATS
end_cs_clock(&comp_stats->internal_hmm, rec->num_new_states);
start_cs_clock(&comp_stats->fsm_to_hmm);
#endif
/* srec_stats_update(rec, "(...1) "); */
/*------------------------------------------------------------------------*
2. For each current active FSM node (from previous frame), activate update
into state 0 (either for existing HMM tokens or for new HMM tokens) by going
through an observation frame (so, only go from an FSM node to a new HMM
token if the first observation frame gets a score above the current pruning
threshold). FSM nodes are freed as this is done. So, no FSMnode tokens are left
at the end of this.
*------------------------------------------------------------------------*/
num_fsm_nodes_updated = (nodeID) update_from_current_fsm_nodes_into_new_HMMs(rec, &current_prune_delta, &current_best_cost, current_model_scores);
/* srec_stats_update(rec, "(...2) "); */
/*------------------------------------------------------------------------*
3. Prune. Note that the best score will have already been established for
this frame (so therefore the pruning threshold will not change).
*------------------------------------------------------------------------*/
#if USE_COMP_STATS
end_cs_clock(&comp_stats->fsm_to_hmm, num_fsm_nodes_updated);
start_cs_clock(&comp_stats->prune);
#endif
prune_new_tokens(rec, (costdata)(current_best_cost + current_prune_delta));
/* it's nice to do word token pruning here 'cuz we only need to traceback
the active_fsmarc_tokens, active_fsmnode_tokens are propogated thereto */
reprune_word_tokens_if_necessary(rec);
rec->current_prune_delta = current_prune_delta;
rec->current_best_cost = current_best_cost;
/* srec_stats_update(rec, "(...3) "); */
#if USE_COMP_STATS
end_cs_clock(&comp_stats->prune, rec->num_new_states);
#endif
}
void srec_viterbi_part2(srec *rec)
{
wtokenID word_token_index;
nodeID inode, num_fsm_nodes_updated;
costdata current_prune_delta = rec->current_prune_delta;
costdata current_best_cost = rec->current_best_cost;
ftokenID* ftmp;
int num_updates;
/* first we clear the best_token_for_node array, there are no live
fsmnode_tokens at this point, and we don't want leftovers from
the last frame */
ftmp = rec->best_token_for_node;
for (inode = 0; inode < rec->context->num_nodes; inode++)
*ftmp++ = MAXftokenID;
/*------------------------------------------------------------------------*
4. reset best cost to 0 (to keep scores in range). We can do this here
since we already know the best score. This is done here so that
no fsmnode tokens (there are none active now) need updating. This is also
done here before epsilons - that way we don't need to update the word
tokens .
We assume this was done just before part2.
*------------------------------------------------------------------------*/
#if USE_COMP_STATS
start_cs_clock(&comp_stats->hmm_to_fsm);
#endif
/*------------------------------------------------------------------------*
5. For end hmm states which are above the pruning threshold, create new
FSMnode_tokens.
*------------------------------------------------------------------------*/
num_updates = update_from_hmms_to_fsmnodes(rec, current_prune_delta, current_best_cost);
if (num_updates == 0)
{
num_updates = update_from_hmms_to_fsmnodes(rec, 2 * current_prune_delta, current_best_cost);
SREC_STATS_INC_FORCED_UPDATES();
}
SREC_STATS_UPDATE(rec);
#if USE_COMP_STATS
end_cs_clock(&comp_stats->hmm_to_fsm, rec->num_new_states);
start_cs_clock(&comp_stats->epsilon);
#endif
/* srec_stats_update(rec, "(...5) "); */
/*------------------------------------------------------------------------*
6. update epsilons, including word boundary arcs (which put words onto the word lattice).
epsilon updates go from FSM node to FSM node.
*------------------------------------------------------------------------*/
/*clear priority_q for this frame*/
clear_priority_q(rec->word_priority_q);
num_fsm_nodes_updated = (nodeID) do_epsilon_updates(rec, current_prune_delta, current_best_cost);
#if USE_COMP_STATS
end_cs_clock(&comp_stats->epsilon, num_fsm_nodes_updated);
#endif
/* srec_stats_update(rec, "(...6) "); */
rec->current_search_frame++;
/* no need to prune again after epsilons since they add no new cost - if we
add costs to epsilon arcs (at word boundaries for example), add another
pruning stage */
word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);
}
/* get the top choice, trace it back, and find out where speech starts
and ends. this is used for channel normalization */
static srec* WHICH_RECOG(multi_srec* rec)
{
#if DO_ALLOW_MULTIPLE_MODELS
srec* return_rec = NULL;
costdata current_best_cost = MAXcostdata;
int i = 0;
for (i = 0; i < rec->num_activated_recs; i++)
{
if (current_best_cost > rec->rec[i].current_best_cost)
{
current_best_cost = rec->rec[i].current_best_cost;
return_rec = &rec->rec[i];
}
}
return return_rec;
#else
return &rec->rec[0];
#endif
}
void multi_srec_get_speech_bounds(multi_srec* recm, frameID* start_frame, frameID* end_frame)
{
frameID csf;
wtokenID token_index;
wordID last_word;
srec* rec = WHICH_RECOG(recm);
*start_frame = *end_frame = 0;
if (!rec)
return;
csf = rec->current_search_frame;
token_index = rec->word_lattice->words_for_frame[csf];
last_word = MAXwordID;
while (token_index != MAXwtokenID)
{
word_token* wtoken = &rec->word_token_array[token_index];
word_token* next_wtoken;
if (wtoken->word == rec->context->beg_silence_word)
{
if (*start_frame == 0) *start_frame = wtoken->end_time;
}
if (wtoken->word == rec->context->hack_silence_word)
{
if (wtoken->backtrace != MAXwtokenID)
{
next_wtoken = &rec->word_token_array[wtoken->backtrace];
if (next_wtoken->word == rec->context->beg_silence_word)
*start_frame = wtoken->end_time;
}
}
if (last_word == rec->context->end_silence_word)
{
*end_frame = wtoken->end_time;
if (wtoken->word == rec->context->hack_silence_word
&& wtoken->backtrace != MAXwtokenID)
{
next_wtoken = &rec->word_token_array[wtoken->backtrace];
*end_frame = WORD_TOKEN_GET_WD_ETIME( next_wtoken);
}
}
if (token_index == wtoken->backtrace)
{
/* infinite loop! */
PLogError ("warning: breaking infinite loop\n");
*end_frame = 0;
break;
}
token_index = wtoken->backtrace;
last_word = wtoken->word;
}
}
int multi_srec_get_eos_status(multi_srec* rec)
{
int rc;
ASSERT(rec);
rc = (int)rec->eos_status;
if (rc < 0) rc = 0;
return rc;
}
/*
ToDo List:
end-pointing
duration
channel normalization
re-use and appropriate killing of word_tokens
pruning fsmnode_tokens
astar backward for alternative choices
minimized graphs and word merging
Johans idea:
When propagating a fsmarc_token, we need to remember the word.id when it
is observed. Let's continue to use fsmarc_token->word[] to remember those.
When merging 2+ fsmarc_tokens into a fsmnode_token, we need remember
both histories, not just the best. All histories and maintained on a linked
list, with word_token->next_token_index serving as links, somehow we also
remember the cost offset from one link to the next and keep track of that.
Try to create the word_token as late a possible, so as to keep usage down.
The list should be sorted so that we can drop things off the end, Ie. don't
need to keep all word, a max of 10 is fine cuz that's the most we'll need
to drop off at a .wb anyways!
altwords .. working .. now cpu optimize ... ideas
use only the head refcount, #define the refcopy, not a function
free_altword_token_batch() should not double check for AWTNULL
BUILD & BUILD_DEBUG in selected areas
reprune_altword_token_batch ... change costbasis to a tag ... to say (already repruned)
endpointing
at grammar prepare ...
get the list of endnodes ... get the list of opendnodes
... start from the graph's endnode, walk backwards on all null or silence arcs, find the nodes which have a silence or null path to the end: those are sinknodes
... sinknodes are endnodes or opendnodes ... the sinknodesO are the sinknodes that do go to speech arcs .. the sinknodes1 are the sinknodes that do not go to any speech arcs
... walkforward all sinknodes0 through iwt arcs, those are openendnodes
... walkforward all sinknodes1 through iwt arcs, those are endnodes
get the top score fsmnode_token ...
... is it on an endnode ... has this been the top choice for the last 30 frames
... is it on an optional endnode ... has this neen the top choice for the last 50 frames?
*/