srec/crec/word_lattice.c - platform/external/srec - Git at Google

 /*---------------------------------------------------------------------------*
  *  word_lattice.c  *
  *                                                                           *
  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
  *                                                                           *
  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
  *  you may not use this file except in compliance with the License.         *
  *                                                                           *
  *  You may obtain a copy of the License at                                  *
  *      http://www.apache.org/licenses/LICENSE-2.0                           *
  *                                                                           *
  *  Unless required by applicable law or agreed to in writing, software      *
  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  *  See the License for the specific language governing permissions and      *
  *  limitations under the License.                                           *
  *                                                                           *
  *---------------------------------------------------------------------------*/

 #ifndef _RTT
 #include "pstdio.h"
 #endif
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
 #include "passert.h"
 #include "portable.h"


 #include "portable.h"

 #include "hmm_desc.h"
 #include "utteranc.h"
 #include "hmmlib.h"

 #include "srec_sizes.h"
 #include "search_network.h"
 #include "srec.h"
 #include "word_lattice.h"
 #include "astar.h"
 #include "srec_stats.h"
 #include "srec_results.h"

 #define PRINT_WORD_LATTICE        0
 #define PRINT_SEARCH_DETAILS      0

 #define TRUE_KILL_WTOKEN(WT) { WT.cost = MAXcostdata; \
     WT.word = MAXwordID;  \
     WT.end_time = MAXframeID; \
     WT.end_node = MAXnodeID; \
     WT.backtrace = MAXwtokenID; \
   }

 srec_word_lattice *allocate_word_lattice(frameID max_frames)
 {
   srec_word_lattice *wl;

   wl = (srec_word_lattice*) CALLOC_CLR(1, sizeof(srec_word_lattice), "search.word_lattice.base");
   wl->max_frames = max_frames;
   wl->words_for_frame = (wtokenID*) CALLOC_CLR(max_frames, sizeof(wtokenID), "search.word_lattice.words");

   wl->whether_sorted = (asr_int16_t*)CALLOC_CLR(max_frames,  sizeof(asr_int16_t), "search.word_lattice.sflag");

   return wl;
 }

 void destroy_word_lattice(srec_word_lattice* wl)
 {
   FREE(wl->words_for_frame);
   FREE(wl->whether_sorted);
   FREE(wl);
 }

 void initialize_word_lattice(srec_word_lattice* wl)
 {
   frameID ifr;
   for (ifr = 0; ifr < wl->max_frames; ifr++)
   {
     wl->words_for_frame[ifr] = MAXwtokenID;
     wl->whether_sorted[ifr] = 0;
   }
 }

 costdata lattice_best_cost_to_frame(srec_word_lattice *wl, word_token* word_token_array, frameID ifr)
 {
   int sanity_counter = 0;
   costdata best_cost = MAXcostdata;
   wtokenID wtoken_index = wl->words_for_frame[ ifr];
   while (wtoken_index != MAXwtokenID)
   {
     word_token* wtoken = &word_token_array[wtoken_index];
     if (sanity_counter++ > 200) return MAXcostdata;
     wtoken_index = wtoken->next_token_index;
     if (best_cost > wtoken->cost)
       best_cost = wtoken->cost;
   }
   return best_cost;
 }

 void lattice_add_word_tokens(srec_word_lattice *wl, frameID frame,
                              wtokenID word_token_list_head)
 {
   if (frame >= wl->max_frames)
   {
     log_report("lattice_add_word_tokens: max_frame not big enough\n");
     ASSERT(0);
   }
   wl->words_for_frame[frame] = word_token_list_head;
 }

 void print_word_token_backtrace(srec* rec, wtokenID wtoken_index, char* tail)
 {
   char* null = "NULL", *p;
   char iwttime[8] = { 0, 0, 0, 0, 0, 0, 0, 0};
   bigcostdata cost;
   bigcostdata cost_for_word;
   word_token *wtoken, *last_wtoken;

   last_wtoken = NULL;
   while (wtoken_index != MAXwtokenID)
   {
     wtoken = &rec->word_token_array[wtoken_index];
     if (wtoken->word < rec->context->olabels->num_words)
       p = rec->context->olabels->words[wtoken->word];
     else
       p = null;
     ASSERT(!last_wtoken || last_wtoken->end_time > wtoken->end_time);
     ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
     cost = wtoken->cost + rec->accumulated_cost_offset[ wtoken->end_time];

     if (wtoken->backtrace != MAXwtokenID)
     {
       word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
       cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[ next_wtoken->end_time];
     }
     else
     {
       cost_for_word = cost;
     }
     sprintf(iwttime, "/%d", WORD_TOKEN_GET_WD_ETIME(wtoken) );
     PLogMessage (" (%d W%d %s cost=%d/%d/%d time=%d%s node=%d)", wtoken_index, wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, iwttime, wtoken->end_node);
     fflush(stdout);
     ASSERT(wtoken->backtrace != wtoken_index);
     wtoken_index = wtoken->backtrace;
     last_wtoken = wtoken;
   }
   PLogMessage (tail);
 }

 int sprint_bword_token_backtrace(char* buf, int buflen, srec* rec, wtokenID wtoken_index)
 {
   char* null = "NULL", *p;
   char *pbuf = buf;
   *pbuf = 0;

   while (wtoken_index != MAXwtokenID)
   {
     word_token* wtoken = &rec->word_token_array[wtoken_index];
     p = null;
     if (wtoken->word < rec->context->olabels->num_words)
       p = rec->context->olabels->words[wtoken->word];
     ASSERT(pbuf + strlen(p) + 1 < buf + buflen);
     pbuf += sprintf(pbuf, "%s ", p);
     ASSERT(wtoken->backtrace != wtoken_index);

     wtoken_index = wtoken->backtrace;
   }
   if (pbuf > buf && *(pbuf - 1) == ' ') *(pbuf - 1) = 0;
   return 0;
 }

 #define ERROR_TRANSCRIPTION_TOO_LONG -1

 ESR_ReturnCode sprint_word_token_backtraceByWordID(wordID* wordIDs, size_t* len, srec* rec, wtokenID wtoken_index)
 {
   size_t i, currentLen = 0;
   ESR_ReturnCode rc;
   word_token* wtoken;

 #if PRINT_SEARCH_DETAILS
   printf("in get backtrace wtoken %d\n", wtoken_index);
 #endif

   while (wtoken_index != MAXwtokenID)
   {
     if (*len <= currentLen)
     {
       rc = ESR_BUFFER_OVERFLOW;
       PLogError(ESR_rc2str(rc));
       *len = currentLen + 1;
       goto CLEANUP;
     }
     wtoken = &rec->word_token_array[wtoken_index];
     wordIDs[currentLen] = wtoken->word;
     ++currentLen;

     if (wtoken_index == wtoken->backtrace)
     {
       *len = 0;
       PLogError("Result is loopy, rejecting");
       return ESR_INVALID_STATE;
     }
     wtoken_index = wtoken->backtrace;
   }

   /* reverse the order */
   for (i = 0; i < currentLen / 2; i++)
   {
     wordID tmp = wordIDs[i];
     wordIDs[i] = wordIDs[(currentLen-1-i)];
     wordIDs[(currentLen-1-i)] = tmp;
   }
   /* strip the pau/pau2 markers */
   if (currentLen >= 1 && wordIDs[0] == rec->context->beg_silence_word)
   {
     for (i = 0; i < currentLen - 1; i++)
       wordIDs[i] = wordIDs[i+1];
     currentLen--;
   }
   if (currentLen >= 1 && wordIDs[currentLen-1] == rec->context->end_silence_word)
     currentLen--;
   wordIDs[currentLen] = MAXwordID;
   *len = currentLen;

   return ESR_SUCCESS;
 CLEANUP:
   return rc;
 }

 int sprint_word_token_backtrace(char *transcription, int len, srec* rec, wtokenID wtoken_index)
 {
   char *w;
   char *from_p;
   char *to_p;
   char *end;
   char *tr_end = transcription;
   int wlen;

 #define SHOW_END_TIMES 1
 #if SHOW_END_TIMES
   char buf[256/*64*/];
 #endif

   *transcription = 0;

 #if PRINT_SEARCH_DETAILS
   printf("in get backtrace wtoken %d\n", wtoken_index);
 #endif

   while (wtoken_index != MAXwtokenID)
   {
     word_token* wtoken = &rec->word_token_array[wtoken_index];
 #if PRINT_SEARCH_DETAILS
     printf("got token %d word %d\n", wtoken_index, wtoken->word);
 #endif

     w = "NULL";
     if (wtoken->word < rec->context->olabels->num_words)
       w = rec->context->olabels->words[wtoken->word];
 #if SHOW_END_TIMES
     /* should be defined outside because it is used outside by w */
     /* sprintf(buf,"%s@%d.%d",w, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_time); */
     if (strlen(w) + 12 > sizeof(buf))
     {
       *transcription = 0;
       return ERROR_TRANSCRIPTION_TOO_LONG;
     }
     else
     {
       sprintf(buf, "%s@%d", w, wtoken->end_time);
       w = &buf[0];
     }
 #endif
     wlen = strlen(w);
     if (wlen + tr_end - transcription + 1 >= len)
     {
       *transcription = 0;
       return ERROR_TRANSCRIPTION_TOO_LONG;
     }
     /*need to tack onto beginning, so move string over*/
     from_p = tr_end;
     to_p = tr_end + wlen + 1;
     tr_end = to_p;
     while (from_p >= transcription) *(to_p--) = *(from_p--);

     /* add a space*/
     *to_p = ' ';

     /*add the new word*/
     to_p = transcription;
     end = to_p + wlen;

     while (to_p < end) *(to_p++) = *(w++);

     if (wtoken_index == wtoken->backtrace)
     {
       *transcription = 0;
 #if BUILD&BUILD_DEBUG
       printf("Error: result is loopy, rejecting\n");
 #endif
       return ERROR_RESULT_IS_LOOPY;
     }
     wtoken_index = wtoken->backtrace;
   }
   return 0;
 }

 void print_word_token(srec* rec, wtokenID wtoken_index, char* msg)
 {
   bigcostdata cost, cost_for_word;
   char *p = "NULL";
   word_token* wtoken = &rec->word_token_array[wtoken_index];

   PLogMessage ( msg );
   if (wtoken->word < rec->context->olabels->num_words)
     p = rec->context->olabels->words[wtoken->word];
   ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
   cost = wtoken->cost + rec->accumulated_cost_offset[wtoken->end_time];
   if (wtoken->backtrace != MAXwtokenID)
   {
     word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
     cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[next_wtoken->end_time];
   }
   else
   {
     cost_for_word = cost;
   }
   printf("wtoken %d W%i %s cost=%d/%d/%d time=%d/%d node=%d", wtoken_index,
          wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_node);
   pfflush(PSTDOUT);
   print_word_token_backtrace(rec, wtoken->backtrace, "\n");
 }


 void print_word_token_list(srec* rec, wtokenID wtoken_index, char* msg)
 {
 #ifndef NDEBUG
   int sanity_counter = 0;
 #endif
   PLogMessage ( msg );
   while (wtoken_index != MAXwtokenID)
   {
     word_token* wtoken = &rec->word_token_array[wtoken_index];
     print_word_token(rec, wtoken_index, "");
     ASSERT(sanity_counter++ < 200);
     ASSERT(wtoken_index != wtoken->next_token_index);
     wtoken_index = wtoken->next_token_index;
   }
 }

 #define MAX_LEN 256
 void srec_get_result(srec *rec)
 {
   srec_word_lattice *wl;
   frameID i;
   wtokenID token_index;
   word_token *wtoken;

 #if PRINT_SEARCH_DETAILS
   printf("in srec_get_result\n");
 #endif

   wl = rec->word_lattice;
 #if PRINT_WORD_LATTICE
   for (i = 0; i <= rec->current_search_frame; i++)
   {
 #else
   for (i = rec->current_search_frame; i <= rec->current_search_frame; i++)
   {
 #endif

     /* put the best choice at the top */
     sort_word_lattice_at_frame(rec, i);
     token_index = wl->words_for_frame[i];

 #if PRINT_WORD_LATTICE
     printf("----- List of words for frame %d\n", i);
     print_word_token_list(rec, token_index, "");
 #endif

     if (i == rec->current_search_frame && token_index != MAXwtokenID)
     {
       wtoken =  &(rec->word_token_array[token_index]);
       print_word_token(rec, token_index, "Final Top Choice: ");
     }
   }
 }

 static srec* WHICH_RECOG(multi_srec* rec)
 {
   srec* return_rec = NULL;
   costdata current_best_cost = MAXcostdata;
   int i = 0;
 #if DO_ALLOW_MULTIPLE_MODELS
   for (i = 0; i < rec->num_activated_recs; i++)
   {
 #endif
     if (current_best_cost > rec->rec[i].current_best_cost)
     {
       current_best_cost = rec->rec[i].current_best_cost;
       return_rec = &rec->rec[i];
     }
 #if DO_ALLOW_MULTIPLE_MODELS
   }
 #endif
   return return_rec;
 }

 ESR_ReturnCode srec_get_top_choice_wordIDs(multi_srec* recm, wordID* wordIDs, size_t* len)
 {
   srec* rec = WHICH_RECOG(recm);
   frameID end_frame;
   srec_word_lattice* wl;
   wtokenID token_index;
   ESR_ReturnCode rc;

   if (!rec)
   {
     rc = ESR_INVALID_STATE;
     PLogError(ESR_rc2str(rc));
     goto CLEANUP;
   }

   end_frame = rec->current_search_frame;
   wl = rec->word_lattice;
   token_index = wl->words_for_frame[end_frame];

   if (token_index == MAXwtokenID)
   {
     PLogError(L("ESR_INVALID_STATE"));
     return ESR_INVALID_STATE;
   }
 #if PRINT_WORD_LATTICE
   print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
 #endif
   /* the head of the list on the last frame is always best */
   CHKLOG(rc, sprint_word_token_backtraceByWordID(wordIDs, len, rec, token_index));

   return ESR_SUCCESS;
 CLEANUP:
   return rc;
 }

 int srec_get_top_choice_transcription(multi_srec* recm, char *transcription, int len, int whether_strip_slot_markers)
 {
   int rc;
   srec* rec = WHICH_RECOG(recm);
   frameID end_frame;
   srec_word_lattice* wl;
   wtokenID token_index;

   if (!rec)
   {
     *transcription = 0;
     return 1;
   }
   if( recm->eos_status == VALID_SPEECH_NOT_YET_DETECTED)
   {
       *transcription = 0;
       return 1;
   }

   end_frame = rec->current_search_frame;
   wl = rec->word_lattice;
   sort_word_lattice_at_frame(rec, end_frame);
   token_index = wl->words_for_frame[end_frame];

   if (token_index != MAXwtokenID)
   {
 #if PRINT_WORD_LATTICE
     print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
 #endif
     /* the head of the list on the last frame is always best */
     rc = sprint_word_token_backtrace(transcription, len, rec, token_index);
   }
   else
   {
     strcpy(transcription, "");
     rc = 1;
   }
   if (whether_strip_slot_markers)
     srec_result_strip_slot_markers(transcription);
   return rc;
 }

 int srec_get_top_choice_score(multi_srec* recm, bigcostdata *cost, int do_incsil)
 {
   srec* rec = WHICH_RECOG(recm);
   frameID end_frame;
   srec_word_lattice* wl;
   wtokenID token_index;
   word_token* wtoken;

   if (!rec)
   {
     *cost = MAXcostdata;
     return 1;
   }

   end_frame = rec->current_search_frame;
   wl = rec->word_lattice;
   token_index = wl->words_for_frame[end_frame];

   if (end_frame < MAXframeID && token_index != MAXwtokenID)
   {
     wtoken = &rec->word_token_array[token_index];
     *cost = wtoken->cost;
     *cost += rec->accumulated_cost_offset[ wtoken->end_time];
     return 0;
   }
   else
   {
     *cost = MAXcostdata;
     return 1;
   }
 }

 int srec_print_results(multi_srec *recm, int max_choices)
 {
   char transcription[MAX_LEN];
   bigcostdata cost;

   srec_get_top_choice_transcription(recm, transcription, MAX_LEN, 1);
   srec_get_top_choice_score(recm, &cost, SCOREMODE_INCLUDE_SILENCE);

   log_report("R: %8ld %8ld %s\t%.1f\n", 0, 0, transcription, cost);

   return 0;
 }

 /* sort the word lattice at this frame, todo: remove rec argument */

 #define MAX_WTOKENS_AT_FRAME 64 /* +1 for the MAXwtokenID! */
 void sort_word_lattice_at_frame(srec* rec, frameID frame)
 {
   srec_word_lattice* wl = rec->word_lattice;
   word_token *wtoken, *wtoken2;
   wtokenID pwi[MAX_WTOKENS_AT_FRAME], token_index;
   word_token* word_token_array = rec->word_token_array;
   int i, j, npwi = 0;
   ASSERT(rec->word_priority_q->max_in_q < MAX_WTOKENS_AT_FRAME);

   ASSERT(frame < wl->max_frames);
   if (wl->whether_sorted[frame])
     return;

   wl->whether_sorted[frame] = 1;

   /* make an array of word token index addresses */
   for (pwi[npwi] = wl->words_for_frame[frame]; pwi[npwi] != MAXwtokenID;)
   {
     ASSERT(npwi < MAX_WTOKENS_AT_FRAME);
     token_index = pwi[npwi];
     wtoken = &word_token_array[ token_index];
     npwi++;
     pwi[npwi] = wtoken->next_token_index;
   }

   /* sort the word token indices, bubble sort is fine */
   for (i = 0; i < npwi; i++)
   {
     for (j = 0; j < (npwi - 1); j++)
     {
       wtoken = &word_token_array[ pwi[j]];
       wtoken2 = &word_token_array[ pwi[j+1]];
       if (wtoken->cost > wtoken2->cost)
       {
         token_index = pwi[j];
         pwi[j] = pwi[j+1];
         pwi[j+1] = token_index;
       }
     }
   }

   /*print_word_token_list(rec,wl->words_for_frame[frame],"## BEFORE SORT\n");*/
   wl->words_for_frame[ frame] = pwi[0];
   for (i = 0; i < npwi; i++)
   {
     wtoken = &word_token_array[ pwi[i]];
     wtoken->next_token_index = pwi[i+1]; /* last points nowhwere */
   }
   /*print_word_token_list(rec,wl->words_for_frame[frame],"## AFTER  SORT\n");*/
 }


 /* this frees a word token, it may still have references in the lattice though */

 void free_word_token(srec *rec, wtokenID old_token_index)
 {
   word_token* wtoken;
   wtoken = &rec->word_token_array[old_token_index];
   wtoken->next_token_index = rec->word_token_freelist;
   rec->word_token_freelist = old_token_index;
   TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
 }

 /* this frees some earlier allocated word_tokens from previous frames,
    this makes sure we can always have some to spare for future frames */

 void free_word_token_from_lattice(srec *rec, wtokenID old_token_index)
 {
   word_token* wtoken;
   wtokenID *rtoken_index;
   word_token* rtoken;

 #define CHECK_FREE_WORD_TOKEN 1
 #if CHECK_FREE_WORD_TOKEN
   stokenID stoken_index, i;
   ftokenID ftoken_index;
   fsmarc_token* stoken;
   fsmnode_token* ftoken;
   int nerrs = 0;

   stoken_index = rec->active_fsmarc_tokens;
   for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
   {
     stoken = &rec->fsmarc_token_array[stoken_index];
     for (i = 0; i < stoken->num_hmm_states; i++)
     {
       if (stoken->word_backtrace[i] == old_token_index)
       {
         printf("Error: can't delete wtoken %d cuz stoken%d.%d cost %d\n",
                old_token_index, stoken_index, i, stoken->cost[i]);
         nerrs++;
       }
     }
   }

   ftoken_index = rec->active_fsmnode_tokens;
   for (; ftoken_index != MAXftokenID; ftoken_index = ftoken->next_token_index)
   {
     ftoken = &rec->fsmnode_token_array[ftoken_index];
     if (ftoken->word_backtrace == old_token_index)
     {
       printf("Error: can't delete wtoken %d cuz ftoken %d cost %d\n",
              old_token_index, ftoken_index, ftoken->cost);
       nerrs++;
     }
   }

   /*  wtoken = &rec->word_token_array[old_token_index];
       for(ifr=wtoken->end_time+1; ifr>=0; ifr--) {
       wtoken_index = rec->word_lattice->words_for_frame[ifr];
       for( ; wtoken_index!= MAXwtokenID; wtoken_index=wtoken->next_token_index) {
       wtoken = &rec->word_token_array[wtoken_index];
       if(wtoken->backtrace == old_token_index) {
       printf("Error: can't delete wtoken %d cuz wtoken %d at frame %d backtraces cost %d\n",
       old_token_index, wtoken_index, ifr, wtoken->cost);
       nerrs++;
       }
       }
       }
   */
   ASSERT(nerrs == 0);
   if (nerrs > 0)
   {
     print_word_token(rec, old_token_index, "Error: while deleting ");
     return;
   }
 #endif

   wtoken = &rec->word_token_array[old_token_index];
   /* remove from word lattice */
   rtoken_index = &rec->word_lattice->words_for_frame[ wtoken->end_time+1];
   for (; (*rtoken_index) != MAXwtokenID; rtoken_index = &rtoken->next_token_index)
   {
     rtoken = &rec->word_token_array[(*rtoken_index)];
     if (*rtoken_index == old_token_index)
     {
       *rtoken_index = wtoken->next_token_index;
       break;
     }
   }
   wtoken->next_token_index = rec->word_token_freelist;
   rec->word_token_freelist = old_token_index;
   TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
 }

 int reprune_word_tokens(srec* rec, costdata current_best_cost)
 {
   int i, keep_astar_prune;
   arc_token* keep_arc_token_list;

   stokenID stoken_index;
   fsmarc_token* stoken;
   wtokenID btindex;
   word_token* bttoken;
   wtokenID wtoken_index;
   word_token* wtoken;
   altword_token* awtoken;

   /* remember things about the astar before changing it for local purposes */
   keep_astar_prune = rec->astar_stack->prune_delta;
   /* rec->astar_stack->prune_delta = 400; */
   /* ignore the grammar constraints for this quick astar backward pass */
   keep_arc_token_list = rec->context->arc_token_list;
   rec->context->arc_token_list = 0;

   /* we will flag all wtokens to be kept */

   /* initialize the flags to keep all */
   memset(rec->word_token_array_flags, 0, sizeof(rec->word_token_array_flags[0])*rec->word_token_array_size);

   /* flag all those tokens not active, ie already free */
   wtoken_index = rec->word_token_freelist;
   for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
   {
     wtoken = &rec->word_token_array[wtoken_index];
     rec->word_token_array_flags[wtoken_index]--;  /* already deleted */
   }

   /* flag along the best active state paths */
   stoken_index = rec->active_fsmarc_tokens;
   for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
   {
     stoken = &rec->fsmarc_token_array[ stoken_index];
     for (i = 0; i < stoken->num_hmm_states; i++)
     {
       btindex = stoken->word_backtrace[i];
       for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
       {
         bttoken = &rec->word_token_array[ btindex];
         ASSERT(rec->word_token_array_flags[ btindex] >= 0);
         rec->word_token_array_flags[ btindex] = 1;
       }
       for (awtoken = stoken->aword_backtrace[i]; awtoken;
            awtoken = awtoken->next_token)
       {
         btindex = awtoken->word_backtrace;
         for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
         {
           bttoken = &rec->word_token_array[ btindex];
           rec->word_token_array_flags[ btindex] = 1;
         }
       }
     }
   }

   /* run the astar and flag a little more */
   astar_stack_prepare_from_active_search(rec->astar_stack, 100, rec);
   astar_stack_do_backwards_search(rec, 100);
   astar_stack_flag_word_tokens_used(rec->astar_stack, rec);
   astar_stack_clear(rec->astar_stack);

   /* kill_word_tokens */
   for (i = 0; i < rec->word_token_array_size; i++)
   {
     if (rec->word_token_array_flags[i] == 0) /* < 0 are already free! */
       free_word_token_from_lattice(rec, (frameID)i);
   }

   /* set this back to a regular astar from remembered values */
   rec->context->arc_token_list = keep_arc_token_list;
   rec->astar_stack->prune_delta = (costdata) keep_astar_prune;

   SREC_STATS_INC_WTOKEN_REPRUNES(1);
   return 0;
 }
	/---------------------------------------------------------------------------
	* word_lattice.c *
	* *
	* Copyright 2007, 2008 Nuance Communciations, Inc. *
	* *
	* Licensed under the Apache License, Version 2.0 (the 'License'); *
	* you may not use this file except in compliance with the License. *
	* *
	* You may obtain a copy of the License at *
	* http://www.apache.org/licenses/LICENSE-2.0 *
	* *
	* Unless required by applicable law or agreed to in writing, software *
	* distributed under the License is distributed on an 'AS IS' BASIS, *
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
	* See the License for the specific language governing permissions and *
	* limitations under the License. *
	* *
	---------------------------------------------------------------------------/

	#ifndef _RTT
	#include "pstdio.h"
	#endif
	#include <stdlib.h>
	#include <string.h>
	#include <math.h>
	#include "passert.h"
	#include "portable.h"


	#include "portable.h"

	#include "hmm_desc.h"
	#include "utteranc.h"
	#include "hmmlib.h"

	#include "srec_sizes.h"
	#include "search_network.h"
	#include "srec.h"
	#include "word_lattice.h"
	#include "astar.h"
	#include "srec_stats.h"
	#include "srec_results.h"

	#define PRINT_WORD_LATTICE 0
	#define PRINT_SEARCH_DETAILS 0

	#define TRUE_KILL_WTOKEN(WT) { WT.cost = MAXcostdata; \
	WT.word = MAXwordID; \
	WT.end_time = MAXframeID; \
	WT.end_node = MAXnodeID; \
	WT.backtrace = MAXwtokenID; \
	}

	srec_word_lattice *allocate_word_lattice(frameID max_frames)
	{
	srec_word_lattice *wl;

	wl = (srec_word_lattice*) CALLOC_CLR(1, sizeof(srec_word_lattice), "search.word_lattice.base");
	wl->max_frames = max_frames;
	wl->words_for_frame = (wtokenID*) CALLOC_CLR(max_frames, sizeof(wtokenID), "search.word_lattice.words");

	wl->whether_sorted = (asr_int16_t*)CALLOC_CLR(max_frames, sizeof(asr_int16_t), "search.word_lattice.sflag");

	return wl;
	}

	void destroy_word_lattice(srec_word_lattice* wl)
	{
	FREE(wl->words_for_frame);
	FREE(wl->whether_sorted);
	FREE(wl);
	}

	void initialize_word_lattice(srec_word_lattice* wl)
	{
	frameID ifr;
	for (ifr = 0; ifr < wl->max_frames; ifr++)
	{
	wl->words_for_frame[ifr] = MAXwtokenID;
	wl->whether_sorted[ifr] = 0;
	}
	}

	costdata lattice_best_cost_to_frame(srec_word_lattice wl, word_token word_token_array, frameID ifr)
	{
	int sanity_counter = 0;
	costdata best_cost = MAXcostdata;
	wtokenID wtoken_index = wl->words_for_frame[ ifr];
	while (wtoken_index != MAXwtokenID)
	{
	word_token* wtoken = &word_token_array[wtoken_index];
	if (sanity_counter++ > 200) return MAXcostdata;
	wtoken_index = wtoken->next_token_index;
	if (best_cost > wtoken->cost)
	best_cost = wtoken->cost;
	}
	return best_cost;
	}

	void lattice_add_word_tokens(srec_word_lattice *wl, frameID frame,
	wtokenID word_token_list_head)
	{
	if (frame >= wl->max_frames)
	{
	log_report("lattice_add_word_tokens: max_frame not big enough\n");
	ASSERT(0);
	}
	wl->words_for_frame[frame] = word_token_list_head;
	}

	void print_word_token_backtrace(srec* rec, wtokenID wtoken_index, char* tail)
	{
	char* null = "NULL", *p;
	char iwttime[8] = { 0, 0, 0, 0, 0, 0, 0, 0};
	bigcostdata cost;
	bigcostdata cost_for_word;
	word_token wtoken, last_wtoken;

	last_wtoken = NULL;
	while (wtoken_index != MAXwtokenID)
	{
	wtoken = &rec->word_token_array[wtoken_index];
	if (wtoken->word < rec->context->olabels->num_words)
	p = rec->context->olabels->words[wtoken->word];
	else
	p = null;
	ASSERT(!last_wtoken \|\| last_wtoken->end_time > wtoken->end_time);
	ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
	cost = wtoken->cost + rec->accumulated_cost_offset[ wtoken->end_time];

	if (wtoken->backtrace != MAXwtokenID)
	{
	word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
	cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[ next_wtoken->end_time];
	}
	else
	{
	cost_for_word = cost;
	}
	sprintf(iwttime, "/%d", WORD_TOKEN_GET_WD_ETIME(wtoken) );
	PLogMessage (" (%d W%d %s cost=%d/%d/%d time=%d%s node=%d)", wtoken_index, wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, iwttime, wtoken->end_node);
	fflush(stdout);
	ASSERT(wtoken->backtrace != wtoken_index);
	wtoken_index = wtoken->backtrace;
	last_wtoken = wtoken;
	}
	PLogMessage (tail);
	}

	int sprint_bword_token_backtrace(char* buf, int buflen, srec* rec, wtokenID wtoken_index)
	{
	char* null = "NULL", *p;
	char *pbuf = buf;
	*pbuf = 0;

	while (wtoken_index != MAXwtokenID)
	{
	word_token* wtoken = &rec->word_token_array[wtoken_index];
	p = null;
	if (wtoken->word < rec->context->olabels->num_words)
	p = rec->context->olabels->words[wtoken->word];
	ASSERT(pbuf + strlen(p) + 1 < buf + buflen);
	pbuf += sprintf(pbuf, "%s ", p);
	ASSERT(wtoken->backtrace != wtoken_index);

	wtoken_index = wtoken->backtrace;
	}
	if (pbuf > buf && (pbuf - 1) == ' ') (pbuf - 1) = 0;
	return 0;
	}

	#define ERROR_TRANSCRIPTION_TOO_LONG -1

	ESR_ReturnCode sprint_word_token_backtraceByWordID(wordID* wordIDs, size_t* len, srec* rec, wtokenID wtoken_index)
	{
	size_t i, currentLen = 0;
	ESR_ReturnCode rc;
	word_token* wtoken;

	#if PRINT_SEARCH_DETAILS
	printf("in get backtrace wtoken %d\n", wtoken_index);
	#endif

	while (wtoken_index != MAXwtokenID)
	{
	if (*len <= currentLen)
	{
	rc = ESR_BUFFER_OVERFLOW;
	PLogError(ESR_rc2str(rc));
	*len = currentLen + 1;
	goto CLEANUP;
	}
	wtoken = &rec->word_token_array[wtoken_index];
	wordIDs[currentLen] = wtoken->word;
	++currentLen;

	if (wtoken_index == wtoken->backtrace)
	{
	*len = 0;
	PLogError("Result is loopy, rejecting");
	return ESR_INVALID_STATE;
	}
	wtoken_index = wtoken->backtrace;
	}

	/* reverse the order */
	for (i = 0; i < currentLen / 2; i++)
	{
	wordID tmp = wordIDs[i];
	wordIDs[i] = wordIDs[(currentLen-1-i)];
	wordIDs[(currentLen-1-i)] = tmp;
	}
	/* strip the pau/pau2 markers */
	if (currentLen >= 1 && wordIDs[0] == rec->context->beg_silence_word)
	{
	for (i = 0; i < currentLen - 1; i++)
	wordIDs[i] = wordIDs[i+1];
	currentLen--;
	}
	if (currentLen >= 1 && wordIDs[currentLen-1] == rec->context->end_silence_word)
	currentLen--;
	wordIDs[currentLen] = MAXwordID;
	*len = currentLen;

	return ESR_SUCCESS;
	CLEANUP:
	return rc;
	}

	int sprint_word_token_backtrace(char transcription, int len, srec rec, wtokenID wtoken_index)
	{
	char *w;
	char *from_p;
	char *to_p;
	char *end;
	char *tr_end = transcription;
	int wlen;

	#define SHOW_END_TIMES 1
	#if SHOW_END_TIMES
	char buf[256/64/];
	#endif

	*transcription = 0;

	#if PRINT_SEARCH_DETAILS
	printf("in get backtrace wtoken %d\n", wtoken_index);
	#endif

	while (wtoken_index != MAXwtokenID)
	{
	word_token* wtoken = &rec->word_token_array[wtoken_index];
	#if PRINT_SEARCH_DETAILS
	printf("got token %d word %d\n", wtoken_index, wtoken->word);
	#endif

	w = "NULL";
	if (wtoken->word < rec->context->olabels->num_words)
	w = rec->context->olabels->words[wtoken->word];
	#if SHOW_END_TIMES
	/* should be defined outside because it is used outside by w */
	/* sprintf(buf,"%s@%d.%d",w, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_time); */
	if (strlen(w) + 12 > sizeof(buf))
	{
	*transcription = 0;
	return ERROR_TRANSCRIPTION_TOO_LONG;
	}
	else
	{
	sprintf(buf, "%s@%d", w, wtoken->end_time);
	w = &buf[0];
	}
	#endif
	wlen = strlen(w);
	if (wlen + tr_end - transcription + 1 >= len)
	{
	*transcription = 0;
	return ERROR_TRANSCRIPTION_TOO_LONG;
	}
	/need to tack onto beginning, so move string over/
	from_p = tr_end;
	to_p = tr_end + wlen + 1;
	tr_end = to_p;
	while (from_p >= transcription) (to_p--) = (from_p--);

	/* add a space*/
	*to_p = ' ';

	/add the new word/
	to_p = transcription;
	end = to_p + wlen;

	while (to_p < end) (to_p++) = (w++);

	if (wtoken_index == wtoken->backtrace)
	{
	*transcription = 0;
	#if BUILD&BUILD_DEBUG
	printf("Error: result is loopy, rejecting\n");
	#endif
	return ERROR_RESULT_IS_LOOPY;
	}
	wtoken_index = wtoken->backtrace;
	}
	return 0;
	}

	void print_word_token(srec* rec, wtokenID wtoken_index, char* msg)
	{
	bigcostdata cost, cost_for_word;
	char *p = "NULL";
	word_token* wtoken = &rec->word_token_array[wtoken_index];

	PLogMessage ( msg );
	if (wtoken->word < rec->context->olabels->num_words)
	p = rec->context->olabels->words[wtoken->word];
	ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
	cost = wtoken->cost + rec->accumulated_cost_offset[wtoken->end_time];
	if (wtoken->backtrace != MAXwtokenID)
	{
	word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
	cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[next_wtoken->end_time];
	}
	else
	{
	cost_for_word = cost;
	}
	printf("wtoken %d W%i %s cost=%d/%d/%d time=%d/%d node=%d", wtoken_index,
	wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_node);
	pfflush(PSTDOUT);
	print_word_token_backtrace(rec, wtoken->backtrace, "\n");
	}


	void print_word_token_list(srec* rec, wtokenID wtoken_index, char* msg)
	{
	#ifndef NDEBUG
	int sanity_counter = 0;
	#endif
	PLogMessage ( msg );
	while (wtoken_index != MAXwtokenID)
	{
	word_token* wtoken = &rec->word_token_array[wtoken_index];
	print_word_token(rec, wtoken_index, "");
	ASSERT(sanity_counter++ < 200);
	ASSERT(wtoken_index != wtoken->next_token_index);
	wtoken_index = wtoken->next_token_index;
	}
	}

	#define MAX_LEN 256
	void srec_get_result(srec *rec)
	{
	srec_word_lattice *wl;
	frameID i;
	wtokenID token_index;
	word_token *wtoken;

	#if PRINT_SEARCH_DETAILS
	printf("in srec_get_result\n");
	#endif

	wl = rec->word_lattice;
	#if PRINT_WORD_LATTICE
	for (i = 0; i <= rec->current_search_frame; i++)
	{
	#else
	for (i = rec->current_search_frame; i <= rec->current_search_frame; i++)
	{
	#endif

	/* put the best choice at the top */
	sort_word_lattice_at_frame(rec, i);
	token_index = wl->words_for_frame[i];

	#if PRINT_WORD_LATTICE
	printf("----- List of words for frame %d\n", i);
	print_word_token_list(rec, token_index, "");
	#endif

	if (i == rec->current_search_frame && token_index != MAXwtokenID)
	{
	wtoken = &(rec->word_token_array[token_index]);
	print_word_token(rec, token_index, "Final Top Choice: ");
	}
	}
	}

	static srec* WHICH_RECOG(multi_srec* rec)
	{
	srec* return_rec = NULL;
	costdata current_best_cost = MAXcostdata;
	int i = 0;
	#if DO_ALLOW_MULTIPLE_MODELS
	for (i = 0; i < rec->num_activated_recs; i++)
	{
	#endif
	if (current_best_cost > rec->rec[i].current_best_cost)
	{
	current_best_cost = rec->rec[i].current_best_cost;
	return_rec = &rec->rec[i];
	}
	#if DO_ALLOW_MULTIPLE_MODELS
	}
	#endif
	return return_rec;
	}

	ESR_ReturnCode srec_get_top_choice_wordIDs(multi_srec* recm, wordID* wordIDs, size_t* len)
	{
	srec* rec = WHICH_RECOG(recm);
	frameID end_frame;
	srec_word_lattice* wl;
	wtokenID token_index;
	ESR_ReturnCode rc;

	if (!rec)
	{
	rc = ESR_INVALID_STATE;
	PLogError(ESR_rc2str(rc));
	goto CLEANUP;
	}

	end_frame = rec->current_search_frame;
	wl = rec->word_lattice;
	token_index = wl->words_for_frame[end_frame];

	if (token_index == MAXwtokenID)
	{
	PLogError(L("ESR_INVALID_STATE"));
	return ESR_INVALID_STATE;
	}
	#if PRINT_WORD_LATTICE
	print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
	#endif
	/* the head of the list on the last frame is always best */
	CHKLOG(rc, sprint_word_token_backtraceByWordID(wordIDs, len, rec, token_index));

	return ESR_SUCCESS;
	CLEANUP:
	return rc;
	}

	int srec_get_top_choice_transcription(multi_srec* recm, char *transcription, int len, int whether_strip_slot_markers)
	{
	int rc;
	srec* rec = WHICH_RECOG(recm);
	frameID end_frame;
	srec_word_lattice* wl;
	wtokenID token_index;

	if (!rec)
	{
	*transcription = 0;
	return 1;
	}
	if( recm->eos_status == VALID_SPEECH_NOT_YET_DETECTED)
	{
	*transcription = 0;
	return 1;
	}

	end_frame = rec->current_search_frame;
	wl = rec->word_lattice;
	sort_word_lattice_at_frame(rec, end_frame);
	token_index = wl->words_for_frame[end_frame];

	if (token_index != MAXwtokenID)
	{
	#if PRINT_WORD_LATTICE
	print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
	#endif
	/* the head of the list on the last frame is always best */
	rc = sprint_word_token_backtrace(transcription, len, rec, token_index);
	}
	else
	{
	strcpy(transcription, "");
	rc = 1;
	}
	if (whether_strip_slot_markers)
	srec_result_strip_slot_markers(transcription);
	return rc;
	}

	int srec_get_top_choice_score(multi_srec* recm, bigcostdata *cost, int do_incsil)
	{
	srec* rec = WHICH_RECOG(recm);
	frameID end_frame;
	srec_word_lattice* wl;
	wtokenID token_index;
	word_token* wtoken;

	if (!rec)
	{
	*cost = MAXcostdata;
	return 1;
	}

	end_frame = rec->current_search_frame;
	wl = rec->word_lattice;
	token_index = wl->words_for_frame[end_frame];

	if (end_frame < MAXframeID && token_index != MAXwtokenID)
	{
	wtoken = &rec->word_token_array[token_index];
	*cost = wtoken->cost;
	*cost += rec->accumulated_cost_offset[ wtoken->end_time];
	return 0;
	}
	else
	{
	*cost = MAXcostdata;
	return 1;
	}
	}

	int srec_print_results(multi_srec *recm, int max_choices)
	{
	char transcription[MAX_LEN];
	bigcostdata cost;

	srec_get_top_choice_transcription(recm, transcription, MAX_LEN, 1);
	srec_get_top_choice_score(recm, &cost, SCOREMODE_INCLUDE_SILENCE);

	log_report("R: %8ld %8ld %s\t%.1f\n", 0, 0, transcription, cost);

	return 0;
	}

	/* sort the word lattice at this frame, todo: remove rec argument */

	#define MAX_WTOKENS_AT_FRAME 64 /* +1 for the MAXwtokenID! */
	void sort_word_lattice_at_frame(srec* rec, frameID frame)
	{
	srec_word_lattice* wl = rec->word_lattice;
	word_token wtoken, wtoken2;
	wtokenID pwi[MAX_WTOKENS_AT_FRAME], token_index;
	word_token* word_token_array = rec->word_token_array;
	int i, j, npwi = 0;
	ASSERT(rec->word_priority_q->max_in_q < MAX_WTOKENS_AT_FRAME);

	ASSERT(frame < wl->max_frames);
	if (wl->whether_sorted[frame])
	return;

	wl->whether_sorted[frame] = 1;

	/* make an array of word token index addresses */
	for (pwi[npwi] = wl->words_for_frame[frame]; pwi[npwi] != MAXwtokenID;)
	{
	ASSERT(npwi < MAX_WTOKENS_AT_FRAME);
	token_index = pwi[npwi];
	wtoken = &word_token_array[ token_index];
	npwi++;
	pwi[npwi] = wtoken->next_token_index;
	}

	/* sort the word token indices, bubble sort is fine */
	for (i = 0; i < npwi; i++)
	{
	for (j = 0; j < (npwi - 1); j++)
	{
	wtoken = &word_token_array[ pwi[j]];
	wtoken2 = &word_token_array[ pwi[j+1]];
	if (wtoken->cost > wtoken2->cost)
	{
	token_index = pwi[j];
	pwi[j] = pwi[j+1];
	pwi[j+1] = token_index;
	}
	}
	}

	/print_word_token_list(rec,wl->words_for_frame[frame],"## BEFORE SORT\n");/
	wl->words_for_frame[ frame] = pwi[0];
	for (i = 0; i < npwi; i++)
	{
	wtoken = &word_token_array[ pwi[i]];
	wtoken->next_token_index = pwi[i+1]; /* last points nowhwere */
	}
	/print_word_token_list(rec,wl->words_for_frame[frame],"## AFTER SORT\n");/
	}


	/* this frees a word token, it may still have references in the lattice though */

	void free_word_token(srec *rec, wtokenID old_token_index)
	{
	word_token* wtoken;
	wtoken = &rec->word_token_array[old_token_index];
	wtoken->next_token_index = rec->word_token_freelist;
	rec->word_token_freelist = old_token_index;
	TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
	}

	/* this frees some earlier allocated word_tokens from previous frames,
	this makes sure we can always have some to spare for future frames */

	void free_word_token_from_lattice(srec *rec, wtokenID old_token_index)
	{
	word_token* wtoken;
	wtokenID *rtoken_index;
	word_token* rtoken;

	#define CHECK_FREE_WORD_TOKEN 1
	#if CHECK_FREE_WORD_TOKEN
	stokenID stoken_index, i;
	ftokenID ftoken_index;
	fsmarc_token* stoken;
	fsmnode_token* ftoken;
	int nerrs = 0;

	stoken_index = rec->active_fsmarc_tokens;
	for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
	{
	stoken = &rec->fsmarc_token_array[stoken_index];
	for (i = 0; i < stoken->num_hmm_states; i++)
	{
	if (stoken->word_backtrace[i] == old_token_index)
	{
	printf("Error: can't delete wtoken %d cuz stoken%d.%d cost %d\n",
	old_token_index, stoken_index, i, stoken->cost[i]);
	nerrs++;
	}
	}
	}

	ftoken_index = rec->active_fsmnode_tokens;
	for (; ftoken_index != MAXftokenID; ftoken_index = ftoken->next_token_index)
	{
	ftoken = &rec->fsmnode_token_array[ftoken_index];
	if (ftoken->word_backtrace == old_token_index)
	{
	printf("Error: can't delete wtoken %d cuz ftoken %d cost %d\n",
	old_token_index, ftoken_index, ftoken->cost);
	nerrs++;
	}
	}

	/* wtoken = &rec->word_token_array[old_token_index];
	for(ifr=wtoken->end_time+1; ifr>=0; ifr--) {
	wtoken_index = rec->word_lattice->words_for_frame[ifr];
	for( ; wtoken_index!= MAXwtokenID; wtoken_index=wtoken->next_token_index) {
	wtoken = &rec->word_token_array[wtoken_index];
	if(wtoken->backtrace == old_token_index) {
	printf("Error: can't delete wtoken %d cuz wtoken %d at frame %d backtraces cost %d\n",
	old_token_index, wtoken_index, ifr, wtoken->cost);
	nerrs++;
	}
	}
	}
	*/
	ASSERT(nerrs == 0);
	if (nerrs > 0)
	{
	print_word_token(rec, old_token_index, "Error: while deleting ");
	return;
	}
	#endif

	wtoken = &rec->word_token_array[old_token_index];
	/* remove from word lattice */
	rtoken_index = &rec->word_lattice->words_for_frame[ wtoken->end_time+1];
	for (; (*rtoken_index) != MAXwtokenID; rtoken_index = &rtoken->next_token_index)
	{
	rtoken = &rec->word_token_array[(*rtoken_index)];
	if (*rtoken_index == old_token_index)
	{
	*rtoken_index = wtoken->next_token_index;
	break;
	}
	}
	wtoken->next_token_index = rec->word_token_freelist;
	rec->word_token_freelist = old_token_index;
	TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
	}

	int reprune_word_tokens(srec* rec, costdata current_best_cost)
	{
	int i, keep_astar_prune;
	arc_token* keep_arc_token_list;

	stokenID stoken_index;
	fsmarc_token* stoken;
	wtokenID btindex;
	word_token* bttoken;
	wtokenID wtoken_index;
	word_token* wtoken;
	altword_token* awtoken;

	/* remember things about the astar before changing it for local purposes */
	keep_astar_prune = rec->astar_stack->prune_delta;
	/* rec->astar_stack->prune_delta = 400; */
	/* ignore the grammar constraints for this quick astar backward pass */
	keep_arc_token_list = rec->context->arc_token_list;
	rec->context->arc_token_list = 0;

	/* we will flag all wtokens to be kept */

	/* initialize the flags to keep all */
	memset(rec->word_token_array_flags, 0, sizeof(rec->word_token_array_flags[0])*rec->word_token_array_size);

	/* flag all those tokens not active, ie already free */
	wtoken_index = rec->word_token_freelist;
	for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
	{
	wtoken = &rec->word_token_array[wtoken_index];
	rec->word_token_array_flags[wtoken_index]--; /* already deleted */
	}

	/* flag along the best active state paths */
	stoken_index = rec->active_fsmarc_tokens;
	for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
	{
	stoken = &rec->fsmarc_token_array[ stoken_index];
	for (i = 0; i < stoken->num_hmm_states; i++)
	{
	btindex = stoken->word_backtrace[i];
	for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
	{
	bttoken = &rec->word_token_array[ btindex];
	ASSERT(rec->word_token_array_flags[ btindex] >= 0);
	rec->word_token_array_flags[ btindex] = 1;
	}
	for (awtoken = stoken->aword_backtrace[i]; awtoken;
	awtoken = awtoken->next_token)
	{
	btindex = awtoken->word_backtrace;
	for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
	{
	bttoken = &rec->word_token_array[ btindex];
	rec->word_token_array_flags[ btindex] = 1;
	}
	}
	}
	}

	/* run the astar and flag a little more */
	astar_stack_prepare_from_active_search(rec->astar_stack, 100, rec);
	astar_stack_do_backwards_search(rec, 100);
	astar_stack_flag_word_tokens_used(rec->astar_stack, rec);
	astar_stack_clear(rec->astar_stack);

	/* kill_word_tokens */
	for (i = 0; i < rec->word_token_array_size; i++)
	{
	if (rec->word_token_array_flags[i] == 0) /* < 0 are already free! */
	free_word_token_from_lattice(rec, (frameID)i);
	}

	/* set this back to a regular astar from remembered values */
	rec->context->arc_token_list = keep_arc_token_list;
	rec->astar_stack->prune_delta = (costdata) keep_astar_prune;

	SREC_STATS_INC_WTOKEN_REPRUNES(1);
	return 0;
	}