wordrec/bestfirst.cpp - platform/external/tesseract - Git at Google

 /* -*-C-*-
  ********************************************************************************
  *
  * File:        bestfirst.c  (Formerly bestfirst.c)
  * Description:  Best first search functions
  * Author:       Mark Seaman, OCR Technology
  * Created:      Mon May 14 11:23:29 1990
  * Modified:     Tue Jul 30 16:08:47 1991 (Mark Seaman) marks@hpgrlt
  * Language:     C
  * Package:      N/A
  * Status:       Experimental (Do Not Distribute)
  *
  * (c) Copyright 1990, Hewlett-Packard Company.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  ** http://www.apache.org/licenses/LICENSE-2.0
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  *
  ***************************************************************************/

 /*----------------------------------------------------------------------
           I n c l u d e s
 ---------------------------------------------------------------------*/
 #include <assert.h>

 #include "bestfirst.h"
 #include "baseline.h"
 #include "bitvec.h"
 #include "callback.h"
 #include "dict.h"
 #include "freelist.h"
 #include "globals.h"
 #include "heuristic.h"
 #include "metrics.h"
 #include "permute.h"
 #include "pieces.h"
 #include "plotseg.h"
 #include "ratngs.h"
 #include "states.h"
 #include "stopper.h"
 #include "structures.h"
 #include "tordvars.h"
 #include "unicharset.h"
 #include "wordclass.h"
 #include "wordrec.h"

 void call_caller();

 /*----------------------------------------------------------------------
           V a r i a b l e s
 ---------------------------------------------------------------------*/
 int num_joints;                  /* Number of chunks - 1 */
 int num_pushed = 0;
 int num_popped = 0;

 INT_VAR(wordrec_num_seg_states, 30, "Segmentation states");

 double_VAR(wordrec_worst_state, 1, "Worst segmentation state");

 /**/
 /*----------------------------------------------------------------------
           F u n c t i o n s
 ----------------------------------------------------------------------*/

 /**********************************************************************
  * best_first_search
  *
  * Find the best segmentation by doing a best first search of the
  * solution space.
  **********************************************************************/
 namespace tesseract {
 void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record,
                                 WERD_CHOICE *best_choice,
                                 WERD_CHOICE *raw_choice,
                                 STATE *state,
                                 DANGERR *fixpt,
                                 STATE *best_state) {
   SEARCH_RECORD *the_search;
   inT16 keep_going;
   STATE guided_state;   // not used

   num_joints = chunks_record->ratings->dimension() - 1;
   the_search = new_search (chunks_record, num_joints,
     best_choice, raw_choice, state);

   // The default state is initialized as the best choice.  In order to apply
   // segmentation adjustment, or any other contextual processing in permute,
   // we give the best choice a poor rating to force the processed raw choice
   // to be promoted to best choice.
   the_search->best_choice->set_rating(100000.0);
   evaluate_state(chunks_record, the_search, fixpt);
   if (permute_debug) {
     tprintf("\n\n\n =========== BestFirstSearch ==============\n");
     best_choice->print("**Initial BestChoice**");
   }

 #ifndef GRAPHICS_DISABLED
   save_best_state(chunks_record);
 #endif
   start_recording();
   FLOAT32 worst_priority = 2.0f * prioritize_state(chunks_record, the_search);
   if (worst_priority < wordrec_worst_state)
     worst_priority = wordrec_worst_state;
   if (segment_debug) {
     print_state("BestFirstSearch", best_state, num_joints);
   }

   guided_state = *state;
   do {
                                  /* Look for answer */
     if (!hash_lookup (the_search->closed_states, the_search->this_state)) {

       if (tord_blob_skip) {
         free_state (the_search->this_state);
         break;
       }

       guided_state = *(the_search->this_state);
       keep_going = evaluate_state(chunks_record, the_search, fixpt);
       hash_add (the_search->closed_states, the_search->this_state);

       if (!keep_going ||
           (the_search->num_states > wordrec_num_seg_states) ||
           (tord_blob_skip)) {
         if (segment_debug)
           tprintf("Breaking best_first_search on keep_going %s numstates %d\n",
                   ((keep_going) ? "T" :"F"), the_search->num_states);
         free_state (the_search->this_state);
         break;
       }

       FLOAT32 new_worst_priority = 2.0f * prioritize_state(chunks_record,
                                                            the_search);
       if (new_worst_priority < worst_priority) {
         if (segment_debug)
           tprintf("Lowering WorstPriority %f --> %f\n",
                   worst_priority, new_worst_priority);
         // Tighten the threshold for admitting new paths as better search
         // candidates are found.  After lowering this threshold, we can safely
         // popout everything that is worse than this score also.
         worst_priority = new_worst_priority;
       }
       expand_node(worst_priority, chunks_record, the_search);
     }

     free_state (the_search->this_state);
     num_popped++;
     the_search->this_state = pop_queue (the_search->open_states);
     if (segment_debug && !the_search->this_state)
       tprintf("No more states to evalaute after %d evals", num_popped);
   }
   while (the_search->this_state);

   state->part1 = the_search->best_state->part1;
   state->part2 = the_search->best_state->part2;
   stop_recording();
   if (permute_debug) {
     tprintf("\n\n\n =========== BestFirstSearch ==============\n");
             // best_choice->debug_string(getDict().getUnicharset()).string());
     best_choice->print("**Final BestChoice**");
   }
   // save the best_state stats
   delete_search(the_search);
 }
 }  // namespace tesseract


 /**********************************************************************
  * chunks_width
  *
  * Return the width of a chunk which is a composed of several blobs
  * blobs[start_blob..last_blob] inclusively,
  * whose individual widths and gaps are record in width_record in the form
  * width_record->num_char = n
  * width_record->widths[2*n-1] = w0,g0,w1,g1..w(n-1),g(n-1)
  **********************************************************************/
 int chunks_width(WIDTH_RECORD *width_record, int start_blob, int last_blob) {
   int result = 0;
   for (int x = start_blob * 2; x <= last_blob * 2; x++)
     result += width_record->widths[x];
   return (result);
 }

 /**********************************************************************
  * chunks_gap
  *
  * Return the width of between the specified chunk and next.
  **********************************************************************/
 int chunks_gap(WIDTH_RECORD *width_record, int last_chunk) {
   return (last_chunk < width_record->num_chars - 1) ?
       width_record->widths[last_chunk * 2 + 1] : 0;
 }


 /**********************************************************************
  * delete_search
  *
  * Terminate the current search and free all the memory involved.
  **********************************************************************/
 void delete_search(SEARCH_RECORD *the_search) {
   float closeness;

   closeness = (the_search->num_joints ?
     (hamming_distance(reinterpret_cast<uinT32*>(the_search->first_state),
                       reinterpret_cast<uinT32*>(the_search->best_state), 2) /
       (float) the_search->num_joints) : 0.0f);

   record_search_status (the_search->num_states,
     the_search->before_best, closeness);

   free_state (the_search->first_state);
   free_state (the_search->best_state);

   free_hash_table (the_search->closed_states);
   FreeHeapData (the_search->open_states, (void_dest) free_state);

   memfree(the_search);
 }


 /**********************************************************************
  * evaluate_chunks
  *
  * A particular word level segmentation has been chosen.  Evaluation
  * this to find the word list that corresponds to it.
  **********************************************************************/
 namespace tesseract {
 BLOB_CHOICE_LIST_VECTOR *Wordrec::evaluate_chunks(CHUNKS_RECORD *chunks_record,
                                                   SEARCH_STATE search_state) {
   BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
   BLOB_CHOICE_LIST *blob_choices;
   BLOB_CHOICE_IT blob_choice_it;
   int i;
   int x = 0;
   int y;

   /* Iterate sub-paths */
   for (i = 1; i <= search_state[0] + 1; i++) {
     if (i > search_state[0])
       y = count_blobs (chunks_record->chunks) - 1;
     else
       y = x + search_state[i];

     if (tord_blob_skip) {
       delete char_choices;
       return (NULL);
     }                            /* Process one square */
     /* Classify if needed */
     blob_choices = get_piece_rating(chunks_record->ratings,
                                     chunks_record->chunks,
                                     chunks_record->splits,
                                     x, y);

     if (blob_choices == NULL) {
       delete char_choices;
       return (NULL);
     }

     /* Add permuted ratings */
     blob_choice_it.set_to_list(blob_choices);
     last_segmentation[i - 1].certainty = blob_choice_it.data()->certainty();
     last_segmentation[i - 1].match = blob_choice_it.data()->rating();

     last_segmentation[i - 1].width =
       chunks_width (chunks_record->chunk_widths, x, y);
     last_segmentation[i - 1].gap =
       chunks_gap (chunks_record->chunk_widths, y);

     *char_choices += blob_choices;
     x = y + 1;
   }
   return (char_choices);
 }

 /**********************************************************************
  * evaluate_state
  *
  * Evaluate the segmentation that is represented by this state in the
  * best first search.  Add this state to the "states_seen" list.
  **********************************************************************/
 inT16 Wordrec::evaluate_state(CHUNKS_RECORD *chunks_record,
                               SEARCH_RECORD *the_search,
                               DANGERR *fixpt) {
   BLOB_CHOICE_LIST_VECTOR *char_choices;
   SEARCH_STATE chunk_groups;
   float rating_limit = the_search->best_choice->rating();
   inT16 keep_going = TRUE;
   PIECES_STATE widths;

   the_search->num_states++;
   chunk_groups = bin_to_chunks(the_search->this_state,
                                the_search->num_joints);
   bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
   getDict().LogNewSegmentation(widths);

   char_choices = evaluate_chunks(chunks_record, chunk_groups);
   wordseg_rating_adjust_factor = -1.0f;
   if (char_choices != NULL && char_choices->length() > 0) {
     // Compute the segmentation cost and include the cost in word rating.
     // TODO(dsl): We should change the SEARCH_RECORD to store this cost
     // from state evaluation and avoid recomputing it here.
     prioritize_state(chunks_record, the_search);
     wordseg_rating_adjust_factor = the_search->segcost_bias;
     getDict().permute_characters(*char_choices, rating_limit,
                                  the_search->best_choice,
                                  the_search->raw_choice);
     bool replaced = false;
     if (getDict().AcceptableChoice(char_choices, the_search->best_choice,
                                    *(the_search->raw_choice), fixpt,
                                    ASSOCIATOR_CALLER, &replaced)) {
       keep_going = FALSE;
     }
   }
   wordseg_rating_adjust_factor = -1.0f;

 #ifndef GRAPHICS_DISABLED
   if (wordrec_display_segmentations) {
     display_segmentation (chunks_record->chunks, chunk_groups);
     if (wordrec_display_segmentations > 1)
       window_wait(segm_window);
   }
 #endif

   if (rating_limit != the_search->best_choice->rating()) {
     the_search->before_best = the_search->num_states;
     the_search->best_state->part1 = the_search->this_state->part1;
     the_search->best_state->part2 = the_search->this_state->part2;
     replace_char_widths(chunks_record, chunk_groups);
   }
   else if (char_choices != NULL)
     fixpt->index = -1;

   if (char_choices != NULL) delete char_choices;
   memfree(chunk_groups);

   return (keep_going);
 }


 /**********************************************************************
  * rebuild_current_state
  *
  * Evaluate the segmentation that is represented by this state in the
  * best first search.  Add this state to the "states_seen" list.
  **********************************************************************/
 BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state(
     TBLOB *blobs,
     SEAMS seam_list,
     STATE *state,
     BLOB_CHOICE_LIST_VECTOR *old_choices,
     int fx,
     bool force_rebuild,
     const WERD_CHOICE &best_choice,
     const MATRIX *ratings) {
   // Initialize search_state, num_joints, x, y.
   int num_joints = array_count(seam_list);
 #ifndef GRAPHICS_DISABLED
     if (wordrec_display_segmentations) {
       print_state("Rebuiling state", state, num_joints);
     }
 #endif
   SEARCH_STATE search_state = bin_to_chunks(state, num_joints);
   int x = 0;
   int y;
   int i;
   for (i = 1; i <= search_state[0]; i++) {
     y = x + search_state[i];
     x = y + 1;
   }
   y = count_blobs (blobs) - 1;

   // Initialize char_choices, expanded_fragment_lengths:
   // e.g. if fragment_lengths = {1 1 2 3 1},
   // expanded_fragment_lengths_str = {1 1 2 2 3 3 3 1}.
   BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
   STRING expanded_fragment_lengths_str = "";
   bool state_has_fragments = false;
   const char *fragment_lengths = NULL;

   if (best_choice.length() > 0) {
     fragment_lengths = best_choice.fragment_lengths();
   }
   if (fragment_lengths) {
     for (int i = 0; i < best_choice.length(); ++i) {
       *char_choices += NULL;
       if (fragment_lengths[i] > 1) {
         state_has_fragments = true;
       }
       for (int j = 0; j < fragment_lengths[i]; ++j) {
         expanded_fragment_lengths_str += fragment_lengths[i];
       }
     }
   } else {
     for (i = 0; i <= search_state[0]; ++i) {
       expanded_fragment_lengths_str += (char)1;
       *char_choices += NULL;
     }
   }

   // Finish early if force_rebuld is false and there are no fragments to merge.
   if (!force_rebuild && !state_has_fragments) {
     delete char_choices;
     memfree(search_state);
     return old_choices;
   }

   // Set up variables for concatenating fragments.
   const char *word_lengths_ptr = NULL;
   const char *word_ptr = NULL;
   if (state_has_fragments) {
     // Make word_lengths_ptr point to the last element in
     // best_choice->unichar_lengths().
     word_lengths_ptr = best_choice.unichar_lengths().string();
     word_lengths_ptr += (strlen(word_lengths_ptr)-1);
     // Make word_str point to the beginning of the last
     // unichar in best_choice->unichar_string().
     word_ptr = best_choice.unichar_string().string();
     word_ptr += (strlen(word_ptr)-*word_lengths_ptr);
   }
   const char *expanded_fragment_lengths =
     expanded_fragment_lengths_str.string();
   bool merging_fragment = false;
   int true_y = -1;
   char unichar[UNICHAR_LEN + 1];
   int fragment_pieces = -1;
   float rating = 0.0;
   float certainty = -MAX_FLOAT32;

   // Populate char_choices list such that it corresponds to search_state.
   //
   // If we are rebuilding a state that contains character fragments:
   // -- combine blobs that belong to character fragments
   // -- re-classify the blobs to obtain choices list for the merged blob
   // -- ensure that correct classification appears in the new choices list
   //    NOTE: a choice composed form original fragment choices will be always
   //    added to the new choices list for each character composed from
   //    fragments (even if the choice for the corresponding character appears
   //    in the re-classified choices list of for the newly merged blob).
   BLOB_CHOICE_IT temp_it;
   int char_choices_index = char_choices->length() - 1;
   for (i = search_state[0]; i >= 0; i--) {
     BLOB_CHOICE_LIST *current_choices = join_blobs_and_classify(
         blobs, seam_list, x, y, fx, ratings, old_choices);
     // Combine character fragments.
     if (expanded_fragment_lengths[i] > 1) {
       // Start merging character fragments.
       if (!merging_fragment) {
         merging_fragment = true;
         true_y = y;
         fragment_pieces = expanded_fragment_lengths[i];
         rating = 0.0;
         certainty = -MAX_FLOAT32;
         strncpy(unichar, word_ptr, *word_lengths_ptr);
         unichar[*word_lengths_ptr] = '\0';
       }
       // Take into account the fact that we could have joined pieces
       // since we first recorded the ending point of a fragment (true_y).
       true_y -= y - x;
       // Populate fragment with updated values and look for the
       // fragment with the same values in current_choices.
       // Update rating and certainty of the character being composed.
       fragment_pieces--;
       CHAR_FRAGMENT fragment;
       fragment.set_all(unichar, fragment_pieces,
                        expanded_fragment_lengths[i]);
       temp_it.set_to_list(current_choices);
       for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();
            temp_it.forward()) {
         const CHAR_FRAGMENT *current_fragment =
           getDict().getUnicharset().get_fragment(temp_it.data()->unichar_id());
         if (current_fragment && fragment.equals(current_fragment)) {
           rating += temp_it.data()->rating();
           if (temp_it.data()->certainty() > certainty) {
             certainty = temp_it.data()->certainty();
           }
           break;
         }
       }
       assert(!temp_it.cycled_list());  // make sure we found the fragment
       // Free current_choices for the fragmented character.
       delete current_choices;

       // Finish composing character from fragments.
       if (fragment_pieces == 0) {
         // Populate current_choices with the classification of
         // the blob merged from blobs of each character fragment.
         current_choices = join_blobs_and_classify(blobs, seam_list, x,
                                                   true_y, fx, ratings, NULL);
         BLOB_CHOICE *merged_choice =
           new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar),
                           rating, certainty, 0, NO_PERM);

         // Insert merged_blob into current_choices, such that current_choices
         // are still sorted in non-descending order by rating.
         ASSERT_HOST(!current_choices->empty());
         temp_it.set_to_list(current_choices);
         for (temp_it.mark_cycle_pt();
              !temp_it.cycled_list() &&
              merged_choice->rating() > temp_it.data()->rating();
              temp_it.forward());
         temp_it.add_before_stay_put(merged_choice);

         // Done merging this fragmented character.
         merging_fragment = false;
       }
     }
     if (!merging_fragment) {
       // Get rid of fragments in current_choices.
       temp_it.set_to_list(current_choices);
       for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();
            temp_it.forward()) {
         if (getDict().getUnicharset().get_fragment(
             temp_it.data()->unichar_id())) {
           delete temp_it.extract();
         }
       }
       char_choices->set(current_choices, char_choices_index);
       char_choices_index--;

       // Update word_ptr and word_lengths_ptr.
       if (word_lengths_ptr != NULL && word_ptr != NULL) {
         word_lengths_ptr--;
         word_ptr -= (*word_lengths_ptr);
       }
     }
     y = x - 1;
     x = y - search_state[i];
   }
   old_choices->delete_data_pointers();
   delete old_choices;
   memfree(search_state);

   return (char_choices);
 }
 }  // namespace tesseract


 /**********************************************************************
  * expand_node
  *
  * Create the states that are attached to this one.  Check to see that
  * each one has not already been visited.  If not add it to the priority
  * queue.
  **********************************************************************/
 namespace tesseract {
 void Wordrec::expand_node(FLOAT32 worst_priority,
                           CHUNKS_RECORD *chunks_record,
                           SEARCH_RECORD *the_search) {
   STATE old_state;
   int nodes_added = 0;
   int x;
   uinT32 mask = 1 << (the_search->num_joints - 1 - 32);

   old_state.part1 = the_search->this_state->part1;
   old_state.part2 = the_search->this_state->part2;

   // We need to expand the search more intelligently, or we get stuck
   // with a bad starting segmentation in a long word sequence as in CJK.
   // Expand a child node only if it is within the global bound, and no
   // worse than 2x of its parent.
   // TODO(dsl): There is some redudency here in recomputing the priority,
   // and in filtering of old_merit and worst_priority.
   the_search->this_state->part2 = old_state.part2;
   for (x = the_search->num_joints; x > 32; x--) {
     the_search->this_state->part1 = mask ^ old_state.part1;
     if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
       FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
       if (segment_debug && permute_debug) {
         cprintf ("....checking state: %8.3f ", new_merit);
         print_state ("", the_search->this_state, num_joints);
       }
       if (new_merit < worst_priority) {
       push_queue (the_search->open_states, the_search->this_state,
                     worst_priority, new_merit);
         nodes_added++;
       }
     }
     mask >>= 1;
   }

   if (the_search->num_joints > 32) {
     mask = 1 << 31;
   }
   else {
     mask = 1 << (the_search->num_joints - 1);
   }

   the_search->this_state->part1 = old_state.part1;
   while (x--) {
     the_search->this_state->part2 = mask ^ old_state.part2;
     if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
       FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
       if (segment_debug && permute_debug) {
         cprintf ("....checking state: %8.3f ", new_merit);
         print_state ("", the_search->this_state, num_joints);
       }
       if (new_merit < worst_priority) {
       push_queue (the_search->open_states, the_search->this_state,
                    worst_priority, new_merit);
         nodes_added++;
       }
     }
     mask >>= 1;
   }
 }
 }  // namespace tesseract


 /**********************************************************************
  * new_search
  *
  * Create and initialize a new search record.
  **********************************************************************/
 SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
                           int num_joints,
                           WERD_CHOICE *best_choice,
                           WERD_CHOICE *raw_choice,
                           STATE *state) {
   SEARCH_RECORD *this_search;

   this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD));

   this_search->open_states = MakeHeap (wordrec_num_seg_states * 20);
   this_search->closed_states = new_hash_table ();

   if (state)
     this_search->this_state = new_state (state);
   else
     cprintf ("error: bad initial state in new_search\n");

   this_search->first_state = new_state (this_search->this_state);
   this_search->best_state = new_state (this_search->this_state);

   this_search->best_choice = best_choice;
   this_search->raw_choice = raw_choice;

   this_search->num_joints = num_joints;
   this_search->num_states = 0;
   this_search->before_best = 0;
   this_search->segcost_bias = 0;

   return (this_search);
 }


 /**********************************************************************
  * pop_queue
  *
  * Get this state from the priority queue.  It should be the state that
  * has the greatest urgency to be evaluated.
  **********************************************************************/
 STATE *pop_queue(HEAP *queue) {
   HEAPENTRY entry;

   if (GetTopOfHeap (queue, &entry) == OK) {
 #ifndef GRAPHICS_DISABLED
     if (wordrec_display_segmentations) {
       cprintf ("eval state: %8.3f ", entry.Key);
       print_state ("", (STATE *) entry.Data, num_joints);
     }
 #endif
     return ((STATE *) entry.Data);
   }
   else {
     return (NULL);
   }
 }


 /**********************************************************************
  * push_queue
  *
  * Add this state into the priority queue.
  **********************************************************************/
 void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority,
                 FLOAT32 priority) {
   HEAPENTRY entry;

   if (priority < worst_priority) {
     if (SizeOfHeap (queue) >= MaxSizeOfHeap(queue)) {
       if (segment_debug) tprintf("Heap is Full\n");
       return;
     }
     if (segment_debug)
       tprintf("\tpushing %d node  %f\n", num_pushed, priority);
     entry.Data = (char *) new_state (state);
     num_pushed++;
     entry.Key = priority;
     HeapStore(queue, &entry);
   }
 }


 /**********************************************************************
  * replace_char_widths
  *
  * Replace the value of the char_width field in the chunks_record with
  * the updated width measurements from the last_segmentation.
  **********************************************************************/
 void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) {
   WIDTH_RECORD *width_record;
   int num_blobs;
   int i;

   free_widths (chunks_record->char_widths);

   num_blobs = state[0] + 1;
   width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
   width_record->num_chars = num_blobs;

   for (i = 0; i < num_blobs; i++) {

     width_record->widths[2 * i] = last_segmentation[i].width;

     if (i + 1 < num_blobs)
       width_record->widths[2 * i + 1] = last_segmentation[i].gap;
   }
   chunks_record->char_widths = width_record;
 }

 namespace tesseract {
 BLOB_CHOICE_LIST *Wordrec::join_blobs_and_classify(
     TBLOB *blobs, SEAMS seam_list,
     int x, int y, int fx, const MATRIX *ratings,
     BLOB_CHOICE_LIST_VECTOR *old_choices) {
   BLOB_CHOICE_LIST *choices = NULL;
   // First check to see if we can look up the classificaiton
   // in old_choices (if there is no need to merge blobs).
   if (x == y && old_choices != NULL && ratings == NULL) {
     choices = old_choices->get(x);
     old_choices->set(NULL, x);
     return choices;
   }
   // The ratings matrix filled in by the associator will contain the most
   // up-to-date classification info. Thus we look up the classification there
   // first, and only call classify_blob() if the classification is not found.
   if (ratings != NULL) {
     BLOB_CHOICE_LIST *choices_ptr = ratings->get(x, y);
     if (choices_ptr != NOT_CLASSIFIED) {
       choices = new BLOB_CHOICE_LIST();
       choices->deep_copy(choices_ptr, &BLOB_CHOICE::deep_copy);
     }
   }
   if (x != y) {
   join_pieces(blobs, seam_list, x, y);

   int blobindex;  // current blob
   TBLOB *p_blob;
   TBLOB *blob;
   TBLOB *next_blob;
   for (blob = blobs, blobindex = 0, p_blob = NULL;
        blobindex < x; blobindex++) {
     p_blob = blob;
     blob = blob->next;
   }
   while (blobindex < y) {
     next_blob = blob->next;
     blob->next = next_blob->next;
     oldblob(next_blob);  // junk dead blobs
     blobindex++;
   }
     if (choices == NULL) {
       choices = classify_blob(p_blob, blob, blob->next,
                               NULL, "rebuild", Orange);
     }
   }
   return choices;
 }
 }  // namespace tesseract