// wordrec/heuristic.cpp - platform/external/tesseract - Git at Google

 /* -*-C-*-
  ********************************************************************************
  *
  * File:        heuristic.cpp  (Formerly heuristic.c)
  * Description: Heuristic priority functions for ranking segmentation states.
  * Author:       Mark Seaman, OCR Technology
  * Created:      Fri Oct 16 14:37:00 1987
  * Modified:     Wed Jul 10 14:15:08 1991 (Mark Seaman) marks@hpgrlt
  * Language:     C++
  * Package:      N/A
  * Status:       Reusable Software Component
  *
  * (c) Copyright 1987, Hewlett-Packard Company.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  ** http://www.apache.org/licenses/LICENSE-2.0
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  *
  *********************************************************************************/
 /*----------------------------------------------------------------------
               I n c l u d e s
 ----------------------------------------------------------------------*/
 #include <math.h>

 #include "heuristic.h"

 #include "baseline.h"
 #include "freelist.h"
 #include "metrics.h"
 #include "ratngs.h"

 /*----------------------------------------------------------------------
               M a c r o s
 ----------------------------------------------------------------------*/
 #define MAX_SQUAT       2.0      /* Width ratio */
 #define BAD_RATING   1000.0      /* No valid blob */

 /*----------------------------------------------------------------------
               F u n c t i o n s
 ----------------------------------------------------------------------*/
 /**********************************************************************
  * prioritize_state
  *
  * Compute the search priority of the state held in
  * the_search->this_state.  The result is the sum of two terms:
  *   - the value returned by rating_priority(), and
  *   - the value returned by width_priority(), scaled by 1000.
  * Both terms are passed to record_priorities() before the sum is
  * returned.
  **********************************************************************/
 FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record,
                          SEARCH_RECORD *the_search,
                          STATE *old_state) {
   STATE *candidate = the_search->this_state;
   const int joint_count = the_search->num_joints;

   // The rating term is evaluated before the width term, as before.
   const FLOAT32 match_pri =
     rating_priority (chunks_record, candidate, old_state, joint_count);
   const FLOAT32 width_pri =
     width_priority (chunks_record, candidate, joint_count) * 1000.0;

   record_priorities(the_search, old_state, match_pri, width_pri);

   return (width_pri + match_pri);
 }


 /**********************************************************************
  * rating_priority
  *
  * Derive a segmentation priority from the blobs of this segmentation
  * that already have an entry in chunks_record->ratings.  For every such
  * blob the rating of the entry the choice iterator is positioned on
  * after set_to_list() (presumably the best choice -- confirm against
  * BLOB_CHOICE_IT) is added to a rating total, and the weights of all
  * chunks the blob spans are added to a weight total.  The priority is
  * rating total / weight total.
  *
  * Notes:
  *   - Both totals are kept as inT16 and every term is truncated to
  *     inT16 before being added.
  *   - A weight total <= 0 is replaced by 1 to avoid dividing by zero.
  *   - old_state is not used by this function.
  **********************************************************************/
 FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record,
                         STATE *state,
                         STATE *old_state,
                         int num_joints) {
   PIECES_STATE pieces;
   BLOB_CHOICE_IT choice_it;
   inT16 rating_sum = 0;
   inT16 weight_sum = 0;
   inT16 start = 0;               // first chunk of the current blob

   bin_to_pieces(state, num_joints, pieces);

   // pieces[] holds the chunk count of each blob; a zero entry ends it.
   for (inT16 blob = 0; pieces[blob] != 0; blob++) {
     inT16 end = start + pieces[blob] - 1;
     BLOB_CHOICE_LIST *choices = chunks_record->ratings->get(start, end);

     if (choices != NOT_CLASSIFIED) {
       choice_it.set_to_list(choices);
       rating_sum += (inT16) choice_it.data()->rating();
       for (inT16 chunk = start; chunk <= end; chunk++)
         weight_sum += (inT16) (chunks_record->weights[chunk]);
     }
     start += pieces[blob];
   }

   const inT16 divisor = (weight_sum > 0) ? weight_sum : 1;
   return ((FLOAT32) rating_sum / divisor);
 }


 /**********************************************************************
  * state_char_widths
  *
  * Build the character width record that the given segmentation state
  * produces from the chunk widths.
  *
  * The state is expanded with bin_to_chunks(); element 0 of that list
  * is one less than the number of characters, and the following
  * elements are added to the running chunk index to find where each
  * character ends.  The last character always ends at num_joints.
  *
  * Layout of the returned record, for character k (0-based):
  *   widths[2 * k]     = chunks_width() over the character's chunks
  *   widths[2 * k + 1] = chunks_gap() after it (all but the last one)
  *
  * The record is obtained from memalloc() and the chunk list is handed
  * back through *search_state; the caller releases both.
  **********************************************************************/
 WIDTH_RECORD *state_char_widths(WIDTH_RECORD *chunk_widths,
                                 STATE *state,
                                 int num_joints,
                                 SEARCH_STATE *search_state) {
   SEARCH_STATE new_chunks = bin_to_chunks (state, num_joints);
   const int num_blobs = new_chunks[0] + 1;

   // One int for num_chars plus (2 * num_blobs - 1) width/gap slots.
   WIDTH_RECORD *width_record =
     (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
   width_record->num_chars = num_blobs;

   int first = 0;                 // first chunk of the current character
   for (int blob = 0; blob < num_blobs; blob++) {
     const bool is_final = (blob == num_blobs - 1);
     int last;                    // last chunk of the current character

     if (is_final)
       last = num_joints;
     else
       last = first + new_chunks[blob + 1];

     width_record->widths[2 * blob] = chunks_width (chunk_widths, first, last);

     if (!is_final)
       width_record->widths[2 * blob + 1] = chunks_gap (chunk_widths, last);

     first = last + 1;
   }

   *search_state = new_chunks;
   return (width_record);
 }


 /**********************************************************************
  * width_priority
  *
  * Score this word segmentation by the widths of the characters it
  * produces.  Each character width is divided by BASELINE_SCALE when
  * baseline_enable is set, otherwise by the row's lineheight.  Every
  * normalised width above MAX_SQUAT adds its excess over MAX_SQUAT to
  * the result; other widths add nothing.
  *
  * The width record and chunk list obtained from state_char_widths()
  * are released before returning.
  **********************************************************************/
 FLOAT32 width_priority(CHUNKS_RECORD *chunks_record,
                        STATE *state,
                        int num_joints) {
   SEARCH_STATE chunk_list;
   FLOAT32 penalty = 0.0;

   WIDTH_RECORD *char_widths = state_char_widths (chunks_record->chunk_widths,
     state, num_joints, &chunk_list);

   for (int ch = 0; ch < char_widths->num_chars; ch++) {
     // Even slots hold character widths; odd slots hold gaps.
     FLOAT32 squat = char_widths->widths[2 * ch];

     if (baseline_enable) {
       squat /= BASELINE_SCALE;
     }
     else {
       squat /= chunks_record->row->lineheight;
     }

     if (squat > MAX_SQUAT) {
       penalty += squat - MAX_SQUAT;
     }
   }

   memfree(chunk_list);
   free_widths(char_widths);

   return (penalty);
 }
	/* -*-C-*-
	********************************************************************************
	*
	* File: heuristic.c (Formerly heuristic.c)
	* Description:
	* Author: Mark Seaman, OCR Technology
	* Created: Fri Oct 16 14:37:00 1987
	* Modified: Wed Jul 10 14:15:08 1991 (Mark Seaman) marks@hpgrlt
	* Language: C
	* Package: N/A
	* Status: Reusable Software Component
	*
	* (c) Copyright 1987, Hewlett-Packard Company.
	** Licensed under the Apache License, Version 2.0 (the "License");
	** you may not use this file except in compliance with the License.
	** You may obtain a copy of the License at
	** http://www.apache.org/licenses/LICENSE-2.0
	** Unless required by applicable law or agreed to in writing, software
	** distributed under the License is distributed on an "AS IS" BASIS,
	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	** See the License for the specific language governing permissions and
	** limitations under the License.
	*
	*********************************************************************************/
	/*----------------------------------------------------------------------
	I n c l u d e s
	----------------------------------------------------------------------*/
	#include <math.h>

	#include "heuristic.h"

	#include "baseline.h"
	#include "freelist.h"
	#include "metrics.h"
	#include "ratngs.h"

	/*----------------------------------------------------------------------
	M a c r o s
	----------------------------------------------------------------------*/
	#define MAX_SQUAT 2.0 /* Width ratio */
	#define BAD_RATING 1000.0 /* No valid blob */

	/*----------------------------------------------------------------------
	F u n c t i o n s
	----------------------------------------------------------------------*/
	/**********************************************************************
	* prioritize_state
	*
	* Compute the search priority of the state held in
	* the_search->this_state. The result is the sum of two terms:
	*   - the value returned by rating_priority(), and
	*   - the value returned by width_priority(), scaled by 1000.
	* Both terms are passed to record_priorities() before the sum is
	* returned.
	**********************************************************************/
	FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record,
	                         SEARCH_RECORD *the_search,
	                         STATE *old_state) {
	  STATE *candidate = the_search->this_state;
	  const int joint_count = the_search->num_joints;

	  // The rating term is evaluated before the width term, as before.
	  const FLOAT32 match_pri =
	    rating_priority (chunks_record, candidate, old_state, joint_count);
	  const FLOAT32 width_pri =
	    width_priority (chunks_record, candidate, joint_count) * 1000.0;

	  record_priorities(the_search, old_state, match_pri, width_pri);

	  return (width_pri + match_pri);
	}


	/**********************************************************************
	* rating_priority
	*
	* Derive a segmentation priority from the blobs of this segmentation
	* that already have an entry in chunks_record->ratings. For every such
	* blob the rating of the entry the choice iterator is positioned on
	* after set_to_list() (presumably the best choice -- confirm against
	* BLOB_CHOICE_IT) is added to a rating total, and the weights of all
	* chunks the blob spans are added to a weight total. The priority is
	* rating total / weight total.
	*
	* Notes:
	*   - Both totals are kept as inT16 and every term is truncated to
	*     inT16 before being added.
	*   - A weight total <= 0 is replaced by 1 to avoid dividing by zero.
	*   - old_state is not used by this function.
	**********************************************************************/
	FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record,
	                        STATE *state,
	                        STATE *old_state,
	                        int num_joints) {
	  PIECES_STATE pieces;
	  BLOB_CHOICE_IT choice_it;
	  inT16 rating_sum = 0;
	  inT16 weight_sum = 0;
	  inT16 start = 0;               // first chunk of the current blob

	  bin_to_pieces(state, num_joints, pieces);

	  // pieces[] holds the chunk count of each blob; a zero entry ends it.
	  for (inT16 blob = 0; pieces[blob] != 0; blob++) {
	    inT16 end = start + pieces[blob] - 1;
	    BLOB_CHOICE_LIST *choices = chunks_record->ratings->get(start, end);

	    if (choices != NOT_CLASSIFIED) {
	      choice_it.set_to_list(choices);
	      rating_sum += (inT16) choice_it.data()->rating();
	      for (inT16 chunk = start; chunk <= end; chunk++)
	        weight_sum += (inT16) (chunks_record->weights[chunk]);
	    }
	    start += pieces[blob];
	  }

	  const inT16 divisor = (weight_sum > 0) ? weight_sum : 1;
	  return ((FLOAT32) rating_sum / divisor);
	}


	/**********************************************************************
	* state_char_widths
	*
	* Build the character width record that the given segmentation state
	* produces from the chunk widths.
	*
	* The state is expanded with bin_to_chunks(); element 0 of that list
	* is one less than the number of characters, and the following
	* elements are added to the running chunk index to find where each
	* character ends. The last character always ends at num_joints.
	*
	* Layout of the returned record, for character k (0-based):
	*   widths[2 * k]     = chunks_width() over the character's chunks
	*   widths[2 * k + 1] = chunks_gap() after it (all but the last one)
	*
	* The record is obtained from memalloc() and the chunk list is handed
	* back through *search_state; the caller releases both.
	**********************************************************************/
	WIDTH_RECORD *state_char_widths(WIDTH_RECORD *chunk_widths,
	                                STATE *state,
	                                int num_joints,
	                                SEARCH_STATE *search_state) {
	  WIDTH_RECORD *width_record;
	  int num_blobs;
	  int x;                         // first chunk of the current character
	  int y;                         // last chunk of the current character
	  int i;
	  SEARCH_STATE new_chunks;

	  new_chunks = bin_to_chunks (state, num_joints);

	  num_blobs = new_chunks[0] + 1;
	  // One int for num_chars plus (2 * num_blobs - 1) width/gap slots.
	  width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
	  width_record->num_chars = num_blobs;

	  x = 0;
	  for (i = 1; i <= new_chunks[0] + 1; i++) {
	    // The final character runs to the last joint.
	    if (i > new_chunks[0])
	      y = num_joints;
	    else
	      y = x + new_chunks[i];

	    width_record->widths[2 * i - 2] = chunks_width (chunk_widths, x, y);

	    // There is no gap slot after the final character.
	    if (i <= new_chunks[0])
	      width_record->widths[2 * i - 1] = chunks_gap (chunk_widths, y);

	    x = y + 1;
	  }

	  *search_state = new_chunks;
	  return (width_record);
	}


	/**********************************************************************
	* width_priority
	*
	* Score this word segmentation by the widths of the characters it
	* produces. Each character width is divided by BASELINE_SCALE when
	* baseline_enable is set, otherwise by the row's lineheight. Every
	* normalised width above MAX_SQUAT adds its excess over MAX_SQUAT to
	* the result; other widths add nothing.
	*
	* The width record and chunk list obtained from state_char_widths()
	* are released before returning.
	**********************************************************************/
	FLOAT32 width_priority(CHUNKS_RECORD *chunks_record,
	                       STATE *state,
	                       int num_joints) {
	  SEARCH_STATE chunk_list;
	  FLOAT32 penalty = 0.0;

	  WIDTH_RECORD *char_widths = state_char_widths (chunks_record->chunk_widths,
	    state, num_joints, &chunk_list);

	  for (int ch = 0; ch < char_widths->num_chars; ch++) {
	    // Even slots hold character widths; odd slots hold gaps.
	    FLOAT32 squat = char_widths->widths[2 * ch];

	    if (baseline_enable) {
	      squat /= BASELINE_SCALE;
	    }
	    else {
	      squat /= chunks_record->row->lineheight;
	    }

	    if (squat > MAX_SQUAT) {
	      penalty += squat - MAX_SQUAT;
	    }
	  }

	  memfree(chunk_list);
	  free_widths(char_widths);

	  return (penalty);
	}