| /* -*-C-*- |
| ******************************************************************************** |
| * |
| * File: metrics.c (Formerly metrics.c) |
| * Description: |
| * Author: Mark Seaman, OCR Technology |
| * Created: Fri Oct 16 14:37:00 1987 |
| * Modified: Tue Jul 30 17:02:07 1991 (Mark Seaman) marks@hpgrlt |
| * Language: C |
| * Package: N/A |
| * Status: Reusable Software Component |
| * |
| * (c) Copyright 1987, Hewlett-Packard Company. |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| * |
| *********************************************************************************/ |
| /*---------------------------------------------------------------------- |
| I n c l u d e s |
| ----------------------------------------------------------------------*/ |
| #include "metrics.h" |
| #include "bestfirst.h" |
| #include "associate.h" |
| #include "tally.h" |
| #include "plotseg.h" |
| #include "globals.h" |
| #include "wordclass.h" |
| #include "intmatcher.h" |
| #include "freelist.h" |
| #include "djmenus.h" |
| #include "callcpp.h" |
| #include "ndminx.h" |
| #include "wordrec.h" |
| |
| /*---------------------------------------------------------------------- |
| V a r i a b l e s |
| ----------------------------------------------------------------------*/ |
| static int states_timed_out1; /* Counters */ |
| static int states_timed_out2; |
| static int words_segmented1; |
| static int words_segmented2; |
| static int segmentation_states1; |
| static int segmentation_states2; |
| static int save_priorities; |
| |
| int words_chopped1; |
| int words_chopped2; |
| int chops_attempted1; |
| int chops_performed1; |
| int chops_attempted2; |
| int chops_performed2; |
| |
| int character_count; |
| int word_count; |
| int chars_classified; |
| |
| MEASUREMENT num_pieces; |
| MEASUREMENT width_measure; |
| |
| MEASUREMENT width_priority_range;/* Help to normalize */ |
| MEASUREMENT match_priority_range; |
| |
| TALLY states_before_best; |
| TALLY best_certainties[2]; |
| TALLY character_widths; /* Width histogram */ |
| |
| FILE *priority_file_1; /* Output to cluster */ |
| FILE *priority_file_2; |
| FILE *priority_file_3; |
| |
| STATE *known_best_state = NULL; /* The right answer */ |
| |
| /*---------------------------------------------------------------------- |
| M a c r o s |
| ----------------------------------------------------------------------*/ |
/* Certainty span covered by one histogram bucket.  Parenthesized so the
   negative literal stays a single operand wherever the macro expands. */
#define CERTAINTY_BUCKET_SIZE (-0.5)
/* Number of buckets in each best-certainty tally. */
#define CERTAINTY_BUCKETS 40
| |
| /*---------------------------------------------------------------------- |
| F u n c t i o n s |
| ----------------------------------------------------------------------*/ |
| /********************************************************************** |
| * init_metrics |
| * |
| * Set up the appropriate variables to record information about the |
| * OCR process. Later calls will log the data and save a summary. |
| **********************************************************************/ |
| void init_metrics() { |
| words_chopped1 = 0; |
| words_chopped2 = 0; |
| chops_performed1 = 0; |
| chops_performed2 = 0; |
| chops_attempted1 = 0; |
| chops_attempted2 = 0; |
| |
| words_segmented1 = 0; |
| words_segmented2 = 0; |
| states_timed_out1 = 0; |
| states_timed_out2 = 0; |
| segmentation_states1 = 0; |
| segmentation_states2 = 0; |
| |
| save_priorities = 0; |
| |
| character_count = 0; |
| word_count = 0; |
| chars_classified = 0; |
| permutation_count = 0; |
| |
| end_metrics(); |
| |
| states_before_best = new_tally (MIN (100, num_seg_states)); |
| |
| best_certainties[0] = new_tally (CERTAINTY_BUCKETS); |
| best_certainties[1] = new_tally (CERTAINTY_BUCKETS); |
| reset_width_tally(); |
| } |
| |
| void end_metrics() { |
| if (states_before_best != NULL) { |
| memfree(states_before_best); |
| memfree(best_certainties[0]); |
| memfree(best_certainties[1]); |
| memfree(character_widths); |
| states_before_best = NULL; |
| best_certainties[0] = NULL; |
| best_certainties[1] = NULL; |
| character_widths = NULL; |
| } |
| } |
| |
| |
| /********************************************************************** |
| * record_certainty |
| * |
| * Maintain a record of the best certainty values achieved on each |
| * word recognition. |
| **********************************************************************/ |
| void record_certainty(float certainty, int pass) { |
| int bucket; |
| |
| if (certainty / CERTAINTY_BUCKET_SIZE < MAXINT) |
| bucket = (int) (certainty / CERTAINTY_BUCKET_SIZE); |
| else |
| bucket = MAXINT; |
| |
| inc_tally_bucket (best_certainties[pass - 1], bucket); |
| } |
| |
| |
| /********************************************************************** |
| * record_search_status |
| * |
| * Record information about each iteration of the search. This data |
| * is kept in global memory and accumulated over multiple segmenter |
| * searches. |
| **********************************************************************/ |
| void record_search_status(int num_states, int before_best, float closeness) { |
| inc_tally_bucket(states_before_best, before_best); |
| |
| if (first_pass) { |
| if (num_states == num_seg_states + 1) |
| states_timed_out1++; |
| segmentation_states1 += num_states; |
| words_segmented1++; |
| } |
| else { |
| if (num_states == num_seg_states + 1) |
| states_timed_out2++; |
| segmentation_states2 += num_states; |
| words_segmented2++; |
| } |
| } |
| |
| |
| /********************************************************************** |
| * save_summary |
| * |
| * Save the summary information into the file "file.sta". |
| **********************************************************************/ |
| namespace tesseract { |
| void Wordrec::save_summary(inT32 elapsed_time) { |
| #ifndef SECURE_NAMES |
| STRING outfilename; |
| FILE *f; |
| int x; |
| int total; |
| |
| outfilename = imagefile + ".sta"; |
| f = open_file (outfilename.string(), "w"); |
| |
| fprintf (f, INT32FORMAT " seconds elapsed\n", elapsed_time); |
| fprintf (f, "\n"); |
| |
| fprintf (f, "%d characters\n", character_count); |
| fprintf (f, "%d words\n", word_count); |
| fprintf (f, "\n"); |
| |
| fprintf (f, "%d permutations performed\n", permutation_count); |
| fprintf (f, "%d characters classified\n", chars_classified); |
| fprintf (f, "%4.0f%% classification overhead\n", |
| (float) chars_classified / character_count * 100.0 - 100.0); |
| fprintf (f, "\n"); |
| |
| fprintf (f, "%d words chopped (pass 1) ", words_chopped1); |
| fprintf (f, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100); |
| fprintf (f, "%d chops performed\n", chops_performed1); |
| fprintf (f, "%d chops attempted\n", chops_attempted1); |
| fprintf (f, "\n"); |
| |
| fprintf (f, "%d words joined (pass 1)", words_segmented1); |
| fprintf (f, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100); |
| fprintf (f, "%d segmentation states\n", segmentation_states1); |
| fprintf (f, "%d segmentations timed out\n", states_timed_out1); |
| fprintf (f, "\n"); |
| |
| fprintf (f, "%d words chopped (pass 2) ", words_chopped2); |
| fprintf (f, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100); |
| fprintf (f, "%d chops performed\n", chops_performed2); |
| fprintf (f, "%d chops attempted\n", chops_attempted2); |
| fprintf (f, "\n"); |
| |
| fprintf (f, "%d words joined (pass 2)", words_segmented2); |
| fprintf (f, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100); |
| fprintf (f, "%d segmentation states\n", segmentation_states2); |
| fprintf (f, "%d segmentations timed out\n", states_timed_out2); |
| fprintf (f, "\n"); |
| |
| total = 0; |
| iterate_tally (states_before_best, x) |
| total += (tally_entry (states_before_best, x) * x); |
| fprintf (f, "segmentations (before best) = %d\n", total); |
| if (total != 0.0) |
| fprintf (f, "%4.0f%% segmentation overhead\n", |
| (float) (segmentation_states1 + segmentation_states2) / |
| total * 100.0 - 100.0); |
| fprintf (f, "\n"); |
| |
| print_tally (f, "segmentations (before best)", states_before_best); |
| |
| iterate_tally (best_certainties[0], x) |
| cprintf ("best certainty of %8.4f = %4d %4d\n", |
| x * CERTAINTY_BUCKET_SIZE, |
| tally_entry (best_certainties[0], x), |
| tally_entry (best_certainties[1], x)); |
| |
| PrintIntMatcherStats(f); |
| dj_statistics(f); |
| fclose(f); |
| #endif |
| } |
| } // namespace tesseract |
| |
| |
| /********************************************************************** |
| * record_priorities |
| * |
| * If the record mode is set then record the priorities returned by |
| * each of the priority voters. Save them in a file that is set up for |
| * doing clustering. |
| **********************************************************************/ |
| void record_priorities(SEARCH_RECORD *the_search, |
| STATE *old_state, |
| FLOAT32 priority_1, |
| FLOAT32 priority_2) { |
| record_samples(priority_1, priority_2); |
| } |
| |
| |
| /********************************************************************** |
| * record_samples |
| * |
| * Remember the priority samples to summarize them later. |
| **********************************************************************/ |
| void record_samples(FLOAT32 match_pri, FLOAT32 width_pri) { |
| ADD_SAMPLE(match_priority_range, match_pri); |
| ADD_SAMPLE(width_priority_range, width_pri); |
| } |
| |
| |
| /********************************************************************** |
| * reset_width_tally |
| * |
| * Create a tally record and initialize it. |
| **********************************************************************/ |
| void reset_width_tally() { |
| character_widths = new_tally (20); |
| new_measurement(width_measure); |
| width_measure.num_samples = 158; |
| width_measure.sum_of_samples = 125.0; |
| width_measure.sum_of_squares = 118.0; |
| } |
| |
| |
| #ifndef GRAPHICS_DISABLED |
| /********************************************************************** |
| * save_best_state |
| * |
| * Save this state away to be compared later. |
| **********************************************************************/ |
| void save_best_state(CHUNKS_RECORD *chunks_record) { |
| STATE state; |
| SEARCH_STATE chunk_groups; |
| int num_joints; |
| |
| if (save_priorities) { |
| num_joints = chunks_record->ratings->dimension() - 1; |
| |
| state.part1 = 0xffffffff; |
| state.part2 = 0xffffffff; |
| |
| chunk_groups = bin_to_chunks (&state, num_joints); |
| display_segmentation (chunks_record->chunks, chunk_groups); |
| memfree(chunk_groups); |
| |
| cprintf ("Enter the correct segmentation > "); |
| fflush(stdout); |
| state.part1 = 0; |
| scanf ("%x", &state.part2); |
| |
| chunk_groups = bin_to_chunks (&state, num_joints); |
| display_segmentation (chunks_record->chunks, chunk_groups); |
| memfree(chunk_groups); |
| window_wait(segm_window); /* == 'n') */ |
| |
| if (known_best_state) |
| free_state(known_best_state); |
| known_best_state = new_state (&state); |
| } |
| } |
| #endif |
| |
| |
| /********************************************************************** |
| * start_record |
| * |
| * Set up everything needed to record the priority voters. |
| **********************************************************************/ |
| void start_recording() { |
| if (save_priorities) { |
| priority_file_1 = open_file ("Priorities1", "w"); |
| priority_file_2 = open_file ("Priorities2", "w"); |
| priority_file_3 = open_file ("Priorities3", "w"); |
| } |
| } |
| |
| |
| /********************************************************************** |
| * stop_recording |
| * |
| * Put an end to the priority recording mechanism. |
| **********************************************************************/ |
| void stop_recording() { |
| if (save_priorities) { |
| fclose(priority_file_1); |
| fclose(priority_file_2); |
| fclose(priority_file_3); |
| } |
| } |