| /********************************************************************** |
| * File: blobbox.h (Formerly blobnbox.h) |
| * Description: Code for the textord blob class. |
| * Author: Ray Smith |
| * Created: Thu Jul 30 09:08:51 BST 1992 |
| * |
| * (C) Copyright 1992, Hewlett-Packard Ltd. |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| * |
| **********************************************************************/ |
| |
| #ifndef BLOBBOX_H |
| #define BLOBBOX_H |
| |
| #include "varable.h" |
| #include "clst.h" |
| #include "elst2.h" |
| #include "werd.h" |
| #include "ocrblock.h" |
| #include "statistc.h" |
| |
| extern double_VAR_H (textord_error_weight, 3, |
| "Weighting for error in believability"); |
| |
| enum PITCH_TYPE |
| { |
| PITCH_DUNNO, //insufficient data |
| PITCH_DEF_FIXED, //definitely fixed |
| PITCH_MAYBE_FIXED, //could be |
| PITCH_DEF_PROP, |
| PITCH_MAYBE_PROP, |
| PITCH_CORR_FIXED, |
| PITCH_CORR_PROP |
| }; |
| |
| // The possible tab-stop types of each side of a BLOBNBOX. |
| enum TabType { |
| TT_NONE, // Not a tab. |
| TT_DELETED, // Not a tab after detailed analysis. |
| TT_UNCONFIRMED, // Initial designation of a tab-stop candidate. |
| TT_FAKE, // Added by interpolation. |
| TT_CONFIRMED, // Aligned with neighbours. |
| TT_VLINE // Detected as a vertical line. |
| }; |
| |
| // The possible region types of a BLOBNBOX. |
| // Note: keep all the text types > BRT_UNKNOWN and all the image types less. |
| // Keep in sync with kBlobTypes in colpartition.cpp and BoxColor below. |
| enum BlobRegionType { |
| BRT_NOISE, // Neither text nor image. |
| BRT_HLINE, // Horizontal separator line. |
| BRT_RECTIMAGE, // Rectangular image. |
| BRT_POLYIMAGE, // Non-rectangular image. |
| BRT_UNKNOWN, // Not determined yet. |
| BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented. |
| BRT_TEXT, // Convincing text. |
| |
| BRT_COUNT // Number of possibilities. |
| }; |
| |
| namespace tesseract { |
| class ColPartition; |
| } |
| |
| class BLOBNBOX; |
| ELISTIZEH (BLOBNBOX) |
| class BLOBNBOX:public ELIST_LINK |
| { |
| public: |
| BLOBNBOX() { |
| blob_ptr = NULL; |
| cblob_ptr = NULL; |
| area = 0; |
| Init(); |
| } |
| explicit BLOBNBOX(PBLOB *srcblob) { |
| blob_ptr = srcblob; |
| cblob_ptr = NULL; |
| box = srcblob->bounding_box (); |
| area = (int) srcblob->area (); |
| Init(); |
| } |
| explicit BLOBNBOX(C_BLOB *srcblob) { |
| blob_ptr = NULL; |
| cblob_ptr = srcblob; |
| box = srcblob->bounding_box (); |
| area = (int) srcblob->area (); |
| Init(); |
| } |
| |
| void rotate_box(FCOORD vec) { |
| box.rotate(vec); |
| } |
| void translate_box(ICOORD v) { |
| box.move(v); |
| } |
| void merge(BLOBNBOX *nextblob); |
| void chop( // fake chop blob |
| BLOBNBOX_IT *start_it, // location of this |
| BLOBNBOX_IT *blob_it, // iterator |
| FCOORD rotation, // for landscape |
| float xheight); // line height |
| |
| // Simple accessors. |
| const TBOX &bounding_box() const { |
| return box; |
| } |
| void compute_bounding_box() { |
| box = cblob_ptr != NULL ? cblob_ptr->bounding_box() |
| : blob_ptr->bounding_box(); |
| } |
| const TBOX &reduced_box() const { |
| return red_box; |
| } |
| void set_reduced_box(TBOX new_box) { |
| red_box = new_box; |
| reduced = TRUE; |
| } |
| inT32 enclosed_area() const { |
| return area; |
| } |
| bool joined_to_prev() const { |
| return joined != 0; |
| } |
| bool red_box_set() const { |
| return reduced != 0; |
| } |
| int repeated_set() const { |
| return repeated_set_; |
| } |
| void set_repeated_set(int set_id) { |
| repeated_set_ = set_id; |
| } |
| PBLOB *blob() const { |
| return blob_ptr; |
| } |
| C_BLOB *cblob() const { |
| return cblob_ptr; |
| } |
| TabType left_tab_type() const { |
| return left_tab_type_; |
| } |
| void set_left_tab_type(TabType new_type) { |
| left_tab_type_ = new_type; |
| } |
| TabType right_tab_type() const { |
| return right_tab_type_; |
| } |
| void set_right_tab_type(TabType new_type) { |
| right_tab_type_ = new_type; |
| } |
| BlobRegionType region_type() const { |
| return region_type_; |
| } |
| void set_region_type(BlobRegionType new_type) { |
| region_type_ = new_type; |
| } |
| int left_rule() const { |
| return left_rule_; |
| } |
| void set_left_rule(int new_left) { |
| left_rule_ = new_left; |
| } |
| int right_rule() const { |
| return right_rule_; |
| } |
| void set_right_rule(int new_right) { |
| right_rule_ = new_right; |
| } |
| int left_crossing_rule() const { |
| return left_crossing_rule_; |
| } |
| void set_left_crossing_rule(int new_left) { |
| left_crossing_rule_ = new_left; |
| } |
| int right_crossing_rule() const { |
| return right_crossing_rule_; |
| } |
| void set_right_crossing_rule(int new_right) { |
| right_crossing_rule_ = new_right; |
| } |
| float horz_stroke_width() const { |
| return horz_stroke_width_; |
| } |
| void set_horz_stroke_width(float width) { |
| horz_stroke_width_ = width; |
| } |
| float vert_stroke_width() const { |
| return vert_stroke_width_; |
| } |
| void set_vert_stroke_width(float width) { |
| vert_stroke_width_ = width; |
| } |
| tesseract::ColPartition* owner() const { |
| return owner_; |
| } |
| void set_owner(tesseract::ColPartition* new_owner) { |
| owner_ = new_owner; |
| } |
| void set_noise_flag(bool flag) { |
| noise_flag_ = flag; |
| } |
| bool noise_flag() const { |
| return noise_flag_; |
| } |
| |
| #ifndef GRAPHICS_DISABLED |
| // Keep in sync with BlobRegionType. |
| ScrollView::Color BoxColor() const { |
| switch (region_type_) { |
| case BRT_HLINE: |
| return ScrollView::YELLOW; |
| case BRT_RECTIMAGE: |
| return ScrollView::RED; |
| case BRT_POLYIMAGE: |
| return ScrollView::ORANGE; |
| case BRT_UNKNOWN: |
| return ScrollView::CYAN; |
| case BRT_VERT_TEXT: |
| return ScrollView::GREEN; |
| case BRT_TEXT: |
| return ScrollView::BLUE; |
| case BRT_NOISE: |
| default: |
| return ScrollView::GREY; |
| } |
| } |
| |
| void plot(ScrollView* window, // window to draw in |
| ScrollView::Color blob_colour, //for outer bits |
| ScrollView::Color child_colour) { //for holes |
| if (blob_ptr != NULL) |
| blob_ptr->plot (window, blob_colour, child_colour); |
| if (cblob_ptr != NULL) |
| cblob_ptr->plot (window, blob_colour, child_colour); |
| } |
| #endif |
| |
| NEWDELETE2(BLOBNBOX) |
| |
| private: |
| // Initializes the bulk of the members to default values. |
| void Init() { |
| joined = false; |
| reduced = false; |
| repeated_set_ = 0; |
| left_tab_type_ = TT_NONE; |
| right_tab_type_ = TT_NONE; |
| region_type_ = BRT_UNKNOWN; |
| left_rule_ = 0; |
| right_rule_ = 0; |
| left_crossing_rule_ = 0; |
| right_crossing_rule_ = 0; |
| horz_stroke_width_ = 0.0f; |
| vert_stroke_width_ = 0.0f; |
| owner_ = NULL; |
| noise_flag_ = false; |
| } |
| |
| PBLOB *blob_ptr; // poly blob |
| C_BLOB *cblob_ptr; // edgestep blob |
| TBOX box; // bounding box |
| TBOX red_box; // bounding box |
| int area:30; //enclosed area |
| int joined:1; //joined to prev |
| int reduced:1; //reduced box set |
| int repeated_set_; // id of the set of repeated blobs |
| TabType left_tab_type_; // Indicates tab-stop assessment |
| TabType right_tab_type_; // Indicates tab-stop assessment |
| BlobRegionType region_type_; // Type of region this blob belongs to |
| inT16 left_rule_; // x-coord of nearest but not crossing rule line |
| inT16 right_rule_; // x-coord of nearest but not crossing rule line |
| inT16 left_crossing_rule_; // x-coord of nearest or crossing rule line |
| inT16 right_crossing_rule_; // x-coord of nearest or crossing rule line |
| float horz_stroke_width_; // Median horizontal stroke width |
| float vert_stroke_width_; // Median vertical stroke width |
| tesseract::ColPartition* owner_; // Who will delete me when I am not needed |
| // Was the blob flagged as noise in the initial filtering step |
| bool noise_flag_; |
| }; |
| |
| class TO_ROW:public ELIST2_LINK |
| { |
| public: |
| TO_ROW() { |
| num_repeated_sets_ = -1; |
| } //empty |
| TO_ROW( //constructor |
| BLOBNBOX *blob, //from first blob |
| float top, //of row //target height |
| float bottom, |
| float row_size); |
| |
| float max_y() const { //access function |
| return y_max; |
| } |
| float min_y() const { |
| return y_min; |
| } |
| float mean_y() const { |
| return (y_min + y_max) / 2.0f; |
| } |
| float initial_min_y() const { |
| return initial_y_min; |
| } |
| float line_m() const { //access to line fit |
| return m; |
| } |
| float line_c() const { |
| return c; |
| } |
| float line_error() const { |
| return error; |
| } |
| float parallel_c() const { |
| return para_c; |
| } |
| float parallel_error() const { |
| return para_error; |
| } |
| float believability() const { //baseline goodness |
| return credibility; |
| } |
| float intercept() const { //real parallel_c |
| return y_origin; |
| } |
| void add_blob( //put in row |
| BLOBNBOX *blob, //blob to add |
| float top, //of row //target height |
| float bottom, |
| float row_size); |
| void insert_blob( //put in row in order |
| BLOBNBOX *blob); |
| |
| BLOBNBOX_LIST *blob_list() { //get list |
| return &blobs; |
| } |
| |
| void set_line( //set line spec |
| float new_m, //line to set |
| float new_c, |
| float new_error) { |
| m = new_m; |
| c = new_c; |
| error = new_error; |
| } |
| void set_parallel_line( //set fixed gradient line |
| float gradient, //page gradient |
| float new_c, |
| float new_error) { |
| para_c = new_c; |
| para_error = new_error; |
| credibility = |
| (float) (blobs.length () - textord_error_weight * new_error); |
| y_origin = (float) (new_c / sqrt (1 + gradient * gradient)); |
| //real intercept |
| } |
| void set_limits( //set min,max |
| float new_min, //bottom and |
| float new_max) { //top of row |
| y_min = new_min; |
| y_max = new_max; |
| } |
| void compute_vertical_projection(); |
| //get projection |
| |
| bool rep_chars_marked() const { |
| return num_repeated_sets_ != -1; |
| } |
| void clear_rep_chars_marked() { |
| num_repeated_sets_ = -1; |
| } |
| int num_repeated_sets() const { |
| return num_repeated_sets_; |
| } |
| void set_num_repeated_sets(int num_sets) { |
| num_repeated_sets_ = num_sets; |
| } |
| |
| //true when dead |
| NEWDELETE2 (TO_ROW) BOOL8 merged; |
| BOOL8 all_caps; //had no ascenders |
| BOOL8 used_dm_model; //in guessing pitch |
| inT16 projection_left; //start of projection |
| inT16 projection_right; //start of projection |
| PITCH_TYPE pitch_decision; //how strong is decision |
| float fixed_pitch; //pitch or 0 |
| float fp_space; //sp if fixed pitch |
| float fp_nonsp; //nonsp if fixed pitch |
| float pr_space; //sp if prop |
| float pr_nonsp; //non sp if prop |
| float spacing; //to "next" row |
| float xheight; //of line |
| int xheight_evidence; // number of blobs of height xheight |
| float ascrise; //ascenders |
| float descdrop; //descenders |
| inT32 min_space; //min size for real space |
| inT32 max_nonspace; //max size of non-space |
| inT32 space_threshold; //space vs nonspace |
| float kern_size; //average non-space |
| float space_size; //average space |
| WERD_LIST rep_words; //repeated chars |
| ICOORDELT_LIST char_cells; //fixed pitch cells |
| QSPLINE baseline; //curved baseline |
| STATS projection; //vertical projection |
| |
| private: |
| BLOBNBOX_LIST blobs; //blobs in row |
| float y_min; //coords |
| float y_max; |
| float initial_y_min; |
| float m, c; //line spec |
| float error; //line error |
| float para_c; //constrained fit |
| float para_error; |
| float y_origin; //rotated para_c; |
| float credibility; //baseline believability |
| int num_repeated_sets_; // number of sets of repeated blobs |
| // set to -1 if we have not searched |
| // for repeated blobs in this row yet |
| }; |
| |
| ELIST2IZEH (TO_ROW) |
| class TO_BLOCK:public ELIST_LINK |
| { |
| public: |
| TO_BLOCK() { |
| } //empty |
| TO_BLOCK( //constructor |
| BLOCK *src_block); //real block |
| ~TO_BLOCK(); |
| |
| TO_ROW_LIST *get_rows() { //access function |
| return &row_list; |
| } |
| |
| void print_rows() { //debug info |
| TO_ROW_IT row_it = &row_list; |
| TO_ROW *row; |
| |
| for (row_it.mark_cycle_pt (); !row_it.cycled_list (); |
| row_it.forward ()) { |
| row = row_it.data (); |
| printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT |
| "\n", row->min_y (), row->max_y (), row->parallel_c (), |
| row->blob_list ()->length ()); |
| } |
| } |
| |
| // Draw the blobs on on the various lists in the block in different colors. |
| void plot_graded_blobs(ScrollView* to_win); |
| |
| BLOBNBOX_LIST blobs; //medium size |
| BLOBNBOX_LIST underlines; //underline blobs |
| BLOBNBOX_LIST noise_blobs; //very small |
| BLOBNBOX_LIST small_blobs; //fairly small |
| BLOBNBOX_LIST large_blobs; //big blobs |
| BLOCK *block; //real block |
| PITCH_TYPE pitch_decision; //how strong is decision |
| float line_spacing; //estimate |
| float line_size; //estimate |
| float max_blob_size; //line assignment limit |
| float baseline_offset; //phase shift |
| float xheight; //median blob size |
| float fixed_pitch; //pitch or 0 |
| float kern_size; //average non-space |
| float space_size; //average space |
| inT32 min_space; //min definite space |
| inT32 max_nonspace; //max definite |
| float fp_space; //sp if fixed pitch |
| float fp_nonsp; //nonsp if fixed pitch |
| float pr_space; //sp if prop |
| float pr_nonsp; //non sp if prop |
| TO_ROW *key_row; //starting row |
| |
| NEWDELETE2 (TO_BLOCK) private: |
| TO_ROW_LIST row_list; //temporary rows |
| }; |
| |
| ELISTIZEH (TO_BLOCK) |
| extern double_VAR_H (textord_error_weight, 3, |
| "Weighting for error in believability"); |
| void find_blob_limits( //get y limits |
| PBLOB *blob, //blob to search |
| float leftx, //x limits |
| float rightx, |
| FCOORD rotation, //for landscape |
| float &ymin, //output y limits |
| float &ymax); |
| void find_cblob_limits( //get y limits |
| C_BLOB *blob, //blob to search |
| float leftx, //x limits |
| float rightx, |
| FCOORD rotation, //for landscape |
| float &ymin, //output y limits |
| float &ymax); |
| void find_cblob_vlimits( //get y limits |
| C_BLOB *blob, //blob to search |
| float leftx, //x limits |
| float rightx, |
| float &ymin, //output y limits |
| float &ymax); |
| void find_cblob_hlimits( //get x limits |
| C_BLOB *blob, //blob to search |
| float bottomy, //y limits |
| float topy, |
| float &xmin, //output x limits |
| float &xymax); |
| PBLOB *rotate_blob( //get y limits |
| PBLOB *blob, //blob to search |
| FCOORD rotation //vector to rotate by |
| ); |
| PBLOB *rotate_cblob( //rotate it |
| C_BLOB *blob, //blob to search |
| float xheight, //for poly approx |
| FCOORD rotation //for landscape |
| ); |
| C_BLOB *crotate_cblob( //rotate it |
| C_BLOB *blob, //blob to search |
| FCOORD rotation //for landscape |
| ); |
| TBOX box_next( //get bounding box |
| BLOBNBOX_IT *it //iterator to blobds |
| ); |
| TBOX box_next_pre_chopped( //get bounding box |
| BLOBNBOX_IT *it //iterator to blobds |
| ); |
| void vertical_blob_projection( //project outlines |
| PBLOB *blob, //blob to project |
| STATS *stats //output |
| ); |
| //project outlines |
| void vertical_outline_projection(OUTLINE *outline, //outline to project |
| STATS *stats //output |
| ); |
| void vertical_cblob_projection( //project outlines |
| C_BLOB *blob, //blob to project |
| STATS *stats //output |
| ); |
| void vertical_coutline_projection( //project outlines |
| C_OUTLINE *outline, //outline to project |
| STATS *stats //output |
| ); |
| void plot_blob_list(ScrollView* win, // window to draw in |
| BLOBNBOX_LIST *list, // blob list |
| ScrollView::Color body_colour, // colour to draw |
| ScrollView::Color child_colour); // colour of child |
| |
| #endif |