| /////////////////////////////////////////////////////////////////////// |
| // File: alignedblob.cpp |
| // Description: Subclass of BBGrid to find vertically aligned blobs. |
| // Author: Ray Smith |
| // Created: Fri Mar 21 15:03:01 PST 2008 |
| // |
| // (C) Copyright 2008, Google Inc. |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| /////////////////////////////////////////////////////////////////////// |
| |
| #include "alignedblob.h" |
| #include "ndminx.h" |
| |
| INT_VAR(textord_debug_tabfind, 0, "Debug tab finding"); |
| INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); |
| INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle"); |
| INT_VAR(textord_testregion_top, -1, "Top edge of debug reporting rectangle"); |
| INT_VAR(textord_testregion_right, MAX_INT32, "Right edge of debug rectangle"); |
| INT_VAR(textord_testregion_bottom, MAX_INT32, "Bottom edge of debug rectangle"); |
| BOOL_VAR(textord_debug_images, false, "Use greyed image background for debug"); |
| BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); |
| |
| namespace tesseract { |
| |
| // Fraction of resolution used as alignment tolerance for aligned tabs. |
| const double kAlignedFraction = 0.03125; |
| // Fraction of resolution used as alignment tolerance for ragged tabs. |
| const double kRaggedFraction = 0.5; |
| // Fraction of height used as a minimum gutter gap for aligned blobs. |
| const double kAlignedGapFraction = 0.75; |
| // Fraction of height used as a minimum gutter gap for ragged tabs. |
| const double kRaggedGapFraction = 3.0; |
| // Constant number of pixels used as alignment tolerance for line finding. |
| const int kVLineAlignment = 3; |
| // Constant number of pixels used as gutter gap tolerance for line finding. |
| const int kVLineGutter = 1; |
| // Constant number of pixels used as the search size for line finding. |
| const int kVLineSearchSize = 150; |
| // Min number of points to accept for a ragged tab stop. |
| const int kMinRaggedTabs = 5; |
| // Min number of points to accept for an aligned tab stop. |
| const int kMinAlignedTabs = 4; |
| // Constant number of pixels minimum height of a vertical line. |
| const int kVLineMinLength = 500; |
| // Tolerance to skew on top of current estimate of skew. Divide x or y length |
| // by kMaxSkewFactor to get the y or x skew distance. |
| // If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. |
| const int kMaxSkewFactor = 15; |
| |
| // Constant part of textord_debug_pix_. |
| const char* kTextordDebugPix = "psdebug_pix"; |
| |
| // Name of image file to use if textord_debug_images is true. |
| STRING AlignedBlob::textord_debug_pix_ = kTextordDebugPix; |
| // Index to image file to use if textord_debug_images is true. |
| int AlignedBlob::debug_pix_index_ = 0; |
| |
| // Increment the serial number counter and set the string to use |
| // for a filename if textord_debug_images is true. |
| void AlignedBlob::IncrementDebugPix() { |
| ++debug_pix_index_; |
| textord_debug_pix_ = kTextordDebugPix; |
| char numbuf[32]; |
| snprintf(numbuf, sizeof(numbuf), "%d", debug_pix_index_); |
| textord_debug_pix_ += numbuf; |
| textord_debug_pix_ += ".pix"; |
| } |
| |
| // Constructor to set the parameters for finding aligned and ragged tabs. |
| // Vertical_x and vertical_y are the current estimates of the true vertical |
| // direction (up) in the image. Height is the height of the starter blob. |
| // v_gap_multiple is the multiple of height that will be used as a limit |
| // on vertical gap before giving up and calling the line ended. |
| // resolution is the original image resolution, and align0 indicates the |
| // type of tab stop to be found. |
| AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, |
| int height, int v_gap_multiple, |
| int resolution, TabAlignment align0) |
| : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED), |
| ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED), |
| alignment(align0), |
| confirmed_type(TT_CONFIRMED), |
| min_length(0) { |
| // Set the tolerances according to the type of line sought. |
| // For tab search, these are based on the image resolution for most, or |
| // the height of the starting blob for the maximum vertical gap. |
| max_v_gap = height * v_gap_multiple; |
| if (ragged) { |
| // In the case of a ragged edge, we are much more generous with the |
| // inside alignment fraction, but also require a much bigger gutter. |
| gutter_fraction = kRaggedGapFraction; |
| if (alignment == TA_RIGHT_RAGGED) { |
| l_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); |
| r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
| } else { |
| l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
| r_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); |
| } |
| min_points = kMinRaggedTabs; |
| } else { |
| gutter_fraction = kAlignedGapFraction; |
| l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
| r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
| min_points = kMinAlignedTabs; |
| } |
| min_gutter = static_cast<int>(height * gutter_fraction + 0.5); |
| // Fit the vertical vector into an ICOORD, which is 16 bit. |
| set_vertical(vertical_x, vertical_y); |
| } |
| |
| // Constructor to set the parameters for finding vertical lines. |
| // Vertical_x and vertical_y are the current estimates of the true vertical |
| // direction (up) in the image. Width is the width of the starter blob. |
| AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, |
| int width) |
| : gutter_fraction(0.0), |
| right_tab(false), |
| ragged(false), |
| alignment(TA_SEPARATOR), |
| confirmed_type(TT_VLINE), |
| max_v_gap(kVLineSearchSize), |
| min_gutter(kVLineGutter), |
| min_points(1), |
| min_length(kVLineMinLength) { |
| // Compute threshold for left and right alignment. |
| l_align_tolerance = MAX(kVLineAlignment, width); |
| r_align_tolerance = MAX(kVLineAlignment, width); |
| |
| // Fit the vertical vector into an ICOORD, which is 16 bit. |
| set_vertical(vertical_x, vertical_y); |
| } |
| |
| // Fit the vertical vector into an ICOORD, which is 16 bit. |
| void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { |
| int factor = 1; |
| if (vertical_y > MAX_INT16) |
| factor = vertical_y / MAX_INT16 + 1; |
| vertical.set_x(vertical_x / factor); |
| vertical.set_y(vertical_y / factor); |
| } |
| |
| |
| AlignedBlob::AlignedBlob(int gridsize, |
| const ICOORD& bleft, const ICOORD& tright) |
| : BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) { |
| } |
| |
| AlignedBlob::~AlignedBlob() { |
| } |
| |
| // Return true if the given coordinates are within the test rectangle |
| // and the debug level is at least the given detail level. |
| bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { |
| if (textord_debug_tabfind < detail_level) |
| return false; |
| return x >= textord_testregion_left && x <= textord_testregion_right && |
| y <= textord_testregion_top && y >= textord_testregion_bottom; |
| } |
| |
| // Display the tab codes of the BLOBNBOXes in this grid. |
| ScrollView* AlignedBlob::DisplayTabs(const char* window_name, |
| ScrollView* tab_win) { |
| #ifndef GRAPHICS_DISABLED |
| if (tab_win == NULL) |
| tab_win = MakeWindow(0, 50, window_name); |
| // For every tab in the grid, display it. |
| GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); |
| gsearch.StartFullSearch(); |
| BLOBNBOX* bbox; |
| while ((bbox = gsearch.NextFullSearch()) != NULL) { |
| TBOX box = bbox->bounding_box(); |
| int left_x = box.left(); |
| int right_x = box.right(); |
| int top_y = box.top(); |
| int bottom_y = box.bottom(); |
| TabType tabtype = bbox->left_tab_type(); |
| if (tabtype != TT_NONE) { |
| if (tabtype == TT_UNCONFIRMED) |
| tab_win->Pen(ScrollView::BLUE); |
| else if (tabtype == TT_CONFIRMED) |
| tab_win->Pen(ScrollView::GREEN); |
| else if (tabtype == TT_FAKE) |
| tab_win->Pen(ScrollView::YELLOW); |
| else |
| tab_win->Pen(ScrollView::GREY); |
| tab_win->Line(left_x, top_y, left_x, bottom_y); |
| } |
| tabtype = bbox->right_tab_type(); |
| if (tabtype != TT_NONE) { |
| if (tabtype == TT_UNCONFIRMED) |
| tab_win->Pen(ScrollView::MAGENTA); |
| else if (tabtype == TT_CONFIRMED) |
| tab_win->Pen(ScrollView::RED); |
| else if (tabtype == TT_FAKE) |
| tab_win->Pen(ScrollView::ORANGE); |
| else |
| tab_win->Pen(ScrollView::GREY); |
| tab_win->Line(right_x, top_y, right_x, bottom_y); |
| } |
| } |
| tab_win->Update(); |
| #endif |
| return tab_win; |
| } |
| |
| // Finds a vector corresponding to a set of vertically aligned blob edges |
| // running through the given box. The type of vector returned and the |
| // search parameters are determined by the AlignedBlobParams. |
| // vertical_x and y are updated with an estimate of the real |
| // vertical direction. (skew finding.) |
| // Returns NULL if no decent vector can be found. |
| TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, |
| BLOBNBOX* bbox, |
| int* vertical_x, |
| int* vertical_y) { |
| int ext_start_y, ext_end_y; |
| BLOBNBOX_CLIST good_points; |
| // Search up and then down from the starting bbox. |
| int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); |
| pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); |
| BLOBNBOX_C_IT it(&good_points); |
| it.move_to_last(); |
| int end_y = it.data()->bounding_box().top(); |
| it.move_to_first(); |
| int start_y = it.data()->bounding_box().bottom(); |
| if (pt_count >= align_params.min_points && |
| end_y - start_y >= align_params.min_length) { |
| int confirmed_points = 0; |
| // Count existing confirmed points to see if vector is acceptable. |
| for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
| bbox = it.data(); |
| if (align_params.right_tab) { |
| if (bbox->right_tab_type() == align_params.confirmed_type) |
| ++confirmed_points; |
| } else { |
| if (bbox->left_tab_type() == align_params.confirmed_type) |
| ++confirmed_points; |
| } |
| } |
| // Ragged vectors are not allowed to use too many already used points. |
| if (!align_params.ragged || |
| confirmed_points + confirmed_points < pt_count) { |
| const TBOX& box = bbox->bounding_box(); |
| if (WithinTestRegion(2, box.left(), box.bottom())) { |
| tprintf("Confirming tab vector of %d pts starting at %d,%d\n", |
| pt_count, box.left(), box.bottom()); |
| } |
| // Flag all the aligned neighbours as confirmed . |
| for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
| bbox = it.data(); |
| if (align_params.right_tab) { |
| bbox->set_right_tab_type(align_params.confirmed_type); |
| } else { |
| bbox->set_left_tab_type(align_params.confirmed_type); |
| } |
| } |
| // Now make the vector and return it. |
| TabVector* result = TabVector::FitVector(align_params.alignment, |
| align_params.vertical, |
| ext_start_y, ext_end_y, |
| &good_points, |
| vertical_x, vertical_y); |
| if (WithinTestRegion(2, box.left(), box.bottom())) { |
| result->Print("After fitting"); |
| } |
| return result; |
| } |
| } |
| return NULL; |
| } |
| |
| // Find a set of blobs that are aligned in the given vertical |
| // direction with the given blob. Returns a list of aligned |
| // blobs and the number in the list. |
| // For other parameters see FindAlignedBlob below. |
| int AlignedBlob::AlignTabs(const AlignedBlobParams& params, |
| bool top_to_bottom, BLOBNBOX* bbox, |
| BLOBNBOX_CLIST* good_points, int* end_y) { |
| int ptcount = 0; |
| BLOBNBOX_C_IT it(good_points); |
| |
| TBOX box = bbox->bounding_box(); |
| int x_start = params.right_tab ? box.right() : box.left(); |
| while (bbox != NULL) { |
| // Add the blob to the list if the appropriate side is a tab candidate, |
| // or if we are working on a ragged tab. |
| if (((params.right_tab && bbox->right_tab_type() != TT_NONE) || |
| (!params.right_tab && bbox->left_tab_type() != TT_NONE) || |
| params.ragged) && |
| (it.empty() || it.data() != bbox)) { |
| if (top_to_bottom) |
| it.add_before_then_move(bbox); |
| else |
| it.add_after_then_move(bbox); |
| ++ptcount; |
| } |
| // Find the next blob that is aligned with the current one. |
| // FindAlignedBlob guarantees that forward progress will be made in the |
| // top_to_bottom direction, and therefore eventually it will return NULL, |
| // making this while (bbox != NULL) loop safe. |
| bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); |
| if (bbox != NULL) { |
| box = bbox->bounding_box(); |
| if (!params.ragged) |
| x_start = params.right_tab ? box.right() : box.left(); |
| } |
| } |
| return ptcount; |
| } |
| |
| // Search vertically for a blob that is aligned with the input bbox. |
| // The search parameters are determined by AlignedBlobParams. |
| // top_to_bottom tells whether to search down or up. |
| // The return value is NULL if nothing was found in the search box |
| // or if a blob was found in the gutter. On a NULL return, end_y |
| // is set to the edge of the search box or the leading edge of the |
| // gutter blob if one was found. |
| BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, |
| bool top_to_bottom, BLOBNBOX* bbox, |
| int x_start, int* end_y) { |
| TBOX box = bbox->bounding_box(); |
| // If there are separator lines, get the column edges. |
| int left_column_edge = bbox->left_rule(); |
| int right_column_edge = bbox->right_rule(); |
| // start_y is used to guarantee that forward progress is made and the |
| // search does not go into an infinite loop. New blobs must extend the |
| // line beyond start_y. |
| int start_y = top_to_bottom ? box.bottom() : box.top(); |
| if (WithinTestRegion(2, x_start, start_y)) { |
| tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", |
| box.left(), box.top(), box.right(), box.bottom(), |
| left_column_edge, right_column_edge); |
| } |
| // Compute skew tolerance. |
| int skew_tolerance = p.max_v_gap / kMaxSkewFactor; |
| // Calculate xmin and xmax of the search box so that it contains |
| // all possibly relevant boxes upto p.max_v_gap above or below accoording |
| // to top_to_bottom. |
| // Start with a notion of vertical with the current estimate. |
| int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y(); |
| if (top_to_bottom) { |
| x2 = x_start - x2; |
| *end_y = start_y - p.max_v_gap; |
| } else { |
| x2 = x_start + x2; |
| *end_y = start_y + p.max_v_gap; |
| } |
| // Expand the box by an additional skew tolerance |
| int xmin = MIN(x_start, x2) - skew_tolerance; |
| int xmax = MAX(x_start, x2) + skew_tolerance; |
| // Now add direction-specific tolerances. |
| if (p.right_tab) { |
| xmax += p.min_gutter; |
| xmin -= p.l_align_tolerance; |
| } else { |
| xmax += p.r_align_tolerance; |
| xmin -= p.min_gutter; |
| } |
| // Setup a vertical search for an aligned blob. |
| GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this); |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", |
| p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left", |
| xmin, xmax, start_y, p.max_v_gap, p.min_gutter); |
| vsearch.StartVerticalSearch(xmin, xmax, start_y); |
| // result stores the best real return value. |
| BLOBNBOX* result = NULL; |
| // The backup_result is not a tab candidate and can be used if no |
| // real tab candidate result is found. |
| BLOBNBOX* backup_result = NULL; |
| // neighbour is the blob that is currently being investigated. |
| BLOBNBOX* neighbour = NULL; |
| while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != NULL) { |
| if (neighbour == bbox) |
| continue; |
| TBOX nbox = neighbour->bounding_box(); |
| int n_y = (nbox.top() + nbox.bottom()) / 2; |
| if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || |
| (top_to_bottom && n_y < start_y - p.max_v_gap)) { |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", |
| nbox.left(), nbox.bottom(), nbox.right(), nbox.top()); |
| break; // Gone far enough. |
| } |
| // It is CRITICAL to ensure that forward progress is made, (strictly |
| // in/decreasing n_y) or the caller could loop infinitely, while |
| // waiting for a sequence of blobs in a line to end. |
| // NextVerticalSearch alone does not guarantee this, as there may be |
| // more than one blob in a grid cell. See comment in AlignTabs. |
| if ((n_y < start_y) != top_to_bottom || n_y == start_y) |
| continue; // Only look in the required direction. |
| if (result != NULL && |
| ((top_to_bottom && n_y < result->bounding_box().bottom()) || |
| (!top_to_bottom && n_y > result->bounding_box().top()))) |
| return result; // This result is clear. |
| if (backup_result != NULL && p.ragged && |
| ((top_to_bottom && n_y < backup_result->bounding_box().bottom()) || |
| (!top_to_bottom && n_y > backup_result->bounding_box().top()))) |
| return backup_result; // This result is clear. |
| |
| // If the neighbouring blob is the wrong side of a separator line, then it |
| // "doesn't exist" as far as we are concerned. |
| int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y(); |
| if (x_at_n_y < neighbour->left_crossing_rule() || |
| x_at_n_y > neighbour->right_crossing_rule()) |
| continue; // Separator line in the way. |
| int n_left = nbox.left(); |
| int n_right = nbox.right(); |
| int n_x = p.right_tab ? n_right : n_left; |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", |
| nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), |
| n_x, n_y, x_at_n_y); |
| if (p.right_tab && |
| n_left < x_at_n_y + p.min_gutter && |
| n_right > x_at_n_y + p.r_align_tolerance && |
| (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { |
| // In the gutter so end of line. |
| if (bbox->right_tab_type() >= TT_UNCONFIRMED) |
| bbox->set_right_tab_type(TT_DELETED); |
| *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("gutter\n"); |
| return NULL; |
| } |
| if (!p.right_tab && |
| n_left < x_at_n_y - p.l_align_tolerance && |
| n_right > x_at_n_y - p.min_gutter && |
| (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { |
| // In the gutter so end of line. |
| if (bbox->left_tab_type() >= TT_UNCONFIRMED) |
| bbox->set_left_tab_type(TT_DELETED); |
| *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("gutter\n"); |
| return NULL; |
| } |
| if (n_x <= x_at_n_y + p.r_align_tolerance && |
| n_x >= x_at_n_y - p.l_align_tolerance) { |
| // Aligned so keep it. If it is a marked tab save it as result, |
| // otherwise keep it as backup_result to return in case of later failure. |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("aligned, seeking%d, l=%d, r=%d\n", |
| p.right_tab, neighbour->left_tab_type(), |
| neighbour->right_tab_type()); |
| if ((p.right_tab && neighbour->right_tab_type() != TT_NONE) || |
| (!p.right_tab && neighbour->left_tab_type() != TT_NONE)) { |
| if (result == NULL) { |
| result = neighbour; |
| } else { |
| // Keep the closest neighbour. |
| int old_y = (result->bounding_box().top() + |
| result->bounding_box().bottom()) / 2; |
| if (abs(n_y - start_y) < abs(old_y - start_y)) |
| result = neighbour; |
| } |
| } else if (backup_result == NULL) { |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("Backup\n"); |
| backup_result = neighbour; |
| } else { |
| TBOX backup_box = backup_result->bounding_box(); |
| if ((p.right_tab && backup_box.right() < nbox.right()) || |
| (!p.right_tab && backup_box.left() > nbox.left())) { |
| if (WithinTestRegion(2, x_start, start_y)) |
| tprintf("Better backup\n"); |
| backup_result = neighbour; |
| } |
| } |
| } |
| } |
| return result != NULL ? result : backup_result; |
| } |
| |
| } // namespace tesseract. |
| |