| /********************************************************************** |
| * File: oldbasel.cpp (Formerly oldbl.c) |
| * Description: A re-implementation of the old baseline algorithm. |
| * Author: Ray Smith |
| * Created: Wed Oct 6 09:41:48 BST 1993 |
| * |
| * (C) Copyright 1993, Hewlett-Packard Ltd. |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| * |
| **********************************************************************/ |
| |
| #include "mfcpch.h" |
| #include "statistc.h" |
| #include "quadlsq.h" |
| #include "lmedsq.h" |
| #include "makerow.h" |
| #include "drawtord.h" |
| #include "oldbasel.h" |
| #include "tprintf.h" |
| |
| #define EXTERN |
| |
| EXTERN BOOL_VAR (textord_really_old_xheight, FALSE, |
| "Use original wiseowl xheight"); |
| EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation"); |
| EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation"); |
| EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism"); |
| EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines"); |
| EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions"); |
| EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights"); |
| EXTERN BOOL_VAR (oldbl_xhfix, FALSE, |
| "Fix bug in modes threshold for xheights"); |
| EXTERN BOOL_VAR(textord_ocropus_mode, FALSE, "Make baselines for ocropus"); |
| EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc"); |
| EXTERN INT_VAR (oldbl_holed_losscount, 10, |
| "Max lost before fallback line used"); |
| EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); |
| EXTERN double_VAR (textord_oldbl_jumplimit, 0.15, |
| "X fraction for new partition"); |
| |
| #define TURNLIMIT 1 /*min size for turning point */ |
| #define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */ |
| #define DESCENDER_FRACTION 0.5 /*descender/x-height */ |
| #define MIN_ASC_FRACTION 0.20 /*min size of ascenders */ |
| #define MIN_DESC_FRACTION 0.25 /*min size of descenders */ |
| #define MINASCRISE 2.0 /*min ascender/desc step */ |
| #define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */ |
| #define MAXHEIGHT 300 /*max blob height */ |
| #define MAXOVERLAP 0.1 /*max 10% missed overlap */ |
| #define MAXBADRUN 2 /*max non best for failed */ |
| #define HEIGHTBUCKETS 200 /* Num of buckets */ |
| #define DELTAHEIGHT 5.0 /* Small amount of diff */ |
| #define GOODHEIGHT 5 |
| #define MAXLOOPS 10 |
| #define MODENUM 10 |
| #define MAXPARTS 6 |
| #define SPLINESIZE 23 |
| |
| #define ABS(x) ((x)<0 ? (-(x)) : (x)) |
| |
| /********************************************************************** |
| * make_old_baselines |
| * |
| * Top level function to make baselines the old way. |
| **********************************************************************/ |
| |
| void make_old_baselines( //make splines |
| TO_BLOCK *block, //block to do |
| BOOL8 testing_on //correct orientation |
| ) { |
| QSPLINE *prev_baseline; //baseline of previous row |
| TO_ROW *row; //current row |
| TO_ROW_IT row_it = block->get_rows (); |
| BLOBNBOX_IT blob_it; |
| |
| prev_baseline = NULL; //nothing yet |
| for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { |
| row = row_it.data (); |
| find_textlines (block, row, 2, NULL); |
| if (row->xheight <= 0 && prev_baseline != NULL) |
| find_textlines (block, row, 2, prev_baseline); |
| if (row->xheight > 0) |
| //was a good one |
| prev_baseline = &row->baseline; |
| else { |
| prev_baseline = NULL; |
| blob_it.set_to_list (row->blob_list ()); |
| if (textord_debug_baselines) |
| tprintf ("Row baseline generation failed on row at (%d,%d)\n", |
| blob_it.data ()->bounding_box ().left (), |
| blob_it.data ()->bounding_box ().bottom ()); |
| } |
| } |
| correlate_lines(block); |
| } |
| |
| |
| /********************************************************************** |
| * correlate_lines |
| * |
| * Correlate the x-heights and ascender heights of a block to fill-in |
| * the ascender height and descender height for rows without one. |
| * Also fix baselines of rows without a decent fit. |
| **********************************************************************/ |
| |
| void correlate_lines( //cleanup lines |
| TO_BLOCK *block //block to do |
| ) { |
| TO_ROW **rows; //array of ptrs |
| int rowcount; /*no of rows to do */ |
| register int rowindex; /*no of row */ |
| //iterator |
| TO_ROW_IT row_it = block->get_rows (); |
| |
| rowcount = row_it.length (); |
| if (rowcount == 0) { |
| //default value |
| block->xheight = block->line_size; |
| return; /*none to do */ |
| } |
| rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *)); |
| rowindex = 0; |
| for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) |
| //make array |
| rows[rowindex++] = row_it.data (); |
| |
| /*try to fix bad lines */ |
| correlate_neighbours(block, rows, rowcount); |
| |
| block->xheight = (float) correlate_with_stats(rows, rowcount, block); |
| /*use stats */ |
| if (block->xheight <= 0) |
| //desperate |
| block->xheight = block->line_size * textord_merge_x; |
| if (block->xheight < textord_min_xheight) |
| block->xheight = (float) textord_min_xheight; |
| |
| free_mem(rows); |
| } |
| |
| |
| /********************************************************************** |
| * correlate_neighbours |
| * |
| * Try to fix rows that had a bad spline fit by using neighbours. |
| **********************************************************************/ |
| |
| void correlate_neighbours( //fix bad rows |
| TO_BLOCK *block, /*block rows are in */ |
| TO_ROW **rows, /*rows of block */ |
| int rowcount /*no of rows to do */ |
| ) { |
| TO_ROW *row; /*current row */ |
| register int rowindex; /*no of row */ |
| register int otherrow; /*second row */ |
| int upperrow; /*row above to use */ |
| int lowerrow; /*row below to use */ |
| float biggest; |
| |
| for (rowindex = 0; rowindex < rowcount; rowindex++) { |
| row = rows[rowindex]; /*current row */ |
| if (row->xheight < 0) { |
| /*quadratic failed */ |
| for (otherrow = rowindex - 2; |
| otherrow >= 0 |
| && (rows[otherrow]->xheight < 0.0 |
| || !row->baseline.overlap (&rows[otherrow]->baseline, |
| MAXOVERLAP)); otherrow--); |
| upperrow = otherrow; /*decent row above */ |
| for (otherrow = rowindex + 1; |
| otherrow < rowcount |
| && (rows[otherrow]->xheight < 0.0 |
| || !row->baseline.overlap (&rows[otherrow]->baseline, |
| MAXOVERLAP)); otherrow++); |
| lowerrow = otherrow; /*decent row below */ |
| if (upperrow >= 0) |
| find_textlines (block, row, 2, &rows[upperrow]->baseline); |
| if (row->xheight < 0 && lowerrow < rowcount) |
| find_textlines (block, row, 2, &rows[lowerrow]->baseline); |
| if (row->xheight < 0) { |
| if (upperrow >= 0) |
| find_textlines (block, row, 1, &rows[upperrow]->baseline); |
| else if (lowerrow < rowcount) |
| find_textlines (block, row, 1, &rows[lowerrow]->baseline); |
| } |
| } |
| } |
| |
| for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) { |
| row = rows[rowindex]; /*current row */ |
| if (row->xheight < 0) /*linear failed */ |
| /*make do */ |
| row->xheight = -row->xheight; |
| biggest = MAX (biggest, row->xheight); |
| } |
| } |
| |
| |
| /********************************************************************** |
| * correlate_with_stats |
| * |
| * correlate the x-heights and ascender heights of a block to fill-in |
| * the ascender height and descender height for rows without one. |
| **********************************************************************/ |
| |
| int correlate_with_stats( //fix xheights |
| TO_ROW **rows, /*rows of block */ |
| int rowcount, /*no of rows to do */ |
| TO_BLOCK* block |
| ) { |
| TO_ROW *row; /*current row */ |
| register int rowindex; /*no of row */ |
| float lineheight; /*mean x-height */ |
| float ascheight; /*average ascenders */ |
| float minascheight; /*min allowed ascheight */ |
| int xcount; /*no of samples for xheight */ |
| float fullheight; /*mean top height */ |
| int fullcount; /*no of samples */ |
| float descheight; /*mean descender drop */ |
| float mindescheight; /*min allowed descheight */ |
| int desccount; /*no of samples */ |
| float xshift; /*shift in xheight */ |
| |
| /*no samples */ |
| xcount = fullcount = desccount = 0; |
| lineheight = ascheight = fullheight = descheight = 0.0; |
| for (rowindex = 0; rowindex < rowcount; rowindex++) { |
| row = rows[rowindex]; /*current row */ |
| if (row->ascrise > 0.0) { /*got ascenders? */ |
| lineheight += row->xheight;/*average x-heights */ |
| ascheight += row->ascrise; /*average ascenders */ |
| xcount++; |
| } |
| else { |
| fullheight += row->xheight;/*assume full height */ |
| fullcount++; |
| } |
| if (row->descdrop < 0.0) { /*got descenders? */ |
| /*average descenders */ |
| descheight += row->descdrop; |
| desccount++; |
| } |
| } |
| |
| if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) { |
| lineheight /= xcount; /*average x-height */ |
| /*average caps height */ |
| fullheight = lineheight + ascheight / xcount; |
| /*must be decent size */ |
| if (fullheight < lineheight * (1 + MIN_ASC_FRACTION)) |
| fullheight = lineheight * (1 + MIN_ASC_FRACTION); |
| } |
| else { |
| fullheight /= fullcount; /*average max height */ |
| /*guess x-height */ |
| lineheight = fullheight * X_HEIGHT_FRACTION; |
| } |
| if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) |
| descheight /= desccount; /*average descenders */ |
| else |
| /*guess descenders */ |
| descheight = -lineheight * DESCENDER_FRACTION; |
| |
| if (lineheight > 0.0f) |
| block->block->set_cell_over_xheight((fullheight - descheight) / lineheight); |
| |
| minascheight = lineheight * MIN_ASC_FRACTION; |
| mindescheight = -lineheight * MIN_DESC_FRACTION; |
| for (rowindex = 0; rowindex < rowcount; rowindex++) { |
| row = rows[rowindex]; /*do each row */ |
| row->all_caps = FALSE; |
| if (row->ascrise / row->xheight < MIN_ASC_FRACTION) { |
| /*no ascenders */ |
| if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) |
| && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) { |
| row->ascrise = fullheight - lineheight; |
| /*shift in x */ |
| xshift = lineheight - row->xheight; |
| /*set to average */ |
| row->xheight = lineheight; |
| |
| } |
| else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE) |
| && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) { |
| row->ascrise = row->xheight - lineheight; |
| xshift = -row->ascrise; /*shift in x */ |
| /*set to average */ |
| row->xheight = lineheight; |
| row->all_caps = TRUE; |
| } |
| else { |
| row->ascrise = (fullheight - lineheight) * row->xheight |
| / fullheight; |
| xshift = -row->ascrise; /*shift in x */ |
| /*scale it */ |
| row->xheight -= row->ascrise; |
| row->all_caps = TRUE; |
| } |
| if (row->ascrise < minascheight) |
| row->ascrise = |
| row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION); |
| } |
| if (row->descdrop > mindescheight) { |
| if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) |
| && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) |
| /*set to average */ |
| row->descdrop = descheight; |
| else |
| row->descdrop = -row->xheight * DESCENDER_FRACTION; |
| } |
| } |
| return (int) lineheight; //block xheight |
| } |
| |
| |
| /********************************************************************** |
| * find_textlines |
| * |
| * Compute the baseline for the given row. |
| **********************************************************************/ |
| |
| void find_textlines( //get baseline |
| TO_BLOCK *block, //block row is in |
| TO_ROW *row, //row to do |
| int degree, //required approximation |
| QSPLINE *spline //starting spline |
| ) { |
| int partcount; /*no of partitions of */ |
| BOOL8 holed_line; //lost too many blobs |
| int bestpart; /*biggest partition */ |
| char *partids; /*partition no of each blob */ |
| int partsizes[MAXPARTS]; /*no in each partition */ |
| int lineheight; /*guessed x-height */ |
| float jumplimit; /*allowed delta change */ |
| int *xcoords; /*useful sample points */ |
| int *ycoords; /*useful sample points */ |
| TBOX *blobcoords; /*edges of blob rectangles */ |
| int blobcount; /*no of blobs on line */ |
| float *ydiffs; /*diffs from 1st approx */ |
| int pointcount; /*no of coords */ |
| int xstarts[SPLINESIZE + 1]; //segment boundaries |
| int segments; //no of segments |
| |
| //no of blobs in row |
| blobcount = row->blob_list ()->length (); |
| partids = (char *) alloc_mem (blobcount * sizeof (char)); |
| xcoords = (int *) alloc_mem (blobcount * sizeof (int)); |
| ycoords = (int *) alloc_mem (blobcount * sizeof (int)); |
| blobcoords = (TBOX *) alloc_mem (blobcount * sizeof (TBOX)); |
| ydiffs = (float *) alloc_mem (blobcount * sizeof (float)); |
| |
| lineheight = get_blob_coords (row, (int) block->line_size, blobcoords, |
| holed_line, blobcount); |
| /*limit for line change */ |
| jumplimit = lineheight * textord_oldbl_jumplimit; |
| if (jumplimit < MINASCRISE) |
| jumplimit = MINASCRISE; |
| |
| if (textord_oldbl_debug) { |
| tprintf |
| ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", |
| block->line_size, lineheight, jumplimit); |
| } |
| if (holed_line) |
| make_holed_baseline (blobcoords, blobcount, spline, &row->baseline, |
| row->line_m ()); |
| else |
| make_first_baseline (blobcoords, blobcount, |
| xcoords, ycoords, spline, &row->baseline, jumplimit); |
| #ifndef GRAPHICS_DISABLED |
| if (textord_show_final_rows) |
| row->baseline.plot (to_win, ScrollView::GOLDENROD); |
| #endif |
| if (blobcount > 1) { |
| bestpart = partition_line (blobcoords, blobcount, |
| &partcount, partids, partsizes, |
| &row->baseline, jumplimit, ydiffs); |
| pointcount = partition_coords (blobcoords, blobcount, |
| partids, bestpart, xcoords, ycoords); |
| segments = segment_spline (blobcoords, blobcount, |
| xcoords, ycoords, |
| degree, pointcount, xstarts); |
| if (!holed_line) { |
| do { |
| row->baseline = QSPLINE (xstarts, segments, |
| xcoords, ycoords, pointcount, degree); |
| } |
| while (textord_oldbl_split_splines |
| && split_stepped_spline (&row->baseline, jumplimit / 2, |
| xcoords, xstarts, segments)); |
| } |
| find_lesser_parts(row, |
| blobcoords, |
| blobcount, |
| partids, |
| partsizes, |
| partcount, |
| bestpart); |
| |
| } |
| else { |
| row->xheight = -1.0f; /*failed */ |
| row->descdrop = 0.0f; |
| row->ascrise = 0.0f; |
| } |
| row->baseline.extrapolate (row->line_m (), |
| block->block->bounding_box ().left (), |
| block->block->bounding_box ().right ()); |
| if (textord_really_old_xheight) |
| old_first_xheight (row, blobcoords, lineheight, |
| blobcount, &row->baseline, jumplimit); |
| else |
| make_first_xheight (row, blobcoords, lineheight, (int) block->line_size, |
| blobcount, &row->baseline, jumplimit); |
| free_mem(partids); |
| free_mem(xcoords); |
| free_mem(ycoords); |
| free_mem(blobcoords); |
| free_mem(ydiffs); |
| } |
| |
| |
| /********************************************************************** |
| * get_blob_coords |
| * |
| * Fill the blobcoords array with the coordinates of the blobs |
| * in the row. The return value is the first guess atthe line height. |
| **********************************************************************/ |
| |
| int get_blob_coords( //get boxes |
| TO_ROW *row, //row to use |
| inT32 lineheight, //block level |
| TBOX *blobcoords, //ouput boxes |
| BOOL8 &holed_line, //lost a lot of blobs |
| int &outcount //no of real blobs |
| ) { |
| //blobs |
| BLOBNBOX_IT blob_it = row->blob_list (); |
| register int blobindex; /*no along text line */ |
| int losscount; //lost blobs |
| int maxlosscount; //greatest lost blobs |
| /*height stat collection */ |
| STATS heightstat (0, MAXHEIGHT); |
| |
| if (blob_it.empty ()) |
| return 0; //none |
| maxlosscount = 0; |
| losscount = 0; |
| blob_it.mark_cycle_pt (); |
| blobindex = 0; |
| do { |
| blobcoords[blobindex] = box_next_pre_chopped (&blob_it); |
| if (blobcoords[blobindex].height () > lineheight * 0.25) |
| heightstat.add (blobcoords[blobindex].height (), 1); |
| if (blobindex == 0 |
| || blobcoords[blobindex].height () > lineheight * 0.25 |
| || blob_it.cycled_list ()) { |
| blobindex++; /*no of merged blobs */ |
| losscount = 0; |
| } |
| else { |
| if (blobcoords[blobindex].height () |
| < blobcoords[blobindex].width () * oldbl_dot_error_size |
| && blobcoords[blobindex].width () |
| < blobcoords[blobindex].height () * oldbl_dot_error_size) { |
| //counts as dot |
| blobindex++; |
| losscount = 0; |
| } |
| else { |
| losscount++; //lost it |
| if (losscount > maxlosscount) |
| //remember max |
| maxlosscount = losscount; |
| } |
| } |
| } |
| while (!blob_it.cycled_list ()); |
| |
| holed_line = maxlosscount > oldbl_holed_losscount; |
| outcount = blobindex; /*total blobs */ |
| |
| if (heightstat.get_total () > 1) |
| /*guess x-height */ |
| return (int) heightstat.ile (0.25); |
| else |
| return blobcoords[0].height (); |
| } |
| |
| |
| /********************************************************************** |
| * make_first_baseline |
| * |
| * Make the first estimate at a baseline, either by shifting |
| * a supplied previous spline, or by doing a piecewise linear |
| * approximation using all the blobs. |
| **********************************************************************/ |
| |
| void |
| make_first_baseline ( //initial approximation |
| TBOX blobcoords[], /*blob bounding boxes */ |
| int blobcount, /*no of blobcoords */ |
| int xcoords[], /*coords for spline */ |
| int ycoords[], /*approximator */ |
| QSPLINE * spline, /*initial spline */ |
| QSPLINE * baseline, /*output spline */ |
| float jumplimit /*guess half descenders */ |
| ) { |
| int leftedge; /*left edge of line */ |
| int rightedge; /*right edge of line */ |
| int blobindex; /*current blob */ |
| int segment; /*current segment */ |
| float prevy, thisy, nexty; /*3 y coords */ |
| float y1, y2, y3; /*3 smooth blobs */ |
| float maxmax, minmin; /*absolute limits */ |
| int x2 = 0; /*right edge of old y3 */ |
| int ycount; /*no of ycoords in use */ |
| float yturns[SPLINESIZE]; /*y coords of turn pts */ |
| int xturns[SPLINESIZE]; /*xcoords of turn pts */ |
| int xstarts[SPLINESIZE + 1]; |
| int segments; //no of segments |
| ICOORD shift; //shift of spline |
| |
| prevy = 0; |
| /*left edge of row */ |
| leftedge = blobcoords[0].left (); |
| /*right edge of line */ |
| rightedge = blobcoords[blobcount - 1].right (); |
| if (spline == NULL /*no given spline */ |
| || spline->segments < 3 /*or trivial */ |
| /*or too non-overlap */ |
| || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge) |
| || spline->xcoords[spline->segments - 1] < rightedge |
| - MAXOVERLAP * (rightedge - leftedge)) { |
| if (textord_oldbl_paradef) |
| return; //use default |
| xstarts[0] = blobcoords[0].left () - 1; |
| for (blobindex = 0; blobindex < blobcount; blobindex++) { |
| xcoords[blobindex] = (blobcoords[blobindex].left () |
| + blobcoords[blobindex].right ()) / 2; |
| ycoords[blobindex] = blobcoords[blobindex].bottom (); |
| } |
| xstarts[1] = blobcoords[blobcount - 1].right () + 1; |
| segments = 1; /*no of segments */ |
| |
| /*linear */ |
| *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1); |
| |
| if (blobcount >= 3) { |
| y1 = y2 = y3 = 0.0f; |
| ycount = 0; |
| segment = 0; /*no of segments */ |
| maxmax = minmin = 0.0f; |
| thisy = ycoords[0] - baseline->y (xcoords[0]); |
| nexty = ycoords[1] - baseline->y (xcoords[1]); |
| for (blobindex = 2; blobindex < blobcount; blobindex++) { |
| prevy = thisy; /*shift ycoords */ |
| thisy = nexty; |
| nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]); |
| /*middle of smooth y */ |
| if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) { |
| y1 = y2; /*shift window */ |
| y2 = y3; |
| y3 = thisy; /*middle point */ |
| ycount++; |
| /*local max */ |
| if (ycount >= 3 && ((y1 < y2 && y2 >= y3) |
| /*local min */ |
| || (y1 > y2 && y2 <= y3))) { |
| if (segment < SPLINESIZE - 2) { |
| /*turning pt */ |
| xturns[segment] = x2; |
| yturns[segment] = y2; |
| segment++; /*no of spline segs */ |
| } |
| } |
| if (ycount == 1) { |
| maxmax = minmin = y3;/*initialise limits */ |
| } |
| else { |
| if (y3 > maxmax) |
| maxmax = y3; /*biggest max */ |
| if (y3 < minmin) |
| minmin = y3; /*smallest min */ |
| } |
| /*possible turning pt */ |
| x2 = blobcoords[blobindex - 1].right (); |
| } |
| } |
| |
| jumplimit *= 1.2; |
| /*must be wavy */ |
| if (maxmax - minmin > jumplimit) { |
| ycount = segment; /*no of segments */ |
| for (blobindex = 0, segment = 1; blobindex < ycount; |
| blobindex++) { |
| if (yturns[blobindex] > minmin + jumplimit |
| || yturns[blobindex] < maxmax - jumplimit) { |
| /*significant peak */ |
| if (segment == 1 |
| || yturns[blobindex] > prevy + jumplimit |
| || yturns[blobindex] < prevy - jumplimit) { |
| /*different to previous */ |
| xstarts[segment] = xturns[blobindex]; |
| segment++; |
| prevy = yturns[blobindex]; |
| } |
| /*bigger max */ |
| else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy) |
| /*smaller min */ |
| || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) { |
| xstarts[segment - 1] = xturns[blobindex]; |
| /*improved previous */ |
| prevy = yturns[blobindex]; |
| } |
| } |
| } |
| xstarts[segment] = blobcoords[blobcount - 1].right () + 1; |
| segments = segment; /*no of segments */ |
| /*linear */ |
| *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1); |
| } |
| } |
| } |
| else { |
| *baseline = *spline; /*copy it */ |
| shift = ICOORD (0, (inT16) (blobcoords[0].bottom () |
| - spline->y (blobcoords[0].right ()))); |
| baseline->move (shift); |
| } |
| } |
| |
| |
| /********************************************************************** |
| * make_holed_baseline |
| * |
| * Make the first estimate at a baseline, either by shifting |
| * a supplied previous spline, or by doing a piecewise linear |
| * approximation using all the blobs. |
| **********************************************************************/ |
| |
| void |
| make_holed_baseline ( //initial approximation |
| TBOX blobcoords[], /*blob bounding boxes */ |
| int blobcount, /*no of blobcoords */ |
| QSPLINE * spline, /*initial spline */ |
| QSPLINE * baseline, /*output spline */ |
| float gradient //of line |
| ) { |
| int leftedge; /*left edge of line */ |
| int rightedge; /*right edge of line */ |
| int blobindex; /*current blob */ |
| float x; //centre of row |
| ICOORD shift; //shift of spline |
| |
| LMS lms(blobcount); //straight baseline |
| inT32 xstarts[2]; //straight line |
| double coeffs[3]; |
| float c; //line parameter |
| |
| /*left edge of row */ |
| leftedge = blobcoords[0].left (); |
| /*right edge of line */ |
| rightedge = blobcoords[blobcount - 1].right (); |
| for (blobindex = 0; blobindex < blobcount; blobindex++) { |
| lms.add (FCOORD ((blobcoords[blobindex].left () + |
| blobcoords[blobindex].right ()) / 2.0, |
| blobcoords[blobindex].bottom ())); |
| } |
| lms.constrained_fit (gradient, c); |
| xstarts[0] = leftedge; |
| xstarts[1] = rightedge; |
| coeffs[0] = 0; |
| coeffs[1] = gradient; |
| coeffs[2] = c; |
| *baseline = QSPLINE (1, xstarts, coeffs); |
| if (spline != NULL /*no given spline */ |
| && spline->segments >= 3 /*or trivial */ |
| /*or too non-overlap */ |
| && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) |
| && spline->xcoords[spline->segments - 1] >= rightedge |
| - MAXOVERLAP * (rightedge - leftedge)) { |
| *baseline = *spline; /*copy it */ |
| x = (leftedge + rightedge) / 2.0; |
| shift = ICOORD (0, (inT16) (gradient * x + c - spline->y (x))); |
| baseline->move (shift); |
| } |
| } |
| |
| |
| /********************************************************************** |
| * partition_line |
| * |
| * Partition a row of blobs into different groups of continuous |
| * y position. jumplimit specifies the max allowable limit on a jump |
| * before a new partition is started. |
| * The return value is the biggest partition |
| **********************************************************************/ |
| |
| int |
| partition_line ( //partition blobs |
| TBOX blobcoords[], //bounding boxes |
| int blobcount, /*no of blobs on row */ |
| int *numparts, /*number of partitions */ |
| char partids[], /*partition no of each blob */ |
| int partsizes[], /*no in each partition */ |
| QSPLINE * spline, /*curve to fit to */ |
| float jumplimit, /*allowed delta change */ |
| float ydiffs[] /*diff from spline */ |
| ) { |
| register int blobindex; /*no along text line */ |
| int bestpart; /*best new partition */ |
| int biggestpart; /*part with most members */ |
| float diff; /*difference from line */ |
| int startx; /*index of start blob */ |
| float partdiffs[MAXPARTS]; /*step between parts */ |
| |
| for (bestpart = 0; bestpart < MAXPARTS; bestpart++) |
| partsizes[bestpart] = 0; /*zero them all */ |
| |
| startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs); |
| *numparts = 1; /*1 partition */ |
| bestpart = -1; /*first point */ |
| for (blobindex = startx; blobindex < blobcount; blobindex++) { |
| /*do each blob in row */ |
| diff = ydiffs[blobindex]; /*diff from line */ |
| if (textord_oldbl_debug) { |
| tprintf ("%d(%d,%d), ", blobindex, |
| blobcoords[blobindex].left (), |
| blobcoords[blobindex].bottom ()); |
| } |
| bestpart = |
| choose_partition(diff, partdiffs, bestpart, jumplimit, numparts); |
| /*record partition */ |
| partids[blobindex] = bestpart; |
| partsizes[bestpart]++; /*another in it */ |
| } |
| |
| bestpart = -1; /*first point */ |
| partsizes[0]--; /*doing 1st pt again */ |
| /*do each blob in row */ |
| for (blobindex = startx; blobindex >= 0; blobindex--) { |
| diff = ydiffs[blobindex]; /*diff from line */ |
| if (textord_oldbl_debug) { |
| tprintf ("%d(%d,%d), ", blobindex, |
| blobcoords[blobindex].left (), |
| blobcoords[blobindex].bottom ()); |
| } |
| bestpart = |
| choose_partition(diff, partdiffs, bestpart, jumplimit, numparts); |
| /*record partition */ |
| partids[blobindex] = bestpart; |
| partsizes[bestpart]++; /*another in it */ |
| } |
| |
| for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) |
| if (partsizes[bestpart] >= partsizes[biggestpart]) |
| biggestpart = bestpart; /*new biggest */ |
| if (textord_oldbl_merge_parts) |
| merge_oldbl_parts(blobcoords, |
| blobcount, |
| partids, |
| partsizes, |
| biggestpart, |
| jumplimit); |
| return biggestpart; /*biggest partition */ |
| } |
| |
| |
| /********************************************************************** |
| * merge_oldbl_parts |
| * |
| * For any adjacent group of blobs in a different part, put them in the |
| * main part if they fit closely to neighbours in the main part. |
| **********************************************************************/ |
| |
| void |
| merge_oldbl_parts ( //partition blobs |
| TBOX blobcoords[], //bounding boxes |
| int blobcount, /*no of blobs on row */ |
| char partids[], /*partition no of each blob */ |
| int partsizes[], /*no in each partition */ |
| int biggestpart, //major partition |
| float jumplimit /*allowed delta change */ |
| ) { |
| BOOL8 found_one; //found a bestpart blob |
| BOOL8 close_one; //found was close enough |
| register int blobindex; /*no along text line */ |
| int prevpart; //previous iteration |
| int runlength; //no in this part |
| float diff; /*difference from line */ |
| int startx; /*index of start blob */ |
| int test_blob; //another index |
| FCOORD coord; //blob coordinate |
| float m, c; //fitted line |
| QLSQ stats; //line stuff |
| |
| prevpart = biggestpart; |
| runlength = 0; |
| startx = 0; |
| for (blobindex = 0; blobindex < blobcount; blobindex++) { |
| if (partids[blobindex] != prevpart) { |
| // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n", |
| // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(), |
| // prevpart,partids[blobindex],runlength); |
| if (prevpart != biggestpart && runlength > MAXBADRUN) { |
| stats.clear (); |
| for (test_blob = startx; test_blob < blobindex; test_blob++) { |
| coord = FCOORD ((blobcoords[test_blob].left () |
| + blobcoords[test_blob].right ()) / 2.0, |
| blobcoords[test_blob].bottom ()); |
| stats.add (coord.x (), coord.y ()); |
| } |
| stats.fit (1); |
| m = stats.get_b (); |
| c = stats.get_c (); |
| if (textord_oldbl_debug) |
| tprintf ("Fitted line y=%g x + %g\n", m, c); |
| found_one = FALSE; |
| close_one = FALSE; |
| for (test_blob = 1; !found_one |
| && (startx - test_blob >= 0 |
| || blobindex + test_blob <= blobcount); test_blob++) { |
| if (startx - test_blob >= 0 |
| && partids[startx - test_blob] == biggestpart) { |
| found_one = TRUE; |
| coord = FCOORD ((blobcoords[startx - test_blob].left () |
| + blobcoords[startx - |
| test_blob].right ()) / |
| 2.0, |
| blobcoords[startx - |
| test_blob].bottom ()); |
| diff = m * coord.x () + c - coord.y (); |
| if (textord_oldbl_debug) |
| tprintf |
| ("Diff of common blob to suspect part=%g at (%g,%g)\n", |
| diff, coord.x (), coord.y ()); |
| if (diff < jumplimit && -diff < jumplimit) |
| close_one = TRUE; |
| } |
| if (blobindex + test_blob <= blobcount |
| && partids[blobindex + test_blob - 1] == biggestpart) { |
| found_one = TRUE; |
| coord = |
| FCOORD ((blobcoords[blobindex + test_blob - 1]. |
| left () + blobcoords[blobindex + test_blob - |
| 1].right ()) / 2.0, |
| blobcoords[blobindex + test_blob - |
| 1].bottom ()); |
| diff = m * coord.x () + c - coord.y (); |
| if (textord_oldbl_debug) |
| tprintf |
| ("Diff of common blob to suspect part=%g at (%g,%g)\n", |
| diff, coord.x (), coord.y ()); |
| if (diff < jumplimit && -diff < jumplimit) |
| close_one = TRUE; |
| } |
| } |
| if (close_one) { |
| if (textord_oldbl_debug) |
| tprintf |
| ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n", |
| runlength, biggestpart, prevpart, |
| blobcoords[startx].left (), |
| blobcoords[startx].bottom ()); |
| //switch sides |
| partsizes[prevpart] -= runlength; |
| for (test_blob = startx; test_blob < blobindex; test_blob++) |
| partids[test_blob] = biggestpart; |
| } |
| } |
| prevpart = partids[blobindex]; |
| runlength = 1; |
| startx = blobindex; |
| } |
| else |
| runlength++; |
| } |
| } |
| |
| |
| /********************************************************************** |
| * get_ydiffs |
| * |
| * Get the differences between the blobs and the spline, |
| * putting them in ydiffs. The return value is the index |
| * of the blob in the middle of the "best behaved" region |
| **********************************************************************/ |
| |
| int |
| get_ydiffs ( //evaluate differences |
| TBOX blobcoords[], //bounding boxes |
| int blobcount, /*no of blobs */ |
| QSPLINE * spline, /*approximating spline */ |
| float ydiffs[] /*output */ |
| ) { |
| register int blobindex; /*current blob */ |
| int xcentre; /*xcoord */ |
| int lastx; /*last xcentre */ |
| float diffsum; /*sum of diffs */ |
| float diff; /*current difference */ |
| float drift; /*sum of spline steps */ |
| float bestsum; /*smallest diffsum */ |
| int bestindex; /*index of bestsum */ |
| |
| diffsum = 0.0f; |
| bestindex = 0; |
| bestsum = (float) MAX_INT32; |
| drift = 0.0f; |
| lastx = blobcoords[0].left (); |
| /*do each blob in row */ |
| for (blobindex = 0; blobindex < blobcount; blobindex++) { |
| /*centre of blob */ |
| xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; |
| //step functions in spline |
| drift += spline->step (lastx, xcentre); |
| lastx = xcentre; |
| diff = blobcoords[blobindex].bottom (); |
| diff -= spline->y (xcentre); |
| diff += drift; |
| ydiffs[blobindex] = diff; /*store difference */ |
| if (blobindex > 2) |
| /*remove old one */ |
| diffsum -= ABS (ydiffs[blobindex - 3]); |
| diffsum += ABS (diff); /*add new one */ |
| if (blobindex >= 2 && diffsum < bestsum) { |
| bestsum = diffsum; /*find min sum */ |
| bestindex = blobindex - 1; /*middle of set */ |
| } |
| } |
| return bestindex; |
| } |
| |
| |
| /********************************************************************** |
| * choose_partition |
| * |
| * Choose a partition for the point and return the index. |
| **********************************************************************/ |
| |
| int |
| choose_partition ( //select partition |
| register float diff, /*diff from spline */ |
| float partdiffs[], /*diff on all parts */ |
| int lastpart, /*last assigned partition */ |
| float jumplimit, /*new part threshold */ |
| int *partcount /*no of partitions */ |
| ) { |
| register int partition; /*partition no */ |
| int bestpart; /*best new partition */ |
| float bestdelta; /*best gap from a part */ |
| static float drift; /*drift from spline */ |
| float delta; /*diff from part */ |
| static float lastdelta; /*previous delta */ |
| |
| if (lastpart < 0) { |
| partdiffs[0] = diff; |
| lastpart = 0; /*first point */ |
| drift = 0.0f; |
| lastdelta = 0.0f; |
| } |
| /*adjusted diff from part */ |
| delta = diff - partdiffs[lastpart] - drift; |
| if (textord_oldbl_debug) { |
| tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift); |
| } |
| if (ABS (delta) > jumplimit / 2) { |
| /*delta on part 0 */ |
| bestdelta = diff - partdiffs[0] - drift; |
| bestpart = 0; /*0 best so far */ |
| for (partition = 1; partition < *partcount; partition++) { |
| delta = diff - partdiffs[partition] - drift; |
| if (ABS (delta) < ABS (bestdelta)) { |
| bestdelta = delta; |
| bestpart = partition; /*part with nearest jump */ |
| } |
| } |
| delta = bestdelta; |
| /*too far away */ |
| if (ABS (bestdelta) > jumplimit |
| && *partcount < MAXPARTS) { /*and spare part left */ |
| bestpart = (*partcount)++; /*best was new one */ |
| /*start new one */ |
| partdiffs[bestpart] = diff - drift; |
| delta = 0.0f; |
| } |
| } |
| else { |
| bestpart = lastpart; /*best was last one */ |
| } |
| |
| if (bestpart == lastpart |
| && (ABS (delta - lastdelta) < jumplimit / 2 |
| || ABS (delta) < jumplimit / 2)) |
| /*smooth the drift */ |
| drift = (3 * drift + delta) / 3; |
| lastdelta = delta; |
| |
| if (textord_oldbl_debug) { |
| tprintf ("P=%d\n", bestpart); |
| } |
| |
| return bestpart; |
| } |
| |
| |
| ///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking |
| //partitions and gives all the rest partid 0*/ |
| // |
| //merge_partitions(partids,partcount,blobcount,bestpart) |
| //register char *partids; /*partition numbers*/ |
| //int partcount; /*no of partitions*/ |
| //int blobcount; /*no of blobs*/ |
| //int bestpart; /*best partition*/ |
| //{ |
| // register int blobindex; /*no along text line*/ |
| // int runlength; /*run of same partition*/ |
| // int bestrun; /*biggest runlength*/ |
| // |
| // bestrun=0; /*no runs yet*/ |
| // runlength=1; |
| // for (blobindex=1;blobindex<blobcount;blobindex++) |
| // { if (partids[blobindex]!=partids[blobindex-1]) |
| // { if (runlength>bestrun) |
| // bestrun=runlength; /*find biggest run*/ |
| // runlength=1; /*new run*/ |
| // } |
| // else |
| // { runlength++; |
| // } |
| // } |
| // if (runlength>bestrun) |
| // bestrun=runlength; |
| // |
| // for (blobindex=0;blobindex<blobcount;blobindex++) |
| // { if (blobindex<1 |
| // || partids[blobindex]!=partids[blobindex-1]) |
| // { if ((blobindex+1>=blobcount |
| // || partids[blobindex]!=partids[blobindex+1]) |
| // /*loner*/ |
| // && (bestrun>2 || partids[blobindex]!=bestpart)) |
| // { partids[blobindex]=partcount; /*discard loner*/ |
| // } |
| // else if (blobindex+1<blobcount |
| // && partids[blobindex]==partids[blobindex+1] |
| // /*pair*/ |
| // && (blobindex+2>=blobcount |
| // || partids[blobindex]!=partids[blobindex+2]) |
| // && (bestrun>3 || partids[blobindex]!=bestpart)) |
| // { partids[blobindex]=partcount; /*discard both*/ |
| // partids[blobindex+1]=partcount; |
| // } |
| // } |
| // } |
| // for (blobindex=0;blobindex<blobcount;blobindex++) |
| // { if (partids[blobindex]<partcount) |
| // partids[blobindex]=0; /*all others together*/ |
| // } |
| //} |
| |
| /********************************************************************** |
| * partition_coords |
| * |
| * Get the x,y coordinates of all points in the bestpart and put them |
| * in xcoords,ycoords. Return the number of points found. |
| **********************************************************************/ |
| |
| int |
| partition_coords ( //find relevant coords |
| TBOX blobcoords[], //bounding boxes |
| int blobcount, /*no of blobs in row */ |
| char partids[], /*partition no of each blob */ |
| int bestpart, /*best new partition */ |
| int xcoords[], /*points to work on */ |
| int ycoords[] /*points to work on */ |
| ) { |
| register int blobindex; /*no along text line */ |
| int pointcount; /*no of points */ |
| |
| pointcount = 0; |
| for (blobindex = 0; blobindex < blobcount; blobindex++) { |
| if (partids[blobindex] == bestpart) { |
| /*centre of blob */ |
| xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; |
| ycoords[pointcount++] = blobcoords[blobindex].bottom (); |
| } |
| } |
| return pointcount; /*no of points found */ |
| } |
| |
| |
| /********************************************************************** |
| * segment_spline |
| * |
| * Segment the row at midpoints between maxima and minima of the x,y pairs. |
| * The xstarts of the segments are returned and the number found. |
| **********************************************************************/ |
| |
| int |
| segment_spline ( //make xstarts |
| TBOX blobcoords[], //boundign boxes |
| int blobcount, /*no of blobs in row */ |
| int xcoords[], /*points to work on */ |
| int ycoords[], /*points to work on */ |
| int degree, int pointcount, /*no of points */ |
| int xstarts[] //result |
| ) { |
| register int ptindex; /*no along text line */ |
| register int segment; /*partition no */ |
| int lastmin, lastmax; /*possible turn points */ |
| int turnpoints[SPLINESIZE]; /*good turning points */ |
| int turncount; /*no of turning points */ |
| int max_x; //max specified coord |
| |
| xstarts[0] = xcoords[0] - 1; //leftmost defined pt |
| max_x = xcoords[pointcount - 1] + 1; |
| if (degree < 2) |
| pointcount = 0; |
| turncount = 0; /*no turning points yet */ |
| if (pointcount > 3) { |
| ptindex = 1; |
| lastmax = lastmin = 0; /*start with first one */ |
| while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) { |
| /*minimum */ |
| if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) { |
| if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) { |
| if (turncount == 0 || turnpoints[turncount - 1] != lastmax) |
| /*new max point */ |
| turnpoints[turncount++] = lastmax; |
| lastmin = ptindex; /*latest minimum */ |
| } |
| else if (ycoords[ptindex] < ycoords[lastmin]) { |
| lastmin = ptindex; /*lower minimum */ |
| } |
| } |
| |
| /*maximum */ |
| if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) { |
| if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) { |
| if (turncount == 0 || turnpoints[turncount - 1] != lastmin) |
| /*new min point */ |
| turnpoints[turncount++] = lastmin; |
| lastmax = ptindex; /*latest maximum */ |
| } |
| else if (ycoords[ptindex] > ycoords[lastmax]) { |
| lastmax = ptindex; /*higher maximum */ |
| } |
| } |
| ptindex++; |
| } |
| /*possible global min */ |
| if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT |
| && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) { |
| if (turncount < SPLINESIZE - 1) |
| /*2 more turns */ |
| turnpoints[turncount++] = lastmax; |
| if (turncount < SPLINESIZE - 1) |
| turnpoints[turncount++] = ptindex; |
| } |
| else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT |
| /*possible global max */ |
| && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) { |
| if (turncount < SPLINESIZE - 1) |
| /*2 more turns */ |
| turnpoints[turncount++] = lastmin; |
| if (turncount < SPLINESIZE - 1) |
| turnpoints[turncount++] = ptindex; |
| } |
| else if (turncount > 0 && turnpoints[turncount - 1] == lastmin |
| && turncount < SPLINESIZE - 1) { |
| if (ycoords[ptindex] > ycoords[lastmax]) |
| turnpoints[turncount++] = ptindex; |
| else |
| turnpoints[turncount++] = lastmax; |
| } |
| else if (turncount > 0 && turnpoints[turncount - 1] == lastmax |
| && turncount < SPLINESIZE - 1) { |
| if (ycoords[ptindex] < ycoords[lastmin]) |
| turnpoints[turncount++] = ptindex; |
| else |
| turnpoints[turncount++] = lastmin; |
| } |
| } |
| |
| if (textord_oldbl_debug && turncount > 0) |
| tprintf ("First turn is %d at (%d,%d)\n", |
| turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]); |
| for (segment = 1; segment < turncount; segment++) { |
| /*centre y coord */ |
| lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2; |
| |
| /* fix alg so that it works with both rising and falling sections */ |
| if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) |
| /*find rising y centre */ |
| for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++); |
| else |
| /*find falling y centre */ |
| for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++); |
| |
| /*centre x */ |
| xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] |
| + xcoords[turnpoints[segment - 1]] |
| + xcoords[turnpoints[segment]] + 2) / 4; |
| /*halfway between turns */ |
| if (textord_oldbl_debug) |
| tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", |
| segment, turnpoints[segment], |
| xcoords[turnpoints[segment]], ycoords[turnpoints[segment]], |
| ptindex - 1, xcoords[ptindex - 1], xstarts[segment]); |
| } |
| |
| xstarts[segment] = max_x; |
| return segment; /*no of splines */ |
| } |
| |
| |
| /********************************************************************** |
| * split_stepped_spline |
| * |
| * Re-segment the spline in cases where there is a big step function. |
| * Return TRUE if any were done. |
| **********************************************************************/ |
| |
| BOOL8 |
| split_stepped_spline ( //make xstarts |
| QSPLINE * baseline, //current shot |
| float jumplimit, //max step fuction |
| int xcoords[], /*points to work on */ |
| int xstarts[], //result |
| int &segments //no of segments |
| ) { |
| BOOL8 doneany; //return value |
| register int segment; /*partition no */ |
| int startindex, centreindex, endindex; |
| float leftcoord, rightcoord; |
| int leftindex, rightindex; |
| float step; //spline step |
| |
| doneany = FALSE; |
| startindex = 0; |
| for (segment = 1; segment < segments - 1; segment++) { |
| step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0, |
| (xstarts[segment] + xstarts[segment + 1]) / 2.0); |
| if (step < 0) |
| step = -step; |
| if (step > jumplimit) { |
| while (xcoords[startindex] < xstarts[segment - 1]) |
| startindex++; |
| centreindex = startindex; |
| while (xcoords[centreindex] < xstarts[segment]) |
| centreindex++; |
| endindex = centreindex; |
| while (xcoords[endindex] < xstarts[segment + 1]) |
| endindex++; |
| if (segments >= SPLINESIZE) { |
| if (textord_debug_baselines) |
| tprintf ("Too many segments to resegment spline!!\n"); |
| } |
| else if (endindex - startindex >= textord_spline_medianwin * 3) { |
| while (centreindex - startindex < |
| textord_spline_medianwin * 3 / 2) |
| centreindex++; |
| while (endindex - centreindex < |
| textord_spline_medianwin * 3 / 2) |
| centreindex--; |
| leftindex = (startindex + startindex + centreindex) / 3; |
| rightindex = (centreindex + endindex + endindex) / 3; |
| leftcoord = |
| (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0; |
| rightcoord = |
| (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0; |
| while (xcoords[leftindex] > leftcoord |
| && leftindex - startindex > textord_spline_medianwin) |
| leftindex--; |
| while (xcoords[leftindex] < leftcoord |
| && centreindex - leftindex > |
| textord_spline_medianwin / 2) |
| leftindex++; |
| if (xcoords[leftindex] - leftcoord > |
| leftcoord - xcoords[leftindex - 1]) |
| leftindex--; |
| while (xcoords[rightindex] > rightcoord |
| && rightindex - centreindex > |
| textord_spline_medianwin / 2) |
| rightindex--; |
| while (xcoords[rightindex] < rightcoord |
| && endindex - rightindex > textord_spline_medianwin) |
| rightindex++; |
| if (xcoords[rightindex] - rightcoord > |
| rightcoord - xcoords[rightindex - 1]) |
| rightindex--; |
| if (textord_debug_baselines) |
| tprintf ("Splitting spline at %d with step %g at (%d,%d)\n", |
| xstarts[segment], |
| baseline-> |
| step ((xstarts[segment - 1] + |
| xstarts[segment]) / 2.0, |
| (xstarts[segment] + |
| xstarts[segment + 1]) / 2.0), |
| (xcoords[leftindex - 1] + xcoords[leftindex]) / 2, |
| (xcoords[rightindex - 1] + xcoords[rightindex]) / 2); |
| insert_spline_point (xstarts, segment, |
| (xcoords[leftindex - 1] + |
| xcoords[leftindex]) / 2, |
| (xcoords[rightindex - 1] + |
| xcoords[rightindex]) / 2, segments); |
| doneany = TRUE; |
| } |
| else if (textord_debug_baselines) { |
| tprintf |
| ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", |
| startindex, centreindex, endindex, |
| (inT32) textord_spline_medianwin); |
| } |
| } |
| // else tprintf("Spline step at %d is %g\n", |
| // xstarts[segment], |
| // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0, |
| // (xstarts[segment]+xstarts[segment+1])/2.0)); |
| } |
| return doneany; |
| } |
| |
| |
| /********************************************************************** |
| * insert_spline_point |
| * |
| * Insert a new spline point and shuffle up the others. |
| **********************************************************************/ |
| |
| void |
| insert_spline_point ( //get descenders |
| int xstarts[], //starts to shuffle |
| int segment, //insertion pt |
| int coord1, //coords to add |
| int coord2, int &segments //total segments |
| ) { |
| int index; //for shuffling |
| |
| for (index = segments; index > segment; index--) |
| xstarts[index + 1] = xstarts[index]; |
| segments++; |
| xstarts[segment] = coord1; |
| xstarts[segment + 1] = coord2; |
| } |
| |
| |
| /********************************************************************** |
| * find_lesser_parts |
| * |
| * Average the step from the spline for the other partitions |
| * and find the commonest partition which has a descender. |
| **********************************************************************/ |
| |
| void |
| find_lesser_parts ( //get descenders |
| TO_ROW * row, //row to process |
| TBOX blobcoords[], //bounding boxes |
| int blobcount, /*no of blobs */ |
| char partids[], /*partition of each blob */ |
| int partsizes[], /*size of each part */ |
| int partcount, /*no of partitions */ |
| int bestpart /*biggest partition */ |
| ) { |
| register int blobindex; /*index of blob */ |
| register int partition; /*current partition */ |
| int xcentre; /*centre of blob */ |
| int poscount; /*count of best up step */ |
| int negcount; /*count of best down step */ |
| float partsteps[MAXPARTS]; /*average step to part */ |
| float bestpos; /*best up step */ |
| float bestneg; /*best down step */ |
| int runlength; /*length of bad run */ |
| int biggestrun; /*biggest bad run */ |
| |
| biggestrun = 0; |
| for (partition = 0; partition < partcount; partition++) |
| partsteps[partition] = 0.0; /*zero accumulators */ |
| for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) { |
| xcentre = (blobcoords[blobindex].left () |
| + blobcoords[blobindex].right ()) >> 1; |
| /*in other parts */ |
| if (partids[blobindex] != bestpart) { |
| runlength++; /*run of non bests */ |
| if (runlength > biggestrun) |
| biggestrun = runlength; |
| partsteps[partids[blobindex]] += blobcoords[blobindex].bottom () |
| - row->baseline.y (xcentre); |
| } |
| else |
| runlength = 0; |
| } |
| if (biggestrun > MAXBADRUN) |
| row->xheight = -1.0f; /*failed */ |
| else |
| row->xheight = 1.0f; /*success */ |
| poscount = negcount = 0; |
| bestpos = bestneg = 0.0; /*no step yet */ |
| for (partition = 0; partition < partcount; partition++) { |
| if (partition != bestpart) { |
| |
| //by jetsoft divide by zero possible |
| if (partsizes[partition]==0) |
| partsteps[partition]=0; |
| else |
| partsteps[partition] /= partsizes[partition]; |
| // |
| |
| |
| if (partsteps[partition] >= MINASCRISE |
| && partsizes[partition] > poscount) { |
| /*ascender rise */ |
| bestpos = partsteps[partition]; |
| /*2nd most popular */ |
| poscount = partsizes[partition]; |
| } |
| if (partsteps[partition] <= -MINASCRISE |
| && partsizes[partition] > negcount) { |
| /*ascender rise */ |
| bestneg = partsteps[partition]; |
| /*2nd most popular */ |
| negcount = partsizes[partition]; |
| } |
| } |
| } |
| /*average x-height */ |
| partsteps[bestpart] /= blobcount; |
| row->descdrop = bestneg; |
| } |
| |
| |
| /********************************************************************** |
| * old_first_xheight |
| * |
| * Makes an x-height spline by copying the baseline and shifting it. |
| * It estimates the x-height across the line to use as the shift. |
| * It also finds the ascender height if it can. |
| **********************************************************************/ |
| |
| void |
| old_first_xheight ( //the wiseowl way |
| TO_ROW * row, /*current row */ |
| TBOX blobcoords[], /*blob bounding boxes */ |
| int initialheight, //initial guess |
| int blobcount, /*blobs in blobcoords */ |
| QSPLINE * baseline, /*established */ |
| float jumplimit /*min ascender height */ |
| ) { |
| register int blobindex; /*current blob */ |
| /*height statistics */ |
| STATS heightstat (0, MAXHEIGHT); |
| int height; /*height of blob */ |
| int xcentre; /*centre of blob */ |
| int lineheight; /*approx xheight */ |
| float ascenders; /*ascender sum */ |
| int asccount; /*no of ascenders */ |
| float xsum; /*xheight sum */ |
| int xcount; /*xheight count */ |
| register float diff; /*height difference */ |
| |
| if (blobcount > 1) { |
| for (blobindex = 0; blobindex < blobcount; blobindex++) { |
| xcentre = (blobcoords[blobindex].left () |
| + blobcoords[blobindex].right ()) / 2; |
| /*height of blob */ |
| height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5); |
| if (height > initialheight * oldbl_xhfract |
| && height > textord_min_xheight) |
| heightstat.add (height, 1); |
| } |
| if (heightstat.get_total () > 3) { |
| lineheight = (int) heightstat.ile (0.25); |
| if (lineheight <= 0) |
| lineheight = (int) heightstat.ile (0.5); |
| } |
| else |
| lineheight = initialheight; |
| } |
| else { |
| lineheight = (int) (blobcoords[0].top () |
| - baseline->y ((blobcoords[0].left () |
| + blobcoords[0].right ()) / 2) + |
| 0.5); |
| } |
| |
| xsum = 0.0f; |
| xcount = 0; |
| for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; |
| blobindex++) { |
| xcentre = (blobcoords[blobindex].left () |
| + blobcoords[blobindex].right ()) / 2; |
| diff = blobcoords[blobindex].top () - baseline->y (xcentre); |
| /*is it ascender */ |
| if (diff > lineheight + jumplimit) { |
| ascenders += diff; |
| asccount++; /*count ascenders */ |
| } |
| else if (diff > lineheight - jumplimit) { |
| xsum += diff; /*mean xheight */ |
| xcount++; |
| } |
| } |
| if (xcount > 0) |
| xsum /= xcount; /*average xheight */ |
| else |
| xsum = (float) lineheight; /*guess it */ |
| row->xheight *= xsum; |
| if (asccount > 0) |
| row->ascrise = ascenders / asccount - xsum; |
| else |
| row->ascrise = 0.0f; /*had none */ |
| if (row->xheight == 0) |
| row->xheight = -1.0f; |
| } |
| |
| |
| /********************************************************************** |
| * make_first_xheight |
| * |
| * Makes an x-height spline by copying the baseline and shifting it. |
| * It estimates the x-height across the line to use as the shift. |
| * It also finds the ascender height if it can. |
| **********************************************************************/ |
| |
| void |
| make_first_xheight ( //find xheight |
| TO_ROW * row, /*current row */ |
| TBOX blobcoords[], /*blob bounding boxes */ |
| int lineheight, //initial guess |
| int init_lineheight, //block level guess |
| int blobcount, /*blobs in blobcoords */ |
| QSPLINE * baseline, /*established */ |
| float jumplimit /*min ascender height */ |
| ) { |
| STATS heightstat (0, HEIGHTBUCKETS); |
| int lefts[HEIGHTBUCKETS]; |
| int rights[HEIGHTBUCKETS]; |
| int modelist[MODENUM]; |
| int blobindex; |
| int mode_count; //blobs to count in thr |
| int sign_bit; |
| int mode_threshold; |
| const int kBaselineTouch = 2; // This really should change with resolution. |
| const int kGoodStrength = 8; // Strength of baseline-touching heights. |
| const float kMinHeight = 0.25; // Min fraction of lineheight to use. |
| |
| sign_bit = row->xheight > 0 ? 1 : -1; |
| |
| memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0])); |
| memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0])); |
| mode_count = 0; |
| for (blobindex = 0; blobindex < blobcount; blobindex++) { |
| int xcenter = (blobcoords[blobindex].left () + |
| blobcoords[blobindex].right ()) / 2; |
| float base = baseline->y(xcenter); |
| float bottomdiff = fabs(base - blobcoords[blobindex].bottom()); |
| int strength = textord_ocropus_mode && |
| bottomdiff <= kBaselineTouch ? kGoodStrength : 1; |
| int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5); |
| if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) { |
| if (height > lineheight * oldbl_xhfract |
| && height > textord_min_xheight) { |
| heightstat.add (height, strength); |
| if (height < HEIGHTBUCKETS) { |
| if (xcenter > rights[height]) |
| rights[height] = xcenter; |
| if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) |
| lefts[height] = xcenter; |
| } |
| } |
| mode_count += strength; |
| } |
| } |
| |
| mode_threshold = (int) (blobcount * 0.1); |
| if (oldbl_dot_error_size > 1 || oldbl_xhfix) |
| mode_threshold = (int) (mode_count * 0.1); |
| |
| if (textord_oldbl_debug) { |
| tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n", |
| blobcount, mode_count, mode_threshold); |
| } |
| find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM); |
| if (textord_oldbl_debug) { |
| for (blobindex = 0; blobindex < MODENUM; blobindex++) |
| tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]); |
| tprintf ("\n"); |
| } |
| pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold); |
| |
| if (textord_oldbl_debug) |
| tprintf ("Output xheight=%g\n", row->xheight); |
| if (row->xheight < 0 && textord_oldbl_debug) |
| tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight); |
| |
| if (sign_bit < 0) |
| row->xheight = -row->xheight; |
| } |
| |
| /********************************************************************** |
| * find_top_modes |
| * |
| * Fill the input array with the indices of the top ten modes of the |
| * input distribution. |
| **********************************************************************/ |
| |
| const int kMinModeFactorOcropus = 32; |
| const int kMinModeFactor = 12; |
| |
| void |
| find_top_modes ( //get modes |
| STATS * stats, //stats to hack |
| int statnum, //no of piles |
| int modelist[], int modenum //no of modes to get |
| ) { |
| int mode_count; |
| int last_i = 0; |
| int last_max = MAX_INT32; |
| int i; |
| int mode; |
| int total_max = 0; |
| int mode_factor = textord_ocropus_mode ? |
| kMinModeFactorOcropus : kMinModeFactor; |
| |
| for (mode_count = 0; mode_count < modenum; mode_count++) { |
| mode = 0; |
| for (i = 0; i < statnum; i++) { |
| if (stats->pile_count (i) > stats->pile_count (mode)) { |
| if ((stats->pile_count (i) < last_max) || |
| ((stats->pile_count (i) == last_max) && (i > last_i))) { |
| mode = i; |
| } |
| } |
| } |
| last_i = mode; |
| last_max = stats->pile_count (last_i); |
| total_max += last_max; |
| if (last_max <= total_max / mode_factor) |
| mode = 0; |
| modelist[mode_count] = mode; |
| } |
| } |
| |
| |
| /********************************************************************** |
| * pick_x_height |
| * |
| * Choose based on the height modes the best x height value. |
| **********************************************************************/ |
| |
| void pick_x_height(TO_ROW * row, //row to do |
| int modelist[], |
| int lefts[], int rights[], |
| STATS * heightstat, |
| int mode_threshold) { |
| int x; |
| int y; |
| int z; |
| float ratio; |
| int found_one_bigger = FALSE; |
| int best_x_height = 0; |
| int best_asc = 0; |
| int num_in_best; |
| |
| for (x = 0; x < MODENUM; x++) { |
| for (y = 0; y < MODENUM; y++) { |
| /* Check for two modes */ |
| if (modelist[x] && modelist[y] && |
| heightstat->pile_count (modelist[x]) > mode_threshold && |
| (!textord_ocropus_mode || |
| MIN(rights[modelist[x]], rights[modelist[y]]) > |
| MAX(lefts[modelist[x]], lefts[modelist[y]]))) { |
| ratio = (float) modelist[y] / (float) modelist[x]; |
| if (1.2 < ratio && ratio < 1.8) { |
| /* Two modes found */ |
| best_x_height = modelist[x]; |
| num_in_best = heightstat->pile_count (modelist[x]); |
| |
| /* Try to get one higher */ |
| do { |
| found_one_bigger = FALSE; |
| for (z = 0; z < MODENUM; z++) { |
| if (modelist[z] == best_x_height + 1 && |
| (!textord_ocropus_mode || |
| MIN(rights[modelist[x]], rights[modelist[y]]) > |
| MAX(lefts[modelist[x]], lefts[modelist[y]]))) { |
| ratio = (float) modelist[y] / (float) modelist[z]; |
| if ((1.2 < ratio && ratio < 1.8) && |
| /* Should be half of best */ |
| heightstat->pile_count (modelist[z]) > |
| num_in_best * 0.5) { |
| best_x_height++; |
| found_one_bigger = TRUE; |
| break; |
| } |
| } |
| } |
| } |
| while (found_one_bigger); |
| |
| /* try to get a higher ascender */ |
| |
| best_asc = modelist[y]; |
| num_in_best = heightstat->pile_count (modelist[y]); |
| |
| /* Try to get one higher */ |
| do { |
| found_one_bigger = FALSE; |
| for (z = 0; z < MODENUM; z++) { |
| if (modelist[z] > best_asc && |
| (!textord_ocropus_mode || |
| MIN(rights[modelist[x]], rights[modelist[y]]) > |
| MAX(lefts[modelist[x]], lefts[modelist[y]]))) { |
| ratio = (float) modelist[z] / (float) best_x_height; |
| if ((1.2 < ratio && ratio < 1.8) && |
| /* Should be half of best */ |
| heightstat->pile_count (modelist[z]) > |
| num_in_best * 0.5) { |
| best_asc = modelist[z]; |
| found_one_bigger = TRUE; |
| break; |
| } |
| } |
| } |
| } |
| while (found_one_bigger); |
| |
| row->xheight = (float) best_x_height; |
| row->ascrise = (float) best_asc - best_x_height; |
| return; |
| } |
| } |
| } |
| } |
| |
| best_x_height = modelist[0]; /* Single Mode found */ |
| num_in_best = heightstat->pile_count (best_x_height); |
| do { |
| /* Try to get one higher */ |
| found_one_bigger = FALSE; |
| for (z = 1; z < MODENUM; z++) { |
| /* Should be half of best */ |
| if ((modelist[z] == best_x_height + 1) && |
| (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) { |
| best_x_height++; |
| found_one_bigger = TRUE; |
| break; |
| } |
| } |
| } |
| while (found_one_bigger); |
| |
| row->ascrise = 0.0f; |
| row->xheight = (float) best_x_height; |
| if (row->xheight == 0) |
| row->xheight = -1.0f; |
| } |