| #/*====================================================================* |
| # - Copyright (C) 2001 Leptonica. All rights reserved. |
| # - This software is distributed in the hope that it will be |
| # - useful, but with NO WARRANTY OF ANY KIND. |
| # - No author or distributor accepts responsibility to anyone for the |
| # - consequences of using this software, or for whether it serves any |
| # - particular purpose or works at all, unless he or she says so in |
| # - writing. Everyone is granted permission to copy, modify and |
| # - redistribute this source code, for commercial or non-commercial |
| # - purposes, with the following restrictions: (1) the origin of this |
| # - source code must not be misrepresented; (2) modified versions must |
| # - be plainly marked as such; and (3) this notice may not be removed |
| # - or altered from any source or modified source distribution. |
| # *====================================================================*/ |
| |
| /* |
| * skew.c |
| * |
| * Simple top-level deskew interfaces |
| * PIX *pixDeskew() |
| * PIX *pixFindSkewAndDeskew() |
| * |
| * Simple top-level angle-finding interface |
| * l_int32 pixFindSkew() |
| * |
| * Basic angle-finding functions with all parameters |
| * l_int32 pixFindSkewSweep() |
| * l_int32 pixFindSkewSweepAndSearch() |
| * l_int32 pixFindSkewSweepAndSearchScore() |
| * l_int32 pixFindSkewSweepAndSearchScorePivot() |
| * |
| * Search over arbitrary range of angles in orthogonal directions |
| * l_int32 pixFindSkewOrthogonalRange() |
| * |
| * Differential square sum function for scoring |
| * l_int32 pixFindDifferentialSquareSum() |
| * |
| * Measures of variance of row sums |
| * l_int32 pixFindNormalizedSquareSum() |
| * |
| * |
| * ============================================================== |
| * Page skew detection |
| * |
| * Skew is determined by pixel profiles, which are computed |
| * as pixel sums along the raster line for each line in the |
| * image. By vertically shearing the image by a given angle, |
| * the sums can be computed quickly along the raster lines |
| * rather than along lines at that angle. The score is |
| * computed from these line sums by taking the square of |
| * the DIFFERENCE between adjacent line sums, summed over |
| * all lines. The skew angle is then found as the angle |
| * that maximizes the score. The actual computation for |
| * any sheared image is done in the function |
| * pixFindDifferentialSquareSum(). |
| * |
| * The search for the angle that maximizes this score is |
| * most efficiently performed by first sweeping coarsely |
| * over angles, using a significantly reduced image (say, 4x |
| * reduction), to find the approximate maximum within a half |
| * degree or so, and then doing an interval-halving binary |
| * search at higher resolution to get the skew angle to |
| * within 1/20 degree or better. |
| * |
| * The differential signal is used (rather than just using |
| * that variance of line sums) because it rejects the |
| * background noise due to total number of black pixels, |
| * and has maximum contributions from the baselines and |
| * x-height lines of text when the textlines are aligned |
| * with the raster lines. It also works well in multicolumn |
| * pages where the textlines do not line up across columns. |
| * |
| * The method is fast, accurate to within an angle (in radians) |
| * of approximately the inverse width in pixels of the image, |
| * and will work on a surprisingly small amount of text data |
| * (just a couple of text lines). Consequently, it can |
| * also be used to find local skew if the skew were to vary |
| * significantly over the page. Local skew determination |
| * is not very important except for locating lines of |
| * handwritten text that may be mixed with printed text. |
| */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <math.h> |
| #include "allheaders.h" |
| |
| /* Default sweep angle parameters for pixFindSkew() */ |
| static const l_float32 DEFAULT_SWEEP_RANGE = 5.; /* degrees */ |
| static const l_float32 DEFAULT_SWEEP_DELTA = 1.; /* degrees */ |
| |
| /* Default final angle difference parameter for binary |
| * search in pixFindSkew(). The expected accuracy is |
| * not better than the inverse image width in pixels, |
| * say, 1/2000 radians, or about 0.03 degrees. */ |
| static const l_float32 DEFAULT_MINBS_DELTA = 0.01; /* degrees */ |
| |
| /* Default scale factors for pixFindSkew() */ |
| static const l_int32 DEFAULT_SWEEP_REDUCTION = 4; /* sweep part; 4 is good */ |
| static const l_int32 DEFAULT_BS_REDUCTION = 2; /* binary search part */ |
| |
| /* Minimum angle for deskewing in pixDeskew() */ |
| static const l_float32 MIN_DESKEW_ANGLE = 0.1; /* degree */ |
| |
| /* Minimum allowed confidence (ratio) for deskewing in pixDeskew() */ |
| static const l_float32 MIN_ALLOWED_CONFIDENCE = 3.0; |
| |
| /* Minimum allowed maxscore to give nonzero confidence */ |
| static const l_int32 MIN_VALID_MAXSCORE = 10000; |
| |
| /* Constant setting threshold for minimum allowed minscore |
| * to give nonzero confidence; multiply this constant by |
| * (height * width^2) */ |
| static const l_float32 MINSCORE_THRESHOLD_CONSTANT = 0.000002; |
| |
| |
| #ifndef NO_CONSOLE_IO |
| #define DEBUG_PRINT_SCORES 0 |
| #define DEBUG_PRINT_SWEEP 0 |
| #define DEBUG_PRINT_BINARY 0 |
| #define DEBUG_PRINT_ORTH 0 |
| #define DEBUG_THRESHOLD 0 |
| #define DEBUG_PLOT_SCORES 0 |
| #endif /* ~NO_CONSOLE_IO */ |
| |
| |
| |
| /*----------------------------------------------------------------* |
| * Top-level interfaces * |
| *----------------------------------------------------------------*/ |
| /*! |
| * pixDeskew() |
| * |
| * Input: pixs (1 bpp) |
| * redsearch (for binary search: reduction factor = 1, 2 or 4) |
| * Return: deskewed pix, or null on error |
| * |
| * Notes: |
| * (1) This is the most simple high level interface, for 1 bpp input. |
| * (2) It first finds the skew angle. If the angle is large enough, |
| * it returns a deskewed image; otherwise, it returns a clone. |
| */ |
| PIX * |
| pixDeskew(PIX *pixs, |
| l_int32 redsearch) |
| { |
| PROCNAME("pixDeskew"); |
| |
| if (!pixs) |
| return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); |
| if (pixGetDepth(pixs) != 1) |
| return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); |
| if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
| return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); |
| |
| return pixFindSkewAndDeskew(pixs, redsearch, NULL, NULL); |
| } |
| |
| |
| /*! |
| * pixFindSkewAndDeskew() |
| * |
| * Input: pixs (1 bpp) |
| * redsearch (for binary search: reduction factor = 1, 2 or 4) |
| * &angle (<optional return> angle required to deskew, |
| * in degrees) |
| * &conf (<optional return> conf value is ratio max/min scores) |
| * Return: deskewed pix, or null on error |
| * |
| * Notes: |
| * (1) This first finds the skew angle. If the angle is large enough, |
| * it returns a deskewed image; otherwise, it returns a clone. |
| * (2) Use NULL for &angle and/or &conf if you don't want those values |
| * returned. |
| */ |
| PIX * |
| pixFindSkewAndDeskew(PIX *pixs, |
| l_int32 redsearch, |
| l_float32 *pangle, |
| l_float32 *pconf) |
| { |
| l_int32 ret; |
| l_float32 angle, conf, deg2rad; |
| PIX *pixd; |
| |
| PROCNAME("pixFindSkewAndDeskew"); |
| |
| if (!pixs) |
| return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); |
| if (pixGetDepth(pixs) != 1) |
| return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); |
| if (redsearch != 1 && redsearch != 2 && redsearch != 4) |
| return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); |
| |
| deg2rad = 3.1415926535 / 180.; |
| ret = pixFindSkewSweepAndSearch(pixs, &angle, &conf, |
| DEFAULT_SWEEP_REDUCTION, redsearch, |
| DEFAULT_SWEEP_RANGE, DEFAULT_SWEEP_DELTA, |
| DEFAULT_MINBS_DELTA); |
| if (pangle) |
| *pangle = angle; |
| if (pconf) |
| *pconf = conf; |
| if (ret) |
| return pixClone(pixs); |
| |
| if (L_ABS(angle) < MIN_DESKEW_ANGLE || conf < MIN_ALLOWED_CONFIDENCE) |
| return pixClone(pixs); |
| |
| if ((pixd = pixRotateShear(pixs, 0, 0, deg2rad * angle, L_BRING_IN_WHITE)) |
| == NULL) |
| return pixClone(pixs); |
| else |
| return pixd; |
| } |
| |
| |
| /*! |
| * pixFindSkew() |
| * |
| * Input: pixs (1 bpp) |
| * &angle (<return> angle required to deskew, in degrees) |
| * &conf (<return> confidence value is ratio max/min scores) |
| * Return: 0 if OK, 1 on error or if angle measurment not valid |
| * |
| * Notes: |
| * (1) This is a simple high-level interface, that uses default |
| * values of the parameters for reasonable speed and accuracy. |
| * (2) The angle returned is the negative of the skew angle of |
| * the image. It is the angle required for deskew. |
| * Clockwise rotations are positive angles. |
| */ |
| l_int32 |
| pixFindSkew(PIX *pixs, |
| l_float32 *pangle, |
| l_float32 *pconf) |
| { |
| PROCNAME("pixFindSkew"); |
| |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| if (pixGetDepth(pixs) != 1) |
| return ERROR_INT("pixs not 1 bpp", procName, 1); |
| if (!pangle) |
| return ERROR_INT("&angle not defined", procName, 1); |
| if (!pconf) |
| return ERROR_INT("&conf not defined", procName, 1); |
| |
| return pixFindSkewSweepAndSearch(pixs, pangle, pconf, |
| DEFAULT_SWEEP_REDUCTION, |
| DEFAULT_BS_REDUCTION, |
| DEFAULT_SWEEP_RANGE, |
| DEFAULT_SWEEP_DELTA, |
| DEFAULT_MINBS_DELTA); |
| } |
| |
| |
| /*----------------------------------------------------------------* |
| * Basic angle-finding functions with all parameters * |
| *----------------------------------------------------------------*/ |
| /*! |
| * pixFindSkewSweep() |
| * |
| * Input: pixs (1 bpp) |
| * &angle (<return> angle required to deskew, in degrees) |
| * reduction (factor = 1, 2, 4 or 8) |
| * sweeprange (half the full range; assumed about 0; in degrees) |
| * sweepdelta (angle increment of sweep; in degrees) |
| * Return: 0 if OK, 1 on error or if angle measurment not valid |
| * |
| * Notes: |
| * (1) This examines the 'score' for skew angles with equal intervals. |
| * (2) Caller must check the return value for validity of the result. |
| */ |
| l_int32 |
| pixFindSkewSweep(PIX *pixs, |
| l_float32 *pangle, |
| l_int32 reduction, |
| l_float32 sweeprange, |
| l_float32 sweepdelta) |
| { |
| l_int32 ret, bzero, i, nangles; |
| l_float32 deg2rad, theta; |
| l_float32 sum, maxscore, maxangle; |
| NUMA *natheta, *nascore; |
| PIX *pix, *pixt; |
| |
| PROCNAME("pixFindSkewSweep"); |
| |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| if (pixGetDepth(pixs) != 1) |
| return ERROR_INT("pixs not 1 bpp", procName, 1); |
| if (!pangle) |
| return ERROR_INT("&angle not defined", procName, 1); |
| if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) |
| return ERROR_INT("reduction must be in {1,2,4,8}", procName, 1); |
| |
| *pangle = 0.0; /* init */ |
| deg2rad = 3.1415926535 / 180.; |
| ret = 0; |
| |
| /* Generate reduced image, if requested */ |
| if (reduction == 1) |
| pix = pixClone(pixs); |
| else if (reduction == 2) |
| pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
| else if (reduction == 4) |
| pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); |
| else /* reduction == 8 */ |
| pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); |
| |
| pixZero(pix, &bzero); |
| if (bzero) { |
| pixDestroy(&pix); |
| return 1; |
| } |
| |
| nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); |
| natheta = numaCreate(nangles); |
| nascore = numaCreate(nangles); |
| pixt = pixCreateTemplate(pix); |
| |
| if (!pix || !pixt) { |
| ret = ERROR_INT("pix and pixt not both made", procName, 1); |
| goto cleanup; |
| } |
| if (!natheta || !nascore) { |
| ret = ERROR_INT("natheta and nascore not both made", procName, 1); |
| goto cleanup; |
| } |
| |
| for (i = 0; i < nangles; i++) { |
| theta = -sweeprange + i * sweepdelta; /* degrees */ |
| |
| /* Shear pix about the UL corner and put the result in pixt */ |
| pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); |
| |
| /* Get score */ |
| pixFindDifferentialSquareSum(pixt, &sum); |
| |
| #if DEBUG_PRINT_SCORES |
| L_INFO_FLOAT2("sum(%7.2f) = %7.0f", procName, theta, sum); |
| #endif /* DEBUG_PRINT_SCORES */ |
| |
| /* Save the result in the output arrays */ |
| numaAddNumber(nascore, sum); |
| numaAddNumber(natheta, theta); |
| } |
| |
| /* Find the location of the maximum (i.e., the skew angle) |
| * by fitting the largest data point and its two neighbors |
| * to a quadratic, using lagrangian interpolation. */ |
| numaFitMax(nascore, &maxscore, natheta, &maxangle); |
| *pangle = maxangle; |
| |
| #if DEBUG_PRINT_SWEEP |
| L_INFO_FLOAT2(" From sweep: angle = %7.3f, score = %7.3f", procName, |
| maxangle, maxscore); |
| #endif /* DEBUG_PRINT_SWEEP */ |
| |
| #if DEBUG_PLOT_SCORES |
| /* Plot the result -- the scores versus rotation angle -- |
| * using gnuplot with GPLOT_LINES (lines connecting data points). |
| * The GPLOT data structure is first created, with the |
| * appropriate data incorporated from the two input NUMAs, |
| * and then the function gplotMakeOutput() uses gnuplot to |
| * generate the output plot. This can be either a .png file |
| * or a .ps file, depending on whether you use GPLOT_PNG |
| * or GPLOT_PS. */ |
| {GPLOT *gplot; |
| gplot = gplotCreate("sweep_output", GPLOT_PNG, |
| "Sweep. Variance of difference of ON pixels vs. angle", |
| "angle (deg)", "score"); |
| gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); |
| gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); |
| gplotMakeOutput(gplot); |
| gplotDestroy(&gplot); |
| } |
| #endif /* DEBUG_PLOT_SCORES */ |
| |
| cleanup: |
| pixDestroy(&pix); |
| pixDestroy(&pixt); |
| numaDestroy(&nascore); |
| numaDestroy(&natheta); |
| return 0; |
| } |
| |
| |
| /*! |
| * pixFindSkewSweepAndSearch() |
| * |
| * Input: pixs (1 bpp) |
| * &angle (<return> angle required to deskew; in degrees) |
| * &conf (<return> confidence given by ratio of max/min score) |
| * redsweep (sweep reduction factor = 1, 2, 4 or 8) |
| * redsearch (binary search reduction factor = 1, 2, 4 or 8; |
| * and must not exceed redsweep) |
| * sweeprange (half the full range, assumed about 0; in degrees) |
| * sweepdelta (angle increment of sweep; in degrees) |
| * minbsdelta (min binary search increment angle; in degrees) |
| * Return: 0 if OK, 1 on error or if angle measurment not valid |
| * |
| * Notes: |
| * (1) This finds the skew angle, doing first a sweep through a set |
| * of equal angles, and then doing a binary search until |
| * convergence. |
| * (2) Caller must check the return value for validity of the result. |
| * (3) In computing the differential line sum variance score, we sum |
| * the result over scanlines, but we always skip: |
| * - at least one scanline |
| * - not more than 10% of the image height |
| * - not more than 5% of the image width |
| * (4) See also notes in pixFindSkewSweepAndSearchScore() |
| */ |
| l_int32 |
| pixFindSkewSweepAndSearch(PIX *pixs, |
| l_float32 *pangle, |
| l_float32 *pconf, |
| l_int32 redsweep, |
| l_int32 redsearch, |
| l_float32 sweeprange, |
| l_float32 sweepdelta, |
| l_float32 minbsdelta) |
| { |
| return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL, |
| redsweep, redsearch, 0.0, sweeprange, |
| sweepdelta, minbsdelta); |
| } |
| |
| |
| /*! |
| * pixFindSkewSweepAndSearchScore() |
| * |
| * Input: pixs (1 bpp) |
| * &angle (<return> angle required to deskew; in degrees) |
| * &conf (<return> confidence given by ratio of max/min score) |
| * &endscore (<optional return> max score; use NULL to ignore) |
| * redsweep (sweep reduction factor = 1, 2, 4 or 8) |
| * redsearch (binary search reduction factor = 1, 2, 4 or 8; |
| * and must not exceed redsweep) |
| * sweepcenter (angle about which sweep is performed; in degrees) |
| * sweeprange (half the full range, taken about sweepcenter; |
| * in degrees) |
| * sweepdelta (angle increment of sweep; in degrees) |
| * minbsdelta (min binary search increment angle; in degrees) |
| * Return: 0 if OK, 1 on error or if angle measurment not valid |
| * |
| * Notes: |
| * (1) This finds the skew angle, doing first a sweep through a set |
| * of equal angles, and then doing a binary search until convergence. |
| * (2) There are two built-in constants that determine if the |
| * returned confidence is nonzero: |
| * - MIN_VALID_MAXSCORE (minimum allowed maxscore) |
| * - MINSCORE_THRESHOLD_CONSTANT (determines minimum allowed |
| * minscore, by multiplying by (height * width^2) |
| * If either of these conditions is not satisfied, the returned |
| * confidence value will be zero. The maxscore is optionally |
| * returned in this function to allow evaluation of the |
| * resulting angle by a method that is independent of the |
| * returned confidence value. |
| * (3) The larger the confidence value, the greater the probability |
| * that the proper alignment is given by the angle that maximizes |
| * variance. It should be compared to a threshold, which depends |
| * on the application. Values between 3.0 and 6.0 are common. |
| * (4) By default, the shear is about the UL corner. |
| */ |
| l_int32 |
| pixFindSkewSweepAndSearchScore(PIX *pixs, |
| l_float32 *pangle, |
| l_float32 *pconf, |
| l_float32 *pendscore, |
| l_int32 redsweep, |
| l_int32 redsearch, |
| l_float32 sweepcenter, |
| l_float32 sweeprange, |
| l_float32 sweepdelta, |
| l_float32 minbsdelta) |
| { |
| return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore, |
| redsweep, redsearch, 0.0, |
| sweeprange, sweepdelta, |
| minbsdelta, |
| L_SHEAR_ABOUT_CORNER); |
| } |
| |
| |
| /*! |
| * pixFindSkewSweepAndSearchScorePivot() |
| * |
| * Input: pixs (1 bpp) |
| * &angle (<return> angle required to deskew; in degrees) |
| * &conf (<return> confidence given by ratio of max/min score) |
| * &endscore (<optional return> max score; use NULL to ignore) |
| * redsweep (sweep reduction factor = 1, 2, 4 or 8) |
| * redsearch (binary search reduction factor = 1, 2, 4 or 8; |
| * and must not exceed redsweep) |
| * sweepcenter (angle about which sweep is performed; in degrees) |
| * sweeprange (half the full range, taken about sweepcenter; |
| * in degrees) |
| * sweepdelta (angle increment of sweep; in degrees) |
| * minbsdelta (min binary search increment angle; in degrees) |
| * pivot (L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER) |
| * Return: 0 if OK, 1 on error or if angle measurment not valid |
| * |
| * Notes: |
| * (1) See notes in pixFindSkewSweepAndSearchScore(). |
| * (2) This allows choice of shear pivoting from either the UL corner |
| * or the center. For small angles, the ability to discriminate |
| * angles is better with shearing from the UL corner. However, |
| * for large angles (say, greater than 20 degrees), it is better |
| * to shear about the center because a shear from the UL corner |
| * loses too much of the image. |
| */ |
| l_int32 |
| pixFindSkewSweepAndSearchScorePivot(PIX *pixs, |
| l_float32 *pangle, |
| l_float32 *pconf, |
| l_float32 *pendscore, |
| l_int32 redsweep, |
| l_int32 redsearch, |
| l_float32 sweepcenter, |
| l_float32 sweeprange, |
| l_float32 sweepdelta, |
| l_float32 minbsdelta, |
| l_int32 pivot) |
| { |
| l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; |
| l_int32 width, height; |
| l_float32 deg2rad, theta, delta; |
| l_float32 sum, maxscore, maxangle; |
| l_float32 centerangle, leftcenterangle, rightcenterangle; |
| l_float32 lefttemp, righttemp; |
| l_float32 bsearchscore[5]; |
| l_float32 minscore, minthresh; |
| l_float32 rangeleft; |
| NUMA *natheta, *nascore; |
| PIX *pixsw, *pixsch, *pixt1, *pixt2; |
| |
| PROCNAME("pixFindSkewSweepAndSearchScorePivot"); |
| |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| if (pixGetDepth(pixs) != 1) |
| return ERROR_INT("pixs not 1 bpp", procName, 1); |
| if (!pangle) |
| return ERROR_INT("&angle not defined", procName, 1); |
| if (!pconf) |
| return ERROR_INT("&conf not defined", procName, 1); |
| if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) |
| return ERROR_INT("redsweep must be in {1,2,4,8}", procName, 1); |
| if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) |
| return ERROR_INT("redsearch must be in {1,2,4,8}", procName, 1); |
| if (redsearch > redsweep) |
| return ERROR_INT("redsearch must not exceed redsweep", procName, 1); |
| if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) |
| return ERROR_INT("invalid pivot", procName, 1); |
| |
| *pangle = 0.0; |
| *pconf = 0.0; |
| deg2rad = 3.1415926535 / 180.; |
| ret = 0; |
| |
| /* Generate reduced image for binary search, if requested */ |
| if (redsearch == 1) |
| pixsch = pixClone(pixs); |
| else if (redsearch == 2) |
| pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
| else if (redsearch == 4) |
| pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); |
| else /* redsearch == 8 */ |
| pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); |
| |
| pixZero(pixsch, &bzero); |
| if (bzero) { |
| pixDestroy(&pixsch); |
| return 1; |
| } |
| |
| /* Generate reduced image for sweep, if requested */ |
| ratio = redsweep / redsearch; |
| if (ratio == 1) |
| pixsw = pixClone(pixsch); |
| else { /* ratio > 1 */ |
| if (ratio == 2) |
| pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); |
| else if (ratio == 4) |
| pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); |
| else /* ratio == 8 */ |
| pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); |
| } |
| |
| pixt1 = pixCreateTemplate(pixsw); |
| if (ratio == 1) |
| pixt2 = pixClone(pixt1); |
| else |
| pixt2 = pixCreateTemplate(pixsch); |
| |
| nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); |
| natheta = numaCreate(nangles); |
| nascore = numaCreate(nangles); |
| |
| if (!pixsch || !pixsw) { |
| ret = ERROR_INT("pixsch and pixsw not both made", procName, 1); |
| goto cleanup; |
| } |
| if (!pixt1 || !pixt2) { |
| ret = ERROR_INT("pixt1 and pixt2 not both made", procName, 1); |
| goto cleanup; |
| } |
| if (!natheta || !nascore) { |
| ret = ERROR_INT("natheta and nascore not both made", procName, 1); |
| goto cleanup; |
| } |
| |
| /* Do sweep */ |
| rangeleft = sweepcenter - sweeprange; |
| for (i = 0; i < nangles; i++) { |
| theta = rangeleft + i * sweepdelta; /* degrees */ |
| |
| /* Shear pix and put the result in pixt1 */ |
| if (pivot == L_SHEAR_ABOUT_CORNER) |
| pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); |
| else |
| pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); |
| |
| /* Get score */ |
| pixFindDifferentialSquareSum(pixt1, &sum); |
| |
| #if DEBUG_PRINT_SCORES |
| L_INFO_FLOAT2("sum(%7.2f) = %7.0f", procName, theta, sum); |
| #endif /* DEBUG_PRINT_SCORES */ |
| |
| /* Save the result in the output arrays */ |
| numaAddNumber(nascore, sum); |
| numaAddNumber(natheta, theta); |
| } |
| |
| /* Find the largest of the set (maxscore at maxangle) */ |
| numaGetMax(nascore, &maxscore, &maxindex); |
| numaGetFValue(natheta, maxindex, &maxangle); |
| |
| #if DEBUG_PRINT_SWEEP |
| L_INFO_FLOAT2(" From sweep: angle = %7.3f, score = %7.3f", procName, |
| maxangle, maxscore); |
| #endif /* DEBUG_PRINT_SWEEP */ |
| |
| #if DEBUG_PLOT_SCORES |
| /* Plot the sweep result -- the scores versus rotation angle -- |
| * using gnuplot with GPLOT_LINES (lines connecting data points). */ |
| {GPLOT *gplot; |
| gplot = gplotCreate("sweep_output", GPLOT_PNG, |
| "Sweep. Variance of difference of ON pixels vs. angle", |
| "angle (deg)", "score"); |
| gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); |
| gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); |
| gplotMakeOutput(gplot); |
| gplotDestroy(&gplot); |
| } |
| #endif /* DEBUG_PLOT_SCORES */ |
| |
| /* Check if the max is at the end of the sweep. */ |
| n = numaGetCount(natheta); |
| if (maxindex == 0 || maxindex == n - 1) { |
| L_WARNING("max found at sweep edge", procName); |
| goto cleanup; |
| } |
| |
| /* Empty the numas for re-use */ |
| numaEmpty(nascore); |
| numaEmpty(natheta); |
| |
| /* Do binary search to find skew angle. |
| * First, set up initial three points. */ |
| centerangle = maxangle; |
| if (pivot == L_SHEAR_ABOUT_CORNER) { |
| pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); |
| pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), |
| L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); |
| pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), |
| L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); |
| } |
| else { |
| pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); |
| pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), |
| L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); |
| pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), |
| L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); |
| } |
| |
| numaAddNumber(nascore, bsearchscore[2]); |
| numaAddNumber(natheta, centerangle); |
| numaAddNumber(nascore, bsearchscore[0]); |
| numaAddNumber(natheta, centerangle - sweepdelta); |
| numaAddNumber(nascore, bsearchscore[4]); |
| numaAddNumber(natheta, centerangle + sweepdelta); |
| |
| /* Start the search */ |
| delta = 0.5 * sweepdelta; |
| while (delta >= minbsdelta) |
| { |
| /* Get the left intermediate score */ |
| leftcenterangle = centerangle - delta; |
| if (pivot == L_SHEAR_ABOUT_CORNER) |
| pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, |
| L_BRING_IN_WHITE); |
| else |
| pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, |
| L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); |
| numaAddNumber(nascore, bsearchscore[1]); |
| numaAddNumber(natheta, leftcenterangle); |
| |
| /* Get the right intermediate score */ |
| rightcenterangle = centerangle + delta; |
| if (pivot == L_SHEAR_ABOUT_CORNER) |
| pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, |
| L_BRING_IN_WHITE); |
| else |
| pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, |
| L_BRING_IN_WHITE); |
| pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); |
| numaAddNumber(nascore, bsearchscore[3]); |
| numaAddNumber(natheta, rightcenterangle); |
| |
| /* Find the maximum of the five scores and its location. |
| * Note that the maximum must be in the center |
| * three values, not in the end two. */ |
| maxscore = bsearchscore[1]; |
| maxindex = 1; |
| for (i = 2; i < 4; i++) { |
| if (bsearchscore[i] > maxscore) { |
| maxscore = bsearchscore[i]; |
| maxindex = i; |
| } |
| } |
| |
| /* Set up score array to interpolate for the next iteration */ |
| lefttemp = bsearchscore[maxindex - 1]; |
| righttemp = bsearchscore[maxindex + 1]; |
| bsearchscore[2] = maxscore; |
| bsearchscore[0] = lefttemp; |
| bsearchscore[4] = righttemp; |
| |
| /* Get new center angle and delta for next iteration */ |
| centerangle = centerangle + delta * (maxindex - 2); |
| delta = 0.5 * delta; |
| } |
| *pangle = centerangle; |
| |
| #if DEBUG_PRINT_SCORES |
| L_INFO_FLOAT(" Binary search score = %7.3f", procName, bsearchscore[2]); |
| #endif /* DEBUG_PRINT_SCORES */ |
| |
| if (pendscore) /* save if requested */ |
| *pendscore = bsearchscore[2]; |
| |
| /* Return the ratio of Max score over Min score |
| * as a confidence value. Don't trust if the Min score |
| * is too small, which can happen if the image is all black |
| * with only a few white pixels interspersed. In that case, |
| * we get a contribution from the top and bottom edges when |
| * vertically sheared, but this contribution becomes zero when |
| * the shear angle is zero. For zero shear angle, the only |
| * contribution will be from the white pixels. We expect that |
| * the signal goes as the product of the (height * width^2), |
| * so we compute a (hopefully) normalized minimum threshold as |
| * a function of these dimensions. */ |
| numaGetMin(nascore, &minscore, &minloc); |
| width = pixGetWidth(pixsch); |
| height = pixGetHeight(pixsch); |
| minthresh = MINSCORE_THRESHOLD_CONSTANT * width * width * height; |
| |
| #if DEBUG_THRESHOLD |
| L_INFO_FLOAT2(" minthresh = %10.2f, minscore = %10.2f", procName, |
| minthresh, minscore); |
| L_INFO_FLOAT(" maxscore = %10.2f", procName, maxscore); |
| #endif /* DEBUG_THRESHOLD */ |
| |
| if (minscore > minthresh) |
| *pconf = maxscore / minscore; |
| else |
| *pconf = 0.0; |
| |
| /* Don't trust it if too close to the edge of the sweep |
| * range or if maxscore is small */ |
| if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || |
| (centerangle < rangeleft + sweepdelta) || |
| (maxscore < MIN_VALID_MAXSCORE)) |
| *pconf = 0.0; |
| |
| #if DEBUG_PRINT_BINARY |
| fprintf(stderr, "Binary search: angle = %7.3f, score ratio = %6.2f\n", |
| *pangle, *pconf); |
| fprintf(stderr, " max score = %8.0f\n", maxscore); |
| #endif /* DEBUG_PRINT_BINARY */ |
| |
| #if DEBUG_PLOT_SCORES |
| /* Plot the result -- the scores versus rotation angle -- |
| * using gnuplot with GPLOT_POINTS. Because the data |
| * points are not ordered by theta (increasing or decreasing), |
| * using GPLOT_LINES would be confusing! */ |
| {GPLOT *gplot; |
| gplot = gplotCreate("search_output", GPLOT_PNG, |
| "Binary search. Variance of difference of ON pixels vs. angle", |
| "angle (deg)", "score"); |
| gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); |
| gplotMakeOutput(gplot); |
| gplotDestroy(&gplot); |
| } |
| #endif /* DEBUG_PLOT_SCORES */ |
| |
| cleanup: |
| pixDestroy(&pixsw); |
| pixDestroy(&pixsch); |
| pixDestroy(&pixt1); |
| pixDestroy(&pixt2); |
| numaDestroy(&nascore); |
| numaDestroy(&natheta); |
| return ret; |
| } |
| |
| |
| /*---------------------------------------------------------------------* |
| * Search over arbitrary range of angles in orthogonal directions * |
| *---------------------------------------------------------------------*/ |
| /* |
| * pixFindSkewOrthogonalRange() |
| * |
| * Input: pixs (1 bpp) |
| * &angle (<return> angle required to deskew; in degrees cw) |
| * &conf (<return> confidence given by ratio of max/min score) |
| * redsweep (sweep reduction factor = 1, 2, 4 or 8) |
| * redsearch (binary search reduction factor = 1, 2, 4 or 8; |
| * and must not exceed redsweep) |
| * sweeprange (half the full range in each orthogonal |
| * direction, taken about 0, in degrees) |
| * sweepdelta (angle increment of sweep; in degrees) |
| * minbsdelta (min binary search increment angle; in degrees) |
| * confprior (amount by which confidence of 90 degree rotated |
| * result is reduced when comparing with unrotated |
| * confidence value) |
| * Return: 0 if OK, 1 on error or if angle measurment not valid |
| * |
| * Notes: |
| * (1) This searches for the skew angle, first in the range |
| * [-sweeprange, sweeprange], and then in |
| * [90 - sweeprange, 90 + sweeprange], with angles measured |
| * clockwise. For exploring the full range of possibilities, |
| * suggest using sweeprange = 47.0 degrees, giving some overlap |
| * at 45 and 135 degrees. From these results, and discounting |
| * the the second confidence by @confprior, it selects the |
| * angle for maximal differential variance. If the angle |
| * is larger than pi/4, the angle found after 90 degree rotation |
| * is selected. |
| * (2) The larger the confidence value, the greater the probability |
| * that the proper alignment is given by the angle that maximizes |
| * variance. It should be compared to a threshold, which depends |
| * on the application. Values between 3.0 and 6.0 are common. |
| * (3) Allowing for both portrait and landscape searches is more |
| * difficult, because if the signal from the text lines is weak, |
| * a signal from vertical rules can be larger! |
| * The most difficult documents to deskew have some or all of: |
| * (a) Multiple columns, not aligned |
| * (b) Black lines along the vertical edges |
| * (c) Text from two pages, and at different angles |
| * Rule of thumb for resolution: |
| * (a) If the margins are clean, you can work at 75 ppi, |
| * although 100 ppi is safer. |
| * (b) If there are vertical lines in the margins, do not |
| * work below 150 ppi. The signal from the text lines must |
| * exceed that from the margin lines. |
| * (4) Choosing the @confprior parameter depends on knowing something |
| * about the source of image. However, we're not using |
| * real probabilities here, so its use is qualitative. |
| * If landscape and portrait are equally likely, use |
| * @confprior = 0.0. If the likelihood of portrait (non-rotated) |
| * is 100 times higher than that of landscape, we want to reduce |
| * the chance that we rotate to landscape in a situation where |
| * the landscape signal is accidentally larger than the |
| * portrait signal. To do this use a positive value of |
| * @confprior; say 1.5. |
| */ |
| l_int32 |
| pixFindSkewOrthogonalRange(PIX *pixs, |
| l_float32 *pangle, |
| l_float32 *pconf, |
| l_int32 redsweep, |
| l_int32 redsearch, |
| l_float32 sweeprange, |
| l_float32 sweepdelta, |
| l_float32 minbsdelta, |
| l_float32 confprior) |
| { |
| l_float32 angle1, conf1, score1, angle2, conf2, score2; |
| PIX *pixr; |
| |
| PROCNAME("pixFindSkewOrthogonalRange"); |
| |
| if (!pangle || !pconf) |
| return ERROR_INT("&angle and &conf not both defined", procName, 1); |
| *pangle = *pconf = 0.0; |
| if (!pixs || pixGetDepth(pixs) != 1) |
| return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); |
| |
| pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1, |
| redsweep, redsearch, 0.0, |
| sweeprange, sweepdelta, minbsdelta, |
| L_SHEAR_ABOUT_CORNER); |
| pixr = pixRotateOrth(pixs, 1); |
| pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2, |
| redsweep, redsearch, 0.0, |
| sweeprange, sweepdelta, minbsdelta, |
| L_SHEAR_ABOUT_CORNER); |
| pixDestroy(&pixr); |
| |
| if (conf1 > conf2 - confprior) { |
| *pangle = angle1; |
| *pconf = conf1; |
| } else { |
| *pangle = -90.0 + angle2; |
| *pconf = conf2; |
| } |
| |
| #if DEBUG_PRINT_ORTH |
| fprintf(stderr, " About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n", |
| angle1, conf1, score1); |
| fprintf(stderr, " About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n", |
| angle2, conf2, score2); |
| fprintf(stderr, " Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf); |
| #endif /* DEBUG_PRINT_ORTH */ |
| |
| return 0; |
| } |
| |
| |
| |
| /*----------------------------------------------------------------* |
| * Differential square sum function * |
| *----------------------------------------------------------------*/ |
| /*! |
| * pixFindDifferentialSquareSum() |
| * |
| * Input: pixs |
| * &sum (<return> result) |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) At the top and bottom, we skip: |
| * - at least one scanline |
| * - not more than 10% of the image height |
| * - not more than 5% of the image width |
| */ |
| l_int32 |
| pixFindDifferentialSquareSum(PIX *pixs, |
| l_float32 *psum) |
| { |
| l_int32 i, n; |
| l_int32 w, h, skiph, skip, nskip; |
| l_float32 val1, val2, diff, sum; |
| NUMA *na; |
| |
| PROCNAME("pixFindDifferentialSquareSum"); |
| |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| |
| /* Generate a number array consisting of the sum |
| * of pixels in each row of pixs */ |
| if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL) |
| return ERROR_INT("na not made", procName, 1); |
| |
| /* Compute the number of rows at top and bottom to omit. |
| * We omit these to avoid getting a spurious signal from |
| * the top and bottom of a (nearly) all black image. */ |
| w = pixGetWidth(pixs); |
| h = pixGetHeight(pixs); |
| skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */ |
| skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */ |
| nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */ |
| |
| /* Sum the squares of differential row sums, on the |
| * allowed rows. Note that nskip must be >= 1. */ |
| n = numaGetCount(na); |
| sum = 0.0; |
| for (i = nskip; i < n - nskip; i++) { |
| numaGetFValue(na, i - 1, &val1); |
| numaGetFValue(na, i, &val2); |
| diff = val2 - val1; |
| sum += diff * diff; |
| } |
| numaDestroy(&na); |
| *psum = sum; |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------* |
| * Normalized square sum * |
| *----------------------------------------------------------------*/ |
| /*! |
| * pixFindNormalizedSquareSum() |
| * |
| * Input: pixs |
| * &hratio (<optional return> ratio of normalized horiz square sum |
| * to result if the pixel distribution were uniform) |
| * &vratio (<optional return> ratio of normalized vert square sum |
| * to result if the pixel distribution were uniform) |
| * &fract (<optional return> ratio of fg pixels to total pixels) |
| * Return: 0 if OK, 1 on error or if there are no fg pixels |
| * |
| * Notes: |
| * (1) Let the image have h scanlines and N fg pixels. |
| * If the pixels were uniformly distributed on scanlines, |
| * the sum of squares of fg pixels on each scanline would be |
| * h * (N / h)^2. However, if the pixels are not uniformly |
| * distributed (e.g., for text), the sum of squares of fg |
| * pixels will be larger. We return in hratio and vratio the |
| * ratio of these two values. |
| * (2) If there are no fg pixels, hratio and vratio are returned as 0.0. |
| */ |
| l_int32 |
| pixFindNormalizedSquareSum(PIX *pixs, |
| l_float32 *phratio, |
| l_float32 *pvratio, |
| l_float32 *pfract) |
| { |
| l_int32 i, w, h, empty; |
| l_float32 sum, sumsq, uniform, val; |
| NUMA *na; |
| PIX *pixt; |
| |
| PROCNAME("pixFindNormalizedSquareSum"); |
| |
| if (!pixs || pixGetDepth(pixs) != 1) |
| return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); |
| pixGetDimensions(pixs, &w, &h, NULL); |
| |
| if (!phratio && !pvratio) |
| return ERROR_INT("nothing to do", procName, 1); |
| if (phratio) *phratio = 0.0; |
| if (pvratio) *pvratio = 0.0; |
| |
| empty = 0; |
| if (phratio) { |
| na = pixCountPixelsByRow(pixs, NULL); |
| numaGetSum(na, &sum); /* fg pixels */ |
| if (pfract) *pfract = sum / (l_float32)(w * h); |
| if (sum != 0.0) { |
| uniform = sum * sum / h; /* h*(sum / h)^2 */ |
| sumsq = 0.0; |
| for (i = 0; i < h; i++) { |
| numaGetFValue(na, i, &val); |
| sumsq += val * val; |
| } |
| *phratio = sumsq / uniform; |
| } |
| else |
| empty = 1; |
| numaDestroy(&na); |
| } |
| |
| if (pvratio) { |
| if (empty == 1) return 1; |
| pixt = pixRotateOrth(pixs, 1); |
| na = pixCountPixelsByRow(pixt, NULL); |
| numaGetSum(na, &sum); |
| if (pfract) *pfract = sum / (l_float32)(w * h); |
| if (sum != 0.0) { |
| uniform = sum * sum / w; |
| sumsq = 0.0; |
| for (i = 0; i < w; i++) { |
| numaGetFValue(na, i, &val); |
| sumsq += val * val; |
| } |
| *pvratio = sumsq / uniform; |
| } |
| else |
| empty = 1; |
| pixDestroy(&pixt); |
| numaDestroy(&na); |
| } |
| |
| return empty; |
| } |
| |
| |