/*====================================================================*
- Copyright (C) 2001 Leptonica. All rights reserved.
- This software is distributed in the hope that it will be
- useful, but with NO WARRANTY OF ANY KIND.
- No author or distributor accepts responsibility to anyone for the
- consequences of using this software, or for whether it serves any
- particular purpose or works at all, unless he or she says so in
- writing. Everyone is granted permission to copy, modify and
- redistribute this source code, for commercial or non-commercial
- purposes, with the following restrictions: (1) the origin of this
- source code must not be misrepresented; (2) modified versions must
- be plainly marked as such; and (3) this notice may not be removed
- or altered from any source or modified source distribution.
*====================================================================*/
/*
* numafunc2.c
*
* Transformations
* NUMA *numaTransform()
* NUMA *numaConvolve()
* NUMA *numaConvertToInt()
*
* Histogram generation and statistics
* NUMA *numaMakeHistogram()
* NUMA *numaMakeHistogramAuto()
* NUMA *numaMakeHistogramClipped()
* NUMA *numaRebinHistogram()
* NUMA *numaNormalizeHistogram()
* l_int32 numaGetStatsUsingHistogram()
* l_int32 numaGetHistogramStats()
* l_int32 numaGetHistogramStatsOnInterval()
* l_int32 numaMakeRankFromHistogram()
* l_int32 numaHistogramGetRankFromVal()
* l_int32 numaHistogramGetValFromRank()
*
* Splitting a distribution
* l_int32 numaSplitDistribution()
*
* Extrema finding
* NUMA *numaFindPeaks()
* NUMA *numaFindExtrema()
*
* Threshold crossings and frequency analysis
* l_int32 numaSelectCrossingThreshold()
* NUMA *numaCrossingsByThreshold()
* NUMA *numaCrossingsByPeaks()
* NUMA *numaEvalBestHaarParameters()
* l_int32 numaEvalHaarSum()
*
* Things to remember when using the Numa:
*
* (1) The numa is a struct, not an array. Always use accessors
* (see numabasic.c), never the fields directly.
*
* (2) The number array holds l_float32 values. It can also
* be used to store l_int32 values. See numabasic.c for
* details on using the accessors.
*
* (3) Occasionally, in the comments we denote the i-th element of a
* numa by na[i]. This is conceptual only -- the numa is not an array!
*
* Some general comments on histograms:
*
* (1) Histograms are the generic statistical representation of
* the data about some attribute. Typically they're not
* normalized -- they simply give the number of occurrences
* within each range of values of the attribute. This range
* of values is referred to as a 'bucket'. For example,
* the histogram could specify how many connected components
* are found for each value of their width; in that case,
* the bucket size is 1.
*
* (2) In leptonica, all buckets have the same size. Histograms
* are therefore specified by a numa of occurrences, along
* with two other numbers: the 'value' associated with the
* occupants of the first bucket and the size (i.e., 'width')
* of each bucket. These two numbers then allow us to calculate
* the value associated with the occupants of each bucket.
* These numbers are fields in the numa, initialized to
* a startx value of 0.0 and a binsize of 1.0. Accessors for
* these fields are functions numa*XParameters(). All histograms
* must have these two numbers properly set.
*/
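/*
 *  A hypothetical usage sketch (not part of the library), illustrating
 *  how the startx and binsize fields relate a bin index i to its
 *  x value, x(i) = startx + i * binsize:
 *
 *      NUMA      *nah;
 *      l_float32  startx, binsize, x3;
 *      nah = numaCreate(10);                  // histogram with 10 bins
 *      numaSetXParameters(nah, 100.0, 5.0);   // first bin at x = 100, width 5
 *      numaGetXParameters(nah, &startx, &binsize);
 *      x3 = startx + 3 * binsize;             // x value of bin 3 is 115.0
 *      numaDestroy(&nah);
 */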
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include "allheaders.h"
/* bin sizes in numaMakeHistogram() */
static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,
    2000, 5000, 10000, 20000, 50000, 100000, 200000,
    500000, 1000000, 2000000, 5000000, 10000000,
    20000000, 50000000, 100000000};
static const l_int32 NBinSizes = 24;
#ifndef NO_CONSOLE_IO
#define DEBUG_HISTO 0
#define DEBUG_CROSSINGS 0
#define DEBUG_FREQUENCY 0
#endif /* ~NO_CONSOLE_IO */
/*----------------------------------------------------------------------*
* Transformations *
*----------------------------------------------------------------------*/
/*!
* numaTransform()
*
* Input: nas
* shift (add this to each number)
* scale (multiply each number by this)
* Return: na with all values shifted and scaled, or null on error
*
 * Notes:
 * (1) Each number is scaled and then shifted.
 * (2) The operation sequence is opposite to that for Box and Pta,
 * where each number is shifted first and then scaled.
*/
NUMA *
numaTransform(NUMA *nas,
l_float32 shift,
l_float32 scale)
{
l_int32 i, n;
l_float32 val;
NUMA *nad;
PROCNAME("numaTransform");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
if ((nad = numaCreate(n)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
for (i = 0; i < n; i++) {
numaGetFValue(nas, i, &val);
val = scale * val + shift;
numaAddNumber(nad, val);
}
return nad;
}
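/*
 *  A hypothetical usage sketch (not part of the library): scale each
 *  value by 2 and then add 10, i.e., nad[i] = 2 * nas[i] + 10.
 *
 *      NUMA *nas = numaCreate(3);
 *      NUMA *nad;
 *      numaAddNumber(nas, 1.0);
 *      numaAddNumber(nas, 2.0);
 *      numaAddNumber(nas, 3.0);
 *      nad = numaTransform(nas, 10.0, 2.0);   // values: 12, 14, 16
 *      numaDestroy(&nas);
 *      numaDestroy(&nad);
 */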
/*!
* numaConvolve()
*
* Input: na
* halfwidth (of rectangular filter, minus the center)
* Return: na (after low-pass filtering), or null on error
*
* Notes:
* (1) Full convolution takes place only from i = halfwidth to
* i = n - halfwidth - 1. We do the end parts using only
* the partial array available. We do not pad the ends with 0.
* (2) This implementation assumes specific fields in the Numa!
*/
NUMA *
numaConvolve(NUMA *na,
l_int32 halfwidth)
{
l_int32 i, n, rval;
l_float32 sum, norm;
l_float32 *array, *carray, *sumarray;
NUMA *nac;
PROCNAME("numaConvolve");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
n = numaGetCount(na);
if (2 * halfwidth + 1 > n)
L_WARNING("filter wider than input array!", procName);
array = na->array;
if ((nac = numaCreate(n)) == NULL)
return (NUMA *)ERROR_PTR("nac not made", procName, NULL);
carray = nac->array;
nac->n = n; /* fill with zeroes */
/* Make sum array; note the indexing */
if ((sumarray = (l_float32 *)CALLOC(n + 1, sizeof(l_float32))) == NULL)
return (NUMA *)ERROR_PTR("sumarray not made", procName, NULL);
sum = 0.0;
sumarray[0] = 0.0;
for (i = 0; i < n; i++) {
sum += array[i];
sumarray[i + 1] = sum;
}
/* Central part */
norm = 1. / (2 * halfwidth + 1);
rval = n - halfwidth;
for (i = halfwidth; i < rval; ++i)
carray[i] = norm *
(sumarray[i + halfwidth + 1] - sumarray[i - halfwidth]);
/* Left side */
for (i = 0; i < halfwidth; i++)
carray[i] = sumarray[i + halfwidth + 1] / (halfwidth + i + 1);
/* Right side */
for (i = rval; i < n; i++)
carray[i] = (1. / (n - i + halfwidth)) *
(sumarray[n] - sumarray[i - halfwidth]);
FREE(sumarray);
return nac;
}
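/*
 *  A hypothetical usage sketch (not part of the library): smooth a
 *  spiky synthetic signal with a rectangular filter of full width
 *  2 * halfwidth + 1 = 5 samples.  Near the ends, the average is
 *  taken over only the samples that are available.
 *
 *      l_int32 i;
 *      NUMA *nas = numaCreate(100);
 *      NUMA *nasm;
 *      for (i = 0; i < 100; i++)
 *          numaAddNumber(nas, (i % 10 == 0) ? 50.0 : 10.0);
 *      nasm = numaConvolve(nas, 2);   // 5-point moving average
 *      numaDestroy(&nas);
 *      numaDestroy(&nasm);
 */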
/*!
* numaConvertToInt()
*
 * Input: nas
* Return: na with all values rounded to nearest integer, or
* null on error
*/
NUMA *
numaConvertToInt(NUMA *nas)
{
l_int32 i, n, ival;
NUMA *nad;
PROCNAME("numaConvertToInt");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
if ((nad = numaCreate(n)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
for (i = 0; i < n; i++) {
numaGetIValue(nas, i, &ival);
numaAddNumber(nad, ival);
}
return nad;
}
/*----------------------------------------------------------------------*
* Histogram generation and statistics *
*----------------------------------------------------------------------*/
/*!
* numaMakeHistogram()
*
* Input: na
* maxbins (max number of histogram bins)
* &binsize (<return> size of histogram bins)
* &binstart (<optional return> start val of minimum bin;
* input NULL to force start at 0)
 * Return: na consisting of histogram of integerized values,
* or null on error.
*
 * Notes:
* (1) This simple interface is designed for integer data.
* The bins are of integer width and start on integer boundaries,
* so the results on float data will not have high precision.
* (2) Specify the max number of input bins. Then @binsize,
* the size of bins necessary to accommodate the input data,
* is returned. It is one of the sequence:
* {1, 2, 5, 10, 20, 50, ...}.
* (3) If &binstart is given, all values are accommodated,
* and the min value of the starting bin is returned.
* Otherwise, all negative values are discarded and
* the histogram bins start at 0.
*/
NUMA *
numaMakeHistogram(NUMA *na,
l_int32 maxbins,
l_int32 *pbinsize,
l_int32 *pbinstart)
{
l_int32 i, n, ival, hval;
l_int32 iminval, imaxval, range, binsize, nbins, ibin;
l_float32 val, ratio;
NUMA *nai, *nahist;
PROCNAME("numaMakeHistogram");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
if (!pbinsize)
return (NUMA *)ERROR_PTR("&binsize not defined", procName, NULL);
/* Determine input range */
numaGetMin(na, &val, NULL);
iminval = (l_int32)(val + 0.5);
numaGetMax(na, &val, NULL);
imaxval = (l_int32)(val + 0.5);
if (pbinstart == NULL) { /* clip negative vals; start from 0 */
iminval = 0;
if (imaxval < 0)
return (NUMA *)ERROR_PTR("all values < 0", procName, NULL);
}
/* Determine binsize */
range = imaxval - iminval + 1;
if (range > maxbins - 1) {
ratio = (l_float64)range / (l_float64)maxbins;
binsize = 0;
for (i = 0; i < NBinSizes; i++) {
if (ratio < BinSizeArray[i]) {
binsize = BinSizeArray[i];
break;
}
}
if (binsize == 0)
return (NUMA *)ERROR_PTR("numbers too large", procName, NULL);
}
else
binsize = 1;
*pbinsize = binsize;
nbins = 1 + range / binsize; /* +1 seems to be sufficient */
/* Redetermine iminval */
if (pbinstart && binsize > 1) {
if (iminval >= 0)
iminval = binsize * (iminval / binsize);
else
iminval = binsize * ((iminval - binsize + 1) / binsize);
}
if (pbinstart)
*pbinstart = iminval;
#if DEBUG_HISTO
fprintf(stderr, " imaxval = %d, range = %d, nbins = %d\n",
imaxval, range, nbins);
#endif /* DEBUG_HISTO */
/* Use integerized data for input */
if ((nai = numaConvertToInt(na)) == NULL)
return (NUMA *)ERROR_PTR("nai not made", procName, NULL);
n = numaGetCount(nai);
/* Make histogram, converting value in input array
* into a bin number for this histogram array. */
if ((nahist = numaCreate(nbins)) == NULL)
return (NUMA *)ERROR_PTR("nahist not made", procName, NULL);
numaSetCount(nahist, nbins);
numaSetXParameters(nahist, iminval, binsize);
for (i = 0; i < n; i++) {
numaGetIValue(nai, i, &ival);
ibin = (ival - iminval) / binsize;
if (ibin >= 0 && ibin < nbins) {
numaGetIValue(nahist, ibin, &hval);
numaSetValue(nahist, ibin, hval + 1.0);
}
}
numaDestroy(&nai);
return nahist;
}
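/*
 *  A hypothetical usage sketch (not part of the library): histogram a
 *  set of synthetic integer values with at most 50 bins, letting the
 *  function choose the bin size from {1, 2, 5, 10, ...} and report
 *  where the bins start.
 *
 *      l_int32 i, binsize, binstart;
 *      NUMA *na = numaCreate(1000);
 *      NUMA *nahist;
 *      for (i = 0; i < 1000; i++)
 *          numaAddNumber(na, (i * 37) % 240);   // values in [0 ... 239]
 *      nahist = numaMakeHistogram(na, 50, &binsize, &binstart);
 *      // here binsize is 5, because the range of 240 values does not
 *      // fit into 50 bins of size 1 or 2; binstart is 0
 *      numaDestroy(&na);
 *      numaDestroy(&nahist);
 */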
/*!
* numaMakeHistogramAuto()
*
* Input: na (numa of floats; these may be integers)
* maxbins (max number of histogram bins; >= 1)
 * Return: na consisting of histogram of quantized float values,
* or null on error.
*
* Notes:
* (1) This simple interface is designed for accurate binning
* of both integer and float data.
* (2) If the array data is integers, and the range of integers
* is smaller than @maxbins, they are binned as they fall,
* with binsize = 1.
* (3) If the range of data, (maxval - minval), is larger than
* @maxbins, or if the data is floats, they are binned into
* exactly @maxbins bins.
* (4) Unlike numaMakeHistogram(), these bins in general have
* non-integer location and width, even for integer data.
*/
NUMA *
numaMakeHistogramAuto(NUMA *na,
l_int32 maxbins)
{
l_int32 i, n, imin, imax, irange, ibin, ival, allints;
l_float32 minval, maxval, range, binsize, fval;
NUMA *nah;
PROCNAME("numaMakeHistogramAuto");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
maxbins = L_MAX(1, maxbins);
/* Determine input range */
numaGetMin(na, &minval, NULL);
numaGetMax(na, &maxval, NULL);
/* Determine if values are all integers */
n = numaGetCount(na);
numaHasOnlyIntegers(na, maxbins, &allints);
/* Do simple integer binning if possible */
if (allints && (maxval - minval < maxbins)) {
imin = (l_int32)minval;
imax = (l_int32)maxval;
irange = imax - imin + 1;
nah = numaCreate(irange);
numaSetCount(nah, irange); /* init */
numaSetXParameters(nah, minval, 1.0);
for (i = 0; i < n; i++) {
numaGetIValue(na, i, &ival);
ibin = ival - imin;
numaGetIValue(nah, ibin, &ival);
numaSetValue(nah, ibin, ival + 1.0);
}
return nah;
}
/* Do float binning, even if the data is integers. */
range = maxval - minval;
binsize = range / (l_float32)maxbins;
if (range == 0.0) {
nah = numaCreate(1);
numaSetXParameters(nah, minval, binsize);
numaAddNumber(nah, n);
return nah;
}
nah = numaCreate(maxbins);
numaSetCount(nah, maxbins);
numaSetXParameters(nah, minval, binsize);
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &fval);
ibin = (l_int32)((fval - minval) / binsize);
ibin = L_MIN(ibin, maxbins - 1);  /* keep fval == maxval in the last bin */
numaGetIValue(nah, ibin, &ival);
numaSetValue(nah, ibin, ival + 1.0);
}
return nah;
}
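/*
 *  A hypothetical usage sketch (not part of the library): bin float
 *  data into exactly 64 bins.  The bin width is (maxval - minval) / 64,
 *  so the bins in general have non-integer boundaries.
 *
 *      l_int32 i;
 *      l_float32 startx, binsize;
 *      NUMA *na = numaCreate(500);
 *      NUMA *nah;
 *      for (i = 0; i < 500; i++)
 *          numaAddNumber(na, 0.37 * i);               // float data
 *      nah = numaMakeHistogramAuto(na, 64);
 *      numaGetXParameters(nah, &startx, &binsize);    // startx = 0.0 here
 *      numaDestroy(&na);
 *      numaDestroy(&nah);
 */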
/*!
* numaMakeHistogramClipped()
*
* Input: na
* binsize (typically 1.0)
* maxsize (of histogram ordinate)
* Return: na (histogram of bins of size @binsize, starting with
* the na[0] (x = 0.0) and going up to a maximum of
* x = @maxsize, by increments of @binsize), or null on error
*
* Notes:
* (1) This simple function generates a histogram of values
* from na, discarding all values < 0.0 or greater than
* min(@maxsize, maxval), where maxval is the maximum value in na.
* The histogram data is put in bins of size delx = @binsize,
* starting at x = 0.0. We use as many bins as are
* needed to hold the data.
*/
NUMA *
numaMakeHistogramClipped(NUMA *na,
l_float32 binsize,
l_float32 maxsize)
{
l_int32 i, n, nbins, ival, ibin;
l_float32 val, maxval;
NUMA *nad;
PROCNAME("numaMakeHistogramClipped");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
if (binsize <= 0.0)
return (NUMA *)ERROR_PTR("binsize must be > 0.0", procName, NULL);
if (binsize > maxsize)
binsize = maxsize; /* just one bin */
numaGetMax(na, &maxval, NULL);
n = numaGetCount(na);
maxsize = L_MIN(maxsize, maxval);
nbins = (l_int32)(maxsize / binsize) + 1;
/* fprintf(stderr, "maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */
if ((nad = numaCreate(nbins)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
numaSetXParameters(nad, 0.0, binsize);
numaSetCount(nad, nbins); /* interpret zeroes in bins as data */
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &val);
ibin = (l_int32)(val / binsize);
if (ibin >= 0 && ibin < nbins) {
numaGetIValue(nad, ibin, &ival);
numaSetValue(nad, ibin, ival + 1.0);
}
}
return nad;
}
/*!
* numaRebinHistogram()
*
* Input: nas (input histogram)
* newsize (number of old bins contained in each new bin)
* Return: nad (more coarsely re-binned histogram), or null on error
*/
NUMA *
numaRebinHistogram(NUMA *nas,
l_int32 newsize)
{
l_int32 i, j, ns, nd, index, count, val;
l_float32 start, oldsize;
NUMA *nad;
PROCNAME("numaRebinHistogram");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
if (newsize <= 1)
return (NUMA *)ERROR_PTR("newsize must be > 1", procName, NULL);
if ((ns = numaGetCount(nas)) == 0)
return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
nd = (ns + newsize - 1) / newsize;
if ((nad = numaCreate(nd)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
numaGetXParameters(nas, &start, &oldsize);
numaSetXParameters(nad, start, oldsize * newsize);
for (i = 0; i < nd; i++) { /* new bins */
count = 0;
index = i * newsize;
for (j = 0; j < newsize; j++) {
if (index < ns) {
numaGetIValue(nas, index, &val);
count += val;
index++;
}
}
numaAddNumber(nad, count);
}
return nad;
}
/*!
* numaNormalizeHistogram()
*
* Input: nas (input histogram)
* area (target sum of all numbers in dest histogram;
* e.g., use area = 1.0 if this represents a
* probability distribution)
* Return: nad (normalized histogram), or null on error
*/
NUMA *
numaNormalizeHistogram(NUMA *nas,
l_float32 area)
{
l_int32 i, ns;
l_float32 sum, factor, fval;
NUMA *nad;
PROCNAME("numaNormalizeHistogram");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
if (area <= 0.0)
return (NUMA *)ERROR_PTR("area must be > 0.0", procName, NULL);
if ((ns = numaGetCount(nas)) == 0)
return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
numaGetSum(nas, &sum);
factor = area / sum;
if ((nad = numaCreate(ns)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
numaCopyXParameters(nad, nas);
for (i = 0; i < ns; i++) {
numaGetFValue(nas, i, &fval);
fval *= factor;
numaAddNumber(nad, fval);
}
return nad;
}
/*!
* numaGetStatsUsingHistogram()
*
* Input: na (an arbitrary set of numbers; not ordered and not
* a histogram)
* maxbins (the maximum number of bins to be allowed in
* the histogram; use 0 for consecutive integer bins)
* &min (<optional return> min value of set)
* &max (<optional return> max value of set)
* &mean (<optional return> mean value of set)
* &variance (<optional return> variance)
* &median (<optional return> median value of set)
* rank (in [0.0 ... 1.0]; median has a rank 0.5; ignored
* if &rval == NULL)
* &rval (<optional return> value in na corresponding to @rank)
* &histo (<optional return> Numa histogram; use NULL to prevent)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This is a simple interface for gathering statistics
* from a numa, where a histogram is used 'under the covers'
* to avoid sorting if a rank value is requested. In that case,
* by using a histogram we are trading speed for accuracy, because
* the values in @na are quantized to the center of a set of bins.
 * (2) If neither the median, another rank value, nor the histogram
 * is requested, the calculation is performed entirely on the input Numa.
* (3) The variance is the average of the square of the
* difference from the mean. The median is the value in na
* with rank 0.5.
* (4) There are two situations where this gives rank results with
 * accuracy comparable to computing statistics directly on the input
* data, without binning into a histogram:
* (a) the data is integers and the range of data is less than
* @maxbins, and
* (b) the data is floats and the range is small compared to
* @maxbins, so that the binsize is much less than 1.
* (5) If a histogram is used and the numbers in the Numa extend
* over a large range, you can limit the required storage by
* specifying the maximum number of bins in the histogram.
* Use @maxbins == 0 to force the bin size to be 1.
* (6) This optionally returns the median and one arbitrary rank value.
* If you need several rank values, return the histogram and use
* numaHistogramGetValFromRank(nah, rank, &rval)
* multiple times.
*/
l_int32
numaGetStatsUsingHistogram(NUMA *na,
l_int32 maxbins,
l_float32 *pmin,
l_float32 *pmax,
l_float32 *pmean,
l_float32 *pvariance,
l_float32 *pmedian,
l_float32 rank,
l_float32 *prval,
NUMA **phisto)
{
l_int32 i, n;
l_float32 minval, maxval, fval, mean, sum;
NUMA *nah;
PROCNAME("numaGetStatsUsingHistogram");
if (pmin) *pmin = 0.0;
if (pmax) *pmax = 0.0;
if (pmean) *pmean = 0.0;
if (pmedian) *pmedian = 0.0;
if (pvariance) *pvariance = 0.0;
if (!na)
return ERROR_INT("na not defined", procName, 1);
if ((n = numaGetCount(na)) == 0)
return ERROR_INT("numa is empty", procName, 1);
numaGetMin(na, &minval, NULL);
numaGetMax(na, &maxval, NULL);
if (pmin) *pmin = minval;
if (pmax) *pmax = maxval;
if (pmean || pvariance) {
sum = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &fval);
sum += fval;
}
mean = sum / (l_float32)n;
if (pmean) *pmean = mean;
}
if (pvariance) {
sum = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &fval);
sum += fval * fval;
}
*pvariance = sum / (l_float32)n - mean * mean;
}
if (!pmedian && !prval && !phisto)
return 0;
nah = numaMakeHistogramAuto(na, maxbins);
if (pmedian)
numaHistogramGetValFromRank(nah, 0.5, pmedian);
if (prval)
numaHistogramGetValFromRank(nah, rank, prval);
if (phisto)
*phisto = nah;
else
numaDestroy(&nah);
return 0;
}
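/*
 *  A hypothetical usage sketch (not part of the library): get the mean,
 *  the median and the 90th percentile of a set of synthetic values in
 *  one call, using up to 256 histogram bins under the covers.
 *
 *      l_int32 i;
 *      l_float32 mean, median, rval90;
 *      NUMA *na = numaCreate(200);
 *      for (i = 0; i < 200; i++)
 *          numaAddNumber(na, (l_float32)(i * i) / 200.0);
 *      numaGetStatsUsingHistogram(na, 256, NULL, NULL, &mean, NULL,
 *                                 &median, 0.9, &rval90, NULL);
 *      numaDestroy(&na);
 */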
/*!
* numaGetHistogramStats()
*
* Input: nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
* startx (x value of first bin: x(0))
* deltax (x increment between bins; the bin size; x(1) - x(0))
* &xmean (<optional return> mean value of histogram)
* &xmedian (<optional return> median value of histogram)
* &xmode (<optional return> mode value of histogram:
* xmode = x(imode), where y(xmode) >= y(x(i)) for
* all i != imode)
* &xvariance (<optional return> variance of x)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If the histogram represents the relation y(x), the
* computed values that are returned are the x values.
* These are NOT the bucket indices i; they are related to the
* bucket indices by
* x(i) = startx + i * deltax
*/
l_int32
numaGetHistogramStats(NUMA *nahisto,
l_float32 startx,
l_float32 deltax,
l_float32 *pxmean,
l_float32 *pxmedian,
l_float32 *pxmode,
l_float32 *pxvariance)
{
PROCNAME("numaGetHistogramStats");
if (pxmean) *pxmean = 0.0;
if (pxmedian) *pxmedian = 0.0;
if (pxmode) *pxmode = 0.0;
if (pxvariance) *pxvariance = 0.0;
if (!nahisto)
return ERROR_INT("nahisto not defined", procName, 1);
return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, 0,
pxmean, pxmedian, pxmode,
pxvariance);
}
/*!
* numaGetHistogramStatsOnInterval()
*
* Input: nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
* startx (x value of first bin: x(0))
* deltax (x increment between bins; the bin size; x(1) - x(0))
* ifirst (first bin to use for collecting stats)
* ilast (last bin for collecting stats; use 0 to go to the end)
* &xmean (<optional return> mean value of histogram)
* &xmedian (<optional return> median value of histogram)
* &xmode (<optional return> mode value of histogram:
* xmode = x(imode), where y(xmode) >= y(x(i)) for
* all i != imode)
* &xvariance (<optional return> variance of x)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If the histogram represents the relation y(x), the
* computed values that are returned are the x values.
* These are NOT the bucket indices i; they are related to the
* bucket indices by
* x(i) = startx + i * deltax
*/
l_int32
numaGetHistogramStatsOnInterval(NUMA *nahisto,
l_float32 startx,
l_float32 deltax,
l_int32 ifirst,
l_int32 ilast,
l_float32 *pxmean,
l_float32 *pxmedian,
l_float32 *pxmode,
l_float32 *pxvariance)
{
l_int32 i, n, imax;
l_float32 sum, sumval, halfsum, moment, var, x, y, ymax;
PROCNAME("numaGetHistogramStats");
if (pxmean) *pxmean = 0.0;
if (pxmedian) *pxmedian = 0.0;
if (pxmode) *pxmode = 0.0;
if (pxvariance) *pxvariance = 0.0;
if (!nahisto)
return ERROR_INT("nahisto not defined", procName, 1);
if (!pxmean && !pxmedian && !pxmode && !pxvariance)
return ERROR_INT("nothing to compute", procName, 1);
n = numaGetCount(nahisto);
if (ilast <= 0) ilast = n - 1;
if (ifirst < 0) ifirst = 0;
if (ifirst > ilast || ifirst > n - 1)
return ERROR_INT("ifirst is too large", procName, 1);
for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) {
x = startx + i * deltax;
numaGetFValue(nahisto, i, &y);
sum += y;
moment += x * y;
var += x * x * y;
}
if (sum == 0.0)
return ERROR_INT("sum is 0", procName, 1);
if (pxmean)
*pxmean = moment / sum;
if (pxvariance)
*pxvariance = var / sum - moment * moment / (sum * sum);
if (pxmedian) {
halfsum = sum / 2.0;
for (sumval = 0.0, i = ifirst; i <= ilast; i++) {
numaGetFValue(nahisto, i, &y);
sumval += y;
if (sumval >= halfsum) {
*pxmedian = startx + i * deltax;
break;
}
}
}
if (pxmode) {
ymax = -1.0e10;
for (i = ifirst; i <= ilast; i++) {
numaGetFValue(nahisto, i, &y);
if (y > ymax) {
ymax = y;
imax = i;
}
}
*pxmode = startx + imax * deltax;
}
return 0;
}
/*!
* numaMakeRankFromHistogram()
*
 * Input: startx (xval corresponding to first element in nasy)
 * deltax (x increment between array elements in nasy)
* nasy (input histogram, assumed equally spaced)
* npts (number of points to evaluate rank function)
* &nax (<optional return> array of x values in range)
* &nay (<return> rank array of specified npts)
* Return: 0 if OK, 1 on error
*/
l_int32
numaMakeRankFromHistogram(l_float32 startx,
l_float32 deltax,
NUMA *nasy,
l_int32 npts,
NUMA **pnax,
NUMA **pnay)
{
l_int32 i, n;
l_float32 sum, fval;
NUMA *nan, *nar;
PROCNAME("numaMakeRankFromHistogram");
if (pnax) *pnax = NULL;
if (!pnay)
return ERROR_INT("&nay not defined", procName, 1);
*pnay = NULL;
if (!nasy)
return ERROR_INT("nasy not defined", procName, 1);
if ((n = numaGetCount(nasy)) == 0)
return ERROR_INT("no bins in nas", procName, 1);
/* Normalize and generate the rank array corresponding to
* the binned histogram. */
nan = numaNormalizeHistogram(nasy, 1.0);
nar = numaCreate(n + 1); /* rank numa corresponding to nan */
sum = 0.0;
numaAddNumber(nar, sum); /* first element is 0.0 */
for (i = 0; i < n; i++) {
numaGetFValue(nan, i, &fval);
sum += fval;
numaAddNumber(nar, sum);
}
/* Compute rank array on full range with specified
* number of points and correspondence to x-values. */
numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP,
startx, startx + n * deltax, npts,
pnax, pnay);
numaDestroy(&nan);
numaDestroy(&nar);
return 0;
}
/*!
* numaHistogramGetRankFromVal()
*
* Input: na (histogram)
* rval (value of input sample for which we want the rank)
* &rank (<return> fraction of total samples below rval)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If we think of the histogram as a function y(x), normalized
* to 1, for a given input value of x, this computes the
* rank of x, which is the integral of y(x) from the start
* value of x to the input value.
* (2) This function only makes sense when applied to a Numa that
* is a histogram. The values in the histogram can be ints and
* floats, and are computed as floats. The rank is returned
* as a float between 0.0 and 1.0.
* (3) The numa parameters startx and binsize are used to
* compute x from the Numa index i.
*/
l_int32
numaHistogramGetRankFromVal(NUMA *na,
l_float32 rval,
l_float32 *prank)
{
l_int32 i, ibinval, n;
l_float32 startval, binsize, binval, maxval, fractval, total, sum, val;
PROCNAME("numaHistogramGetRankFromVal");
if (!prank)
return ERROR_INT("prank not defined", procName, 1);
*prank = 0.0;
if (!na)
return ERROR_INT("na not defined", procName, 1);
numaGetXParameters(na, &startval, &binsize);
n = numaGetCount(na);
if (rval < startval)
return 0;
maxval = startval + n * binsize;
if (rval > maxval) {
*prank = 1.0;
return 0;
}
binval = (rval - startval) / binsize;
ibinval = (l_int32)binval;
if (ibinval >= n) {
*prank = 1.0;
return 0;
}
fractval = binval - (l_float32)ibinval;
sum = 0.0;
for (i = 0; i < ibinval; i++) {
numaGetFValue(na, i, &val);
sum += val;
}
numaGetFValue(na, ibinval, &val);
sum += fractval * val;
numaGetSum(na, &total);
*prank = sum / total;
/* fprintf(stderr, "binval = %7.3f, rank = %7.3f\n", binval, *prank); */
return 0;
}
/*!
* numaHistogramGetValFromRank()
*
* Input: na (histogram)
* rank (fraction of total samples)
* &rval (<return> approx. to the bin value)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If we think of the histogram as a function y(x), this returns
* the value x such that the integral of y(x) from the start
* value to x gives the fraction 'rank' of the integral
* of y(x) over all bins.
* (2) This function only makes sense when applied to a Numa that
* is a histogram. The values in the histogram can be ints and
* floats, and are computed as floats. The val is returned
* as a float, even though the buckets are of integer width.
* (3) The numa parameters startx and binsize are used to
* compute x from the Numa index i.
*/
l_int32
numaHistogramGetValFromRank(NUMA *na,
l_float32 rank,
l_float32 *prval)
{
l_int32 i, n;
l_float32 startval, binsize, rankcount, total, sum, fract, val;
PROCNAME("numaHistogramGetValFromRank");
if (!prval)
return ERROR_INT("prval not defined", procName, 1);
*prval = 0.0;
if (!na)
return ERROR_INT("na not defined", procName, 1);
if (rank < 0.0) {
L_WARNING("rank < 0; setting to 0.0", procName);
rank = 0.0;
}
if (rank > 1.0) {
L_WARNING("rank > 1.0; setting to 1.0", procName);
rank = 1.0;
}
n = numaGetCount(na);
numaGetXParameters(na, &startval, &binsize);
numaGetSum(na, &total);
rankcount = rank * total; /* count that corresponds to rank */
sum = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &val);
if (sum + val >= rankcount)
break;
sum += val;
}
if (val <= 0.0) /* can == 0 if rank == 0.0 */
fract = 0.0;
else /* sum + fract * val = rankcount */
fract = (rankcount - sum) / val;
/* The use of the fraction of a bin allows a simple calculation
* for the histogram value at the given rank. */
*prval = startval + binsize * ((l_float32)i + fract);
/* fprintf(stderr, "rank = %7.3f, val = %7.3f\n", rank, *prval); */
return 0;
}
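/*
 *  A hypothetical usage sketch (not part of the library):
 *  numaHistogramGetValFromRank() and numaHistogramGetRankFromVal()
 *  are approximate inverses on a histogram, so a rank -> value -> rank
 *  round trip returns nearly the input rank (up to bin quantization).
 *
 *      l_int32 i;
 *      l_float32 val, rank;
 *      NUMA *na = numaCreate(100);
 *      NUMA *nah;
 *      for (i = 0; i < 100; i++)
 *          numaAddNumber(na, i);                      // uniform data 0 ... 99
 *      nah = numaMakeHistogramAuto(na, 100);
 *      numaHistogramGetValFromRank(nah, 0.25, &val);  // val is ~25
 *      numaHistogramGetRankFromVal(nah, val, &rank);  // rank is ~0.25
 *      numaDestroy(&na);
 *      numaDestroy(&nah);
 */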
/*----------------------------------------------------------------------*
* Splitting a distribution *
*----------------------------------------------------------------------*/
/*!
* numaSplitDistribution()
*
* Input: na (histogram)
* scorefract (fraction of the max score, used to determine
* the range over which the histogram min is searched)
* &splitindex (<optional return> index for splitting)
* &ave1 (<optional return> average of lower distribution)
* &ave2 (<optional return> average of upper distribution)
* &num1 (<optional return> population of lower distribution)
* &num2 (<optional return> population of upper distribution)
* &nascore (<optional return> for debugging; otherwise use null)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This function is intended to be used on a distribution of
* values that represent two sets, such as a histogram of
* pixel values, and the goal is to determine the means of
* the two sets and the best splitting point.
* (2) The Otsu method finds a split point that divides the distribution
* into two parts by maximizing a score function that is the
* product of two terms:
* (a) the square of the difference of centroids, (ave1 - ave2)^2
* (b) fract1 * (1 - fract1)
* where fract1 is the fraction in the lower distribution.
 * This biases the split point into the larger "bump" (i.e., toward
 * the point where the (b) term reaches its maximum of 0.25 at
 * fract1 = 0.5). To avoid this, we define a range of values near
* the maximum of the score function, and choose the value within
* this range such that the histogram itself has a minimum value.
* The range is determined by scorefract: we include all abscissa
* values to the left and right of the value that maximizes the
* score, such that the score stays above (1 - scorefract) * maxscore.
* (3) We normalize the score so that if the two distributions
* were of equal size and at opposite ends of the numa, the
* score would be 1.0.
*/
l_int32
numaSplitDistribution(NUMA *na,
l_float32 scorefract,
l_int32 *psplitindex,
l_float32 *pave1,
l_float32 *pave2,
l_float32 *pnum1,
l_float32 *pnum2,
NUMA **pnascore)
{
l_int32 i, n, bestsplit, minrange, maxrange, maxindex;
l_float32 ave1, ave2, ave1prev, ave2prev;
l_float32 num1, num2, num1prev, num2prev;
l_float32 val, minval, sum, fract1;
l_float32 norm, score, minscore, maxscore;
NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2;
PROCNAME("numaSplitDistribution");
if (!na)
return ERROR_INT("na not defined", procName, 1);
n = numaGetCount(na);
if (n <= 1)
return ERROR_INT("n = 1 in histogram", procName, 1);
numaGetSum(na, &sum);
if (sum <= 0.0)
return ERROR_INT("sum <= 0.0", procName, 1);
norm = 4.0 / ((n - 1) * (n - 1));
ave1prev = 0.0;
numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL);
num1prev = 0.0;
num2prev = sum;
maxindex = n / 2; /* initialize with something */
/* Split the histogram with [0 ... i] in the lower part
* and [i+1 ... n-1] in upper part. First, compute an otsu
* score for each possible splitting. */
nascore = numaCreate(n);
if (pave1) naave1 = numaCreate(n);
if (pave2) naave2 = numaCreate(n);
if (pnum1) nanum1 = numaCreate(n);
if (pnum2) nanum2 = numaCreate(n);
maxscore = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &val);
num1 = num1prev + val;
if (num1 == 0)
ave1 = ave1prev;
else
ave1 = (num1prev * ave1prev + i * val) / num1;
num2 = num2prev - val;
if (num2 == 0)
ave2 = ave2prev;
else
ave2 = (num2prev * ave2prev - i * val) / num2;
fract1 = num1 / sum;
score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1);
numaAddNumber(nascore, score);
if (pave1) numaAddNumber(naave1, ave1);
if (pave2) numaAddNumber(naave2, ave2);
if (pnum1) numaAddNumber(nanum1, num1);
if (pnum2) numaAddNumber(nanum2, num2);
if (score > maxscore) {
maxscore = score;
maxindex = i;
}
num1prev = num1;
num2prev = num2;
ave1prev = ave1;
ave2prev = ave2;
}
/* Next, for all contiguous scores within a specified fraction
* of the max, choose the split point as the value with the
* minimum in the histogram. */
minscore = (1. - scorefract) * maxscore;
for (i = maxindex - 1; i >= 0; i--) {
numaGetFValue(nascore, i, &val);
if (val < minscore)
break;
}
minrange = i + 1;
for (i = maxindex + 1; i < n; i++) {
numaGetFValue(nascore, i, &val);
if (val < minscore)
break;
}
maxrange = i - 1;
numaGetFValue(na, minrange, &minval);
bestsplit = minrange;
for (i = minrange + 1; i <= maxrange; i++) {
numaGetFValue(na, i, &val);
if (val < minval) {
minval = val;
bestsplit = i;
}
}
if (psplitindex) *psplitindex = bestsplit;
if (pave1) numaGetFValue(naave1, bestsplit, pave1);
if (pave2) numaGetFValue(naave2, bestsplit, pave2);
if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1);
if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2);
if (pnascore) { /* debug mode */
fprintf(stderr, "minrange = %d, maxrange = %d\n", minrange, maxrange);
fprintf(stderr, "minval = %10.0f\n", minval);
gplotSimple1(nascore, GPLOT_X11, "junkoutroot",
"Score for split distribution");
*pnascore = nascore;
}
else
numaDestroy(&nascore);
if (pave1) numaDestroy(&naave1);
if (pave2) numaDestroy(&naave2);
if (pnum1) numaDestroy(&nanum1);
if (pnum2) numaDestroy(&nanum2);
return 0;
}
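/*
 *  A hypothetical usage sketch (not part of the library): split a
 *  synthetic bimodal histogram, such as a grayscale pixel histogram
 *  with a dark text mode and a bright background mode, into its two
 *  populations.
 *
 *      l_int32 i, splitindex;
 *      l_float32 ave1, ave2;
 *      NUMA *nah = numaCreate(256);
 *      for (i = 0; i < 256; i++)   // two modes separated by a sparse region
 *          numaAddNumber(nah, (i < 60) ? 100.0 : (i > 180) ? 300.0 : 2.0);
 *      numaSplitDistribution(nah, 0.1, &splitindex, &ave1, &ave2,
 *                            NULL, NULL, NULL);
 *      // splitindex falls in the sparse region between the modes;
 *      // ave1 and ave2 approximate the means of the two populations
 *      numaDestroy(&nah);
 */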
/*----------------------------------------------------------------------*
* Extrema finding *
*----------------------------------------------------------------------*/
/*!
* numaFindPeaks()
*
* Input: source na
* max number of peaks to be found
* fract1 (min fraction of peak value)
* fract2 (min slope)
* Return: peak na, or null on error.
*
* Notes:
* (1) The returned na consists of sets of four numbers representing
* the peak, in the following order:
* left edge; peak center; right edge; normalized peak area
*/
NUMA *
numaFindPeaks(NUMA *nas,
l_int32 nmax,
l_float32 fract1,
l_float32 fract2)
{
l_int32 i, k, n, maxloc, lloc, rloc;
l_float32 fmaxval, sum, total, newtotal, val, lastval;
l_float32 peakfract;
NUMA *na, *napeak;
PROCNAME("numaFindPeaks");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
numaGetSum(nas, &total);
/* We munge this copy */
if ((na = numaCopy(nas)) == NULL)
return (NUMA *)ERROR_PTR("na not made", procName, NULL);
if ((napeak = numaCreate(4 * nmax)) == NULL)
return (NUMA *)ERROR_PTR("napeak not made", procName, NULL);
for (k = 0; k < nmax; k++) {
numaGetSum(na, &newtotal);
if (newtotal == 0.0) /* sanity check */
break;
numaGetMax(na, &fmaxval, &maxloc);
sum = fmaxval;
lastval = fmaxval;
lloc = 0;
for (i = maxloc - 1; i >= 0; --i) {
numaGetFValue(na, i, &val);
if (val == 0.0) {
lloc = i + 1;
break;
}
if (val > fract1 * fmaxval) {
sum += val;
lastval = val;
continue;
}
if (lastval - val > fract2 * lastval) {
sum += val;
lastval = val;
continue;
}
lloc = i;
break;
}
lastval = fmaxval;
rloc = n - 1;
for (i = maxloc + 1; i < n; ++i) {
numaGetFValue(na, i, &val);
if (val == 0.0) {
rloc = i - 1;
break;
}
if (val > fract1 * fmaxval) {
sum += val;
lastval = val;
continue;
}
if (lastval - val > fract2 * lastval) {
sum += val;
lastval = val;
continue;
}
rloc = i;
break;
}
peakfract = sum / total;
numaAddNumber(napeak, lloc);
numaAddNumber(napeak, maxloc);
numaAddNumber(napeak, rloc);
numaAddNumber(napeak, peakfract);
for (i = lloc; i <= rloc; i++)
numaSetValue(na, i, 0.0);
}
numaDestroy(&na);
return napeak;
}
/*!
* numaFindExtrema()
*
* Input: nas (input values)
* delta (relative amount to resolve peaks and valleys)
* Return: nad (locations of extrema), or null on error
*
* Notes:
* (1) This returns a sequence of extrema (peaks and valleys).
* (2) The algorithm is analogous to that for determining
* mountain peaks. Suppose we have a local peak, with
* bumps on the side. Under what conditions can we consider
* those 'bumps' to be actual peaks? The answer: if the
* bump is separated from the peak by a saddle that is at
* least 500 feet below the bump.
* (3) Operationally, suppose we are looking for a peak.
* We are keeping the largest value we've seen since the
* last valley, and are looking for a value that is delta
* BELOW our current peak. When we find such a value,
* we label the peak, use the current value to label the
* valley, and then do the same operation in reverse (looking
* for a valley).
*/
NUMA *
numaFindExtrema(NUMA *nas,
l_float32 delta)
{
l_int32 i, n, found, loc, direction;
l_float32 startval, val, maxval, minval;
NUMA *nad;
PROCNAME("numaFindExtrema");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
nad = numaCreate(0);
/* We don't know if we'll find a peak or valley first,
* but use the first element of nas as the reference point.
* Break when we deviate by 'delta' from the first point. */
numaGetFValue(nas, 0, &startval);
found = FALSE;
for (i = 1; i < n; i++) {
numaGetFValue(nas, i, &val);
if (L_ABS(val - startval) >= delta) {
found = TRUE;
break;
}
}
if (!found)
return nad; /* it's empty */
/* Are we looking for a peak or a valley? */
if (val > startval) { /* peak */
direction = 1;
maxval = val;
}
else {
direction = -1;
minval = val;
}
loc = i;
/* Sweep through the rest of the array, recording alternating
* peak/valley extrema. */
for (i = i + 1; i < n; i++) {
numaGetFValue(nas, i, &val);
if (direction == 1 && val > maxval ) { /* new local max */
maxval = val;
loc = i;
}
else if (direction == -1 && val < minval ) { /* new local min */
minval = val;
loc = i;
}
else if (direction == 1 && (maxval - val >= delta)) {
numaAddNumber(nad, loc); /* save the current max location */
direction = -1; /* reverse: start looking for a min */
minval = val;
loc = i; /* current min location */
}
else if (direction == -1 && (val - minval >= delta)) {
numaAddNumber(nad, loc); /* save the current min location */
direction = 1; /* reverse: start looking for a max */
maxval = val;
loc = i; /* current max location */
}
}
/* Save the final extremum */
/* numaAddNumber(nad, loc); */
return nad;
}
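/*
 *  A hypothetical usage sketch (not part of the library): find the
 *  alternating peaks and valleys of an oscillating signal, ignoring
 *  wiggles smaller than delta = 5.0.
 *
 *      l_int32 i, nloc, loc;
 *      NUMA *nas = numaCreate(100);
 *      NUMA *nad;
 *      for (i = 0; i < 100; i++)
 *          numaAddNumber(nas, 50.0 + 40.0 * sin(0.3 * i));
 *      nad = numaFindExtrema(nas, 5.0);
 *      nloc = numaGetCount(nad);
 *      for (i = 0; i < nloc; i++)
 *          numaGetIValue(nad, i, &loc);   // index of the i-th extremum
 *      numaDestroy(&nas);
 *      numaDestroy(&nad);
 */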
/*----------------------------------------------------------------------*
* Threshold crossings and frequency analysis *
*----------------------------------------------------------------------*/
/*!
* numaSelectCrossingThreshold()
*
* Input: nax (<optional> numa of abscissa values; can be NULL)
* nay (signal)
* estthresh (estimated pixel threshold for crossing: e.g., for
* images, white <--> black; typ. ~120)
* &bestthresh (<return> robust estimate of threshold to use)
* Return: 0 if OK, 1 on error
*
 * Notes:
* (1) When a valid threshold is used, the number of crossings is
* a maximum, because none are missed. If no threshold intersects
* all the crossings, the crossings must be determined with
* numaCrossingsByPeaks().
* (2) @estthresh is an input estimate of the threshold that should
* be used. We compute the crossings with 41 thresholds
* (20 below and 20 above). There is a range in which the
* number of crossings is a maximum. Return a threshold
* in the center of this stable plateau of crossings.
* This can then be used with numaCrossingsByThreshold()
* to get a good estimate of crossing locations.
*/
l_int32
numaSelectCrossingThreshold(NUMA *nax,
NUMA *nay,
l_float32 estthresh,
l_float32 *pbestthresh)
{
l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen;
l_int32 val, maxval, nmax, count;
l_float32 thresh, fmaxval, fmodeval;
NUMA *nat, *nac;
PROCNAME("numaSelectCrossingThreshold");
if (!nay)
return ERROR_INT("nay not defined", procName, 1);
/* Compute the number of crossings for different thresholds */
nat = numaCreate(41);
for (i = 0; i < 41; i++) {
thresh = estthresh - 80.0 + 4.0 * i;
nac = numaCrossingsByThreshold(nax, nay, thresh);
numaAddNumber(nat, numaGetCount(nac));
numaDestroy(&nac);
}
/* Find the center of the plateau of max crossings, which
* extends from thresh[istart] to thresh[iend]. */
numaGetMax(nat, &fmaxval, NULL);
maxval = (l_int32)fmaxval;
nmax = 0;
for (i = 0; i < 41; i++) {
numaGetIValue(nat, i, &val);
if (val == maxval)
nmax++;
}
if (nmax < 3) { /* likely accidental max; try the mode */
numaGetMode(nat, &fmodeval, &count);
if (count > nmax && fmodeval > 0.5 * fmaxval)
maxval = (l_int32)fmodeval; /* use the mode */
}
inrun = FALSE;
iend = 40;
maxrunlen = 0;
for (i = 0; i < 41; i++) {
numaGetIValue(nat, i, &val);
if (val == maxval) {
if (!inrun) {
istart = i;
inrun = TRUE;
}
continue;
}
if (inrun && (val != maxval)) {
iend = i - 1;
runlen = iend - istart + 1;
inrun = FALSE;
if (runlen > maxrunlen) {
maxstart = istart;
maxend = iend;
maxrunlen = runlen;
}
}
}
if (inrun) {
runlen = i - istart;
if (runlen > maxrunlen) {
maxstart = istart;
maxend = i - 1;
maxrunlen = runlen;
}
}
#if 0
foundfirst = FALSE;
iend = 40;
for (i = 0; i < 41; i++) {
numaGetIValue(nat, i, &val);
if (val == maxval) {
if (!foundfirst) {
istart = i;
foundfirst = TRUE;
}
}
if ((val != maxval) && foundfirst) {
iend = i - 1;
break;
}
}
nmax = iend - istart + 1;
#endif
*pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend);
#if DEBUG_CROSSINGS
fprintf(stderr, "\nCrossings attain a maximum at %d thresholds, between:\n"
" thresh[%d] = %5.1f and thresh[%d] = %5.1f\n",
nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart,
maxend, estthresh - 80.0 + 4.0 * maxend);
fprintf(stderr, "The best choice: %5.1f\n", *pbestthresh);
fprintf(stderr, "Number of crossings at the 41 thresholds:");
numaWriteStream(stderr, nat);
#endif /* DEBUG_CROSSINGS */
numaDestroy(&nat);
return 0;
}
/*!
* numaCrossingsByThreshold()
*
* Input: nax (<optional> numa of abscissa values; can be NULL)
* nay (numa of ordinate values, corresponding to nax)
* thresh (threshold value for nay)
* Return: nad (abscissa pts at threshold), or null on error
*
* Notes:
* (1) If nax == NULL, we use startx and delx from nay to compute
* the crossing values in nad.
*/
NUMA *
numaCrossingsByThreshold(NUMA *nax,
NUMA *nay,
l_float32 thresh)
{
l_int32 i, n;
l_float32 startx, delx;
l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract;
NUMA *nad;
PROCNAME("numaCrossingsByThreshold");
if (!nay)
return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
n = numaGetCount(nay);
if (nax && (numaGetCount(nax) != n))
return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
nad = numaCreate(0);
numaGetFValue(nay, 0, &yval1);
numaGetXParameters(nay, &startx, &delx);
if (nax)
numaGetFValue(nax, 0, &xval1);
else
xval1 = startx;
for (i = 1; i < n; i++) {
numaGetFValue(nay, i, &yval2);
if (nax)
numaGetFValue(nax, i, &xval2);
else
xval2 = startx + i * delx;
delta1 = yval1 - thresh;
delta2 = yval2 - thresh;
if (delta1 == 0.0)
numaAddNumber(nad, xval1);
else if (delta2 == 0.0)
numaAddNumber(nad, xval2);
else if (delta1 * delta2 < 0.0) { /* crossing */
fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
crossval = xval1 + fract * (xval2 - xval1);
numaAddNumber(nad, crossval);
}
xval1 = xval2;
yval1 = yval2;
}
return nad;
}
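/*
 *  A hypothetical usage sketch (not part of the library): locate where
 *  a synthetic projection signal crosses a threshold, first letting
 *  numaSelectCrossingThreshold() pick a robust threshold near an
 *  initial estimate of 120.
 *
 *      l_int32 i;
 *      l_float32 bestthresh;
 *      NUMA *nay = numaCreate(200);
 *      NUMA *nad;
 *      for (i = 0; i < 200; i++)     // alternating low/high blocks of 20
 *          numaAddNumber(nay, ((i / 20) % 2) ? 220.0 : 30.0);
 *      numaSelectCrossingThreshold(NULL, nay, 120.0, &bestthresh);
 *      nad = numaCrossingsByThreshold(NULL, nay, bestthresh);
 *      // nad holds the interpolated x locations of the 9 crossings
 *      numaDestroy(&nay);
 *      numaDestroy(&nad);
 */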
/*!
* numaCrossingsByPeaks()
*
* Input: nax (<optional> numa of abscissa values)
* nay (numa of ordinate values, corresponding to nax)
* delta (parameter used to identify when a new peak can be found)
* Return: nad (abscissa pts at threshold), or null on error
*
* Notes:
* (1) If nax == NULL, we use startx and delx from nay to compute
* the crossing values in nad.
*/
NUMA *
numaCrossingsByPeaks(NUMA *nax,
NUMA *nay,
l_float32 delta)
{
l_int32 i, j, n, np, previndex, curindex;
l_float32 startx, delx;
l_float32 xval1, xval2, yval1, yval2, delta1, delta2;
l_float32 prevval, curval, thresh, crossval, fract;
NUMA *nap, *nad;
PROCNAME("numaCrossingsByPeaks");
if (!nax)
return (NUMA *)ERROR_PTR("nax not defined", procName, NULL);
if (!nay)
return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
n = numaGetCount(nax);
if (numaGetCount(nay) != n)
return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
/* Find the extrema. Also add last point in nay to get
* the last transition (from the last peak to the end).
* The number of crossings is 1 more than the number of extrema. */
nap = numaFindExtrema(nay, delta);
numaAddNumber(nap, n - 1);
np = numaGetCount(nap);
L_INFO_INT("Number of crossings: %d", procName, np);
/* Do all computation in index units of nax */
nad = numaCreate(np); /* output crossings, in nax units */
previndex = 0; /* prime the search with 1st point */
numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */
numaGetXParameters(nay, &startx, &delx);
for (i = 0; i < np; i++) {
numaGetIValue(nap, i, &curindex);
numaGetFValue(nay, curindex, &curval);
thresh = (prevval + curval) / 2.0;
/* fprintf(stderr, "thresh[%d] = %7.3f\n", i, thresh); */
if (nax)
numaGetFValue(nax, previndex, &xval1);
else
xval1 = startx + previndex * delx;
numaGetFValue(nay, previndex, &yval1);
for (j = previndex + 1; j <= curindex; j++) {
if (nax)
numaGetFValue(nax, j, &xval2);
else
xval2 = startx + j * delx;
numaGetFValue(nay, j, &yval2);
delta1 = yval1 - thresh;
delta2 = yval2 - thresh;
if (delta1 == 0.0) {
numaAddNumber(nad, xval1);
break;
}
else if (delta2 == 0.0) {
numaAddNumber(nad, xval2);
break;
}
else if (delta1 * delta2 < 0.0) { /* crossing */
fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
crossval = xval1 + fract * (xval2 - xval1);
numaAddNumber(nad, crossval);
break;
}
xval1 = xval2;
yval1 = yval2;
}
previndex = curindex;
prevval = curval;
}
numaDestroy(&nap);
return nad;
}
/*!
* numaEvalBestHaarParameters()
*
* Input: nas (numa of non-negative signal values)
* relweight (relative weight of (-1 comb) / (+1 comb)
* contributions to the 'convolution'. In effect,
* the convolution kernel is a comb consisting of
* alternating +1 and -weight.)
* nwidth (number of widths to consider)
* nshift (number of shifts to consider for each width)
* minwidth (smallest width to consider)
* maxwidth (largest width to consider)
* &bestwidth (<return> width giving largest score)
* &bestshift (<return> shift giving largest score)
* &bestscore (<optional return> convolution with
* "Haar"-like comb)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This does a linear sweep of widths, evaluating at @nshift
* shifts for each width, computing the score from a convolution
* with a long comb, and finding the (width, shift) pair that
* gives the maximum score. The best width is the "half-wavelength"
* of the signal.
* (2) The convolving function is a comb of alternating values
* +1 and -1 * relweight, separated by the width and phased by
* the shift. This is similar to a Haar transform, except
* there the convolution is performed with a square wave.
* (3) The function is useful for finding the line spacing
* and strength of line signal from pixel sum projections.
* (4) The score is normalized to the size of nas divided by
* the number of half-widths. For image applications, the input is
* typically an array of pixel projections, so one should
* normalize by dividing the score by the image width in the
* pixel projection direction.
*/
l_int32
numaEvalBestHaarParameters(NUMA *nas,
l_float32 relweight,
l_int32 nwidth,
l_int32 nshift,
l_float32 minwidth,
l_float32 maxwidth,
l_float32 *pbestwidth,
l_float32 *pbestshift,
l_float32 *pbestscore)
{
l_int32 i, j;
l_float32 delwidth, delshift, width, shift, score;
l_float32 bestwidth, bestshift, bestscore;
PROCNAME("numaEvalBestHaarParameters");
if (!nas)
return ERROR_INT("nas not defined", procName, 1);
if (!pbestwidth || !pbestshift)
return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1);
bestscore = 0.0;
delwidth = (maxwidth - minwidth) / (nwidth - 1.0);
for (i = 0; i < nwidth; i++) {
width = minwidth + delwidth * i;
delshift = width / (l_float32)(nshift);
for (j = 0; j < nshift; j++) {
shift = j * delshift;
numaEvalHaarSum(nas, width, shift, relweight, &score);
if (score > bestscore) {
bestscore = score;
bestwidth = width;
bestshift = shift;
#if DEBUG_FREQUENCY
fprintf(stderr, "width = %7.3f, shift = %7.3f, score = %7.3f\n",
width, shift, score);
#endif /* DEBUG_FREQUENCY */
}
}
}
*pbestwidth = bestwidth;
*pbestshift = bestshift;
if (pbestscore)
*pbestscore = bestscore;
return 0;
}
/*!
* numaEvalHaarSum()
*
* Input: nas (numa of non-negative signal values)
* width (distance between +1 and -1 in convolution comb)
* shift (phase of the comb: location of first +1)
* relweight (relative weight of (-1 comb) / (+1 comb)
* contributions to the 'convolution'. In effect,
* the convolution kernel is a comb consisting of
* alternating +1 and -weight.)
* &score (<return> convolution with "Haar"-like comb)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This does a convolution with a comb of alternating values
* +1 and -relweight, separated by the width and phased by the shift.
* This is similar to a Haar transform, except that for Haar,
* (1) the convolution kernel is symmetric about 0, so the
* relweight is 1.0, and
* (2) the convolution is performed with a square wave.
* (2) The score is normalized to the size of nas divided by
* twice the "width". For image applications, the input is
* typically an array of pixel projections, so one should
* normalize by dividing the score by the image width in the
* pixel projection direction.
* (3) To get a Haar-like result, use relweight = 1.0. For detecting
* signals where you expect every other sample to be close to
* zero, as with barcodes or filtered text lines, you can
* use relweight > 1.0.
*/
l_int32
numaEvalHaarSum(NUMA *nas,
l_float32 width,
l_float32 shift,
l_float32 relweight,
l_float32 *pscore)
{
l_int32 i, n, nsamp, index;
l_float32 score, weight, val;
PROCNAME("numaEvalHaarSum");
if (!pscore)
return ERROR_INT("&score not defined", procName, 1);
*pscore = 0.0;
if (!nas)
return ERROR_INT("nas not defined", procName, 1);
if ((n = numaGetCount(nas)) < 2 * width)
return ERROR_INT("nas size too small", procName, 1);
score = 0.0;
nsamp = (l_int32)((n - shift) / width);
for (i = 0; i < nsamp; i++) {
index = (l_int32)(shift + i * width);
weight = (i % 2) ? -1.0 * relweight : 1.0;  /* the first sample gets +1 */
numaGetFValue(nas, index, &val);
score += weight * val;
}
*pscore = 2.0 * width * score / (l_float32)n;
return 0;
}
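/*
 *  A hypothetical usage sketch (not part of the library): estimate the
 *  spacing of a periodic projection signal.  The best width found is
 *  the "half-wavelength", so the period of the signal is 2 * bestwidth.
 *
 *      l_int32 i;
 *      l_float32 bestwidth, bestshift, bestscore;
 *      NUMA *nas = numaCreate(400);
 *      for (i = 0; i < 400; i++)     // period-20 signal: 10 on, 10 off
 *          numaAddNumber(nas, ((i / 10) % 2) ? 0.0 : 100.0);
 *      numaEvalBestHaarParameters(nas, 1.0, 30, 10, 5.0, 20.0,
 *                                 &bestwidth, &bestshift, &bestscore);
 *      // bestwidth comes out close to 10.0 (half the 20-sample period)
 *      numaDestroy(&nas);
 */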