/*====================================================================*
- Copyright (C) 2001 Leptonica. All rights reserved.
- This software is distributed in the hope that it will be
- useful, but with NO WARRANTY OF ANY KIND.
- No author or distributor accepts responsibility to anyone for the
- consequences of using this software, or for whether it serves any
- particular purpose or works at all, unless he or she says so in
- writing. Everyone is granted permission to copy, modify and
- redistribute this source code, for commercial or non-commercial
- purposes, with the following restrictions: (1) the origin of this
- source code must not be misrepresented; (2) modified versions must
- be plainly marked as such; and (3) this notice may not be removed
- or altered from any source or modified source distribution.
*====================================================================*/
/*
* numafunc2.c
*
* Transformations
* NUMA *numaTransform()
* NUMA *numaConvolve()
* NUMA *numaConvertToInt()
*
* Histogram generation and statistics
* NUMA *numaMakeHistogram()
* NUMA *numaMakeHistogramAuto()
* NUMA *numaMakeHistogramClipped()
* NUMA *numaRebinHistogram()
* NUMA *numaNormalizeHistogram()
* l_int32 numaGetStatsUsingHistogram()
* l_int32 numaGetHistogramStats()
* l_int32 numaGetHistogramStatsOnInterval()
* l_int32 numaMakeRankFromHistogram()
* l_int32 numaHistogramGetRankFromVal()
* l_int32 numaHistogramGetValFromRank()
*
* Splitting a distribution
* l_int32 numaSplitDistribution()
*
* Extrema finding
* NUMA *numaFindPeaks()
* NUMA *numaFindExtrema()
*
* Threshold crossings and frequency analysis
* l_int32 numaSelectCrossingThreshold()
* NUMA *numaCrossingsByThreshold()
* NUMA *numaCrossingsByPeaks()
* NUMA *numaEvalBestHaarParameters()
* l_int32 numaEvalHaarSum()
*
* Things to remember when using the Numa:
*
* (1) The numa is a struct, not an array. Always use accessors
* (see numabasic.c), never the fields directly.
*
* (2) The number array holds l_float32 values. It can also
* be used to store l_int32 values. See numabasic.c for
* details on using the accessors.
*
* (3) Occasionally, in the comments we denote the i-th element of a
* numa by na[i]. This is conceptual only -- the numa is not an array!
*
* Some general comments on histograms:
*
* (1) Histograms are the generic statistical representation of
* the data about some attribute. Typically they're not
* normalized -- they simply give the number of occurrences
* within each range of values of the attribute. This range
* of values is referred to as a 'bucket'. For example,
* the histogram could specify how many connected components
* are found for each value of their width; in that case,
* the bucket size is 1.
*
* (2) In leptonica, all buckets have the same size. Histograms
* are therefore specified by a numa of occurrences, along
* with two other numbers: the 'value' associated with the
* occupants of the first bucket and the size (i.e., 'width')
* of each bucket. These two numbers then allow us to calculate
* the value associated with the occupants of each bucket.
* These numbers are fields in the numa, initialized to
* a startx value of 0.0 and a binsize of 1.0. Accessors for
* these fields are functions numa*XParameters(). All histograms
* must have these two numbers properly set.
*/
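/*
 *  A hypothetical usage sketch (not part of the library), illustrating
 *  how the startx and binsize fields relate a bin index i to its
 *  x value, x(i) = startx + i * binsize:
 *
 *      NUMA      *nah;
 *      l_float32  startx, binsize, x3;
 *      nah = numaCreate(10);                  // histogram with 10 bins
 *      numaSetXParameters(nah, 100.0, 5.0);   // first bin at x = 100, width 5
 *      numaGetXParameters(nah, &startx, &binsize);
 *      x3 = startx + 3 * binsize;             // x value of bin 3 is 115.0
 *      numaDestroy(&nah);
 */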
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include "allheaders.h"
/* bin sizes in numaMakeHistogram() */
static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,
    2000, 5000, 10000, 20000, 50000, 100000, 200000,
    500000, 1000000, 2000000, 5000000, 10000000,
    20000000, 50000000, 100000000};
static const l_int32 NBinSizes = 24;
#ifndef NO_CONSOLE_IO
#define DEBUG_HISTO 0
#define DEBUG_CROSSINGS 0
#define DEBUG_FREQUENCY 0
#endif /* ~NO_CONSOLE_IO */
/*----------------------------------------------------------------------*
* Transformations *
*----------------------------------------------------------------------*/
/*!
* numaTransform()
*
* Input: nas
* shift (add this to each number)
* scale (multiply each number by this)
* Return: na with all values shifted and scaled, or null on error
*
 * Notes:
 * (1) Each number is scaled and then shifted.
 * (2) The operation sequence is opposite to that for Box and Pta,
 * where each number is shifted first and then scaled.
*/
NUMA *
numaTransform(NUMA *nas,
l_float32 shift,
l_float32 scale)
{
l_int32 i, n;
l_float32 val;
NUMA *nad;
PROCNAME("numaTransform");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
if ((nad = numaCreate(n)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
for (i = 0; i < n; i++) {
numaGetFValue(nas, i, &val);
val = scale * val + shift;
numaAddNumber(nad, val);
}
return nad;
}
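/*
 *  A hypothetical usage sketch (not part of the library): scale each
 *  value by 2 and then add 10, i.e., nad[i] = 2 * nas[i] + 10.
 *
 *      NUMA *nas = numaCreate(3);
 *      NUMA *nad;
 *      numaAddNumber(nas, 1.0);
 *      numaAddNumber(nas, 2.0);
 *      numaAddNumber(nas, 3.0);
 *      nad = numaTransform(nas, 10.0, 2.0);   // values: 12, 14, 16
 *      numaDestroy(&nas);
 *      numaDestroy(&nad);
 */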
/*!
* numaConvolve()
*
* Input: na
* halfwidth (of rectangular filter, minus the center)
* Return: na (after low-pass filtering), or null on error
*
* Notes:
* (1) Full convolution takes place only from i = halfwidth to
* i = n - halfwidth - 1. We do the end parts using only
* the partial array available. We do not pad the ends with 0.
* (2) This implementation assumes specific fields in the Numa!
*/
NUMA *
numaConvolve(NUMA *na,
l_int32 halfwidth)
{
l_int32 i, n, rval;
l_float32 sum, norm;
l_float32 *array, *carray, *sumarray;
NUMA *nac;
PROCNAME("numaConvolve");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
n = numaGetCount(na);
if (2 * halfwidth + 1 > n)
L_WARNING("filter wider than input array!", procName);
array = na->array;
if ((nac = numaCreate(n)) == NULL)
return (NUMA *)ERROR_PTR("nac not made", procName, NULL);
carray = nac->array;
nac->n = n; /* fill with zeroes */
/* Make sum array; note the indexing */
if ((sumarray = (l_float32 *)CALLOC(n + 1, sizeof(l_float32))) == NULL)
return (NUMA *)ERROR_PTR("sumarray not made", procName, NULL);
sum = 0.0;
sumarray[0] = 0.0;
for (i = 0; i < n; i++) {
sum += array[i];
sumarray[i + 1] = sum;
}
/* Central part */
norm = 1. / (2 * halfwidth + 1);
rval = n - halfwidth;
for (i = halfwidth; i < rval; ++i)
carray[i] = norm *
(sumarray[i + halfwidth + 1] - sumarray[i - halfwidth]);
/* Left side */
for (i = 0; i < halfwidth; i++)
carray[i] = sumarray[i + halfwidth + 1] / (halfwidth + i + 1);
/* Right side */
for (i = rval; i < n; i++)
carray[i] = (1. / (n - i + halfwidth)) *
(sumarray[n] - sumarray[i - halfwidth]);
FREE(sumarray);
return nac;
}
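/*
 *  A hypothetical usage sketch (not part of the library): smooth a
 *  spiky synthetic signal with a rectangular filter of full width
 *  2 * halfwidth + 1 = 5 samples.  Near the ends, the average is
 *  taken over only the samples that are available.
 *
 *      l_int32 i;
 *      NUMA *nas = numaCreate(100);
 *      NUMA *nasm;
 *      for (i = 0; i < 100; i++)
 *          numaAddNumber(nas, (i % 10 == 0) ? 50.0 : 10.0);
 *      nasm = numaConvolve(nas, 2);   // 5-point moving average
 *      numaDestroy(&nas);
 *      numaDestroy(&nasm);
 */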
/*!
* numaConvertToInt()
*
 * Input: nas
* Return: na with all values rounded to nearest integer, or
* null on error
*/
NUMA *
numaConvertToInt(NUMA *nas)
{
l_int32 i, n, ival;
NUMA *nad;
PROCNAME("numaConvertToInt");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
if ((nad = numaCreate(n)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
for (i = 0; i < n; i++) {
numaGetIValue(nas, i, &ival);
numaAddNumber(nad, ival);
}
return nad;
}
/*----------------------------------------------------------------------*
* Histogram generation and statistics *
*----------------------------------------------------------------------*/
/*!
* numaMakeHistogram()
*
* Input: na
* maxbins (max number of histogram bins)
* &binsize (<return> size of histogram bins)
* &binstart (<optional return> start val of minimum bin;
* input NULL to force start at 0)
 * Return: na consisting of histogram of integerized values,
* or null on error.
*
 * Notes:
* (1) This simple interface is designed for integer data.
* The bins are of integer width and start on integer boundaries,
* so the results on float data will not have high precision.
* (2) Specify the max number of input bins. Then @binsize,
* the size of bins necessary to accommodate the input data,
* is returned. It is one of the sequence:
* {1, 2, 5, 10, 20, 50, ...}.
* (3) If &binstart is given, all values are accommodated,
* and the min value of the starting bin is returned.
* Otherwise, all negative values are discarded and
* the histogram bins start at 0.
*/
NUMA *
numaMakeHistogram(NUMA *na,
l_int32 maxbins,
l_int32 *pbinsize,
l_int32 *pbinstart)
{
l_int32 i, n, ival, hval;
l_int32 iminval, imaxval, range, binsize, nbins, ibin;
l_float32 val, ratio;
NUMA *nai, *nahist;
PROCNAME("numaMakeHistogram");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
if (!pbinsize)
return (NUMA *)ERROR_PTR("&binsize not defined", procName, NULL);
/* Determine input range */
numaGetMin(na, &val, NULL);
iminval = (l_int32)(val + 0.5);
numaGetMax(na, &val, NULL);
imaxval = (l_int32)(val + 0.5);
if (pbinstart == NULL) { /* clip negative vals; start from 0 */
iminval = 0;
if (imaxval < 0)
return (NUMA *)ERROR_PTR("all values < 0", procName, NULL);
}
/* Determine binsize */
range = imaxval - iminval + 1;
if (range > maxbins - 1) {
ratio = (l_float64)range / (l_float64)maxbins;
binsize = 0;
for (i = 0; i < NBinSizes; i++) {
if (ratio < BinSizeArray[i]) {
binsize = BinSizeArray[i];
break;
}
}
if (binsize == 0)
return (NUMA *)ERROR_PTR("numbers too large", procName, NULL);
}
else
binsize = 1;
*pbinsize = binsize;
nbins = 1 + range / binsize; /* +1 seems to be sufficient */
/* Redetermine iminval */
if (pbinstart && binsize > 1) {
if (iminval >= 0)
iminval = binsize * (iminval / binsize);
else
iminval = binsize * ((iminval - binsize + 1) / binsize);
}
if (pbinstart)
*pbinstart = iminval;
#if DEBUG_HISTO
fprintf(stderr, " imaxval = %d, range = %d, nbins = %d\n",
imaxval, range, nbins);
#endif /* DEBUG_HISTO */
/* Use integerized data for input */
if ((nai = numaConvertToInt(na)) == NULL)
return (NUMA *)ERROR_PTR("nai not made", procName, NULL);
n = numaGetCount(nai);
/* Make histogram, converting value in input array
* into a bin number for this histogram array. */
if ((nahist = numaCreate(nbins)) == NULL)
return (NUMA *)ERROR_PTR("nahist not made", procName, NULL);
numaSetCount(nahist, nbins);
numaSetXParameters(nahist, iminval, binsize);
for (i = 0; i < n; i++) {
numaGetIValue(nai, i, &ival);
ibin = (ival - iminval) / binsize;
if (ibin >= 0 && ibin < nbins) {
numaGetIValue(nahist, ibin, &hval);
numaSetValue(nahist, ibin, hval + 1.0);
}
}
numaDestroy(&nai);
return nahist;
}
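/*
 *  A hypothetical usage sketch (not part of the library): histogram a
 *  set of synthetic integer values with at most 50 bins, letting the
 *  function choose the bin size from {1, 2, 5, 10, ...} and report
 *  where the bins start.
 *
 *      l_int32 i, binsize, binstart;
 *      NUMA *na = numaCreate(1000);
 *      NUMA *nahist;
 *      for (i = 0; i < 1000; i++)
 *          numaAddNumber(na, (i * 37) % 240);   // values in [0 ... 239]
 *      nahist = numaMakeHistogram(na, 50, &binsize, &binstart);
 *      // here binsize is 5, because the range of 240 values does not
 *      // fit into 50 bins of size 1 or 2; binstart is 0
 *      numaDestroy(&na);
 *      numaDestroy(&nahist);
 */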
/*!
* numaMakeHistogramAuto()
*
* Input: na (numa of floats; these may be integers)
* maxbins (max number of histogram bins; >= 1)
 * Return: na consisting of histogram of quantized float values,
* or null on error.
*
* Notes:
* (1) This simple interface is designed for accurate binning
* of both integer and float data.
* (2) If the array data is integers, and the range of integers
* is smaller than @maxbins, they are binned as they fall,
* with binsize = 1.
* (3) If the range of data, (maxval - minval), is larger than
* @maxbins, or if the data is floats, they are binned into
* exactly @maxbins bins.
* (4) Unlike numaMakeHistogram(), these bins in general have
* non-integer location and width, even for integer data.
*/
NUMA *
numaMakeHistogramAuto(NUMA *na,
l_int32 maxbins)
{
l_int32 i, n, imin, imax, irange, ibin, ival, allints;
l_float32 minval, maxval, range, binsize, fval;
NUMA *nah;
PROCNAME("numaMakeHistogramAuto");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
maxbins = L_MAX(1, maxbins);
/* Determine input range */
numaGetMin(na, &minval, NULL);
numaGetMax(na, &maxval, NULL);
/* Determine if values are all integers */
n = numaGetCount(na);
numaHasOnlyIntegers(na, maxbins, &allints);
/* Do simple integer binning if possible */
if (allints && (maxval - minval < maxbins)) {
imin = (l_int32)minval;
imax = (l_int32)maxval;
irange = imax - imin + 1;
nah = numaCreate(irange);
numaSetCount(nah, irange); /* init */
numaSetXParameters(nah, minval, 1.0);
for (i = 0; i < n; i++) {
numaGetIValue(na, i, &ival);
ibin = ival - imin;
numaGetIValue(nah, ibin, &ival);
numaSetValue(nah, ibin, ival + 1.0);
}
return nah;
}
/* Do float binning, even if the data is integers. */
range = maxval - minval;
binsize = range / (l_float32)maxbins;
if (range == 0.0) {
nah = numaCreate(1);
numaSetXParameters(nah, minval, binsize);
numaAddNumber(nah, n);
return nah;
}
nah = numaCreate(maxbins);
numaSetCount(nah, maxbins);
numaSetXParameters(nah, minval, binsize);
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &fval);
ibin = (l_int32)((fval - minval) / binsize);
ibin = L_MIN(ibin, maxbins - 1);  /* keep fval == maxval in the last bin */
numaGetIValue(nah, ibin, &ival);
numaSetValue(nah, ibin, ival + 1.0);
}
return nah;
}
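/*
 *  A hypothetical usage sketch (not part of the library): bin float
 *  data into exactly 64 bins.  The bin width is (maxval - minval) / 64,
 *  so the bins in general have non-integer boundaries.
 *
 *      l_int32 i;
 *      l_float32 startx, binsize;
 *      NUMA *na = numaCreate(500);
 *      NUMA *nah;
 *      for (i = 0; i < 500; i++)
 *          numaAddNumber(na, 0.37 * i);               // float data
 *      nah = numaMakeHistogramAuto(na, 64);
 *      numaGetXParameters(nah, &startx, &binsize);    // startx = 0.0 here
 *      numaDestroy(&na);
 *      numaDestroy(&nah);
 */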
/*!
* numaMakeHistogramClipped()
*
* Input: na
* binsize (typically 1.0)
* maxsize (of histogram ordinate)
* Return: na (histogram of bins of size @binsize, starting with
* the na[0] (x = 0.0) and going up to a maximum of
* x = @maxsize, by increments of @binsize), or null on error
*
* Notes:
* (1) This simple function generates a histogram of values
* from na, discarding all values < 0.0 or greater than
* min(@maxsize, maxval), where maxval is the maximum value in na.
* The histogram data is put in bins of size delx = @binsize,
* starting at x = 0.0. We use as many bins as are
* needed to hold the data.
*/
NUMA *
numaMakeHistogramClipped(NUMA *na,
l_float32 binsize,
l_float32 maxsize)
{
l_int32 i, n, nbins, ival, ibin;
l_float32 val, maxval;
NUMA *nad;
PROCNAME("numaMakeHistogramClipped");
if (!na)
return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
if (binsize <= 0.0)
return (NUMA *)ERROR_PTR("binsize must be > 0.0", procName, NULL);
if (binsize > maxsize)
binsize = maxsize; /* just one bin */
numaGetMax(na, &maxval, NULL);
n = numaGetCount(na);
maxsize = L_MIN(maxsize, maxval);
nbins = (l_int32)(maxsize / binsize) + 1;
/* fprintf(stderr, "maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */
if ((nad = numaCreate(nbins)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
numaSetXParameters(nad, 0.0, binsize);
numaSetCount(nad, nbins); /* interpret zeroes in bins as data */
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &val);
ibin = (l_int32)(val / binsize);
if (ibin >= 0 && ibin < nbins) {
numaGetIValue(nad, ibin, &ival);
numaSetValue(nad, ibin, ival + 1.0);
}
}
return nad;
}
/*!
* numaRebinHistogram()
*
* Input: nas (input histogram)
* newsize (number of old bins contained in each new bin)
* Return: nad (more coarsely re-binned histogram), or null on error
*/
NUMA *
numaRebinHistogram(NUMA *nas,
l_int32 newsize)
{
l_int32 i, j, ns, nd, index, count, val;
l_float32 start, oldsize;
NUMA *nad;
PROCNAME("numaRebinHistogram");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
if (newsize <= 1)
return (NUMA *)ERROR_PTR("newsize must be > 1", procName, NULL);
if ((ns = numaGetCount(nas)) == 0)
return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
nd = (ns + newsize - 1) / newsize;
if ((nad = numaCreate(nd)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
numaGetXParameters(nas, &start, &oldsize);
numaSetXParameters(nad, start, oldsize * newsize);
for (i = 0; i < nd; i++) { /* new bins */
count = 0;
index = i * newsize;
for (j = 0; j < newsize; j++) {
if (index < ns) {
numaGetIValue(nas, index, &val);
count += val;
index++;
}
}
numaAddNumber(nad, count);
}
return nad;
}
/*!
* numaNormalizeHistogram()
*
* Input: nas (input histogram)
* area (target sum of all numbers in dest histogram;
* e.g., use area = 1.0 if this represents a
* probability distribution)
* Return: nad (normalized histogram), or null on error
*/
NUMA *
numaNormalizeHistogram(NUMA *nas,
l_float32 area)
{
l_int32 i, ns;
l_float32 sum, factor, fval;
NUMA *nad;
PROCNAME("numaNormalizeHistogram");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
if (area <= 0.0)
return (NUMA *)ERROR_PTR("area must be > 0.0", procName, NULL);
if ((ns = numaGetCount(nas)) == 0)
return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
numaGetSum(nas, &sum);
factor = area / sum;
if ((nad = numaCreate(ns)) == NULL)
return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
numaCopyXParameters(nad, nas);
for (i = 0; i < ns; i++) {
numaGetFValue(nas, i, &fval);
fval *= factor;
numaAddNumber(nad, fval);
}
return nad;
}
/*!
* numaGetStatsUsingHistogram()
*
* Input: na (an arbitrary set of numbers; not ordered and not
* a histogram)
* maxbins (the maximum number of bins to be allowed in
* the histogram; use 0 for consecutive integer bins)
* &min (<optional return> min value of set)
* &max (<optional return> max value of set)
* &mean (<optional return> mean value of set)
* &variance (<optional return> variance)
* &median (<optional return> median value of set)
* rank (in [0.0 ... 1.0]; median has a rank 0.5; ignored
* if &rval == NULL)
* &rval (<optional return> value in na corresponding to @rank)
* &histo (<optional return> Numa histogram; use NULL to prevent)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This is a simple interface for gathering statistics
* from a numa, where a histogram is used 'under the covers'
* to avoid sorting if a rank value is requested. In that case,
* by using a histogram we are trading speed for accuracy, because
* the values in @na are quantized to the center of a set of bins.
 * (2) If neither the median, another rank value, nor the histogram
 * is requested, the calculation is performed entirely on the input Numa.
* (3) The variance is the average of the square of the
* difference from the mean. The median is the value in na
* with rank 0.5.
* (4) There are two situations where this gives rank results with
 * accuracy comparable to computing statistics directly on the input
* data, without binning into a histogram:
* (a) the data is integers and the range of data is less than
* @maxbins, and
* (b) the data is floats and the range is small compared to
* @maxbins, so that the binsize is much less than 1.
* (5) If a histogram is used and the numbers in the Numa extend
* over a large range, you can limit the required storage by
* specifying the maximum number of bins in the histogram.
* Use @maxbins == 0 to force the bin size to be 1.
* (6) This optionally returns the median and one arbitrary rank value.
* If you need several rank values, return the histogram and use
* numaHistogramGetValFromRank(nah, rank, &rval)
* multiple times.
*/
l_int32
numaGetStatsUsingHistogram(NUMA *na,
l_int32 maxbins,
l_float32 *pmin,
l_float32 *pmax,
l_float32 *pmean,
l_float32 *pvariance,
l_float32 *pmedian,
l_float32 rank,
l_float32 *prval,
NUMA **phisto)
{
l_int32 i, n;
l_float32 minval, maxval, fval, mean, sum;
NUMA *nah;
PROCNAME("numaGetStatsUsingHistogram");
if (pmin) *pmin = 0.0;
if (pmax) *pmax = 0.0;
if (pmean) *pmean = 0.0;
if (pmedian) *pmedian = 0.0;
if (pvariance) *pvariance = 0.0;
if (!na)
return ERROR_INT("na not defined", procName, 1);
if ((n = numaGetCount(na)) == 0)
return ERROR_INT("numa is empty", procName, 1);
numaGetMin(na, &minval, NULL);
numaGetMax(na, &maxval, NULL);
if (pmin) *pmin = minval;
if (pmax) *pmax = maxval;
if (pmean || pvariance) {
sum = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &fval);
sum += fval;
}
mean = sum / (l_float32)n;
if (pmean) *pmean = mean;
}
if (pvariance) {
sum = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &fval);
sum += fval * fval;
}
*pvariance = sum / (l_float32)n - mean * mean;
}
if (!pmedian && !prval && !phisto)
return 0;
nah = numaMakeHistogramAuto(na, maxbins);
if (pmedian)
numaHistogramGetValFromRank(nah, 0.5, pmedian);
if (prval)
numaHistogramGetValFromRank(nah, rank, prval);
if (phisto)
*phisto = nah;
else
numaDestroy(&nah);
return 0;
}
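/*
 *  A hypothetical usage sketch (not part of the library): get the mean,
 *  the median and the 90th percentile of a set of synthetic values in
 *  one call, using up to 256 histogram bins under the covers.
 *
 *      l_int32 i;
 *      l_float32 mean, median, rval90;
 *      NUMA *na = numaCreate(200);
 *      for (i = 0; i < 200; i++)
 *          numaAddNumber(na, (l_float32)(i * i) / 200.0);
 *      numaGetStatsUsingHistogram(na, 256, NULL, NULL, &mean, NULL,
 *                                 &median, 0.9, &rval90, NULL);
 *      numaDestroy(&na);
 */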
/*!
* numaGetHistogramStats()
*
* Input: nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
* startx (x value of first bin: x(0))
* deltax (x increment between bins; the bin size; x(1) - x(0))
* &xmean (<optional return> mean value of histogram)
* &xmedian (<optional return> median value of histogram)
* &xmode (<optional return> mode value of histogram:
* xmode = x(imode), where y(xmode) >= y(x(i)) for
* all i != imode)
* &xvariance (<optional return> variance of x)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If the histogram represents the relation y(x), the
* computed values that are returned are the x values.
* These are NOT the bucket indices i; they are related to the
* bucket indices by
* x(i) = startx + i * deltax
*/
l_int32
numaGetHistogramStats(NUMA *nahisto,
l_float32 startx,
l_float32 deltax,
l_float32 *pxmean,
l_float32 *pxmedian,
l_float32 *pxmode,
l_float32 *pxvariance)
{
PROCNAME("numaGetHistogramStats");
if (pxmean) *pxmean = 0.0;
if (pxmedian) *pxmedian = 0.0;
if (pxmode) *pxmode = 0.0;
if (pxvariance) *pxvariance = 0.0;
if (!nahisto)
return ERROR_INT("nahisto not defined", procName, 1);
return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, 0,
pxmean, pxmedian, pxmode,
pxvariance);
}
/*!
* numaGetHistogramStatsOnInterval()
*
* Input: nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
* startx (x value of first bin: x(0))
* deltax (x increment between bins; the bin size; x(1) - x(0))
* ifirst (first bin to use for collecting stats)
* ilast (last bin for collecting stats; use 0 to go to the end)
* &xmean (<optional return> mean value of histogram)
* &xmedian (<optional return> median value of histogram)
* &xmode (<optional return> mode value of histogram:
* xmode = x(imode), where y(xmode) >= y(x(i)) for
* all i != imode)
* &xvariance (<optional return> variance of x)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If the histogram represents the relation y(x), the
* computed values that are returned are the x values.
* These are NOT the bucket indices i; they are related to the
* bucket indices by
* x(i) = startx + i * deltax
*/
l_int32
numaGetHistogramStatsOnInterval(NUMA *nahisto,
l_float32 startx,
l_float32 deltax,
l_int32 ifirst,
l_int32 ilast,
l_float32 *pxmean,
l_float32 *pxmedian,
l_float32 *pxmode,
l_float32 *pxvariance)
{
l_int32 i, n, imax;
l_float32 sum, sumval, halfsum, moment, var, x, y, ymax;
PROCNAME("numaGetHistogramStats");
if (pxmean) *pxmean = 0.0;
if (pxmedian) *pxmedian = 0.0;
if (pxmode) *pxmode = 0.0;
if (pxvariance) *pxvariance = 0.0;
if (!nahisto)
return ERROR_INT("nahisto not defined", procName, 1);
if (!pxmean && !pxmedian && !pxmode && !pxvariance)
return ERROR_INT("nothing to compute", procName, 1);
n = numaGetCount(nahisto);
if (ilast <= 0) ilast = n - 1;
if (ifirst < 0) ifirst = 0;
if (ifirst > ilast || ifirst > n - 1)
return ERROR_INT("ifirst is too large", procName, 1);
for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) {
x = startx + i * deltax;
numaGetFValue(nahisto, i, &y);
sum += y;
moment += x * y;
var += x * x * y;
}
if (sum == 0.0)
return ERROR_INT("sum is 0", procName, 1);
if (pxmean)
*pxmean = moment / sum;
if (pxvariance)
*pxvariance = var / sum - moment * moment / (sum * sum);
if (pxmedian) {
halfsum = sum / 2.0;
for (sumval = 0.0, i = ifirst; i <= ilast; i++) {
numaGetFValue(nahisto, i, &y);
sumval += y;
if (sumval >= halfsum) {
*pxmedian = startx + i * deltax;
break;
}
}
}
if (pxmode) {
ymax = -1.0e10;
for (i = ifirst; i <= ilast; i++) {
numaGetFValue(nahisto, i, &y);
if (y > ymax) {
ymax = y;
imax = i;
}
}
*pxmode = startx + imax * deltax;
}
return 0;
}
/*!
* numaMakeRankFromHistogram()
*
 * Input: startx (xval corresponding to first element in nasy)
 * deltax (x increment between array elements in nasy)
* nasy (input histogram, assumed equally spaced)
* npts (number of points to evaluate rank function)
* &nax (<optional return> array of x values in range)
* &nay (<return> rank array of specified npts)
* Return: 0 if OK, 1 on error
*/
l_int32
numaMakeRankFromHistogram(l_float32 startx,
l_float32 deltax,
NUMA *nasy,
l_int32 npts,
NUMA **pnax,
NUMA **pnay)
{
l_int32 i, n;
l_float32 sum, fval;
NUMA *nan, *nar;
PROCNAME("numaMakeRankFromHistogram");
if (pnax) *pnax = NULL;
if (!pnay)
return ERROR_INT("&nay not defined", procName, 1);
*pnay = NULL;
if (!nasy)
return ERROR_INT("nasy not defined", procName, 1);
if ((n = numaGetCount(nasy)) == 0)
return ERROR_INT("no bins in nas", procName, 1);
/* Normalize and generate the rank array corresponding to
* the binned histogram. */
nan = numaNormalizeHistogram(nasy, 1.0);
nar = numaCreate(n + 1); /* rank numa corresponding to nan */
sum = 0.0;
numaAddNumber(nar, sum); /* first element is 0.0 */
for (i = 0; i < n; i++) {
numaGetFValue(nan, i, &fval);
sum += fval;
numaAddNumber(nar, sum);
}
/* Compute rank array on full range with specified
* number of points and correspondence to x-values. */
numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP,
startx, startx + n * deltax, npts,
pnax, pnay);
numaDestroy(&nan);
numaDestroy(&nar);
return 0;
}
/*!
* numaHistogramGetRankFromVal()
*
* Input: na (histogram)
* rval (value of input sample for which we want the rank)
* &rank (<return> fraction of total samples below rval)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If we think of the histogram as a function y(x), normalized
* to 1, for a given input value of x, this computes the
* rank of x, which is the integral of y(x) from the start
* value of x to the input value.
* (2) This function only makes sense when applied to a Numa that
* is a histogram. The values in the histogram can be ints and
* floats, and are computed as floats. The rank is returned
* as a float between 0.0 and 1.0.
* (3) The numa parameters startx and binsize are used to
* compute x from the Numa index i.
*/
l_int32
numaHistogramGetRankFromVal(NUMA *na,
l_float32 rval,
l_float32 *prank)
{
l_int32 i, ibinval, n;
l_float32 startval, binsize, binval, maxval, fractval, total, sum, val;
PROCNAME("numaHistogramGetRankFromVal");
if (!prank)
return ERROR_INT("prank not defined", procName, 1);
*prank = 0.0;
if (!na)
return ERROR_INT("na not defined", procName, 1);
numaGetXParameters(na, &startval, &binsize);
n = numaGetCount(na);
if (rval < startval)
return 0;
maxval = startval + n * binsize;
if (rval > maxval) {
*prank = 1.0;
return 0;
}
binval = (rval - startval) / binsize;
ibinval = (l_int32)binval;
if (ibinval >= n) {
*prank = 1.0;
return 0;
}
fractval = binval - (l_float32)ibinval;
sum = 0.0;
for (i = 0; i < ibinval; i++) {
numaGetFValue(na, i, &val);
sum += val;
}
numaGetFValue(na, ibinval, &val);
sum += fractval * val;
numaGetSum(na, &total);
*prank = sum / total;
/* fprintf(stderr, "binval = %7.3f, rank = %7.3f\n", binval, *prank); */
return 0;
}
/*!
* numaHistogramGetValFromRank()
*
* Input: na (histogram)
* rank (fraction of total samples)
* &rval (<return> approx. to the bin value)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) If we think of the histogram as a function y(x), this returns
* the value x such that the integral of y(x) from the start
* value to x gives the fraction 'rank' of the integral
* of y(x) over all bins.
* (2) This function only makes sense when applied to a Numa that
* is a histogram. The values in the histogram can be ints and
* floats, and are computed as floats. The val is returned
* as a float, even though the buckets are of integer width.
* (3) The numa parameters startx and binsize are used to
* compute x from the Numa index i.
*/
l_int32
numaHistogramGetValFromRank(NUMA *na,
l_float32 rank,
l_float32 *prval)
{
l_int32 i, n;
l_float32 startval, binsize, rankcount, total, sum, fract, val;
PROCNAME("numaHistogramGetValFromRank");
if (!prval)
return ERROR_INT("prval not defined", procName, 1);
*prval = 0.0;
if (!na)
return ERROR_INT("na not defined", procName, 1);
if (rank < 0.0) {
L_WARNING("rank < 0; setting to 0.0", procName);
rank = 0.0;
}
if (rank > 1.0) {
L_WARNING("rank > 1.0; setting to 1.0", procName);
rank = 1.0;
}
n = numaGetCount(na);
numaGetXParameters(na, &startval, &binsize);
numaGetSum(na, &total);
rankcount = rank * total; /* count that corresponds to rank */
sum = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &val);
if (sum + val >= rankcount)
break;
sum += val;
}
if (val <= 0.0) /* can == 0 if rank == 0.0 */
fract = 0.0;
else /* sum + fract * val = rankcount */
fract = (rankcount - sum) / val;
/* The use of the fraction of a bin allows a simple calculation
* for the histogram value at the given rank. */
*prval = startval + binsize * ((l_float32)i + fract);
/* fprintf(stderr, "rank = %7.3f, val = %7.3f\n", rank, *prval); */
return 0;
}
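/*
 *  A hypothetical usage sketch (not part of the library):
 *  numaHistogramGetValFromRank() and numaHistogramGetRankFromVal()
 *  are approximate inverses on a histogram, so a rank -> value -> rank
 *  round trip returns nearly the input rank (up to bin quantization).
 *
 *      l_int32 i;
 *      l_float32 val, rank;
 *      NUMA *na = numaCreate(100);
 *      NUMA *nah;
 *      for (i = 0; i < 100; i++)
 *          numaAddNumber(na, i);                      // uniform data 0 ... 99
 *      nah = numaMakeHistogramAuto(na, 100);
 *      numaHistogramGetValFromRank(nah, 0.25, &val);  // val is ~25
 *      numaHistogramGetRankFromVal(nah, val, &rank);  // rank is ~0.25
 *      numaDestroy(&na);
 *      numaDestroy(&nah);
 */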
/*----------------------------------------------------------------------*
* Splitting a distribution *
*----------------------------------------------------------------------*/
/*!
* numaSplitDistribution()
*
* Input: na (histogram)
* scorefract (fraction of the max score, used to determine
* the range over which the histogram min is searched)
* &splitindex (<optional return> index for splitting)
* &ave1 (<optional return> average of lower distribution)
* &ave2 (<optional return> average of upper distribution)
* &num1 (<optional return> population of lower distribution)
* &num2 (<optional return> population of upper distribution)
* &nascore (<optional return> for debugging; otherwise use null)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This function is intended to be used on a distribution of
* values that represent two sets, such as a histogram of
* pixel values, and the goal is to determine the means of
* the two sets and the best splitting point.
* (2) The Otsu method finds a split point that divides the distribution
* into two parts by maximizing a score function that is the
* product of two terms:
* (a) the square of the difference of centroids, (ave1 - ave2)^2
* (b) fract1 * (1 - fract1)
* where fract1 is the fraction in the lower distribution.
 * This biases the split point into the larger "bump" (i.e., toward
 * the point where the (b) term reaches its maximum of 0.25 at
 * fract1 = 0.5). To avoid this, we define a range of values near
* the maximum of the score function, and choose the value within
* this range such that the histogram itself has a minimum value.
* The range is determined by scorefract: we include all abscissa
* values to the left and right of the value that maximizes the
* score, such that the score stays above (1 - scorefract) * maxscore.
* (3) We normalize the score so that if the two distributions
* were of equal size and at opposite ends of the numa, the
* score would be 1.0.
*/
l_int32
numaSplitDistribution(NUMA *na,
l_float32 scorefract,
l_int32 *psplitindex,
l_float32 *pave1,
l_float32 *pave2,
l_float32 *pnum1,
l_float32 *pnum2,
NUMA **pnascore)
{
l_int32 i, n, bestsplit, minrange, maxrange, maxindex;
l_float32 ave1, ave2, ave1prev, ave2prev;
l_float32 num1, num2, num1prev, num2prev;
l_float32 val, minval, sum, fract1;
l_float32 norm, score, minscore, maxscore;
NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2;
PROCNAME("numaSplitDistribution");
if (!na)
return ERROR_INT("na not defined", procName, 1);
n = numaGetCount(na);
if (n <= 1)
return ERROR_INT("n = 1 in histogram", procName, 1);
numaGetSum(na, &sum);
if (sum <= 0.0)
return ERROR_INT("sum <= 0.0", procName, 1);
norm = 4.0 / ((n - 1) * (n - 1));
ave1prev = 0.0;
numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL);
num1prev = 0.0;
num2prev = sum;
maxindex = n / 2; /* initialize with something */
/* Split the histogram with [0 ... i] in the lower part
* and [i+1 ... n-1] in upper part. First, compute an otsu
* score for each possible splitting. */
nascore = numaCreate(n);
if (pave1) naave1 = numaCreate(n);
if (pave2) naave2 = numaCreate(n);
if (pnum1) nanum1 = numaCreate(n);
if (pnum2) nanum2 = numaCreate(n);
maxscore = 0.0;
for (i = 0; i < n; i++) {
numaGetFValue(na, i, &val);
num1 = num1prev + val;
if (num1 == 0)
ave1 = ave1prev;
else
ave1 = (num1prev * ave1prev + i * val) / num1;
num2 = num2prev - val;
if (num2 == 0)
ave2 = ave2prev;
else
ave2 = (num2prev * ave2prev - i * val) / num2;
fract1 = num1 / sum;
score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1);
numaAddNumber(nascore, score);
if (pave1) numaAddNumber(naave1, ave1);
if (pave2) numaAddNumber(naave2, ave2);
if (pnum1) numaAddNumber(nanum1, num1);
if (pnum2) numaAddNumber(nanum2, num2);
if (score > maxscore) {
maxscore = score;
maxindex = i;
}
num1prev = num1;
num2prev = num2;
ave1prev = ave1;
ave2prev = ave2;
}
/* Next, for all contiguous scores within a specified fraction
* of the max, choose the split point as the value with the
* minimum in the histogram. */
minscore = (1. - scorefract) * maxscore;
for (i = maxindex - 1; i >= 0; i--) {
numaGetFValue(nascore, i, &val);
if (val < minscore)
break;
}
minrange = i + 1;
for (i = maxindex + 1; i < n; i++) {
numaGetFValue(nascore, i, &val);
if (val < minscore)
break;
}
maxrange = i - 1;
numaGetFValue(na, minrange, &minval);
bestsplit = minrange;
for (i = minrange + 1; i <= maxrange; i++) {
numaGetFValue(na, i, &val);
if (val < minval) {
minval = val;
bestsplit = i;
}
}
if (psplitindex) *psplitindex = bestsplit;
if (pave1) numaGetFValue(naave1, bestsplit, pave1);
if (pave2) numaGetFValue(naave2, bestsplit, pave2);
if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1);
if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2);
if (pnascore) { /* debug mode */
fprintf(stderr, "minrange = %d, maxrange = %d\n", minrange, maxrange);
fprintf(stderr, "minval = %10.0f\n", minval);
gplotSimple1(nascore, GPLOT_X11, "junkoutroot",
"Score for split distribution");
*pnascore = nascore;
}
else
numaDestroy(&nascore);
if (pave1) numaDestroy(&naave1);
if (pave2) numaDestroy(&naave2);
if (pnum1) numaDestroy(&nanum1);
if (pnum2) numaDestroy(&nanum2);
return 0;
}
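/*
 *  A hypothetical usage sketch (not part of the library): split a
 *  synthetic bimodal histogram, such as a grayscale pixel histogram
 *  with a dark text mode and a bright background mode, into its two
 *  populations.
 *
 *      l_int32 i, splitindex;
 *      l_float32 ave1, ave2;
 *      NUMA *nah = numaCreate(256);
 *      for (i = 0; i < 256; i++)   // two modes separated by a sparse region
 *          numaAddNumber(nah, (i < 60) ? 100.0 : (i > 180) ? 300.0 : 2.0);
 *      numaSplitDistribution(nah, 0.1, &splitindex, &ave1, &ave2,
 *                            NULL, NULL, NULL);
 *      // splitindex falls in the sparse region between the modes;
 *      // ave1 and ave2 approximate the means of the two populations
 *      numaDestroy(&nah);
 */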
/*----------------------------------------------------------------------*
* Extrema finding *
*----------------------------------------------------------------------*/
/*!
* numaFindPeaks()
*
* Input: source na
* max number of peaks to be found
* fract1 (min fraction of peak value)
* fract2 (min slope)
* Return: peak na, or null on error.
*
* Notes:
* (1) The returned na consists of sets of four numbers representing
* the peak, in the following order:
* left edge; peak center; right edge; normalized peak area
*/
NUMA *
numaFindPeaks(NUMA *nas,
l_int32 nmax,
l_float32 fract1,
l_float32 fract2)
{
l_int32 i, k, n, maxloc, lloc, rloc;
l_float32 fmaxval, sum, total, newtotal, val, lastval;
l_float32 peakfract;
NUMA *na, *napeak;
PROCNAME("numaFindPeaks");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
numaGetSum(nas, &total);
/* We munge this copy */
if ((na = numaCopy(nas)) == NULL)
return (NUMA *)ERROR_PTR("na not made", procName, NULL);
if ((napeak = numaCreate(4 * nmax)) == NULL)
return (NUMA *)ERROR_PTR("napeak not made", procName, NULL);
for (k = 0; k < nmax; k++) {
numaGetSum(na, &newtotal);
if (newtotal == 0.0) /* sanity check */
break;
numaGetMax(na, &fmaxval, &maxloc);
sum = fmaxval;
lastval = fmaxval;
lloc = 0;
for (i = maxloc - 1; i >= 0; --i) {
numaGetFValue(na, i, &val);
if (val == 0.0) {
lloc = i + 1;
break;
}
if (val > fract1 * fmaxval) {
sum += val;
lastval = val;
continue;
}
if (lastval - val > fract2 * lastval) {
sum += val;
lastval = val;
continue;
}
lloc = i;
break;
}
lastval = fmaxval;
rloc = n - 1;
for (i = maxloc + 1; i < n; ++i) {
numaGetFValue(na, i, &val);
if (val == 0.0) {
rloc = i - 1;
break;
}
if (val > fract1 * fmaxval) {
sum += val;
lastval = val;
continue;
}
if (lastval - val > fract2 * lastval) {
sum += val;
lastval = val;
continue;
}
rloc = i;
break;
}
peakfract = sum / total;
numaAddNumber(napeak, lloc);
numaAddNumber(napeak, maxloc);
numaAddNumber(napeak, rloc);
numaAddNumber(napeak, peakfract);
for (i = lloc; i <= rloc; i++)
numaSetValue(na, i, 0.0);
}
numaDestroy(&na);
return napeak;
}
/*!
* numaFindExtrema()
*
* Input: nas (input values)
* delta (relative amount to resolve peaks and valleys)
* Return: nad (locations of extrema), or null on error
*
* Notes:
* (1) This returns a sequence of extrema (peaks and valleys).
* (2) The algorithm is analogous to that for determining
* mountain peaks. Suppose we have a local peak, with
* bumps on the side. Under what conditions can we consider
* those 'bumps' to be actual peaks? The answer: if the
* bump is separated from the peak by a saddle that is at
* least 500 feet below the bump.
* (3) Operationally, suppose we are looking for a peak.
* We are keeping the largest value we've seen since the
* last valley, and are looking for a value that is delta
* BELOW our current peak. When we find such a value,
* we label the peak, use the current value to label the
* valley, and then do the same operation in reverse (looking
* for a valley).
*/
NUMA *
numaFindExtrema(NUMA *nas,
l_float32 delta)
{
l_int32 i, n, found, loc, direction;
l_float32 startval, val, maxval, minval;
NUMA *nad;
PROCNAME("numaFindExtrema");
if (!nas)
return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
n = numaGetCount(nas);
nad = numaCreate(0);
/* We don't know if we'll find a peak or valley first,
* but use the first element of nas as the reference point.
* Break when we deviate by 'delta' from the first point. */
numaGetFValue(nas, 0, &startval);
found = FALSE;
for (i = 1; i < n; i++) {
numaGetFValue(nas, i, &val);
if (L_ABS(val - startval) >= delta) {
found = TRUE;
break;
}
}
if (!found)
return nad; /* it's empty */
/* Are we looking for a peak or a valley? */
if (val > startval) { /* peak */
direction = 1;
maxval = val;
}
else {
direction = -1;
minval = val;
}
loc = i;
/* Sweep through the rest of the array, recording alternating
* peak/valley extrema. */
for (i = i + 1; i < n; i++) {
numaGetFValue(nas, i, &val);
if (direction == 1 && val > maxval ) { /* new local max */
maxval = val;
loc = i;
}
else if (direction == -1 && val < minval ) { /* new local min */
minval = val;
loc = i;
}
else if (direction == 1 && (maxval - val >= delta)) {
numaAddNumber(nad, loc); /* save the current max location */
direction = -1; /* reverse: start looking for a min */
minval = val;
loc = i; /* current min location */
}
else if (direction == -1 && (val - minval >= delta)) {
numaAddNumber(nad, loc); /* save the current min location */
direction = 1; /* reverse: start looking for a max */
maxval = val;
loc = i; /* current max location */
}
}
/* Save the final extremum */
/* numaAddNumber(nad, loc); */
return nad;
}
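/*
 *  A hypothetical usage sketch (not part of the library): find the
 *  alternating peaks and valleys of an oscillating signal, ignoring
 *  wiggles smaller than delta = 5.0.
 *
 *      l_int32 i, nloc, loc;
 *      NUMA *nas = numaCreate(100);
 *      NUMA *nad;
 *      for (i = 0; i < 100; i++)
 *          numaAddNumber(nas, 50.0 + 40.0 * sin(0.3 * i));
 *      nad = numaFindExtrema(nas, 5.0);
 *      nloc = numaGetCount(nad);
 *      for (i = 0; i < nloc; i++)
 *          numaGetIValue(nad, i, &loc);   // index of the i-th extremum
 *      numaDestroy(&nas);
 *      numaDestroy(&nad);
 */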
/*----------------------------------------------------------------------*
* Threshold crossings and frequency analysis *
*----------------------------------------------------------------------*/
/*!
* numaSelectCrossingThreshold()
*
* Input: nax (<optional> numa of abscissa values; can be NULL)
* nay (signal)
* estthresh (estimated pixel threshold for crossing: e.g., for
* images, white <--> black; typ. ~120)
* &bestthresh (<return> robust estimate of threshold to use)
* Return: 0 if OK, 1 on error
*
 * Notes:
* (1) When a valid threshold is used, the number of crossings is
* a maximum, because none are missed. If no threshold intersects
* all the crossings, the crossings must be determined with
* numaCrossingsByPeaks().
* (2) @estthresh is an input estimate of the threshold that should
* be used. We compute the crossings with 41 thresholds
* (20 below and 20 above). There is a range in which the
* number of crossings is a maximum. Return a threshold
* in the center of this stable plateau of crossings.
* This can then be used with numaCrossingsByThreshold()
* to get a good estimate of crossing locations.
*/
l_int32
numaSelectCrossingThreshold(NUMA *nax,
NUMA *nay,
l_float32 estthresh,
l_float32 *pbestthresh)
{
l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen;
l_int32 val, maxval, nmax, count;
l_float32 thresh, fmaxval, fmodeval;
NUMA *nat, *nac;
PROCNAME("numaSelectCrossingThreshold");
if (!nay)
return ERROR_INT("nay not defined", procName, 1);
/* Compute the number of crossings for different thresholds */
nat = numaCreate(41);
for (i = 0; i < 41; i++) {
thresh = estthresh - 80.0 + 4.0 * i;
nac = numaCrossingsByThreshold(nax, nay, thresh);
numaAddNumber(nat, numaGetCount(nac));
numaDestroy(&nac);
}
/* Find the center of the plateau of max crossings, which
* extends from thresh[istart] to thresh[iend]. */
numaGetMax(nat, &fmaxval, NULL);
maxval = (l_int32)fmaxval;
nmax = 0;
for (i = 0; i < 41; i++) {
numaGetIValue(nat, i, &val);
if (val == maxval)
nmax++;
}
if (nmax < 3) { /* likely accidental max; try the mode */
numaGetMode(nat, &fmodeval, &count);
if (count > nmax && fmodeval > 0.5 * fmaxval)
maxval = (l_int32)fmodeval; /* use the mode */
}
inrun = FALSE;
iend = 40;
maxrunlen = 0;
for (i = 0; i < 41; i++) {
numaGetIValue(nat, i, &val);
if (val == maxval) {
if (!inrun) {
istart = i;
inrun = TRUE;
}
continue;
}
if (inrun && (val != maxval)) {
iend = i - 1;
runlen = iend - istart + 1;
inrun = FALSE;
if (runlen > maxrunlen) {
maxstart = istart;
maxend = iend;
maxrunlen = runlen;
}
}
}
if (inrun) {
runlen = i - istart;
if (runlen > maxrunlen) {
maxstart = istart;
maxend = i - 1;
maxrunlen = runlen;
}
}
#if 0
foundfirst = FALSE;
iend = 40;
for (i = 0; i < 41; i++) {
numaGetIValue(nat, i, &val);
if (val == maxval) {
if (!foundfirst) {
istart = i;
foundfirst = TRUE;
}
}
if ((val != maxval) && foundfirst) {
iend = i - 1;
break;
}
}
nmax = iend - istart + 1;
#endif
*pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend);
#if DEBUG_CROSSINGS
fprintf(stderr, "\nCrossings attain a maximum at %d thresholds, between:\n"
" thresh[%d] = %5.1f and thresh[%d] = %5.1f\n",
nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart,
maxend, estthresh - 80.0 + 4.0 * maxend);
fprintf(stderr, "The best choice: %5.1f\n", *pbestthresh);
fprintf(stderr, "Number of crossings at the 41 thresholds:");
numaWriteStream(stderr, nat);
#endif /* DEBUG_CROSSINGS */
numaDestroy(&nat);
return 0;
}
/*!
* numaCrossingsByThreshold()
*
* Input: nax (<optional> numa of abscissa values; can be NULL)
* nay (numa of ordinate values, corresponding to nax)
* thresh (threshold value for nay)
* Return: nad (abscissa pts at threshold), or null on error
*
* Notes:
* (1) If nax == NULL, we use startx and delx from nay to compute
* the crossing values in nad.
*/
NUMA *
numaCrossingsByThreshold(NUMA *nax,
NUMA *nay,
l_float32 thresh)
{
l_int32 i, n;
l_float32 startx, delx;
l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract;
NUMA *nad;
PROCNAME("numaCrossingsByThreshold");
if (!nay)
return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
n = numaGetCount(nay);
if (nax && (numaGetCount(nax) != n))
return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
nad = numaCreate(0);
numaGetFValue(nay, 0, &yval1);
numaGetXParameters(nay, &startx, &delx);
if (nax)
numaGetFValue(nax, 0, &xval1);
else
xval1 = startx;
for (i = 1; i < n; i++) {
numaGetFValue(nay, i, &yval2);
if (nax)
numaGetFValue(nax, i, &xval2);
else
xval2 = startx + i * delx;
delta1 = yval1 - thresh;
delta2 = yval2 - thresh;
if (delta1 == 0.0)
numaAddNumber(nad, xval1);
else if (delta2 == 0.0)
numaAddNumber(nad, xval2);
else if (delta1 * delta2 < 0.0) { /* crossing */
fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
crossval = xval1 + fract * (xval2 - xval1);
numaAddNumber(nad, crossval);
}
xval1 = xval2;
yval1 = yval2;
}
return nad;
}
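/*
 *  A hypothetical usage sketch (not part of the library): locate where
 *  a synthetic projection signal crosses a threshold, first letting
 *  numaSelectCrossingThreshold() pick a robust threshold near an
 *  initial estimate of 120.
 *
 *      l_int32 i;
 *      l_float32 bestthresh;
 *      NUMA *nay = numaCreate(200);
 *      NUMA *nad;
 *      for (i = 0; i < 200; i++)     // alternating low/high blocks of 20
 *          numaAddNumber(nay, ((i / 20) % 2) ? 220.0 : 30.0);
 *      numaSelectCrossingThreshold(NULL, nay, 120.0, &bestthresh);
 *      nad = numaCrossingsByThreshold(NULL, nay, bestthresh);
 *      // nad holds the interpolated x locations of the 9 crossings
 *      numaDestroy(&nay);
 *      numaDestroy(&nad);
 */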
/*!
* numaCrossingsByPeaks()
*
* Input: nax (<optional> numa of abscissa values)
* nay (numa of ordinate values, corresponding to nax)
* delta (parameter used to identify when a new peak can be found)
* Return: nad (abscissa pts at threshold), or null on error
*
* Notes:
* (1) If nax == NULL, we use startx and delx from nay to compute
* the crossing values in nad.
*/
NUMA *
numaCrossingsByPeaks(NUMA *nax,
NUMA *nay,
l_float32 delta)
{
l_int32 i, j, n, np, previndex, curindex;
l_float32 startx, delx;
l_float32 xval1, xval2, yval1, yval2, delta1, delta2;
l_float32 prevval, curval, thresh, crossval, fract;
NUMA *nap, *nad;
PROCNAME("numaCrossingsByPeaks");
if (!nax)
return (NUMA *)ERROR_PTR("nax not defined", procName, NULL);
if (!nay)
return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
n = numaGetCount(nax);
if (numaGetCount(nay) != n)
return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
/* Find the extrema. Also add last point in nay to get
* the last transition (from the last peak to the end).
* The number of crossings is 1 more than the number of extrema. */
nap = numaFindExtrema(nay, delta);
numaAddNumber(nap, n - 1);
np = numaGetCount(nap);
L_INFO_INT("Number of crossings: %d", procName, np);
/* Do all computation in index units of nax */
nad = numaCreate(np); /* output crossings, in nax units */
previndex = 0; /* prime the search with 1st point */
numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */
numaGetXParameters(nay, &startx, &delx);
for (i = 0; i < np; i++) {
numaGetIValue(nap, i, &curindex);
numaGetFValue(nay, curindex, &curval);
thresh = (prevval + curval) / 2.0;
/* fprintf(stderr, "thresh[%d] = %7.3f\n", i, thresh); */
if (nax)
numaGetFValue(nax, previndex, &xval1);
else
xval1 = startx + previndex * delx;
numaGetFValue(nay, previndex, &yval1);
for (j = previndex + 1; j <= curindex; j++) {
if (nax)
numaGetFValue(nax, j, &xval2);
else
xval2 = startx + j * delx;
numaGetFValue(nay, j, &yval2);
delta1 = yval1 - thresh;
delta2 = yval2 - thresh;
if (delta1 == 0.0) {
numaAddNumber(nad, xval1);
break;
}
else if (delta2 == 0.0) {
numaAddNumber(nad, xval2);
break;
}
else if (delta1 * delta2 < 0.0) { /* crossing */
fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
crossval = xval1 + fract * (xval2 - xval1);
numaAddNumber(nad, crossval);
break;
}
xval1 = xval2;
yval1 = yval2;
}
previndex = curindex;
prevval = curval;
}
numaDestroy(&nap);
return nad;
}
/*!
* numaEvalBestHaarParameters()
*
* Input: nas (numa of non-negative signal values)
* relweight (relative weight of (-1 comb) / (+1 comb)
* contributions to the 'convolution'. In effect,
* the convolution kernel is a comb consisting of
* alternating +1 and -weight.)
* nwidth (number of widths to consider)
* nshift (number of shifts to consider for each width)
* minwidth (smallest width to consider)
* maxwidth (largest width to consider)
* &bestwidth (<return> width giving largest score)
* &bestshift (<return> shift giving largest score)
* &bestscore (<optional return> convolution with
* "Haar"-like comb)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This does a linear sweep of widths, evaluating at @nshift
* shifts for each width, computing the score from a convolution
* with a long comb, and finding the (width, shift) pair that
* gives the maximum score. The best width is the "half-wavelength"
* of the signal.
* (2) The convolving function is a comb of alternating values
* +1 and -1 * relweight, separated by the width and phased by
* the shift. This is similar to a Haar transform, except
* there the convolution is performed with a square wave.
* (3) The function is useful for finding the line spacing
* and strength of line signal from pixel sum projections.
* (4) The score is normalized to the size of nas divided by
* the number of half-widths. For image applications, the input is
* typically an array of pixel projections, so one should
* normalize by dividing the score by the image width in the
* pixel projection direction.
*/
l_int32
numaEvalBestHaarParameters(NUMA *nas,
l_float32 relweight,
l_int32 nwidth,
l_int32 nshift,
l_float32 minwidth,
l_float32 maxwidth,
l_float32 *pbestwidth,
l_float32 *pbestshift,
l_float32 *pbestscore)
{
l_int32 i, j;
l_float32 delwidth, delshift, width, shift, score;
l_float32 bestwidth, bestshift, bestscore;
PROCNAME("numaEvalBestHaarParameters");
if (!nas)
return ERROR_INT("nas not defined", procName, 1);
if (!pbestwidth || !pbestshift)
return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1);
bestscore = 0.0;
delwidth = (maxwidth - minwidth) / (nwidth - 1.0);
for (i = 0; i < nwidth; i++) {
width = minwidth + delwidth * i;
delshift = width / (l_float32)(nshift);
for (j = 0; j < nshift; j++) {
shift = j * delshift;
numaEvalHaarSum(nas, width, shift, relweight, &score);
if (score > bestscore) {
bestscore = score;
bestwidth = width;
bestshift = shift;
#if DEBUG_FREQUENCY
fprintf(stderr, "width = %7.3f, shift = %7.3f, score = %7.3f\n",
width, shift, score);
#endif /* DEBUG_FREQUENCY */
}
}
}
*pbestwidth = bestwidth;
*pbestshift = bestshift;
if (pbestscore)
*pbestscore = bestscore;
return 0;
}
/*!
* numaEvalHaarSum()
*
* Input: nas (numa of non-negative signal values)
* width (distance between +1 and -1 in convolution comb)
* shift (phase of the comb: location of first +1)
* relweight (relative weight of (-1 comb) / (+1 comb)
* contributions to the 'convolution'. In effect,
* the convolution kernel is a comb consisting of
* alternating +1 and -weight.)
* &score (<return> convolution with "Haar"-like comb)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This does a convolution with a comb of alternating values
* +1 and -relweight, separated by the width and phased by the shift.
* This is similar to a Haar transform, except that for Haar,
* (1) the convolution kernel is symmetric about 0, so the
* relweight is 1.0, and
* (2) the convolution is performed with a square wave.
* (2) The score is normalized to the size of nas divided by
* twice the "width". For image applications, the input is
* typically an array of pixel projections, so one should
* normalize by dividing the score by the image width in the
* pixel projection direction.
* (3) To get a Haar-like result, use relweight = 1.0. For detecting
* signals where you expect every other sample to be close to
* zero, as with barcodes or filtered text lines, you can
* use relweight > 1.0.
*/
l_int32
numaEvalHaarSum(NUMA *nas,
l_float32 width,
l_float32 shift,
l_float32 relweight,
l_float32 *pscore)
{
l_int32 i, n, nsamp, index;
l_float32 score, weight, val;
PROCNAME("numaEvalHaarSum");
if (!pscore)
return ERROR_INT("&score not defined", procName, 1);
*pscore = 0.0;
if (!nas)
return ERROR_INT("nas not defined", procName, 1);
if ((n = numaGetCount(nas)) < 2 * width)
return ERROR_INT("nas size too small", procName, 1);
score = 0.0;
nsamp = (l_int32)((n - shift) / width);
for (i = 0; i < nsamp; i++) {
index = (l_int32)(shift + i * width);
weight = (i % 2) ? -1.0 * relweight : 1.0;  /* the first sample gets +1 */
numaGetFValue(nas, index, &val);
score += weight * val;
}
*pscore = 2.0 * width * score / (l_float32)n;
return 0;
}
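/*
 *  A hypothetical usage sketch (not part of the library): estimate the
 *  spacing of a periodic projection signal.  The best width found is
 *  the "half-wavelength", so the period of the signal is 2 * bestwidth.
 *
 *      l_int32 i;
 *      l_float32 bestwidth, bestshift, bestscore;
 *      NUMA *nas = numaCreate(400);
 *      for (i = 0; i < 400; i++)     // period-20 signal: 10 on, 10 off
 *          numaAddNumber(nas, ((i / 10) % 2) ? 0.0 : 100.0);
 *      numaEvalBestHaarParameters(nas, 1.0, 30, 10, 5.0, 20.0,
 *                                 &bestwidth, &bestshift, &bestscore);
 *      // bestwidth comes out close to 10.0 (half the 20-sample period)
 *      numaDestroy(&nas);
 */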