blob: 94c234c6fb0d0827624c39b2ee341a9232752815 [file] [log] [blame]
/*====================================================================*
- Copyright (C) 2001 Leptonica. All rights reserved.
- This software is distributed in the hope that it will be
- useful, but with NO WARRANTY OF ANY KIND.
- No author or distributor accepts responsibility to anyone for the
- consequences of using this software, or for whether it serves any
- particular purpose or works at all, unless he or she says so in
- writing. Everyone is granted permission to copy, modify and
- redistribute this source code, for commercial or non-commercial
- purposes, with the following restrictions: (1) the origin of this
- source code must not be misrepresented; (2) modified versions must
- be plainly marked as such; and (3) this notice may not be removed
- or altered from any source or modified source distribution.
*====================================================================*/
/*
* jbclass.c
*
* These are functions for unsupervised classification of
* collections of connected components -- either characters or
* words -- in binary images. They can be used as image
* processing steps in jbig2 compression.
*
* Initialization
*
* JBCLASSER *jbRankHausInit() [rank hausdorff encoder]
* JBCLASSER *jbCorrelationInit() [correlation encoder]
* JBCLASSER *jbCorrelationInitWithoutComponents() [ditto]
* static JBCLASSER *jbCorrelationInitInternal()
*
* Classify the pages
*
* l_int32 jbAddPages()
* l_int32 jbAddPage()
* l_int32 jbAddPageComponents()
*
* Rank hausdorff classifier
*
* l_int32 jbClassifyRankHaus()
* l_int32 pixHaustest()
* l_int32 pixRankHaustest()
*
* Binary correlation classifier
*
* l_int32 jbClassifyCorrelation()
*
* Determine the image components we start with
*
* l_int32 jbGetComponents()
* PIX *pixWordMaskByDilation()
*
* Build grayscale composites (templates)
*
* PIXA *jbAccumulateComposites
* PIXA *jbTemplatesFromComposites
*
* Utility functions for Classer
*
* JBCLASSER *jbClasserCreate()
* void jbClasserDestroy()
*
* Utility functions for Data
*
* JBDATA *jbDataSave()
* void jbDataDestroy()
* l_int32 jbDataWrite()
* JBDATA *jbDataRead()
* PIXA *jbDataRender()
* l_int32 jbGetULCorners()
* l_int32 jbGetLLCorners()
*
* Static helpers
*
* static JBFINDCTX *findSimilarSizedTemplatesInit()
* static l_int32 findSimilarSizedTemplatesNext()
* static void findSimilarSizedTemplatesDestroy()
* static l_int32 finalPositioningForAlignment()
*
* Note: this is NOT an implementation of the JPEG jbig2
* proposed standard encoder, the specifications for which
* can be found at http://www.jpeg.org/jbigpt2.html.
* (See below for a full implementation.)
* It is an implementation of the lower-level part of an encoder that:
*
* (1) identifies connected components that are going to be used
* (2) puts them in similarity classes (this is an unsupervised
* classifier), and
* (3) stores the result in a simple file format (2 files,
* one for templates and one for page/coordinate/template-index
* quartets).
*
* An actual implementation of the official jbig2 encoder could
* start with parts (1) and (2), and would then compress the quartets
* according to the standards requirements (e.g., Huffman or
* arithmetic coding of coordinate differences and image templates).
*
* The low-level part of the encoder provided here has the
* following useful features:
*
* - It is accurate in the identification of templates
* and classes because it uses a windowed hausdorff
* distance metric.
* - It is accurate in the placement of the connected
* components, doing a two step process of first aligning
* the the centroids of the template with those of each instance,
* and then making a further correction of up to +- 1 pixel
* in each direction to best align the templates.
* - It is fast because it uses a morphologically based
* matching algorithm to implement the hausdorff criterion,
* and it selects the patterns that are possible matches
* based on their size.
*
* We provide two different matching functions, one using Hausdorff
* distance and one using a simple image correlation.
* The Hausdorff method sometimes produces better results for the
* same number of classes, because it gives a relatively small
* effective weight to foreground pixels near the boundary,
* and a relatively large weight to foreground pixels that are
* not near the boundary. By effectively ignoring these boundary
* pixels, Hausdorff weighting corresponds better to the expected
* probabilities of the pixel values in a scanned image, where the
* variations in instances of the same printed character are much
* more likely to be in pixels near the boundary. By contrast,
* the correlation method gives equal weight to all foreground pixels.
*
* For best results, use the correlation method. Correlation takes
* the number of fg pixels in the AND of instance and template,
* divided by the product of the number of fg pixels in instance
* and template. It compares this with a threshold that, in
* general, depends on the fractional coverage of the template.
* For heavy text, the threshold is raised above that for light
* text, By using both these parameters (basic threshold and
* adjustment factor for text weight), one has more flexibility
* and can arrive at the fewest substitution errors, although
* this comes at the price of more templates.
*
* The strict Hausdorff scoring is not a rank weighting, because a
* single pixel beyond the given distance will cause a match
* failure. A rank Hausdorff is more robust to non-boundary noise,
* but it is also more susceptible to confusing components that
* should be in different classes. For implementing a jbig2
* application for visually lossless binary image compression,
* you have two choices:
*
* (1) use a 3x3 structuring element (size = 3) and a strict
* Hausdorff comparison (rank = 1.0 in the rank Hausdorff
* function). This will result in a minimal number of classes,
* but confusion of small characters, such as italic and
* non-italic lower-case 'o', can still occur.
* (2) use the correlation method with a threshold of 0.85
* and a weighting factor of about 0.7. This will result in
* a larger number of classes, but should not be confused
* either by similar small characters or by extremely
* thick sans serif characters, such as in prog/cootoots.png.
*
* As mentioned above, if visual substitution errors must be
* avoided, you should use the correlation method.
*
* We provide executables that show how to do the encoding:
* prog/jbrankhaus.c
* prog/jbcorrelation.c
*
* The basic flow for correlation classification goes as follows,
* where specific choices have been made for parameters (Hausdorff
* is the same except for initialization):
*
* // Initialize and save data in the classer
* JBCLASSER *classer =
* jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7);
* SARRAY *safiles = getSortedPathnamesInDirectory(directory,
* NULL, 0, 0);
* jbAddPages(classer, safiles);
*
* // Save the data in a data structure for serialization,
* // and write it into two files.
* JBDATA *data = jbDataSave(classer);
* jbDataWrite(rootname, data);
*
* // Reconstruct (render) the pages from the encoded data.
* PIXA *pixa = jbDataRender(data, FALSE);
*
* Adam Langley has recently built a jbig2 standards-compliant
* encoder, the first one to appear in open source. You can get
* this encoder at:
*
* http://www.imperialviolet.org/jbig2.html
*
* It uses arithmetic encoding throughout. It encodes binary images
* losslessly with a single arithmetic coding over the full image.
* It also does both lossy and lossless encoding from connected
* components, using leptonica to generate the templates representing
* each cluster.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "allheaders.h"
/* MSVC can't handle arrays dimensioned by static const integers */
#define L_BUF_SIZE 512
/* For jbClassifyRankHaus(): size of border added around
* pix of each c.c., to allow further processing. This
* should be at least the sum of the MAX_DIFF_HEIGHT
* (or MAX_DIFF_WIDTH) and one-half the size of the Sel */
static const l_int32 JB_ADDED_PIXELS = 6;
/* For pixHaustest(), pixRankHaustest() and pixCorrelationScore():
* choose these to be 2 or greater */
static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */
static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */
/* In initialization, you have the option to discard components
* (cc, characters or words) that have either width or height larger
* than a given size. This is convenient for jbDataSave(), because
* the components are placed onto a regular lattice with cell
* dimension equal to the maximum component size. The default
* values are given here. If you want to save all components,
* use a sufficiently large set of dimensions. */
static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */
static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */
static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */
static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */
/* Max allowed dilation to merge characters into words */
#define MAX_ALLOWED_DILATION 14
/* This stores the state of a state machine which fetches
* similar sized templates */
struct JbFindTemplatesState
{
JBCLASSER *classer; /* classer */
l_int32 w; /* desired width */
l_int32 h; /* desired height */
l_int32 i; /* index into two_by_two step array */
NUMA *numa; /* current number array */
l_int32 n; /* current element of numa */
};
typedef struct JbFindTemplatesState JBFINDCTX;
/* Static initialization function */
static JBCLASSER * jbCorrelationInitInternal(l_int32 components,
l_int32 maxwidth, l_int32 maxheight, l_float32 thresh,
l_float32 weightfactor, l_int32 keep_components);
/* Static helper functions */
static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs);
static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context);
static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext);
static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y,
l_int32 idelx, l_int32 idely, PIX *pixt,
l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy);
#ifndef NO_CONSOLE_IO
#define DEBUG_PLOT_CC 0
#define DEBUG_CORRELATION_SCORE 0
#endif /* ~NO_CONSOLE_IO */
/*----------------------------------------------------------------------*
* Initialization *
*----------------------------------------------------------------------*/
/*!
* jbRankHausInit()
*
* Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
* maxwidth (of component; use 0 for default)
* maxheight (of component; use 0 for default)
* size (of square structuring element; 2, representing
* 2x2 sel, is necessary for reasonable accuracy of
* small components; combine this with rank ~ 0.97
* to avoid undue class expansion)
* rank (rank val of match, each way; in [0.5 - 1.0];
* when using size = 2, 0.97 is a reasonable value)
* Return: jbclasser if OK; NULL on error
*/
JBCLASSER *
jbRankHausInit(l_int32 components,
l_int32 maxwidth,
l_int32 maxheight,
l_int32 size,
l_float32 rank)
{
JBCLASSER *classer;
PROCNAME("jbRankHausInit");
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
components != JB_WORDS)
return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
if (size < 1 || size > 10)
return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL);
if (rank < 0.5 || rank > 1.0)
return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL);
if (maxwidth == 0) {
if (components == JB_CONN_COMPS)
maxwidth = MAX_CONN_COMP_WIDTH;
else if (components == JB_CHARACTERS)
maxwidth = MAX_CHAR_COMP_WIDTH;
else /* JB_WORDS */
maxwidth = MAX_WORD_COMP_WIDTH;
}
if (maxheight == 0)
maxheight = MAX_COMP_HEIGHT;
if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL)
return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
classer->maxwidth = maxwidth;
classer->maxheight = maxheight;
classer->sizehaus = size;
classer->rankhaus = rank;
classer->nahash = numaHashCreate(5507, 4); /* 5507 is prime */
return classer;
}
/*!
* jbCorrelationInit()
*
* Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
* maxwidth (of component; use 0 for default)
* maxheight (of component; use 0 for default)
* thresh (value for correlation score: in [0.4 - 0.9])
* weightfactor (corrects thresh for thick characters [0.0 - 1.0])
* Return: jbclasser if OK; NULL on error
*/
JBCLASSER *
jbCorrelationInit(l_int32 components,
l_int32 maxwidth,
l_int32 maxheight,
l_float32 thresh,
l_float32 weightfactor)
{
return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
weightfactor, 1);
}
/*!
* jbCorrelationInitWithoutComponents()
*
* Input: same as jbCorrelationInit
* Output: same as jbCorrelationInit
*
* Note: acts the same as jbCorrelationInit(), but the resulting
* object doesn't keep a list of all the components.
*/
JBCLASSER *
jbCorrelationInitWithoutComponents(l_int32 components,
l_int32 maxwidth,
l_int32 maxheight,
l_float32 thresh,
l_float32 weightfactor)
{
return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
weightfactor, 0);
}
static JBCLASSER *
jbCorrelationInitInternal(l_int32 components,
l_int32 maxwidth,
l_int32 maxheight,
l_float32 thresh,
l_float32 weightfactor,
l_int32 keep_components)
{
JBCLASSER *classer;
PROCNAME("jbCorrelationInitInternal");
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
components != JB_WORDS)
return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
if (thresh < 0.4 || thresh > 0.9)
return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.9]",
procName, NULL);
if (weightfactor < 0.0 || weightfactor > 1.0)
return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]",
procName, NULL);
if (maxwidth == 0) {
if (components == JB_CONN_COMPS)
maxwidth = MAX_CONN_COMP_WIDTH;
else if (components == JB_CHARACTERS)
maxwidth = MAX_CHAR_COMP_WIDTH;
else /* JB_WORDS */
maxwidth = MAX_WORD_COMP_WIDTH;
}
if (maxheight == 0)
maxheight = MAX_COMP_HEIGHT;
if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL)
return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
classer->maxwidth = maxwidth;
classer->maxheight = maxheight;
classer->thresh = thresh;
classer->weightfactor = weightfactor;
classer->nahash = numaHashCreate(5507, 4); /* 5507 is prime */
classer->keep_pixaa = keep_components;
return classer;
}
/*----------------------------------------------------------------------*
* Classify the pages *
*----------------------------------------------------------------------*/
/*!
* jbAddPages()
*
* Input: jbclasser
* safiles (of page image file names)
* Return: 0 if OK; 1 on error
*
* Note:
* (1) jbclasser makes a copy of the array of file names.
* (2) The caller is still responsible for destroying the input array.
*/
l_int32
jbAddPages(JBCLASSER *classer,
SARRAY *safiles)
{
l_int32 i, nfiles;
char *fname;
PIX *pix;
PROCNAME("jbAddPages");
if (!classer)
return ERROR_INT("classer not defined", procName, 1);
if (!safiles)
return ERROR_INT("safiles not defined", procName, 1);
classer->safiles = sarrayCopy(safiles);
nfiles = sarrayGetCount(safiles);
for (i = 0; i < nfiles; i++) {
fname = sarrayGetString(safiles, i, 0);
if ((pix = pixRead(fname)) == NULL) {
L_WARNING_INT("image file %d not read", procName, i);
continue;
}
jbAddPage(classer, pix);
pixDestroy(&pix);
}
return 0;
}
/*!
* jbAddPage()
*
* Input: jbclasser
* pixs (of input page)
* Return: 0 if OK; 1 on error
*/
l_int32
jbAddPage(JBCLASSER *classer,
PIX *pixs)
{
BOXA *boxas;
PIXA *pixas;
PROCNAME("jbAddPage");
if (!classer)
return ERROR_INT("classer not defined", procName, 1);
if (!pixs)
return ERROR_INT("pix not defined", procName, 1);
classer->w = pixGetWidth(pixs);
classer->h = pixGetHeight(pixs);
/* Get the appropriate components and their bounding boxes */
if (jbGetComponents(pixs, classer->components, classer->maxwidth,
classer->maxheight, &boxas, &pixas)) {
return ERROR_INT("components not made", procName, 1);
}
jbAddPageComponents(classer, pixs, boxas, pixas);
boxaDestroy(&boxas);
pixaDestroy(&pixas);
return 0;
}
/*!
* jbAddPageComponents()
*
* Input: jbclasser
* pixs (of input page)
* boxas (b.b. of components for this page)
* pixas (components for this page)
* Return: 0 if OK; 1 on error
*
* Notes:
* (1) If there are no components on the page, we don't require input
* of empty boxas or pixas, although that's the typical situation.
*/
l_int32
jbAddPageComponents(JBCLASSER *classer,
PIX *pixs,
BOXA *boxas,
PIXA *pixas)
{
l_int32 n;
PROCNAME("jbAddPageComponents");
if (!classer)
return ERROR_INT("classer not defined", procName, 1);
if (!pixs)
return ERROR_INT("pix not defined", procName, 1);
/* Test for no components on the current page. Always update the
* number of pages processed, even if nothing is on it. */
if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) {
classer->npages++;
return 0;
}
/* Get classes. For hausdorff, it uses a specified size of
* structuring element and specified rank. For correlation,
* it uses a specified threshold. */
if (classer->method == JB_RANKHAUS) {
if (jbClassifyRankHaus(classer, boxas, pixas))
return ERROR_INT("rankhaus classification failed", procName, 1);
}
else { /* classer->method == JB_CORRELATION */
if (jbClassifyCorrelation(classer, boxas, pixas))
return ERROR_INT("correlation classification failed", procName, 1);
}
/* Find the global UL corners, adjusted for each instance so
* that the class template and instance will have their
* centroids in the same place. Then the template can be
* used to replace the instance. */
if (jbGetULCorners(classer, pixs, boxas))
return ERROR_INT("UL corners not found", procName, 1);
/* Update total component counts and number of pages processed. */
n = boxaGetCount(boxas);
classer->baseindex += n;
numaAddNumber(classer->nacomps, n);
classer->npages++;
return 0;
}
/*----------------------------------------------------------------------*
* Classification using windowed rank hausdorff metric *
*----------------------------------------------------------------------*/
/*!
* jbClassifyRankHaus()
*
* Input: jbclasser
* boxa (of new components for classification)
* pixas (of new components for classification)
* Return: 0 if OK; 1 on error
*/
l_int32
jbClassifyRankHaus(JBCLASSER *classer,
BOXA *boxa,
PIXA *pixas)
{
l_int32 n, nt, i, wt, ht, iclass, size, found, testval;
l_int32 *sumtab;
l_int32 npages, area1, area3;
l_int32 *tab8;
l_float32 rank, x1, y1, x2, y2;
BOX *box;
NUMA *naclass, *napage;
NUMA *nafg; /* fg area of all instances */
NUMA *nafgt; /* fg area of all templates */
JBFINDCTX *findcontext;
NUMAHASH *nahash;
PIX *pix, *pix1, *pix2, *pix3, *pix4;
PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd;
PIXAA *pixaa;
PTA *pta, *ptac, *ptact;
SEL *sel;
PROCNAME("jbClassifyRankHaus");
if (!classer)
return ERROR_INT("classer not found", procName, 1);
if (!boxa)
return ERROR_INT("boxa not found", procName, 1);
if (!pixas)
return ERROR_INT("pixas not found", procName, 1);
npages = classer->npages;
size = classer->sizehaus;
sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT);
/* Generate the bordered pixa, with and without dilation.
* pixa1 and pixa2 contain all the input components. */
n = pixaGetCount(pixas);
pixa1 = pixaCreate(n);
pixa2 = pixaCreate(n);
for (i = 0; i < n; i++) {
pix = pixaGetPix(pixas, i, L_CLONE);
pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
pix2 = pixDilate(NULL, pix1, sel);
pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */
pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */
pixDestroy(&pix);
}
/* Get the centroids of all the bordered images.
* These are relative to the UL corner of each (bordered) pix. */
pta = pixaCentroids(pixa1); /* centroids for this page; use here */
ptac = classer->ptac; /* holds centroids of components up to this page */
ptaJoin(ptac, pta, 0, 0); /* save centroids of all components */
ptact = classer->ptact; /* holds centroids of templates */
/* Use these to save the class and page of each component. */
naclass = classer->naclass;
napage = classer->napage;
sumtab = makePixelSumTab8();
/* Store the unbordered pix in a pixaa, in a hierarchical
* set of arrays. There is one pixa for each class,
* and the pix in each pixa are all the instances found
* of that class. This is actually more than one would need
* for a jbig2 encoder, but there are two reasons to keep
* them around: (1) the set of instances for each class
* can be used to make an improved binary (or, better,
* a grayscale) template, rather than simply using the first
* one in the set; (2) we can investigate the failures
* of the classifier. This pixaa grows as we process
* successive pages. */
pixaa = classer->pixaa;
/* arrays to store class exemplars (templates) */
pixat = classer->pixat; /* un-dilated */
pixatd = classer->pixatd; /* dilated */
/* Fill up the pixaa tree with the template exemplars as
* the first pix in each pixa. As we add each pix,
* we also add the associated box to the pixa.
* We also keep track of the centroid of each pix,
* and use the difference between centroids (of the
* pix with the exemplar we are checking it with)
* to align the two when checking that the Hausdorff
* distance does not exceed a threshold.
* The threshold is set by the Sel used for dilating.
* For example, a 3x3 brick, sel_3, corresponds to a
* Hausdorff distance of 1. In general, for an NxN brick,
* with N odd, corresponds to a Hausdorff distance of (N - 1)/2.
* It turns out that we actually need to use a sel of size 2x2
* to avoid small bad components when there is a halftone image
* from which components can be chosen.
* The larger the Sel you use, the fewer the number of classes,
* and the greater the likelihood of putting semantically
* different objects in the same class. For simplicity,
* we do this separately for the case of rank == 1.0 (exact
* match within the Hausdorff distance) and rank < 1.0. */
rank = classer->rankhaus;
nahash = classer->nahash;
if (rank == 1.0) {
for (i = 0; i < n; i++) {
pix1 = pixaGetPix(pixa1, i, L_CLONE);
pix2 = pixaGetPix(pixa2, i, L_CLONE);
ptaGetPt(pta, i, &x1, &y1);
nt = pixaGetCount(pixat); /* number of templates */
found = FALSE;
findcontext = findSimilarSizedTemplatesInit(classer, pix1);
while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
/* Find score for this template */
pix3 = pixaGetPix(pixat, iclass, L_CLONE);
pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
ptaGetPt(ptact, iclass, &x2, &y2);
testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2,
MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT);
pixDestroy(&pix3);
pixDestroy(&pix4);
if (testval == 1) {
found = TRUE;
numaAddNumber(naclass, iclass);
numaAddNumber(napage, npages);
if (classer->keep_pixaa) {
pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
pix = pixaGetPix(pixas, i, L_CLONE);
pixaAddPix(pixa, pix, L_INSERT);
box = boxaGetBox(boxa, i, L_CLONE);
pixaAddBox(pixa, box, L_INSERT);
pixaDestroy(&pixa);
}
break;
}
}
findSimilarSizedTemplatesDestroy(&findcontext);
if (found == FALSE) { /* new class */
numaAddNumber(naclass, nt);
numaAddNumber(napage, npages);
pixa = pixaCreate(0);
pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
pixaAddPix(pixa, pix, L_INSERT);
wt = pixGetWidth(pix);
ht = pixGetHeight(pix);
numaHashAdd(nahash, ht * wt, nt);
box = boxaGetBox(boxa, i, L_CLONE);
pixaAddBox(pixa, box, L_INSERT);
pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
ptaAddPt(ptact, x1, y1);
pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */
}
else { /* don't save them */
pixDestroy(&pix1);
pixDestroy(&pix2);
}
}
}
else { /* rank < 1.0 */
if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */
return ERROR_INT("nafg not made", procName, 1);
nafgt = classer->nafgt;
tab8 = makePixelSumTab8();
for (i = 0; i < n; i++) { /* all instances on this page */
pix1 = pixaGetPix(pixa1, i, L_CLONE);
numaGetIValue(nafg, i, &area1);
pix2 = pixaGetPix(pixa2, i, L_CLONE);
ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */
nt = pixaGetCount(pixat); /* number of templates */
found = FALSE;
findcontext = findSimilarSizedTemplatesInit(classer, pix1);
while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
/* Find score for this template */
pix3 = pixaGetPix(pixat, iclass, L_CLONE);
numaGetIValue(nafgt, iclass, &area3);
pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
ptaGetPt(ptact, iclass, &x2, &y2);
testval = pixRankHaustest(pix1, pix2, pix3, pix4,
x1 - x2, y1 - y2,
MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
area1, area3, rank, tab8);
pixDestroy(&pix3);
pixDestroy(&pix4);
if (testval == 1) { /* greedy match; take the first */
found = TRUE;
numaAddNumber(naclass, iclass);
numaAddNumber(napage, npages);
if (classer->keep_pixaa) {
pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
pix = pixaGetPix(pixas, i, L_CLONE);
pixaAddPix(pixa, pix, L_INSERT);
box = boxaGetBox(boxa, i, L_CLONE);
pixaAddBox(pixa, box, L_INSERT);
pixaDestroy(&pixa);
}
break;
}
}
findSimilarSizedTemplatesDestroy(&findcontext);
if (found == FALSE) { /* new class */
numaAddNumber(naclass, nt);
numaAddNumber(napage, npages);
pixa = pixaCreate(0);
pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
pixaAddPix(pixa, pix, L_INSERT);
wt = pixGetWidth(pix);
ht = pixGetHeight(pix);
numaHashAdd(nahash, ht * wt, nt);
box = boxaGetBox(boxa, i, L_CLONE);
pixaAddBox(pixa, box, L_INSERT);
pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
ptaAddPt(ptact, x1, y1);
pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */
numaAddNumber(nafgt, area1);
}
else { /* don't save them */
pixDestroy(&pix1);
pixDestroy(&pix2);
}
}
FREE(tab8);
numaDestroy(&nafg);
}
classer->nclass = pixaGetCount(pixat);
FREE(sumtab);
ptaDestroy(&pta);
pixaDestroy(&pixa1);
pixaDestroy(&pixa2);
selDestroy(&sel);
return 0;
}
/*!
* pixHaustest()
*
* Input: pix1 (new pix, not dilated)
* pix2 (new pix, dilated)
* pix3 (exemplar pix, not dilated)
* pix4 (exemplar pix, dilated)
* delx (x comp of centroid difference)
* dely (y comp of centroid difference)
* maxdiffw (max width difference of pix1 and pix2)
* maxdiffh (max height difference of pix1 and pix2)
* Return: 0 (FALSE) if no match, 1 (TRUE) if the new
* pix is in the same class as the exemplar.
*
* Note: we check first that the two pix are roughly
* the same size. Only if they meet that criterion do
* we compare the bitmaps. The Hausdorff is a 2-way
* check. The centroid difference is used to align the two
* images to the nearest integer for each of the checks.
* These check that the dilated image of one contains
* ALL the pixels of the undilated image of the other.
* Checks are done in both direction. A single pixel not
* contained in either direction results in failure of the test.
*/
l_int32
pixHaustest(PIX *pix1,
PIX *pix2,
PIX *pix3,
PIX *pix4,
l_float32 delx, /* x(1) - x(3) */
l_float32 dely, /* y(1) - y(3) */
l_int32 maxdiffw,
l_int32 maxdiffh)
{
l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
PIX *pixt;
/* Eliminate possible matches based on size difference */
wi = pixGetWidth(pix1);
hi = pixGetHeight(pix1);
wt = pixGetWidth(pix3);
ht = pixGetHeight(pix3);
delw = L_ABS(wi - wt);
if (delw > maxdiffw)
return FALSE;
delh = L_ABS(hi - ht);
if (delh > maxdiffh)
return FALSE;
/* Round difference in centroid location to nearest integer;
* use this as a shift when doing the matching. */
if (delx >= 0)
idelx = (l_int32)(delx + 0.5);
else
idelx = (l_int32)(delx - 0.5);
if (dely >= 0)
idely = (l_int32)(dely + 0.5);
else
idely = (l_int32)(dely - 0.5);
/* Do 1-direction hausdorff, checking that every pixel in pix1
* is within a dilation distance of some pixel in pix3. Namely,
* that pix4 entirely covers pix1:
* pixt = pixSubtract(NULL, pix1, pix4), including shift
* where pixt has no ON pixels. */
pixt = pixCreateTemplate(pix1);
pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
pix4, 0, 0);
pixZero(pixt, &boolmatch);
if (boolmatch == 0) {
pixDestroy(&pixt);
return FALSE;
}
/* Do 1-direction hausdorff, checking that every pixel in pix3
* is within a dilation distance of some pixel in pix1. Namely,
* that pix2 entirely covers pix3:
* pixSubtract(pixt, pix3, pix2), including shift
* where pixt has no ON pixels. */
pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
pixZero(pixt, &boolmatch);
pixDestroy(&pixt);
return boolmatch;
}
/*!
* pixRankHaustest()
*
* Input: pix1 (new pix, not dilated)
* pix2 (new pix, dilated)
* pix3 (exemplar pix, not dilated)
* pix4 (exemplar pix, dilated)
* delx (x comp of centroid difference)
* dely (y comp of centroid difference)
* maxdiffw (max width difference of pix1 and pix2)
* maxdiffh (max height difference of pix1 and pix2)
* area1 (fg pixels in pix1)
* area3 (fg pixels in pix3)
* rank (rank value of test, each way)
* tab8 (table of pixel sums for byte)
* Return: 0 (FALSE) if no match, 1 (TRUE) if the new
* pix is in the same class as the exemplar.
*
* Note: we check first that the two pix are roughly
* the same size. Only if they meet that criterion do
* we compare the bitmaps. We convert the rank value to
* a number of pixels by multiplying the rank fraction by the number
* of pixels in the undilated image. The Hausdorff is a 2-way
* check. The centroid difference is used to align the two
* images to the nearest integer for each of the checks.
* The rank hausdorff checks that the dilated image of one
* contains the rank fraction of the pixels of the undilated
* image of the other. Checks are done in both direction.
* Failure of the test in either direction results in failure
* of the test.
*/
l_int32
pixRankHaustest(PIX *pix1,
PIX *pix2,
PIX *pix3,
PIX *pix4,
l_float32 delx, /* x(1) - x(3) */
l_float32 dely, /* y(1) - y(3) */
l_int32 maxdiffw,
l_int32 maxdiffh,
l_int32 area1,
l_int32 area3,
l_float32 rank,
l_int32 *tab8)
{
l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
l_int32 thresh1, thresh3;
PIX *pixt;
/* Eliminate possible matches based on size difference */
wi = pixGetWidth(pix1);
hi = pixGetHeight(pix1);
wt = pixGetWidth(pix3);
ht = pixGetHeight(pix3);
delw = L_ABS(wi - wt);
if (delw > maxdiffw)
return FALSE;
delh = L_ABS(hi - ht);
if (delh > maxdiffh)
return FALSE;
/* Upper bounds in remaining pixels for allowable match */
thresh1 = (l_int32)(area1 * (1. - rank) + 0.5);
thresh3 = (l_int32)(area3 * (1. - rank) + 0.5);
/* Round difference in centroid location to nearest integer;
* use this as a shift when doing the matching. */
if (delx >= 0)
idelx = (l_int32)(delx + 0.5);
else
idelx = (l_int32)(delx - 0.5);
if (dely >= 0)
idely = (l_int32)(dely + 0.5);
else
idely = (l_int32)(dely - 0.5);
/* Do 1-direction hausdorff, checking that every pixel in pix1
* is within a dilation distance of some pixel in pix3. Namely,
* that pix4 entirely covers pix1:
* pixt = pixSubtract(NULL, pix1, pix4), including shift
* where pixt has no ON pixels. */
pixt = pixCreateTemplate(pix1);
pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
pix4, 0, 0);
pixThresholdPixels(pixt, thresh1, &boolmatch, tab8);
if (boolmatch == 1) { /* above thresh1 */
pixDestroy(&pixt);
return FALSE;
}
/* Do 1-direction hausdorff, checking that every pixel in pix3
* is within a dilation distance of some pixel in pix1. Namely,
* that pix2 entirely covers pix3:
* pixSubtract(pixt, pix3, pix2), including shift
* where pixt has no ON pixels. */
pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
pixThresholdPixels(pixt, thresh3, &boolmatch, tab8);
pixDestroy(&pixt);
if (boolmatch == 1) /* above thresh3 */
return FALSE;
else
return TRUE;
}
/*----------------------------------------------------------------------*
* Classification using windowed correlation score *
*----------------------------------------------------------------------*/
/*!
* jbClassifyCorrelation()
*
* Input: jbclasser
* boxa (of new components for classification)
* pixas (of new components for classification)
* Return: 0 if OK; 1 on error
*/
l_int32
jbClassifyCorrelation(JBCLASSER *classer,
BOXA *boxa,
PIXA *pixas)
{
l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages,
overthreshold;
l_int32 *sumtab, *centtab;
l_uint32 *row, word;
l_float32 x1, y1, x2, y2, xsum, ysum;
l_float32 thresh, weight, threshold;
BOX *box;
NUMA *naclass, *napage;
NUMA *nafgt; /* fg area of all templates */
NUMA *naarea; /* w * h area of all templates */
JBFINDCTX *findcontext;
NUMAHASH *nahash;
PIX *pix, *pix1, *pix2;
PIXA *pixa, *pixa1, *pixat;
PIXAA *pixaa;
PTA *pta, *ptac, *ptact;
l_int32 *pixcts; /* pixel counts of each pixa */
l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */
l_int32 x, y, rowcount, downcount, wpl;
l_uint8 byte;
PROCNAME("jbClassifyCorrelation");
if (!classer)
return ERROR_INT("classer not found", procName, 1);
if (!boxa)
return ERROR_INT("boxa not found", procName, 1);
if (!pixas)
return ERROR_INT("pixas not found", procName, 1);
npages = classer->npages;
/* Generate the bordered pixa, which contains all the the
* input components. This will not be saved. */
n = pixaGetCount(pixas);
pixa1 = pixaCreate(n);
for (i = 0; i < n; i++) {
pix = pixaGetPix(pixas, i, L_CLONE);
pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
pixaAddPix(pixa1, pix1, L_INSERT);
pixDestroy(&pix);
}
/* Use these to save the class and page of each component. */
naclass = classer->naclass;
napage = classer->napage;
/* Get the number of fg pixels in each component. */
nafgt = classer->nafgt; /* holds fg areas of the templates */
sumtab = makePixelSumTab8();
pixcts = (l_int32 *)CALLOC(n, sizeof(*pixcts));
pixrowcts = (l_int32 **)CALLOC(n, sizeof(*pixrowcts));
centtab = makePixelCentroidTab8();
if (!pixcts || !pixrowcts || !centtab)
return ERROR_INT("calloc fail in pix*cts or centtab", procName, 1);
/* Count the "1" pixels in each row of the pix in pixa1; this
* allows pixCorrelationScoreThresholded to abort early if a match
* is impossible. This loop merges three calculations: the total
* number of "1" pixels, the number of "1" pixels in each row, and
* the centroid. The centroids are relative to the UL corner of
* each (bordered) pix. The pixrowcts[i][y] are the total number
* of fg pixels in pixa[i] below row y. */
pta = ptaCreate(n);
for (i = 0; i < n; i++) {
pix = pixaGetPix(pixa1, i, L_CLONE);
pixrowcts[i] = (l_int32 *)CALLOC(pixGetHeight(pix),
sizeof(**pixrowcts));
xsum = 0;
ysum = 0;
wpl = pixGetWpl(pix);
row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl;
downcount = 0;
for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) {
pixrowcts[i][y] = downcount;
rowcount = 0;
for (x = 0; x < wpl; x++) {
word = row[x];
byte = word & 0xff;
rowcount += sumtab[byte];
xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte];
byte = (word >> 8) & 0xff;
rowcount += sumtab[byte];
xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte];
byte = (word >> 16) & 0xff;
rowcount += sumtab[byte];
xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte];
byte = (word >> 24) & 0xff;
rowcount += sumtab[byte];
xsum += centtab[byte] + x * 32 * sumtab[byte];
}
downcount += rowcount;
ysum += rowcount * y;
}
pixcts[i] = downcount;
ptaAddPt(pta,
xsum / (l_float32)downcount, ysum / (l_float32)downcount);
pixDestroy(&pix);
}
ptac = classer->ptac; /* holds centroids of components up to this page */
ptaJoin(ptac, pta, 0, 0); /* save centroids of all components */
ptact = classer->ptact; /* holds centroids of templates */
/* Store the unbordered pix in a pixaa, in a hierarchical
* set of arrays. There is one pixa for each class,
* and the pix in each pixa are all the instances found
* of that class. This is actually more than one would need
* for a jbig2 encoder, but there are two reasons to keep
* them around: (1) the set of instances for each class
* can be used to make an improved binary (or, better,
* a grayscale) template, rather than simply using the first
* one in the set; (2) we can investigate the failures
* of the classifier. This pixaa grows as we process
* successive pages. */
pixaa = classer->pixaa;
/* Array to store class exemplars */
pixat = classer->pixat;
/* Fill up the pixaa tree with the template exemplars as
* the first pix in each pixa. As we add each pix,
* we also add the associated box to the pixa.
* We also keep track of the centroid of each pix,
* and use the difference between centroids (of the
* pix with the exemplar we are checking it with)
* to align the two when checking that the correlation
* score exceeds a threshold. The correlation score
* is given by the square of the area of the AND
* between aligned instance and template, divided by
* the product of areas of each image. For identical
* template and instance, the score is 1.0.
* If the threshold is too small, non-equivalent instances
* will be placed in the same class; if too large, there will
* be an unnecessary division of classes representing the
* same character. The weightfactor adds in some of the
* difference (1.0 - thresh), depending on the heaviness
* of the template (measured as the fraction of fg pixels). */
thresh = classer->thresh;
weight = classer->weightfactor;
naarea = classer->naarea;
nahash = classer->nahash;
for (i = 0; i < n; i++) {
pix1 = pixaGetPix(pixa1, i, L_CLONE);
area1 = pixcts[i];
ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */
nt = pixaGetCount(pixat);
found = FALSE;
findcontext = findSimilarSizedTemplatesInit(classer, pix1);
while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
/* Get the template */
pix2 = pixaGetPix(pixat, iclass, L_CLONE);
numaGetIValue(nafgt, iclass, &area2);
ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */
/* Find threshold for this template */
if (weight > 0.0) {
numaGetIValue(naarea, iclass, &area);
threshold = thresh + (1. - thresh) * weight * area2 / area;
}
else
threshold = thresh;
/* Find score for this template */
overthreshold = pixCorrelationScoreThresholded(pix1, pix2,
area1, area2,
x1 - x2, y1 - y2,
MAX_DIFF_WIDTH,
MAX_DIFF_HEIGHT,
sumtab,
pixrowcts[i],
threshold);
#if DEBUG_CORRELATION_SCORE
{
l_float32 score, testscore;
l_int32 count, testcount;
score = pixCorrelationScore(pix1, pix2, area1, area2,
x1 - x2, y1 - y2,
MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
sumtab);
testscore = pixCorrelationScoreSimple(pix1, pix2, area1, area2,
x1 - x2, y1 - y2, MAX_DIFF_WIDTH,
MAX_DIFF_HEIGHT, sumtab);
count = (l_int32)rint(sqrt(score * area1 * area2));
testcount = (l_int32)rint(sqrt(testscore * area1 * area2));
if ((score >= threshold) != (testscore >= threshold)) {
fprintf(stderr, "Correlation score mismatch: %d(%g,%d) vs %d(%g,%d) (%g)\n",
count, score, score >= threshold,
testcount, testscore, testscore >= threshold,
score - testscore);
}
if ((score >= threshold) != overthreshold) {
fprintf(stderr, "Mismatch between correlation/threshold comparison: %g(%g,%d) >= %g(%g) vs %s\n",
score, score*area1*area2, count, threshold, threshold*area1*area2, (overthreshold ? "true" : "false"));
}
}
#endif /* DEBUG_CORRELATION_SCORE */
pixDestroy(&pix2);
if (overthreshold) { /* greedy match */
found = TRUE;
numaAddNumber(naclass, iclass);
numaAddNumber(napage, npages);
if (classer->keep_pixaa) {
/* We are keeping a record of all components */
pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
pix = pixaGetPix(pixas, i, L_CLONE);
pixaAddPix(pixa, pix, L_INSERT);
box = boxaGetBox(boxa, i, L_CLONE);
pixaAddBox(pixa, box, L_INSERT);
pixaDestroy(&pixa);
}
break;
}
}
findSimilarSizedTemplatesDestroy(&findcontext);
if (found == FALSE) { /* new class */
numaAddNumber(naclass, nt);
numaAddNumber(napage, npages);
pixa = pixaCreate(0);
pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
pixaAddPix(pixa, pix, L_INSERT);
wt = pixGetWidth(pix);
ht = pixGetHeight(pix);
numaHashAdd(nahash, ht * wt, nt);
box = boxaGetBox(boxa, i, L_CLONE);
pixaAddBox(pixa, box, L_INSERT);
pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
ptaAddPt(ptact, x1, y1);
numaAddNumber(nafgt, area1);
pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) *
(pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS);
numaAddNumber(naarea, area);
}
else { /* don't save it */
pixDestroy(&pix1);
}
}
classer->nclass = pixaGetCount(pixat);
FREE(pixcts);
FREE(centtab);
for (i = 0; i < n; i++) {
FREE(pixrowcts[i]);
}
FREE(pixrowcts);
FREE(sumtab);
ptaDestroy(&pta);
pixaDestroy(&pixa1);
return 0;
}
/*----------------------------------------------------------------------*
* Determine the image components we start with *
*----------------------------------------------------------------------*/
/*!
* jbGetComponents()
*
* Input: pixs (1 bpp)
* components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
* maxwidth, maxheight (of saved components; larger are discarded)
* &pboxa (<return> b.b. of component items)
* &ppixa (<return> component items)
* Return: 0 if OK, 1 on error
*/
l_int32
jbGetComponents(PIX *pixs,
l_int32 components,
l_int32 maxwidth,
l_int32 maxheight,
BOXA **pboxad,
PIXA **ppixad)
{
l_int32 empty, res, redfactor;
BOXA *boxa;
PIX *pixt1, *pixt2, *pixt3;
PIXA *pixa, *pixat;
PROCNAME("jbGetComponents");
if (!pboxad)
return ERROR_INT("&boxad not defined", procName, 1);
*pboxad = NULL;
if (!ppixad)
return ERROR_INT("&pixad not defined", procName, 1);
*ppixad = NULL;
if (!pixs)
return ERROR_INT("pixs not defined", procName, 1);
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
components != JB_WORDS)
return ERROR_INT("invalid components", procName, 1);
pixZero(pixs, &empty);
if (empty) {
*pboxad = boxaCreate(0);
*ppixad = pixaCreate(0);
return 0;
}
/* If required, preprocess input pixs. The method for both
* characters and words is to generate a connected component
* mask over the units that we want to aggregrate, which are,
* in general, sets of related connected components in pixs.
* For characters, we want to include the dots with
* 'i', 'j' and '!', so we do a small vertical closing to
* generate the mask. For words, we make a mask over all
* characters in each word. This is a bit more tricky, because
* the spacing between words is difficult to predict a priori,
* and words can be typeset with variable spacing that can
* in some cases be barely larger than the space between
* characters. The first step is to generate the mask and
* identify each of its connected components. */
if (components == JB_CONN_COMPS) { /* no preprocessing */
boxa = pixConnComp(pixs, &pixa, 8);
}
else if (components == JB_CHARACTERS) {
pixt1 = pixMorphSequence(pixs, "c1.6", 0);
boxa = pixConnComp(pixt1, &pixat, 8);
pixa = pixaClipToPix(pixat, pixs);
pixDestroy(&pixt1);
pixaDestroy(&pixat);
}
else { /* components == JB_WORDS */
/* Do the operations at about 150 ppi resolution.
* It is much faster at 75 ppi, but the results are
* more accurate at 150 ppi. This will segment the
* words in body text. It can be expected that relatively
* infrequent words in a larger font will be split. */
res = pixGetXRes(pixs);
if (res <= 200) {
redfactor = 1;
pixt1 = pixClone(pixs);
}
else if (res <= 400) {
redfactor = 2;
pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
}
else {
redfactor = 4;
pixt1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
}
pixt2 = pixWordMaskByDilation(pixt1, 0, NULL);
/* Expand the optimally dilated word mask to full res. */
pixt3 = pixExpandReplicate(pixt2, redfactor);
/* Pull out the pixels in pixs corresponding to the mask
* components in pixt3. Note that above we used threshold
* levels in the reduction of 1 to insure that the resulting
* mask fully covers the input pixs. The downside of using
* a threshold of 1 is that very close characters from adjacent
* lines can be joined. But with a level of 2 or greater,
* it is necessary to use a seedfill, followed by a pixOr():
* pixt4 = pixSeedfillBinary(NULL, pixt3, pixs, 8);
* pixOr(pixt3, pixt3, pixt4);
* to insure that the mask coverage is complete over pixs. */
boxa = pixConnComp(pixt3, &pixat, 4);
pixa = pixaClipToPix(pixat, pixs);
pixaDestroy(&pixat);
pixDestroy(&pixt1);
pixDestroy(&pixt2);
pixDestroy(&pixt3);
}
/* Remove large components, and save the results. */
*ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH,
L_SELECT_IF_LTE, NULL);
*pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH,
L_SELECT_IF_LTE, NULL);
pixaDestroy(&pixa);
boxaDestroy(&boxa);
return 0;
}
/*!
* pixWordMaskByDilation()
*
* Input: pixs (1 bpp; typ. at 75 to 150 ppi)
* maxsize (use 0 for default; not to exceed 14)
* &size (<optional return> size of optimal horiz Sel)
* Return: pixd (dilated word mask), or null on error
*
* Notes:
* (1) For 75 to 150 ppi, the optimal dilation should not exceed 7.
* This is the default size chosen if maxsize <= 0.
* (2) To run this on images at resolution between 200 and 300, it
* is advisable to use a larger maxsize, say between 10 and 14.
* (3) The best size for dilating to get word masks is optionally returned.
*/
PIX *
pixWordMaskByDilation(PIX *pixs,
l_int32 maxsize,
l_int32 *psize)
{
l_int32 i, diffmin, ndiff, imin;
l_int32 ncc[MAX_ALLOWED_DILATION + 1];
BOXA *boxa;
NUMA *nacc;
PIX *pixt1, *pixt2, *pixt3;
PIXA *pixa;
SEL *sel;
PROCNAME("pixWordMaskbyDilation");
if (!pixs)
return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
/* Find the optimal dilation to create the word mask.
* Look for successively increasing dilations where the
* number of connected components doesn't decrease.
* This is the situation where the components in the
* word mask should properly cover each word. Use of
* 4 cc slightly reduces the likelihood that words from
* different lines are joined. */
diffmin = 1000000;
pixa = pixaCreate(8);
pixt1 = pixCopy(NULL, pixs);
pixaAddPix(pixa, pixt1, L_COPY);
if (maxsize <= 0)
maxsize = 7; /* default */
if (maxsize > MAX_ALLOWED_DILATION)
maxsize = MAX_ALLOWED_DILATION;
nacc = numaCreate(maxsize);
for (i = 0; i <= maxsize; i++) {
if (i == 0) /* first one not dilated */
pixt2 = pixCopy(NULL, pixt1);
else /* successive dilation by sel_2h */
pixt2 = pixMorphSequence(pixt1, "d2.1", 0);
boxa = pixConnCompBB(pixt2, 4);
ncc[i] = boxaGetCount(boxa);
numaAddNumber(nacc, ncc[i]);
if (i > 0) {
ndiff = ncc[i - 1] - ncc[i];
#if DEBUG_PLOT_CC
fprintf(stderr, "ndiff[%d] = %d\n", i - 1, ndiff);
#endif /* DEBUG_PLOT_CC */
if (ndiff < diffmin) {
imin = i;
diffmin = ndiff;
}
}
pixaAddPix(pixa, pixt2, L_COPY);
pixDestroy(&pixt1);
pixt1 = pixt2;
boxaDestroy(&boxa);
}
pixDestroy(&pixt1);
#if DEBUG_PLOT_CC
{GPLOT *gplot;
NUMA *naseq;
naseq = numaMakeSequence(1, 1, numaGetCount(nacc));
gplot = gplotCreate("junk_cc_output", GPLOT_PNG,
"Number of cc vs. horizontal dilation",
"Sel horiz", "Number of cc");
gplotAddPlot(gplot, naseq, nacc, GPLOT_LINES, "");
gplotMakeOutput(gplot);
gplotDestroy(&gplot);
numaDestroy(&naseq);
}
#endif /* DEBUG_PLOT_CC */
/* Save the result of the optimal dilation */
pixt2 = pixaGetPix(pixa, imin, L_CLONE);
sel = selCreateBrick(1, imin, 0, imin - 1, SEL_HIT);
pixt3 = pixErode(NULL, pixt2, sel); /* remove effect of dilation */
selDestroy(&sel);
pixDestroy(&pixt2);
pixaDestroy(&pixa);
if (psize)
*psize = imin + 1;
numaDestroy(&nacc);
return pixt3;
}
/*----------------------------------------------------------------------*
* Build grayscale composites (templates) *
*----------------------------------------------------------------------*/
/*!
* jbAccumulateComposites()
*
* Input: pixaa (one pixa for each class)
* &pna (<return> number of samples used to build each composite)
* &ptat (<return> centroids of bordered composites)
* Return: pixad (accumulated sum of samples in each class),
* or null on error
*
*/
PIXA *
jbAccumulateComposites(PIXAA *pixaa,
NUMA **pna,
PTA **pptat)
{
l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff;
l_float32 x, y, xave, yave;
NUMA *na;
PIX *pix, *pixt1, *pixt2, *pixsum;
PIXA *pixa, *pixad;
PTA *ptat, *pta;
PROCNAME("jbAccumulateComposites");
if (!pptat)
return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL);
*pptat = NULL;
if (!pna)
return (PIXA *)ERROR_PTR("&na not defined", procName, NULL);
*pna = NULL;
if (!pixaa)
return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL);
n = pixaaGetCount(pixaa);
if ((ptat = ptaCreate(n)) == NULL)
return (PIXA *)ERROR_PTR("ptat not made", procName, NULL);
*pptat = ptat;
pixad = pixaCreate(n);
na = numaCreate(n);
*pna = na;
for (i = 0; i < n; i++) {
pixa = pixaaGetPixa(pixaa, i, L_CLONE);
nt = pixaGetCount(pixa);
numaAddNumber(na, nt);
if (nt == 0) {
L_WARNING("empty pixa found!", procName);
pixaDestroy(&pixa);
continue;
}
pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh);
pix = pixaGetPix(pixa, 0, L_CLONE);
d = pixGetDepth(pix);
pixDestroy(&pix);
pixt1 = pixCreate(maxw, maxh, d);
pixsum = pixInitAccumulate(maxw, maxh, 0);
pta = pixaCentroids(pixa);
/* Find the average value of the centroids ... */
xave = yave = 0;
for (j = 0; j < nt; j++) {
ptaGetPt(pta, j, &x, &y);
xave += x;
yave += y;
}
xave = xave / (l_float32)nt;
yave = yave / (l_float32)nt;
/* and place all centroids at their average value */
for (j = 0; j < nt; j++) {
pixt2 = pixaGetPix(pixa, j, L_CLONE);
ptaGetPt(pta, j, &x, &y);
xdiff = (l_int32)(x - xave);
ydiff = (l_int32)(y - yave);
pixClearAll(pixt1);
pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC,
pixt2, 0, 0);
pixAccumulate(pixsum, pixt1, L_ARITH_ADD);
pixDestroy(&pixt2);
}
pixaAddPix(pixad, pixsum, L_INSERT);
ptaAddPt(ptat, xave, yave);
pixaDestroy(&pixa);
pixDestroy(&pixt1);
ptaDestroy(&pta);
}
return pixad;
}
/*!
* jbTemplatesFromComposites()
*
* Input: pixac (one pix of composites for each class)
* na (number of samples used for each class composite)
* Return: pixad (8 bpp templates for each class), or null on error
*
*/
PIXA *
jbTemplatesFromComposites(PIXA *pixac,
NUMA *na)
{
l_int32 n, i;
l_float32 nt; /* number of samples in the composite; always an integer */
l_float32 factor;
PIX *pixsum; /* accumulated composite */
PIX *pixd;
PIXA *pixad;
PROCNAME("jbTemplatesFromComposites");
if (!pixac)
return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL);
if (!na)
return (PIXA *)ERROR_PTR("na not defined", procName, NULL);
n = pixaGetCount(pixac);
pixad = pixaCreate(n);
for (i = 0; i < n; i++) {
pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */
numaGetFValue(na, i, &nt);
factor = 255. / nt;
pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */
pixd = pixFinalAccumulate(pixsum, 0, 8);
pixaAddPix(pixad, pixd, L_INSERT);
pixDestroy(&pixsum);
}
return pixad;
}
/*----------------------------------------------------------------------*
* jbig2 utility routines *
*----------------------------------------------------------------------*/
/*!
* jbClasserCreate()
*
* Input: method (JB_RANKHAUS, JB_CORRELATION)
* components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
* Return: jbclasser, or null on error
*/
JBCLASSER *
jbClasserCreate(l_int32 method,
l_int32 components)
{
JBCLASSER *classer;
PROCNAME("jbClasserCreate");
if ((classer = (JBCLASSER *)CALLOC(1, sizeof(JBCLASSER))) == NULL)
return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
if (method != JB_RANKHAUS && method != JB_CORRELATION)
return (JBCLASSER *)ERROR_PTR("invalid type", procName, NULL);
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
components != JB_WORDS)
return (JBCLASSER *)ERROR_PTR("invalid type", procName, NULL);
classer->method = method;
classer->components = components;
classer->nacomps = numaCreate(0);
classer->pixaa = pixaaCreate(0);
classer->pixat = pixaCreate(0);
classer->pixatd = pixaCreate(0);
classer->nafgt = numaCreate(0);
classer->naarea = numaCreate(0);
classer->ptac = ptaCreate(0);
classer->ptact = ptaCreate(0);
classer->naclass = numaCreate(0);
classer->napage = numaCreate(0);
classer->ptaul = ptaCreate(0);
return classer;
}
/*
* jbClasserDestroy()
*
* Input: &classer (<to be nulled>)
* Return: void
*/
void
jbClasserDestroy(JBCLASSER **pclasser)
{
JBCLASSER *classer;
if (!pclasser)
return;
if ((classer = *pclasser) == NULL)
return;
sarrayDestroy(&classer->safiles);
numaDestroy(&classer->nacomps);
pixaaDestroy(&classer->pixaa);
pixaDestroy(&classer->pixat);
pixaDestroy(&classer->pixatd);
numaHashDestroy(&classer->nahash);
numaDestroy(&classer->nafgt);
numaDestroy(&classer->naarea);
ptaDestroy(&classer->ptac);
ptaDestroy(&classer->ptact);
numaDestroy(&classer->naclass);
numaDestroy(&classer->napage);
ptaDestroy(&classer->ptaul);
ptaDestroy(&classer->ptall);
FREE(classer);
*pclasser = NULL;
return;
}
/*!
* jbDataSave()
*
* Input: jbclasser
* latticew, latticeh (cell size used to store each
* connected component in the composite)
* Return: jbdata, or null on error
*
* Notes:
* (1) This routine stores the jbig2-type data required for
* generating a lossy jbig2 version of the image.
* It can be losslessly written to (and read from) two files.
* (2) It generates and stores the mosaic of templates.
* (3) It clones the Numa and Pta arrays, so these must all
* be destroyed by the caller.
* (4) Input 0 to use the default values for latticew and/or latticeh,
*/
JBDATA *
jbDataSave(JBCLASSER *classer)
{
l_int32 maxw, maxh;
JBDATA *data;
PIX *pix;
PROCNAME("jbDataSave");
if (!classer)
return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL);
/* Write the templates into an array. */
pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh);
if ((pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1))
== NULL)
return (JBDATA *)ERROR_PTR("data not made", procName, NULL);
if ((data = (JBDATA *)CALLOC(1, sizeof(JBDATA))) == NULL)
return (JBDATA *)ERROR_PTR("data not made", procName, NULL);
data->pix = pix;
data->npages = classer->npages;
data->w = classer->w;
data->h = classer->h;
data->nclass = classer->nclass;
data->latticew = maxw + 1;
data->latticeh = maxh + 1;
data->naclass = numaClone(classer->naclass);
data->napage = numaClone(classer->napage);
data->ptaul = ptaClone(classer->ptaul);
return data;
}
/*
* jbDataDestroy()
*
* Input: &data (<to be nulled>)
* Return: void
*/
void
jbDataDestroy(JBDATA **pdata)
{
JBDATA *data;
if (!pdata)
return;
if ((data = *pdata) == NULL)
return;
pixDestroy(&data->pix);
numaDestroy(&data->naclass);
numaDestroy(&data->napage);
ptaDestroy(&data->ptaul);
FREE(data);
*pdata = NULL;
return;
}
/*!
* jbDataWrite()
*
* Input: rootname (for output files; everything but the extension)
* jbdata
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) Serialization function that writes data in jbdata to file.
*/
l_int32
jbDataWrite(const char *rootout,
JBDATA *jbdata)
{
char buf[L_BUF_SIZE];
l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage;
NUMA *naclass, *napage;
PTA *ptaul;
PIX *pixt;
FILE *fp;
PROCNAME("jbDataWrite");
if (!rootout)
return ERROR_INT("no rootout", procName, 1);
if (!jbdata)
return ERROR_INT("no jbdata", procName, 1);
npages = jbdata->npages;
w = jbdata->w;
h = jbdata->h;
pixt = jbdata->pix;
nclass = jbdata->nclass;
cellw = jbdata->latticew;
cellh = jbdata->latticeh;
naclass = jbdata->naclass;
napage = jbdata->napage;
ptaul = jbdata->ptaul;
snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT);
pixWrite(buf, pixt, IFF_PNG);
snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT);
if ((fp = fopen(buf, "w")) == NULL)
return ERROR_INT("stream not opened", procName, 1);
ncomp = ptaGetCount(ptaul);
fprintf(fp, "jb data file\n");
fprintf(fp, "num pages = %d\n", npages);
fprintf(fp, "page size: w = %d, h = %d\n", w, h);
fprintf(fp, "num components = %d\n", ncomp);
fprintf(fp, "num classes = %d\n", nclass);
fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh);
for (i = 0; i < ncomp; i++) {
numaGetIValue(napage, i, &ipage);
numaGetIValue(naclass, i, &iclass);
ptaGetIPt(ptaul, i, &x, &y);
fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y);
}
fclose(fp);
return 0;
}
/*!
* jbDataRead()
*
* Input: rootname (for template and data files)
* Return: jbdata, or NULL on error
*/
JBDATA *
jbDataRead(const char *rootname)
{
char fname[L_BUF_SIZE];
char *linestr;
l_uint8 *data;
l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage;
l_int32 nbytes, npages, nclass, ncomp;
JBDATA *jbdata;
NUMA *naclass, *napage;
PIX *pixs;
PTA *ptaul;
SARRAY *sa;
PROCNAME("jbDataRead");
if (!rootname)
return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL);
snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT);
if ((pixs = pixRead(fname)) == NULL)
return (JBDATA *)ERROR_PTR("pix not read", procName, NULL);
snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT);
if ((data = arrayRead(fname, &nbytes)) == NULL)
return (JBDATA *)ERROR_PTR("data not read", procName, NULL);
if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL)
return (JBDATA *)ERROR_PTR("sa not made", procName, NULL);
nsa = sarrayGetCount(sa); /* number of cc + 6 */
linestr = sarrayGetString(sa, 0, 0);
if (strcmp(linestr, "jb data file"))
return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL);
linestr = sarrayGetString(sa, 1, 0);
sscanf(linestr, "num pages = %d", &npages);
linestr = sarrayGetString(sa, 2, 0);
sscanf(linestr, "page size: w = %d, h = %d", &w, &h);
linestr = sarrayGetString(sa, 3, 0);
sscanf(linestr, "num components = %d", &ncomp);
linestr = sarrayGetString(sa, 4, 0);
sscanf(linestr, "num classes = %d\n", &nclass);
linestr = sarrayGetString(sa, 5, 0);
sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh);
#if 1
fprintf(stderr, "num pages = %d\n", npages);
fprintf(stderr, "page size: w = %d, h = %d\n", w, h);
fprintf(stderr, "num components = %d\n", ncomp);
fprintf(stderr, "num classes = %d\n", nclass);
fprintf(stderr, "template lattice size: w = %d, h = %d\n", cellw, cellh);
#endif
if ((naclass = numaCreate(ncomp)) == NULL)
return (JBDATA *)ERROR_PTR("naclass not made", procName, NULL);
if ((napage = numaCreate(ncomp)) == NULL)
return (JBDATA *)ERROR_PTR("napage not made", procName, NULL);
if ((ptaul = ptaCreate(ncomp)) == NULL)
return (JBDATA *)ERROR_PTR("pta not made", procName, NULL);
for (i = 6; i < nsa; i++) {
linestr = sarrayGetString(sa, i, 0);
sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y);
numaAddNumber(napage, ipage);
numaAddNumber(naclass, iclass);
ptaAddPt(ptaul, x, y);
}
if ((jbdata = (JBDATA *)CALLOC(1, sizeof(JBDATA))) == NULL)
return (JBDATA *)ERROR_PTR("data not made", procName, NULL);
jbdata->pix = pixs;
jbdata->npages = npages;
jbdata->w = w;
jbdata->h = h;
jbdata->nclass = nclass;
jbdata->latticew = cellw;
jbdata->latticeh = cellh;
jbdata->naclass = naclass;
jbdata->napage = napage;
jbdata->ptaul = ptaul;
FREE(data);
sarrayDestroy(&sa);
return jbdata;
}
/*!
* jbDataRender()
*
* Input: jbdata
* debugflag (if TRUE, writes into 2 bpp pix and adds
* component outlines in color)
* Return: pixa (reconstruction of original images, using templates) or
* null on error
*/
PIXA *
jbDataRender(JBDATA *data,
l_int32 debugflag)
{
l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage;
l_int32 npages, nclass, ncomp, wp, hp;
BOX *box;
NUMA *naclass, *napage;
PIX *pixt, *pixt2, *pix, *pixd;
PIXA *pixat; /* pixa of templates */
PIXA *pixad; /* pixa of output images */
PIXCMAP *cmap;
PTA *ptaul;
PROCNAME("jbDataRender");
if (!data)
return (PIXA *)ERROR_PTR("data not defined", procName, NULL);
npages = data->npages;
w = data->w;
h = data->h;
pixt = data->pix;
nclass = data->nclass;
cellw = data->latticew;
cellh = data->latticeh;
naclass = data->naclass;
napage = data->napage;
ptaul = data->ptaul;
ncomp = numaGetCount(naclass);
/* Reconstruct the original set of images from the templates
* and the data associated with each component. First,
* generate the output pixa as a set of empty pix. */
if ((pixad = pixaCreate(npages)) == NULL)
return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);
for (i = 0; i < npages; i++) {
if (debugflag == FALSE)
pix = pixCreate(w, h, 1);
else {
pix = pixCreate(w, h, 2);
cmap = pixcmapCreate(2);
pixcmapAddColor(cmap, 255, 255, 255);
pixcmapAddColor(cmap, 0, 0, 0);
pixcmapAddColor(cmap, 255, 0, 0); /* for box outlines */
pixSetColormap(pix, cmap);
}
pixaAddPix(pixad, pix, L_INSERT);
}
/* Put the class templates into a pixa. */
if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL)
return (PIXA *)ERROR_PTR("pixat not made", procName, NULL);
/* Place each component in the right location on its page. */
for (i = 0; i < ncomp; i++) {
numaGetIValue(napage, i, &ipage);
numaGetIValue(naclass, i, &iclass);
pix = pixaGetPix(pixat, iclass, L_CLONE); /* the template */
wp = pixGetWidth(pix);
hp = pixGetHeight(pix);
ptaGetIPt(ptaul, i, &x, &y);
pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */
if (debugflag == FALSE)
pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0);
else {
pixt2 = pixConvert1To2Cmap(pix);
pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0);
box = boxCreate(x, y, wp, hp);
pixRenderBoxArb(pixd, box, 1, 255, 0, 0);
pixDestroy(&pixt2);
boxDestroy(&box);
}
pixDestroy(&pix); /* the clone only */
pixDestroy(&pixd); /* the clone only */
}
pixaDestroy(&pixat);
return pixad;
}
/*!
* jbGetULCorners()
*
* Input: jbclasser
* pixs (full res image)
* boxa (of c.c. bounding rectangles for this page)
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This computes the ptaul field, which has the global UL corners,
* adjusted for each specific component, so that each component
* can be replaced by the template for its class and have the
* centroid in the template in the same position as the
* centroid of the original connected component. It is important
* that this be done properly to avoid a wavy baseline in the
* result.
* (2) The array fields ptac and ptact give the centroids of
* those components relative to the UL corner of each component.
* Here, we compute the difference in each component, round to
* nearest integer, and correct the box->x and box->y by
* the appropriate integral difference.
* (3) The templates and stored instances are all bordered.
*/
l_int32
jbGetULCorners(JBCLASSER *classer,
PIX *pixs,
BOXA *boxa)
{
l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy;
l_int32 *sumtab;
l_float32 x1, x2, y1, y2, delx, dely;
BOX *box;
NUMA *naclass;
PIX *pixt;
PTA *ptac, *ptact, *ptaul;
PROCNAME("jbGetULCorners");
if (!classer)
return ERROR_INT("classer not defined", procName, 1);
if (!pixs)
return ERROR_INT("pixs not defined", procName, 1);
if (!boxa)
return ERROR_INT("boxa not defined", procName, 1);
n = boxaGetCount(boxa);
ptaul = classer->ptaul;
naclass = classer->naclass;
ptac = classer->ptac;
ptact = classer->ptact;
baseindex = classer->baseindex; /* num components before this page */
sumtab = makePixelSumTab8();
for (i = 0; i < n; i++) {
index = baseindex + i;
ptaGetPt(ptac, index, &x1, &y1);
numaGetIValue(naclass, index, &iclass);
ptaGetPt(ptact, iclass, &x2, &y2);
delx = x2 - x1;
dely = y2 - y1;
if (delx >= 0)
idelx = (l_int32)(delx + 0.5);
else
idelx = (l_int32)(delx - 0.5);
if (dely >= 0)
idely = (l_int32)(dely + 0.5);
else
idely = (l_int32)(dely - 0.5);
if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL)
return ERROR_INT("box not found", procName, 1);
boxGetGeometry(box, &x, &y, NULL, NULL);
/* Get final increments dx and dy for best alignment */
pixt = pixaGetPix(classer->pixat, iclass, L_CLONE);
finalPositioningForAlignment(pixs, x, y, idelx, idely,
pixt, sumtab, &dx, &dy);
/* if (i % 20 == 0)
fprintf(stderr, "dx = %d, dy = %d\n", dx, dy); */
ptaAddPt(ptaul, x - idelx + dx, y - idely + dy);
boxDestroy(&box);
pixDestroy(&pixt);
}
FREE(sumtab);
return 0;
}
/*!
* jbGetLLCorners()
*
* Input: jbclasser
* Return: 0 if OK, 1 on error
*
* Notes:
* (1) This computes the ptall field, which has the global LL corners,
* adjusted for each specific component, so that each component
* can be replaced by the template for its class and have the
* centroid in the template in the same position as the
* centroid of the original connected component. It is important
* that this be done properly to avoid a wavy baseline in the result.
* (2) It is computed here from the corresponding UL corners, where
* the input templates and stored instances are all bordered.
* This should be done after all pages have been processed.
* (3) For proper substitution, the templates whose LL corners are
* placed in these locations must be UN-bordered.
* This is available for a realistic jbig2 encoder, which would
* (1) encode each template without a border, and (2) encode
* the position using the LL corner (rather than the UL
* corner) because the difference between y-values
* of successive instances is typically close to zero.
*/
l_int32
jbGetLLCorners(JBCLASSER *classer)
{
l_int32 i, iclass, n, x1, y1, h;
NUMA *naclass;
PIX *pix;
PIXA *pixat;
PTA *ptaul, *ptall;
PROCNAME("jbGetLLCorners");
if (!classer)
return ERROR_INT("classer not defined", procName, 1);
ptaul = classer->ptaul;
naclass = classer->naclass;
pixat = classer->pixat;
ptaDestroy(&classer->ptall);
n = ptaGetCount(ptaul);
ptall = ptaCreate(n);
classer->ptall = ptall;
/* If the templates were bordered, we would add h - 1 to the UL
* corner y-value. However, because the templates to be used
* here have their borders removed, and the borders are
* JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS
* to the UL corner y-value. */
for (i = 0; i < n; i++) {
ptaGetIPt(ptaul, i, &x1, &y1);
numaGetIValue(naclass, i, &iclass);
pix = pixaGetPix(pixat, iclass, L_CLONE);
h = pixGetHeight(pix);
ptaAddPt(ptall, x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS);
pixDestroy(&pix);
}
return 0;
}
/*----------------------------------------------------------------------*
* Static helpers *
*----------------------------------------------------------------------*/
/* When looking for similar matches we check templates whose size is +/- 2 in
* each direction. This involves 25 possible sizes. This array contains the
* offsets for each of those positions in a spiral pattern. There are 25 pairs
* of numbers in this array: even positions are x values. */
static int two_by_two_walk[50] = {
0, 0,
0, 1,
-1, 0,
0, -1,
1, 0,
-1, 1,
1, 1,
-1, -1,
1, -1,
0, -2,
2, 0,
0, 2,
-2, 0,
-1, -2,
1, -2,
2, -1,
2, 1,
1, 2,
-1, 2,
-2, 1,
-2, -1,
-2, -2,
2, -2,
2, 2,
-2, 2};
/*!
* findSimilarSizedTemplatesInit()
*
* Input: classer
* pixs (instance to be matched)
* Return: Allocated context to be used with findSimilar*
*/
static JBFINDCTX *
findSimilarSizedTemplatesInit(JBCLASSER *classer,
PIX *pixs)
{
JBFINDCTX *state;
state = (JBFINDCTX *)CALLOC(1, sizeof(JBFINDCTX));
state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS;
state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS;
state->classer = classer;
return state;
}
static void
findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate)
{
JBFINDCTX *state;
PROCNAME("findSimilarSizedTemplatesDestroy");
if (pstate == NULL) {
L_WARNING("ptr address is null", procName);
return;
}
if ((state = *pstate) == NULL)
return;
numaDestroy(&state->numa);
FREE(state);
*pstate = NULL;
return;
}
/*!
* findSimilarSizedTemplatesNext()
*
* Input: state (from findSimilarSizedTemplatesInit)
* Return: Next template number, or -1 when finished
*
* We have a hash table mapping template area to a list of template
* numbers with that area. We wish to find similar sized templates,
* so we first look for templates with the same width and height, and
* then with width + 1, etc. This walk is guided by the
* two_by_two_walk array, above.
*
* We don't want to have to collect the whole list of templates first because
* (we hope) to find it quickly. So we keep the context for this walk in an
* explictit state structure and this function acts like a generator.
*/
static l_int32
findSimilarSizedTemplatesNext(JBFINDCTX *state)
{
l_int32 desiredh, desiredw, size, templ;
PIX *pixt;
while(1) { /* Continue the walk over step 'i' */
if (state->i >= 25) { /* all done */
return -1;
}
desiredw = state->w + two_by_two_walk[2 * state->i];
desiredh = state->h + two_by_two_walk[2 * state->i + 1];
if (desiredh < 1 || desiredw < 1) { /* invalid size */
state->i++;
continue;
}
if (!state->numa) {
/* We have yet to start walking the array for the step 'i' */
state->numa = numaHashGetNuma(state->classer->nahash,
desiredh * desiredw);
if (!state->numa) { /* nothing there */
state->i++;
continue;
}
state->n = 0; /* OK, we got a numa. */
}
/* Continue working on this numa */
size = numaGetCount(state->numa);
for ( ; state->n < size; ) {
templ = (l_int32)(state->numa->array[state->n++] + 0.5);
pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE);
if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw &&
pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) {
pixDestroy(&pixt);
return templ;
}
pixDestroy(&pixt);
}
/* Exhausted the numa; take another step and try again */
state->i++;
numaDestroy(&state->numa);
continue;
}
}
/*!
* finalPositioningForAlignment()
*
* Input: pixs (input page image)
* x, y (location of UL corner of bb of component in pixs)
* idelx, idely (compensation to match centroids of component
* and template)
* pixt (template, with JB_ADDED_PIXELS of padding on all sides)
* sumtab (for summing fg pixels in an image)
* &dx, &dy (return delta on position for best match; each
* one is in the set {-1, 0, 1})
* Return: 0 if OK, 1 on error
*
*/
static l_int32
finalPositioningForAlignment(PIX *pixs,
l_int32 x,
l_int32 y,
l_int32 idelx,
l_int32 idely,
PIX *pixt,
l_int32 *sumtab,
l_int32 *pdx,
l_int32 *pdy)
{
l_int32 w, h, i, j, minx, miny, count, mincount;
PIX *pixi; /* clipped from source pixs */
PIX *pixr; /* temporary storage */
BOX *box;
PROCNAME("finalPositioningForAlignment");
if (!pixs)
return ERROR_INT("pixs not defined", procName, 1);
if (!pixt)
return ERROR_INT("pixt not defined", procName, 1);
if (!pdx || !pdy)
return ERROR_INT("&dx and &dy not both defined", procName, 1);
if (!sumtab)
return ERROR_INT("sumtab not defined", procName, 1);
*pdx = *pdy = 0;
/* Use JB_ADDED_PIXELS pixels padding on each side */
w = pixGetWidth(pixt);
h = pixGetHeight(pixt);
box = boxCreate(x - idelx - JB_ADDED_PIXELS,
y - idely - JB_ADDED_PIXELS, w, h);
pixi = pixClipRectangle(pixs, box, NULL);
boxDestroy(&box);
if (!pixi)
return ERROR_INT("pixi not made", procName, 1);
pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1);
mincount = 0x7fffffff;
for (i = -1; i <= 1; i++) {
for (j = -1; j <= 1; j++) {
pixCopy(pixr, pixi);
pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0);
pixCountPixels(pixr, &count, sumtab);
if (count < mincount) {
minx = j;
miny = i;
mincount = count;
}
}
}
pixDestroy(&pixi);
pixDestroy(&pixr);
*pdx = minx;
*pdy = miny;
return 0;
}