| /*====================================================================* |
| - Copyright (C) 2001 Leptonica. All rights reserved. |
| - This software is distributed in the hope that it will be |
| - useful, but with NO WARRANTY OF ANY KIND. |
| - No author or distributor accepts responsibility to anyone for the |
| - consequences of using this software, or for whether it serves any |
| - particular purpose or works at all, unless he or she says so in |
| - writing. Everyone is granted permission to copy, modify and |
| - redistribute this source code, for commercial or non-commercial |
| - purposes, with the following restrictions: (1) the origin of this |
| - source code must not be misrepresented; (2) modified versions must |
| - be plainly marked as such; and (3) this notice may not be removed |
| - or altered from any source or modified source distribution. |
| *====================================================================*/ |
| |
| |
| /* |
| * jbclass.c |
| * |
| * These are functions for unsupervised classification of |
| * collections of connected components -- either characters or |
| * words -- in binary images. They can be used as image |
| * processing steps in jbig2 compression. |
| * |
| * Initialization |
| * |
| * JBCLASSER *jbRankHausInit() [rank hausdorff encoder] |
| * JBCLASSER *jbCorrelationInit() [correlation encoder] |
| * JBCLASSER *jbCorrelationInitWithoutComponents() [ditto] |
| * static JBCLASSER *jbCorrelationInitInternal() |
| * |
| * Classify the pages |
| * |
| * l_int32 jbAddPages() |
| * l_int32 jbAddPage() |
| * l_int32 jbAddPageComponents() |
| * |
| * Rank hausdorff classifier |
| * |
| * l_int32 jbClassifyRankHaus() |
| * l_int32 pixHaustest() |
| * l_int32 pixRankHaustest() |
| * |
| * Binary correlation classifier |
| * |
| * l_int32 jbClassifyCorrelation() |
| * |
| * Determine the image components we start with |
| * |
| * l_int32 jbGetComponents() |
| * PIX *pixWordMaskByDilation() |
| * |
| * Build grayscale composites (templates) |
| * |
| * PIXA *jbAccumulateComposites |
| * PIXA *jbTemplatesFromComposites |
| * |
| * Utility functions for Classer |
| * |
| * JBCLASSER *jbClasserCreate() |
| * void jbClasserDestroy() |
| * |
| * Utility functions for Data |
| * |
| * JBDATA *jbDataSave() |
| * void jbDataDestroy() |
| * l_int32 jbDataWrite() |
| * JBDATA *jbDataRead() |
| * PIXA *jbDataRender() |
| * l_int32 jbGetULCorners() |
| * l_int32 jbGetLLCorners() |
| * |
| * Static helpers |
| * |
| * static JBFINDCTX *findSimilarSizedTemplatesInit() |
| * static l_int32 findSimilarSizedTemplatesNext() |
| * static void findSimilarSizedTemplatesDestroy() |
| * static l_int32 finalPositioningForAlignment() |
| * |
| * Note: this is NOT an implementation of the JPEG jbig2 |
| * proposed standard encoder, the specifications for which |
| * can be found at http://www.jpeg.org/jbigpt2.html. |
| * (See below for a full implementation.) |
| * It is an implementation of the lower-level part of an encoder that: |
| * |
| * (1) identifies connected components that are going to be used |
| * (2) puts them in similarity classes (this is an unsupervised |
| * classifier), and |
| * (3) stores the result in a simple file format (2 files, |
| * one for templates and one for page/coordinate/template-index |
| * quartets). |
| * |
| * An actual implementation of the official jbig2 encoder could |
| * start with parts (1) and (2), and would then compress the quartets |
| * according to the standards requirements (e.g., Huffman or |
| * arithmetic coding of coordinate differences and image templates). |
| * |
| * The low-level part of the encoder provided here has the |
| * following useful features: |
| * |
| * - It is accurate in the identification of templates |
| * and classes because it uses a windowed hausdorff |
| * distance metric. |
| * - It is accurate in the placement of the connected |
| * components, doing a two step process of first aligning |
| * the the centroids of the template with those of each instance, |
| * and then making a further correction of up to +- 1 pixel |
| * in each direction to best align the templates. |
| * - It is fast because it uses a morphologically based |
| * matching algorithm to implement the hausdorff criterion, |
| * and it selects the patterns that are possible matches |
| * based on their size. |
| * |
| * We provide two different matching functions, one using Hausdorff |
| * distance and one using a simple image correlation. |
| * The Hausdorff method sometimes produces better results for the |
| * same number of classes, because it gives a relatively small |
| * effective weight to foreground pixels near the boundary, |
| * and a relatively large weight to foreground pixels that are |
| * not near the boundary. By effectively ignoring these boundary |
| * pixels, Hausdorff weighting corresponds better to the expected |
| * probabilities of the pixel values in a scanned image, where the |
| * variations in instances of the same printed character are much |
| * more likely to be in pixels near the boundary. By contrast, |
| * the correlation method gives equal weight to all foreground pixels. |
| * |
| * For best results, use the correlation method. Correlation takes |
| * the number of fg pixels in the AND of instance and template, |
| * divided by the product of the number of fg pixels in instance |
| * and template. It compares this with a threshold that, in |
| * general, depends on the fractional coverage of the template. |
| * For heavy text, the threshold is raised above that for light |
| * text, By using both these parameters (basic threshold and |
| * adjustment factor for text weight), one has more flexibility |
| * and can arrive at the fewest substitution errors, although |
| * this comes at the price of more templates. |
| * |
| * The strict Hausdorff scoring is not a rank weighting, because a |
| * single pixel beyond the given distance will cause a match |
| * failure. A rank Hausdorff is more robust to non-boundary noise, |
| * but it is also more susceptible to confusing components that |
| * should be in different classes. For implementing a jbig2 |
| * application for visually lossless binary image compression, |
| * you have two choices: |
| * |
| * (1) use a 3x3 structuring element (size = 3) and a strict |
| * Hausdorff comparison (rank = 1.0 in the rank Hausdorff |
| * function). This will result in a minimal number of classes, |
| * but confusion of small characters, such as italic and |
| * non-italic lower-case 'o', can still occur. |
| * (2) use the correlation method with a threshold of 0.85 |
| * and a weighting factor of about 0.7. This will result in |
| * a larger number of classes, but should not be confused |
| * either by similar small characters or by extremely |
| * thick sans serif characters, such as in prog/cootoots.png. |
| * |
| * As mentioned above, if visual substitution errors must be |
| * avoided, you should use the correlation method. |
| * |
| * We provide executables that show how to do the encoding: |
| * prog/jbrankhaus.c |
| * prog/jbcorrelation.c |
| * |
| * The basic flow for correlation classification goes as follows, |
| * where specific choices have been made for parameters (Hausdorff |
| * is the same except for initialization): |
| * |
| * // Initialize and save data in the classer |
| * JBCLASSER *classer = |
| * jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7); |
| * SARRAY *safiles = getSortedPathnamesInDirectory(directory, |
| * NULL, 0, 0); |
| * jbAddPages(classer, safiles); |
| * |
| * // Save the data in a data structure for serialization, |
| * // and write it into two files. |
| * JBDATA *data = jbDataSave(classer); |
| * jbDataWrite(rootname, data); |
| * |
| * // Reconstruct (render) the pages from the encoded data. |
| * PIXA *pixa = jbDataRender(data, FALSE); |
| * |
| * Adam Langley has recently built a jbig2 standards-compliant |
| * encoder, the first one to appear in open source. You can get |
| * this encoder at: |
| * |
| * http://www.imperialviolet.org/jbig2.html |
| * |
| * It uses arithmetic encoding throughout. It encodes binary images |
| * losslessly with a single arithmetic coding over the full image. |
| * It also does both lossy and lossless encoding from connected |
| * components, using leptonica to generate the templates representing |
| * each cluster. |
| */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <math.h> |
| #include "allheaders.h" |
| |
| /* MSVC can't handle arrays dimensioned by static const integers */ |
| #define L_BUF_SIZE 512 |
| |
| /* For jbClassifyRankHaus(): size of border added around |
| * pix of each c.c., to allow further processing. This |
| * should be at least the sum of the MAX_DIFF_HEIGHT |
| * (or MAX_DIFF_WIDTH) and one-half the size of the Sel */ |
| static const l_int32 JB_ADDED_PIXELS = 6; |
| |
| /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore(): |
| * choose these to be 2 or greater */ |
| static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */ |
| static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */ |
| |
| /* In initialization, you have the option to discard components |
| * (cc, characters or words) that have either width or height larger |
| * than a given size. This is convenient for jbDataSave(), because |
| * the components are placed onto a regular lattice with cell |
| * dimension equal to the maximum component size. The default |
| * values are given here. If you want to save all components, |
| * use a sufficiently large set of dimensions. */ |
| static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */ |
| static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */ |
| static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */ |
| static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */ |
| |
| /* Max allowed dilation to merge characters into words */ |
| #define MAX_ALLOWED_DILATION 14 |
| |
| /* This stores the state of a state machine which fetches |
| * similar sized templates */ |
| struct JbFindTemplatesState |
| { |
| JBCLASSER *classer; /* classer */ |
| l_int32 w; /* desired width */ |
| l_int32 h; /* desired height */ |
| l_int32 i; /* index into two_by_two step array */ |
| NUMA *numa; /* current number array */ |
| l_int32 n; /* current element of numa */ |
| }; |
| typedef struct JbFindTemplatesState JBFINDCTX; |
| |
| |
| /* Static initialization function */ |
| static JBCLASSER * jbCorrelationInitInternal(l_int32 components, |
| l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, |
| l_float32 weightfactor, l_int32 keep_components); |
| |
| /* Static helper functions */ |
| static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs); |
| static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context); |
| static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext); |
| static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y, |
| l_int32 idelx, l_int32 idely, PIX *pixt, |
| l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy); |
| |
| #ifndef NO_CONSOLE_IO |
| #define DEBUG_PLOT_CC 0 |
| #define DEBUG_CORRELATION_SCORE 0 |
| #endif /* ~NO_CONSOLE_IO */ |
| |
| |
| /*----------------------------------------------------------------------* |
| * Initialization * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * jbRankHausInit() |
| * |
| * Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) |
| * maxwidth (of component; use 0 for default) |
| * maxheight (of component; use 0 for default) |
| * size (of square structuring element; 2, representing |
| * 2x2 sel, is necessary for reasonable accuracy of |
| * small components; combine this with rank ~ 0.97 |
| * to avoid undue class expansion) |
| * rank (rank val of match, each way; in [0.5 - 1.0]; |
| * when using size = 2, 0.97 is a reasonable value) |
| * Return: jbclasser if OK; NULL on error |
| */ |
| JBCLASSER * |
| jbRankHausInit(l_int32 components, |
| l_int32 maxwidth, |
| l_int32 maxheight, |
| l_int32 size, |
| l_float32 rank) |
| { |
| JBCLASSER *classer; |
| |
| PROCNAME("jbRankHausInit"); |
| |
| if (components != JB_CONN_COMPS && components != JB_CHARACTERS && |
| components != JB_WORDS) |
| return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); |
| if (size < 1 || size > 10) |
| return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL); |
| if (rank < 0.5 || rank > 1.0) |
| return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL); |
| if (maxwidth == 0) { |
| if (components == JB_CONN_COMPS) |
| maxwidth = MAX_CONN_COMP_WIDTH; |
| else if (components == JB_CHARACTERS) |
| maxwidth = MAX_CHAR_COMP_WIDTH; |
| else /* JB_WORDS */ |
| maxwidth = MAX_WORD_COMP_WIDTH; |
| } |
| if (maxheight == 0) |
| maxheight = MAX_COMP_HEIGHT; |
| |
| if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL) |
| return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); |
| classer->maxwidth = maxwidth; |
| classer->maxheight = maxheight; |
| classer->sizehaus = size; |
| classer->rankhaus = rank; |
| classer->nahash = numaHashCreate(5507, 4); /* 5507 is prime */ |
| return classer; |
| } |
| |
| |
| /*! |
| * jbCorrelationInit() |
| * |
| * Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) |
| * maxwidth (of component; use 0 for default) |
| * maxheight (of component; use 0 for default) |
| * thresh (value for correlation score: in [0.4 - 0.9]) |
| * weightfactor (corrects thresh for thick characters [0.0 - 1.0]) |
| * Return: jbclasser if OK; NULL on error |
| */ |
| JBCLASSER * |
| jbCorrelationInit(l_int32 components, |
| l_int32 maxwidth, |
| l_int32 maxheight, |
| l_float32 thresh, |
| l_float32 weightfactor) |
| { |
| return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, |
| weightfactor, 1); |
| } |
| |
| /*! |
| * jbCorrelationInitWithoutComponents() |
| * |
| * Input: same as jbCorrelationInit |
| * Output: same as jbCorrelationInit |
| * |
| * Note: acts the same as jbCorrelationInit(), but the resulting |
| * object doesn't keep a list of all the components. |
| */ |
| JBCLASSER * |
| jbCorrelationInitWithoutComponents(l_int32 components, |
| l_int32 maxwidth, |
| l_int32 maxheight, |
| l_float32 thresh, |
| l_float32 weightfactor) |
| { |
| return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, |
| weightfactor, 0); |
| } |
| |
| |
| static JBCLASSER * |
| jbCorrelationInitInternal(l_int32 components, |
| l_int32 maxwidth, |
| l_int32 maxheight, |
| l_float32 thresh, |
| l_float32 weightfactor, |
| l_int32 keep_components) |
| { |
| JBCLASSER *classer; |
| |
| PROCNAME("jbCorrelationInitInternal"); |
| |
| if (components != JB_CONN_COMPS && components != JB_CHARACTERS && |
| components != JB_WORDS) |
| return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); |
| if (thresh < 0.4 || thresh > 0.9) |
| return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.9]", |
| procName, NULL); |
| if (weightfactor < 0.0 || weightfactor > 1.0) |
| return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]", |
| procName, NULL); |
| if (maxwidth == 0) { |
| if (components == JB_CONN_COMPS) |
| maxwidth = MAX_CONN_COMP_WIDTH; |
| else if (components == JB_CHARACTERS) |
| maxwidth = MAX_CHAR_COMP_WIDTH; |
| else /* JB_WORDS */ |
| maxwidth = MAX_WORD_COMP_WIDTH; |
| } |
| if (maxheight == 0) |
| maxheight = MAX_COMP_HEIGHT; |
| |
| |
| if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL) |
| return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); |
| classer->maxwidth = maxwidth; |
| classer->maxheight = maxheight; |
| classer->thresh = thresh; |
| classer->weightfactor = weightfactor; |
| classer->nahash = numaHashCreate(5507, 4); /* 5507 is prime */ |
| classer->keep_pixaa = keep_components; |
| return classer; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Classify the pages * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * jbAddPages() |
| * |
| * Input: jbclasser |
| * safiles (of page image file names) |
| * Return: 0 if OK; 1 on error |
| * |
| * Note: |
| * (1) jbclasser makes a copy of the array of file names. |
| * (2) The caller is still responsible for destroying the input array. |
| */ |
| l_int32 |
| jbAddPages(JBCLASSER *classer, |
| SARRAY *safiles) |
| { |
| l_int32 i, nfiles; |
| char *fname; |
| PIX *pix; |
| |
| PROCNAME("jbAddPages"); |
| |
| if (!classer) |
| return ERROR_INT("classer not defined", procName, 1); |
| if (!safiles) |
| return ERROR_INT("safiles not defined", procName, 1); |
| |
| classer->safiles = sarrayCopy(safiles); |
| nfiles = sarrayGetCount(safiles); |
| for (i = 0; i < nfiles; i++) { |
| fname = sarrayGetString(safiles, i, 0); |
| if ((pix = pixRead(fname)) == NULL) { |
| L_WARNING_INT("image file %d not read", procName, i); |
| continue; |
| } |
| jbAddPage(classer, pix); |
| pixDestroy(&pix); |
| } |
| |
| return 0; |
| } |
| |
| |
| /*! |
| * jbAddPage() |
| * |
| * Input: jbclasser |
| * pixs (of input page) |
| * Return: 0 if OK; 1 on error |
| */ |
| l_int32 |
| jbAddPage(JBCLASSER *classer, |
| PIX *pixs) |
| { |
| BOXA *boxas; |
| PIXA *pixas; |
| |
| PROCNAME("jbAddPage"); |
| |
| if (!classer) |
| return ERROR_INT("classer not defined", procName, 1); |
| if (!pixs) |
| return ERROR_INT("pix not defined", procName, 1); |
| |
| classer->w = pixGetWidth(pixs); |
| classer->h = pixGetHeight(pixs); |
| |
| /* Get the appropriate components and their bounding boxes */ |
| if (jbGetComponents(pixs, classer->components, classer->maxwidth, |
| classer->maxheight, &boxas, &pixas)) { |
| return ERROR_INT("components not made", procName, 1); |
| } |
| |
| jbAddPageComponents(classer, pixs, boxas, pixas); |
| boxaDestroy(&boxas); |
| pixaDestroy(&pixas); |
| return 0; |
| } |
| |
| |
| /*! |
| * jbAddPageComponents() |
| * |
| * Input: jbclasser |
| * pixs (of input page) |
| * boxas (b.b. of components for this page) |
| * pixas (components for this page) |
| * Return: 0 if OK; 1 on error |
| * |
| * Notes: |
| * (1) If there are no components on the page, we don't require input |
| * of empty boxas or pixas, although that's the typical situation. |
| */ |
| l_int32 |
| jbAddPageComponents(JBCLASSER *classer, |
| PIX *pixs, |
| BOXA *boxas, |
| PIXA *pixas) |
| { |
| l_int32 n; |
| |
| PROCNAME("jbAddPageComponents"); |
| |
| if (!classer) |
| return ERROR_INT("classer not defined", procName, 1); |
| if (!pixs) |
| return ERROR_INT("pix not defined", procName, 1); |
| |
| /* Test for no components on the current page. Always update the |
| * number of pages processed, even if nothing is on it. */ |
| if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) { |
| classer->npages++; |
| return 0; |
| } |
| |
| /* Get classes. For hausdorff, it uses a specified size of |
| * structuring element and specified rank. For correlation, |
| * it uses a specified threshold. */ |
| if (classer->method == JB_RANKHAUS) { |
| if (jbClassifyRankHaus(classer, boxas, pixas)) |
| return ERROR_INT("rankhaus classification failed", procName, 1); |
| } |
| else { /* classer->method == JB_CORRELATION */ |
| if (jbClassifyCorrelation(classer, boxas, pixas)) |
| return ERROR_INT("correlation classification failed", procName, 1); |
| } |
| |
| /* Find the global UL corners, adjusted for each instance so |
| * that the class template and instance will have their |
| * centroids in the same place. Then the template can be |
| * used to replace the instance. */ |
| if (jbGetULCorners(classer, pixs, boxas)) |
| return ERROR_INT("UL corners not found", procName, 1); |
| |
| /* Update total component counts and number of pages processed. */ |
| n = boxaGetCount(boxas); |
| classer->baseindex += n; |
| numaAddNumber(classer->nacomps, n); |
| classer->npages++; |
| |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Classification using windowed rank hausdorff metric * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * jbClassifyRankHaus() |
| * |
| * Input: jbclasser |
| * boxa (of new components for classification) |
| * pixas (of new components for classification) |
| * Return: 0 if OK; 1 on error |
| */ |
| l_int32 |
| jbClassifyRankHaus(JBCLASSER *classer, |
| BOXA *boxa, |
| PIXA *pixas) |
| { |
| l_int32 n, nt, i, wt, ht, iclass, size, found, testval; |
| l_int32 *sumtab; |
| l_int32 npages, area1, area3; |
| l_int32 *tab8; |
| l_float32 rank, x1, y1, x2, y2; |
| BOX *box; |
| NUMA *naclass, *napage; |
| NUMA *nafg; /* fg area of all instances */ |
| NUMA *nafgt; /* fg area of all templates */ |
| JBFINDCTX *findcontext; |
| NUMAHASH *nahash; |
| PIX *pix, *pix1, *pix2, *pix3, *pix4; |
| PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd; |
| PIXAA *pixaa; |
| PTA *pta, *ptac, *ptact; |
| SEL *sel; |
| |
| PROCNAME("jbClassifyRankHaus"); |
| |
| if (!classer) |
| return ERROR_INT("classer not found", procName, 1); |
| if (!boxa) |
| return ERROR_INT("boxa not found", procName, 1); |
| if (!pixas) |
| return ERROR_INT("pixas not found", procName, 1); |
| |
| npages = classer->npages; |
| size = classer->sizehaus; |
| sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT); |
| |
| /* Generate the bordered pixa, with and without dilation. |
| * pixa1 and pixa2 contain all the input components. */ |
| n = pixaGetCount(pixas); |
| pixa1 = pixaCreate(n); |
| pixa2 = pixaCreate(n); |
| for (i = 0; i < n; i++) { |
| pix = pixaGetPix(pixas, i, L_CLONE); |
| pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, |
| JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); |
| pix2 = pixDilate(NULL, pix1, sel); |
| pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */ |
| pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */ |
| pixDestroy(&pix); |
| } |
| |
| /* Get the centroids of all the bordered images. |
| * These are relative to the UL corner of each (bordered) pix. */ |
| pta = pixaCentroids(pixa1); /* centroids for this page; use here */ |
| ptac = classer->ptac; /* holds centroids of components up to this page */ |
| ptaJoin(ptac, pta, 0, 0); /* save centroids of all components */ |
| ptact = classer->ptact; /* holds centroids of templates */ |
| |
| /* Use these to save the class and page of each component. */ |
| naclass = classer->naclass; |
| napage = classer->napage; |
| sumtab = makePixelSumTab8(); |
| |
| /* Store the unbordered pix in a pixaa, in a hierarchical |
| * set of arrays. There is one pixa for each class, |
| * and the pix in each pixa are all the instances found |
| * of that class. This is actually more than one would need |
| * for a jbig2 encoder, but there are two reasons to keep |
| * them around: (1) the set of instances for each class |
| * can be used to make an improved binary (or, better, |
| * a grayscale) template, rather than simply using the first |
| * one in the set; (2) we can investigate the failures |
| * of the classifier. This pixaa grows as we process |
| * successive pages. */ |
| pixaa = classer->pixaa; |
| |
| /* arrays to store class exemplars (templates) */ |
| pixat = classer->pixat; /* un-dilated */ |
| pixatd = classer->pixatd; /* dilated */ |
| |
| /* Fill up the pixaa tree with the template exemplars as |
| * the first pix in each pixa. As we add each pix, |
| * we also add the associated box to the pixa. |
| * We also keep track of the centroid of each pix, |
| * and use the difference between centroids (of the |
| * pix with the exemplar we are checking it with) |
| * to align the two when checking that the Hausdorff |
| * distance does not exceed a threshold. |
| * The threshold is set by the Sel used for dilating. |
| * For example, a 3x3 brick, sel_3, corresponds to a |
| * Hausdorff distance of 1. In general, for an NxN brick, |
| * with N odd, corresponds to a Hausdorff distance of (N - 1)/2. |
| * It turns out that we actually need to use a sel of size 2x2 |
| * to avoid small bad components when there is a halftone image |
| * from which components can be chosen. |
| * The larger the Sel you use, the fewer the number of classes, |
| * and the greater the likelihood of putting semantically |
| * different objects in the same class. For simplicity, |
| * we do this separately for the case of rank == 1.0 (exact |
| * match within the Hausdorff distance) and rank < 1.0. */ |
| rank = classer->rankhaus; |
| nahash = classer->nahash; |
| if (rank == 1.0) { |
| for (i = 0; i < n; i++) { |
| pix1 = pixaGetPix(pixa1, i, L_CLONE); |
| pix2 = pixaGetPix(pixa2, i, L_CLONE); |
| ptaGetPt(pta, i, &x1, &y1); |
| nt = pixaGetCount(pixat); /* number of templates */ |
| found = FALSE; |
| findcontext = findSimilarSizedTemplatesInit(classer, pix1); |
| while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { |
| /* Find score for this template */ |
| pix3 = pixaGetPix(pixat, iclass, L_CLONE); |
| pix4 = pixaGetPix(pixatd, iclass, L_CLONE); |
| ptaGetPt(ptact, iclass, &x2, &y2); |
| testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2, |
| MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT); |
| pixDestroy(&pix3); |
| pixDestroy(&pix4); |
| if (testval == 1) { |
| found = TRUE; |
| numaAddNumber(naclass, iclass); |
| numaAddNumber(napage, npages); |
| if (classer->keep_pixaa) { |
| pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); |
| pix = pixaGetPix(pixas, i, L_CLONE); |
| pixaAddPix(pixa, pix, L_INSERT); |
| box = boxaGetBox(boxa, i, L_CLONE); |
| pixaAddBox(pixa, box, L_INSERT); |
| pixaDestroy(&pixa); |
| } |
| break; |
| } |
| } |
| findSimilarSizedTemplatesDestroy(&findcontext); |
| if (found == FALSE) { /* new class */ |
| numaAddNumber(naclass, nt); |
| numaAddNumber(napage, npages); |
| pixa = pixaCreate(0); |
| pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ |
| pixaAddPix(pixa, pix, L_INSERT); |
| wt = pixGetWidth(pix); |
| ht = pixGetHeight(pix); |
| numaHashAdd(nahash, ht * wt, nt); |
| box = boxaGetBox(boxa, i, L_CLONE); |
| pixaAddBox(pixa, box, L_INSERT); |
| pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ |
| ptaAddPt(ptact, x1, y1); |
| pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ |
| pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */ |
| } |
| else { /* don't save them */ |
| pixDestroy(&pix1); |
| pixDestroy(&pix2); |
| } |
| } |
| } |
| else { /* rank < 1.0 */ |
| if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */ |
| return ERROR_INT("nafg not made", procName, 1); |
| nafgt = classer->nafgt; |
| tab8 = makePixelSumTab8(); |
| for (i = 0; i < n; i++) { /* all instances on this page */ |
| pix1 = pixaGetPix(pixa1, i, L_CLONE); |
| numaGetIValue(nafg, i, &area1); |
| pix2 = pixaGetPix(pixa2, i, L_CLONE); |
| ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */ |
| nt = pixaGetCount(pixat); /* number of templates */ |
| found = FALSE; |
| findcontext = findSimilarSizedTemplatesInit(classer, pix1); |
| while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { |
| /* Find score for this template */ |
| pix3 = pixaGetPix(pixat, iclass, L_CLONE); |
| numaGetIValue(nafgt, iclass, &area3); |
| pix4 = pixaGetPix(pixatd, iclass, L_CLONE); |
| ptaGetPt(ptact, iclass, &x2, &y2); |
| testval = pixRankHaustest(pix1, pix2, pix3, pix4, |
| x1 - x2, y1 - y2, |
| MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, |
| area1, area3, rank, tab8); |
| pixDestroy(&pix3); |
| pixDestroy(&pix4); |
| if (testval == 1) { /* greedy match; take the first */ |
| found = TRUE; |
| numaAddNumber(naclass, iclass); |
| numaAddNumber(napage, npages); |
| if (classer->keep_pixaa) { |
| pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); |
| pix = pixaGetPix(pixas, i, L_CLONE); |
| pixaAddPix(pixa, pix, L_INSERT); |
| box = boxaGetBox(boxa, i, L_CLONE); |
| pixaAddBox(pixa, box, L_INSERT); |
| pixaDestroy(&pixa); |
| } |
| break; |
| } |
| } |
| findSimilarSizedTemplatesDestroy(&findcontext); |
| if (found == FALSE) { /* new class */ |
| numaAddNumber(naclass, nt); |
| numaAddNumber(napage, npages); |
| pixa = pixaCreate(0); |
| pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ |
| pixaAddPix(pixa, pix, L_INSERT); |
| wt = pixGetWidth(pix); |
| ht = pixGetHeight(pix); |
| numaHashAdd(nahash, ht * wt, nt); |
| box = boxaGetBox(boxa, i, L_CLONE); |
| pixaAddBox(pixa, box, L_INSERT); |
| pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ |
| ptaAddPt(ptact, x1, y1); |
| pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ |
| pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */ |
| numaAddNumber(nafgt, area1); |
| } |
| else { /* don't save them */ |
| pixDestroy(&pix1); |
| pixDestroy(&pix2); |
| } |
| } |
| FREE(tab8); |
| numaDestroy(&nafg); |
| } |
| classer->nclass = pixaGetCount(pixat); |
| |
| FREE(sumtab); |
| ptaDestroy(&pta); |
| pixaDestroy(&pixa1); |
| pixaDestroy(&pixa2); |
| selDestroy(&sel); |
| return 0; |
| } |
| |
| |
| /*! |
| * pixHaustest() |
| * |
| * Input: pix1 (new pix, not dilated) |
| * pix2 (new pix, dilated) |
| * pix3 (exemplar pix, not dilated) |
| * pix4 (exemplar pix, dilated) |
| * delx (x comp of centroid difference) |
| * dely (y comp of centroid difference) |
| * maxdiffw (max width difference of pix1 and pix2) |
| * maxdiffh (max height difference of pix1 and pix2) |
| * Return: 0 (FALSE) if no match, 1 (TRUE) if the new |
| * pix is in the same class as the exemplar. |
| * |
| * Note: we check first that the two pix are roughly |
| * the same size. Only if they meet that criterion do |
| * we compare the bitmaps. The Hausdorff is a 2-way |
| * check. The centroid difference is used to align the two |
| * images to the nearest integer for each of the checks. |
| * These check that the dilated image of one contains |
| * ALL the pixels of the undilated image of the other. |
| * Checks are done in both direction. A single pixel not |
| * contained in either direction results in failure of the test. |
| */ |
| l_int32 |
| pixHaustest(PIX *pix1, |
| PIX *pix2, |
| PIX *pix3, |
| PIX *pix4, |
| l_float32 delx, /* x(1) - x(3) */ |
| l_float32 dely, /* y(1) - y(3) */ |
| l_int32 maxdiffw, |
| l_int32 maxdiffh) |
| { |
| l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; |
| PIX *pixt; |
| |
| /* Eliminate possible matches based on size difference */ |
| wi = pixGetWidth(pix1); |
| hi = pixGetHeight(pix1); |
| wt = pixGetWidth(pix3); |
| ht = pixGetHeight(pix3); |
| delw = L_ABS(wi - wt); |
| if (delw > maxdiffw) |
| return FALSE; |
| delh = L_ABS(hi - ht); |
| if (delh > maxdiffh) |
| return FALSE; |
| |
| /* Round difference in centroid location to nearest integer; |
| * use this as a shift when doing the matching. */ |
| if (delx >= 0) |
| idelx = (l_int32)(delx + 0.5); |
| else |
| idelx = (l_int32)(delx - 0.5); |
| if (dely >= 0) |
| idely = (l_int32)(dely + 0.5); |
| else |
| idely = (l_int32)(dely - 0.5); |
| |
| /* Do 1-direction hausdorff, checking that every pixel in pix1 |
| * is within a dilation distance of some pixel in pix3. Namely, |
| * that pix4 entirely covers pix1: |
| * pixt = pixSubtract(NULL, pix1, pix4), including shift |
| * where pixt has no ON pixels. */ |
| pixt = pixCreateTemplate(pix1); |
| pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); |
| pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), |
| pix4, 0, 0); |
| pixZero(pixt, &boolmatch); |
| if (boolmatch == 0) { |
| pixDestroy(&pixt); |
| return FALSE; |
| } |
| |
| /* Do 1-direction hausdorff, checking that every pixel in pix3 |
| * is within a dilation distance of some pixel in pix1. Namely, |
| * that pix2 entirely covers pix3: |
| * pixSubtract(pixt, pix3, pix2), including shift |
| * where pixt has no ON pixels. */ |
| pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); |
| pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); |
| pixZero(pixt, &boolmatch); |
| pixDestroy(&pixt); |
| return boolmatch; |
| } |
| |
| |
| /*! |
| * pixRankHaustest() |
| * |
| * Input: pix1 (new pix, not dilated) |
| * pix2 (new pix, dilated) |
| * pix3 (exemplar pix, not dilated) |
| * pix4 (exemplar pix, dilated) |
| * delx (x comp of centroid difference) |
| * dely (y comp of centroid difference) |
| * maxdiffw (max width difference of pix1 and pix2) |
| * maxdiffh (max height difference of pix1 and pix2) |
| * area1 (fg pixels in pix1) |
| * area3 (fg pixels in pix3) |
| * rank (rank value of test, each way) |
| * tab8 (table of pixel sums for byte) |
| * Return: 0 (FALSE) if no match, 1 (TRUE) if the new |
| * pix is in the same class as the exemplar. |
| * |
| * Note: we check first that the two pix are roughly |
| * the same size. Only if they meet that criterion do |
| * we compare the bitmaps. We convert the rank value to |
| * a number of pixels by multiplying the rank fraction by the number |
| * of pixels in the undilated image. The Hausdorff is a 2-way |
| * check. The centroid difference is used to align the two |
| * images to the nearest integer for each of the checks. |
| * The rank hausdorff checks that the dilated image of one |
| * contains the rank fraction of the pixels of the undilated |
| * image of the other. Checks are done in both direction. |
| * Failure of the test in either direction results in failure |
| * of the test. |
| */ |
| l_int32 |
| pixRankHaustest(PIX *pix1, |
| PIX *pix2, |
| PIX *pix3, |
| PIX *pix4, |
| l_float32 delx, /* x(1) - x(3) */ |
| l_float32 dely, /* y(1) - y(3) */ |
| l_int32 maxdiffw, |
| l_int32 maxdiffh, |
| l_int32 area1, |
| l_int32 area3, |
| l_float32 rank, |
| l_int32 *tab8) |
| { |
| l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; |
| l_int32 thresh1, thresh3; |
| PIX *pixt; |
| |
| /* Eliminate possible matches based on size difference */ |
| wi = pixGetWidth(pix1); |
| hi = pixGetHeight(pix1); |
| wt = pixGetWidth(pix3); |
| ht = pixGetHeight(pix3); |
| delw = L_ABS(wi - wt); |
| if (delw > maxdiffw) |
| return FALSE; |
| delh = L_ABS(hi - ht); |
| if (delh > maxdiffh) |
| return FALSE; |
| |
| /* Upper bounds in remaining pixels for allowable match */ |
| thresh1 = (l_int32)(area1 * (1. - rank) + 0.5); |
| thresh3 = (l_int32)(area3 * (1. - rank) + 0.5); |
| |
| /* Round difference in centroid location to nearest integer; |
| * use this as a shift when doing the matching. */ |
| if (delx >= 0) |
| idelx = (l_int32)(delx + 0.5); |
| else |
| idelx = (l_int32)(delx - 0.5); |
| if (dely >= 0) |
| idely = (l_int32)(dely + 0.5); |
| else |
| idely = (l_int32)(dely - 0.5); |
| |
| /* Do 1-direction hausdorff, checking that every pixel in pix1 |
| * is within a dilation distance of some pixel in pix3. Namely, |
| * that pix4 entirely covers pix1: |
| * pixt = pixSubtract(NULL, pix1, pix4), including shift |
| * where pixt has no ON pixels. */ |
| pixt = pixCreateTemplate(pix1); |
| pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); |
| pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), |
| pix4, 0, 0); |
| pixThresholdPixels(pixt, thresh1, &boolmatch, tab8); |
| if (boolmatch == 1) { /* above thresh1 */ |
| pixDestroy(&pixt); |
| return FALSE; |
| } |
| |
| /* Do 1-direction hausdorff, checking that every pixel in pix3 |
| * is within a dilation distance of some pixel in pix1. Namely, |
| * that pix2 entirely covers pix3: |
| * pixSubtract(pixt, pix3, pix2), including shift |
| * where pixt has no ON pixels. */ |
| pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); |
| pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); |
| pixThresholdPixels(pixt, thresh3, &boolmatch, tab8); |
| pixDestroy(&pixt); |
| if (boolmatch == 1) /* above thresh3 */ |
| return FALSE; |
| else |
| return TRUE; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Classification using windowed correlation score * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * jbClassifyCorrelation() |
| * |
| * Input: jbclasser |
| * boxa (of new components for classification) |
| * pixas (of new components for classification) |
| * Return: 0 if OK; 1 on error |
| */ |
| l_int32 |
| jbClassifyCorrelation(JBCLASSER *classer, |
| BOXA *boxa, |
| PIXA *pixas) |
| { |
| l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages, |
| overthreshold; |
| l_int32 *sumtab, *centtab; |
| l_uint32 *row, word; |
| l_float32 x1, y1, x2, y2, xsum, ysum; |
| l_float32 thresh, weight, threshold; |
| BOX *box; |
| NUMA *naclass, *napage; |
| NUMA *nafgt; /* fg area of all templates */ |
| NUMA *naarea; /* w * h area of all templates */ |
| JBFINDCTX *findcontext; |
| NUMAHASH *nahash; |
| PIX *pix, *pix1, *pix2; |
| PIXA *pixa, *pixa1, *pixat; |
| PIXAA *pixaa; |
| PTA *pta, *ptac, *ptact; |
| l_int32 *pixcts; /* pixel counts of each pixa */ |
| l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */ |
| l_int32 x, y, rowcount, downcount, wpl; |
| l_uint8 byte; |
| |
| PROCNAME("jbClassifyCorrelation"); |
| |
| if (!classer) |
| return ERROR_INT("classer not found", procName, 1); |
| if (!boxa) |
| return ERROR_INT("boxa not found", procName, 1); |
| if (!pixas) |
| return ERROR_INT("pixas not found", procName, 1); |
| |
| npages = classer->npages; |
| |
| /* Generate the bordered pixa, which contains all the the |
| * input components. This will not be saved. */ |
| n = pixaGetCount(pixas); |
| pixa1 = pixaCreate(n); |
| for (i = 0; i < n; i++) { |
| pix = pixaGetPix(pixas, i, L_CLONE); |
| pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, |
| JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); |
| pixaAddPix(pixa1, pix1, L_INSERT); |
| pixDestroy(&pix); |
| } |
| |
| /* Use these to save the class and page of each component. */ |
| naclass = classer->naclass; |
| napage = classer->napage; |
| |
| /* Get the number of fg pixels in each component. */ |
| nafgt = classer->nafgt; /* holds fg areas of the templates */ |
| sumtab = makePixelSumTab8(); |
| |
| pixcts = (l_int32 *)CALLOC(n, sizeof(*pixcts)); |
| pixrowcts = (l_int32 **)CALLOC(n, sizeof(*pixrowcts)); |
| centtab = makePixelCentroidTab8(); |
| if (!pixcts || !pixrowcts || !centtab) |
| return ERROR_INT("calloc fail in pix*cts or centtab", procName, 1); |
| |
| /* Count the "1" pixels in each row of the pix in pixa1; this |
| * allows pixCorrelationScoreThresholded to abort early if a match |
| * is impossible. This loop merges three calculations: the total |
| * number of "1" pixels, the number of "1" pixels in each row, and |
| * the centroid. The centroids are relative to the UL corner of |
| * each (bordered) pix. The pixrowcts[i][y] are the total number |
| * of fg pixels in pixa[i] below row y. */ |
| pta = ptaCreate(n); |
| for (i = 0; i < n; i++) { |
| pix = pixaGetPix(pixa1, i, L_CLONE); |
| pixrowcts[i] = (l_int32 *)CALLOC(pixGetHeight(pix), |
| sizeof(**pixrowcts)); |
| xsum = 0; |
| ysum = 0; |
| wpl = pixGetWpl(pix); |
| row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl; |
| downcount = 0; |
| for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) { |
| pixrowcts[i][y] = downcount; |
| rowcount = 0; |
| for (x = 0; x < wpl; x++) { |
| word = row[x]; |
| byte = word & 0xff; |
| rowcount += sumtab[byte]; |
| xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte]; |
| byte = (word >> 8) & 0xff; |
| rowcount += sumtab[byte]; |
| xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte]; |
| byte = (word >> 16) & 0xff; |
| rowcount += sumtab[byte]; |
| xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte]; |
| byte = (word >> 24) & 0xff; |
| rowcount += sumtab[byte]; |
| xsum += centtab[byte] + x * 32 * sumtab[byte]; |
| } |
| downcount += rowcount; |
| ysum += rowcount * y; |
| } |
| pixcts[i] = downcount; |
| ptaAddPt(pta, |
| xsum / (l_float32)downcount, ysum / (l_float32)downcount); |
| pixDestroy(&pix); |
| } |
| |
| ptac = classer->ptac; /* holds centroids of components up to this page */ |
| ptaJoin(ptac, pta, 0, 0); /* save centroids of all components */ |
| ptact = classer->ptact; /* holds centroids of templates */ |
| |
| /* Store the unbordered pix in a pixaa, in a hierarchical |
| * set of arrays. There is one pixa for each class, |
| * and the pix in each pixa are all the instances found |
| * of that class. This is actually more than one would need |
| * for a jbig2 encoder, but there are two reasons to keep |
| * them around: (1) the set of instances for each class |
| * can be used to make an improved binary (or, better, |
| * a grayscale) template, rather than simply using the first |
| * one in the set; (2) we can investigate the failures |
| * of the classifier. This pixaa grows as we process |
| * successive pages. */ |
| pixaa = classer->pixaa; |
| |
| /* Array to store class exemplars */ |
| pixat = classer->pixat; |
| |
| /* Fill up the pixaa tree with the template exemplars as |
| * the first pix in each pixa. As we add each pix, |
| * we also add the associated box to the pixa. |
| * We also keep track of the centroid of each pix, |
| * and use the difference between centroids (of the |
| * pix with the exemplar we are checking it with) |
| * to align the two when checking that the correlation |
| * score exceeds a threshold. The correlation score |
| * is given by the square of the area of the AND |
| * between aligned instance and template, divided by |
| * the product of areas of each image. For identical |
| * template and instance, the score is 1.0. |
| * If the threshold is too small, non-equivalent instances |
| * will be placed in the same class; if too large, there will |
| * be an unnecessary division of classes representing the |
| * same character. The weightfactor adds in some of the |
| * difference (1.0 - thresh), depending on the heaviness |
| * of the template (measured as the fraction of fg pixels). */ |
| thresh = classer->thresh; |
| weight = classer->weightfactor; |
| naarea = classer->naarea; |
| nahash = classer->nahash; |
| for (i = 0; i < n; i++) { |
| pix1 = pixaGetPix(pixa1, i, L_CLONE); |
| area1 = pixcts[i]; |
| ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */ |
| nt = pixaGetCount(pixat); |
| found = FALSE; |
| findcontext = findSimilarSizedTemplatesInit(classer, pix1); |
| while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { |
| /* Get the template */ |
| pix2 = pixaGetPix(pixat, iclass, L_CLONE); |
| numaGetIValue(nafgt, iclass, &area2); |
| ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */ |
| |
| /* Find threshold for this template */ |
| if (weight > 0.0) { |
| numaGetIValue(naarea, iclass, &area); |
| threshold = thresh + (1. - thresh) * weight * area2 / area; |
| } |
| else |
| threshold = thresh; |
| |
| /* Find score for this template */ |
| overthreshold = pixCorrelationScoreThresholded(pix1, pix2, |
| area1, area2, |
| x1 - x2, y1 - y2, |
| MAX_DIFF_WIDTH, |
| MAX_DIFF_HEIGHT, |
| sumtab, |
| pixrowcts[i], |
| threshold); |
| #if DEBUG_CORRELATION_SCORE |
| { |
| l_float32 score, testscore; |
| l_int32 count, testcount; |
| score = pixCorrelationScore(pix1, pix2, area1, area2, |
| x1 - x2, y1 - y2, |
| MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, |
| sumtab); |
| |
| testscore = pixCorrelationScoreSimple(pix1, pix2, area1, area2, |
| x1 - x2, y1 - y2, MAX_DIFF_WIDTH, |
| MAX_DIFF_HEIGHT, sumtab); |
| count = (l_int32)rint(sqrt(score * area1 * area2)); |
| testcount = (l_int32)rint(sqrt(testscore * area1 * area2)); |
| if ((score >= threshold) != (testscore >= threshold)) { |
| fprintf(stderr, "Correlation score mismatch: %d(%g,%d) vs %d(%g,%d) (%g)\n", |
| count, score, score >= threshold, |
| testcount, testscore, testscore >= threshold, |
| score - testscore); |
| } |
| |
| if ((score >= threshold) != overthreshold) { |
| fprintf(stderr, "Mismatch between correlation/threshold comparison: %g(%g,%d) >= %g(%g) vs %s\n", |
| score, score*area1*area2, count, threshold, threshold*area1*area2, (overthreshold ? "true" : "false")); |
| } |
| } |
| #endif /* DEBUG_CORRELATION_SCORE */ |
| pixDestroy(&pix2); |
| |
| if (overthreshold) { /* greedy match */ |
| found = TRUE; |
| numaAddNumber(naclass, iclass); |
| numaAddNumber(napage, npages); |
| if (classer->keep_pixaa) { |
| /* We are keeping a record of all components */ |
| pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); |
| pix = pixaGetPix(pixas, i, L_CLONE); |
| pixaAddPix(pixa, pix, L_INSERT); |
| box = boxaGetBox(boxa, i, L_CLONE); |
| pixaAddBox(pixa, box, L_INSERT); |
| pixaDestroy(&pixa); |
| } |
| break; |
| } |
| } |
| findSimilarSizedTemplatesDestroy(&findcontext); |
| if (found == FALSE) { /* new class */ |
| numaAddNumber(naclass, nt); |
| numaAddNumber(napage, npages); |
| pixa = pixaCreate(0); |
| pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ |
| pixaAddPix(pixa, pix, L_INSERT); |
| wt = pixGetWidth(pix); |
| ht = pixGetHeight(pix); |
| numaHashAdd(nahash, ht * wt, nt); |
| box = boxaGetBox(boxa, i, L_CLONE); |
| pixaAddBox(pixa, box, L_INSERT); |
| pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ |
| ptaAddPt(ptact, x1, y1); |
| numaAddNumber(nafgt, area1); |
| pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ |
| area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) * |
| (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS); |
| numaAddNumber(naarea, area); |
| } |
| else { /* don't save it */ |
| pixDestroy(&pix1); |
| } |
| } |
| classer->nclass = pixaGetCount(pixat); |
| |
| FREE(pixcts); |
| FREE(centtab); |
| for (i = 0; i < n; i++) { |
| FREE(pixrowcts[i]); |
| } |
| FREE(pixrowcts); |
| |
| FREE(sumtab); |
| ptaDestroy(&pta); |
| pixaDestroy(&pixa1); |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Determine the image components we start with * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * jbGetComponents() |
| * |
| * Input: pixs (1 bpp) |
| * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) |
| * maxwidth, maxheight (of saved components; larger are discarded) |
| * &pboxa (<return> b.b. of component items) |
| * &ppixa (<return> component items) |
| * Return: 0 if OK, 1 on error |
| */ |
| l_int32 |
| jbGetComponents(PIX *pixs, |
| l_int32 components, |
| l_int32 maxwidth, |
| l_int32 maxheight, |
| BOXA **pboxad, |
| PIXA **ppixad) |
| { |
| l_int32 empty, res, redfactor; |
| BOXA *boxa; |
| PIX *pixt1, *pixt2, *pixt3; |
| PIXA *pixa, *pixat; |
| |
| PROCNAME("jbGetComponents"); |
| |
| if (!pboxad) |
| return ERROR_INT("&boxad not defined", procName, 1); |
| *pboxad = NULL; |
| if (!ppixad) |
| return ERROR_INT("&pixad not defined", procName, 1); |
| *ppixad = NULL; |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| if (components != JB_CONN_COMPS && components != JB_CHARACTERS && |
| components != JB_WORDS) |
| return ERROR_INT("invalid components", procName, 1); |
| |
| pixZero(pixs, &empty); |
| if (empty) { |
| *pboxad = boxaCreate(0); |
| *ppixad = pixaCreate(0); |
| return 0; |
| } |
| |
| /* If required, preprocess input pixs. The method for both |
| * characters and words is to generate a connected component |
| * mask over the units that we want to aggregrate, which are, |
| * in general, sets of related connected components in pixs. |
| * For characters, we want to include the dots with |
| * 'i', 'j' and '!', so we do a small vertical closing to |
| * generate the mask. For words, we make a mask over all |
| * characters in each word. This is a bit more tricky, because |
| * the spacing between words is difficult to predict a priori, |
| * and words can be typeset with variable spacing that can |
| * in some cases be barely larger than the space between |
| * characters. The first step is to generate the mask and |
| * identify each of its connected components. */ |
| if (components == JB_CONN_COMPS) { /* no preprocessing */ |
| boxa = pixConnComp(pixs, &pixa, 8); |
| } |
| else if (components == JB_CHARACTERS) { |
| pixt1 = pixMorphSequence(pixs, "c1.6", 0); |
| boxa = pixConnComp(pixt1, &pixat, 8); |
| pixa = pixaClipToPix(pixat, pixs); |
| pixDestroy(&pixt1); |
| pixaDestroy(&pixat); |
| } |
| else { /* components == JB_WORDS */ |
| |
| /* Do the operations at about 150 ppi resolution. |
| * It is much faster at 75 ppi, but the results are |
| * more accurate at 150 ppi. This will segment the |
| * words in body text. It can be expected that relatively |
| * infrequent words in a larger font will be split. */ |
| res = pixGetXRes(pixs); |
| if (res <= 200) { |
| redfactor = 1; |
| pixt1 = pixClone(pixs); |
| } |
| else if (res <= 400) { |
| redfactor = 2; |
| pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
| } |
| else { |
| redfactor = 4; |
| pixt1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); |
| } |
| |
| pixt2 = pixWordMaskByDilation(pixt1, 0, NULL); |
| |
| /* Expand the optimally dilated word mask to full res. */ |
| pixt3 = pixExpandReplicate(pixt2, redfactor); |
| |
| /* Pull out the pixels in pixs corresponding to the mask |
| * components in pixt3. Note that above we used threshold |
| * levels in the reduction of 1 to insure that the resulting |
| * mask fully covers the input pixs. The downside of using |
| * a threshold of 1 is that very close characters from adjacent |
| * lines can be joined. But with a level of 2 or greater, |
| * it is necessary to use a seedfill, followed by a pixOr(): |
| * pixt4 = pixSeedfillBinary(NULL, pixt3, pixs, 8); |
| * pixOr(pixt3, pixt3, pixt4); |
| * to insure that the mask coverage is complete over pixs. */ |
| boxa = pixConnComp(pixt3, &pixat, 4); |
| pixa = pixaClipToPix(pixat, pixs); |
| pixaDestroy(&pixat); |
| pixDestroy(&pixt1); |
| pixDestroy(&pixt2); |
| pixDestroy(&pixt3); |
| } |
| |
| /* Remove large components, and save the results. */ |
| *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH, |
| L_SELECT_IF_LTE, NULL); |
| *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH, |
| L_SELECT_IF_LTE, NULL); |
| pixaDestroy(&pixa); |
| boxaDestroy(&boxa); |
| |
| return 0; |
| } |
| |
| |
| /*! |
| * pixWordMaskByDilation() |
| * |
| * Input: pixs (1 bpp; typ. at 75 to 150 ppi) |
| * maxsize (use 0 for default; not to exceed 14) |
| * &size (<optional return> size of optimal horiz Sel) |
| * Return: pixd (dilated word mask), or null on error |
| * |
| * Notes: |
| * (1) For 75 to 150 ppi, the optimal dilation should not exceed 7. |
| * This is the default size chosen if maxsize <= 0. |
| * (2) To run this on images at resolution between 200 and 300, it |
| * is advisable to use a larger maxsize, say between 10 and 14. |
| * (3) The best size for dilating to get word masks is optionally returned. |
| */ |
| PIX * |
| pixWordMaskByDilation(PIX *pixs, |
| l_int32 maxsize, |
| l_int32 *psize) |
| { |
| l_int32 i, diffmin, ndiff, imin; |
| l_int32 ncc[MAX_ALLOWED_DILATION + 1]; |
| BOXA *boxa; |
| NUMA *nacc; |
| PIX *pixt1, *pixt2, *pixt3; |
| PIXA *pixa; |
| SEL *sel; |
| |
| PROCNAME("pixWordMaskbyDilation"); |
| |
| if (!pixs) |
| return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); |
| |
| /* Find the optimal dilation to create the word mask. |
| * Look for successively increasing dilations where the |
| * number of connected components doesn't decrease. |
| * This is the situation where the components in the |
| * word mask should properly cover each word. Use of |
| * 4 cc slightly reduces the likelihood that words from |
| * different lines are joined. */ |
| diffmin = 1000000; |
| pixa = pixaCreate(8); |
| pixt1 = pixCopy(NULL, pixs); |
| pixaAddPix(pixa, pixt1, L_COPY); |
| |
| if (maxsize <= 0) |
| maxsize = 7; /* default */ |
| if (maxsize > MAX_ALLOWED_DILATION) |
| maxsize = MAX_ALLOWED_DILATION; |
| nacc = numaCreate(maxsize); |
| for (i = 0; i <= maxsize; i++) { |
| if (i == 0) /* first one not dilated */ |
| pixt2 = pixCopy(NULL, pixt1); |
| else /* successive dilation by sel_2h */ |
| pixt2 = pixMorphSequence(pixt1, "d2.1", 0); |
| boxa = pixConnCompBB(pixt2, 4); |
| ncc[i] = boxaGetCount(boxa); |
| numaAddNumber(nacc, ncc[i]); |
| if (i > 0) { |
| ndiff = ncc[i - 1] - ncc[i]; |
| #if DEBUG_PLOT_CC |
| fprintf(stderr, "ndiff[%d] = %d\n", i - 1, ndiff); |
| #endif /* DEBUG_PLOT_CC */ |
| if (ndiff < diffmin) { |
| imin = i; |
| diffmin = ndiff; |
| } |
| } |
| pixaAddPix(pixa, pixt2, L_COPY); |
| pixDestroy(&pixt1); |
| pixt1 = pixt2; |
| boxaDestroy(&boxa); |
| } |
| pixDestroy(&pixt1); |
| |
| #if DEBUG_PLOT_CC |
| {GPLOT *gplot; |
| NUMA *naseq; |
| naseq = numaMakeSequence(1, 1, numaGetCount(nacc)); |
| gplot = gplotCreate("junk_cc_output", GPLOT_PNG, |
| "Number of cc vs. horizontal dilation", |
| "Sel horiz", "Number of cc"); |
| gplotAddPlot(gplot, naseq, nacc, GPLOT_LINES, ""); |
| gplotMakeOutput(gplot); |
| gplotDestroy(&gplot); |
| numaDestroy(&naseq); |
| } |
| #endif /* DEBUG_PLOT_CC */ |
| |
| /* Save the result of the optimal dilation */ |
| pixt2 = pixaGetPix(pixa, imin, L_CLONE); |
| sel = selCreateBrick(1, imin, 0, imin - 1, SEL_HIT); |
| pixt3 = pixErode(NULL, pixt2, sel); /* remove effect of dilation */ |
| selDestroy(&sel); |
| pixDestroy(&pixt2); |
| pixaDestroy(&pixa); |
| if (psize) |
| *psize = imin + 1; |
| |
| numaDestroy(&nacc); |
| return pixt3; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Build grayscale composites (templates) * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * jbAccumulateComposites() |
| * |
| * Input: pixaa (one pixa for each class) |
| * &pna (<return> number of samples used to build each composite) |
| * &ptat (<return> centroids of bordered composites) |
| * Return: pixad (accumulated sum of samples in each class), |
| * or null on error |
| * |
| */ |
| PIXA * |
| jbAccumulateComposites(PIXAA *pixaa, |
| NUMA **pna, |
| PTA **pptat) |
| { |
| l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff; |
| l_float32 x, y, xave, yave; |
| NUMA *na; |
| PIX *pix, *pixt1, *pixt2, *pixsum; |
| PIXA *pixa, *pixad; |
| PTA *ptat, *pta; |
| |
| PROCNAME("jbAccumulateComposites"); |
| |
| if (!pptat) |
| return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL); |
| *pptat = NULL; |
| if (!pna) |
| return (PIXA *)ERROR_PTR("&na not defined", procName, NULL); |
| *pna = NULL; |
| if (!pixaa) |
| return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL); |
| |
| n = pixaaGetCount(pixaa); |
| if ((ptat = ptaCreate(n)) == NULL) |
| return (PIXA *)ERROR_PTR("ptat not made", procName, NULL); |
| *pptat = ptat; |
| pixad = pixaCreate(n); |
| na = numaCreate(n); |
| *pna = na; |
| |
| for (i = 0; i < n; i++) { |
| pixa = pixaaGetPixa(pixaa, i, L_CLONE); |
| nt = pixaGetCount(pixa); |
| numaAddNumber(na, nt); |
| if (nt == 0) { |
| L_WARNING("empty pixa found!", procName); |
| pixaDestroy(&pixa); |
| continue; |
| } |
| pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh); |
| pix = pixaGetPix(pixa, 0, L_CLONE); |
| d = pixGetDepth(pix); |
| pixDestroy(&pix); |
| pixt1 = pixCreate(maxw, maxh, d); |
| pixsum = pixInitAccumulate(maxw, maxh, 0); |
| pta = pixaCentroids(pixa); |
| |
| /* Find the average value of the centroids ... */ |
| xave = yave = 0; |
| for (j = 0; j < nt; j++) { |
| ptaGetPt(pta, j, &x, &y); |
| xave += x; |
| yave += y; |
| } |
| xave = xave / (l_float32)nt; |
| yave = yave / (l_float32)nt; |
| |
| /* and place all centroids at their average value */ |
| for (j = 0; j < nt; j++) { |
| pixt2 = pixaGetPix(pixa, j, L_CLONE); |
| ptaGetPt(pta, j, &x, &y); |
| xdiff = (l_int32)(x - xave); |
| ydiff = (l_int32)(y - yave); |
| pixClearAll(pixt1); |
| pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC, |
| pixt2, 0, 0); |
| pixAccumulate(pixsum, pixt1, L_ARITH_ADD); |
| pixDestroy(&pixt2); |
| } |
| pixaAddPix(pixad, pixsum, L_INSERT); |
| ptaAddPt(ptat, xave, yave); |
| |
| pixaDestroy(&pixa); |
| pixDestroy(&pixt1); |
| ptaDestroy(&pta); |
| } |
| |
| return pixad; |
| } |
| |
| |
| /*! |
| * jbTemplatesFromComposites() |
| * |
| * Input: pixac (one pix of composites for each class) |
| * na (number of samples used for each class composite) |
| * Return: pixad (8 bpp templates for each class), or null on error |
| * |
| */ |
| PIXA * |
| jbTemplatesFromComposites(PIXA *pixac, |
| NUMA *na) |
| { |
| l_int32 n, i; |
| l_float32 nt; /* number of samples in the composite; always an integer */ |
| l_float32 factor; |
| PIX *pixsum; /* accumulated composite */ |
| PIX *pixd; |
| PIXA *pixad; |
| |
| PROCNAME("jbTemplatesFromComposites"); |
| |
| if (!pixac) |
| return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL); |
| if (!na) |
| return (PIXA *)ERROR_PTR("na not defined", procName, NULL); |
| |
| n = pixaGetCount(pixac); |
| pixad = pixaCreate(n); |
| for (i = 0; i < n; i++) { |
| pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */ |
| numaGetFValue(na, i, &nt); |
| factor = 255. / nt; |
| pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */ |
| pixd = pixFinalAccumulate(pixsum, 0, 8); |
| pixaAddPix(pixad, pixd, L_INSERT); |
| pixDestroy(&pixsum); |
| } |
| |
| return pixad; |
| } |
| |
| |
| |
| /*----------------------------------------------------------------------* |
| * jbig2 utility routines * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * jbClasserCreate() |
| * |
| * Input: method (JB_RANKHAUS, JB_CORRELATION) |
| * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) |
| * Return: jbclasser, or null on error |
| */ |
| JBCLASSER * |
| jbClasserCreate(l_int32 method, |
| l_int32 components) |
| { |
| JBCLASSER *classer; |
| |
| PROCNAME("jbClasserCreate"); |
| |
| if ((classer = (JBCLASSER *)CALLOC(1, sizeof(JBCLASSER))) == NULL) |
| return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); |
| if (method != JB_RANKHAUS && method != JB_CORRELATION) |
| return (JBCLASSER *)ERROR_PTR("invalid type", procName, NULL); |
| if (components != JB_CONN_COMPS && components != JB_CHARACTERS && |
| components != JB_WORDS) |
| return (JBCLASSER *)ERROR_PTR("invalid type", procName, NULL); |
| |
| classer->method = method; |
| classer->components = components; |
| classer->nacomps = numaCreate(0); |
| classer->pixaa = pixaaCreate(0); |
| classer->pixat = pixaCreate(0); |
| classer->pixatd = pixaCreate(0); |
| classer->nafgt = numaCreate(0); |
| classer->naarea = numaCreate(0); |
| classer->ptac = ptaCreate(0); |
| classer->ptact = ptaCreate(0); |
| classer->naclass = numaCreate(0); |
| classer->napage = numaCreate(0); |
| classer->ptaul = ptaCreate(0); |
| |
| return classer; |
| } |
| |
| |
| /* |
| * jbClasserDestroy() |
| * |
| * Input: &classer (<to be nulled>) |
| * Return: void |
| */ |
| void |
| jbClasserDestroy(JBCLASSER **pclasser) |
| { |
| JBCLASSER *classer; |
| |
| if (!pclasser) |
| return; |
| if ((classer = *pclasser) == NULL) |
| return; |
| |
| sarrayDestroy(&classer->safiles); |
| numaDestroy(&classer->nacomps); |
| pixaaDestroy(&classer->pixaa); |
| pixaDestroy(&classer->pixat); |
| pixaDestroy(&classer->pixatd); |
| numaHashDestroy(&classer->nahash); |
| numaDestroy(&classer->nafgt); |
| numaDestroy(&classer->naarea); |
| ptaDestroy(&classer->ptac); |
| ptaDestroy(&classer->ptact); |
| numaDestroy(&classer->naclass); |
| numaDestroy(&classer->napage); |
| ptaDestroy(&classer->ptaul); |
| ptaDestroy(&classer->ptall); |
| FREE(classer); |
| *pclasser = NULL; |
| return; |
| } |
| |
| |
| /*! |
| * jbDataSave() |
| * |
| * Input: jbclasser |
| * latticew, latticeh (cell size used to store each |
| * connected component in the composite) |
| * Return: jbdata, or null on error |
| * |
| * Notes: |
| * (1) This routine stores the jbig2-type data required for |
| * generating a lossy jbig2 version of the image. |
| * It can be losslessly written to (and read from) two files. |
| * (2) It generates and stores the mosaic of templates. |
| * (3) It clones the Numa and Pta arrays, so these must all |
| * be destroyed by the caller. |
| * (4) Input 0 to use the default values for latticew and/or latticeh, |
| */ |
| JBDATA * |
| jbDataSave(JBCLASSER *classer) |
| { |
| l_int32 maxw, maxh; |
| JBDATA *data; |
| PIX *pix; |
| |
| PROCNAME("jbDataSave"); |
| |
| if (!classer) |
| return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL); |
| |
| /* Write the templates into an array. */ |
| pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh); |
| if ((pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1)) |
| == NULL) |
| return (JBDATA *)ERROR_PTR("data not made", procName, NULL); |
| |
| if ((data = (JBDATA *)CALLOC(1, sizeof(JBDATA))) == NULL) |
| return (JBDATA *)ERROR_PTR("data not made", procName, NULL); |
| data->pix = pix; |
| data->npages = classer->npages; |
| data->w = classer->w; |
| data->h = classer->h; |
| data->nclass = classer->nclass; |
| data->latticew = maxw + 1; |
| data->latticeh = maxh + 1; |
| data->naclass = numaClone(classer->naclass); |
| data->napage = numaClone(classer->napage); |
| data->ptaul = ptaClone(classer->ptaul); |
| |
| return data; |
| } |
| |
| |
| /* |
| * jbDataDestroy() |
| * |
| * Input: &data (<to be nulled>) |
| * Return: void |
| */ |
| void |
| jbDataDestroy(JBDATA **pdata) |
| { |
| JBDATA *data; |
| |
| if (!pdata) |
| return; |
| if ((data = *pdata) == NULL) |
| return; |
| |
| pixDestroy(&data->pix); |
| numaDestroy(&data->naclass); |
| numaDestroy(&data->napage); |
| ptaDestroy(&data->ptaul); |
| FREE(data); |
| *pdata = NULL; |
| return; |
| } |
| |
| |
| /*! |
| * jbDataWrite() |
| * |
| * Input: rootname (for output files; everything but the extension) |
| * jbdata |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) Serialization function that writes data in jbdata to file. |
| */ |
| l_int32 |
| jbDataWrite(const char *rootout, |
| JBDATA *jbdata) |
| { |
| char buf[L_BUF_SIZE]; |
| l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage; |
| NUMA *naclass, *napage; |
| PTA *ptaul; |
| PIX *pixt; |
| FILE *fp; |
| |
| PROCNAME("jbDataWrite"); |
| |
| if (!rootout) |
| return ERROR_INT("no rootout", procName, 1); |
| if (!jbdata) |
| return ERROR_INT("no jbdata", procName, 1); |
| |
| npages = jbdata->npages; |
| w = jbdata->w; |
| h = jbdata->h; |
| pixt = jbdata->pix; |
| nclass = jbdata->nclass; |
| cellw = jbdata->latticew; |
| cellh = jbdata->latticeh; |
| naclass = jbdata->naclass; |
| napage = jbdata->napage; |
| ptaul = jbdata->ptaul; |
| |
| snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT); |
| pixWrite(buf, pixt, IFF_PNG); |
| |
| snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT); |
| if ((fp = fopen(buf, "w")) == NULL) |
| return ERROR_INT("stream not opened", procName, 1); |
| ncomp = ptaGetCount(ptaul); |
| fprintf(fp, "jb data file\n"); |
| fprintf(fp, "num pages = %d\n", npages); |
| fprintf(fp, "page size: w = %d, h = %d\n", w, h); |
| fprintf(fp, "num components = %d\n", ncomp); |
| fprintf(fp, "num classes = %d\n", nclass); |
| fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh); |
| for (i = 0; i < ncomp; i++) { |
| numaGetIValue(napage, i, &ipage); |
| numaGetIValue(naclass, i, &iclass); |
| ptaGetIPt(ptaul, i, &x, &y); |
| fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y); |
| } |
| fclose(fp); |
| |
| return 0; |
| } |
| |
| |
| /*! |
| * jbDataRead() |
| * |
| * Input: rootname (for template and data files) |
| * Return: jbdata, or NULL on error |
| */ |
| JBDATA * |
| jbDataRead(const char *rootname) |
| { |
| char fname[L_BUF_SIZE]; |
| char *linestr; |
| l_uint8 *data; |
| l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage; |
| l_int32 nbytes, npages, nclass, ncomp; |
| JBDATA *jbdata; |
| NUMA *naclass, *napage; |
| PIX *pixs; |
| PTA *ptaul; |
| SARRAY *sa; |
| |
| PROCNAME("jbDataRead"); |
| |
| if (!rootname) |
| return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL); |
| |
| snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT); |
| if ((pixs = pixRead(fname)) == NULL) |
| return (JBDATA *)ERROR_PTR("pix not read", procName, NULL); |
| |
| snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT); |
| if ((data = arrayRead(fname, &nbytes)) == NULL) |
| return (JBDATA *)ERROR_PTR("data not read", procName, NULL); |
| |
| if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) |
| return (JBDATA *)ERROR_PTR("sa not made", procName, NULL); |
| nsa = sarrayGetCount(sa); /* number of cc + 6 */ |
| linestr = sarrayGetString(sa, 0, 0); |
| if (strcmp(linestr, "jb data file")) |
| return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL); |
| linestr = sarrayGetString(sa, 1, 0); |
| sscanf(linestr, "num pages = %d", &npages); |
| linestr = sarrayGetString(sa, 2, 0); |
| sscanf(linestr, "page size: w = %d, h = %d", &w, &h); |
| linestr = sarrayGetString(sa, 3, 0); |
| sscanf(linestr, "num components = %d", &ncomp); |
| linestr = sarrayGetString(sa, 4, 0); |
| sscanf(linestr, "num classes = %d\n", &nclass); |
| linestr = sarrayGetString(sa, 5, 0); |
| sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh); |
| |
| #if 1 |
| fprintf(stderr, "num pages = %d\n", npages); |
| fprintf(stderr, "page size: w = %d, h = %d\n", w, h); |
| fprintf(stderr, "num components = %d\n", ncomp); |
| fprintf(stderr, "num classes = %d\n", nclass); |
| fprintf(stderr, "template lattice size: w = %d, h = %d\n", cellw, cellh); |
| #endif |
| |
| if ((naclass = numaCreate(ncomp)) == NULL) |
| return (JBDATA *)ERROR_PTR("naclass not made", procName, NULL); |
| if ((napage = numaCreate(ncomp)) == NULL) |
| return (JBDATA *)ERROR_PTR("napage not made", procName, NULL); |
| if ((ptaul = ptaCreate(ncomp)) == NULL) |
| return (JBDATA *)ERROR_PTR("pta not made", procName, NULL); |
| for (i = 6; i < nsa; i++) { |
| linestr = sarrayGetString(sa, i, 0); |
| sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y); |
| numaAddNumber(napage, ipage); |
| numaAddNumber(naclass, iclass); |
| ptaAddPt(ptaul, x, y); |
| } |
| |
| if ((jbdata = (JBDATA *)CALLOC(1, sizeof(JBDATA))) == NULL) |
| return (JBDATA *)ERROR_PTR("data not made", procName, NULL); |
| jbdata->pix = pixs; |
| jbdata->npages = npages; |
| jbdata->w = w; |
| jbdata->h = h; |
| jbdata->nclass = nclass; |
| jbdata->latticew = cellw; |
| jbdata->latticeh = cellh; |
| jbdata->naclass = naclass; |
| jbdata->napage = napage; |
| jbdata->ptaul = ptaul; |
| |
| FREE(data); |
| sarrayDestroy(&sa); |
| return jbdata; |
| } |
| |
| |
| /*! |
| * jbDataRender() |
| * |
| * Input: jbdata |
| * debugflag (if TRUE, writes into 2 bpp pix and adds |
| * component outlines in color) |
| * Return: pixa (reconstruction of original images, using templates) or |
| * null on error |
| */ |
| PIXA * |
| jbDataRender(JBDATA *data, |
| l_int32 debugflag) |
| { |
| l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage; |
| l_int32 npages, nclass, ncomp, wp, hp; |
| BOX *box; |
| NUMA *naclass, *napage; |
| PIX *pixt, *pixt2, *pix, *pixd; |
| PIXA *pixat; /* pixa of templates */ |
| PIXA *pixad; /* pixa of output images */ |
| PIXCMAP *cmap; |
| PTA *ptaul; |
| |
| PROCNAME("jbDataRender"); |
| |
| if (!data) |
| return (PIXA *)ERROR_PTR("data not defined", procName, NULL); |
| |
| npages = data->npages; |
| w = data->w; |
| h = data->h; |
| pixt = data->pix; |
| nclass = data->nclass; |
| cellw = data->latticew; |
| cellh = data->latticeh; |
| naclass = data->naclass; |
| napage = data->napage; |
| ptaul = data->ptaul; |
| ncomp = numaGetCount(naclass); |
| |
| /* Reconstruct the original set of images from the templates |
| * and the data associated with each component. First, |
| * generate the output pixa as a set of empty pix. */ |
| if ((pixad = pixaCreate(npages)) == NULL) |
| return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); |
| for (i = 0; i < npages; i++) { |
| if (debugflag == FALSE) |
| pix = pixCreate(w, h, 1); |
| else { |
| pix = pixCreate(w, h, 2); |
| cmap = pixcmapCreate(2); |
| pixcmapAddColor(cmap, 255, 255, 255); |
| pixcmapAddColor(cmap, 0, 0, 0); |
| pixcmapAddColor(cmap, 255, 0, 0); /* for box outlines */ |
| pixSetColormap(pix, cmap); |
| } |
| pixaAddPix(pixad, pix, L_INSERT); |
| } |
| |
| /* Put the class templates into a pixa. */ |
| if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) |
| return (PIXA *)ERROR_PTR("pixat not made", procName, NULL); |
| |
| /* Place each component in the right location on its page. */ |
| for (i = 0; i < ncomp; i++) { |
| numaGetIValue(napage, i, &ipage); |
| numaGetIValue(naclass, i, &iclass); |
| pix = pixaGetPix(pixat, iclass, L_CLONE); /* the template */ |
| wp = pixGetWidth(pix); |
| hp = pixGetHeight(pix); |
| ptaGetIPt(ptaul, i, &x, &y); |
| pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */ |
| if (debugflag == FALSE) |
| pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0); |
| else { |
| pixt2 = pixConvert1To2Cmap(pix); |
| pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0); |
| box = boxCreate(x, y, wp, hp); |
| pixRenderBoxArb(pixd, box, 1, 255, 0, 0); |
| pixDestroy(&pixt2); |
| boxDestroy(&box); |
| } |
| pixDestroy(&pix); /* the clone only */ |
| pixDestroy(&pixd); /* the clone only */ |
| } |
| |
| pixaDestroy(&pixat); |
| return pixad; |
| } |
| |
| |
| /*! |
| * jbGetULCorners() |
| * |
| * Input: jbclasser |
| * pixs (full res image) |
| * boxa (of c.c. bounding rectangles for this page) |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) This computes the ptaul field, which has the global UL corners, |
| * adjusted for each specific component, so that each component |
| * can be replaced by the template for its class and have the |
| * centroid in the template in the same position as the |
| * centroid of the original connected component. It is important |
| * that this be done properly to avoid a wavy baseline in the |
| * result. |
| * (2) The array fields ptac and ptact give the centroids of |
| * those components relative to the UL corner of each component. |
| * Here, we compute the difference in each component, round to |
| * nearest integer, and correct the box->x and box->y by |
| * the appropriate integral difference. |
| * (3) The templates and stored instances are all bordered. |
| */ |
| l_int32 |
| jbGetULCorners(JBCLASSER *classer, |
| PIX *pixs, |
| BOXA *boxa) |
| { |
| l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy; |
| l_int32 *sumtab; |
| l_float32 x1, x2, y1, y2, delx, dely; |
| BOX *box; |
| NUMA *naclass; |
| PIX *pixt; |
| PTA *ptac, *ptact, *ptaul; |
| |
| PROCNAME("jbGetULCorners"); |
| |
| if (!classer) |
| return ERROR_INT("classer not defined", procName, 1); |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| if (!boxa) |
| return ERROR_INT("boxa not defined", procName, 1); |
| |
| n = boxaGetCount(boxa); |
| ptaul = classer->ptaul; |
| naclass = classer->naclass; |
| ptac = classer->ptac; |
| ptact = classer->ptact; |
| baseindex = classer->baseindex; /* num components before this page */ |
| sumtab = makePixelSumTab8(); |
| for (i = 0; i < n; i++) { |
| index = baseindex + i; |
| ptaGetPt(ptac, index, &x1, &y1); |
| numaGetIValue(naclass, index, &iclass); |
| ptaGetPt(ptact, iclass, &x2, &y2); |
| delx = x2 - x1; |
| dely = y2 - y1; |
| if (delx >= 0) |
| idelx = (l_int32)(delx + 0.5); |
| else |
| idelx = (l_int32)(delx - 0.5); |
| if (dely >= 0) |
| idely = (l_int32)(dely + 0.5); |
| else |
| idely = (l_int32)(dely - 0.5); |
| if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) |
| return ERROR_INT("box not found", procName, 1); |
| boxGetGeometry(box, &x, &y, NULL, NULL); |
| |
| /* Get final increments dx and dy for best alignment */ |
| pixt = pixaGetPix(classer->pixat, iclass, L_CLONE); |
| finalPositioningForAlignment(pixs, x, y, idelx, idely, |
| pixt, sumtab, &dx, &dy); |
| /* if (i % 20 == 0) |
| fprintf(stderr, "dx = %d, dy = %d\n", dx, dy); */ |
| ptaAddPt(ptaul, x - idelx + dx, y - idely + dy); |
| boxDestroy(&box); |
| pixDestroy(&pixt); |
| } |
| |
| FREE(sumtab); |
| return 0; |
| } |
| |
| |
| /*! |
| * jbGetLLCorners() |
| * |
| * Input: jbclasser |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) This computes the ptall field, which has the global LL corners, |
| * adjusted for each specific component, so that each component |
| * can be replaced by the template for its class and have the |
| * centroid in the template in the same position as the |
| * centroid of the original connected component. It is important |
| * that this be done properly to avoid a wavy baseline in the result. |
| * (2) It is computed here from the corresponding UL corners, where |
| * the input templates and stored instances are all bordered. |
| * This should be done after all pages have been processed. |
| * (3) For proper substitution, the templates whose LL corners are |
| * placed in these locations must be UN-bordered. |
| * This is available for a realistic jbig2 encoder, which would |
| * (1) encode each template without a border, and (2) encode |
| * the position using the LL corner (rather than the UL |
| * corner) because the difference between y-values |
| * of successive instances is typically close to zero. |
| */ |
| l_int32 |
| jbGetLLCorners(JBCLASSER *classer) |
| { |
| l_int32 i, iclass, n, x1, y1, h; |
| NUMA *naclass; |
| PIX *pix; |
| PIXA *pixat; |
| PTA *ptaul, *ptall; |
| |
| PROCNAME("jbGetLLCorners"); |
| |
| if (!classer) |
| return ERROR_INT("classer not defined", procName, 1); |
| |
| ptaul = classer->ptaul; |
| naclass = classer->naclass; |
| pixat = classer->pixat; |
| |
| ptaDestroy(&classer->ptall); |
| n = ptaGetCount(ptaul); |
| ptall = ptaCreate(n); |
| classer->ptall = ptall; |
| |
| /* If the templates were bordered, we would add h - 1 to the UL |
| * corner y-value. However, because the templates to be used |
| * here have their borders removed, and the borders are |
| * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS |
| * to the UL corner y-value. */ |
| for (i = 0; i < n; i++) { |
| ptaGetIPt(ptaul, i, &x1, &y1); |
| numaGetIValue(naclass, i, &iclass); |
| pix = pixaGetPix(pixat, iclass, L_CLONE); |
| h = pixGetHeight(pix); |
| ptaAddPt(ptall, x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS); |
| pixDestroy(&pix); |
| } |
| |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Static helpers * |
| *----------------------------------------------------------------------*/ |
| /* When looking for similar matches we check templates whose size is +/- 2 in |
| * each direction. This involves 25 possible sizes. This array contains the |
| * offsets for each of those positions in a spiral pattern. There are 25 pairs |
| * of numbers in this array: even positions are x values. */ |
| static int two_by_two_walk[50] = { |
| 0, 0, |
| 0, 1, |
| -1, 0, |
| 0, -1, |
| 1, 0, |
| -1, 1, |
| 1, 1, |
| -1, -1, |
| 1, -1, |
| 0, -2, |
| 2, 0, |
| 0, 2, |
| -2, 0, |
| -1, -2, |
| 1, -2, |
| 2, -1, |
| 2, 1, |
| 1, 2, |
| -1, 2, |
| -2, 1, |
| -2, -1, |
| -2, -2, |
| 2, -2, |
| 2, 2, |
| -2, 2}; |
| |
| |
| /*! |
| * findSimilarSizedTemplatesInit() |
| * |
| * Input: classer |
| * pixs (instance to be matched) |
| * Return: Allocated context to be used with findSimilar* |
| */ |
| static JBFINDCTX * |
| findSimilarSizedTemplatesInit(JBCLASSER *classer, |
| PIX *pixs) |
| { |
| JBFINDCTX *state; |
| |
| state = (JBFINDCTX *)CALLOC(1, sizeof(JBFINDCTX)); |
| state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS; |
| state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS; |
| state->classer = classer; |
| |
| return state; |
| } |
| |
| |
| static void |
| findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate) |
| { |
| JBFINDCTX *state; |
| |
| PROCNAME("findSimilarSizedTemplatesDestroy"); |
| |
| if (pstate == NULL) { |
| L_WARNING("ptr address is null", procName); |
| return; |
| } |
| if ((state = *pstate) == NULL) |
| return; |
| |
| numaDestroy(&state->numa); |
| FREE(state); |
| *pstate = NULL; |
| return; |
| } |
| |
| |
| /*! |
| * findSimilarSizedTemplatesNext() |
| * |
| * Input: state (from findSimilarSizedTemplatesInit) |
| * Return: Next template number, or -1 when finished |
| * |
| * We have a hash table mapping template area to a list of template |
| * numbers with that area. We wish to find similar sized templates, |
| * so we first look for templates with the same width and height, and |
| * then with width + 1, etc. This walk is guided by the |
| * two_by_two_walk array, above. |
| * |
| * We don't want to have to collect the whole list of templates first because |
| * (we hope) to find it quickly. So we keep the context for this walk in an |
| * explictit state structure and this function acts like a generator. |
| */ |
| static l_int32 |
| findSimilarSizedTemplatesNext(JBFINDCTX *state) |
| { |
| l_int32 desiredh, desiredw, size, templ; |
| PIX *pixt; |
| |
| while(1) { /* Continue the walk over step 'i' */ |
| if (state->i >= 25) { /* all done */ |
| return -1; |
| } |
| |
| desiredw = state->w + two_by_two_walk[2 * state->i]; |
| desiredh = state->h + two_by_two_walk[2 * state->i + 1]; |
| if (desiredh < 1 || desiredw < 1) { /* invalid size */ |
| state->i++; |
| continue; |
| } |
| |
| if (!state->numa) { |
| /* We have yet to start walking the array for the step 'i' */ |
| state->numa = numaHashGetNuma(state->classer->nahash, |
| desiredh * desiredw); |
| if (!state->numa) { /* nothing there */ |
| state->i++; |
| continue; |
| } |
| |
| state->n = 0; /* OK, we got a numa. */ |
| } |
| |
| /* Continue working on this numa */ |
| size = numaGetCount(state->numa); |
| for ( ; state->n < size; ) { |
| templ = (l_int32)(state->numa->array[state->n++] + 0.5); |
| pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE); |
| if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw && |
| pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) { |
| pixDestroy(&pixt); |
| return templ; |
| } |
| pixDestroy(&pixt); |
| } |
| |
| /* Exhausted the numa; take another step and try again */ |
| state->i++; |
| numaDestroy(&state->numa); |
| continue; |
| } |
| } |
| |
| |
| /*! |
| * finalPositioningForAlignment() |
| * |
| * Input: pixs (input page image) |
| * x, y (location of UL corner of bb of component in pixs) |
| * idelx, idely (compensation to match centroids of component |
| * and template) |
| * pixt (template, with JB_ADDED_PIXELS of padding on all sides) |
| * sumtab (for summing fg pixels in an image) |
| * &dx, &dy (return delta on position for best match; each |
| * one is in the set {-1, 0, 1}) |
| * Return: 0 if OK, 1 on error |
| * |
| */ |
| static l_int32 |
| finalPositioningForAlignment(PIX *pixs, |
| l_int32 x, |
| l_int32 y, |
| l_int32 idelx, |
| l_int32 idely, |
| PIX *pixt, |
| l_int32 *sumtab, |
| l_int32 *pdx, |
| l_int32 *pdy) |
| { |
| l_int32 w, h, i, j, minx, miny, count, mincount; |
| PIX *pixi; /* clipped from source pixs */ |
| PIX *pixr; /* temporary storage */ |
| BOX *box; |
| |
| PROCNAME("finalPositioningForAlignment"); |
| |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| if (!pixt) |
| return ERROR_INT("pixt not defined", procName, 1); |
| if (!pdx || !pdy) |
| return ERROR_INT("&dx and &dy not both defined", procName, 1); |
| if (!sumtab) |
| return ERROR_INT("sumtab not defined", procName, 1); |
| *pdx = *pdy = 0; |
| |
| /* Use JB_ADDED_PIXELS pixels padding on each side */ |
| w = pixGetWidth(pixt); |
| h = pixGetHeight(pixt); |
| box = boxCreate(x - idelx - JB_ADDED_PIXELS, |
| y - idely - JB_ADDED_PIXELS, w, h); |
| pixi = pixClipRectangle(pixs, box, NULL); |
| boxDestroy(&box); |
| if (!pixi) |
| return ERROR_INT("pixi not made", procName, 1); |
| |
| pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1); |
| mincount = 0x7fffffff; |
| for (i = -1; i <= 1; i++) { |
| for (j = -1; j <= 1; j++) { |
| pixCopy(pixr, pixi); |
| pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0); |
| pixCountPixels(pixr, &count, sumtab); |
| if (count < mincount) { |
| minx = j; |
| miny = i; |
| mincount = count; |
| } |
| } |
| } |
| pixDestroy(&pixi); |
| pixDestroy(&pixr); |
| |
| *pdx = minx; |
| *pdy = miny; |
| return 0; |
| } |