| /*====================================================================* |
| - Copyright (C) 2001 Leptonica. All rights reserved. |
| - This software is distributed in the hope that it will be |
| - useful, but with NO WARRANTY OF ANY KIND. |
| - No author or distributor accepts responsibility to anyone for the |
| - consequences of using this software, or for whether it serves any |
| - particular purpose or works at all, unless he or she says so in |
| - writing. Everyone is granted permission to copy, modify and |
| - redistribute this source code, for commercial or non-commercial |
| - purposes, with the following restrictions: (1) the origin of this |
| - source code must not be misrepresented; (2) modified versions must |
| - be plainly marked as such; and (3) this notice may not be removed |
| - or altered from any source or modified source distribution. |
| *====================================================================*/ |
| |
| /* |
| * pageseg.c |
| * |
| * Top level page segmentation |
| * l_int32 pixGetRegionsBinary() |
| * |
| * Halftone region extraction |
| * PIX *pixGenHalftoneMask() |
| * |
| * Textline extraction |
| * PIX *pixGenTextlineMask() |
| * |
| * Textblock extraction |
| * PIX *pixGenTextblockMask() |
| */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include "allheaders.h" |
| |
| |
| /*------------------------------------------------------------------* |
| * Top level page segmentation * |
| *------------------------------------------------------------------*/ |
| /*! |
| * pixGetRegionsBinary() |
| * |
| * Input: pixs (1 bpp, assumed to be 300 to 400 ppi) |
| * &pixhm (<optional return> halftone mask) |
| * &pixtm (<optional return> textline mask) |
| * &pixtb (<optional return> textblock mask) |
| * debug (flag: set to 1 for debug output) |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) It is best to deskew the image before segmenting. |
| * (2) The debug flag enables a number of outputs. These |
| * are included to show how to generate and save/display |
| * these results. |
| */ |
| l_int32 |
| pixGetRegionsBinary(PIX *pixs, |
| PIX **ppixhm, |
| PIX **ppixtm, |
| PIX **ppixtb, |
| l_int32 debug) |
| { |
| l_int32 htfound, tlfound; |
| PIX *pixr, *pixt1, *pixt2; |
| PIX *pixtext; /* text pixels only */ |
| PIX *pixhm2; /* halftone mask; 2x reduction */ |
| PIX *pixhm; /* halftone mask; */ |
| PIX *pixtm2; /* textline mask; 2x reduction */ |
| PIX *pixtm; /* textline mask */ |
| PIX *pixvws; /* vertical white space mask */ |
| PIX *pixtb2; /* textblock mask; 2x reduction */ |
| PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ |
| PIX *pixtb; /* textblock mask */ |
| |
| PROCNAME("pixGetRegionsBinary"); |
| |
| if (ppixhm) *ppixhm = NULL; |
| if (ppixtm) *ppixtm = NULL; |
| if (ppixtb) *ppixtb = NULL; |
| if (!pixs) |
| return ERROR_INT("pixs not defined", procName, 1); |
| if (pixGetDepth(pixs) != 1) |
| return ERROR_INT("pixs not 1 bpp", procName, 1); |
| |
| /* 2x reduce, to 150 -200 ppi */ |
| pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
| pixDisplayWrite(pixr, debug); |
| |
| /* Get the halftone mask */ |
| pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug); |
| |
| /* Get the textline mask from the text pixels */ |
| pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug); |
| |
| /* Get the textblock mask from the textline mask */ |
| pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug); |
| pixDestroy(&pixr); |
| pixDestroy(&pixtext); |
| pixDestroy(&pixvws); |
| |
| /* Remove small components from the mask, where a small |
| * component is defined as one with both width and height < 60 */ |
| pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, |
| L_SELECT_IF_GTE, NULL); |
| pixDestroy(&pixtb2); |
| pixDisplayWrite(pixtbf2, debug); |
| |
| /* Expand all masks to full resolution, and do filling or |
| * small dilations for better coverage. */ |
| pixhm = pixExpandReplicate(pixhm2, 2); |
| pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); |
| pixOr(pixhm, pixhm, pixt1); |
| pixDestroy(&pixt1); |
| pixDisplayWrite(pixhm, debug); |
| |
| pixt1 = pixExpandReplicate(pixtm2, 2); |
| pixtm = pixDilateBrick(NULL, pixt1, 3, 3); |
| pixDestroy(&pixt1); |
| pixDisplayWrite(pixtm, debug); |
| |
| pixt1 = pixExpandReplicate(pixtbf2, 2); |
| pixtb = pixDilateBrick(NULL, pixt1, 3, 3); |
| pixDestroy(&pixt1); |
| pixDisplayWrite(pixtb, debug); |
| |
| pixDestroy(&pixhm2); |
| pixDestroy(&pixtm2); |
| pixDestroy(&pixtbf2); |
| |
| /* Debug: identify objects that are neither text nor halftone image */ |
| if (debug) { |
| pixt1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ |
| pixt2 = pixSubtract(NULL, pixt1, pixhm); /* remove halftone pixels */ |
| pixDisplayWrite(pixt2, 1); |
| pixDestroy(&pixt1); |
| pixDestroy(&pixt2); |
| } |
| |
| /* Debug: display textline components with random colors */ |
| if (debug) { |
| l_int32 w, h; |
| BOXA *boxa; |
| PIXA *pixa; |
| boxa = pixConnComp(pixtm, &pixa, 8); |
| pixGetDimensions(pixtm, &w, &h, NULL); |
| pixt1 = pixaDisplayRandomCmap(pixa, w, h); |
| pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255); |
| pixDisplay(pixt1, 100, 100); |
| pixDisplayWrite(pixt1, 1); |
| pixaDestroy(&pixa); |
| boxaDestroy(&boxa); |
| pixDestroy(&pixt1); |
| } |
| |
| /* Debug: identify the outlines of each textblock */ |
| if (debug) { |
| PIXCMAP *cmap; |
| PTAA *ptaa; |
| ptaa = pixGetOuterBordersPtaa(pixtb); |
| ptaaWrite("/tmp/junk_tb_outlines.ptaa", ptaa, 1); |
| pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); |
| cmap = pixGetColormap(pixt1); |
| pixcmapResetColor(cmap, 0, 130, 130, 130); |
| pixDisplay(pixt1, 500, 100); |
| pixDisplayWrite(pixt1, 1); |
| pixDestroy(&pixt1); |
| ptaaDestroy(&ptaa); |
| } |
| |
| /* Debug: get b.b. for all mask components */ |
| if (debug) { |
| BOXA *bahm, *batm, *batb; |
| bahm = pixConnComp(pixhm, NULL, 4); |
| batm = pixConnComp(pixtm, NULL, 4); |
| batb = pixConnComp(pixtb, NULL, 4); |
| boxaWrite("junk_htmask.boxa", bahm); |
| boxaWrite("junk_textmask.boxa", batm); |
| boxaWrite("junk_textblock.boxa", batb); |
| boxaDestroy(&bahm); |
| boxaDestroy(&batm); |
| boxaDestroy(&batb); |
| } |
| |
| if (ppixhm) |
| *ppixhm = pixhm; |
| else |
| pixDestroy(&pixhm); |
| if (ppixtm) |
| *ppixtm = pixtm; |
| else |
| pixDestroy(&pixtm); |
| if (ppixtb) |
| *ppixtb = pixtb; |
| else |
| pixDestroy(&pixtb); |
| |
| return 0; |
| } |
| |
| |
| /*------------------------------------------------------------------* |
| * Halftone region extraction * |
| *------------------------------------------------------------------*/ |
| /*! |
| * pixGenHalftoneMask() |
| * |
| * Input: pixs (1 bpp, assumed to be 150 to 200 ppi) |
| * &pixtext (<optional return> text part of pixs) |
| * &htfound (<optional return> 1 if the mask is not empty) |
| * debug (flag: 1 for debug output) |
| * Return: pixd (halftone mask), or null on error |
| */ |
| PIX * |
| pixGenHalftoneMask(PIX *pixs, |
| PIX **ppixtext, |
| l_int32 *phtfound, |
| l_int32 debug) |
| { |
| l_int32 empty; |
| PIX *pixt1, *pixt2, *pixhs, *pixhm, *pixd; |
| |
| PROCNAME("pixGenHalftoneMask"); |
| |
| if (ppixtext) *ppixtext = NULL; |
| if (!pixs) |
| return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); |
| if (pixGetDepth(pixs) != 1) |
| return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); |
| |
| /* Compute seed for halftone parts at 8x reduction */ |
| pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0); |
| pixt2 = pixOpenBrick(NULL, pixt1, 5, 5); |
| pixhs = pixExpandReplicate(pixt2, 8); /* back to 2x reduction */ |
| pixDestroy(&pixt1); |
| pixDestroy(&pixt2); |
| pixDisplayWrite(pixhs, debug); |
| |
| /* Compute mask for connected regions */ |
| pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4); |
| pixDisplayWrite(pixhm, debug); |
| |
| /* Fill seed into mask to get halftone mask */ |
| pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4); |
| |
| #if 0 |
| /* Moderate opening to remove thin lines, etc. */ |
| pixOpenBrick(pixd, pixd, 10, 10); |
| pixDisplayWrite(pixd, debug); |
| #endif |
| |
| /* Check if mask is empty */ |
| pixZero(pixd, &empty); |
| if (phtfound) { |
| *phtfound = 0; |
| if (!empty) |
| *phtfound = 1; |
| } |
| |
| /* Optionally, get all pixels that are not under the halftone mask */ |
| if (ppixtext) { |
| if (empty) |
| *ppixtext = pixCopy(NULL, pixs); |
| else |
| *ppixtext = pixSubtract(NULL, pixs, pixd); |
| pixDisplayWrite(*ppixtext, debug); |
| } |
| |
| pixDestroy(&pixhs); |
| pixDestroy(&pixhm); |
| return pixd; |
| } |
| |
| |
| /*------------------------------------------------------------------* |
| * Textline extraction * |
| *------------------------------------------------------------------*/ |
| /*! |
| * pixGenTextlineMask() |
| * |
| * Input: pixs (1 bpp, assumed to be 150 to 200 ppi) |
| * &pixvws (<return> vertical whitespace mask) |
| * &tlfound (<optional return> 1 if the mask is not empty) |
| * debug (flag: 1 for debug output) |
| * Return: pixd (textline mask), or null on error |
| * |
| * Notes: |
| * (1) The input pixs should be deskewed. |
| * (2) pixs should have no halftone pixels. |
| * (3) Both the input image and the returned textline mask |
| * are at the same resolution. |
| */ |
| PIX * |
| pixGenTextlineMask(PIX *pixs, |
| PIX **ppixvws, |
| l_int32 *ptlfound, |
| l_int32 debug) |
| { |
| l_int32 empty; |
| PIX *pixt1, *pixt2, *pixvws, *pixd; |
| |
| PROCNAME("pixGenTextlineMask"); |
| |
| if (!pixs) |
| return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); |
| if (!ppixvws) |
| return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL); |
| if (pixGetDepth(pixs) != 1) |
| return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); |
| |
| /* First we need a vertical whitespace mask. Invert the image. */ |
| pixt1 = pixInvert(NULL, pixs); |
| |
| /* The whitespace mask will break textlines where there |
| * is a large amount of white space below or above. |
| * This can be prevented by identifying regions of the |
| * inverted image that have large horizontal extent (bigger than |
| * the separation between columns) and significant |
| * vertical extent (bigger than the separation between |
| * textlines), and subtracting this from the bg. */ |
| pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0); |
| pixSubtract(pixt1, pixt1, pixt2); |
| pixDisplayWrite(pixt1, debug); |
| pixDestroy(&pixt2); |
| |
| /* Identify vertical whitespace by opening the remaining bg. |
| * o5.1 removes thin vertical bg lines and o1.200 extracts |
| * long vertical bg lines. */ |
| pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0); |
| *ppixvws = pixvws; |
| pixDisplayWrite(pixvws, debug); |
| pixDestroy(&pixt1); |
| |
| /* Three steps to getting text line mask: |
| * (1) close the characters and words in the textlines |
| * (2) open the vertical whitespace corridors back up |
| * (3) small opening to remove noise */ |
| pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1); |
| pixDisplayWrite(pixt1, debug); |
| pixd = pixSubtract(NULL, pixt1, pixvws); |
| pixOpenBrick(pixd, pixd, 3, 3); |
| pixDisplayWrite(pixd, debug); |
| pixDestroy(&pixt1); |
| |
| /* Check if text line mask is empty */ |
| if (ptlfound) { |
| *ptlfound = 0; |
| pixZero(pixd, &empty); |
| if (!empty) |
| *ptlfound = 1; |
| } |
| |
| return pixd; |
| } |
| |
| |
| /*------------------------------------------------------------------* |
| * Textblock extraction * |
| *------------------------------------------------------------------*/ |
| /*! |
| * pixGenTextblockMask() |
| * |
| * Input: pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi) |
| * pixvws (vertical white space mask) |
| * debug (flag: 1 for debug output) |
| * Return: pixd (textblock mask), or null on error |
| * |
| * Notes: |
| * (1) Both the input masks (textline and vertical white space) and |
| * the returned textblock mask are at the same resolution. |
| * (2) The result is somewhat noisy, in that small "blocks" of |
| * text may be included. These can be removed by post-processing, |
| * using, e.g., |
| * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, |
| * L_SELECT_IF_GTE, NULL); |
| */ |
| PIX * |
| pixGenTextblockMask(PIX *pixs, |
| PIX *pixvws, |
| l_int32 debug) |
| { |
| PIX *pixt1, *pixt2, *pixt3, *pixd; |
| |
| PROCNAME("pixGenTextblockMask"); |
| |
| if (!pixs) |
| return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); |
| if (!pixvws) |
| return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL); |
| if (pixGetDepth(pixs) != 1) |
| return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); |
| |
| /* Join pixels vertically to make a textblock mask */ |
| pixt1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0); |
| pixDisplayWrite(pixt1, debug); |
| |
| /* Solidify the textblock mask and remove noise: |
| * (1) For each cc, close the blocks and dilate slightly |
| * to form a solid mask. |
| * (2) Small horizontal closing between components. |
| * (3) Open the white space between columns, again. |
| * (4) Remove small components. */ |
| pixt2 = pixMorphSequenceByComponent(pixt1, "c30.30 + d3.3", 8, 0, 0, NULL); |
| pixCloseSafeBrick(pixt2, pixt2, 10, 1); |
| pixDisplayWrite(pixt2, debug); |
| pixt3 = pixSubtract(NULL, pixt2, pixvws); |
| pixDisplayWrite(pixt3, debug); |
| pixd = pixSelectBySize(pixt3, 25, 5, 8, L_SELECT_IF_BOTH, |
| L_SELECT_IF_GTE, NULL); |
| pixDisplayWrite(pixd, debug); |
| |
| pixDestroy(&pixt1); |
| pixDestroy(&pixt2); |
| pixDestroy(&pixt3); |
| return pixd; |
| } |
| |
| |