blob: f756092dc0580b859dacc424eef8b2361fb0ba4b [file] [log] [blame]
/*====================================================================*
- Copyright (C) 2001 Leptonica. All rights reserved.
- This software is distributed in the hope that it will be
- useful, but with NO WARRANTY OF ANY KIND.
- No author or distributor accepts responsibility to anyone for the
- consequences of using this software, or for whether it serves any
- particular purpose or works at all, unless he or she says so in
- writing. Everyone is granted permission to copy, modify and
- redistribute this source code, for commercial or non-commercial
- purposes, with the following restrictions: (1) the origin of this
- source code must not be misrepresented; (2) modified versions must
- be plainly marked as such; and (3) this notice may not be removed
- or altered from any source or modified source distribution.
*====================================================================*/
/*
* pageseg.c
*
* Top level page segmentation
* l_int32 pixGetRegionsBinary()
*
* Halftone region extraction
* PIX *pixGenHalftoneMask()
*
* Textline extraction
* PIX *pixGenTextlineMask()
*
* Textblock extraction
* PIX *pixGenTextblockMask()
*/
#include <stdio.h>
#include <stdlib.h>
#include "allheaders.h"
/*------------------------------------------------------------------*
* Top level page segmentation *
*------------------------------------------------------------------*/
/*!
 *  pixGetRegionsBinary()
 *
 *      Input:  pixs (1 bpp, assumed to be 300 to 400 ppi)
 *              &pixhm (<optional return> halftone mask)
 *              &pixtm (<optional return> textline mask)
 *              &pixtb (<optional return> textblock mask)
 *              debug (flag: set to 1 for debug output)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) It is best to deskew the image before segmenting.
 *      (2) The debug flag enables a number of outputs.  These
 *          are included to show how to generate and save/display
 *          these results.
 *      (3) The segmentation is computed on a 2x reduced image; the
 *          three masks are then expanded 2x before being returned.
 *      (4) Each mask that is requested (non-null return address) is
 *          handed to the caller, who must destroy it.  Masks that
 *          are not requested are destroyed here.
 *      (5) On error, every requested return is left NULL and all
 *          intermediate images are released.
 */
l_int32
pixGetRegionsBinary(PIX     *pixs,
                    PIX    **ppixhm,
                    PIX    **ppixtm,
                    PIX    **ppixtb,
                    l_int32  debug)
{
l_int32  ret = 1;          /* cleared only after every mask is built */
PIX     *pixr = NULL;      /* pixs at 2x reduction */
PIX     *pixt1 = NULL;     /* scratch */
PIX     *pixtext = NULL;   /* text pixels only */
PIX     *pixhm2 = NULL;    /* halftone mask; 2x reduction */
PIX     *pixhm = NULL;     /* halftone mask */
PIX     *pixtm2 = NULL;    /* textline mask; 2x reduction */
PIX     *pixtm = NULL;     /* textline mask */
PIX     *pixvws = NULL;    /* vertical white space mask */
PIX     *pixtb2 = NULL;    /* textblock mask; 2x reduction */
PIX     *pixtbf2 = NULL;   /* textblock mask; 2x reduction; small comps filtered */
PIX     *pixtb = NULL;     /* textblock mask */

    PROCNAME("pixGetRegionsBinary");

    if (ppixhm) *ppixhm = NULL;
    if (ppixtm) *ppixtm = NULL;
    if (ppixtb) *ppixtb = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not 1 bpp", procName, 1);

        /* 2x reduce, to 150 - 200 ppi */
    if ((pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0)) == NULL)
        return ERROR_INT("pixr not made", procName, 1);
    pixDisplayWrite(pixr, debug);

        /* Get the halftone mask.  The "found" flag is an optional
         * output that is not needed here. */
    pixhm2 = pixGenHalftoneMask(pixr, &pixtext, NULL, debug);
    if (!pixhm2 || !pixtext) {
        ret = ERROR_INT("halftone mask not made", procName, 1);
        goto cleanup;
    }

        /* Get the textline mask from the text pixels.  All handles
         * are initialized to NULL above, so the cleanup path is safe
         * even if a callee returns before storing its output. */
    pixtm2 = pixGenTextlineMask(pixtext, &pixvws, NULL, debug);
    if (!pixtm2 || !pixvws) {
        ret = ERROR_INT("textline mask not made", procName, 1);
        goto cleanup;
    }

        /* Get the textblock mask from the textline mask */
    pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
    pixDestroy(&pixr);
    pixDestroy(&pixtext);
    pixDestroy(&pixvws);
    if (!pixtb2) {
        ret = ERROR_INT("textblock mask not made", procName, 1);
        goto cleanup;
    }

        /* Remove small components from the mask, where a small
         * component is defined as one with both width and height < 60 */
    pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
                              L_SELECT_IF_GTE, NULL);
    pixDestroy(&pixtb2);
    if (!pixtbf2) {
        ret = ERROR_INT("filtered textblock mask not made", procName, 1);
        goto cleanup;
    }
    pixDisplayWrite(pixtbf2, debug);

        /* Expand all masks to full resolution, and do filling or
         * small dilations for better coverage. */
    if ((pixhm = pixExpandReplicate(pixhm2, 2)) == NULL) {
        ret = ERROR_INT("pixhm not made", procName, 1);
        goto cleanup;
    }
    pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
    pixOr(pixhm, pixhm, pixt1);
    pixDestroy(&pixt1);
    pixDisplayWrite(pixhm, debug);

    pixt1 = pixExpandReplicate(pixtm2, 2);
    pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    if (!pixtm) {
        ret = ERROR_INT("pixtm not made", procName, 1);
        goto cleanup;
    }
    pixDisplayWrite(pixtm, debug);

    pixt1 = pixExpandReplicate(pixtbf2, 2);
    pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    if (!pixtb) {
        ret = ERROR_INT("pixtb not made", procName, 1);
        goto cleanup;
    }
    pixDisplayWrite(pixtb, debug);

    pixDestroy(&pixhm2);
    pixDestroy(&pixtm2);
    pixDestroy(&pixtbf2);

    if (debug) {
            /* Identify objects that are neither text nor halftone image */
        {
        PIX  *pixt2;
            pixt1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
            pixt2 = pixSubtract(NULL, pixt1, pixhm);  /* remove halftone pixels */
            pixDisplayWrite(pixt2, 1);
            pixDestroy(&pixt1);
            pixDestroy(&pixt2);
        }

            /* Display textline components with random colors */
        {
        l_int32  w, h;
        BOXA    *boxa;
        PIXA    *pixa;
            boxa = pixConnComp(pixtm, &pixa, 8);
            pixGetDimensions(pixtm, &w, &h, NULL);
            pixt1 = pixaDisplayRandomCmap(pixa, w, h);
            pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
            pixDisplay(pixt1, 100, 100);
            pixDisplayWrite(pixt1, 1);
            pixaDestroy(&pixa);
            boxaDestroy(&boxa);
            pixDestroy(&pixt1);
        }

            /* Identify the outlines of each textblock */
        {
        PIXCMAP  *cmap;
        PTAA     *ptaa;
            ptaa = pixGetOuterBordersPtaa(pixtb);
            ptaaWrite("/tmp/junk_tb_outlines.ptaa", ptaa, 1);
            pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
            cmap = pixGetColormap(pixt1);
            pixcmapResetColor(cmap, 0, 130, 130, 130);
            pixDisplay(pixt1, 500, 100);
            pixDisplayWrite(pixt1, 1);
            pixDestroy(&pixt1);
            ptaaDestroy(&ptaa);
        }

            /* Get b.b. for all mask components */
        {
        BOXA  *bahm, *batm, *batb;
            bahm = pixConnComp(pixhm, NULL, 4);
            batm = pixConnComp(pixtm, NULL, 4);
            batb = pixConnComp(pixtb, NULL, 4);
            boxaWrite("junk_htmask.boxa", bahm);
            boxaWrite("junk_textmask.boxa", batm);
            boxaWrite("junk_textblock.boxa", batb);
            boxaDestroy(&bahm);
            boxaDestroy(&batm);
            boxaDestroy(&batb);
        }
    }

        /* Hand the requested masks to the caller.  Clearing the local
         * handle keeps the cleanup below from destroying them. */
    if (ppixhm) {
        *ppixhm = pixhm;
        pixhm = NULL;
    }
    if (ppixtm) {
        *ppixtm = pixtm;
        pixtm = NULL;
    }
    if (ppixtb) {
        *ppixtb = pixtb;
        pixtb = NULL;
    }
    ret = 0;

cleanup:
        /* pixDestroy() ignores handles that are already NULL, so
         * everything can be released unconditionally. */
    pixDestroy(&pixr);
    pixDestroy(&pixtext);
    pixDestroy(&pixvws);
    pixDestroy(&pixhm2);
    pixDestroy(&pixtm2);
    pixDestroy(&pixtb2);
    pixDestroy(&pixtbf2);
    pixDestroy(&pixt1);
    pixDestroy(&pixhm);
    pixDestroy(&pixtm);
    pixDestroy(&pixtb);
    return ret;
}
/*------------------------------------------------------------------*
* Halftone region extraction *
*------------------------------------------------------------------*/
/*!
 *  pixGenHalftoneMask()
 *
 *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
 *              &pixtext (<optional return> text part of pixs)
 *              &htfound (<optional return> 1 if the mask is not empty)
 *              debug (flag: 1 for debug output)
 *      Return: pixd (halftone mask), or null on error
 *
 *  Notes:
 *      (1) The mask is made by filling a seed (pixs reduced 8x,
 *          opened, and expanded back 8x) into a 4x4 closing of pixs.
 *      (2) If requested, pixtext is a copy of pixs when the mask is
 *          empty; otherwise it is pixs with the mask pixels removed.
 *      (3) On error, *ppixtext is NULL and *phtfound is 0.
 */
PIX *
pixGenHalftoneMask(PIX      *pixs,
                   PIX     **ppixtext,
                   l_int32  *phtfound,
                   l_int32   debug)
{
l_int32  empty;
PIX     *pixt1, *pixt2, *pixhs, *pixhm, *pixd;

    PROCNAME("pixGenHalftoneMask");

        /* Give both optional outputs a defined value on every path */
    if (ppixtext) *ppixtext = NULL;
    if (phtfound) *phtfound = 0;
    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* Compute seed for halftone parts at 8x reduction */
    pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0);
    pixt2 = pixOpenBrick(NULL, pixt1, 5, 5);
    pixhs = pixExpandReplicate(pixt2, 8);  /* back to 2x reduction */
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    if (!pixhs)
        return (PIX *)ERROR_PTR("pixhs not made", procName, NULL);
    pixDisplayWrite(pixhs, debug);

        /* Compute mask for connected regions */
    if ((pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4)) == NULL) {
        pixDestroy(&pixhs);
        return (PIX *)ERROR_PTR("pixhm not made", procName, NULL);
    }
    pixDisplayWrite(pixhm, debug);

        /* Fill seed into mask to get halftone mask */
    pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
    pixDestroy(&pixhs);
    pixDestroy(&pixhm);
    if (!pixd)
        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);

#if 0
        /* Moderate opening to remove thin lines, etc. */
    pixOpenBrick(pixd, pixd, 10, 10);
    pixDisplayWrite(pixd, debug);
#endif

        /* Check if mask is empty */
    if (pixZero(pixd, &empty)) {
        pixDestroy(&pixd);
        return (PIX *)ERROR_PTR("emptiness of pixd not determined",
                                procName, NULL);
    }
    if (phtfound && !empty)
        *phtfound = 1;

        /* Optionally, get all pixels that are not under the halftone mask */
    if (ppixtext) {
        if (empty)
            *ppixtext = pixCopy(NULL, pixs);
        else
            *ppixtext = pixSubtract(NULL, pixs, pixd);
        pixDisplayWrite(*ppixtext, debug);
    }

    return pixd;
}
/*------------------------------------------------------------------*
* Textline extraction *
*------------------------------------------------------------------*/
/*!
 *  pixGenTextlineMask()
 *
 *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
 *              &pixvws (<return> vertical whitespace mask)
 *              &tlfound (<optional return> 1 if the mask is not empty)
 *              debug (flag: 1 for debug output)
 *      Return: pixd (textline mask), or null on error
 *
 *  Notes:
 *      (1) The input pixs should be deskewed.
 *      (2) pixs should have no halftone pixels.
 *      (3) Both the input image and the returned textline mask
 *          are at the same resolution.
 *      (4) On error, *ppixvws is NULL and *ptlfound is 0, so the
 *          caller can destroy or test them unconditionally.
 */
PIX *
pixGenTextlineMask(PIX      *pixs,
                   PIX     **ppixvws,
                   l_int32  *ptlfound,
                   l_int32   debug)
{
l_int32  empty;
PIX     *pixt1, *pixt2, *pixvws, *pixd;

    PROCNAME("pixGenTextlineMask");

        /* Define the outputs before any other check can return */
    if (ptlfound) *ptlfound = 0;
    if (!ppixvws)
        return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL);
    *ppixvws = NULL;
    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* First we need a vertical whitespace mask.  Invert the image. */
    if ((pixt1 = pixInvert(NULL, pixs)) == NULL)
        return (PIX *)ERROR_PTR("pixt1 not made", procName, NULL);

        /* The whitespace mask will break textlines where there
         * is a large amount of white space below or above.
         * This can be prevented by identifying regions of the
         * inverted image that have large horizontal extent (bigger than
         * the separation between columns) and significant
         * vertical extent (bigger than the separation between
         * textlines), and subtracting this from the bg. */
    pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0);
    pixSubtract(pixt1, pixt1, pixt2);
    pixDisplayWrite(pixt1, debug);
    pixDestroy(&pixt2);

        /* Identify vertical whitespace by opening the remaining bg.
         * o5.1 removes thin vertical bg lines and o1.200 extracts
         * long vertical bg lines. */
    pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0);
    pixDestroy(&pixt1);
    if (!pixvws)
        return (PIX *)ERROR_PTR("pixvws not made", procName, NULL);
    *ppixvws = pixvws;
    pixDisplayWrite(pixvws, debug);

        /* Three steps to getting text line mask:
         *   (1) close the characters and words in the textlines
         *   (2) open the vertical whitespace corridors back up
         *   (3) small opening to remove noise */
    pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1);
    pixDisplayWrite(pixt1, debug);
    pixd = pixSubtract(NULL, pixt1, pixvws);
    pixDestroy(&pixt1);
    if (!pixd) {
            /* Do not hand back a whitespace mask without a textline
             * mask; this also resets *ppixvws to NULL. */
        pixDestroy(ppixvws);
        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
    }
    pixOpenBrick(pixd, pixd, 3, 3);
    pixDisplayWrite(pixd, debug);

        /* Check if text line mask is empty */
    if (ptlfound) {
        pixZero(pixd, &empty);
        if (!empty)
            *ptlfound = 1;
    }

    return pixd;
}
/*------------------------------------------------------------------*
* Textblock extraction *
*------------------------------------------------------------------*/
/*!
 *  pixGenTextblockMask()
 *
 *      Input:  pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi)
 *              pixvws (vertical white space mask)
 *              debug (flag: 1 for debug output)
 *      Return: pixd (textblock mask), or null on error
 *
 *  Notes:
 *      (1) Both the input masks (textline and vertical white space) and
 *          the returned textblock mask are at the same resolution.
 *      (2) The result is somewhat noisy, in that small "blocks" of
 *          text may be included.  These can be removed by post-processing,
 *          using, e.g.,
 *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
 *                             L_SELECT_IF_GTE, NULL);
 *      (3) The pipeline is: join textlines vertically; solidify each
 *          component; close small horizontal gaps; remove the vertical
 *          whitespace; drop components that are too small.
 */
PIX *
pixGenTextblockMask(PIX     *pixs,
                    PIX     *pixvws,
                    l_int32  debug)
{
PIX  *pixjoined;   /* textlines merged vertically */
PIX  *pixsolid;    /* per-component solid blocks, gaps closed */
PIX  *pixcut;      /* solid blocks with whitespace corridors removed */
PIX  *pixd;

    PROCNAME("pixGenTextblockMask");

    if (pixs == NULL)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixvws == NULL)
        return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* Step 1: merge textlines in the vertical direction to get
         * a first approximation of the blocks. */
    pixjoined = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
    pixDisplayWrite(pixjoined, debug);

        /* Step 2: working component by component, close and slightly
         * dilate to get solid blocks; then bridge small horizontal
         * gaps between neighboring components. */
    pixsolid = pixMorphSequenceByComponent(pixjoined, "c30.30 + d3.3",
                                           8, 0, 0, NULL);
    pixDestroy(&pixjoined);
    pixCloseSafeBrick(pixsolid, pixsolid, 10, 1);
    pixDisplayWrite(pixsolid, debug);

        /* Step 3: re-open the white space between columns. */
    pixcut = pixSubtract(NULL, pixsolid, pixvws);
    pixDestroy(&pixsolid);
    pixDisplayWrite(pixcut, debug);

        /* Step 4: keep only components that are at least 25 wide
         * and at least 5 high. */
    pixd = pixSelectBySize(pixcut, 25, 5, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GTE, NULL);
    pixDestroy(&pixcut);
    pixDisplayWrite(pixd, debug);

    return pixd;
}