| /////////////////////////////////////////////////////////////////////// |
| // File: leptonica_pageseg.cpp |
| // Description: Leptonica-based page segmenter. |
| // Author: Dan Bloomberg |
| // Created: Tue Aug 28 08:56:43 PDT 2007 |
| // |
| // (C) Copyright 2007, Google Inc. |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| /////////////////////////////////////////////////////////////////////// |
| |
| // Include automatically generated configuration file if running autoconf. |
| #ifdef HAVE_CONFIG_H |
| #include "config_auto.h" |
| #endif |
| |
| #include "leptonica_pageseg.h" |
| |
| #ifdef HAVE_LIBLEPT |
| // Include leptonica library only if autoconf (or makefile etc) tell us to. |
| #include "allheaders.h" |
| #endif |
| |
| #ifdef HAVE_LIBLEPT |
| // ONLY available if you have Leptonica installed. |
| |
| // class LeptonicaPageSeg |
| // |
| // Region segmentation |
| // bool GetHalftoneMask() |
| // bool GetTextlineMask() |
| // bool GetTextblockMask() |
| // |
| // Top-level (for testing/debugging) |
| // bool GetAllRegions() |
| // |
| // |
| |
| //------------------------------------------------------------------ |
| // Region segmentation |
| //------------------------------------------------------------------ |
| // GetHalftoneMask() |
| // Input: pixs (input image, assumed to be at 300 - 400 ppi) |
| // &pixht (returns halftone mask; can be NULL) |
| // &baht (returns boxa of halftone mask component b.b.s; can be NULL) |
| // &paht (returns pixa of halftone mask components; can be NULL) |
| // debugflag (set true to write out intermediate images) |
| // Return: true if ok, false on error |
| // Note: If there are no halftone regions, all requested data structures |
| // are returned as NULL. This is not an error. |
| bool LeptonicaPageSeg::GetHalftoneMask(Pix *pixs, |
| Pix **ppixht, |
| Boxa **pbaht, |
| Pixa **ppaht, |
| bool debugflag) { |
| if (!pixs) { |
| fprintf(stderr, "pixs not defined\n"); |
| return false; |
| } |
| |
| l_int32 debug = debugflag ? 1 : 0; |
| |
| // 2x reduce, to 150 - 200 ppi |
| Pix *pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
| pixDisplayWrite(pixr, debug); |
| |
| // Get the halftone mask |
| Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL, debug); |
| pixDestroy(&pixr); |
| if (!pixht2) { |
| if (debug) |
| printf("No halftone image parts found\n"); |
| if (ppixht) *ppixht = NULL; |
| if (pbaht) *pbaht = NULL; |
| if (ppaht) *ppaht = NULL; |
| return true; |
| } else { |
| if (debug) |
| printf("Halftone image parts found\n"); |
| } |
| |
| Pix *pixht = pixExpandReplicate(pixht2, 2); |
| pixDisplayWrite(pixht, debug); |
| pixDestroy(&pixht2); |
| |
| // Fill to capture pixels near the mask edges that were missed |
| Pix *pixt = pixSeedfillBinary(NULL, pixht, pixs, 8); |
| pixOr(pixht, pixht, pixt); |
| pixDestroy(&pixt); |
| |
| if (ppaht) { |
| Boxa *boxa = pixConnComp(pixht, ppaht, 4); |
| if (pbaht) { |
| *pbaht = boxa; |
| } else { |
| boxaDestroy(&boxa); |
| } |
| } else if (pbaht) { |
| *pbaht = pixConnComp(pixht, NULL, 4); |
| } |
| |
| if (ppixht) { |
| *ppixht =pixht; |
| } else { |
| pixDestroy(&pixht); |
| } |
| |
| return true; |
| } |
| |
| |
| // GetTextlineMask() |
| // Input: pixs (input image, assumed to be at 300 - 400 ppi) |
| // &pixtm (returns textline mask; can be NULL) |
| // &pixvws (returns vertical whitespace mask; can be NULL) |
| // &batm (returns boxa of textline mask component b.b.s; can be NULL) |
| // &patm (returns pixa of textline mask components; can be NULL) |
| // debugflag (set true to write out intermediate images) |
| // Return: true if ok, false on error |
| bool LeptonicaPageSeg::GetTextlineMask(Pix *pixs, |
| Pix **ppixtm, |
| Pix **ppixvws, |
| Boxa **pbatm, |
| Pixa **ppatm, |
| bool debugflag) { |
| if (!pixs) { |
| fprintf(stderr, "pixs not defined\n"); |
| return false; |
| } |
| |
| l_int32 debug = debugflag ? 1 : 0; |
| |
| // 2x reduce, to 150 - 200 ppi |
| Pix *pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); |
| pixDisplayWrite(pixr, debug); |
| |
| // Remove the halftone pixels from the image |
| Pix *pixtext; |
| Pix *pixht2 = pixGenHalftoneMask(pixr, &pixtext, NULL, debug); |
| pixDestroy(&pixr); |
| pixDestroy(&pixht2); |
| |
| // Get the textline mask at full res |
| Pix *pixvws; |
| Pix *pixtm2 = pixGenTextlineMask(pixtext, &pixvws, NULL, debug); |
| Pix *pixt = pixExpandReplicate(pixtm2, 2); |
| pixDestroy(&pixtext); |
| pixDestroy(&pixtm2); |
| |
| // Small dilation to capture pixels near the mask edges that were missed |
| // Do not use filling here, because the result is then used to find |
| // textblocks, and a mistake here gets propagated. |
| Pix *pixtm = pixDilateBrick(NULL, pixt, 3, 3); |
| pixDestroy(&pixt); |
| pixDisplayWrite(pixtm, debug); |
| |
| if (ppatm) { |
| Boxa *boxa = pixConnComp(pixtm, ppatm, 4); |
| if (pbatm) { |
| *pbatm = boxa; |
| } else { |
| boxaDestroy(&boxa); |
| } |
| } else if (pbatm) { |
| *pbatm = pixConnComp(pixtm, NULL, 4); |
| } |
| |
| if (ppixtm) { |
| *ppixtm =pixtm; |
| } else { |
| pixDestroy(&pixtm); |
| } |
| if (ppixvws) { |
| *ppixvws =pixvws; |
| } else { |
| pixDestroy(&pixvws); |
| } |
| |
| return true; |
| } |
| |
| |
| // GetTextblockMask() |
| // Input: pixs (input image, assumed to be at 300 - 400 ppi) |
| // &pixtb (returns textblock mask; can be NULL) |
| // &batb (returns boxa of textblock mask component b.b; can be NULL) |
| // &patb (returns pixa of textblock mask components; can be NULL) |
| // debugflag (set true to write out intermediate images) |
| // Return: true if ok, false on error |
| // Notes: |
| // To obtain a set of polylines of the outer borders of each of the |
| // textblock regions, use pixGetOuterBordersPtaa(). |
| bool LeptonicaPageSeg::GetTextblockMask(Pix *pixs, |
| Pix **ppixtb, |
| Boxa **pbatb, |
| Pixa **ppatb, |
| bool debugflag) { |
| if (!pixs) { |
| fprintf(stderr, "pixs not defined\n"); |
| return false; |
| } |
| |
| l_int32 debug = debugflag ? 1 : 0; |
| |
| // Get the textline mask at 2x reduction |
| Pix *pixtm, *pixvws; |
| GetTextlineMask(pixs, &pixtm, &pixvws, NULL, NULL, debugflag); |
| Pix *pixtm2 = pixReduceRankBinaryCascade(pixtm, 1, 0, 0, 0); |
| pixDestroy(&pixtm); |
| |
| // Get the textblock mask |
| Pix *pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug); |
| Pix *pixt = pixExpandReplicate(pixtb2, 2); |
| pixDestroy(&pixtm2); |
| pixDestroy(&pixtb2); |
| pixDestroy(&pixvws); |
| |
| // Dilate to capture pixels near the mask edges that were missed |
| Pix *pixtb = pixDilateBrick(NULL, pixt, 3, 3); |
| pixDestroy(&pixt); |
| pixDisplayWrite(pixtb, debug); |
| |
| if (ppatb) { |
| Boxa *boxa = pixConnComp(pixtb, ppatb, 4); |
| if (pbatb) { |
| *pbatb = boxa; |
| } else { |
| boxaDestroy(&boxa); |
| } |
| } else if (pbatb) { |
| *pbatb = pixConnComp(pixtb, NULL, 4); |
| } |
| |
| if (ppixtb) { |
| *ppixtb = pixtb; |
| } else { |
| pixDestroy(&pixtb); |
| } |
| |
| return true; |
| } |
| |
| |
| //------------------------------------------------------------------ |
| // Top-level (for testing/debugging) |
| //------------------------------------------------------------------ |
| // GetAllRegions() |
| // Input: pixs (input image, assumed to be at 300 - 400 ppi) |
| // &pixhm (returns halftone mask; can be NULL) |
| // &pixtm (returns textline mask; can be NULL) |
| // &pixtb (returns textblock mask; can be NULL) |
| // debugflag (set true to write out intermediate images and data) |
| // Return: true if ok, false on error |
| // Note: use NULL for input on each mask you don't want. |
| bool LeptonicaPageSeg::GetAllRegions(Pix *pixs, |
| Pix **ppixhm, |
| Pix **ppixtm, |
| Pix **ppixtb, |
| bool debugflag) { |
| if (!pixs || (pixGetDepth(pixs) != 1)) { |
| fprintf(stderr, "pixs not read or not 1 bpp\n"); |
| return 1; |
| } |
| |
| l_int32 w, h; |
| pixGetDimensions(pixs, &w, &h, NULL); |
| l_int32 debug = debugflag ? 1 : 0; |
| |
| // Segment the page |
| Boxa *batm = NULL; |
| Boxa *batb = NULL; |
| Pixa *patm = NULL; |
| Pixa *patb = NULL; |
| Pix *pixhm = NULL; |
| Pix *pixtm = NULL; |
| Pix *pixtb = NULL; |
| |
| startTimer(); |
| LeptonicaPageSeg::GetHalftoneMask(pixs, &pixhm, NULL, NULL, false); |
| if (debug) |
| printf("Halftone segmentation time: %f sec\n", stopTimer()); |
| |
| startTimer(); |
| LeptonicaPageSeg::GetTextlineMask(pixs, &pixtm, NULL, &batm, &patm, false); |
| if (debug) |
| printf("Textline segmentation time: %f sec\n", stopTimer()); |
| |
| startTimer(); |
| LeptonicaPageSeg::GetTextblockMask(pixs, &pixtb, &batb, &patb, debugflag); |
| if (debug) |
| printf("Textblock segmentation time: %f sec\n", stopTimer()); |
| |
| // Display the textlines |
| if (debug) { |
| Pix *pixt = pixaDisplayRandomCmap(patm, w, h); |
| pixcmapResetColor(pixGetColormap(pixt), 0, 255, 255, 255); // white bg |
| pixDisplay(pixt, 100, 100); |
| pixDisplayWrite(pixt, 1); |
| pixDestroy(&pixt); |
| } |
| |
| // Display the textblocks |
| if (debug) { |
| Pix *pixt = pixaDisplayRandomCmap(patb, w, h); |
| pixcmapResetColor(pixGetColormap(pixt), 0, 255, 255, 255); |
| pixDisplay(pixt, 100, 100); |
| pixDisplayWrite(pixt, 1); |
| pixDestroy(&pixt); |
| } |
| |
| // Identify the outlines of each textblock |
| if (debug) { |
| Ptaa *ptaa = pixGetOuterBordersPtaa(pixtb); |
| Pix *pixt = pixRenderRandomCmapPtaa(pixtb, ptaa, 8, 1); |
| PixColormap *cmap = pixGetColormap(pixt); |
| pixcmapResetColor(cmap, 0, 130, 130, 130); |
| pixDisplayWrite(pixt, debug); |
| pixDestroy(&pixt); |
| ptaaWrite("junk_ptaa_outlines.ptaa", ptaa, 1); |
| ptaaDestroy(&ptaa); |
| } |
| |
| // Save b.b. for textblocks |
| if (debug) { |
| Boxa *ba1 = boxaSelectBySize(batb, 3, 3, L_SELECT_IF_BOTH, |
| L_SELECT_IF_GTE, NULL); |
| boxaWrite("junk_textblock.boxa", ba1); |
| boxaDestroy(&ba1); |
| } |
| |
| if (ppixhm) { |
| *ppixhm = pixhm; |
| } else { |
| pixDestroy(&pixhm); |
| } |
| if (ppixtm) { |
| *ppixtm = pixtm; |
| } else { |
| pixDestroy(&pixtm); |
| } |
| if (ppixtb) { |
| *ppixtb = pixtb; |
| } else { |
| pixDestroy(&pixtb); |
| } |
| |
| boxaDestroy(&batm); |
| boxaDestroy(&batb); |
| pixaDestroy(&patm); |
| pixaDestroy(&patb); |
| pixDestroy(&pixs); |
| return true; |
| } |
| |
| #endif // HAVE_LIBLEPT |