| /********************************************************************** |
| * File: werdit.cpp (Formerly wordit.c) |
| * Description: An iterator for passing over all the words in a document. |
| * Author: Ray Smith |
| * Created: Mon Apr 27 08:51:22 BST 1992 |
| * |
| * (C) Copyright 1992, Hewlett-Packard Ltd. |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| * |
| **********************************************************************/ |
| |
| #include "mfcpch.h" |
| #include "werdit.h" |
| |
| #define EXTERN |
| |
| //EXTERN BOOL_VAR(wordit_linearc,FALSE,"Pass poly of linearc to Tess"); |
| |
| /********************************************************************** |
| * WERDIT::start_page |
| * |
| * Get ready to iterate over the page by setting the iterators. |
| **********************************************************************/ |
| |
| void WERDIT::start_page( //set iterators |
| BLOCK_LIST *block_list //blocks to check |
| ) { |
| block_it.set_to_list (block_list); |
| block_it.mark_cycle_pt (); |
| do { |
| while (block_it.data ()->row_list ()->empty () |
| && !block_it.cycled_list ()) { |
| block_it.forward (); |
| } |
| if (!block_it.data ()->row_list ()->empty ()) { |
| row_it.set_to_list (block_it.data ()->row_list ()); |
| row_it.mark_cycle_pt (); |
| while (row_it.data ()->word_list ()->empty () |
| && !row_it.cycled_list ()) { |
| row_it.forward (); |
| } |
| if (!row_it.data ()->word_list ()->empty ()) { |
| word_it.set_to_list (row_it.data ()->word_list ()); |
| word_it.mark_cycle_pt (); |
| } |
| } |
| } |
| while (!block_it.cycled_list () && row_it.data ()->word_list ()->empty ()); |
| } |
| |
| |
| /********************************************************************** |
| * WERDIT::forward |
| * |
| * Give the next word on the page, or NULL if none left. |
| * This code assumes all rows to be non-empty, but blocks are allowed |
| * to be empty as eventually we will have non-text blocks. |
| * The output is always a copy and needs to be deleted by somebody. |
| **********************************************************************/ |
| |
| WERD *WERDIT::forward() { //use iterators |
| WERD *word; //actual word |
| // WERD *larc_word; //linearc copy |
| WERD *result; //output word |
| ROW *row; //row of word |
| |
| if (word_it.cycled_list ()) { |
| return NULL; //finished page |
| } |
| else { |
| word = word_it.data (); |
| row = row_it.data (); |
| word_it.forward (); |
| if (word_it.cycled_list ()) { |
| row_it.forward (); //finished row |
| if (row_it.cycled_list ()) { |
| do { |
| block_it.forward (); //finished block |
| if (!block_it.cycled_list ()) { |
| row_it.set_to_list (block_it.data ()->row_list ()); |
| row_it.mark_cycle_pt (); |
| } |
| } |
| //find non-empty block |
| while (!block_it.cycled_list () |
| && row_it.cycled_list ()); |
| } |
| if (!row_it.cycled_list ()) { |
| word_it.set_to_list (row_it.data ()->word_list ()); |
| word_it.mark_cycle_pt (); |
| } |
| } |
| |
| // if (wordit_linearc && !word->flag(W_POLYGON)) |
| // { |
| // larc_word=word->larc_copy(row->x_height()); |
| // result=larc_word->poly_copy(row->x_height()); |
| // delete larc_word; |
| // } |
| // else |
| result = word->poly_copy (row->x_height ()); |
| return result; |
| } |
| } |
| |
| |
| /********************************************************************** |
| * make_pseudo_word |
| * |
| * Make all the blobs inside a selection into a single word. |
| * The word is always a copy and needs to be deleted. |
| **********************************************************************/ |
| |
| WERD *make_pseudo_word( //make fake word |
| BLOCK_LIST *block_list, //blocks to check //block of selection |
| TBOX &selection_box, |
| BLOCK *&pseudo_block, |
| ROW *&pseudo_row //row of selection |
| ) { |
| BLOCK_IT block_it(block_list); |
| BLOCK *block; |
| ROW_IT row_it; |
| ROW *row; |
| WERD_IT word_it; |
| WERD *word; |
| PBLOB_IT blob_it; |
| PBLOB *blob; |
| PBLOB_LIST new_blobs; //list of gathered blobs |
| //iterator |
| PBLOB_IT new_blob_it = &new_blobs; |
| WERD *pseudo_word; //fabricated word |
| WERD *poly_word; //poly copy of word |
| // WERD *larc_word; //linearc copy |
| |
| for (block_it.mark_cycle_pt (); |
| !block_it.cycled_list (); block_it.forward ()) { |
| block = block_it.data (); |
| if (block->bounding_box ().overlap (selection_box)) { |
| pseudo_block = block; |
| row_it.set_to_list (block->row_list ()); |
| for (row_it.mark_cycle_pt (); |
| !row_it.cycled_list (); row_it.forward ()) { |
| row = row_it.data (); |
| if (row->bounding_box ().overlap (selection_box)) { |
| word_it.set_to_list (row->word_list ()); |
| for (word_it.mark_cycle_pt (); |
| !word_it.cycled_list (); word_it.forward ()) { |
| word = word_it.data (); |
| if (word->bounding_box ().overlap (selection_box)) { |
| // if (wordit_linearc && !word->flag(W_POLYGON)) |
| // { |
| // larc_word=word->larc_copy(row->x_height()); |
| // poly_word=larc_word->poly_copy(row->x_height()); |
| // delete larc_word; |
| // } |
| // else |
| poly_word = word->poly_copy (row->x_height ()); |
| blob_it.set_to_list (poly_word->blob_list ()); |
| for (blob_it.mark_cycle_pt (); |
| !blob_it.cycled_list (); blob_it.forward ()) { |
| blob = blob_it.data (); |
| if (blob->bounding_box (). |
| overlap (selection_box)) { |
| new_blob_it.add_after_then_move (blob_it. |
| extract |
| ()); |
| //steal off list |
| pseudo_row = row; |
| } |
| } |
| delete poly_word; //get rid of it |
| } |
| } |
| } |
| } |
| } |
| } |
| if (!new_blobs.empty ()) { |
| //make new word |
| pseudo_word = new WERD (&new_blobs, 1, NULL); |
| } |
| else |
| pseudo_word = NULL; |
| return pseudo_word; |
| } |