ccmain/otsuthr.cpp - platform/external/tesseract - Git at Google

 /**********************************************************************
  * File:        otsuthr.cpp
  * Description: Simple Otsu thresholding for binarizing images.
  * Author:      Ray Smith
  * Created:     Fri Mar 07 12:31:01 PST 2008
  *
  * (C) Copyright 2008, Google Inc.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  ** http://www.apache.org/licenses/LICENSE-2.0
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  *
  **********************************************************************/

 #include <string.h>
 #include "otsuthr.h"

 namespace tesseract {

 // Compute the Otsu threshold(s) for the given image rectangle, making one
 // for each channel. Each channel is always one byte per pixel.
 // Returns an array of threshold values and an array of hi_values, such
 // that a pixel value >threshold[channel] is considered foreground if
 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
 // that there is no apparent foreground. At least one hi_value will not be -1.
 // Delete thresholds and hi_values with delete [] after use.
 void OtsuThreshold(const unsigned char* imagedata,
                    int bytes_per_pixel, int bytes_per_line,
                    int left, int top, int width, int height,
                    int** thresholds, int** hi_values) {
   // Of all channels with no good hi_value, keep the best so we can always
   // produce at least one answer.
   int best_hi_value = 0;
   int best_hi_index = 0;
   bool any_good_hivalue = false;
   double best_hi_dist = 0.0;
   *thresholds = new int[bytes_per_pixel];
   *hi_values = new int[bytes_per_pixel];

   for (int ch = 0; ch < bytes_per_pixel; ++ch) {
     (*thresholds)[ch] = 0;
     (*hi_values)[ch] = -1;
     // Compute the histogram of the image rectangle.
     int histogram[kHistogramSize];
     HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line,
                   left, top, width, height, histogram);
     int H;
     int best_omega_0;
     int best_t = OtsuStats(histogram, &H, &best_omega_0);
     if (best_omega_0 == 0 || best_omega_0 == H) {
        // This channel is empty.
        continue;
      }
     // To be a convincing foreground we must have a small fraction of H
     // or to be a convincing background we must have a large fraction of H.
     // In between we assume this channel contains no thresholding information.
     int hi_value = best_omega_0 < H * 0.5;
     (*thresholds)[ch] = best_t;
     if (best_omega_0 > H * 0.75) {
       any_good_hivalue = true;
       (*hi_values)[ch] = 0;
     } else if (best_omega_0 < H * 0.25) {
       any_good_hivalue = true;
       (*hi_values)[ch] = 1;
     } else {
       // In case all channels are like this, keep the best of the bad lot.
       double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
       if (hi_dist > best_hi_dist) {
         best_hi_dist = hi_dist;
         best_hi_value = hi_value;
         best_hi_index = ch;
       }
     }
   }
   if (!any_good_hivalue) {
     // Use the best of the ones that were not good enough.
     (*hi_values)[best_hi_index] = best_hi_value;
   }
 }

 // Compute the histogram for the given image rectangle, and the given
 // channel. (Channel pointed to by imagedata.) Each channel is always
 // one byte per pixel.
 // Bytes per pixel is used to skip channels not being
 // counted with this call in a multi-channel (pixel-major) image.
 // Histogram is always a kHistogramSize(256) element array to count
 // occurrences of each pixel value.
 void HistogramRect(const unsigned char* imagedata,
                    int bytes_per_pixel, int bytes_per_line,
                    int left, int top, int width, int height,
                    int* histogram) {
   int bottom = top + height;
   memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
   const unsigned char* pixels = imagedata +
                                 top * bytes_per_line +
                                 left * bytes_per_pixel;
   for (int y = top; y < bottom; ++y) {
     for (int x = 0; x < width; ++x) {
       ++histogram[pixels[x * bytes_per_pixel]];
     }
     pixels += bytes_per_line;
   }
 }

 // Compute the Otsu threshold(s) for the given histogram.
 // Also returns H = total count in histogram, and
 // omega0 = count of histogram below threshold.
 int OtsuStats(const int* histogram, int* H_out, int* omega0_out) {
   int H = 0;
   double mu_T = 0.0;
   for (int i = 0; i < kHistogramSize; ++i) {
     H += histogram[i];
     mu_T += i * histogram[i];
   }

   // Now maximize sig_sq_B over t.
   // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
   int best_t = -1;
   int omega_0, omega_1;
   int best_omega_0 = 0;
   double best_sig_sq_B = 0.0;
   double mu_0, mu_1, mu_t;
   omega_0 = 0;
   mu_t = 0.0;
   for (int t = 0; t < kHistogramSize - 1; ++t) {
     omega_0 += histogram[t];
     mu_t += t * static_cast<double>(histogram[t]);
     if (omega_0 == 0)
       continue;
     omega_1 = H - omega_0;
     mu_0 = mu_t / omega_0;
     mu_1 = (mu_T - mu_t) / omega_1;
     double sig_sq_B = mu_1 - mu_0;
     sig_sq_B *= sig_sq_B * omega_0 * omega_1;
     if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
       best_sig_sq_B = sig_sq_B;
       best_t = t;
       best_omega_0 = omega_0;
     }
   }
   if (H_out != NULL) *H_out = H;
   if (omega0_out != NULL) *omega0_out = best_omega_0;
   return best_t;
 }

 }  // namespace tesseract.
	/**********************************************************************
	* File: otsuthr.cpp
	* Description: Simple Otsu thresholding for binarizing images.
	* Author: Ray Smith
	* Created: Fri Mar 07 12:31:01 PST 2008
	*
	* (C) Copyright 2008, Google Inc.
	** Licensed under the Apache License, Version 2.0 (the "License");
	** you may not use this file except in compliance with the License.
	** You may obtain a copy of the License at
	** http://www.apache.org/licenses/LICENSE-2.0
	** Unless required by applicable law or agreed to in writing, software
	** distributed under the License is distributed on an "AS IS" BASIS,
	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	** See the License for the specific language governing permissions and
	** limitations under the License.
	*
	**********************************************************************/

	#include <string.h>
	#include "otsuthr.h"

	namespace tesseract {

	// Compute the Otsu threshold(s) for the given image rectangle, making one
	// for each channel. Each channel is always one byte per pixel.
	// Returns an array of threshold values and an array of hi_values, such
	// that a pixel value >threshold[channel] is considered foreground if
	// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
	// that there is no apparent foreground. At least one hi_value will not be -1.
	// Delete thresholds and hi_values with delete [] after use.
	void OtsuThreshold(const unsigned char* imagedata,
	int bytes_per_pixel, int bytes_per_line,
	int left, int top, int width, int height,
	int thresholds, int hi_values) {
	// Of all channels with no good hi_value, keep the best so we can always
	// produce at least one answer.
	int best_hi_value = 0;
	int best_hi_index = 0;
	bool any_good_hivalue = false;
	double best_hi_dist = 0.0;
	*thresholds = new int[bytes_per_pixel];
	*hi_values = new int[bytes_per_pixel];

	for (int ch = 0; ch < bytes_per_pixel; ++ch) {
	(*thresholds)[ch] = 0;
	(*hi_values)[ch] = -1;
	// Compute the histogram of the image rectangle.
	int histogram[kHistogramSize];
	HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line,
	left, top, width, height, histogram);
	int H;
	int best_omega_0;
	int best_t = OtsuStats(histogram, &H, &best_omega_0);
	if (best_omega_0 == 0 \|\| best_omega_0 == H) {
	// This channel is empty.
	continue;
	}
	// To be a convincing foreground we must have a small fraction of H
	// or to be a convincing background we must have a large fraction of H.
	// In between we assume this channel contains no thresholding information.
	int hi_value = best_omega_0 < H * 0.5;
	(*thresholds)[ch] = best_t;
	if (best_omega_0 > H * 0.75) {
	any_good_hivalue = true;
	(*hi_values)[ch] = 0;
	} else if (best_omega_0 < H * 0.25) {
	any_good_hivalue = true;
	(*hi_values)[ch] = 1;
	} else {
	// In case all channels are like this, keep the best of the bad lot.
	double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
	if (hi_dist > best_hi_dist) {
	best_hi_dist = hi_dist;
	best_hi_value = hi_value;
	best_hi_index = ch;
	}
	}
	}
	if (!any_good_hivalue) {
	// Use the best of the ones that were not good enough.
	(*hi_values)[best_hi_index] = best_hi_value;
	}
	}

	// Compute the histogram for the given image rectangle, and the given
	// channel. (Channel pointed to by imagedata.) Each channel is always
	// one byte per pixel.
	// Bytes per pixel is used to skip channels not being
	// counted with this call in a multi-channel (pixel-major) image.
	// Histogram is always a kHistogramSize(256) element array to count
	// occurrences of each pixel value.
	void HistogramRect(const unsigned char* imagedata,
	int bytes_per_pixel, int bytes_per_line,
	int left, int top, int width, int height,
	int* histogram) {
	int bottom = top + height;
	memset(histogram, 0, sizeof(histogram) kHistogramSize);
	const unsigned char* pixels = imagedata +
	top * bytes_per_line +
	left * bytes_per_pixel;
	for (int y = top; y < bottom; ++y) {
	for (int x = 0; x < width; ++x) {
	++histogram[pixels[x * bytes_per_pixel]];
	}
	pixels += bytes_per_line;
	}
	}

	// Compute the Otsu threshold(s) for the given histogram.
	// Also returns H = total count in histogram, and
	// omega0 = count of histogram below threshold.
	int OtsuStats(const int* histogram, int* H_out, int* omega0_out) {
	int H = 0;
	double mu_T = 0.0;
	for (int i = 0; i < kHistogramSize; ++i) {
	H += histogram[i];
	mu_T += i * histogram[i];
	}

	// Now maximize sig_sq_B over t.
	// http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
	int best_t = -1;
	int omega_0, omega_1;
	int best_omega_0 = 0;
	double best_sig_sq_B = 0.0;
	double mu_0, mu_1, mu_t;
	omega_0 = 0;
	mu_t = 0.0;
	for (int t = 0; t < kHistogramSize - 1; ++t) {
	omega_0 += histogram[t];
	mu_t += t * static_cast<double>(histogram[t]);
	if (omega_0 == 0)
	continue;
	omega_1 = H - omega_0;
	mu_0 = mu_t / omega_0;
	mu_1 = (mu_T - mu_t) / omega_1;
	double sig_sq_B = mu_1 - mu_0;
	sig_sq_B = sig_sq_B omega_0 * omega_1;
	if (best_t < 0 \|\| sig_sq_B > best_sig_sq_B) {
	best_sig_sq_B = sig_sq_B;
	best_t = t;
	best_omega_0 = omega_0;
	}
	}
	if (H_out != NULL) *H_out = H;
	if (omega0_out != NULL) *omega0_out = best_omega_0;
	return best_t;
	}

	} // namespace tesseract.