dict/permngram.cpp - platform/external/tesseract - Git at Google

 ///////////////////////////////////////////////////////////////////////
 // File:        permngram.cpp
 // Description: Character n-gram permuter
 // Author:      Thomas Kielbus
 // Created:     Wed Sep 12 11:26:43 PDT 2007
 //
 // (C) Copyright 2007, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 ///////////////////////////////////////////////////////////////////////

 #include "const.h"
 #include "permngram.h"
 #include "permute.h"
 #include "dawg.h"
 #include "tordvars.h"
 #include "stopper.h"
 #include "globals.h"
 #include "context.h"
 #include "ndminx.h"
 #include "dict.h"
 #include "conversion.h"

 #include <math.h>
 #include <ctype.h>

 // Ratio to control the relative importance of the classifier and the ngram
 // in the final score of a classification unit. Must be >= 0 and <= 1.
 // A value of 1.0 uses only the shape classifier score.
 // A value of 0.0 uses only the ngram score.
 double_VAR(classifier_score_ngram_score_ratio,
            0.7,
            "");

 // Rating adjustment multiplier for words not in the DAWG. Must be >= 1.
 double_VAR(non_dawg_prefix_rating_adjustment,
            1.5,
            "");

 // HypothesisPrefix represents a word prefix during the search of the
 // character-level n-gram model based permuter.
 // It holds the data needed to create the corresponding A_CHOICE.
 // Note that the string stored in the _word data member always begin with a
 // space character. This is used by the n-gram model to score the word.
 // HypothesisPrefix also contains the node in the DAWG that is reached when
 // searching for the corresponding prefix.
 class HypothesisPrefix {
  public:
   HypothesisPrefix();
   HypothesisPrefix(const HypothesisPrefix& prefix,
                    A_CHOICE* choice,
                    bool end_of_word,
                    const tesseract::Dawg *dawg,
                    tesseract::Dict* dict);

   double rating() const {return rating_;}
   double certainty() const {return certainty_;}
   const char* word() const {return word_;}
   const char* unichar_lengths() const {return unichar_lengths_;}
   const float* certainty_array() const {return certainty_array_;}
   bool is_dawg_prefix() const {return is_dawg_prefix_;}
   NODE_REF dawg_node() const {return dawg_node_;}

  private:
   double rating_;
   double certainty_;
   char word_[UNICHAR_LEN * MAX_WERD_LENGTH + 2];
   char unichar_lengths_[MAX_WERD_LENGTH + 1];
   float certainty_array_[MAX_WERD_LENGTH + 1];
   NODE_REF dawg_node_;
   bool is_dawg_prefix_;
 };

 // HypothesisPrefix is the class used as nodes in HypothesisPrefixLists
 typedef HypothesisPrefix HypothesisPrefixListNode;

 // HypothesisPrefixList maintains a sorted list of HypothesisPrefixes. The size
 // is bounded by the argument given to the constructor.
 // For the sake of simplicity, current implementation is not as efficient as it
 // could be. The list is represented by a static array of pointers to its
 // elements. All nodes are stored in positions from 0 to (size() - 1).
 class HypothesisPrefixList {
  public:
   HypothesisPrefixList(int size_bound);
   ~HypothesisPrefixList();

   void add_node(HypothesisPrefix* node);
   int size() const {return _size;}
   void clear();
   const HypothesisPrefix& node(int index) {return *_list_nodes[index];}

  private:
   HypothesisPrefix** _list_nodes;
   int _size_bound;
   int _size;
 };

 // Return the classifier_score_ngram_score_ratio for a given choice string.
 // The classification decision for characters like comma and period should
 // be based only on shape rather than on shape and n-gram score.
 // Return 1.0 for them, the default classifier_score_ngram_score_ratio
 // otherwise.
 static double get_classifier_score_ngram_score_ratio(const char* choice);

 // Permute the given char_choices using a character level n-gram model and
 // return the best word choice found.
 // This is performed by maintaining a HypothesisPrefixList of HypothesisPrefixes.
 // For each character position, each possible character choice is appended to
 // the best current prefixes to create the list of best prefixes at the next
 // character position.
 namespace tesseract {
 A_CHOICE *Dict::ngram_permute_and_select(CHOICES_LIST char_choices,
                                          float rating_limit,
                                          const Dawg *dawg) {
   if (array_count (char_choices) <= MAX_WERD_LENGTH) {
     CHOICES choices;
     int char_index_max = array_count(char_choices);
     HypothesisPrefixList list_1(20);
     HypothesisPrefixList list_2(20);
     HypothesisPrefixList* current_list = &list_1;
     HypothesisPrefixList* next_list = &list_2;
     HypothesisPrefix* initial_node = new HypothesisPrefix();
     current_list->add_node(initial_node);
     for (int char_index = 0; char_index < char_index_max; ++char_index) {
       iterate_list(choices, (CHOICES) array_index(char_choices, char_index)) {
         A_CHOICE* choice = (A_CHOICE *) first_node(choices);
         for (int node_index = 0;
              node_index < current_list->size();
              ++node_index) {
           // Append this choice to the current node
           HypothesisPrefix* new_node = new HypothesisPrefix(
               current_list->node(node_index),
               choice,
               char_index == char_index_max - 1,
               dawg, this);
           next_list->add_node(new_node);
         }
       }
       // Clear current list and switch lists
       current_list->clear();
       HypothesisPrefixList* temp_list = current_list;
       current_list = next_list;
       next_list = temp_list;

       // Give up if the current best rating is worse than rating_limit
       if (current_list->node(0).rating() > rating_limit)
         return new_choice (NULL, NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM);
     }
     const HypothesisPrefix& best_word = current_list->node(0);
     A_CHOICE* best_choice = new_choice (best_word.word() + 1,
                                         best_word.unichar_lengths(),
                                         best_word.rating(),
                                         best_word.certainty(), -1,
                                         valid_word(best_word.word() + 1) ?
                                         SYSTEM_DAWG_PERM : TOP_CHOICE_PERM);
     LogNewWordChoice(best_choice, best_word.is_dawg_prefix() ?
                      1.0 : non_dawg_prefix_rating_adjustment,
                      const_cast<float*>(best_word.certainty_array()),
                      getUnicharset());
     return best_choice;
   } else {
     return new_choice (NULL, NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM);
   }
 }
 }  // namespace tesseract

 double get_classifier_score_ngram_score_ratio(const char* choice) {
   if (!strcmp(",", choice) ||
       !strcmp(".", choice))
     return 1.0;
   else
     return classifier_score_ngram_score_ratio;
 }

 // Initial HypothesisPrefix constructor used to create the first state of the
 // search.
 HypothesisPrefix::HypothesisPrefix() {
   rating_ = 0;
   certainty_ = MAXFLOAT;
   strcpy(word_, " ");
   unichar_lengths_[0] = '\0';
   dawg_node_ = 0;
   is_dawg_prefix_ = true;
 }

 // Main constructor to create a new HypothesisPrefix by appending a character
 // choice (A_CHOICE) to an existing HypothesisPrefix. This constructor takes
 // care of copying the original prefix's data members, appends the character
 // choice to the word and updates its rating using a character-level n-gram
 // model. The state in the DAWG is also updated.
 HypothesisPrefix::HypothesisPrefix(const HypothesisPrefix& prefix,
                                    A_CHOICE* choice,
                                    bool end_of_word,
                                    const tesseract::Dawg *dawg,
                                    tesseract::Dict* dict) {
   char* word_ptr = word_;
   const char* prefix_word_ptr = prefix.word_;

   // Copy first space character
   *(word_ptr++) = *(prefix_word_ptr++);

   // Copy existing word, unichar_lengths, certainty_array
   int char_index;
   for (char_index = 0;
        prefix.unichar_lengths_[char_index] != '\0';
        ++char_index) {
     for (int char_subindex = 0;
          char_subindex < prefix.unichar_lengths_[char_index];
          ++char_subindex) {
       *(word_ptr++) = *(prefix_word_ptr++);
     }
     unichar_lengths_[char_index] = prefix.unichar_lengths_[char_index];
     certainty_array_[char_index] = prefix.certainty_array_[char_index];
   }

   // If choice is empty, use a space character instead
   const char* class_string_choice = *class_string(choice) == '\0' ?
       " " : class_string(choice);

   // Update certainty
   certainty_ = MIN(prefix.certainty_, class_certainty(choice));

   // Apprend choice to the word
   strcpy(word_ptr, class_string_choice);
   unichar_lengths_[char_index] = strlen(class_string_choice);
   unichar_lengths_[char_index + 1] = '\0';

   // Append choice certainty to the certainty array
   certainty_array_[char_index] = class_certainty(choice);

   // Copy DAWG node state
   dawg_node_ = prefix.dawg_node_;
   is_dawg_prefix_ = prefix.is_dawg_prefix_;

   // Verify DAWG and update dawg_node_ if the current prefix is already valid
   if (is_dawg_prefix_) {
     for (int char_subindex = 0;
          class_string_choice[char_subindex] != '\0';
          ++char_subindex) {

       // TODO(daria): update this code (and the rest of ngram permuter code
       // to deal with unichar ids, make use of the new parallel dawg search
       // and use WERD_CHOICE, BLOB_CHOICE_LIST_VECTOR instead of the deprecated
       // A_CHOICE.
       tprintf("Error: ngram permuter functionality is not available\n");
       exit(1);

       // Verify each byte of the appended character. Note that word_ptr points
       // to the first byte so (word_ptr - (word_ + 1)) is the index of the first
       // new byte in the string that starts at (word_ + 1).
       /*
       int current_byte_index = word_ptr - (word_ + 1) + char_subindex;
       if (!(dict->*dict->letter_is_okay_)(
          dawg, &dawg_node_, current_byte_index, word_ + 1,
          end_of_word && class_string_choice[char_subindex + 1] == '\0')) {
         dawg_node_ = NO_EDGE;
         is_dawg_prefix_ = false;
         break;
       }
       */
     }
   }

   // Copy the prefix rating
   rating_ = prefix.rating_;

   // Compute rating of current character
   double probability = probability_in_context(prefix.word_, -1,
                                               class_string_choice, -1);

   // If last character of the word, take the following space into account
   if (end_of_word)
     probability *= probability_in_context(word_, -1, " ", -1);

   double local_classifier_score_ngram_score_ratio =
       get_classifier_score_ngram_score_ratio(class_string_choice);

   double classifier_rating = class_rating(choice);
   double ngram_rating = -log(probability) / log(2.0);
   double mixed_rating =
       local_classifier_score_ngram_score_ratio * classifier_rating +
       (1 - local_classifier_score_ngram_score_ratio) * ngram_rating;

   // If the current word is not a valid prefix, adjust the rating of the
   // character being appended. If it used to be a valid prefix, compensate for
   // previous adjustments.
   if (!is_dawg_prefix_) {
     if (prefix.is_dawg_prefix_)
       rating_ *= non_dawg_prefix_rating_adjustment;
     mixed_rating *= non_dawg_prefix_rating_adjustment;
   }

   // Update rating by adding the rating of the character being appended.
   rating_ += mixed_rating;
 }

 // Create an empty HypothesisPrefixList. Its maximum size is set to the given
 // bound.
 HypothesisPrefixList::HypothesisPrefixList(int size_bound):
     _size_bound(size_bound),
     _size(0) {
   _list_nodes = new HypothesisPrefix*[_size_bound];
   for (int i = 0; i < _size_bound; ++i)
     _list_nodes[i] = NULL;
 }

 // Destroy a HypothesisPrefixList all contained nodes are deleted as well.
 HypothesisPrefixList::~HypothesisPrefixList() {
   this->clear();
   delete[] _list_nodes;
 }

 // Add a node to the HypothesisPrefixList. Maintains the sorted list property.
 // Note that the HypothesisPrefixList takes ownership of the given node and
 // might delete it if needed. It must therefore have been allocated on the heap.
 void HypothesisPrefixList::add_node(HypothesisPrefix* node) {
   // Detect nodes that have a worst rating that the current maximum and treat
   // them separately.
   if (_size > 0 && _list_nodes[_size - 1]->rating() < node->rating()) {
     if (_size == _size_bound) {
       // The list is already full. This node will not be added
       delete node;
     } else {
       // The list is not full. Add the node at the last position.
       _list_nodes[_size] = node;
       ++_size;
     }
     return;
   }
   // Find the correct position
   int node_index_target = 0;
   while (node_index_target < _size_bound &&
          _list_nodes[node_index_target] != NULL &&
          _list_nodes[node_index_target]->rating() < node->rating()) {
     ++node_index_target;
   }
   if (node_index_target >= _size_bound) {
     delete node;
   } else {
     // Move next states by 1. Starting from the last one.
     int node_index_move = _size - 1;
     while (node_index_move >= node_index_target) {
       if (node_index_move == _size_bound - 1)
         delete _list_nodes[node_index_move];
       else
         _list_nodes[node_index_move + 1] = _list_nodes[node_index_move];
       _list_nodes[node_index_move] = NULL;
       --node_index_move;
     }
     // Insert new node
     _list_nodes[node_index_target] = node;
     // Increment size if it has changed
     if (_size < _size_bound)
       ++_size;
   }
 }

 // Delete all contained nodes and set the size of the HypothesisPrefixList to 0
 void HypothesisPrefixList::clear() {
   for (int i = 0; i < _size_bound && _list_nodes[i] != NULL; ++i) {
     delete _list_nodes[i];
     _list_nodes[i] = NULL;
   }
   _size = 0;
 }
	///////////////////////////////////////////////////////////////////////
	// File: permngram.cpp
	// Description: Character n-gram permuter
	// Author: Thomas Kielbus
	// Created: Wed Sep 12 11:26:43 PDT 2007
	//
	// (C) Copyright 2007, Google Inc.
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	// http://www.apache.org/licenses/LICENSE-2.0
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//
	///////////////////////////////////////////////////////////////////////

	#include "const.h"
	#include "permngram.h"
	#include "permute.h"
	#include "dawg.h"
	#include "tordvars.h"
	#include "stopper.h"
	#include "globals.h"
	#include "context.h"
	#include "ndminx.h"
	#include "dict.h"
	#include "conversion.h"

	#include <math.h>
	#include <ctype.h>

	// Ratio to control the relative importance of the classifier and the ngram
	// in the final score of a classification unit. Must be >= 0 and <= 1.
	// A value of 1.0 uses only the shape classifier score.
	// A value of 0.0 uses only the ngram score.
	double_VAR(classifier_score_ngram_score_ratio,
	0.7,
	"");

	// Rating adjustment multiplier for words not in the DAWG. Must be >= 1.
	double_VAR(non_dawg_prefix_rating_adjustment,
	1.5,
	"");

	// HypothesisPrefix represents a word prefix during the search of the
	// character-level n-gram model based permuter.
	// It holds the data needed to create the corresponding A_CHOICE.
	// Note that the string stored in the _word data member always begin with a
	// space character. This is used by the n-gram model to score the word.
	// HypothesisPrefix also contains the node in the DAWG that is reached when
	// searching for the corresponding prefix.
	class HypothesisPrefix {
	public:
	HypothesisPrefix();
	HypothesisPrefix(const HypothesisPrefix& prefix,
	A_CHOICE* choice,
	bool end_of_word,
	const tesseract::Dawg *dawg,
	tesseract::Dict* dict);

	double rating() const {return rating_;}
	double certainty() const {return certainty_;}
	const char* word() const {return word_;}
	const char* unichar_lengths() const {return unichar_lengths_;}
	const float* certainty_array() const {return certainty_array_;}
	bool is_dawg_prefix() const {return is_dawg_prefix_;}
	NODE_REF dawg_node() const {return dawg_node_;}

	private:
	double rating_;
	double certainty_;
	char word_[UNICHAR_LEN * MAX_WERD_LENGTH + 2];
	char unichar_lengths_[MAX_WERD_LENGTH + 1];
	float certainty_array_[MAX_WERD_LENGTH + 1];
	NODE_REF dawg_node_;
	bool is_dawg_prefix_;
	};

	// HypothesisPrefix is the class used as nodes in HypothesisPrefixLists
	typedef HypothesisPrefix HypothesisPrefixListNode;

	// HypothesisPrefixList maintains a sorted list of HypothesisPrefixes. The size
	// is bounded by the argument given to the constructor.
	// For the sake of simplicity, current implementation is not as efficient as it
	// could be. The list is represented by a static array of pointers to its
	// elements. All nodes are stored in positions from 0 to (size() - 1).
	class HypothesisPrefixList {
	public:
	HypothesisPrefixList(int size_bound);
	~HypothesisPrefixList();

	void add_node(HypothesisPrefix* node);
	int size() const {return _size;}
	void clear();
	const HypothesisPrefix& node(int index) {return *_list_nodes[index];}

	private:
	HypothesisPrefix** _list_nodes;
	int _size_bound;
	int _size;
	};

	// Return the classifier_score_ngram_score_ratio for a given choice string.
	// The classification decision for characters like comma and period should
	// be based only on shape rather than on shape and n-gram score.
	// Return 1.0 for them, the default classifier_score_ngram_score_ratio
	// otherwise.
	static double get_classifier_score_ngram_score_ratio(const char* choice);

	// Permute the given char_choices using a character level n-gram model and
	// return the best word choice found.
	// This is performed by maintaining a HypothesisPrefixList of HypothesisPrefixes.
	// For each character position, each possible character choice is appended to
	// the best current prefixes to create the list of best prefixes at the next
	// character position.
	namespace tesseract {
	A_CHOICE *Dict::ngram_permute_and_select(CHOICES_LIST char_choices,
	float rating_limit,
	const Dawg *dawg) {
	if (array_count (char_choices) <= MAX_WERD_LENGTH) {
	CHOICES choices;
	int char_index_max = array_count(char_choices);
	HypothesisPrefixList list_1(20);
	HypothesisPrefixList list_2(20);
	HypothesisPrefixList* current_list = &list_1;
	HypothesisPrefixList* next_list = &list_2;
	HypothesisPrefix* initial_node = new HypothesisPrefix();
	current_list->add_node(initial_node);
	for (int char_index = 0; char_index < char_index_max; ++char_index) {
	iterate_list(choices, (CHOICES) array_index(char_choices, char_index)) {
	A_CHOICE* choice = (A_CHOICE *) first_node(choices);
	for (int node_index = 0;
	node_index < current_list->size();
	++node_index) {
	// Append this choice to the current node
	HypothesisPrefix* new_node = new HypothesisPrefix(
	current_list->node(node_index),
	choice,
	char_index == char_index_max - 1,
	dawg, this);
	next_list->add_node(new_node);
	}
	}
	// Clear current list and switch lists
	current_list->clear();
	HypothesisPrefixList* temp_list = current_list;
	current_list = next_list;
	next_list = temp_list;

	// Give up if the current best rating is worse than rating_limit
	if (current_list->node(0).rating() > rating_limit)
	return new_choice (NULL, NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM);
	}
	const HypothesisPrefix& best_word = current_list->node(0);
	A_CHOICE* best_choice = new_choice (best_word.word() + 1,
	best_word.unichar_lengths(),
	best_word.rating(),
	best_word.certainty(), -1,
	valid_word(best_word.word() + 1) ?
	SYSTEM_DAWG_PERM : TOP_CHOICE_PERM);
	LogNewWordChoice(best_choice, best_word.is_dawg_prefix() ?
	1.0 : non_dawg_prefix_rating_adjustment,
	const_cast<float*>(best_word.certainty_array()),
	getUnicharset());
	return best_choice;
	} else {
	return new_choice (NULL, NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM);
	}
	}
	} // namespace tesseract

	double get_classifier_score_ngram_score_ratio(const char* choice) {
	if (!strcmp(",", choice) \|\|
	!strcmp(".", choice))
	return 1.0;
	else
	return classifier_score_ngram_score_ratio;
	}

	// Initial HypothesisPrefix constructor used to create the first state of the
	// search.
	HypothesisPrefix::HypothesisPrefix() {
	rating_ = 0;
	certainty_ = MAXFLOAT;
	strcpy(word_, " ");
	unichar_lengths_[0] = '\0';
	dawg_node_ = 0;
	is_dawg_prefix_ = true;
	}

	// Main constructor to create a new HypothesisPrefix by appending a character
	// choice (A_CHOICE) to an existing HypothesisPrefix. This constructor takes
	// care of copying the original prefix's data members, appends the character
	// choice to the word and updates its rating using a character-level n-gram
	// model. The state in the DAWG is also updated.
	HypothesisPrefix::HypothesisPrefix(const HypothesisPrefix& prefix,
	A_CHOICE* choice,
	bool end_of_word,
	const tesseract::Dawg *dawg,
	tesseract::Dict* dict) {
	char* word_ptr = word_;
	const char* prefix_word_ptr = prefix.word_;

	// Copy first space character
	(word_ptr++) = (prefix_word_ptr++);

	// Copy existing word, unichar_lengths, certainty_array
	int char_index;
	for (char_index = 0;
	prefix.unichar_lengths_[char_index] != '\0';
	++char_index) {
	for (int char_subindex = 0;
	char_subindex < prefix.unichar_lengths_[char_index];
	++char_subindex) {
	(word_ptr++) = (prefix_word_ptr++);
	}
	unichar_lengths_[char_index] = prefix.unichar_lengths_[char_index];
	certainty_array_[char_index] = prefix.certainty_array_[char_index];
	}

	// If choice is empty, use a space character instead
	const char* class_string_choice = *class_string(choice) == '\0' ?
	" " : class_string(choice);

	// Update certainty
	certainty_ = MIN(prefix.certainty_, class_certainty(choice));

	// Apprend choice to the word
	strcpy(word_ptr, class_string_choice);
	unichar_lengths_[char_index] = strlen(class_string_choice);
	unichar_lengths_[char_index + 1] = '\0';

	// Append choice certainty to the certainty array
	certainty_array_[char_index] = class_certainty(choice);

	// Copy DAWG node state
	dawg_node_ = prefix.dawg_node_;
	is_dawg_prefix_ = prefix.is_dawg_prefix_;

	// Verify DAWG and update dawg_node_ if the current prefix is already valid
	if (is_dawg_prefix_) {
	for (int char_subindex = 0;
	class_string_choice[char_subindex] != '\0';
	++char_subindex) {

	// TODO(daria): update this code (and the rest of ngram permuter code
	// to deal with unichar ids, make use of the new parallel dawg search
	// and use WERD_CHOICE, BLOB_CHOICE_LIST_VECTOR instead of the deprecated
	// A_CHOICE.
	tprintf("Error: ngram permuter functionality is not available\n");
	exit(1);

	// Verify each byte of the appended character. Note that word_ptr points
	// to the first byte so (word_ptr - (word_ + 1)) is the index of the first
	// new byte in the string that starts at (word_ + 1).
	/*
	int current_byte_index = word_ptr - (word_ + 1) + char_subindex;
	if (!(dict->*dict->letter_is_okay_)(
	dawg, &dawg_node_, current_byte_index, word_ + 1,
	end_of_word && class_string_choice[char_subindex + 1] == '\0')) {
	dawg_node_ = NO_EDGE;
	is_dawg_prefix_ = false;
	break;
	}
	*/
	}
	}

	// Copy the prefix rating
	rating_ = prefix.rating_;

	// Compute rating of current character
	double probability = probability_in_context(prefix.word_, -1,
	class_string_choice, -1);

	// If last character of the word, take the following space into account
	if (end_of_word)
	probability *= probability_in_context(word_, -1, " ", -1);

	double local_classifier_score_ngram_score_ratio =
	get_classifier_score_ngram_score_ratio(class_string_choice);

	double classifier_rating = class_rating(choice);
	double ngram_rating = -log(probability) / log(2.0);
	double mixed_rating =
	local_classifier_score_ngram_score_ratio * classifier_rating +
	(1 - local_classifier_score_ngram_score_ratio) * ngram_rating;

	// If the current word is not a valid prefix, adjust the rating of the
	// character being appended. If it used to be a valid prefix, compensate for
	// previous adjustments.
	if (!is_dawg_prefix_) {
	if (prefix.is_dawg_prefix_)
	rating_ *= non_dawg_prefix_rating_adjustment;
	mixed_rating *= non_dawg_prefix_rating_adjustment;
	}

	// Update rating by adding the rating of the character being appended.
	rating_ += mixed_rating;
	}

	// Create an empty HypothesisPrefixList. Its maximum size is set to the given
	// bound.
	HypothesisPrefixList::HypothesisPrefixList(int size_bound):
	_size_bound(size_bound),
	_size(0) {
	_list_nodes = new HypothesisPrefix*[_size_bound];
	for (int i = 0; i < _size_bound; ++i)
	_list_nodes[i] = NULL;
	}

	// Destroy a HypothesisPrefixList all contained nodes are deleted as well.
	HypothesisPrefixList::~HypothesisPrefixList() {
	this->clear();
	delete[] _list_nodes;
	}

	// Add a node to the HypothesisPrefixList. Maintains the sorted list property.
	// Note that the HypothesisPrefixList takes ownership of the given node and
	// might delete it if needed. It must therefore have been allocated on the heap.
	void HypothesisPrefixList::add_node(HypothesisPrefix* node) {
	// Detect nodes that have a worst rating that the current maximum and treat
	// them separately.
	if (_size > 0 && _list_nodes[_size - 1]->rating() < node->rating()) {
	if (_size == _size_bound) {
	// The list is already full. This node will not be added
	delete node;
	} else {
	// The list is not full. Add the node at the last position.
	_list_nodes[_size] = node;
	++_size;
	}
	return;
	}
	// Find the correct position
	int node_index_target = 0;
	while (node_index_target < _size_bound &&
	_list_nodes[node_index_target] != NULL &&
	_list_nodes[node_index_target]->rating() < node->rating()) {
	++node_index_target;
	}
	if (node_index_target >= _size_bound) {
	delete node;
	} else {
	// Move next states by 1. Starting from the last one.
	int node_index_move = _size - 1;
	while (node_index_move >= node_index_target) {
	if (node_index_move == _size_bound - 1)
	delete _list_nodes[node_index_move];
	else
	_list_nodes[node_index_move + 1] = _list_nodes[node_index_move];
	_list_nodes[node_index_move] = NULL;
	--node_index_move;
	}
	// Insert new node
	_list_nodes[node_index_target] = node;
	// Increment size if it has changed
	if (_size < _size_bound)
	++_size;
	}
	}

	// Delete all contained nodes and set the size of the HypothesisPrefixList to 0
	void HypothesisPrefixList::clear() {
	for (int i = 0; i < _size_bound && _list_nodes[i] != NULL; ++i) {
	delete _list_nodes[i];
	_list_nodes[i] = NULL;
	}
	_size = 0;
	}