/* blob: 1d9fb058ed160ab697f90af2baf2120a12378c6b [file] [log] [blame] */
/*---------------------------------------------------------------------------*
* run_seq_lts.c *
* *
* Copyright 2007, 2008 Nuance Communications, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#ifndef NO_STDERR
#include <stdio.h>
#else
extern void PrintError(char *msg, unsigned long p1, unsigned long p2, unsigned long p3);
#endif
#include "passert.h"
#include "pmemory.h"
#include "plog.h"
#include "phashtable.h"
#include "lts_error.h"
#include "lts.h"
#include "lts_seq_internal.h"
#include "port_fileio.h"
#include "platform_utils.h" /* strdup, safe_strtok, etc */
#define ASSERT(x) passert(x)
#ifdef TI_DSP
#include "tidsp_defines.h"
#endif
/* Debug-build switches tested by the "#if PRINT_..." sections of this file;
 * each one gates a group of diagnostic pfprintf() calls. All are set to 0. */
#ifdef _DEBUG
#define PRINT_LOAD_TREE_SUMMARY 0
#define PRINT_LOAD_TREE 0
#define PRINT_CONS_COMB 0
#define PRINT_DP_LETTER 0
#define PRINT_LTS_WORD 0
#define PRINT_DICT_LOOKUP 0
#endif
/* Marker tokens that appear as entries of the output phone string.
 * add_output() inserts LTS_MARKER_SYLL_START before phones whose name ends in
 * a digit; adjust_syllable_boundaries() compares entries against the
 * word-start and pron-start markers. */
#define LTS_MARKER_WORD_START "WS"
#define LTS_MARKER_PRON_START "PS"
#define LTS_MARKER_SYLL_START "SS"
#define LTS_MARKER_SYLL_START_DD "SS%d"
/* The pipe character is looked up in the letter map and used as the context
 * value for positions outside the word (see fill_up_dp_for_letter()). */
#define LTS_MARKER_PIPESEP "|"
#define LTS_MARKER_PIPESEP_CHAR '|'
/* Forward declarations of the file-local data-file readers. Each load_*
 * routine is paired with a free_* routine that releases what it allocated. */
static int load_int(PORT_FILE *fp);
static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp);
static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions);
static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap);
static SWIsltsResult free_letter_mapping(LM *lm);
static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap);
static SWIsltsResult free_phone_mapping(PM *pm);
static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp);
static SWIsltsResult free_outputs(char **outputs, char **inputs, int num);
/* load_trees() also loads the letter mapping and the questions, so
 * free_trees() releases all three. */
static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp);
static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters, LQUESTION **questions, int num_questions, LM *lm);
static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp);
static SWIsltsResult free_allowable_cons_comb(LTS *lts);
static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp);
static SWIsltsResult free_question_strings(LTS* lts);
/* Map a character to its index in the letter map myLM (case-insensitive).
 * Evaluates to the table entry for the upper-cased character; entries for
 * letters that are not in the map hold LTS_MAXCHAR (see load_letter_mapping()).
 * The character is converted to unsigned char before toupper() and before
 * indexing, so characters above 0x7F on signed-char platforms can neither
 * invoke undefined behaviour in toupper() nor produce a negative index. */
#define find_letter_index( myLet, myLM) ((myLM)->letter_index_for_letter[ (unsigned char)toupper((unsigned char)(myLet))])
/* Lookup helpers: phone name -> index, and longest matching question string. */
int find_phone(const char *ph, PM *pm);
int find_best_string(const char *str, LTS* lts);
int find_best_prefix_string(const char *str, LTS* lts);
/* Fills lts->dp for one letter of the input word; returns 1 when the letter is skipped. */
int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone);
/* in_list: non-zero when value myV is a member of question myQ's bit set.
 * qmatches: applies in_list to the data-point property selected by myQ->type.
 * NOTE(review): the macro arguments are not parenthesised; pass simple
 * identifiers only. */
#define in_list(myV, myQ) (bitarray_read_bit( myQ->membership, myV))
#define qmatches(myQ, myU) (in_list( myU->properties[ myQ->type], myQ))
/* Decision-tree evaluation and output assembly. */
int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp) ;
int find_output_for_dp(LTS *lts, int *pbackoff_output);
int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length);
int is_allowable_cons_comb(LTS *lts, const char *cons_string);
void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length);
SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *num_out);
/*------------
*
* bitarray
*
*-----------*/
/* Test bit iBiT of a bit set stored in 16-bit words: bit i lives in word
 * i/16 at position i%16. Evaluates to non-zero when the bit is set and to 0
 * otherwise (the value is the masked word, not normalised to 1).
 * Both arguments are parenthesised so that expression arguments such as
 * "base + k" select the intended word. iBiT is evaluated twice, so it must
 * not have side effects. */
#define bitarray_read_bit( biTs, iBiT) ( (biTs)[(iBiT)/16] & (1<<((iBiT)%16)) )
/* Set (iVal != 0) or clear (iVal == 0) bit iBit of a bit set stored in
 * 16-bit words (bit i lives in word i/16 at position i%16).
 *
 * bits  - word array; must be large enough to hold word iBit/16.
 * iBit  - bit number, asserted to lie in [0, 256). The lower bound is checked
 *         as well, because a negative value would index before the array and
 *         shift by a negative amount.
 * iVal  - new value of the bit, interpreted as a boolean. */
void bitarray_write_bit( unsigned short* bits, int iBit, int iVal)
{
  unsigned short mask;
  ASSERT( iBit >= 0 && iBit < 256);
  mask = (unsigned short)(1u << (iBit % 16));
  if (iVal) {
    bits[iBit / 16] |= mask;
  } else {
    bits[iBit / 16] &= (unsigned short)~mask;
  }
}
/* Rebuild a 256-bit membership set from a list of byte values.
 *
 * bits    - destination of 16 words (256 bits, 16 bits per word, the layout
 *           bitarray_read_bit() reads); every word is cleared first.
 * list    - byte values to mark as members; each element is interpreted as an
 *           unsigned char so values above 0x7F map to bits 128..255 instead
 *           of producing a negative bit number on signed-char platforms.
 * listlen - number of elements in list; values <= 0 leave the set empty.
 *
 * The previous clear loop used UCHAR_MAX/sizeof(unsigned short)/8, which is
 * 15 with a 2-byte short, so the last word (bits 240..255) was never reset. */
void bitarray_populate_from_list(unsigned short* bits, char* list, int listlen)
{
  const unsigned int num_words = (UCHAR_MAX + 1) / 16;
  unsigned int w;
  int i;
  for (w = 0; w < num_words; w++) {
    bits[w] = 0;
  }
  for (i = 0; i < listlen; i++) {
    unsigned int bit = (unsigned char)list[i];
    bits[bit / 16] |= (unsigned short)(1u << (bit % 16));
  }
}
/*-----------
*
* PHashTable
*
*-----------*/
/* Key comparator installed in the hash tables built by this file: keys are
 * NUL-terminated strings and are ordered with strcmp(); 0 means equal. */
static int HashCmpWord(const LCHAR *key1, const LCHAR *key2)
{
  const char *lhs = (const char*)key1;
  const char *rhs = (const char*)key2;
  return strcmp(lhs, rhs);
}
/* Hash function installed in the hash tables built by this file.
 * Treats key as a NUL-terminated string and folds its characters with the
 * polynomial h = 31*h + c (unsigned arithmetic, wraps on overflow).
 * Returns 0 for the empty string. */
static unsigned int HashGetCode(const void *key)
{
  const char *p;
  unsigned int hash = 0;
  for (p = (const char*)key; *p != '\0'; p++) {
    hash = hash * 31 + (unsigned int)*p;
  }
  return hash;
}
/* Build a hash table that maps each string to its index in strings[].
 *
 * strings     - array of NUL-terminated keys; the table stores the pointers,
 *               so the strings must outlive the table.
 * num_strings - number of entries in strings.
 * hashName    - name passed through to PHashTableCreate().
 *
 * When a string occurs more than once only its first index is stored, which
 * matches the result of the linear search this table replaced.
 *
 * Returns the new table (as void*), or NULL when the table cannot be created
 * or an entry cannot be inserted; a partially filled table is destroyed
 * rather than returned. Callers in this file treat NULL as an allocation
 * failure. */
void* my_PHashTableCreate_FromStrings( const char* strings[], int num_strings,
                                       const LCHAR* hashName)
{
  PHashTable* table = NULL;
  ESR_ReturnCode rc = ESR_SUCCESS;
  PHashTableArgs hashArgs;
  int i;
  hashArgs.capacity = 63;
  hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION;
  hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION;
  hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
  rc = PHashTableCreate( &hashArgs, hashName, &table);
  if (rc != ESR_SUCCESS || table == NULL) {
    return NULL;
  }
  for (i=0; i<num_strings; i++) {
    void* old;
    /* formerly the code used linear lookup, so let's avoid dups to match up */
    rc = PHashTableGetValue( table, strings[i], (void**)&old);
    if (rc == ESR_SUCCESS) {
      continue; /* key already present: keep the first index */
    }
    /* the index is stored directly in the value pointer */
    rc = PHashTablePutValue( table, strings[i], (const void *)(size_t)i, NULL );
    if (rc != ESR_SUCCESS) {
      PHashTableDestroy( table);
      return NULL;
    }
  }
  return table;
}
/*---------
*
* i/o
*
*---------*/
/* Read one integer from the data file through PORT_FREAD_INT16 and return it.
 * The value is initialised to 0 so that a read which stores nothing (for
 * example at end of file) yields 0 instead of an indeterminate value; the
 * callers use the result as an element count.
 * NOTE(review): the result of PORT_FREAD_INT16 is not checked because its
 * return convention is not visible in this file. */
static int load_int(PORT_FILE *fp)
{
  int v = 0;
  PORT_FREAD_INT16((uint16 *)&v, sizeof(int), 1, fp);
  return v;
}
/* Load the tree questions from the data file.
 *
 * File layout read here: a question count, then for each question one byte of
 * type, one byte of list length and the list bytes themselves. The list is
 * also expanded into the question's membership bit set.
 *
 * pquestions     - receives the newly allocated array of question pointers
 *                  (NULL on failure).
 * pnum_questions - receives the number of questions (0 on failure).
 * fp             - data file positioned at the question section.
 *
 * Returns SWIsltsSuccess, or SWIsltsErrAllocResource when an allocation
 * fails. On failure only the entries that were actually allocated are handed
 * to free_lquestions(), so cleanup never touches slots that were not filled
 * in. */
static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp)
{
  int i, num_questions;
  int num_allocated = 0; /* questions[0 .. num_allocated) are valid pointers */
  LQUESTION ** questions;
  SWIsltsResult nRes = SWIsltsSuccess;
  num_questions = load_int(fp);
#if PRINT_LOAD_TREE_SUMMARY
  pfprintf(PSTDOUT,"loading %d questions\n", num_questions);
#endif
  *pquestions = questions = (LQUESTION**) lts_alloc(num_questions, sizeof(LQUESTION*));
  if (questions == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  for (i=0;i<num_questions;i++) {
    questions[i] = (LQUESTION*) lts_alloc(1, sizeof(LQUESTION));
    if (questions[i] == NULL) {
      nRes = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
    num_allocated = i + 1;
#if PRINT_LOAD_TREE
    pfprintf(PSTDOUT,"LOAD_TREE: loading question %d\n", i);
#endif
    PORT_FREAD_CHAR(&(questions[i]->type), sizeof(char), 1, fp);
    PORT_FREAD_CHAR(&(questions[i]->num_list), sizeof(char), 1, fp);
    /* list is assigned before any further error exit, so cleanup always sees
       either a valid pointer or NULL here */
    questions[i]->list = (unsigned char*) lts_alloc(questions[i]->num_list, sizeof(unsigned char));
    if (questions[i]->list == NULL) {
      nRes = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
    PORT_FREAD_CHAR(questions[i]->list, sizeof(char), (questions[i]->num_list), fp);
    bitarray_populate_from_list( questions[i]->membership, (char*) questions[i]->list, questions[i]->num_list);
  }
  *pnum_questions = num_questions;
  return SWIsltsSuccess;
CLEAN_UP:
  free_lquestions(questions, num_allocated);
  *pnum_questions = 0;
  *pquestions = NULL;
  return nRes;
}
/* Deallocate an array of questions produced by load_lquestions().
 *
 * questions     - array of question pointers, or NULL (then nothing is done).
 * num_questions - number of slots in the array.
 *
 * Slots that hold NULL are skipped, so a partially filled array can be
 * released safely. Always returns SWIsltsSuccess. */
static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  int i;
  if (questions) {
    for (i=0; i<num_questions; i++) {
      if (questions[i] == NULL) {
        continue; /* slot was never allocated */
      }
      if (questions[i]->list) {
        FREE(questions[i]->list);
        questions[i]->list = NULL;
      }
      FREE(questions[i]);
      questions[i] = NULL;
    }
    FREE(questions);
  }
  return nRes;
}
/* Load the letter map from the data file.
 *
 * File layout read here: one byte holding the number of letters, then that
 * many letter characters, then that many type bytes.
 *
 * The letters are stored upper-cased, and letter_index_for_letter[] is built
 * so that every character value maps either to its index in letters[] or to
 * LTS_MAXCHAR when the character is not in the map.
 *
 * fp          - data file positioned at the letter-map section.
 * ppLetterMap - receives the new map (NULL on failure).
 *
 * Returns SWIsltsSuccess, or SWIsltsErrAllocResource when an allocation
 * fails. */
static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  unsigned char len;
  LM * lm;
  int i;
  /* pfprintf(PSTDOUT,"got len %d\n", len);*/
  lm = (LM*) lts_alloc(1, sizeof(LM));
  if (lm == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  /* make the cleanup path independent of whether lts_alloc() zero-fills */
  lm->letters = NULL;
  lm->type = NULL;
  PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
  lm->num_letters = len;
  lm->letters = (char*) lts_alloc(len, sizeof(char));
  if (lm->letters == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  lm->type = (char*) lts_alloc(len, sizeof(char));
  if (lm->type == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  PORT_FREAD_CHAR(lm->letters, sizeof(char), len, fp);
  PORT_FREAD_CHAR(lm->type, sizeof(char), len, fp);
  {
    unsigned int letter;
    for (letter=0; letter <= UCHAR_MAX; letter++)
      lm->letter_index_for_letter[letter] = LTS_MAXCHAR;
  }
  for (i=0;i<len;i++) {
    /* toupper() requires a value representable as unsigned char */
    char letter = (char) toupper((unsigned char) lm->letters[i]);
    lm->letters[i] = letter;
    lm->letter_index_for_letter[(unsigned char)letter] = i;
  }
  *ppLetterMap = lm;
  return SWIsltsSuccess;
CLEAN_UP:
  free_letter_mapping(lm);
  *ppLetterMap = NULL;
  return nRes;
}
/* Release a letter map created by load_letter_mapping().
 * Accepts NULL. Frees the letters and type arrays when present, then the map
 * itself. Always returns SWIsltsSuccess. */
static SWIsltsResult free_letter_mapping(LM *lm)
{
  if (lm == NULL) {
    return SWIsltsSuccess;
  }
  if (lm->letters != NULL) {
    FREE(lm->letters);
    lm->letters = NULL;
  }
  if (lm->type != NULL) {
    FREE(lm->type);
    lm->type = NULL;
  }
  lm->num_letters = 0;
  FREE(lm);
  return SWIsltsSuccess;
}
/* Load the phone map from the data file.
 *
 * File layout read here: a phone count, then for each phone one length byte
 * followed by that many name characters (stored NUL-terminated in memory).
 * After loading, a hash table from phone name to index is built for
 * find_phone().
 *
 * fp         - data file positioned at the phone-map section.
 * ppPhoneMap - receives the new map (NULL on failure).
 *
 * Returns SWIsltsSuccess, or SWIsltsErrAllocResource when an allocation or
 * the hash-table construction fails.
 *
 * phones and phoneH are reset right after the map is allocated so that every
 * error exit hands free_phone_mapping() well-defined fields, and num_phones
 * is trimmed to the number of names actually allocated before cleanup. */
static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  PM * pm;
  int i;
  unsigned char len;
  char * ph;
  pm = (PM*) lts_alloc(1, sizeof(PM));
  if (pm == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  pm->phones = NULL;
  pm->phoneH = NULL;
  pm->num_phones = load_int(fp);
  pm->phones = (char**) lts_alloc(pm->num_phones, sizeof(char*));
  if (pm->phones == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  for (i=0;i<pm->num_phones;i++) {
    PORT_FREAD_CHAR(&len, sizeof(unsigned char), 1, fp);
    pm->phones[i] = ph = (char*) lts_alloc(len+1, sizeof(char));
    if (ph == NULL) {
      pm->num_phones = i; /* only entries [0, i) were allocated */
      nRes = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
    PORT_FREAD_CHAR(ph, sizeof(char), len, fp);
    ph[len] = '\0';
  }
  pm->phoneH = my_PHashTableCreate_FromStrings( (const char**)pm->phones,
                                                pm->num_phones,
                                                L("lts.phoneH"));
  if(pm->phoneH == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  *ppPhoneMap = pm;
  return SWIsltsSuccess;
CLEAN_UP:
  free_phone_mapping(pm);
  *ppPhoneMap = NULL;
  return nRes;
}
/* Release a phone map created by load_phone_mapping().
 * Accepts NULL. Frees every phone name, the name array, the name->index hash
 * table and finally the map itself. Always returns SWIsltsSuccess. */
static SWIsltsResult free_phone_mapping(PM *pm)
{
  int k;
  if (pm == NULL) {
    return SWIsltsSuccess;
  }
  if (pm->phones != NULL) {
    for (k = 0; k < pm->num_phones; k++) {
      if (pm->phones[k] == NULL) {
        continue;
      }
      FREE(pm->phones[k]);
      pm->phones[k] = NULL;
    }
    FREE(pm->phones);
    pm->phones = NULL;
  }
  if (pm->phoneH != NULL) {
    PHashTableDestroy( (PHashTable*)pm->phoneH);
  }
  pm->phoneH = NULL;
  FREE(pm);
  return SWIsltsSuccess;
}
/* Load the table of tree outputs and their associated input strings.
 *
 * File layout read here: an entry count, then for each entry a length byte
 * and the output string, followed by a length byte and the input string.
 * Both strings are stored NUL-terminated; a zero length yields "".
 *
 * poutputs - receives the array of output strings (NULL on failure).
 * pinputs  - receives the array of input strings (NULL on failure).
 * pnum     - receives the number of entries (0 on failure).
 * fp       - data file positioned at the output section.
 *
 * Returns SWIsltsSuccess, or SWIsltsErrAllocResource when an allocation
 * fails; on failure everything allocated so far is released. */
static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp)
{
  char ** out_table = NULL;
  char ** in_table = NULL;
  unsigned char out_len;
  unsigned char in_len;
  int idx;
  int count = load_int(fp);

  *poutputs = out_table = (char **) lts_alloc(count, sizeof(char*));
  if (out_table == NULL) {
    goto CLEAN_UP;
  }
  *pinputs = in_table = (char **) lts_alloc(count, sizeof(char*));
  if (in_table == NULL) {
    goto CLEAN_UP;
  }
  for (idx = 0; idx < count; idx++) {
    char * out_str;
    char * in_str;

    /* output string */
    PORT_FREAD_CHAR(&out_len, sizeof(char), 1, fp);
    out_table[idx] = out_str = lts_alloc(out_len + 1, sizeof(char));
    if (out_str == NULL) {
      goto CLEAN_UP;
    }
    if (out_len > 0) {
      PORT_FREAD_CHAR(out_str, sizeof(char), out_len, fp);
    }
    out_str[out_len] = '\0';

    /* input string */
    PORT_FREAD_CHAR(&in_len, sizeof(char), 1, fp);
    in_table[idx] = in_str = lts_alloc(in_len + 1, sizeof(char));
    if (in_str == NULL) {
      goto CLEAN_UP;
    }
    if (in_len > 0) {
      PORT_FREAD_CHAR(in_str, sizeof(char), in_len, fp);
    }
    in_str[in_len] = '\0';
#if PRINT_LOAD_TREE
    if (in_len > 0) pfprintf(PSTDOUT,"LOAD_TREE: got input %s out %s\n", in_str, out_table[idx]);
    pfprintf(PSTDOUT,"LOAD_TREE: outputs[%d] len %d out %x out %s\n", idx, out_len, out_table[idx], out_table[idx]);
#endif
  }
  *pnum = count;
  return SWIsltsSuccess;

CLEAN_UP:
  /* every failure in this routine is an allocation failure */
  free_outputs(out_table, in_table, count);
  *poutputs = NULL;
  *pinputs = NULL;
  *pnum = 0;
  return SWIsltsErrAllocResource;
}
/* Free one array of num heap strings; NULL arrays and NULL slots are skipped. */
static void free_output_table(char **table, int num)
{
  int k;
  if (table == NULL) {
    return;
  }
  for (k = 0; k < num; k++) {
    if (table[k] != NULL) {
      FREE(table[k]);
      table[k] = NULL;
    }
  }
  FREE(table);
}

/* Release the two parallel string tables created by load_outputs().
 * Either table may be NULL; num is the number of slots in each.
 * Always returns SWIsltsSuccess. */
static SWIsltsResult free_outputs(char **outputs, char **inputs, int num)
{
  free_output_table(outputs, num);
  free_output_table(inputs, num);
  return SWIsltsSuccess;
}
/* Load the letter map, the questions and one decision tree per letter.
 *
 * Sections are read in this order: letter map, questions, tree count, then
 * for each tree a node count followed by three 16-bit values per node
 * (left child, value-or-question1, question2). For non-leaf nodes a
 * question2 value of -1 is replaced by 0.
 *
 * ptrees / num_letters       - receive the tree array and its length.
 * pquestions / num_questions - receive the question array and its length.
 * plm                        - receives the letter map.
 * fp                         - data file positioned at the letter map.
 *
 * Returns SWIsltsSuccess; the error code of a failing sub-loader;
 * SWIsltsInternalErr when the tree count differs from the number of letters
 * in the map; or SWIsltsErrAllocResource. On failure everything loaded here
 * is released and all outputs are reset to NULL / 0. */
static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
                                LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp)
{
  SWIsltsResult status = SWIsltsSuccess;
  RT_LTREE ** forest = NULL;
  RT_LTREE * cur = NULL;
  int t;
  int n;
#if PRINT_LOAD_TREE_SUMMARY
  pfprintf(PSTDOUT,"loading letter mapping\n");
#endif
  *ptrees = NULL;
  *pquestions = NULL;
  *plm = NULL;

  status = load_letter_mapping(fp, plm);
  if (status != SWIsltsSuccess) {
    goto CLEAN_UP;
  }
#if PRINT_LOAD_TREE_SUMMARY
  pfprintf(PSTDOUT,"loading questions\n");
#endif
  status = load_lquestions(pquestions, num_questions, fp);
  if (status != SWIsltsSuccess) {
    goto CLEAN_UP;
  }

  /* there must be exactly one tree per mapped letter */
  *num_letters = load_int(fp);
  if (*num_letters != (*plm)->num_letters) {
#ifndef NO_STDERR
    PLogError(L("Error loading data, num_letters %d doesn't match num from mapping %d\n"),
              *num_letters, (*plm)->num_letters);
#endif
    status = SWIsltsInternalErr;
    goto CLEAN_UP;
  }

  *ptrees = forest = (RT_LTREE**) lts_alloc(*num_letters, sizeof(RT_LTREE*));
  if (forest == NULL) {
    status = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }

  for (t = 0; t < *num_letters; t++) {
    /* pfprintf(PSTDOUT,"loading for t %d\n", t);*/
    forest[t] = cur = (RT_LTREE*) lts_alloc(1, sizeof(RT_LTREE));
    if (cur == NULL) {
      status = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
    cur->num_nodes = load_int(fp);

    cur->values_or_question1 = (short*) lts_alloc(cur->num_nodes, sizeof(short));
    if (cur->values_or_question1 == NULL) {
      status = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
    cur->question2 = (short*) lts_alloc(cur->num_nodes, sizeof(short));
    if (cur->question2 == NULL) {
      status = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
    cur->left_nodes = (short *) lts_alloc(cur->num_nodes, sizeof(short));
    if (cur->left_nodes == NULL) {
      status = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
#if PRINT_LOAD_TREE
    pfprintf(PSTDOUT,"LOAD_TREE: Tree for let %d num_nodes %d\n", t, cur->num_nodes);
#endif
    for (n = 0; n < cur->num_nodes; n++) {
      PORT_FREAD_INT16(&(cur->left_nodes[n]), sizeof(short), 1, fp);
      PORT_FREAD_INT16(&(cur->values_or_question1[n]), sizeof(short), 1, fp);
#if PRINT_LOAD_TREE
      pfprintf(PSTDOUT,"LOAD_TREE: node[%d] %d %d", n, cur->left_nodes[n], cur->values_or_question1[n]);
#endif
      PORT_FREAD_INT16(&(cur->question2[n]), sizeof(short), 1, fp);
      if (cur->left_nodes[n] != NO_NODE) {
        /* internal node: -1 in the file becomes 0 */
        if (cur->question2[n] == -1) {
          cur->question2[n] = 0;
        }
#if PRINT_LOAD_TREE
        pfprintf(PSTDOUT," %x", (unsigned short) cur->question2[n]);
#endif
      }
#if PRINT_LOAD_TREE
      pfprintf(PSTDOUT,"\n");
#endif
    }
  }
  return SWIsltsSuccess;

CLEAN_UP:
  free_trees(forest, *num_letters, *pquestions, *num_questions, *plm);
  *ptrees = NULL;
  *pquestions = NULL;
  *plm = NULL;
  *num_letters = 0;
  *num_questions = 0;
  return status;
}
/* Release everything load_trees() produced: the letter map, the questions
 * and the per-letter trees. Any of the three pointers may be NULL, and NULL
 * slots in the tree array are skipped. Always returns SWIsltsSuccess. */
static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters,
                                LQUESTION **questions, int num_questions, LM *lm)
{
  int t;
  if (lm != NULL) {
    free_letter_mapping(lm);
  }
  if (questions != NULL) {
    free_lquestions(questions, num_questions);
  }
  if (trees == NULL) {
    return SWIsltsSuccess;
  }
  for (t = 0; t < num_letters; t++) {
    RT_LTREE * cur = trees[t];
    if (cur == NULL) {
      continue;
    }
    if (cur->values_or_question1 != NULL) {
      FREE(cur->values_or_question1);
      cur->values_or_question1 = NULL;
    }
    if (cur->question2 != NULL) {
      FREE(cur->question2);
      cur->question2 = NULL;
    }
    if (cur->left_nodes != NULL) {
      FREE(cur->left_nodes);
      cur->left_nodes = NULL;
    }
    FREE(trees[t]);
    trees[t] = NULL;
  }
  FREE(trees);
  return SWIsltsSuccess;
}
/* Load the allowable consonant combinations from the rest of the data file.
 *
 * The remainder of the file is read line by line (up to 49 characters per
 * read). Each line is split into tokens, the tokens are re-joined with single
 * spaces, and the result is stored in lts->allowable_cons_comb[]. A hash
 * table over the stored strings is then built for is_allowable_cons_comb().
 *
 * Returns SWIsltsSuccess; SWIsltsErrAllocResource when an allocation or the
 * hash table construction fails; SWIsltsInternalErr when the number of
 * combinations reaches MAX_CONS_COMB. On failure the entries loaded so far
 * are released. An empty section is only logged as a warning.
 *
 * The join buffer is two bytes larger than the line buffer: a 49-character
 * token plus its trailing space plus the NUL needs 51 bytes, which overflowed
 * the previous 50-byte buffer. */
static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  char line[50];
  char tempstr[sizeof(line) + 2];
  char * tok;
  int i, toklen;
  int count;
  char seps[] = " \n";
  lts->num_cons_comb = 0;
  lts->allowable_cons_combH = NULL;
  while (PORT_FGETS(line, 50, fp)) {
#ifndef TI_DSP
    /* need to get rid of some crud at the end of the line because it is being
       read in binary mode; isalpha() needs an unsigned char value */
    for (i=(int)strlen(line)-1;i>=0;i--) {
      if (!isalpha((unsigned char)line[i])) line[i] = ' ';
    }
#endif
    count = 0;
    tok = safe_strtok(line, seps, &toklen);
    tempstr[0] = '\0';
    /* get all available sequence of tokens */
    while(tok && toklen > 0){
      strncat(tempstr, tok, toklen);
      strcat(tempstr, " ");
      count += toklen + 1;
      tok = safe_strtok(tok+toklen, seps, &toklen);
    }
    if (count > 0) {
      /* delete the final space */
      tempstr[count-1] = '\0';
      lts->allowable_cons_comb[lts->num_cons_comb] = (char*) lts_alloc(strlen(tempstr)+1, sizeof(char));
      if (lts->allowable_cons_comb[lts->num_cons_comb] == NULL) {
        nRes = SWIsltsErrAllocResource;
        goto CLEAN_UP;
      }
      strcpy(lts->allowable_cons_comb[lts->num_cons_comb], tempstr);
#if PRINT_CONS_COMB
      pfprintf(PSTDOUT,"LOAD_TREE: allowable_cons_comb[%d]: %s\n", lts->num_cons_comb, tempstr);
#endif
      lts->num_cons_comb++;
      if (lts->num_cons_comb >= MAX_CONS_COMB) {
#ifndef NO_STDERR
        PLogError(L("MAX_CONS_COMB %d exceeded\n"), MAX_CONS_COMB);
#endif
        nRes = SWIsltsInternalErr;
        goto CLEAN_UP;
      }
    }
  }
  if (lts->num_cons_comb == 0) {
#ifndef NO_STDERR
    PLogError(L("Warning: the data file is missing consonant combinations - syllable boundaries will be incorrect\n"));
#endif
  }
  lts->allowable_cons_combH = my_PHashTableCreate_FromStrings( (const char**)lts->allowable_cons_comb, lts->num_cons_comb, L("lts.allowable_cons_combH"));
  if(lts->allowable_cons_combH == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
#if PRINT_LOAD_TREE_SUMMARY
  pfprintf(PSTDOUT,"loaded %d cons combinations\n", lts->num_cons_comb);
#endif
  return SWIsltsSuccess;
CLEAN_UP:
  free_allowable_cons_comb(lts);
  return nRes;
}
/* Release the consonant-combination strings and their lookup table.
 * Walks the first lts->num_cons_comb slots, freeing and clearing each
 * non-NULL entry, then destroys the hash table if there is one.
 * Always returns SWIsltsSuccess. */
static SWIsltsResult free_allowable_cons_comb(LTS *lts)
{
  int k;
  for (k = 0; k < lts->num_cons_comb; k++) {
    if (lts->allowable_cons_comb[k] == NULL) {
      continue;
    }
    FREE(lts->allowable_cons_comb[k]);
    lts->allowable_cons_comb[k] = NULL;
  }
  if (lts->allowable_cons_combH != NULL) {
    PHashTableDestroy( (PHashTable*)lts->allowable_cons_combH);
  }
  lts->allowable_cons_combH = NULL;
  return SWIsltsSuccess;
}
/* Load the question strings used for the LeftString / RightString features.
 *
 * File layout read here: a string count, then for each string a length byte
 * followed by that many characters (stored NUL-terminated in memory). The
 * length of each string is cached in lts->string_lens[].
 *
 * Returns SWIsltsSuccess, or SWIsltsErrAllocResource when an allocation
 * fails.
 *
 * lts->num_strings is advanced as each string is allocated, so that
 * free_question_strings(), which walks num_strings entries, releases exactly
 * the strings loaded so far when a later allocation fails. Previously the
 * count was only set on success and those strings were leaked.
 *
 * NOTE(review): lts->membership is rebuilt for every string, so after loading
 * it reflects only the last one; confirm that this is intended.
 * NOTE(review): string_lens[] elements are char, so on signed-char platforms
 * a length above 127 would be stored as a negative value. */
static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  int i;
  int num;
  unsigned char len;
  char ** strings;
  char * str;
  num = load_int(fp);
  lts->num_strings = 0;
  lts->strings = strings = (char **) lts_alloc(num, sizeof(char*));
  lts->string_lens = (char*)lts_alloc(num, sizeof(char));
  if (strings == NULL || lts->string_lens == NULL ) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  for (i=0;i<num;i++) {
    PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
    str = strings[i] = lts_alloc(len + 1, sizeof(char));
    if (str == NULL) {
      nRes = SWIsltsErrAllocResource;
      goto CLEAN_UP;
    }
    lts->num_strings = i + 1; /* strings[0 .. i] are now owned by lts */
    if (len > 0) {
      PORT_FREAD_CHAR(str, sizeof(char), len, fp);
    }
    str[len] = '\0';
    bitarray_populate_from_list( lts->membership, lts->strings[i], len);
    lts->string_lens[i] = strlen(lts->strings[i]);
  }
  lts->num_strings = num;
  return SWIsltsSuccess;
CLEAN_UP:
  /* the length table is released here when the string table itself could not
     be allocated, so it cannot be left behind */
  if (lts->strings == NULL && lts->string_lens != NULL) {
    FREE(lts->string_lens);
    lts->string_lens = NULL;
  }
  free_question_strings(lts);
  lts->num_strings = 0;
  return nRes;
}
/* Deallocate the question strings loaded by load_question_strings().
 * Frees the first lts->num_strings strings and the string table, then the
 * cached length table. The length table is released independently of the
 * string table, so it is not leaked when only one of the two was allocated.
 * The count is reset so the freed tables cannot be walked afterwards.
 * Always returns SWIsltsSuccess. */
static SWIsltsResult free_question_strings(LTS* lts)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  int i;
  if (lts->strings) {
    for (i=0;i<lts->num_strings;i++) {
      if (lts->strings[i]) {
        FREE(lts->strings[i]);
        lts->strings[i] = NULL;
      }
    }
    FREE(lts->strings);
    lts->strings = NULL;
  }
  if (lts->string_lens) {
    FREE(lts->string_lens);
    lts->string_lens = NULL;
  }
  lts->num_strings = 0;
  return nRes;
}
/* Create a letter-to-sound engine from a data file.
 *
 * data_filename - path of the binary LTS data file.
 * phLts         - receives the new handle, or NULL on failure.
 *
 * Unless USE_STATIC_SLTS is defined, the sections are loaded in file order:
 * phone map, question strings, outputs, trees (with letter map and
 * questions), consonant combinations.
 *
 * Returns SWIsltsSuccess; SWIsltsErrAllocResource; SWIsltsFileOpenErr when
 * the file cannot be opened; or the error code of the loader that failed.
 * On failure the partially built engine is released with free_lts() and the
 * data file is closed (it was previously left open on every error path after
 * a successful open). */
SWIsltsResult create_lts(char *data_filename, LTS_HANDLE *phLts)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  LTS * lts;
#ifdef USE_STATIC_SLTS
  /* TODO: language-specific ID here? */
  lts = &g_lts;
#else /* !USE_STATIC_SLTS */
  PORT_FILE *fp = NULL;
  lts = (LTS*) lts_alloc(1, sizeof(LTS));
  if (lts == NULL) {
    nRes = SWIsltsErrAllocResource;
    goto CLEAN_UP;
  }
  fp = PORT_FOPEN(data_filename, "rb");
  if (fp == NULL) {
#ifndef NO_STDERR
    PLogError(L("Cannot open %s\n"), data_filename);
#endif
    nRes = SWIsltsFileOpenErr;
    goto CLEAN_UP;
  }
  nRes = load_phone_mapping(fp, &lts->phone_mapping);
  if (nRes != SWIsltsSuccess) {
    PLogError(L("SWIsltsErr: load_phone_mapping() failed: Err_code = %d\n"), nRes);
    goto CLEAN_UP;
  }
  nRes = load_question_strings(lts, fp);
  if (nRes != SWIsltsSuccess) {
    PLogError(L("SWIsltsErr: load_question_strings() failed: Err_code = %d\n"), nRes);
    goto CLEAN_UP;
  }
  nRes = load_outputs(&(lts->outputs), &(lts->input_for_output), &lts->num_outputs, fp);
  if (nRes != SWIsltsSuccess) {
    PLogError(L("SWIsltsErr: load_outputs() failed: Err_code = %d\n"), nRes);
    goto CLEAN_UP;
  }
#if PRINT_LOAD_TREE
  pfprintf(PSTDOUT,"LOAD_TREE: got %d outputs, loading trees\n", lts->num_outputs);
#endif
  nRes = load_trees(&(lts->trees), &(lts->num_letters),
                    &(lts->questions), &(lts->num_questions),
                    &(lts->letter_mapping),
                    fp);
  if (nRes != SWIsltsSuccess) {
    PLogError(L("SWIsltsErr: load_trees() failed: Err_code = %d\n"), nRes);
    goto CLEAN_UP;
  }
  nRes = load_allowable_cons_comb(lts, fp);
  if (nRes != SWIsltsSuccess) {
    PLogError(L("SWIsltsErr: load_allowable_cons_comb() failed: Err_code = %d\n"), nRes);
    goto CLEAN_UP;
  }
  PORT_FCLOSE(fp);
  fp = NULL;
#endif /* !USE_STATIC_SLTS */
  *phLts = lts;
  return SWIsltsSuccess;
CLEAN_UP:
#ifndef USE_STATIC_SLTS
  /* do not leak the data file when a loader fails */
  if (fp != NULL) {
    PORT_FCLOSE(fp);
  }
#endif /* !USE_STATIC_SLTS */
  free_lts(lts);
  *phLts = NULL;
  return nRes;
}
/* Destroy an engine created by create_lts().
 * Accepts a NULL handle. Unless USE_STATIC_SLTS is defined, every loaded
 * section is released (phone map, question strings, outputs, trees with the
 * letter map and questions, consonant combinations) and then the engine
 * object itself. Always returns SWIsltsSuccess. */
SWIsltsResult free_lts(LTS_HANDLE hlts)
{
  LTS * engine = (LTS *)hlts;
  if (engine == NULL) {
    return SWIsltsSuccess;
  }
#ifndef USE_STATIC_SLTS
  free_phone_mapping(engine->phone_mapping);
  engine->phone_mapping = NULL;

  free_question_strings(engine);
  engine->strings = NULL;

  free_outputs(engine->outputs, engine->input_for_output, engine->num_outputs);
  engine->outputs = NULL;
  engine->input_for_output = NULL;

  free_trees(engine->trees, engine->num_letters,
             engine->questions, engine->num_questions,
             engine->letter_mapping);
  engine->trees = NULL;
  engine->questions = NULL;
  engine->letter_mapping = NULL;

  free_allowable_cons_comb(engine);
  FREE(engine);
#endif /* !USE_STATIC_LTS */
  return SWIsltsSuccess;
}
/* Look up a phone name in the phone map.
 *
 * ph - NUL-terminated phone name.
 * pm - phone map whose phoneH table was built by load_phone_mapping().
 *
 * Returns the index stored for the phone, or -1 (after logging an error)
 * when the lookup fails.
 *
 * The table stores the index inside a pointer-sized value, so it is fetched
 * into a void* and narrowed afterwards. Fetching it straight into an int, as
 * before, writes sizeof(void*) bytes into a 4-byte object on platforms where
 * pointers are wider than int. */
int find_phone(const char *ph, PM *pm)
{
  ESR_ReturnCode rc;
  void *value = NULL;
  rc = PHashTableGetValue((PHashTable*)pm->phoneH, ph, &value);
  if (rc != ESR_SUCCESS) {
    PLogError("error while in find_phone(%s,%x)\n", ph, pm);
    return -1;
  }
  return (int)(size_t)value;
}
/* Find the longest question string that str starts with.
 *
 * str - NUL-terminated text to examine.
 * lts - engine holding strings[], string_lens[] and num_strings.
 *
 * Returns the index of the longest table string that is a prefix of str (the
 * first such index when several have that length), or -1 when str is empty
 * or nothing matches. Zero-length table entries never win. */
int find_best_string(const char *str, LTS* lts)
{
  int best_index = -1;
  int best_len = 0;
  int str_len;
  int k;
  if (str[0] == '\0') {
    return -1;
  }
  str_len = strlen(str);
  for (k = 0; k < lts->num_strings; k++) {
    int cand_len = lts->string_lens[k];
    /* a candidate must fit into str and beat the current best */
    if (cand_len <= str_len && cand_len > best_len
        && strncmp(str, lts->strings[k], cand_len) == 0) {
      best_len = cand_len;
      best_index = k;
    }
  }
  return best_index;
}
/* Find the longest question string that str ends with.
 *
 * str - NUL-terminated text; the caller passes the part of the word that
 *       precedes the current letter.
 * lts - engine holding strings[], string_lens[] and num_strings.
 *
 * Returns the index of the longest table string that matches the tail of str
 * (the first such index when several have that length), or -1 when nothing
 * matches. Zero-length table entries never win. */
int find_best_prefix_string(const char *str, LTS* lts)
{
  int best_index = -1;
  int best_len = 0;
  int str_len = strlen(str);
  int k;
  for (k = 0; k < lts->num_strings; k++) {
    int cand_len = lts->string_lens[k];
    if (cand_len > str_len) {
      continue; /* cannot be a tail of str */
    }
    if (cand_len > best_len
        && strncmp(str + (str_len - cand_len), lts->strings[k], cand_len) == 0) {
      best_len = cand_len;
      best_index = k;
    }
  }
  return best_index;
}
/* Fill lts->dp, the data point the decision trees are evaluated against, for
 * the letter at position index of input_word.
 *
 * lts                  - engine; its letter map and question strings are read
 *                        and lts->dp is written.
 * input_word           - the word being converted.
 * word_len             - number of letters of input_word to consider.
 * index                - position of the current letter.
 * root_start, root_end - half-open range [root_start, root_end) used for the
 *                        "InRoot" and DFRE properties.
 * left_phone           - stored unchanged as the LeftPhone1 property.
 *
 * Returns 0 when the data point was filled in, or 1 when the letter must be
 * skipped (an apostrophe, or a character that is not in the letter map).
 *
 * NOTE(review): word[] and tempstr[] hold MAX_WORD_LEN elements and this
 * function checks neither word_len nor strlen(input_word) against that
 * size; confirm that the caller bounds them.
 * NOTE(review): word[i] is LTS_MAXCHAR for letters missing from the map and
 * is then used as an index into lm->type[], which load_letter_mapping()
 * allocates with num_letters entries; confirm that this cannot read out of
 * range. */
int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone)
{
int i,j;
LDP *dp;
unsigned char letter;
int hit_wb;
LM *lm;
unsigned char word[MAX_WORD_LEN];
char tempstr[MAX_WORD_LEN];
int first_syl_end;
int last_syl_start;
dp = &(lts->dp);
lm = lts->letter_mapping;
/* the LTS decision tree does not seem to be well trained at all for
the letter ' when followed by "s" ... It seems to result in the
phoneme 'm', which is wrong. "'t" seems to be OK though.
BAD: Kevin's : k6v6nmz ... pal's : palmz ... paul's : p{lz
BAD: janice's : jan6s6mz ... tom's house : t)mmz&h?s ... tonya's : t)ny6mz
BAD: jake's house : jAk6mz&h?s
Ignoring ' as below we get ...
BETTER: Kevin's : kev6nz ... pal's : palz ... paul's : p{lz
BETTER: janice's : jan6s6s ... tom's house : t)mz&h?s ... tonya's : t)ny6s
BETTER: jake's house : jAk6s&h?s
The proper solution requires a legitimate text normalizer with special
handling of cases like 's which would always put a "z" there,
except if preceded by an unvoiced stop (ptk) which requires a "s" there.
For now let's just skip the ' letter, which testing shows to be generally
safe (janice's, jake's etc are better but still not quite right). */
if(input_word[index] == '\'')
return 1; // same as unknown character
letter = find_letter_index(input_word[index], lm);
if (letter == LTS_MAXCHAR) {
/* lisa - we need to decide how to handle this case. Do we just silently skip unknown
characters or warn the app or user somehow*/
#ifdef NO_STDERR
PrintError("unknown character on input %c - skipping\n", input_word[index], NULL, NULL);
#else
PLogError(L("unknown character on input %c - skipping\n"), input_word[index]);
#endif
return 1;
}
/* Left context: the letter indices of the five letters before index. Positions
before the start of the word, and every position at or beyond the first
unmapped character, get the index of the '|' marker instead. */
hit_wb = 0;
/*pfprintf(PSTDOUT,"left context\n");*/
for (j=0;j<5;j++) {
if (hit_wb) {
dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
} else {
i = index - (j+1);
if (i < 0) dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
else {
dp->properties[ Left1+j] = find_letter_index(input_word[i], lm);
if (dp->properties[ Left1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
hit_wb = 1;
}
}
}
}
/* Right context: same scheme for the five letters after index, bounded by
word_len. */
/*pfprintf(PSTDOUT,"right context\n");*/
hit_wb = 0;
for (j=0;j<5;j++) {
if (hit_wb) {
dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
} else {
i = index + (j+1);
if (i >= word_len) dp->properties[Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
else {
dp->properties[ Right1+j] = find_letter_index(input_word[i], lm);
if (dp->properties[ Right1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
hit_wb = 1;
}
}
}
}
dp->letter = letter; // properties[ Letter] = letter;
dp->properties[ LeftPhone1] = left_phone;
/*pfprintf(PSTDOUT,"word stuff\n"); */
/*find word start and end - use unknown character as word boundaries*/
dp->properties[ WordLen] = word_len;
/* LetInWord: 0 for the first letter, 2 for the last, 1 otherwise. The first
test wins, so a one-letter word gets 0. */
if (index == 0) dp->properties[ LetInWord] = 0;
else if (index == word_len-1) dp->properties[ LetInWord] = 2;
else dp->properties[ LetInWord] = 1;
/* Translate the whole word to letter indices for the scans below. */
for (i=0;i<word_len;i++) {
word[i] = find_letter_index(input_word[i], lm);
}
/*figure out syllable in word - not really syllables - just looks to see if is or at first or last vowel*/
/* pfprintf(PSTDOUT,"syl stuff\n");*/
/* SylInWord. Letters whose lm->type is 1 are the ones the comment above calls
vowels. first_syl_end becomes the position just after the first run of such
letters (word_len if there is none). */
first_syl_end = word_len;
for (i=0;i<word_len;i++) {
if (lm->type[word[i]] == 1) {
for (j=i+1;j<word_len;j++) {
if (lm->type[word[j]] != 1) break;
}
first_syl_end = j;
break;
}
}
/* last_syl_start becomes the position just before the last run of type-1
letters; it is -1 when that run starts the word and 0 when there is no such
letter. */
last_syl_start = 0;
for (i=word_len-1;i>=0;i--) {
if (lm->type[word[i]] == 1) {
for (j=i-1;j>=0;j--) {
if (lm->type[word[j]] != 1) break;
}
last_syl_start = j;
break;
}
}
#if PRINT_DP_LETTER
pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
#endif
/* 2 = after last_syl_start, 0 = before first_syl_end, 1 = in between; the
"after last_syl_start" test takes precedence. */
if (index > last_syl_start) dp->properties[ SylInWord] = 2;
else if (index < first_syl_end) dp->properties[ SylInWord] = 0;
else dp->properties[ SylInWord] = 1;
/* Syl2InWord: same classification, but each boundary is pushed across the
neighbouring run of non-type-1 letters, up to the next type-1 letter. */
first_syl_end = word_len;
for (i=0;i<word_len;i++) {
if (lm->type[word[i]] == 1) {
for (j=i+1;j<word_len;j++) {
if (lm->type[word[j]] != 1) break;
}
for (;j<word_len;j++) {
if (lm->type[word[j]] == 1) break;
}
first_syl_end = j;
break;
}
}
last_syl_start = 0;
for (i=word_len-1;i>=0;i--) {
if (lm->type[word[i]] == 1) {
for (j=i-1;j>=0;j--) {
if (lm->type[word[j]] != 1) break;
}
for (;j>=0;j--) {
if (lm->type[word[j]] == 1) break;
}
last_syl_start = j;
break;
}
}
#if PRINT_DP_LETTER
pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
#endif
if (index > last_syl_start) dp->properties[ Syl2InWord] = 2;
else if (index < first_syl_end) dp->properties[ Syl2InWord] = 0;
else dp->properties[Syl2InWord] = 1;
/* SylInRoot: as SylInWord, but the first / last type-1 letter is searched for
only inside [root_start, root_end). The inner run scans still extend to the
word boundaries. */
first_syl_end = word_len;
for (i=root_start;i<root_end;i++) {
if (lm->type[word[i]] == 1) {
for (j=i+1;j<word_len;j++) {
if (lm->type[word[j]] != 1) break;
}
first_syl_end = j;
break;
}
}
last_syl_start = 0;
for (i=root_end-1;i>=root_start;i--) {
if (lm->type[word[i]] == 1) {
for (j=i-1;j>=0;j--) {
if (lm->type[word[j]] != 1) break;
}
last_syl_start = j;
break;
}
}
#if PRINT_DP_LETTER
pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
#endif
if (index > last_syl_start) dp->properties[SylInRoot] = 2;
else if (index < first_syl_end) dp->properties[ SylInRoot] = 0;
else dp->properties[ SylInRoot] = 1;
/* Syl2InRoot: as Syl2InWord, with the search restricted to the root range. */
first_syl_end = word_len;
for (i=root_start;i<root_end;i++) {
if (lm->type[word[i]] == 1) {
for (j=i+1;j<word_len;j++) {
if (lm->type[word[j]] != 1) break;
}
for (;j<word_len;j++) {
if (lm->type[word[j]] == 1) break;
}
first_syl_end = j;
break;
}
}
last_syl_start = 0;
for (i=root_end-1;i>=root_start;i--) {
if (lm->type[word[i]] == 1) {
for (j=i-1;j>=0;j--) {
if (lm->type[word[j]] != 1) break;
}
for (;j>=0;j--) {
if (lm->type[word[j]] == 1) break;
}
last_syl_start = j;
break;
}
}
#if PRINT_DP_LETTER
pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
#endif
if (index > last_syl_start) dp->properties[Syl2InRoot] = 2;
else if (index < first_syl_end) dp->properties[Syl2InRoot] = 0;
else dp->properties[Syl2InRoot] = 1;
/* Distance of the letter from the start and from the end of the root range. */
dp->properties[Left_DFRE] = index - root_start;
dp->properties[Right_DFRE] = (root_end - index) - 1;
/* pfprintf(PSTDOUT,"strings\n");*/
#if PRINT_DP_LETTER
pfprintf(PSTDOUT,"input word %s num_strings %d\n", input_word, lts->num_strings);
#endif
/* RightString: longest question string that the text after this letter starts
with. LeftString: longest question string that the text before this letter
ends with (tempstr is the word truncated at index). */
dp->properties[RightString] = find_best_string(input_word+index+1, lts);
strcpy(tempstr, input_word);
tempstr[index] = '\0';
dp->properties[LeftString] = find_best_prefix_string(tempstr, lts);
#if PRINT_DP_LETTER
pfprintf(PSTDOUT,"dp %c ", lm->letters[dp->letter]);
for (i=0;i<word_len;i++) {
pfprintf(PSTDOUT,"%c", lm->letters[word[i]]);
}
pfprintf(PSTDOUT," %c%c%c {%c} %c%c%c liw %d siw %d s2iw %d nw %d sir %d s2ir %d left_DFRE %d right_DFRE %d\n",
lm->letters[dp->left_context[2]],
lm->letters[dp->left_context[1]],
lm->letters[dp->left_context[0]],
lm->letters[dp->letter],
lm->letters[dp->right_context[0]],
lm->letters[dp->right_context[1]],
lm->letters[dp->right_context[2]],
dp->let_in_word,
dp->syl_in_word,
dp->syl2_in_word,
dp->word_len,
dp->syl_in_root,
dp->syl2_in_root,
dp->left_DFRE, dp->right_DFRE);
#endif
return 0;
}
/* Evaluate a (possibly compound) tree question against a data point.
 *
 * q1, q2 - the two questions; q2 is only examined for types 1..4.
 * type   - how the two answers are combined:
 *            0: q1            1: q1 && q2      2: q1 && !q2
 *            3: !q1 && q2     4: !q1 && !q2
 * dp     - data point whose properties are tested.
 *
 * Returns non-zero when the combination holds and 0 when it does not; for
 * type 0 the raw membership bit of q1 is returned, for types 1..4 the result
 * is 0 or 1. Any other type yields -1. */
int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp)
{
  int first;
  int second;
  if (type < 0 || type > 4) {
    return -1;
  }
  first = qmatches(q1, dp);
  if (type == 0) {
    return first;
  }
  second = qmatches(q2, dp);
  if (type == 1) {
    return (first && second);
  }
  if (type == 2) {
    return (first && !second);
  }
  if (type == 3) {
    return (!first && second);
  }
  /* type == 4 */
  return (!first && !second);
}
/* Walk the decision tree of the current letter (lts->dp.letter) and return
 * the output it selects for the data point lts->dp.
 *
 * At each internal node, values_or_question1 is the index of the first
 * question; question2 packs the index of the second question in its low 13
 * bits and the combination type (see matches()) in the three bits above
 * them. When the question holds the walk continues at the left child,
 * otherwise at the node following the left child.
 *
 * pbackoff_output - receives the question2 field of the leaf that is reached.
 *
 * Returns the values_or_question1 field of that leaf. */
int find_output_for_dp(LTS *lts, int *pbackoff_output)
{
  LDP *point = &(lts->dp);
  RT_LTREE *tree = lts->trees[point->letter]; // properties[Letter]];
  int node = 0;
  for (;;) {
    int child = tree->left_nodes[node];
    int packed;
    int kind;
    LQUESTION *first;
    LQUESTION *second;
    if (child == NO_NODE) { /*means its a leaf node*/
      *pbackoff_output = tree->question2[node];
      return tree->values_or_question1[node];
    }
    packed = tree->question2[node];
    first = lts->questions[tree->values_or_question1[node]];
    second = lts->questions[packed & 0x1FFF];
    kind = (packed & 0xE000) >> 13;
    node = matches(first, second, kind, point) ? child : child + 1;
  }
}
/* Append the phones of one tree output to the output phone string.
 *
 * output              - space-separated phone names; tokens beginning with
 *                       "null" are skipped.
 * output_phone_string - array of max_phone_length string buffers.
 * out_len             - number of entries already in use.
 * max_phone_length    - capacity of output_phone_string.
 *
 * A phone whose name ends in a digit is treated as a vowel and preceded by an
 * LTS_MARKER_SYLL_START entry, which adjust_syllable_boundaries() later moves
 * to its final position.
 *
 * Returns the new number of entries in use, or max_phone_length as soon as
 * the array is full. The capacity is now checked before the first entry is
 * written too, so a call made with an already full array no longer writes
 * past its end.
 *
 * NOTE(review): the size of the individual string buffers is not known here;
 * tokens are copied without a length bound. */
int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length)
{
  char *tok;
  int toklen;
  char seps[] = " ";
  if (strlen(output) == 0) return out_len;
  tok = safe_strtok(output, seps, &toklen);
  while (tok && toklen) {
    if ((toklen > 0) && (strncmp(tok, "null", 4) != 0)) {
      if (out_len >= max_phone_length) return max_phone_length;
      /* isdigit() requires a value representable as unsigned char */
      if (isdigit((unsigned char)tok[toklen-1])) {
        /*means it's a vowel. So, add a syllable boundary. It's position
          gets adjusted later by adjust_syllable_boundaries()*/
        strcpy(output_phone_string[out_len++], LTS_MARKER_SYLL_START);
        if (out_len >= max_phone_length) return max_phone_length;
      }
      strncpy(output_phone_string[out_len], tok, toklen);
      output_phone_string[out_len++][toklen] = '\0';
      if (out_len >= max_phone_length) return max_phone_length;
    }
    tok = safe_strtok(tok+toklen, seps, &toklen);
  }
  return out_len;
}
/*
 * Report whether cons_string (phone names separated by single spaces) is a
 * key of the lts->allowable_cons_combH hash table.
 *
 * Returns 1 when the lookup succeeds and 0 otherwise. The value stored under
 * the key is not used.
 */
int is_allowable_cons_comb(LTS *lts, const char *cons_string)
{
  void *unused_value = NULL;
  ESR_ReturnCode status =
    PHashTableGetValue( (PHashTable*)lts->allowable_cons_combH, cons_string, &unused_value);

  return (status == ESR_SUCCESS) ? 1 : 0;
}
/* Size of the scratch buffer used to build consonant-combination keys. */
enum { ASB_CONS_COMB_KEY_LEN = 64 };

/*
 * Return the value (0-9) of the last character of s when it is a decimal
 * digit, or -1 when s is empty or does not end in a digit.
 */
static int asb_trailing_digit(const char *s)
{
  size_t n = strlen(s);

  /* cast: <ctype.h> functions are undefined for negative char values */
  if (n == 0 || !isdigit((unsigned char) s[n-1])) return -1;
  return s[n-1] - '0';
}

/*
 * Build the key "p1 p2" (or "p1 p2 p3" when p3 is not NULL) and look it up
 * with is_allowable_cons_comb(). A key that does not fit in the scratch
 * buffer is reported as not allowed instead of overflowing the buffer.
 */
static int asb_cons_comb_allowed(LTS *lts, const char *p1, const char *p2, const char *p3)
{
  char key[ASB_CONS_COMB_KEY_LEN];
  size_t need = strlen(p1) + 1 + strlen(p2) + 1; /* "p1 p2" plus NUL */

  if (p3 != NULL) need += strlen(p3) + 1;        /* " p3" */
  if (need > sizeof key) return 0;

  if (p3 != NULL) sprintf(key, "%s %s %s", p1, p2, p3);
  else sprintf(key, "%s %s", p1, p2);
  return is_allowable_cons_comb(lts, key);
}

/*
 * Move the syllable markers written by add_output() to their final positions
 * and turn the vowel stress digits into marker suffixes.
 *
 * Works in place on the first num_out entries of output_phone_string, in
 * three passes:
 *  1. Every LTS_MARKER_SYLL_START entry (placed directly before a vowel) is
 *     moved back over the preceding consonants for as long as they form an
 *     allowable consonant combination, without crossing a previous vowel, a
 *     word start or a pronunciation start. The marker is rewritten as
 *     "SS<d>", where <d> is the stress digit of the vowel that follows.
 *  2. The trailing stress digit is removed from every entry that does not
 *     start with the syllable marker.
 *  3. Wherever a word-start marker is not immediately followed by a syllable
 *     marker, the next syllable marker found is moved to directly after the
 *     word-start marker.
 *
 * max_phone_length is not used.
 */
void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length)
{
  char *out;
  int i,j;
  int syl_start;
  int stress = 0;
  int first_syl_bound;

  (void) max_phone_length;

  /*there should already be a syllable boundary before each vowel (add_output put one there)*/
  /*so just find these, then shift back by allowable consonant combinations and move the syllable mark*/
  for (i=0;i<num_out;i++) {
    out = output_phone_string[i];
    if (strcmp(out, LTS_MARKER_SYLL_START) != 0) continue;

    /*means there is a syllable boundary; find start of allowable sequence*/
    syl_start = 0;
    for (j=i-1;j>0;j--) {
      out = output_phone_string[j];
      if (asb_trailing_digit(out) >= 0) {
        syl_start = j+1;
        break; /*means it's a vowel*/
      }
      if (strcmp(out, LTS_MARKER_WORD_START) == 0) {
        syl_start = j+1;
        break; /*don't push syl boundaries before word boundaries*/
      }
      if (strcmp(out, LTS_MARKER_PRON_START) == 0) {
        syl_start = j+1;
        break; /*don't push syl boundaries before phrase boundaries*/
      }
      if (j > 1) {
        /* at least two entries precede j:
           check the 3-phone onset first, then the 2-phone onset */
        if (!asb_cons_comb_allowed(lts, output_phone_string[j-2], output_phone_string[j-1],
                                   output_phone_string[j])) {
          if (!asb_cons_comb_allowed(lts, output_phone_string[j-1], output_phone_string[j], NULL)) {
#if PRINT_CONS_COMB
            pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
                     output_phone_string[j]);
#endif
            syl_start = j;
            break;
          }
        }
      }
      else {
        /* only one entry precedes j: just the 2-phone onset can be checked */
        if (!asb_cons_comb_allowed(lts, output_phone_string[j-1], output_phone_string[j], NULL)) {
#if PRINT_CONS_COMB
          pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
                   output_phone_string[j]);
#endif
          syl_start = j;
          break;
        }
      }
    } /* end for j=i-1 */

    /*shift the entries syl_start..i-1 up by one, overwriting the old marker at i*/
    for (j=i;j>syl_start;j--) {
      strcpy(output_phone_string[j], output_phone_string[j-1]);
    }

    /*now find stress level from the following phone and add it to the syl bound
      (the digit itself is removed from the phone in the next pass)*/
    stress = 0; /*fallback; should not happen*/
    if (i<num_out-1) {
      int digit = asb_trailing_digit(output_phone_string[i+1]);
      if (digit >= 0) stress = digit;
    }
    sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
  } /* end for i=0 */

  /*remove all the stress marking from the vowels*/
  for (i=0;i<num_out;i++) {
    out = output_phone_string[i];
    if ((strncmp(out, LTS_MARKER_SYLL_START, 2) != 0) && (asb_trailing_digit(out) >= 0)) {
      out[strlen(out)-1] = '\0'; /*remove the stress from the vowel*/
    }
  }

  /* word boundary must be followed by syllable boundary
     if no syllable boundary exists after a word boundary, move the first
     syllable boundary to after the word boundary */
  first_syl_bound = -1;
  syl_start = -1;
  for (i=1;i<num_out;i++) {
    if ((strcmp(output_phone_string[i-1], LTS_MARKER_WORD_START) == 0) &&
        (strncmp(output_phone_string[i], LTS_MARKER_SYLL_START, 2) != 0)) {
      syl_start = i;
      /* search for first occurrence of syllable boundary */
      for (j=syl_start+1;j<num_out;j++) {
        int digit;
        out = output_phone_string[j];
        digit = asb_trailing_digit(out);
        if ((strncmp(out, LTS_MARKER_SYLL_START, 2) == 0) && (digit >= 0)) {
          stress = digit;
          first_syl_bound = j;
          break;
        }
      }
      /* shift entries up until the gap reaches the word boundary */
      if (first_syl_bound >= 0) {
        for (; j>syl_start; j--) {
          strcpy(output_phone_string[j], output_phone_string[j-1]);
        }
        /* put syllable boundary after word boundary */
        sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
        /* advance i, reset variables */
        i = first_syl_bound;
        first_syl_bound = syl_start = -1;
      }
    }
  }
}
/*
 * Run the letter-to-sound trees over one word and append the resulting
 * phones to output_phone_string.
 *
 * lts                  model: trees, questions, outputs and phone mapping
 * word, word_len       the word to convert and its number of letters
 * output_phone_string  array of phone slots, appended to in place
 * max_phone_length     number of slots in output_phone_string
 * pnum_out             in: slots already used; out: slots used after this
 *                      word, or 0 on failure
 *
 * For each letter the tree context is filled in (the left context is the
 * previously emitted phone, or LTS_MARKER_PIPESEP at a word or pronunciation
 * start) and the tree for that letter selects an output. If the selected
 * output names additional input letters, they must match the letters that
 * follow in word; on a match those letters are consumed as well, otherwise
 * the leaf's backoff output is emitted instead.
 *
 * Returns SWIsltsSuccess, SWIsltsInternalErr when a phone is missing from
 * the phone mapping, or SWIsltsMaxInputExceeded when the output array fills.
 */
SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *pnum_out)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  int i,j;
  int root_start;
  int root_end;
  int output_index;
  int left_phone;
  char * input_seq;
  int found_match;
  int backoff_output;
  int num_out;

  num_out = *pnum_out;
  root_start = 0;
  root_end = word_len;

  for (i=0;i<word_len;i++) {
    if ((i == 0) || (num_out == 0)) {
      /* first letter of the word: the left context is the separator phone */
      left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
#if PRINT_LTS_WORD
      pfprintf(PSTDOUT,"got phone %d for initial | (LTS_MARKER_PIPESEP)\n", left_phone);
#endif
      if (left_phone < 0) {
#ifdef NO_STDERR
        /* PrintError takes unsigned long arguments, so pass 0 rather than NULL */
        PrintError("Error, cannot find | in phone mappings\n", 0, 0, 0);
#else
        PLogError(L("Error, cannot find | in phone mappings\n"));
#endif
        nRes = SWIsltsInternalErr;
        goto CLEAN_UP;
      }
    } else {
      char *prev_out = output_phone_string[num_out-1];
#if PRINT_LTS_WORD
      pfprintf(PSTDOUT,"about to call find_phone2 num_out %d\n", num_out);
      pfprintf(PSTDOUT,"out[%d] %s\n", num_out-1, output_phone_string[num_out-1]);
#endif
      /* boundary markers are not phones; they map to the separator phone */
      if ((strcmp(prev_out, LTS_MARKER_PRON_START) == 0) ||
          (strcmp(prev_out, LTS_MARKER_WORD_START) == 0)) {
        left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
      } else {
        left_phone = find_phone(prev_out, lts->phone_mapping);
      }
#if PRINT_LTS_WORD
      pfprintf(PSTDOUT,"got phone %d for %s\n", left_phone, output_phone_string[num_out-1]);
#endif
      if (left_phone < 0) {
#ifdef NO_STDERR
        PrintError("Error, cannot find %s in phone mappings\n", (unsigned long)output_phone_string[num_out-1], 0, 0);
#else
        PLogError(L("Error, cannot find %s in phone mappings\n"), output_phone_string[num_out-1]);
#endif
        nRes = SWIsltsInternalErr;
        goto CLEAN_UP;
      }
    }

    /* a non-zero return means this letter is skipped and produces no output */
    if (fill_up_dp_for_letter(lts, word, word_len, i, root_start, root_end, left_phone)) continue;

    output_index = find_output_for_dp(lts, &backoff_output);
#if PRINT_LTS_WORD
    pfprintf(PSTDOUT,"got output %d\n", output_index);
#endif

    found_match = 1;
    if (strlen(lts->input_for_output[output_index]) > 0) {
      /*some extra input string to use up*/
#if PRINT_LTS_WORD
      pfprintf(PSTDOUT,"GOT INPUT %s for %s letter %c\n", lts->input_for_output[output_index], word, word[i]);
#endif
      input_seq = lts->input_for_output[output_index];
      if (input_seq[0] == '=') {
        root_end = i;
        input_seq = input_seq+1; /*skip suffix indicator*/
      }
      /* compare the extra input against the letters following position i */
      for (j=i+1;;j++) {
        if (input_seq[j-(i+1)] == '\0') break;
        if (input_seq[j-(i+1)] == '-') {
          /* NOTE(review): '-' presumably marks the end of a prefix, so the
             root starts at the next letter - confirm against the model format */
          root_start = j;
          break;
        }
        if (j >= word_len) {
          found_match = 0;
          break;
        }
        if (input_seq[j-(i+1)] != word[j]) {
          found_match = 0;
          break;
        }
      }
      if (found_match) {
        /* the matched letters are consumed together with letter i */
        i = j-1;
      }
    }

    if (!found_match) {
#if PRINT_LTS_WORD
      pfprintf(PSTDOUT,"using backoff output %s instead of regular %s\n",
               lts->outputs[backoff_output],
               lts->outputs[output_index]);
#endif
      num_out = add_output(lts->outputs[backoff_output], output_phone_string, num_out, max_phone_length);
    }
    else {
      num_out = add_output(lts->outputs[output_index], output_phone_string, num_out, max_phone_length);
    }
    if (num_out >= max_phone_length) {
      nRes = SWIsltsMaxInputExceeded;
      goto CLEAN_UP;
    }
  }

  *pnum_out = num_out;
  return SWIsltsSuccess;

CLEAN_UP:
  *pnum_out = 0;
  return nRes;
}
/*
 * Convert input_sentence to a sequence of phones and boundary markers.
 *
 * h                    LTS handle (an LTS*)
 * hdict                not used by this function
 * input_sentence       NUL-terminated text; letters are upper-cased before
 *                      lookup
 * output_phone_string  array of phone slots that receives the result
 * phone_length         in: number of slots in output_phone_string;
 *                      out: number of slots written, or 0 on failure
 *
 * The output starts with LTS_MARKER_PRON_START. Space, tab, hyphen and the
 * end of the string end a word; each word is emitted as
 * LTS_MARKER_WORD_START followed by the phones from lts_for_word(). The
 * characters . , ! ? and newline also end the current word and, if any
 * letter was seen since the previous such character, add another
 * LTS_MARKER_PRON_START. Letters beyond MAX_WORD_LEN-1 in a single word are
 * dropped. Finally adjust_syllable_boundaries() is applied to the result.
 *
 * Returns SWIsltsSuccess, SWIsltsMaxInputExceeded when the output array is
 * too small, or the error reported by lts_for_word().
 */
SWIsltsResult run_lts(LTS_HANDLE h, FSM_DICT_HANDLE hdict, char *input_sentence, char **output_phone_string, int *phone_length)
{
  SWIsltsResult nRes = SWIsltsSuccess;
  int i;
  int len;
  int num_out = 0;
  LTS * lts;
  int was_in_phrase;
  char word[MAX_WORD_LEN];
  int num_in_word;
  int max_phone_length;
  int pron_len;
  char c;

  (void) hdict;

  max_phone_length = *phone_length;
  len = (int) strlen(input_sentence);
  lts = (LTS*) h;
  was_in_phrase = 0;

  /* without at least one slot even the initial marker cannot be stored */
  if (max_phone_length <= 0) {
    nRes = SWIsltsMaxInputExceeded;
    goto CLEAN_UP;
  }

  /*add a phrase start at beginning*/
  strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
  if (num_out >= max_phone_length) {
    nRes = SWIsltsMaxInputExceeded;
    goto CLEAN_UP;
  }

  num_in_word = 0;
  /* i == len is included on purpose so the last word gets flushed */
  for (i=0;i<=len;i++) {
    c = input_sentence[i];
#if PRINT_LTS_WORD
    pfprintf(PSTDOUT,"WORKING on letter %d %c\n", i, input_sentence[i]);
#endif
    /* Treat hyphen as word delimiter. Not quite right for German
       hyphenated compounds, but still an improvement. */
    if ((c == ' ') || (c == '-') || (c == '\t') || (i == len)) {
      if (num_in_word > 0) {
        strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
        if (num_out >= max_phone_length) {
          nRes = SWIsltsMaxInputExceeded;
          goto CLEAN_UP;
        }
        word[num_in_word] = '\0';
#if PRINT_DICT_LOOKUP
        pfprintf(PSTDOUT,"Did not find %s in dictionary\n", word);
#endif
        pron_len = -num_out;
        nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
        pron_len += num_out; /* now pron_len is the number of phonemes/markers added */
        if (pron_len == 0) {
          num_out--; /* nothing was added: take back the LTS_MARKER_WORD_START */
        }
        if (nRes != SWIsltsSuccess) {
          goto CLEAN_UP;
        }
        num_in_word = 0;
      }
    }
    else if ((c == '.') || (c == ',') || (c == '!') || (c == '?') || (c == '\n')) {
      if (was_in_phrase) {
        /*add a phrase boundary after lts is called*/
        if (num_in_word > 0) {
          strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
          if (num_out >= max_phone_length) {
            nRes = SWIsltsMaxInputExceeded;
            goto CLEAN_UP;
          }
          word[num_in_word] = '\0';
          /* NOTE(review): unlike the whitespace case above, the word-start
             marker is kept here even when the word yields no phones -
             confirm whether that difference is intended */
          nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
          if (nRes != SWIsltsSuccess) {
            goto CLEAN_UP;
          }
          num_in_word = 0;
        }
        strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
        if (num_out >= max_phone_length) {
          nRes = SWIsltsMaxInputExceeded;
          goto CLEAN_UP;
        }
        was_in_phrase = 0;
      }
    }
    else {
      if (num_in_word < MAX_WORD_LEN-1) {
        /* toupper() is undefined for negative values, which a plain char
           holding an 8-bit letter may be; go through unsigned char */
        word[num_in_word++] = (char) toupper((unsigned char) c);
        was_in_phrase = 1;
      }
    }
  }

  /*adjust syllable boundaries*/
  adjust_syllable_boundaries(lts, output_phone_string, num_out, max_phone_length);
  *phone_length = num_out;
  return SWIsltsSuccess;

CLEAN_UP:
  *phone_length = 0;
  return nRes;
}
#ifdef USE_STATIC_SLTS
/*
 * Static-model build: nothing is allocated at run time, so a call to this
 * function is reported as an error and NULL is returned.
 */
void *lts_alloc(int num, int size)
{
  (void) num;
  (void) size;
#ifdef NO_STDERR
  /* PrintError takes unsigned long arguments, so pass 0 rather than NULL */
  PrintError("USE_STATIC_SLTS: lts_alloc should not be called", 0, 0, 0);
#else
  PLogError(L("USE_STATIC_SLTS: lts_alloc should not be called"));
#endif
  return NULL;
}
#else
/*
 * Allocate an array of num elements of size bytes each through the project
 * CALLOC macro, tagged with MTAG, and return whatever CALLOC yields.
 * NOTE(review): presumably zero-filled and NULL on failure, as with calloc() -
 * confirm against pmemory.h.
 */
void *lts_alloc(int num, int size)
{
  void *block = CALLOC(num, size, MTAG);

  return block;
}
#endif /* USE_STATIC_SLTS */