blob: 42946f87e3e5fe1736bf75f400148f400fa13b03 [file] [log] [blame]
/*---------------------------------------------------------------------------*
* parseStringTest.c *
* *
* Copyright 2007, 2008 Nuance Communications, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
#include "pstdio.h"
#include "pmemory.h"
#include "plog.h"
#include "HashMap.h"
#include "SR_Grammar.h"
#include "SR_SemanticResult.h"
#include "ESR_Session.h"
#include "ESR_Locale.h"
#include "LCHAR.h"
#include "PFileSystem.h"
#include "PANSIFileSystem.h"
/* for testing RecognizerImpl.c, see below */
#include"buildopt.h"
#include"setting.h"
#include"srec_sizes.h"
#include"SR_GrammarImpl.h"
/* defines */
/* Capacity of the LCHAR buffers that hold one input line (or a piece of one);
   also the size passed to pfgets() throughout this file. */
#define MAX_LINE_LENGTH 256
/* Capacity of the value buffers handed to SR_SemanticResult getValue(). */
#define MAX_STR_LENGTH 512
/* Number of SR_SemanticResult holders created before each parse request. */
#define MAX_SEM_RESULTS 3
/* Capacity of the key-pointer arrays handed to SR_SemanticResult getKeyList(). */
#define MAX_KEYS 30
/* protos */
/* Both functions are defined after main(); each one reads a single line of the
   -itest file from fin, checks it against the grammar and reports on fout. */
ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout);
ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout);
/* struct */
/* Command-line switches that select how Parse() checks a transcription. */
typedef struct Opts
{
/* Non-zero: Parse() looks every word up in the grammar's olabels wordmap and
   calls checkParseByWordID() instead of grammar->checkParse().
   Set by the -ids and -allids switches. */
int use_parse_by_string_ids;
/* Non-zero: Parse() ignores its trans argument and instead parses each word
   of the olabels wordmap, starting at id 4. Set by the -allids switch. */
int do_check_all_ids;
}
Opts;
/**
 * Print the command-line synopsis to PSTDOUT.
 *
 * The synopsis lists every switch main() accepts, including -ids and -allids.
 *
 * @param exename Program name shown in the synopsis (main() passes argv[0])
 * @return Always 1, so a caller can write "return usage(...)" to exit with failure
 */
int usage(LCHAR* exename)
{
  pfprintf(PSTDOUT, "usage: %s -base <basefilename> [-in <input file>] [-out <output file>] [-itest <testfilename>] [-ids] [-allids]\n", exename);
  return 1;
}
/**
 * Collapse every run of consecutive blanks (' ') in trans to a single blank.
 * The string is rewritten in place; only the last blank of each run is kept.
 *
 * @param trans NUL-terminated string to normalise
 */
void lstr_strip_multiple_spaces(LCHAR* trans)
{
  char *reader = trans;
  char *writer = trans;

  while (*reader != L('\0'))
  {
    /* a blank that is followed by another blank is dropped */
    int redundant_blank = (reader[0] == ' ' && reader[1] == ' ');

    if (!redundant_blank)
    {
      *writer = *reader;
      writer++;
    }
    reader++;
  }
  *writer = L('\0');
}
/**
 * Print the key/value pairs of one semantic result.
 *
 * Each pair is written to fout as "{key : value}" and the list is closed with
 * a "--Done--" line. A failure to fetch the key list, or a single value, is
 * reported as "Error: <code>" on fout instead.
 *
 * @param result Semantic result to dump
 * @param fout Stream receiving the report
 */
void display_results(SR_SemanticResult *result, PFile* fout)
{
  LCHAR* keys[MAX_KEYS]; /* receives pointers to the key strings */
  LCHAR value[MAX_STR_LENGTH];
  size_t key_count = MAX_KEYS;
  size_t k;
  ESR_ReturnCode rc;

  rc = result->getKeyList(result, (LCHAR**) & keys, &key_count);
  if (rc != ESR_SUCCESS)
  {
    pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
    return;
  }

  for (k = 0; k < key_count; k++)
  {
    size_t value_len = MAX_STR_LENGTH; /* in: capacity of value */

    rc = result->getValue(result, keys[k], value, &value_len);
    if (rc != ESR_SUCCESS)
    {
      pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
      continue;
    }
    pfprintf(fout, "{%s : %s}\n", keys[k], value);
  }
  pfprintf(fout, "--Done--\n");
}
ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout, Opts* opts)
{
ESR_ReturnCode rc = ESR_SUCCESS;
size_t i, result_count, key_count;
SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
wordID wordIDs[32], *wordIDptr;
SR_GrammarImpl* pgrammar = (SR_GrammarImpl*)grammar;
wordmap* wmap;
if (opts->do_check_all_ids)
{
wordID id;
Opts myopts;
memcpy(&myopts, opts, sizeof(myopts));
myopts.do_check_all_ids = 0;
wmap = pgrammar->syntax->synx->olabels;
/* start at word 4 because "eps, -pau- -pau2- @root */
for (id = 4; id < wmap->num_words; id++)
{
trans = wmap->words[id];
Parse(grammar, trans, fout, &myopts);
}
return 0;
}
result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */
for (i = 0; i < result_count; i++)
SR_SemanticResultCreate(&semanticResults[i]); /* create the result holders */
lstrtrim(trans);
/* check for multiple space separators! */
lstr_strip_multiple_spaces(trans);
if (!opts->use_parse_by_string_ids)
{
rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
}
else
{
char copy_of_trans[256], *p;
strcpy(copy_of_trans, trans);
wmap = pgrammar->syntax->synx->olabels;
wordIDs[0] = wordIDs[1] = MAXwordID;
wordIDptr = &wordIDs[0];
for (p = strtok(copy_of_trans, " "); p; p = strtok(NULL, " "))
{
for (i = 0; i < wmap->num_words; i++)
if (!strcmp(wmap->words[i], p))
{
*wordIDptr++ = (wordID)i;
break;
}
if (i == wmap->num_words)
{
wordIDs[0] = MAXwordID;
break;
}
}
*wordIDptr++ = MAXwordID;
/* printf("wordids:");
for(wordIDptr=&wordIDs[0]; *wordIDptr!=MAXwordID; wordIDptr++)
printf(" %d/%s", *wordIDptr, wmap->words[*wordIDptr]);
printf("\n"); */
if (wordIDs[0] == MAXwordID)
{
result_count = 0;
rc = ESR_SUCCESS;
}
else
{
rc = pgrammar->semproc->flush(pgrammar->semproc);
rc = pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), trans);
rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
wordIDs, semanticResults, &result_count);
}
}
if (rc != ESR_SUCCESS)
{
pfprintf(fout, "error (%s)\n\n", trans);
return rc;
}
if (result_count < 1)
{
pfprintf(fout, "no parse (%s)\n\n", trans);
}
else
{
key_count = 0xffff;
rc = SR_SemanticResultGetKeyCount(semanticResults[0], &key_count);
pfprintf(fout, "parse ok (%d results) (%s) (%d)\n", result_count, trans, key_count);
for (i = 0; i < result_count; i++)
display_results(semanticResults[i], fout);
for (i = 0; i < MAX_SEM_RESULTS; i++)
{
rc = semanticResults[i]->destroy(semanticResults[i]);
if (rc != ESR_SUCCESS)
return rc;
}
}
return ESR_SUCCESS;
}
/**
 * Test a transcription against the grammar and decide, based on what the test
 * expected, whether the outcome counts as a pass or a fail.
 *
 * trans is trimmed and its blank runs are collapsed IN PLACE before parsing.
 * The report written to fout has the form
 *   transcription | key | reference | result | PASSED/FAILED
 * A transcription that does not parse is PASSED only when result is "FAIL";
 * a transcription that parses is PASSED only when the value stored under key
 * equals ref and result is "PASS".
 *
 * All semantic result holders created here are destroyed on every path.
 *
 * @param grammar Grammar to check against
 * @param trans Transcription to parse (modified in place)
 * @param key Semantic key whose value is compared
 * @param ref Expected value for key
 * @param result Expected verdict, "PASS" or "FAIL"
 * @param fout Stream receiving the report
 * @return ESR_SUCCESS when the check ran, otherwise the first error from
 *         creating the results, parsing, or destroying the results
 */
ESR_ReturnCode ParseTestSet(SR_Grammar* grammar, LCHAR* trans, LCHAR* key, LCHAR* ref, LCHAR* result, PFile* fout)
{
  ESR_ReturnCode rc = ESR_SUCCESS;
  ESR_ReturnCode destroy_rc;
  size_t i, len;
  size_t created_count;
  size_t result_count; /* must be a real size_t: checkParse() writes through a size_t* */
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  LCHAR value[MAX_STR_LENGTH];

  for (created_count = 0; created_count < MAX_SEM_RESULTS; created_count++)
  {
    rc = SR_SemanticResultCreate(&semanticResults[created_count]);
    if (rc != ESR_SUCCESS)
      goto CLEANUP;
  }

  lstrtrim(trans);
  /* check for multiple space separators! */
  lstr_strip_multiple_spaces(trans);
  pfprintf(fout, "checking (%s) ref(%s) res(%s)\n", trans, ref, result);

  result_count = MAX_SEM_RESULTS;
  rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
  if (rc != ESR_SUCCESS)
    goto CLEANUP;

  if (result_count < 1) /* failed to parse, but this could still be a pass if you expected a failure */
  {
    pfprintf(fout, "NO PARSE FOR: %s|%s|%s| |", trans, key, ref);
    if (strcmp("FAIL", result) == 0)
      pfprintf(fout, "PASSED (%s)\n", trans);
    else
      pfprintf(fout, "FAILED (%s)\n", trans);
  }
  else /* parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
  {
    for (i = 0; i < result_count && i < MAX_SEM_RESULTS; i++)
    {
      len = MAX_STR_LENGTH;
      rc = semanticResults[i]->getValue(semanticResults[i], key, value, &len);
      if (rc == ESR_SUCCESS)
      {
        pfprintf(fout, "%s|%s|%s|%s|", trans, key, ref, value);
        if (strcmp(value, ref) == 0 && strcmp("PASS", result) == 0)
          pfprintf(fout, "PASSED\n");
        else
          pfprintf(fout, "FAILED\n");
      }
      else
      {
        pfprintf(fout, "ERROR: %s, while checking key='%s'\n", ESR_rc2str(rc), key);
      }
    }
  }
  /* a failed getValue() has been reported above and does not fail the call */
  rc = ESR_SUCCESS;

CLEANUP:
  /* deallocate every semantic result that was created, keeping the first error */
  for (i = 0; i < created_count; i++)
  {
    destroy_rc = semanticResults[i]->destroy(semanticResults[i]);
    if (rc == ESR_SUCCESS)
      rc = destroy_rc;
  }
  return rc;
}
/*
 * Helper for main(): copy the value that follows the option at argv[*index]
 * into dst and advance *index onto that value.
 *
 * Returns 1 on success. Returns 0, leaving dst untouched, when the option is
 * the last argument or its value does not fit into dst_capacity characters
 * (terminator included).
 */
static int copy_option_value(int argc, char** argv, int* index, LCHAR* dst, size_t dst_capacity)
{
  if (*index + 1 >= argc)
    return 0;
  ++(*index);
  if (strlen(argv[*index]) >= dst_capacity)
    return 0;
  LSTRCPY(dst, argv[*index]);
  return 1;
}

/**
 * Entry point of the semantic parser test program.
 *
 * Loads the grammar named by -base and then runs in one of four modes:
 *   -allids           parse every word of the grammar's output wordmap
 *   -in <file>        parse each line of <file> ("-" means standard input);
 *                     lines starting with '#' are skipped
 *   -itest <file>     run the "transcription : key=value;..." test file
 *   (none of these)   interactive prompt, left with "qqq"
 * -out <file> redirects the report, -ids selects parsing by word ID.
 *
 * @return rc as left by the last operation (ESR_SUCCESS on success), or 1
 *         for a usage error or a file that cannot be opened
 */
int main(int argc, char **argv)
{
  LCHAR trans[MAX_LINE_LENGTH];
  SR_Grammar* grammar = NULL;
  ESR_ReturnCode rc;
  LCHAR base[P_PATH_MAX] = L("");
  LCHAR infilename[P_PATH_MAX] = L("");
  LCHAR inRTfilename[P_PATH_MAX] = L("");
  LCHAR outfilename[P_PATH_MAX] = L("");
  PFile *fin = NULL, *fout = NULL;
  int i;
  LCHAR *rootrule = L("myRoot"), *p;
  Opts opts = { 0, 0 };

  /*
   * Initialize portable library.
   */
  CHKLOG(rc, PMemInit());
  fin = PSTDIN;
  fout = PSTDOUT;
  if (argc < 3)
  {
    usage(argv[0]);
    exit(EXIT_FAILURE);
  }

  /* options that take a value are rejected when the value is missing or too
     long for its buffer */
  for (i = 1; i < argc; ++i)
  {
    if (!LSTRCMP(argv[i], L("-base")))
    {
      if (!copy_option_value(argc, argv, &i, base, P_PATH_MAX))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-in")))
    {
      if (!copy_option_value(argc, argv, &i, infilename, P_PATH_MAX))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-out")))
    {
      if (!copy_option_value(argc, argv, &i, outfilename, P_PATH_MAX))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-itest")))
    {
      if (!copy_option_value(argc, argv, &i, inRTfilename, P_PATH_MAX))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-ids")))
    {
      opts.use_parse_by_string_ids = 1;
    }
    else if (!LSTRCMP(argv[i], L("-allids")))
    {
      opts.do_check_all_ids = 1;
      opts.use_parse_by_string_ids = 1;
    }
    else
      return usage(argv[0]);
  }

  CHK(rc, PLogInit(NULL, 0));
  rc = SR_GrammarLoad(base, &grammar);
  if (rc != ESR_SUCCESS)
    goto CLEANUP;

  if (*outfilename)
  {
    if ((fout = pfopen(outfilename, "w")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open file: %s\n", outfilename);
      rc = 1;
      goto CLEANUP;
    }
  }

  if (opts.do_check_all_ids)
  {
    /* Parse() ignores the transcription argument in this mode */
    rc = Parse(grammar, NULL, fout, &opts);
  }
  else if (*infilename)
  {
    if (LSTRCMP(infilename, "-") == 0)
    {
      fin = PSTDIN;
    }
    else if ((fin = pfopen(infilename, "r")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open file: %s\n", infilename);
      rc = 1;
      goto CLEANUP;
    }
    for (;;)
    {
      if (pfgets(trans, MAX_LINE_LENGTH, fin) == NULL)
      {
        if (!pfeof(fin))
        {
          rc = ESR_READ_ERROR;
          PLogError(ESR_rc2str(rc));
        }
        break;
      }
      if (trans[0] == '#') continue; /* comment line */
      lstrtrim(trans);
      /* check for multiple space separators! */
      lstr_strip_multiple_spaces(trans);
      pfprintf(fout, "Transcription: %s\n", trans);
      if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
        goto CLEANUP;
      pfprintf(fout, "\n");
    }
  }
  else if (*inRTfilename) /* using a test file */
  {
    if ((fin = pfopen(inRTfilename, "r")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open test file: %s\n", inRTfilename);
      rc = 1;
      goto CLEANUP;
    }
    /* read through the test file one line at a time; the line processors
       return ESR_READ_ERROR at end of input (or at "__END__"), any other
       return code just moves on to the next line */
    while (ESR_TRUE)
    {
      /* compile-time switch between the two test-file formats */
      if (0) rc = process_single_key_line(grammar, fin, fout);
      else rc = process_multi_key_line(grammar, rootrule, fin, fout);
      if (rc == ESR_READ_ERROR)
      {
        rc = ESR_SUCCESS;
        break;
      }
    }
  }
  else
  {
    /* get some transcriptions from the user */
    pfprintf(PSTDOUT, "\nSemantic Parser Test Program for esr (Nuance Communicaitions, 2007)\n");
    pfprintf(PSTDOUT, "'qqq' to quit\n");
    while (ESR_TRUE)
    {
      pfprintf(PSTDOUT, "> ");
      /* pfgets() is what the -in path already uses to read from PSTDIN */
      if (!pfgets(trans, MAX_LINE_LENGTH, PSTDIN))
        break;
      /* cut the line at the first CR or LF */
      for (p = &trans[0]; *p != 0 && *p != '\n' && *p != '\r'; p++) {}
      *p = 0;
      if (!LSTRCMP("qqq", trans))
        break;
      if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
        goto CLEANUP;
    }
  }

CLEANUP:
  if (fin && fin != PSTDIN)
    pfclose(fin);
  if (fout && fout != PSTDOUT)
    pfclose(fout);
  if (grammar) grammar->destroy(grammar);
  PLogShutdown();
  /* PANSIFileSystemDestroy();
  PFileSystemDestroy();*/
  PMemShutdown();
  return rc;
}
/**
 * Read one line of a single-key test file and check it with ParseTestSet().
 *
 * Line format:  "the transcription" key "value" [PASS|FAIL]
 * The verdict is optional; PASS is assumed when it is missing.
 * Lines starting with '#' are comments; a line starting with "__END__" stops
 * the run.
 *
 * @param grammar Grammar to check against
 * @param fin Test file to read from
 * @param fout Stream receiving the report
 * @return ESR_SUCCESS for a processed or comment line;
 *         ESR_READ_ERROR at end of file, on a read failure or at "__END__";
 *         ESR_INVALID_ARGUMENT for a malformed line;
 *         otherwise the error returned by ParseTestSet()
 */
ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout)
{
  LCHAR* position;
  LCHAR line[MAX_LINE_LENGTH];
  LCHAR trans[MAX_LINE_LENGTH];
  LCHAR key[MAX_LINE_LENGTH];
  LCHAR refValue[MAX_LINE_LENGTH];
  LCHAR result[MAX_LINE_LENGTH];

  /* look at the read result first: line holds nothing valid when it failed */
  if (pfgets(line, MAX_LINE_LENGTH, fin) == NULL)
  {
    if (!pfeof(fin))
      PLogError(L("ESR_READ_ERROR"));
    return ESR_READ_ERROR;
  }
  if (line[0] == '#')
    return ESR_SUCCESS;
  if (!strncmp(line, "__END__", 7))
    return ESR_READ_ERROR;

  /* get the transcription to test */
  position = strtok(line, "\"");
  if (position == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line 1 \n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(trans, position);

  /* get the key (meaning) */
  position = strtok(NULL, " \t");
  if (position == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line 2\n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(key, position);

  /* get the expected return string */
  position = strtok(NULL, "\"");
  if (position == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line 3\n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(refValue, position);

  /* get the expected result PASS/FAIL;
     there is no need to write PASS, if nothing is written PASS is assumed */
  position = strtok(NULL, " \t\r\n\"");
  if (position == NULL)
  {
    LSTRCPY(result, "PASS");
  }
  else
  {
    LSTRCPY(result, position);
    if (strcmp(result, "PASS") != 0 && strcmp(result, "FAIL") != 0)
    {
      pfprintf(fout, "INVALID FORMAT for input line, use either PASS or FAIL\n");
      return ESR_INVALID_ARGUMENT;
    }
  }
  return ParseTestSet(grammar, trans, key, refValue, result, fout);
}
/**
 * Read one line of a multi-key test file, parse its transcription and compare
 * the semantic result with the expected key/value pairs.
 *
 * Line format:  <transcription> : <key>=<value>[;<key>=<value>...]
 * for example   Hello there : meaning='BONJOUR'
 * A value may be wrapped in single quotes. Each key is looked up as
 * "<rootrule>.<key>". Writing FAIL or - in place of the pairs states that the
 * transcription is expected not to parse.
 * Lines starting with '#' and blank lines are skipped; a line starting with
 * "__END__" stops the run.
 *
 * The report written to fout has the form
 *   transcription | key | reference | result | PASSED/FAILED
 *
 * All semantic result holders created here are destroyed on every path.
 *
 * @param grammar Grammar to check against
 * @param rootrule Prefix put in front of every expected key
 * @param fin Test file to read from
 * @param fout Stream receiving the report
 * @return ESR_SUCCESS for a processed or skipped line;
 *         ESR_READ_ERROR at end of file, on a read failure or at "__END__";
 *         ESR_INVALID_ARGUMENT for a line without the ':' separated parts;
 *         otherwise the error from creating the results or from checkParse()
 */
ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout)
{
  LCHAR *p, *keyval;
  LCHAR line[MAX_LINE_LENGTH];
  LCHAR trans[MAX_LINE_LENGTH];
  LCHAR keyvals[MAX_LINE_LENGTH];
  LCHAR refkey[MAX_LINE_LENGTH];
  LCHAR refval[MAX_LINE_LENGTH], value[MAX_STR_LENGTH];
  LCHAR* keys_available[MAX_KEYS]; /* array of pointers to strings */
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  ESR_ReturnCode rc, destroy_rc;
  size_t i, j, len, size;
  size_t created_count = 0;
  size_t result_count;

  /* look at the read result first: line holds nothing valid when it failed */
  if (pfgets(line, MAX_LINE_LENGTH, fin) == NULL)
  {
    if (!pfeof(fin))
      PLogError(L("ESR_READ_ERROR"));
    return ESR_READ_ERROR;
  }
  if (line[0] == '#')
    return ESR_SUCCESS;
  if (!strncmp(line, "__END__", 7))
    return ESR_READ_ERROR;

  /* skip blank lines */
  for (p = line; *p == ' ' || *p == '\t' || *p == '\r' || *p == '\n'; p++) {}
  if (*p == 0)
    return ESR_SUCCESS;

  /* we're trying to parse
   Hello there : BONJOUR
   */
  p = strtok(line, ":");
  if (p == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line, expected <transcription> : <key=value;...>\n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(trans, p);
  /* strip trailing spaces */
  for (len = strlen(trans); len > 0 && trans[len-1] == ' '; len--)
    trans[len-1] = 0;

  p = strtok(NULL, "\n\r");
  if (p == NULL)
  {
    /* no ':' on the line, or nothing after it */
    pfprintf(fout, "INVALID FORMAT for input line, expected <transcription> : <key=value;...>\n");
    return ESR_INVALID_ARGUMENT;
  }
  /* strip leading spaces */
  while (*p == ' ' || *p == '\t') p++;
  LSTRCPY(keyvals, p);

  for (created_count = 0; created_count < MAX_SEM_RESULTS; created_count++)
  {
    rc = SR_SemanticResultCreate(&semanticResults[created_count]);
    if (rc != ESR_SUCCESS)
      goto CLEANUP;
  }

  /* pfprintf(fout,"checking (%s) ref(%s)\n", trans, keyvals); */
  result_count = MAX_SEM_RESULTS;
  rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
  if (rc != ESR_SUCCESS)
    goto CLEANUP;

  if (result_count < 1) /* failed to parse, but this could still be a pass if you expected a failure */
  {
    pfprintf(fout, "%s|%s| |", trans, keyvals);
    if (!strcmp("FAIL", keyvals) || !strcmp(keyvals, "-"))
      pfprintf(fout, "PASSED\n");
    else
      pfprintf(fout, "FAILED\n");
  }
  else /* parsed, compare every expected key/value pair with what was returned */
  {
    /* the key list is taken from the first result only */
    size = MAX_KEYS;
    rc = semanticResults[0]->getKeyList(semanticResults[0], (LCHAR**) & keys_available, &size);
    if (rc != ESR_SUCCESS)
      size = 0; /* no usable key list: every key is reported as <NOSUCHKEY> */

    for (keyval = strtok(keyvals, ";"); keyval; keyval = strtok(NULL, ";"))
    {
      /* refkey must hold "<rootrule>.<keyval>" plus the terminator */
      if (strlen(rootrule) + 1 + strlen(keyval) >= MAX_LINE_LENGTH)
      {
        pfprintf(fout, "INVALID FORMAT, key/value pair too long (%s)\n", keyval);
        continue;
      }
      sprintf(refkey, "%s.%s", rootrule, keyval);
      p = strchr(refkey, '=');
      if (p == NULL)
      {
        pfprintf(fout, "INVALID FORMAT, expected key=value (%s)\n", keyval);
        continue;
      }
      /* split refkey at '=' and copy the value without its optional quotes */
      *p = 0;
      p++;
      if (*p == '\'') p++;
      LSTRCPY(refval, p);
      len = strlen(refval);
      if (len > 0 && refval[len-1] == '\'')
        refval[len-1] = 0;

      for (i = 0; i < result_count && i < MAX_SEM_RESULTS; i++)
      {
        for (j = 0; j < size; j++)
          if (!strcmp(keys_available[j], refkey)) break;
        if (j < size)
        {
          len = MAX_STR_LENGTH;
          rc = semanticResults[i]->getValue(semanticResults[i], refkey, value, &len);
          if (rc != ESR_SUCCESS)
            LSTRCPY(value, "<ERROR>"); /* never print or compare an unset buffer */
        }
        else
        {
          LSTRCPY(value, "<NOSUCHKEY>");
        }
        pfprintf(fout, "%s|%s|%s|%s|", trans, refkey, refval, value);
        if (strcmp(value, refval) == 0)
          pfprintf(fout, "PASSED\n");
        else
          pfprintf(fout, "FAILED\n");
      }
    }
  }
  /* per-key problems have been reported above and do not fail the line */
  rc = ESR_SUCCESS;

CLEANUP:
  /* deallocate every semantic result that was created */
  for (i = 0; i < created_count; i++)
  {
    destroy_rc = semanticResults[i]->destroy(semanticResults[i]);
    if (destroy_rc != ESR_SUCCESS)
      PLogError("%s while destroying", ESR_rc2str(destroy_rc));
  }
  return rc;
}