/* blob: f77d6ab679c6bbf0042cdb0e44c4af840e9c5ab5 [file] [log] [blame] */
/*---------------------------------------------------------------------------*
* voicing.c *
* *
* Copyright 2007, 2008 Nuance Communications, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include <limits.h>
#ifndef _RTT
#include <stdio.h>
#endif
#include "all_defs.h"
#include "voicing.h"
#include "portable.h"
#include "../cfront/sh_down.h"
/* Set to a non-zero value to compile in the log_report() traces that are
 * guarded by "#if DEBUG" in this file. */
#define DEBUG 0
/* Version-control identification string for this file. */
static const char voicing[] = "$Id: voicing.c,v 1.1.10.5 2007/10/15 18:06:24 dahan Exp $";
/**
 * Reset the per-channel counters used by voicing_analysis().
 *
 * The silence, speech and fast-match run counters are cleared and count is
 * set to -1. voicing_analysis() tests for a negative count and, when it sees
 * one, seeds the background levels b0 and b1 from the incoming frame.
 * No other field of the structure is written here.
 *
 * @param chan  channel state to reset; dereferenced unconditionally, so it
 *              must not be NULL.
 */
void init_voicing_analysis(voicing_info *chan)
{
  /* Clear the run-length counters. */
  chan->fast_count = 0;
  chan->speech_count = 0;
  chan->sil_count = 0;

  /* A negative count marks the channel as not having processed a frame. */
  chan->count = -1;

#if DEBUG
  log_report("U: 255 255 255 -1 -1 -1 -1\n");
#endif
}
/**
 * Feed one frame level into the voicing state of a channel and classify it.
 *
 * b0, b1 and s0 are kept scaled up by 8 bits relative to enval: they are
 * compared against SHIFT_UP(enval, 8), and b0 and the threshold are shifted
 * back down by 8 before being compared with enval.
 *
 * @param chan   channel state, updated in place; must not be NULL.
 * @param enval  level of the current frame.
 * @param log    not used by this function.
 * @return bit mask assembled from FAST_VOICE_BIT, QUIET_BIT, VOICE_BIT and
 *         BELOW_THRESHOLD_BIT. The same value is stored in
 *         chan->voice_status.
 *
 * NOTE(review): s0 is read below but is seeded neither in the first-frame
 * branch nor in init_voicing_analysis() -- confirm that callers set it up.
 */
long voicing_analysis(voicing_info *chan, voicedata enval , int* log)
{
  long flags = 0L;
  int level_gate;

  /* First frame after a reset: seed both background estimates. */
  if (chan->count < 0)
  {
    chan->count = -1;
    chan->b0 = SHIFT_UP(enval, 8);
    chan->b1 = SHIFT_UP(enval, 8);
  }

  /* Background level: b0 drops to the frame level as soon as the frame is
   * lower, which also restarts the hang counter.  Once count has passed
   * B0_HANG1 (resp. B0_HANG2), B0_RATE1 (resp. B0_RATE2) is added per frame.
   */
  if (chan->b0 > SHIFT_UP(enval, 8))
  {
    chan->count = 0;
    chan->b0 = SHIFT_UP(enval, 8);
  }
  if (chan->count > B0_HANG2)
  {
    chan->b0 += B0_RATE2;
  }
  else if (chan->count > B0_HANG1)
  {
    chan->b0 += B0_RATE1;
  }
  chan->count += 1;

  /* Second background level: b1 moves towards the frame level, but only
   * for frames that lie less than quiet_margin above b0.
   */
  if ((chan->b0 >> 8) > (enval - chan->quiet_margin))
  {
    chan->b1 += SHIFT_DOWN(B1_RATE * (SHIFT_UP(enval, 8) - chan->b1), 8);
  }

  /* Speech level: s0 jumps up to a louder frame, otherwise B0_RATE1 is
   * subtracted from it.
   */
  if (chan->s0 < SHIFT_UP(enval, 8))
  {
    chan->s0 = SHIFT_UP(enval, 8);
  }
  else
  {
    chan->s0 -= B0_RATE1;
  }

  /* Decision threshold, brought back to the scale of enval.
   * Increase the range by 25%: the part of (s0 - b0) in excess of
   * DYNAMIC_RANGE is shifted down by 2 and added on top of b1.
   */
  level_gate = (chan->b1 + (SHIFT_DOWN(
                              MAX(chan->s0 - chan->b0 - DYNAMIC_RANGE, 0), 2))) >> 8;

  /* Run lengths: each counter grows while its condition holds on
   * consecutive frames and restarts from zero otherwise.
   */

  /* Is it speech? */
  chan->speech_count = (enval > (level_gate + chan->margin))
                       ? chan->speech_count + 1 : 0;

  /* Is it fast-match speech? */
  chan->fast_count = (enval > (level_gate + chan->fast_margin))
                     ? chan->fast_count + 1 : 0;

  /* Is it at or below the quiet level? */
  chan->sil_count = (enval <= (level_gate + chan->quiet_margin))
                    ? chan->sil_count + 1 : 0;

  /* Assemble the returned flags.  FAST_VOICE_BIT takes precedence over
   * QUIET_BIT; VOICE_BIT and BELOW_THRESHOLD_BIT are OR-ed in independently.
   */
  if (chan->fast_count > chan->voice_duration)
  {
    flags = FAST_VOICE_BIT;
  }
  else if (chan->sil_count > chan->quiet_duration)
  {
    flags = QUIET_BIT;
  }
  if (chan->speech_count > chan->voice_duration)
  {
    flags |= VOICE_BIT;
  }
  if (chan->sil_count > 0)
  {
    flags |= BELOW_THRESHOLD_BIT;
  }
  chan->voice_status = flags;

#if DEBUG
  log_report("U: %d %.1f %.1f, %d %d %d %d\n", (int) enval,
             chan->b0 / 256.0, chan->b1 / 256.0,
             chan->speech_count, chan->fast_count,
             chan->sil_count, chan->count);
#endif

  return flags;
}