Added probability of speech presence to denoiser.
git-svn-id: http://svn.xiph.org/trunk/speex@5353 0101bb08-14d6-0310-b084-bc0e0c8e3800
diff --git a/libspeex/preprocess.c b/libspeex/preprocess.c
index b597898..69d9206 100644
--- a/libspeex/preprocess.c
+++ b/libspeex/preprocess.c
@@ -49,6 +49,11 @@
#define NB_BANDS 8
+#define ZMIN .1
+#define ZMAX .316
+#define ZMIN_1 10
+#define LOG_MIN_MAX_1 0.86859
+
static void conj_window(float *w, int len)
{
int i;
@@ -160,6 +165,10 @@
st->Stmp = (float*)speex_alloc(N*sizeof(float));
st->update_prob = (float*)speex_alloc(N*sizeof(float));
+ st->zeta = (float*)speex_alloc(N*sizeof(float));
+ st->Zpeak = 0;
+ st->Zlast = 0;
+
st->noise_bands = (float*)speex_alloc(NB_BANDS*sizeof(float));
st->noise_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float));
st->speech_bands = (float*)speex_alloc(NB_BANDS*sizeof(float));
@@ -610,6 +619,7 @@
int N4 = st->frame_size - N3;
float scale=.5/N;
float *ps=st->ps;
+ float Zframe=0, Pframe;
preprocess_analysis(st, x);
@@ -734,17 +744,81 @@
}
}
+ for (i=1;i<N;i++)
+ {
+ st->zeta[i] = .7*st->zeta[i] + .3*st->prior[i];
+ }
+ {
+ int freq_start = (int)(300.0*2*N/st->sampling_rate);
+ int freq_end = (int)(2000.0*2*N/st->sampling_rate);
+ for (i=freq_start;i<freq_end;i++)
+ {
+ Zframe += st->zeta[i];
+ }
+ }
+
+ Zframe /= N;
+ if (Zframe<ZMIN)
+ {
+ Pframe = 0;
+ } else {
+ if (Zframe > 1.5*st->Zlast)
+ {
+ Pframe = 1;
+ st->Zpeak = Zframe;
+ if (st->Zpeak > 10)
+ st->Zpeak = 10;
+ if (st->Zpeak < 1)
+ st->Zpeak = 1;
+ } else {
+ if (Zframe < st->Zpeak*ZMIN)
+ {
+ Pframe = 0;
+ } else if (Zframe > st->Zpeak*ZMAX)
+ {
+ Pframe = 1;
+ } else {
+ Pframe = log(Zframe/(st->Zpeak*ZMIN)) / log(ZMAX/ZMIN);
+ }
+ }
+ }
+ st->Zlast = Zframe;
+
+ fprintf (stderr, "%f\n", Pframe);
/* Compute gain according to the Ephraim-Malah algorithm */
for (i=1;i<N;i++)
{
float MM;
float theta;
float prior_ratio;
+ float p, q;
+ float zeta1;
+ float P1;
prior_ratio = st->prior[i]/(1.0001+st->prior[i]);
theta = (1+st->post[i])*prior_ratio;
+ if (i==1 || i==N-1)
+ zeta1 = st->zeta[i];
+ else
+ zeta1 = .25*st->zeta[i-1] + .5*st->zeta[i] + .25*st->zeta[i+1];
+ if (zeta1<ZMIN)
+ P1 = 0;
+ else if (zeta1>ZMAX)
+ P1 = 1;
+ else
+ P1 = LOG_MIN_MAX_1 * log(ZMIN_1*zeta1);
+
+ /*P1 = log(zeta1/ZMIN)/log(ZMAX/ZMIN);*/
+
+ /* FIXME: add global probability (P2) */
+ q = 1-Pframe*P1;
+ if (q>.95)
+ q=.95;
+ p=1/(1 + (q/(1-q))*(1+st->prior[i])*exp(-theta));
+
+
#if 0
/* log-spectral magnitude estimator */
if (theta<6)
@@ -762,24 +836,16 @@
{
st->gain[i]=2;
}
- }
- st->gain[0]=0;
- st->gain[N-1]=0;
- if (st->denoise_enabled)
- {
- for (i=1;i<N-1;i++)
+ if (st->denoise_enabled)
{
- st->gain2[i]=st->gain[i];
- /* Limits noise reduction to -26 dB, put prevents some musical noise */
- /*if (st->gain2[i]<.05)
- st->gain2[i]=.05;*/
+ st->gain2[i]=p*p*st->gain[i];
+ } else {
+ st->gain2[i]=1;
}
- st->gain2[N-1]=0;
- } else {
- for (i=0;i<N;i++)
- st->gain2[i] = 1;
}
+ st->gain2[0]=st->gain[0]=0;
+ st->gain2[N-1]=st->gain[N-1]=0;
if (st->agc_enabled)
speex_compute_agc(st, mean_prior);
diff --git a/libspeex/speex_preprocess.h b/libspeex/speex_preprocess.h
index c183fce..4019e70 100644
--- a/libspeex/speex_preprocess.h
+++ b/libspeex/speex_preprocess.h
@@ -64,6 +64,10 @@
float *Stmp; /**< See Cohen paper */
float *update_prob; /**< Propability of speech presence for noise update */
+ float *zeta; /**< Smoothed a priori SNR */
+ float Zpeak; /**< Frame-level zeta (Zframe) latched when it exceeds 1.5*Zlast, clamped to [1,10]; scales the ZMIN/ZMAX thresholds */
+ float Zlast; /**< Frame-level zeta (Zframe) computed on the previous call; starts at 0 */
+
float *loudness_weight; /**< Perceptual loudness curve */
float *echo_noise;
diff --git a/libspeex/testdenoise.c b/libspeex/testdenoise.c
index 7ccabc5..cc6c1a8 100644
--- a/libspeex/testdenoise.c
+++ b/libspeex/testdenoise.c
@@ -10,11 +10,16 @@
float x[NN];
int i;
SpeexPreprocessState *st;
+ int count=0;
+ float f;
st = speex_preprocess_state_init(NN, 8000);
i=1;
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);
- /*speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i);*/
+ i=0;
+ speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i);
+ f=8000;
+ speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC_LEVEL, &f);
while (1)
{
int vad;
@@ -23,12 +28,12 @@
break;
for (i=0;i<NN;i++)
x[i]=in[i];
-
vad = speex_preprocess(st, x, NULL);
for (i=0;i<NN;i++)
out[i]=x[i];
/*fprintf (stderr, "%d\n", vad);*/
fwrite(out, sizeof(short), NN, stdout);
+ count++;
}
speex_preprocess_state_destroy(st);
return 0;