... git-svn-id: http://svn.xiph.org/trunk/speex@4121 0101bb08-14d6-0310-b084-bc0e0c8e3800

commit: a7175649e28b332248c6f2788d96f117edf6100f [log] [tgz]
author: jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800> Wed Nov 27 02:54:34 2002 +0000
committer: jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800> Wed Nov 27 02:54:34 2002 +0000
tree: ddab96e737f52a6bdd6caea850668cefafe3594b
parent: 7e233051842e7d72a70b25aa3b30f5cb6e4073e9 [diff]
diff --git a/configure.in b/configure.in
index af5b086..df043a6 100644
--- a/configure.in
+++ b/configure.in

@@ -5,7 +5,7 @@
 SPEEX_MAJOR_VERSION=0
 SPEEX_MINOR_VERSION=99
 SPEEX_MICRO_VERSION=0
-SPEEX_VERSION=1.0beta3
+SPEEX_VERSION=1.0beta4cvs
 SPEEX_BINARY_AGE=0
 SPEEX_INTERFACE_AGE=0
 

diff --git a/doc/manual.lyx b/doc/manual.lyx
index 9d26643..310e3bb 100644
--- a/doc/manual.lyx
+++ b/doc/manual.lyx

@@ -494,17 +494,18 @@
 
 \layout Standard
 
-During voiced segments, the speech signal is very periodic, so it is possible
- to take advantage of that by expressing the excitation signal 
-\begin_inset Formula $e(n)$
+During voiced segments, the speech signal is periodic, so it is possible
+ to take advantage of that property by approximating the excitation signal
+ 
+\begin_inset Formula $e[n]$
 \end_inset 
 
- as
+ by a gain times the past of the excitation:
 \layout Standard
 
 
 \begin_inset Formula \[
-e[n]=\beta e[n-T]+c[n]\]
+e[n]\simeq p[n]=\beta e[n-T]\]
 
 \end_inset 
 
@@ -528,28 +529,60 @@
 innovation codebook
 \emph default 
 .
- In the 
-\emph on 
-z
-\emph default 
--domain, the excitation can be expressed as:
-\layout Standard
-
-
-\begin_inset Formula \[
-e(z)=\frac{1}{1-\beta z^{-T}}\: c(z)\]
-
+ We call that long-term prediction since the excitation is predicted from
+ 
+\begin_inset Formula $e[n-T]$
 \end_inset 
 
+ with 
+\begin_inset Formula $T\gg N$
+\end_inset 
 
+.
 \layout Subsection
 
 Innovation Codebook
 \layout Standard
 
+The final excitation 
+\begin_inset Formula $e[n]$
+\end_inset 
+
+ will be the sum of the pitch prediction and an 
+\emph on 
+innovation
+\emph default 
+ signal 
+\begin_inset Formula $c[n]$
+\end_inset 
+
+ taken from a fixed codebook.
+\layout Standard
+
+
+\begin_inset Formula \[
+e[n]=p[n]+c[n]=\beta e[n-T]+c[n]\]
+
+\end_inset 
+
 This is where most of the bits in a CELP codec are allocated.
  It represents the information that couldn't be obtained either from linear
  prediction or pitch prediction.
+ In the 
+\emph on 
+z
+\emph default 
+-domain we can represent the final signal 
+\begin_inset Formula $X(z)$
+\end_inset 
+
+ as 
+\begin_inset Formula \[
+X(z)=\frac{C(z)}{A(z)\left(1-\beta z^{-T}\right)}\]
+
+\end_inset 
+
+
 \layout Subsection
 
 Analysis-by-Synthesis and Error Weighting
@@ -2286,7 +2319,7 @@
 
 \layout Standard
 
-(Tones/DTMF to be implemented)
+reserved
 \end_inset 
 </cell>
 </row>

diff --git a/libspeex/nb_celp.c b/libspeex/nb_celp.c
index c3617f6..e6a289b 100644
--- a/libspeex/nb_celp.c
+++ b/libspeex/nb_celp.c

@@ -384,7 +384,7 @@
                break;
             mode--;
          }
-         /*fprintf (stderr, "%f %d\n", st->relative_quality, mode);*/
+         fprintf (stderr, "%f %d\n", st->relative_quality, mode);
          speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
       } else {
          st->relative_quality = -1;

diff --git a/libspeex/sb_celp.c b/libspeex/sb_celp.c
index 5591fbc..b4662b6 100644
--- a/libspeex/sb_celp.c
+++ b/libspeex/sb_celp.c

@@ -363,7 +363,7 @@
          ratio=0;
       /*if (ratio>-2)*/
       low_qual+=1.0*(ratio+2);
-      {
+      /*{
          int high_mode=2;
          if (low_qual>10)
             high_mode=4;
@@ -371,8 +371,26 @@
             high_mode=3;
          else if (low_qual>5)
             high_mode=2;
-         /*high_mode=1;*/
          speex_encoder_ctl(st, SPEEX_SET_HIGH_MODE, &high_mode);
+      }*/
+      {
+         int mode; /* high-band mode candidate, tried from 4 downwards; 0 if no threshold is exceeded */
+         mode = 4;
+         while (mode)
+         {
+            int v1; /* integer part of the VBR quality, used as the table column */
+            float thresh;
+            v1=(int)floor(st->vbr_quality);
+            if (v1==10) /* last column: there is no v1+1 entry to interpolate with */
+               thresh = vbr_hb_thresh[mode][v1];
+            else /* linear interpolation between the two neighbouring quality columns */
+               thresh = (st->vbr_quality-v1)*vbr_hb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_hb_thresh[mode][v1];
+            if (low_qual > thresh)
+               break;
+            mode--;
+         }
+         fprintf (stderr, "%f %d\n", low_qual, mode); /* NOTE(review): debug trace left enabled */
+         speex_encoder_ctl(state, SPEEX_SET_HIGH_MODE, &mode);
       }
       /*fprintf (stderr, "%f %f\n", ratio, low_qual);*/
    }
@@ -970,6 +988,7 @@
       {
          int q;
          float qual = (*(float*)ptr)+.5;
+         st->vbr_quality = (*(float*)ptr);
          if (qual>10)
             qual=10;
          q=(int)floor(.5+*(float*)ptr);

diff --git a/libspeex/vbr.c b/libspeex/vbr.c
index f8f28eb..a43db62 100644
--- a/libspeex/vbr.c
+++ b/libspeex/vbr.c

@@ -47,9 +47,9 @@
 
 float vbr_nb_thresh[8][11]={
    {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* silence */
-   { 3.9,  2.5,  2.0,  1.5,  0.5,  0.0,  0.0,  0.0,  0.0,  0.0, -1.0}, /*  2 kbps */
-   { 8.0,  6.0,  3.9,  4.5,  4.0,  3.5,  3.0,  2.5,  2.0,  1.0,  0.0}, /*  6 kbps */
-   {11.0,  8.5,  7.5,  7.0,  6.5,  6.0,  5.5,  5.0,  4.0,  3.0,  1.0}, /*  8 kbps */
+   { 3.9,  2.5,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, -1.0}, /*  2 kbps */
+   { 8.0,  5.6,  4.7,  4.2,  3.9,  3.5,  3.0,  2.5,  2.0,  1.0,  0.0}, /*  6 kbps */
+   {11.0,  8.5,  7.5,  6.5,  5.0,  3.9,  3.9,  3.9,  3.5,  3.0,  1.0}, /*  8 kbps */
    {11.0, 11.0,  9.9,  9.0,  8.0,  7.0,  6.5,  6.0,  5.0,  4.0,  2.0}, /* 11 kbps */
    {11.0, 11.0, 11.0, 11.0,  9.5,  9.0,  8.0,  7.0,  6.5,  5.0,  3.0}, /* 15 kbps */
    {11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.5,  8.5,  8.0,  6.5,  4.0}, /* 18 kbps */
@@ -57,6 +57,14 @@
 };
 
 
+float vbr_hb_thresh[5][11]={ /* thresholds indexed as [high-band mode][integer VBR quality 0..10] */
+   {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* silence */
+   { 3.9,  2.5,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, -1.0}, /*  2 kbps */
+   {11.0, 11.0,  9.9,  9.0,  8.0,  7.0,  6.5,  6.0,  5.0,  4.0,  2.0}, /*  6 kbps */
+   {11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.5,  8.5,  8.0,  6.5,  4.0}, /* 10 kbps */
+   {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.8,  7.5,  5.5}  /* 18 kbps */ 
+};
+
 void vbr_init(VBRState *vbr)
 {
    int i;
@@ -104,6 +112,7 @@
   non-stationary (harder to notice high-frequency noise)???
 
 */
+#include <stdio.h>
 float vbr_analysis(VBRState *vbr, float *sig, int len, int pitch, float pitch_coef)
 {
    int i;
@@ -153,13 +162,6 @@
       vbr->consec_noise=0;
    }
 
-   /* Checking for "pseudo temporal masking" */
-   if (ener < .1*vbr->average_energy)
-      qual -= .5;
-   if (ener < .01*vbr->average_energy)
-      qual -= .5;
-   if (ener < .001*vbr->average_energy)
-      qual -= .5;
    /* Checking for very low absolute energy */
    if (ener < 30000)
    {
@@ -169,26 +171,33 @@
       if (ener < 3000)
          qual-=.7;
    } else {
-      /* Checking for energy increases */
-      if (ener > vbr->last_energy*4.0)
-         qual += .7;
-      if (ener > vbr->last_energy*1.8)
-         qual += .7;
-      if (ener > 2*vbr->average_energy)
-         qual += .7;
-      if (ener > 4*vbr->average_energy)
-         qual += .7;
-      if (ener2 > 1.6*ener1)
-         qual += .7;
-      if (ener2 < .6*ener1)
-         qual -= .5;
+      float short_diff, long_diff;
+      short_diff = log((ener+1)/(1+vbr->last_energy));
+      long_diff = log((ener+1)/(1+vbr->average_energy));
+      /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/
 
-      if (ener < .3*vbr->last_energy)
-         qual -= .6;
+      if (long_diff<-5)
+         long_diff=-5;
+      if (long_diff>2)
+         long_diff=2;
+
+      if (long_diff>0)
+         qual += .6*long_diff;
+      if (long_diff<0)
+         qual += .5*long_diff;
+      if (short_diff>0)
+      {
+         if (short_diff>5)
+            short_diff=5;
+         qual += .5*short_diff;
+      }
+      /* Checking for energy increases */
+      if (ener2 > 1.6*ener1)
+         qual += .5;
    }
    vbr->last_energy = ener;
    vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
-   qual += 2.5*((pitch_coef-.4) + (vbr->soft_pitch-.4));
+   qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));
 
    if (qual < vbr->last_quality)
       qual = .5*qual + .5*vbr->last_quality;
@@ -197,7 +206,7 @@
    if (qual>10)
       qual=10;
    
-   if (vbr->consec_noise>=1)
+   if (vbr->consec_noise>=2)
       qual-=1.3;
    if (vbr->consec_noise>=5)
       qual-=1.3;

diff --git a/libspeex/vbr.h b/libspeex/vbr.h
index a8ca682..51c834e 100644
--- a/libspeex/vbr.h
+++ b/libspeex/vbr.h

@@ -39,6 +39,7 @@
 #define VBR_MEMORY_SIZE 5
 
 extern float vbr_nb_thresh[8][11];
+extern float vbr_hb_thresh[5][11];
 
 typedef struct VBRState {
    float energy_alpha;
commit	a7175649e28b332248c6f2788d96f117edf6100f	[log] [tgz]
author	jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>	Wed Nov 27 02:54:34 2002 +0000
committer	jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>	Wed Nov 27 02:54:34 2002 +0000
tree	ddab96e737f52a6bdd6caea850668cefafe3594b
parent	7e233051842e7d72a70b25aa3b30f5cb6e4073e9 [diff]