| /*---------------------------------------------------------------------------* |
| * spec_anl.c * |
| * * |
| * Copyright 2007, 2008 Nuance Communciations, Inc. * |
| * * |
| * Licensed under the Apache License, Version 2.0 (the 'License'); * |
| * you may not use this file except in compliance with the License. * |
| * * |
| * You may obtain a copy of the License at * |
| * http://www.apache.org/licenses/LICENSE-2.0 * |
| * * |
| * Unless required by applicable law or agreed to in writing, software * |
| * distributed under the License is distributed on an 'AS IS' BASIS, * |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * |
| * See the License for the specific language governing permissions and * |
| * limitations under the License. * |
| * * |
| *---------------------------------------------------------------------------*/ |
| |
| |
| |
| #include <stdlib.h> |
| #ifndef _RTT |
| #include <stdio.h> |
| #endif |
| #include <string.h> |
| #include <math.h> |
| #include <limits.h> |
| #include <assert.h> |
| |
| #include "hmm_desc.h" |
| #include "front.h" |
| #include "pendian.h" |
| #include "portable.h" |
| #include "LCHAR.h" |
| |
| #include "../clib/memmove.h" |
| |
| #define DEBUG 0 |
| |
| #include "sh_down.h" |
| |
| static int sort_ints_unique(int *list, int *num); |
| //static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj); |
| |
| void peakpick(front_freq *freqobj, fftdata *density, int num_freq); |
| void magsq(fftdata *x, fftdata *y, fftdata *z, int ns); |
| |
| void preemph(fftdata *data, int window_len, samdata *wav_data, |
| int num_samples, coefdata pre_mel, |
| bigdata *last_sample); |
| void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo); |
| |
| |
| |
| void filterbank_emulation(front_channel * channel, front_wave *waveobj, |
| front_freq *freqobj, front_cep *cepobj, samdata *income, |
| samdata *outgo, int num_samples) |
| { |
| /* Part II. Mel cepstrum coefficients |
| ** |
| ** Maintain parameter queue */ |
| MEMMOVE(channel->cep + (channel->mel_dim + 1), channel->cep, |
| (Q2 - 1) *(channel->mel_dim + 1), sizeof(cepdata)); |
| channel->shift = 0; |
| |
| /* 2.01 Pre-emphasize waveform |
| Only the new samples are preemphasized. To carry on from the previous call, |
| the last sample value is stored in lastx. |
| */ |
| preemph(channel->prebuff, freqobj->window_length, income, num_samples, |
| waveobj->pre_mel, &channel->lastx); |
| |
| #if DEBUG |
| log_report("preemphasized data\n"); |
| write_scaled_frames(freqobj->window_length, 1, channel->prebuff, D_FIXED, (float) 1 / (0x01 << WAVE_SHIFT)); |
| #endif |
| /****************************************************************************** |
| ** The "NEW" fft performs shifting operations in fixed point, to maximise |
| ** precision. |
| ** |
| *******************************************************************************/ |
| channel->shift += place_sample_data(&freqobj->fft, channel->prebuff, |
| freqobj->ham, freqobj->window_length); |
| #if DEBUG |
| log_report("windowed data\n"); |
| if (channel->shift >= 0) |
| { |
| write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift)); |
| write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)(0x01 << channel->shift)); |
| } |
| else |
| { |
| write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift)); |
| write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)1 / (0x01 << -channel->shift)); |
| } |
| #endif |
| channel->shift *= 2; |
| channel->shift += fft_perform_and_magsq(&freqobj->fft); |
| |
| #if DEBUG |
| log_report("After magnitude squared (%d)\n", channel->frame_count); |
| if (channel->shift >= 0) |
| write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << (channel->shift))); |
| else |
| write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << (- channel->shift))); |
| #endif |
| |
| #if DEBUG |
| log_report("After magnitude squared: "); |
| if (channel->shift >= 0) |
| write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift)); |
| else |
| write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift)); |
| #endif |
| |
| if (freqobj->do_nonlinear_filter) |
| peakpick(freqobj, freqobj->fft.real, freqobj->fft.size + 1); |
| |
| #if DEBUG |
| log_report("After peakpick: "); |
| if (channel->shift >= 0) |
| write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift)); |
| else |
| write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift)); |
| #endif |
| |
| /* 2.23 Apply filterbank emulation */ |
| channel->shift += RAMP_SHIFT; |
| filtbank(freqobj, freqobj->fft.real, channel->filterbank); |
| #if DEBUG |
| log_report("After filterbanked: "); |
| if (channel->shift >= 0) |
| write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)(0x01 << channel->shift)); |
| else |
| write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)1 / (0x01 << -channel->shift)); |
| #endif |
| |
| return; |
| } |
| |
| |
| void preemph(fftdata *data, int window_len, samdata *wav_data, |
| int num_samples, coefdata pre_mel, |
| bigdata *last_sample) |
| /* |
| ** pre-emphasize on speech data, check for end of data */ |
| /* SCALE: In this stage we're introducing a scale factor of 2 */ |
| { |
| int i; |
| bigdata temp; |
| |
| ASSERT(data); |
| ASSERT(last_sample); |
| ASSERT(wav_data); |
| ASSERT(num_samples >= 0); |
| if (num_samples > window_len) |
| num_samples = window_len; |
| |
| if (num_samples < window_len) |
| MEMMOVE(data, data + num_samples, (window_len - num_samples), |
| sizeof(fftdata)); |
| data += window_len - num_samples; |
| |
| /* If no preemphasis to do |
| */ |
| if (pre_mel == 0) |
| { /* dont't shift */ |
| for (i = 0; i < num_samples; i++) |
| data[i] = (fftdata) wav_data[i]; |
| return; |
| } |
| |
| /* Otherwise do the preemphasis |
| */ |
| for (i = 0; i < num_samples; i++) |
| { |
| temp = SHIFT_UP((bigdata)wav_data[i], COEFDATA_SHIFT); |
| data[i] = (fftdata)(SHIFT_DOWN(temp - (*last_sample), COEFDATA_SHIFT)); |
| *last_sample = (bigdata)pre_mel * wav_data[i]; |
| |
| } |
| return; |
| } |
| |
| void magsq(fftdata *x, fftdata *y, fftdata *z, int ns) |
| /* |
| ** magnitude squared, tailored for TI FFT routines |
| ** The dynamic range should fit 32 - RAMP_SHIFT */ |
| { |
| int i; |
| |
| ASSERT((float)x[0] *(float)x[0] < LONG_MAX); |
| ASSERT((float)x[0] *(float)x[0] > LONG_MIN); |
| z[0] = (fftdata)((bigdata)x[0] * (bigdata)x[0]); |
| for (i = 1; i < ns; i++) |
| { |
| ASSERT(((fftdata)x[i] *(fftdata)x[i]) >= 0); |
| ASSERT(((fftdata)y[i] *(fftdata)y[i]) >= 0); |
| ASSERT((float)x[i] *(float)x[i] < LONG_MAX); |
| ASSERT((float)x[i] *(float)x[i] > LONG_MIN); |
| ASSERT((float)y[i] *(float)y[i] < LONG_MAX); |
| ASSERT((float)y[i] *(float)y[i] > LONG_MIN); |
| /* z[i]= (fftdata) SHIFT_DOWN ((bigdata)x[i] * (bigdata)x[i] + (bigdata)y[i] * (bigdata)y[i], RAMP_SHIFT); |
| */ |
| z[i] = (fftdata)(((bigdata)x[i] * (bigdata)x[i]) |
| + ((bigdata)y[i] * (bigdata)y[i])); |
| if (z[i] <= 0) |
| z[i] = (fftdata) 1; |
| } |
| return; |
| } |
| |
| void peakpick(front_freq *freqobj, fftdata *density, int num_freq) |
| { |
| int i; |
| fftdata peak; |
| fftdata bdecay; |
| fftdata fdecay; |
| int first; |
| int last; |
| |
| ASSERT(freqobj); |
| /* Fixed pt requires scale up of COEFDATA_SHIFT on these pars (coefdata) */ |
| bdecay = freqobj->peakpickdown; |
| fdecay = freqobj->peakpickup; |
| |
| if ((bdecay <= (fftdata) 0.0) && (fdecay <= (fftdata) 0.0)) |
| return; |
| |
| first = freqobj->cut_off_below; |
| last = freqobj->cut_off_above; |
| /* this filters from cut_off_below to */ |
| /* cut_off_above inclusive */ |
| |
| if (last >= num_freq) |
| last = num_freq - 1; |
| /* as most routines seem to check both */ |
| /* limits */ |
| |
| if (bdecay > 0.0) |
| { |
| ASSERT(density[last] >= 0); |
| peak = density[last]; |
| for (i = last - 1; i >= first; i--) |
| { |
| peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)bdecay); |
| ASSERT(peak >= 0); |
| if (density[i] > peak) |
| peak = density[i]; |
| else |
| density[i] = peak; |
| } |
| } |
| if (fdecay > 0.0) |
| { |
| peak = density[first]; |
| for (i = first + 1; i <= last; i++) |
| { |
| peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)fdecay); |
| if (density[i] > peak) |
| peak = density[i]; |
| else |
| density[i] = peak; |
| } |
| } |
| return; |
| } |
| |
| void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo) |
| /* |
| ** pwr spect -> filter bank output (linear) */ |
| { |
| int i, j, k; |
| bigdata t, sum, mom, nxt; |
| |
| /* Scale down before starting mel-filterbank operations |
| */ |
| for (i = 0; i < freqobj->cut_off_above; i++) |
| density[i] = SHIFT_DOWN(density[i], RAMP_SHIFT); |
| |
| j = MAX(freqobj->fcmid[0], freqobj->cut_off_below); |
| nxt = 0; |
| for (; j < freqobj->fcmid[1]; j++) |
| { |
| ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) < LONG_MAX); |
| ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) > -LONG_MAX); |
| nxt += (bigdata) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT); |
| } |
| for (i = 0, k = 2; i < freqobj->nf; i++, k++) |
| { |
| sum = mom = 0; |
| for (; j < freqobj->fcmid[k]; j++) |
| { |
| /* TODO: Tidy up this fixed pt shifting. BP */ |
| |
| ASSERT((float) freqobj->framp[j] *(float) density[j] < LONG_MAX); |
| ASSERT((float) freqobj->framp[j] *(float) density[j] > LONG_MIN); |
| ASSERT((float) sum + (float)density[j] < LONG_MAX); |
| ASSERT((float) sum + (float)density[j] > LONG_MIN); |
| sum += (bigdata) density[j]; |
| ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] < LONG_MAX); |
| ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] > LONG_MIN); |
| |
| mom += (bigdata)(long) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT); |
| } |
| |
| ASSERT(((float)nxt + (float)sum - (float)mom) < LONG_MAX); |
| ASSERT(((float)nxt + (float)sum - (float)mom) > LONG_MIN); |
| |
| /* TODO: refine this expression. Shift down fcscl in advance. */ |
| t = (bigdata)((SHIFT_UP(nxt + sum - mom, HALF_RAMP_SHIFT) |
| + SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT + 1)) |
| / SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT)); |
| /* TODO: cleanup and also check for division by zero */ |
| nxt = mom; |
| fbo[i] = (cepdata) t; |
| } |
| return; |
| } |
| |
| int create_spectrum_filter(front_freq *freqobj, int *freq, int *spread) |
| { |
| int ii, jj, freq_step; |
| int lo, hi; |
| ASSERT(freqobj); |
| ASSERT(freqobj->spectrum_filter_num == 0); |
| ASSERT(freqobj->samplerate > 0); |
| /* Convert to FFT taps. Mark adjacent taps as well as taps within spread */ |
| freq_step = (freqobj->samplerate << 12) / (2 * freqobj->fft.size); |
| freqobj->spectrum_filter = (int *) CALLOC_CLR(freqobj->fft.size + 1, sizeof(int), "cfront.spectrum_filter"); |
| freqobj->spectrum_filter_num = 0; |
| for (ii = 0 ; ii < MAX_FILTER_NUM; ii++) |
| { |
| if (freq[ii] == 0) |
| continue; |
| lo = (((freq[ii] - spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate; |
| hi = (((freq[ii] + spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate; |
| |
| |
| for (jj = lo; jj <= hi;jj++) |
| { |
| if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size) |
| SERVICE_ERROR(MAX_FILTER_POINTS_EXCEEDED); |
| freqobj->spectrum_filter[freqobj->spectrum_filter_num++] = jj; |
| } |
| /* jj=0; |
| while (((jj+1)*freq_step)>>12 <= freq[ii]-spread[ii]) |
| jj++; |
| while (((jj-1)*freq_step>>12) < freq[ii]+spread[ii]){ |
| if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size) |
| SERVICE_ERROR (MAX_FILTER_POINTS_EXCEEDED); |
| freqobj->spectrum_filter[freqobj->spectrum_filter_num++]= jj; |
| jj++; |
| } |
| */ |
| } |
| sort_ints_unique(freqobj->spectrum_filter, &freqobj->spectrum_filter_num); |
| return (freqobj->spectrum_filter_num); |
| } |
| |
| void clear_spectrum_filter(front_freq *freqobj) |
| { |
| ASSERT(freqobj->spectrum_filter); |
| if (freqobj->spectrum_filter) |
| FREE((char *) freqobj->spectrum_filter); |
| freqobj->spectrum_filter = NULL; |
| freqobj->spectrum_filter_num = 0; |
| return; |
| } |
| |
/* qsort comparator: ascending order of ints, written without subtraction so
** that it cannot overflow. */
static int compare_ints_ascending(const void *lhs, const void *rhs)
{
    const int a = *(const int *) lhs;
    const int b = *(const int *) rhs;

    return (a > b) - (a < b);
}

static int sort_ints_unique(int *list, int *num)
/*
** Sort list[0 .. *num-1] into ascending order and drop repeated values.
**
** list  array to sort in place
** num   in: number of entries; out: number of distinct entries kept at the
**       front of list
**
** Returns the new value of *num.  Entries beyond the new count are left
** with unspecified contents.
*/
{
    int src, kept;

    if (*num < 2)
        return *num;            /* nothing to order or to compare */

    qsort(list, (size_t) *num, sizeof(int), compare_ints_ascending);

    /* After sorting, equal values are adjacent: keep the first of each run */
    kept = 1;
    for (src = 1; src < *num; src++)
    {
        if (list[src] != list[kept - 1])
            list[kept++] = list[src];
    }

    *num = kept;
    return *num;
}
| |
| //static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj) |
| //{ |
| // for (int i = 0; i < freqobj->spectrum_filter_num; ++i) |
| // { |
| // ASSERT(freqobj->spectrum_filter[i] < num); |
| // data[freqobj->spectrum_filter[i]] = 0; |
| // } |
| //} |
| |
| /* -------------------------------------------------- |
| freq_warp will do pure linear warping if the warp |
| scale > 1.0. Otherwise it will do piecewise warp |
| which means warping the second part, from xstart |
| to the bandwidth with another scale which is |
| determined by b and c in the formulation. |
| In general, 0.7 < wscale < 1.4, and xstart <= 1 |
| 08/15/01, Puming Zhan |
| --------------------------------------------------- */ |
| void freq_warp(front_freq *freqobj, fftdata *inbuf, int ns) |
| { |
| int i; |
| int nsE; |
| float x1, y1, b, c, wscale; |
| fftdata *tmpbuf; |
| |
| ASSERT(freqobj && inbuf); |
| |
| ASSERT(freqobj->warp_scale != 0); |
| |
| wscale = freqobj->warp_scale; |
| x1 = freqobj->piecewise_start; |
| tmpbuf = (fftdata *) CALLOC(ns, sizeof(fftdata), "cfront.tmpbuf"); |
| |
| if (wscale < MIN_WARP_SCALE || wscale > MAX_WARP_SCALE) |
| { |
| SERVICE_ERROR(WARP_SCALE); |
| } |
| if (x1 > 1.0 || x1 < 0.5) |
| { |
| SERVICE_ERROR(PIECEWISE_START); |
| } |
| |
| y1 = x1 < wscale ? (float)x1 / wscale : (float)1.0; |
| |
| b = y1 < 1.0 ? (float)((1.0 - x1) / (1.0 - y1)) : (float)0.0; |
| |
| c = (float)((1.0 - b) * (ns - 1)); |
| |
| nsE = (int)(y1 * (ns - 1)); |
| |
| for (i = 0; i < ns; i++) |
| { |
| float x = i > nsE ? b * i + c : wscale * i; |
| int u = (int)ceil((double)x); |
| int l = (int)floor((double)x); |
| float w1 = x - l; |
| float w2 = 1 - w1; |
| |
| if (u < ns) |
| { |
| tmpbuf[i] = (int)(w1 * inbuf[u] + w2 * inbuf[l]); |
| } |
| else |
| { |
| tmpbuf[i] = inbuf[ns-1]; |
| } |
| } |
| |
| /* need to copy the warped fft into inbuf */ |
| /* because the following function filtbank() */ |
| /* will take inbuf as input */ |
| /* considering that this function will be */ |
| /* for every frame, it may not be a good idea*/ |
| /* to do malloc here */ |
| |
| for (i = 0; i < ns; i++) |
| inbuf[i] = tmpbuf[i]; |
| |
| FREE((char *) tmpbuf); |
| } |