| /*---------------------------------------------------------------------------* |
| * utteranc.h * |
| * * |
| * Copyright 2007, 2008 Nuance Communciations, Inc. * |
| * * |
| * Licensed under the Apache License, Version 2.0 (the 'License'); * |
| * you may not use this file except in compliance with the License. * |
| * * |
| * You may obtain a copy of the License at * |
| * http://www.apache.org/licenses/LICENSE-2.0 * |
| * * |
| * Unless required by applicable law or agreed to in writing, software * |
| * distributed under the License is distributed on an 'AS IS' BASIS, * |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * |
| * See the License for the specific language governing permissions and * |
| * limitations under the License. * |
| * * |
| *---------------------------------------------------------------------------*/ |
| |
| |
| |
| #ifndef _h_utteranc_ |
| #define _h_utteranc_ |
| |
| #ifdef SET_RCSID |
| static const char utteranc_h[] = "$Id: utteranc.h,v 1.3.6.7 2007/08/31 17:44:53 dahan Exp $"; |
| #endif |
| |
| |
| |
| #include "all_defs.h" |
| #include "hmm_type.h" |
| #include "fpi_tgt.h" |
| #include "voicing.h" |
| #include "specnorm.h" |
| #include "channorm.h" |
| #include "swicms.h" |
| #ifndef _RTT |
| #include "duk_io.h" |
| #endif |
| |
| #define DEFAULT_BUFFER_SIZE 100 /* in frames */ |
| #define KEEP_FRAMES 40 /* in frames, past frames kept */ |
| |
| /* Functions supported are |
| ** new, delete (by source) |
| ** open file/device, close file/device |
| ** attach and detach sink |
| ** read/store samples - including the header |
| */ |
| |
| /** |
| * @todo document |
| */ |
| typedef struct |
| { /* label structure */ |
| char *label; |
| long begin; |
| long end; |
| char *extra; |
| unsigned char flag; |
| } |
| annotate; |
| |
| |
| /** |
| * @todo document |
| */ |
| typedef struct |
| { |
| int utt_type; |
| int dim; |
| fepFramePkt *frame; |
| int num_chan; |
| int do_channorm; |
| spect_dist_info **spchchan; /* Mirrored from the Wave object */ |
| norm_info *channorm; /* Mirrored from the Wave object */ |
| swicms_norm_info *swicms; /* copy of wave obj pointer */ |
| spect_dist_info *backchan[MAX_CHAN_DIM]; |
| featdata *last_push; |
| int voice_duration; |
| int quiet_duration; |
| int unsure_duration; |
| int start_windback; |
| } |
| utt_generic_info; |
| |
| #ifndef _RTT |
| /** |
| * @todo document |
| */ |
| typedef struct |
| { |
| char typ; /* s (16 bit), c (8 bit), u (newton .utb) */ |
| int endian; /* 0 is little 1 is big */ |
| int do_skip; /* skip every other frame */ |
| unsigned long len; /* length of file/utterance */ |
| PFile* file; /* pointer to file */ |
| char name[MAX_LABEL]; /* file name */ |
| /* int op; read or write */ |
| int num_utts; /* no. of utterances in utb file */ |
| annotate *utb_table; /* utb file header information */ |
| } |
| utt_file_info; |
| |
| /** |
| * @todo document |
| */ |
| typedef struct |
| { |
| int utt_type; |
| int dim; |
| fepFramePkt *frame; |
| int num_chan; |
| int do_channorm; |
| spect_dist_info **spchchan; /* Mirrored from the Wave object */ |
| norm_info *channorm; /* Mirrored from the Wave object */ |
| swicms_norm_info *swicms; /* copy of wave obj pointer */ |
| spect_dist_info *backchan[MAX_CHAN_DIM]; |
| featdata *last_push; |
| int voice_duration; |
| int quiet_duration; |
| int unsure_duration; |
| int start_windback; |
| /* voicing_info voice; */ |
| utt_file_info file; |
| } |
| file_utterance_info; |
| #endif |
| |
| /** |
| * @todo document |
| */ |
| typedef struct |
| { |
| int utt_type; |
| int dim; |
| fepFramePkt *frame; |
| int num_chan; |
| int do_channorm; |
| spect_dist_info **spchchan; /* Mirrored from the Wave object */ |
| norm_info *channorm; /* Mirrored from the Wave object */ |
| swicms_norm_info *swicms; /* copy of wave obj pointer */ |
| spect_dist_info *backchan[MAX_CHAN_DIM]; |
| featdata *last_push; |
| int voice_duration; |
| int quiet_duration; |
| int unsure_duration; |
| int start_windback; |
| } |
| live_utterance_info; |
| |
| /** |
| * @todo document |
| */ |
| typedef union |
| { |
| int utt_type; /* live or from file */ |
| utt_generic_info gen_utt; /* generic one */ |
| #ifndef _RTT |
| file_utterance_info file_utt; |
| #endif |
| live_utterance_info live_utt; |
| } utterance_info; |
| |
| |
| /* |
| ** Size of the utb file headers and details |
| */ |
| |
| #ifndef _RTT |
| #define UTT_VERSION 2 |
| #define UTT_HEADER_SIZE 16 /*Size on disk*/ |
| #define UTB_HEADER_SIZE 32 /*Size on disk*/ |
| #define UTB_HEADER_USED 16 /*Size on disk*/ /* SAL */ |
| |
| /** |
| * UTB file header. |
| */ |
| typedef struct _UttHeader |
| { |
| /** |
| * The size of the header in bytes. |
| */ |
| unsigned short headerSize; |
| /** |
| * The version of the file format. |
| */ |
| unsigned short version; |
| /** |
| * The size of the payload in bytes. |
| */ |
| unsigned long nBytes; |
| /** |
| * The number of parameters per frame. |
| */ |
| unsigned short nParametersPerFrame; |
| /** |
| * 0=unknown, 1=none, 2=amp-based, 3=harmonicity-based, 4=mrec style |
| */ |
| unsigned short channelNormalization; |
| /** |
| * 0=unknown, 1=no, 2=yes |
| */ |
| unsigned short speakerNormalization; |
| /** |
| * 0=unknown, 1=no, 2=yes |
| */ |
| unsigned short imeldaization; |
| /** |
| * Before imelda truncation. |
| */ |
| unsigned short nOriginalParameters; |
| /** |
| * The number of samples per frame. |
| */ |
| unsigned short samplesPerFrame; |
| /** |
| * The audio sample rate. |
| */ |
| unsigned long sampleRate; |
| /** |
| * not used in version 5. |
| */ |
| unsigned long checksum; |
| } |
| UttHeader; |
| |
| int update_utb_header(file_utterance_info *utt, int frames, int samplerate, |
| int framerate); |
| void init_utt_v5_header(UttHeader *uhead, int dim, int samplerate, int framerate); |
| int init_data_file(char *filename, file_utterance_info *utt, int dimen, |
| char typ, int endian, int do_skip); |
| int new_data_file(char *filename, file_utterance_info *utt, int dimen, |
| char typ, int endian); |
| int set_data_frame(file_utterance_info *utt, long begin); |
| int buffer_data_frames(file_utterance_info *utt, long f_begin, long f_end); |
| void more_data_frames(file_utterance_info *utt); |
| int save_data_frames(file_utterance_info *utt); |
| void close_data_stream(file_utterance_info *utt); |
| int init_utb_file(file_utterance_info *utt, annotate **table); |
| int position_utb_file(file_utterance_info *utt, long position, annotate *table); |
| int load_utb_data(file_utterance_info *utt, int num_frames, int do_skip); |
| int load_short_data(file_utterance_info *utt, int num_frames, int do_skip); |
| int save_utb_data(file_utterance_info *utt, int num_frames); |
| int save_short_data(file_utterance_info *utt, int num_frames); |
| int read_utt_head(UttHeader *head, PFile* datafile); |
| int write_utt_head(UttHeader *head, PFile* datafile); |
| int check_for_utb(char* filename); |
| |
| /* TCP reading routines |
| */ |
| int read_tcp(char *filename, annotate **tag_base); |
| int read_lst(char *filename, annotate *tag_base, int ntags); |
| int read_utb_table(char *filename, annotate **tag_base); |
| void save_tcp(char *tcpnam, annotate *tag, int ntags); |
| void compose_tcp_name_of_utt(char* uttname , char* tcpname); |
| |
| #endif |
| |
| void init_utterance(utterance_info *utt, int utt_type, int dimen, |
| int buffer_size, int keep_frames, int num_chan, int do_voicing); |
| void set_voicing_durations(utterance_info *utt, int voice_duration, |
| int quiet_duration, int unsure_duration, |
| int start_windback); |
| void free_utterance(utterance_info *utt); |
| int utterance_started(utterance_info *utt); |
| int utterance_ended(utterance_info *utt); |
| int load_utterance_frame(utterance_info *utt, unsigned char* pUttFrame, int voicing); |
| int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt); |
| |
| #endif /* _h_utteranc_ */ |