/* srec/include/utteranc.h - platform/external/srec - Git at Google */

 /*---------------------------------------------------------------------------*
  *  utteranc.h  *
  *                                                                           *
  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
  *                                                                           *
  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
  *  you may not use this file except in compliance with the License.         *
  *                                                                           *
  *  You may obtain a copy of the License at                                  *
  *      http://www.apache.org/licenses/LICENSE-2.0                           *
  *                                                                           *
  *  Unless required by applicable law or agreed to in writing, software      *
  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  *  See the License for the specific language governing permissions and      *
  *  limitations under the License.                                           *
  *                                                                           *
  *---------------------------------------------------------------------------*/


 #ifndef _h_utteranc_
 #define _h_utteranc_

 /*
  * RCS identification string for this header. It is only compiled in when
  * SET_RCSID is defined; being 'static', every translation unit that includes
  * the header with SET_RCSID set gets its own copy.
  */
 #ifdef SET_RCSID
 static const char utteranc_h[] = "$Id: utteranc.h,v 1.3.6.7 2007/08/31 17:44:53 dahan Exp $";
 #endif


 #include "all_defs.h"
 #include "hmm_type.h"
 #include "fpi_tgt.h"
 #include "voicing.h"
 #include "specnorm.h"
 #include "channorm.h"
 #include "swicms.h"
 #ifndef _RTT
 #include "duk_io.h"
 #endif

 /*
  * Default utterance buffer sizing, both expressed in frames.
  * NOTE(review): presumably the defaults for the buffer_size and keep_frames
  * arguments of init_utterance() -- confirm against the callers.
  */
 #define DEFAULT_BUFFER_SIZE 100 /* in frames */
 #define KEEP_FRAMES   40 /* in frames, past frames kept */

 /*  Functions supported are
 **  new, delete (by source)
 **  open file/device, close file/device
 **  attach and detach sink
 **  read/store samples - including the header
 */

 /**
  * Label structure.
  *
  * NOTE(review): the units of begin/end, the meaning of flag and the ownership
  * of the two strings are not visible in this header -- confirm against the
  * code that reads and writes these records.
  */
 typedef struct {
   char          *label;
   long           begin;
   long           end;
   char          *extra;
   unsigned char  flag;
 } annotate;


 /**
  * Utterance state common to every utterance source.
  *
  * file_utterance_info and live_utterance_info begin with exactly these
  * members in exactly this order, and all three types are members of the
  * utterance_info union, so the member list here must be kept in step with
  * those two structs.
  */
 typedef struct {
   int               utt_type;
   int               dim;
   fepFramePkt      *frame;
   int               num_chan;
   int               do_channorm;
   spect_dist_info **spchchan;                /* Mirrored from the Wave object */
   norm_info        *channorm;                /* Mirrored from the Wave object */
   swicms_norm_info *swicms;                  /* copy of wave obj pointer */
   spect_dist_info  *backchan[MAX_CHAN_DIM];
   featdata         *last_push;
   /* The four members below share their names with the parameters of
      set_voicing_durations(). */
   int               voice_duration;
   int               quiet_duration;
   int               unsure_duration;
   int               start_windback;
 } utt_generic_info;

 #ifndef _RTT
 /**
  * File details for a file-based utterance; embedded in file_utterance_info
  * as its 'file' member.
  */
 typedef struct {
   char           typ;              /* s (16 bit), c (8 bit), u (newton .utb) */
   int            endian;           /* 0 is little 1 is big */
   int            do_skip;          /* skip every other frame */
   unsigned long  len;              /* length of file/utterance */
   PFile         *file;             /* pointer to file */
   char           name[MAX_LABEL];  /* file name */
   /* int op;  read or write  (member is commented out in the source) */
   int            num_utts;         /* no. of utterances in utb file */
   annotate      *utb_table;        /* utb file header information */
 } utt_file_info;

 /**
  * Utterance whose data comes from a file.
  *
  * The members up to and including start_windback repeat utt_generic_info
  * member for member; the file details follow. Keep the leading members in
  * step with utt_generic_info, since both types live in the utterance_info
  * union.
  */
 typedef struct {
   int               utt_type;
   int               dim;
   fepFramePkt      *frame;
   int               num_chan;
   int               do_channorm;
   spect_dist_info **spchchan;                /* Mirrored from the Wave object */
   norm_info        *channorm;                /* Mirrored from the Wave object */
   swicms_norm_info *swicms;                  /* copy of wave obj pointer */
   spect_dist_info  *backchan[MAX_CHAN_DIM];
   featdata         *last_push;
   int               voice_duration;
   int               quiet_duration;
   int               unsure_duration;
   int               start_windback;
   /* voicing_info voice;  (member is commented out in the source) */
   utt_file_info     file;                    /* file-specific details */
 } file_utterance_info;
 #endif

 /**
  * Utterance captured live.
  *
  * The member list is identical to utt_generic_info; keep the two in step,
  * since both types are members of the utterance_info union.
  */
 typedef struct {
   int               utt_type;
   int               dim;
   fepFramePkt      *frame;
   int               num_chan;
   int               do_channorm;
   spect_dist_info **spchchan;                /* Mirrored from the Wave object */
   norm_info        *channorm;                /* Mirrored from the Wave object */
   swicms_norm_info *swicms;                  /* copy of wave obj pointer */
   spect_dist_info  *backchan[MAX_CHAN_DIM];
   featdata         *last_push;
   int               voice_duration;
   int               quiet_duration;
   int               unsure_duration;
   int               start_windback;
 } live_utterance_info;

 /**
  * An utterance of any source type.
  *
  * Every struct member of this union starts with 'int utt_type', so the bare
  * utt_type member overlays that field of whichever variant is stored. The
  * file variant is only present when _RTT is not defined.
  */
 typedef union {
   int                  utt_type;  /* live or from file */
   utt_generic_info     gen_utt;   /* generic one */
 #ifndef _RTT
   file_utterance_info  file_utt;
 #endif
   live_utterance_info  live_utt;
 } utterance_info;


 /*
 **  Size of the utb file headers and details
 */

 #ifndef _RTT
 /*
  * File-format constants; the sizes are on-disk byte counts.
  * NOTE(review): UTT_VERSION is 2, while init_utt_v5_header() and the
  * UttHeader checksum comment refer to "version 5" -- confirm which version
  * numbering is current.
  */
 #define UTT_VERSION 2
 #define UTT_HEADER_SIZE 16        /*Size on disk*/
 #define UTB_HEADER_SIZE 32        /*Size on disk*/
 #define UTB_HEADER_USED 16        /*Size on disk*/   /* SAL */

 /**
  * UTB file header.
  *
  * NOTE(review): the in-memory size of this struct depends on the platform's
  * 'unsigned long' width and on padding, so it need not equal the on-disk
  * header size -- confirm how read_utt_head()/write_utt_head() serialise it.
  */
 typedef struct _UttHeader {
   unsigned short headerSize;            /**< The size of the header in bytes. */
   unsigned short version;               /**< The version of the file format. */
   unsigned long  nBytes;                /**< The size of the payload in bytes. */
   unsigned short nParametersPerFrame;   /**< The number of parameters per frame. */
   unsigned short channelNormalization;  /**< 0=unknown, 1=none, 2=amp-based, 3=harmonicity-based, 4=mrec style */
   unsigned short speakerNormalization;  /**< 0=unknown, 1=no, 2=yes */
   unsigned short imeldaization;         /**< 0=unknown, 1=no, 2=yes */
   unsigned short nOriginalParameters;   /**< Before imelda truncation. */
   unsigned short samplesPerFrame;       /**< The number of samples per frame. */
   unsigned long  sampleRate;            /**< The audio sample rate. */
   unsigned long  checksum;              /**< not used in version 5. */
 } UttHeader;

 /*
  * File-backed utterance routines (declared only when _RTT is not defined).
  *
  * NOTE(review): only the signatures are visible in this header. The meaning
  * of the int return values and any ownership rules must be confirmed against
  * the implementations before relying on them.
  */
 int  update_utb_header(file_utterance_info *utt, int frames,
                        int samplerate, int framerate);
 void init_utt_v5_header(UttHeader *uhead, int dim,
                         int samplerate, int framerate);

 int  init_data_file(char *filename, file_utterance_info *utt, int dimen,
                     char typ, int endian, int do_skip);
 int  new_data_file(char *filename, file_utterance_info *utt, int dimen,
                    char typ, int endian);

 int  set_data_frame(file_utterance_info *utt, long begin);
 int  buffer_data_frames(file_utterance_info *utt, long f_begin, long f_end);
 void more_data_frames(file_utterance_info *utt);
 int  save_data_frames(file_utterance_info *utt);
 void close_data_stream(file_utterance_info *utt);

 int  init_utb_file(file_utterance_info *utt, annotate **table);
 int  position_utb_file(file_utterance_info *utt, long position,
                        annotate *table);

 int  load_utb_data(file_utterance_info *utt, int num_frames, int do_skip);
 int  load_short_data(file_utterance_info *utt, int num_frames, int do_skip);
 int  save_utb_data(file_utterance_info *utt, int num_frames);
 int  save_short_data(file_utterance_info *utt, int num_frames);

 int  read_utt_head(UttHeader *head, PFile *datafile);
 int  write_utt_head(UttHeader *head, PFile *datafile);
 int  check_for_utb(char *filename);

 /*
  * TCP reading routines.
  *
  * The readers that take 'annotate **' receive the address of the caller's
  * table pointer. NOTE(review): presumably they allocate the table and return
  * the entry count -- confirm against the implementations.
  */
 int  read_tcp(char *filename, annotate **tag_base);
 int  read_lst(char *filename, annotate *tag_base, int ntags);
 int  read_utb_table(char *filename, annotate **tag_base);
 void save_tcp(char *tcpnam, annotate *tag, int ntags);
 void compose_tcp_name_of_utt(char *uttname, char *tcpname);

 #endif

 /*
  * Utterance routines available in every build (they sit outside the
  * #ifndef _RTT section).
  *
  * The last four parameters of set_voicing_durations() share their names with
  * the duration/windback members of the utterance structs.
  * NOTE(review): return-value conventions of the int functions are not
  * visible here -- confirm against the implementations.
  */
 void init_utterance(utterance_info *utt, int utt_type, int dimen,
                     int buffer_size, int keep_frames,
                     int num_chan, int do_voicing);
 void set_voicing_durations(utterance_info *utt,
                            int voice_duration, int quiet_duration,
                            int unsure_duration, int start_windback);
 void free_utterance(utterance_info *utt);

 int  utterance_started(utterance_info *utt);
 int  utterance_ended(utterance_info *utt);

 int  load_utterance_frame(utterance_info *utt, unsigned char *pUttFrame,
                           int voicing);
 int  copy_utterance_frame(utterance_info *oututt, utterance_info *inutt);

 #endif /* _h_utteranc_ */
	/*---------------------------------------------------------------------------*
	 *  utteranc.h  *
	 *                                                                           *
	 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
	 *                                                                           *
	 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
	 *  you may not use this file except in compliance with the License.         *
	 *                                                                           *
	 *  You may obtain a copy of the License at                                  *
	 *      http://www.apache.org/licenses/LICENSE-2.0                           *
	 *                                                                           *
	 *  Unless required by applicable law or agreed to in writing, software      *
	 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
	 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
	 *  See the License for the specific language governing permissions and      *
	 *  limitations under the License.                                           *
	 *                                                                           *
	 *---------------------------------------------------------------------------*/



	#ifndef _h_utteranc_
	#define _h_utteranc_

	/*
	 * RCS identification string for this header. It is only compiled in when
	 * SET_RCSID is defined; being 'static', every translation unit that
	 * includes the header with SET_RCSID set gets its own copy.
	 */
	#ifdef SET_RCSID
	static const char utteranc_h[] = "$Id: utteranc.h,v 1.3.6.7 2007/08/31 17:44:53 dahan Exp $";
	#endif



	#include "all_defs.h"
	#include "hmm_type.h"
	#include "fpi_tgt.h"
	#include "voicing.h"
	#include "specnorm.h"
	#include "channorm.h"
	#include "swicms.h"
	#ifndef _RTT
	#include "duk_io.h"
	#endif

	/*
	 * Default utterance buffer sizing, both expressed in frames.
	 * NOTE(review): presumably the defaults for the buffer_size and
	 * keep_frames arguments of init_utterance() -- confirm against callers.
	 */
	#define DEFAULT_BUFFER_SIZE 100 /* in frames */
	#define KEEP_FRAMES 40 /* in frames, past frames kept */

	/* Functions supported are
	** new, delete (by source)
	** open file/device, close file/device
	** attach and detach sink
	** read/store samples - including the header
	*/

	/**
	 * Label structure.
	 *
	 * NOTE(review): the units of begin/end, the meaning of flag and the
	 * ownership of the two strings are not visible in this header -- confirm
	 * against the code that reads and writes these records.
	 */
	typedef struct {
		char          *label;
		long           begin;
		long           end;
		char          *extra;
		unsigned char  flag;
	} annotate;


	/**
	* @todo document
	*/
	typedef struct
	{
	int utt_type;
	int dim;
	fepFramePkt *frame;
	int num_chan;
	int do_channorm;
	spect_dist_info *spchchan; / Mirrored from the Wave object */
	norm_info channorm; / Mirrored from the Wave object */
	swicms_norm_info swicms; / copy of wave obj pointer */
	spect_dist_info *backchan[MAX_CHAN_DIM];
	featdata *last_push;
	int voice_duration;
	int quiet_duration;
	int unsure_duration;
	int start_windback;
	}
	utt_generic_info;

	#ifndef _RTT
	/**
	 * File details for a file-based utterance; embedded in
	 * file_utterance_info as its 'file' member.
	 *
	 * utb_table is a pointer to annotate records, matching the intact
	 * definition of this struct earlier in the file.
	 */
	typedef struct
	{
		char typ; /* s (16 bit), c (8 bit), u (newton .utb) */
		int endian; /* 0 is little 1 is big */
		int do_skip; /* skip every other frame */
		unsigned long len; /* length of file/utterance */
		PFile *file; /* pointer to file */
		char name[MAX_LABEL]; /* file name */
		/* int op; read or write  (member is commented out in the source) */
		int num_utts; /* no. of utterances in utb file */
		annotate *utb_table; /* utb file header information */
	}
	utt_file_info;

	/**
	* @todo document
	*/
	typedef struct
	{
	int utt_type;
	int dim;
	fepFramePkt *frame;
	int num_chan;
	int do_channorm;
	spect_dist_info *spchchan; / Mirrored from the Wave object */
	norm_info channorm; / Mirrored from the Wave object */
	swicms_norm_info swicms; / copy of wave obj pointer */
	spect_dist_info *backchan[MAX_CHAN_DIM];
	featdata *last_push;
	int voice_duration;
	int quiet_duration;
	int unsure_duration;
	int start_windback;
	/* voicing_info voice; */
	utt_file_info file;
	}
	file_utterance_info;
	#endif

	/**
	* @todo document
	*/
	typedef struct
	{
	int utt_type;
	int dim;
	fepFramePkt *frame;
	int num_chan;
	int do_channorm;
	spect_dist_info *spchchan; / Mirrored from the Wave object */
	norm_info channorm; / Mirrored from the Wave object */
	swicms_norm_info swicms; / copy of wave obj pointer */
	spect_dist_info *backchan[MAX_CHAN_DIM];
	featdata *last_push;
	int voice_duration;
	int quiet_duration;
	int unsure_duration;
	int start_windback;
	}
	live_utterance_info;

	/**
	 * An utterance of any source type.
	 *
	 * Every struct member of this union starts with 'int utt_type', so the
	 * bare utt_type member overlays that field of whichever variant is
	 * stored. The file variant is only present when _RTT is not defined.
	 */
	typedef union {
		int                  utt_type;  /* live or from file */
		utt_generic_info     gen_utt;   /* generic one */
	#ifndef _RTT
		file_utterance_info  file_utt;
	#endif
		live_utterance_info  live_utt;
	} utterance_info;


	/*
	** Size of the utb file headers and details
	*/

	#ifndef _RTT
	/*
	 * File-format constants; the sizes are on-disk byte counts.
	 * Each macro must expand to the bare integer only, so the trailing notes
	 * have to be real comments.
	 * NOTE(review): UTT_VERSION is 2, while init_utt_v5_header() and the
	 * UttHeader checksum comment refer to "version 5" -- confirm which
	 * version numbering is current.
	 */
	#define UTT_VERSION 2
	#define UTT_HEADER_SIZE 16 /* Size on disk */
	#define UTB_HEADER_SIZE 32 /* Size on disk */
	#define UTB_HEADER_USED 16 /* Size on disk */ /* SAL */

	/**
	 * UTB file header.
	 *
	 * NOTE(review): the in-memory size of this struct depends on the
	 * platform's 'unsigned long' width and on padding, so it need not equal
	 * the on-disk header size -- confirm how read_utt_head()/write_utt_head()
	 * serialise it.
	 */
	typedef struct _UttHeader {
		unsigned short headerSize;            /**< The size of the header in bytes. */
		unsigned short version;               /**< The version of the file format. */
		unsigned long  nBytes;                /**< The size of the payload in bytes. */
		unsigned short nParametersPerFrame;   /**< The number of parameters per frame. */
		unsigned short channelNormalization;  /**< 0=unknown, 1=none, 2=amp-based, 3=harmonicity-based, 4=mrec style */
		unsigned short speakerNormalization;  /**< 0=unknown, 1=no, 2=yes */
		unsigned short imeldaization;         /**< 0=unknown, 1=no, 2=yes */
		unsigned short nOriginalParameters;   /**< Before imelda truncation. */
		unsigned short samplesPerFrame;       /**< The number of samples per frame. */
		unsigned long  sampleRate;            /**< The audio sample rate. */
		unsigned long  checksum;              /**< not used in version 5. */
	} UttHeader;

	/*
	 * File-backed utterance routines (declared only when _RTT is not defined).
	 *
	 * File names, utterances, annotate tables, headers and PFile handles are
	 * all passed by pointer; these signatures agree with the intact
	 * prototypes earlier in the file.
	 * NOTE(review): only the signatures are visible in this header. The
	 * meaning of the int return values and any ownership rules must be
	 * confirmed against the implementations.
	 */
	int update_utb_header(file_utterance_info *utt, int frames, int samplerate,
			int framerate);
	void init_utt_v5_header(UttHeader *uhead, int dim, int samplerate, int framerate);
	int init_data_file(char *filename, file_utterance_info *utt, int dimen,
			char typ, int endian, int do_skip);
	int new_data_file(char *filename, file_utterance_info *utt, int dimen,
			char typ, int endian);
	int set_data_frame(file_utterance_info *utt, long begin);
	int buffer_data_frames(file_utterance_info *utt, long f_begin, long f_end);
	void more_data_frames(file_utterance_info *utt);
	int save_data_frames(file_utterance_info *utt);
	void close_data_stream(file_utterance_info *utt);
	int init_utb_file(file_utterance_info *utt, annotate **table);
	int position_utb_file(file_utterance_info *utt, long position, annotate *table);
	int load_utb_data(file_utterance_info *utt, int num_frames, int do_skip);
	int load_short_data(file_utterance_info *utt, int num_frames, int do_skip);
	int save_utb_data(file_utterance_info *utt, int num_frames);
	int save_short_data(file_utterance_info *utt, int num_frames);
	int read_utt_head(UttHeader *head, PFile *datafile);
	int write_utt_head(UttHeader *head, PFile *datafile);
	int check_for_utb(char *filename);

	/*
	 * TCP reading routines.
	 *
	 * File names are C strings and annotate tables are passed by pointer;
	 * read_tcp() and read_utb_table() take the address of the caller's table
	 * pointer. These signatures agree with the intact prototypes earlier in
	 * the file.
	 * NOTE(review): presumably the 'annotate **' readers allocate the table
	 * and return the entry count -- confirm against the implementations.
	 */
	int read_tcp(char *filename, annotate **tag_base);
	int read_lst(char *filename, annotate *tag_base, int ntags);
	int read_utb_table(char *filename, annotate **tag_base);
	void save_tcp(char *tcpnam, annotate *tag, int ntags);
	void compose_tcp_name_of_utt(char *uttname, char *tcpname);

	#endif

	/*
	 * Utterance routines available in every build (they sit outside the
	 * #ifndef _RTT section).
	 *
	 * Every function takes the utterance by pointer, and pUttFrame is a
	 * pointer to unsigned char; these signatures agree with the intact
	 * prototypes earlier in the file. The last four parameters of
	 * set_voicing_durations() share their names with the duration/windback
	 * members of the utterance structs.
	 * NOTE(review): return-value conventions of the int functions are not
	 * visible here -- confirm against the implementations.
	 */
	void init_utterance(utterance_info *utt, int utt_type, int dimen,
			int buffer_size, int keep_frames, int num_chan, int do_voicing);
	void set_voicing_durations(utterance_info *utt, int voice_duration,
			int quiet_duration, int unsure_duration,
			int start_windback);
	void free_utterance(utterance_info *utt);
	int utterance_started(utterance_info *utt);
	int utterance_ended(utterance_info *utt);
	int load_utterance_frame(utterance_info *utt, unsigned char *pUttFrame, int voicing);
	int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt);

	#endif /* _h_utteranc_ */