/* ********************************************************************* This program parameterizes input speech using different types of parameterization suitable for speech recognition. These include: (1) LPC coefficients (2) Reflection coefficients (3) Cepstrum coefficients (4) Mel-freq cepstra coefficients (5) Bilinear-transformed cepstrum coefficients (6) Normalized energy (7) Delta-cepstrum coefficients It also provides for reading TIMIT database files, as well as NIST files and ISOLET files. The different types can be selected from the command line. On-line help is provided by just typing: cparam Written by: Philipos C. Loizou Date : 2-15-94 ********************************************************************* */ /* NOTE(review): the four bare #include directives that follow "nrutil.h" carry no header name - presumably the names were lost before this copy of the file was made; restore them before compiling. */ #include "nrutil.h" #include #include #include #include /* Old-style forward declarations (no parameter lists); the matching definitions are not visible in this part of the file. */ double fabs(); void CORR(); void SCHUR(); void RCTOA(); void FREQ_RESPONSE(); void FFT_SPECTRUM(); void PREMPHASIZE(); void HAMMING(); void REGRESSION_COEF(); double log(); /* PRINT_USAGE: prints the cparam usage banner and the option/default table to stdout with printf, then ends the program with exit(1), so it never returns to its caller. Takes no arguments. NOTE(review): the -H line advertises "ISO", but main compares the -H argument against "ISOLET" - confirm which spelling is intended. */ void PRINT_USAGE() { printf("\nUsage: cparam [options] infile outfile\n\n"); printf(" Option Default\n\n"); printf(" -c Output cepstral coefficients LPC\n"); printf(" -m Output mel-freq cepstra coeffs LPC\n"); printf(" -r Output reflection coefficients LPC\n"); printf(" -t Output bilinear cepstrum coeffs LPC\n"); printf(" -B Output FBANK energ(-m should be set)LPC\n"); printf(" -M Output normalized mel-freq cepstra LPC\n"); printf(" -d Append Delta Coefficients Off\n"); printf(" -e Append Log Speech Energy Off\n"); printf(" -g Append Delta Energy Off\n"); printf(" -a Use LPC spectrum in mel analysis Magnitude\n"); printf(" -b Plot LPC and FFT spectrums Off\n"); printf(" -h Do not output header in code file On\n"); printf(" -q Do not apply hamming window On\n"); printf(" -j Compute LFCC coefficients LPC\n"); printf(" -O Perform octave-bank analysis LPC\n"); printf(" -R Remove global mean from feature vec Off\n"); printf(" -A N Octave (N=0),1/3 octave(N=1)spacing 1\n"); printf(" -I F Initial offset (Hz) in octave anals 
0.0\n"); printf(" -u N Process N frames only All \n"); printf(" -n N Set number of parameters to N 12\n"); printf(" -p N Set analysis order to N 12\n"); printf(" -o N Set bilinear transform order to N 12\n"); printf(" -v N Set LPC_MEL order to N (with -a opt)12\n"); printf(" -i F High pass freq in mel analysis (Hz) 0.0\n"); printf(" -l N Set cepstrum liftering window to N 24\n"); printf(" -x N Skip the first N frames 0\n"); printf(" -f T Set frame period to T (msecs) 10.0\n"); printf(" -w T Set window duration to T (msecs) 20.0\n"); printf(" -s F Set preemphasis factor to F 0.97\n"); printf(" -H frm Read file format frm (TIMIT,NIST,ISO)HTK\n"); printf(" -P Process frames from a label file All\n"); /* Usage is only shown on a bad or empty command line, hence the non-zero exit status. */ exit(1); } /* --- Defines the TIMIT header structure --- */ /* Four short fields followed by an int sample count. main fills this with a single fread from the input file and uses only nsamples (after passing it through SWAP_LONG). */ typedef struct { short hdrsize; short version; short channels; short rate; int nsamples; } header; /* --- Defines the ISOLET header structure --- */ /* Same leading fields as the TIMIT header plus an int endian field. main fills it with a single fread and reads rate, nsamples and channels from it. */ typedef struct { short hdrsize; short version; short channels; short rate; int nsamples; int endian; } ISOheader; main(argc,argv) int argc; char *argv[]; { FILE *fpin,*fspec,*fpgnu,*fp; int i,j,k,number,cnt; int num_frames; /* Number of frames in speech file */ float *s; /* Contains a frame of speech, s[0..F_LEN-1] */ float *r; /* Holds NP reflection coefficients */ float *a; /* Holds NP prediction coefficients */ int N; /* FFT size */ int F_LEN=160; /* Frame length */ int FRM=80; /* Update frame length */ int NP=12; /* Number of parameters/coefficients */ int nOrd=12; /* Analysis order */ int fftSpec=1; /* If 1, print the FFT spectrum as well */ int PLOT=0; /* If 1, generate LPC spectrum plots */ int Rwin=2; /* Delta cepstrum window */ int RegrCoef=0; /* Add regression coefficients flag */ int addEngy=0; /* Append normalized speech energy flag*/ int dEngy=0; /* Append delta-difference energy */ int Mel=0; /* Mel-cepstrum coeffs flag */ int Cepstrum=0; /* Cepstrum coeffs flag */ int nLift=24; /* Cepstrum liftering window */ int LPC=1; /* Linear predictor 
coeffs flag */ int REFL=0; /* Reflection Coefficients flag */ int HAM=1; /* Hamming window flag */ int BICEPS=0; /* Bilinear-transformed cepstrum coeffs flag */ int BLTord=12; /* Order of bilinear transform */ int nLpcMel=12; /* Order of LPC analysis in Mel-cepstrum computation */ int LFCC=0; /* if 1, then compute Linear Freq Cepstrum Coeffs */ double MelHPF=0.0; /* High pass frequency in Mel analysis */ float BLTalpha=0.6; /* Mel-warping parameter, -1< a < 1 */ int NEW=0; double beta=0.97; /* Pre-emphasis constant */ float *regres; /* Holds the regression ceofficients */ float *regres1; /* Holds the regression coefficients for double delta*/ float *cepSave; /* Holds all mel/cepstrum coefficients */ float *Energy; /* Holds frame energies */ float *wts; /* Mel-filter bank weights */ int *lbin; /* Mel-filter frequency bins */ int tparams=0; /* Total number of parameters */ float engy; /* speech frame energy */ float o_engy; /* original speech frame energy before windowing*/ float Econst; /* Energy constant */ float gain; /* LP gain */ short stwo=2; /* bytes per sample */ short code ; /* code for WAVEFORM */ int SPERIOD=1250; /* The sampling period in 100 nsec units (8KHz)*/ int num_samples=0; /* Number of samples in the file */ float *dEnergy; /* Contains the differenced energies */ float *ddEnergy; /* Holds frame delta-delta energies */ header TIMIT; /* The TIMIT database's header */ ISOheader ISOLET; /* The ISOLET database's header */ float SRATE=8000.0; /* Sampling frequency in Hz */ int HDR=0; /* Flag for reading different type of headers */ int HDR_TYPE=0; /* If 1, then it's TIMIT header type */ int zero_engy=0; /* Zero energy flag */ int num_proc; /* Number of non-zero frames */ double window=20.0; /* Window length in mSecs */ double update=10.0; /* Window update in mSecs */ int NSKIP=0; /* Number of frames to skip at beginning */ int HDR_SIZE=12; /* Header size in bytes */ float octA=4.0/3.0; /* Alpha used in 1/3 octave bank analysis (for octave */ /* 
Alpha=2.0 ) */ int Octave=0; /* Octave bank analysis */ double octF0=0.0; /* Initial offset in Octave-bank analysis */ int octType=1; /* Perform 1/3 octave bank analysis */ float *mean; /* Global mean feature vector of dimension NP */ int FBANK=0; float mfcNorm[100]; int fVerbose=0; int two,len,begFrame; float freq; FILE *fpout,*fp2; char *infile,*outfile,*HFRMT; int cint=0,mint=0,wint=0,eint=0,fint=0,LPCspec=0,iint=0; int d1=0,np=0,dp=0,ord=0,lint=0,gint=0,bint=0,lst; int dint=0,uint=0,many=0,nov=0,dlift=0,rint=0,REMOVE_MN=0; int oint=0,not_eof_header,SKIP=0,vint=0; int NOHEADER=0,hint=0,bbint=0,LBL=0,ocint=0,ocfo=0,ii=0,NMFCC=0; void PRINT_USAGE(); void SWAP_BYTES(),SWAP_LONG(); int GetNISTheader(); int READ_LBL_FILE(); if (argc==1) PRINT_USAGE(); /* --- Handle the arguments in command line ----------- */ /**/ if (! scanargs(argc, argv,"cparam n%-num!d- p%-ord!d Otcm%- d%- e%- g%- b%- u%-num!D y%- r%- h%- w%-win!F f%-frame!F l%-win!D s%-beta!F q%- o%-ord!d H%-format%s x%-frms!D P%- a%- v%-num!D i%-freq!F j%- A%-!D I%-!F B%- M%- R%- infile!s outfile!s",&d1,&np,&dp,&ord,&cint,&dint,&eint,&gint,&bint,&uint,&many,&nov,&rint,&NOHEADER,&wint,&window,&fint,&update,&dlift,&nLift,&bbint,&beta,&hint,&oint,&BLTord,&HDR,&HFRMT,&SKIP,&NSKIP,&LBL,&LPCspec,&vint,&nLpcMel,&iint,&MelHPF,&LFCC,&ocint,&octType,&ocfo,&octF0,&FBANK,&NMFCC,&REMOVE_MN,&infile,&outfile)) exit(1); fpin = fopen(infile, "r"); if(fpin == NULL) { printf("No such file exists..\n"); exit(1);} fpout = fopen(outfile, "w"); /* ------------- Get the header in the file first, if specified -------- */ /**/ if (HDR==1) { if (strcmp(HFRMT,"TIMIT")==0) { fread(&TIMIT,1,sizeof(TIMIT),fpin); SWAP_LONG(&TIMIT.nsamples); SRATE=16000.0; /* Sampling frequency 16 KHz */ num_samples=TIMIT.nsamples; /* Get the # of samples in file */ if (num_samples <160 || num_samples>90000){ printf("\nERROR! 
Bad TIMIT header in file: %s\n\n",infile); exit(1); } HDR_TYPE=1; /* Header type- TIMIT */ } if (strcmp(HFRMT,"ISOLET")==0) { fread(&ISOLET,1,sizeof(ISOLET),fpin); SRATE=1000000.0/(ISOLET.rate*0.25); /* Sampling frequency (16 KHz) */ num_samples=ISOLET.nsamples; /* Get the # of samples in file */ if (num_samples <160 || num_samples>90000 || ISOLET.channels>1){ printf("\nERROR! Bad ISOLET header in file: %s\n\n",infile); exit(1); } HDR_SIZE=16; } else if (strcmp(HFRMT,"NIST")==0) { /* -- NIST header -- */ num_samples = GetNISTheader(fpin,&freq); SRATE=freq; HDR_SIZE=1024; } else {printf("\n\nERROR! Unknown file type: %s for file:%s\n ",HFRMT, infile); printf(" Supported types are: TIMIT, ISOLET and NIST\n"); exit(1); } } /* ---- Initialize frame window and update (in samples) ----- */ /**/ F_LEN = SRATE*window*10.0e-4; FRM = SRATE*update*10.0e-4; /* -------- Check which parameters were set --------------- */ if (d1==1) NP=np; /* Number of parameters or coefficients */ if (dp==1) nOrd=ord; /* Analysis order */ if (cint==1) { Mel=1; tparams += NP; } if (cint==2) { Cepstrum=1; tparams +=NP; } if (cint==4) { BICEPS=1; tparams +=BLTord; } if (cint==8) { Octave=1; tparams += NP; } if (bint ==1) PLOT=1; if (dlift==1) { if (nLift !=0) if (nLift > 2*NP || nLift < NP) { printf("\nWARNING! Cepstrum Liftering window is too small \n"); printf(" or too large compared to parameter vector length ...\n"); } } if (hint==1) HAM=0; /* No Hamming window */ if (d1==1 && nOrd F_LEN) { printf("\nWARNING! 
Frame update length is greater than window's length\n"); printf(" Hence it has been set to be equal to window's length.\n\n"); FRM = F_LEN; } if (window < 5.0 || update < 5.0 || window > 35.0 || update > 35.0){ printf("\nERROR Window/Update duration is too small/large..\n"); printf(" Duration should be between 5.0 and 35.0 msecs\n\n"); exit(1); } Econst = 0.0; /*2.0*log(32768.0); /* Constant used in energy computation */ /*-- Determine FFT size, N ---*/ /**/ if (Mel || PLOT || Octave) { two=2; while (F_LEN > two) two *= 2; N= two; /* FFT size */ } /* -- Allocate memory for coefficients --- */ /**/ s = vector(0,F_LEN+1); /* Speech vector */ r = vector(1,nOrd); /* Reflection coefficients */ a = vector(1,nOrd); /* Prediction coefficients */ if (REMOVE_MN){ mean = vector(1,NP); for (i=1;i<=NP; ++i) mean[i]=0.0; } if (PLOT) { fspec= fopen("spec.dat","w"); fpgnu= fopen("gnu.dat","w"); fp = fopen("fft.dat","w"); printf("\n NOTE! To see the plots, type: gnuplot gnu.dat ..\n\n"); } /* --- Read in input file's header ------- */ /**/ if (!HDR) { fread(&num_samples,sizeof(int),1,fpin); fread(&SPERIOD,sizeof(int),1,fpin); fread(&stwo,sizeof(short),1,fpin); fread(&code,sizeof(short),1,fpin); if (num_samples < 160 || num_samples>100000){ printf("\nERROR! 
Bad header in file: %s\n",infile); exit(1); } } num_frames = (num_samples-F_LEN)/FRM +1; /* -- Check how many frames to process --- */ /**/ if (uint==1) num_frames=many; if (many > num_frames) many=num_frames; if (LBL) { begFrame=READ_LBL_FILE(infile); num_frames=45; SKIP=1; NSKIP=begFrame; } /* -- Write out output file's header ------ */ /**/ if (!NOHEADER) WRITE_HEADER(num_frames,update,LPC,RegrCoef,Mel,Cepstrum,addEngy,dEngy, NEW,BICEPS,Octave,tparams,fpout); if (addEngy || dEngy) Energy=vector(0,num_frames); if (dEngy) { dEnergy=vector(0,num_frames); ddEnergy=vector(0,num_frames); } if (RegrCoef) { if (BICEPS) { regres=vector(0,num_frames*BLTord+1); regres1=vector(0,num_frames*BLTord+1); } else { regres=vector(0,num_frames*NP+1); regres1=vector(0,num_frames*NP+1); } } if (BICEPS) cepSave=vector(0,num_frames*BLTord+1); else cepSave=vector(0,num_frames*NP+1); cnt=1; if (Mel){ /* ------ Compute Filter-bank weights -------- */ len=N/2; /* half the FFT size */ lbin =ivector(1,len); wts = vector(1,len); COMP_FBANK_WEIGHTS(nOrd,len,SRATE,lbin,wts); /* --- If FBANK only, then compute normalizing factors for energies -- */ /**/ if (FBANK || NMFCC) { j=0; k=lbin[2]; ii=0; for (i=2; i<=len; ++i){ if (lbin[i]!=0) { if (lbin[i]==k ) j++; else{ mfcNorm[ii]=j+1; j=0; ii++; if (ii==nOrd) lst=i; } } k=lbin[i]; } mfcNorm[nOrd]=len-lst+1; if (fVerbose) for (i=1; i<=nOrd; ++i) printf("%d - %3.1f\n",i,mfcNorm[i]); } } num_proc= num_frames; if (SKIP){ /* ---- Skip the first NSKIP frames ---- */ i=HDR_SIZE+NSKIP*F_LEN*2; fseek(fpin,i,0); } while(1) { /* ================= main outer loop ===================== */ READ_SPEECH(F_LEN,FRM,HDR_TYPE,fpin,s,&o_engy); PREMPHASIZE(beta,s,F_LEN,FRM); if (HAM) HAMMING(s,F_LEN); if (Mel) MEL_CEPSTRUM(NP,F_LEN,N,nOrd,nLift,nLpcMel,LPCspec,SRATE,MelHPF,LFCC,s, &engy,wts,lbin,FBANK,mfcNorm,NMFCC,cepSave) ; if (Octave) OCTAVE_BANK(s,nOrd,NP,N,F_LEN,SRATE,octA,octF0,&engy,cepSave); if (Cepstrum || LPC || PLOT || REFL || BICEPS) zero_engy = 
SPEECH_TO_LPC(s,NP,nOrd,F_LEN,r,a,&engy,&gain); if (zero_engy) { num_proc--; printf("\nWARNING! zero energy frame: %d \n",cnt);} else { if (LPC) for (i=1;i<=NP;++i) cepSave[(cnt-1)*NP+i-1] = a[i]; /* Save the LPC coeffs */ if (REFL) for (i=1;i<=NP;++i) cepSave[(cnt-1)*NP+i-1] = r[i]; /* Save the reflection coeffs */ if (Cepstrum) LPC2CEPS(NP,nLift,a,cepSave); if (BICEPS ) CEPS2BILINEAR (NP,BLTord,BLTalpha,nLift,a,cepSave); if (addEngy || dEngy) Energy[cnt-1] = log(o_engy); if ( PLOT ) MAKE_PLOTS(a,s,gain,fftSpec,fp,fpgnu,fspec,N,NP,F_LEN); if (REMOVE_MN) for (i=1;i<=NP; ++i) mean[i] += cepSave[(cnt-1)*NP+i-1]; } if(cnt==num_frames) break; cnt++; } /* ======================= end of main loop =========================*/ num_frames = num_proc; /* set num_frames to # of non-zero frames */ /* --- Normalize energy ------------- */ /**/ if (addEngy || dEngy) NORMALIZE_ENGY(Energy,num_frames); if (BICEPS) NP=BLTord; if (REMOVE_MN){ /* Remove the mean from the feature vector */ for (i=1;i<=NP; ++i) mean[i] /= (float) num_frames; /* -- subtract the mean from all features */ for (j=1; j<=num_frames; ++j) for (i=1; i<=NP; ++i) cepSave[(j-1)*NP+i-1] -= mean[i]; } /* --- Compute delta cepstrum/mel-cepstrum coefficients and delta-delta cepstrum--- */ /**/ if (RegrCoef) { REGRESSION_COEF(Rwin,12,num_frames,cepSave,regres); REGRESSION_COEF(Rwin,12,num_frames,regres,regres1); } /* ============================== */ if (NEW) for (i=0;i