#include "nrutil.h" #include #include #include #include #define PI (3.141592654) #define TWOPI (6.28318530717) #define EFACTOR (0.1) /* energy scaling factor */ #define TAPER 0 double log10(); double exp(); double log(); double pow(); /* ========== Parameterizes input speech using LPC analysis ========= */ /**/ int SPEECH_TO_LPC(s,P,nOrd,F_LEN,r,a,engy,gain) float *engy,*gain; float *r,*a,*s; int P,F_LEN,nOrd; { float sum; float *ac; int i,j; void CORR(); void SCHUR(); void RCTOA(); ac = vector(0,nOrd+1); /* Take the P order autocorrelation of the speech signal s[0..LEN-1] */ /**/ CORR(s,ac,nOrd,F_LEN); *engy = ac[0]; if (*engy == 0.0) return 1; /* Compute the reflection coefficients from the autocorrelation */ /**/ SCHUR(ac,r,nOrd); /* Convert the reflection coefficients to predictor coeffs */ /**/ RCTOA(r,a,P,nOrd); /* Computes the LP gain */ /**/ sum=ac[0]; for (i=1;i<=P;++i) sum += a[i]*ac[i]; if (sum <0.0) *gain = 1.0; else *gain = sqrt(sum); return 0; } /*============== Performs autocorrelation ======================== */ void CORR(s,acf,NP,F_LEN) float s[]; float acf[]; int NP,F_LEN; { int k,j; float sum; /* calculate the first NP+1 values of the Auto-correlation function */ /**/ for(k=0; k<=NP ;++k) { sum=0.0; for (j=k ; j 0.0) r[n] = -r[n]; if (n == NP) break; /* EXIT */ P[0] += P[1]*r[n]; for(m=1; m<=NP-n;++m) { P[m] = P[1+m] + r[n]*K[NP-m+1]; K[NP-m+1]=K[NP-m+1] + r[n]*P[1+m]; } } /* end of else */ } /* end of outer for loop */ } /* end of SCHUR */ /*===========Conversion: from reflection to predictor coeffs ============ */ void RCTOA (k,as,NP,nOrd) float k[],as[]; int NP,nOrd; { int i,j; float **a; a = matrix(1,nOrd+2,1,nOrd+2); for (i=1; i<=nOrd; i++ ){ /* perform the recursion */ a[i][i]=k[i]; for (j=1;j n cepstr <--- --------+---------+----------+----------+---------- cep() array ^ ^ ^ ^ bindx at findx findx (-n) current (+n) (+2n) location */ at = (Rwin -1)*n; for (t=Rwin; t<=numFrames-Rwin; t++) /* -- Full regression formula -- */ { for (k=0;k= (numFrames*n)) { acc += j*( cep[(numFrames-1)*n + k] - cep[at + bindx]); } else { acc += j*( cep[at + findx] - cep[at+bindx]); } } regres[at+k] = acc/denom; } at = (t+1)*n; } } /* ============== Normalize the log-energy to be in -1.0 .. 1.0 ==== */ void NORMALIZE_ENGY(energy,numframes) float *energy; int numframes; { float maxengy=-1000000.0; int i; /* Find maximum energy */ /**/ for (i=0;i maxengy) maxengy = energy[i]; /* Normalize by maximum */ /**/ for (i=0;i=FRM) tail[k-FRM]=buffer[k]; } else { if (k<(F_LEN-FRM)) s[k] = tail[k]; else{ s[k] = buffer[j]; j++; } } engy = engy + s[k]*s[k]; } } *oengy = engy; if (cnt >1 ) for (j=FRM; j0.0 && fram==0){ rsln = SRATE/N; /* FFT resolution */ irsln = (MelHPF/rsln); /* FFT bin corresponding to MelHPF Hz */ } if (MelHPF>0.0) binStart=irsln; /* -- Compute cepstrum from IDFT of log-magnitude --- */ /**/ for (k=binStart; k<=Nhalf;++k) { magn = freq[k]; logmagn[k]=log(magn); } /*--- Compute cepstrum coeffs ------- */ /**/ for (i=1; i<=NP; ++i) { acc=0.0; for (k=binStart; k<=Nhalf; ++k) acc += logmagn[k]*cos(k*TWOPI*i/N); cepSave[fram*NP+i-1] = acc/N; } } else { /* --Compute cepstrum coeffs from Inv Discrete Cosine Tr---- */ for (k=2;k<=Nhalf;++k) { magn = freq[k]; bin =lbin[k]; t1 =Melwt[k]*magn; if (bin>0) bins[bin] += t1; if (bin0.0 && fram==0){ rsln = SRATE/N; /* FFT resolution */ irsln = (MelHPF/rsln); /* FFT bin corresponding to MelHPF Hz */ } if (MelHPF>0.0) binStart=irsln; if (LFCC) {/*---- Use IDFT of log-magnitude to get cepstrum coeff--*/ /* if (TAPER && fram==0 && MelHPF>0.0 ){ denom=pow(irsln*1.0,4.0); for (i=2;i0.0) binStart=2; */ /* -- Compute cesptrum from IDFT of log-magnitude --- */ /**/ for (k=binStart; k<=Nhalf;++k) { real=y[2*k-1]; imag=y[2*k]; magn = (real*real+imag*imag); /* if (TAPER && k0.0) logmagn[k]=0.5*log(magn)+log(tpwts[k]); else */ logmagn[k]=0.5*log(magn); } /*--- Compute cepstrum coeffs from FFT spectrum ------- */ /**/ for (i=1; i<=NP; ++i) { acc=0.0; for (k=binStart; k<=Nhalf; ++k) acc += logmagn[k]*cos(k*TWOPI*i/N); if (nLift!=0) acc *= (1.0 +0.5*nLift*sin(i*liftConst)); cepSave[fram*NP+i-1] = acc/N; } } else { /*----------- Mel-Cepstrum analysis ---------- */ for (k=binStart; k<=Nhalf;++k) { real=y[2*k-1]; imag=y[2*k]; magn = sqrt(real*real+imag*imag); /* if (WithOFFset) { /* ---Shift filterbanks by binStart bins bin =lbin[k-binStart+2]; t1 =Melwt[k-binStart+2]*magn; } */ bin =lbin[k]; t1 =Melwt[k]*magn; if (bin>0) bins[bin] += t1; if (bin0) wts[k] = (cenf[b+1]-mel[k])/(cenf[b+1]-cenf[b]); else wts[k] = (cenf[1] - mel[k])/(cenf[1]); /* printf("%f %f\n",k*0.5*SRATE/len,wts[k]); */ } } /* ========== Initialize the mel-scale =============== */ void INIT_MEL_SCALE(n,SRATE,mel) int n; float *mel,SRATE; { int k; float resln,fftsize; fftsize = 2.0*n; resln = SRATE/(fftsize*700.0); /* Frequency resolution */ for (k=1;k<=n+1;++k) mel[k] = 1127.0*log(1.0+(k-1)*resln); } /* =========== Computes the center frequencies ======================= */ void CEN_FREQ(NP,mel,len,cenf,lowbin) int NP,len,*lowbin; float *cenf,*mel; { int k,i,bin; float ms,f; ms = mel[len+1]; for (i=1;i<=NP+1;++i) cenf[i] = i*1.0/((float)(NP+1)) * ms; bin=1; for (i=1; i<=len;++i) { f = mel[i]; while (cenf[bin] < f) bin++; lowbin[i]=bin-1; } } /* =============== File handling routines ============================ */ void WRITE_HEADER(num_frames,update,LPC,RegrCoef,Mel,Cepstrum,addEngy,dEngy,NEW,BICEPS,Octave,tparams,fpout) int num_frames,RegrCoef,Mel,Cepstrum,addEngy,dEngy, NEW,LPC,BICEPS,Octave; short tparams; double update; /* Frame update - msecs are implied but are not included */ /* in the update number, so update=10.0 means 10.0 msecs */ FILE *fpout; { int SAMPLE_PERIOD; /* The frame update in 100 nsec units */ double nsec_units=1.0e-7; /* 100 nsecs */ short ptype=1; /* Parameterization type */ SAMPLE_PERIOD=update*1.0e-3/nsec_units; tparams *= 4; /* Total number of words per feature vector */ /*- Determine type of output parameterization to insert in output header */ /**/ if (LPC && !RegrCoef && !addEngy) ptype=1; if ((Mel || BICEPS || Octave) && !RegrCoef && !addEngy) ptype=6; if (Cepstrum && !RegrCoef && !addEngy) ptype=3; if ((Mel || BICEPS || Octave) && RegrCoef && addEngy) ptype=454; if (Cepstrum && RegrCoef && addEngy) ptype=451; if ((Mel || BICEPS || Octave) && RegrCoef && addEngy && dEngy) ptype=0x146; if (Cepstrum && RegrCoef && addEngy && dEngy) ptype=0x143; if (LPC && (addEngy || dEngy) && !RegrCoef) ptype=65; if (Cepstrum && ( addEngy || dEngy) && !RegrCoef) ptype=67; if ((Mel || BICEPS || Octave) && (addEngy || dEngy) && !RegrCoef) ptype=70; if ((Mel || BICEPS || Octave) && RegrCoef && !(addEngy || dEngy)) ptype=0x106; if (NEW) ptype=67; fwrite(&num_frames,sizeof(int),1,fpout); fwrite(&SAMPLE_PERIOD,sizeof(int),1,fpout); fwrite(&tparams,sizeof(short),1,fpout); fwrite(&ptype,sizeof(short),1,fpout); } /* ============ Save all the coefficients in output file ======== */ void SAVE_COEFFS(num_frames,NP,regrCoef,addEngy,dEngy,cepSave,regres,regres1, Energy,dEnergy,ddEnergy,fpout) int num_frames,NP,regrCoef,addEngy,dEngy; float *cepSave,*regres,*regres1,*Energy,*dEnergy,*ddEnergy; FILE *fpout; { float *work1,*work2,*work3,eng,deng,ddeng; int i; int k; work1 = vector(0,NP+1); /* Scratch, working arrays */ work2 = vector(0,NP+1); work3 = vector(0,NP+1); if (regrCoef) { for (k=0;k 500000){ printf("ERROR ! Bad NIST header in num of samples:%d\n\n",num_samples); exit(1); } fseek(fpin,1024,0); /* Skip the 1024 byte header */ return(num_samples); } /* -------------- Byte swapping subroutines ----------------- */ /**/ void SWAP_BYTES(shrt) short *shrt; { char tmp,*byte; byte = (char*) shrt; tmp = *byte; /* swap MSB with LSB */ *byte = *(byte+1); *(byte+1) = tmp; } void SWAP_LONG(word) int *word; { char tmp,*p; p = (char*) word; tmp = *p; *p = *(p+3); *(p+3) = tmp; tmp = *(p+1); *(p+1) = *(p+2); *(p+2) = tmp; } /* ---------------- Read label files ------------------------ */ int READ_LBL_FILE(str) char *str; { FILE *fpout; char str1[40],pl[40],str2[40],str3[40],str4[40]; int beg,end,beg1,end1; strcpy(str1," "); strncpy(str1,str,6); strcat(str1,".phn"); fpout = fopen(str1,"r"); if (fpout == NULL) { printf("\nERROR! No label file '%s' found\n",str1); exit(1); } else { fscanf(fpout,"%d %d %s",&beg,&end,str2); fscanf(fpout,"%d %d %s",&beg,&end,str3); fscanf(fpout,"%d %d %s",&beg1,&end1,str3); } if ((end1 - beg-45) <0) printf("ERROR! Not enough frames in: %s\n",str); /* printf("%d %d: %d %d\n",beg,end,beg+MXFRAMES,end1-(beg+MXFRAMES)); */ fclose(fpout); return(beg); } /* ------------------- OCTAVE-BANK ANALYSIS ---------------------- */ void OCTAVE_BANK(s,nFilt,NP,N,F_LEN,SRATE,alpha,F0,engy,cepSave) float *s,F0,*cepSave,SRATE,alpha,*engy; int NP,nFilt,N,F_LEN; { float BW, istart, F1, C, iend, a,denom,cenf,f_engy; int i,k,Nhalf,vb=0,bin,j; float rsln,*efreq,real,imag,acc=0.0,*y,*logfilt,*fbank,x,t1; static int *lbin,fram=0; static float factor,norm; void FFT(); Nhalf=N/2; if (fram==0) { factor = PI/(float)nFilt; norm = sqrt(2.0/(float)nFilt); efreq = vector(0,nFilt+1); lbin = ivector(0,Nhalf+1); a=1.0; denom=1; for (i=1; i=F0) lbin[i]=k; } if (vb) printf("i=%3d k=%2d %8.3f\n",i-1,k,efreq[k]); } } y = vector(1,2*N+2); logfilt = vector(1,nFilt+1); fbank = vector(1,nFilt+1); k=1; acc=0.0; for (i=1; i<=N; ++i){ if (i<=F_LEN){ y[k] = s[i-1]; y[k+1] =0.0; acc += (y[k]*y[k]); /* Energy computation */ } else { /* zero pad if necessary */ y[k]=0.0; y[k+1]=0.0; } k+=2; } *engy = acc; FFT(y,N,1); /* ---- Compute energy in each filterbank ---- */ /**/ for (i=1; i<=nFilt;++i) fbank[i]=0.0; for (i=1;i0 && bin<=nFilt) fbank[bin] += f_engy; } /* ---- Take log of filterbank energies -------- */ /**/ for (i=1; i<=nFilt; ++i){ x=fbank[i]; if (x<1.0) x=1.0; logfilt[i] = log(x); } /* -- Take Cosine Transform of log-fbank energies ---- */ /**/ for (i=1; i<=NP; ++i) { acc=0.0; t1=i*factor; for (j=1; j<=nFilt;++j) acc += logfilt[j]*cos(t1*(j-0.5)); cepSave[fram*NP+i-1] = acc*norm; /* Save the octave-cepstrum coeffs */ } free_vector(y,1,2*N+2); free_vector(logfilt,1,nFilt+1); free_vector(fbank,1,nFilt+1); fram++; }