// file: $isip/class/asr/SpeakerVerifier/sv_05.cc // version: $Id: sv_05.cc 10480 2006-03-13 21:31:40Z raghavan $ // // isip include files // #include "SpeakerVerifier.h" // method: run // // arguments: // const Sdb& sdb: (input) input signal data base // // return: a bool8 value indicating status // // this method runs speaker verifier. // bool8 SpeakerVerifier::run(Sdb& sdb_a) { // branch on the learning machine // if(algorithm_d == HMM) { hmm_d.setParamFile(param_file_name_d); hmm_d.setDebug(debug_level_d); hmm_d.setVerbosity(verbosity_d); // invoke the HMM run method // hmm_d.run(sdb_a); } else if(algorithm_d == SVM) { Sof mfile; File ofile; String output; String identifier; String distance_str; Filename input_ID; Filename input_file_name; AudioDatabase audio_db; StatisticalModel stat_model; FeatureFile feature_file; Vector data; int32 total_num_frames = 0; int32 current_file_num = 0; // setup the algorithm data structure for reading // data.setLength(1); data(0).makeVectorFloat(); // open the support vector model // if (!mfile.open(model_file_d, File::READ_ONLY)) { return Error::handle(name(), L"cannot open support vector model", Error::ARG, __FILE__, __LINE__); } // read the support vector model // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nloading support vector model: "); output.concat(model_file_d); Console::put(output); Console::decreaseIndention(); } if (!stat_model.read(mfile, (int32)0)) { return Error::handle(name(), L"cannot read support vector model", Error::ARG, __FILE__, __LINE__); } // load the audio database // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nloading audio database: "); output.concat(audio_db_file_d); Console::put(output); Console::decreaseIndention(); } if (!audio_db.open(audio_db_file_d)) { return Error::handle(name(), L"cannot open audio db file", Error::ARG, __FILE__, __LINE__); } // open the output file // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } if (!ofile.open(output_file_d, File::WRITE_ONLY, File::TEXT)) { return Error::handle(name(), L"cannot open output file", Error::ARG, __FILE__, __LINE__); } // loop over all feature file(s) // current_file_num = 0; for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file identifier // identifier.assign(input_ID); if (!audio_db.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"cannot get record from audio db file", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // open the sof file for reading // if (!feature_file.open(input_file_name)) { return Error::handle(name(), L"cannot open feature file", Error::ARG, __FILE__, __LINE__); } // loop over all features in the feature file // int32 num_frames = feature_file.getNumFrames(); total_num_frames += num_frames; float64 accum = 0.0; for (int32 i=0; i < num_frames; i++) { // read the feature // feature_file.getBufferedData(data, i, 1); // compute the distance // float64 distance = stat_model.getSupportVectorModel().getDistance(data(0).getVectorFloat()); distance_str.assign(distance); // accumulate scores from all frames // accum += distance; if (debug_level_d >= Integral::DETAILED) { distance_str.debug(L"distance"); } } // find the average utterance probability // float64 utter_dist = accum / num_frames; if (utter_dist < (float32)decision_thresholds_d ){ distance_str.assign(utter_dist); ofile.put(L"("); ofile.put(identifier); ofile.put(L") REJECTED: "); ofile.put(distance_str); ofile.put(L"\n"); ofile.flush(); } else { distance_str.assign(utter_dist); ofile.put(L"("); ofile.put(identifier); ofile.put(L") ACCEPTED: "); ofile.put(distance_str); ofile.put(L"\n"); ofile.flush(); } // close the file descriptor // feature_file.close(); } // end of first for // close the audio database // mfile.close(); ofile.close(); audio_db.close(); } else if(algorithm_d == RVM) { // declare local variables // Sof mfile; File ofile; String output; String identifier; String distance_prob_str; Filename input_ID; Filename input_file_name; AudioDatabase audio_db; StatisticalModel stat_model; FeatureFile feature_file; Vector data; int32 total_num_frames = 0; int32 current_file_num = 0; // setup the algorithm data structure for reading // data.setLength(1); data(0).makeVectorFloat(); // open the relevance vector model // if (!mfile.open(model_file_d, File::READ_ONLY)) { output.assign(L"\ncannot open file: "); output.concat(model_file_d); output.concat(L", bailing out..."); Console::put(output); Integral::exit(); } // read the relevance vector model // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nloading relevance vector model: "); output.concat(model_file_d); Console::put(output); Console::decreaseIndention(); } if (!stat_model.read(mfile, (int32)0)) { output.assign(L"\nerror reading model: "); output.concat(model_file_d); output.concat(L", bailing out..."); Console::put(output); Integral::exit(); } // load the audio database // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nloading audio database: "); output.concat(audio_db_file_d); Console::put(output); Console::decreaseIndention(); } if (!audio_db.open(audio_db_file_d)) { output.assign(L"\ncannot load audio database: "); output.concat(audio_db_file_d); output.concat(L", bailing out..."); Console::put(output); Integral::exit(); } // open the output file // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } if (!ofile.open(output_file_d, File::WRITE_ONLY, File::TEXT)) { output.assign(L"\ncannot open file: "); output.concat(output_file_d); output.concat(L", bailing out..."); Console::put(output); Integral::exit(); } // loop over all feature file(s) // current_file_num = 0; for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file identifier // identifier.assign(input_ID); if (!audio_db.getRecord(identifier, input_file_name)) { output.assign(L"\ncannot read identifier from audio database: "); output.concat(identifier); output.concat(L", bailing out..."); Console::put(output); Integral::exit(); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // open the sof file for reading // if (!feature_file.open(input_file_name)) { output.assign(L"\ncannot open feature file: "); output.concat(input_file_name); output.concat(L", bailing out..."); Console::put(output); Integral::exit(); } // loop over all features in the feature file // int32 num_frames = feature_file.getNumFrames(); total_num_frames += num_frames; float64 accum_prob = 0; for (int32 i=0; i < num_frames; i++) { // read the feature // feature_file.getBufferedData(data, i, 1); // compute the distance // float64 distance_prob = stat_model.getRelevanceVectorModel().getDistanceProb(data(0).getVectorFloat()); accum_prob += distance_prob; } // find the average utterance probability // float64 utter_prob = accum_prob / num_frames; if (utter_prob < decision_thresholds_d ){ distance_prob_str.assign(utter_prob); ofile.put(L"("); ofile.put(input_ID); ofile.put(L") REJECTED: "); ofile.put(distance_prob_str); ofile.put(L"\n"); ofile.flush(); } else { distance_prob_str.assign(utter_prob); ofile.put(L"("); ofile.put(input_ID); ofile.put(L") ACCEPTED: "); ofile.put(distance_prob_str); ofile.put(L"\n"); ofile.flush(); } // close the file descriptor // feature_file.close(); } // close the file descriptor(s) // mfile.close(); ofile.close(); // close the audio database // audio_db.close(); if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(current_file_num); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // exit gracefully // return true; } // method: run // // arguments: // const Sdb& pos_sdb: (input) list of positive examples // const Sdb& neg_sdb: (input) list of negative examples // // return: a bool8 value indicating status // // this method runs speaker verifier. // bool8 SpeakerVerifier::run(Sdb& pos_sdb_a, Sdb& neg_sdb_a) { if (algorithm_d == SVM ) { svm_d.setDebug(debug_level_d); svm_d.setVerbosity(verbosity_d); // load the feature from file // svm_d.loadFeatures(pos_sdb_a,neg_sdb_a); // start training // svm_d.train(); // write the support vector model // svm_d.writeModel(); } else if (algorithm_d == RVM) { rvm_d.setDebug(debug_level_d); rvm_d.setVerbosity(verbosity_d); // load the feature from file // rvm_d.loadFeatures(pos_sdb_a,neg_sdb_a); // setup the training process // // the RVMTrainData is a train data holder which can be // configured (see RVMTrainData header file) // RVMTrainData tdata; rvm_d.setOutputFile(output_file_d); // start training // rvm_d.train(tdata); // write the relevance vector model // rvm_d.writeModel(); } // exit gracefully // return true; }