// file: $isip/class/pr/VerifyHMM/vhmm_05.cc
// version: $Id: vhmm_05.cc 10584 2006-07-13 18:13:06Z sl346 $
//

// isip include files
//
#include "VerifyHMM.h"

// method: load
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this method loads the HMM models
//
bool8 VerifyHMM::load() {

  // declare local variables
  //
  String output;

  // load the audio database (optional)
  //
  if (audio_db_file_d.length() > 0) {

    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading audio database: ");
      output.concat(audio_db_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // load the audio database
    //
    if (!audio_db_d.open(audio_db_file_d)) {
      audio_db_file_d.debug(L"audio_db_file_d");
      return Error::handle(name(), L"run: open audio database file",
                           Error::ARG, __FILE__, __LINE__);
    }
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no audio database file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // load the transcription database (optional)
  //
  if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH) {

    if (transcription_db_file_d.length() > 0) {

      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nloading transcription database: ");
        output.concat(transcription_db_file_d);
        Console::put(output);
        Console::decreaseIndention();
      }

      // load the transcriptions
      //
      if (!transcription_db_d.open(transcription_db_file_d)) {
        transcription_db_file_d.debug(L"transcription_db_file_d");
        return Error::handle(name(),
                             L"error opening transcription database file",
                             Error::ARG, __FILE__, __LINE__);
      }
    }
    else {

      // a transcription database is required for Baum-Welch training
      //
      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\n*** no transcription database file was specified ***");
        Console::put(output);
        Console::decreaseIndention();
      }
      return Error::handle(name(),
                           L"load - no transcription database file was specified",
                           Error::ARG, __FILE__, __LINE__);
    }
  }

  // load the front-end parameters (optional)
  //
  if (fend_file_d.length() > 0) {

    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading front-end: ");
      output.concat(fend_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    Sof fend_file_sof;
    Sdb sdb_fend_files;
    int32 len = 0;
    int32 tag = 0;
    Filename current_fe_file;

    // open the front end for reading
    //
    if (!fend_file_sof.open(fend_file_d)) {
      return Error::handle(name(), L"load - unable to open front-end file",
                           Error::ARG, __FILE__, __LINE__);
    }

    // check if the recipe file is a file list or just a recipe file
    //
    if (Sdb::isSdb(fend_file_d)) {
      sdb_fend_files.append(fend_file_d, true);
    }
    else {
      sdb_fend_files.append(fend_file_d);
    }
    fend_file_sof.close();

    // get the number of FE files
    //
    do {
      len++;
    } while (sdb_fend_files.gotoNext());
    vector_fe_d.setLength(len);

    // start from the first FE
    //
    sdb_fend_files.gotoFirst();

    // loop over all FE files
    //
    for (int32 num_fe = 0; num_fe < len; num_fe++) {

      // get the current FE file name
      //
      sdb_fend_files.getName(current_fe_file);

      // open the FE file for reading
      //
      if (!fend_file_sof.open(current_fe_file)) {
        String msg(L"Error: could not open FrontEnd file: ");
        msg.concat(current_fe_file);
        Console::put(msg);
      }

      // read the configuration of the FrontEnd from the parameter file
      //
      if (((tag = fend_file_sof.first(FrontEnd::name())) == Sof::NO_TAG) ||
          (!vector_fe_d(num_fe).read(fend_file_sof, tag))) {
        String msg(L"Error: could not read configuration from file: ");
        msg.concat(fend_file_d);
        Console::put(msg);
      }
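      // if the read succeeded, vector_fe_d(num_fe) now holds the
      // FrontEnd configuration read from the current parameter file
      //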
      fend_file_sof.close();
      sdb_fend_files.gotoNext();
    }
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no front-end file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // temporary language model objects: they are converted to IHD form
  // once loaded and then assigned to the lm_d and imp_lm_d objects
  //
  LanguageModel temp_lm, temp_imp_lm;

  // check the language model
  //
  if (lm_file_d.length() == 0) {
    return Error::handle(name(), L"load - invalid language model file",
                         Error::ARG, __FILE__, __LINE__);
  }

  if (verbosity_d >= Integral::BRIEF) {
    Console::increaseIndention();
    output.assign(L"\nloading language model: ");
    output.concat(lm_file_d);
    Console::put(output);
    Console::decreaseIndention();
  }

  // check the statistical model pool
  //
  if (smp_file_d.length() == 0) {
    return Error::handle(name(), L"load - invalid statistical model pool file",
                         Error::ARG, __FILE__, __LINE__);
  }

  if (verbosity_d >= Integral::BRIEF) {
    Console::increaseIndention();
    output.assign(L"\nloading statistical model pool: ");
    output.concat(smp_file_d);
    Console::put(output);
    Console::decreaseIndention();
  }

  // load the models and set them to the search engine
  //
  Sof lm_file;
  lm_file.open(lm_file_d, File::READ_ONLY);

  if (!temp_lm.read(lm_file)) {
    return Error::handle(name(), L"load: loading model file",
                         Error::ARG, __FILE__, __LINE__);
  }
  lm_d.setRuleModel(temp_lm.getRuleModel());
  search_engine_d.setHDigraph(lm_d.getIHD());
  lm_file.close();

  // check the imposter language model
  //
  if (imp_lm_file_d.length() > 0 && imp_smp_file_d.length() > 0) {

    if (imp_lm_file_d.length() == 0) {
      return Error::handle(name(),
                           L"load - invalid imposter language model file",
                           Error::ARG, __FILE__, __LINE__);
    }

    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading imposter language model: ");
      output.concat(imp_lm_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // check the imposter statistical model pool
    //
    if (imp_smp_file_d.length() == 0) {
      return Error::handle(name(),
                           L"load - invalid imposter statistical model pool file",
                           Error::ARG, __FILE__, __LINE__);
    }

    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading imposter statistical model pool: ");
      output.concat(imp_smp_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    Sof imp_lm_file;
    imp_lm_file.open(imp_lm_file_d, File::READ_ONLY);

    // load the models and set them to the search engine
    //
    if (!temp_imp_lm.read(imp_lm_file)) {
      return Error::handle(name(), L"load: loading model file",
                           Error::ARG, __FILE__, __LINE__);
    }
    imp_lm_d.setRuleModel(temp_imp_lm.getRuleModel());
    imp_search_engine_d.setHDigraph(imp_lm_d.getIHD());
    imp_lm_file.close();
  }

  // get the num of levels
  //
  num_levels_d = lm_d.getIHD().length();

  // load the statistical model pool
  //
  Sof smp_file;
  int32 smp_level;

  if (!smp_file.open(smp_file_d, File::READ_ONLY)) {
    smp_file_d.debug(L"smp_file_d");
    return Error::handle(name(), L"error opening statistical model pool",
                         Error::ARG, __FILE__, __LINE__);
  }

  smp_level = search_engine_d.getHDigraph().length() - 1;
  search_engine_d.getHDigraph()(smp_level).loadStatisticalModels(smp_file, smp_level);

  // set statistical models to the search nodes in the last level
  //
  if (!search_engine_d.getHDigraph()(smp_level).connectStatisticalModels()) {
    return Error::handle(name(), L"load", Error::ARG, __FILE__, __LINE__);
  }

  // close statistical model pool file
  //
  smp_file.close();
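  // note: the imposter models below are loaded only when both an
  // imposter language model and an imposter statistical model pool
  // were specified; the likelihood-only mode does not use them
  //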
  // load the imposter statistical model pool
  //
  if (imp_lm_file_d.length() > 0 && imp_smp_file_d.length() > 0) {

    Sof imp_smp_file;
    int32 imp_smp_level;

    if (!imp_smp_file.open(imp_smp_file_d, File::READ_ONLY)) {
      imp_smp_file_d.debug(L"imp_smp_file_d");
      return Error::handle(name(), L"error opening statistical model pool",
                           Error::ARG, __FILE__, __LINE__);
    }

    imp_smp_level = imp_search_engine_d.getHDigraph().length() - 1;
    imp_search_engine_d.getHDigraph()(imp_smp_level).loadStatisticalModels(imp_smp_file, imp_smp_level);

    // set imposter statistical models to the search nodes in the last level
    //
    if (!imp_search_engine_d.getHDigraph()(imp_smp_level).connectStatisticalModels()) {
      return Error::handle(name(), L"load", Error::ARG, __FILE__, __LINE__);
    }

    // close imposter statistical model pool file
    //
    imp_smp_file.close();
  }

  // check the num of levels
  //
  if (imp_lm_file_d.length() > 0 && imp_smp_file_d.length() > 0) {
    if (num_levels_d != imp_lm_d.getIHD().length()) {
      return Error::handle(name(), L"load - the number of levels of the imposter models is not identical to that of the speaker models", Error::ARG, __FILE__, __LINE__);
    }
  }

  // set up the number of levels for the search engines
  //
  search_engine_d.setNumLevels(num_levels_d);
  if (imp_lm_file_d.length() > 0 && imp_smp_file_d.length() > 0) {
    imp_search_engine_d.setNumLevels(num_levels_d);
  }

  // load the configuration parameters (optional)
  //
  if (cnfg_file_d.length() > 0) {

    // open the configuration file
    //
    Sof cnfg_file_sof;
    if (!cnfg_file_sof.open(cnfg_file_d)) {
      return Error::handle(name(), L"load - unable to open configuration file",
                           Error::ARG, __FILE__, __LINE__);
    }

    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading configuration file: ");
      output.concat(cnfg_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // have the search levels read in their own configuration information
    //
    for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) {

      // configure the Viterbi search engine
      //
      SearchLevel& search_level = search_engine_d.getSearchLevel(curr_level);

      // read the configuration parameters corresponding to this level
      //
      search_level.read(cnfg_file_sof, curr_level);

      // load the ngram language model
      //
      if (search_level.useNSymbol()) {
        if (!search_level.loadNSymbolModel()) {
          return Error::handle(name(), L"load - error reading ngram language model", Error::ARG, __FILE__, __LINE__);
        }
      }
    }

    // have the imposter search levels read in their own configuration
    // information
    //
    if (imp_lm_file_d.length() > 0 && imp_smp_file_d.length() > 0) {
      for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) {

        // configure the imposter Viterbi search engine
        //
        SearchLevel& search_level = imp_search_engine_d.getSearchLevel(curr_level);

        // read the configuration parameters corresponding to this level
        //
        search_level.read(cnfg_file_sof, curr_level);

        // load the ngram language model
        //
        if (search_level.useNSymbol()) {
          if (!search_level.loadNSymbolModel()) {
            return Error::handle(name(), L"load - error reading ngram language model", Error::ARG, __FILE__, __LINE__);
          }
        }
      }
    }

    // close the configuration file
    //
    cnfg_file_sof.close();
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no configuration file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // make sure the correct number of levels were specified
  //
  if ((int32)num_levels_d != search_engine_d.getHDigraph().length()) {
    return Error::handle(name(), L"load - invalid number of levels specified",
                         Error::ARG, __FILE__, __LINE__);
  }

  // loop over all search levels
  //
  for (int32 level = 0; level < (int32)num_levels_d; level++) {

    SearchLevel& search_level = search_engine_d.getSearchLevel(level);

    // set the initial level of the search hierarchy
    //
    if (search_level.getLevelTag().eq(transcription_level_d)) {
      initial_level_d = level;
    }

    // set the forced-alignment level
    //
    if (search_level.getLevelTag().eq(force_alignment_level_d)) {
      alignment_level_d = level;
    }
  }

  // loop over all search levels and verify that the context length
  // is not greater than one if skip symbols are used
  //
  for (int32 i = 0; i < (int32)num_levels_d; i++) {

    // retrieve the search levels
    //
    Long curr_level_index(i);
    SearchLevel& curr_level =
      search_engine_d.getSearchLevel((int32)curr_level_index);

    // are we using context at this level?
    //
    if (curr_level.useContext()) {

      // is the right context length greater than one?
      //
      if (curr_level.getRightContext() > 1) {

        // are we using skip symbols at this level?
        //
        if (curr_level.getSkipSymbolTable().length() > 0) {
          curr_level_index.debug(L"current search level");
          return Error::handle(name(), L"load - using skip symbols with a right context length greater than one is not supported", Error::ARG, __FILE__, __LINE__);
        }
      }
    }
  }
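  // the same level-tag and context checks are repeated for the
  // imposter search engine whenever imposter models have been loaded
  //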
  // make sure the correct number of levels were specified
  //
  if (imp_lm_file_d.length() > 0 && imp_smp_file_d.length() > 0) {

    if ((int32)num_levels_d != imp_search_engine_d.getHDigraph().length()) {
      return Error::handle(name(), L"load - invalid number of levels specified",
                           Error::ARG, __FILE__, __LINE__);
    }

    // loop over all search levels
    //
    for (int32 level = 0; level < (int32)num_levels_d; level++) {

      SearchLevel& search_level = imp_search_engine_d.getSearchLevel(level);

      // set the initial level of the search hierarchy
      //
      if (search_level.getLevelTag().eq(transcription_level_d)) {
        initial_level_d = level;
      }

      // set the forced-alignment level
      //
      if (search_level.getLevelTag().eq(force_alignment_level_d)) {
        alignment_level_d = level;
      }
    }

    // loop over all search levels and verify that the context length
    // is not greater than one if skip symbols are used
    //
    for (int32 i = 0; i < (int32)num_levels_d; i++) {

      // retrieve the search levels
      //
      Long curr_level_index(i);
      SearchLevel& curr_level =
        imp_search_engine_d.getSearchLevel((int32)curr_level_index);

      // are we using context at this level?
      //
      if (curr_level.useContext()) {

        // is the right context length greater than one?
        //
        if (curr_level.getRightContext() > 1) {

          // are we using skip symbols at this level?
          //
          if (curr_level.getSkipSymbolTable().length() > 0) {
            curr_level_index.debug(L"current search level");
            return Error::handle(name(), L"load - using skip symbols with a right context length greater than one is not supported", Error::ARG, __FILE__, __LINE__);
          }
        }
      }
    }
  }

  // after reading the search level tags from the language models, we may
  // set the output levels
  //
  if (output_levels_str_d.length() > 0) {
    parseLevels(output_levels_str_d, output_levels_d);
  }
  else {

    // set the default output level to the top level
    //
    output_levels_d.setLength((int32)num_levels_d);
    output_levels_d(0) = ON;
  }

  // after reading the search level tags from the language models, we may
  // set the update levels
  //
  if (update_levels_str_d.length() > 0) {
    parseLevels(update_levels_str_d, update_levels_d);
  }
  else {

    // set the default update level to the bottom level
    //
    update_levels_d.setLength((int32)num_levels_d);
    update_levels_d((int32)num_levels_d - 1) = ON;
  }

  // gracefully exit
  //
  return true;
}

// method: verify
//
// arguments:
//  Sdb& sdb: (input) signal data base
//
// return: a bool8 value indicating status
//
// this is the verify method that branches to either verifyl or verifylr
//
bool8 VerifyHMM::verify(Sdb& sdb_a) {

  if (algorithm_d == VERIFY) {

    // branch to either the LIKELIHOOD or the LIKELIHOOD_RATIO implementation
    //
    if (implementation_d == LIKELIHOOD) {
      verifyl(sdb_a);
    }
    else if (implementation_d == LIKELIHOOD_RATIO) {
      verifylr(sdb_a);
    }
    else {
      return Error::handle(name(), L"verify", Error::ARG, __FILE__, __LINE__);
    }
  }
  else {
    return Error::handle(name(), L"verify", Error::ARG, __FILE__, __LINE__);
  }

  // gracefully exit
  //
  return true;
}

// method: verifylr
//
// arguments:
//  Sdb& sdb: (input) signal data base to run on
//
// return: a bool8 value indicating status
//
// this is the method to verify using speaker and imposter models
//
bool8 VerifyHMM::verifylr(Sdb& sdb_a) {

  // check the algorithm and implementation
  //
  if ((algorithm_d != VERIFY) || (implementation_d != LIKELIHOOD_RATIO)) {
    return Error::handle(name(), L"verify", Error::ARG, __FILE__, __LINE__);
  }

  // declare local variables
  //
  int32 num_valid_files = 0;
  int32 current_file_num = 0;
  int32 total_num_frames = 0;
  String identifier;
  Filename input_file_name;
  File buffer_file;
  Filename buffer_filename;
  Filename temp_buffer_filename;
  Filename input_ID;
  String output;
  Filename output_file_name;
  Sdb output_sdb;
  Vector data;
  TranscriptionDatabase trans_db;

  // load the hmm models from the language model file
  //
  if (!load()) {
    return Error::handle(name(), L"run: load hmm models",
                         Error::ARG, __FILE__, __LINE__);
  }

  // set the search engine mode
  //
  search_engine_d.setInitialLevel(initial_level_d);
  search_engine_d.setSearchMode(HierarchicalSearch::DECODE);
  imp_search_engine_d.setInitialLevel(initial_level_d);
  imp_search_engine_d.setSearchMode(HierarchicalSearch::DECODE);

  // when the output mode is FILE
  //
  File total_output_file;
  if (output_mode_d == FILE) {
    if (output_file_d.length() > 0) {

      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nopening the output file: ");
        output.concat(output_file_d);
        Console::put(output);
        Console::decreaseIndention();
      }

      // open the output file for the utterance hypotheses
      //
      if (!total_output_file.open(output_file_d, File::WRITE_ONLY)) {
        return Error::handle(name(), L"run: opening output file",
                             Error::ARG, __FILE__, __LINE__);
      }
      else {
        total_output_file.close();
      }
    }
  }
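  // when the output mode is DATABASE, the hypotheses are stored as
  // annotation graphs in a transcription database: the frame duration
  // is written first, then the list of utterance identifiers, and then
  // one annotation graph per utterance (see the main loop below)
  //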
  // when the output mode is DATABASE
  //
  Sof output_db_sof;
  if (output_mode_d == DATABASE) {
    if (output_file_d.length() > 0) {

      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nopening the output file: ");
        output.concat(output_file_d);
        Console::put(output);
        Console::decreaseIndention();
      }

      // open the output file for the utterance hypotheses (TEXT)
      //
      if (output_type_d == TEXT) {
        if (!output_db_sof.open(output_file_d, File::WRITE_ONLY)) {
          return Error::handle(name(), L"run: opening output file",
                               Error::ARG, __FILE__, __LINE__);
        }
        else {

          // read all identifiers from the sdb object
          //
          Vector identifier_keys;
          identifier_keys.setCapacity(sdb_a.length());
          for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
            identifier.assign(input_ID);
            identifier_keys.concat(identifier);
          }

          // write the frame-duration to the output file
          //
          Float frame_dur = vector_fe_d((int32)0).getFrameDuration();
          frame_dur.write(output_db_sof, (int32)0);

          // write the identifiers to the transcription database
          //
          trans_db.storePartial(output_db_sof, 0, identifier_keys);
        }
      } // end of output type TEXT

      // open the output file for the utterance hypotheses (BINARY)
      //
      if (output_type_d == BINARY) {
        if (!output_db_sof.open(output_file_d, File::WRITE_ONLY,
                                File::BINARY)) {
          return Error::handle(name(), L"run: opening output file",
                               Error::ARG, __FILE__, __LINE__);
        }
        else {

          // read all identifiers from the sdb object
          //
          Vector identifier_keys;
          identifier_keys.setCapacity(sdb_a.length());
          for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
            identifier.assign(input_ID);
            identifier_keys.concat(identifier);
          }

          // write the frame-duration to the output file
          //
          Float frame_dur = vector_fe_d((int32)0).getFrameDuration();
          frame_dur.write(output_db_sof, (int32)0);

          // write the identifiers to the transcription database
          //
          trans_db.storePartial(output_db_sof, 0, identifier_keys);
        }
      } // end of output type BINARY
    }
  }

  // loop through the input utterances
  //
  for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {

    current_file_num++;

    // if the input is streaming
    //
    if (stream_d) {

      // buffer the input so that it can be accessed multiple times
      //
      buffer_filename.assign(File::STREAM_FILE);
      buffer_file.open(buffer_filename);
      temp_buffer_filename = (String)(*(buffer_file.getTempFilename())[0]);
      input_file_name.assign(temp_buffer_filename);
    }

    // otherwise get the file path
    //
    else {

      // get the audio file path
      //
      identifier.assign(input_ID);
      if (!audio_db_d.getRecord(identifier, input_file_name)) {
        return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__);
      }
    }

    // print utterance processing information
    //
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nprocessing file ");
      output.concat(current_file_num);
      output.concat(L" (");
      output.concat(identifier);
      output.concat(L")");
      output.concat(L": ");
      output.concat(input_file_name);
      Console::put(output);
      Console::decreaseIndention();
    }

    // initialize the decoders
    //
    if (!search_engine_d.initializeLinearDecoder()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }
    if (!imp_search_engine_d.initializeLinearDecoder()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }

    // we don't need a transcription database for segmented utterances
    //
    if (transcription_db_file_d.length() > 0) {
      initTranscription(identifier, current_file_num - 1);
    }
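    // each FrontEnd in vector_fe_d is opened on the same input file
    // below; the linear decoders then read their feature frames
    // through these opened front ends
    //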
    // loop over all the front end files
    //
    for (int32 i = 0; i < vector_fe_d.length(); i++) {

      // process the utterance file by the front end
      //
      vector_fe_d(i).open(input_file_name);

      // retrieve all the frames of data in advance
      //
      // extractFeatures(i, data);
    }

    // decode the utterance
    //
    if (!verify_d) {
      search_engine_d.linearDecoder(vector_fe_d);
      imp_search_engine_d.linearDecoder(vector_fe_d);
    }

    // close the FrontEnds
    //
    for (int32 i = 0; i < vector_fe_d.length(); i++) {
      vector_fe_d(i).close();
    }

    // close the input file
    //
    if (stream_d) {
      buffer_file.close();
    }

    // pick up the best hypothesis and its parameters
    //
    String hypotheses, hypotheses2;
    float64 score = 0, score2 = 0;
    int32 num_frames = 0, num_frames2 = 0;
    DoubleLinkedList trace_path, trace_path2;

    if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d,
                                       score, num_frames, trace_path) ||
        !imp_search_engine_d.getHypotheses(hypotheses2, alignment_level_d,
                                           score2, num_frames2, trace_path2)) {

      // if no hypothesis was found
      //
      hypotheses.clear();
      trace_path.clear();
      hypotheses2.clear();
      trace_path2.clear();

      if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) {

        // print the warning message
        //
        Console::increaseIndention();
        Console::put(L"\nno hypothesis found");
        Console::decreaseIndention();
      }
    }
    else {
      num_valid_files++;
      total_num_frames += num_frames;
    }

    // print detailed information about the hypothesis
    //
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      Console::increaseIndention();
      output.assign(L"\nhyp: ");
      output.concat(hypotheses);
      output.concat(L"\nhyp2: ");
      output.concat(hypotheses2);
      output.concat(L"\nscore: ");
      output.concat(score);
      output.concat(L" frames: ");
      output.concat(num_frames);
      output.concat(L"\nscore2: ");
      output.concat(score2);
      output.concat(L" frames: ");
      output.concat(num_frames2);
      Console::put(output);
      Console::decreaseIndention();
      Console::decreaseIndention();
    }

    float32 mean_score, mean_score2;
    String hypo;

    // compute the mean likelihood scores
    //
    mean_score = score / num_frames;
    mean_score2 = score2 / num_frames2;

    // make a binary decision:
    // decision_thresholds_d(0) indicates the absolute threshold
    // below which a mean likelihood score is rejected, and
    // decision_thresholds_d(1) indicates the ratio threshold
    // below which the difference between the mean likelihood
    // scores of the speaker models and imposter models is rejected
    //
    if (mean_score > (float32)decision_thresholds_d(0) &&
        (mean_score - mean_score2) > (float32)decision_thresholds_d(1)) {
      hypo.assign(L"ACCEPTED: ");
    }
    else {
      hypo.assign(L"REJECTED: ");
    }
    hypo.concat(mean_score - mean_score2);
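    // illustrative example (values are hypothetical, not from this code):
    // with decision_thresholds_d = (-80.0, 2.0), a speaker mean score of
    // -62.1 and an imposter mean score of -65.4 give a difference of 3.3;
    // both -62.1 > -80.0 and 3.3 > 2.0 hold, so the utterance is
    // ACCEPTED and the reported value is 3.3
    //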
    if (output_mode_d == TRANSFORM) {

      // transform the input file and its path to the output file
      //
      sdb_a.transformName(output_file_name, input_file_name);

      // open the output file and write best hypothesis
      //
      File output_file;
      if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
        return Error::handle(name(), L"linearDecoder - error opening output file", Error::ARG, __FILE__, __LINE__);
      }
      output_file.put(hypo);

      // close the output file
      //
      output_file.close();
    }

    if (output_mode_d == DATABASE) {

      String name_00(identifier);
      String gtype_00(identifier);
      AnnotationGraph output_graph(name_00, gtype_00);

      // convert the best search path to an annotation graph
      //
      if (!createAnnotationGraph(output_graph, trace_path)) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }

      // prune the annotation graph according to output levels
      //
      pruneAnnotationGraph(output_graph);

      // write the annotation graph to the database
      //
      trans_db.storePartial(output_db_sof, current_file_num - 1, output_graph);
    }

    if (output_mode_d == FILE) {

      // output the best hypothesis
      //
      output.assign(hypo);
      if ((int32)alignment_level_d < 0) {
        output.concat(L" (");
        output.concat(input_ID);
        output.concat(L")\n");
      }

      if (output_file_d.length() != 0) {
        total_output_file.open(output_file_d, File::APPEND_PLUS);
        total_output_file.put(output);
        total_output_file.close();
      }
      else {
        if (verbosity_d < Integral::BRIEF) {
          Console::increaseIndention();
          Console::increaseIndention();
          Console::put(output);
          Console::decreaseIndention();
          Console::decreaseIndention();
        }
      }
    }

    if (output_mode_d == LIST) {

      // output the hypothesis to the corresponding file
      // from the output list
      //
      bool8 more_files;

      if (current_file_num == 1) {

        // read output files list into signal database
        //
        Sof output_list_file;
        if (!output_list_file.open(output_list_d)) {
          return Error::handle(name(), L"linearDecoder - error opening output list", Error::ARG, __FILE__, __LINE__);
        }
        output_sdb.read(output_list_file, 0);
        output_list_file.close();
        more_files = output_sdb.gotoFirst();
      }
      else {

        // move to the next output file
        //
        more_files = output_sdb.gotoNext();
      }

      if (!more_files) {
        return Error::handle(name(), L"linearDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__);
      }

      // open the next output file and write the best hypothesis
      //
      output_sdb.getName(output_file_name);
      File output_file;
      if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
        Console::increaseIndention();
        output.assign(L"\ncannot open output file: ");
        output.concat(output_file_name);
        Console::put(output);
        Console::decreaseIndention();
      }
      else {
        output_file.put(hypo);
        output_file.put(L"\n");
        output_file.close();
      }
    }
  } // end of looping through the input utterances

  // clean up all memory
  //
  search_engine_d.clear();
  imp_search_engine_d.clear();

  // close database files
  //
  if (output_mode_d == DATABASE) {
    output_db_sof.close();
  }

  // close the audio database (optional)
  //
  if (audio_db_file_d.length() > 0) {
    if (!audio_db_d.close()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }
  }

  // close the transcription database (optional)
  //
  if (transcription_db_file_d.length() > 0) {
    if (!transcription_db_d.close()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }
  }

  // print the number of successfully processed files
  //
  if (verbosity_d >= Integral::BRIEF) {
    output.assign(L"\nprocessed ");
    output.concat(num_valid_files);
    output.concat(L" file(s) successfully, attempted ");
    output.concat(current_file_num);
    output.concat(L" file(s), ");
    output.concat(total_num_frames);
    output.concat(L" frame(s)\n");
    Console::put(output);
  }

  // gracefully exit
  //
  return true;
}

// method: verifyl
//
// arguments:
//  Sdb& sdb: (input) signal data base to run on
//
// return: a bool8 value indicating status
//
// this is a method to verify using speaker models
//
bool8 VerifyHMM::verifyl(Sdb& sdb_a) {

  // check for algorithm and implementation
  //
  if ((algorithm_d != VERIFY) || (implementation_d != LIKELIHOOD)) {
    return Error::handle(name(), L"verify", Error::ARG, __FILE__, __LINE__);
  }

  // declare local variables
  //
  int32 num_valid_files = (int32)0;
  int32 current_file_num = (int32)0;
  int32 total_num_frames = (int32)0;
  String identifier;
  Filename input_file_name;
  Filename input_ID;
  File buffer_file;
  Filename buffer_filename;
  Filename temp_buffer_filename;
  String output;
  Filename output_file_name;
  Sdb output_sdb;
  Vector data;
  TranscriptionDatabase trans_db;

  // load the hmm models from the language model file
  //
  if (!load()) {
    return Error::handle(name(), L"run: load hmm models",
                         Error::ARG, __FILE__, __LINE__);
  }
  // set the search engine mode
  //
  search_engine_d.setInitialLevel(initial_level_d);
  search_engine_d.setSearchMode(HierarchicalSearch::DECODE);

  // when the output mode is FILE
  //
  File total_output_file;
  if (output_mode_d == FILE) {
    if (output_file_d.length() > 0) {

      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nopening the output file: ");
        output.concat(output_file_d);
        Console::put(output);
        Console::decreaseIndention();
      }

      // open the output file for the utterance hypotheses
      //
      if (!total_output_file.open(output_file_d, File::WRITE_ONLY)) {
        return Error::handle(name(), L"run: opening output file",
                             Error::ARG, __FILE__, __LINE__);
      }
      else {
        total_output_file.close();
      }
    }
  }

  // when the output mode is DATABASE
  //
  Sof output_db_sof;
  if (output_mode_d == DATABASE) {
    if (output_file_d.length() > 0) {

      if (verbosity_d >= Integral::BRIEF) {
        Console::increaseIndention();
        output.assign(L"\nopening the output file: ");
        output.concat(output_file_d);
        Console::put(output);
        Console::decreaseIndention();
      }

      // open the output file for the utterance hypotheses (TEXT)
      //
      if (output_type_d == TEXT) {
        if (!output_db_sof.open(output_file_d, File::WRITE_ONLY)) {
          return Error::handle(name(), L"run: opening output file",
                               Error::ARG, __FILE__, __LINE__);
        }
        else {

          // read all identifiers from the sdb object
          //
          Vector identifier_keys;
          identifier_keys.setCapacity(sdb_a.length());
          for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
            identifier.assign(input_ID);
            identifier_keys.concat(identifier);
          }

          // write the frame-duration to the output file
          //
          Float frame_dur = vector_fe_d((int32)0).getFrameDuration();
          frame_dur.write(output_db_sof, (int32)0);

          // write the identifiers to the transcription database
          //
          trans_db.storePartial(output_db_sof, 0, identifier_keys);
        }
      } // end of output type TEXT

      // open the output file for the utterance hypotheses (BINARY)
      //
      if (output_type_d == BINARY) {
        if (!output_db_sof.open(output_file_d, File::WRITE_ONLY,
                                File::BINARY)) {
          return Error::handle(name(), L"run: opening output file",
                               Error::ARG, __FILE__, __LINE__);
        }
        else {

          // read all identifiers from the sdb object
          //
          Vector identifier_keys;
          identifier_keys.setCapacity(sdb_a.length());
          for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
            identifier.assign(input_ID);
            identifier_keys.concat(identifier);
          }

          // write the frame-duration to the output file
          //
          Float frame_dur = vector_fe_d((int32)0).getFrameDuration();
          frame_dur.write(output_db_sof, (int32)0);

          // write the identifiers to the transcription database
          //
          trans_db.storePartial(output_db_sof, 0, identifier_keys);
        }
      } // end of output type BINARY
    }
  }

  // loop through the input utterances
  //
  for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {

    current_file_num++;

    // if the input is streaming
    //
    if (stream_d) {

      // buffer the input so that it can be accessed multiple times
      //
      buffer_filename.assign(File::STREAM_FILE);
      buffer_file.open(buffer_filename);
      temp_buffer_filename = (String)(*(buffer_file.getTempFilename())[0]);
      input_file_name.assign(temp_buffer_filename);
    }

    // otherwise get the file path
    //
    else {

      // get the audio file path
      //
      identifier.assign(input_ID);
      if (!audio_db_d.getRecord(identifier, input_file_name)) {
        return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__);
      }
    }

    // print utterance processing information
    //
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nprocessing file ");
      output.concat(current_file_num);
      output.concat(L" (");
      output.concat(identifier);
      output.concat(L")");
      output.concat(L": ");
      output.concat(input_file_name);
      Console::put(output);
      Console::decreaseIndention();
    }

    // initialize the decoder
    //
    if (!search_engine_d.initializeLinearDecoder()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }

    // we don't need a transcription database for segmented utterances
    //
    if (transcription_db_file_d.length() > 0) {
      initTranscription(identifier, current_file_num - 1);
    }

    // loop over all the front end files
    //
    for (int32 i = 0; i < vector_fe_d.length(); i++) {

      // process the utterance file by the front end
      //
      vector_fe_d(i).open(input_file_name);
    }

    // decode the utterance
    //
    if (!verify_d) {
      search_engine_d.linearDecoder(vector_fe_d);
    }

    // close the FrontEnds
    //
    for (int32 i = 0; i < vector_fe_d.length(); i++) {
      vector_fe_d(i).close();
    }

    // close the input file
    //
    if (stream_d) {
      buffer_file.close();
    }

    // pick up the best hypothesis and its parameters
    //
    String hypotheses;
    float64 score = 0;
    int32 num_frames = 0;
    DoubleLinkedList trace_path;

    if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d,
                                       score, num_frames, trace_path)) {

      // if no hypothesis was found
      //
      hypotheses.clear();
      trace_path.clear();

      if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) {

        // print the warning message
        //
        Console::increaseIndention();
        Console::put(L"\nno hypothesis found");
        Console::decreaseIndention();
      }
    }
    else {
      num_valid_files++;
      total_num_frames += num_frames;
    }

    // print detailed information about the hypothesis
    //
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      Console::increaseIndention();
      output.assign(L"\nhyp: ");
      output.concat(hypotheses);
      output.concat(L"\nscore: ");
      output.concat(score);
      output.concat(L" frames: ");
      output.concat(num_frames);
      Console::put(output);
      Console::decreaseIndention();
      Console::decreaseIndention();
    }

    float32 mean_score;
    String hypo;

    // compute the mean likelihood score
    //
    mean_score = score / num_frames;

    // make a binary decision: decision_thresholds_d(0) indicates the
    // absolute threshold below which a mean likelihood score is
    // rejected
    //
    if (mean_score > (float32)decision_thresholds_d(0)) {
      hypo.assign(L"ACCEPTED: ");
    }
    else {
      hypo.assign(L"REJECTED: ");
    }
    hypo.concat(mean_score);
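    // unlike verifylr, this likelihood-only decision uses a single
    // absolute threshold on the speaker models' mean score; no
    // imposter score is involved
    //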
    if (output_mode_d == TRANSFORM) {

      // transform the input file and its path to the output file
      //
      sdb_a.transformName(output_file_name, input_file_name);

      // open the output file and write best hypothesis
      //
      File output_file;
      if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
        return Error::handle(name(), L"linearDecoder - error opening output file", Error::ARG, __FILE__, __LINE__);
      }
      output_file.put(hypo);

      // close the output file
      //
      output_file.close();
    }

    if (output_mode_d == DATABASE) {

      String name_00(identifier);
      String gtype_00(identifier);
      AnnotationGraph output_graph(name_00, gtype_00);

      // convert the best search path to an annotation graph
      //
      if (!createAnnotationGraph(output_graph, trace_path)) {
        return Error::handle(name(), L"linearDecoder", Error::ARG,
                             __FILE__, __LINE__);
      }

      // prune the annotation graph according to output levels
      //
      pruneAnnotationGraph(output_graph);

      // write the annotation graph to the database
      //
      trans_db.storePartial(output_db_sof, current_file_num - 1, output_graph);
    }

    if (output_mode_d == FILE) {

      // output the best hypothesis
      //
      output.assign(hypo);
      if ((int32)alignment_level_d < 0) {
        output.concat(L" (");
        output.concat(input_ID);
        output.concat(L")\n");
      }

      if (output_file_d.length() != 0) {
        total_output_file.open(output_file_d, File::APPEND_PLUS);
        total_output_file.put(output);
        total_output_file.close();
      }
      else {
        if (verbosity_d < Integral::BRIEF) {
          Console::increaseIndention();
          Console::increaseIndention();
          Console::put(output);
          Console::decreaseIndention();
          Console::decreaseIndention();
        }
      }
    }

    if (output_mode_d == LIST) {

      // output the hypothesis to the corresponding file
      // from the output list
      //
      bool8 more_files;

      if (current_file_num == 1) {

        // read output files list into signal database
        //
        Sof output_list_file;
        if (!output_list_file.open(output_list_d)) {
          return Error::handle(name(), L"linearDecoder - error opening output list", Error::ARG, __FILE__, __LINE__);
        }
        output_sdb.read(output_list_file, 0);
        output_list_file.close();
        more_files = output_sdb.gotoFirst();
      }
      else {

        // move to the next output file
        //
        more_files = output_sdb.gotoNext();
      }

      if (!more_files) {
        return Error::handle(name(), L"linearDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__);
      }

      // open the next output file and write the best hypothesis
      //
      output_sdb.getName(output_file_name);
      File output_file;
      if (!output_file.open(output_file_name, File::WRITE_ONLY)) {
        Console::increaseIndention();
        output.assign(L"\ncannot open output file: ");
        output.concat(output_file_name);
        Console::put(output);
        Console::decreaseIndention();
      }
      else {
        output_file.put(hypo);
        output_file.put(L"\n");
        output_file.close();
      }
    }
  } // end of looping through the input utterances

  // clean up all memory
  //
  search_engine_d.clear();
  imp_search_engine_d.clear();

  // close database files
  //
  if (output_mode_d == DATABASE) {
    output_db_sof.close();
  }

  // close the audio database (optional)
  //
  if (audio_db_file_d.length() > 0) {
    if (!audio_db_d.close()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }
  }

  // close the transcription database (optional)
  //
  if (transcription_db_file_d.length() > 0) {
    if (!transcription_db_d.close()) {
      return Error::handle(name(), L"linearDecoder", Error::ARG,
                           __FILE__, __LINE__);
    }
  }

  // print the number of successfully processed files
  //
  if (verbosity_d >= Integral::BRIEF) {
    output.assign(L"\nprocessed ");
    output.concat(num_valid_files);
    output.concat(L" file(s) successfully, attempted ");
    output.concat(current_file_num);
    output.concat(L" file(s), ");
    output.concat(total_num_frames);
    output.concat(L" frame(s)\n");
    Console::put(output);
  }

  // gracefully exit
  //
  return true;
}

// method: run
//
// arguments:
//  Sdb& sdb: (input) signal data base to run on
//
// return: a bool8 value indicating status
//
// this is the run method
//
bool8 VerifyHMM::run(Sdb& sdb_a) {

  // check the input parameters
  //
  if (!checkParams()) {
    return Error::handle(name(), L"run - invalid parameters",
                         Error::ARG, __FILE__, __LINE__);
  }

  // branch on VERIFY
  //
  if (algorithm_d == VERIFY) {
    verify(sdb_a);
  } // end of VERIFY

  // branch on TRAIN using BAUM_WELCH
  //
  else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH) {
    HiddenMarkovModel::setAlgorithm(HiddenMarkovModel::TRAIN);
    HiddenMarkovModel::setImplementation(HiddenMarkovModel::BAUM_WELCH);
    HiddenMarkovModel::linearDecoder(sdb_a);
  } // end of TRAIN using BAUM_WELCH
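  // the remaining branches, like the TRAIN branch above, delegate the
  // actual work to the HiddenMarkovModel implementation
  //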
  // branch on MODEL_CREATION using GLOBAL
  //
  else if (algorithm_d == MODEL_CREATION && implementation_d == GLOBAL) {
    HiddenMarkovModel::setAlgorithm(HiddenMarkovModel::MODEL_CREATION);
    HiddenMarkovModel::setImplementation(HiddenMarkovModel::GLOBAL);
    HiddenMarkovModel::run(sdb_a);
  } // end of MODEL_CREATION using GLOBAL

  // branch on INITIALIZE using GLOBAL
  //
  else if (algorithm_d == INITIALIZE && implementation_d == GLOBAL) {
    HiddenMarkovModel::setAlgorithm(HiddenMarkovModel::INITIALIZE);
    HiddenMarkovModel::setImplementation(HiddenMarkovModel::GLOBAL);
    HiddenMarkovModel::run(sdb_a);
  } // end of INITIALIZE using GLOBAL

  // branch on MIXTURE_SPLITTING using VARIANCE_SPLITTING
  // (does not support streaming input)
  //
  else if (!stream_d && algorithm_d == MIXTURE_SPLITTING &&
           implementation_d == VARIANCE_SPLITTING) {
    HiddenMarkovModel::setAlgorithm(HiddenMarkovModel::MIXTURE_SPLITTING);
    HiddenMarkovModel::setImplementation(HiddenMarkovModel::VARIANCE_SPLITTING);
    HiddenMarkovModel::run(sdb_a);
  } // end of MIXTURE_SPLITTING using VARIANCE_SPLITTING

  // branch on the UNKNOWN options
  //
  else {
    return Error::handle(name(), L"invalid algorithm and implementation",
                         Error::ARG, __FILE__, __LINE__);
  } // end of the UNKNOWN options

  // exit gracefully
  //
  return true;
}