// file: $isip/class/pr/HiddenMarkovModel/hmm_05.cc
// version: $Id: hmm_05.cc 10498 2006-03-15 21:42:32Z may $
//

// isip include files
//
// NOTE(review): the three #include directives below lost their <header>
// arguments, and template arguments throughout this file (Vector<...>,
// HashTable<...>, SingleLinkedList<...>, const_cast<...>) were stripped,
// most likely by markup removal during extraction. The missing text must
// be restored from the original source before this file can compile.
//
#include "HiddenMarkovModel.h"
#include
#include
#include

// method: store
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this method stores the HMM models: it writes the (possibly updated)
// language model to the update language-model file, writes the symbol
// hash table and statistical models of the last search level to the
// update statistical-model-pool file, optionally dumps per-level symbol
// occupancies, and closes the optional databases.
//
bool8 HiddenMarkovModel::store() {

  // declare sof object for language model file
  //
  Sof update_lm_file;
  LanguageModel lm;

  // propagate the configured output format to the language model;
  // unknown formats silently fall back to IHD
  //
  if (output_format_d == IHD) {
    lm.setFormat(LanguageModel::IHD);
  }
  else if (output_format_d == JSGF) {
    lm.setFormat(LanguageModel::JSGF);
  }
  else if (output_format_d == XML) {
    lm.setFormat(LanguageModel::XML);
  }
  else if (output_format_d == BNF) {
    lm.setFormat(LanguageModel::BNF);
  }
  else if (output_format_d == ABNF) {
    lm.setFormat(LanguageModel::ABNF);
  }
  else {
    lm.setFormat(LanguageModel::IHD);
  }

  // copy the rule model into the output language model
  //
  lm.setRuleModel(lm_d.getRuleModel());

  // open the update language model file in the requested mode
  //
  if (output_type_d == TEXT) {
    update_lm_file.open(update_lm_file_d, File::WRITE_ONLY, File::TEXT);
  }
  else {
    update_lm_file.open(update_lm_file_d, File::WRITE_ONLY, File::BINARY);
  }

  // write the language model
  //
  if (!lm.write(update_lm_file)) {
    return Error::handle(name(), L"store - error writing models to file", Error::ARG, __FILE__, __LINE__);
  }

  // close the updated language model pool file
  //
  update_lm_file.close();

  // store the statistical models
  //
  Sof update_smp_file;

  if (output_type_d == TEXT) {
    update_smp_file.open(update_smp_file_d, File::WRITE_ONLY, File::TEXT);
  }
  else {
    update_smp_file.open(update_smp_file_d, File::WRITE_ONLY, File::BINARY);
  }

  // warn (but do not fail) if the last search level carries no models
  //
  if (lm_d.getIHD()(lm_d.getIHD().length() - 1).getStatisticalModels().length() == 0) {
    Console::put(L"Warning: no statistical models available in the last search level.");
  }

  // fetch the symbol hash table and statistical models of the last
  // (bottom) search level
  //
  // NOTE(review): the template arguments of HashTable and Vector were
  // stripped here - restore from the original source
  //
  HashTable& hash_table = lm_d.getIHD()(lm_d.getIHD().length() - 1).getSymbolHashTable();
  Vector& stat_models = lm_d.getIHD()(lm_d.getIHD().length() - 1).getStatisticalModels();

  // write both objects, tagged with the last level index
  //
  hash_table.write(update_smp_file, lm_d.getIHD().length() - 1, SearchLevel::PARAM_STAT_HASH);
  stat_models.write(update_smp_file, lm_d.getIHD().length() - 1, SearchLevel::PARAM_STAT);
  update_smp_file.close();

  // write the symbol occupancy for each SearchLevel (optional)
  //
  for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) {

    // have the search levels write the symbol occupancies
    //
    SearchLevel& search_level = search_engine_d.getSearchLevel(curr_level);
    search_level.storeOccupancies();
  }

  // close the audio database (optional)
  //
  if (audio_db_file_d.length() > 0) {
    if (!audio_db_d.close()) {
      return Error::handle(name(), L"store", Error::ARG, __FILE__, __LINE__);
    }
  }

  // close the symbol graph database (optional)
  //
  if (symbol_graph_db_file_d.length() > 0) {
    if (!symbol_graph_db_d.close()) {
      return Error::handle(name(), L"store", Error::ARG, __FILE__, __LINE__);
    }
  }

  // close the transcription database (optional)
  //
  if (transcription_db_file_d.length() > 0) {
    if (!transcription_db_d.close()) {
      return Error::handle(name(), L"store", Error::ARG, __FILE__, __LINE__);
    }
  }

  // exit gracefully
  //
  return true;
}

// method: parseLevels
//
// arguments:
//  const String& source: (input) levels string
//  VectorByte& vector: (output) string converted to a vector
//
// return: a bool8 value indicating status
//
// this method converts the string to a vector of level indices after
// configuring SearchLevels: the input is a ','-separated list of
// ':'-separated level-tag ranges; for each range the mask entries
// between the minimum and maximum matching level index are set to ON
//
bool8 HiddenMarkovModel::parseLevels(const String& source_a, VectorByte& vector_a) {

  // allocate space to the number of levels in the search space
  //
  vector_a.setLength(num_levels_d);

  // pad with all OFF's
  //
  vector_a.assign((byte8)OFF);

  // count the number of sub-strings separated by ','
  //
  int32 tokens = source_a.countTokens(L',');

  // get all the sub-strings separated by ','
  //
  // NOTE(review): "String vals[tokens]" is a variable-length array,
  // a compiler extension rather than standard C++ - confirm the build
  // relies on gcc
  //
  String vals[tokens];
  for (int32 i = 0, pos = 0; i < tokens; i++) {
    source_a.tokenize(vals[i], pos, L",");
  }

  // compute the number of all the sub-strings (levels) in the input
  // string separated by the delimiter ':'
  //
  int32 num = 0;
  for (int32 i = 0; i < tokens; i++) {
    num += vals[i].countTokens(L':');
  }

  // local variables
  //
  // NOTE(review): template argument of Vector stripped here
  //
  Vector levels;

  // loop through all the sub-strings
  //
  for (int32 i = 0; i < tokens; i++) {

    // local variables
    //
    String level;
    int32 pos = 0;

    // clear the levels from the previous loop
    //
    levels.clear(Integral::RELEASE);

    // parse the input in the format based on the delimiter ":"
    //
    while (vals[i].tokenize(level, pos, L":")) {

      // get rid of white spaces if any
      //
      level.trim();

      // append the level tag to the list
      //
      int32 len = levels.length();
      levels.setLength(len + (int32)1);
      levels(len).assign(level);
    }

    // local variables
    //
    VectorLong indices;
    int32 len = levels.length();
    int32 count = 0;
    indices.setLength(len);

    // get the level values corresponding to the SearchLevel tags
    //
    // NOTE(review): only 'count' entries of 'indices' are assigned; if
    // fewer than 'len' tags match, min()/max() below also see the
    // untouched tail entries (presumably zero-initialized) - verify.
    // also, 'continue' after a match looks like it was meant to be
    // 'break' (harmless here since the scan just runs out) - confirm
    //
    for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) {
      if (algorithm_d == DECODE && implementation_d == STACK) {

        // stack decoder: match tags against the stack engine's levels
        //
        SearchLevel& search_level = stack_engine_d.getSearchLevel(curr_level);
        for (int32 i = 0; i < len; i++) {
          if (levels(i).eq(search_level.getLevelTag())) {
            indices(count) = curr_level;
            count++;
            continue;
          }
        }
      }
      else {

        // viterbi decoder: match tags against the search engine's levels
        //
        SearchLevel& search_level = search_engine_d.getSearchLevel(curr_level);
        for (int32 i = 0; i < len; i++) {
          if (levels(i).eq(search_level.getLevelTag())) {
            indices(count) = curr_level;
            count++;
            continue;
          }
        }
      }
    }

    // find the local min and maximum index
    //
    int32 min_index = indices.min();
    int32 max_index = indices.max();

    // get the missing levels between the min and max indices, and set
    // them to ON
    //
    for (int32 j = min_index; j <= max_index; j++) {
      vector_a(j) = ON;
    }
  }

  // exit gracefully
  //
  return true;
}

// method: load
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this method loads the HMM models: the optional audio, symbol-graph
// and transcription databases, the optional front-end parameter file(s),
// the mandatory language model and statistical model pool, the optional
// per-level configuration file, and finally resolves the output/update
// level masks
//
bool8 HiddenMarkovModel::load() {

  // declare local variables
  //
  String output;

  // language model to load
  //
  LanguageModel lm;

  // load the audio database (optional)
  //
  if (audio_db_file_d.length() > 0) {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading audio database: ");
      output.concat(audio_db_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // load the audio database
    //
    Filename audio_db_file(audio_db_file_d);
    if (!audio_db_d.open(audio_db_file)) {
      audio_db_file_d.debug(L"audio_db_file_d");
      return Error::handle(name(), L"run: open audio database file", Error::ARG, __FILE__, __LINE__);
    }
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no audio database file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // load the symbol graph database (optional)
  //
  if (symbol_graph_db_file_d.length() > 0) {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading symbol graph database: ");
      output.concat(symbol_graph_db_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // load the symbol_graph database
    //
    if (!symbol_graph_db_d.open(symbol_graph_db_file_d)) {
      symbol_graph_db_file_d.debug(L"symbol_graph_db_file_d");
      return Error::handle(name(), L"run: open symbol graph database file", Error::ARG, __FILE__, __LINE__);
    }
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no symbol graph database file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // load the transcription database (optional)
  //
  if (transcription_db_file_d.length() > 0) {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading transcription database: ");
      output.concat(transcription_db_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // load the transcriptions
    //
    Filename transcription_db_file(transcription_db_file_d);
    if (!transcription_db_d.open(transcription_db_file)) {
      transcription_db_file_d.debug(L"transcription_db_file_d");
      return Error::handle(name(), L"error opening transcription database file", Error::ARG, __FILE__, __LINE__);
    }
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no transcription database file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // load the front-end parameters (optional)
  //
  if (fend_file_d.length() > 0) {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading front-end: ");
      output.concat(fend_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // local variables for front-end loading
    //
    Sof fend_file_sof;
    Sdb sdb_fend_files;
    int32 len = 0;
    int32 tag = 0;
    Filename current_fe_file;

    // open the front end for reading
    //
    if (!fend_file_sof.open(fend_file_d)) {
      return Error::handle(name(), L"load - unable to open front-end file", Error::ARG, __FILE__, __LINE__);
    }

    // check if the recipe file is a file list or just a recipe file
    //
    if (Sdb::isSdb(fend_file_d)) {
      sdb_fend_files.append(fend_file_d, true);
    }
    else {
      sdb_fend_files.append(fend_file_d);
    }
    fend_file_sof.close();

    // get the number of FE files (the current entry plus each
    // successful advance)
    //
    do {
      len++;
    } while (sdb_fend_files.gotoNext());
    vector_fe_d.setLength(len);

    // start from the first FE
    //
    sdb_fend_files.gotoFirst();

    // loop over all FE files
    //
    for (int32 num_fe = 0; num_fe < len; num_fe++) {

      // get the current FE file name
      //
      sdb_fend_files.getName(current_fe_file);

      // open the FE file for reading
      //
      // NOTE(review): open/read failures here only log a console
      // message and continue with the next file - confirm this
      // best-effort behavior is intended
      //
      if (!fend_file_sof.open(current_fe_file)) {
        String msg(L"Error: could not open FrontEnd file: ");
        msg.concat(current_fe_file);
        Console::put(msg);
      }

      // read the configuration of the FrontEnd from the parameter file
      //
      if (((tag = fend_file_sof.first(FrontEnd::name())) == Sof::NO_TAG) || (!vector_fe_d(num_fe).read(fend_file_sof, tag))) {
        String msg(L"Error: could not read configuration from file: ");
        msg.concat(fend_file_d);
        Console::put(msg);
      }
      fend_file_sof.close();
      sdb_fend_files.gotoNext();
    }
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no front-end file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // check the language model
  //
  if (lm_file_d.length() == 0) {
    return Error::handle(name(), L"load - invalid language model file", Error::ARG, __FILE__, __LINE__);
  }

  if (verbosity_d >= Integral::BRIEF) {
    Console::increaseIndention();
    output.assign(L"\nloading language model: ");
    output.concat(lm_file_d);
    Console::put(output);
    Console::decreaseIndention();
  }

  // open the language model file
  //
  Sof lm_file;
  if (!lm_file.open(lm_file_d, File::READ_ONLY)) {
    lm_file_d.debug(L"lm_file_d");
    return Error::handle(name(), L"error opening language model file", Error::ARG, __FILE__, __LINE__);
  }

  // load the models and set them to the search engine:
  // viterbi decoder in the first branch, stack decoder in the second
  //
  if (!(algorithm_d == DECODE && implementation_d == STACK)) {
    if (!lm.read(lm_file)) {
      return Error::handle(name(), L"read: reading model file", Error::ARG, __FILE__, __LINE__);
    }
    lm_d.setRuleModel(lm.getRuleModel());
    search_engine_d.setHDigraph(lm_d.getIHD());
  }
  else {
    if (!lm.read(lm_file)) {
      return Error::handle(name(), L"read: reading model file", Error::ARG, __FILE__, __LINE__);
    }
    lm_d.setRuleModel(lm.getRuleModel());
    stack_engine_d.setHDigraph(lm_d.getIHD());
  }
  lm_file.close();

  // set the num of levels
  //
  num_levels_d = lm_d.getIHD().length();

  // check the statistical model pool
  //
  if (smp_file_d.length() == 0) {
    return Error::handle(name(), L"load - invalid statistical model pool file", Error::ARG, __FILE__, __LINE__);
  }

  if (verbosity_d >= Integral::BRIEF) {
    Console::increaseIndention();
    output.assign(L"\nloading statistical model pool: ");
    output.concat(smp_file_d);
    Console::put(output);
    Console::decreaseIndention();
  }

  // load the statistical model pool
  //
  Sof smp_file;
  int32 smp_level;
  if (!smp_file.open(smp_file_d, File::READ_ONLY)) {
    smp_file_d.debug(L"smp_file_d");
    return Error::handle(name(), L"error opening statistical model pool", Error::ARG, __FILE__, __LINE__);
  }

  // the models are attached to the last (bottom) search level
  //
  if (!(algorithm_d == DECODE && implementation_d == STACK)) {
    smp_level = search_engine_d.getHDigraph().length() - 1;
    search_engine_d.getHDigraph()(smp_level).loadStatisticalModels(smp_file, smp_level);

    // set statistical models to the search nodes in the last level
    //
    if (!search_engine_d.getHDigraph()(smp_level).connectStatisticalModels()) {
      return Error::handle(name(), L"load", Error::ARG, __FILE__, __LINE__);
    }
  }
  else {
    smp_level = stack_engine_d.getHDigraph().length() - 1;
    stack_engine_d.getHDigraph()(smp_level).loadStatisticalModels(smp_file, smp_level);

    // set statistical models to the search nodes in the last level
    //
    if (!stack_engine_d.getHDigraph()(smp_level).connectStatisticalModels()) {
      return Error::handle(name(), L"load", Error::ARG, __FILE__, __LINE__);
    }
  }

  // close statistical model pool file
  //
  smp_file.close();

  // set up the number of levels for the search engine; the stack
  // decoder additionally re-reads its own parameters from the
  // parameter file
  //
  if (!(algorithm_d == DECODE && implementation_d == STACK)) {
    search_engine_d.setNumLevels(num_levels_d);
  }
  else {
    stack_engine_d.setNumLevels(num_levels_d);
    Sof param_sof;
    param_sof.open(param_file_d);
    stack_engine_d.read(param_sof, 0);
    param_sof.close();
  }

  // load the configuration parameters (optional)
  //
  if (cnfg_file_d.length() > 0) {

    // open the configuration file
    //
    Sof cnfg_file_sof;
    if (!cnfg_file_sof.open(cnfg_file_d)) {
      return Error::handle(name(), L"load - unable to open configuration file", Error::ARG, __FILE__, __LINE__);
    }

    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nloading configuration file: ");
      output.concat(cnfg_file_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // have the search levels read in their own configuration information
    //
    for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) {
      if (algorithm_d == DECODE && implementation_d == STACK) {

        // configure stack search engine
        //
        SearchLevel& search_level = stack_engine_d.getSearchLevel(curr_level);

        // read the configuration parameters corresponding to this level
        //
        search_level.readConfig(cnfg_file_sof, curr_level);

        // load the ngram language model
        //
        if (search_level.useNSymbol()) {
          if (!search_level.loadNSymbolModel()) {
            return Error::handle(name(), L"load - error reading ngram language model", Error::ARG, __FILE__, __LINE__);
          }
        }
      }
      else {

        // configure Viterbi search engine
        //
        SearchLevel& search_level = search_engine_d.getSearchLevel(curr_level);

        // read the configuration parameters corresponding to this level
        //
        search_level.readConfig(cnfg_file_sof, curr_level);

        // load the ngram language model
        //
        if (search_level.useNSymbol()) {
          if (!search_level.loadNSymbolModel()) {
            return Error::handle(name(), L"load - error reading ngram language model", Error::ARG, __FILE__, __LINE__);
          }
        }
      }
    }

    // close the configuration files
    //
    cnfg_file_sof.close();
  }
  else {
    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\n*** no configuration file was specified ***");
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // read the parameters for the time-synchronous (viterbi) decoder
  //
  if (!(algorithm_d == DECODE && implementation_d == STACK)) {

    // make sure the correct number of levels were specified
    //
    if ((int32)num_levels_d != search_engine_d.getHDigraph().length()) {
      return Error::handle(name(), L"load - invalid number of levels specified", Error::ARG, __FILE__, __LINE__);
    }

    // loop over all search levels
    //
    for (int32 level = 0; level < (int32)num_levels_d; level++) {
      SearchLevel& search_level = search_engine_d.getSearchLevel(level);

      // set the initial level of the search hierarchy from the
      // transcription level tag
      //
      if (search_level.getLevelTag().eq(transcription_level_d)) {
        initial_level_d = level;
      }

      // the symbol-graph level tag overrides the initial level when
      // it matches
      //
      if (search_level.getLevelTag().eq(symbol_graph_level_d)) {
        initial_level_d = level;
      }

      // set the forced-alignment level
      //
      if (search_level.getLevelTag().eq(force_alignment_level_d)) {
        alignment_level_d = level;
      }
    }

    // loop over all search levels and verify that the context length
    // is not greater than one if skip symbols are used
    //
    for (int32 i = 0; i < (int32)num_levels_d; i++) {

      // retrieve the search levels
      //
      Long curr_level_index(i);
      SearchLevel& curr_level = search_engine_d.getSearchLevel((int32)curr_level_index);

      // are we using context at this level?
      //
      if (curr_level.useContext()) {

        // is the right context length greater than one?
        //
        if (curr_level.getRightContext() > 1) {

          // are we using skip symbols at this level
          //
          if (curr_level.getSkipSymbolTable().length() > 0) {
            curr_level_index.debug(L"current search level");
            return Error::handle(name(), L"load - using skip symbols with a right context length greater than one is not supported", Error::ARG, __FILE__, __LINE__);
          }
        }
      }
    }
  }

  // read the parameters for the time-asynchronous (stack) decoder
  //
  else if (algorithm_d == DECODE && implementation_d == STACK) {

    // make sure the correct number of levels were specified
    //
    if ((int32)num_levels_d != stack_engine_d.getHDigraph().length()) {
      return Error::handle(name(), L"load - invalid number of levels specified", Error::ARG, __FILE__, __LINE__);
    }

    // loop over all search levels
    //
    for (int32 level = 0; level < (int32)num_levels_d; level++) {
      SearchLevel& search_level = stack_engine_d.getSearchLevel(level);

      // set the initial level
      //
      if (search_level.getLevelTag().eq(transcription_level_d)) {
        initial_level_d = level;
      }

      // the symbol-graph level tag overrides the initial level
      //
      if (search_level.getLevelTag().eq(symbol_graph_level_d)) {
        initial_level_d = level;
      }

      // set the alignment level
      //
      if (search_level.getLevelTag().eq(force_alignment_level_d)) {
        alignment_level_d = level;
      }
    }
  }

  // after reading the search level tags from language models, we may
  // set the output levels
  //
  if (output_levels_str_d.length() > 0) {
    parseLevels(output_levels_str_d, output_levels_d);
  }
  else {

    // set default output level as the top level
    //
    output_levels_d.setLength((int32)num_levels_d);
    output_levels_d(0) = ON;
  }

  // after reading the search level tags from language models, we may
  // set the update levels
  //
  if (update_levels_str_d.length() > 0) {
    parseLevels(update_levels_str_d, update_levels_d);
  }
  else {

    // set default update level as the bottom level
    //
    update_levels_d.setLength((int32)num_levels_d);
    update_levels_d((int32)num_levels_d - 1) = ON;
  }

  // gracefully exit
  //
  return true;
}

// method: writeNBest
//
// arguments:
//  Filename& output_file_name: (input) output file name
//  NBestPath* final_plist: (input) final partial path list
//  SymbolGraphNode* start_node: (input) symbol graph start node
//
// return: a bool8 value indicating status
//
// this method writes the n-best output hypothesis to file: it first
// removes duplicate hypotheses from the final path list, then writes
// at most nbest_paths_d unique hypotheses either as text (NATIVE) or
// as a rebuilt symbol graph (SYMBOL_GRAPH)
//
bool8 HiddenMarkovModel::writeNBest(Filename& output_file_name_a, NBestPath* final_plist_a, SymbolGraphNode* start_node_a) {

  // declare local variables
  //
  // NOTE(review): 'full_hypothesis' is declared but never used in the
  // visible code
  //
  Sof output_sof;
  File output_file;
  int32 frame_index = 0;
  int32 num_paths = 0;
  float32 lm_score = 0.0;
  float32 ac_score = 0.0;
  String output;
  SearchSymbol symbol;
  SearchSymbol hypothesis;
  SearchSymbol temp_string;
  SearchSymbol full_hypothesis;
  SymbolGraph symbol_graph;
  NBestNode* nnode = (NBestNode*)NULL;
  NBestNode* pnode = (NBestNode*)NULL;
  SymbolGraphNode* curr_node = (SymbolGraphNode*)NULL;
  SymbolGraphNode* prev_node = (SymbolGraphNode*)NULL;
  SymbolGraphNode* dst_node = (SymbolGraphNode*)NULL;
  SymbolGraphNode* src_node = (SymbolGraphNode*)NULL;
  HashTable hyp_table;

  // open the output file for writing
  //
  if (output_format_d == NATIVE) {
    if (!output_file.open(output_file_name_a, File::WRITE_ONLY)) {
      Console::increaseIndention();
      output.assign(L"\ncannot open output file: ");
      output.concat(output_file_name_a);
      Console::put(output);
      Console::decreaseIndention();
      return false;
    }
  }

  // NOTE(review): unlike the NATIVE branch, a failed SYMBOL_GRAPH open
  // only logs and falls through - confirm this is intended
  //
  if (output_format_d == SYMBOL_GRAPH) {
    if (!output_sof.open(output_file_name_a, File::WRITE_ONLY, File::TEXT)) {
      Console::increaseIndention();
      output.assign(L"\ncannot open output file: ");
      output.concat(output_file_name_a);
      Console::put(output);
      Console::decreaseIndention();
    }
  }

  // get the partial paths list
  //
  // NOTE(review): SingleLinkedList template argument stripped here
  //
  SingleLinkedList& path_list = final_plist_a->getPaths();

  // loop through this list and get the unique nbest hypothesis
  //
  bool8 more_paths = path_list.gotoFirst();
  while (more_paths) {

    // retrieve the current nbest node and its corresponding symbol node
    //
    nnode = path_list.getCurr();
    curr_node = nnode->getSymbolNode();
    frame_index = curr_node->getFrameIndex();
    curr_node->getSymbol(symbol);
    pnode = nnode;

    if (debug_level_d >= Integral::DETAILED) {
      Console::increaseIndention();
      output.assign(L"\nsymbol: ");
      output.concat(symbol);
      Console::put(output);
      Console::decreaseIndention();
    }

    // back track through the nbest nodes corresponding to this
    // partial path and populate the hypothesis
    //
    while (curr_node != start_node_a) {

      // prepend the symbol to the hypothesis
      //
      temp_string.assign(symbol);
      temp_string.concat(L" ");
      temp_string.concat(hypothesis);
      hypothesis.assign(temp_string);

      // retrieve the previous nbest node and its corresponding symbol node
      //
      pnode = pnode->getPrevNode();
      if (pnode == (NBestNode*)NULL) {
        return Error::handle(name(), L"writeNBest", Error::ARG, __FILE__, __LINE__);
      }
      curr_node = pnode->getSymbolNode();
      if (curr_node == (SymbolGraphNode*)NULL) {
        return Error::handle(name(), L"writeNBest", Error::ARG, __FILE__, __LINE__);
      }

      // retrieve the frame index and the corresponding symbol
      //
      frame_index = curr_node->getFrameIndex();
      curr_node->getSymbol(symbol);

      if (debug_level_d >= Integral::DETAILED) {
        Console::increaseIndention();
        output.assign(L"\nhypothesis: ");
        output.concat(hypothesis);
        output.concat(L", frame: ");
        output.concat(frame_index);
        Console::put(output);
        Console::decreaseIndention();
      }
    }

    // if the hypothesis already exists, remove this node from the path
    // list; this can be done because the list is already ordered
    //
    if (hyp_table.containsKey(hypothesis)) {

      // is this the last path in the list?
      //
      if (path_list.isLast()) {
        more_paths = false;
      }

      // remove the path from the list
      // (presumably remove() advances the current-node pointer, since
      // no explicit gotoNext() follows - verify against SingleLinkedList)
      //
      path_list.remove(nnode);

      // free allocated memory
      //
      delete nnode;
      nnode = (NBestNode*)NULL;
    }

    // add the hypothesis to the hash table
    //
    // NOTE(review): the value stored is the address of the local
    // 'hypothesis', which is cleared immediately below; only the key
    // appears to matter here - confirm
    //
    else {
      hyp_table.insert(hypothesis, &hypothesis);

      // move to the next path in the list
      //
      more_paths = path_list.gotoNext();
    }

    // reset the hypothesis
    //
    hypothesis.clear();
  }

  // loop through the partial path list and print the unique nbest
  // hypothesis, stopping after nbest_paths_d paths
  //
  for (bool8 more = path_list.gotoFirst(); more; more = path_list.gotoNext() && (num_paths < (int32)nbest_paths_d)) {

    // retrieve the current nbest node and its corresponding symbol node
    //
    nnode = path_list.getCurr();
    if (nnode == (NBestNode*)NULL) {
      return Error::handle(name(), L"writeNBest", Error::ARG, __FILE__, __LINE__);
    }
    prev_node = (SymbolGraphNode*)NULL;
    curr_node = nnode->getSymbolNode();
    if (curr_node == (SymbolGraphNode*)NULL) {
      return Error::handle(name(), L"writeNBest", Error::ARG, __FILE__, __LINE__);
    }

    // retrieve the frame index and the corresponding symbol
    //
    frame_index = curr_node->getFrameIndex();
    curr_node->getSymbol(symbol);
    pnode = nnode;

    if (debug_level_d >= Integral::DETAILED) {
      Console::increaseIndention();
      output.assign(L"\nsymbol: ");
      output.concat(symbol);
      Console::put(output);
      Console::decreaseIndention();
    }

    // when the output format is NATIVE, seed the hypothesis with the
    // path score
    //
    if (output_format_d == NATIVE) {
      hypothesis.assign(nnode->getPathScore());
    }

    // back track through the nbest nodes corresponding to this
    // partial path and populate the hypothesis
    //
    while (curr_node != start_node_a) {

      // append up the partial hypothesis
      //
      if (output_format_d == NATIVE) {
        temp_string.assign(symbol);
        temp_string.concat(L" ");
        temp_string.concat(hypothesis);
        hypothesis.assign(temp_string);
      }

      // rebuild the symbol graph arc by arc
      //
      // NOTE(review): 'dst_node' is only NULL before the first path, so
      // this initialization runs once across all paths - verify that is
      // the intended linkage for subsequent paths
      //
      if (output_format_d == SYMBOL_GRAPH) {
        if (dst_node == (SymbolGraphNode*)NULL) {
          dst_node = symbol_graph.getTerm();
          prev_node = start_node_a->getParentGraph()->getTerm();
        }
        src_node = symbol_graph.insertNode(frame_index, symbol);
        lm_score = 0;
        ac_score = 0;
        curr_node->getScores(prev_node, lm_score, ac_score);
        src_node->insertNextNode(dst_node, lm_score, ac_score);
        dst_node = src_node;
      }

      // get the previous node/word
      //
      pnode = pnode->getPrevNode();
      prev_node = curr_node;
      curr_node = pnode->getSymbolNode();
      frame_index = curr_node->getFrameIndex();
      curr_node->getSymbol(symbol);

      if (debug_level_d >= Integral::DETAILED) {
        Console::increaseIndention();
        output.assign(L"\nhypothesis: ");
        output.concat(hypothesis);
        output.concat(L", frame: ");
        output.concat(frame_index);
        Console::put(output);
        Console::decreaseIndention();
      }
    }

    // flush and clear the current hypothesis string
    //
    if (output_format_d == NATIVE) {
      output_file.put(hypothesis);
      output_file.put(L"\n");
      hypothesis.clear();
    }

    // connect the rebuilt path to the graph's start node
    //
    if (output_format_d == SYMBOL_GRAPH) {
      src_node = symbol_graph.getStart();
      lm_score = 0;
      ac_score = 0;
      curr_node->getScores(prev_node, lm_score, ac_score);
      src_node->insertNextNode(dst_node, lm_score, ac_score);
    }

    // increment number of paths hypothesized so far
    //
    num_paths++;
  }

  // when the output format is SYMBOL_GRAPH, copy the scale/penalty
  // from the source graph and write the rebuilt graph
  //
  if (output_format_d == SYMBOL_GRAPH) {
    symbol_graph.setScale(start_node_a->getParentGraph()->getScale());
    symbol_graph.setPenalty(start_node_a->getParentGraph()->getPenalty());
    symbol_graph.write(output_sof, 0);
  }

  // close the output file
  //
  if (output_format_d == NATIVE) {
    output_file.close();
  }
  if (output_format_d == SYMBOL_GRAPH) {
    output_sof.close();
  }

  // gracefully exit
  //
  return true;
}

// method: initialize
//
// arguments:
//  Sdb& sdb: (input) signal data base to run on
//
// return: a bool8 value indicating status
//
// this method initializes each model with the global mean and covariance
// computed over all feature vectors of the training data, optionally
// initializes state transition probabilities to uniform (-log(fan-out)),
// and stores the resulting models
//
// NOTE(review): the closing braces in this method appear to be off by
// one (a '}' was probably lost along with the stripped template
// arguments during extraction) - verify the block structure against
// the original source before compiling
//
bool8 HiddenMarkovModel::initialize(Sdb& sdb_a) {

  // declare local variables
  //
  // NOTE(review): 'train_path' and 'train_file' are unused in the
  // visible code
  //
  int32 num_feat = 0;
  int32 num_vect = 0;
  int32 num_adjacent = 0;
  int32 current_file_num = 0;
  String identifier;
  Filename input_file_name;
  Filename input_ID;
  Sof file_sof;
  VectorFloat mean;
  VectorFloat diagonal;
  VectorFloat feature;
  MatrixFloat covar;
  MatrixFloat temp_covar;
  String output;
  String train_path;
  String train_file;

  // load the hmm models from the model file
  //
  if (!load()) {
    return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__);
  }

  // set the search engine mode
  //
  search_engine_d.setInitialLevel(initial_level_d);
  search_engine_d.setSearchMode(HierarchicalSearch::TRAIN);

  // loop over all levels and update the ones specified
  //
  for (int32 i = 0; i < update_levels_d.length(); i++) {

    // determine if the mask for the current level is set
    //
    if (update_levels_d(i) == ON) {

      // retrieve the search level of the state level
      //
      SearchLevel& search_level = search_engine_d.getSearchLevel(i);

      // get the statistical models for the state level
      //
      // NOTE(review): Vector template argument stripped here
      //
      Vector& stat_models = search_level.getStatisticalModels();

      // initialize the statistical models
      //
      if ((update_mode_d == OBSERVATIONS) || (update_mode_d == ALL)) {

        // check if we really need to do this
        //
        if (stat_models.length() > 0) {

          // reset the models parameters to begin with
          //
          num_vect = 0;
          mean.clear(Integral::RETAIN);
          covar.clear(Integral::RETAIN);
          temp_covar.clear(Integral::RETAIN);
          for (int j = 0; j < stat_models.length(); j++) {
            stat_models(j).clear(Integral::RETAIN);
          }

          // accumulate the sufficient statistics for each model
          //
          for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) {
            current_file_num++;

            // get audio file path
            //
            identifier.assign(input_ID);
            if (!audio_db_d.getRecord(identifier, input_file_name)) {
              return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__);
            }

            // print utterance processing information
            //
            if (verbosity_d >= Integral::BRIEF) {
              Console::increaseIndention();
              output.assign(L"\nprocessing file ");
              output.concat(current_file_num);
              output.concat(L" (");
              output.concat(identifier);
              output.concat(L")");
              output.concat(L": ");
              output.concat(input_file_name);
              Console::put(output);
              Console::decreaseIndention();
            }

            // initialize the top level with the corresponding transcription;
            // we don't need a transcription database for segmented utterances
            //
            if (transcription_db_file_d.length() > 0) {
              initTranscription(identifier, current_file_num - 1);
            }

            // loop over all the front end files
            //
            // NOTE(review): this loop variable 'i' shadows the outer
            // update-level index 'i'; also the frame count below is
            // taken from the LAST front end while vectors are read from
            // front end 'i' - verify both are intentional
            //
            for (int32 i = 0; i < vector_fe_d.length(); i++) {

              // process the utterance file by the front end
              //
              vector_fe_d(i).open(input_file_name);

              // loop over the feature vectors in the file
              //
              for (int32 j = 0; j < vector_fe_d(vector_fe_d.length() - 1).getNumFrames(); j++) {

                // accumulate the number of features
                //
                num_vect++;

                // get the feature vector
                //
                vector_fe_d(i).getVector(feature, 0, j);

                // size the accumulators on the very first vector
                //
                if (num_vect == 1) {
                  num_feat = feature.length();
                  mean.setLength(num_feat);
                  covar.setDimensions(num_feat, num_feat, false, Integral::DIAGONAL);
                  temp_covar.setDimensions(num_feat, num_feat, false, Integral::DIAGONAL);
                }

                // add the feature values to the mean
                //
                mean.add(feature);

                // store product of features into covariance matrix
                //
                for (int32 l = 0; l < num_feat; l++) {
                  temp_covar.setValue(l, l, (float32)(feature(l) * feature(l)));
                }
                covar.add(temp_covar);

                // accumulate the sufficient statistics
                //
                for (int l = 0; l < stat_models.length(); l++) {
                  stat_models(l).accumulate(feature);
                }
              }
            }

            // close all front end files
            //
            for (int32 i = 0; i < vector_fe_d.length(); i++) {
              vector_fe_d(i).close();
            }
          }

          // compute the mean vector for the feature vectors
          //
          mean.div(num_vect);

          // compute the (sample) covariance matrix: E[x^2] - mean^2 on
          // the diagonal
          //
          covar.div(num_vect - 1);
          for (int32 l = 0; l < num_feat; l++) {
            float32 value = covar.getValue(l, l) - (mean(l) * mean(l));
            covar.setValue(l, l, value);
          }

          // retrieve the diagonal of the covariance matrix
          //
          covar.getDiagonal(diagonal);

          // loop over each statistical model and initialize the model
          // parameters
          //
          VectorFloat param(2);
          param(0) = (float32)num_feat;
          param(1) = (float32)num_vect;
          for (int32 i = 0; i < stat_models.length(); i++) {
            stat_models(i).initialize(param);
          }

          // write the variance floor file
          //
          diagonal.mult(variance_floor_d);
          file_sof.open(variance_floor_file_d, File::WRITE_ONLY);
          diagonal.write(file_sof, (int32)0);

          // close the variance floor file
          //
          file_sof.close();
        }
      }

      // initialize the state transition probabilities
      //
      if ((update_mode_d == TRANSITIONS) || (update_mode_d == ALL)) {
        for (int j = 0; j < search_engine_d.getHDigraph()(i).getNumSubGraphs(); j++) {

          // retrieve the subgraph
          //
          DiGraph& subgraph = search_engine_d.getHDigraph()(i).getSubGraph((int32)j);

          // loop over each vertex adjacent to the start vertex and set
          // a uniform transition weight of -log(fan-out)
          //
          GraphVertex* start_vertex = subgraph.getStart();
          num_adjacent = start_vertex->length();
          for (bool8 more = start_vertex->gotoFirst(); more; more = start_vertex->gotoNext()) {
            GraphArc* vertex_arc = start_vertex->getCurr();
            if (num_adjacent == 1) {
              vertex_arc->setWeight(0);
            }
            else {
              vertex_arc->setWeight(-log(num_adjacent));
            }
          }

          // loop over each vertex in the subgraph
          //
          // NOTE(review): the const_cast below lost its template
          // argument list during extraction
          //
          for (bool8 more = subgraph.gotoFirst(); more; more = subgraph.gotoNext()) {
            GraphVertex* vertex = const_cast* >(subgraph.getCurr());
            num_adjacent = vertex->length();
            for (bool8 more = vertex->gotoFirst(); more; more = vertex->gotoNext()) {
              GraphArc* vertex_arc = vertex->getCurr();
              if (num_adjacent == 1) {
                vertex_arc->setWeight(0);
              }
              else {
                vertex_arc->setWeight(-log(num_adjacent));
              }
            }
          }
        }
      }
    }
  }

  // write the models to file
  //
  if (!store()) {
    return Error::handle(name(), L"initialize", Error::ARG, __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return true;
}

// method: linearDecoder
//
// arguments:
//  Sdb& sdb: (input) signal data base to run on
//
// return: a bool8 value indicating status
//
// this is the run method for a linear decoder
//
bool8 HiddenMarkovModel::linearDecoder(Sdb& sdb_a) {

  // branch on the NBEST_GENERATION using VITERBI_NBEST
  //
  if (algorithm_d == NBEST_GENERATION && implementation_d == VITERBI_NBEST) {

    // declare local variables
    //
int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename symbol_graph_file_name; Filename input_ID; String output; Filename output_file_name; Sdb output_sdb; Sof symbol_graph_sof; // load the symbol graph database (optional) // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nloading symbol graph database: "); output.concat(symbol_graph_db_file_d); Console::put(output); Console::decreaseIndention(); } // load the symbol_graph database // if (!symbol_graph_db_d.open(symbol_graph_db_file_d)) { symbol_graph_db_file_d.debug(L"symbol_graph_db_file_d"); return Error::handle(name(), L"linearDecoder - could not open symbol graph database file", Error::ARG, __FILE__, __LINE__); } // we only support the LIST and TRANSFORM mode for nbest generation // if ((output_mode_d == FILE) || (output_mode_d == DATABASE) || (output_mode_d == TRANSFORM)) { return Error::handle(name(), L"linearDecoder - output mode is not supported for this algorithm", Error::ARG, __FILE__, __LINE__); } // loop through the input utterances // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get the symbol graph file name // identifier.assign(input_ID); if (!symbol_graph_db_d.getRecord(identifier, symbol_graph_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(symbol_graph_file_name); Console::put(output); Console::decreaseIndention(); } // open the symbol graph file // if (!symbol_graph_sof.open(symbol_graph_file_name, File::READ_ONLY)) { Console::increaseIndention(); output.assign(L"\ncannot open output file: "); 
output.concat(symbol_graph_file_name); Console::put(output); Console::decreaseIndention(); } // read the symbol graph from file // SymbolGraph symbol_graph; if (!symbol_graph.read(symbol_graph_sof, 0)) { symbol_graph_file_name.debug(L"symbol_graph_file_name"); return Error::handle(name(), L"linearDecoder - cannot read symbol graph file", Error::ARG, __FILE__, __LINE__); } // close the file descriptor // symbol_graph_sof.close(); // find the length of the utterance by searching all lattice nodes // for the maximum time index // int32 max_frame = -1; int32 frame_index = -1; SingleLinkedList& nodes = symbol_graph.getNodes(); // loop over all nodes in the symbol graph // for (bool8 more = nodes.gotoFirst(); more; more = nodes.gotoNext()) { // retrieve the current node and the corresponding frame index // SymbolGraphNode* snode = nodes.getCurr(); frame_index = snode->getFrameIndex(); // set the maximum frame index // if (max_frame < frame_index) { max_frame = frame_index; } } total_num_frames += max_frame; // initialize max frame number of path lists (assuming a zero based frame // indexing scheme) // NBestPath** plist = new NBestPath*[max_frame + 1]; for (int32 i = 0; i <= max_frame; i++) { plist[i] = new NBestPath(); } // get the start node and populate the path list for the zero'th frame // SymbolGraphNode* start_node = symbol_graph.getStart(); start_node->setFrameIndex(0); NBestNode* nnode = new NBestNode(start_node, (NBestNode*)NULL, (float32)0.0); plist[0]->insertPath(nnode); // process path lists for each frame except the last frame since paths // terminate there // for (int32 i = 0; i < max_frame; i++) { // need to grow the list only if partial paths exist // if (plist[i]->getNumPaths() != 0) { plist[i]->setFrameIndex(i); plist[i]->prune((int32)nbest_beam_d, (int32)max_nbest_paths_d); plist[i]->growPaths(plist, symbol_graph.getScale(), symbol_graph.getPenalty(), (int32)max_nbest_paths_d, (int32)nbest_beam_d); } } // when the output mode is a LIST // if 
(output_mode_d == LIST) { // output the hypothesis to the corresponding file // from the output list // bool8 more_files; if (current_file_num == 1) { // read output files list into signal database // Sof output_list_file; if (!output_list_file.open(output_list_d)) { return Error::handle(name(), L"linearDecoder - error opening output list", Error::ARG, __FILE__, __LINE__); } output_sdb.read(output_list_file, 0); output_list_file.close(); more_files = output_sdb.gotoFirst(); } else { // move to the next output file // more_files = output_sdb.gotoNext(); } if (!more_files) { return Error::handle(name(), L"linearDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__); } // open the next output file and write the best hypothesis // output_sdb.getName(output_file_name); // write the nbest hypothesis // if (!writeNBest(output_file_name, plist[max_frame], start_node)) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // free allocated memory // for (int32 i = 0; i <= max_frame; i++) { delete plist[i]; } delete [] plist; } // close the symbol graph database (optional) // if (!symbol_graph_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(current_file_num); output.concat(L" file(s) successfully, "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of NBEST_GENERATION using VITERBI_NBEST // branch on the SYMBOL_GRAPH_RESCORING using RESCORE_LANGUAGE // else if (algorithm_d == SYMBOL_GRAPH_RESCORING && implementation_d == RESCORE_LANGUAGE) { return Error::handle(name(), L"algorithm and implementation is not implemented", Error::ARG, __FILE__, __LINE__); } // end of SYMBOL_GRAPH_RESCORING using RESCORE_LANGUAGE // branch on the SYMBOL_GRAPH_RESCORING using RESCORE_ACOUSTIC // or 
RESCORE_BOTH // else if (algorithm_d == SYMBOL_GRAPH_RESCORING && (implementation_d == RESCORE_ACOUSTIC || implementation_d == RESCORE_BOTH)) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; Filename symbol_graph_file_name; File buffer_file; Filename buffer_filename; Filename temp_buffer_filename; Filename input_ID; String output; Filename output_file_name; Sdb output_sdb; Vector data; TranscriptionDatabase trans_db; // load the hmm models from the language model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::DECODE); // set the rescoring mode to read the language model score from file // if (implementation_d == RESCORE_ACOUSTIC) { search_engine_d.getHDigraph()(initial_level_d).setRescore(true); } // when the output mode is FILE // File total_output_file; if (output_mode_d == FILE) { if (output_file_d.length() > 0) { if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening the output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } // open the output file for the utterance hypotheses // if (!total_output_file.open(output_file_d, File::WRITE_ONLY)) { return Error::handle(name(), L"run: opening output file", Error::ARG, __FILE__, __LINE__); } else { total_output_file.close(); } } } // when the output mode is DATABASE // Sof output_db_sof; if (output_mode_d == DATABASE) { if (output_file_d.length() > 0) { if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening the output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } // open the output file for the utterance hypotheses (TEXT) // if (output_type_d == 
TEXT) { if (!output_db_sof.open(output_file_d, File::WRITE_ONLY)) { return Error::handle(name(), L"run: opening output file", Error::ARG, __FILE__, __LINE__); } else { // read all identifiers form the sdb object // Vector identifier_keys; identifier_keys.setCapacity(sdb_a.length()); for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { identifier.assign(input_ID); identifier_keys.concat(identifier); } // write the frame-duration to the output file // Float frame_dur = vector_fe_d((int32)0).getFrameDuration(); frame_dur.write(output_db_sof, (int32)0); // write the identifiers to the transcription database // trans_db.storePartial(output_db_sof, 0, identifier_keys); } } // end of output type TEXT // open the output file for the utterance hypotheses (BINARY) // if (output_type_d == BINARY) { if (!output_db_sof.open(output_file_d, File::WRITE_ONLY, File::BINARY)) { return Error::handle(name(), L"run: opening output file", Error::ARG, __FILE__, __LINE__); } else { // read all identifiers form the sdb object // Vector identifier_keys; identifier_keys.setCapacity(sdb_a.length()); for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { identifier.assign(input_ID); identifier_keys.concat(identifier); } // write the frame-duration to the output file // Float frame_dur = vector_fe_d((int32)0).getFrameDuration(); frame_dur.write(output_db_sof, (int32)0); // write the identifiers to the transcription database // trans_db.storePartial(output_db_sof, 0, identifier_keys); } } // end of output type BINARY } } // loop through the input utterances // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // if input is streaming // if (stream_d) { // buffer the input so that it can be accesses multiple times // buffer_filename.assign(File::STREAM_FILE); buffer_file.open(buffer_filename); temp_buffer_filename = (String)(*(buffer_file.getTempFilename())[0]); input_file_name.assign(temp_buffer_filename); } // otherwise get 
the file path // else { // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // get the symbol graph path // if (!symbol_graph_db_d.getRecord(identifier, symbol_graph_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // initialize the decoder // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // read the symbol graph and initialize the lattice // if (symbol_graph_db_file_d.length() > 0) { initSymbolGraph(symbol_graph_file_name); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(vector_fe_d); } // close the FrontEnds // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } // close the input file // if (stream_d) { buffer_file.close(); } // pick up the best hypothesis and its parameters // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList trace_path; if 
(!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, trace_path)) { // if no hypothesis found // hypotheses.clear(); trace_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // print the detailed info about the hypothesis // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); output.assign(L"\nhyp: "); output.concat(hypotheses); output.concat(L"\nscore: "); output.concat(score); output.concat(L" frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } if (output_mode_d == TRANSFORM) { // transform the input file and its path to the output file // sdb_a.transformName(output_file_name, input_file_name); // open the output file and write best hypothesis // File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { return Error::handle(name(), L"linearDecoder - error opening output file", Error::ARG, __FILE__, __LINE__); } output_file.put(hypotheses); // close the output file // output_file.close(); } if (output_mode_d == DATABASE) { String name_00(identifier); String gtype_00(identifier); AnnotationGraph output_graph(name_00, gtype_00); // convert the best search path to an annotation graph // if (!createAnnotationGraph(output_graph, trace_path)) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // prune the annotation graph according to output levels // pruneAnnotationGraph(output_graph); // write the annotation graph to the database // trans_db.storePartial(output_db_sof, current_file_num - 1, output_graph); } if (output_mode_d == FILE) { // output the best hypothesis // output.assign(hypotheses); if ((int32)alignment_level_d < 0) { output.concat(L" ("); 
output.concat(input_ID); output.concat(L")\n"); } if (output_file_d.length() != 0) { total_output_file.open(output_file_d, File::APPEND_PLUS); total_output_file.put(output); total_output_file.close(); } else { if (verbosity_d < Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } } } if (output_mode_d == LIST) { // output the hypothesis to the corresponding file // from the output list // bool8 more_files; if (current_file_num == 1) { // read output files list into signal database // Sof output_list_file; if (!output_list_file.open(output_list_d)) { return Error::handle(name(), L"linearDecoder - error opening output list", Error::ARG, __FILE__, __LINE__); } output_sdb.read(output_list_file, 0); output_list_file.close(); more_files = output_sdb.gotoFirst(); } else { // move to the next output file // more_files = output_sdb.gotoNext(); } if (!more_files) { return Error::handle(name(), L"linearDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__); } // open the next output file and write the best hypothesis // output_sdb.getName(output_file_name); File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { Console::increaseIndention(); output.assign(L"\ncannot open output file: "); output.concat(output_file_name); Console::put(output); Console::decreaseIndention(); } else { output_file.put(hypotheses); output_file.put(L"\n"); output_file.close(); } } } // end of looping through the input utterances // clean up all memory // search_engine_d.clear(); // close database files // if (output_mode_d == DATABASE) { output_db_sof.close(); } // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // close the audio database (optional) // if (symbol_graph_db_file_d.length() > 0) { if 
(!symbol_graph_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // close the transcription database (optional) // if (transcription_db_file_d.length() > 0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of SYMBOL_GRAPH_RESCORING using RESCORE_ACOUSTIC or RESCORE_BOTH // branch on the SYMBOL_GRAPH_GENERATION using WORD_DEPENDENT_NBEST // else if (algorithm_d == SYMBOL_GRAPH_GENERATION && implementation_d == WORD_DEPENDENT_NBEST) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; Filename input_ID; String output; Filename output_file_name; Sdb output_sdb; // load the hmm models from the language model file // if (!load()) { return Error::handle(name(), L"linearDecoder - error loading models", Error::ARG, __FILE__, __LINE__); } // we only support the LIST and TRANSFORM mode for symbol graph generation // if ((output_mode_d == FILE) || (output_mode_d == DATABASE)) { return Error::handle(name(), L"linearDecoder - output mode is not supported for this algorithm", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::DECODE); // loop through the input utterances // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { 
return Error::handle(name(), L"linearDecoder - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // initialize the decoder // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); } // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(vector_fe_d); } // close the FrontEnds // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } // initialize the symbol graph // float64 score = 0; int32 num_frames = 0; if (search_engine_d.initializeSymbolGraphTrace(score, num_frames)) { num_valid_files++; total_num_frames += num_frames; } else { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // generate the symbol graph // if (!search_engine_d.generateSymbolGraph()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // print the detailed info about the hypothesis // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"score: "); output.concat(score); output.concat(L" frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); } // when the output mode is a TRANSFORM // if (output_mode_d == TRANSFORM) { // transform the input file and its path to the output file // sdb_a.transformName(output_file_name, input_file_name); // retrieve the symbol graph // SymbolGraph& symbol_graph 
= search_engine_d.getSymbolGraph(); Sof tmp_sof; if (!tmp_sof.open(output_file_name, File::WRITE_ONLY, File::TEXT)) { Console::increaseIndention(); output.assign(L"\ncannot open output file: "); output.concat(output_file_name); Console::put(output); Console::decreaseIndention(); } // write the symbol graph to file // symbol_graph.write(tmp_sof, 0); tmp_sof.close(); } // when the output mode is a LIST // if (output_mode_d == LIST) { // output the hypothesis to the corresponding file // from the output list // bool8 more_files; if (current_file_num == 1) { // read output files list into signal database // Sof output_list_file; if (!output_list_file.open(output_list_d)) { return Error::handle(name(), L"linearDecoder - error opening output list", Error::ARG, __FILE__, __LINE__); } output_sdb.read(output_list_file, 0); output_list_file.close(); more_files = output_sdb.gotoFirst(); } else { // move to the next output file // more_files = output_sdb.gotoNext(); } if (!more_files) { return Error::handle(name(), L"linearDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__); } // open the next output file and write the best hypothesis // output_sdb.getName(output_file_name); // retrieve the symbol graph // SymbolGraph& symbol_graph = search_engine_d.getSymbolGraph(); Sof tmp_sof; if (!tmp_sof.open(output_file_name, File::WRITE_ONLY, File::TEXT)) { Console::increaseIndention(); output.assign(L"\ncannot open output file: "); output.concat(output_file_name); Console::put(output); Console::decreaseIndention(); } // write the symbol graph to file // symbol_graph.write(tmp_sof, 0); tmp_sof.close(); } } // end of looping through the input utterances // clean up all memory // search_engine_d.clear(); // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // print the number of successfully processed files // if 
(verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of SYMBOL_GRAPH_GENERATION using WORD_DEPENDENT_NBEST // added for word-internal context generation // branch on the CONTEXT_GENERATION using SYMBOL_GENERATION // does not support streaming input. // else if (algorithm_d == CONTEXT_GENERATION && !stream_d && implementation_d == SYMBOL_GENERATION && function_mode_d == NONE) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; String output; Filename input_ID; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engines target context level // search_engine_d.setContextLevel(context_level_d); // set the search engines mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // initialize the context generation // if (!search_engine_d.initializeContextGeneration()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // loop through each input utterance // current_file_num = 0; for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); 
output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); } // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(vector_fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList instance_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, instance_path)) { hypotheses.clear(); instance_path.clear(); num_valid_files++; total_num_frames += num_frames; } // close all the front end processing // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } } // end of looping through the input utterances // write the context dependent symbols // Vector& context_list = search_engine_d.getContextList(); // open the context list // Sof input_sof; Sdb symbol_list_sdb; if (!input_sof.open(context_list_d, File::WRITE_ONLY)) { return Error::handle(name(), L"linearDecoder - unable to open context list", Error::ARG, __FILE__, __LINE__); } // loop over each element in the symbol list and append it to the sdb // String tmp_str; SearchSymbol symbol_context; for (int32 i = 0; i < context_list.length(); i++) { symbol_context.assign(context_list(i)); tmp_str.assign(SearchSymbol::NO_LEFT_CONTEXT); tmp_str.concat(L"-"); 
symbol_context.replace(tmp_str, L""); tmp_str.assign(L"+"); tmp_str.concat(SearchSymbol::NO_RIGHT_CONTEXT); symbol_context.replace(tmp_str, L""); symbol_list_sdb.append(symbol_context); } // write the sdb list // symbol_list_sdb.write(input_sof, (int32)context_level_d); input_sof.close(); // clean up all memory // search_engine_d.clear(); // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // close the transcription database (optional) // if (transcription_db_file_d.length() > 0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } } // end of CONTEXT_GENERATION using SYMBOL_GENERATION // branch on the CONTEXT_GENERATION using MODEL_GENERATION // does not support streaming input. // else if (algorithm_d == CONTEXT_GENERATION && !stream_d && implementation_d == MODEL_GENERATION) { // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"grammarDecoder - unable to load models", Error::ARG, __FILE__, __LINE__); } // generate context for all levels specified // for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) { SearchLevel& level = search_engine_d.getSearchLevel(curr_level); if (curr_level == (int32)context_level_d) { int32 left_order = level.getLeftContext(); int32 right_order = level.getRightContext(); if (!level.generateContextMapping(left_order, right_order, (int32)num_levels_d, context_list_d, search_engine_d.getSearchLevel(curr_level + 1), true)) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } } // write the models to file // if (!store()) { return Error::handle(name(), L"grammarDecoder - unable to store models", Error::ARG, __FILE__, __LINE__); } // clean up memory // search_engine_d.clear(); } // end of CONTEXT_GENERATION using MODEL_GENERATION // branch on 
DECODE/FORCED_ALIGNMENT using VITERBI // else if ((algorithm_d == DECODE && implementation_d == VITERBI) || (algorithm_d == FORCED_ALIGNMENT && implementation_d == VITERBI)) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; File buffer_file; Filename buffer_filename; Filename temp_buffer_filename; Filename input_ID; String output; Filename output_file_name; Sdb output_sdb; Vector data; TranscriptionDatabase trans_db; // load the hmm models from the language model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::DECODE); // when the output mode is FILE // File total_output_file; if (output_mode_d == FILE) { // check if the filename already exists // bool8 flag_exists = false; int32 index = 0; String basename(output_file_d); Filename temp; // creating unique filename if output file exists // if (File::exists(output_file_d)) { while (!flag_exists) { // create a basename // temp.assign(basename); temp.concat(L"_"); temp.concat(Integral::getPid()); temp.concat(L"_"); temp.concat((uint32)(Integral::time()), L"%10.10lu"); temp.concat(L"_"); temp.concat(index); if (!File::exists(temp)) { output_file_d.assign(temp); break; } else { index++; } } } if (output_file_d.length() > 0) { if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening the output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } // open the output file for the utterance hypotheses // if (!total_output_file.open(output_file_d, File::WRITE_ONLY)) { return Error::handle(name(), L"run: opening output file", Error::ARG, __FILE__, __LINE__); } else { total_output_file.close(); } } } // when the output mode is DATABASE // Sof 
output_db_sof; if (output_mode_d == DATABASE) { if (output_file_d.length() > 0) { if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening the output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } // open the output file for the utterance hypotheses (TEXT) // if (output_type_d == TEXT) { if (!output_db_sof.open(output_file_d, File::WRITE_ONLY)) { return Error::handle(name(), L"run: opening output file", Error::ARG, __FILE__, __LINE__); } else { // read all identifiers form the sdb object // Vector identifier_keys; identifier_keys.setCapacity(sdb_a.length()); for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { identifier.assign(input_ID); identifier_keys.concat(identifier); } // write the frame-duration to the output file // Float frame_dur = vector_fe_d((int32)0).getFrameDuration(); frame_dur.write(output_db_sof, (int32)0); // write the identifiers to the transcription database // trans_db.storePartial(output_db_sof, 0, identifier_keys); } } // end of output type TEXT // open the output file for the utterance hypotheses (BINARY) // if (output_type_d == BINARY) { if (!output_db_sof.open(output_file_d, File::WRITE_ONLY, File::BINARY)) { return Error::handle(name(), L"run: opening output file", Error::ARG, __FILE__, __LINE__); } else { // read all identifiers form the sdb object // Vector identifier_keys; identifier_keys.setCapacity(sdb_a.length()); for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { identifier.assign(input_ID); identifier_keys.concat(identifier); } // write the frame-duration to the output file // Float frame_dur = vector_fe_d((int32)0).getFrameDuration(); frame_dur.write(output_db_sof, (int32)0); // write the identifiers to the transcription database // trans_db.storePartial(output_db_sof, 0, identifier_keys); } } // end of output type BINARY } } // loop through the input utterances // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); 
sdb_a.gotoNext()) { current_file_num++; // if input is streaming // if (stream_d) { // buffer the input so that it can be accesses multiple times // buffer_filename.assign(File::STREAM_FILE); buffer_file.open(buffer_filename); temp_buffer_filename = (String)(*(buffer_file.getTempFilename())[0]); input_file_name.assign(temp_buffer_filename); } // otherwise get the file path // else { // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // initialize the decoder // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(vector_fe_d); } // close the FrontEnds // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } // close the input file // if (stream_d) { buffer_file.close(); } // pick up the best hypothesis and its parameters // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList trace_path; if 
(!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, trace_path)) { // if no hypothesis found // hypotheses.clear(); trace_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // print the detailed info about the hypothesis // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); output.assign(L"\nhyp: "); output.concat(hypotheses); output.concat(L"\nscore: "); output.concat(score); output.concat(L" frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } if (output_mode_d == TRANSFORM) { // transform the input file and its path to the output file // sdb_a.transformName(output_file_name, input_file_name); // open the output file and write best hypothesis // File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { return Error::handle(name(), L"linearDecoder - error opening output file", Error::ARG, __FILE__, __LINE__); } output_file.put(hypotheses); // close the output file // output_file.close(); } if (output_mode_d == DATABASE) { String name_00(identifier); String gtype_00(identifier); AnnotationGraph output_graph(name_00, gtype_00); // convert the best search path to an annotation graph // if (!createAnnotationGraph(output_graph, trace_path)) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // prune the annotation graph according to output levels // pruneAnnotationGraph(output_graph); // write the annotation graph to the database // trans_db.storePartial(output_db_sof, current_file_num - 1, output_graph); } if (output_mode_d == FILE) { // output the best hypothesis // output.assign(hypotheses); if ((int32)alignment_level_d < 0) { output.concat(L" ("); 
output.concat(input_ID); output.concat(L")\n"); } if (output_file_d.length() != 0) { total_output_file.open(output_file_d, File::APPEND_PLUS); total_output_file.put(output); total_output_file.close(); } else { if (verbosity_d < Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } } } if (output_mode_d == LIST) { // output the hypothesis to the corresponding file // from the output list // bool8 more_files; if (current_file_num == 1) { // read output files list into signal database // Sof output_list_file; if (!output_list_file.open(output_list_d)) { return Error::handle(name(), L"linearDecoder - error opening output list", Error::ARG, __FILE__, __LINE__); } output_sdb.read(output_list_file, 0); output_list_file.close(); more_files = output_sdb.gotoFirst(); } else { // move to the next output file // more_files = output_sdb.gotoNext(); } if (!more_files) { return Error::handle(name(), L"linearDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__); } // open the next output file and write the best hypothesis // output_sdb.getName(output_file_name); File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { Console::increaseIndention(); output.assign(L"\ncannot open output file: "); output.concat(output_file_name); Console::put(output); Console::decreaseIndention(); } else { output_file.put(hypotheses); output_file.put(L"\n"); output_file.close(); } } } // end of looping through the input utterances // clean up all memory // search_engine_d.clear(); // close database files // if (output_mode_d == DATABASE) { output_db_sof.close(); } // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // close the transcription database (optional) // if (transcription_db_file_d.length() > 
0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of DECODE using VITERBI // branch on DECODE using STACK // else if (algorithm_d == DECODE && implementation_d == STACK) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; File buffer_file; Filename buffer_filename; Filename temp_buffer_filename; Filename input_ID; String output; Filename output_file_name; Sdb output_sdb; Vector data; // load the hmm models from the language model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // open the output file // File total_output_file; if (output_mode_d == FILE) { if (output_file_d.length() > 0) { if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening the output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } // open the output file for the utterance hypotheses // if (!total_output_file.open(output_file_d, File::WRITE_ONLY)) { return Error::handle(name(), L"run: opening output file", Error::ARG, __FILE__, __LINE__); } else { total_output_file.close(); } } } // loop through the input utterances // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // if input is streaming, set input_file_name to STREAM_FILE // if (stream_d) { // buffer the input so that it can be accesses multiple times // buffer_filename.assign(File::STREAM_FILE); 
buffer_file.open(buffer_filename); temp_buffer_filename = (String)(*(buffer_file.getTempFilename())[0]); input_file_name.assign(temp_buffer_filename); } // otherwise get the file path // else { // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // initialize the decoder // if (!stack_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // //extractFeatures(i, data); } // decode the utterance // if (!verify_d) { // use stack search engine // stack_engine_d.decode(vector_fe_d); } // close all the front ends // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } // close the input file // if (stream_d) { buffer_file.close(); } // pick up the best hypothesis and its parameters // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList trace_path; if (!stack_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, trace_path)) { // if no hypothesis found // hypotheses.clear(); trace_path.clear(); if ((!verify_d) && 
(verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // print the detailed info about the hypothesis // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); output.assign(L"\nhyp: "); output.concat(hypotheses); output.concat(L"\nscore: "); output.concat(score); output.concat(L" frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } if (output_mode_d == TRANSFORM) { // transform the input file and its path to the output file // sdb_a.transformName(output_file_name, input_file_name); // open the output file and write best hypothesis // File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { return Error::handle(name(), L"linearDecoder - error opening output file", Error::ARG, __FILE__, __LINE__); } output_file.put(hypotheses); // close the output file // output_file.close(); } if (output_mode_d == FILE) { // output the best hypothesis // output.assign(hypotheses); if ((int32)alignment_level_d < 0) { output.concat(L" ("); output.concat(input_ID); output.concat(L")\n"); } if (output_file_d.length() != 0) { total_output_file.open(output_file_d, File::APPEND_PLUS); total_output_file.put(output); total_output_file.close(); } else { if (verbosity_d < Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } } } if (output_mode_d == LIST) { // output the hypothesis to the corresponding file // from the output list // bool8 more_files; if (current_file_num == 1) { // read output files list into signal database // Sof output_list_file; if (!output_list_file.open(output_list_d)) { return Error::handle(name(), L"linearDecoder - error opening output list", 
Error::ARG, __FILE__, __LINE__); } output_sdb.read(output_list_file, 0); output_list_file.close(); more_files = output_sdb.gotoFirst(); } else { // move to the next output file // more_files = output_sdb.gotoNext(); } if (!more_files) { return Error::handle(name(), L"linearDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__); } // open the next output file and write the best hypothesis // output_sdb.getName(output_file_name); File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { Console::increaseIndention(); output.assign(L"\ncannot open output file: "); output.concat(output_file_name); Console::put(output); Console::decreaseIndention(); } else { output_file.put(hypotheses); output_file.put(L"\n"); output_file.close(); } } } // end of looping through the input utterances // clean up all memory // stack_engine_d.clear(); // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // close the transcription database (optional) // if (transcription_db_file_d.length() > 0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of DECODE using STACK // branch on TRAIN using BAUM_WELCH and function UPDATE // does not support streaming input // else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == UPDATE && !stream_d) { // declare local variables // String output; // load the hmm models from the model file // if (!load()) { 
return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // load the accumulators form file // loadAccumulators(sdb_a); // update the models using the accumulated statistics // update(); // write the trained models to file // if (!store()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // clean up memory // search_engine_d.clear(); } // end of TRAIN using BAUM_WELCH and UPDATE // branch on TRAIN using BAUM_WELCH and function ACCUMULATE // else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == ACCUMULATE) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; Filename input_ID; String output; String train_path; String train_file; Filename output_file_name; Sdb output_sdb; Vector data; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // loop through each input utterance // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); 
Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(vector_fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList trace_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, trace_path)) { // if no hypothesis found // hypotheses.clear(); trace_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // compute the forward probabilities (alphas) // trellis_d = (BiGraph*)NULL; trellis_d = search_engine_d.computeForwardBackward(data, beta_threshold_d); // determine the utterance probability here again // computeUtterProb(score); // print utterance probability // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\naverage utterance probability: "); output.concat(score / num_frames); output.concat(L", number of frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); } // accumulate the statistics // accumulate(score, 
data); // close the front end processing // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } } // end of looping through the input utterances // write the accumulatots to file // storeAccumulators(); // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // close the transcription database (optional) // if (transcription_db_file_d.length() > 0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } } // clean up all memory // search_engine_d.clear(); } // end of TRAIN using BAUM_WELCH and ACCUMULATE // branch on TRAIN using BAUM_WELCH and NONE // does not support streaming input. // else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == NONE && !stream_d) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; Filename input_ID; String output; String train_path; String train_file; Filename output_file_name; Sdb output_sdb; Vector data; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // loop over the number of iterations // for (int32 iter = 0; iter < num_iterations_d; iter++) { current_file_num = 0; // print the current iteration // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nstarting iteration: "); output.concat(iter); Console::put(output); Console::decreaseIndention(); } // loop through each input utterance // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // 
identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(vector_fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList trace_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, trace_path)) { // if no hypothesis found // hypotheses.clear(); trace_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // compute the forward probabilities (alphas) // trellis_d = (BiGraph*)NULL; 
trellis_d = search_engine_d.computeForwardBackward(data, beta_threshold_d); // determine the utterance probability here again // computeUtterProb(score); // print utterance probability // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\naverage utterance probability: "); output.concat(score / num_frames); output.concat(L", number of frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); } // accumulate the statistics // accumulate(score, data); // close the front end processing // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } } // end of looping through the input utterances // update the models using the accumulated statistics // update(); // reset the statistical model and state transition accumulators // if (iter < (int32)num_iterations_d - 1) { resetAccumulators(); } } // end of looping over the number of iterations // clear all data structures needed to decode // if (!search_engine_d.initializeLinearPartial()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // clear the top level // search_engine_d.getHDigraph()((int32)initial_level_d).clear(); LanguageModel lm_temp; LanguageModelIHD lm_ihd_temp; Sof lm_temp_file; if (!lm_temp_file.open(lm_file_d, File::READ_ONLY)) { lm_file_d.debug(L"lm_file_d"); return Error::handle(name(), L"error opening language model file", Error::ARG, __FILE__, __LINE__); } // read the language model file // if (!lm_temp.read(lm_temp_file)) { return Error::handle(name(), L"load: loading model file", Error::ARG, __FILE__, __LINE__); } lm_ihd_temp.setRuleModel(lm_temp.getRuleModel()); // restore the top-level subgraph of the hierarchy // search_engine_d.getHDigraph()((int32)initial_level_d).assign(lm_ihd_temp.getIHD()((int32)initial_level_d)); // close language model file // lm_temp_file.close(); // load the configuration parameters (optional) // if (cnfg_file_d.length() > 0) { // open the 
configuration file // Sof cnfg_file_sof; if (!cnfg_file_sof.open(cnfg_file_d)) { return Error::handle(name(), L"load - unable to open configuration file", Error::ARG, __FILE__, __LINE__); } // restore back the top level information from configuration file // SearchLevel& search_level = search_engine_d.getSearchLevel(initial_level_d); // read the configuration parameters corresponding to this level // search_level.readConfig(cnfg_file_sof, initial_level_d); // close the configuration files // cnfg_file_sof.close(); } // write the models to file // if (!store()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // clean up all memory // search_engine_d.clear(); // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of TRAIN using BAUM_WELCH and NONE // branch on MLLR // does not support streaming input. 
// else if (algorithm_d == MLLR && !stream_d) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; Filename input_ID; String output; String train_path; String train_file; Filename output_file_name; Sdb output_sdb; Vector data; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // load the occupancies from the accumulators file // loadOccupancies(); // load the configuration for the mllr object // Sof param_sof; param_sof.open(param_file_d); mllr_d.read(param_sof, 0); param_sof.close(); mllr_d.debug(L"mllr"); // get vector of statistical models // Vector& stat_models_before = search_engine_d.getHDigraph()((int32)num_levels_d- 1 ).getStatisticalModels(); Vector speech_tag; speech_tag.setLength(stat_models_before.length()); // initialize all speech tag to true to indicate all will adapt // for (int32 k = 0; k < speech_tag.length(); k++) { speech_tag(k) = true; } Vector non_adapt_symbol = search_engine_d.getHDigraph()((int32)num_levels_d - 1).getNonAdaptSymbolTable(); Vector symbol_table_phone = search_engine_d.getHDigraph()((int32)num_levels_d - 1).getSymbolTable(); HashTable& statistical_hashtable = search_engine_d.getHDigraph()((int32)num_levels_d - 1).getSymbolHashTable(); for (int32 j = 0; j < non_adapt_symbol.length(); j++) { SearchSymbol hashed_symbol = non_adapt_symbol(j); // get statistical model index from the level's symbol_hash_d // and then set the index of speech tag to false for those // symbol in the non adapt symbol table // Long* model_index = statistical_hashtable.get(hashed_symbol); speech_tag((int32)*model_index) = false; } // init the regression tree using the model and speech tag // mllr_d.initRegressionTree(stat_models_before, speech_tag); 
resetAccumulators(); // loop over the number of iterations // for (int32 iter = 0; iter < num_iterations_d; iter++) { current_file_num = 0; // print the current iteration // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nstarting iteration: "); output.concat(iter); Console::put(output); Console::decreaseIndention(); } // loop through each input utterance // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { input_ID.debug(L"input_ID"); return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeLinearDecoder()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding // transcription we don't need a transcription database for // segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.linearDecoder(vector_fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; float64 score = 0; 
int32 num_frames = 0; DoubleLinkedList trace_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, trace_path)) { // if no hypothesis found // hypotheses.clear(); trace_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // compute the forward probabilities (alphas) // trellis_d = (BiGraph*)NULL; trellis_d = search_engine_d.computeForwardBackward(data, beta_threshold_d); // determine the utterance probability here again // computeUtterProb(score); // print utterance probability // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\naverage utterance probability: "); output.concat(score / num_frames); output.concat(L", number of frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); } // accumulate the statistics // accumulate(score, data); // close the front end processing // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } } // end of looping through the input utterances // get vector of statistical models // Vector& stat_models = search_engine_d.getHDigraph()((int32)num_levels_d - 1).getStatisticalModels(); // create transformation matries using the accumulated statistics // mllr_d.createTransform(stat_models); // adapt the models using the accumulated statistics // mllr_d.adapt(stat_models); // reset the statistical model and state transition accumulators // if (iter < (int32)num_iterations_d - 1) { resetAccumulators(); } } // end of looping over the number of iterations // clear all data structures needed to decode // if (!search_engine_d.initializeLinearPartial()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // clear the top level // 
search_engine_d.getHDigraph()((int32)num_levels_d - 1).clear(); Sof lm_file; if (!lm_file.open(lm_file_d, File::READ_ONLY)) { lm_file_d.debug(L"lm_file_d"); return Error::handle(name(), L"error opening language model file", Error::ARG, __FILE__, __LINE__); } LanguageModel lm; // restore the top-level subgraph of the hierarchy // if (!lm.read(lm_file)) { return Error::handle(name(), L"load: loading model file", Error::ARG, __FILE__, __LINE__); } lm_d.setRuleModel(lm.getRuleModel()); search_engine_d.setHDigraph(lm_d.getIHD()); // close language model file // lm_file.close(); // write the models to file // if (!store()) { return Error::handle(name(), L"linearDecoder", Error::ARG, __FILE__, __LINE__); } // clean up all memory // search_engine_d.clear(); // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // MLLR // branch on the UNKNOWN options // else { return Error::handle(name(), L"invalid algorithm and/or implementation", Error::ARG, __FILE__, __LINE__); } // gracefully exit // return true; } // method: grammarDecoder // // arguments: // Sdb& sdb: (input) signal data base to run on // // return: a bool8 value indicating status // // this is the run method for a grammar decoder // bool8 HiddenMarkovModel::grammarDecoder(Sdb& sdb_a) { // branch on the CONTEXT_GENERATION using SYMBOL_GENERATION // does not support streaming input. 
// if (algorithm_d == CONTEXT_GENERATION && !stream_d && implementation_d == SYMBOL_GENERATION && function_mode_d == NONE) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; String output; Filename input_ID; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engines target context level // search_engine_d.setContextLevel(context_level_d); // set the search engines mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // initialize the context generation // if (!search_engine_d.initializeContextGeneration()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // loop through each input utterance // current_file_num = 0; for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeGrammarDecoder()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 
0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); } // decode the utterance // if (!verify_d) { search_engine_d.grammarDecoder(vector_fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList instance_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, instance_path)) { hypotheses.clear(); instance_path.clear(); num_valid_files++; total_num_frames += num_frames; } // close the front end processing // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } } // end of looping through the input utterances // write the context dependent symbols // Vector& context_list = search_engine_d.getContextList(); // open the context list // Sof input_sof; Sdb symbol_list_sdb; if (!input_sof.open(context_list_d, File::WRITE_ONLY)) { return Error::handle(name(), L"grammarDecoder - unable to open context list", Error::ARG, __FILE__, __LINE__); } // loop over each element in the symbol list and append it to the sdb // String tmp_str; SearchSymbol symbol_context; for (int32 i = 0; i < context_list.length(); i++) { symbol_context.assign(context_list(i)); tmp_str.assign(SearchSymbol::NO_LEFT_CONTEXT); tmp_str.concat(L"-"); symbol_context.replace(tmp_str, L""); tmp_str.assign(L"+"); tmp_str.concat(SearchSymbol::NO_RIGHT_CONTEXT); symbol_context.replace(tmp_str, L""); symbol_list_sdb.append(symbol_context); } // write the sdb list // symbol_list_sdb.write(input_sof, (int32)context_level_d); input_sof.close(); // clean up all memory // search_engine_d.clear(); // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); 
} } // close the transcription database (optional) // if (transcription_db_file_d.length() > 0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } } } // end of CONTEXT_GENERATION using SYMBOL_GENERATION // branch on the CONTEXT_GENERATION using MODEL_GENERATION // does not support streaming input. // else if (algorithm_d == CONTEXT_GENERATION && !stream_d && implementation_d == MODEL_GENERATION) { // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"grammarDecoder - unable to load models", Error::ARG, __FILE__, __LINE__); } // generate context for all levels specified // for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) { SearchLevel& level = search_engine_d.getSearchLevel(curr_level); if (curr_level == (int32)context_level_d) { int32 left_order = level.getLeftContext(); int32 right_order = level.getRightContext(); if (!level.generateContextMapping(left_order, right_order, (int32)num_levels_d, context_list_d, search_engine_d.getSearchLevel(curr_level + 1))) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } } } // write the models to file // if (!store()) { return Error::handle(name(), L"grammarDecoder - unable to store models", Error::ARG, __FILE__, __LINE__); } // clean up memory // search_engine_d.clear(); } // end of CONTEXT_GENERATION using MODEL_GENERATION // branch on TRAIN using BAUM_WELCH and function UPDATE // does not support streaming input. 
// else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == UPDATE && !stream_d) { // declare local variables // String output; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // load the accumulators form file // loadAccumulators(sdb_a); // update the models using the accumulated statistics // update(); // write the trained models to file // if (!store()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // clean up memory // search_engine_d.clear(); } // end of TRAIN using BAUM_WELCH and UPDATE // branch on TRAIN using BAUM_WELCH and function ACCUMULATE // does not support streaming input // else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == ACCUMULATE && !stream_d) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; Filename input_ID; String output; String train_path; String train_file; Filename output_file_name; Sdb output_sdb; Vector data; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // loop through each input utterance // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information 
// if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeGrammarDecoder()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.grammarDecoder(vector_fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList instance_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, instance_path)) { // if no hypothesis found // hypotheses.clear(); instance_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // compute the forward probabilities (alphas) // trellis_d = (BiGraph*)NULL; trellis_d = search_engine_d.computeForwardBackward(data, beta_threshold_d); // determine the utterance probability here again // computeUtterProb(score); // print utterance probability // if (verbosity_d >= Integral::BRIEF) { 
Console::increaseIndention(); output.assign(L"\naverage utterance probability: "); output.concat(score / num_frames); output.concat(L", number of frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); } // accumulate the statistics // accumulate(score, data); // close the front end processing // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } } // end of looping through the input utterances // write the accumulatots to file // storeAccumulators(); // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } } // close the transcription database (optional) // if (transcription_db_file_d.length() > 0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } } // clean up all memory // search_engine_d.clear(); } // end of TRAIN using BAUM_WELCH and ACCUMULATE // branch on TRAIN using BAUM_WELCH and NONE // does not support streaming input. 
// else if (algorithm_d == TRAIN && implementation_d == BAUM_WELCH && function_mode_d == NONE && !stream_d) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; Filename input_ID; String output; String train_path; String train_file; Filename output_file_name; Sdb output_sdb; Vector data; LanguageModel lm; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // loop over the number of iterations // for (int32 iter = 0; iter < num_iterations_d; iter++) { current_file_num = 0; // print the current iteration // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nstarting iteration: "); output.concat(iter); Console::put(output); Console::decreaseIndention(); } // loop through each input utterance // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); Console::decreaseIndention(); } // clear all data structures needed to decode // if (!search_engine_d.initializeGrammarDecoder()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the 
corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.grammarDecoder(vector_fe_d); } // pick up the best hypothesis and determine the utterance probability // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList instance_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, instance_path)) { // if no hypothesis found // hypotheses.clear(); instance_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // compute the forward probabilities (alphas) // trellis_d = (BiGraph*)NULL; trellis_d = search_engine_d.computeForwardBackward(data, beta_threshold_d); // determine the utterance probability here again // computeUtterProb(score); // print utterance probability // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\naverage utterance probability: "); output.concat(score / num_frames); output.concat(L", number of frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); } // accumulate the statistics // accumulate(score, data); // close the front end processing // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } } // end of looping through the input utterances // update the models using the accumulated statistics // update(); // reset the statistical model and 
state transition accumulators // if (iter < (int32)num_iterations_d - 1) { resetAccumulators(); } } // end of looping over the number of iterations // clear all data structures needed to decode // if (!search_engine_d.initializeGrammarPartial()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // clear the top level // search_engine_d.getHDigraph()((int32)initial_level_d).clear(); Sof lm_file; if (!lm_file.open(lm_file_d, File::READ_ONLY)) { lm_file_d.debug(L"lm_file_d"); return Error::handle(name(), L"error opening language model file", Error::ARG, __FILE__, __LINE__); } // restore the top-level subgraph of the hierarchy // if (!lm.read(lm_file)) { return Error::handle(name(), L"load: loading model file", Error::ARG, __FILE__, __LINE__); } lm_d.setRuleModel(lm.getRuleModel()); search_engine_d.setHDigraph(lm_d.getIHD()); // close language model file // lm_file.close(); // write the models to file // if (!store()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // clean up all memory // search_engine_d.clear(); // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of TRAIN using BAUM_WELCH and NONE // branch on the DECODING/FORCED_ALIGNMENT using VITERBI // else if ((algorithm_d == DECODE && implementation_d == VITERBI) || (algorithm_d == FORCED_ALIGNMENT && implementation_d == VITERBI)) { // declare local variables // int32 num_valid_files = 0; int32 current_file_num = 0; int32 total_num_frames = 0; String identifier; Filename input_file_name; File buffer_file; Filename buffer_filename; Filename temp_buffer_filename; Filename input_ID; String output; Filename output_file_name; Sdb 
output_sdb; Vector data; // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::DECODE); // when the output mode is FILE // File total_output_file; if (output_mode_d == FILE) { // open the output file for the utterance hypotheses // if (output_file_d.length() != 0) { if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening the output file: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } if (!total_output_file.open(output_file_d, File::WRITE_ONLY)) { return Error::handle(name(), L"grammarDecoder - error opening output file", Error::ARG, __FILE__, __LINE__); } else { total_output_file.close(); } } } // loop through the input utterances // for (sdb_a.gotoFirst(); sdb_a.getName(input_ID); sdb_a.gotoNext()) { current_file_num++; // if input is streaming, set input_file_name to STREAM_FILE // if (stream_d) { // buffer the input so that it can be accesses multiple times // buffer_filename.assign(File::STREAM_FILE); buffer_file.open(buffer_filename); temp_buffer_filename = (String)(*(buffer_file.getTempFilename())[0]); input_file_name.assign(temp_buffer_filename); } // otherwise get the file path // else { // get audio file path // identifier.assign(input_ID); if (!audio_db_d.getRecord(identifier, input_file_name)) { return Error::handle(name(), L"initialize - unable to find audio file for identifier", Error::ARG, __FILE__, __LINE__); } } // print utterance processing information // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nprocessing file "); output.concat(current_file_num); output.concat(L" ("); output.concat(identifier); output.concat(L")"); output.concat(L": "); output.concat(input_file_name); Console::put(output); 
Console::decreaseIndention(); } // initialize the decoder // if (!search_engine_d.initializeGrammarDecoder()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } // initialize the top search level with the corresponding transcription // we don't need a transcription database for segmented utterances // if (transcription_db_file_d.length() > 0) { initTranscription(identifier, current_file_num - 1); } // loop over all the front end files // for (int32 i = 0; i < vector_fe_d.length(); i++) { // process the utterance file by the front end // vector_fe_d(i).open(input_file_name); // retrieve the all frames of data in advance // // extractFeatures(i, data); } // decode the utterance // if (!verify_d) { search_engine_d.grammarDecoder(vector_fe_d); } // close all the front ends // for (int32 i = 0; i < vector_fe_d.length(); i++) { vector_fe_d(i).close(); } // close the input file // if (stream_d) { buffer_file.close(); } // pick up the best hypothesis and its parameters // String hypotheses; float64 score = 0; int32 num_frames = 0; DoubleLinkedList instance_path; if (!search_engine_d.getHypotheses(hypotheses, alignment_level_d, score, num_frames, instance_path)) { // if no hypothesis found // hypotheses.clear(); instance_path.clear(); if ((!verify_d) && (verbosity_d >= Integral::BRIEF)) { // print the warning message // Console::increaseIndention(); Console::put(L"\nno hypothesis found"); Console::decreaseIndention(); } } else { num_valid_files++; total_num_frames += num_frames; } // print the detailed info about the hypothesis // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); output.assign(L"\nhyp: "); output.concat(hypotheses); output.concat(L"\nscore: "); output.concat(score); output.concat(L" frames: "); output.concat(num_frames); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } if (output_mode_d == TRANSFORM) { // transform the input file and its path 
to the output file // sdb_a.transformName(output_file_name, input_file_name); // open the output file and write best hypothesis // File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { return Error::handle(name(), L"grammarDecoder - error opening output file", Error::ARG, __FILE__, __LINE__); } output_file.put(hypotheses); // close the output file // output_file.close(); } if (output_mode_d == FILE) { // output the best hypothesis and sent ID to the output file // or to stdout // output.assign(hypotheses); if ((int32)alignment_level_d < 0) { output.concat(L" ("); output.concat(input_ID); output.concat(L")\n"); } if (output_file_d.length() != 0) { total_output_file.open(output_file_d, File::APPEND_PLUS); total_output_file.put(output); total_output_file.close(); } else { if (verbosity_d < Integral::BRIEF) { Console::increaseIndention(); Console::increaseIndention(); Console::put(output); Console::decreaseIndention(); Console::decreaseIndention(); } } } if (output_mode_d == LIST) { // output the hypothesis to the corresponding file // from the output list // bool8 more_files; if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); output.assign(L"\nopening the output list: "); output.concat(output_file_d); Console::put(output); Console::decreaseIndention(); } if (current_file_num == 1) { // read output files list into signal database // Sof output_list_file; if (!output_list_file.open(output_list_d)) { return Error::handle(name(), L"grammarDecoder - error opening output list", Error::ARG, __FILE__, __LINE__); } output_sdb.read(output_list_file, 0); output_list_file.close(); more_files = output_sdb.gotoFirst(); } else { // move to the next output file // more_files = output_sdb.gotoNext(); } if (!more_files) { return Error::handle(name(), L"grammarDecoder - insufficient output files in the output list", Error::ARG, __FILE__, __LINE__); } // open the next output file and write the best hypothesis // 
output_sdb.getName(output_file_name); File output_file; if (!output_file.open(output_file_name, File::WRITE_ONLY)) { Console::increaseIndention(); output.assign(L"\ncannot open output file: "); output.concat(output_file_name); Console::put(output); Console::decreaseIndention(); } else { output_file.put(hypotheses); output_file.put(L"\n"); output_file.close(); } } } // end of looping through the input utterances // clean up all memory // search_engine_d.clear(); // close the audio database (optional) // if (audio_db_file_d.length() > 0) { if (!audio_db_d.close()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } } // close the transcription database (optional) // if (transcription_db_file_d.length() > 0) { if (!transcription_db_d.close()) { return Error::handle(name(), L"grammarDecoder", Error::ARG, __FILE__, __LINE__); } } // print the number of successfully processed files // if (verbosity_d >= Integral::BRIEF) { output.assign(L"\nprocessed "); output.concat(num_valid_files); output.concat(L" file(s) successfully, attempted "); output.concat(current_file_num); output.concat(L" file(s), "); output.concat(total_num_frames); output.concat(L" frame(s)\n"); Console::put(output); } } // end of DECODE using VITERBI // branch on the UNKNOWN options // else { return Error::handle(name(), L"invalid algorithm and/or implementation", Error::ARG, __FILE__, __LINE__); } // gracefully exit // return true; } // method: nonLinearDecoder // // arguments: // Sdb& sdb: (input) signal data base to run on // // return: a bool8 value indicating status // // this is the run method for a non-linear decoder // bool8 HiddenMarkovModel::nonLinearDecoder(Sdb& sdb_a) { // branch on the algorithm and function mode // if ((algorithm_d == DECODE && function_mode_d == GRAMMAR_DECODING) || (algorithm_d == TRAIN && function_mode_d == NONE) || (algorithm_d == TRAIN && function_mode_d == UPDATE) || (algorithm_d == TRAIN && function_mode_d == ACCUMULATE)) { if 
(!grammarDecoder(sdb_a)) { return Error::handle(name(), L"nonLinearDecoder", Error::ARG, __FILE__, __LINE__); } } // branch on the DECODE and function mode NONE // else if ((algorithm_d == DECODE && function_mode_d == NONE) || (algorithm_d == SYMBOL_GRAPH_GENERATION && function_mode_d == NONE) || (algorithm_d == SYMBOL_GRAPH_RESCORING && function_mode_d == NONE) || (algorithm_d == FORCED_ALIGNMENT && function_mode_d == NONE)) { if (!networkDecoder(sdb_a)) { return Error::handle(name(), L"not implemented", Error::ARG, __FILE__, __LINE__); } } // branch on the UNKNOWN options // else { return Error::handle(name(), L"invalid cross-word mode", Error::ARG, __FILE__, __LINE__); } // gracefully exit // return true; } // method: checkParams // // arguments: none // // return: a bool8 value indicating status // // this method checks the inputs parameters // bool8 HiddenMarkovModel::checkParams() { // error checking - update levels specified by the user // for (int32 i = 0; i < update_levels_d.length(); i++) { // determine if the mask for the current level is set // if (update_levels_d(i) == ON) { if (i > (int32)num_levels_d) { return Error::handle(name(), L"checkParams - invalid specification for the update levels", Error::ARG, __FILE__, __LINE__); } } } // gracefully exit // return true; } // method: createStatisticalModels // // arguments: none // // return: a bool8 value indicating status // // this method create initial statistical models // bool8 HiddenMarkovModel::createStatisticalModels() { Sof lm_file; LanguageModel lm; // open the language model file // if (!lm_file.open(lm_file_d, File::READ_ONLY)) { lm_file_d.debug(L"lm_file_d"); return Error::handle(name(), L"error opening language model file", Error::ARG, __FILE__, __LINE__); } // read the language model file // if (!lm.read(lm_file)) { return Error::handle(name(), L"read: reading model file", Error::ARG, __FILE__, __LINE__); } lm_d.setRuleModel(lm.getRuleModel()); search_engine_d.setHDigraph(lm_d.getIHD()); // 
set up the number of levels for the search engine // search_engine_d.setNumLevels(search_engine_d.getHDigraph().length()); // close the lm file // lm_file.close(); // load the configuration parameters (optional) // if (cnfg_file_d.length() > 0) { // open the configuration file // Sof cnfg_file_sof; if (!cnfg_file_sof.open(cnfg_file_d)) { return Error::handle(name(), L"load - unable to open configuration file", Error::ARG, __FILE__, __LINE__); } if (verbosity_d >= Integral::BRIEF) { String output; Console::increaseIndention(); output.assign(L"\nloading configuration file: "); output.concat(cnfg_file_d); Console::put(output); Console::decreaseIndention(); } // have the search levels read in its own configuration information // for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) { // configure Viterbi search engine // SearchLevel& search_level = search_engine_d.getSearchLevel(curr_level); // read the configuration parameters corresponding to this level // search_level.readConfig(cnfg_file_sof, curr_level); } // close the configuration files // cnfg_file_sof.close(); } // open the statistical model pool file // Sof smp_sof; Sof update_smp_sof; // load the statistical model pool file // if (smp_file_d.length() > 0) { if (!smp_sof.open(smp_file_d)) { return Error::handle(name(), L"load - unable to open statistical model pool file", Error::ARG, __FILE__, __LINE__); } if (verbosity_d >= Integral::BRIEF) { String output; Console::increaseIndention(); output.assign(L"\nloading statistical model pool file: "); output.concat(smp_file_d); Console::put(output); Console::decreaseIndention(); } } else { return Error::handle(name(), L"load - unable to open statistical model pool file", Error::ARG, __FILE__, __LINE__); } // load the statistical model pool // if (update_smp_file_d.length() > 0) { if (!update_smp_sof.open(update_smp_file_d, File::WRITE_ONLY, File::TEXT)) { return Error::handle(name(), L"load - unable to open update statistical model pool file", 
Error::ARG, __FILE__, __LINE__); } } else { return Error::handle(name(), L"load - unable to open update statistical model pool file", Error::ARG, __FILE__, __LINE__); } // have the search levels read in its own configuration information // for (int32 curr_level = 0; curr_level < search_engine_d.getHDigraph().length(); curr_level++) { // configure Viterbi search engine // SearchLevel& search_level = search_engine_d.getSearchLevel(curr_level); // read the configuration parameters corresponding to this level // search_level.createStatisticalModels(smp_sof, curr_level); if (search_level.getStatisticalModels().length() > 0 ) { search_level.storeStatisticalModels(update_smp_sof, curr_level); } } smp_sof.close(); update_smp_sof.close(); // gracefully exit // return true; } // method: run // // arguments: // Sdb& sdb: (input) signal data base to run on // // return: a bool8 value indicating status // // this is the run method // bool8 HiddenMarkovModel::run(Sdb& sdb_a) { // check the input parameters // if (!checkParams()) { return Error::handle(name(), L"run - invalid paramaters", Error::ARG, __FILE__, __LINE__); } // branch on the ALGORITHM using CONTEXT_GENERATION // if (algorithm_d == CONTEXT_GENERATION) { // branch on the CONTEXT using SYMBOL_ONLY or WORD-INTERNAL // if ((context_mode_d == SYMBOL_ONLY) || (context_mode_d == SYMBOL_INTERNAL)) { if (!linearDecoder(sdb_a)) { return Error::handle(name(), L"run", Error::ARG, __FILE__, __LINE__); } } // branch on the CONTEXT using CROSS-WORD // else if (context_mode_d == CROSS_SYMBOL) { if (!grammarDecoder(sdb_a)) { return Error::handle(name(), L"run", Error::ARG, __FILE__, __LINE__); } } } // branch on MODEL_CREATION using GLOBAL // does not support streaming input. 
// else if (!stream_d && algorithm_d == MODEL_CREATION && implementation_d == GLOBAL) { // create all the models // if (!createStatisticalModels()) { return Error::handle(name(), L"model creation", Error::ARG, __FILE__, __LINE__); } } // end of MODEL_CREATION using GLOBAL // branch on INITIALIZE using GLOBAL // does not support streaming input. // else if (!stream_d && algorithm_d == INITIALIZE && implementation_d == GLOBAL) { // initialize the models // if (!initialize(sdb_a)) { return Error::handle(name(), L"run", Error::ARG, __FILE__, __LINE__); } } // end of INITIALIZE using GLOBAL // branch on MIXTURE_SPLITTING using VARIANCE_SPLITTING // does not support streaming input // else if (!stream_d && algorithm_d == MIXTURE_SPLITTING && implementation_d == VARIANCE_SPLITTING) { // determine if the number of mixtures to split are valid // if ((int32)num_mixtures_d < 1) { return Error::handle(name(), L"mixture splitting: invalid number of mixtures specified", Error::ARG, __FILE__, __LINE__); } // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"run: load hmm models", Error::ARG, __FILE__, __LINE__); } // retrieve the search level of the state level // if ((int32)num_levels_d <= 0) { return Error::handle(name(), L"invalid specification for the number of levels (num_levels_d <= 0)", Error::ARG, __FILE__, __LINE__); } SearchLevel& search_level = search_engine_d.getSearchLevel((int32)num_levels_d - 1); // get the statistical models for the state level // Vector& stat_models = search_level.getStatisticalModels(); // loop over each statistical model and split the mixtures // for (int i = 0; i < stat_models.length(); i++) { stat_models(i).setAlgorithm(StatisticalModel::MIXTURE_SPLITTING); stat_models(i).setImplementation(StatisticalModel::VARIANCE_SPLITTING); stat_models(i).splitMixtureModel(num_mixtures_d); } // write the models to file // if (!store()) { return Error::handle(name(), L"run", Error::ARG, __FILE__, __LINE__); } // clean 
up memory // search_engine_d.clear(); } // end of MIXTURE_SPLITING using VARIANCE_SPLITTING // branch on the ALGORITHM using TRAIN_PARAMETER_TYING or // TEST_PARAMETER_TYING. does not support streaming input. // else if (!stream_d && ((algorithm_d == TRAIN_PARAMETER_TYING) || (algorithm_d == TEST_PARAMETER_TYING))) { if (!parameterTying(sdb_a)) { return Error::handle(name(), L"run", Error::ARG, __FILE__, __LINE__); } } // branch on the CONTEXT using SYMBOL_ONLY or WORD-INTERNAL // else if ((context_mode_d == SYMBOL_ONLY) || (context_mode_d == SYMBOL_INTERNAL)) { if (!linearDecoder(sdb_a)) { return Error::handle(name(), L"run", Error::ARG, __FILE__, __LINE__); } } // branch on the CONTEXT using CROSS-WORD // else if (context_mode_d == CROSS_SYMBOL) { if (!nonLinearDecoder(sdb_a)) { return Error::handle(name(), L"run", Error::ARG, __FILE__, __LINE__); } } // branch on the UNKNOWN options // else { return Error::handle(name(), L"invalid context mode", Error::ARG, __FILE__, __LINE__); } // exit gracefully // return true; } // method: computeUtterProb // // arguments: // float64& utter_prob: (output) utterance probability // // return: a bool8 value indicating status // // this method computes the utterance probability form the trellis // bool8 HiddenMarkovModel::computeUtterProb(float64& utter_prob_a) { // declare local varaibles // float64 backward_occ = (float64)0.0; BiGraphVertex* vertex = (BiGraphVertex*)NULL; BiGraphVertex* start_vertex = (BiGraphVertex*)NULL; // initialize the utterance probability // utter_prob_a = Integral::DB_LOG_MIN_VALUE; // determine if the trellis is valid // if (trellis_d == (BiGraph*)NULL) { return Error::handle(name(), L"beta computation failed - invalid trellis", Error::ARG, __FILE__, __LINE__); } // get the start node of the trellis // start_vertex = trellis_d->getStart(); // make sure the start node has adjacent nodes // if (start_vertex->lengthChild() == 0) { return Error::handle(name(), L"beta computation failed - start vertex 
out-degree is zero", Error::ARG, __FILE__, __LINE__);
  }

  // NOTE(review): angle-bracket template arguments appear to have been stripped
  // from this file (e.g. "Vector&", "SingleLinkedList< GraphVertex >") -- this
  // looks like an extraction artifact; confirm tokens against the repository
  // original before building.

  // loop over all nodes adjacent to the start node and log-add their
  // backward (beta) scores into the utterance probability
  //
  for (bool8 more = start_vertex->gotoFirstChild(); more; more = start_vertex->gotoNextChild()) {
    vertex = start_vertex->getCurrChild()->getVertex();
    if (vertex != (BiGraphVertex*)NULL) {
      backward_occ = vertex->getItem()->getBeta();
      utter_prob_a = Integral::logAddLog(utter_prob_a, backward_occ);
    }
  }

  // determine if the utterance probability is valid: still at the log-domain
  // floor means no path contributed any mass
  //
  if (utter_prob_a == Integral::DB_LOG_MIN_VALUE) {
    return Error::handle(name(), L"beta computation failed - utterance probability is zero", Error::ARG, __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return true;
}

// method: insertNonSpeechSymbols
//
// arguments:
//  Vector& boundary_symbols: (input) vector of boundary non-speech symbols
//  Vector& internal_symbols: (input) vector of internal non-speech symbols
//  DiGraph& graph: (input) graph where the symbols are inserted
//  SearchLevel& level: (input) search level representing the hierarchy
//
// return: a bool8 value indicating status
//
// this method inserts non-speech symbols (e.g. silence models) into the
// transcription graph: mandatory boundary symbols replace the arcs touching
// the start/term vertices, and optional internal symbols are added in
// parallel with each internal arc
//
bool8 HiddenMarkovModel::insertNonSpeechSymbols(Vector& boundary_symbols_a, Vector& internal_symbols_a, DiGraph& graph_a, SearchLevel& level_a) {

  // declare local variables
  //
  int32 num_vertices_boundary = 0;
  int32 num_vertices_internal = 0;
  String output;
  SearchSymbol tmp_output;
  int32 symbol_index = 0;
  SingleLinkedList< GraphVertex > graph_nodes;
  SearchNode** boundary_snode = (SearchNode**)NULL;
  SearchNode** internal_snode = (SearchNode**)NULL;
  GraphVertex* src_vertex = (GraphVertex*)NULL;
  GraphVertex* dst_vertex = (GraphVertex*)NULL;
  GraphVertex* snode_vert = (GraphVertex*)NULL;

  // determine the number of symbols we are adding; nothing to do when both
  // symbol lists are empty
  //
  num_vertices_boundary = boundary_symbols_a.length();
  num_vertices_internal = internal_symbols_a.length();
  if ((num_vertices_boundary == 0) && (num_vertices_internal == 0)) {
    return true;
  }

  // allocate memory
  //
  // NOTE(review): these raw arrays (and the SearchNodes created below) leak
  // on the early error returns in the loops that follow; the happy path
  // deletes them at the end of this method -- consider RAII cleanup.
  //
  if (num_vertices_boundary >
0) {
    boundary_snode = new SearchNode*[num_vertices_boundary];
  }
  if (num_vertices_internal > 0) {
    internal_snode = new SearchNode*[num_vertices_internal];
  }

  // generate the non-speech search nodes for the boundary symbols
  //
  for (int i = 0; i < num_vertices_boundary; i++) {

    // create a search node for the symbol
    //
    boundary_snode[i] = new SearchNode();
    boundary_snode[i]->setSearchLevel(&level_a);
    boundary_snode[i]->setSymbol(boundary_symbols_a(i));

    // determine the symbol index and assign it to the node
    //
    symbol_index = level_a.getSymbolIndex(boundary_symbols_a(i));
    if (symbol_index < 0) {
      boundary_symbols_a(i).debug(L"boundary symbol: ");
      return Error::handle(name(), L"insertNonSpeechSymbols - symbol not found in the symbol table", Error::ARG, __FILE__, __LINE__);
    }
    boundary_snode[i]->setSymbolId(symbol_index);
  }

  // generate the non-speech search nodes for the internal symbols
  //
  for (int i = 0; i < num_vertices_internal; i++) {

    // create a search node for the symbol
    //
    internal_snode[i] = new SearchNode();
    internal_snode[i]->setSearchLevel(&level_a);
    internal_snode[i]->setSymbol(internal_symbols_a(i));

    // determine the symbol index and assign it to the node
    //
    symbol_index = level_a.getSymbolIndex(internal_symbols_a(i));
    if (symbol_index < 0) {
      internal_symbols_a(i).debug(L"internal symbol: ");
      return Error::handle(name(), L"insertNonSpeechSymbols - symbol not found in the symbol table", Error::ARG, __FILE__, __LINE__);
    }
    internal_snode[i]->setSymbolId(symbol_index);
  }

  // collect all internal nodes up front so that vertices inserted below do
  // not perturb the graph iteration (USER mode: list does not own the nodes)
  //
  graph_nodes.setAllocationMode(DstrBase::USER);
  for (bool8 more = graph_a.gotoFirst(); more; more = graph_a.gotoNext()) {
    graph_nodes.insert(graph_a.getCurr());
  }

  // insert forced non-speech symbols at transcription bounds
  //
  src_vertex = graph_a.getStart();

  // loop over each arc out of the start vertex
  //
  if (num_vertices_boundary > 0) {
    for (bool8 more = src_vertex->gotoFirst(); more; more = src_vertex->gotoNext()) {

      // retrieve the destination vertex
      //
      dst_vertex = src_vertex->getCurr()->getVertex();

      // remove arc from source -> destination
      //
      graph_a.removeArc(src_vertex, dst_vertex);
      if (debug_level_d >= Integral::ALL) {
        Console::increaseIndention();
        output.assign(L"\n transcription: removing arc from: ");
        output.concat(L"S");
        output.concat(L" to ");
        dst_vertex->getItem()->getSymbol(tmp_output);
        output.concat(tmp_output);
        Console::put(output);
        Console::decreaseIndention();
      }

      // insert arcs from source -> non-speech-symbol -> destination
      //
      // NOTE(review): the same heap SearchNode pointer is passed to
      // insertVertex for every replaced arc and is deleted at the end of
      // this method -- presumably insertVertex copies the item; confirm,
      // otherwise the graph holds dangling pointers.
      //
      for (int i = 0; i < num_vertices_boundary; i++) {
        snode_vert = graph_a.insertVertex(boundary_snode[i]);

        // source -> non-speech-symbol
        //
        graph_a.insertArc(src_vertex, snode_vert, false, 0);
        if (debug_level_d >= Integral::ALL) {
          Console::increaseIndention();
          output.assign(L"\n transcription: creating arc from: ");
          output.concat(L"S");
          output.concat(L" to ");
          snode_vert->getItem()->getSymbol(tmp_output);
          output.concat(tmp_output);
          Console::put(output);
          Console::decreaseIndention();
        }

        // non-speech-symbol -> destination
        //
        graph_a.insertArc(snode_vert, dst_vertex, false, 0);
        if (debug_level_d >= Integral::ALL) {
          Console::increaseIndention();
          output.assign(L"\n transcription: creating arc from: ");
          snode_vert->getItem()->getSymbol(tmp_output);
          output.concat(tmp_output);
          output.concat(L" to ");
          dst_vertex->getItem()->getSymbol(tmp_output);
          output.concat(tmp_output);
          Console::put(output);
          Console::decreaseIndention();
        }
      }
    }
  }

  // insert optional non-speech symbols between transcription nodes
  //
  for (bool8 more = graph_nodes.gotoFirst(); more; more = graph_nodes.gotoNext()) {

    // retrieve the source vertex
    //
    src_vertex = graph_nodes.getCurr();

    // loop over each arc in the vertex
    //
    for (bool8 more1 = src_vertex->gotoFirst(); more1; more1 = src_vertex->gotoNext()) {

      // retrieve the destination vertex
      //
      dst_vertex = src_vertex->getCurr()->getVertex();

      // boundary symbol: arcs into the terminal vertex are replaced just
      // like the arcs out of the start vertex above
      //
      if (dst_vertex == graph_a.getTerm()) {

        // remove arc from source -> destination
        //
        if (num_vertices_boundary > 0) {
          graph_a.removeArc(src_vertex, dst_vertex);
          if (debug_level_d >=
Integral::ALL) {
            Console::increaseIndention();
            output.assign(L"\n transcription: removing arc from: ");
            src_vertex->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            output.concat(L" to ");
            output.concat(L"T");
            Console::put(output);
            Console::decreaseIndention();
          }
        }

        // insert arcs from source -> non-speech-symbol -> destination
        //
        for (int i = 0; i < num_vertices_boundary; i++) {
          snode_vert = graph_a.insertVertex(boundary_snode[i]);

          // source -> non-speech-symbol
          //
          graph_a.insertArc(src_vertex, snode_vert, false, 0);
          if (debug_level_d >= Integral::ALL) {
            Console::increaseIndention();
            output.assign(L"\n transcription: creating arc from: ");
            src_vertex->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            output.concat(L" to ");
            snode_vert->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            Console::put(output);
            Console::decreaseIndention();
          }

          // non-speech-symbol -> destination
          //
          graph_a.insertArc(snode_vert, dst_vertex, false, 0);
          if (debug_level_d >= Integral::ALL) {
            Console::increaseIndention();
            output.assign(L"\n transcription: creating arc from: ");
            snode_vert->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            output.concat(L" to ");
            output.concat(L"T");
            Console::put(output);
            Console::decreaseIndention();
          }
        }
      }

      // internal symbol: add optional parallel paths, the original arc is
      // kept in place
      //
      else {

        // insert arcs from source -> non-speech-symbol -> destination
        //
        for (int i = 0; i < num_vertices_internal; i++) {
          snode_vert = graph_a.insertVertex(internal_snode[i]);

          // source -> non-speech-symbol
          //
          graph_a.insertArc(src_vertex, snode_vert, false, 0);
          if (debug_level_d >= Integral::ALL) {
            Console::increaseIndention();
            output.assign(L"\n transcription: creating arc from: ");
            src_vertex->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            output.concat(L" to ");
            snode_vert->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            Console::put(output);
            Console::decreaseIndention();
          }

          // non-speech-symbol -> destination
          //
          graph_a.insertArc(snode_vert, dst_vertex, false, 0);
if (debug_level_d >= Integral::ALL) {
            Console::increaseIndention();
            output.assign(L"\n transcription: creating arc from: ");
            snode_vert->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            output.concat(L" to ");
            dst_vertex->getItem()->getSymbol(tmp_output);
            output.concat(tmp_output);
            Console::put(output);
            Console::decreaseIndention();
          }
        }
      }
    }
  }

  // clean memory created for the boundary search nodes
  //
  for (int i = 0; i < num_vertices_boundary; i++) {
    delete boundary_snode[i];
  }
  delete [] boundary_snode;

  // clean memory created for the internal search nodes
  //
  for (int i = 0; i < num_vertices_internal; i++) {
    delete internal_snode[i];
  }
  delete [] internal_snode;

  // exit gracefully
  //
  return true;
}

// method: initSymbolGraph
//
// arguments:
//  Filename& symbol_graph_file: (input) symbol graph filename
//
// return: a bool8 value indicating status
//
// this method initializes the top search level with the symbol graph
// read from the given file
//
bool8 HiddenMarkovModel::initSymbolGraph(Filename& symbol_graph_file_a) {

  // declare local variables
  //
  Sof tmp_sof;
  String output;

  // retrieve the current search level
  //
  SearchLevel& search_level = search_engine_d.getSearchLevel((int32)initial_level_d);

  // retrieve the symbol graph object
  //
  SymbolGraph& symbol_graph = search_engine_d.getSymbolGraph();

  // open the symbol graph file
  //
  // NOTE(review): an open failure only prints a console message here and
  // then falls through to the read; the read's own error handling catches
  // the failure -- confirm this best-effort behavior is intended.
  //
  if (!tmp_sof.open(symbol_graph_file_a, File::READ_ONLY)) {
    Console::increaseIndention();
    output.assign(L"\ncannot open output file: ");
    output.concat(symbol_graph_file_a);
    Console::put(output);
    Console::decreaseIndention();
  }

  // read the symbol graph from file
  //
  if (!symbol_graph.read(tmp_sof, 0)) {
    symbol_graph_file_a.debug(L"symbol_graph_file_a");
    return Error::handle(name(), L"initSymbolGraph - cannot read symbol graph file", Error::ARG, __FILE__, __LINE__);
  }
  tmp_sof.close();

  // reduce the size of the symbol graph to a more compact representation
  //
  SymbolGraph compact_graph;
  symbol_graph.compact(compact_graph);

  // convert the symbol graph to a digraph representation
  //
  DiGraph language_model;
  if
 (!compact_graph.convert(search_level, language_model)) {
    return Error::handle(name(), L"initSymbolGraph", Error::ARG, __FILE__, __LINE__);
  }

  // set the top level subgraph corresponding to the transcription
  //
  search_level.setSubGraph((int32)initial_level_d, language_model);

  // exit gracefully
  //
  return true;
}

// method: initTranscription
//
// arguments:
//  String& identifier: (input) utterance/conversation ID
//  int32 index: (input) transcription index
//
// return: a bool8 value indicating status
//
// this method initializes the top search level with the indexed transcription
//
// NOTE(review): index_a is not referenced in the visible body -- confirm it
// is intentionally unused.
//
bool8 HiddenMarkovModel::initTranscription(String& identifier_a, int32 index_a) {

  // retrieve the current search level
  //
  SearchLevel& search_level = search_engine_d.getSearchLevel((int32)initial_level_d);

  // algorithm: TRAIN or CONTEXT_GENERATION (also FORCED_ALIGNMENT and MLLR):
  // build the supervision graph from the reference transcription
  //
  if ((algorithm_d == TRAIN) || (algorithm_d == CONTEXT_GENERATION) || (algorithm_d == FORCED_ALIGNMENT) || (algorithm_d == MLLR)) {

    // declare local variables
    //
    String output;
    AnnotationGraph ag;
    DiGraph transcription;
    float32 start_time = 0, stop_time = 0;
    bool8 is_conversation = false;
    int32 channel = 0;

    if (verbosity_d >= Integral::BRIEF) {
      Console::increaseIndention();
      output.assign(L"\nretrieving annotation graph for identifier: ");
      output.concat(identifier_a);
      output.concat(L", level: ");
      output.concat(transcription_level_d);
      Console::put(output);
      Console::decreaseIndention();
    }

    // get AnnotationGraph transcription given ID
    //
    if (!transcription_db_d.getRecord(identifier_a, ag)) {
      identifier_a.debug(L"identifier: ");
      return Error::handle(name(), L"initTranscription - unable to find annotation graph for identifier", Error::ARG, __FILE__, __LINE__);
    }

    // convert AnnotationGraph to DiGraph
    //
    agToDigraph(transcription, ag, channel, start_time, stop_time, is_conversation);

    if (verbosity_d >= Integral::BRIEF) {
      if (is_conversation) {
        Console::increaseIndention();
        output.assign(L"segment: ");
        output.concat(L"channel = ");
output.concat(channel);
        output.concat(L", start_time = ");
        output.concat(start_time);
        output.concat(L", stop_time = ");
        output.concat(stop_time);
        Console::put(output);
        Console::decreaseIndention();
      }
    }

    // initialize frontend if necessary: conversational input is a segment
    // of a longer recording, so the front ends must be windowed to it
    //
    if (is_conversation) {

      // loop over all front ends
      //
      for (int32 i = 0; i < vector_fe_d.length(); i++) {
        vector_fe_d(i).init(start_time, stop_time, channel);
      }
    }

    // set the top level subgraph corresponding to the transcription
    //
    search_level.setSubGraph(0, transcription);
  }

  // algorithm: DECODE -- no supervision graph, only segment the audio
  //
  else {

    // declare local variables
    //
    AnnotationGraph ag;
    float32 start_time = 0, stop_time = 0;
    int32 channel = 0;

    // get AnnotationGraph transcription given ID
    //
    if (!transcription_db_d.getRecord(identifier_a, ag)) {
      return Error::handle(name(), L"initTranscription - unable to find annotation graph for identifier", Error::ARG, __FILE__, __LINE__);
    }

    // convert AnnotationGraph to a segment; a true return indicates a
    // conversational (time-anchored) utterance
    //
    if (agToSegment(ag, channel, start_time, stop_time)) {

      if (verbosity_d >= Integral::BRIEF) {
        String output;
        Console::increaseIndention();
        output.assign(L"segment: ");
        output.concat(L"channel = ");
        output.concat(channel);
        output.concat(L", start_time = ");
        output.concat(start_time);
        output.concat(L", stop_time = ");
        output.concat(stop_time);
        Console::put(output);
        Console::decreaseIndention();
      }

      // initialize frontend if necessary
      //
      for (int32 i = 0; i < vector_fe_d.length(); i++) {
        vector_fe_d(i).init(start_time, stop_time, channel);
      }
    }
  }

  // exit gracefully
  //
  return true;
}

// method: agToDigraph
//
// arguments:
//  DiGraph& digraph: (output) digraph built from the transcription
//  AnnotationGraph& ag: (input) annotation graph
//  int32& channel: (output) channel
//  float32& start_time: (output) start time
//  float32& stop_time: (output) stop time
//  bool8& is_conversation: (output) is conversation flag
//
// return: a bool8 value indicating status
//
// this method converts an annotation graph transcription into a linear
// digraph of search nodes and inserts the non-speech symbols
//
bool8 HiddenMarkovModel::agToDigraph(DiGraph& digraph_a, AnnotationGraph& ag_a, int32& channel_a, float32& start_time_a, float32&
stop_time_a, bool8& is_conversation_a) {

  // declare local variables
  //
  // NOTE(review): last_annot and str are assigned/declared but never read
  // in the visible body.
  //
  DoubleLinkedList anno_list;
  Annotation* annot;
  Annotation* last_annot;
  Anchor* ach;
  String trans;
  String transcription;
  String substring;
  String str;
  String output;
  SearchSymbol symbol;
  SearchSymbol tmp_output;
  SearchNode search_node;
  GraphVertex* ptr = (GraphVertex*)NULL;
  GraphVertex* src = (GraphVertex*)NULL;
  GraphVertex* dst = (GraphVertex*)NULL;
  int32 pos = 0;
  int32 symbol_index = 0;

  // initialize this utterance as a conversation file
  //
  is_conversation_a = true;

  // retrieve the top most search level
  //
  SearchLevel& level = search_engine_d.getSearchLevel((int32)initial_level_d);

  // get annotation list
  //
  if (transcription_level_d.length() == 0) {
    return Error::handle(name(), L"agToDigraph - transcription level information has not been specilized", Error::ARG, __FILE__, __LINE__);
  }
  else {
    if (!ag_a.getAnnotationSetByFeature(OPTION_TRANSCRIPTION_LEVEL, transcription_level_d, anno_list)) {
      return Error::handle(name(), L"agToDigraph - the queried transcription level information has not been recorded in transcription database", Error::ARG, __FILE__, __LINE__);
    }
  }

  // convert transcriptions from the annotation list into the DiGraph
  //
  ptr = (GraphVertex*)NULL;
  anno_list.gotoFirst();

  // get the start time information if present, otherwise flag this as a
  // non-conversational file
  //
  if ((annot = anno_list.getCurr()) != NULL) {
    ach = annot->getStartAnchor();
    if (ach->getAnchored()) {
      channel_a = annot->getChannel();
      start_time_a = ach->getOffset();
    }
    else {
      is_conversation_a = false;
    }
  }

  // main loop over annotations
  //
  while ((annot = anno_list.getCurr()) != NULL) {

    // keep this annotation pointer for end time information
    //
    last_annot = annot;

    // tokenize the transcription and generate the graph
    //
    pos = 0;
    trans = annot->getType();
    if (trans.length() > (int32)0) {
      while (trans.tokenize(substring, pos)) {
        substring.trim();
        if (!substring.eq(String::DEF_VALUE)) {

          // setup the search symbol and search level
          //
          symbol.assign(substring);
          search_node.setSearchLevel(&level);
          search_node.setSymbol(symbol);
          transcription.concat(symbol);
          transcription.concat(L" ");

          // setup the search index
          //
          symbol_index = level.getSymbolIndex(symbol);
          if ((int32)symbol_index < 0) {
            symbol.debug(L"symbol: ");
            return Error::handle(name(), L"agToDigraph - symbol not found in the symbol table", Error::ARG, __FILE__, __LINE__);
          }
          search_node.setSymbolId(symbol_index);

          // insert the search node and transitions: the first symbol hangs
          // off the start vertex, subsequent symbols chain off the previous
          // vertex (ptr)
          //
          if (ptr == (GraphVertex*)NULL) {
            src = digraph_a.getStart();
            dst = digraph_a.insertVertex(&search_node);
            digraph_a.insertArc(src, dst);
            if (debug_level_d >= Integral::ALL) {
              Console::increaseIndention();
              output.assign(L"\n transcription: creating arc from: ");
              output.concat(L"S");
              output.concat(L" to ");
              dst->getItem()->getSymbol(tmp_output);
              output.concat(tmp_output);
              Console::put(output);
              Console::decreaseIndention();
            }
          }
          else {
            src = ptr;
            dst = digraph_a.insertVertex(&search_node);
            digraph_a.insertArc(src, dst);
            if (debug_level_d >= Integral::ALL) {
              Console::increaseIndention();
              output.assign(L"\n transcription: creating arc from: ");
              src->getItem()->getSymbol(tmp_output);
              output.concat(tmp_output);
              output.concat(L" to ");
              dst->getItem()->getSymbol(tmp_output);
              output.concat(tmp_output);
              Console::put(output);
              Console::decreaseIndention();
            }
          }
          ptr = dst;
        }
      }
    }
    else {
      ptr = digraph_a.getStart();
    }

    // goto the next annotation of the list
    //
    if (!anno_list.gotoNext()) {
      break;
    }
  } // end of main loop

  // get the end time information if present
  //
  if ((annot = anno_list.getCurr()) != NULL) {
    ach = annot->getEndAnchor();
    if (ach->getAnchored()) {
      stop_time_a = ach->getOffset();
    }
    else {
      is_conversation_a = false;
    }
  }

  // add an arc from the last node added to the term node
  //
  // NOTE(review): if the transcription was entirely empty, ptr (and hence
  // src) is the start vertex here -- confirm an S->T arc is acceptable.
  //
  src = ptr;
  dst = digraph_a.getTerm();
  digraph_a.insertArc(src, dst);
  if (debug_level_d >= Integral::ALL) {
    Console::increaseIndention();
    output.assign(L"\n transcription: creating arc from: ");
src->getItem()->getSymbol(tmp_output);
    output.concat(tmp_output);
    output.concat(L" to ");
    output.concat(L"T");
    Console::put(output);
    Console::decreaseIndention();
  }

  if (verbosity_d >= Integral::BRIEF) {
    Console::increaseIndention();
    output.assign(L"\ntranscription: ");
    output.concat(transcription);
    Console::put(output);
    Console::decreaseIndention();
  }

  // retrieve the non-speech symbols for this level
  //
  Vector& boundary_symbol_table = level.getNonSpeechBoundarySymbolTable();
  Vector& internal_symbol_table = level.getNonSpeechInternalSymbolTable();

  // insert the non-speech symbols into the graph
  //
  insertNonSpeechSymbols(boundary_symbol_table, internal_symbol_table, digraph_a, level);

  // exit gracefully
  //
  return true;
}

// method: agToSegment
//
// arguments:
//  AnnotationGraph& ag: (input) annotation graph
//  int32& channel: (output) channel
//  float32& start_time: (output) start time
//  float32& stop_time: (output) stop time
//
// return: a bool8 value indicating if this utterance is a conversational
// (time-anchored) utterance or not
//
// this method extracts the channel and the start/stop times from the first
// and last annotations; a false return means the annotations carry no time
// anchors
//
bool8 HiddenMarkovModel::agToSegment(AnnotationGraph& ag_a, int32& channel_a, float32& start_time_a, float32& stop_time_a) {

  // declare local variables
  //
  DoubleLinkedList anno_list;
  Annotation* annot;
  Anchor* ach;

  // get annotation list
  //
  // NOTE(review): the error strings below say "agToDigraph" -- apparently
  // copied from the sibling method; confirm before changing the runtime text.
  //
  if (transcription_level_d.length() == 0) {
    return Error::handle(name(), L"agToDigraph - transcription level information has not been specilized", Error::ARG, __FILE__, __LINE__);
  }
  else {
    if (!ag_a.getAnnotationSetByFeature(OPTION_TRANSCRIPTION_LEVEL, transcription_level_d, anno_list)) {
      return Error::handle(name(), L"agToDigraph - the queried transcription level information has not been recorded in transcription database", Error::ARG, __FILE__, __LINE__);
    }
  }

  // position at the first annotation
  //
  anno_list.gotoFirst();

  // get the start time information if present, otherwise this is not a
  // conversational file
  //
  if ((annot = anno_list.getCurr()) != NULL) {
    ach = annot->getStartAnchor();
    if
 (ach->getAnchored()) {
      channel_a = annot->getChannel();
      start_time_a = ach->getOffset();
    }
    else {

      // not conversational utterance
      //
      return false;
    }
  }

  // get the end time information if present
  //
  if ((annot = anno_list.getLast()) != NULL) {
    ach = annot->getEndAnchor();
    if (ach->getAnchored()) {
      stop_time_a = ach->getOffset();
    }
    else {

      // not conversational utterance
      //
      return false;
    }
  }

  // exit gracefully
  //
  return true;
}

// method: resetAccumulators
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this method resets the accumulators for the models: all arc (transition)
// accumulators at every level, then the statistical model accumulators of
// the last (state) level
//
bool8 HiddenMarkovModel::resetAccumulators() {

  // reset the state transition accumulators
  //
  for (int32 i = 0; i < (int32)num_levels_d; i++) {

    // retrieve the search level
    //
    SearchLevel& search_level = search_engine_d.getSearchLevel((int32)i);

    // retrieve the subgraphs at this level
    //
    Vector< DiGraph >& sub_graphs = search_level.getSubGraphs();

    // loop over all subgraphs at this level
    //
    for (int32 j = 0; j < sub_graphs.length(); j++) {

      // loop over each vertex of the subgraph
      //
      for (bool8 more = sub_graphs(j).gotoFirst(); more; more = sub_graphs(j).gotoNext()) {

        // loop over each arc of the vertex and clear its accumulator
        //
        GraphVertex* vertex = sub_graphs(j).getCurr();
        for (bool8 more1 = vertex->gotoFirst(); more1; more1 = vertex->gotoNext()) {
          vertex->getCurr()->setAccumulator(0);
        }
      }
    }
  }

  // retrieve the search level of the state level
  //
  // NOTE(review): this guard runs after the loop above; a non-positive
  // num_levels_d would simply skip the loop, so this only protects the
  // getSearchLevel call below.
  //
  if ((int32)num_levels_d <= 0) {
    return Error::handle(name(), L"invalid specification for the number of levels (num_levels_d <= 0)", Error::ARG, __FILE__, __LINE__);
  }
  SearchLevel& search_level = search_engine_d.getSearchLevel((int32)num_levels_d - 1);

  // get the statistical models for the state level
  //
  Vector& stat_models = search_level.getStatisticalModels();

  // loop over each statistical model and reset the accumulators
  //
  for (int i = 0; i < stat_models.length(); i++) {
    stat_models(i).resetAccumulators();
  }

  // exit gracefully
  //
  return true;
}

// method:
updateStateTransitions
//
// arguments:
//  SearchLevel& search_level: (input) search level
//
// return: a bool8 value indicating status
//
// this method updates the state transition weights of every subgraph at the
// given level from the accumulated statistics: for each source vertex the
// new arc weight is log(arc accumulator / total outgoing accumulator)
//
bool8 HiddenMarkovModel::updateStateTransitions(SearchLevel& search_level_a) {

  // declare local variables
  //
  // NOTE(review): term_vertex is assigned below but never read.
  //
  float32 weight = 0.0;
  float64 value = 0.0;
  float64 accumulator = 0.0;
  GraphArc* search_arc = (GraphArc*)NULL;
  GraphVertex* start_vertex = (GraphVertex*)NULL;
  GraphVertex* term_vertex = (GraphVertex*)NULL;
  GraphVertex* search_vertex = (GraphVertex*)NULL;

  // update the state transition probabilities
  //
  for (int i = 0; i < search_level_a.getNumSubGraphs(); i++) {

    // retrieve the subgraph and its start and term vertices
    //
    DiGraph& subgraph = search_level_a.getSubGraph((int32)i);
    start_vertex = subgraph.getStart();
    term_vertex = subgraph.getTerm();

    // initialize values
    //
    accumulator = 0.0;

    // accumulate all transitions from the start node
    //
    for (bool8 more = start_vertex->gotoFirst(); more; more = start_vertex->gotoNext()) {
      search_arc = start_vertex->getCurr();
      accumulator += search_arc->getAccumulator();
    }

    // update all transitions from the start node
    //
    for (bool8 more = start_vertex->gotoFirst(); more; more = start_vertex->gotoNext()) {
      search_arc = start_vertex->getCurr();
      value = search_arc->getAccumulator();

      // have we accumulated any statistics for this search node?
      //
if (accumulator > 0.0) {

        // compute the expected transition value in the log domain; the
        // almostEqual check avoids log(x) - log(x) round-off for the
        // single-arc case
        //
        if (Integral::almostEqual(value, accumulator)) {
          weight = 0.0;
        }
        else {
          weight = Integral::log(value) - Integral::log(accumulator);
        }

        // check the bounds on the expected transition value
        //
        if ((weight > -DEF_FLT_MAX) && (weight < DEF_FLT_MAX)) {
          search_arc->setWeight(weight);
        }
        else {
          search_arc->setWeight(Integral::MIN_LOG_VALUE);
        }
      }
    }

    // loop over all search nodes in the subgraph and repeat the same
    // normalize-and-update for each vertex's outgoing arcs
    //
    for (bool8 more = subgraph.gotoFirst(); more; more = subgraph.gotoNext()) {

      // retrieve the search node
      //
      search_vertex = const_cast* >(subgraph.getCurr());

      // initialize values
      //
      accumulator = 0.0;

      // loop over all transitions from the search node
      //
      for (bool8 more1 = search_vertex->gotoFirst(); more1; more1 = search_vertex->gotoNext()) {
        search_arc = search_vertex->getCurr();
        accumulator += search_arc->getAccumulator();
      }

      // update all transitions from the search node
      //
      // NOTE(review): this loop variable shadows the outer "more" -- legal,
      // but worth renaming for clarity.
      //
      for (bool8 more = search_vertex->gotoFirst(); more; more = search_vertex->gotoNext()) {
        search_arc = search_vertex->getCurr();
        value = search_arc->getAccumulator();

        // have we accumulated any statistics for this search node?
        //
if (accumulator > 0.0) {

          // compute the expected transition value
          //
          if (Integral::almostEqual(value, accumulator)) {
            weight = 0.0;
          }
          else {
            weight = Integral::log(value) - Integral::log(accumulator);
          }

          // check the bounds on the expected transition value
          //
          if ((weight > -DEF_FLT_MAX) && (weight < DEF_FLT_MAX)) {
            search_arc->setWeight(weight);
          }
          else {
            search_arc->setWeight(Integral::MIN_LOG_VALUE);
          }
        }
      }
    }
  }

  // exit gracefully
  //
  return true;
}

// method: updateStatisticalModels
//
// arguments:
//  SearchLevel& search_level: (input) search level
//
// return: a bool8 value indicating status
//
// this method updates the statistical (emission) models of the given level
// using the accumulated statistics, applying the variance floor when a
// variance floor file was specified
//
bool8 HiddenMarkovModel::updateStatisticalModels(SearchLevel& search_level_a) {

  // declare local variables
  //
  Sof varfile;
  VectorFloat varfloor;

  // get the statistical models for the state level
  //
  Vector& stat_models = search_level_a.getStatisticalModels();

  // read the variance floor values; varfloor stays empty when no file was
  // given
  //
  if (!variance_floor_file_d.eq(String::DEF_VALUE)) {
    varfile.open(variance_floor_file_d);
    varfloor.read(varfile, 0);
    varfile.close();
  }

  // loop over each statistical model and update the model parameters,
  // skipping models seen fewer than min_model_count_d times
  //
  for (int i = 0; i < stat_models.length(); i++) {
    if (stat_models(i).getAccessCount() > min_model_count_d) {
      stat_models(i).update(varfloor, min_model_count_d);
    }
  }

  // exit gracefully
  //
  return true;
}

// method: extractFeatures
//
// arguments:
//  int32 fe_index: (input) front end index
//  Vector& data: (output) feature vectors
//
// return: a bool8 value indicating status
//
// this method extracts all features for all frames from file via the
// indexed front end
//
bool8 HiddenMarkovModel::extractFeatures(int32 fe_index_a, Vector& data_a) {

  // declare local variables
  //
  int32 num_frames = vector_fe_d(fe_index_a).getNumFrames();

  // set the number of feature vectors
  //
  data_a.setLength(num_frames);

  // loop over the feature vectors in the file
  //
  for (int32 i = 0; i < num_frames; i++) {

    // get the feature vector
    //
vector_fe_d(fe_index_a).getVector(data_a(i), 0, (int32)i);

    // sanity check: an empty observation indicates a front-end failure
    //
    if (data_a(i).length() == 0) {
      return Error::handle(name(), L"extractFeatures - observation length is zero", Error::ARG, __FILE__, __LINE__);
    }
  }

  // exit gracefully
  //
  return true;
}

// method: loadAccumulators
//
// arguments:
//  Sdb& sdb: (input) database of accumulator files from the command line
//
// return: a bool8 value indicating status
//
// this method loads the state transition and emission accumulators from
// file; the accumulator files are taken from the given sdb when it is
// non-empty, otherwise from the accumulator list file (accum_list_d)
//
bool8 HiddenMarkovModel::loadAccumulators(Sdb& sdb_a) {

  // declare local variables
  //
  String output;
  Sof accum_list_file;
  Sof accum_file;
  Sdb accum_sdb;
  Filename accum_file_name;

  // if the accumulator files are specified on the command line instead of a
  // list then obtain the command line arguments
  //
  if (sdb_a.length() > 0 ) {

    // loop over each element in the sdb
    //
    for (sdb_a.gotoFirst(); sdb_a.getName(accum_file_name); sdb_a.gotoNext()) {

      // open the accumulator file
      //
      if (!accum_file.open(accum_file_name)) {
        return Error::handle(name(), L"loadAccumulators", Error::ARG, __FILE__, __LINE__);
      }

      // loop over all search levels
      //
      for (int32 i = 0; i < (int32)num_levels_d; i++) {

        // retrieve the current search level
        //
        SearchLevel& search_level = search_engine_d.getSearchLevel(i);

        // read the state transition probability accumulators from file
        //
        search_level.loadTransitionAccumulators(accum_file);

        // read the state emission probability accumulators from file
        //
        search_level.loadEmissionAccumulators(accum_file);
      }

      // close the file
      //
      accum_file.close();
    }

    // remove the accumulator files when done
    //
    for (sdb_a.gotoFirst(); sdb_a.getName(accum_file_name); sdb_a.gotoNext()) {
      File::remove(accum_file_name);
    }

    // exit gracefully
    //
    return true;
  }

  /*
   * normal mode where the accumulator list is a file
   *
   * NOTE(review): this branch duplicates the loop above almost verbatim --
   * a candidate for a shared private helper.
   */
  if (verbosity_d >= Integral::BRIEF) {
    Console::increaseIndention();
    output.assign(L"\nopening the accumulator file: ");
    output.concat(accum_file_d);
    Console::put(output);
    Console::decreaseIndention();
  }

  // open the accumulator list file
  //
if (!accum_list_file.open(accum_list_d)) {
    return Error::handle(name(), L"loadAccumulators", Error::ARG, __FILE__, __LINE__);
  }

  // read the contents of the sdb
  //
  accum_sdb.read(accum_list_file, 0);
  accum_list_file.close();

  // loop over each element in the sdb
  //
  for (accum_sdb.gotoFirst(); accum_sdb.getName(accum_file_name); accum_sdb.gotoNext()) {

    // open the accumulator file
    //
    if (!accum_file.open(accum_file_name)) {
      return Error::handle(name(), L"loadAccumulators", Error::ARG, __FILE__, __LINE__);
    }

    // loop over all search levels
    //
    for (int32 i = 0; i < (int32)num_levels_d; i++) {

      // retrieve the current search level
      //
      SearchLevel& search_level = search_engine_d.getSearchLevel(i);

      // read the state transition probability accumulators from file
      //
      search_level.loadTransitionAccumulators(accum_file);

      // read the state emission probability accumulators from file
      //
      search_level.loadEmissionAccumulators(accum_file);
    }

    // close the file
    //
    accum_file.close();
  }

  // remove the accumulator files when done
  //
  for (accum_sdb.gotoFirst(); accum_sdb.getName(accum_file_name); accum_sdb.gotoNext()) {
    File::remove(accum_file_name);
  }

  // exit gracefully
  //
  return true;
}

// method: storeAccumulators
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this method stores the state transition and emission accumulators to file
//
bool8 HiddenMarkovModel::storeAccumulators() {

  // declare local variables
  //
  String output;
  Sof accum_file;

  // check if the accumulator file exists (this tweak is done to make
  // training compatible with isip_run): when no file name was given,
  // synthesize a unique one from pid, time and a counter
  //
  if(accum_file_d.length() == 0) {
    bool8 flag_exists = false;
    int32 index = 0;
    if(!File::exists(accumulator_path_d)) {
      File::mkdir(accumulator_path_d);
    }
    Filename accumulator_path_name(accumulator_path_d);
    accumulator_path_name.concat(L"/");
    accumulator_path_name.concat(accumulator_name_d);
    Filename basename(accumulator_path_name);
    Filename temp;

    // NOTE(review): flag_exists is never set true; the loop exits only via
    // the break below.
    //
    while (!flag_exists) {

      // build a unique string
      //
      temp.assign(basename);
temp.concat(L"_");
      temp.concat(Integral::getPid());
      temp.concat(L"_");
      temp.concat((uint32)(Integral::time()), L"%10.10lu");
      temp.concat(L"_");
      temp.concat(index);
      temp.concat(L".sof");

      // check if all the scripts have been created: first unused name wins
      //
      if (!File::exists(temp)) {
        accum_file_d.assign(temp);
        break;
      }
      else {
        index++;
      }
    }
  }

  if (verbosity_d >= Integral::BRIEF) {
    Console::increaseIndention();
    output.assign(L"\nopening the accumulator file: ");
    output.concat(accum_file_d);
    Console::put(output);
    Console::decreaseIndention();
  }

  // open the accumulator file
  //
  if (!accum_file.open(accum_file_d, File::WRITE_ONLY, File::BINARY)) {
    return Error::handle(name(), L"storeAccumulators", Error::ARG, __FILE__, __LINE__);
  }

  // loop over all levels and store the ones specified
  //
  for (int32 i = 0; i < update_levels_d.length(); i++) {

    // determine if the mask for the current level is set (level 0 is never
    // stored)
    //
    if ((update_levels_d(i) == ON) && (i > 0)) {

      // retrieve the search level of the state level
      //
      SearchLevel& search_level = search_engine_d.getSearchLevel(i);

      // write the state transition probability accumulators to file
      //
      if ((update_mode_d == TRANSITIONS) || (update_mode_d == ALL)) {
        search_level.storeTransitionAccumulators(accum_file);
      }

      // write the state emission probability accumulators to file
      //
      if ((update_mode_d == OBSERVATIONS) || (update_mode_d == ALL)) {
        search_level.storeEmissionAccumulators(accum_file);
      }
    }
  }

  // close the file
  //
  accum_file.close();

  // exit gracefully
  //
  return true;
}

// method: accumulate
//
// arguments:
//  float64 utter_prob: (input) utterance probability
//  Vector& data: (input) feature vectors
//
// return: a bool8 value indicating status
//
// this method accumulates statistics during training: state transition
// statistics followed by state observation statistics
//
bool8 HiddenMarkovModel::accumulate(float64 utter_prob_a, Vector& data_a) {

  // accumulate the state transition statistics
  //
  accumulateStateTransitions(utter_prob_a, data_a);

  // accumulate the state observation statistics
  //
accumulateStatisticalModels(utter_prob_a, data_a);

  // exit gracefully
  //
  return true;
}

// method: update
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this method updates the models using the accumulated statistics for every
// level enabled in the update mask
//
bool8 HiddenMarkovModel::update() {

  // loop over all levels and update the ones specified
  //
  for (int32 i = 0; i < update_levels_d.length(); i++) {

    // determine if the mask for the current level is set (level 0 is never
    // updated)
    //
    if ((update_levels_d(i) == ON) && (i > 0)) {

      // retrieve the search level of the state level
      //
      SearchLevel& search_level = search_engine_d.getSearchLevel(i);

      // update the state transition probabilities
      //
      if ((update_mode_d == TRANSITIONS) || (update_mode_d == ALL)) {
        updateStateTransitions(search_level);
      }

      // update the state observation probabilities
      //
      if ((update_mode_d == OBSERVATIONS) || (update_mode_d == ALL)) {
        updateStatisticalModels(search_level);
      }
    }
  }

  // exit gracefully
  //
  return true;
}

// method: accumulateStatisticalModels
//
// arguments:
//  float64 utter_prob: (input) utterance probability
//  Vector& data: (input) feature vectors
//
// return: a bool8 value indicating status
//
// Reference:
//
// [1] L. Rabiner, B. H. Juang, "Fundamentals of Speech Recognition", Prentice
//     Hall P T R, New Jersey, 1993, pp.
350-352, ISBN 0-13-015157-2
//
//                 gamma(t)[j, k] = A[t, j] * B[t, j, k]
//
//            alpha(t)[j] * beta(t)[j]
//  A[t, j] = -------------------------
//            alpha(t)[j] * beta(t)[j]    t = [1, .., T]  k = [1, .., M]   (1)
//
//               c[j, k] * gauss[O(t), mu[j, k] * cov[j, k]
//  B[t, j, k] = ------------------------------------------
//               c[j, m] * gauss[O(t), mu[j, m] * cov[j, m]   m = [1, .., M]
//
// this method accumulates statistics during training and then uses them
// to update the model parameters
//
bool8 HiddenMarkovModel::accumulateStatisticalModels(float64 utter_prob_a, Vector& data_a) {

  // declare local variables
  //
  TrainNode* train_node = (TrainNode*)NULL;
  BiGraphVertex* vertex = (BiGraphVertex*)NULL;

  // make sure the trellis is valid
  //
  if (trellis_d == (BiGraph*)NULL) {
    return false;
  }

  // accumulate the model statistics: every valid trellis node with a valid
  // model contributes its occupancy-weighted observation statistics
  //
  for (bool8 more = trellis_d->gotoFirst(); more; more = trellis_d->gotoNext()) {

    // retrieve the node
    //
    vertex = const_cast* >(trellis_d->getCurr());
    train_node = const_cast(vertex->getItem());
    if (train_node->getValidNode()) {
      if (train_node->getValidModel()) {
        train_node->accumulate(utter_prob_a, data_a, min_mpd_d, min_occupancy_d);
      }
    }
  }

  // exit gracefully
  //
  return true;
}

// method: accumulateStateTransitions
//
// arguments:
//  float64 utter_prob: (input) utterance probability
//  Vector& data: (input) feature vectors
//
// return: a bool8 value indicating status
//
// Reference:
//
// [1] L. Rabiner, B. H. Juang, "Fundamentals of Speech Recognition", Prentice
//     Hall P T R, New Jersey, 1993, ISBN 0-13-015157-2.
// // this method update the models using the accumulated statistics // bool8 HiddenMarkovModel::accumulateStateTransitions(float64 utter_prob_a, Vector& data_a) { // declare local variables // Double val; float64 alpha = 0.0; float64 beta = 0.0; float64 weight = 0.0; float64 score = 0.0; float64 gamma = 0.0; Context* symbol = (Context*)NULL; TrainNode* child_tnode = (TrainNode*)NULL; TrainNode* parent_tnode = (TrainNode*)NULL; BiGraphVertex* child = (BiGraphVertex*)NULL; BiGraphVertex* parent = (BiGraphVertex*)NULL; GraphArc* search_arc = (GraphArc*)NULL; GraphVertex* child_gnode = (GraphVertex*)NULL; GraphVertex* parent_gnode = (GraphVertex*)NULL; DoubleLinkedList > active_list(DstrBase::USER); // determine if the trellis is valid // if (trellis_d == (BiGraph*)NULL) { return Error::handle(name(), L"accumulateStateTransitions", Error::ARG, __FILE__, __LINE__); } // add the start vertex to the active list // active_list.insertLast(trellis_d->getStart()); // loop until the active list is empty // while (!active_list.isEmpty()) { // remove the first element from the list // active_list.removeFirst(parent); // accumulate the transitions from this vertex to the adjacent vertices // for (bool8 more = parent->gotoFirstChild(); more; more = parent->gotoNextChild()) { // retrieve the current child // child = parent->getCurrChild()->getVertex(); // retrieve the train nodes // child_tnode = child->getItem(); parent_tnode = parent->getItem(); // make sure the child is reachable from the final hypothesis // if (!child_tnode->getValidNode()) { continue; } // retrieve the probabilities // alpha = parent_tnode->getAlpha(); weight = parent->getCurrChild()->getWeight(); beta = child_tnode->getBeta(); // determine if the parent has a valid serach node // if ((symbol = parent_tnode->getReference()) != (Context*)NULL) { // retrieve the parents graph vertex and search node // parent_gnode = symbol->getCentralVertex(); // determine if the child has a valid serach node // if ((symbol = 
child_tnode->getReference()) != (Context*)NULL) { // retrieve the childs graph vertex and search node // child_gnode = symbol->getCentralVertex(); // model evaluation - output score // score = 0.0; if (child_tnode->getValidModel()) { score = child_tnode->getScore(); } // compute the transition probability // gamma = exp(alpha + weight + score + beta - utter_prob_a); // accumulate the statistics // if ((parent_gnode != (GraphVertex*)NULL) && (child_gnode != (GraphVertex*)NULL)) { // get the search arc connecting the nodes // search_arc = getSearchArc(parent_gnode, child_gnode); // accumulate transition probability statistics // if (search_arc != (GraphArc*)NULL) { search_arc->setAccumulator(search_arc->getAccumulator() + gamma); } } else { return Error::handle(name(), L"accumulateStateTransitions - search vertices are null", Error::ARG, __FILE__, __LINE__); } } } // insert the children of this vertex into the list // if (!child->getItem()->isAccumulatorValid()) { active_list.insertLast(child); child->getItem()->setAccumulatorValid(true); } } } // clear the list before exiting // active_list.clear(); // exit gracefully // return true; } // method: getSearchArc // // arguments: // GraphVertex* src: (input) source vertex // GraphVertex* dst: (input) destination vertex // // return: arc connecting the input vertices // // this method returns the arc connecting two vertices // GraphArc* HiddenMarkovModel::getSearchArc(GraphVertex* src_a, GraphVertex* dst_a) { // declare local variables // GraphArc* search_arc = (GraphArc*)NULL; // loop over all vertices adjacent to the source vertex // for (bool8 more = src_a->gotoFirst(); more; more = src_a->gotoNext()) { search_arc = src_a->getCurr(); if (search_arc->getVertex() == dst_a) { break; } } // return the arc connecting two vertices // return search_arc; } // method: insert // // arguments: // VectorByte& mask: (input/output) mask vector // int32 start_index: (input) start index // int32 num_elem: (input) number of elements 
// byte8 mode: (input) byte8 mode (0 or, 1) // // return: a bool8 value indicating status // // this method inserts given byte8 mode (OFF or ON) in mask vector // bool8 HiddenMarkovModel::insert(VectorByte& mask_a, int32 start_index_a, int32 num_elem_a, byte8 mode_a) { int32 len = mask_a.length(); // check the argument // if ((start_index_a > len) || (num_elem_a > len)) { return Error::handle(name(), L"insert", Error::ARG, __FILE__, __LINE__); } int32 end_index = start_index_a + num_elem_a; for (int32 i = start_index_a; i < end_index; i++) { mask_a(i) = mode_a; } // exit gracefully // return true; } // method: parameterTying // // arguments: // Sdb& sdb: (input) signal data base to run on // // return: a bool8 value indicating status // // this is the run method for running the parameter-tying method // bool8 HiddenMarkovModel::parameterTying(Sdb& sdb_a) { // branch on TRAIN_PARAMGTYING and ML // if (algorithm_d == TRAIN_PARAMETER_TYING && implementation_d == ML) { // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"parameterTying", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // load the occupancies from the accumulators file // loadOccupancies(); // train the decision-tree and get the clustured statistical models // parameterTyingTrain(); // write the trained models to file // if (!store()) { return Error::handle(name(), L"parameterTying", Error::ARG, __FILE__, __LINE__); } // clean up memory // search_engine_d.clear(); } // end of TRAIN_PARAMETER_TYING and ML // branch on TEST_PARAM_TYING and ML // else if (algorithm_d == TEST_PARAMETER_TYING && implementation_d == ML) { // load the hmm models from the model file // if (!load()) { return Error::handle(name(), L"parameterTying", Error::ARG, __FILE__, __LINE__); } // set the search engine mode // search_engine_d.setInitialLevel(initial_level_d); 
search_engine_d.setSearchMode(HierarchicalSearch::TRAIN); // get the index of the statistical models for the models that // were not seen in the training set // parameterTyingTest(); // write the trained models to file // if (!store()) { return Error::handle(name(), L"parameterTying", Error::ARG, __FILE__, __LINE__); } // clean up memory // search_engine_d.clear(); } // end of TEST_PARAMETER_TYING and ML // branch on the UNKNOWN options // else { return Error::handle(name(), L"invalid cross-word mode", Error::ARG, __FILE__, __LINE__); } // gracefully exit // return true; } // method: parameterTyingTrain // // arguments: none // // return: a bool8 value indicating status // // this is the run method for building the PhoneticDecisionTree // bool8 HiddenMarkovModel::parameterTyingTrain() { // local variables // HashTable symbol_hash_out; Vector stat_models_out; bool8 status; // get the symbol table (states) at the lowest level // int32 level = num_levels_d - (Long)1; SearchLevel& search_level = search_engine_d.getSearchLevel(level); Vector& symbol_table = search_level.getSymbolTable(); // get the sub-graphs (states) at the lowest level that correspond // to the upper graph symbols (phone models) // Vector >& sub_graphs = search_level.getSubGraphs(); // get the HashTable that maps the symbols (states) to the // StatisticalModels at the lowest level // HashTable& symbol_hash = search_level.getSymbolHashTable(); // get the statistical models from the lowest level // Vector& stat_models = search_level.getStatisticalModels(); // get the context map, left-context, right-context, upper-level // symbol-table, contextless symbol-table from the upper-level // (phone models) // level--; SearchLevel& upper_search_level = search_engine_d.getSearchLevel(level); Vector& context_map = upper_search_level.getContextMap(); int32 left_context = upper_search_level.getLeftContext(); int32 right_context = upper_search_level.getRightContext(); Vector& upper_symbol_table = 
upper_search_level.getSymbolTable();
  Vector& contextless_symbol_table =
    upper_search_level.getContextLessSymbolTable();

  // declare the PhoneticDecisionTree object
  //
  PhoneticDecisionTree d_tree;
  HashTable tied_symbol_hash;
  Vector tied_stat_models;

  // initially load the tree with the required data
  //
  d_tree.loadTrain(context_map, left_context, right_context,
                   upper_symbol_table, contextless_symbol_table,
                   sub_graphs, symbol_table, symbol_hash, stat_models,
                   ques_ans_file_d, tied_symbol_hash, tied_stat_models);

  // set the runmode, stopmode, algorithm, implementation and various
  // thresholds
  //
  d_tree.setRunMode(DecisionTreeBase::TRAIN);
  d_tree.setStopMode(DecisionTreeBase::THRESH);
  d_tree.setAlgorithm(PhoneticDecisionTree::ML);
  d_tree.setImplementation(PhoneticDecisionTree::DEFAULT);
  d_tree.setSplitThreshold(phonetic_dt_split_threshold_d);
  d_tree.setMergeThreshold(phonetic_dt_merge_threshold_d);
  d_tree.setNumOccThreshold(phonetic_dt_num_occ_threshold_d);

  // train the tree
  //
  status = d_tree.runDecisionTree();

  // get the new mapping for symbols(states) to statistical models and
  // statistical models
  //
  d_tree.getStatTrain(context_map, sub_graphs, symbol_table,
                      contextless_symbol_table, symbol_hash, stat_models,
                      phonetic_dt_file_d, tied_symbol_hash,
                      tied_stat_models);

  // exit gracefully
  //
  return status;
}

// method: parameterTyingTest
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this is the run method for running the PhoneticDecisionTree in TEST
// mode
//
bool8 HiddenMarkovModel::parameterTyingTest() {

  // local variables
  //
  bool8 status = true;

  // get the symbol table (states) at the lowest level
  //
  int32 level = num_levels_d - (Long)1;
  SearchLevel& search_level = search_engine_d.getSearchLevel(level);
  Vector& symbol_table = search_level.getSymbolTable();

  // get the sub-graphs (states) at the lowest level that correspond
  // to the upper graph symbols (phone models)
  //
  Vector >& sub_graphs = search_level.getSubGraphs();

  // get the HashTable that maps the symbols (states) to the
  // StatisticalModels at the lowest level
  //
  HashTable& symbol_hash = search_level.getSymbolHashTable();

  // get the context map, left-context, right-context, upper-level
  // symbol table, upper-level contextless symbol table from the
  // upper-level (phone models)
  //
  level--;
  SearchLevel& upper_search_level = search_engine_d.getSearchLevel(level);
  Vector& context_map = upper_search_level.getContextMap();
  int32 left_context = upper_search_level.getLeftContext();
  int32 right_context = upper_search_level.getRightContext();
  Vector& upper_symbol_table = upper_search_level.getSymbolTable();
  Vector& upper_contextless_symbol_table =
    upper_search_level.getContextLessSymbolTable();

  // declare the PhoneticDecisionTree object
  //
  PhoneticDecisionTree d_tree;

  // initially load the tree
  //
  d_tree.loadTest(phonetic_dt_file_d);

  // set the runmode, stopmode, algorithm, implementation
  //
  d_tree.setRunMode(DecisionTreeBase::TEST);
  d_tree.setStopMode(DecisionTreeBase::THRESH);
  d_tree.setAlgorithm(PhoneticDecisionTree::ML);
  d_tree.setImplementation(PhoneticDecisionTree::DEFAULT);

  // update the new mapping for symbols(states) to statistical models and
  // statistical models
  //
  d_tree.getStatTest(context_map, left_context, right_context,
                     upper_symbol_table, upper_contextless_symbol_table,
                     sub_graphs, symbol_table, symbol_hash,
                     ques_ans_file_d);

  // exit gracefully
  //
  return status;
}

// method: loadOccupancies
//
// arguments: none
//
// return: a bool8 value indicating status
//
// this method loads the occupancies from the occupancy file
//
bool8 HiddenMarkovModel::loadOccupancies() {

  // read the state occupancies from file
  //
  for (int32 curr_level = 0; curr_level < num_levels_d; curr_level++) {

    // have the search levels load the symbol occupancies
    //
    SearchLevel& search_level = search_engine_d.getSearchLevel(curr_level);
    search_level.loadOccupancies();
  }

  // exit gracefully
  //
  return true;
}

// method: createContexts
//
// arguments:
//  Vector& symbols: (input) input symbols
//  int32 order: (input) length of the contexts
//  Vector& all_contexts: (output) all contexts
//
// return: a bool8 value indicating status
//
// this method creates all possible contexts
//
bool8 HiddenMarkovModel::createContexts(Vector& symbols_a, int32 order_a, Vector& all_contexts_a) {

  // local variable
  //
  SearchSymbol symbol;

  // set the capacity of the all-context vector: there are
  // num_symbols^order possible contexts
  //
  int32 capacity = (int32)Integral::pow((float64)symbols_a.length(), order_a);
  all_contexts_a.setCapacity(capacity);

  // generate all possible contexts recursively
  //
  if (!appendContexts(symbols_a, symbol, 0, order_a, all_contexts_a)) {
    return Error::handle(name(), L"createContexts",
                         Error::ARG, __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return true;
}

// method: appendContexts
//
// arguments:
//  Vector& symbols: (input) input symbols
//  SearchSymbol symbol: (input) current search symbol
//  int32 curr_order: (input) current order of the context
//  int32 order: (input) order of the context
//  Vector& all_contexts: (output) all context vector
//
// return: a bool8 value indicating status
//
// this method appends the symbols to the contexts at any given
// context-level: symbols in the first half of the context are joined
// with "-" (left context), symbols in the second half with "+"
// (right context)
//
bool8 HiddenMarkovModel::appendContexts(Vector& symbols_a, SearchSymbol symbol_a, int32 curr_order_a, int32 order_a, Vector& all_contexts_a) {

  // declare local variables
  //
  int32 thresh = order_a / 2;

  // check if we are finished generating the contexts
  //
  if (curr_order_a == order_a) {
    all_contexts_a.concat(symbol_a);
    return true;
  }

  // loop over all possible symbols in the symbol table
  //
  SearchSymbol symbol_copy(symbol_a);
  int32 num_symbols = symbols_a.length();

  for (int32 i = 0; i < num_symbols; i++) {

    // incrementally build the context
    //
    symbol_a.assign(symbol_copy);
    if (curr_order_a == 0) {
      symbol_a.assign(symbols_a(i));
    }
    else {
      if (curr_order_a <= thresh) {
        symbol_a.concat(L"-");
        symbol_a.concat(symbols_a(i));
      }
      else {
        symbol_a.concat(L"+");
        symbol_a.concat(symbols_a(i));
      }
    }

    // generate all possible contexts recursively
    //
    if (!appendContexts(symbols_a, symbol_a, curr_order_a + 1, order_a, all_contexts_a)) {
      return Error::handle(name(), L"appendContexts",
                           Error::ARG, __FILE__, __LINE__);
    }
  }

  // exit gracefully
  //
  return true;
}