// file: $isip/class/stat/NGramModel/ngrm_05.cc
// version: $Id: ngrm_05.cc 8326 2002-07-10 16:23:24Z alphonso $
//

// isip include files
//
#include "NGramModel.h"

// method: load
//
// arguments:
//  Sof& sof: (input) input ngram source file
//  const int32& tag: (input) sof object instance tag
//  const Vector& symbol_table: (input) input symbol table mapping index
//  const String& name: (input) sof object instance name
//
// return: a bool8 indicating status
//
// this method reads in the ngram from a file. the order of the model
// (order_d) must already have been set to a positive value before this
// method is called; the supplied symbol table is cached in symbol_table_d
// so that store() can later write the model back out.
//
bool8 NGramModel::load(Sof& sof_a, const int32& tag_a,
		       const Vector& symbol_table_a,
		       const String& name_a) {

  // check the order of the language model:
  // an order < 1 means the model was never configured, so there is
  // nothing meaningful to parse
  //
  if ((int32)order_d < 1) {
    Console::put(L"*** order of the language model is zero ***");
    return false;
  }

  // store the symbol table for future use
  // (note: this is assigned before parsing, so symbol_table_d is
  //  modified even if the parse below fails)
  //
  symbol_table_d.assign(symbol_table_a);

  // parse the ngram file: the parser fills gram_hash_d, the hash table
  // holding the ngram nodes, using the symbol table to map words to
  // integer indices
  //
  NGramParser parser(order_d);
  if (!parser.load(sof_a, gram_hash_d, symbol_table_a, tag_a, name_a)) {
    return Error::handle(name(), L"load", Error::ARG, __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return true;
}

// method: store
//
// arguments:
//  Sof& sof: (output) output ngram source file
//  const int32& tag: (input) sof object instance tag
//  const String& name: (input) sof object instance name
//
// return: a bool8 indicating status
//
// this method stores the ngrams held in gram_hash_d into a source
// ngram file, using the symbol table cached by a previous load()
//
bool8 NGramModel::store(Sof& sof_a, const int32& tag_a,
			const String& name_a) const {

  // write the ngram file via the parser (the symmetric operation
  // to load() above)
  //
  NGramParser parser(order_d);
  if (!parser.store(sof_a, gram_hash_d, symbol_table_d, tag_a, name_a)) {
    return Error::handle(name(), L"store", Error::ARG, __FILE__, __LINE__);
  }

  // exit gracefully
  //
  return true;
}

// method: getScore
//
// arguments:
//  const VectorLong& index: (input) SearchSymbol index sequence
//
// return: a float32 score represented by the ngram
//
// this method returns the score represented by the ngram given a
// SearchSymbol index sequence. lookup walks the nested hash tables
// one symbol at a time; if the full sequence is not present, the
// standard back-off recursion is applied:
//
//   score(w_1..w_n) = backoff(w_1..w_{n-1}) + score(w_2..w_n)
//
// i.e. the back-off weight of the longest matched prefix plus the
// score of the sequence with the leading (oldest) symbol(s) removed.
// if no prefix matches at all, DEF_LM_SCORE is returned.
//
float32 NGramModel::getScore(const VectorLong& index_a) {

  // define temporary variables used in function:
  //  skipped:    number of symbols consumed up to and including the
  //              first lookup failure (used to shorten the history)
  //  ngnode:     node matched for the current symbol (null on failure)
  //  pre_node:   node matched for the previous symbol (holds the
  //              back-off weight of the matched prefix)
  //  hash_table: current level of the nested ngram hash tables,
  //              starting at the unigram table gram_hash_d
  //
  int32 skipped = 0;
  float32 score = DEF_LM_SCORE;
  NGramNode* ngnode = (NGramNode*)NULL;
  NGramNode* pre_node = (NGramNode*)NULL;
  HashTable* hash_table = &gram_hash_d;
  int32 num_indices = index_a.length();

  // get the correct ngram node
  //
  for (int32 i = 0; i < num_indices; i++) {

    // store the prefix node for reading backoff score
    //
    pre_node = ngnode;

    // find the successive nodes
    //
    ngnode = hash_table->get(index_a(i));

    // if any of these ngram word sequences do not exist
    //
    if (ngnode == (NGramNode*)NULL) {
      skipped++;

      // no prefix matched yet (leading symbol unknown): keep scanning
      // against the same top-level table for the remaining symbols
      //
      if (pre_node == (NGramNode*)NULL) {
	continue;
      }

      // a prefix did match: stop here and back off below
      //
      break;
    }

    // get the hash table of the next gram (descend one order deeper)
    //
    hash_table = ngnode->getNextGram();
  } // end for loop

  // get lm score if ngram exists (the full sequence was matched)
  //
  if (ngnode != (NGramNode*)NULL) {
    score = ngnode->getLmScore();
  }

  // else, use back-off to get ngram score
  //
  else if (pre_node != (NGramNode*)NULL) {

    // get the backoff score of the longest matched prefix
    //
    score = pre_node->getBackoff();

    // add the lm score of the current word for a shorter history list:
    // drop the first 'skipped' symbols and recurse on the remainder
    // (assumes VectorLong::shift with a negative count shifts the
    //  elements left -- verify against the VectorLong class)
    //
    VectorLong shorter_index;
    shorter_index.shift(index_a, -skipped);
    shorter_index.setLength(num_indices - skipped);
    score += getScore(shorter_index);
  }

  // otherwise this word sequence is invalid: no symbol in the sequence
  // was found at any level, so report it and fall through to the
  // default score
  //
  else {
    index_a.debug(L"index_a");
    Console::put(L"*** symbol sequence not found in language model ***");
  }

  // return score and exit gracefully
  //
  return score;
}