// file: $isip/class/pr/LanguageModelJSGF/lmjsgf_03.cc // // there is no source code in this file // // isip include files // #include "LanguageModelJSGF.h" // method: read // // arguments: // Sof& sof: (input) sof file object // int32 tag: (input) sof object instance tag // const String& name: (input) sof object instance name // // return: a bool8 value indicating status // // this method has the object read itself from an Sof file // bool8 LanguageModelJSGF::read(Sof& sof_a, int32 tag_a, const String& name_a) { // read the instance of the object from the Sof file // if (!sof_a.find(name_a, tag_a)) { return false; } // read the actual data from the sof file // if (!readData(sof_a)) { return false; } // exit gracefully // return true; } // method: readData // // arguments: // Sof& sof: (input) sof file object // const String& pname: (input) parameter name // int32 size: (input) size in bytes of object (or full_size) // bool8 param: (input) is the parameter name in the file? // bool8 nested: (input) are we nested? // // return: a bool8 value indicating status // // this method has the object read itself from an Sof file. it assumes // that the Sof file is already positioned correctly. // bool8 LanguageModelJSGF::readData(Sof& sof_a, const String& pname_a, int32 size_a, bool8 param_a, bool8 nested_a) { Integral::DEBUG debug_level = Integral::NONE; SofParser parser; parser.setDebug(debug_level); // are we nested? // if (nested_a) { parser.setNest(); } // load the parse // if (!parser.load(sof_a, size_a)) { return Error::handle(name(), L"readData", Error::READ, __FILE__, __LINE__, Error::WARNING); } // read the start symbol // Vector temp_vector; start_symbol_d.clear(); if (!readSymbolType(sof_a, DEF_LEVEL, PARAM_JSGF_START_SYMBOL, temp_vector, parser)) { return Error::handle(name(), L"readData: could not find start symbol", Error::READ, __FILE__, __LINE__, Error::WARNING); } start_symbol_d.assign(temp_vector(0)); temp_vector.clear(); // read the term symbol // term_symbol_d.clear(); if (!readSymbolType(sof_a, DEF_LEVEL, PARAM_JSGF_TERM_SYMBOL, temp_vector, parser)) { return Error::handle(name(), L"readData: could not find term symbol", Error::READ, __FILE__, __LINE__, Error::WARNING); } term_symbol_d.assign(temp_vector(0)); temp_vector.clear(); // count the number of levels // int32 num_levels = 0; String level_tag(SearchLevel::PARAM_LEVEL_TAG); level_tag.concat(PARAM_UNDERSCORE); level_tag.concat((Long)num_levels); while (parser.isPresent(sof_a, level_tag)) { num_levels++; level_tag.assign(SearchLevel::PARAM_LEVEL_TAG); level_tag.concat(PARAM_UNDERSCORE); level_tag.concat((Long)num_levels); } if (num_levels == 0) { return Error::handle(name(), L"readData: incorrect format", Error::READ, __FILE__, __LINE__, Error::WARNING); } hg_d.setLength(num_levels); // loop over levels and read grammar, context mapping, and symbol // types for each level // for (int level = 0; level < num_levels; level++) { // read the symbol type // readSymbolType(sof_a, level, SearchLevel::PARAM_LEVEL_TAG, temp_vector, parser); hg_d(level).setLevelTag(temp_vector(0)); temp_vector.clear(); // set the level index // hg_d(level).setLevelIndex(level); // check for context in the previous level // bool8 context = false; if (level > 0 && hg_d(level - 1).getContextMap().length() > 0) { context = true; } // read jsgf grammars // if (context || level == 0 ) { readGrammars(sof_a, PARAM_GRAMMARS, level, context, NULL, parser); } else{ readGrammars(sof_a, PARAM_GRAMMARS, level, context, &(hg_d(level - 1).getSymbolTable()), parser); } // read the nonspeech boundary symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_NONSPEECH_BOUNDARY_SYMBOL, hg_d(level).getNonSpeechBoundarySymbolTable(), parser); // read the nonspeech internal symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_NONSPEECH_INTERNAL_SYMBOL, hg_d(level).getNonSpeechInternalSymbolTable(), parser); // read the dummy symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_DUMMY_SYMBOL, hg_d(level).getDummySymbolTable(), parser); // read the exclude symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_EXCLUDE_SYMBOL, hg_d(level).getExcludeSymbolTable(), parser); // read the nsymbol exclude symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_NSYMBOL_EXCLUDE_SYMBOL, hg_d(level).getNSymbolExcludeSymbolTable(), parser); // read the spenalty exclude symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_SPENALTY_EXCLUDE_SYMBOL, hg_d(level).getSPenaltyExcludeSymbolTable(), parser); // read the context less symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_CONTEXTLESS_SYMBOL, hg_d(level).getContextLessSymbolTable(), parser); // read the skip symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_SKIP_SYMBOL, hg_d(level).getSkipSymbolTable(), parser); // read the non adaptation symbols // readSymbolType(sof_a, level, SearchLevel::PARAM_NON_ADAPT_SYMBOL, hg_d(level).getNonAdaptSymbolTable(), parser); // read the context mapping (if it exists) for this level // readContextMapping(sof_a, SearchLevel::PARAM_CONTEXT_MAPPING, level, parser); } // exit gracefully // return true; } // method: readSymbolType // // arguments: // Sof& sof: (input) sof file object // int32 level_a: (input) language model level // const String& name_a: (input) parameter name // Vector symbol_list_a: (output) list of symbols // // return: a bool8 value indicating status // // this method read one rule from the sof file // bool8 LanguageModelJSGF::readSymbolType(Sof& sof_a, int32 level_a, const String& name_a, Vector& symbol_list_a, SofParser& parser_a) { //local variable // JSGFParser jsgf_parser; SofParser parser; String grammar; String algorithm; String param_name(name_a); if (level_a != DEF_LEVEL) { param_name.concat(PARAM_UNDERSCORE); param_name.concat((Long)level_a); } // read the grammar // if (parser_a.isPresent(sof_a, param_name)) { if (!grammar.readData(sof_a, param_name, parser_a.getEntry(sof_a, param_name))){ return Error::handle(name(), L"readData: error reading symbol type", Error::READ, __FILE__, __LINE__, Error::WARNING); } } else { return false; } // set the JSGF parser // jsgf_parser.setExpression(grammar); jsgf_parser.parseJSGF(); Vector temp_string_vector; // get the symbols // temp_string_vector.assign(jsgf_parser.getSymbolList()); symbol_list_a.clear(); // convert strings to search symbols. this should realy be done in // the JSGF parser, but since the entire parser was written using // Strings instead of SearchSymbols, it will take a lot of time to // make the appropriate changes // for (int i = 0; i < temp_string_vector.length(); i++) { symbol_list_a.concat((SearchSymbol)temp_string_vector(i)); } // exit gracefully // return true; } // method: readGrammars // // arguments: // Sof& sof: (input) sof file object // int32 level: (input) language model level // bool8 use_context: (input) are we using context? // const String& name: (input) parameter name // Vector symbol_list: (output) list of symbols // // return: a bool8 value indicating status // // this method reads the grammar for a particular level // bool8 LanguageModelJSGF::readGrammars(Sof& sof_a, const String& name_a, int32 level_a, bool8 use_context_a, Vector* symbol_table_a, SofParser& parser_a) { // symbol list for the given search level // Vector symbol_list; Vector > graph_list; Vector graph_name_list; Vector grammars; Vector > jsgf_graphs; String param_name(name_a); param_name.concat(PARAM_UNDERSCORE); param_name.concat((Long)level_a); // read the grammar string vector // if (parser_a.isPresent(sof_a, param_name)) { if (!grammars.readData(sof_a, param_name, parser_a.getEntry(sof_a, param_name), false, false)) { return Error::handle(name(), L"readData: error reading grammars", Error::READ, __FILE__, __LINE__, Error::WARNING); } } for (int i = 0; i < grammars.length(); i++) { // declare variables // Vector sub_symbol_list; Vector tmp_symbol_list; DiGraph sub_graph; String graph_name; Vector jsgf_graph; // read the actual data from the sof file // readJSGF(sof_a, grammars(i), tmp_symbol_list, sub_graph, graph_name, jsgf_graph); // store JSGF token vector for this graph // jsgf_graphs.concat(jsgf_graph); // convert the symbol list of Strings to // the sub symbol list of SearchSymbols // for (int32 i = 0; i < tmp_symbol_list.length(); i++) { SearchSymbol tmp; tmp.assign(tmp_symbol_list(i)); sub_symbol_list.concat(tmp); } // check if any symbol in sub_symbol_list already exists in symbol_list // bool8 same = false; for(int32 i=0; i 0) { // get the symbol table of the upper level // Vector& symbol_table = *symbol_table_a; Vector graph_symbols; // do we have context mapping ? // if ( use_context_a ){ // set the symbol table // for (int32 j = 0; j < num_graphs; j++) { String symbol; Ulong index; index.assign(j); symbol.assign(CONTEXT_LABEL_PREFIX); symbol.concat(index); graph_symbols.concat(symbol); } } // test the symbol_table // if ( symbol_table_a == NULL && !use_context_a ){ return Error::handle(name(), L"no symbol table of upper level", Error::TEST, __FILE__, __LINE__); } // align the graphs in the graph list // if ( use_context_a ){ alignGraphs(jsgf_graphs, graph_symbols, graph_name_list); } else { alignGraphs(jsgf_graphs, symbol_table, graph_name_list); } } // append the graphs for this level to the // JSGF grammars // grammars_d.concat(jsgf_graphs); // declare a vector of SearchNode graphs // Vector< DiGraph > node_graphs(num_graphs); // number of symbols in the symbol table on this level // int32 num_symbols = symbol_list.length(); for(int32 i=0; i& sub_symbol_list_a: (output) symbol list // DiGraph& graph_a: (output) graph converted from the file // String sub_graph_name_d: (output) grammar name for the graph // // return: a bool8 value indicating status // bool8 LanguageModelJSGF::readJSGF(Sof& sof_a, String grammar_a, Vector& sub_symbol_list_a, DiGraph& sub_graph_a, String& sub_graph_name_a, Vector& jsgf_graph_a) { // declare a JSGF parser // JSGFParser parser; // set the jsgf expression in the parser // parser.setExpression(grammar_a); // parse the JSGF grammar and convert it to a ISIP DiGraph in the parser // parser.parseJSGF(start_symbol_d, term_symbol_d); // store the JSGF Tokens // jsgf_graph_a.assign(parser.getTokenVector()); // pass symbol list of the current JSGF grammar // sub_symbol_list_a = parser.getSymbolList(); // get the DiGraph from the parser // sub_graph_a = parser.getGraph(); // get the grammar name from the parser // String graph_name = parser.getGrammarName(); String sub_str; String delim(L". ;"); int32 pos = 7; while(graph_name.tokenize(sub_str, pos, delim)) { sub_graph_name_a = sub_str; } // exit gracefully // return true; } // method: readContextMapping // // arguments: // Sof& sof: (input) sof file object // const int32 tag: (input) sof object instance tag // SearchLevel& level: (input/output) search level // int32 level_index: (input) search level index // // return: a bool8 value indicating status // // this method has the object read itself from an Sof file according // to the specified name and tag // bool8 LanguageModelJSGF::readContextMapping(Sof& sof_a, const String& name_a, int32 level_a, SofParser& parser_a) { // make sure the symbol table has been readed // int32 num_symbols = (hg_d(level_a).getSymbolTable()).length(); if (num_symbols < 1) { return Error::handle(name(), L"readContextMapping", Error::ARG, __FILE__, __LINE__); } // local variable // Vector context_maps; Vector contexts; String param_name(name_a); // set up the parameter name to match the current level // if (level_a != DEF_LEVEL) { param_name.concat(PARAM_UNDERSCORE); param_name.concat((Long)level_a); } // read the all the contextmaps as vector of string from the given // level-index // if (parser_a.isPresent(sof_a, param_name)) { if (!context_maps.readData(sof_a, param_name, parser_a.getEntry(sof_a, param_name), false, false)) { return Error::handle(name(), L"readContextMapping", Error::READ, __FILE__, __LINE__, Error::WARNING); } } // set the capacity and length // int32 len = context_maps.length(); contexts.setCapacity(len); contexts.setLength(len); // loop over all the context_maps and parse string in each loop // for (int32 k = 0; k < context_maps.length(); k++) { // local variables // String context_map; Vector symbol_list; Vector rule_names; String symbol; Vector context; // set the JSGF parser // JSGFParser jsgf_parser; jsgf_parser.setExpression(context_maps(k)); jsgf_parser.parseJSGF(); // get the data // symbol_list = jsgf_parser.getSymbolList(); jsgf_parser.getPublicRuleNames(rule_names); if (symbol_list.length() > 1 || rule_names.length() > 1) { return Error::handle(name(), L"readContextMapping: the number of rule names or symbols great than one", Error::READ, __FILE__, __LINE__, Error::WARNING); } // get the data string // int32 context_length = rule_names(0).countTokens(L"-"); int32 rule_length = symbol_list(0).countTokens(L"_"); if ( rule_length != 2 ){ return Error::handle(name(), L"readContextMapping: wrong graph name", Error::READ, __FILE__, __LINE__, Error::WARNING); } // get the context // int32 j=0; for (int32 i = 0; i < context_length; i++) { rule_names(0).tokenize(symbol, j, L"-"); context.concat(symbol); } // set this context // if (!contexts(k).setContext(context)) { return Error::handle(name(), L"readContextMapping", Error::READ, __FILE__, __LINE__, Error::WARNING); } // get the context index // j = 0; for (int32 i = 0; i < rule_length; i++) { symbol_list(0).tokenize(symbol, j, L"_"); } // set the context index // Ulong index; index.assign(symbol); if (!contexts(k).setContextIndex(index)) { return Error::handle(name(), L"readContextMapping", Error::READ, __FILE__, __LINE__, Error::WARNING); } } // set the contextmapping in the search level // if (!hg_d(level_a).setContextMap(contexts)) { return Error::handle(name(), L"readContextMapping", Error::READ, __FILE__, __LINE__, Error::WARNING); } // when there is a context mapping table at this level // if (contexts.length() > 0) { // add start and terminal search symbol to the symbol table // Vector& symbol_table = hg_d(level_a).getSymbolTable(); symbol_table.concat(SearchSymbol::NO_LEFT_CONTEXT); symbol_table.concat(SearchSymbol::NO_RIGHT_CONTEXT); // set up start and terminal search node for each subgraph // Vector >& sub_graphs = hg_d(level_a).getSubGraphs(); for (int32 i = 0; i < sub_graphs.length(); i++) { // start vertex // SearchNode* snode_p = new SearchNode(); snode_p->setSearchLevel(&hg_d(level_a)); snode_p->setSymbol(SearchSymbol::NO_LEFT_CONTEXT); sub_graphs(i).getStart()->setItem(snode_p); // terminal vertex // snode_p = new SearchNode(); snode_p->setSearchLevel(&hg_d(level_a)); snode_p->setSymbol(SearchSymbol::NO_RIGHT_CONTEXT); sub_graphs(i).getTerm()->setItem(snode_p); } // insert all context pairs into the context mapping hash table // // The original assignment assumes that the configuration // file, containing context information, has already been // loaded, when in fact it has not. The new assignment gets // the length of the first context in the language model file // and assigns it to total_context_length. If any of the contexts // do not match this length, an error will be generated. // int32 total_context_length = contexts(0).getContext().length(); Context context(total_context_length); // loop over all context pairs // for (int32 i = 0; i < contexts.length(); i++) { // loop over the symbols in the context // for (int32 j = 0; j < total_context_length; j++) { SearchSymbol ss(contexts(i).getContext()(j)); int32 symbol_id = hg_d(level_a).getSymbolIndex(ss); // check whether the symbol is valid // if (symbol_id == -1) { ss.debug(L"symbol"); return Error::handle(name(), L"readContextMapping", Error::ARG, __FILE__, __LINE__); } else { context.assignAndAdvance(symbol_id); } } // insert a context specification into the hash table // Ulong index = contexts(i).getContextIndex(); // check if the context is already in the table // HashTable& context_hash = hg_d(level_a).getContextHash(); Ulong* existing_index = context_hash.get(context); // if this context is not in the table yet, insert it // if (existing_index == NULL) { context_hash.insert(context, &index); } // if the context is already in the table, then check whether // the index of the model is the same as the one from table // else { // if indices are different, explain conflict and return error // if (!existing_index->eq(index)) { contexts(i).debug(L"Context:"); String out; out.concat(L" is already in context mapping table with the index: "); out.concat(*existing_index); out.concat(L"\n while new index is: "); out.concat(index); Console::put(out); return Error::handle(name(), L"readContextMapping", Error::ARG, __FILE__, __LINE__); } // otherwise indices are not conflicting, just print a warning // else { contexts(i).debug(L"Warning: This context is already in the table:"); String out; out.concat(L" is already in context mapping table with the index: "); out.concat(*existing_index); Console::put(out); return Error::handle(name(), L"readContextMapping", Error::ARG, __FILE__, __LINE__); } } } // output the debugging information // if (debug_level_d >= Integral::ALL) { HashTable& context_hash = hg_d(level_a).getContextHash(); context_hash.debug(L"context hash table:"); } } // when there is NO context mapping table at this level // else { // symbols will be mapped to the model with the same index at lower level // Context dummy(1); HashTable& context_hash = hg_d(level_a).getContextHash(); for (ulong i = 0; i < (ulong)num_symbols; i++) { Ulong i_l(i); dummy.assignAndAdvance(i_l); context_hash.insert(dummy, &i_l); } if (debug_level_d >= Integral::ALL) { context_hash.debug(L"dummy context hash table:"); } } // exit gracefully // return true; }