// file: $isip/class/pr/LanguageModelXML/lmxml_04.cc // version: $Id: lmxml_04.cc 10420 2006-02-06 20:39:23Z may $ //isip include files // #include "LanguageModelXML.h" #ifndef ISIP_XMLTOKEN #include #endif #ifndef ISIP_DIGRAPH #include #endif #ifndef ISIP_SINGLE_LINKED_LIST #include #endif // method: write // // arguments: // Sof& sof: (input) sof file object // int32 tag: (input) sof object instance tag // const String& name: (input) sof object instance name // // return: bool8 value indicating status // // this method has the object write itself to an Sof file // bool8 LanguageModelXML::write(Sof& sof_a, int32 tag_a, const String& name_a) const { int32 obj_size = 0; // figure out size // if (sof_a.isText()) { //dynamic for text // obj_size = Sof::ANY_SIZE; } else { // binary is not yet implemented. // all this does is set the size to the size of hg_d // obj_size = sofSize(); } // write object into sof index // if (!sof_a.put(name_a, tag_a, obj_size)) { return false; } // exit gracefully // return writeData(sof_a); } // method: writeData // // arguments: // Sof& sof: (input) sof file object // const String& pname: (input) parameter name // // return: bool8 value indicating status // // this method has the object write itself to an Sof file. 
// bool8 LanguageModelXML::writeData(Sof& sof_a, const String& pname_a) const { // copy of hg_d // HierarchicalDigraph h_digraph(hg_d); // loop over each level // for (int32 level = 0; level < h_digraph.length(); level++) { // write level tag // writeLevelTag(sof_a, level, h_digraph(level).getLevelTag()); // write the grammars // writeGrammars(sof_a, level, h_digraph); // write the nonspeech boundary symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_NONSPEECH_BOUNDARY_SYMBOL, h_digraph(level).getNonSpeechBoundarySymbolTable()); // write the nonspeech internal symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_NONSPEECH_INTERNAL_SYMBOL, h_digraph(level).getNonSpeechInternalSymbolTable()); // write the dummy symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_DUMMY_SYMBOL, h_digraph(level).getDummySymbolTable()); // write the exclude symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_EXCLUDE_SYMBOL, h_digraph(level).getExcludeSymbolTable()); // write the nsymbol exclude symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_NSYMBOL_EXCLUDE_SYMBOL, h_digraph(level).getNSymbolExcludeSymbolTable()); // write the spenalty exclude symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_SPENALTY_EXCLUDE_SYMBOL, h_digraph(level).getSPenaltyExcludeSymbolTable()); // write the context less symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_CONTEXTLESS_SYMBOL, h_digraph(level).getContextLessSymbolTable()); // write the skip symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_SKIP_SYMBOL, h_digraph(level).getSkipSymbolTable()); // write the non adaptation symbols // writeSymbols(sof_a, level, SearchLevel::PARAM_NON_ADAPT_SYMBOL, h_digraph(level).getNonAdaptSymbolTable()); } return true; } // method: writeLevelTag // // arguments: // Sof& sof_a: (input) sof file object // int32 level_a: (input) level of IHD // const String& tag_a: IHD level tag // // return: bool8 value indicating status // // this method writes the level tag of an IHD level 
// the tag is wrapped in a small XML grammar of the form
// grammar > rule > item > cdata and written under the parameter
// name "search_tag" (suffixed with an underscore and the level number
// when level_a is not DEF_LEVEL). the returned status is that of the
// final String::writeData call.
//
// NOTE(review): in the reviewed copy the Vector and Pair declarations
// in this method appear without template arguments; they look like they
// were stripped during extraction -- confirm against the repository.
//
bool8 LanguageModelXML::writeLevelTag(Sof& sof_a, int32 level_a, const String& tag_a) const {

  // base names: pname is the sof parameter name, level_name is the
  // value used for the root/id attributes (and as fallback cdata)
  //
  String pname(L"search_tag");
  String level_name(L"level");

  // for non-default levels, pname becomes "search_tag" + underscore +
  // level and level_name becomes "level" + level (no underscore)
  //
  if (level_a != DEF_LEVEL) {
    pname.concat(PARAM_UNDERSCORE);
    pname.concat((Long)level_a);
    level_name.concat((Long)level_a);
  }

  // token sequence being built, plus a scratch token that is
  // re-initialized and appended for every element
  //
  Vector search_tag;
  XMLToken temp_token;

  // variable to store root attribute
  //
  Vector< Pair > root_attrib(1);
  root_attrib(0).assign(ROOT_ATTRIB, level_name);

  // variable to store id attribute
  //
  Vector< Pair > id_attrib(1);
  id_attrib(0).assign(ID_ATTRIB, level_name);

  // create start grammar tag. the trailing integer passed to init
  // grows with nesting (0..3) -- presumably the token depth
  //
  temp_token.init(XMLToken::START_TAG, GRAMMAR, root_attrib, (int32)0);
  search_tag.concat(temp_token);

  // create start rule tag
  //
  temp_token.init(XMLToken::START_TAG, RULE, id_attrib, 1);
  search_tag.concat(temp_token);

  // create start item tag
  //
  temp_token.init(XMLToken::START_TAG, ITEM, 2);
  search_tag.concat(temp_token);

  // create cdata tag: use the supplied tag, or fall back to the
  // level name when the supplied tag is empty
  //
  if (!tag_a.eq(L"")) {
    temp_token.init(XMLToken::CDATA, tag_a, 3);
    search_tag.concat(temp_token);
  }
  else {
    temp_token.init(XMLToken::CDATA, level_name, 3);
    search_tag.concat(temp_token);
  }

  // create end item tag
  //
  temp_token.init(XMLToken::END_TAG, ITEM, 2);
  search_tag.concat(temp_token);

  // create end rule tag
  //
  temp_token.init(XMLToken::END_TAG, RULE, 1);
  search_tag.concat(temp_token);

  // create end grammar tag
  //
  temp_token.init(XMLToken::END_TAG, GRAMMAR, (int32)0);
  search_tag.concat(temp_token);

  // convert to string and write to file
  //
  String return_string;
  return_string = tokensToString( search_tag );
  return return_string.writeData(sof_a, pname);
}

// method: writeSymbols
//
// arguments:
//  Sof& sof_a: (input) sof file object
//  int32 level_a: level being written
//  const String& pname_a: parameter name for output
//  Vector symbol_list_a: symbols to be written (passed by value)
//
// return: bool8 value indicating status. note that false is also
// returned when the symbol list is empty and the file is text, in
// which case nothing is written.
//
// this method writes a vector of symbols to a file as
// an XML grammar: one item/cdata/item triple per symbol inside a
// single rule inside a single grammar.
//
// NOTE(review): in the reviewed copy the Vector and Pair declarations
// (including the symbol_list_a parameter type) appear without template
// arguments; they look like they were stripped during extraction --
// confirm against the repository.
//
bool8 LanguageModelXML::writeSymbols(Sof& sof_a, int32 level_a, const String& pname_a, Vector symbol_list_a) const {

  // don't write anything if there are no symbols (text files only).
  //
  if (symbol_list_a.length() == 0 && sof_a.isText()) {
    return false;
  }

  // form the parameter name: pname_a, plus underscore and level number
  // for non-default levels
  //
  String param_name(pname_a);
  if (level_a != DEF_LEVEL) {
    param_name.concat(PARAM_UNDERSCORE);
    param_name.concat((Long)level_a);
  }

  // variable to store symbol grammar
  //
  Vector grammar;

  // token to temporarily store working token
  //
  XMLToken temp_token;

  // variable to store root attribute. note that the attributes use the
  // unsuffixed pname_a, not param_name
  //
  Vector< Pair > root_attrib(1);
  root_attrib(0).assign(ROOT_ATTRIB, pname_a);

  // variable to store id attribute
  //
  Vector< Pair > id_attrib(1);
  id_attrib(0).assign(ID_ATTRIB, pname_a);

  // create and add start grammar token
  //
  temp_token.init(XMLToken::START_TAG, GRAMMAR, root_attrib, 0);
  grammar.concat(temp_token);

  // create and add start rule token
  //
  temp_token.init(XMLToken::START_TAG, RULE, id_attrib, 1);
  grammar.concat(temp_token);

  // loop through all the symbols, create tokens for each,
  // and add them to the grammar
  //
  for (int32 i = 0; i < symbol_list_a.length(); i++) {

    // create and add start tag
    //
    temp_token.init(XMLToken::START_TAG, ITEM, 2);
    grammar.concat(temp_token);

    // create and add cdata token
    //
    temp_token.init(XMLToken::CDATA, symbol_list_a(i), 3);
    grammar.concat(temp_token);

    // create and add end tag
    //
    temp_token.init(XMLToken::END_TAG, ITEM, 2);
    grammar.concat(temp_token);
  }

  // create and add end rule token
  //
  temp_token.init(XMLToken::END_TAG, RULE, 1);
  grammar.concat(temp_token);

  // create and add end grammar token. this call passes an extra
  // Integral::RESET argument, unlike the corresponding call in
  // writeLevelTag
  //
  temp_token.init(XMLToken::END_TAG, GRAMMAR, 0, Integral::RESET);
  grammar.concat(temp_token);

  // convert to string and write to file
  //
  String return_string;
  return_string = tokensToString( grammar );
  return return_string.writeData(sof_a, param_name);
}

// method: writeGrammars
//
// arguments:
//  Sof& sof_a: (input) sof file object
//  int32 level_a: level being written
//  HierarchicalDigraph& h_digraph_a: digraph being converted
//
// return:
// bool8 value indicating status
//
// this method writes all of the grammars in a level to
// a file. each grammar stored in grammars_d for the level is converted
// to an XML string and the resulting vector of strings is written under
// the parameter name PARAM_GRAMMARS (suffixed with an underscore and
// the level number when level_a is not DEF_LEVEL).
//
// the h_digraph_a argument is not read by this method; the grammars
// are taken from grammars_d.
//
// NOTE(review): in the reviewed copy the loop header and the
// declaration of the per-grammar token vector were truncated, and the
// Vector declarations had no template arguments. they are restored here
// from the surrounding code (num_grammars, tokensToString returning a
// String, XMLToken elements) -- confirm against the repository.
//
bool8 LanguageModelXML::writeGrammars(Sof& sof_a, int32 level_a,
                                      HierarchicalDigraph& h_digraph_a) const {

  // variable to store all grammars in level, one XML string each
  //
  Vector<String> all_grammars;

  // variable to store number of grammars in level
  //
  int32 num_grammars = grammars_d(level_a).length();

  for (int32 i = 0; i < num_grammars; i++) {

    // work on a copy of the grammar: tokensToString reassigns the
    // vector it is given, and this method is const
    //
    Vector<XMLToken> grammar;

    // get grammar
    //
    grammar = grammars_d(level_a)(i);

    // add grammar to vector
    //
    all_grammars.concat(tokensToString(grammar));
  }

  // form the parameter name
  //
  String param_name(PARAM_GRAMMARS);
  if (level_a != DEF_LEVEL) {
    param_name.concat(PARAM_UNDERSCORE);
    param_name.concat((Long)level_a);
  }

  // write all grammars to file
  //
  return all_grammars.writeData(sof_a, param_name);
}

// method: removeRedundantItemTags
//
// arguments:
//  Vector& token_vector_a: token vector
//
// return: token vector
//
// this method finds and removes all redundant item tags from
// the input vector. NOTE: it does not correct the depths.
// This is not necessary, as the token-vector -> string conversion
// correctly adjusts depths
//
// an item start tag is treated as redundant when it is immediately
// followed by another item start tag and the two matching end tags are
// adjacent as well, i.e. the outer item contains nothing but the inner
// item. the outer start and end tags are marked for removal.
//
// NOTE(review): in the reviewed copy the Vector and Pair declarations
// appear without template arguments, and the header of the second loop
// (together with the start of its body) is truncated. the code below is
// reproduced as found -- restore it from the repository before use.
//
Vector LanguageModelXML::removeRedundantItemTags( Vector& token_vector_a) const {

  // result vector and the list of indices marked for removal
  //
  Vector return_vector;
  Vector remove_tags;

  // mark tokens for removal. the loop stops one short of the end
  // because each step looks at token i and token i+1
  //
  for (int32 i=0; i<(token_vector_a.length()-1); i++) {

    // two consecutive item start tags
    //
    if (token_vector_a(i).isA(XMLToken::START_TAG) &&
        token_vector_a(i).getValue().eq(ITEM) &&
        token_vector_a(i+1).isA(XMLToken::START_TAG) &&
        token_vector_a(i+1).getValue().eq(ITEM)) {

      // j: index of the end tag found for the outer item (depth of i)
      // k: index of the end tag found for the inner item (depth of i+1)
      //
      int32 j=0;
      int32 k=0;
      j=constFindFollowingTagAtDepth(token_vector_a, i, token_vector_a(i).getDepth(), XMLToken::END_TAG, ITEM);
      k=constFindFollowingTagAtDepth(token_vector_a, i+1, token_vector_a(i+1).getDepth(), XMLToken::END_TAG, ITEM);

      // the inner end tag sits directly before the outer end tag, so
      // the outer pair (indices i and j) is marked for removal
      //
      if (k==(j-1)) {
        remove_tags.concat((Long)i);
        remove_tags.concat((Long)j);
      }
    }
  }

  // loop through and copy all tokens except
  // tokens to be removed
  //
  // NOTE(review): the next loop header is truncated in this copy; the
  // visible remainder merges the attributes of token i and token i-1
  // and stores them on the last token of return_vector, then clears
  // merge_flag. where merge_flag is set cannot be seen from here.
  //
  bool8 merge_flag=false;
  for (Long i=0; i(Long)0) {
        merge_flag=false;
        Vector < Pair > merged_attributes;
        merged_attributes=token_vector_a((int32)i).getAttributes();
        merged_attributes.concat(token_vector_a((int32)(i-(Long)1)).getAttributes());
        return_vector(return_vector.length()-1).setAttributes(merged_attributes);
      }
    }
  }
  return return_vector;
}

// method: constFindFollowingTagAtDepth
//
// arguments:
//  Vector& token_vector: (input) the token vector
//                        in which we want to search
//
//  int32 start_index: (input) the index of the token from
//                     which the forward search begins
//
//  int32 base_depth: (input) the depth of the token we wish
//                    to find
//
//  XMLToken::TYPE type: (input) the type of the token we wish
//                       to find
//
//  String value: (input) the value of the token we wish to find
//
// return: the index of the token that was found
//
// this is a const version of findFollowingTagAtDepth.
//
// this general purpose method finds a tag which matches any of the
// search criteria which are (optionally) specified.
it iterates // forward starting at start_index_a until a token is found whose // depth, type, and value match the search criteria // int32 LanguageModelXML::constFindFollowingTagAtDepth(Vector& token_vector_a, int32 start_index_a, int32 base_depth_a, XMLToken::TYPE type_a, String value_a) const { // prevent vector out of bounds exceptions and unreasonable // depths // if(start_index_a>token_vector_a.length() || base_depth_a < 0) { return Integral::NO_POS; } // iterate backwards until tag is found whose // total depth matches the argument base_depth // and whose other parameters match the specified // search criteria (given by args value_a and // type_a // for better code readability // store the current token // XMLToken current_token; // loop from the start_index until index 0, which is the // very first token in the token_vertex_vector // for(int32 i= start_index_a; i < token_vector_a.length(); i++) { current_token = token_vector_a(i); // if type is null and value is null and depth matches, return // if(type_a == XMLToken::NULL_TAG && value_a.eq(String::EMPTY) && base_depth_a == current_token.getDepth()) { return i; } // if type is null and value matches and depth matches, return // else if(type_a == XMLToken::NULL_TAG && current_token.isA(value_a) && base_depth_a == current_token.getDepth()) { return i; } // if type matches and value is null and depth matches, return // else if (current_token.isA(type_a) && value_a.eq(String::DEF_VALUE) && base_depth_a == current_token.getDepth()) { return i; } // if type matches and value matches and depth matches, return // else if (current_token.isA(type_a) && current_token.isA(value_a) && base_depth_a == current_token.getDepth()) { return i; } }// end looping over all indeces // if we get here, no tag was found // return Integral::NO_POS; } // method: tokensToString // // arguments: // Sof& sof: Vector& token_vector_a // // return: string containing formatted XML. 
// // this method takes a vector of xml tokens and // returns a formatted string of XML to be written // to a file. // String LanguageModelXML::tokensToString(Vector& token_vector_a) const { // remove redundant item tags // token_vector_a=removeRedundantItemTags(token_vector_a); // string to hold formatted grammar // String grammar; // prepare grammar // grammar.concat(XML_VERSION_TAG); grammar.concat(L"\n"); int depth=0; // loop through the vector and create the string // for (int32 i=0; i CDATA combo is found, // print it all on one line instead of nesting it // over several lines // if (token_vector_a(i).isA(XMLToken::START_TAG) && token_vector_a(i).getValue().eq(ITEM) && token_vector_a(i+1).isA(XMLToken::CDATA) && token_vector_a(i+2).isA(XMLToken::END_TAG) && token_vector_a(i+2).getValue().eq(ITEM)) { grammar.concat(token_vector_a(i).toXML()); grammar.concat(L" "); i++; grammar.concat(token_vector_a(i).getValue()); grammar.concat(L" "); i++; grammar.concat(token_vector_a(i).toXML()); depth--; } else { grammar.concat(token_vector_a(i).toXML()); } grammar.concat(L"\n"); } return grammar; }