// file: $isip/class/pr/LanguageModelXML/lmxml_08.cc // version: $Id: lmxml_08.cc 10429 2006-02-14 22:35:30Z wholland $ // ISIP include files // #include "LanguageModelXML.h" // method: setRuleModel // // arguments: // RuleModel& rm_a: rule model to convert // // return: a bool8 value indicating status // // this method initiates the BNF->XML conversion. the resulting // xml grammar is stored in memory // bool8 LanguageModelXML::setRuleModel(const RuleModel& rm_a) { if (debug_level_d>Integral::BRIEF) { Console::put(L"performing BNF->XML conversion"); Console::increaseIndention(); } // clear the ihd and grammars // hg_d.clear(Integral::RESET); grammars_d.clear(Integral::RESET); // first thing's first, save the ihd // hg_d=rm_a.second(); int32 i, j; // loop through each level // for (i=0; i > this_level; if (debug_level_d>Integral::BRIEF) { String out_string(L"level "); out_string.concat((Long)i); Console::put(out_string); Console::increaseIndention(); } // loop through each graph and perform conversion // for (j=0; jIntegral::BRIEF) { String out_string(L"graph "); out_string.concat((Long)j); Console::put(out_string); Console::increaseIndention(); } this_level.concat(convertBNFtoXML(rm_a.first()(i)(j))); if (debug_level_d>Integral::BRIEF) { Console::put(L"XML equivalent:"); Console::increaseIndention(); Console::put(tokensToString(this_level(j))); Console::decreaseIndention(); } } // concat converted grammars onto storage vector // grammars_d.concat(this_level); } return true; } // method: convertBNFtoXML // // arguments: // ProductionRuleSet prset_a: production rules to convert // // return: Vector containing equivalent XML grammar // // this method performs the BNF->XML conversion. this method assumes // that the root rule is marked appropriately. // the root rule is defined as the rule that defines the entire // set (with the aid of other rules). typically, the root rule has // a rule name of "S" (for set). // Vector LanguageModelXML::convertBNFtoXML(ProductionRuleSet prset_a) { ProductionRuleSet working_set; working_set.assign(prset_a); Vector ret_xml; // if we have an empty grammar, add an // empty grammar to the xml and return // if (prset_a.length()==0 || (prset_a.length()==1 && prset_a(0).getRuleType()==ProductionRule::NORMAL && prset_a(0).getRuleName().eq(L""))) { XMLToken empty_grammar; empty_grammar.init(XMLToken::START_AND_END_TAG, GRAMMAR); ret_xml.concat(empty_grammar); return ret_xml; } // figure out the root rule // String root_rule_uri; int32 i, j; for (i=0; i=prset_a.length()) { root_rule_uri=prset_a(0).getRuleName(); } else { for (; i > attrib_array; Pair root_attrib; root_attrib.first().assign(ROOT); root_attrib.second().assign(root_rule_uri); attrib_array.concat(root_attrib); start_grammar_tag.init(XMLToken::START_TAG, GRAMMAR, attrib_array); // add the start grammar tag // ret_xml.concat(start_grammar_tag); // if we have multiple starts, go ahead and // take care of the start rule. otherwise, the // code below this will handle it // if (multiple_starts) { ProductionRuleSet finished_set; ProductionRuleSet return_set; int32 i; for (i=0; i containing all rules named rule_name_a // // this method extracts all rules named rule_name_a from the given set // of production rules. note that these rules are removed from the // input set // ProductionRuleSet LanguageModelXML::extractRule(const String& rule_name_a, ProductionRuleSet& prset_a) const { ProductionRuleSet finished_set; ProductionRuleSet return_set; int32 i; for (i=0; i& token_vector_a: vector to which to add the xml rule // // return: bool8 value indicating status // // this method converts a set of production rule alternatives (production // rules having the same left-hand side) into a single xml format rule // bool8 LanguageModelXML::addRule(ProductionRuleSet& prset_a, Vector& token_vector_a) const { if (prset_a.length()==0) { return true; } int32 i; int32 depth=1; String rule_name; rule_name.assign(prset_a(0).getRuleName()); // add start rule tag // addStartRuleTag(token_vector_a, rule_name, depth); // if there's only one possible expansion, we don't need // a one-of // if (prset_a.length()==1) { addAlternative(prset_a(0), token_vector_a, depth); } // otherwise, each rule is nested inside a one-of // else { // add the start one-of // addStartBranchTag(token_vector_a, depth); for (i=0; i& token_vector_a: vector to which to add the xml rule // // return: bool8 value indicating status // // this method converts a single production rule into xml format // bool8 LanguageModelXML::addAlternative(ProductionRule& pr_a, Vector& token_vector_a, int32 depth_a) const { // if we can go to the first token, the rule is not empty // and we can proceed. if we can't go to the first token, // the rule is empty and we add an epsilon // if (pr_a.gotoFirst()) { // check the type of the first token (it should be one // of these three) and handle it accordingly // if (pr_a.getType()==ProductionRuleTokenType::TERMINAL) { addCDATA(token_vector_a, pr_a.getValue(), depth_a); } else if (pr_a.getType()==ProductionRuleTokenType::NON_TERMINAL) { addRuleRef(token_vector_a, pr_a.getValue(), depth_a); } else if (pr_a.getType()==ProductionRuleTokenType::EPSILON) { addDummyItem(token_vector_a, depth_a); } // even though in theory we should only have at most one // concatenation and one other symbol, we'll allow them // to concat as many as they like // while (pr_a.gotoNext()) { // if the symbol isn't concatenation, skip it // if (pr_a.getType()==ProductionRuleTokenType::CONCATENATION) { // make sure there's a next symbol before proceeding // if (pr_a.gotoNext()) { // if it's a terminal symbol, handle it appropriately // if (pr_a.getType()==ProductionRuleTokenType::TERMINAL) { pr_a.gotoPrev(); // if it has no weight, make it plain ol' cdata // if (pr_a.getWeight().eq(0)) { pr_a.gotoNext(); addCDATA(token_vector_a, pr_a.getValue(), depth_a); } // otherwise, enclose it in one-of and item tags // else { // because of the screwy XML rules, we have to add // a one-of around this item to give it a weight // addStartBranchTag(token_vector_a, depth_a); addStartItemTag(token_vector_a, depth_a, pr_a.getWeight()); pr_a.gotoNext(); addCDATA(token_vector_a, pr_a.getValue(), depth_a); addEndItemTag(token_vector_a, depth_a); addEndBranchTag(token_vector_a, depth_a); } } // if it's a non-terminal, handle it appropriately // else if (pr_a.getType()==ProductionRuleTokenType::NON_TERMINAL) { pr_a.gotoPrev(); // if it has no weight, make it plain ol' cdata // if (pr_a.getWeight().eq(0)) { pr_a.gotoNext(); addRuleRef(token_vector_a, pr_a.getValue(), depth_a); } // otherwise, enclose it in one-of and item tags // else { // because of the screwy XML rules, we have to add // a one-of around this item to give it a weight // addStartBranchTag(token_vector_a, depth_a); addStartItemTag(token_vector_a, depth_a, pr_a.getWeight()); pr_a.gotoNext(); addRuleRef(token_vector_a, pr_a.getValue(), depth_a); addEndItemTag(token_vector_a, depth_a); addEndBranchTag(token_vector_a, depth_a); } } } } } } else { addDummyItem(token_vector_a, depth_a); } return true; } // method: addRuleRef // // arguments: // Vector& token_vector_a: vector to store tag in // String rule_name_a: rule name // int32& depth_a: depth to add token at // // return: bool8 value indicating status // // this method adds a ruleref with name rule_name_a to the token // vector // bool8 LanguageModelXML::addRuleRef(Vector& token_vector_a, String rule_name_a, int32& depth_a) const { XMLToken temp_token; String temp_string; temp_string.assign(L"#"); temp_string.concat(rule_name_a); // attribute vector for start rule tag // Vector< Pair > attrib_array; Pair attrib_pair; attrib_pair.assign(URI, temp_string); attrib_array.concat(attrib_pair); temp_token.init(XMLToken::START_AND_END_TAG, RULEREF, attrib_array, depth_a); token_vector_a.concat(temp_token); return true; } // method: addStartRuleTag // // arguments: // Vector& token_vector_a: vector to store tag in // String rule_name_a: rule name // int32& depth_a: depth to add token at // // return: bool8 value indicating status // // this method adds a start rule tag to the token vector with name // rule_name_a. // bool8 LanguageModelXML::addStartRuleTag(Vector& token_vector_a, String rule_name_a, int32& depth_a) const { XMLToken temp_token; String temp_string; temp_string.assign(rule_name_a); // attribute vector for start rule tag // Vector< Pair > attrib_array; Pair attrib_pair; attrib_pair.assign(ID, temp_string); attrib_array.concat(attrib_pair); temp_token.init(XMLToken::START_TAG, RULE, attrib_array, depth_a++); token_vector_a.concat(temp_token); return true; } // method: addEndRuleTag // // arguments: // Vector& token_vector_a: vector to store tag in // int32& depth_a: depth to add token at // // return: bool8 value indicating status // // this method adds an end rule tag to the token vector // bool8 LanguageModelXML::addEndRuleTag(Vector& token_vector_a, int32& depth_a) const { XMLToken temp_token; temp_token.init(XMLToken::END_TAG, RULE, --depth_a); token_vector_a.concat(temp_token); return true; } // method: addDummyItem // // arguments: // Vector& token_vector_a: vector to store tag in // int32& depth_a: depth to add token at // // return: bool8 value indicating status // // this method adds a NULL ruleref to token_vector_a // bool8 LanguageModelXML::addDummyItem(Vector& token_vector_a, int32& depth_a) const { XMLToken temp_token; Vector< Pair > attrib_array; Pair special_attrib(SPECIAL, SPECIAL_NULL); attrib_array.concat(special_attrib); // create and add token // temp_token.init(XMLToken::START_AND_END_TAG, RULEREF, attrib_array, depth_a); token_vector_a.concat(temp_token); return true; } // method: addCDATA // // arguments: // Vector& token_vector_a: vector to store tag in // int32& depth_a: depth to add token at // // return: bool8 value indicating status // // this method adds CDATA to token_vector_a // bool8 LanguageModelXML::addCDATA(Vector& token_vector_a, const String& cdata_a, int32& depth_a) const { XMLToken temp_token; // create and add token // temp_token.init(XMLToken::CDATA, cdata_a, depth_a); token_vector_a.concat(temp_token); return true; } // method: addStartItemTag // // arguments: // Vector& token_vector_a: working token vector // int32 depth_a: depth // bool8 repeat_a: flag indicating whether to use repeat attribute // // return: bool8 value indicating status // // this method adds a start item tag to the token vector // bool8 LanguageModelXML::addStartItemTag(Vector& token_vector_a, int32& depth_a, float32 weight_a, bool8 repeat_a, float32 repeat_prob_a) const { XMLToken temp_token; String weight_str; weight_str.concat((Float)weight_a); String repeat_prob_str; repeat_prob_str.concat((Float)repeat_prob_a); Vector< Pair > attrib_array; Pair repeat_attrib(REPEAT, REPEAT_LOOP_BACK); Pair weight_attrib(WEIGHT, weight_str); Pair repeat_prob_attrib(REPEAT_PROB, repeat_prob_str); // set up attributes // if (repeat_a) { attrib_array.concat(repeat_attrib); } if (!((Float)weight_a).eq(1)) { attrib_array.concat(weight_attrib); } if (repeat_a && !((Float)repeat_prob_a).eq(1)) { attrib_array.concat(repeat_prob_attrib); } // create and add start tag // temp_token.init(XMLToken::START_TAG, ITEM, attrib_array, depth_a++); token_vector_a.concat(temp_token); return true; } // method: addEndItemTag // // arguments: // Vector& token_vector_a: working token vector // int32 depth_a: depth // // return: bool8 value indicating status // // this method adds an end item tag to the token vector // bool8 LanguageModelXML::addEndItemTag(Vector& token_vector_a, int32& depth_a) const { XMLToken temp_token; // create and add end tag // temp_token.init(XMLToken::END_TAG, ITEM, --depth_a); token_vector_a.concat(temp_token); return true; } // method: addStartBranchTag // // arguments: // Vector& token_vector_a: working token vector // int32 depth_a: depth // // return: bool8 value indicating status // // this method adds a start branch tag to the token vector // bool8 LanguageModelXML::addStartBranchTag(Vector& token_vector_a, int32& depth_a) const { XMLToken temp_token; // create and add start tag // temp_token.init(XMLToken::START_TAG, ONE_OF, depth_a++); token_vector_a.concat(temp_token); return true; } // method: addEndBranchTag // // arguments: // Vector& token_vector_a: working token vector // int32 depth_a: depth // // return: bool8 value indicating status // // this method adds an end branch tag to the token vector // bool8 LanguageModelXML::addEndBranchTag(Vector& token_vector_a, int32& depth_a) const { XMLToken temp_token; // create and add end tag // temp_token.init(XMLToken::END_TAG, ONE_OF, --depth_a); token_vector_a.concat(temp_token); return true; } // method: getXMLModel // // arguments: // none // // return: an xml rule model // // this method returns the XML model in memory // Pair< Vector< Vector< Rule > >, HierarchicalDigraph> LanguageModelXML::getXMLModel() { Pair< Vector< Vector< Rule > >, HierarchicalDigraph> ret; ret.first().assign(grammars_d); ret.second().assign(hg_d); return ret; } // method: setXMLModel // // arguments: // Pair< Vector< Vector< Rule > >, HierarchicalDigraph> xml_model_a: model // // return: bool8 value indicating status // // this method sets the XML model in memory to the specified model // bool8 LanguageModelXML::setXMLModel(Pair< Vector< Vector< Rule > >, HierarchicalDigraph> xml_model_a) { return (grammars_d.assign(xml_model_a.first()) && hg_d.assign(xml_model_a.second())); }