// file: $isip/class/pr/LanguageModelIHD/lmihd_07.cc // // system include files // #include // isip include files // #include "LanguageModelIHD.h" // method: getRuleModel // // arguments: none // // return: a RuleModel object containing BNF rules representing the IHD // // this method converts an IHD object to a set of BNF production rules and // returns a RuleModel object // RuleModel LanguageModelIHD::getRuleModel() { Vector< DiGraph > digraphs; Vector symbol_table; SingleLinkedList vertices; SingleLinkedList< Triple< Pair, Float, Boolean> > arcs; Vector< Vector< Vector > > prod_rule_model; HierarchicalDigraph hg(hg_d); // loop over the levels in the HierarchicalDigraph0 // for (int i=0; i > level_prod_rules; // loop over each graph at this level // for (int j=0; j prod_rules; // for every arc, create a rule: // // (node number)::=(node value), (branch number) // // where: // // '(node number)' = rule_name_d // (node value) = // ',' = // (branch number) = // //vertices.getCurr()->debug(L"current vertex"); // arcs.gotoFirst(); do { ProductionRule rule; if (arcs.getCurr() != NULL) { // create rule name (in the form Rxx where xx is the symbol index) // String rule_name; String symbol_name; int32 rule_index; int32 symbol_index; rule_index = (arcs.getCurr()->first()).first(); symbol_index = (arcs.getCurr()->first()).second(); if (rule_index == DiGraph::START_INDEX) { if (i <= 0) { rule.setRuleName(ProductionRule::START_RULE_NAME); } else { if (hg_d(i-1).getContextMap().length() > 0) { String temp_name; temp_name.concat((Long)j); rule.setRuleName(temp_name); } else { rule.setRuleName(hg_d(i-1).getSymbolTable()(j)); } } rule.setRuleType(ProductionRule::START); symbol_name.assign(ProductionRule::RULE_NAME_BASE); symbol_name.concat((Long)symbol_index); rule.append(ProductionRuleTokenType::NON_TERMINAL, symbol_name, arcs.getCurr()->second()); } else { // need to convert rule_index to a string Rxx where xx is the // symbol index // rule_name.assign(ProductionRule::RULE_NAME_BASE); rule_name.concat((Long)rule_index); rule.setRuleName(rule_name); vertices.gotoPosition(rule_index); symbol_name.assign(symbol_table(*vertices.getCurr())); rule.append(ProductionRuleTokenType::TERMINAL, symbol_name); // append a concatenation symbol to the rule // rule.append(ProductionRuleTokenType::CONCATENATION, ProductionRule::DEF_RULE_NAME, arcs.getCurr()->second()); if (symbol_index == DiGraph::TERM_INDEX) { // add a non_terminal that references a rule to an // epsilon symbol // symbol_name.assign(ProductionRule::TERM_RULE_NAME); rule.append(ProductionRuleTokenType::NON_TERMINAL, symbol_name); prod_rules.concat(rule); // create the epsilon rule // rule.clear(); rule.setRuleName(ProductionRule::TERM_RULE_NAME); rule.append(ProductionRuleTokenType::EPSILON, ProductionRule::DEF_RULE_NAME); } else { symbol_name.assign(ProductionRule::RULE_NAME_BASE); symbol_name.concat((Long)symbol_index); // set symbol name based on symbol index // rule.append(ProductionRuleTokenType::NON_TERMINAL, symbol_name); } } prod_rules.concat(rule); } else { rule.setRuleType(ProductionRule::NORMAL); if (i <= 0) { rule.setRuleName(ProductionRule::START_RULE_NAME); } else { if (hg_d(i-1).getContextMap().length() > 0) { rule.setRuleName((String)((Long)j)); } else { rule.setRuleName(hg_d(i-1).getSymbolTable()(j)); } } prod_rules.concat(rule); } } while (arcs.gotoNext()); level_prod_rules.concat(prod_rules); // after rules have been created for all vertices in the // graph, and the Vector object to a // Vector> object. } // add the Vector> object to a // Vector< Vector < Vector > > object. prod_rule_model.concat(level_prod_rules); } // after iterative over all levels, there will be a // Vector< Vector < Vector > > with the dimensions // (levels) x (graphs in level) x (rules in graph) // // add this object to the typedefed Pair object RuleModel as the // first component, and add the HierarchicalDigraph object as the // second component. the digraphs in the hierarchical digraph // object will be cleared before adding to the RuleModel object // Vector > graphs; // loop over each level and clear the subgraphs // for (int i = 0; i < hg.length(); i++) { hg(i).setSubGraphs(graphs); } // finally, create the RuleModel object // RuleModel rm(prod_rule_model, hg); return rm; } // method: setRuleModel // // arguments: RuleModel rm_a // // return: a bool8 indicating status // // this method converts an IHD object to a set of BNF production rules and // returns a RuleModel object // bool8 LanguageModelIHD::setRuleModel(const RuleModel& rm_a) { typedef Triple< Pair, Float, Boolean> TopoTriple; Vector< DiGraph > digraphs; SingleLinkedList vertices; SingleLinkedList arcs; // assign all HierarchicalDigraph information from RuleModel // to this class's hg_d object // hg_d.assign(rm_a.second()); // loop over all levels // for (int32 i = 0; i < rm_a.first().length(); i++) { // retrieve graphs at this level // Vector bnf_graphs(rm_a.first()(i)); Vector search_symbols = hg_d(i).getSymbolTable(); // loop over all graphs at this level // for (int32 j = 0; j < bnf_graphs.length(); j++) { Vector< Pair > nodes; DiGraph digraph; // loop over all rules in this graph // for (int32 k = 0; k < bnf_graphs(j).length(); k++) { bnf_graphs(j)(k).gotoFirst(); do { if (bnf_graphs(j)(k).length() > 0) { SearchSymbol symbol = bnf_graphs(j)(k).getValue(); // try to identify unique nodes by finding unique pairs of rule // names and terminal tokens. this is an attempt to minimize // the graph as much as possible. // Pair node(bnf_graphs(j)(k).getRuleName(), symbol); if (bnf_graphs(j)(k).getType() == ProductionRuleTokenType::TERMINAL && !nodes.contains(&node)) { nodes.concat(node); } } } while (bnf_graphs(j)(k).gotoNext()); } // insert nodes into digraph // for (int k = 0; k < nodes.length(); k++) { Ulong vertex = search_symbols.first(nodes(k).second()); digraph.insertVertex(&vertex); } // now that we have a list of unique nodes in this graph and a list of // symbols, we can construct the digraph object for this graph. the // list of symbols doesn't contain all symbols at this level yet, // but it does contain the only symbols used in this graph. // a complete symbol list isn't necessary at this point. the // indices of the symbols in the symbol list will remain constant // as we iterate over the graphs, and after all graphs have been // processed, we will have a complete list of symbols // // loop over all graphs and search for concatenation (',') symbols. // all rules are assumed to be // in normalized BNF form. the following rule formats are permitted // // (rule_name) -> (terminal) , (non_terminal) // (rule_name) -> (non_terminal) // (rule_name) -> (terminal) , (epsilon) // (rule_name) -> (epsilon) // for (int k = 0; k < bnf_graphs(j).length(); k++) { bnf_graphs(j)(k).gotoFirst(); if (bnf_graphs(j)(k).getRuleType() == ProductionRule::START) { if (bnf_graphs(j)(k).getType() == ProductionRuleTokenType::NON_TERMINAL) { float32 weight = bnf_graphs(j)(k).getWeight(); if (isEpsilon(bnf_graphs(j), bnf_graphs(j)(k).getValue())) { digraph.insertArc(digraph.getStart(), digraph.getTerm(), GraphArc::DEF_EPSILON, weight); } Vector > terminals = findTerminals(bnf_graphs(j), bnf_graphs(j)(k).getValue()); for (int l = 0; l < terminals.length(); l++) { digraph.insertArc(digraph.getStart(), digraph.getVertex(nodes.first(terminals(l))), GraphArc::DEF_EPSILON, weight); } } else { return Error::handle(L"LanguageModelIHD", L"setRuleModel: invalid start rule", Error::READ, __FILE__, __LINE__, Error::ERROR); } } else if (bnf_graphs(j)(k).getRuleType() == ProductionRule::NORMAL) { // if the first token is a terminal symbol, the following token must // be a concatenation token. following the concatenation will be // either a non_terminal symbol or an epsilon symbol. // if (bnf_graphs(j)(k).getType() == ProductionRuleTokenType::TERMINAL) { // determine which node this is // Pair vertex(bnf_graphs(j)(k).getRuleName(), bnf_graphs(j)(k).getValue()); int32 start_index = nodes.first(vertex); // move forward 1 tokens to the concatenation token // bnf_graphs(j)(k).gotoNext(); // get the weight for this concatenation token. this will be the // weight of the arc // float32 weight = bnf_graphs(j)(k).getWeight(); // move forward again to either a non_terminal or an epsilon // bnf_graphs(j)(k).gotoNext(); if (bnf_graphs(j)(k).getType() == ProductionRuleTokenType::NON_TERMINAL) { // check to see whether or not the non_terminal symbol results // in an epsilon symbol before a terminal symbol. for example: // // (non_terminal1) -> (terminal),(non_terminal2) // (non_terminal2) -> (epsilon) // if (isEpsilon(bnf_graphs(j), bnf_graphs(j)(k).getValue())) { digraph.insertArc(digraph.getVertex(start_index), digraph.getTerm(), GraphArc::DEF_EPSILON, weight); } // if it does not result in an epsilon, find all of the // connecting terminal nodes. // for example: // // (non_terminal1) -> (terminal1),(non_terminal2) // (non_terminal2) -> (terminal2), (non_terminaln) // else { Vector > terminals = findTerminals(bnf_graphs(j), bnf_graphs(j)(k).getValue()); for (int l = 0; l < terminals.length(); l++) { digraph.insertArc(digraph.getVertex(start_index), digraph.getVertex(nodes.first(terminals(l))), GraphArc::DEF_EPSILON, weight); } } } // otherwise, there is an epsilon after the terminal. // else if (bnf_graphs(j)(k).getType() == ProductionRuleTokenType::EPSILON) { digraph.insertArc(digraph.getVertex(start_index), digraph.getTerm(), GraphArc::DEF_EPSILON, weight); } // if none of the cases above, this rule is invalid // else { return Error::handle(L"LanguageModelIHD", L"setRuleModel: invalid rule", Error::READ, __FILE__, __LINE__, Error::ERROR); } } } } digraphs.concat(digraph); } hg_d(i).setSymbolTable(search_symbols); hg_d(i).setTempGraphs(digraphs); hg_d(i).convertDigraphs(); hg_d(i).convertContexts(); digraphs.clear(); } // exit gracefully // return true; } // method: isEpsilon // // arguments: Vector rules // String rule_name // // return: a bool8 indicating whether or not the non_terminal results // in an epsilon token // // determines whether a non_terminal token results in an epsilon token // bool8 LanguageModelIHD::isEpsilon(Vector rules_a, String rule_name_a) { // loop over all rules and look for rule names matching rule_name_a // for (int i = 0; i < rules_a.length(); i++) { if (rules_a(i).getRuleName().eq(rule_name_a)){ rules_a(i).gotoFirst(); // check to see if the first token is an epsilon. if so, return true // if (rules_a(i).getType() == ProductionRuleTokenType::EPSILON) { return true; } // check to see if the first token is a non_terminal symbol. if so, // recursivley check to see if this rule results in an epsilon; // else if (rules_a(i).getType() == ProductionRuleTokenType::NON_TERMINAL) { return isEpsilon(rules_a, rules_a(i).getRuleName()); } } } // this rule does not result in an epsilon // return false; } // method: findTerminals // // arguments: Vector rules // String rule_name // // return: a Vector > containing all // rule_name + terminal token pairs. these correspond // to unique terminal nodes // // finds all unique terminal symbols given a non_terminal symbol // Vector > LanguageModelIHD::findTerminals(Vector rules_a, String rule_name_a) { Vector > terminal_nodes; Pair terminal_node; for (int i = 0; i < rules_a.length(); i++) { if (rules_a(i).getRuleName().eq(rule_name_a)){ rules_a(i).gotoFirst(); if (rules_a(i).getType() == ProductionRuleTokenType::TERMINAL) { terminal_node.assign(rule_name_a, rules_a(i).getValue()); if (!terminal_nodes.contains(&terminal_node)) { terminal_nodes.concat(terminal_node); } } else if (rules_a(i).getType() == ProductionRuleTokenType::NON_TERMINAL) { terminal_nodes.concat(findTerminals(rules_a, rules_a(i).getValue())); } } } return terminal_nodes; }