// file: $isip/class/pr/LanguageModelABNF/lmabnf_06.cc // version: $Id: lmabnf_06.cc 10529 2006-03-29 19:40:48Z may $ // // isip include files // #include "LanguageModelABNF.h" // method: findRightSymbols // // arguments: // ProductionRuleSet rules_a: (input) set of rules representing a graph // int32 index_a: (input) the current rule // int32 nest_level_a: (input) the current nesting level at this position // int32 position_a: (input) the position in the rule // Float weight_a: (input) the arc weight for these right symbols // // return: a WeightedSymbols (see typedef in header file) object containing // all right symbols and their associated arc weights // // this method finds all of the symbols in rule at index_a directly to the // right of position_a // WeightedSymbols LanguageModelABNF::findRightSymbols(ProductionRuleSet& rules_a, int32 index_a, int32 nest_level_a, int32 position_a, Float weight_a) { WeightedSymbols right_symbols; ProductionRule current_rule = rules_a(index_a); if (!current_rule.gotoPosition(++position_a)) { Error::handle(L"LanguageModelABNF", L"findRightSymbols - unable to find right symbols", Error::ERROR, __FILE__, __LINE__); } if (current_rule.getType() ==ProductionRuleTokenType::TERMINAL) { Pair symbol(current_rule.getValue(), weight_a); right_symbols.concat(symbol); } else if (current_rule.getType() ==ProductionRuleTokenType::NON_TERMINAL) { for (int32 i = 0; i < rules_a.length(); i++) { if (rules_a(i).getRuleName().eq(current_rule.getValue())) { // since we want to start at the first token of the rule referenced // by the non_terminal, we set position_a argment to be -1 since // it will be incremented to 0 when findRightSymbols is called // right_symbols.concat(findRightSymbols(rules_a, i, nest_level_a, -1, weight_a)); } } } else if (current_rule.getType() ==ProductionRuleTokenType::OPEN_PAREN) { int32 alternation_index = position_a; int32 temp_position = position_a; Float weight = weight_a; do { current_rule.gotoPosition(alternation_index); // this open parenthesis token has a valid weight if the following // token is a non_terminal // if (current_rule.gotoNext()) { if (current_rule.getType() == ProductionRuleTokenType::NON_TERMINAL) { current_rule.gotoPrev(); weight.assign(current_rule.getWeight()); } } right_symbols.concat(findRightSymbols(rules_a, index_a, nest_level_a, alternation_index, weight)); alternation_index = findToken(current_rule, alternation_index, ProductionRuleTokenType::ALTERNATION, RIGHT); //weight.assign(current_rule.getWeight()); alternation_index++; } while (alternation_index > 0); position_a = temp_position; } else if (current_rule.getType() ==ProductionRuleTokenType::KLEENE_STAR) { int32 concat_index; right_symbols.concat(findRightSymbols(rules_a, index_a, nest_level_a, position_a, weight_a)); concat_index = findToken(current_rule, position_a, ProductionRuleTokenType::CONCATENATION, RIGHT); if (concat_index >= 0) { right_symbols.concat(findRightSymbols(rules_a, index_a, nest_level_a, concat_index, weight_a)); } else { Error::handle(className(), L"findRightSymbols - right concatenation not found", Error::ERROR, __FILE__, __LINE__); } } else if (current_rule.getType() ==ProductionRuleTokenType::KLEENE_PLUS) { right_symbols.concat(findRightSymbols(rules_a, index_a, nest_level_a, position_a, weight_a)); } else if (current_rule.getType() ==ProductionRuleTokenType::EPSILON) { Pair symbol(ProductionRule::TERM_RULE_NAME, weight_a); right_symbols.concat(symbol); } return right_symbols; } // method: findLeftSymbols // // arguments: // ProductionRuleSet rules_a: (input) set of rules representing a graph // int32 index_a: (input) the current rule // int32 nest_level_a: (input) the current nesting level at this position // int32 position_a: (input) the position in the rule // // return: a Vector object containing // all left symbols // // this method finds all of the symbols in rule at index_a directly to the // left of position_a // Vector LanguageModelABNF::findLeftSymbols(ProductionRuleSet& rules_a, int32 index_a, int32 nest_level_a, int32 position_a) { Vector left_symbols; ProductionRule current_rule = rules_a(index_a); if (!current_rule.gotoPosition(--position_a)) { Error::handle(className(), L"findLeftSymbols - unable to find left symbols", Error::ERROR, __FILE__, __LINE__); } if (current_rule.getType() ==ProductionRuleTokenType::TERMINAL) { SearchSymbol symbol = current_rule.getValue(); left_symbols.concat(symbol); } else if (current_rule.getType() ==ProductionRuleTokenType::NON_TERMINAL) { for (int32 i = 0; i < rules_a.length(); i++) { if (rules_a(i).getRuleName().eq(current_rule.getValue())) { int32 last_token = rules_a(i).length() - 1; left_symbols.concat(findLeftSymbols(rules_a, i, nest_level_a, last_token)); } } } else if (current_rule.getType() ==ProductionRuleTokenType::CLOSE_PAREN) { int32 alternation_index = position_a; do { left_symbols.concat(findLeftSymbols(rules_a, index_a, nest_level_a, alternation_index)); alternation_index = findToken(current_rule, alternation_index, ProductionRuleTokenType::ALTERNATION, LEFT); alternation_index--; } while (alternation_index > 0); } return left_symbols; } // method: buildNormBNFRules // // arguments: // Vector left_symbols_a: (input) the left symbols with // which the normalized BNF rules will be created // WeightedSymbols right_symbols_a: (input) the right symbols and // weights with which the normalized BNF rules wil be created // // ProductionRuleSet bnf_rules_a: (output) the resulting normalized BNF rules // // return: a bool8 indicating status // bool8 LanguageModelABNF::buildNormBNFRules(Vector left_symbols_a, WeightedSymbols right_symbols_a, ProductionRuleSet& bnf_rules_a) { if (left_symbols_a.length() == 0 && right_symbols_a.length() == 0) { // no need to make rules // return true; } else if (left_symbols_a.length() == 0) { return Error::handle(className(), L"no left symbols to connect to right symbols", Error::WARNING, __FILE__, __LINE__); } else if (right_symbols_a.length() == 0) { return Error::handle(className(), L"no right symbols to connect to left symbols", Error::WARNING, __FILE__, __LINE__); } for (int32 i=0; i < left_symbols_a.length(); i++) { for (int32 j=0; j < right_symbols_a.length(); j++) { ProductionRule new_rule; String rule_name(ProductionRule::RULE_NAME_BASE); String non_terminal_name(ProductionRule::RULE_NAME_BASE); rule_name.concat(left_symbols_a(i)); // build the rule // new_rule.setRuleName(rule_name); new_rule.append(ProductionRuleTokenType::TERMINAL, left_symbols_a(i)); new_rule.append(ProductionRuleTokenType::CONCATENATION, ProductionRule::DEF_RULE_NAME, right_symbols_a(j).second()); // check to see if the right symbol is the symbol indicating // the end of the graph // if (right_symbols_a(j).first().eq(ProductionRule::TERM_RULE_NAME)) { // create a rule with the non_terminal referencing the // end rule // non_terminal_name = ProductionRule::TERM_RULE_NAME; new_rule.append(ProductionRuleTokenType::NON_TERMINAL, non_terminal_name); if (!bnf_rules_a.contains(&new_rule)) { bnf_rules_a.concat(new_rule); } // create the end rule // new_rule.clear(); new_rule.setRuleName(ProductionRule::TERM_RULE_NAME); new_rule.append(ProductionRuleTokenType::EPSILON, ProductionRule::DEF_RULE_NAME); if (!bnf_rules_a.contains(&new_rule)) { bnf_rules_a.concat(new_rule); } } else { // add a non_terminal referencing the rule corresponding to // the right symbol // non_terminal_name.concat(right_symbols_a(j).first()); new_rule.append(ProductionRuleTokenType::NON_TERMINAL, non_terminal_name); bnf_rules_a.concat(new_rule); } } } return true; } // method: findEndOfNestLevel // // arguments: // ProductionRule current_rule_a: (input) the current rule // int32 index_a: (input) the current position within the rule // DIRECTION dir_a: (input) the direction to search for the end of the nest // level // // return: an integer indicating the position of the end of the nest level // // this method starts from the position in the rule indicated by index_a and // searches in the direction dir_a for the end of the current nest level // int32 LanguageModelABNF::findEndOfNestLevel(ProductionRule current_rule_a, int32 index_a, DIRECTION dir_a) { int32 relative_nest_level = 0; current_rule_a.gotoPosition(index_a); while (relative_nest_level >= 0) { if (dir_a == RIGHT) { if (!current_rule_a.gotoNext()) { return index_a; } else if (current_rule_a.getType() == ProductionRuleTokenType::OPEN_PAREN) { relative_nest_level++; } else if (current_rule_a.getType() == ProductionRuleTokenType::CLOSE_PAREN) { relative_nest_level--; } index_a++; } if (dir_a == LEFT) { if (!current_rule_a.gotoPrev()) { return index_a; } else if (current_rule_a.getType() == ProductionRuleTokenType::OPEN_PAREN) { relative_nest_level--; } else if (current_rule_a.getType() == ProductionRuleTokenType::CLOSE_PAREN) { relative_nest_level++; } index_a--; } } return index_a; } // method: findToken // // arguments: // ProductionRule current_rule_a: (input) the current rule // int32 index_a: (input) the current position within the rule // ProductionRuleTokenType::TYPE token_a: (input) the token we're looking for // DIRECTION dir_a: (input) the direction to search for the token // // return: an integer indicating the position of the token // // this method starts from the position in the rule indicated by index_a and // searches in the direction dir_a for token_a at the current nesting level. // int32 LanguageModelABNF::findToken(ProductionRule current_rule_a, int32 index_a, ProductionRuleTokenType::TYPE token_a, DIRECTION dir_a) { int32 relative_nest_level = 0; current_rule_a.gotoPosition(index_a); while (relative_nest_level >= 0) { if (dir_a == RIGHT) { if (!current_rule_a.gotoNext()) { return -1; } else if (current_rule_a.getType() == ProductionRuleTokenType::OPEN_PAREN) { relative_nest_level++; } else if (current_rule_a.getType() == ProductionRuleTokenType::CLOSE_PAREN) { relative_nest_level--; } else if (current_rule_a.getType() == token_a && relative_nest_level == 0) { return index_a; } index_a++; } if (dir_a == LEFT) { if (!current_rule_a.gotoPrev()) { return index_a; } else if (current_rule_a.getType() == ProductionRuleTokenType::OPEN_PAREN) { relative_nest_level--; } else if (current_rule_a.getType() == ProductionRuleTokenType::CLOSE_PAREN) { relative_nest_level++; } else if (current_rule_a.getType() == token_a && relative_nest_level == 0) { return index_a; } index_a--; } } return -1; } // method: restoreSymbols // // arguments: // ProductionRuleSet rules_a: (output) the rules for which we're // replacing symbols // Vector > chart_a: (input) the chart // matching original symbols with substituted // symbols // // return: a bool8 indicating status // // this method replaces the symbols we substituded with the original symbols // bool8 LanguageModelABNF::restoreSymbols(ProductionRuleSet& rules_a, Vector > chart_a) { for (int32 i=0; i < rules_a.length(); i++) { rules_a(i).gotoFirst(); // loop over all tokens // do { if (rules_a(i).getType() == ProductionRuleTokenType::TERMINAL) { for (int32 j = 0; j < chart_a.length(); j++) { if (chart_a(j).first().eq(rules_a(i).getValue())) { rules_a(i).setValue(chart_a(j).second()); } } } } while (rules_a(i).gotoNext()); } return true; } // method: createStartRules // // arguments: // // RuleModel rule_model_a: (input) the original ABNF rules for this // graph // ProductionRuleSet bnf_rules_a: (output) the resulting BNF rules with // start rules // int32 level_index_a: (input) the current level // int32 graph_index_a: (intput) the current graph at this level // // return: a bool8 indicating status // // this method creates BNF start rules for the current graph. it is // necessary to load the entire rulemodel object here since we need // to access the HierarchicalDigraph object in order to get the // graph names // bool8 LanguageModelABNF::createStartRules(RuleModel rule_model_a, ProductionRuleSet& bnf_rules_a, int32 level_index_a, int32 graph_index_a) { ProductionRuleSet abnf_rules; abnf_rules.assign(rule_model_a.first()(level_index_a)(graph_index_a)); for (int32 i = 0; i < abnf_rules.length(); i++) { WeightedSymbols right_symbols; if (abnf_rules(i).getRuleType() == ProductionRule::START) { abnf_rules(i).gotoFirst(); right_symbols = findRightSymbols(abnf_rules, i, 0, -1, abnf_rules(i).getWeight()); for (int32 j = 0; j < right_symbols.length(); j++) { ProductionRule start_rule; SearchSymbol non_terminal_name(ProductionRule::RULE_NAME_BASE); if (level_index_a > 0) { String rule_name = rule_model_a.second()(level_index_a-1).getSymbolTable()(graph_index_a); start_rule.setRuleName(rule_name); } else { start_rule.setRuleName(ProductionRule::START_RULE_NAME); } // make the rule tye START // start_rule.setRuleType(ProductionRule::START); // add a non_terminal referencing the rule corresponding to // the right symbol // non_terminal_name.concat(right_symbols(j).first()); start_rule.append(ProductionRuleTokenType::NON_TERMINAL, non_terminal_name, right_symbols(j).second()); bnf_rules_a.concat(start_rule); } } } // exit gracefully // return true; }