// file: $isip/class/pr/LanguageModelBNF/lmbnf_07.cc // // system include files // #include // isip include files // #include "LanguageModelBNF.h" // method: minimizeGraph // // arguments: // // return: // // bool8 LanguageModelBNF::minimizeGraph() { RuleModel rm_min; Vector > graph_levels; // loop over levels // for (int32 i = 0; i < rm_d.first().length(); i++) { Vector graphs; // loop over graphs // for (int32 j = 0; j < rm_d.first()(i).length(); j++) { ProductionRuleSet current_graph = rm_d.first()(i)(j); // merge rules // for (int32 k = 0; k < current_graph.length(); k++) { int32 l = 0; while (l < current_graph.length()) { // we don't need to compare a rule to itself // if (k != l) { // since all rules below the merged rule will be // shifted up, we have to adjust the indices // appropriately // if (!mergeRules(current_graph, k, l)) { l++; } else { if (k > l) { k--; } } } else { l++; } } } graphs.concat(current_graph); } graph_levels.concat(graphs); } rm_min.assign(graph_levels, rm_d.second()); // return the minimized graph // return rm_d.assign(rm_min); } // method: mergeRules // // arguments: // // return: bool8 indicating whether or not the two rules were merged // bool8 LanguageModelBNF::mergeRules(ProductionRuleSet& rules_a, int32 index1_a, int32 index2_a) { ProductionRule rule1, rule2; String non_terminal1, non_terminal2; rule1.assign(rules_a(index1_a)); rule2.assign(rules_a(index2_a)); // get the terminal symbol from each rule (1st token). if they're not the // same, these two rules cannot be merged. // rule1.gotoFirst(); rule2.gotoFirst(); // make sure that these rules are valid BNF rules // if (!isValidBNF(rule1) || !isValidBNF(rule2)) { return false; } // if both of these are start rules, we need to check to see if they // are of the form: // // (start rule)->(non_terminal) // // and if they are, we need to see if the terminal referenced by // R0 is the same for both rules. if it is, we can merge these // start rules // if (rule1.getRuleType() == ProductionRule::START && rule1.length() == 1 && rule2.getRuleType() == ProductionRule::START && rule2.length() == 1) { Vector terminals1, terminals2; non_terminal1.assign(rule1.getValue()); non_terminal2.assign(rule2.getValue()); // get all the terminals referenced by the non_terminal // terminals1.assign(findTerminals(rules_a, non_terminal1)); terminals2.assign(findTerminals(rules_a, non_terminal2)); // if either start rule references more than one terminal, then // we can't merge them // if (terminals1.length() != 1 || terminals2.length() != 1) { return false; } // if the terminals are different, then we cant merge the start rules // if (!terminals1(0).eq(terminals2(0))) { return false; } } else { // verify that both rules are mergeable types. that is, make sure that // the rules match this form: // // (rule)->(terminal),(non_terminal) // if (!isMergeType(rule1) || !isMergeType(rule2)) { return false; } if (!rule1.getValue().eq(rule2.getValue()) || !rule1.getRuleName().eq(rule2.getRuleName())) { return false; } // get the terminal referenced by the non_terminal (last token) for // each rule. // Vector terminals1, terminals2; rule1.gotoPosition(MERGE_TYPE_LEN - 1); rule2.gotoPosition(MERGE_TYPE_LEN - 1); non_terminal1.assign(rule1.getValue()); non_terminal2.assign(rule2.getValue()); // goto the concatenation symbol // rule1.gotoPrev(); rule2.gotoPrev(); // we must make sure that the weights on each concatenation // are the same. if not, the rules cannot be merged. // if (rule1.getWeight() != rule2.getWeight()) { return false; } // get all the terminals referenced by the non_terminal // terminals1.assign(findTerminals(rules_a, non_terminal1)); terminals2.assign(findTerminals(rules_a, non_terminal2)); // check to see that the non_terminal for each node only references // one terminal. all sets of BNF rules generated by the IFC LM classes // ensure that each non_terminal only references one terminal. however, // this is not a requirement of BNF, and we can't make this assumption // when minimizing the graphs. if the non_terminal for either rule // references more than one terminal, then the two rules cannot be merged // if (terminals1.length() != 1 || terminals2.length() != 1) { return false; } // if the terminal node isn't the same, we can't merge // if (!terminals1(0).eq(terminals2(0))) { return false; } } // check to see if rule1 directly references rule2. if so, we can only // merge the two rules if one of the rules references itself, i.e. has // a self loop. // if (detectFalseLoop(non_terminal1, non_terminal2, rules_a)) { return false; } // if we reach this point, we should be able to merge the two rules. Find // all non_terminal tokens and rules that match the name of the non_terminal // in the second rule, and rename them to the non_terminal name of the first // rule // for (int32 i = 0; i < rules_a.length(); i++) { if (isValidBNF(rules_a(i))) { if (rules_a(i).getRuleName().eq(non_terminal2)) { rules_a(i).setRuleName(non_terminal1); } rules_a(i).gotoFirst(); if (rules_a(i).getType() == ProductionRuleTokenType::NON_TERMINAL && rules_a(i).getValue().eq(non_terminal2)) { rules_a(i).setValue(non_terminal1); } else if (rules_a(i).length() == MERGE_TYPE_LEN) { rules_a(i).gotoPosition(MERGE_TYPE_LEN - 1); if ( rules_a(i).getValue().eq(non_terminal2)) { rules_a(i).setValue(non_terminal1); } } } } // finally, remove the 2nd rule from the rule set and return true // rules_a.deleteRange(index2_a, 1); return true; } // method: isValidBNF // // arguments: ProductionRule rule // // return: bool8 indicating whether or not the rule is a valid BNF Rule // bool8 LanguageModelBNF::isValidBNF(ProductionRule rule_a) { rule_a.gotoFirst(); // check the types: // // (rule)->(epsilon) // (rule)->(non_terminal) // if (rule_a.length() == 1) { if (rule_a.getType() == ProductionRuleTokenType::EPSILON || rule_a.getType() == ProductionRuleTokenType::NON_TERMINAL) { return true; } } else { // check the type: // // (rule)->(terminal),(non_terminal) // if (!isMergeType(rule_a)) { return false; } } // exit gracefully // return true; } // method: isMergeType // // arguments: ProductionRule rule // // return: bool8 indicating whether or not this is the type of rule that // can be merged // bool8 LanguageModelBNF::isMergeType(ProductionRule rule_a) { rule_a.gotoFirst(); // check the type: // // (rule)->(terminal),(non_terminal) // if (rule_a.length() == 3) { if (rule_a.getType() != ProductionRuleTokenType::TERMINAL) { return false; } rule_a.gotoNext(); if (rule_a.getType() != ProductionRuleTokenType::CONCATENATION) { return false; } rule_a.gotoNext(); if (rule_a.getType() != ProductionRuleTokenType::NON_TERMINAL) { return false; } } else { return false; } return true; } // method: findTerminal // // arguments: ProductionRuleSet rules // String rule_name // // return: a Vector containing all the terminal tokens. // these correspond to unique terminal nodes // // finds all unique terminal symbols given a non_terminal symbol // Vector LanguageModelBNF::findTerminals(ProductionRuleSet rules_a, String rule_name_a) { Vector terminal_nodes; for (int32 i = 0; i < rules_a.length(); i++) { if (rules_a(i).getRuleName().eq(rule_name_a)){ rules_a(i).gotoFirst(); if (rules_a(i).getType() == ProductionRuleTokenType::TERMINAL) { SearchSymbol terminal = rules_a(i).getValue(); if (!terminal_nodes.contains(&terminal)) { terminal_nodes.concat(terminal); } } else if (rules_a(i).getType() == ProductionRuleTokenType::NON_TERMINAL) { terminal_nodes.concat(findTerminals(rules_a, rules_a(i).getValue())); } } } return terminal_nodes; } // method: detectFalseLoop // // arguments: String non_terminal1_a // String non_terminal2_a // // return: bool8 indicating whether or not the two non_terminals // (rule references) result in a false loop. A false loop // occurs when rule1 references rule2, but not vice versa. // bool8 LanguageModelBNF::detectFalseLoop(String non_terminal1_a, String non_terminal2_a, ProductionRuleSet rules_a) { ProductionRuleSet rules1, rules2, rules; bool8 reference1 = false, reference2 = false, selfloop = false; bool8 falseloop = false; rules1.assign(getMergeTypeRules(non_terminal1_a, rules_a)); rules2.assign(getMergeTypeRules(non_terminal2_a, rules_a)); rules.concat(rules1); rules.concat(rules2); for (int32 i = 0; i < rules.length(); i++) { rules(i).gotoPosition(MERGE_TYPE_LEN - 1); if (rules(i).getRuleName().eq(non_terminal1_a) && rules(i).getValue().eq(non_terminal2_a)) { reference1 = true; } if (rules(i).getRuleName().eq(non_terminal2_a) && rules(i).getValue().eq(non_terminal1_a)) { reference2 = true; } if ((rules(i).getRuleName().eq(non_terminal2_a) && rules(i).getValue().eq(non_terminal2_a)) || (rules(i).getRuleName().eq(non_terminal1_a) && rules(i).getValue().eq(non_terminal1_a))) { selfloop = true; } } if (selfloop) { Vector non_terms1, non_terms2; for (int32 j = 0; j < rules1.length(); j++) { rules1(j).gotoPosition(MERGE_TYPE_LEN - 1); if (!rules1(j).getValue().eq(non_terminal1_a) && !rules1(j).getValue().eq(non_terminal2_a)) { non_terms1.concat(rules1(j).getValue()); } } for (int32 j = 0; j < rules2.length(); j++) { rules2(j).gotoPosition(MERGE_TYPE_LEN - 1); if (!rules2(j).getValue().eq(non_terminal2_a) && !rules2(j).getValue().eq(non_terminal1_a)) { non_terms2.concat(rules2(j).getValue()); } } non_terms1.sort(); non_terms2.sort(); if (!non_terms1.eq(non_terms2) && (non_terms1.length() > 0 && non_terms2.length() > 0)) { falseloop = true; } } if (falseloop) { return true; } else if (selfloop || (reference1 && reference2)) { return false; } else if (reference1 || reference2) { return true; } return false; } // method: getMergeTypeRules // // arguments: // // return: // ProductionRuleSet LanguageModelBNF::getMergeTypeRules(String non_terminal_a, ProductionRuleSet rules_a) { ProductionRuleSet merge_type_rules; for (int32 i = 0; i < rules_a.length(); i++) { if (rules_a(i).getRuleName().eq(non_terminal_a)) { if (isMergeType(rules_a(i))) { merge_type_rules.concat(rules_a(i)); } else if (rules_a(i).length() == 1) { ProductionRuleSet temp_rules; temp_rules.concat(getMergeTypeRules(rules_a(i).getValue(), rules_a)); for (int32 j = 0; j < temp_rules.length(); j++) { temp_rules(j).setRuleName(non_terminal_a); } merge_type_rules.concat(temp_rules); } } } return merge_type_rules; }