// file: $isip/class/pr/LanguageModelXML/lmxml_05.cc // version: $Id: lmxml_05.cc 10356 2006-01-10 18:52:05Z wholland $ // ISIP include files // #include "LanguageModelXML.h" // method: clearXMLGrammar // // arguments: none // // return: a bool8 indicating status // // this method clears everything that has // to do with parsing a single XML grammar, // while leaving the language model intact // bool8 LanguageModelXML::clearXMLGrammar() { rule_nesting_level_d = 0; return (symbol_table_d.clear(Integral::RESET) && temp_rule_d.clear(Integral::RESET) && rules_d.clear(Integral::RESET) && grammar_start_tag_d.clear(Integral::RESET) && grammar_end_tag_d.clear(Integral::RESET)); } // method: displayVector // // arguments: // Vector& targets: (input) a vector of strings to be printed. // // return: a bool8 indicating status // // this method displays a vector as it appears when written // to a file. This allows the information to be viewed // quickly, which is useful for high-level debugging // purposes. // bool8 LanguageModelXML::displayVector(Vector& targets_a) { // declare a sof // Sof sof_0; Sof sof_1; // open a file in write plus mode // SysString file0; // register a temporary file // Integral::makeTemp(file0); File::registerTemp(file0); // open the file // if (!sof_0.open(file0, File::WRITE_PLUS, File::TEXT)) { return Error::handle(name(), L"open", Error::TEST, __FILE__, __LINE__); } // write the vector to the file // targets_a.write(sof_0,1); // close file // sof_0.close(); // declare a string to use to read the file line by line // String line; // declare a string to store the entire digraph // String output; if (!sof_1.open(file0, File::READ_PLUS, File::TEXT)) { return Error::handle(name(), L"open", Error::TEST, __FILE__, __LINE__); } // read the file into a string // sof_1.find((SysString)Vector::name(), 1); while(sof_1.gets(line, Sof::BUFFER_SIZE)) { output.concat(line); output.concat(SysChar::NEWLINE); } // close file // sof_1.close(); File::remove(file0); // output the string // Console::put(output); // indicate success // return true; } // method: debugSearchMethod // // arguments: // int32 (LanguageModelXML::*func_ptr)(int32): (input) a pointer to a method // which is a member of LanguageModelXML, accepts a int32 argument, and // and returns a int32. // // returns: a bool8 indicating status. // // this method prints the grammar in a format useful for // debugging problems in the code - it displays each tag of the grammar // itself alongside the return value a particular token search method // would yield, if it were called. This way, a single search method may be // tested with every possible combination of tags and verified at a glance. // it accepts a function // pointer so that it may accept any method which is given // an index and searches for another index, and outputs the // results in tabular form. // bool8 LanguageModelXML::debugSearchMethod (int32 (LanguageModelXML::*func_ptr)(int32), Vector token_vector_a) { // this method requires the XMLParser, so check for expat // #if defined(HAVE_EXPAT) SysString header(L"|input |output| xml form of input"); SysString column1; SysString column2; SysString column3; SysString output(header); // loop over all tokens in the token vertex vector // for(int32 i=0; i < token_vector_a.length(); i++) { column1.assign(i, L"%-6d"); column2.assign(((*this).*func_ptr)(i),L"%-6d"); int32 token_depth = token_vector_a(i).getDepth(); // start tags visual depth differs from logical depth // if(indexIsA(token_vector_a, i, XMLToken::START_TAG)) { // adjust the depth for proper display (visual depth is different // from logical depth) // token_depth--; } column3.assign(XMLParser::getIndent(token_depth)); column3.concat(token_vector_a(i).toXML()); output.concat(L"\n|"); output.concat(column1); output.concat(L"|"); output.concat(column2); output.concat(L"|"); output.concat(column3); } if(token_vector_a.length() < 1) { Error::handle(name(), L"debugSearchMethod", ERR_DEBG_DSM_NOGR, __FILE__, __LINE__); } else { Console::put(output); } // if expat is not present, this method will generate an error // #else Error::handle(name(), L"debugSearchMethod requires Expat", Error::NOT_IMPLEM, __FILE__, __LINE__); #endif // indicate success // return true; } // method: debugTargetMethod // // arguments: // bool8 (LanguageModelXML::*func_ptr)(int32, Vector): // (input) a pointer to a method // which is a member of LMXML, accepts a int32 and a Vector and // and returns a int32 // // returns: a bool8 indicating status. // // this method prints the grammar in a format useful for // debugging problems in the code - it displays each tag of the grammar // itself alongside the return value a particular arc-target search method // would yield, if it were called. This way, a single search method may be // tested with every possible combination of tags and verified at a glance. // it accepts a function // pointer so that it may accept any method which is given // an index and a Vector and returns another index, and outputs the // results in tabular form. // bool8 LanguageModelXML::debugTargetMethod (int32 (LanguageModelXML::*func_ptr)(int32, Vector&), Vector token_vector_a) { // this method requires the XMLParser, so check for expat // #if defined(HAVE_EXPAT) SysString header(L"|input |output|return| xml form of i/o"); SysString column1; SysString column2; SysString column3; SysString column4; SysString row0(L"\n-----------------------"); SysString output(header); Vector method_output; // loop over all tokens in the token vertex vector // for(int32 i=0; i < token_vector_a.length(); i++) { // assign the input index // column1.assign(i, L"%-6d"); // leave the second column blank // column2.assign(L" "); int32 token_depth = token_vector_a(i).getDepth(); if(indexIsA(token_vector_a, i, XMLToken::START_TAG)) { // adjust the depth for proper display (visual depth is different // from logical depth) // token_depth--; } column3.assign(L" "); column4.assign(XMLParser::getIndent(token_depth)); column4.concat(token_vector_a(i).toXML()); output.concat(row0); output.concat(L"\n|"); output.concat(column1); output.concat(L"|"); output.concat(column2); output.concat(L"|"); output.concat(column3); output.concat(L"|"); output.concat(column4); // clear method_output // method_output.clear(Integral::RESET); // call the method // int32 return_val = ((*this).*func_ptr)(i, method_output); // loop over the output // for(int32 j = 0; j < method_output.length(); j++) { // leave the first column blank // column1.assign(L" "); column2.assign(method_output(j),L"%-6d"); column3.assign(L" "); if(isInBounds(token_vector_a, (int32)method_output(j))) { token_depth = token_vector_a((int32)method_output(j)).getDepth(); } if(indexIsA(token_vector_a, method_output(j), XMLToken::START_TAG)) { // adjust the depth for proper display (visual depth is different // from logical depth) // token_depth--; } column4.assign(XMLParser::getIndent(token_depth)); if(isInBounds(token_vector_a, method_output(j))) { column4.concat(token_vector_a(method_output(j)).toXML()); } output.concat(L"\n|"); output.concat(column1); output.concat(L"|"); output.concat(column2); output.concat(L"|"); output.concat(column3); output.concat(L"|"); output.concat(column4); } column1.assign(L" "); column2.assign(L" "); column3.assign(return_val, L"%-6d"); if(isInBounds(token_vector_a, return_val)) { token_depth = token_vector_a(return_val).getDepth(); } if(indexIsA(token_vector_a, return_val, XMLToken::START_TAG)) { // adjust the depth for proper display (visual depth is different // from logical depth) // token_depth--; } column4.assign(XMLParser::getIndent(token_depth)); if(isInBounds(token_vector_a, return_val)) { column4.concat(token_vector_a(return_val).toXML()); } output.concat(L"\n|"); output.concat(column1); output.concat(L"|"); output.concat(column2); output.concat(L"|"); output.concat(column3); output.concat(L"|"); output.concat(column4); } output.concat(row0); if(token_vector_a.length() < 1) { Error::handle(name(), L"debugTargetMethod", ERR_DEBG_DTM_NOGR, __FILE__, __LINE__); } else { Console::put(output); } // if expat is not present, this method will generate an error // #else Error::handle(name(), L"debugTargetMethod requires Expat", Error::NOT_IMPLEM, __FILE__, __LINE__); #endif // indicate success // return true; } // method: indexIsA // // arguments: // int32 index: (input) the index of an item in the // token_vector_d // // String value: (input) the value to check for // // Vector token_vector: (input) the vector to which the // index belongs. // // this method checks whether the token at index_a has the // value value_a. The value of this function lies in the fact // that it checks whether the index is in bounds at the same time- // something which had to be done in many, many places. // the checking of the token's type // // returns: a bool8 indicating status // bool8 LanguageModelXML::indexIsA(Vector& token_vector_a, int32 index_a, String value_a) { if(!isInBounds(token_vector_a, index_a)) { return false; } if(token_vector_a(index_a).isA(value_a)) { return true; } return false; } // method: indexIsA // // arguments: // int32 index: (input) the index of an item in the // token_vector_a // // XMLToken::TYPE type: (input) the type to check for // // Vector token_vector: (input) the vector to which the // index belongs. // // this method checks whether the token at index_a is of type // type_a // // returns: a bool8 indicating status // bool8 LanguageModelXML::indexIsA(Vector& token_vector_a, int32 index_a, XMLToken::TYPE type_a) { if(!isInBounds(token_vector_a, index_a)) { return false; } if(token_vector_a(index_a).isA(type_a)) { return true; } return false; } // method: isInBounds // // arguments: // int32 index: (input) the index that we wish to ensure // actually exists // // Vector token_vector: (input) the vector to which the // index belongs. // // return: a bool8 indicating status // bool8 LanguageModelXML::isInBounds(Vector& token_vector_a, int32 index_a) { // the index may not be negative, nor equal nor exceed the length // of the vector // if((index_a < 0) || (index_a > (token_vector_a.length() -1))) { return false; } return true; } // method: getRuleID // // arguments: // Rule rule: (input) a vector of xml tokens storing a rule // // return: a String containing the rule name // String LanguageModelXML::getRuleID(Rule rule_a) { if(rule_a.length() > 0) { return rule_a(0).getAttributeValue(ID); } return String::EMPTY; } // method: getRuleIndexByID // // arguments: // String id: (input) the name of the rule that we wish to locate // within the rules_d vector // // return: the index of the sought rule within the vector rules_d // // this method searches the rules_d vector for a rule // whose id attribute matches the string id_a // int32 LanguageModelXML::getRuleIndexByID(String id_a) { // iterate over all rules // for(int32 i = 0; i < rules_d.length(); i++) { // if the id matches, the rule has been found // at index i // if(getRuleID(rules_d(i)).eq(id_a)) { return i; } }// end looping over all rules // if execution reaches here, no rule with the specified // id was found // return Integral::NO_POS; } // method: getDummySymbol // // arguments: none // // return: a String containing the requested dummy symbol // // this method retrieves a suitable dummy symbol for // an automatically generated dummy node (a symbol which // does not exist as a non-dummy in the current grammar). // // String LanguageModelXML::getDummySymbol() { String dummy_symbol; // if no dummy symbol was defined in the grammar start tag, // use the default symbol // if(dummy_symbol.eq(String::EMPTY)) { dummy_symbol = ISIP_DUMMY_NODE; } // return the dummy symbol // return dummy_symbol; } // method: getHandledValues // // arguments: none // // return: // Vector handled_values: a vector of strings // containing all valid token values to be // given to the parser // Vector LanguageModelXML::getHandledValues() { Vector handled_values; handled_values.concat(GRAMMAR); handled_values.concat(RULE); handled_values.concat(RULEREF); handled_values.concat(ITEM); handled_values.concat(ONE_OF); return handled_values; } // method: getGrammarName // // arguments: none // // return: a String containing the name of the grammar, which // is taken from the root rule name. // String LanguageModelXML::getGrammarName() { // create a string that will define the rule in which // the conversion will begin. all grammar's are required // to have a root node. // String root(ROOT); String grammar_name = grammar_start_tag_d.getAttributeValue(root); // if no root rule was specified, select the first rule // in the grammar to begin processing // if(grammar_name.eq(String::EMPTY) && rules_d.length() > 0) { grammar_name = getRuleID(rules_d(0)); } // get the name of the root rule for this grammar // return grammar_name; } // method: tokenizeRepeatValue // // arguments: // XMLToken xml_token: (input) an item tag which may // contain the repeat attribute.x // // return: a Triple containing the following // triple.first(): a int32 containing "M" // triple.second(): a int32 containing "N" // triple.third(): a Boolean indicating whether a loop is // requested (this is indicated by format "M-", which // indicates M *or more* repetitions. // // this method tokenizes the repeat value into // three parts. // the value may have the following forms: // "M", "M-", "M-N", where M and N are integers, // and the - is used as a delimeter which indicates // a range of possible values. // Triple LanguageModelXML::tokenizeRepeatValue (XMLToken xml_token_a) { // declare a triple to store the M, N, and loop request values // Triple repeat_tokens; // assign default values that will indicate a failure to the triple that // will be returned // declare a int32 to store M // Long m = Integral::NO_POS; // decleare a int32 to store n // Long n = Integral::NO_POS; // declare a Boolean to store the loop request value // Boolean loop_request; // declare a variable to store the position of a character // in a string while tokenizing the string. // int32 pos = 0; // declare a string to store individual pieces of the repeat // attribute's value. // String string_token; // store the delimiter by which will will tokenize // String delim(REPEAT_RANGE_DELIM); // get the repeat attribute value // String repeat_value = xml_token_a .getAttributeValue(REPEAT); // if the repeat attribute is not present, return // if(repeat_value.eq(String::EMPTY)) { return repeat_tokens; } // if no "-" is present, then the format // must be "M" // if(repeat_value.countDelimiters(delim) == 0) { int32 m_int32; if(!repeat_value.get(m_int32)) { Error::handle(name(), L"tokenizeRepeatValue", ERR_REPT_IAV, __FILE__, __LINE__); } m.assign(m_int32); } // first iteration will get "M". // else if(repeat_value.tokenize(string_token, pos, delim)) { // store m // int32 m_int32; if(!string_token.get(m_int32)) { Error::handle(name(), L"tokenizeRepeatValue", ERR_REPT_IAV, __FILE__, __LINE__); } m.assign(m_int32); } else { m.assign(Integral::NO_POS); } // Second iteration will get "N", if present. // if(repeat_value.tokenize(string_token, pos, delim)) { // if there is an N value, there is no infinite loop // loop_request.assign(false); // store n // int32 n_int32; if(!string_token.get(n_int32)) { Error::handle(name(), L"tokenizeRepeatValue", ERR_REPT_IAV, __FILE__, __LINE__); } n.assign(n_int32); } else { // if there is no N value, then a loop request is implicit // loop_request.assign(true); // assign a value of "invalid" to n // n.assign(Integral::NO_POS); } // save the lower bound, upper bound, and whether a loop is reqeusted // repeat_tokens.assign(m,n, loop_request); // return the information // return repeat_tokens; } // method: updateSymbolTable // // arguments: // XMLToken xml_token: (input) the token to be checked against // the symbol table, so that any new symbol it might contain can // be added to the symbol table. // // return: a bool8 indicating status // // this method takes a token and updates the symbol table. // each symbol may only appear once in the symbol table, and // only CDATA tokens generate symbols at all. // bool8 LanguageModelXML::updateSymbolTable(XMLToken xml_token_a) { // only add CDATA type tags to the symbol table // if(xml_token_a.getType() != XMLToken::CDATA) { return false; } // convert the token value to the symbol type (String) // String symbol = xml_token_a.getValue(); // do not allow duplicate symbols in tye symbol table // if(!symbol_table_d.contains(&symbol)) { symbol_table_d.concat(symbol); } // indicate success // return true; } bool8 LanguageModelXML::displayXMLModel() { int32 i, j; for (i=0; i