// file: $isip/class/asr/JSGFParser/jp_06.cc // version: $Id: jp_06.cc 10499 2006-03-15 21:43:17Z may $ // // isip include files // #include "JSGFParser.h" // method: parseGrammar // // arguments: none // // return: a bool8 value indicating status // // This method checks through the class-protected token vector to // validate the input JSGF grammar while saving some usefule data // bool8 JSGFParser::parseGrammar() { // declare a current token // JSGFToken curr_tok; // get the number of tokens in the token vector // int32 num_tok = token_vect_d.length(); // make sure the class-protected token index is zero // token_index_d = 0; // set the first token as the current token // if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing grammar header", Error::TEST, __FILE__, __LINE__); } // the first token in the grammar must be a header token // if (curr_tok.getTokenType() == JSGFToken::HEADER) { // move to the next token // token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing grammar name declaration", Error::TEST, __FILE__, __LINE__); } } else { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"missing self-identifying header at the beginning of the grammar", Error::TEST, __FILE__, __LINE__); } // The current token must be a grammar declaration // if (curr_tok.getTokenType() == JSGFToken::GRAMMAR_NAME) { // store the grammar name as a class-protected data // grammar_name_d.assign(curr_tok.grammar_name_d); // move to the next // token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing grammar body", Error::TEST, __FILE__, __LINE__); } } else { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"missing grammar declaration after the header", Error::TEST, __FILE__, __LINE__); } // the current token must be a semi-colon // if (curr_tok.operator_d(0).eq(L";")) { token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing grammar body", Error::TEST, __FILE__, __LINE__); } } else { return Error::handle(name(), L"missing semi-colon in grammar declaration", Error::TEST, __FILE__, __LINE__); } // The current token can be an import grammar declaration // while (curr_tok.getTokenType() == JSGFToken::IMPORT_GRAMMAR_NAME) { // get import rules // //getImport(curr_tok); // move to the next token // token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing grammar body", Error::TEST, __FILE__, __LINE__); } // the current token must be a semi-colon // if (curr_tok.operator_d(0).eq(L";")) { token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing grammar body", Error::TEST, __FILE__, __LINE__); } } else { return Error::handle(name(), L"missing semi-colon in import grammar declaration", Error::TEST, __FILE__, __LINE__); } } // end: while (curr_tok.getTokenType() == 3) // Then starting from the current token, the rule definitions come // so make sure each rule definition is valid // while (token_index_d < num_tok) { parseRuleDefinition(); token_index_d++; } // gracefully exit // return true; } // method: parseRuleDefinition // // arguments: none // // return: a bool8 value indicating status // // This method checks if a rule definition is valid // bool8 JSGFParser::parseRuleDefinition() { // set grouping and optional grouping match flag as zero // grouping_match_d = 0; op_grouping_match_d = 0; // get the total number of tokens // int32 num_tok = token_vect_d.length(); // initalize current token // JSGFToken curr_tok = token_vect_d(token_index_d); // The first token can be a public keyword // if (curr_tok.getTokenType() == JSGFToken::KEYWORD) { token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing rule definition", Error::TEST, __FILE__, __LINE__); } } // The current token can be a rule name // if (curr_tok.getTokenType() == JSGFToken::RULE_NAME) { token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing rule definition", Error::TEST, __FILE__, __LINE__); } } // error if the current token is neither of the above // else { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"invalid rule definition", Error::TEST, __FILE__, __LINE__); } // The current token must be operator= // if (curr_tok.operator_d(0).eq(L"=")) { token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing rule exapnsion for rule definition", Error::TEST, __FILE__, __LINE__); } } else { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"missing operator= in the rule definition", Error::TEST, __FILE__, __LINE__); } // starting from the current token, check if a valid rule expansion exists // if there isn't an expansion, this is a dummy symbol // if (curr_tok.operator_d(0).ne(L";")) { parseRuleExpansion(); } // check grouping flags // if (grouping_match_d != 0) { return Error::handle(name(), L"missing left brace ( in grouping", Error::TEST, __FILE__, __LINE__); } if (op_grouping_match_d != 0) { return Error::handle(name(), L"missing left bracket [ in optional grouping", Error::TEST, __FILE__, __LINE__); } // gracefully exit // return true; } // method: parseRuleExpansion // // arguments: none // // return: a bool8 value indicating status // // This method checks if a rule expansion is valid // bool8 JSGFParser::parseRuleExpansion() { // get the total number of tokens // int32 num_tok = token_vect_d.length(); // initalize current token // JSGFToken curr_tok = token_vect_d(token_index_d); while (curr_tok.operator_d(0).ne(L";") && curr_tok.operator_d(0).ne(L")") && curr_tok.operator_d(0).ne(L"]")) { // bool8 variable to check if alternative is weighted or not // bool8 weighted = false; // the current token can be weight // if (curr_tok.getTokenType() == JSGFToken::WEIGHT) { // update the weighted flag // weighted = true; // move to the next // token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing semi-colon for rule definition", Error::TEST, __FILE__, __LINE__); } } // the current token must lead to a unit // parseUnit(); curr_tok = token_vect_d(token_index_d); // current token can be a vertical bar | indicating alternative // if (curr_tok.operator_d(0).eq(L"|")) { token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing semi-colon for rule definition", Error::TEST, __FILE__, __LINE__); } // semi-colon can not follow vertical bar // if (curr_tok.operator_d(0).eq(L";") || curr_tok.operator_d(0).eq(L")") || curr_tok.operator_d(0).eq(L"]")) { return Error::handle(name(), L"invalid rule component after the vertical bar |", Error::TEST, __FILE__, __LINE__); } // if a weight is specified for one item in a set of alternatives // then a weight must be specified for every item // if a weight token is set for the unit before the vertical bar | // the current token is required to be weight // if ((weighted) && (curr_tok.getTokenType() != JSGFToken::WEIGHT)) { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"no matching weight token after the vertical bar |", Error::TEST, __FILE__, __LINE__); } // error if a weight token is not set for the unit before // the vertical bar | while the current token is weight token // if ((!weighted) && (curr_tok.getTokenType() == JSGFToken::WEIGHT)) { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"no matching weight token before the vertical bar |", Error::TEST, __FILE__, __LINE__); } } } // end: while (curr_tok.operator_d(0).ne(L";") && ...) // update grouping match flags // if (curr_tok.operator_d(0).eq(L")")) { grouping_match_d--; } else if (curr_tok.operator_d(0).eq(L"]")) { op_grouping_match_d--; } // gracefully exit // return true; } // method: parseUnit // // arguments: none // // return: a bool8 value indicating status // // This method checks if a rule expansion is valid // bool8 JSGFParser::parseUnit() { // get the total number of tokens // int32 num_tok = token_vect_d.length(); // initalize current token // JSGFToken curr_tok = token_vect_d(token_index_d); // the current can be terminal symbol, quoted token or rule reference // if ((curr_tok.getTokenType() == JSGFToken::RULE_NAME) || (curr_tok.getTokenType() == JSGFToken::TERMINAL) || (curr_tok.getTokenType() == JSGFToken::QUOTED_TOKEN)) { // move to the next // token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing semi-colon for rule definition", Error::TEST, __FILE__, __LINE__); } } // the current token can lead to a grouping with () parentheses // else if (curr_tok.operator_d(0).eq(L"(")){ grouping_match_d++; parseGrouping(); curr_tok = token_vect_d(token_index_d); } // the current token can lead to an optional grouping with [] brackets // else if (curr_tok.operator_d(0).eq(L"[")){ op_grouping_match_d++; parseOptionalGrouping(); curr_tok = token_vect_d(token_index_d); } // otherwise invalid // else { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"invalid use of rule component at this position", Error::TEST, __FILE__, __LINE__); } // the current token can be a unary operator * or + // if (curr_tok.operator_d(0).eq(L"*") || curr_tok.operator_d(0).eq(L"+")) { token_index_d++; if (!(token_index_d < num_tok)) { return Error::handle(name(), L"missing semi-colon for rule definition", Error::TEST, __FILE__, __LINE__); } } // or the current token can also be a tag // else if (curr_tok.getTokenType() == JSGFToken::TAG) { // multiple tags followed are allowed // while (curr_tok.getTokenType() == JSGFToken::TAG) { token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing semi-colon for rule definition", Error::TEST, __FILE__, __LINE__); } } } // gracefully exit // return true; } // method: parseGrouping // // arguments: none // // return: a bool8 value indicating status // // This method checks if a rule expansion is valid // bool8 JSGFParser::parseGrouping() { // get the total number of tokens // int32 num_tok = token_vect_d.length(); // declare current token // JSGFToken curr_tok; // move to the next // token_index_d++; curr_tok = token_vect_d(token_index_d); // make sure the current token is not right brace ) // empty grouping () is illegal // if (curr_tok.operator_d(0).eq(L")")) { return Error::handle(name(), L"empty grouping () is illegal", Error::TEST, __FILE__, __LINE__); } // followed can be any valid rule expansion // if (token_index_d < num_tok) { parseRuleExpansion(); } else { return Error::handle(name(), L"invalid using ( left brace", Error::TEST, __FILE__, __LINE__); } // get the current token // curr_tok = token_vect_d(token_index_d); // the current token must be ) right parentheses // if (curr_tok.operator_d(0).eq(L")")) { // move to the next // token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing semi-colon for rule definition", Error::TEST, __FILE__, __LINE__); } } else { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"missing ) right brace in grouping", Error::TEST, __FILE__, __LINE__); } // gracefully exit // return true; } // method: parseOptionalGrouping // // arguments: none // // return: a bool8 value indicating status // // This method checks if a rule expansion is valid // bool8 JSGFParser::parseOptionalGrouping() { // get the total number of tokens // int32 num_tok = token_vect_d.length(); // declare current token // JSGFToken curr_tok; // move to the next // token_index_d++; curr_tok = token_vect_d(token_index_d); // make sure the current token is not right brace ) // empty optional grouping [] is illegal // if (curr_tok.operator_d(0).eq(L"]")) { return Error::handle(name(), L"empty optional grouping [] is illegal", Error::TEST, __FILE__, __LINE__); } // followed can be any valid rule expansion // if (token_index_d < num_tok) { parseRuleExpansion(); } else { return Error::handle(name(), L"invalid using [ left brace", Error::TEST, __FILE__, __LINE__); } // get the current token // curr_tok = token_vect_d(token_index_d); // the current token must be ] right bracket // if (curr_tok.operator_d(0).eq(L"]")) { // move to the next // token_index_d++; if (token_index_d < num_tok) { curr_tok = token_vect_d(token_index_d); } else { return Error::handle(name(), L"missing semi-colon for rule definition", Error::TEST, __FILE__, __LINE__); } } else { Console::put(L"***unexpected token:"); curr_tok.printToken(); return Error::handle(name(), L"missing ] right bracket in optional grouping", Error::TEST, __FILE__, __LINE__); } // gracefully exit // return true; }