// file: $isip/class/mmedia/XMLParser/xp_03.cc
// version: $Id: xp_03.cc 10284 2005-10-27 20:52:14Z wholland $
//

#if defined(HAVE_EXPAT)

// system include files
//
#include <vector>

// ISIP include files
//
#include "XMLParser.h"

// method: OnPostCreate
//
// arguments: none
//
// return: none
//
// Invoked by Create. Enables the start-element, end-element and
// character-data handlers so that the On* methods below are called.
//
void XMLParser::OnPostCreate () {

  // Enable all the event routines we want
  //
  EnableStartElementHandler ();
  EnableEndElementHandler ();

  // Note: EnableElementHandler will do both start and end
  //
  EnableCharacterDataHandler ();
}

// method: OnStartElement
//
// arguments:
//  const XML_Char* psz_name: (input) a character string containing
//                            the value of the XML tag
//  const XML_Char** papsz_attrs: (input) a character string array
//                            containing the XML tag's attributes, in
//                            alternating name,value,name,value fashion.
//
// return: none
//
// This method is called every time an XML start tag is encountered while
// parsing. It wraps the tag name and attributes in a START_TAG token and
// hands that token to addToken.
//
void XMLParser::OnStartElement (const XML_Char *psz_name,
				const XML_Char **papsz_attrs) {

  // store the start element in a String
  //
  SysString sysstart_element((byte8*)psz_name);
  String start_element;
  start_element.assign(sysstart_element);

  // declare a token to store the parsed tag
  //
  XMLToken temp_token;

  // store the element's type, value, and attributes
  //
  temp_token.init(XMLToken::START_TAG, start_element,
		  (const byte8**)papsz_attrs);

  // add the token to the vector
  //
  addToken(temp_token);

  return;
}

// method: OnEndElement
//
// arguments:
//  const XML_Char* psz_name: (input) a character string containing
//                            the value of the XML tag
//
// return: none
//
// This method is called every time an XML end tag is encountered while
// parsing. It wraps the tag name in an END_TAG token and hands that
// token to addToken.
//
void XMLParser::OnEndElement (const XML_Char *psz_name) {

  // store the end element in a String
  //
  SysString sysend_element((byte8*)psz_name);
  String end_element;
  end_element.assign(sysend_element);

  // declare a token to store the parsed tag
  //
  XMLToken temp_token;

  // store the element's type and value
  //
  temp_token.init(XMLToken::END_TAG, end_element);

  // add the token to the vector
  //
  addToken(temp_token);

  return;
}

// method: OnCharacterData
//
// arguments:
//  const XML_Char* parsed_data: (input) a string containing the character
//                     data found. this also frequently contains invalid
//                     characters following the character data.
//  int nLength: (input) an integer specifying the number of valid
//                     characters in parsed_data.
//
// return: none
//
// this method is called whenever parsing encounters a piece of
// character data not within < > delimiters. the data is split into
// whitespace-delimited pieces; each piece is stored in a CDATA
// XMLToken and handed to addToken. data that is empty or consists
// only of white space produces no tokens.
//
void XMLParser::OnCharacterData (const XML_Char *parsed_data, int nLength) {

  // nothing to do when no valid characters were reported. this also
  // keeps the buffer size computed below strictly positive.
  //
  if (nLength <= 0) {
    return;
  }

  // store the character data element in a String
  //
  // NOTE(review): expat does not NUL-terminate parsed_data, so building
  // the SysString from the bare pointer presumably scans past nLength
  // until a NUL is found. confirm whether a length-bounded constructor
  // is available and prefer it here.
  //
  SysString syschar_data((byte8*)parsed_data);
  String char_data;
  char_data.assign(syschar_data);

  // declare a buffer to store only the fraction of the
  // parsed_data that we want to use. the buffer parsed_data
  // contains not only this character data, but sometimes tags
  // following it as well.
  //
  // the buffer is sized from nLength (plus one byte for the
  // terminator) rather than using a fixed-size stack array, so a
  // long run of character data cannot write past the end of it.
  // the storage is zero-initialized.
  //
  std::vector<byte8> data_buf(
    static_cast<std::vector<byte8>::size_type>(nLength) + 1);

  // get only the part of this buffer that is character data
  // as specified by nLength, (the buffer
  // sometimes has an end tag in it)
  //
  char_data.getBuffer(&data_buf[0], nLength + 1, SysChar::ENCODE_UTF8);
  char_data.assign(&data_buf[0]);

  // do not generate tokens for plain white space
  // (sometimes CDATA handler is called for "\n"
  // chars between tags)
  //
  if (char_data.firstNotSpace() == Integral::NO_POS) {
    return;
  }

  // declare a token to store the parsed tag
  //
  XMLToken temp_token;

  // declare a string to hold the token's value
  //
  String string_token;

  // declare a variable to store the position of a character
  // in a string while tokenizing the string.
  //
  int32 pos = 0;

  // normalize CDATA white space according to SRGS 1.0
  //
  // remove internal space characters, and replace with a single
  // space. internal end line characters are removed already due
  // to the line-by-line parsing.
  //
  // here, multiple sequential tokens in a single cdata parse
  // will be separated into separate tokens
  // (e.g. "a b c" => "a" "b" "c")
  //
  // tokenize parses the next token starting from pos, and stores
  // it in string_token
  //
  while (char_data.tokenize(string_token, pos)) {

    // store the element's type and value
    //
    temp_token.init(XMLToken::CDATA, string_token);

    // add the token to the vector
    //
    addToken(temp_token);

  } // end looping over tokens within single cdata

  return;
}

#endif