// file: $isip/class/search/SymbolGraph/sgrp_03.cc // version: $Id: sgrp_03.cc 9346 2003-11-23 16:35:10Z alphonso $ // // isip include files // #include "SymbolGraph.h" #include // method: read // // arguments: // Sof& sof: (input) sof file object // int32 tag: (input) sof object instance tag // const String& name: (input) sof object instance name // // return: a bool8 indicating status // // this method has the object read itself from an Sof file // bool8 SymbolGraph::read(Sof& sof_a, int32 tag_a, const String& name_a) { // get the instance of the object from the Sof file // if (!sof_a.find(name_a, tag_a)) { return false; } // read the actual data from the sof file // if (!readData(sof_a)) { return false; } // exit gracefully // return true; } // method: readData // // arguments: // Sof& sof: (input) sof file object // const String& pname: (input) parameter name // int32 size: (input) number of bytes in file // bool8 param: (input) is the parameter specified? // bool8 nested: (input) is this nested? // // return: logical error status // // this method has the object read itself from an Sof file. it assumes // that the Sof file is already positioned correctly. // bool8 SymbolGraph::readData(Sof& sof_a, const String& pname_a, int32 size_a, bool8 param_a, bool8 nested_a) { // when we are reading text data // if (sof_a.isText()) { return readDataText(sof_a, pname_a, size_a, param_a, nested_a); } // when we are reading binary data // else { return readDataBinary(sof_a); } } // method: readLatticeFormat // // arguments: // Sof& sof: (input) sof file object // const String& pname: (input) parameter name // int32 size: (input) size of the object // bool8 param: (input) is the parameter specified? // bool8 nested: (input) is this nested? // // return: a bool8 value indicating status // // this method has the object read itself from an Sof file. it assumes // that the Sof file is already positioned correctly. // bool8 SymbolGraph::readLatticeFormat(Sof& sof_a, const String& pname_a, int32 size_a, bool8 param_a, bool8 nested_a) { // declare some lists to read the data from // int32 pos = 0; int32 pos1 = 0; Char chr; String buf; String buf1; String buffer; String symbol; Float ac_score; Float lm_score; Long num_arcs; Long num_nodes; Long node_index; Float time_index; Long arc_index; Long start_index; Long stop_index; Vector nodes; Vector > symbols; Vector, Float, Float> > arcs; // read the ngram information such as the number of unigrams, // bigrams etc // while (sof_a.gets(buffer, Sof::BUFFER_SIZE)) { // get the first char // buffer.trim(); chr = buffer(0); // make sure that the line is valid // if (!buffer.eq(DEF_PARAM) && !chr.eq(L'#')) { // tokenize on the '=' delimiter // int32 num_tokens = buffer.countTokens(L"="); // VERSION=1.0 // lmscale=12 // wdpenalty=0 // if (num_tokens == 2) { pos = 0; buffer.tokenize(buf, pos, L"="); buf.trim(); // set the language model scale factor // if (buf.eq(PARAM_SCALE)) { buffer.tokenize(buf, pos, L"="); buf.trim(); if (debug_level_d >= Integral::DETAILED) { buf.debug(L"scale"); } // set language model scale here // scale_d.assign(buf); } // set the acoustic model scale factor // else if (buf.eq(PARAM_PENALTY)) { buffer.tokenize(buf, pos, L"="); buf.trim(); if (debug_level_d >= Integral::DETAILED) { buf.debug(L"penalty"); } // set symbol penalty here // penalty_d.assign(buf); } // skip the version number // else if (buffer.eq(PARAM_VERSION)) { // do nothing in this case // continue; } // unknown format // else { return Error::handle(name(), L"readLatticeFormat", Error::ARG, __FILE__, __LINE__); } } // N=5 L=5 // I=0 t=0 // else if (num_tokens == 3) { pos = 0; buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); // N=5 // if (chr.eq(L'N')) { pos = 0; buf.tokenize(buf1, pos, L"="); buf1.trim(); // error checking // if (!buf1.eq(L"N")) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } buf.tokenize(buf1, pos, L"="); buf1.trim(); num_nodes.assign(buf1); nodes.setCapacity((int32)num_nodes); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"num_nodes"); } buffer.tokenize(buf, pos); buf.trim(); pos = 0; buf.tokenize(buf1, pos, L"="); buf1.trim(); // error checking // if (!buf1.eq(L"L")) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } buf.tokenize(buf1, pos, L"="); buf1.trim(); num_arcs.assign(buf1); arcs.setCapacity((int32)num_arcs); symbols.setCapacity((int32)num_arcs); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"num_arcs"); } } // I=0 // else if (chr.eq(L'I')) { pos = 0; buf.tokenize(buf1, pos, L"="); buf1.trim(); // error checking // if (!buf1.eq(L"I")) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } buf.tokenize(buf1, pos, L"="); buf1.trim(); // add new node here // node_index.assign(buf1); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"node_index"); } if (nodes.length() < (int32)node_index + 1) { nodes.setLength((int32)node_index + 1); } buffer.tokenize(buf, pos); buf.trim(); pos = 0; buf.tokenize(buf1, pos, L"="); buf1.trim(); // error checking // if (!buf1.eq(L"t")) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } buf.tokenize(buf1, pos, L"="); buf1.trim(); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"time_index"); } // set frame index here // time_index.assign(buf1); int32 frame_index = (int32)rint((float64)time_index / (float64)DEF_FRAME_DURATION); nodes((int32)node_index).assign(frame_index); } // unknown format // else { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } } // J=0 S=0 E=2 W=!SENT_START v=0 a=-269.622 l=-227.956 // else if (num_tokens == 8) { // J=0 E=2 W=!SENT_START v=0 a=-269.622 l=-227.956 // pos = 0; buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); if (!chr.eq(L'J')) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } pos1 = 0; buf.tokenize(buf1, pos1, L"="); buf1.trim(); buf.tokenize(buf1, pos1, L"="); buf1.trim(); // add new arc here // arc_index.assign(buf1); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"arc_index"); } if (arcs.length() < (int32)arc_index + 1) { arcs.setLength((int32)arc_index + 1); } if (symbols.length() < (int32)arc_index + 1) { symbols.setLength((int32)arc_index + 1); } // S=0 E=2 W=!SENT_START v=0 a=-269.622 l=-227.956 // buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); if (!chr.eq(L'S')) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } pos1 = 0; buf.tokenize(buf1, pos1, L"="); buf1.trim(); buf.tokenize(buf1, pos1, L"="); buf1.trim(); // set the start index here // start_index.assign(buf1); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"start_index"); } arcs((int32)arc_index).first().first().assign(start_index); // E=2 W=!SENT_START v=0 a=-269.622 l=-227.956 // buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); if (!chr.eq(L'E')) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } pos1 = 0; buf.tokenize(buf1, pos1, L"="); buf1.trim(); buf.tokenize(buf1, pos1, L"="); buf1.trim(); // set the stop index here // stop_index.assign(buf1); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"stop_index"); } arcs((int32)arc_index).first().second().assign(stop_index); // W=!SENT_START v=0 a=-269.622 l=-227.956 // buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); if (!chr.eq(L'W')) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } pos1 = 0; buf.tokenize(buf1, pos1, L"="); buf1.trim(); buf.tokenize(buf1, pos1, L"="); buf1.trim(); // set the symbol here // symbol.assign(buf1); if (debug_level_d >= Integral::DETAILED) { buf1.debug(L"symbol"); } symbols((int32)arc_index).first().assign(start_index); symbols((int32)arc_index).second().assign(stop_index); symbols((int32)arc_index).third().assign(symbol); // v=0 a=-269.622 l=-227.956 // buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); if (!chr.eq(L'v')) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } pos1 = 0; buf.tokenize(buf1, pos1, L"="); buf1.trim(); buf.tokenize(buf1, pos1, L"="); buf1.trim(); // ignore the pronunciation // // a=-269.622 l=-227.956 // buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); if (!chr.eq(L'a')) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } pos1 = 0; buf.tokenize(buf1, pos1, L"="); buf1.trim(); buf.tokenize(buf1, pos1, L"="); buf1.trim(); // set the acoustic model score here // ac_score.assign(buf1); arcs((int32)arc_index).second().assign(ac_score); // l=-227.956 // buffer.tokenize(buf, pos); buf.trim(); // get the first char // chr = buf(0); if (!chr.eq(L'l')) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } pos1 = 0; buf.tokenize(buf1, pos1, L"="); buf1.trim(); // set the language model score here // buf.tokenize(buf1, pos1, L"="); buf1.trim(); // set the language model score here // lm_score.assign(buf1); arcs((int32)arc_index).third().assign(lm_score); } // unknown format // else { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } } } // create a graph using the list of vertex elements and arcs // this->set(nodes, symbols, arcs); // exit gracefully // return true; } // method: readDataText // // arguments: // Sof& sof: (input) sof file object // const String& pname: (input) parameter name // int32 size: (input) size of the object // bool8 param: (input) is the parameter specified? // bool8 nested: (input) is this nested? // // return: a bool8 value indicating status // // this method has the object read itself from an Sof file. it assumes // that the Sof file is already positioned correctly. // bool8 SymbolGraph::readDataText(Sof& sof_a, const String& pname_a, int32 size_a, bool8 param_a, bool8 nested_a) { // first cleanup the list // if (!clear(Integral::RESET)) { return Error::handle(name(), L"readDataBinary", Error::ARG, __FILE__, __LINE__); } // local variables // SofParser parser; parser.setDebug(debug_level_d); // are we nested? // if (nested_a) { parser.setNest(); } // load the parse // if (!parser.load(sof_a, size_a)) { return Error::handle(name(), L"readDataText", Error::READ, __FILE__, __LINE__, Error::WARNING); } // read the two flags // if (!format_d.readData(sof_a, PARAM_FORMAT, parser.getEntry(sof_a, PARAM_FORMAT))) { return Error::handle(name(), L"readDataText", Error::READ, __FILE__, __LINE__, Error::WARNING); } // branch on the format (LATTICE) // if (format_d.eq(DEF_FORMAT)) { if (!readLatticeFormat(sof_a, pname_a, size_a, param_a, nested_a)) { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } } // unsupported file format // else { return Error::handle(name(), L"readDataText", Error::ARG, __FILE__, __LINE__); } // exit gracefully // return true; } // method: readDataBinary // // arguments: // Sof& sof: (input) sof file object // // return: a bool8 value indicating status // // this method has the object read itself from an Sof file. it assumes // that the Sof file is already positioned correctly. // bool8 SymbolGraph::readDataBinary(Sof& sof_a) { // declare some lists to read the data from // Vector nodes; Vector > symbols; Vector, Float, Float> > arcs; // first cleanup the list // if (!clear(Integral::RESET)) { return Error::handle(name(), L"readDataBinary", Error::ARG, __FILE__, __LINE__); } // read the scale // scale_d.readData(sof_a); // read the penalty // penalty_d.readData(sof_a); // read the list of nodes // nodes.readData(sof_a); // read the list of symbols // symbols.readData(sof_a); // read the list of arcs // arcs.readData(sof_a); // create a graph using the list of vertex elements and arcs // this->set(nodes, symbols, arcs); // exit gracefully // return true; }