quick start:
  g++ [flags ...] file ... -l /isip/tools/lib/$ISIP_BINARY/lib_mmedia.a
  #include <LanguageModelXML.h>
  boolean read(Sof& sof, long tag, const String& cname = CLASS_NAME);
  RuleModel getRuleModel();
description:
  Sof sof;
  sof.open("lm_xml_obj.sof", File::READ_ONLY);
  LanguageModelXML lmxml;
  lmxml.read(sof, 0);
  LanguageModelIHD lmihd;
  lmihd.setRuleModel(lmxml.getRuleModel());
  HierarchicalDigraph hg;
  hg.assign(lmihd.getHDigraph());
static const String LanguageModelXML::CLASS_NAME = L"LanguageModelXML";
static const String LanguageModelXML::DEF_PARAM = L"";
static const long LanguageModelXML::DEF_LEVEL = -1;
static const String LanguageModelXML::PARAM_GRAMMARS = L"grammars";
static const String LanguageModelXML::PARAM_UNDERSCORE = L"_";
static const long LanguageModelXML::DEF_TAG = 0;
static const String LanguageModelXML::CONTEXT_LABEL_PREFIX = L"G_";
static const float LanguageModelXML::DEF_WEIGHT = 1.0;
static const String LanguageModelXML::SPECIAL_GARBAGE = L"GARBAGE";
static const String LanguageModelXML::SPECIAL_VOID = L"VOID";
static const String LanguageModelXML::SPECIAL_NULL = L"NULL";
static const String LanguageModelXML::ONE_OF = L"one-of";
static const String LanguageModelXML::ITEM = L"item";
static const String LanguageModelXML::RULEREF = L"ruleref";
static const String LanguageModelXML::RULE = L"rule";
static const String LanguageModelXML::GRAMMAR = L"grammar";
static const String LanguageModelXML::WEIGHT = L"weight";
static const String LanguageModelXML::REPEAT = L"repeat";
static const String LanguageModelXML::REPEAT_PROB = L"repeat-prob";
static const String LanguageModelXML::ROOT = L"root";
static const String LanguageModelXML::URI = L"uri";
static const String LanguageModelXML::SPECIAL = L"special";
static const String LanguageModelXML::ID = L"id";
static const String LanguageModelXML::RULEREF_URI_LOCAL_DELIM = L"#";
static const String LanguageModelXML::REPEAT_RANGE_DELIM = L"-";
static const String LanguageModelXML::REPEAT_LOOP_BACK = L"1-";
static const String LanguageModelXML::ISIP_DUMMY_NODE = L"isip_rule_null";
static const String LanguageModelXML::XML_VERSION_TAG = L"";
static const String LanguageModelXML::ROOT_ATTRIB = L"root";
static const String LanguageModelXML::ID_ATTRIB = L"id";
static const String LanguageModelXML::ALGORITHM = L"algorithm";
static const String LanguageModelXML::XML = L"XML";
static const String LanguageModelXML::IMPLEMENTATION = L"IHD";
static const String LanguageModelXML::IHD = L"IHD";
static const long LanguageModelXML::LOOP_BACK = 0;
static const long LanguageModelXML::TERMINAL_INDEX = -2;
static const long LanguageModelXML::START_INDEX = -1;
static const long ERR = 100800;
static const long ERR_CTXT_NO_SYM_TAB = 100801;
static const long ERR_CTXT_INV_LENGTHS = 100802;
static const long ERR_CTXT_INV_GRAPH_ALIGN = 100803;
static const long ERR_CTXT_INV_SYM = 100804;
static const long ERR_READ_SYM_TYPE = 100805;
static const long ERR_CTXT_SCV = 100806;
static const long ERR_CTXT_INV_SYM_TAB = 100807;
static const long ERR_CTXT_INV_TOKEN_INDEX = 100808;
static const long ERR_CTXT_ESHG = 100809;
static const long ERR_CTXT_EXISTS_DIF_INDEX = 100810;
static const long ERR_CTXT_EXISTS_SAME_INDEX = 100811;
static const long ERR_DEBG_TEMP_FILE = 100812;
static const long ERR_DEBG_DSM_NOGR = 100813;
static const long ERR_DEBG_DTM_NOGR = 100814;
static const long ERR_REPT_IAV = 100815;
static const long ERR_GRAMMAR_NO_START = 100816;
static const long ERR_GRAMMAR_NO_END = 100817;
static const long ERR_GRAMMAR_EMPTY = 100818;
static const long ERR_GRAMMAR_FORMAT = 100819;
static const long ERR_RULEREF_NOT_FOUND = 100820;
static const long ERR_RULEREF_NON_LOCAL = 100821;
static const long ERR_ONE_OF_EMPTY = 100822;
static const long ERR_RULE_EMPTY = 100823;
static const long ERR_TOKEN_UNHANDLED = 100824;
static const long ERR_RULE_SPECIAL_INV = 100825;
static const long ERR_VERTEX_STORE = 100826;
long rule_nesting_level_d;
Vector< Vector < Vector< XMLToken > > > grammars_d;
HierarchicalDigraph hg_d;
Vector< String > symbol_table_d;
Rule temp_rule_d;
Vector<Rule> rules_d;
XMLToken grammar_start_tag_d;
XMLToken grammar_end_tag_d;
static const String& name();
static boolean diagnose(Integral::DEBUG debug_level);
boolean debug(const unichar* msg) const;
LanguageModelXML();
boolean read(Sof& sof, long tag, const String& cname = CLASS_NAME);
boolean readData(Sof& sof, const String& pname = DEF_PARAM, long size = SofParser::FULL_OBJECT, boolean param = true, boolean nested = false);
boolean write(Sof& sof, long tag, const String& cname = CLASS_NAME) const;
boolean writeData(Sof& sof, const String& pname = DEF_PARAM) const;
long sofSize() const;
boolean assign(const LanguageModelXML& arg);
boolean assign(const LanguageModelBase& arg);
LanguageModelXML& operator=(const LanguageModelXML& arg);
boolean eq(const LanguageModelXML& arg) const;
boolean eq(const LanguageModelBase& arg) const;
LanguageModelXML& operator==(const LanguageModelXML& arg);
boolean clear(Integral::CMODE ctype);
RuleModel getRuleModel();
boolean setRuleModel(const RuleModel& rm_a);
Pair< Vector< Vector< Rule > >, HierarchicalDigraph> getXMLModel();
boolean setXMLModel(Pair< Vector< Vector< Rule > >, HierarchicalDigraph> xml_model_a);
boolean readXMLGrammars( Sof& sof, const String& gname, long level, boolean use_context, Vector< SearchSymbol >* symbol_table, SofParser& parser );
boolean parseXMLGrammar(String grammar_a, Vector< String >& sub_symbol_list_a, Vector< XMLToken >& token_vector_a, String& grammar_name_a);
boolean alignGraphs(Vector< Vector< XMLToken > >& grammar_list_a, Vector< SearchSymbol >& symbol_table_a, Vector< String >& graph_name_list_a);
boolean readSymbolType( Sof& sof, long level, const String& pname, Vector< SearchSymbol >& symbol_list, SofParser& parser );
boolean readContextMapping(Sof& sof, const String& pname, long level, SofParser& parser);
boolean addDummyItem(Vector< XMLToken >& token_vector_a, long& depth_a) const;
boolean addEndBranchTag(Vector< XMLToken >& token_vector_a, long& depth_a) const;
boolean addEndItemTag(Vector< XMLToken >& token_vector_a, long& depth_a) const;
boolean addStartBranchTag(Vector< XMLToken >& token_vector_a, long& depth_a) const;
boolean addStartItemTag(Vector< XMLToken >& token_vector_a, long& depth_a, float weight_a=1, boolean repeat_a=0, float repeat_prob_a=1) const;
boolean writeGrammars(Sof& sof_a, long level_a, HierarchicalDigraph& h_digraph_a) const;
boolean writeLevelTag(Sof& sof_a, long level_a, const String& tag_a) const;
boolean addCDATA(Vector< XMLToken >& token_vector_a, const String& cdata_a, long& depth_a) const;
boolean addStartRuleTag(Vector< XMLToken >& token_vector_a, String rule_name_a, long& depth_a) const;
boolean addEndRuleTag(Vector< XMLToken >& token_vector_a, long& depth_a) const;
boolean writeSymbols(Sof& sof_a, long level_a, const String& pname_a, Vector< SearchSymbol > symbol_list_a) const;
long LanguageModelXML::constFindFollowingTagAtDepth(Vector< XMLToken >& token_vector_a, long start_index_a, long base_depth_a, XMLToken::TYPE type_a, String value_a) const;
String tokensToString(Vector< XMLToken >& token_vector_a) const;
Vector< XMLToken > removeRedundantItemTags(Vector< XMLToken >& token_vector_a) const;
Vector< XMLToken > convertBNFtoXML(ProductionRuleSet prset_a);
ProductionRuleSet extractRule(const String& rule_name_a, ProductionRuleSet& prset_a) const;
boolean addRule(ProductionRuleSet& prset_a, Vector< XMLToken >& token_vector_a) const;
boolean addAlternative(ProductionRule pr_a, Vector< XMLToken >& token_vector_a, long depth_a) const;
ProductionRule convertXMLtoABNF(Vector< XMLToken > token_vector_a);
boolean appendProduction(ProductionRule& prod1_a, ProductionRule prod2_a);
Vector< XMLToken > getRange(long start_a, long end_a, Vector< XMLToken > vector_a);
long findNextItem(long start_a, Vector< XMLToken > vector_a);
boolean preProcessXMLGrammar(Vector< XMLToken >& token_vector);
boolean validateXMLGrammar();
boolean partitionGrammar(Vector< XMLToken >& token_vector);
boolean expandXMLRules();
boolean expandXMLRule(String rule_id);
boolean handleStartAndEndElement (XMLToken xml_token);
boolean handleStartElement (XMLToken xml_token);
boolean handleEndElement (XMLToken xml_token);
boolean handleCharacterData (XMLToken xml_token);
boolean handleSpecialRules(Vector< XMLToken >& token_vector);
boolean handleRepeatRequests(Vector< XMLToken >& token_vector);
boolean addOptionalRepeatSections(Vector< XMLToken >& token_vector, Long m);
boolean addManditoryRepeatSections(Vector< XMLToken >& token_vector, Long m);
long findFirstItemTagOfCDATA(long index);
long findFirstItemTagOfCDATA(Vector< XMLToken >& token_vector, long index);
long findFollowingStructure(long index);
long findFollowingStructure(Vector< XMLToken >& token_vector, long index);
long findForwardTargetStructure(long index);
long findForwardTargetStructure(Vector< XMLToken >& token_vector, long index);
long findImmediateNesting(long index);
long findImmediateNesting(Vector< XMLToken >& token_vector, long index);
long findInternalStructure(long index);
long findInternalStructure(Vector< XMLToken >& token_vector, long index);
long findMatchingStartTag(long index);
long findMatchingStartTag(Vector< XMLToken >& token_vector, long index);
long findMatchingEndTag(long index);
long findMatchingEndTag(Vector< XMLToken >& token_vector, long index);
long findPrecedingStructure(long index);
long findPrecedingStructure(Vector< XMLToken >& token_vector, long index);
long findPrecedingTagAtDepth( long start_index, long base_depth, XMLToken::TYPE type = XMLToken::NULL_TAG, String value = String::EMPTY );
long findPrecedingTagAtDepth( Vector< XMLToken >& token_vector, long start_index, long base_depth, XMLToken::TYPE type = XMLToken::NULL_TAG, String value = String::EMPTY );
long findFollowingTagAtDepth( long start_index, long base_depth, XMLToken::TYPE type = XMLToken::NULL_TAG, String value = String::EMPTY );
long findFollowingTagAtDepth( Vector< XMLToken >& token_vector, long start_index, long base_depth, XMLToken::TYPE type = XMLToken::NULL_TAG, String value = String::EMPTY );
boolean clearXMLGrammar();
boolean displayVector(Vector& targets);
boolean displayXMLModel();
boolean displayDiGraph();
boolean debugSearchMethod(long (LanguageModelXML::*func_ptr)( long ));
boolean LanguageModelXML::debugTargetMethod(long (LanguageModelXML::*func_ptr)(long, Vector& ));
boolean indexIsA(long index, String value);
boolean indexIsA(Vector< XMLToken >& token_vector, long index, String value );
boolean indexIsA(long index, XMLToken::TYPE type );
boolean indexIsA(Vector< XMLToken >& token_vector, long index, XMLToken::TYPE type );
boolean isInBounds(long index);
boolean isInBounds(Vector< XMLToken >& token_vector, long index);
DiGraph< /* element type lost in extraction */ > getDiGraph();
String getDummySymbol();
String getGrammarName();
Vector< /* element type lost in extraction */ > getHandledValues();
float getRepeatProb(long index);
String getRuleID(Rule rule);
long getRuleIndexByID(String id);
Vector< /* element type lost in extraction */ > getSymbolList();
float getWeight(long index = Integral::NO_POS);
boolean updateSymbolTable(XMLToken xml_token);
boolean storeVertex(XMLToken xml_token, long index);
Triple< /* type arguments lost in extraction */ > tokenizeRepeatValue(long index);
Triple< /* type arguments lost in extraction */ > tokenizeRepeatValue(XMLToken xml_token);
const String& className() const;
// declare an Sof object to parse the file
//
Sof sof;

// open the file for reading
//
sof.open("lm_xml_obj.sof", File::READ_ONLY);

// declare an LMXML object to read the grammars and perform
// the XML->BNF conversion
//
LanguageModelXML lmxml;

// read the grammars
//
lmxml.read(sof, 0);

// declare a HierarchicalDigraph object to store the grammars in DiGraph form
//
HierarchicalDigraph hg;

// declare a LanguageModelIHD object to perform the BNF->IHD conversion
//
LanguageModelIHD lmihd;

// perform conversion
//
lmihd.setRuleModel(lmxml.getRuleModel());

// retrieve the grammars from the LMIHD object, and store them in the HD object
//
hg.assign(lmihd.getHDigraph());