// file: $isip/class/search/LexicalTree/LexicalTree.h // version: $Id: LexicalTree.h 10636 2007-01-26 22:18:09Z tm334 $ // // make sure definitions are only made once // #ifndef ISIP_LEXICAL_TREE #define ISIP_LEXICAL_TREE #ifndef ISIP_DI_GRAPH #include #endif #ifndef ISIP_SEARCH_NODE #include #endif // LexicalTree: A class implement a lexical tree using Digraph data structure // class LexicalTree : public DiGraph { //--------------------------------------------------------------------------- // // public constants // //--------------------------------------------------------------------------- public: // define the class name // static const String CLASS_NAME; //---------------------------------------- // // i/o related constants // //---------------------------------------- static const String DEF_PARAM; //---------------------------------------- // // other important constants // //---------------------------------------- // define algorithm choices // enum ALGORITHM { UNFACTORED = 0, DI_GRAPH, NGRAM, NGRAM_CACHED, DEF_ALGORITHM = UNFACTORED}; // define implementation choices // enum IMPLEMENTATION { ALWAYS_MAX=0, UPPER_BOUND, SUM, DEF_IMPLEMENTATION = ALWAYS_MAX }; //---------------------------------------- // // default values and arguments // //---------------------------------------- //--------------------------------------- // // error codes // //--------------------------------------- static const int32 ERR = (int32)90600; //--------------------------------------------------------------------------- // // protected data // //--------------------------------------------------------------------------- protected: // type definition // typedef SearchNode LexicalNode; // type definition // typedef SearchSymbol LexSymbol; // type definition // typedef GraphVertex GVLexicalNode; // type definition // typedef DiGraph Pronunciation; // type definition // typedef GraphArc GALexicalNode; // define a static debug level // static Integral::DEBUG debug_level_d; // define a static memory manager // static MemoryManager mgr_d; //--------------------------------------------------------------------------- // // private data // //--------------------------------------------------------------------------- private: // If we put the codes on spreading ngrams in the search process, // these definitions can be removed in the future. // // algorithm name // ALGORITHM algorithm_d; // implementation type // IMPLEMENTATION implementation_d; // the start node of the LexicalTree // GVLexicalNode* root_vert_d; // the weight to term // float32 term_weight_d; //--------------------------------------------------------------------------- // // required public methods // //--------------------------------------------------------------------------- public: // method: name // static const String& name() { return CLASS_NAME; } // method: diagnose // static bool8 diagnose(Integral::DEBUG debug_level); // method: debug // bool8 debug(const unichar* message_a) ; // method: setDebug // static bool8 setDebug(Integral::DEBUG debug_level) { debug_level_d = debug_level; return true; } // constructor(s) // LexicalTree(); LexicalTree(const LexicalTree& copy_tree); // method: destructor // ~LexicalTree() {} // assign methods // bool8 assign(const LexicalTree& copy_tree_a); // method: sofSize // int32 sofSize() const { return Error::handle(name(), L"sofSize", Error::NOT_IMPLEM, __FILE__, __LINE__); } // method: read // bool8 read(Sof& sof, int32 tag, const String& cname = CLASS_NAME) { return Error::handle(name(), L"read", Error::NOT_IMPLEM, __FILE__, __LINE__); } // method: write // bool8 write(Sof& sof, int32 tag, const String& cname = CLASS_NAME) const { return Error::handle(name(), L"write", Error::NOT_IMPLEM, __FILE__, __LINE__); } // method: readData // bool8 readData(Sof& sof, const String& pname = DEF_PARAM, int32 size = SofParser::FULL_OBJECT, bool8 param = true, bool8 nested = false) { return Error::handle(name(), L"readData", Error::NOT_IMPLEM, __FILE__, __LINE__); } // method: writeData // bool8 writeData(Sof& sof, const String& pname = DEF_PARAM) const { return Error::handle(name(), L"writeData", Error::NOT_IMPLEM, __FILE__, __LINE__); } // equality method // bool8 eq(const LexicalTree& compare_tree_a) const; // method: new // static void* operator new(size_t size) { return mgr_d.get(); } // method: new[] // static void* operator new[](size_t size) { return mgr_d.getBlock(size); } // method: delete // static void operator delete(void* ptr) { mgr_d.release(ptr); } // method: delete[] // static void operator delete[](void* ptr) { mgr_d.releaseBlock(ptr); } // method: setGrowSize // static bool8 setGrowSize(int32 grow_size) { return mgr_d.setGrow(grow_size); } // clear methods // bool8 clear(Integral::CMODE ctype = Integral::DEF_CMODE); //--------------------------------------------------------------------------- // // class-specific public methods // //--------------------------------------------------------------------------- // set the algorithm // bool8 setAlgorithm(LexicalTree::ALGORITHM algorithm){ return (algorithm_d = algorithm); } // get the algorithm // ALGORITHM getAlgorithm() { return algorithm_d; } // set the implementation // bool8 setImplementation(IMPLEMENTATION implementation) { return (implementation_d = implementation); } // get the implementation // IMPLEMENTATION getImplementation() { return implementation_d; } // expand subgraphs into lexical trees // static bool8 expandLexicalTree(DiGraph& word_graph_a, const Vector >& pron_vec_a, int32 level_a); static bool8 expandLexicalTree(GVLexicalNode*& root_node_a, const Vector >& pron_vec_a, int32 level_a); //--------------------------------------------------------------------------- // // private methods // //--------------------------------------------------------------------------- private: // some of these methods are reserved for future changes. If we move // the ngram spreading in the search process, many of thsee // methods will be useless // // debug functions // bool8 debugTree(const unichar* message_a); bool8 debugVertex (GVLexicalNode*& lex_vert_a, bool8 recursive_a = false, GALexicalNode* arc_a = NULL); // factor the lexical tree from a vertex // float32 factorLexicalTree( GVLexicalNode*, const HashTable* ngram_a = NULL, int32 history_a = -1 ); // factor the LexicalTree (whole tree ) // float32 factorLexicalTree( const HashTable* ngram_a = NULL, int32 history_a = -1 ){ return factorLexicalTree(this->getStart(), ngram_a, history_a); } // build the lexical tree // bool8 buildLexicalTree(DiGraph& word_graph_a, const Vector >& pron_vec_a, SingleLinkedList& tree_vec_a); LexicalTree* buildLexicalTree(GVLexicalNode*& root_node_a, const Vector >& pron_vec_a); // insert a new pronication into the lexical tree // bool8 insertPron(GVLexicalNode*& word_vert_a, const DiGraph& pron_a, const float32 & weight_a); //insert a subgraph of a pronication into the lexical tree // bool8 insertSubgraph(GVLexicalNode*& word_vert_a, GVLexicalNode*& curr_lex_vert_a, GVLexicalNode*& curr_pron_vert_a, const float32 & pron_prob_a, bool8 insert_mode_a = true); //expand a subgraph of a pronication into the lexical tree structure // static bool8 expandSubgraph(GVLexicalNode*& word_vert_a, GVLexicalNode*& curr_lex_vert_a, GVLexicalNode*& curr_pron_vert_a, const float32 & pron_prob_a, bool8 insert_mode_a = true); // this method find the successor with given symbol ID. // If it is not exsited, return a NULL. // static GVLexicalNode* findSuccVert(GVLexicalNode*& lex_vert_a, int32 symbol_id_a ); // overload the parent's insertArc function // bool8 insertArc(GVLexicalNode* start_vertex_a, GVLexicalNode* end_vertex_a, bool8 is_epsilon = GALexicalNode::DEF_EPSILON, float32 weight = GALexicalNode::DEF_WEIGHT); }; // end of include file // #endif