quick start:g++ [flags ...] file ... -l /isip/tools/lib/$ISIP_BINARY/lib_pr.a #include <PhoneticDecisionTree.h> PhoneticDecisionTree(ALGORITHM algorithm = DEF_ALGORITHM, IMPLEMENTATION implementation = DEF_IMPLEMENTATION, float split_threshold = DEF_SPLIT_THRESHOLD, float merge_threshold = DEF_MERGE_THRESHOLD, float num_occ_threshold = DEF_NUM_OCC_THRESHOLD); boolean eq(const PhoneticDecisionTree& arg) const; boolean setAlgorithm(ALGORITHM algorithm); boolean setSplitThreshold(float split_threshold);
description:PhoneticDecisionTree pdt; pdt.setStopMode(THRESH); pdt.setRunMode(TRAIN); pdt.setAlgorithm(ML); pdt.setImplementation(DEFAULT); pdt.setSplitThreshold(100); pdt.setMergeThreshold(1000); pdt.setNumOccThreshold(600); PhoneticDecisionTreeNode pdt_node; Triple<Long, StatisticalModel, HashTable<String, String> > datapoint; SingleLinkedList<Triple<Long, StatisticalModel, HashTable<String, String> > > data; StatisticalModel sm; GaussianModel gm; MixtureModel mm; mm.add(gm); sm.assign(mm); sm.setOccupancy((Double)0); HashTable<String, String> hash; datapoint.assign((Long)0, sm, hash); data.insert(&datapoint); pdt_node.setDataPoints(data); pdt.load(attributes, pdt_node); pdt.buildDecisionTree();
// name of this class as a string; it is the default value of the `name`
// argument of read() and write()
static const String CLASS_NAME = L"PhoneticDecisionTree";
// algorithm choices: ML is the only enumerator, and DEF_ALGORITHM aliases
// it as the default
enum ALGORITHM {
  ML = 0,
  DEF_ALGORITHM = ML
};
// implementation choices: DEFAULT is the only enumerator, and
// DEF_IMPLEMENTATION aliases it as the default
enum IMPLEMENTATION {
  DEFAULT = 0,
  DEF_IMPLEMENTATION = DEFAULT
};
// name maps holding the text form of the enumerators: "ML" for ALGORITHM
// and "DEFAULT" for IMPLEMENTATION
// NOTE(review): presumably used to convert between enum values and their
// names during I/O -- confirm against NameMap
static const NameMap ALGO_MAP(L"ML");
static const NameMap IMPL_MAP(L"DEFAULT");
// default parameter name; it is the default value of the `pname` argument
// of readData() and writeData()
static const String DEF_PARAM = L"";
// parameter-name strings, one per configurable setting
// NOTE(review): presumably the keys under which the settings are stored
// in an Sof file -- confirm against readData()/writeData()
static const String PARAM_ALGORITHM = L"algorithm";
static const String PARAM_IMPLEMENTATION = L"implementation";
static const String PARAM_SPLIT_THRESHOLD = L"split_threshold";
static const String PARAM_MERGE_THRESHOLD = L"merge_threshold";
static const String PARAM_NUM_OCC_THRESHOLD = L"num_occ_threshold";
static const String PARAM_BDT = L"base_dt";
// NOTE(review): the role of the following string constants is not evident
// from the declarations alone -- see the implementation
static const String YES = L"yes";
static const String NO = L"no";
static const String CPH = L"cph";
static const String POS = L"pos";
// default values for the three thresholds; they are the default arguments
// of set(). the third constant was previously declared a second time as
// DEF_SPLIT_THRESHOLD, which is a redefinition and left
// DEF_NUM_OCC_THRESHOLD (referenced by set()) undefined.
static const float DEF_SPLIT_THRESHOLD = 10;
static const float DEF_MERGE_THRESHOLD = 5;
static const float DEF_NUM_OCC_THRESHOLD = 100;
// NOTE(review): presumably the base error code of this class -- confirm
// NOTE(review): the leading zeros make this an octal literal in C++
// (00100300 == 32960 decimal) -- confirm that this is intended
static const long ERR = 00100300;
// a data point: a (Long, StatisticalModel, HashTable<String, String>) triple
typedef Triple<Long, StatisticalModel, HashTable<String, String> > DataPoint;
// a singly linked list of data points
typedef SingleLinkedList<DataPoint> Data;
// a bi-graph vertex holding a PhoneticDecisionTreeNode
typedef BiGraphVertex<PhoneticDecisionTreeNode> TreeNode;
// selected algorithm and implementation (values of the ALGORITHM and
// IMPLEMENTATION enumerations)
ALGORITHM algorithm_d;
IMPLEMENTATION implementation_d;
// NOTE(review): presumably the root node of the decision tree -- confirm
PhoneticDecisionTreeNode pdt_rootnode_d;
// split, merge and num_occ thresholds, stored as Float objects (the
// public setters and getters exchange them as plain float)
Float split_threshold_d;
Float merge_threshold_d;
Float num_occ_threshold_d;
// static memory manager
// NOTE(review): presumably backs the class-specific new/delete operators
// and setGrowSize() -- confirm
static MemoryManager mgr_d;
// returns a reference to a String (presumably CLASS_NAME -- confirm)
static const String& name();
// static diagnostic routine taking a debug level
static boolean diagnose(Integral::DEBUG debug_level);
// debug routine taking a message string; does not modify the object
boolean debug(const unichar* message) const;
// destructor
~PhoneticDecisionTree();
// constructor: every argument is defaulted, so this also serves as the
// default constructor. the previous declaration ended in a non-defaulted
// `float floor` after two defaulted parameters, which is ill-formed; the
// threshold parameters now match the constructor shown in the quick start
// and the argument list of set().
PhoneticDecisionTree(ALGORITHM algorithm = DEF_ALGORITHM,
                     IMPLEMENTATION implementation = DEF_IMPLEMENTATION,
                     float split_threshold = DEF_SPLIT_THRESHOLD,
                     float merge_threshold = DEF_MERGE_THRESHOLD,
                     float num_occ_threshold = DEF_NUM_OCC_THRESHOLD);
// copy constructor
PhoneticDecisionTree(const PhoneticDecisionTree& arg);
// assignment from another PhoneticDecisionTree, in method form (returns a
// boolean) and in operator form (returns a reference to this class)
boolean assign(const PhoneticDecisionTree& arg);
PhoneticDecisionTree& operator= (const PhoneticDecisionTree& arg);
// NOTE(review): presumably the size of the object when stored in an Sof
// file -- confirm
long sofSize() const;
// read/write the object from/to an Sof object under the given tag; the
// object name defaults to CLASS_NAME
boolean read(Sof& sof, long tag, const String& name = CLASS_NAME);
boolean write(Sof& sof, long tag, const String& name = CLASS_NAME) const;
// data-level read/write; pname defaults to DEF_PARAM. readData() further
// defaults size to SofParser::FULL_OBJECT, param to true and nested to false
boolean readData(Sof& sof, const String& pname = DEF_PARAM, long size = SofParser::FULL_OBJECT, boolean param = true, boolean nested = false);
boolean writeData(Sof& sof, const String& pname = DEF_PARAM) const;
// equality comparison against another PhoneticDecisionTree
boolean eq(const PhoneticDecisionTree& arg) const;
// class-specific allocation and deallocation operators (scalar and array
// forms)
// NOTE(review): presumably these route through the static mgr_d memory
// manager -- confirm
static void* operator new(size_t size);
static void* operator new[](size_t size);
static void operator delete(void* ptr);
static void operator delete[](void* ptr);
// NOTE(review): presumably sets the grow size of the memory manager --
// confirm
static boolean setGrowSize(long grow_size);
// clears the object; ctype defaults to Integral::DEF_CMODE
boolean clear(Integral::CMODE ctype = Integral::DEF_CMODE);
// individual setters for the algorithm, the implementation and the three
// thresholds
boolean setAlgorithm(ALGORITHM algorithm);
boolean setImplementation(IMPLEMENTATION implementation);
boolean setSplitThreshold(float split_threshold);
boolean setMergeThreshold(float merge_threshold);
boolean setNumOccThreshold(float num_occ_threshold);
// sets all five configuration values in one call; every argument has a
// default, taken from the DEF_* constants
boolean set(ALGORITHM algorithm = DEF_ALGORITHM, IMPLEMENTATION implementation = DEF_IMPLEMENTATION, float split_threshold = DEF_SPLIT_THRESHOLD, float merge_threshold = DEF_MERGE_THRESHOLD, float num_occ_threshold = DEF_NUM_OCC_THRESHOLD);
// individual getters for the algorithm, the implementation and the three
// thresholds; none of them modifies the object
ALGORITHM getAlgorithm() const;
IMPLEMENTATION getImplementation() const;
float getSplitThreshold() const;
float getMergeThreshold() const;
float getNumOccThreshold() const;
// returns all five configuration values through reference arguments
// NOTE(review): unlike the individual getters, get() is not declared
// const -- confirm whether that is intended
boolean get(ALGORITHM& algorithm, IMPLEMENTATION& implementation, float& split_threshold, float& merge_threshold, float& num_occ_threshold);
// NOTE(review): all arguments are non-const references, so any of them may
// be an input or an output; the names suggest input models/hash tables
// (*_in), output models/hash tables (*_out) and tied models -- confirm the
// roles against the implementation
boolean getStatTrain(Vector<ContextMap>& context_map, Vector<DiGraph<SearchNode> >& sub_graphs, Vector<SearchSymbol>& symbol_table, HashTable<SearchSymbol, Long>& symbol_hash_in, Vector<StatisticalModel>& stat_models_in, HashTable<SearchSymbol, Long>& symbol_hash_out, Vector<StatisticalModel>& stat_models_out, Filename& phonetic_dt_file, HashTable<SearchSymbol, Long>& tied_symbol_hash, Vector<StatisticalModel>& tied_stat_models);
// NOTE(review): counterpart of getStatTrain() for test mode, judging by
// the name; argument roles are not evident from the declaration -- confirm
boolean getStatTest(Vector<ContextMap>& context_map, long& left_context, long& right_context, Vector<SearchSymbol>& upper_symbol_table, Vector<SearchSymbol>& upper_contextless_symbol_table, Vector<DiGraph<SearchNode> >& sub_graphs, Vector<SearchSymbol>& symbol_table, HashTable<SearchSymbol, Long>& symbol_hash, Filename& ques_ans_file);
// argument-free entry points; presumably they operate on the state
// loaded into the object beforehand -- confirm
boolean runDecisionTree();
boolean trainDecisionTree();
// loads a set of attributes and a decision tree node into the object; the
// description example calls it as pdt.load(attributes, pdt_node). the
// first parameter name was misspelled "attribtues".
boolean load(const Attributes& attributes, PhoneticDecisionTreeNode& pdtnode);
loadTrain(Vector<ContextMap>& context_map, long& left_context, long& right_context, Vector<SearchSymbol>& upper_symbol_table, Vector<SearchSymbol>& contextless_symbol_table, Vector<DiGraph<SearchNode> >& sub_graphs, Vector<SearchSymbol>& symbol_table, HashTable<SearchSymbol, Long>& symbol_hash, Vector<StatisticalModel>& stat_models, Filename& ques_ans_file, HashTable<SearchSymbol, Long>& tied_symbol_hash, Vector<StatisticalModel>& tied_stat_models);
// NOTE(review): presumably loads a decision tree from the given file for
// test mode -- confirm
boolean loadTest(Filename& phonetic_dt_file);
// takes a pointer to the SofParser to use
// NOTE(review): ownership of the pointer is not evident from here
boolean setParser(SofParser* parser);
// classification: classifyDataPoint() and findClass() return a Long for a
// single data point (presumably the index of the class it falls in --
// confirm); classifyData() works on a tree node and an attribute
Long classifyDataPoint(DataPoint& datapoint);
boolean classifyData(TreeNode* node, Attribute& attribute);
// tree restructuring helpers operating on TreeNode pointers
// NOTE(review): semantics (split/merge criteria, traversal order) are not
// visible in the declarations -- see the implementation
boolean mergeLeafNodes(TreeNode* start_node, TreeNode* best_node);
boolean splitSubTree(TreeNode*);
boolean mergeSubTree(TreeNode*);
// index is passed by reference, so it may be updated by the call
boolean reindexSubTree(TreeNode*, long& index);
Long findClass(TreeNode*, DataPoint& datapoint);
// best_attribute and likelihood are non-const references (presumably the
// results of the search -- confirm); note that the node is taken by
// reference here, while the sibling helpers take a TreeNode pointer
boolean findBestAttribute(TreeNode& node, Attribute& best_attribute, float& likelihood);
// returns a Long for the given node
// NOTE(review): the meaning of "typical index" is not evident from here
Long findTypicalIndex(TreeNode* node);
// flag is a non-const reference, so it may be read and/or written
boolean markNode(TreeNode* node, boolean& flag);
boolean updateTypicalIndex(TreeNode* start_node, TreeNode* best_node);
// numeric helpers: each takes one or more tree nodes and has a float
// reference argument (presumably where the computed value is returned --
// confirm); the boolean return is presumably a status
boolean computeSumOccupancy(TreeNode* node, float& sum_num_occ);
// predicate on a node/attribute pair
// NOTE(review): presumably compares against num_occ_threshold_d -- confirm
boolean isSplitOccupancyBelowThreshold(TreeNode* node, Attribute& attribute);
boolean computeDeterminantPooledCovariance(TreeNode* node, float& det_pooled_covariance);
// returns a double directly for the given statistical model
double computeScale(StatisticalModel& stat_model);
boolean computeLikelihoodNode(TreeNode* node, float& likelihood);
boolean computeLikelihoodSplitNode(TreeNode* node, Attribute& attribute, float& split_likelihood);
boolean computeLikelihoodMergeNodes(TreeNode* start_node, TreeNode* node, float& merge_likelihood);
// context construction helpers working on a symbol vector and a vector of
// context maps
// NOTE(review): all arguments are non-const references; presumably
// all_contexts receives the generated contexts -- confirm
boolean createContexts(Vector<SearchSymbol>& symbols, long& length, Vector<ContextMap>& all_contexts);
boolean appendContextLevel(Vector<SearchSymbol>& symbols, long& level, Vector<ContextMap>& all_contexts);
// NOTE(review): presumably filters all_contexts into valid_contexts using
// the contextless symbols -- confirm against the implementation. the type
// of the first parameter was missing its opening angle bracket
// ("VectorContextMap>"); it is Vector<ContextMap>, like valid_contexts.
boolean validateContexts(Vector<ContextMap>& all_contexts,
                         Vector<SearchSymbol>& contextless_symbols,
                         Vector<ContextMap>& valid_contexts);
// NOTE(review): presumably fills unseen_contexts with the valid contexts
// that are not among the seen ones -- confirm
boolean getUnseenContexts(Vector<ContextMap>& seen_contexts, Vector<ContextMap>& valid_contexts, Vector<ContextMap>& unseen_contexts);
// NOTE(review): argument roles are not evident from the declaration; all
// are non-const references -- see the implementation
boolean updateLowerLevel(Vector<ContextMap>& context_map, Vector<ContextMap>& unseen_context_map, Vector<DiGraph<SearchNode> >& sub_graphs, Vector<SearchSymbol>& symbol_table, HashTable<SearchSymbol, Long>& symbol_hash);
// central_symbols is a list of String objects (presumably the output --
// confirm)
boolean getCentralSymbols(Vector<SearchSymbol>& symbol_table, Vector<SearchSymbol>& contextless_symbol_table, SingleLinkedList<String>& central_symbols);
// takes a question/answer file plus a list of (Long, String) pairs and a
// String-to-String hash table (presumably filled from the file -- confirm)
boolean readQuestionAnswer(Filename& ques_ans_file, SingleLinkedList<Pair<Long, String> >& questions, HashTable< String, String>& answers);
// NOTE(review): presumably pools the statistical models into `data` (a
// list of DataPoint triples) using the questions and answers -- confirm
// against the implementation. the type of the last parameter carried a
// stray extra '>' ("Vector<StatisticalModel>>&"), which does not parse; it
// is Vector<StatisticalModel>&, like stat_models.
boolean poolStatisticalModel(Vector<ContextMap>& context_map,
                             Vector<SearchSymbol>& contextless_symbol_table,
                             Vector<DiGraph<SearchNode> >& sub_graphs,
                             Vector<SearchSymbol>& symbol_table,
                             HashTable<SearchSymbol, Long>& symbol_hash,
                             Vector<StatisticalModel>& stat_models,
                             long& context_len,
                             SingleLinkedList<Pair<Long, String> >& questions,
                             HashTable<String, String>& answers,
                             Data& data,
                             HashTable<SearchSymbol, Long>& tied_symbol_hash,
                             Vector<StatisticalModel>& tied_stat_models);
boolean isTiedSSymbol(SearchSymbol& search_symbol, HashTable<SearchSymbol, Long>& symbol_hash);
examples: