// file: $isip/class/mmedia/FeatureSelect/feats_05.cc // version: $Id: feats_05.cc 9407 2004-02-25 01:53:46Z parihar $ // // isip include files // #include "FeatureSelect.h" // method: compute // // arguments: // String& symbol: (input) symbol corresponding to which the out-of-class // features are to be selected. // Sdb& symbols_similar: (input) list of similar symbols // Sdb& symbols_all: (input) list of all symbols under consideration // // return: a bool8 value indicating status // // this method computes the out-of-class features for a given symbol // bool8 FeatureSelect::compute(String& symbol_a, Sdb& symbols_similar_a, Sdb& symbols_all_a) { // error checking // if (symbol_a.length() <= (int32)0) { String msg(L"Error: no symbol specified "); Console::put(msg); Error::handle(name(), L"compute", ERR, __FILE__, __LINE__); } if (!symbols_similar_a.gotoFirst()) { String msg(L"Error: no input list of symbols similar to input symbol "); msg.concat(L"specified "); Console::put(msg); Error::handle(name(), L"compute", ERR, __FILE__, __LINE__); } if (!symbols_all_a.gotoFirst()) { String msg(L"Error: no input list of all the symbols "); msg.concat(L"specified "); Console::put(msg); Error::handle(name(), L"compute", ERR, __FILE__, __LINE__); } if (symbols_similar_a.length() > symbols_all_a.length()) { String msg(L"Error: list of similar symbols cannot be greater than "); msg.concat("list of all symbols under consideration "); Console::put(msg); Error::handle(name(), L"compute", ERR, __FILE__, __LINE__); } // branch on algorithm type: CLASS_BASED // else if (algorithm_d == CLASS_BASED) { if (!computeClassBased(symbol_a, symbols_similar_a, symbols_all_a)) { String msg(L"Error: cannot select features"); Console::put(msg); Error::handle(name(), L"compute", ERR, __FILE__, __LINE__); } } // else error // else { String msg(L"Error: incorrect algorithm type: "); Console::put(msg); Error::handle(name(), L"compute", ERR, __FILE__, __LINE__); } // exit gracefully // return true; } // method: computeClassBased // // arguments: // const String& symbol: (input) symbol corresponding to which the // out-of-class features are to be selected. // Sdb& symbols_similar: (input) list of similar symbols // Sdb& symbols_all: (input) list of all symbols under consideration // // return: a bool8 value indicating status // // this method selects out-of-class data corresponding to the input // symbol. the algorithm is documented extensively in: // // 1. A. Ganapathiraju, Support Vector Machines for Speech Recognition, // Ph.D. Dissertation, // Department of Electrical and Computer Engineering, // Mississippi State University, January 2002. // 2. Monthly Tutorial for February 2004 // http://www.isip.msstate.edu/projects/speech/software/tutorials/monthly/ // 2004_02/index.html // bool8 FeatureSelect::computeClassBased(const String& symbol_a, Sdb& symbols_similar_a, Sdb& symbols_all_a) { // increase the identation // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); } // open the databases // if (!openAudioDatabase()) { String msg(L"Error: cannot open audio database: "); msg.concat(audiodb_file_d); Console::put(msg); Error::handle(name(), L"computeSegFeaturesSymbol", ERR, __FILE__, __LINE__); } // increase the indention // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); } // find the number of in-class datapoints (feature-vectors) for the // input symbol // int32 num_inclass; Filename filename; // retrieve the features for the input symbol // String symbol(symbol_a); Vector features; if (!retrieveFtrFile(symbol, filename)) { String msg(L"Error: cannot retrieve feature file for input symbol: "); msg.concat(symbol); msg.concat(L" from the audio database: "); msg.concat(audiodb_file_d); Console::put(msg); Error::handle(name(), L"computeClassBased", ERR, __FILE__, __LINE__); } if (!readFeatures(filename, features)) { String msg(L"Error: cannot read features from feature file: "); msg.concat(filename); Console::put(msg); Error::handle(name(), L"computeClassBased",ERR, __FILE__, __LINE__); } num_inclass = features.length(); // compute the number of out-of-class datapoints (feature-vectors) // using the class ratio. // let, ratio_class_d = in : out // num_outclass = no. of out-of-class datapoints // num_inclass = no. of in-class datapoints // then, // num_outclass = (out / in) * num_inclass // float32 num_oclass = ((float32)ratio_class_d((int32)1) / (float32)ratio_class_d((int32)0)) * (float32)num_inclass; // now divide the out-of-class datapoints into similar and // dissimilar classes based on the similar ratio // let, similar_ratio_d = similar : dissimilar // then, // num_outclass_similar = (similar / (similar+dissimilar)) * num_out_class // num_outclass_dissimilar = (similar/(similar+dissimilar)) * num_out_class // float32 total = ratio_similar_d((int32)0) + ratio_similar_d((int32)1); float32 num_oclass_sim = ((float32)ratio_similar_d((int32)0) / total) * num_oclass; float32 num_oclass_dissim = ((float32)ratio_similar_d((int32)1) / total) * num_oclass; // prepare the list (sdb) of dissimilar symbols // Sdb symbols_dissimilar; Filename temp1; Filename temp2; for (bool8 m = symbols_all_a.gotoFirst(); m; m = symbols_all_a.gotoNext()) { symbols_all_a.getName(temp1); bool8 found = false; for (bool8 n = symbols_similar_a.gotoFirst(); n; n = symbols_similar_a.gotoNext()) { symbols_similar_a.getName(temp2); if ((temp1.eq(temp2)) ) { found = true; break; } } if ((!found) && (temp1.ne(symbol_a))) { symbols_dissimilar.append(temp1); } } // compute the number of datapoints/similar-symbols and number of // datapoints/dissimilar-symbols // int32 num_sim = symbols_similar_a.length(); int32 num_dissim = symbols_dissimilar.length(); int32 num_per_sim = (int32)ceil((num_oclass_sim / (float32)num_sim)); int32 num_per_dissim = (int32)ceil((num_oclass_dissim / (float32)num_dissim)); // get the out-of-class data from similar and dissimilar phones // Vector features_sim; Vector features_dissim; Vector features_oclass; // increase the identation // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); } // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"selecting features for similar symbols: "); Console::putNoWrap(output); } Filename sy1; symbols_similar_a.gotoFirst(); symbols_similar_a.getName(sy1); if ((num_sim > 0) && (sy1.length() > 0)) { // increase the identation // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); } getAllFeatures(symbols_similar_a, num_per_sim, features_sim); // decrease the identation // if (verbosity_d >= Integral::BRIEF) { Console::decreaseIndention(); } } Filename sy2; symbols_dissimilar.gotoFirst(); symbols_dissimilar.getName(sy2); if ((num_dissim > 0) && (sy2.length() > 0)) { // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"selecting features for dissimilar symbols: "); Console::putNoWrap(output); } // increase the identation // if (verbosity_d >= Integral::BRIEF) { Console::increaseIndention(); } getAllFeatures(symbols_dissimilar, num_per_dissim, features_dissim); // decrease the identation // if (verbosity_d >= Integral::BRIEF) { Console::decreaseIndention(); } } // decrease the identation // if (verbosity_d >= Integral::BRIEF) { Console::decreaseIndention(); } features_oclass.assign(features_sim); features_oclass.concat(features_dissim); // transform output filename // Filename output_file; Filename uni_output_file; output_file.transform(filename, output_directory_d, output_extension_d, output_suffix_d, output_preserve_d); uni_output_file.transformUniquely(output_file); // store the segmental features // if (!writeFeatures(uni_output_file, features_oclass)) { String msg(L"Error: cannot write features to feature file: "); msg.concat(uni_output_file); Console::put(msg); Error::handle(name(), L"computeSegFeatures", ERR, __FILE__, __LINE__); } // close the databases // if (!closeAudioDatabase()) { String msg(L"Error: cannot close audio database: "); msg.concat(audiodb_file_d); Console::put(msg); Error::handle(name(), L"computeSegFeaturesSymbol", ERR, __FILE__, __LINE__); } // decrease the identation // if (verbosity_d >= Integral::BRIEF) { Console::decreaseIndention(); } // exit gracefully // return true; }