// file: $isip/class/algo/SegmentConcat/segc_06.cc // version: $Id: segc_06.cc 10600 2006-08-08 22:27:19Z ss754 $ // // isip include files // #include "SegmentConcat.h" // method: generateSegFeature // // arguments: // const Vector& features: (input) feature vectors // const VectorLong& start_frames: (input) start frames corresponding // to each symbol // const VectorLong& end_frames: (input) end frames corresponding // to each symbol // Vector& seg_features: (output) segmental features // // return: a bool8 value indicating status // // this method computes the segmental features by concatening the // features generated according to the ratio for each symbol // alignment. each symbol alignment, given by start and end frames, // corresponds one output concatenated feature // bool8 SegmentConcat::generateSegFeature(const Vector& features_a, const VectorLong& start_frames_a, const VectorLong& end_frames_a, Vector& seg_features_a) { // check the number of start frames is equal to the end frames // if (start_frames_a.length()!=end_frames_a.length()) { String msg(L"Error: number of start and end frames unequal"); Console::put(msg); Error::handle(name(), L"generateSegFeature", ERR, __FILE__, __LINE__); } // check the ratio // if (ratio_d.length()<=(int32)0) { String msg(L"segmental ratio cannot be zero or negative"); Console::put(msg); Error::handle(name(), L"generateSegFeature", ERR, __FILE__, __LINE__); } // local variables and intermediate computations // int32 num_seg = ratio_d.length(); int32 total = (int32)0; for (int32 i = 0; i < num_seg; i++) { total += (int32)ratio_d(i); } VectorLong num_frame_per_seg; num_frame_per_seg.setLength(num_seg); VectorLong start_seg; start_seg.setLength(num_seg); VectorLong end_seg; end_seg.setLength(num_seg); int32 svm_feat_dim = num_seg * dim_d; if (duration_flag_d) { svm_feat_dim++; } // set the number of segmental feature vectors // seg_features_a.setLength(start_frames_a.length()); // loop over the start times and compute segmental feature-vector // for each // for (int32 l = 0; l < start_frames_a.length(); l++) { // get the start and end frame // int32 start = start_frames_a(l); int32 end = end_frames_a(l); int32 remaining_frames = end - start; int32 total_frames = remaining_frames; float64 dur = log((float64)total_frames); // compute the start and end frames for the various segments // note: any additional frames left out because of integer arithmetic go // to the central segment // for (int32 i = 0; i < num_seg; i++) { if (i != num_seg / 2) { int32 temp_ratio = ratio_d(i); num_frame_per_seg(i) = (int32)floor((float64)temp_ratio* (float64)total_frames / (double)total); if ((int32)num_frame_per_seg(i) < 1 && total_frames >= num_seg) { num_frame_per_seg(i) = 1; } remaining_frames -= num_frame_per_seg(i); } } num_frame_per_seg(num_seg / 2) = remaining_frames; // compute the start and end frames for each segment // start_seg(0) = start; for (int32 i = 1; i < num_seg; i++) { start_seg(i) = start_seg(i - 1) + num_frame_per_seg(i - 1); end_seg(i - 1) = start_seg(i); } end_seg(num_seg - 1) = end; // make sure that output space is allocated and initialized to zero // seg_features_a(l).setLength(svm_feat_dim); seg_features_a(l).setRange(0, svm_feat_dim, 0.0); // get means for the segments // for (int32 i = 0; i < num_seg; i++) { for (int32 j = start_seg(i); j < end_seg(i); j++) { // sum over the vectors in the segment // for (int32 k = 0; k < dim_d; k++) { seg_features_a(l)((i * dim_d) + k) += features_a(j)(k); } } if (end_seg(i) - start_seg(i) > 1) { for (int32 k = 0; k < dim_d; k++) { seg_features_a(l)((i*dim_d) + k) /= (double)(end_seg(i) - start_seg(i)); } } } if (duration_flag_d) { seg_features_a(l)(svm_feat_dim - 1) = dur / MAX_LOG_DURATION; } } // exit gracefully // return true; } // method: normalizeFeatures // // arguments: // const Vector& features: (input) feature vectors // Vector& norm_features: (output) normalized features // // return: a bool8 value indicating status // // this method computes the segmental features by concatening the // features generated according to the ratio for each symbol // alignment. each symbol alignment, given by start and end frames, // corresponds one output concatenated feature // bool8 SegmentConcat::normalizeFeatures(const Vector& features_a, Vector& norm_features_a) { // check the size of the input features // int32 num_vect = features_a.length(); if (num_vect <= 0) { String msg(L"Error: zero vectors in the input"); Console::put(msg); Error::handle(name(), L"normalizeFeatures", ERR, __FILE__, __LINE__); } // verbosity // if (verbosity_d >= Integral::BRIEF) { String output(L"normalizing features..."); Console::putNoWrap(output); } // open the min_max sof file // Sof min_max_sof; if (min_max_file_d.length() > (int32)0) { min_max_sof.open(min_max_file_d); } else { String msg(L"Error: no input min-max file specified "); Console::put(msg); Error::handle(name(), L"normalizeFeatures", ERR, __FILE__, __LINE__); } // read the min and max vectors // min_max_d.read(min_max_sof, (int32)0); if (min_max_d.length() != (int32)2) { String msg(L"Error: reading min-max file"); Console::put(msg); Error::handle(name(), L"normalizeFeatures", ERR, __FILE__, __LINE__); } if (min_max_d(0).length() != features_a(0).length()) { String msg(L"Error: reading min-max file"); Console::put(msg); Error::handle(name(), L"normalizeFeatures", ERR, __FILE__, __LINE__); } if (min_max_d(1).length() != features_a(0).length()) { String msg(L"Error: reading min-max file"); Console::put(msg); Error::handle(name(), L"normalizeFeatures", ERR, __FILE__, __LINE__); } // close the sof min-max file // min_max_sof.close(); // assign the data // norm_features_a.assign(features_a); // normalize the features to be within the range of [-1, 1] // for (int32 i = 0; i < num_vect; i++) { for (int32 j = 0; j < dim_d; j++) { norm_features_a(i)(j) = 2 * (norm_features_a(i)(j) - min_max_d(0)(j)) / (min_max_d(1)(j) - min_max_d(0)(j)) - 1.0; } } // exit gracefully // return true; } // method: openAudioDatabase // // arguments: // none // // return: a bool8 value indicating status // // this method opens the audio database // bool8 SegmentConcat::openAudioDatabase() { // open the audio database // audiodb_d.setDebug(debug_level_d); if (verbosity_d >= Integral::BRIEF) { String output(L"opening audio database: "); output.concat(audiodb_file_d); Console::putNoWrap(output); } if (!audiodb_d.open(audiodb_file_d)) { String msg(L"unable to open audio database (\n"); msg.concat(audiodb_file_d); msg.concat(L")"); Console::put(msg); Error::handle(name(), L"openAudioDatabase", Error::ARG, __FILE__, __LINE__); } // exit gracefully // return true; } // method: openTranscriptionDatabase // // arguments: // none // // return: a bool8 value indicating status // // this method opens the transcription database // bool8 SegmentConcat::openTransDatabase() { // open the transcription database // transdb_d.setDebug(debug_level_d); if (verbosity_d >= Integral::BRIEF) { String output(L"opening audio database: "); output.concat(audiodb_file_d); Console::putNoWrap(output); } if (!transdb_d.open(transdb_file_d)) { String msg(L"unable to open trans database (\n"); msg.concat(transdb_file_d); msg.concat(L")"); Console::put(msg); Error::handle(name(), L"openTransDatabase", Error::ARG, __FILE__, __LINE__); } // exit gracefully // return true; } // method: closeAudioDatabase // // arguments: // none // // return: a bool8 value indicating status // // this method closes the audio database // bool8 SegmentConcat::closeAudioDatabase() { // close the audio database // if (verbosity_d >= Integral::BRIEF) { String output(L"closing audio database: "); output.concat(audiodb_file_d); Console::putNoWrap(output); } if (!audiodb_d.close()) { String msg(L"unable to close audio database (\n"); msg.concat(audiodb_file_d); msg.concat(L")"); Console::put(msg); Error::handle(name(), L"closeAudioDatabase", Error::ARG, __FILE__, __LINE__); } // exit gracefully // return true; } // method: closeTransDatabase // // arguments: // none // // return: a bool8 value indicating status // // this method closes the transcription database // bool8 SegmentConcat::closeTransDatabase() { // close the transcription database // if (verbosity_d >= Integral::BRIEF) { String output(L"closing transcription database: "); output.concat(transdb_file_d); Console::putNoWrap(output); } if (!transdb_d.close()) { String msg(L"unable to close transcription database (\n"); msg.concat(transdb_file_d); msg.concat(L")"); Console::put(msg); Error::handle(name(), L"closeTransDatabase", Error::ARG, __FILE__, __LINE__); } // exit gracefully // return true; } // method: retrieveFtrFile // // arguments: // const String& identifier: (input) identifier // Filename& ftr_file: (output) feature file corresponding to the // identifier // return: a bool8 value indicating status // // this method retrieves the feature file from audio database // bool8 SegmentConcat::retrieveFtrFile(const String& identifier_a, Filename& ftr_file_a) { // error checking // if (identifier_a.length() <= (int32)0) { String msg(L"no input identifier"); Console::putNoWrap(msg); Error::handle(name(), L"retrieveFtrFile", ERR, __FILE__, __LINE__); } // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"retrieving feature file for "); output.concat(L"identifier "); output.concat(identifier_a); Console::putNoWrap(output); } // retrieve the feature-file corresponding to the identifier // String ident(identifier_a); if (!audiodb_d.getRecord(ident, ftr_file_a)) { String msg(L"no file for identifier "); msg.concat(identifier_a); msg.concat(L" in the audio database"); Console::putNoWrap(msg); Error::handle(name(), L"retrieveFtrFile", Error::ARG, __FILE__, __LINE__); } // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"retrieved feature file for identifier "); output.concat(identifier_a); output.concat(L" is "); output.concat(ftr_file_a); Console::putNoWrap(output); } // exit gracefully // return true; } // method: readFeatures // // arguments: // const Filename& ftr_file: (input) feature file // Vector& features: (output)features corresponding to the // identifier // return: a bool8 value indicating status // // this method reads the features from the feature file // bool8 SegmentConcat::readFeatures(const Filename& ftr_file_a, Vector& features_a) { // error checking // if (ftr_file_a.length() <= (int32)0) { String msg(L"no input feature file"); Console::putNoWrap(msg); Error::handle(name(), L"readFeatures", ERR, __FILE__, __LINE__); } // local variables // FeatureFile ftr_file_in; Filename input_file; // set the feature file type and format // ftr_file_in.setFileType(input_type_d); if (input_format_d == FeatureFile::RAW) { ftr_file_in.setNumFeatures(dim_d); ftr_file_in.setFileFormat(input_format_d); // ftr_file.setNumChannels(channel_a); } // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"reading feature file: "); output.concat(ftr_file_a); Console::putNoWrap(output); } // open the feature file, and read the header if sof // ftr_file_in.open(ftr_file_a); // get the dimensions of sof file // if (output_format_d == FeatureFile::SOF) { // set the file format // dim_d = ftr_file_in.getNumFeatures(); } // get the feature-vectors from all the channels // int32 num = ftr_file_in.getNumFrames(); // set FeatureFile object's data type as VectorFloat // ftr_file_in.setDataType(FeatureFile::VECTOR_FLOAT); if (ftr_file_in.readFeatureData(features_a, (int32)0, FeatureFile::DEF_START_POS, num) != num) { String msg(L"Error: can't read features from feature file: "); msg.concat(ftr_file_a); Console::put(msg); Error::handle(name(), L"readFeatures", ERR, __FILE__, __LINE__); } // close the feature file // ftr_file_in.close(); // exit gracefully // return true; } // method: retrieveAlign // // arguments: // const String& identifier: (input) identifier // cont String& symbol: (input) symbol whose alignments are retrieved // VectorLong& start_frames: (output) start times for alignments // VectorLong& end_frames: (output) corresponding end times // // return: a bool8 value indicating status // // this method retrieves the alignments from transcription database at // a given level corresponding to the input symbol. if no input symbol // is specified, all the alignments are retrieved // bool8 SegmentConcat::retrieveAlign(const String& identifier_a, const String& symbol_a, VectorLong& start_frames_a, VectorLong& end_frames_a) { // error checking // if (identifier_a.length() <= (int32)0) { String msg(L"no input identifier"); Console::putNoWrap(msg); Error::handle(name(), L"retrieveSymbolAlign", ERR, __FILE__, __LINE__); } // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"retrieving start and end frames "); output.concat(L" for symbol "); output.concat(symbol_a); output.concat(L"corresponding to identifier "); output.concat(identifier_a); Console::putNoWrap(output); } // retrieve the start-times and stop-times of the symbols in the // transcription corresponding to the retrieved identifier at a // given level // Vector annotations; VectorFloat start_times, end_times; String level; level.assign(L"level"); String symbol(symbol_a); String temp_level(level_d); // retreive all alignments at a given level if no input symbol is // provided // String ident(identifier_a); if (symbol_a.length() == (int32)0) { if (!transdb_d.getRecord(ident, level, temp_level, annotations, start_times, end_times)) { String msg(L"no transcription for identifier "); msg.concat(identifier_a); msg.concat(L" in the transcription database"); Console::putNoWrap(msg); Error::handle(name(), L"retrieveSymbolAlign", ERR, __FILE__, __LINE__); } } // else retrive alignments on the basis of symbol // else { if (!transdb_d.getRecord(ident, level, temp_level, symbol, annotations, start_times, end_times)) { String msg(L"no transcription for identifier "); msg.concat(identifier_a); msg.concat(L" in the transcription database"); Console::putNoWrap(msg); Error::handle(name(), L"retrieveSymbolAlign", ERR, __FILE__, __LINE__); } } start_frames_a.assign(start_times); end_frames_a.assign(end_times); // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"retrieved start and stop frames of the symbol"); output.concat(symbol_a); output.concat(L" corresponding to the identifier "); output.concat(identifier_a); output.concat(L" at "); output.concat(level_d); output.concat(L" level are:\n"); output.concat(L"start-frames:\n"); Console::putNoWrap(output); start_frames_a.debug(L"start-frames"); output.concat(L"end-frames:\n"); Console::putNoWrap(output); end_frames_a.debug(L"end-frames"); } // exit gracefully // return true; } // method: writeFeatures // // arguments: // const Filename& ftrfile_name: (input) name of the output feature file // const Vector& features: (input) features // // return: a bool8 value indicating status // // this method write features to an output feature file // bool8 SegmentConcat::writeFeatures(const Filename& ftrfile_name_a, const Vector& features_a) { // error checking // if (ftrfile_name_a.length() <= (int32)0) { String msg(L"no input feature file name"); Console::putNoWrap(msg); Error::handle(name(), L"writeFeatures", ERR, __FILE__, __LINE__); } if (features_a.length() <= (int32)0) { String msg(L"no input features"); Console::putNoWrap(msg); Error::handle(name(), L"writeFeatures", ERR, __FILE__, __LINE__); } // local variables // FeatureFile ftrfile; // set the feature file type and format // ftrfile.setFileType(output_type_d); if (output_format_d == FeatureFile::RAW) { ftrfile.setNumFeatures(dim_d); ftrfile.setFileFormat(output_format_d); // ftrfile.setNumChannels(channel_a); } // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"writing features to the feature file: "); output.concat(ftrfile_name_a); Console::putNoWrap(output); } // open the feature file // ftrfile.open(ftrfile_name_a, File::WRITE_ONLY); // get the feature-vectors to the feature file // Vector temp_features(features_a); // set FeatureFile object's data type as VectorFloat // ftrfile.setDataType(FeatureFile::VECTOR_FLOAT); if (!ftrfile.writeFeatureData(temp_features)) { String msg(L"Error: cannot write features to feature file"); Console::put(msg); Error::handle(name(), L"writeFeatures", ERR, __FILE__, __LINE__); } // close the feature file // ftrfile.close(); // exit gracefully // return true; } // method: writeMinMax // // arguments: // none // // return: a bool8 value indicating status // // this method writes min-max values of each dimension to min-max file // bool8 SegmentConcat::writeMinMax() { // error checking // if (min_max_file_d.length() <= (int32)0) { String msg(L"no output min-max file name specified: "); msg.concat(min_max_file_d); Console::putNoWrap(msg); Error::handle(name(), L"writeMinMax", ERR, __FILE__, __LINE__); } if (min_max_d.length() <= (int32)0) { String msg(L"min-max vector is empty: "); Console::putNoWrap(msg); min_max_d.debug(L"min_max_d"); Error::handle(name(), L"writeMinMax", ERR, __FILE__, __LINE__); } // local variables // Sof min_max_sof; // display a status message // if (verbosity_d >= Integral::BRIEF) { String output(L"writing min-max values in each dimension "); output.concat(L"to the feature file: "); output.concat(min_max_file_d); Console::putNoWrap(output); } // open the feature file // File::TYPE file_type = File::TEXT; if (output_type_d == FeatureFile::BINARY) { file_type = File::BINARY; } min_max_sof.open(min_max_file_d, File::WRITE_ONLY, file_type); // get the feature-vectors to the feature file // if (!min_max_d.write(min_max_sof, (int32)0)) { String msg(L"Error: cannot write min-max values to min-max file"); Console::put(msg); Error::handle(name(), L"writeMinMax", ERR, __FILE__, __LINE__); } // close the output file // min_max_sof.close(); // exit gracefully // return true; }