// file: $isip/class/mmedia/SegmentConcat/SegmentConcat.h // version: $Id: SegmentConcat.h 9391 2004-01-31 18:48:14Z parihar $ // // make sure definitions are only made once // #ifndef ISIP_SEGMENT_CONCAT #define ISIP_SEGMENT_CONCAT // isip include files // #ifndef ISIP_VECTOR_FLOAT #include #endif #ifndef ISIP_SDB #include #endif #ifndef ISIP_FEATURE_FILE #include #endif #ifndef ISIP_AUDIO_DATABASE #include #endif #ifndef ISIP_TRANSCRIPTION_DATABASE #include #endif #ifndef ISIP_MEMORY_MANAGER #include #endif // SegmentConcat: a class that manuiplates the feature // vectors. currently, three algorithms are supported: // MIN_MAX - compute the min-max for each dimension of the feature-vectors. // SEG_CONCAT - manipulate the feature vectors given the segmental // information on a file basis. // SEG_CONCAT_SYMBOL - manuplate the feature vectors given the segmental // information on a symbol (model) basis. // The two implementations supported for SEG_CONCAT are: // NONE and NORMALIZE. // class SegmentConcat { //--------------------------------------------------------------------------- // // public constants // //--------------------------------------------------------------------------- public: // define the class name // static const String CLASS_NAME; //---------------------------------------- // // other important constants // //---------------------------------------- // define the algorithm choices // enum ALGORITHM { MIN_MAX = 0, SEG_CONCAT, SEG_CONCAT_SYMBOL, DEF_ALGORITHM = MIN_MAX }; // define the implementation choices // enum IMPLEMENTATION { NONE = 0, NORMALIZE, DEF_IMPLEMENTATION = NONE }; // define the static NameMap objects // static const NameMap ALGO_MAP; static const NameMap IMPL_MAP; //---------------------------------------- // // i/o related constants // //---------------------------------------- static const String DEF_PARAM; static const String PARAM_ALGORITHM; static const String PARAM_IMPLEMENTATION; //---------------------------------------- // // default values and arguments // //---------------------------------------- static const bool8 DEF_DURATION_FLAG = true; static const int32 DEF_DIMENSION = 39; static const float64 MAX_LOG_DURATION = 3.912; //---------------------------------------- // // error codes // //---------------------------------------- static const int32 ERR = 50800; //--------------------------------------------------------------------------- // // protected data // //--------------------------------------------------------------------------- protected: // algorithm name // ALGORITHM algorithm_d; // implementation name // IMPLEMENTATION implementation_d; // minimum and maximum of the input features in each dimension - // this variable keeps the rolling min and max values // Vector min_max_d; Filename min_max_file_d; // variables associated to the segmental feature generation // VectorLong ratio_d; bool8 duration_flag_d; String level_d; // variables associated with output feature file // Long dim_d; FeatureFile::FILE_TYPE input_type_d; FeatureFile::FILE_FORMAT input_format_d; // variables associated with output feature file // FeatureFile::FILE_TYPE output_type_d; FeatureFile::FILE_FORMAT output_format_d; String output_directory_d; String output_extension_d; Long output_preserve_d; String output_suffix_d; // variables associated with the databases // Filename audiodb_file_d; Filename transdb_file_d; AudioDatabase audiodb_d; TranscriptionDatabase transdb_d; // declare a static debug level for all class instantiations // static Integral::DEBUG debug_level_d; // declare a static debug level for all class instantiations // static Integral::DEBUG verbosity_d; // static memory manager // static MemoryManager mgr_d; //--------------------------------------------------------------------------- // // required public methods // //--------------------------------------------------------------------------- public: // method: name // static const String& name() { return CLASS_NAME; } // other static methods // static bool8 diagnose(Integral::DEBUG debug_level); // method: setDebug // static bool8 setDebug(Integral::DEBUG arg) { debug_level_d = arg; return true; } // other debug methods // bool8 debug(const unichar* msg) const; // method: destructor // ~SegmentConcat() {} // other constructor // SegmentConcat(ALGORITHM algorithm = DEF_ALGORITHM, IMPLEMENTATION implementation = DEF_IMPLEMENTATION); SegmentConcat(const SegmentConcat& arg) { assign(arg); } // assign methods // bool8 assign(const SegmentConcat& arg); // method: operator= // SegmentConcat& operator= (const SegmentConcat& arg) { assign(arg); return *this; } // i/o methods // int32 sofSize() const; bool8 read(Sof& sof, int32 tag, const String& name = CLASS_NAME); bool8 write(Sof& sof, int32 tag, const String& name = CLASS_NAME) const; bool8 readData(Sof& sof, const String& pname = DEF_PARAM, int32 size = SofParser::FULL_OBJECT, bool8 param = true, bool8 nested = false); bool8 writeData(Sof& sof, const String& pname = DEF_PARAM) const; // equality methods // bool8 eq(const SegmentConcat& arg) const; // method: new // static void* operator new(size_t size) { return mgr_d.get(); } // method: new[] // static void* operator new[](size_t size) { return mgr_d.getBlock(size); } // method: delete // static void operator delete(void* ptr) { mgr_d.release(ptr); } // method: delete[] // static void operator delete[](void* ptr) { mgr_d.releaseBlock(ptr); } // method: setGrowSize // static bool8 setGrowSize(int32 grow_size) { return mgr_d.setGrow(grow_size); } // other memory management methods // bool8 clear(Integral::CMODE ctype = Integral::DEF_CMODE); //--------------------------------------------------------------------------- // // class-specific public methods: // set methods // //--------------------------------------------------------------------------- // method: setAlgorithm // bool8 setAlgorithm(ALGORITHM algorithm) { algorithm_d = algorithm; return true; } // method: setImplementation // bool8 setImplementation(IMPLEMENTATION implementation) { implementation_d = implementation; return true; } // method: setDimension // bool8 setDimension(int32 dim) { dim_d = dim; return true; } // method: setMinMax // bool8 setMinMax(Vector& min_max) { min_max_d = min_max; return true; } // method: setMinMaxFile // bool8 setMinMaxFile(const Filename& min_max_file) { min_max_file_d = min_max_file; return true; } // method: setRatio // bool8 setRatio(const VectorLong& ratio) { ratio_d = ratio; return true; } // method: setDurationFlag // bool8 setDurationFlag(const bool8 duration_flag) { duration_flag_d = duration_flag; return true; } // method: setInputType // bool8 setInputType(const FeatureFile::FILE_TYPE input_type = FeatureFile::DEF_FILE_TYPE) { input_type_d = input_type; return true; } // method: setInputFormat // bool8 setInputFormat(const FeatureFile::FILE_FORMAT input_format = FeatureFile::DEF_FILE_FORMAT) { input_format_d = input_format; return true; } // method: setOutputType // bool8 setOutputType(const FeatureFile::FILE_TYPE output_type = FeatureFile::DEF_FILE_TYPE) { output_type_d = output_type; return true; } // method: setOutputFormat // bool8 setOutputFormat(const FeatureFile::FILE_FORMAT output_format = FeatureFile::DEF_FILE_FORMAT) { output_format_d = output_format; return true; } // method: setOutputDirectory // bool8 setOutputDirectory(const String& new_dir) { return output_directory_d.assign(new_dir); } // method: setOutputExtension // bool8 setOutputExtension(const String& extension) { output_extension_d = extension; return true; } // method: setOutputPreserve // bool8 setOutputPreserve(const int32 new_preserve_level) { return output_preserve_d.assign(new_preserve_level); } // method: setOutputSuffix // bool8 setOutputSuffix(const String& suffix) { output_suffix_d = suffix; return true; } // method: setVerbosity // bool8 setVerbosity(const Integral::DEBUG verbosity) { verbosity_d = verbosity; return true; } // method: setAudioDatabaseFile // bool8 setAudioDatabaseFile(const Filename& audiodb_file) { audiodb_file_d = audiodb_file; return true; } // method: setTransDatabaseFile // bool8 setTransDatabaseFile(const Filename& transdb_file) { transdb_file_d = transdb_file; return true; } // method: setLevel // bool8 setLevel(const String& level) { level_d = level; return true; } //--------------------------------------------------------------------------- // // class-specific public methods: // get methods // //--------------------------------------------------------------------------- // method: getAlgorithm // ALGORITHM getAlgorithm() const { return algorithm_d; } // method: getImplementation // IMPLEMENTATION getImplementation() const { return implementation_d; } // method: getDimension // int32 getDimension() const { return dim_d; } // method: getMinMax // Vector getMinMax() const { return min_max_d; } // method: getMinMaxFile // Filename getMinMaxFile() const { return min_max_file_d; } // method: getRatio // VectorLong getRatio() const { return ratio_d; } // method: getDurationFlag // bool8 getDurationFlag() const { return duration_flag_d; } // method: getInputType // FeatureFile::FILE_TYPE getInputType() const { return input_type_d; } // method: getInputFormat // FeatureFile::FILE_FORMAT getInputFormat() const { return input_format_d; } // method: getOutputType // FeatureFile::FILE_TYPE getOutputType() const { return output_type_d; } // method: getOutputFormat // FeatureFile::FILE_FORMAT getOutputFormat() const { return output_format_d; } // method: getOutputDirectory // String getOutputDirectory() const { return output_directory_d; } // method: getOutputExtension // String getOutputExtension() const { return output_extension_d; } // method: getOutputPreserve // int32 getOutputPreserve() const { return (int32)output_preserve_d; } // method: getOutputSuffix // String getOutputSuffix() const { return output_suffix_d; } // method: getAudioDatabaseFile // Filename getAudioDatabaseFile() const { return audiodb_file_d; } // method: getTransDatabaseFile // Filename getTransDatabaseFile() const { return transdb_file_d; } // method: getLevel // String getLevel() const { return level_d; } //--------------------------------------------------------------------------- // // class-specific public methods: // computational methods // //--------------------------------------------------------------------------- // compute method // bool8 compute(Sdb& sdb_features, Sdb& sdb_symbols); //--------------------------------------------------------------------------- // // private methods // //--------------------------------------------------------------------------- private: // method to compute minimun and maximum values in each dimension // bool8 computeMinMax(Sdb& sdb_features); // method to compute concatenated segmental features on the input // feature file basis // bool8 computeSegFeatures(Sdb& sdb_features); // method to compute concatenated segmental features on the symbol // (model) basis // bool8 computeSegFeaturesSymbol(Sdb& sdb_features, Sdb& sdb_symbols); // methods to open databases // bool8 openAudioDatabase(); bool8 openTransDatabase(); // methods to close databases // bool8 closeAudioDatabase(); bool8 closeTransDatabase(); // method to retrieve feature file from audio database // bool8 retrieveFtrFile(const String& identifier, Filename& ftr_file); // method to retrieve alignments from transcription database // bool8 retrieveAlign(const String& identifier, const String& symbol, VectorLong& start_frame, VectorLong& end_frames); // method to read features from feature file // bool8 readFeatures(const Filename& ftr_file, Vector& features); // method to write features to feature file // bool8 writeFeatures(const Filename& ftr_file, const Vector& features); // method to write min-max values to min-max file // bool8 writeMinMax(); // method to generate segmental features // bool8 generateSegFeature(const Vector& features, const VectorLong& start_frames, const VectorLong& end_frames, Vector& seg_features); // method to normalize features // bool8 normalizeFeatures(const Vector& features, Vector& norm_features); }; // end of include file // #endif