quick start:
g++ [flags ...] file ... -l /isip/tools/lib/$ISIP_BINARY/lib_sp.a
SignalDetector();
SignalDetector(ALGORITHM algorithm, IMPLEMENTATION implementation);
boolean apply(Vector<VectorFloat> signal);
boolean getStartPoints(float& t1);
boolean getEndTime(VectorFloat& start_time, VectorFloat& end_time, long channel_index_a);
description:
SignalDetector sig_det;
Sof sof;
String file;
int num_quit_try = 100;
file.assign(L"./diagnose_param.sof");
sof.open(file, File::READ_ONLY);
sig_det.read(sof, (long)0);
sig_det.init();
Vector<VectorFloat> data(1);
AudioFile src;
src.setFileFormat(AudioFile::RAW);
src.setFileType(AudioFile::BINARY);
src.setSampleFrequency(8000);
src.setSampleNumBytes(2);
src.setNumChannels(1);
src.open(L"./diagnose.raw");
long N = sig_det.getNSamplesToRead();
long i = 0;
long len = src.getNumSamples();
len = (long)len / N;
float t1, t2;
long num_utt, nf = 0;
while (i <= len + num_quit_try) {
  src.getData(data(0), 0, i * N, N);
  i++;
  sig_det.apply(data);
  nf++;
}
VectorFloat start_points;
VectorFloat stop_points;
sig_det.getEndTime(start_points, stop_points, 0);
src.close();
sof.close();
An example with comments can be found at the end of the page.
// name of this class; it is the default value of the `name` argument of
// read() and write()
static const String CLASS_NAME = L"SignalDetector";
// detection algorithm selector; DEF_ALGORITHM is an alias for ENERGY
enum ALGORITHM {
  ENERGY = 0,
  ENERGY_ZC = 1,
  DEF_ALGORITHM = ENERGY
};

// implementation selector; DEF_IMPLEMENTATION is an alias for DB_POWER
enum IMPLEMENTATION {
  DB_POWER = 0,
  DEF_IMPLEMENTATION = DB_POWER
};

// endpoint precision selector (taken by computeEndPoints); DEF_PRECISION
// is an alias for COARSE
enum PRECISION {
  COARSE = 0,
  MEDIUM = 1,
  FINE = 2,
  DEF_PRECISION = COARSE
};
// name maps, one for ALGORITHM and one for IMPLEMENTATION (per their names)
// NOTE(review): presumably they map enum values to their string names for
// Sof i/o; the definitions are not visible in this section - confirm
static NameMap ALGO_MAP;
static NameMap IMPL_MAP;
// parameter-name strings: only declared here, their values are defined
// elsewhere. Apart from DEF_PARAM, each PARAM_* name corresponds to one
// configurable quantity of the detector.
static const String DEF_PARAM;
static const String PARAM_ALGORITHM;
static const String PARAM_IMPLEMENTATION;
// audio format parameters
static const String PARAM_NUM_CHANNELS;
static const String PARAM_SAMPLE_FREQUENCY;
static const String PARAM_SAMPLE_NUM_BYTES;
static const String PARAM_CHANNEL_TO_BE_PROCESSED;
// framing, pre-emphasis and padding parameters
static const String PARAM_FRAME_DURATION;
static const String PARAM_WINDOW_DURATION;
static const String PARAM_PREEMPHASIS;
static const String PARAM_PAD_TIME;
// signal level parameters
static const String PARAM_SIGNAL_NOMINAL_LEVEL;
static const String PARAM_SIGNAL_ADAPTATION_DELTA;
static const String PARAM_SIGNAL_ADAPTATION_CONSTANT;
// noise level parameters
static const String PARAM_NOISE_NOMINAL_LEVEL;
static const String PARAM_NOISE_ADAPTATION_DELTA;
static const String PARAM_NOISE_ADAPTATION_CONSTANT;
static const String PARAM_NOISE_FLOOR;
// utterance duration parameters
static const String PARAM_UTTERANCE_MINIMUM_DURATION;
static const String PARAM_UTTERANCE_MAXIMUM_DURATION;
static const String PARAM_UTTERANCE_MINIMUM_SEPARATION;
static const String PARAM_UTTERANCE_DELTA;
// zero-crossing threshold parameters
static const String PARAM_ZC_UTTERANCE_THRESH;
static const String PARAM_ZC_NEGATIVE_THRESH;
static const String PARAM_ZC_POSITIVE_THRESH;
// debug level parameter
static const String PARAM_DBGL;
// default values for the configurable parameters
// NOTE(review): units are not stated in this section; durations are
// presumably seconds and levels presumably dB - confirm
//
// audio format defaults
static const long DEF_NUM_CHANNELS = 1;
static const float DEF_SAMPLE_FREQUENCY = 16000.0;
static const long DEF_SAMPLE_NUM_BYTES = 2;
static const long DEF_CHANNEL_TO_BE_PROCESSED = 1;
// framing, pre-emphasis and padding defaults
static const float DEF_FRAME_DURATION = 0.020;
static const float DEF_WINDOW_DURATION = 0.030;
static const float DEF_PREEMPHASIS = 0.95;
static const float DEF_PAD_TIME = 0.10;
// signal level defaults
static const float DEF_SIGNAL_NOMINAL_LEVEL = -35.0;
static const float DEF_SIGNAL_ADAPTATION_DELTA = 20.0;
static const float DEF_SIGNAL_ADAPTATION_CONSTANT = 0.50;
// noise level defaults
// (the name DEF_NOMINAL_NOISE_LEVEL does not follow the
// PARAM_NOISE_NOMINAL_LEVEL word order; kept unchanged for compatibility)
static const float DEF_NOMINAL_NOISE_LEVEL = -60.0;
static const float DEF_NOISE_ADAPTATION_DELTA = 20.0;
static const float DEF_NOISE_ADAPTATION_CONSTANT = 0.95;
static const float DEF_NOISE_FLOOR = -65.0;
// utterance duration defaults
static const float DEF_UTTERANCE_MINIMUM_DURATION = 0.050;
static const float DEF_UTTERANCE_MINIMUM_SEPARATION = 0.030;
static const float DEF_UTTERANCE_MAXIMUM_DURATION = 10.0;
static const float DEF_UTTERANCE_DELTA = 6.0;
// state machine dimensions
static const long SM_NSTATES = 4;
static const long SM_HISTORY = 3;

// state machine state indices (0 .. SM_NSTATES - 1)
// NOTE(review): NOIS/NTOS/STON/SIGN presumably stand for noise,
// noise-to-signal, signal-to-noise and signal - confirm
static const long SM_STATE_NOIS = 0;
static const long SM_STATE_NTOS = 1;
static const long SM_STATE_STON = 2;
static const long SM_STATE_SIGN = 3;

// state machine weights
static const float SM_WGT_NOIS = 0.0;
static const float SM_WGT_TRAN = 0.5;
static const float SM_WGT_SIGN = 1.0;

// sample scale factor; the value is approximately 1 / 32767
static const double DEF_SAMPLE_SCALE_FACTOR = 3.051850e-05;

// default zero-crossing thresholds
static const long DEF_ZC_UTTERANCE_THRESH = 0;
static const long DEF_ZC_POSITIVE_THRESH = 0;
static const long DEF_ZC_NEGATIVE_THRESH = 0;
// error codes for this class: a base code followed by specific codes
// spaced ten apart
static const long ERR = 80500;
static const long ERR_AUDIO_BUFFER_OVERFLOW = 80510;
static const long ERR_ENERGY_BUFFER_OVERFLOW = 80520;
static const long ERR_ZC_BUFFER_OVERFLOW = 80530;
static const long ERR_OUT_OF_BOUNDS = 80540;

// protected data:
// algorithm and implementation selected for this detector
ALGORITHM algorithm_d;
IMPLEMENTATION implementation_d;

// memory manager shared by all instances of the class (static member)
static MemoryManager mgr_d;

// debugging level
DebugLevel debug_level_d;

// vector of circular buffers of Float
// NOTE(review): presumably one buffer of audio samples per channel - confirm
Vector<CircularBuffer<Float> > cbuf_d;

// audio format parameters (see the matching PARAM_* names and DEF_* defaults)
Long num_channels_d;
Float sample_frequency_d;
Long sample_num_bytes_d;
Long channel_to_be_processed_d;

// framing, pre-emphasis and padding parameters
Float frame_duration_d;
Float window_duration_d;
Float preemphasis_d;
Float pad_time_d;

// signal level parameters
Float signal_nominal_level_d;
Float signal_adaptation_delta_d;
Float signal_adaptation_constant_d;

// noise level parameters
Float noise_nominal_level_d;
Float noise_adaptation_delta_d;
Float noise_adaptation_constant_d;
Float noise_floor_d;

// utterance duration parameters
Float utterance_minimum_duration_d;
Float utterance_minimum_separation_d;
Float utterance_maximum_duration_d;
Float utt_delta_d;

// current noise and signal thresholds
// NOTE(review): presumably maintained by updateThresholds() - confirm
Float noise_threshold_d;
Float signal_threshold_d;
// frame, window and utterance durations expressed as sample counts
// (per the names)
// NOTE(review): presumably derived from the corresponding Float durations
// and sample_frequency_d - confirm
long frame_sample_duration_d;
long window_sample_duration_d;
long utterance_minimum_sample_duration_d;
long utterance_minimum_sample_separation_d;
// state machine bookkeeping, stored as vectors
// NOTE(review): the Vector<...> members below are presumably indexed by
// channel (most methods take a channel_index_a argument) - confirm
Vector<Long> states_d;
Vector<Long> durations_d;
Vector<Long> num_frame_d;
// energy buffers and the positions/lengths associated with them
Vector<CircularBuffer<Long> > egy_d;
long egy_len_d;
long egy_cur_d;
long egy_rlse_d;
Vector<VectorFloat> egy_data_window_d;
// signal and noise patterns with their lengths
// NOTE(review): presumably the patterns passed to cbScanForPattern() - confirm
Vector<Long> sig_patn_d;
long sig_patn_len_d;
Vector<Long> nse_patn_d;
long nse_patn_len_d;
// utterance-in-progress flags
Vector<Boolean> utt_in_progress_d;
// detected start and stop points
Vector<VectorFloat> start_points_d;
Vector<VectorFloat> stop_points_d;
// utterance begin/end markers: coarse, energy-based and zero-crossing-based
// (per the names)
Vector<Long> utt_coarse_beg_d;
Vector<Long> utt_coarse_end_d;
Vector<Long> utt_egy_beg_d;
Vector<Long> utt_egy_end_d;
Vector<Long> utt_zc_beg_d;
Vector<Long> utt_zc_end_d;
// number of endpoints
Vector<Long> num_endpoints_d;
// zero-crossing buffers, data windows and thresholds
Vector<CircularBuffer<Long> > zc_d;
Vector<VectorFloat> zc_data_window_d;
Long zc_utterance_thresh_d;
Long zc_negative_thresh_d;
Long zc_positive_thresh_d;
// window duration scale factor
Float win_dur_scale_factor_d;
// class name, diagnostics and debugging
static const String& name();
static boolean diagnose(Integral::DEBUG level);
boolean debug(const unichar* message) const;
// destructor and constructors: default, algorithm/implementation, and copy
~SignalDetector();
SignalDetector();
SignalDetector(ALGORITHM algorithm, IMPLEMENTATION implementation);
SignalDetector(const SignalDetector& arg);
// assignment from another detector
boolean assign(const SignalDetector& arg);
// Sof file i/o; the object name defaults to CLASS_NAME
long sofSize() const;
boolean read(Sof& sof_a, long tag, const String& name = CLASS_NAME);
boolean write(Sof& sof_a, long tag, const String& name = CLASS_NAME) const;
boolean readData(Sof& sof_a, const String& pname = String::getEmptyString(), long size = SofParser::FULL_OBJECT, boolean param = true, boolean nested = false);
boolean writeData(Sof& sof_a,const String& param = String::getEmptyString()) const;
// equality test against another detector
boolean eq(const SignalDetector& arg) const;
// class-specific allocation and deallocation operators
// NOTE(review): presumably backed by the static mgr_d memory manager, with
// setGrowSize() tuning it - confirm
static void* operator new(size_t size);
static void* operator new[](size_t size);
static void operator delete(void* ptr);
static void operator delete[](void* ptr);
static boolean setGrowSize(long grow_size);
// clear the object; the clear mode defaults to Integral::DEF_CMODE
boolean clear(Integral::CMODE ctype = Integral::DEF_CMODE);
// parser hook
boolean setParser(SofParser* parser);
// set methods: one per configurable parameter, each returning a boolean
//
// algorithm, implementation and defaults
boolean setAlgorithm(ALGORITHM algorithm);
boolean setImplementation(IMPLEMENTATION implementation);
boolean setdefaultParameters();
// audio format
boolean setNumChannels(long num_channels);
boolean setSampleNumBytes(long sample_num_bytes);
boolean setSampleFrequency(float sample_rate);
boolean setChannelToBeProcessed(long chan);
// framing and pre-emphasis
boolean setFrameDuration(float fram_dur);
boolean setWindowDuration(float win_dur);
boolean setPreemphasis(float preemphasis);
// signal level
boolean setSigNominalLevel(float nom_sig_lev);
boolean setSigAdaptDelta(float sig_delta);
boolean setSigAdaptConst(float signal_adapt);
// noise level
boolean setNoiseNominalLevel(float nom_noise_lev);
boolean setNoiseAdaptDelta(float noise_delta);
boolean setNoiseAdaptConst(float noise_adapt);
boolean setNoiseFloor(float noise_floor);
// utterance durations
boolean setUttDelta(float utt_delta);
boolean setMinUttDur(float min_utt_dur);
boolean setMinUttSep(float min_utt_sep);
boolean setMaxUttDur(float max_utt_dur);
// zero-crossing thresholds
boolean setZcUttThreshold(long zc_utt_thresh);
boolean setZcNegThreshold(long zc_neg_thresh);
boolean setZcPosThreshold(long zc_pos_thresh);
// pad time
// NOTE(review): the argument is a long although pad_time_d is a Float and
// DEF_PAD_TIME is 0.10, so a fractional value would be truncated by the
// conversion - confirm against the definition before changing the type
boolean setPadTime(long pad_time);
// get methods: one per configurable parameter
//
// algorithm and implementation
ALGORITHM getAlgorithm() const;
IMPLEMENTATION getImplementation() const;
boolean get(ALGORITHM& algorithm, IMPLEMENTATION& implementation) ;

// audio format
// NOTE(review): getSampleFrequency() returns a long although
// sample_frequency_d is a Float and setSampleFrequency() takes a float -
// confirm against the definition before changing the type
long getNumChannels();
long getSampleNumBytes();
long getSampleFrequency();
long getChannelToBeProcessed();

// framing and pre-emphasis (getFrameDuration is declared exactly once)
float getFrameDuration();
float getWindowDuration();
float getPreemphasis();

// signal level
float getSigNominalLevel();
float getSigAdaptDelta();
float getSigAdaptConst();

// noise level
float getNoiseNominalLevel();
float getNoiseAdaptDelta();
float getNoiseAdaptConst();
float getNoiseFloor();

// utterance durations
float getUttDelta();
float getMinUttDur();
float getMinUttSep();
float getMaxUttDur();

// zero-crossing thresholds and pad time
// NOTE(review): getPadTime() returns a Long although pad_time_d is a
// Float - confirm against the definition
Long getZcUttThreshold();
Long getZcNegThreshold();
Long getZcPosThreshold();
Long getPadTime();

// utterance status flags and detected end times for one channel
Vector<Boolean> getUttStatus();
boolean getEndTime(VectorFloat& start_time, VectorFloat& end_time, long channel_index_a);
// lifecycle methods
boolean init();
boolean flush();
boolean reset();

// processing entry points; apply() takes one VectorFloat of audio data
// per vector entry
long apply(Vector<VectorFloat> audio_data);
long compute(long channel_index_a);

// endpoint computation for one channel
boolean computeEndPoints(PRECISION precision, long channel_index_a);
boolean computeEndTime(float& start_time, float& stop_time, long channel_index_a);
boolean computeStartTime(float& start_time, long channel_index_a);

// threshold adaptation
boolean updateThresholds(double egy);

// state machine control for one channel
boolean stateMachineReset(long channel_index_a);
Long stateMachineAdvance(float egy, long channel_index_a);

// cb* methods, each operating on one channel
// NOTE(review): the cb prefix presumably stands for circular buffer - confirm
long cbScanForPattern(Vector<Long> pattern, long len, long start_frame, long channel_index_a);
boolean cbSmoothSignal(long channel_index_a);
// NOTE(review): cnSmoothNoise is the only method here with a "cn" prefix;
// it is presumably a typo for cbSmoothNoise - confirm against the definition
boolean cnSmoothNoise(long channel_index_a);
boolean cbSmoothTransitions(long channel_index_a);
// return type restored to match the sibling cbUttNotInProgress()
boolean cbUttInProgress(long channel_index_a);
boolean cbUttNotInProgress(long channel_index_a);
boolean cbRemoveShortSignalBursts(long channel_index_a);
boolean cbRemoveShortNoiseBursts(long channel_index_a);
boolean cbRemoveLongNoiseBursts(long channel_index_a);
boolean cbPurge(long channel_index_a);
boolean cbRelease(long frame_index, long channel_index_a);

// per-window feature computation
float computeEnergy(VectorFloat signal, long channel_index_a);
long computeZeroCrossingRate(VectorFloat signal, long channel_index_a);
SignalDetector sig_det; Sof sof; String file; // the number of times to try after the end of file // int num_quit_try = 100; file.assign(L"./diagnose_param.sof"); // try opening the file // sof.open(file, File::READ_ONLY); // read the sof file into the signal detector object // sig_det.read(sof, (long)0); // initialise the signal detector object // sig_det.init(); Vector<VectorFloat> data(1); AudioFile src; // set the file format // src.setFileFormat(AudioFile::RAW); // set the file type // src.setFileType(AudioFile::BINARY); // set the sample frequency, number of bytes per sample and number // of channels // src.setSampleFrequency(8000); src.setSampleNumBytes(2); src.setNumChannels(1); // open file, default is read only // src.open(L"./diagnose.raw"); // defined the sample number for one process // long N = sig_det.getNSamplesToRead(); // define local variable // long i = 0; // get number of samples in this file // long len = src.getNumSamples(); // get how many frames we need to process // len = (long)len / N; // define the start and the stop times // float t1, t2; // define the number od utterances anf the number of frames // long num_utt, nf = 0; // check to see if an utterance has been detected. quit reading the data // only if a valid utterance has been detected. // while (i <= len + num_quit_try) { src.getData(data(0), 0 ,i * N, N); i++; // process data // sig_det.apply(data); } // get the endpoints // VectorFloat start_points; VectorFloat stop_points; sig_det.getEndTime(start_points, stop_points, 0); // close the audiofile object and the sof file object // src.close(); sof.close();