name: SignalDetector

synopsis:

g++ [flags ...] file ... -l /isip/tools/lib/$ISIP_BINARY/lib_sp.a

SignalDetector();
SignalDetector(ALGORITHM algorithm,IMPLEMENTATION implementation);
boolean apply(Vector<VectorFloat> signal);
boolean getStartPoints(float& t1);
boolean getEndTime(VectorFloat& start_time, 
		   VectorFloat& end_time, 
		   long channel_index_a);

quick start:

SignalDetector sig_det;
  Sof sof;
  String file;
  int num_quit_try = 100;
  file.assign(L"./diagnose_param.sof");
  sof.open(file, File::READ_ONLY);
  sig_det.read(sof, (long)0);
  sig_det.init();
  Vector<VectorFloat> data(1);
  AudioFile src;
  src.setFileFormat(AudioFile::RAW);  
  src.setFileType(AudioFile::BINARY);  
  src.setSampleFrequency(8000);
  src.setSampleNumBytes(2);
  src.setNumChannels(1);
  src.open(L"./diagnose.raw");
  long N = sig_det.getNSamplesToRead(); 
  long i = 0;
  long len = src.getNumSamples(); 
  len = (long)len / N;     
  float t1, t2;
  long num_utt, nf = 0;
  while (i <= len + num_quit_try) {
 	
    src.getData(data(0), 0, i * N, N); 
    i++;	
    sig_det.apply(data);
    nf++;
}
  VectorFloat start_points;	
  VectorFloat stop_points; 			
  sig_det.getEndTime(start_points, stop_points, 0);	
  src.close();
  sof.close();

An example with comments can be found at the end of the page.

description:

This class is used to detect the start and stop times of a segment of data in a stream of data. The audio data, which is represented as a multichannel sequence of floating point data, is handled as a vector of vector floats. The audio data is passed to this class in variable-sized amounts. Methods are provided to determine the state of the detection, including when a valid signal has been detected.

dependencies:

public constants:

define the class names:

static const String CLASS_NAME = L"SignalDetector";

define the enumeration data types:

enum ALGORITHM { ENERGY = 0, ENERGY_ZC , DEF_ALGORITHM = ENERGY } ;

enum IMPLEMENTATION { DB_POWER = 0, DEF_IMPLEMENTATION = DB_POWER };

enum PRECISION { COARSE = 0, MEDIUM, FINE, DEF_PRECISION = COARSE };

define the names for each of the enumerated values:
```
static NameMap  ALGO_MAP;
```
```
static NameMap IMPL_MAP;
```

define i/o related constants:

static const String DEF_PARAM;

static const String PARAM_ALGORITHM;

static const String PARAM_IMPLEMENTATION;

static const String PARAM_NUM_CHANNELS;

static const String PARAM_SAMPLE_FREQUENCY;

static const String PARAM_SAMPLE_NUM_BYTES;

static const String PARAM_CHANNEL_TO_BE_PROCESSED;

static const String PARAM_FRAME_DURATION;

static const String PARAM_WINDOW_DURATION;

static const String PARAM_PREEMPHASIS;

static const String PARAM_PAD_TIME;

static const String PARAM_SIGNAL_NOMINAL_LEVEL;

static const String PARAM_SIGNAL_ADAPTATION_DELTA;

static const String PARAM_SIGNAL_ADAPTATION_CONSTANT;

static const String PARAM_NOISE_NOMINAL_LEVEL;

static const String PARAM_NOISE_ADAPTATION_DELTA;

static const String PARAM_NOISE_ADAPTATION_CONSTANT;

static const String PARAM_NOISE_FLOOR;

static const String PARAM_UTTERANCE_MINIMUM_DURATION;

static const String PARAM_UTTERANCE_MAXIMUM_DURATION;

static const String PARAM_UTTERANCE_MINIMUM_SEPARATION;

static const String PARAM_UTTERANCE_DELTA;

static const String PARAM_ZC_UTTERANCE_THRESH;

static const String PARAM_ZC_NEGATIVE_THRESH;

static const String PARAM_ZC_POSITIVE_THRESH;

static const String PARAM_DBGL;

define default signal processing parameters:

static const long DEF_NUM_CHANNELS = 1;

static const float DEF_SAMPLE_FREQUENCY = 16000.0;

static const long DEF_SAMPLE_NUM_BYTES = 2;

static const long DEF_CHANNEL_TO_BE_PROCESSED = 1;

static const float DEF_FRAME_DURATION = 0.020;

static const float DEF_WINDOW_DURATION  = 0.030;

static const float DEF_PREEMPHASIS = 0.95;

static const float DEF_PAD_TIME = 0.10;

constants relating to the energy levels:

static const float DEF_SIGNAL_NOMINAL_LEVEL = -35.0;

static const float DEF_SIGNAL_ADAPTATION_DELTA = 20.0;

static const float DEF_SIGNAL_ADAPTATION_CONSTANT = 0.50;

constants relating to the noise levels:

static const float DEF_NOMINAL_NOISE_LEVEL = -60.0;

static const float DEF_NOISE_ADAPTATION_DELTA = 20.0;

static const float DEF_NOISE_ADAPTATION_CONSTANT = 0.95;

static const float DEF_NOISE_FLOOR = -65.0;

constants related to utterance duration and separation:

static const float DEF_UTTERANCE_MINIMUM_DURATION = 0.050;

static const float DEF_UTTERANCE_MINIMUM_SEPARATION = 0.030;

static const float DEF_UTTERANCE_MAXIMUM_DURATION = 10.0;

static const float DEF_UTTERANCE_DELTA = 6.0;

state machine related stuff:

static const long SM_NSTATES = 4;

static const long SM_HISTORY = 3;

static const long SM_STATE_NOIS = 0;

static const long SM_STATE_NTOS = 1;

static const long SM_STATE_STON = 2;

static const float SM_WGT_NOIS = 0.0;

static const long SM_STATE_SIGN = 3;

static const float SM_WGT_SIGN = 1.0;

static const float SM_WGT_TRAN = 0.5;

sample value scaling:

static const double DEF_SAMPLE_SCALE_FACTOR = 3.051850e-05;

zero crossing related parameters:

static const long DEF_ZC_UTTERANCE_THRESH = 0;

static const long DEF_ZC_POSITIVE_THRESH = 0;

static const long DEF_ZC_NEGATIVE_THRESH = 0;

error codes:

static const long ERR = 80500;

static const long ERR_AUDIO_BUFFER_OVERFLOW = 80510;

static const long ERR_ENERGY_BUFFER_OVERFLOW = 80520;

static const long ERR_ZC_BUFFER_OVERFLOW = 80530;

static const long ERR_OUT_OF_BOUNDS = 80540;

protected data:

algorithm name:
```
ALGORITHM algorithm_d;
```
implementation name:
```
IMPLEMENTATION implementation_d;
```
static memory manager:
```
static MemoryManager mgr_d;
```
debugging parameters:
```
DebugLevel debug_level_d;
```
circular buffer which is used to hold audio:
```
Vector<CircularBuffer<Float> > cbuf_d;
```

signal processing parameters:

Long num_channels_d;

Float sample_frequency_d;

Long sample_num_bytes_d;

Long channel_to_be_processed_d;

Float frame_duration_d;

Float window_duration_d;

Float preemphasis_d;

utterance pad time:
```
Float pad_time_d;
```

signal level-related energy parameters:

Float signal_nominal_level_d;

Float signal_adaptation_delta_d;

Float signal_adaptation_constant_d;

noise level-related energy parameters:

Float noise_nominal_level_d;

Float noise_adaptation_delta_d;

Float noise_adaptation_constant_d;

Float noise_floor_d;

utterance related parameters:

Float utterance_minimum_duration_d;

Float utterance_minimum_separation_d;

Float utterance_maximum_duration_d;

Float utt_delta_d;

adaptive thresholds:

Float noise_threshold_d;

Float signal_threshold_d;

variables relating to signal processing:

 long frame_sample_duration_d;

long window_sample_duration_d;

utterance-related parameters:

long utterance_minimum_sample_duration_d;

long utterance_minimum_sample_separation_d;

state machine:

Vector<Long> states_d;

Vector<Long> durations_d;

useful counters:
```
Vector<Long> num_frame_d;
```
circular buffer that holds energy states:
```
Vector<CircularBuffer<Long> > egy_d;
```

circular buffer related parameters:

long egy_len_d;

long egy_cur_d;

long egy_rlse_d;

Vector<VectorFloat> egy_data_window_d;

utterance pattern matching parameters:

Vector<Long> sig_patn_d;

long sig_patn_len_d;

Vector<Long> nse_patn_d;

long nse_patn_len_d;

flag that keeps track whether an utterance is in progress:
```
Vector<Boolean> utt_in_progress_d;
```

endpoint related values:

Vector<VectorFloat> start_points_d;

Vector<VectorFloat> stop_points_d;

Vector<Long> utt_coarse_beg_d;

Vector<Long> utt_coarse_end_d;

Vector<Long> utt_egy_beg_d;

Vector<Long> utt_egy_end_d;

Vector<Long> utt_zc_beg_d;

Vector<Long> utt_zc_end_d;

Vector<Long> num_endpoints_d;

zero crossing related values:

Vector<CircularBuffer<Long> > zc_d;

Vector<VectorFloat> zc_data_window_d;

Long zc_utterance_thresh_d;

Long zc_negative_thresh_d;

Long zc_positive_thresh_d;

useful scaling factor:
```
Float win_dur_scale_factor_d;
```

required public methods:

static methods:

static const String& name();

static boolean diagnose(Integral::DEBUG level);

debug methods:

boolean debug(const unichar* message) const;

destructor/constructor(s):

~SignalDetector();

SignalDetector();

SignalDetector(ALGORITHM algorithm, IMPLEMENTATION implementation);

SignalDetector(const SignalDetector& arg);

assign methods:

boolean assign(const SignalDetector& arg);

i/o methods:

long sofSize() const;

boolean read(Sof& sof_a, long tag, const String& name = CLASS_NAME);

boolean write(Sof& sof_a, long tag, const String& name = CLASS_NAME) const;

boolean readData(Sof& sof_a, const String& pname = String::getEmptyString(), long size = SofParser::FULL_OBJECT, boolean param = true, boolean nested = false);

boolean writeData(Sof& sof_a,const String& param = String::getEmptyString()) const;

equality methods:

boolean eq(const SignalDetector& arg) const;

memory management methods:

static void* operator new(size_t size);

static void* operator new[](size_t size);

static void operator delete(void* ptr);

static void operator delete[](void* ptr);

static boolean setGrowSize(long grow_size);

boolean clear(Integral::CMODE ctype = Integral::DEF_CMODE);

boolean setParser(SofParser* parser);

class-specific public methods:

set methods:

boolean setAlgorithm(ALGORITHM algorithm);

boolean setImplementation(IMPLEMENTATION implementation);

boolean setdefaultParameters();

boolean setNumChannels(long num_channels);

boolean setSampleNumBytes(long sample_num_bytes);

boolean setSampleFrequency(float sample_rate);

boolean setChannelToBeProcessed(long chan);

boolean setFrameDuration(float fram_dur);

boolean setWindowDuration(float win_dur);

boolean setPreemphasis(float preemphasis);

boolean setSigNominalLevel(float nom_sig_lev);

boolean setSigAdaptDelta(float sig_delta);

boolean setSigAdaptConst(float signal_adapt);

boolean setNoiseNominalLevel(float nom_noise_lev);

boolean setNoiseAdaptDelta(float noise_delta);

boolean setNoiseAdaptConst(float noise_adapt);

boolean setNoiseFloor(float noise_floor);

boolean setUttDelta(float utt_delta);

boolean setMinUttDur(float min_utt_dur);

boolean setMinUttSep(float min_utt_sep);

boolean setMaxUttDur(float max_utt_dur);

boolean setZcUttThreshold(long zc_utt_thresh);

boolean setZcNegThreshold(long zc_neg_thresh);

boolean setZcPosThreshold(long zc_pos_thresh);

boolean setPadTime(long pad_time);

get methods:

ALGORITHM getAlgorithm() const;

IMPLEMENTATION getImplementation() const;

boolean get(ALGORITHM& algorithm, IMPLEMENTATION& implementation);

long getNumChannels();

long getSampleNumBytes();

long getSampleFrequency();

float getFrameDuration();

long getChannelToBeProcessed();

float getFrameDuration();

float getWindowDuration();

float getPreemphasis();

float getSigNominalLevel();

float getSigAdaptDelta();

float getSigAdaptConst();

float getNoiseNominalLevel();

float getNoiseAdaptDelta();

float getNoiseAdaptConst();

float getNoiseFloor();

float getUttDelta();

float getMinUttDur();

float getMinUttSep();

float getMaxUttDur();

Long getZcUttThreshold();

Long getZcNegThreshold();

Long getZcPosThreshold();

Long getPadTime();

Vector<Boolean> getUttStatus();

boolean getEndTime(VectorFloat& start_time, VectorFloat& end_time, long channel_index_a);

public methods:

initialization methods:

boolean init();

boolean flush();

boolean reset();

computational methods:

long apply(Vector<VectorFloat> audio_data);

long compute(long channel_index_a);

endpoint processing:

boolean computeEndPoints(PRECISION precision, long channel_index_a);

boolean computeEndTime(float& start_time, float& stop_time, long channel_index_a);

boolean computeStartTime(float& start_time, long channel_index_a);

public methods:

update thresholds:
```
boolean updateThresholds(double egy);
```

state machine related methods:

boolean stateMachineReset(long channel_index_a);

Long stateMachineAdvance(float egy, long channel_index_a);

circular buffer related methods:

long cbScanForPattern(Vector<Long> pattern, long len, long start_frame, long channel_index_a);

boolean cbSmoothSignal(long channel_index_a);

boolean cnSmoothNoise(long channel_index_a);

boolean cbSmoothTransitions(long channel_index_a);

boolean cbUttInProgress(long channel_index_a);

boolean cbUttNotInProgress(long channel_index_a);

boolean cbRemoveShortSignalBursts(long channel_index_a);

boolean cbRemoveShortNoiseBursts(long channel_index_a);

boolean cbRemoveLongNoiseBursts(long channel_index_a);

boolean cbPurge(long channel_index_a);

boolean cbRelease(long frame_index, long channel_index_a);

computational methods:

float computeEnergy(VectorFloat signal, long  channel_index_a);

long computeZeroCrossingRate(VectorFloat signal, long channel_index_a);

examples:

This example demonstrates how to use the SignalDetector class:

  SignalDetector sig_det;
  Sof sof;
  String file;

  // the number of times to try after the end of file
  //
  int num_quit_try = 100;
  file.assign(L"./diagnose_param.sof");
 
  // try opening the file
  //
  sof.open(file, File::READ_ONLY);

  // read the sof file into the signal detector object
  //
  sig_det.read(sof, (long)0);

  // initialise the signal detector object
  //
  sig_det.init();
  Vector<VectorFloat> data(1);
  AudioFile src;

  // set the file format
  //
  src.setFileFormat(AudioFile::RAW);  

  // set the file type
  //
  src.setFileType(AudioFile::BINARY);  

  // set the sample frequency, number of bytes per sample and number
  // of channels
  //
  src.setSampleFrequency(8000);
  src.setSampleNumBytes(2);
  src.setNumChannels(1);
  
  // open file, default is read only
  //
  src.open(L"./diagnose.raw");

  // define the number of samples to read for one processing step
  //
  long N = sig_det.getNSamplesToRead(); 
  
  // define local variable
  //
  long i = 0;  

  // get number of samples in this file
  //
  long len = src.getNumSamples(); 
  
  // get how many frames we need to process
  //
  len = (long)len / N;     
  
  // define the start and the stop times
  //
  float t1, t2;
  
  // define the number of utterances and the number of frames
  //
  long num_utt, nf = 0;
  
  // read the data one block of N samples at a time and pass each block
  // to the detector. the loop keeps going for about num_quit_try blocks
  // past the end of the file.
  //
  while (i <= len + num_quit_try) {
    
    src.getData(data(0), 0 ,i * N, N); 
    i++;
       
    // process data
    //
    sig_det.apply(data);
        
  }
 
  // get the endpoints
  //
  VectorFloat start_points;	
  VectorFloat stop_points; 			
  sig_det.getEndTime(start_points, stop_points, 0);	
  // close the audiofile object and the sof file object
  //
  src.close();
  sof.close();

notes:

none.