// file: $isip_ifc/class/sp/SignalDetector/SignalDetector.h
// version: $Id: SignalDetector.h 10214 2005-08-15 05:01:23Z stanley $
//

// make sure definitions are only made once
//
#ifndef ISIP_SIGNAL_DETECTOR
#define ISIP_SIGNAL_DETECTOR

// isip include files
//
#ifndef ISIP_VECTOR
#include <Vector.h>
#endif

#ifndef ISIP_CIRCULAR_BUFFER
#include <CircularBuffer.h>
#endif

#ifndef ISIP_VECTOR_FLOAT
#include <VectorFloat.h>
#endif

#ifndef ISIP_BOOLEAN
#include <Boolean.h>
#endif

#ifndef ISIP_DEBUG_LEVEL
#include <DebugLevel.h>
#endif

// SignalDetector: This class is used to detect the start and stop
// times of a segment of data in a stream of data. The audio data,
// which is represented as a multichannel sequence of floating point
// data is handled as a vector of vector floats. The audio data is
// passed to this class in variable-sized amounts of data. Methods
// are provided to determine the state of the detection, including when
// a valid signal has been detected.
//
class SignalDetector {
  
  //---------------------------------------------------------------------------
  //
  // public constants
  //
  //---------------------------------------------------------------------------
public:
  
  // define the class name:
  //  this string is returned by name() and is the default object name
  //  used by read() and write(). it is only declared here.
  //
  static const String CLASS_NAME;
  
  //----------------------------------------
  //
  // other important constants
  //
  //----------------------------------------
  
  // define the algorithm choices:
  //  ENERGY (0) and ENERGY_ZC (1) are the selectable values; the
  //  energy_zc algorithm is described in this class as combining a
  //  zero-crossing measure with energy. DEF_ALGORITHM aliases ENERGY.
  //
  enum ALGORITHM { ENERGY = 0, ENERGY_ZC , DEF_ALGORITHM = ENERGY };
  
  // define the implementation choices:
  //  DB_POWER (0) is the only implementation declared;
  //  DEF_IMPLEMENTATION aliases it.
  //
  enum IMPLEMENTATION { DB_POWER = 0, DEF_IMPLEMENTATION = DB_POWER };
  
  // define the precision of the endpoints:
  //  the endpoints are successively refined (COARSE = 0, MEDIUM = 1,
  //  FINE = 2). a PRECISION value is passed to computeEndPoints().
  //  DEF_PRECISION aliases COARSE.
  //
  enum PRECISION { COARSE = 0, MEDIUM, FINE, DEF_PRECISION = COARSE };
  
  // define the static objects:
  //  presumably these map ALGORITHM and IMPLEMENTATION values to their
  //  names for i/o - confirm against the implementation file, where
  //  they are defined.
  //
  static const NameMap ALGO_MAP;
  static const NameMap IMPL_MAP;
  
  //----------------------------------------
  //
  // i/o related constants
  //
  //----------------------------------------
  
  // constants relating to implementation and algorithm:
  //  all of the strings in this section are only declared here; their
  //  values are not visible in this header. DEF_PARAM is the default
  //  parameter name used by readData() and writeData(). the PARAM_*
  //  strings are presumably the parameter names used in the parameter
  //  file - confirm against the implementation file.
  //
  static const String DEF_PARAM;
  static const String PARAM_ALGORITHM;
  static const String PARAM_IMPLEMENTATION;
  
  // constants relating to signal processing
  //
  static const String PARAM_NUM_CHANNELS;
  static const String PARAM_SAMPLE_FREQUENCY;
  static const String PARAM_SAMPLE_NUM_BYTES;
  static const String PARAM_CHANNEL_TO_BE_PROCESSED;  
  static const String PARAM_FRAME_DURATION;
  static const String PARAM_WINDOW_DURATION;
  
  // constants relating to preprocessing of the audio data
  //
  static const String PARAM_PREEMPHASIS;
  
  // constants related to the output signal
  //
  static const String PARAM_PAD_TIME;
  
  // this section contains data common to all algorithms:
  //  the basic assumption in this class is that the signal
  //  and noise have different durational properties
  //
  
  // constants relating to the energy levels
  //
  static const String PARAM_SIGNAL_NOMINAL_LEVEL;
  static const String PARAM_SIGNAL_ADAPTATION_DELTA;
  static const String PARAM_SIGNAL_ADAPTATION_CONSTANT;
  
  // constants relating to noise levels
  //
  static const String PARAM_NOISE_NOMINAL_LEVEL;
  static const String PARAM_NOISE_ADAPTATION_DELTA;
  static const String PARAM_NOISE_ADAPTATION_CONSTANT;
  static const String PARAM_NOISE_FLOOR;
  
  // constants related to utterance duration and separation
  //
  static const String PARAM_UTTERANCE_MINIMUM_DURATION;
  static const String PARAM_UTTERANCE_MAXIMUM_DURATION;
  static const String PARAM_UTTERANCE_MINIMUM_SEPARATION;
  static const String PARAM_UTTERANCE_DELTA;
  
  // this section contains data for a specific algorithm
  //
  
  // algorithm: energy_zc
  // implementation: db_power
  // description: combine a zero-crossing measure with energy
  //
  static const String PARAM_ZC_UTTERANCE_THRESH;
  static const String PARAM_ZC_NEGATIVE_THRESH;
  static const String PARAM_ZC_POSITIVE_THRESH;
  
  // constants for debug level
  //
  static const String PARAM_DBGL;
  
  //----------------------------------------
  //
  // default values and arguments
  //
  //---------------------------------------- 
  
  // NOTE(review): several constants below are static const members of
  // floating-point type with in-class initializers. ISO C++ only permits
  // in-class initializers for static const members of integral or
  // enumeration type (constexpr is required from C++11 on), so these
  // declarations rely on a compiler extension - confirm that the
  // supported toolchains accept them.
  //
  // units are not stated in this header; frequencies are presumably in
  // Hz and durations in seconds - confirm against the implementation.
  //
  
  // constants relating to signal processing:
  //  NOTE(review): DEF_CHANNEL_TO_BE_PROCESSED is 1 while
  //  DEF_NUM_CHANNELS is also 1; whether the channel number is
  //  zero-based or one-based cannot be determined from this header.
  //
  static const int32 DEF_NUM_CHANNELS = 1;
  static const float32 DEF_SAMPLE_FREQUENCY = 16000.0;
  static const int32 DEF_SAMPLE_NUM_BYTES = 2;
  static const int32 DEF_CHANNEL_TO_BE_PROCESSED = 1;
  static const float32 DEF_FRAME_DURATION = 0.020;
  static const float32 DEF_WINDOW_DURATION  = 0.030;
  static const int32 DEF_CBUF_LENGTH  = 3000000;
  
  // constants relating to preprocessing of the audio data
  //
  static const float32 DEF_PREEMPHASIS = 0.95;
  
  // constants related to the output signal
  //
  static const float32 DEF_PAD_TIME = 0.10;
  
  // this section contains data common to all algorithms:
  // the basic assumption in this class is that the signal
  // and noise have different durational properties
  //
  
  // constants relating to the energy levels:
  //  presumably expressed in dB, given the db_power implementation
  //  name - confirm.
  //
  static const float32 DEF_SIGNAL_NOMINAL_LEVEL = -30.0;
  static const float32 DEF_SIGNAL_ADAPTATION_DELTA = 5.0;
  static const float32 DEF_SIGNAL_ADAPTATION_CONSTANT = 0.50;
  
  // constants relating to noise levels
  //
  static const float32 DEF_NOISE_NOMINAL_LEVEL = -40.0;
  static const float32 DEF_NOISE_ADAPTATION_DELTA = 5.0;
  static const float32 DEF_NOISE_ADAPTATION_CONSTANT = 0.75;
  static const float32 DEF_NOISE_FLOOR = -80.0;
  
  // constants related to utterance duration and separation
  //
  static const float32 DEF_UTTERANCE_MINIMUM_DURATION = 0.2;
  static const float32 DEF_UTTERANCE_MINIMUM_SEPARATION = 0.1;
  static const float32 DEF_UTTERANCE_MAXIMUM_DURATION = 100.0;
  static const float32 DEF_UTTERANCE_DELTA = 10.0;
  
  // state machine related parameters:
  //  the basic approach in this class is to use a state
  //  machine to keep track of the previous state of the
  //  process. we use a state machine with four states
  //  (SM_NSTATES) and we use the three previous states
  //  (SM_HISTORY) to make decisions. the state names presumably
  //  abbreviate noise (NOIS), noise-to-signal (NTOS),
  //  signal-to-noise (STON) and signal (SIGN) - confirm.
  //
  static const int32 SM_NSTATES = 4;
  static const int32 SM_HISTORY = 3;
  static const int32 SM_STATE_NOIS = 0;
  static const int32 SM_STATE_NTOS = 1;
  static const int32 SM_STATE_STON = 2;
  static const int32 SM_STATE_SIGN = 3;
  
  // weights associated with the noise, signal and transition states;
  // how they are applied is not visible in this header.
  //
  static const float32 SM_WGT_NOIS = 0.0;
  static const float32 SM_WGT_SIGN = 1.0;
  static const float32 SM_WGT_TRAN = 0.5;
  
  // sample value scaling:
  //  DEF_SAMPLE_SCALE_FACTOR is approximately 1/32767, i.e.
  //  1/DEF_SAMPLE_SCALE_FACTOR = 32767, so multiplying a sample value
  //  by this factor normalizes it.
  //
  static const float64 DEF_SAMPLE_SCALE_FACTOR = 3.051850e-05;
  
  // this section contains data for a specific algorithm
  //
  
  // algorithm: energy_zc
  // implementation: db_power
  // description: combine a zero-crossing measure with energy.
  //  all three zero-crossing thresholds default to zero.
  //
  static const int32 DEF_ZC_UTTERANCE_THRESH = 0;
  static const int32 DEF_ZC_POSITIVE_THRESH = 0;
  static const int32 DEF_ZC_NEGATIVE_THRESH = 0;
  
  //----------------------------------------
  //
  // error codes
  //
  //----------------------------------------  
  
  // error codes for this class:
  //  ERR is the base code; the specific codes follow it in steps of
  //  ten. the names indicate overflow of the audio, energy and
  //  zero-crossing buffers and an out-of-bounds condition; where each
  //  one is raised is not visible in this header.
  //
  static const int32 ERR = 80500;
  static const int32 ERR_AUDIO_BUFFER_OVERFLOW = 80510;
  static const int32 ERR_ENERGY_BUFFER_OVERFLOW = 80520;
  static const int32 ERR_ZC_BUFFER_OVERFLOW = 80530;
  static const int32 ERR_OUT_OF_BOUNDS = 80540;
  
  //---------------------------------------------------------------------------
  //
  // protected data
  //
  //---------------------------------------------------------------------------
protected:
  
  // selected algorithm (an ALGORITHM enumeration value)
  //
  ALGORITHM algorithm_d;
  
  // selected implementation (an IMPLEMENTATION enumeration value)
  //
  IMPLEMENTATION implementation_d;
  
  // static memory manager:
  //  shared by all objects of this class; the class-specific
  //  operator new/delete and setGrowSize methods forward to it
  //
  static MemoryManager mgr_d;
  
  // debugging parameters
  //
  DebugLevel debug_level_d;
  
  
  // this flag is used to determine if the object's init method
  // needs to be called. it is set to false by the
  // (algorithm, implementation) constructor and by setAlgorithm,
  // setImplementation and set.
  //
  bool8 is_valid_d;
  
  
  // circular buffer which is used to hold audio data:
  //  presumably one circular buffer per channel - confirm
  //
  Vector<CircularBuffer<Float> > cbuf_d;
  
  //----------------------------------------------------
  //
  // define parameters that are written to the parameter file
  //
  //----------------------------------------------------
  
  // variables relating to signal processing
  //
  Long num_channels_d;
  Float sample_frequency_d;
  Long sample_num_bytes_d;
  Long channel_to_be_processed_d;
  Float frame_duration_d;
  Float window_duration_d;
  
  
  // variables relating to preprocessing of the audio data
  //
  Float preemphasis_d;
  
  // variable related to the output signal
  //
  Float pad_time_d;
  
  // signal level-related energy parameters
  //
  Float signal_nominal_level_d;
  Float signal_adaptation_delta_d;
  Float signal_adaptation_constant_d;
  
  // noise level-related energy parameters
  //
  Float noise_nominal_level_d;
  Float noise_adaptation_delta_d;
  Float noise_adaptation_constant_d;
  Float noise_floor_d;
  
  // utterance-related parameters
  //
  Float utterance_minimum_duration_d;
  Float utterance_maximum_duration_d;
  Float utterance_minimum_separation_d;
  Float utterance_delta_d; 
  
  // adaptive thresholds:
  //  note that these are single values, not one value per channel
  //
  Float noise_threshold_d;
  Float signal_threshold_d;
  
  // the values below are derived from the parameters read from the
  // parameter file. they are plain integers, so they hold no defined
  // value until something assigns them.
  //
  
  // variables relating to signal processing:
  //  presumably the frame and window durations expressed in samples -
  //  confirm against init()
  //
  int32 frame_sample_duration_d;
  int32 window_sample_duration_d;
  
  // utterance-related parameters:
  //  presumably the minimum utterance duration and separation expressed
  //  in samples - confirm against init()
  //
  int32 utterance_minimum_sample_duration_d;
  int32 utterance_minimum_sample_separation_d;
  
  
  //----------------------------------------------------
  //
  // define parameters that are common to all algorithms
  // and implementations
  //
  //----------------------------------------------------
  
  // state machine:
  //  the outer Vector of each member in this section is presumably
  //  indexed by channel, since the methods that operate on this state
  //  take a channel index - confirm against the implementation
  //
  Vector<VectorLong> states_d;
  Vector<VectorLong> durations_d;
  
  // useful counters
  //
  Vector<Long> num_frame_d;
  
  // define variables to handle the energy values
  //
  Vector<CircularBuffer<Long> > egy_d;
  
  // length of the energy buffer (egy_len_d), followed by the remaining
  // energy bookkeeping. egy_cur_d and egy_rlse_d presumably hold the
  // current and released positions in the energy buffer - confirm.
  //
  int32 egy_len_d;
  Vector<Long> egy_cur_d;
  Vector<Long> egy_rlse_d;
  Vector<VectorFloat> egy_data_window_d;
  
  // energy patterns used to detect signal or noise, and their lengths
  //
  Vector<Long> sig_patn_d;
  int32 sig_patn_len_d;
  Vector<Long> nse_patn_d;
  int32 nse_patn_len_d;
  
  
  // information that tracks the state of the detection process
  //
  Vector<Boolean> utt_in_progress_d;
  
  // keeps track of the number of endpoints
  //
  Vector<Long> num_endpoints_d;
  
  // endpoint storage:
  //  start_points_d and stop_points_d hold the detected endpoints; the
  //  utt_*_beg_d / utt_*_end_d members hold utterance boundaries at the
  //  coarse, energy and zero-crossing stages. units (frames, samples or
  //  seconds) are not stated in this header.
  //
  Vector<VectorFloat> start_points_d;
  Vector<VectorFloat> stop_points_d;
  Vector<Long> utt_coarse_beg_d;
  Vector<Long> utt_coarse_end_d;
  Vector<Long> utt_egy_beg_d;
  Vector<Long> utt_egy_end_d;
  Vector<Long> utt_zc_beg_d;
  Vector<Long> utt_zc_end_d;
  
  // algorithm: energy_zc
  // implementation: db_power
  // description: combine a zero-crossing measure with energy
  //
  
  // define variables to handle the zero crossing values
  //
  Vector<CircularBuffer<Long> > zc_d;  
  Vector<VectorFloat> zc_data_window_d;
  
  // zero-crossing thresholds:
  //  single values, not one value per channel
  //
  Long zc_utterance_thresh_d;                            
  Long zc_negative_thresh_d;
  Long zc_positive_thresh_d;
  
  // useful scale factors
  // win_dur_scale_factor_d = 1/(number of samples in a window of data)
  // this value is used in the energy calculation
  //
  Float win_dur_scale_factor_d;
  
  //---------------------------------------------------------------------------
  //
  // required public methods
  //
  //---------------------------------------------------------------------------
public:
  
  // method: name
  //
  // arguments: none
  //
  // return: a reference to the static class name string
  //
  static const String& name() {
    const String& class_name = CLASS_NAME;
    return class_name;
  }
  
  // other static methods:
  //  diagnose takes the debug level to run at; it is only declared here
  //
  static bool8 diagnose(Integral::DEBUG debug_level);
  
  // debug methods:
  //  debug takes a message string; what it prints is not visible in
  //  this header.
  //  NOTE(review): the original comment said "setDebug is inherited from
  //  the base class", but this class declares no base class and no
  //  setDebug method - confirm how debug_level_d is meant to be set.
  //
  bool8 debug(const unichar* msg) const;
  
  // method: destructor
  //  declared here; the definition is not in this header
  //
  ~SignalDetector();
  
  // method: constructor
  //
  // arguments:
  //  ALGORITHM algorithm: (input) algorithm choice to store
  //  IMPLEMENTATION implementation: (input) implementation choice to store
  //
  // return: none
  //
  // this is not the default constructor: both arguments are required.
  // it records the algorithm and implementation and clears is_valid_d,
  // the flag that tells the object its init method needs to be called.
  // the plain integer members are zero-initialized so that they never
  // hold an indeterminate value if the object is read (e.g. copied or
  // compared) before init() runs. initializers are listed in member
  // declaration order.
  //
  SignalDetector(ALGORITHM algorithm,
                 IMPLEMENTATION implementation)
    : algorithm_d(algorithm),
      implementation_d(implementation),
      is_valid_d(false),
      frame_sample_duration_d(0),
      window_sample_duration_d(0),
      utterance_minimum_sample_duration_d(0),
      utterance_minimum_sample_separation_d(0),
      egy_len_d(0),
      sig_patn_len_d(0),
      nse_patn_len_d(0) {
  }
  
  // method: default constructor
  //  declared here; the definition is not in this header, so the
  //  initial state it establishes cannot be determined from this file
  //
  SignalDetector();
  
  // method: copy constructor
  //
  SignalDetector(const SignalDetector& arg) {
    assign(arg);
  }
  
  // assign methods:
  //  assign takes the object to copy from and returns a bool8 status.
  //  it is called by the copy constructor and by operator=; the
  //  definition is not in this header.
  //
  bool8 assign(const SignalDetector& arg);
  
  // method: operator=
  //
  // arguments:
  //  const SignalDetector& arg: (input) object to copy
  //
  // return: a reference to this object
  //
  // the copy is performed by assign(); the status returned by assign()
  // is not checked.
  //
  SignalDetector& operator= (const SignalDetector& arg) {
    this->assign(arg);
    return (*this);
  }
  
  // i/o methods:
  //  read and write take an Sof object, an integer tag and an object
  //  name that defaults to CLASS_NAME. readData and writeData take a
  //  parameter name that defaults to DEF_PARAM; readData additionally
  //  takes a size (default SofParser::FULL_OBJECT) and the param and
  //  nested flags (defaults true and false). all of them return a bool8
  //  status except sofSize, which returns an int32. the definitions are
  //  not in this header.
  //
  int32 sofSize() const;
  
  bool8 read(Sof& sof, int32 tag, const String& name = CLASS_NAME);
  bool8 write(Sof& sof, int32 tag, const String& name = CLASS_NAME) const;
  
  bool8 readData(Sof& sof, const String& pname = DEF_PARAM,
		   int32 size = SofParser::FULL_OBJECT,
		   bool8 param = true,
                   bool8 nested = false);
  bool8 writeData(Sof& sof, const String& pname = DEF_PARAM) const;
  
  // equality methods:
  //  eq compares this object with arg; which members take part in the
  //  comparison is not visible in this header
  //
  bool8 eq(const SignalDetector& arg) const;
  
  // method: new
  //
  // arguments:
  //  size_t size: (input) requested size; not used by this method
  //
  // return: the pointer obtained from the static memory manager
  //
  static void* operator new(size_t size) {
    void* block = mgr_d.get();
    return block;
  }
  
  // method: new[]
  //
  // arguments:
  //  size_t size: (input) requested size, forwarded to the memory manager
  //
  // return: the pointer obtained from the static memory manager
  //
  static void* operator new[](size_t size) {
    void* block = mgr_d.getBlock(size);
    return block;
  }
  
  // method: delete
  //
  // arguments:
  //  void* ptr: (input) pointer to hand back to the static memory manager
  //
  // return: none
  //
  static void operator delete(void* ptr) {
    SignalDetector::mgr_d.release(ptr);
  }
  
  // method: delete[]
  //
  // arguments:
  //  void* ptr: (input) block pointer to hand back to the static memory
  //             manager
  //
  // return: none
  //
  static void operator delete[](void* ptr) {
    SignalDetector::mgr_d.releaseBlock(ptr);
  }
  
  // method: setGrowSize
  //
  // arguments:
  //  int32 grow_size: (input) grow size forwarded to the static memory
  //                   manager
  //
  // return: the status reported by the memory manager
  //
  static bool8 setGrowSize(int32 grow_size) {
    const bool8 status = mgr_d.setGrow(grow_size);
    return status;
  }
  
  // other memory management methods:
  //  clear takes a clear mode that defaults to Integral::DEF_CMODE and
  //  returns a bool8 status; the definition is not in this header
  //
  bool8 clear(Integral::CMODE ctype = Integral::DEF_CMODE);
  
  // method to set the parser:
  //  takes a pointer to a SofParser; whether the object keeps or owns
  //  the pointer is not visible in this header
  //
  bool8 setParser(SofParser* parser);
  
  
  //---------------------------------------------------------------------------
  //
  // class-specific public methods:
  //  set methods
  //
  //---------------------------------------------------------------------------
  
  // method: setAlgorithm
  //
  // arguments:
  //  ALGORITHM algorithm: (input) algorithm choice to store
  //
  // return: always true
  //
  bool8 setAlgorithm(ALGORITHM algorithm) {
    
    // flag that the init method needs to be called again
    //
    is_valid_d = false;
    
    // record the new choice
    //
    algorithm_d = algorithm;
    return true;
  }
  
  // method: setImplementation
  //
  // arguments:
  //  IMPLEMENTATION implementation: (input) implementation choice to store
  //
  // return: always true
  //
  bool8 setImplementation(IMPLEMENTATION implementation) {
    
    // flag that the init method needs to be called again
    //
    is_valid_d = false;
    
    // record the new choice
    //
    implementation_d = implementation;
    return true;
  }
  
  // method: set
  //
  // arguments:
  //  ALGORITHM algorithm: (input) algorithm choice to store
  //  IMPLEMENTATION implementation: (input) implementation choice to store
  //
  // return: always true
  //
  // stores both choices through the single-value set methods, each of
  // which also clears is_valid_d and returns true.
  //
  bool8 set(ALGORITHM algorithm = DEF_ALGORITHM,
            IMPLEMENTATION implementation = DEF_IMPLEMENTATION) {
    const bool8 algo_ok = setAlgorithm(algorithm);
    const bool8 impl_ok = setImplementation(implementation);
    return (algo_ok && impl_ok);
  }
  
  // other set methods:
  //  each method takes one value and returns a bool8 status. the
  //  definitions are not in this header; each one presumably stores its
  //  argument in the corresponding *_d member - confirm, including
  //  whether is_valid_d is cleared as the inline set methods do.
  //  NOTE(review): setdefaultParameters does not follow the
  //  capitalization used by every other method name in this class; it
  //  cannot be renamed here without changing its definition and callers.
  //
  bool8 setdefaultParameters();
  bool8 setNumChannels(int32 num_channels);
  bool8 setSampleNumBytes(int32 sample_num_bytes);
  bool8 setSampleFrequency(float32 sample_rate);
  bool8 setChannelToBeProcessed(int32 chan);
  bool8 setFrameDuration(float32 fram_dur);
  bool8 setWindowDuration(float32 win_dur);
  bool8 setPreemphasis(float32 premphasis);
  bool8 setSigNominalLevel(float32 nom_sig_lev);
  bool8 setSigAdaptDelta(float32 sig_delta);
  bool8 setSigAdaptConst(float32 signal_adapt);
  bool8 setNoiseNominalLevel(float32 nom_noise_lev);
  bool8 setNoiseAdaptDelta(float32 noise_delta);
  bool8 setNoiseAdaptConst(float32 noise_adapt);
  bool8 setNoiseFloor(float32 noise_floor);
  bool8 setUttDelta(float32 utt_delta);
  bool8 setMinUttDur(float32 min_utt_dur);
  bool8 setMinUttSep(float32 min_utt_sep);
  bool8 setMaxUttDur(float32 max_utt_dur);
  bool8 setZcUttThreshold(int32 zc_utt_thresh);
  bool8 setZcNegThreshold(int32 zc_neg_thresh);
  bool8 setZcPosThreshold(int32 zc_pos_thresh);
  bool8 setPadTime(float32 pad_time);
  
  // TODO: a set method for the utterance status is not implemented;
  // only the proposed declaration is kept here
  // bool8 setUttStatus(bool8 utt_status);
  
  
  //---------------------------------------------------------------------------
  //
  // class-specific public methods:
  //  get methods
  //
  //---------------------------------------------------------------------------
  
  // method: getAlgorithm
  //
  // arguments: none
  //
  // return: the stored algorithm choice
  //
  ALGORITHM getAlgorithm() const {
    const ALGORITHM current = algorithm_d;
    return current;
  }
  
  // method: getImplementation
  //
  // arguments: none
  //
  // return: the stored implementation choice
  //
  IMPLEMENTATION getImplementation() const {
    const IMPLEMENTATION current = implementation_d;
    return current;
  }
  
  // method: get
  //
  // arguments:
  //  ALGORITHM& algorithm: (output) receives the stored algorithm choice
  //  IMPLEMENTATION& implementation: (output) receives the stored
  //                                  implementation choice
  //
  // return: always true
  //
  // this method only reads the object, so it is const-qualified like
  // getAlgorithm and getImplementation; this also lets it be called on
  // const objects.
  //
  bool8 get(ALGORITHM& algorithm,
            IMPLEMENTATION& implementation) const {
    algorithm = algorithm_d;
    implementation = implementation_d;
    return true;
  }
  
  
  // other get methods:
  //  the definitions are not in this header; each method presumably
  //  returns the value of the corresponding *_d member - confirm.
  //  NOTE(review): none of these methods is const-qualified, so they
  //  cannot be called on a const object, unlike getAlgorithm and
  //  getImplementation.
  //  NOTE(review): getSampleFrequency and getPadTime are declared to
  //  return int32, while the matching set methods take float32, the
  //  members are Float and the defaults are 16000.0 and 0.10. if the
  //  definitions return the member value, any fractional part is lost
  //  (a pad time of 0.10 would be returned as 0) - confirm whether
  //  float32 was intended.
  //
  int32 getNumChannels();
  int32 getSampleNumBytes();
  int32 getSampleFrequency();
  int32 getChannelToBeProcessed();
  float32 getFrameDuration();
  float32 getWindowDuration();
  float32 getPreemphasis();
  float32 getSigNominalLevel();
  float32 getSigAdaptDelta();
  float32 getSigAdaptConst();
  float32 getNoiseNominalLevel();
  float32 getNoiseAdaptDelta();
  float32 getNoiseAdaptConst();
  float32 getNoiseFloor();
  float32 getUttDelta();
  float32 getMinUttDur();
  float32 getMinUttSep();
  float32 getMaxUttDur();
  int32 getZcUttThreshold();
  int32 getZcNegThreshold();
  int32 getZcPosThreshold();
  int32 getPadTime();
  
  // getUttStatus returns a Vector<Boolean> by value; getEndTime fills
  // the two output vectors for the given channel and returns a bool8
  // status. despite its name, getEndTime outputs both start and end
  // times.
  //
  Vector<Boolean> getUttStatus();
  bool8 getEndTime(VectorFloat& start_time, VectorFloat& end_time, int32 channel_index_a);
  
  
  //---------------------------------------------------------------------------
  //
  // class-specific public methods:
  //  initialization, buffer management and detection methods
  //
  //---------------------------------------------------------------------------
  
  // this method initializes the detector and clears all buffers
  //
  bool8 init();
  
  // this method clears all circular buffers only if an utterance is not
  // in progress. it is used to prevent the detector from accumulating
  // large amounts of memory while it is processing noise.
  //
  bool8 flush();
  
  // this method resets the detector to the start state in which it
  // can begin processing a new utterance. it takes the index of the
  // channel to reset.
  //
  bool8 reset(int32 channel_index_a);
  
  // these methods process an incoming buffer of data. note
  // that data can be passed to this algorithm in variable sized
  // chunks. the apply method accepts multichannel data; the
  // compute method processes a single channel of data.
  // the meaning of the int32 return values is not stated in this
  // header.
  // NOTE(review): apply takes its Vector<VectorFloat> argument by
  // value, so the audio data is copied on each call; a const reference
  // would avoid the copy but requires changing the definition as well.
  //
  int32 apply(Vector<VectorFloat> audio_data);
  int32 compute(int32 channel_index_a);
  
  // these methods allow a user to get the current endpoints
  // while the detection algorithm is in progress. these methods
  // can be used to get the endpoints once a valid utterance has
  // been found. each takes the index of the channel to query;
  // computeEndTime outputs both the start and the stop time.
  //
  bool8 computeEndPoints(PRECISION precision, int32 channel_index_a);
  bool8 computeEndTime(float32& start_time, float32& stop_time, int32 channel_index_a);
  bool8 computeStartTime(float32& start_time, int32 channel_index_a);
  
  
  //---------------------------------------------------------------------------
  //
  // private methods
  //
  //---------------------------------------------------------------------------
private:
  
  // methods common to all algorithms and implementations:
  //  all methods in this section are only declared here
  //
  
  // update all adaptive thresholds:
  //  takes an energy value. this method has no channel argument, and
  //  the adaptive thresholds are single Float members rather than
  //  per-channel vectors.
  //
  bool8 updateThresholds(float64 egy);
  
  // state machine related methods:
  //  both take the index of the channel to operate on;
  //  stateMachineAdvance also takes an energy value. the meaning of its
  //  int32 return value is not stated in this header.
  //
  bool8 stateMachineReset(int32 channel_index_a);
  int32 stateMachineAdvance(float32 egy, int32 channel_index_a);
  
  // circular buffer related methods:
  //  NOTE(review): cbScanForPattern takes its pattern vector by value.
  //  NOTE(review): cbZeroCrossingRelease is the only method in this
  //  group without a channel index argument - confirm that this is
  //  intended.
  //
  int32 cbScanForPattern(Vector<Long> pattern, int32 len, int32 start_frame, int32 channel_index_a);
  bool8 cbSmoothSignal(int32 channel_index_a);
  bool8 cbSmoothNoise(int32 channel_index_a);
  bool8 cbSmoothTransitions(int32 channel_index_a);
  bool8 cbUttInProgress(int32 channel_index_a);
  bool8 cbUttNotInProgress(int32 channel_index_a);
  bool8 cbRemoveShortSignalBursts(int32 channel_index_a);
  bool8 cbRemoveShortNoiseBursts(int32 channel_index_a);
  bool8 cbRemoveLongNoiseBursts(int32 channel_index_a);
  bool8 cbPurge(int32 channel_index_a);
  bool8 cbRelease(int32 frame_index, int32 channel_index_a);
  bool8 cbZeroCrossingRelease(int32 frame_index);  
  
  // description: standard energy computation
  //  takes the signal (by value) and the index of the channel it
  //  belongs to
  //
  float32 computeEnergy(VectorFloat signal, int32 channel_index_a);
  
  // algorithm: energy_zc
  // implementation: db_power
  // description: combine a zero-crossing measure with energy
  //  takes the signal (by value) and the index of the channel it
  //  belongs to
  //
  int32 computeZeroCrossingRate(VectorFloat signal, int32 channel_index_a);
  
};

// end of include file
// 
#endif