// file: $isip/class/stat/GaussianModel/gaus_08.cc // version: $Id: gaus_08.cc 8756 2002-10-18 21:47:07Z alphonso $ // // isip include files // #include "GaussianModel.h" // method: getLogLikelihood // // arguments: // const VectorFloat& input: (input) test vector // // return: float32 value giving the log-likelihood of the data given the model // // this method gives the distance between the test vector and gaussian // model. we want to comput log-likelihood, which is: // 1 // ------------------------- * exp (-1/2 * [(x-u)' * inverse(Cov) * (x-u)]) // sqrt( (2*pi)^N * |Cov| ) // // 'N' is the dimension of the probability space // 'x' is the input vector // 'u' is the mean of the distribution // 'Cov' is the covariance of the distribution // float32 GaussianModel::getLogLikelihood(const VectorFloat& input_a) { // check for initialization // if ((!is_valid_d) && (!init())) { Error::handle(name(), L"getLogLikelihood", Error::ARG, __FILE__, __LINE__); } // subtract the mean from feature vector to get (x-u) // VectorDouble dbl_deviation; deviation_d.sub(input_a, mean_d); // if mode is NONE, then do normal computation // if (mode_d == NONE) { // compute the inverse of covariance matrix // MatrixFloat cov_inverse; cov_inverse.inverse(covariance_d); // compute the quadratic portion of the gaussian exponential: // (x-u)' * inv(Cov) * (x-u) // float32 value = 0; cov_inverse.quadratic(value, deviation_d); // compute the scale factor: // 1 // ------------------------- // sqrt( (2*pi)^N * |Cov| ) // float64 det = Integral::log(covariance_d.determinant()); float64 tmp = Integral::log(Integral::TWO_PI); scale_d = (float64)0.5 * ((float64)input_a.length() * tmp + det); // compute log-likelihood // return (-scale_d - (float64)0.5 * value); } else if (mode_d == PRECOMPUTE) { // calculate the gaussian formula // float32 value = 0; covariance_d.quadratic(value, deviation_d); return (-scale_d - (float64)0.5 * value); } // we should never get here // return false; } // method: 
accumulate // // arguments: // VectorFloat data: (input) feature vector // // this method accumulates the model parameters using the input features // bool8 GaussianModel::accumulate(VectorFloat& data_a) { // declare local variables // int32 num_feat = data_a.length(); // set the dimensions of the mean vector and covariance matrix // if (mean_d.length() != num_feat) { mean_d.setLength(num_feat); mean_d.clear(Integral::RETAIN); } if ((covariance_d.getNumRows() != num_feat) || (covariance_d.getNumColumns() != num_feat)) { covariance_d.setDimensions(num_feat, num_feat); covariance_d.clear(Integral::RETAIN); } // add the feature values to the mean // mean_d.add(data_a); // store product of features into covariance metrix // if (covariance_d.isDiagonal()) { MatrixFloat covar(num_feat, num_feat, Integral::DIAGONAL); for (int32 l=0; l < num_feat; l++) { covar.setValue(l, l, (float32)(data_a(l) * data_a(l))); } covariance_d.add(covar); } else { MatrixFloat covar(num_feat, num_feat, Integral::FULL); covar.outerProduct(data_a); covariance_d.add(covar); } // exit gracefully // return true; } // method: initialize // // arguments: // VectorFloat& param: (input) initialization parameters // // this method initializes the model parameters using the accumulated features // bool8 GaussianModel::initialize(VectorFloat& param_a) { // declare local variables // MatrixFloat covar; int32 num_feat = 0; int32 num_vect = 0; // check for valid number of parameters // if (param_a.length() != 2) { return Error::handle(name(), L"initialize", Error::ARG, __FILE__, __LINE__); } // get the number of features and the nember of feature vectors // num_feat = param_a(0); num_vect = param_a(1); // compute the mean vector for the feature vectors // mean_d.div(num_vect); // compute the covariance matrix for the feature vectors // covariance_d.div(num_vect - 1); if (covariance_d.isDiagonal()) { for (int32 l=0; l < num_feat; l++) { float32 value = covariance_d.getValue(l, l) - (mean_d(l) * mean_d(l)); 
covariance_d.setValue(l, l, value); } } else { for (int32 l=0; l < num_feat; l++) { for (int32 k=0; k < num_feat; k++) { float32 value = covariance_d.getValue(l, k) - (mean_d(l) * mean_d(k)); covariance_d.setValue(l, k, value); } } } // reset the mode flag // mode_d = NONE; // exit gracefully // return true; } // method: update // // arguments: // VectorFloat& varfloor: (input) variance floor // int32 min_count: (input) minimum model count // // return: a bool8 value indicating status // // [1] L. Rabiner, B. H. Juang, "Fundamentals of Speech Recognition", Prentice // Hall P T R, New Jersey, 1993, pp. 350-352, ISBN 0-13-015157-2 // // General Equation: // // gamma(t)[j, k] = A[t, j] * B[t, j, k] // // // alpha(t)[j] * beta (t)[j] // A[t, j] = ------------------------- // alpha(t)[j] * beta (t)[j] t = [1, .., T] k = [1, .., M] // // c[j, k] * gauss[O(t), mu[j, k] * cov[j, k] // B[t, j, k] = ------------------------------------------ // c[j, m] * gauss[O(t), mu[j, m] * cov[j, m] m = [1, .., M] // // Weights Update Equation: // // gamma(t)[j, k] t = [1, .., T] // c[j, k] = -------------- // gamma(t)[j, k] t = [1, .., T] k = [1, .., M] // // Mean Update Equation: // // gamma(t)[j, k] * O(t) t = [1, .., T] // mu[j, k] = --------------------- (1) // gamma(t)[j, k] t = [1, .., T] // // Covariance Update Equation: // // gamma[j, k] * (O(t) - mu[j, k])^2 t = [1, .., T] // cov[j, k] = --------------------------------- (2) // gamma(t)[j, k] t = [1, .., T] // // this method updates the statistical model parameters using the // accumulated statistics during training // bool8 GaussianModel::update(VectorFloat& varfloor_a, int32 min_count_a) { // declare local variables // Float value; VectorFloat diagonal; MatrixFloat mean_square; // update only if the access count is greater than or equal to the // minimum model count specified by the user input // if (access_accum_d >= min_count_a) { // update the model mean from (1) above // mean_d.assign(mean_accum_d); 
mean_d.div(occ_accum_d); // update the model covariance from (2) above // orig_covar_d.assign(covar_accum_d); orig_covar_d.div(occ_accum_d); mean_square.outerProduct(mean_d); orig_covar_d.sub(mean_square); // get the diagonal of the covariance // if (covariance_d.isDiagonal()) { orig_covar_d.getDiagonal(diagonal); // floor the variance if it falls below a critical value // if (diagonal.length() == varfloor_a.length()) { for (int i=0; i < diagonal.length(); i++) { if (diagonal(i) < varfloor_a(i)) { diagonal(i) = varfloor_a(i); } } } orig_covar_d.makeDiagonal(diagonal); } covariance_d.assign(orig_covar_d); // update the scale factor and invert the covariance if needed // is_valid_d = false; this->init(); } // exit gracefully // return true; } // method: accumulate // // arguments: // VectorDouble& params: (input) training parameters // VectorFloat& data: (input) observations // bool8 precomp: (input) flag that indicate if data is precomputed // // return: a bool8 value indicating status // // [1] L. Rabiner, B. H. Juang, "Fundamentals of Speech Recognition", Prentice // Hall P T R, New Jersey, 1993, pp. 
350-352, ISBN 0-13-015157-2
//
// General Equation:
//
//  gamma(t)[j, k] = A[t, j] * B[t, j, k]                           (0)
//
//                 alpha(t)[j] * beta(t)[j]
//  A[t, j] = --------------------------------      t = [1, .., T]  (1)
//             sum_j alpha(t)[j] * beta(t)[j]
//
//                  c[j, k] * gauss[O(t), mu[j, k], cov[j, k]]
//  B[t, j, k] = ------------------------------------------------
//               sum_m c[j, m] * gauss[O(t), mu[j, m], cov[j, m]]
//                                                   m = [1, .., M]
//
// Weights Update Equation:
//
//                sum_t gamma(t)[j, k]
//  c[j, k] = ---------------------------                           (2)
//             sum_t sum_k gamma(t)[j, k]
//
// Mean Update Equation:
//
//              sum_t gamma(t)[j, k] * O(t)
//  mu[j, k] = -----------------------------                        (3)
//              sum_t gamma(t)[j, k]
//
// Covariance Update Equation:
//
//               sum_t gamma(t)[j, k] * (O(t) - mu[j, k])^2
//  cov[j, k] = --------------------------------------------        (4)
//               sum_t gamma(t)[j, k]
//
// this method accumulates the statistics for the model which are
// needed to update the model parameters during training
//
bool8 GaussianModel::accumulate(VectorDouble& param_a, VectorFloat& data_a,
				bool8 precomp_a) {

  // declare local variables
  //
  float64 gamma = 0;
  float64 alpha = 0;
  float64 beta = 0;
  float64 utter_norm = 0;
  float64 mixture_likelihood = 0;
  float64 mixture_weight = 0;
  float64 model_likelihood = 0;
  float64 min_occupancy = 0;

  // note: these workspaces are static to avoid reallocation across
  // calls, which makes this method non-reentrant
  //
  static VectorFloat observation;
  static MatrixFloat covariance;

  // assumptions:
  //
  //  param_a(0) - forward probability (alpha)
  //  param_a(1) - backward probability (beta)
  //  param_a(2) - utterance normalization, i.e., (1) above
  //  param_a(3) - minimum model probability deviance (currently unused)
  //  param_a(4) - floor on the occupancy probability
  //  param_a(5) - mixture likelihood
  //  param_a(6) - mixture weight
  //  param_a(7) - model likelihood
  //
  // NOTE(review): precomp_a is currently unused in this method --
  // confirm whether precomputed data needs a separate path here
  //
  // retrieve the data
  //
  alpha = param_a(0);
  beta = param_a(1);
  utter_norm = param_a(2);
  min_occupancy = param_a(4);
  mixture_likelihood = param_a(5);
  mixture_weight = param_a(6);
  model_likelihood = param_a(7);

  // compute the state occupancy probability from (0) above: the input
  // terms are combined additively and exponentiated, i.e., they are
  // treated as log-domain quantities
  //
  gamma = Integral::exp(alpha + beta + mixture_weight + model_likelihood -
			utter_norm - mixture_likelihood);

  // determine if the model violates the floor on the occupancy probability
  //
  if (gamma > min_occupancy) {

    // set the dimensions of the accumulators if needed.
    // note: a resize is required when EITHER dimension differs, so the
    // tests are combined with || (the original code used &&, which
    // skipped the resize when only one dimension mismatched; compare
    // the equivalent check in accumulate(VectorFloat&))
    //
    if (mean_accum_d.length() != data_a.length()) {
      observation.setLength(data_a.length());
      mean_accum_d.setLength(data_a.length());
    }
    if ((covar_accum_d.getNumRows() != covariance_d.getNumRows()) ||
	(covar_accum_d.getNumColumns() != covariance_d.getNumColumns())) {
      covariance.changeType(Integral::DIAGONAL);
      covariance.setDimensions(covariance_d.getNumRows(),
			       covariance_d.getNumColumns());
      covar_accum_d.setDimensions(covariance_d.getNumRows(),
				  covariance_d.getNumColumns());
    }

    // accumulate the state occupancy probability
    //
    occ_accum_d += gamma;

    // increment the model access count
    //
    access_accum_d++;

    // accumulate the mean update numerator product from (3) above
    //
    observation.mult(data_a, gamma);
    mean_accum_d.add(observation);

    // accumulate the covariance update numerator product from (4) above.
    // if the covariance is diagonal then we can avoid most of the
    // computations by accumulating only the squared elements
    //
    if (covariance_d.isDiagonal()) {
      observation.mult(data_a, data_a);
      covariance.setDiagonal(observation);
    }
    else {
      covariance.outerProduct(data_a);
    }
    covariance.mult(gamma);
    covar_accum_d.add(covariance);
  }

  // exit gracefully
  //
  return true;
}