// file: $isip/class/stats/MixtureModel/mm_08.cc
// version: $Id: mm_08.cc 8756 2002-10-18 21:47:07Z alphonso $
//

// isip include files
//
#include "MixtureModel.h"

// method: getLikelihood
//
// arguments:
//  const VectorFloat& input: (input) test vector
//
// return: a float32 value giving the likelihood
//
// this method gives the likelihood of the input vector given the parameters
// of the mixture model
//
float32 MixtureModel::getLikelihood(const VectorFloat& input_a) {

  // make sure the model is initialized. if initialization fails, report
  // the error and stop here instead of scoring with an invalid model
  //
  if ((!is_valid_d) && (!init())) {
    return Error::handle(name(), L"getLikelihood", Error::ARG,
                         __FILE__, __LINE__);
  }

  // verify that the model list is not empty and position the list at
  // its first model
  //
  if (!models_d.gotoFirst()) {
    return Error::handle(name(), L"getLikelihood", ERR, __FILE__, __LINE__);
  }

  // declare local variables
  //
  float32 score = 0.0;
  int32 i = 0;

  // get the cumulative mixture score, which is a series of multiply-adds
  // of (linear weight) * (model likelihood). in NONE mode the stored
  // weights are already linear. in any other mode (PRECOMPUTE) they are
  // stored as log-weights and must be changed back to linear first.
  //
  do {

    // work on a local copy of the weight so that the stored value is
    // never modified by the conversion.
    // NOTE(review): the scalar exp() method with no argument may operate
    // in place on the stored weight - confirm against the scalar class
    //
    float32 weight = weights_d(i++);
    if (mode_d != NONE) {
      weight = exp(weight);
    }

    score += weight * models_d.getCurr()->getLikelihood(input_a);
  } while (models_d.gotoNext());

  // exit gracefully
  //
  return score;
}

// method: getLogLikelihood
//
// arguments:
//  const VectorFloat& input: (input) test vector
//
// return: a float32 value giving the log-likelihood
//
// this method gives the log-likelihood of the input vector given the
// parameters of the mixture model
//
float32 MixtureModel::getLogLikelihood(const VectorFloat& input_a) {

  // make sure the model is initialized. if initialization fails, report
  // the error and stop here instead of scoring with an invalid model
  //
  if ((!is_valid_d) && (!init())) {
    return Error::handle(name(), L"getLogLikelihood", Error::ARG,
                         __FILE__, __LINE__);
  }

  // make sure the model list is not empty and move to the front of the
  // model list. this check is common to both modes
  //
  if (!models_d.gotoFirst()) {
    return Error::handle(name(), L"getLogLikelihood", ERR,
                         __FILE__, __LINE__);
  }

  // select the log-weights to use:
  //  mode NONE: the stored weights are linear, so convert them to
  //             log-weights in a local vector
  //  otherwise (PRECOMPUTE): the stored weights are already log-weights
  //
  VectorFloat converted_weights;
  if (mode_d == NONE) {
    converted_weights.log(weights_d);
  }
  VectorFloat& log_weights =
    (mode_d == NONE) ? converted_weights : weights_d;

  // define the variables to hold the score
  //
  float32 output_score = MIN_SCORE;
  int32 i = 0;

  // accumulate the log likelihood over all models in the mixture
  //
  do {
    const float32 tmp_score = log_weights(i++) +
      models_d.getCurr()->getLogLikelihood(input_a);

    // carry out the log addition taking care not to underflow
    //
    output_score = Integral::logAddLog(output_score, tmp_score);
  } while (models_d.gotoNext());

  // exit gracefully
  //
  return output_score;
}

// method: update
//
// arguments:
//   VectorFloat& varfloor: (input) variance floor
//   int32 min_count: (input) minimum model count 
//
// return: a bool8 value indicating status
//
// [1] L. Rabiner, B. H. Juang, "Fundamentals of Speech Recognition", Prentice
// Hall P T R, New Jersey, 1993, pp. 350-352, ISBN 0-13-015157-2
//
// General Equation:
//                  
// gamma(t)[j, k] = A[t, j] * B[t, j, k]
//                  
//
//            alpha(t)[j] * beta (t)[j]
// A[t, j] =  -------------------------
//            alpha(t)[j] * beta (t)[j] t = [1, .., T]  k = [1, .., M]
//
//               c[j, k] * gauss[O(t), mu[j, k], cov[j, k]]
// B[t, j, k] =  ------------------------------------------
//               c[j, m] * gauss[O(t), mu[j, m], cov[j, m]]  m = [1, .., M]
//
// Weights Update Equation:
// 
//           gamma(t)[j, k]  t = [1, .., T]                                 (1)
// c[j, k] = --------------
//           gamma(t)[j, k]  t = [1, .., T]  k = [1, .., M]                 (2)
//
// Mean Update Equation:
//
//            gamma(t)[j, k] * O(t)  t = [1, .., T]
// mu[j, k] = ---------------------
//                gamma(t)[j, k]     t = [1, .., T]
//
// Covariance Update Equation:
//
//             gamma[j, k] * (O(t) - mu[j, k])^2  t = [1, .., T]
// cov[j, k] = ---------------------------------
//                      gamma(t)[j, k]            t = [1, .., T]
//
// this method updates the statistical model parameters using the
// accumulated statistics during training
//
bool8 MixtureModel::update(VectorFloat& varfloor_a, int32 min_count_a) {

  // pass 1: sum the occupancies of all models in the mixture. the sum is
  // carried out in the log domain, starting from the smallest log value,
  // and gives the denominator (2) of the weight update equation
  //
  float32 log_total = Integral::DB_LOG_MIN_VALUE;

  bool8 has_model = models_d.gotoFirst();
  while (has_model) {
    float32 log_occ = log(models_d.getCurr()->getOccupancy());
    log_total = Integral::logAddLog(log_total, log_occ);       // from (2)
    has_model = models_d.gotoNext();
  }

  // pass 2: set each mixture weight to the ratio of the model occupancy
  // to the mixture occupancy. in NONE mode the weight is stored as a
  // linear value, otherwise it is stored as a log value
  //
  int slot = 0;

  has_model = models_d.gotoFirst();
  while (has_model) {
    float32 log_occ = log(models_d.getCurr()->getOccupancy());

    if (mode_d == NONE) {
      weights_d(slot++) = exp(log_occ - log_total);            // from (1)&(2)
    }
    else {
      weights_d(slot++) = log_occ - log_total;                 // from (1)&(2)
    }

    has_model = models_d.gotoNext();
  }

  // pass 3: let every model in the mixture update its own parameters.
  // this is done after the weights are set, since the weights are
  // derived from the occupancies held by the models
  //
  has_model = models_d.gotoFirst();
  while (has_model) {
    models_d.getCurr()->update(varfloor_a, min_count_a);
    has_model = models_d.gotoNext();
  }

  // exit gracefully
  //
  return true;
}

// method: accumulate
//
// arguments:
//   VectorFloat& data: (input) feature vector
//
// return: a bool8 value indicating status
//
// this method accumulates the model parameters using the input features
//
bool8 MixtureModel::accumulate(VectorFloat& data_a) {

  // visit every model in the mixture, front to back, and hand each one
  // the feature vector so that it can accumulate its own statistics
  //
  bool8 has_model = models_d.gotoFirst();
  while (has_model) {
    models_d.getCurr()->accumulate(data_a);
    has_model = models_d.gotoNext();
  }

  // exit gracefully
  //
  return true;
}

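// method: initialize
//
// arguments:
//   VectorFloat& param: parameters forwarded to the initialize method of
//                       each model in the mixture
//
// return: a bool8 value indicating status
//
// this method initializes the mixture weights and then initializes the
// parameters of each model in the mixture using the accumulated features
//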
bool8 MixtureModel::initialize(VectorFloat& param) {

  // the mixture weights are initialized first
  //
  initializeWeights();

  // then every model in the mixture, front to back, is asked to
  // initialize its own parameters from the same argument
  //
  bool8 has_model = models_d.gotoFirst();
  while (has_model) {
    models_d.getCurr()->initialize(param);
    has_model = models_d.gotoNext();
  }

  // exit gracefully
  //
  return true;
}

// method: accumulate
//
// arguments:
//   VectorDouble params: (input) training parameters
//   VectorFloat data: (input) observations
//   bool8 precomp: (input) flag that indicate if data is precomputed
//
// return: a bool8 value indicating status
//
// [1] L. Rabiner, B. H. Juang, "Fundamentals of Speech Recognition", Prentice
// Hall P T R, New Jersey, 1993, pp. 350-352, ISBN 0-13-015157-2
//
//            alpha(t)[j] * beta (t)[j]
// A[t, j] =  -------------------------
//             alpha(t)[j] * beta (t)[j] t = [1, .., T]  k = [1, .., M]
//
//               c[j, k] * gauss[O(t), mu[j, k], cov[j, k]]
// B[t, j, k] =  ------------------------------------------
//               c[j, m] * gauss[O(t), mu[j, m], cov[j, m]]  m = [1, .., M] (1)
//
// Weights Update Equation:
// 
//           gamma(t)[j, k]  t = [1, .., T]
// c[j, k] = --------------
//           gamma(t)[j, k]  t = [1, .., T]  k = [1, .., M]                 (2)
//
// Mean Update Equation:
//
//            gamma(t)[j, k] * O(t)  t = [1, .., T]
// mu[j, k] = ---------------------
//                gamma(t)[j, k]     t = [1, .., T]
//
// Covariance Update Equation:
//
//             gamma[j, k] * (O(t) - mu[j, k])^2  t = [1, .., T]
// cov[j, k] = ---------------------------------
//                      gamma(t)[j, k]            t = [1, .., T]
//
// this method accumulates the statistics for the model which are
// needed to update the model parameters during training
//
bool8 MixtureModel::accumulate(VectorDouble& param_a,
                               VectorFloat& data_a,
                               bool8 precomp_a) {

  // layout of the parameter vector on entry:
  //
  // param_a(0) - forward probability (alpha)
  // param_a(1) - backward probability (beta)
  // param_a(2) - utterance normalization, i.e., (1) above
  // param_a(3) - minimum model probability deviance
  // param_a(4) - floor on the occupancy probability
  // param_a(5) - likelihood score
  //
  // all six entries are read here, although the occupancy floor and the
  // likelihood score are not used by the computation at this level
  //
  float64 fwd_prob = param_a(0);
  float64 bwd_prob = param_a(1);
  float64 norm = param_a(2);
  float64 deviance_limit = param_a(3);
  float64 occupancy_floor = param_a(4);
  float64 mixture_score = param_a(5);

  // compute the model probability deviance
  //
  float64 deviance = fwd_prob + bwd_prob - norm;

  // nothing is accumulated unless the deviance is above the (negated)
  // minimum probability deviance
  //
  if (!(deviance > -deviance_limit)) {
    return true;
  }

  // add two entries to the end of the parameter vector. layout of the
  // parameter vector as handed to each model:
  //
  // param_a(0) - forward probability (alpha)
  // param_a(1) - backward probability (beta)
  // param_a(2) - utterance normalization, i.e., (1) above
  // param_a(3) - minimum model probability deviance
  // param_a(4) - floor on the occupancy probability
  // param_a(5) - mixture likelihood
  // param_a(6) - mixture weight
  // param_a(7) - model likelihood
  //
  param_a.setLength(param_a.length() + 2);

  // loop over each model in the mixture and have them accumulate parameters
  //
  int slot = 0;
  bool8 has_model = models_d.gotoFirst();
  while (has_model) {

    // the model likelihood defaults to the value in param_a(5) and is
    // recomputed from the data only when the mixture holds more than
    // one model
    //
    float64 model_score = param_a(5);
    if (models_d.length() > 1) {
      model_score = models_d.getCurr()->getLogLikelihood(data_a);
    }

    // the models are handed a log-weight: in NONE mode the stored weight
    // is linear and is converted here, otherwise (PRECOMPUTE) the stored
    // weight is passed through unchanged
    //
    float64 log_weight = 0;
    if (mode_d == NONE) {
      log_weight = Integral::log(weights_d(slot++));
    }
    else {
      log_weight = weights_d(slot++);
    }

    param_a(6) = log_weight;
    param_a(7) = model_score;

    // have the model accumulate its own statistics
    //
    models_d.getCurr()->accumulate(param_a, data_a, precomp_a);

    has_model = models_d.gotoNext();
  }

  // exit gracefully
  //
  return true;
}

// method: getMean
//
// arguments:
//  VectorFloat& mean: (output) mean of the underlying distribution
//
// return: a bool8 value indicating status
//
// this method computes the mean of the underlying distribution
//
bool8 MixtureModel::getMean(VectorFloat& mean_a) {

  // an empty mixture has no mean: report an error instead of
  // dereferencing the first model of an empty list
  //
  if (!models_d.gotoFirst()) {
    return Error::handle(name(), L"getMean", ERR, __FILE__, __LINE__);
  }

  // local variables
  //
  VectorFloat mean;
  VectorFloat temp1;
  int32 i = 0;
  int32 len1;

  // set the length of the mean from the dimension of the first model
  //
  models_d.getFirst()->getMean(temp1);
  len1 = temp1.length();
  mean.setLength(len1);

  // loop over each model in the mixture and compute the weighted mean
  //
  for (bool8 more = models_d.gotoFirst(); more;
       more = models_d.gotoNext()) {

    // local variables
    //
    VectorFloat temp2;
    int32 len2;

    // get the mean of the current model
    //
    models_d.getCurr()->getMean(temp2);
    len2 = temp2.length();

    // error if the dimensionality of the means is not the same for all
    // models in the mixture
    //
    if (len1 != len2) {
      return Error::handle(name(), L"getMean", Error::ARG,
                           __FILE__, __LINE__);
    }

    // the weighted mean needs linear weights. in NONE mode the stored
    // weights are linear; in any other mode (PRECOMPUTE) they are stored
    // as log-weights and are changed back to linear on a local copy
    //
    float32 weight = weights_d(i);
    i++;
    if (mode_d != NONE) {
      weight = exp(weight);
    }

    temp2.mult(weight);
    mean.add(temp2);
  }

  // the output is written only after all models were processed, so it
  // is left untouched when an error is returned above
  //
  mean_a = mean;

  // exit gracefully
  //
  return true;
}

// method: getCovariance
//
// arguments:
//  MatrixFloat& cov: (output) covariance of the underlying distribution
//
// return: a bool8 value indicating status
//
// presently, this method gets the covariance for single mixture
// distribution only
//
bool8 MixtureModel::getCovariance(MatrixFloat& cov_a) {

  // find the number of mixtures
  //
  int32 num_mix = models_d.length();

  // only a single-mixture distribution is supported. for any other
  // number of mixtures (including an empty model list) report the error
  // and return its status, so that the caller is not told the call
  // succeeded while cov_a was left unset
  //
  if (num_mix != (int32)1) {
    return Error::handle(name(), L"getCovariance", ERR, __FILE__, __LINE__);
  }

  // get the covariance of the single model into a temporary and copy it
  // to the output
  //
  MatrixFloat cov((int32)1, (int32)1);
  models_d.getFirst()->getCovariance(cov);
  cov_a = cov;

  // exit gracefully
  //
  return true;
}