#!/usr/bin/env python

# file: $(NEDC_NFC)/class/python/nedc_ml_tools/nedc_cov_tools.py
#                                                                              
# revision history:                                                            
#
# 20230623 (AB): code refactored to new comment style 
# 20230309 (SS): added an option for scaling
# 20230226 (JP): final review
# 20230222 (SS): updated implementations
# 20230218 (JP): refectored the code based on what we have learned recently
# 20230113 (JP): changed the interface to handle shrinkage properly
# 20230110 (JP): converted to functions and simplified things
# 20230106 (JP): refactored
# 20230106 (SS): initial version
#
# This file contains an implementation of several popular techniques to
# compute a covariance matrix. There are three basic options: ctype,
# center and scale:
#   - ctype: full (def) or diagonal
#   - center: none, tied or untied (def)
#   - scale:  none, biased (def), unbiased or empirical
#
# A good reference on this can be found here:
#
#  Hertzog, C. (1986). On Pooling Covariance Matrices for Multivariate
#  Analysis. Educational and Psychological Measurement, 46(2), 349–352.
#  https://doi.org/10.1177/001316448604600208
#
# The techniques used here integrate approaches found in popular tools
# like JMP (https://www.jmp.com/en_us/home.html), which is based on SAS,
# an scikit-learn (https://scikit-learn.org/stable/). These options
# should support all known ways to compute the covariance based
# on what we have seen in Python.
#
# The shrinkage algorithm implemented here comes from scikit-learn:
#
#  https://scikit-learn.org/stable/modules/generated/sklearn.covariance.ShrunkCovariance.html
#
# and is described here:
#
#  W. Rayens and T. Greene, Covariance pooling and stabilization for
#  classification, Computational Statistics & Data Analysis, Volume 11,
#  Issue 1, 1991, Pages 17-42, ISSN 0167-9473,
#  https://doi.org/10.1016/0167-9473(91)90050-C.
#
#------------------------------------------------------------------------------

# import reqired system modules
#
import numpy as np
import os
import sys

# import various machine learning tools
#
from sklearn import covariance as sklc

# import required NEDC modules
#
import nedc_debug_tools as ndt
import nedc_file_tools as nft

#------------------------------------------------------------------------------
#                                                                              
# global variables are listed here
#                                                                              
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define variables to handle option names and values. For each of these,
# we list the parameter name, the allowed values, and the default values.
#
PRM_CTYPE = "ctype"
CTYPE_FULL = "full"
CTYPE_DIAG = "diagonal"
DEF_CTYPE = CTYPE_FULL

PRM_CENTER = "center"
CENTER_NONE = "none"
CENTER_TIED = "tied"
CENTER_UNTIED = "untied"
DEF_CENTER = CENTER_UNTIED

PRM_SCALE = "scale"
SCALE_NONE = "none"
SCALE_BIASED = "biased"
SCALE_UNBIASED = "unbiased"
SCALE_EMP = "empirical"
DEF_SCALE = SCALE_BIASED

# declare a global debug object so we can use it in functions
#
dbgl = ndt.Dbgl()

#------------------------------------------------------------------------------
#
# functions listed here
#
#------------------------------------------------------------------------------

def compute(data, ctype = DEF_CTYPE, center = DEF_CENTER, scale = DEF_SCALE):
    """
    function: compute
    
    arguments:
     data: a list of matrices
     ctype: covariance type
     center: the type of centering
     scale: method of scaling
 
    return: 
     a covariance matrix or None (if it fails)

    description: 
     If the scaling is SCALE_EMP, we call a separate function. Otherwise,
     we handle it in this function.
    """
       
    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s::%s: ctype = %s, center = %s, scale = %s" %
              (__FILE__, ndt.__LINE__, Covar.__CLASS_NAME__, ndt.__NAME__,
               ctype, center, scale))

    # check for empirical scaling
    #
    if scale == SCALE_EMP:
        return compute_emp(data, ctype, center, scale)

    # make a single matrix from the entire data set:
    #  collapse the individual classes into one data set
    #
    whole_data = np.vstack((data))
    
    # branch on the type of centering:
    #
    # case 1: no centering or debiasing (NONE)
    #
    if center == CENTER_NONE:
        return calculate(whole_data, ctype, scale)

    # case 2: center using a global mean (TIED)
    #
    elif center == CENTER_TIED:
        data_center = debias(whole_data)
        return calculate(data_center, ctype, scale)

    # case 3: center using a mean per class (UNTIED)
    #
    elif center == CENTER_UNTIED:

        # loop over all the data and center using a class-specific mean
        #
        db = []
        for d in data:
            db.append(debias(d))

        # calculate the covariance
        #
        return calculate(np.vstack((db)), ctype, scale)
        
    # exit ungracefully: unknown mode
    #
    else:
        return None
#
# end of function

#------------------------------------------------------------------------------
#
# supporting functions go here
#
#------------------------------------------------------------------------------

def compute_emp(data, ctype, center, scale):
    """
    function: compute_emp
    
    arguments:
     data: a list of matrices
     ctype: covariance type
     center: the type of centering
     scale: method of scaling
 
    return: 
     a covariance matrix or None (if it fails)

    description: 
     This function computes an empirical covariance estimate.
    """
       
    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s::%s: ctype = %s, center = %s, scale = %s" %
              (__FILE__, ndt.__LINE__, Covar.__CLASS_NAME__, ndt.__NAME__,
               ctype, center, scale))

    # case 1: no centering
    #
    if center == CENTER_NONE:
        return calculate(data, ctype, scale)

    # case 2: tied
    #
    elif center == CENTER_TIED:

        # subtract a class-specific mean
        #
        mu = np.mean (np.vstack((data)), axis = 0)
        for i in range(len(data)):
            data[i] = data[i] - mu

        # compute the covariance
        #
        return calculate(data, ctype, center, scale)

    # case 3: untied
    #
    elif center == CENTER_UNTIED:

        # loop over the data in each class and subtract the data from
        # the mean of the class
        #
        db = []
        for d in data:
            db.append(debias(d))
            
    # calculate the covariance
    #
    return calculate(db, ctype, scale = SCALE_EMP)

    # exit ungracefully: we should never reach this line
    #
    return None
#
# end of function

def debias(data):
    """
    function: debias
    
    arguments:
     data: a numpy matrix of data
 
    return: 
     the debiased data

    description: 
     This function computes the mean and debiases the data by subtracting
     the mean.
    """
               
    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s::%s: debiasing data" %
              (__FILE__, ndt.__LINE__, Covar.__CLASS_NAME__, ndt.__NAME__))

    # calculate the mean of data
    #
    mean = np.mean(data, axis = 0)
            
    # subtract the mean
    #
    data_debiased = data - mean
    
    # exit gracefully
    #
    return data_debiased
#
# end of function
    
def calculate(data, ctype , scale):
    """
    function: calculate
    
    arguments:
     data: a matrix
     ctype: covariance type (full, diagonal)
     scale : type of scaling (e.g., biased)
 
    return: 
     a covariance matrix or None (if it fails)

    description: 
     This function calcuates a raw covariance from the data. it has two
     modes - full and diagonal. note that subtraction of the mean (debiasing)
     is done within scikit-learn.
    """
       
    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s::%s: ctype = %s, scale = %s" %
              (__FILE__, ndt.__LINE__, Covar.__CLASS_NAME__, ndt.__NAME__,
               ctype, scale))

    # compute the covariance
    #
    # if scale is biased, ctype is full 
    #
    if scale == SCALE_BIASED and ctype == CTYPE_FULL:
        cov = np.cov(data.T, bias = True)
            
    # if scale is biased and ctype is diag
    #
    elif scale == SCALE_BIASED and ctype == CTYPE_DIAG:
        cov = np.diag (np.var(data, ddof = 0, axis = 0))
            
    # if scale unbiased and ctype full
    #
    elif scale == SCALE_UNBIASED and ctype == CTYPE_FULL:
        cov = np.cov(data.T, bias = False)
        
    # if scale is unbiased and ctype is diag
    #
    elif scale == SCALE_UNBIASED and ctype == CTYPE_DIAG:
        cov = np.diag (np.var(data, ddof = 1, axis = 0))
        
    # if scale is none and ctype is full
    #
    elif scale == SCALE_NONE and ctype == CTYPE_FULL:
        data = debias(data)
        cov = data.T @ data

    # if scale is none and ctype is diag
    #
    elif scale == SCALE_NONE and ctype == CTYPE_DIAG:
        data = debias(data)
        cov = np.diag(np.diag(data.T @ data))

    # if scale is empirical
    #
    elif  scale == SCALE_EMP:
        
        # initialize a variable to add covariance of each class
        #
        cov = 0

        # calculate emperical weights
        #
        weights = calculate_empirical_weights(data)
        
        for i in range(len(data)):

            # ctype full
            #
            if ctype == CTYPE_FULL:

                # calculate covariance
                #
                cov += weights[i] * np.cov(data[i].T, bias = False)

            # ctype is diagnal
            #
            elif ctype == CTYPE_DIAG:

                # calculate covariance
                #
                cov += weights[i] * np.diag(np.var(data[i],ddof = 1, axis = 0))

    # exit gracefully
    #
    return cov
#
# end of function
   
def calculate_empirical_weights(data):
    """
    function: calculate_empirical_weights
    
    arguments:
     data: a matrix
 
    return: 
     a numpy vector containing the empirical priors

    description: 
     This function calcuates the priors from the data using an empirical estimate:
    
     weight = (Nt - 1) / (N - num_classes)
    
     This formulation is popular in tools like JMP.
    """
       
    # calculate the number of classes
    # 
    num_classes = len(data)

    # initialize an output vector
    #
    weights = np.zeros(num_classes)
        
    # calculate the total amount of data in each class, and set the
    # weights to the numerator term (Nt - 1).
    # 
    num_data = int(0)
    for i in range(num_classes):
        npts = data[i].shape[0]
        num_data += npts
        weights[i] = (npts - int(1));

    # normalize
    #
    weights *= (float(1.0) / float(num_data - num_classes))

    # exit gracefully
    #
    return weights
#
# end of function

#
# end of file