#!/usr/bin/env python
#
# file: imld/alg/imld_alg_kmeans.py
#
# revision history:
#
# 20220210 (MM): initial version
# This script implements the Class Dependent and Independent Principal
# Component Analysis machine learning algorithm for the ISIP Machine
# Learning Demo software.
#
#------------------------------------------------------------------------------
#
# imports are listed here
#
#------------------------------------------------------------------------------

# import modules
#
from sklearn.decomposition import PCA
import numpy as np
from scipy.spatial import distance
from math import cos, sin, atan2, sqrt
import lib.imld_constants_file as icf
import gui.imld_gui_window as igw

#------------------------------------------------------------------------------
#
# global variables are listed here
#
# ------------------------------------------------------------------------------
CI = "CI"
CD = "CD"
NUM_STEPS = 3
PCA_DIM = 2
FORMAT = "{:<15} {:<15}"
PARAMETER = "PARAMETER"
VALUE = "VALUE"
#------------------------------------------------------------------------------
#
# classes are listed here
#
#------------------------------------------------------------------------------

#  class: AlgorithmPCA
#
#  This class contains methods to apply the PCA algorithm on a set of data
#  while displaying the means and covariances as well as elliptical regions.
#
class AlgorithmPCA():
    # method: AlgorithmPCA::constructor
    #
    # arguments:
    #  win_input: GUI input display
    #  win_output: GUI output display
    #  win_log: GUI process log
    #
    # return: none
    #
    def __init__(self, win_input, win_output, win_log, mode):

        # create class data
        #
        AlgorithmPCA.__CLASS_NAME__ = self.__class__.__name__

        # copy the inputs into class data
        #
        self.input_d = win_input
        self.output_d = win_output
        self.log_d = win_log
        self.mode = mode

        # exit gracefully
        #
        return None

    # method: AlgorithmPCA::initialize
    #
    # arguments:
    #  data: data recorded from display that will be used for training
    #
    # return:
    #  True
    #
    # initialize variables for PCA
    #
    def initialize(self, data):

        # initialize variables
        #
        self.data = data
        self.cov_mat = None
        self.mean_mat = None
        self.trans_mat = None
        self.dist = None
        self.pca = PCA(PCA_DIM, random_state=icf.SEED)
        self.pca_classes = []
        self.classes = len(self.data)
        self.print_params()

        # exit gracefully
        #
        return True

    # method: AlgorithmPCA::run_algo
    #
    # arguments:
    #  data: data recorded from display
    #
    # return:
    #  True
    #
    # run algorithm steps
    #
    def run_algo(self, data):

        # initialize algorithm
        #
        self.initialize(data)

        # compute and display stats
        #
        self.compute_stats()
        self.display_means()
        self.display_cov()

        # plot ellipse regions
        #
        self.ellipse_regions(data, self.input_d.class_info)

        # exit gracefully
        #
        return True

    # method: AlgorithmPCA::compute_stats
    #
    # arguments: none
    #
    # return:
    #  True
    #
    # this method calculates the covariance and means matrix for each class the PCA is applied to
    #
    def compute_stats(self):

        # initialize variables
        #
        self.mean_mat = []
        self.cov_mat = []
        self.trans_mat = []


        # compute the covariance for each class
        if self.mode == CI:
            data = np.vstack((self.data))
            self.pca.fit(data)
            tmp_cov = self.pca.get_covariance()

        # iterate through class and apply PCA and find the mean,
        for i in range(len(self.data)):
            self.pca.fit(self.data[i])
            self.pca_classes.append(self.pca.fit(self.data[i]))
            mean_vector = self.pca.mean_
            self.mean_mat.append(mean_vector)
            self.trans_mat.append(self.pca.transform(self.data[i]))

            # if CD find the covariance matrix for each class, or if CI append
            # the same covariance matrix
            #
            if self.mode == CI:
                self.cov_mat.append(tmp_cov)

            else:
                self.cov_mat.append(self.pca.get_covariance())

        # exit gracefully
        #
        return True

    # method: AlgorithmPCA::display_means
    #
    # arguments: none
    #
    # return:
    #  True
    #
    # this method display the means for each class and plots a point at the mean
    #
    def display_means(self):
        text = "\nMeans: \n"
        count = 0
        for line in self.mean_mat:
            text += "Class" + str(count) + ': ' + str(line) + '\n'
            self.input_d.canvas.axes.scatter(line[0], line[1], c='black', s=7)
            count += 1

        self.log_d.append(text)
        self.input_d.canvas.draw_idle()

        # exit gracefully
        #
        return True

    # method: AlgorithmPCA::display_cov
    #
    # arguments: none
    #
    # return:
    #  True
    #
    # this method displays the covariance matrix of each class in the process log
    #
    def display_cov(self):
        text = 'Covariance Matrix: \n'

        for classes in range(len(self.cov_mat)):
            text += 'Class' + str(classes) +': '
            str_cov = np.array2string(self.cov_mat[classes], formatter={'float_kind': lambda x: "%.4f" % x})
            text += '\n' + str_cov+ '\n'

        self.log_d.append(text)

        # exit gracefully
        #
        return True

    def print_params(self):
        param = self.pca.get_params()
        self.log_d.append("\n"+(FORMAT.format
                           (PARAMETER, VALUE)))

        for k, v in param.items():
            k,v = str(k), str(v)

            self.log_d.append(FORMAT.format(k, v))
        return True

    # method: AlgorithmPCA::predict
    #
    # arguments:
    #  ax: the canvas with the original data is plotted on
    #  X: is the data that is being used for the predictions
    #
    # return:
    #  xx: the x coordinates of the contour
    #  yy: the y coordinates of the contour
    #  Z: the height of the contour
    #
    # This method is used to make a prediction using the Mahalanobis distance
    #
    def predict(self, ax, X):

        # Creates the mesh grid
        X = np.concatenate(X, axis=0)
        X = np.reshape(X, (-1, 2))
        res = (ax.canvas.axes.get_xlim()[1] - ax.canvas.axes.get_ylim()[0]) / 100
        x_min, x_max = X[:, 0].min() - .75, X[:, 0].max() + .75
        y_min, y_max = X[:, 1].min() - .75, X[:, 1].max() + .75
        xx, yy = np.meshgrid(np.arange(x_min, x_max, res),
                             np.arange(y_min, y_max, res))

        # if dependent reshape the covariance matrix
        #
        if self.mode == CD:
            self.cov_mat = np.reshape(self.cov_mat, (self.classes, 2, 2))
        Z = np.empty((0, 0))

        # calculate the distance using mean of the class and the covariance of the class
        #
        for pairs in np.c_[xx.ravel(), yy.ravel()]:
            distances = np.empty((0, 0))
            for i in range(self.classes):
                maha = (distance.mahalanobis(self.mean_mat[i], pairs, np.linalg.inv(self.cov_mat[i])))
                distances = np.append(distances, maha)

            # save the distance
            #
            Z = np.append(Z, np.argmin(distances))

        # exit gracefully
        #
        return xx, yy, Z

    # method: AlgorithmPCA::prediction_classifier
    #
    # arguments:
    #  data: the class data being used for predictions
    #
    # return:
    #  distance: minimal distance
    #
    # this method calculates the distance prediction of each class
    def prediction_classifier(self, data):

        # set up prediction list
        #
        prediction = []

        # iterate through each sampe
        #
        for i in range(self.classes):
            for j in range(len(data[i])):
                distances = np.empty((0, 0))
                for k in range(self.classes):

                    # calculate the maha distance and record the index of the
                    # minimum value
                    #
                    maha = distance.mahalanobis(self.mean_mat[k],
                                                data[i][j],
                                                np.linalg.inv(self.cov_mat[k]))
                    distances = np.append(distances, maha)

                # record prediction
                #
                prediction.append(np.argmin(distances))


        # exit gracefully
        #
        return prediction

    # method: AlgorithmPCA::ellipse_regions
    #
    # arguments:
    #  x: is the data passed through from the original class data
    #  classes: is the number of classes in the dataset
    #
    # return:
    #  True
    #
    # this method calculates the elliptical region surrounding the class data
    #
    def ellipse_regions(self, x, classes):

        # initialize the boundaries of the support region
        #
        y = np.arange(len(classes))
        x = np.concatenate(x, axis=0)
        x = np.reshape(x, (-1, 2))
        y = np.reshape(y, (-1, 1))

        # initialize the region for each class
        #
        self.support_region = np.empty((0, 0))
        pca_d = [np.empty((2, 2)) * np.nan for i in range(len(classes))]

        # if class independent find the mean and the covariance matrix
        #
        if self.mode == CI:
            val = np.empty((2, 1))
            if x.size > 0:
                mu = np.empty((0, 0))
                for num in range(len(classes)):
                    index = np.where(y == num)[0]
                    reshaped_x = np.reshape(x[index], (-1, 2))

                    # find the mean of the data for the clas
                    #
                    mu_v = np.mean(reshaped_x, axis=0)
                    mu = np.append(mu, mu_v)

                mu = np.reshape(mu, (len(classes), 2, 1))
                self.mu = mu

                # find the covariance matrix
                #
                self.pca.fit_transform(x, y)
                cov = self.pca.get_covariance()
                global_mu = self.pca.mean_

                # find the eigenvalues and eiganvectors
                #
                eigVal, eigVector = np.linalg.eig(cov)

                # create a temporary matrix to hold the initial transformation matrix
                #
                temp = np.empty((2, 2))
                for i in range(2):
                    for j in range(2):
                        temp[j][i] = (eigVector[i][j] / sqrt(eigVal[i]))

                # calculate the theta for the transformation matrix
                #
                alpha = cov[0][0] - cov[1][1]
                beta = -2 * cov[0][1]

                if eigVal[0] > eigVal[1]:
                    theta = atan2((alpha - sqrt((alpha * alpha) +
                                                (beta * beta))), beta)
                else:
                    theta = atan2((alpha + sqrt((alpha * alpha) +
                                                (beta * beta))), beta)

                # calculate the transformation matrix
                #
                trans_matrix = np.linalg.inv(temp)

                self.trans_matrix = temp

            # iterate through each degree
            #
            for i in range(0, 360):
                val[0][0] = 1.5 * cos(i)
                val[1][0] = 1.5 * sin(i)

                # transform the points from the feature space back to the
                # original space to create the support region for the data set
                #
                supp = np.dot(trans_matrix, val)

                # rotate the points (original space)
                #
                xval = (supp[0][0] * cos(theta)) - (supp[1][0] * sin(theta))

                yval = (supp[0][0] * sin(theta)) +(supp[1][0] * cos(theta))

                xval = xval + global_mu[0]
                yval = yval + global_mu[1]

                self.support_region = np.append(self.support_region, [xval, yval])

            self.support_region = np.reshape(self.support_region, (-1, 2))

        # if class dependent
        #
        else:
            val = np.empty((2, 1))
            for i in range(len(classes)):
                index = np.where(y == i)[0]
                if x[index].size > 0:

                    # find the eigenvalues and eigenvectors
                    #
                    self.Eigval = self.pca_classes[i].explained_variance_
                    self.Eigvect = self.pca_classes[i].components_
                    for j in range(2):
                        for k in range(2):
                            pca_d[i][k][j] = self.Eigvect[k][j] / sqrt(self.Eigval[j])

                # find the mean and covariance matrices
                #
                if not np.isnan(pca_d[i])[0][0]:
                    covPCA = self.cov_mat[i]
                    meanPCA = self.mean_mat[i]

                    # calculate the inverse transformation matrix
                    #
                    alpha = covPCA[0][0] - covPCA[1][1]
                    beta = -2 * covPCA[0][1]

                    if self.Eigval[0] > self.Eigval[1]:
                        theta = atan2((alpha - sqrt((alpha * alpha) + (beta * beta))), beta)
                    else:
                        theta = atan2((alpha + sqrt((alpha * alpha) + (beta * beta))), beta)

                    inv_trans = np.linalg.inv(pca_d[i])

                # iterate through each degree for the support region
                #
                for z in range(0, 360):
                    val[0][0] = 1.5 * cos(z)
                    val[1][0] = 1.5 * sin(z)

                    # transform the points from the feature space back to the
                    # original space to create the support region for the data set
                    #
                    supp = np.dot(inv_trans, val)

                    # rotate the points (original space)
                    #
                    xval = (supp[0][0] * cos(theta)) - (supp[1][0] * sin(theta))

                    yval = (supp[0][0] * sin(theta)) + (supp[1][0] * cos(theta))

                    xval = xval + meanPCA[0]
                    yval = yval + meanPCA[1]

                    self.support_region = np.append(self.support_region, [xval, yval])

                self.support_region = np.reshape(self.support_region, (-1, 2))

        # plot the support region
        #
        self.input_d.canvas.axes.scatter(self.support_region[:, 0], self.support_region[:, 1], c='black', s=1)
        self.input_d.canvas.draw_idle()

        # exit gracefully
        #
        return True

#
# end of class

#
# end of file