#!/usr/bin/env python
#
# file: imld/alg/imld_alg_kmeans.py
#
# revision history:
#
# 20220210 (MM): initial version
#
# This script implements the KMeans machine learning algorithm for the ISIP
# Machine Learning Demo software.
#
#------------------------------------------------------------------------------
#
# imports are listed here
#
#------------------------------------------------------------------------------

# import modules
#
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial import distance
import lib.imld_constants_file as icf


# ------------------------------------------------------------------------------
#
# global variables are listed here
#
# ------------------------------------------------------------------------------
# headings for the two columns of the parameter table
#
PARAMETER = "PARAMETER"
VALUE = "VALUE"

# layout of one row of the parameter table: two left-justified fields, each
# padded to a width of 15 characters and separated by a single space
#
FORMAT = "{:<15} {:<15}"
#------------------------------------------------------------------------------
#
# classes are listed here
#
#------------------------------------------------------------------------------

#  class: AlgorithmKMeans
#
#  This class contains methods to apply the KMeans algorithm to a set of data
#  with a choice of the number of clusters, the initialization method, the
#  number of initializations and the maximum number of iterations per run.
#

class AlgorithmKMeans():
    # method: AlgorithmKMeans::constructor
    #
    # arguments:
    #  win_input: GUI input display
    #  win_output: GUI output display
    #  win_log: GUI process log
    #  n_clusters: number of clusters
    #  init: choice of method between Kmeans++ or Random
    #  n_init: number of times Kmeans is run to determine the best centroid
    #          seed
    #  maxiter: max number of iterations for a single run
    #
    # return: none
    #
    # This method only stores its arguments; the KMeans model itself is
    # created later in initialize().
    #
    def __init__(self, win_input, win_output, win_log, n_clusters, init, n_init,
                 maxiter):
        # create class data
        #
        AlgorithmKMeans.__CLASS_NAME__ = self.__class__.__name__

        # copy the inputs into class data
        #
        self.input_d = win_input
        self.output_d = win_output
        self.log_d = win_log
        self.n_clusters = n_clusters
        self.initial = init
        self.n_init = n_init
        self.maxiter = maxiter

        # exit gracefully
        #
        return None

    # method: AlgorithmKMeans::initialize
    #
    # arguments:
    #  data: data recorded from display that will be used for training
    #        (a sequence with one array of samples per class)
    #
    # return:
    #  True
    #
    # This method initializes variables for KMeans: it records the data,
    # computes the mean of each class, builds the KMeans model and writes the
    # model parameters to the process log.
    #
    def initialize(self, data):

        # initialize variables
        #
        self.data = data
        self.classes = len(self.data)

        # find mean within each class
        #
        self.means = [d.mean(axis=0) for d in self.data]

        # set up Kmeans model; the seed is fixed so runs are reproducible
        #
        self.kmeans = KMeans(n_clusters=self.n_clusters,
                             init=self.initial, n_init=self.n_init,
                             max_iter=self.maxiter, random_state=icf.SEED)
        self.X = np.empty((0, 0))
        self.print_params()

        # exit gracefully
        #
        return True

    # method: AlgorithmKMeans::run_algo
    #
    # arguments:
    #  data: data recorded from display
    #
    # return:
    #  True
    #
    # This method runs the initialization and training
    #
    def run_algo(self, data):

        # initialize and train algorithm
        #
        self.initialize(data)
        self.train()

        # exit gracefully
        #
        return True

    # method: AlgorithmKMeans::train
    #
    # arguments:
    #  None
    #
    # return:
    #  True
    #
    # This method trains the model on the data stored by initialize() and
    # plots the resulting cluster centers on the input display
    #
    def train(self):

        # stack the per-class arrays into one sample matrix and train
        #
        data = np.vstack((self.data))
        self.kmeans.fit(data)

        # plot the cluster means on the input display
        #
        self.compute_cluster_mean(self.input_d)

        # exit gracefully
        #
        return True

    # method: AlgorithmKMeans::print_params
    #
    # arguments:
    #  None
    #
    # return:
    #  None
    #
    # This method writes the parameters of the KMeans model to the process
    # log as a two-column table (parameter name, value), preceded by a header
    # row
    #
    def print_params(self):

        # fetch the model parameters and write the table header
        #
        param = self.kmeans.get_params()
        self.log_d.append("\n" + FORMAT.format(PARAMETER, VALUE))

        # write one row per parameter; names and values are converted to
        # strings so that the column formatting applies to any value type
        #
        for k, v in param.items():
            self.log_d.append(FORMAT.format(str(k), str(v)))

    # method: AlgorithmKMeans::compute_cluster_mean
    #
    # arguments:
    #  ax: display where means will be plotted
    #
    # return:
    #  True
    #
    # This method plots the cluster means found by the trained model; only
    # the first two coordinates of each center are used
    #
    def compute_cluster_mean(self, ax):

        # find cluster mean and plot
        #
        centers = self.kmeans.cluster_centers_
        ax.canvas.axes.scatter(centers[:, 0], centers[:, 1],
                               c='black', s=8)

        # exit gracefully
        #
        return True

    # method: AlgorithmKMeans::predict
    #
    # arguments:
    #  ax: display whose axis limits set the resolution of the decision
    #      surface
    #  X: data recorded from display
    #
    # return:
    #  xx: matrix of X coordinates of the evaluation grid
    #  yy: matrix of Y coordinates of the evaluation grid
    #  Z: predicted cluster index at each grid point (same shape as xx)
    #
    # This method calculates the predictions used for a decision surface
    #
    def predict(self, ax, X):

        # reshape data into a single (n_samples, 2) matrix
        #
        X = np.concatenate(X, axis=0)
        X = np.reshape(X, (-1, 2))

        # set up the limits and map of values for decision map; the grid
        # step is 1/100 of the width of the x axis (both ends of the span
        # must come from the same axis, otherwise the step is wrong whenever
        # the x and y limits differ)
        #
        x_lo, x_hi = ax.canvas.axes.get_xlim()
        res = (x_hi - x_lo) / 100
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, res),
                             np.arange(y_min, y_max, res))

        # predict values for decision surface
        #
        Z = self.kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)

        # exit gracefully
        #
        return xx, yy, Z

    # method: AlgorithmKMeans::prediction_classifier
    #
    # arguments:
    #  data: data from the evaluation display
    #
    # return:
    #  prediction: list with the predicted class label of every sample
    #
    # This method predicts the class label through calculating the shortest
    # distance between the predicted cluster center and all of the class
    # means. A ValueError is raised if no class mean can be matched to a
    # predicted cluster.
    #
    def prediction_classifier(self, data):

        # find cluster centers
        #
        centers = self.kmeans.cluster_centers_

        # predict cluster label
        #
        data = np.vstack((data))
        clusters = self.kmeans.predict(data)

        # find the class closest to each cluster label; the answer depends
        # only on the cluster, so it is looked up once per distinct cluster
        # and reused for every sample assigned to that cluster
        #
        cluster_to_class = {}
        prediction = []
        for c in clusters:
            c = int(c)
            if c not in cluster_to_class:
                cluster_to_class[c] = self._nearest_class(centers[c])

            # record prediction
            #
            prediction.append(cluster_to_class[c])

        # exit gracefully
        #
        return prediction

    # method: AlgorithmKMeans::_nearest_class
    #
    # arguments:
    #  center: coordinates of a cluster center
    #
    # return:
    #  label: index of the class whose mean is closest to center
    #
    # This method finds the class mean with the smallest euclidean distance
    # to a cluster center; ties go to the lowest class index
    #
    def _nearest_class(self, center):

        # scan all class means, keeping the closest one seen so far
        #
        label = None
        min_dist = np.inf
        for index, mean in enumerate(self.means):

            # calc distance between mean and predicted value center
            #
            dist = distance.euclidean(mean, center)

            # check if calculated distance is lower than current min
            # distance (a NaN distance never satisfies this test)
            #
            if min_dist > dist:
                min_dist = dist
                label = index

        # fail loudly instead of using an undefined label
        #
        if label is None:
            raise ValueError("{}: no class mean can be matched to the "
                             "predicted cluster".format(
                                 self.__class__.__name__))

        # exit gracefully
        #
        return label

#
# end of class

#
# end of file