#!/usr/bin/env python
#
# file: imld/alg/imld_model.py
#
# revision history:
#
# 20240118 (SM): updates for v3.0.0
# 20220129 (MM): clean up, implement prepare_data()
# 20200101 (MM): initial version
# This file contains classes whose methods apply algorithms, plot decision
# surfaces, and provide the step functions used to advance the progress of
# the model
#
#------------------------------------------------------------------------------
#
# imports are listed here
#
#------------------------------------------------------------------------------

# import modules
#
import numpy as np
import imld_gui_window as igw
from PyQt5 import QtWidgets, QtCore
from sklearn.preprocessing import Normalizer
from copy import deepcopy

# import local modules
#
import nedc_ml_tools as ml
import nedc_file_tools as nft

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# define model's messages
#
# TRAIN and EVAL name the two data sets: they are the keys of Model.info,
# the values compared against the "set" argument of the Model methods, and
# the prefix of the error-rate line written by Model.display_errors
#
TRAIN = "Training"
EVAL = "Evaluation"
# RESET and BREAK are not referenced elsewhere in this file (presumably they
# are used by modules that import this one - verify before removing)
#
RESET = "Process Resetting...\n"
BREAK = "==============================================="
# messages appended to the process log when data preparation fails:
#  EVAL_ERROR  - Model.prepare_eval found no evaluation data
#  TRAIN_ERROR - Model.prepare_train found no training data
#  CLASS_ERROR - Model.prepare_eval found a different number of classes in
#                the training and evaluation data
#
EVAL_ERROR = "There's no evaluation data to classify.\n"
TRAIN_ERROR = "There's no training data to classify.\n"
CLASS_ERROR = "Eval and Training data classes do not match\n"

#------------------------------------------------------------------------------
#
# classes are listed here
#
#------------------------------------------------------------------------------

class IMLD_MLToolData(ml.MLToolData):
    '''
    class: IMLD_MLToolData

    description:
     a child class of ML Tools' MLToolData that has the same methods as the
     parent class. it adds some extra class data and methods for IMLD
     purposes. note that the constructor below does not call the parent
     constructor: it sets every attribute it needs directly from the
     in-memory IMLD data.
    '''

    def __init__(self, _imld_data, label_names):
        """
        method: IMLD_MLToolData::constructor

        arguments:
         _imld_data: data that is generated by IMLD. a sequence with one
                     entry per class, each entry being a sequence of samples
         label_names: a sequence of class names, indexed by the position of
                      the class in _imld_data

        return:
         None

        description:
         this function initializes an MLToolData object with additional
         variables for IMLD purposes. the per-class data is flattened into a
         single data array plus a parallel array of integer labels, where
         the label of a sample is the position of its class in _imld_data.
        """

        # keep a reference to the raw per-class data
        #
        self.imld_data = _imld_data

        # set the remaining attributes directly
        #
        self.dir_path = ""
        self.lndx = 0
        self.nfeats = -1
        self.num_of_classes = len(self.imld_data)

        labels = []
        data = []
        mapping_label = {}

        # converting the data into our new format:
        #  every sample of the i-th class gets the integer label i
        #
        # NOTE(review): names are assigned by position. if the caller drops
        # empty classes before building _imld_data (Model.prepare_data in
        # this file does), label_names presumably has to be filtered the
        # same way or the names will be shifted - confirm with the caller.
        #
        for i, samples in enumerate(self.imld_data):
            mapping_label[i] = label_names[i]
            labels.extend([i] * len(samples))
            data.extend(samples)

        # store the flattened data as numpy arrays
        #
        self.labels = np.asarray(labels)
        self.data = np.asarray(data)
        self.mapping_label = mapping_label
    #
    # end of method

    def prep_decision_surface(self, ax, resolution=100):
        '''
        method: IMLD_MLToolData::prep_decision_surface

        arguments:
         ax: the display whose canvas the original data is plotted on. only
             ax.canvas.axes.get_xlim() and get_ylim() are used
         resolution: the number of grid points along each axis (default 100,
                     which is the value that was previously hard-coded)

        return:
         new_data: a IMLD_MLToolData object that contains the data that was
                   processed in this function. this object is ready to be
                   used in a decision surface
         xx      : the x coordinates of the contour
         yy      : the y coordinates of the contour

        description:
         this method builds a regular grid of points that covers the current
         axis limits of one of IMLD's canvases (eval/train). it returns a
         copy of this object whose data has been replaced by the grid
         points, so the copy can be passed to a predictor to compute a
         decision surface. all other attributes of the copy (including
         labels) are left as copied from this object.
        '''

        # Generate a grid of coordinates from the canvas
        #
        x_min, x_max = ax.canvas.axes.get_xlim()
        y_min, y_max = ax.canvas.axes.get_ylim()
        xx, yy = np.meshgrid(np.linspace(x_min, x_max, resolution),
                             np.linspace(y_min, y_max, resolution))

        # Stack the coordinates to create an array of (x, y) pairs:
        #  one row per grid point, in the row-major order of xx and yy
        #
        points = np.c_[xx.ravel(), yy.ravel()]

        # copy the current IMLD_MLToolData object and change its internal
        # data to the calculated points
        #
        new_data = deepcopy(self)
        new_data.data = points

        # exit gracefully
        #
        return new_data, xx, yy
    #
    # end of method
#
# end of class

class Model:
    '''
    class: Model

    description:
     this class contains methods to apply chosen algorithms, extract the data
     needed for the algorithms, step through the model, plot decision surfaces
     and display computed errors
    '''

    def __init__(self, algo, label_names, win_input, win_output, win_log, normalize=False):
        '''
        method: Model::constructor

        arguments:
         algo: the algorithm chosen
         label_names: the class names handed to IMLD_MLToolData when the
                      train/eval data is prepared
         win_input: GUI input display (holds the training data)
         win_output: GUI output display (holds the evaluation data)
         win_log: GUI process log; its "output" attribute receives the
                  messages
         normalize: if True, prepare_data() normalizes the extracted data

        return:
         None

        description:
         initialize a Model object
        '''

        # copy the inputs into class data
        #
        self.label_names = label_names
        self.input_d = win_input
        self.output_d = win_output
        self.log_d = win_log.output
        self.algo = algo
        self.step_index = 0

        # map each set name to the display that holds its data
        #
        self.info = {TRAIN: self.input_d,
                     EVAL: self.output_d}
        QtWidgets.QApplication.processEvents()

        # no data has been prepared yet
        #
        self.train_data = None
        self.eval_data = None

        self.normalize = normalize

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_train(self):
        '''
        method: Model::prepare_train

        arguments:
         None

        return:
         bool: False if there is no training data, True if the data was
               properly prepared

        description:
         prepare the current training data for training. the result is stored
         in self.train_data (list of per-class arrays) and self.ml_train
         (IMLD_MLToolData object). a message is logged when there is no
         training data.
        '''

        # clear any results in either the input or output display
        #
        # NOTE(review): if canvas.axes is a matplotlib Axes, "collections"
        # is presumably never None, which would make this test always true -
        # confirm
        #
        if self.input_d.canvas.axes.collections is not None or \
           self.output_d.canvas.axes.collections is not None:
            self.input_d.clear_result_plot()
            self.output_d.clear_result_plot()

        # prepare the data for the model
        #
        QtCore.QCoreApplication.processEvents()
        self.train_data = self.prepare_data(self.info[TRAIN])
        self.ml_train = IMLD_MLToolData(self.train_data, self.label_names)

        # if there is no training data, return False
        #
        if len(self.train_data) == 0:
            self.log_d.append(TRAIN_ERROR)
            return False

        # exit gracefully
        #
        return True
    #
    # end of method

    def process_train(self):
        '''
        method: Model::process_train

        arguments:
         None

        return:
         None

        description:
         train the model with the training data. the status returned by
         train() is not propagated.
        '''

        # train the model
        #
        QtCore.QCoreApplication.processEvents()
        self.train()

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_eval(self):
        '''
        method: Model::prepare_eval

        arguments:
         None

        return:
         bool: True if the evaluation data is properly prepared, False if
               there is no evaluation data or if the number of classes
               differs from the prepared training data

        description:
         prepare the evaluation data for evaluation. the result is stored in
         self.eval_data (list of per-class arrays) and self.ml_eval
         (IMLD_MLToolData object). a message is logged on failure.
        '''

        # clear any results in the output display
        #
        if self.output_d.canvas.axes.collections is not None:
            self.output_d.clear_result_plot()

        # prepare the eval data for the model
        #
        QtCore.QCoreApplication.processEvents()
        self.eval_data = self.prepare_data(self.info[EVAL])
        self.ml_eval = IMLD_MLToolData(self.eval_data, self.label_names)

        # if there is no evaluation data, return false
        #
        if len(self.eval_data) == 0:
            self.log_d.append(EVAL_ERROR)
            return False

        # if training data was prepared, both sets must contain the same
        # number of (non-empty) classes
        #
        if self.train_data and len(self.train_data) != len(self.eval_data):
            self.log_d.append(CLASS_ERROR)
            return False

        # exit gracefully
        #
        return True
    #
    # end of method

    def prepare_errors(self, set, cw):
        '''
        method: Model::prepare_errors

        arguments:
         set: the set to score: TRAIN ("Training") or EVAL ("Evaluation")
         cw: if true, the confusion matrix is also written to the log

        return:
         None

        raises:
         ValueError: if set is neither TRAIN nor EVAL

        description:
         compute the errors for either the training or evaluation set and
         write them to the process log. predict_labels() must have been
         called for the same set first, because the stored predictions are
         used here.
        '''

        # determine the set to compute errors for
        #
        if set == EVAL:
            data = self.ml_eval
            pred = self.eval_labels
        elif set == TRAIN:
            data = self.ml_train
            pred = self.train_labels
        else:
            raise ValueError(f"unknown data set: {set!r}")

        # calculate the error rate as a percentage
        #
        error = (1 - self.algo.accuracy(data.labels, pred)) * 100

        # display the errors
        #
        self.display_errors(set, error)

        # compute the confusion matrix and print it if requested
        #
        cm = self.algo.confusion_matrix(data.num_of_classes, data.labels, pred)

        if cw:
            self.print_confusion_matrix(cm, data.mapping_label)

        self.log_d.append(nft.DELIM_NEWLINE)

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_plot(self, set):
        '''
        method: Model::prepare_plot

        arguments:
         set: the set to plot: TRAIN ("Training") or EVAL ("Evaluation")

        return:
         None

        raises:
         ValueError: if set is neither TRAIN nor EVAL

        description:
         plot the decision surface for either the eval or train data. the
         train surface is drawn on the input display and the eval surface on
         the output display.
        '''

        # determine the set to plot the decision surface for
        #
        if set == EVAL:
            data = self.ml_eval
            display = self.output_d
        elif set == TRAIN:
            data = self.ml_train
            display = self.input_d
        else:
            raise ValueError(f"unknown data set: {set!r}")

        # plot the decision surface
        #
        self.plot_decision_surface(display, data)

        # exit gracefully
        #
        return None
    #
    # end of method

    def predict_labels(self, set):
        '''
        method: Model::predict_labels

        arguments:
         set: either TRAIN ("Training") or EVAL ("Evaluation") depending on
              the desired dataset

        return:
         None

        description:
         predict the train or eval dataset with the current algorithm. the
         two values returned by the algorithm's predict() are stored in
         self.train_labels/self.train_post or self.eval_labels/
         self.eval_post. any other value of set is ignored.
        '''

        # determine the set to predict and store its results
        #
        if set == EVAL:
            labels, post = self.algo.predict(self.ml_eval)
            self.eval_labels = labels
            self.eval_post = post
        elif set == TRAIN:
            labels, post = self.algo.predict(self.ml_train)
            self.train_labels = labels
            self.train_post = post

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_data(self, info):
        '''
        method: Model::prepare_data

        arguments:
         info: a display object whose "class_info" attribute maps each class
               to a sequence holding the x values at index 1 and the y
               values at index 2

        return:
         data: a list with one (n, 2) array of (x, y) coordinates per
               non-empty class

        description:
         this method extracts the plotted coordinates of every class from a
         display, drops the classes that have no points and, if
         self.normalize is set, normalizes each remaining class
        '''

        # extract the data from the dictionary per class
        #
        class_info = info.class_info
        data = []
        for name in class_info:
            x_data = np.array(class_info[name][1])
            y_data = np.array(class_info[name][2])
            data.append(np.column_stack((x_data, y_data)))

        # remove the empty classes
        #
        new_data = [coords for coords in data if coords.size != 0]

        # normalize data
        #
        if self.normalize:

            # set up parameter for normalizer
            #
            norm = 'l2'
            new_data = [self.normalizer(coords, norm) for coords in new_data]

        # exit gracefully
        #
        return new_data
    #
    # end of method

    def normalizer(self, data, norm):
        '''
        method: Model::normalizer

        arguments:
         data: a list of coordinates from chosen data
         norm: the name of the norm passed to sklearn's Normalizer
               (prepare_data uses 'l2')

        return:
         norm_data: the normalized data

        description:
         this method normalizes the data with sklearn's Normalizer
        '''

        transformer = Normalizer(norm).fit(data)
        norm_data = transformer.transform(data)

        # exit gracefully
        #
        return norm_data
    #
    # end of method

    def train(self):
        '''
        method: Model::train

        arguments:
         None

        return:
         bool: if the training data is missing or empty, return false. else,
               return true

        description:
         train the algorithm on the prepared training data (self.ml_train)
        '''

        # check if there is training data:
        #  prepare_data() returns an empty list (not None) when nothing is
        #  plotted, so both cases must be rejected
        #
        if self.train_data is None or len(self.train_data) == 0:
            return False

        # run the algo with selected data
        #
        self.algo.train(self.ml_train)

        # exit gracefully
        #
        return True
    #
    # end of method

    def predict_decision_surface(self, ax, data):
        '''
        method: Model::predict_decision_surface

        arguments:
         ax: the display whose canvas the original data is plotted on
         data: the data object to create the decision surface for. it must
               provide prep_decision_surface()

        return:
         xx: the x coordinates of the contour
         yy: the y coordinates of the contour
         Z : the predicted label at every grid point, shaped like xx

        description:
         this method predicts a label for every point of a grid that covers
         the canvas, which is what the decision surface is drawn from
        '''

        # prepare the data to be predicted and to create a decision
        # surface with
        #
        formatted_data, xx, yy = data.prep_decision_surface(ax)

        # predict with a copy of the algorithm
        #
        # NOTE(review): presumably the copy keeps the grid prediction from
        # altering the state of the trained algorithm - confirm
        #
        ds_alg = deepcopy(self.algo)

        # predict the prepared data
        #
        labels, _ = ds_alg.predict(formatted_data)

        # convert the predictions to a numpy array and reshape them to
        # match the shape of xx and yy
        #
        Z = np.array(labels).reshape(xx.shape)

        # exit gracefully
        #
        return xx, yy, Z
    #
    # end of method

    def plot_decision_surface(self, display, data):
        '''
        method: Model::plot_decision_surface

        arguments:
         display: the display to draw the decision surface on
         data: the data object the decision surface is computed for

        return:
         None

        description:
         this method plots the decision surface based on the algorithm's
         prediction
        '''

        # record the algorithm's prediction and plot the decision surface
        #
        QtCore.QCoreApplication.processEvents()
        xx, yy, Z = self.predict_decision_surface(display, data)
        QtCore.QCoreApplication.processEvents()
        self.decision_surface(display, xx, yy, Z)

        # exit gracefully
        #
        return None
    #
    # end of method

    def display_errors(self, label, error):
        '''
        method: Model::display_errors

        arguments:
         label: the name of the set (training or evaluation) shown in front
                of the error rate
         error: the error rate as a percentage

        return:
         bool: always True

        description:
         this method writes the calculated error rate, formatted with two
         decimals, to the process log
        '''

        # display the error rate for selected label
        #
        self.log_d.append(f"{label} Error Rate = {error:.2f}%")

        # exit gracefully
        #
        return True
    #
    # end of method

    def decision_surface(self, ax, xx, yy, Z):
        '''
        method: Model::decision_surface

        arguments:
         ax: the display that the decision surface is graphed upon
         xx: the x coordinate data
         yy: the y coordinate data
         Z : the height values of the contour

        return:
         bool: always True

        description:
         this method draws the decision surface as a filled contour on the
         canvas of the given display and schedules a redraw
        '''

        # reshape the contour
        #
        Z = Z.reshape(xx.shape)

        # plot the decision surface:
        #  the color map of the input display is used for every display
        #
        ax.canvas.axes.contourf(xx, yy, Z, alpha = 0.4,
                                cmap=self.input_d.surface_color)
        ax.canvas.draw_idle()

        # exit gracefully
        #
        return True
    #
    # end of method

    def print_confusion_matrix(self, cnf, mapping_label):
        """
        method: Model::print_confusion_matrix

        arguments:
         cnf: the confusion matrix (assumed to be a square numpy array)
         mapping_label: the mapping from a class index to its label

        return:
         a boolean value indicating status

        description:
         this method writes the confusion matrix to the process log as a
         table. each cell shows the count and its percentage of the row
         total. a row without any counts is shown with 0 percent in every
         cell.
        """

        # get the number of rows and colums for the numeric data:
        #  we assume a square matrix in this case
        #
        nrows = len(cnf)
        ncols = nrows

        # create the table headers
        #
        headers = ["Ref/Hyp:"]
        for i in range(nrows):
            name = mapping_label[i]
            if isinstance(name, int):
                headers.append(ml.ALG_FMT_LBL % name)
            else:
                headers.append(name)

        # convert the confusion matrix to fractions of each row total:
        #  an all-zero row keeps zeros instead of dividing by zero
        #
        pct = np.zeros_like(cnf, dtype = float)
        for i in range(nrows):
            row_total = float(cnf[i].sum())
            if row_total == 0.0:
                continue
            for j in range(ncols):
                pct[i][j] = float(cnf[i][j]) / row_total

        # get the width of each colum and compute the total cell width:
        #  the width of the percentage column includes "()" and two spaces
        #
        width_lab = int(float(ml.ALG_FMT_WLB[1:-1]))
        width_cell = int(float(ml.ALG_FMT_WCL[1:-1]))
        width_pct = int(float(ml.ALG_FMT_WPC[1:-1]))
        width_paren = 4
        total_width_cell = width_cell + width_pct + width_paren

        # print the title
        #
        title = "Confusion Matrix"
        self.log_d.append(title+nft.DELIM_COLON)

        # print the corner label followed by the ncols labels center-aligned
        #
        parts = []
        if ncols > 0:
            parts.append("%*s" % (width_lab, "Ref/Hyp:"))

        for header in headers[1:ncols + 1]:

            # compute the number of spaces needed to center-align
            #
            num_spaces = total_width_cell - len(header)
            num_spaces_2 = int(num_spaces / 2)

            # write spaces, header, spaces
            #
            parts.append(nft.DELIM_SPACE * num_spaces_2)
            parts.append("%s" % header)
            parts.append(nft.DELIM_SPACE * (num_spaces - num_spaces_2))

        self.log_d.append(''.join(parts))

        # write the rows with numeric data
        #
        for i in range(nrows):

            # write the row label
            #
            parts = ["%*s" % (width_lab, headers[i+1] + nft.DELIM_COLON)]

            # write the count and the scaled percentage of every cell
            #
            for j in range(ncols):
                parts.append(ml.ALG_FMT_WST %
                             (cnf[i][j], ml.ALG_SCL_PCT * pct[i][j]))

            self.log_d.append(''.join(parts))

        # exit gracefully
        #
        return True
    #
    # end of method
#
# end of class

#
# end of file