#!/usr/bin/env python
#
# file: imld/alg/imld_model.py
#
# revision history:
#
# 20240118 (SM): updates for v3.0.0
# 20220129 (MM): clean up, implement prepare_data()
# 20200101 (MM): initial version
#
# This file contains a collection of functions that deal with applying
# algorithms, plotting decision surfaces and step functions used to forward
# the progress of the model
#
#------------------------------------------------------------------------------
#
# imports are listed here
#
#------------------------------------------------------------------------------

# import modules
#
import numpy as np
import imld_gui_window as igw
from PyQt5 import QtWidgets, QtCore
from sklearn.preprocessing import Normalizer
from copy import deepcopy

# import local modules
#
import nedc_ml_tools as ml
import nedc_file_tools as nft

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# define model's messages
#
TRAIN = "Training"
EVAL = "Evaluation"
RESET = "Process Resetting...\n"
BREAK = "==============================================="
EVAL_ERROR = "There's no evaluation data to classify.\n"
TRAIN_ERROR = "There's no training data to classify.\n"
CLASS_ERROR = "Eval and Training data classes do not match\n"

#------------------------------------------------------------------------------
#
# classes are listed here
#
#------------------------------------------------------------------------------

class IMLD_MLToolData(ml.MLToolData):
    '''
    class: IMLD_MLToolData

    description:
     a child class of ML Tools' MLToolData that exposes the same interface
     as its parent class and adds some extra class data and methods for
     IMLD purposes.
    '''

    def __init__(self, _imld_data, label_names):
        """
        method: IMLD_MLToolData::constructor

        arguments:
         _imld_data: data that is generated by IMLD: a sequence with one
                     entry per class, each entry being a sequence of samples
         label_names: a sequence of class names indexed by class position

        return: None

        description:
         this function initializes an MLToolData object with additional
         variables for IMLD purposes. the per-class input is flattened into
         a single data array and a parallel array of integer labels.

        note:
         the parent class constructor is intentionally not invoked here;
         every attribute is set directly from the in-memory data.
        """

        # copy the inputs into class data and set the bookkeeping defaults
        #
        self.imld_data = _imld_data
        self.dir_path = ""
        self.lndx = 0
        self.nfeats = -1
        self.num_of_classes = len(self.imld_data)

        labels = []
        data = []
        mapping_label = {}

        # converting the data into our new format:
        #  each sample of the i-th class receives the integer label i
        #
        # NOTE(review): label_names is indexed by position in _imld_data.
        #  Model::prepare_data drops empty classes before this constructor
        #  is called, so the names may be misaligned when an earlier class
        #  is empty - confirm against the caller.
        #
        for i, samples in enumerate(self.imld_data):
            mapping_label[i] = label_names[i]
            labels.extend([i] * len(samples))
            data.extend(samples)

        self.labels = np.asarray(labels)
        self.data = np.asarray(data)
        self.mapping_label = mapping_label
    #
    # end of method

    def prep_decision_surface(self, ax):
        '''
        method: IMLD_MLToolData::prep_decision_surface

        arguments:
         ax: the display whose canvas the original data is plotted on

        return:
         new_data: an IMLD_MLToolData object whose data has been replaced
                   by the grid points computed in this function. this object
                   is ready to be used in a decision surface
         xx: the x coordinates of the contour
         yy: the y coordinates of the contour

        description:
         this method prepares the data already in the object to be mapped
         onto one of IMLD's canvases (eval/train). it builds a 100 x 100
         grid that spans the current axis limits and returns a copy of this
         object that holds the grid points as its data.
        '''

        # generate a grid of coordinates from the canvas
        #
        x_min, x_max = ax.canvas.axes.get_xlim()
        y_min, y_max = ax.canvas.axes.get_ylim()
        xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                             np.linspace(y_min, y_max, 100))

        # stack the coordinates to create an array of (x, y) pairs
        #
        points = np.c_[xx.ravel(), yy.ravel()]

        # copy the current IMLD_MLToolData object and change its internal
        # data to the calculated points:
        #  only the data is replaced; the labels of the copy still refer
        #  to the original samples
        #
        new_data = deepcopy(self)
        new_data.data = points

        # exit gracefully
        #
        return new_data, xx, yy
    #
    # end of method
#
# end of class

class Model:
    '''
    class: Model

    description:
     this class contains methods to apply chosen algorithms, extract the
     data needed for the algorithms, step through the model, plot decision
     surfaces and display computed errors
    '''

    def __init__(self, algo, label_names, win_input, win_output, win_log,
                 normalize=False):
        '''
        method: Model::constructor

        arguments:
         algo: the algorithm chosen
         label_names: the class names passed on to IMLD_MLToolData
         win_input: GUI input display (holds the training data)
         win_output: GUI output display (holds the evaluation data)
         win_log: GUI process log
         normalize: if True, prepare_data() normalizes each class' samples

        return: None

        description:
         initialize a Model object
        '''

        # copy the inputs into class data
        #
        self.label_names = label_names
        self.input_d = win_input
        self.output_d = win_output
        self.log_d = win_log.output
        self.algo = algo
        self.step_index = 0

        # map each data set name onto the display that holds its data
        #
        self.info = {TRAIN: self.input_d,
                     EVAL: self.output_d}

        QtWidgets.QApplication.processEvents()

        self.train_data = None
        self.eval_data = None
        self.normalize = normalize

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_train(self):
        '''
        method: Model::prepare_train

        arguments: None

        return:
         bool: False if there is no training data, True if the data was
               properly prepared

        description:
         prepare the current training data for training
        '''

        # clear any results in either the input or output display
        #
        # NOTE(review): this condition looks like it is always true
        #  (collections is presumably never None) - confirm
        #
        if self.input_d.canvas.axes.collections is not None or \
           self.output_d.canvas.axes.collections is not None:
            self.input_d.clear_result_plot()
            self.output_d.clear_result_plot()

        # prepare the data for the model
        #
        QtCore.QCoreApplication.processEvents()
        self.train_data = self.prepare_data(self.info[TRAIN])
        self.ml_train = IMLD_MLToolData(self.train_data, self.label_names)

        # if there is no training data, return False
        #
        if len(self.train_data) == 0:
            self.log_d.append(TRAIN_ERROR)
            return False

        # exit gracefully
        #
        return True
    #
    # end of method

    def process_train(self):
        '''
        method: Model::process_train

        arguments: None

        return: None

        description:
         train the model with the training data
        '''

        # train the model
        #
        QtCore.QCoreApplication.processEvents()
        self.train()

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_eval(self):
        '''
        method: Model::prepare_eval

        arguments: None

        return:
         bool: True if the evaluation data is properly prepared, False if
               there is no evaluation data or if the number of evaluation
               classes differs from the number of training classes

        description:
         prepare the evaluation data for evaluation
        '''

        # clear any results in the output display
        #
        # NOTE(review): this condition looks like it is always true
        #  (collections is presumably never None) - confirm
        #
        if self.output_d.canvas.axes.collections is not None:
            self.output_d.clear_result_plot()

        # prepare the eval data for the model
        #
        QtCore.QCoreApplication.processEvents()
        self.eval_data = self.prepare_data(self.info[EVAL])
        self.ml_eval = IMLD_MLToolData(self.eval_data, self.label_names)

        # if there is no evaluation data, return False
        #
        if len(self.eval_data) == 0:
            self.log_d.append(EVAL_ERROR)
            return False

        # check if classes in data match:
        #  this is only checked when training data has been prepared
        #
        if self.train_data:
            if len(self.train_data) != len(self.eval_data):
                self.log_d.append(CLASS_ERROR)
                return False

        # exit gracefully
        #
        return True
    #
    # end of method

    def prepare_errors(self, set, cw):
        '''
        method: Model::prepare_errors

        arguments:
         set: a string, either TRAIN ("Training") to compute the train set
              or EVAL ("Evaluation") to compute the eval set
         cw: if true, the confusion matrix is also written to the log

        return: None

        raises:
         ValueError: if set is neither TRAIN nor EVAL

        description:
         compute the errors for either the training or evaluation set.
         predict_labels() must have been called for the same set first.
        '''

        # determine the set to compute errors for
        #
        if set == EVAL:
            data = self.ml_eval
            pred = self.eval_labels
        elif set == TRAIN:
            data = self.ml_train
            pred = self.train_labels
        else:
            raise ValueError("unknown data set: %r (expected %r or %r)" %
                             (set, TRAIN, EVAL))

        # calculate the error rate (as a percentage)
        #
        error = (1 - self.algo.accuracy(data.labels, pred)) * 100

        # display the errors
        #
        self.display_errors(set, error)

        # compute and optionally display the confusion matrix
        #
        cm = self.algo.confusion_matrix(data.num_of_classes,
                                        data.labels, pred)
        if cw:
            self.print_confusion_matrix(cm, data.mapping_label)

        # separate this report from the next log entries
        #
        self.log_d.append(nft.DELIM_NEWLINE)

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_plot(self, set):
        '''
        method: Model::prepare_plot

        arguments:
         set: a string, either TRAIN ("Training") to plot the train set
              or EVAL ("Evaluation") to plot the eval set

        return: None

        raises:
         ValueError: if set is neither TRAIN nor EVAL

        description:
         plot the decision surface for either the eval or train data
        '''

        # determine the set to plot the decision surface for
        #
        if set == EVAL:
            data = self.ml_eval
            display = self.output_d
        elif set == TRAIN:
            data = self.ml_train
            display = self.input_d
        else:
            raise ValueError("unknown data set: %r (expected %r or %r)" %
                             (set, TRAIN, EVAL))

        # plot the decision surface
        #
        self.plot_decision_surface(display, data)

        # exit gracefully
        #
        return None
    #
    # end of method

    def predict_labels(self, set):
        '''
        method: Model::predict_labels

        arguments:
         set: either TRAIN ("Training") or EVAL ("Evaluation") depending on
              the desired dataset

        return: None

        description:
         predict the train or eval dataset depending on the model. the
         predicted labels and the second value returned by the algorithm
         are stored as class data. any other value of set is ignored.
        '''

        # determine the set to predict and store its results
        #
        if set == EVAL:
            labels, post = self.algo.predict(self.ml_eval)
            self.eval_labels = labels
            self.eval_post = post
        elif set == TRAIN:
            labels, post = self.algo.predict(self.ml_train)
            self.train_labels = labels
            self.train_post = post

        # exit gracefully
        #
        return None
    #
    # end of method

    def prepare_data(self, info):
        '''
        method: Model::prepare_data

        arguments:
         info: the display object whose class_info attribute holds the
               user data

        return:
         data: a list with one array of (x, y) coordinates per non-empty
               class of the chosen data

        description:
         this method extracts the coordinates of each class from the
         display, removes empty classes, and optionally normalizes the
         remaining classes
        '''

        # extract the data from the dictionary per class:
        #  entries 1 and 2 of each class record are read as the x and y
        #  values
        #
        class_info = info.class_info
        data = []
        for name in class_info:
            x_data = np.array(class_info[name][1])
            y_data = np.array(class_info[name][2])
            data.append(np.column_stack((x_data, y_data)))

        # remove empty classes
        #
        new_data = [coords for coords in data if coords.size != 0]

        # normalize data: each class is normalized independently
        #
        if self.normalize:

            # set up parameter for normalizer
            #
            norm = 'l2'
            new_data = [self.normalizer(coords, norm) for coords in new_data]

        # exit gracefully
        #
        return new_data
    #
    # end of method

    def normalizer(self, data, norm):
        '''
        method: Model::normalizer

        arguments:
         data: an array of coordinates from chosen data
         norm: the norm passed to sklearn's Normalizer (e.g. 'l2')

        return:
         norm_data: the normalized data

        description:
         this method normalizes the data with sklearn's Normalizer
        '''

        # fit the transformer and apply it to the same data
        #
        transformer = Normalizer(norm).fit(data)
        norm_data = transformer.transform(data)

        # exit gracefully
        #
        return norm_data
    #
    # end of method

    def train(self):
        '''
        method: Model::train

        arguments: None

        return:
         bool: if no training data has been prepared, return False.
               else, return True

        description:
         train the chosen algorithm with the prepared training data
        '''

        # check if there is training data
        #
        if self.train_data is None:
            return False

        # run the algo with selected data
        #
        self.algo.train(self.ml_train)

        # exit gracefully
        #
        return True
    #
    # end of method

    def predict_decision_surface(self, ax, data):
        '''
        method: Model::predict_decision_surface

        arguments:
         ax: the display whose canvas the original data is plotted on
         data: the data to create the decision surface for

        return:
         xx: the x coordinates of the contour
         yy: the y coordinates of the contour
         Z: the predicted label at each grid point, shaped like xx

        description:
         this method predicts a decision surface by classifying every
         point of a grid that covers the canvas with the current algorithm
        '''

        # prepare the data to be predicted and to create a decision
        # surface with
        #
        formatted_data, xx, yy = data.prep_decision_surface(ax)

        # predict on a copy of the algorithm rather than on the original
        #
        ds_alg = deepcopy(self.algo)

        # predict the prepared data
        #
        labels, _ = ds_alg.predict(formatted_data)

        # convert the predictions to a numpy array and reshape them to
        # match the shape of xx and yy
        #
        labels = np.array(labels)
        Z = labels.reshape(xx.shape)

        # exit gracefully
        #
        return xx, yy, Z
    #
    # end of method

    def plot_decision_surface(self, display, data):
        '''
        method: Model::plot_decision_surface

        arguments:
         display: the display the decision surface is drawn on
         data: the data to create the decision surface for

        return: None

        description:
         this method plots the decision surface based on the algorithm's
         prediction
        '''

        # record the algorithm's prediction and plot the decision surface
        #
        QtCore.QCoreApplication.processEvents()
        xx, yy, Z = self.predict_decision_surface(display, data)
        QtCore.QCoreApplication.processEvents()
        self.decision_surface(display, xx, yy, Z)

        # exit gracefully
        #
        return None
    #
    # end of method

    def display_errors(self, label, error):
        '''
        method: Model::display_errors

        arguments:
         label: the name of the data set (training or evaluation)
         error: the error rate as a percentage

        return:
         a boolean value indicating status

        description:
         this method writes the calculated error rate to the process log
        '''

        # display the error rate for selected label
        #
        text = f"{label} Error Rate = {error:.2f}%"
        self.log_d.append(text)

        # exit gracefully
        #
        return True
    #
    # end of method

    def decision_surface(self, ax, xx, yy, Z):
        '''
        method: Model::decision_surface

        arguments:
         ax: the display whose axes the decision surface is graphed upon
         xx: the x coordinate data
         yy: the y coordinate data
         Z: the height values of the contour

        return:
         a boolean value indicating status

        description:
         this method draws the decision surface as a filled contour plot
         using the surface color map of the input display
        '''

        # reshape the contour
        #
        Z = Z.reshape(xx.shape)

        # plot the decision surface
        #
        ax.canvas.axes.contourf(xx, yy, Z, alpha = 0.4,
                                cmap=self.input_d.surface_color)
        ax.canvas.draw_idle()

        # exit gracefully
        #
        return True
    #
    # end of method

    def print_confusion_matrix(self, cnf, mapping_label):
        """
        method: Model::print_confusion_matrix

        arguments:
         cnf: the confusion matrix (assumed to be square)
         mapping_label: the mapping labels from an algorithm, indexed by
                        class position

        return:
         a boolean value indicating status

        description:
         this method writes the confusion matrix to the process log as a
         table. each cell shows the count and the percentage of its row.
         a row that contains no samples is reported as 0%.
        """

        # get the number of rows and columns for the numeric data:
        #  we assume a square matrix in this case
        #
        nrows = len(cnf)
        ncols = len(cnf)

        # create the table headers
        #
        headers = ["Ref/Hyp:"]
        for i in range(nrows):
            if isinstance(mapping_label[i], int):
                headers.append(ml.ALG_FMT_LBL % mapping_label[i])
            else:
                headers.append(mapping_label[i])

        # convert the confusion matrix to percentages:
        #  a row whose total is zero is left at zero rather than dividing
        #  by zero
        #
        pct = np.zeros_like(cnf, dtype = float)
        for i in range(nrows):
            row_total = float(cnf[i].sum())
            if row_total == 0.0:
                continue
            for j in range(ncols):
                pct[i][j] = float(cnf[i][j]) / row_total

        # get the width of each column and compute the total cell width:
        #  the width of the percentage column includes "()" and two spaces
        #
        width_lab = int(float(ml.ALG_FMT_WLB[1:-1]))
        width_cell = int(float(ml.ALG_FMT_WCL[1:-1]))
        width_pct = int(float(ml.ALG_FMT_WPC[1:-1]))
        width_paren = 4
        total_width_cell = width_cell + width_pct + width_paren

        # print the title
        #
        title = "Confusion Matrix"
        self.log_d.append(title+nft.DELIM_COLON)

        # print the next ncols labels center-aligned
        #
        str_line = ''
        for i in range(1, ncols + 1):

            # the first column is preceded by the row-label column
            #
            if i == 1:
                str_line += "%*s" % (width_lab, "Ref/Hyp:")

            # compute the number of spaces needed to center-align
            #
            num_spaces = total_width_cell - len(headers[i])
            num_spaces_2 = int(num_spaces / 2)

            # write spaces, header, spaces
            #
            str_line += ("%s" % nft.DELIM_SPACE * num_spaces_2)
            str_line += ("%s" % headers[i])
            str_line += ("%s" % nft.DELIM_SPACE *
                         (num_spaces - num_spaces_2))

        self.log_d.append(str_line)

        # write the rows with numeric data
        #
        for i in range(nrows):

            # write the row label
            #
            str_line = ''
            str_line += ("%*s" % (width_lab, headers[i+1] + nft.DELIM_COLON))

            # write the numeric data: the count and its row percentage
            #
            for j in range(ncols):
                str_line += (ml.ALG_FMT_WST %
                             (cnf[i][j], ml.ALG_SCL_PCT * pct[i][j]))

            self.log_d.append(str_line)

        # exit gracefully
        #
        return True
    #
    # end of method
#
# end of class

#
# end of file