#!/usr/bin/env python
#
# file: /data/isip/exp/demos/exp_0007/v0.0.6/imld_alg_lda.py
#
# revision history:
#  20200811 (LV): standardization, completion
#  20200505 (SJ): initial version
#
# This script implements the Class Dependent and Class Independent Linear
# Discriminant Analysis machine learning algorithms for the ISIP Machine
# Learning Demo software.
#------------------------------------------------------------------------------

# import system modules
#
import numpy as np

# import data-related modules
#
from scipy.spatial import distance
from math import sin, cos, atan2, sqrt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import gui.imld_gui_window as igw

# set error handling for floating point errors
#
np.seterr(divide='ignore', invalid='ignore')

# algorithm modes and constants
#
CI = "CI"
CD = "CD"
NUM_STEPS = 3
LDA_DIM = 1

class AlgorithmLDA():

    # method: AlgorithmLDA::constructor
    #
    # arguments:
    #  win_input: GUI input display
    #  win_output: GUI output display
    #  win_log: GUI process log
    #  mode: class-independent (CI) or class-dependent (CD) LDA
    #
    # return: none
    #
    def __init__(self, win_input, win_output, win_log, mode):

        # create class data
        #
        AlgorithmLDA.__CLASS_NAME__ = self.__class__.__name__

        # copy the inputs into class data
        #
        self.input_d = win_input
        self.output_d = win_output
        self.log_d = win_log
        self.mode = mode

    # method: AlgorithmLDA::initialize
    #
    # arguments: none
    #
    # return: none
    #
    # initialize variables for LDA
    #
    def initialize(self):
        self.data = []
        self.cov_mat = None
        self.mean_mat = None
        self.trans_mat = None
        self.dist = None
        self.lda = None
        self.lda_classes = []
        self.classes = None
        self.step_index = 0

    # method: AlgorithmLDA::run_algo
    #
    # arguments: none
    #
    # return: none
    #
    # run the algorithm steps
    #
    def run_algo(self):

        # calculate everything and display the stats
        #
        self.initialize()
        self.extract_data(self.input_d)
        self.compute_stats()
        self.display_means()
        self.display_cov()
        self.ellipse_regions(self.data, self.input_d.class_info)

    # method: AlgorithmLDA::extract_data
    #
    # arguments:
    #  display: the window that data is being extracted from
    #
    # return: a boolean indicating whether any data was found
    #
    # this method takes in a window, extracts the data and formats it for the
    # following algorithmic steps
    #
    def extract_data(self, display):

        # check if there is any data plotted
        #
        self.data = []
        if len(display.canvas.axes.collections) == 0:
            return False

        self.classes = len(display.class_info)

        # extract the data from the dictionary per class
        #
        for data in display.class_info:
            x_data = np.array(display.class_info[data][1])
            y_data = np.array(display.class_info[data][2])
            coordinates = np.column_stack((x_data, y_data))
            self.data.append(coordinates)

        return True

    # method: AlgorithmLDA::create_labels
    #
    # arguments: none
    #
    # return: a numpy array of integer class labels, one per sample
    #
    # this method builds a label vector that parallels the extracted data
    #
    def create_labels(self):
        labels = []
        count = 0
        d = self.input_d.class_info
        for i in d:
            total_samples = len(d[i][1])
            labels = labels + [count] * total_samples
            count += 1
        labels = np.array(labels)
        return labels

    # method: AlgorithmLDA::compute_stats
    #
    # arguments: none
    #
    # return: none
    #
    # this method calculates the covariance and mean matrices for each class
    # the LDA is applied to
    #
    def compute_stats(self):

        # initialize variables
        #
        self.mean_mat = []
        self.cov_mat = []
        self.trans_mat = []
        self.data = np.array(self.data)
        self.labels = self.create_labels()

        self.lda = LinearDiscriminantAnalysis(solver='eigen',
                                              store_covariance=True,
                                              n_components=None)

        # for class-independent LDA, fit the pooled data once and keep the
        # shared covariance
        #
        if self.mode == CI:
            data = np.vstack(self.data)
            self.lda.fit(data, self.labels)
            tmp_cov = self.lda.covariance_

        # fit each class separately and collect the per-class statistics
        #
        for i in range(len(self.data)):
            labels = [i] * len(self.data[i])
            fitted_data = self.lda.fit(self.data[i], labels)
            self.lda_classes.append(fitted_data)
            mean_vector = self.lda.means_[0]
            self.mean_mat.append(mean_vector)

            if self.mode == CI:
                self.cov_mat.append(tmp_cov)
            else:
                self.cov_mat.append(self.lda.covariance_)
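
    # note: after compute_stats() runs, self.mean_mat holds one mean vector
    # per class and self.cov_mat holds one covariance matrix per class. in
    # class-independent (CI) mode every entry of self.cov_mat is the same
    # shared covariance estimated from the pooled data, while in
    # class-dependent (CD) mode each entry is estimated from that class's
    # samples alone. self.lda_classes keeps the per-class fitted models for
    # later eigen-analysis. the display and region-drawing methods below read
    # these lists directly.
    #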

    # method: AlgorithmLDA::display_means
    #
    # arguments: none
    #
    # return: none
    #
    # this method displays the mean of each class in the process log and
    # plots a point at each mean
    #
    def display_means(self):
        text = "Means: \n"
        count = 0
        for line in self.mean_mat:
            text += "Class" + str(count) + ': ' + str(line) + '\n'
            self.input_d.canvas.axes.scatter(line[0], line[1], c='black', s=7)
            count += 1
        self.log_d.append(text)
        self.input_d.canvas.draw_idle()

    # method: AlgorithmLDA::display_cov
    #
    # arguments: none
    #
    # return: none
    #
    # this method displays the covariance matrix of each class in the
    # process log
    #
    def display_cov(self):
        text = 'Covariance Matrix: \n'
        for classes in range(len(self.cov_mat)):
            text += 'Class' + str(classes) + ': '
            str_cov = np.array2string(
                self.cov_mat[classes],
                formatter={'float_kind': lambda x: "%.4f" % x})
            text += '\n' + str_cov + '\n'
        self.log_d.append(text)

    # method: AlgorithmLDA::predict
    #
    # arguments:
    #  ax: the canvas the original data is plotted on
    #  X: the data used for the predictions
    #
    # return:
    #  xx: the x coordinates of the contour
    #  yy: the y coordinates of the contour
    #  Z: the height of the contour
    #
    # this method makes a prediction over a mesh grid using the Mahalanobis
    # distance to each class mean
    #
    def predict(self, ax, X):

        # create the mesh grid
        #
        X = np.concatenate(X, axis=0)
        X = np.reshape(X, (-1, 2))
        res = (ax.canvas.axes.get_xlim()[1] -
               ax.canvas.axes.get_xlim()[0]) / 100
        x_min, x_max = X[:, 0].min() - .75, X[:, 0].max() + .75
        y_min, y_max = X[:, 1].min() - .75, X[:, 1].max() + .75
        xx, yy = np.meshgrid(np.arange(x_min, x_max, res),
                             np.arange(y_min, y_max, res))

        # if class dependent, reshape the covariance matrix
        #
        if self.mode == CD:
            self.cov_mat = np.reshape(self.cov_mat, (self.classes, 2, 2))

        Z = np.empty((0, 0))

        # calculate the distance to each class using the mean and the
        # covariance of the class (scipy's mahalanobis expects the inverse
        # covariance matrix as its third argument)
        #
        for pairs in np.c_[xx.ravel(), yy.ravel()]:
            distances = np.empty((0, 0))
            for i in range(self.classes):
                maha = distance.mahalanobis(self.mean_mat[i], pairs,
                                            np.linalg.inv(self.cov_mat[i]))
                distances = np.append(distances, maha)

            # save the index of the closest class
            #
            Z = np.append(Z, np.argmin(distances))

        return xx, yy, Z

    # method: AlgorithmLDA::prediction_classifier
    #
    # arguments:
    #  data: the sample being classified
    #
    # return: the index of the class with the minimal distance
    #
    # this method classifies a sample by its Mahalanobis distance to each
    # class mean
    #
    def prediction_classifier(self, data):
        distances = np.empty((0, 0))
        for cur_class in range(self.classes):
            maha = distance.mahalanobis(self.mean_mat[cur_class], data,
                                        np.linalg.inv(self.cov_mat[cur_class]))
            distances = np.append(distances, maha)
        return np.argmin(distances)
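
    # note: predict() and prediction_classifier() implement the same
    # minimum-distance rule: a point x is assigned to the class k that
    # minimizes the Mahalanobis distance
    #
    #   d_k(x) = sqrt((x - mu_k)^T inv(C_k) (x - mu_k))
    #
    # where mu_k and C_k are the mean and covariance stored in self.mean_mat
    # and self.cov_mat. in class-independent mode all C_k are equal, so the
    # resulting decision boundaries are linear; in class-dependent mode they
    # are generally quadratic.
    #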

    # method: AlgorithmLDA::ellipse_regions
    #
    # arguments:
    #  x: the data passed through from the original class data
    #  classes: the class information dictionary for the dataset
    #
    # return: none
    #
    # this method calculates and plots the elliptical support region
    # surrounding each class's data
    #
    def ellipse_regions(self, x, classes):

        # initialize the boundaries of the support region
        #
        y = np.arange(len(classes))
        y = self.labels
        x = np.concatenate(x, axis=0)
        x = np.reshape(x, (-1, 2))
        y = np.reshape(y, (-1, 1))

        # initialize the region for each class
        #
        self.support_region = np.empty((0, 0))
        lda_d = [np.empty((2, 2)) * np.nan for i in range(len(classes))]

        # if class independent, find the mean and the covariance matrix
        #
        if self.mode == CI:
            val = np.empty((2, 1))

            if x.size > 0:
                mu = np.empty((0, 0))
                for num in range(len(classes)):
                    index = np.where(y == num)[0]
                    reshaped_x = np.reshape(x[index], (-1, 2))

                    # find the mean of the data for the class
                    #
                    mu_v = np.mean(reshaped_x, axis=0)
                    mu = np.append(mu, mu_v)

                mu = np.reshape(mu, (len(classes), 2, 1))
                self.mu = mu

                # find the covariance matrix
                #
                #x = x.transpose()
                y = np.ravel(y)
                self.lda.fit_transform(x, y)
                cov = self.lda.covariance_
                global_mu = self.lda.means_

                # find the eigenvalues and eigenvectors
                #
                eigVal, eigVector = np.linalg.eig(cov)

                # create a temporary matrix to hold the initial
                # transformation matrix
                #
                temp = np.empty((2, 2))
                for i in range(2):
                    for j in range(2):
                        temp[j][i] = (eigVector[i][j] / sqrt(eigVal[i]))

                # calculate the theta for the transformation matrix
                #
                alpha = cov[0][0] - cov[1][1]
                beta = -2 * cov[0][1]
                if eigVal[0] > eigVal[1]:
                    theta = atan2((alpha - sqrt((alpha * alpha) +
                                                (beta * beta))), beta)
                else:
                    theta = atan2((alpha + sqrt((alpha * alpha) +
                                                (beta * beta))), beta)

                # calculate the transformation matrix
                #
                trans_matrix = np.linalg.inv(temp)
                self.trans_matrix = temp

                # iterate through each degree
                #
                for i in range(0, 360):
                    val[0][0] = 1.5 * cos(i)
                    val[1][0] = 1.5 * sin(i)

                    # transform the points from the feature space back to the
                    # original space to create the support region for the
                    # data set
                    #
                    supp = np.dot(trans_matrix, val)

                    # rotate the points (original space)
                    #
                    xval = (supp[0][0] * cos(theta)) - (supp[1][0] * sin(theta))
                    yval = (supp[0][0] * sin(theta)) + (supp[1][0] * cos(theta))
                    xval = xval + global_mu[0]
                    yval = yval + global_mu[1]
                    self.support_region = np.append(self.support_region,
                                                    [xval, yval])

                self.support_region = np.reshape(self.support_region, (-1, 2))

        # if class dependent
        #
        else:
            val = np.empty((2, 1))
            for i in range(len(classes)):
                index = np.where(y == i)[0]
                if x[index].size > 0:

                    # find the eigenvalues and eigenvectors
                    #
                    #self.Eigval = self.lda_classes[i].explained_variance_
                    #self.Eigvect = self.lda_classes[i].scaling
                    self.Eigval, self.Eigvect = \
                        np.linalg.eig(self.lda_classes[i].covariance_)
                    for j in range(2):
                        for k in range(2):
                            lda_d[i][k][j] = self.Eigvect[k][j] / \
                                             sqrt(self.Eigval[j])

                # find the mean and covariance matrices
                #
                if not np.isnan(lda_d[i])[0][0]:
                    covLDA = self.cov_mat[i]
                    meanLDA = self.mean_mat[i]

                    # calculate the inverse transformation matrix
                    #
                    alpha = covLDA[0][0] - covLDA[1][1]
                    beta = -2 * covLDA[0][1]
                    if self.Eigval[0] > self.Eigval[1]:
                        theta = atan2((alpha - sqrt((alpha * alpha) +
                                                    (beta * beta))), beta)
                    else:
                        theta = atan2((alpha + sqrt((alpha * alpha) +
                                                    (beta * beta))), beta)
                    inv_trans = np.linalg.inv(lda_d[i])

                    # iterate through each degree for the support region
                    #
                    for z in range(0, 360):
                        val[0][0] = 1.5 * cos(z)
                        val[1][0] = 1.5 * sin(z)

                        # transform the points from the feature space back to
                        # the original space to create the support region for
                        # the data set
                        #
                        supp = np.dot(inv_trans, val)

                        # rotate the points (original space)
                        #
                        xval = (supp[0][0] * cos(theta)) - \
                               (supp[1][0] * sin(theta))
                        yval = (supp[0][0] * sin(theta)) + \
                               (supp[1][0] * cos(theta))
                        xval = xval + meanLDA[0]
                        yval = yval + meanLDA[1]
                        self.support_region = np.append(self.support_region,
                                                        [xval, yval])

            self.support_region = np.reshape(self.support_region, (-1, 2))

        # plot the support region
        #
        self.input_d.canvas.axes.scatter(self.support_region[:, 0],
                                         self.support_region[:, 1],
                                         c='black', s=1)
        self.input_d.canvas.draw_idle()

#
# end of class

#
# end of file
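
#------------------------------------------------------------------------------
# illustrative appendix: the guarded block below is a minimal, self-contained
# sketch (not part of the IMLD application) of the decision rule used above:
# estimate a mean per class and a shared covariance with scikit-learn's
# LinearDiscriminantAnalysis, then label a point by the smallest Mahalanobis
# distance to a class mean. the synthetic data and variable names here are
# hypothetical and exist only for demonstration; the block runs only when the
# file is executed directly, assuming the module's own imports (including the
# IMLD gui package) resolve in your environment.
#
if __name__ == "__main__":

    # generate two hypothetical 2-D Gaussian classes
    #
    rng = np.random.default_rng(0)
    class_0 = rng.normal(loc=[-1.0, 0.0], scale=0.5, size=(100, 2))
    class_1 = rng.normal(loc=[1.0, 0.5], scale=0.5, size=(100, 2))
    demo_x = np.vstack((class_0, class_1))
    demo_y = np.array([0] * 100 + [1] * 100)

    # fit a class-independent LDA: one mean per class, one shared covariance
    #
    demo_lda = LinearDiscriminantAnalysis(solver='eigen',
                                          store_covariance=True)
    demo_lda.fit(demo_x, demo_y)
    demo_means = demo_lda.means_
    demo_inv_cov = np.linalg.inv(demo_lda.covariance_)

    # classify a test point by the minimum Mahalanobis distance to each mean
    # (scipy's mahalanobis takes the inverse covariance matrix)
    #
    test_point = np.array([0.8, 0.4])
    demo_dists = [distance.mahalanobis(test_point, m, demo_inv_cov)
                  for m in demo_means]
    print("distances:", demo_dists)
    print("predicted class:", int(np.argmin(demo_dists)))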