# this file is used to implement the Nearest Neighbor algorithm
import numpy as np
from sklearn.neighbors import KNeighborsClassifier


class AlgorithmNN():
    """Step-driven K-Nearest Neighbor demo bound to two plot displays.

    The training points are read from the scatter collections drawn on
    ``input_display``; evaluation points are read from ``eval_display``.
    Progress and results are reported by appending text to ``process_box``.

    Each labelled collection is expected to carry its class label(s) as
    its gid (read back through ``get_gid()``).
    """

    def __init__(self, input_display, eval_display, process_box):
        """Store the display / log handles and create an untrained model.

        Args:
            input_display: object exposing ``canvas.axes`` (training plot)
                and ``reset_classes()``.
            eval_display: object exposing ``canvas.axes`` (evaluation plot).
            process_box: text widget exposing ``append()`` and ``repaint()``.
        """
        self.input = input_display
        self.eval = eval_display
        self.process = process_box
        self._reset_state()
        self.Z = None

    def _reset_state(self):
        """Reset the classifier and all derived training state."""
        self.knn = KNeighborsClassifier(weights="distance")
        self.class_num = np.empty((0, 0))
        self.X = np.empty((0, 0))
        self.classes = 0
        self.mu = np.empty((0, 0))
        self.support_region = np.empty((0, 0))
        self.step_index = 0
        self.trans_matrix = 0
        self.data = False

    def run(self):
        """Execute the step selected by ``self.step_index`` (1 or 2).

        Any other step index is a no-op.
        """
        if self.step_index == 1:
            self.step1()
        elif self.step_index == 2:
            self.step2()
            # step 2 is the last step, so completion is reported here
            self.process.append(" *** Algorithm Complete ***\n")

    def get_step_index(self):
        """Return True once the step index has moved past the last step."""
        return self.step_index > 2

    def increment_step(self):
        """Advance to the next step."""
        self.step_index = self.step_index + 1

    def reset_step(self):
        """Rewind to step 0."""
        self.step_index = 0

    def initialize(self):
        """Announce the algorithm in the process log and reset its state."""
        self.process.append("Algorithm: K-Nearest Neighbor:")
        self.process.append("\n *** Initializing the Algorithm ***\n")
        self._reset_state()

    def step1(self):
        """Step 1: report the class means and mark them on the input plot."""
        self.process.append(" 1. Computing the Statistics.\n")
        self.compute_mean()
        self.input.canvas.draw_idle()

    def step2(self):
        """Step 2: draw decision regions, then report train / eval errors."""
        self.process.append(" 2. Drawing the Decision Surface(s).\n")
        # repaint so the message is visible before the grid prediction runs
        self.process.repaint()
        self.plot_decision_regions(self.X, self.knn, self.input)

        self.process.append(" 3. Computing Error Rates:\n ")
        self.process.append(" Train:")
        self.compute_errors(self.input)
        self.process.append(" Eval:")
        self.classify_eval()
        self.input.reset_classes()

    def extract_data(self):
        """Collect the training set from the input plot and fit the model.

        Every collection with a non-None gid contributes its points and
        labels. When a label has already been seen, the points of the
        repeated collection are merged into the first collection carrying
        that label, and only the first collection of each class is kept
        on the axes.

        The training state (``X``, ``class_num``, ``mu``, ``classes``) is
        rebuilt from scratch on every call.

        Returns:
            bool: False when the axes hold no collection, or no labelled
            collection; True once the classifier has been fitted.
        """
        all_collections = self.input.canvas.axes.collections
        if len(all_collections) == 0:
            return False

        # np.append onto an empty float array mirrors the dtype promotion
        # the model has always been trained with
        points = np.empty((0, 0))
        labels = np.empty((0, 0))
        seen_labels = []
        kept_indices = []

        for position, collection in enumerate(all_collections):

            # retrieve the x,y and gid of the collection
            #
            try:
                current_gid = collection.get_gid()[0][0]
            except TypeError:
                # gid is None or a plain scalar
                current_gid = collection.get_gid()

            if current_gid is None:
                continue

            points = np.append(points, np.array(collection.get_offsets()))
            labels = np.append(labels, np.array(collection.get_gid()))

            if current_gid not in seen_labels:
                seen_labels.append(current_gid)
                kept_indices.append(position)
                continue

            # repeated class: fold its points into the first collection
            # that already carries this label
            for other in all_collections:
                other_gid = other.get_gid()
                if other_gid is None:
                    # unlabelled artists cannot match; skipping them also
                    # avoids a TypeError from the membership test
                    continue
                if current_gid in np.atleast_1d(other_gid):
                    merged = np.vstack((other.get_offsets(),
                                        collection.get_offsets()))
                    other.set_offsets(merged)
                    # NOTE(review): two-argument set_gid is kept from the
                    # original; confirm the collection type accepts it
                    other.set_gid(merged.shape[0], current_gid)
                    break

        if not kept_indices:
            # nothing labelled to train on
            return False

        kept = [all_collections[i] for i in kept_indices]

        # one mean per class, taken over the kept (merged) collections only
        # so the result always has exactly `classes` rows
        means = np.empty((0, 0))
        for collection in kept:
            means = np.append(means, np.mean(collection.get_offsets(), axis=0))

        self.data = True
        self.classes = len(kept)
        self.mu = np.reshape(means, (self.classes, 2))

        # NOTE(review): assigning to axes.collections is kept from the
        # original; confirm it is supported by the plotting backend in use
        self.input.canvas.axes.collections = kept

        self.X = np.reshape(points, (-1, 2))
        self.class_num = np.ndarray.flatten(labels)
        self.knn.fit(self.X, self.class_num)
        return True

    def compute_mean(self):
        """Log each class mean and scatter the means on the input plot."""
        for class_index in range(self.classes):
            text = " Class {}: {}".format(class_index + 1,
                                          self.mu[class_index])
            self.process.append(text)
        self.input.canvas.axes.scatter(self.mu[:, 0], self.mu[:, 1],
                                       facecolors='none',
                                       edgecolors='black', s=8)

    def plot_decision_regions(self, X, clf, display):
        """Shade the regions predicted by ``clf`` on ``display``.

        Args:
            X: array indexed as ``X[:, 0]`` / ``X[:, 1]``; its extent,
                padded by 1 on each side, bounds the prediction grid.
            clf: fitted classifier exposing ``predict``.
            display: object exposing ``canvas.axes`` and ``canvas``.
        """
        axes = display.canvas.axes

        # grid step is 1/100 of the visible x-axis span (the original
        # subtracted the lower y-limit from the upper x-limit)
        x_low, x_high = axes.get_xlim()
        res = (x_high - x_low) / 100

        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, res),
                             np.arange(y_min, y_max, res))

        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)

        axes.contourf(xx, yy, Z, alpha=0.4)
        display.canvas.draw_idle()

    def compute_errors(self, display):
        """Log a confusion table and the overall error rate for ``display``.

        The first ``self.classes`` collections on the display's axes are
        taken as the per-class reference sets; the reference label of a
        set is read from ``get_gid()[0][0]``. Predicted labels are used
        directly as column indices of the table.

        Args:
            display: object exposing ``canvas.axes.collections``.
        """
        collections = display.canvas.axes.collections

        # total number of reference samples
        #
        samples = 0
        for class_index in range(self.classes):
            samples += collections[class_index].get_offsets().shape[0]
        self.process.append(" Number of samples: {}".format(samples))

        # table header: known layouts are kept verbatim
        #
        headers = {
            4: " Ref/Hyp: {:10s}{:10s}{:10s}{:10s}".format(
                "Class 1", "Class 2", "Class 3", "Class 4"),
            3: " Ref/Hyp: Class 1 Class 2 Class 3",
            2: " Ref/Hyp: {:10s}{:10s}".format("Class 1", "Class 2"),
            1: " Ref/Hyp: Class 1 ",
        }
        header = headers.get(self.classes)
        if header is None:
            header = " Ref/Hyp: " + "".join(
                "{:10s}".format("Class {}".format(k + 1))
                for k in range(self.classes))
        self.process.append(header)

        # one table row per reference class
        #
        incorrect = 0
        columns = max(self.classes, 4)
        for class_index in range(self.classes):
            data = np.array(collections[class_index].get_offsets())
            expected = collections[class_index].get_gid()[0][0]
            predicted = [0] * columns

            # the classifier rejects an empty batch, so skip empty sets
            if data.shape[0] > 0:
                hypotheses = self.knn.predict(np.reshape(data, (-1, 2)))
                for label in hypotheses:
                    prediction = int(label)
                    predicted[prediction] += 1
                    if prediction != expected:
                        incorrect += 1

            text = " Class {}:".format(class_index + 1)
            for column in range(self.classes):
                text = text + "{:12d}".format(predicted[column])
            self.process.append(text)

        # avoid dividing by zero when there is nothing to score
        error = (incorrect / samples) * 100 if samples else 0.0
        text = "\n Error Rate = {} / {} = {:.2f}%\n".format(
            incorrect, samples, error)
        self.process.append(text)

    def classify_eval(self):
        """Classify the evaluation plot's points and report the errors.

        Returns:
            False when the evaluation plot has no labelled collection or
            its labelled-collection count differs from ``self.classes``;
            otherwise None, after drawing the decision regions on the
            evaluation display and logging the error table.
        """
        eval_collections = self.eval.canvas.axes.collections
        eval_classes = sum(
            1 for collection in eval_collections
            if collection.get_gid() is not None)

        if eval_classes == 0:
            self.process.append(" There is no Eval Data to Classify.\n")
            return False
        if eval_classes != self.classes:
            text = " The number of classes in Eval Data ({}) does not match train data ({}).\n".format(eval_classes, self.classes)
            self.process.append(text)
            return False

        # gather the x,y points of every collection on the eval axes
        #
        eval_data = np.empty((0, 0))
        for collection in eval_collections:
            eval_data = np.append(eval_data,
                                  np.array(collection.get_offsets()))
        eval_data = np.reshape(eval_data, (-1, 2))

        self.plot_decision_regions(eval_data, self.knn, self.eval)
        self.compute_errors(self.eval)