#!/usr/bin/env python
#
# file: Bayes Classification with Priors
#
# revision history:
#
# 20260120 (AA): initial version for Bayes classification assignment
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values
#
DEF_N_POINTS = 10000
DEF_PRIOR_1 = 0.75
DEF_PRIOR_2 = 0.25

# NOTE(review): the priors are 0.75/0.25 but both classes are generated with
# the same number of samples (DEF_N_POINTS each), so the empirical class
# frequencies do not match the priors used by the classifier. This appears to
# be an intentional assignment setup — confirm before "fixing".

# Class 1 Parameters
#
DEF_MU1 = [0.0, 0.0]

# Non-diagonal (rotated) covariance
#
DEF_COV1 = [[3.0, 1.5],
            [1.5, 3.0]]

# Class 2 Parameters
#
DEF_MU2 = [2.5, 2.5]

# Same shape, shifted
#
DEF_COV2 = [[3.0, 1.5],
            [1.5, 3.0]]

DEF_OUT_PLOT = "bayes_classification_results.png"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# function: generate_data
#
# arguments:
#  mu: mean vector of the Gaussian (length-2 list)
#  cov: 2x2 covariance matrix
#  n_points: number of samples to draw
#  seed: RNG seed for reproducibility
#
# return: an (n_points, 2) ndarray of samples
#
# generate data for a single class. The legacy global-state seeding API is
# kept deliberately so the script reproduces the exact same samples as the
# original version.
#
def generate_data(mu, cov, n_points, seed):
    np.random.seed(seed)
    return np.random.multivariate_normal(mu, cov, n_points)

# function: classify_sample
#
# arguments:
#  x: a single 2-d sample
#  rv1, rv2: frozen scipy multivariate_normal distributions for class 1 / 2
#  p1, p2: prior probabilities P(w1), P(w2)
#
# return: the predicted class label (1 or 2)
#
# classify a single sample using straight Bayes Rule:
# choose class i if P(x|w_i) * P(w_i) is maximized.
#
# The comparison is done in log space: log p(x|w_i) + log P(w_i). This is
# monotonically equivalent to comparing the raw products, but it does not
# underflow to 0.0 for points far from both means (where raw pdf values
# would both be exactly zero and the decision would silently default to
# class 2 regardless of which class is actually closer).
#
def classify_sample(x, rv1, rv2, p1, p2):
    # compute log posteriors (unnormalized):
    # log posterior = log likelihood + log prior
    #
    log_post1 = rv1.logpdf(x) + np.log(p1)
    log_post2 = rv2.logpdf(x) + np.log(p2)

    # decision rule (ties resolve to class 2, as in the original)
    #
    return 1 if log_post1 > log_post2 else 2

# function: _classify_batch
#
# arguments:
#  X: an (n, 2) ndarray of samples
#  rv1, rv2: frozen scipy multivariate_normal distributions for class 1 / 2
#  p1, p2: prior probabilities P(w1), P(w2)
#
# return: an (n,) int ndarray of predicted labels (1 or 2)
#
# vectorized version of classify_sample: scipy's logpdf accepts an array of
# points, so all samples are scored in two calls instead of 2*n calls from a
# Python loop. The decision rule is identical to classify_sample.
#
def _classify_batch(X, rv1, rv2, p1, p2):
    log_post1 = rv1.logpdf(X) + np.log(p1)
    log_post2 = rv2.logpdf(X) + np.log(p2)
    return np.where(log_post1 > log_post2, 1, 2)

# function: compute_metrics
#
# arguments:
#  true_labels: iterable of true labels (1 or 2)
#  pred_labels: iterable of predicted labels (1 or 2)
#
# return: (confusion matrix, accuracy in percent, error rate in percent)
#
# raises: ValueError if true_labels is empty
#
# compute confusion matrix and accuracy
#
def compute_metrics(true_labels, pred_labels):
    # guard against empty input: accuracy is undefined, so fail loudly
    # instead of raising a cryptic ZeroDivisionError below
    #
    total = len(true_labels)
    if total == 0:
        raise ValueError("compute_metrics: true_labels is empty")

    # initialize confusion matrix
    # rows: Actual, cols: Predicted
    # [[TP (1->1), FN (1->2)],
    #  [FP (2->1), TN (2->2)]]
    #
    cm = np.zeros((2, 2), dtype=int)

    # iterate and fill matrix
    #
    for t, p in zip(true_labels, pred_labels):
        # map class 1 to index 0, class 2 to index 1
        #
        cm[t - 1, p - 1] += 1

    # compute accuracy
    #
    correct = cm[0, 0] + cm[1, 1]
    accuracy = (float(correct) / total) * 100.0
    error_rate = 100.0 - accuracy

    return cm, accuracy, error_rate

# function: plot_results
#
# arguments:
#  X1, X2: the generated samples for class 1 and class 2
#  pred_labels: predicted labels for the stacked data (X1 rows first)
#  outfile: path of the output image
#
# return: none (writes the plot to outfile)
#
# visualize the classification: scatter plot colored by PREDICTED class
# (class 1 predictions in blue, class 2 in red)
#
def plot_results(X1, X2, pred_labels, outfile):
    # combine data for easier indexing; row order must match pred_labels
    #
    X = np.vstack((X1, X2))
    pred = np.asarray(pred_labels)

    plt.figure(figsize=(10, 8))

    # select points by predicted class with boolean masks
    #
    mask1 = (pred == 1)
    mask2 = (pred == 2)

    # plot predicted class 1
    #
    plt.scatter(X[mask1, 0], X[mask1, 1],
                c='blue', s=2, alpha=0.3, label='Pred: Class 1')

    # plot predicted class 2
    #
    plt.scatter(X[mask2, 0], X[mask2, 1],
                c='red', s=2, alpha=0.3, label='Pred: Class 2')

    plt.title("Bayes Classification Results (P1=%.2f, P2=%.2f)"
              % (DEF_PRIOR_1, DEF_PRIOR_2))
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.legend()
    plt.grid(True)
    plt.savefig(outfile)
    print("Visualization saved to: %s" % outfile)

# function: main
#
# drive the experiment: generate the two classes, classify every point with
# the Bayes rule, report the confusion matrix / accuracy, and save a plot.
#
def main():
    print("Generating data...")
    print("    Class 1: N=%d, Prior=%.2f" % (DEF_N_POINTS, DEF_PRIOR_1))
    print("    Class 2: N=%d, Prior=%.2f" % (DEF_N_POINTS, DEF_PRIOR_2))

    # 1. Generate Data
    #
    X1 = generate_data(DEF_MU1, DEF_COV1, DEF_N_POINTS, seed=42)
    X2 = generate_data(DEF_MU2, DEF_COV2, DEF_N_POINTS, seed=99)

    # Create true labels
    #
    y1 = np.ones(DEF_N_POINTS, dtype=int) * 1
    y2 = np.ones(DEF_N_POINTS, dtype=int) * 2

    # Combine
    #
    X_all = np.vstack((X1, X2))
    y_true = np.concatenate((y1, y2))

    # 2. Setup Distributions for Classification
    #
    rv1 = multivariate_normal(DEF_MU1, DEF_COV1)
    rv2 = multivariate_normal(DEF_MU2, DEF_COV2)

    print("Classifying %d points..." % len(y_true))

    # classify all points in one vectorized pass (equivalent to calling
    # classify_sample on each row, but two logpdf calls instead of 4*N)
    #
    pred_labels = _classify_batch(X_all, rv1, rv2, DEF_PRIOR_1, DEF_PRIOR_2)

    # 3. Compute Confusion Matrix and Error
    #
    cm, acc, err = compute_metrics(y_true, pred_labels)

    # Display Results
    #
    print("-" * 40)
    print("Results:")
    print("-" * 40)
    print("Confusion Matrix:")
    print("             Pred C1   Pred C2")
    print("Actual C1      %5d     %5d" % (cm[0, 0], cm[0, 1]))
    print("Actual C2      %5d     %5d" % (cm[1, 0], cm[1, 1]))
    print("-" * 40)
    print("Accuracy: %6.2f%%" % acc)
    print("Error Rate: %6.2f%%" % err)
    print("-" * 40)

    # Plot
    #
    plot_results(X1, X2, pred_labels, DEF_OUT_PLOT)

    return True

if __name__ == '__main__':
    main()