#!/usr/bin/env python
#
# file: Bayes Classification with Log Probabilities
#
# revision history:
#
# 20260123 (SP): initial version for Bayes classification assignment
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values
#
DEF_CLASS_1 = 1
DEF_CLASS_2 = 2
DEF_N_POINTS = 10000
DEF_PRIOR_1 = 0.75
DEF_PRIOR_2 = 0.25

# Class 1 Parameters
#
DEF_MU1 = [0.0, 0.0]

# Non-diagonal (rotated)
#
DEF_COV1 = [[3.0, 1.5], [1.5, 3.0]]

# Class 2 Parameters
#
DEF_MU2 = [2.5, 2.5]

# Same shape, unequal variance
#
# NOTE(fix): the original matrix [[2.0, 0.1], [0.2, 4.0]] was asymmetric,
# which is not a valid covariance matrix (covariances must satisfy
# cov[i][j] == cov[j][i]); the off-diagonal terms are symmetrized here.
#
DEF_COV2 = [[2.0, 0.1], [0.1, 4.0]]

DEF_OUT_PLOT = "bayes_clf_log_probs.png"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# generate data for a single class
#
def generate_data(mu, cov, n_points, seed):
    """method: generate_data

    arguments:
     mu: mean vector (1D array-like)
     cov: covariance matrix (2D array-like)
     n_points: number of data points to generate (int)
     seed: random seed for reproducibility (int)

    return:
     data: generated data points (2D numpy array)

    description:
     Generate synthetic data points from a multivariate normal distribution.
    """

    # seed the legacy global generator so repeated runs are reproducible
    #
    np.random.seed(seed)
    return np.random.multivariate_normal(mu, cov, n_points)

# compute the log discriminant for one class
#
def discriminant_log_gaussian(x, mu, cov, prior):
    """method: discriminant_log_gaussian

    arguments:
     x: data point (1D array-like)
     mu: mean vector (1D array-like)
     cov: covariance matrix (2D array-like)
     prior: prior probability of the class (float)

    return:
     log_discriminant: log discriminant value (float)

    raises:
     ValueError: if prior <= 0 or cov is not positive definite

    description:
     Calculate the log discriminant function for a Gaussian distribution
     with a given prior:
       g_i(x) = ln(p(x | w_i)) + ln(P(w_i))
    """

    # convert inputs to numpy arrays
    #
    x = np.asarray(x)
    mu = np.asarray(mu)
    cov = np.asarray(cov)

    # prior should not be zero or negative because log(0) is undefined
    #
    if prior <= 0.0:
        raise ValueError("Prior must be > 0 to compute log(prior).")

    # get dimension of the mean vector
    #
    d = mu.size

    # calculate the point difference from the mean
    #
    xc = x - mu

    # compute the inverse covariance matrix because we need it for the
    # quadratic form
    #
    inv_cov = np.linalg.inv(cov)

    # compute the quadratic form
    #
    quad = float(xc.T @ inv_cov @ xc)

    # compute the sign and (natural) logarithm of the determinant of an array
    #
    sign, logdet = np.linalg.slogdet(cov)

    # ensure covariance is positive definite, otherwise logdet is undefined
    #
    if sign <= 0:
        raise ValueError("Covariance must be positive definite (det > 0).")

    # compute the log likelihood of the multivariate Gaussian
    #
    log_likelihood = -0.5 * (quad + logdet + d * np.log(2.0 * np.pi))

    # compute the log discriminant using log probabilities:
    #   g_i(x) = ln(p(x | w_i)) + ln(P(w_i))
    #
    g = log_likelihood + np.log(prior)

    # return the log discriminant value
    #
    return g

# classify a single sample using straight log probabilities
# rule: choose class i if ln(p(x | w_i)) + ln(P(w_i)) is maximized
#
def classify_sample(x, mu1, cov1, mu2, cov2, p1, p2):
    """method: classify_sample

    arguments:
     x: data point to classify (1D array-like)
     mu1: mean vector for class 1 (1D array-like)
     cov1: covariance matrix for class 1 (2D array-like)
     mu2: mean vector for class 2 (1D array-like)
     cov2: covariance matrix for class 2 (2D array-like)
     p1: prior probability P(w_1) for class 1 (float)
     p2: prior probability P(w_2) for class 2 (float)

    return:
     class_label: predicted class label (int)

    description:
     Classify a data point using Bayes classification with log probabilities.
    """

    g1 = discriminant_log_gaussian(x, mu1, cov1, p1)
    g2 = discriminant_log_gaussian(x, mu2, cov2, p2)

    # decision rule: ties go to class 2 (matches the original behavior)
    #
    if g1 > g2:
        return DEF_CLASS_1
    else:
        return DEF_CLASS_2

# compute confusion matrix and accuracy
#
def compute_metrics(true_labels, pred_labels):
    """method: compute_metrics

    arguments:
     true_labels: ground-truth class labels (sequence of int)
     pred_labels: predicted class labels (sequence of int)

    return:
     cm: 2x2 confusion matrix, rows = actual, cols = predicted (numpy array)
     accuracy: percentage of correct predictions (float)
     error_rate: 100 - accuracy (float)

    raises:
     ValueError: if true_labels is empty

    description:
     Compute the confusion matrix, accuracy and error rate for a two-class
     labeling:
       [[TP (1->1), FN (1->2)],
        [FP (2->1), TN (2->2)]]
    """

    # guard against an empty label set so we never divide by zero
    #
    total = len(true_labels)
    if total == 0:
        raise ValueError("true_labels must not be empty.")

    # initialize confusion matrix
    #
    cm = np.zeros((2, 2), dtype=int)

    # iterate and fill matrix: map class 1 to index 0, class 2 to index 1
    #
    for t, p in zip(true_labels, pred_labels):
        cm[t - DEF_CLASS_1, p - DEF_CLASS_1] += 1

    # compute accuracy as a percentage
    #
    correct = cm[0, 0] + cm[1, 1]
    accuracy = (float(correct) / total) * 100.0
    error_rate = 100.0 - accuracy

    return cm, accuracy, error_rate

# visualize the classification
#
def plot_results(X1, X2, pred_labels, outfile):
    """method: plot_results

    arguments:
     X1: data points generated for class 1 (2D numpy array)
     X2: data points generated for class 2 (2D numpy array)
     pred_labels: predicted labels for np.vstack((X1, X2)) (sequence of int)
     outfile: output image filename (str)

    return:
     none (writes the plot to outfile)

    description:
     Scatter-plot all points colored by their PREDICTED class label and
     save the figure to disk.
    """

    # combine data so predictions index into a single array
    #
    X = np.vstack((X1, X2))

    plt.figure(figsize=(10, 8))

    # separate points based on prediction using boolean masks
    # Class 1 predictions in Blue, Class 2 in Red
    #
    pred = np.asarray(pred_labels)
    mask1 = pred == DEF_CLASS_1
    mask2 = pred == DEF_CLASS_2

    # plot predicted class 1
    #
    plt.scatter(X[mask1, 0], X[mask1, 1], c='blue', s=2, alpha=0.3,
                label=f'Pred: Class {DEF_CLASS_1}')

    # plot predicted class 2
    #
    plt.scatter(X[mask2, 0], X[mask2, 1], c='red', s=2, alpha=0.3,
                label=f'Pred: Class {DEF_CLASS_2}')

    plt.title("Log Discriminants Results (P1=%.2f, P2=%.2f)" %
              (DEF_PRIOR_1, DEF_PRIOR_2))
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.legend()
    plt.grid(True)
    plt.savefig(outfile)
    print("Visualization saved to: %s" % outfile)

# function: main
#
def main():
    """method: main

    arguments:
     none

    return:
     True on completion

    description:
     Generate two Gaussian classes, classify every point with the
     log-probability Bayes rule, report the confusion matrix / accuracy,
     and save a scatter plot of the predictions.
    """

    print("Generating data...")
    print(" Class 1: N=%d, Prior=%.2f" % (DEF_N_POINTS, DEF_PRIOR_1))
    print(" Class 2: N=%d, Prior=%.2f" % (DEF_N_POINTS, DEF_PRIOR_2))

    # 1. Generate Data
    #
    X1 = generate_data(DEF_MU1, DEF_COV1, DEF_N_POINTS, seed=42)
    X2 = generate_data(DEF_MU2, DEF_COV2, DEF_N_POINTS, seed=99)

    # Create true labels
    #
    y1 = np.full(DEF_N_POINTS, DEF_CLASS_1, dtype=int)
    y2 = np.full(DEF_N_POINTS, DEF_CLASS_2, dtype=int)

    # Combine
    #
    X_all = np.vstack((X1, X2))
    y_true = np.concatenate((y1, y2))

    # 2. Classify every point with the log-discriminant rule
    #
    print("Classifying %d points..." % len(y_true))
    pred_labels = [classify_sample(x,
                                   DEF_MU1, DEF_COV1,
                                   DEF_MU2, DEF_COV2,
                                   DEF_PRIOR_1, DEF_PRIOR_2)
                   for x in X_all]

    # 3. Compute Confusion Matrix and Error
    #
    cm, acc, err = compute_metrics(y_true, pred_labels)

    # Display Results
    #
    print("-" * 40)
    print("Results:")
    print("-" * 40)
    print("Confusion Matrix:")
    print("          Pred C1  Pred C2")
    print("Actual C1 %5d  %5d" % (cm[0, 0], cm[0, 1]))
    print("Actual C2 %5d  %5d" % (cm[1, 0], cm[1, 1]))
    print("-" * 40)
    print("Accuracy:   %6.2f%%" % acc)
    print("Error Rate: %6.2f%%" % err)
    print("-" * 40)

    # Plot
    #
    plot_results(X1, X2, pred_labels, DEF_OUT_PLOT)

    return True

if __name__ == '__main__':
    main()