#!/usr/bin/env python
#
# file: svm.py
#
# revision history:
#
# 20260327 (SP): implement simple support vector machine
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys
import random
import math

# import third-party numerical libraries
#
# NOTE: matplotlib is imported lazily inside plot_decision_boundary so that
# the training / evaluation code in this module can be used on systems
# without a plotting stack installed.
#
import numpy as np

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default hyperparameters
#
DEF_EPOCHS = int(3000)
DEF_NUM_SAMPLES = 100
DEF_LEARNING_RATE = 0.01
DEF_LAMBDA = 0.01
DEF_RANDOM_SEED = 27
DEF_OUTPUT_FILE = 'svm_boundary.png'
DEF_TOLERANCE = 1e-3

# set the numpy random seed for reproducible data generation
#
np.random.seed(DEF_RANDOM_SEED)

# generate class -1 (shifted to [3.0, 3.0] to close the gap)
#
class_minus1_x = np.random.rand(DEF_NUM_SAMPLES // 2, 2) * 4.0 + [3.0, 3.0]
class_minus1_y = [-1 for _ in range(DEF_NUM_SAMPLES // 2)]

# generate class +1 (shifted to [5.0, 5.0] to close the gap and create
# slight overlap)
#
class_plus1_x = np.random.rand(DEF_NUM_SAMPLES // 2, 2) * 4.0 + [5.0, 5.0]
class_plus1_y = [1 for _ in range(DEF_NUM_SAMPLES // 2)]

# combine and convert to standard python lists to preserve the pure-python
# training loop
#
DEF_DATA_X = class_minus1_x.tolist() + class_plus1_x.tolist()
DEF_DATA_Y = class_minus1_y + class_plus1_y

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# calculate the dot product between two vectors
#
def dot_product(vector1, vector2):
    """
    method: dot_product

    arguments:
     vector1: list of coordinates for the first vector
     vector2: list of coordinates for the second vector

    return:
     result: the scalar dot product of the two vectors

    description:
     Calculate the dot product of two n-dimensional vectors to measure
     their alignment. On a dimensionality mismatch an error is printed and
     0.0 is returned (best-effort behavior; callers in this module always
     pass equal-length vectors).
    """

    # verify that both vectors have the same number of dimensions
    #
    if len(vector1) != len(vector2):
        print("Error: vectors must have the same dimensionality")
        return 0.0

    # multiply corresponding elements and sum them up
    #
    result = 0.0
    for a, b in zip(vector1, vector2):
        result += a * b

    # exit gracefully
    #
    return result

# train the support vector machine using stochastic gradient descent
#
def train_svm(X, y, epochs = DEF_EPOCHS, lr = DEF_LEARNING_RATE,
              lambda_param = DEF_LAMBDA, seed = DEF_RANDOM_SEED):
    """
    method: train_svm

    arguments:
     X: list of lists representing the feature vectors
     y: list of class labels (-1 or 1)
     epochs: number of times to iterate over the dataset
     lr: learning rate for gradient descent
     lambda_param: regularization parameter controlling margin width
     seed: random seed for reproducibility

    return:
     w: the optimized weight vector
     b: the optimized bias term

    description:
     Main SVM training loop. Utilizes Stochastic Gradient Descent (SGD) to
     minimize the hinge loss and maximize the margin between classes.
    """

    # set the random seed for reproducible shuffling
    #
    random.seed(int(seed))

    # initialize weights to zero based on the number of features
    #
    num_features = len(X[0])
    w = [0.0 for _ in range(num_features)]

    # initialize the bias term to zero
    #
    b = 0.0

    # pair features and labels once so we can shuffle them together;
    # re-shuffling the same list each epoch is equivalent to rebuilding it
    #
    dataset = list(zip(X, y))

    # loop up to the maximum number of epochs
    #
    for epoch in range(epochs):

        random.shuffle(dataset)

        # iterate through each individual sample in the shuffled dataset
        #
        for x_val, y_val in dataset:

            # calculate the decision function: w * x + b
            #
            decision_value = dot_product(w, x_val) + b

            # check if the sample violates the margin constraint
            # y * (w * x + b) >= 1 means it is classified correctly and
            # outside the margin
            #
            if y_val * decision_value >= 1:

                # if correctly classified, only apply L2 regularization
                # penalty to weights; the bias is NOT regularized and the
                # hinge term is zero here, so b is left untouched
                # derivative of lambda * w_j^2 is 2 * lambda * w_j
                #
                for j in range(num_features):
                    w[j] = w[j] - lr * (2 * lambda_param * w[j])

            else:

                # if misclassified or inside the margin:
                # apply hinge-loss + L2 regularization gradient
                # L2 regularization 2*lambda*w_j
                # hinge-loss: -x_j*y
                #
                for j in range(num_features):
                    w[j] = w[j] - lr * (2 * lambda_param * w[j]
                                        - x_val[j] * y_val)

                # bias gradient of the active hinge term is -y,
                # so b := b - lr * (-y)
                #
                b = b - lr * (-y_val)

    # exit gracefully
    #
    return w, b

# identify and count the support vectors based on the margin constraint
#
def get_support_vectors(X, y, w, b, tolerance = DEF_TOLERANCE):
    """
    method: get_support_vectors

    arguments:
     X: list of lists representing the feature vectors
     y: list of class labels (-1 or 1)
     w: the optimized weight vector
     b: the optimized bias term
     tolerance: float allowance for gradient descent approximation

    return:
     support_vectors: list of points that define the margin

    description:
     Iterates through the dataset and identifies points that lie on or
     inside the margin boundaries (where y * (w*x + b) <= 1).
    """

    # collect every point whose functional margin is <= 1 (plus a tiny
    # tolerance, since SGD only approximately satisfies the constraints)
    #
    support_vectors = [
        x_val for x_val, y_val in zip(X, y)
        if y_val * (dot_product(w, x_val) + b) <= 1.0 + tolerance
    ]

    # exit gracefully
    #
    return support_vectors

# calculate the classification error rate of the trained model
#
def calculate_error_rate(X, y, w, b):
    """
    method: calculate_error_rate

    arguments:
     X: list of lists representing the feature vectors
     y: list of true class labels (-1 or 1)
     w: the optimized weight vector
     b: the optimized bias term

    return:
     error_rate: float representing the percentage of incorrect predictions
     error_count: integer count of misclassified points

    description:
     Evaluates the model's accuracy by generating a prediction for each
     point and comparing it against the true label. Returns the overall
     error rate as a decimal.
    """

    # count points whose predicted sign disagrees with the true label;
    # the predicted class is +1 when w*x + b >= 0 and -1 otherwise
    #
    total_points = len(y)
    error_count = 0
    for x_val, y_val in zip(X, y):
        decision_value = dot_product(w, x_val) + b
        prediction = 1 if decision_value >= 0 else -1
        if prediction != y_val:
            error_count += 1

    # calculate the error rate as a float
    #
    error_rate = float(error_count) / total_points

    # exit gracefully
    #
    return error_rate, error_count

# render a high-fidelity plot of the data points and decision boundary
#
def plot_decision_boundary(X, y, w, b, support_vectors = None):
    """
    method: plot_decision_boundary

    arguments:
     X: list of lists representing the feature vectors
     y: list of class labels (-1 or 1)
     w: the optimized weight vector
     b: the optimized bias term
     support_vectors: list of support vectors to highlight

    return:
     True: indicates successful execution

    description:
     Generates a graphical plot using matplotlib to display the dataset,
     the linear SVM decision boundary, and the margin lines. The plot is
     saved to DEF_OUTPUT_FILE and the figure is closed afterwards so state
     does not leak into subsequent plots.
    """

    # import matplotlib lazily so the rest of the module works without it
    #
    import matplotlib.pyplot as plt

    # globally set the base font size for all matplotlib elements
    #
    plt.rcParams.update({'font.size': 12})

    # start from a fresh figure instead of drawing into whatever implicit
    # figure may already exist
    #
    plt.figure()

    # separate the dataset into class +1 and class -1 for colored plotting
    #
    class_1_x = [X[i][0] for i in range(len(X)) if y[i] == 1]
    class_1_y = [X[i][1] for i in range(len(X)) if y[i] == 1]
    class_minus1_x = [X[i][0] for i in range(len(X)) if y[i] == -1]
    class_minus1_y = [X[i][1] for i in range(len(X)) if y[i] == -1]

    # plot the data points using scatter
    #
    plt.scatter(class_1_x, class_1_y, color='blue', marker='+',
                label='Class +1', s=100)
    plt.scatter(class_minus1_x, class_minus1_y, color='red', marker='_',
                label='Class -1', s=100)

    # plot circles around the support vectors
    #
    if support_vectors:
        sv_x = [sv[0] for sv in support_vectors]
        sv_y = [sv[1] for sv in support_vectors]
        plt.scatter(sv_x, sv_y, s=50, facecolors='none', edgecolors='green',
                    linewidths=1, label='Support Vectors')

    # determine the range for the x-axis with a buffer based on the data
    #
    x_min = min(p[0] for p in X) - 2.0
    x_max = max(p[0] for p in X) + 2.0
    x_vals = [x_min, x_max]

    # calculate the corresponding y values for the decision boundary and
    # margins
    # equation: w0*x + w1*y + b = 0 => y = -(w0*x + b) / w1
    # (skipped when w1 == 0, i.e. a vertical boundary cannot be expressed
    # as y = f(x))
    #
    if w[1] != 0:

        # main decision boundary where wx + b = 0
        #
        y_vals = [-(w[0] * x + b) / w[1] for x in x_vals]
        plt.plot(x_vals, y_vals, 'k-', linewidth=2, label='Decision Boundary')

        # positive margin where wx + b = 1
        #
        margin_plus_y = [-(w[0] * x + b - 1) / w[1] for x in x_vals]
        plt.plot(x_vals, margin_plus_y, 'b--', alpha=0.6, label='+1 Margin')

        # negative margin where wx + b = -1
        #
        margin_minus_y = [-(w[0] * x + b + 1) / w[1] for x in x_vals]
        plt.plot(x_vals, margin_minus_y, 'r--', alpha=0.6, label='-1 Margin')

    # configure the plot aesthetics and labels
    #
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('Support Vector Machine - Decision Boundary')
    plt.legend(loc='lower left', fontsize=11)
    plt.grid(True, linestyle=':', alpha=0.7)

    # ensure the axes are scaled equally so the orthogonal margin
    # distances are visually accurate
    #
    plt.axis('equal')

    # save the final plot and release the figure
    #
    plt.savefig(DEF_OUTPUT_FILE)
    plt.close()

    # exit gracefully
    #
    return True

# function: main
#
def main(argv):
    """
    method: main

    arguments:
     argv: command line arguments

    return:
     True: indicates successful execution

    description:
     Main entry point that demonstrates training an SVM on a simple
     dataset and visualizing the resulting hyperplane using matplotlib.
    """

    # define the initial dataset and parameters
    #
    X = DEF_DATA_X
    y = DEF_DATA_Y

    # print header
    #
    print("Starting Support Vector Machine demonstration...")
    print("Number of samples: %d" % len(X))
    print("-" * 65)

    # run the svm training loop using gradient descent
    #
    print("Executing SVM Stochastic Gradient Descent...")
    w, b = train_svm(X, y)

    # identify and count the support vectors
    #
    svs = get_support_vectors(X, y, w, b)

    # calculate the error rate
    #
    error_rate, error_count = calculate_error_rate(X, y, w, b)
    accuracy = (1.0 - error_rate) * 100

    # print the optimized parameters and metrics
    #
    print("Training Complete.")
    print("-" * 65)
    print("Optimized Weights (w): [%.2f, %.2f]" % (w[0], w[1]))
    print("Optimized Bias (b): %.2f" % b)
    print("Support Vectors : %d" % len(svs))
    print("Misclassified Points : %d out of %d" % (error_count, len(X)))
    print("Model Accuracy : %.2f%%" % accuracy)
    print("-" * 65)

    # render the matplotlib plot
    #
    print("Launching Matplotlib Visualization...")
    plot_decision_boundary(X, y, w, b, support_vectors=svs)
    print("Plot Saved at %s. Execution finished." % DEF_OUTPUT_FILE)
    print("-" * 65)

    # exit gracefully
    #
    return True

# begin gracefully
#
if __name__ == '__main__':
    main(sys.argv[0:])

#
# end of file