#!/usr/bin/env python
#
# file: svm.py
#
# revision history:
#
# 20260327 (SP): implement simple support vector machine
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys
import random
import math

# import third-party numerical libraries
#
# NOTE: matplotlib is imported lazily inside plot_decision_boundary so that
# the training / evaluation code in this module can be used on systems
# without a plotting stack installed.
#
import numpy as np

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default hyperparameters
#
DEF_EPOCHS = int(3000)
DEF_NUM_SAMPLES = 100
DEF_LEARNING_RATE = 0.01
DEF_LAMBDA = 0.01
DEF_RANDOM_SEED = 27
DEF_OUTPUT_FILE = 'svm_boundary.png'
DEF_TOLERANCE = 1e-3

# set the numpy random seed for reproducible data generation
#
np.random.seed(DEF_RANDOM_SEED)

# generate class -1 (shifted to [3.0, 3.0] to close the gap)
#
class_minus1_x = np.random.rand(DEF_NUM_SAMPLES // 2, 2) * 4.0 + [3.0, 3.0]
class_minus1_y = [-1 for _ in range(DEF_NUM_SAMPLES // 2)]

# generate class +1 (shifted to [5.0, 5.0] to close the gap and create
# slight overlap)
#
class_plus1_x = np.random.rand(DEF_NUM_SAMPLES // 2, 2) * 4.0 + [5.0, 5.0]
class_plus1_y = [1 for _ in range(DEF_NUM_SAMPLES // 2)]

# combine and convert to standard python lists to preserve the pure-python
# training loop
#
DEF_DATA_X = class_minus1_x.tolist() + class_plus1_x.tolist()
DEF_DATA_Y = class_minus1_y + class_plus1_y

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# calculate the dot product between two vectors
#
def dot_product(vector1, vector2):
    """
    method: dot_product

    arguments:
     vector1: list of coordinates for the first vector
     vector2: list of coordinates for the second vector

    return:
     result: the scalar dot product of the two vectors

    description:
     Calculate the dot product of two n-dimensional vectors to measure
     their alignment. On a dimensionality mismatch an error is printed and
     0.0 is returned (best-effort behavior; callers in this module always
     pass equal-length vectors).
    """

    # verify that both vectors have the same number of dimensions
    #
    if len(vector1) != len(vector2):
        print("Error: vectors must have the same dimensionality")
        return 0.0

    # multiply corresponding elements and sum them up
    #
    result = 0.0
    for a, b in zip(vector1, vector2):
        result += a * b

    # exit gracefully
    #
    return result

# train the support vector machine using stochastic gradient descent
#
def train_svm(X, y, epochs = DEF_EPOCHS, lr = DEF_LEARNING_RATE,
              lambda_param = DEF_LAMBDA, seed = DEF_RANDOM_SEED):
    """
    method: train_svm

    arguments:
     X: list of lists representing the feature vectors
     y: list of class labels (-1 or 1)
     epochs: number of times to iterate over the dataset
     lr: learning rate for gradient descent
     lambda_param: regularization parameter controlling margin width
     seed: random seed for reproducibility

    return:
     w: the optimized weight vector
     b: the optimized bias term

    description:
     Main SVM training loop. Utilizes Stochastic Gradient Descent (SGD) to
     minimize the hinge loss and maximize the margin between classes.
    """

    # set the random seed for reproducible shuffling
    #
    random.seed(int(seed))

    # initialize weights to zero based on the number of features
    #
    num_features = len(X[0])
    w = [0.0 for _ in range(num_features)]

    # initialize the bias term to zero
    #
    b = 0.0

    # pair features and labels once so we can shuffle them together;
    # re-shuffling the same list each epoch is equivalent to rebuilding it
    #
    dataset = list(zip(X, y))

    # loop up to the maximum number of epochs
    #
    for epoch in range(epochs):

        random.shuffle(dataset)

        # iterate through each individual sample in the shuffled dataset
        #
        for x_val, y_val in dataset:

            # calculate the decision function: w * x + b
            #
            decision_value = dot_product(w, x_val) + b

            # check if the sample violates the margin constraint
            # y * (w * x + b) >= 1 means it is classified correctly and
            # outside the margin
            #
            if y_val * decision_value >= 1:

                # if correctly classified, only apply L2 regularization
                # penalty to weights; the bias is NOT regularized and the
                # hinge term is zero here, so b is left untouched
                # derivative of lambda * w_j^2 is 2 * lambda * w_j
                #
                for j in range(num_features):
                    w[j] = w[j] - lr * (2 * lambda_param * w[j])

            else:

                # if misclassified or inside the margin:
                # apply hinge-loss + L2 regularization gradient
                # L2 regularization 2*lambda*w_j
                # hinge-loss: -x_j*y
                #
                for j in range(num_features):
                    w[j] = w[j] - lr * (2 * lambda_param * w[j]
                                        - x_val[j] * y_val)

                # bias gradient of the active hinge term is -y,
                # so b := b - lr * (-y)
                #
                b = b - lr * (-y_val)

    # exit gracefully
    #
    return w, b

# identify and count the support vectors based on the margin constraint
#
def get_support_vectors(X, y, w, b, tolerance = DEF_TOLERANCE):
    """
    method: get_support_vectors

    arguments:
     X: list of lists representing the feature vectors
     y: list of class labels (-1 or 1)
     w: the optimized weight vector
     b: the optimized bias term
     tolerance: float allowance for gradient descent approximation

    return:
     support_vectors: list of points that define the margin

    description:
     Iterates through the dataset and identifies points that lie on or
     inside the margin boundaries (where y * (w*x + b) <= 1).
    """

    # collect every point whose functional margin is <= 1 (plus a tiny
    # tolerance, since SGD only approximately satisfies the constraints)
    #
    support_vectors = [
        x_val for x_val, y_val in zip(X, y)
        if y_val * (dot_product(w, x_val) + b) <= 1.0 + tolerance
    ]

    # exit gracefully
    #
    return support_vectors

# calculate the classification error rate of the trained model
#
def calculate_error_rate(X, y, w, b):
    """
    method: calculate_error_rate

    arguments:
     X: list of lists representing the feature vectors
     y: list of true class labels (-1 or 1)
     w: the optimized weight vector
     b: the optimized bias term

    return:
     error_rate: float representing the percentage of incorrect predictions
     error_count: integer count of misclassified points

    description:
     Evaluates the model's accuracy by generating a prediction for each
     point and comparing it against the true label. Returns the overall
     error rate as a decimal.
    """

    # count points whose predicted sign disagrees with the true label;
    # the predicted class is +1 when w*x + b >= 0 and -1 otherwise
    #
    total_points = len(y)
    error_count = 0
    for x_val, y_val in zip(X, y):
        decision_value = dot_product(w, x_val) + b
        prediction = 1 if decision_value >= 0 else -1
        if prediction != y_val:
            error_count += 1

    # calculate the error rate as a float
    #
    error_rate = float(error_count) / total_points

    # exit gracefully
    #
    return error_rate, error_count

# render a high-fidelity plot of the data points and decision boundary
#
def plot_decision_boundary(X, y, w, b, support_vectors = None):
    """
    method: plot_decision_boundary

    arguments:
     X: list of lists representing the feature vectors
     y: list of class labels (-1 or 1)
     w: the optimized weight vector
     b: the optimized bias term
     support_vectors: list of support vectors to highlight

    return:
     True: indicates successful execution

    description:
     Generates a graphical plot using matplotlib to display the dataset,
     the linear SVM decision boundary, and the margin lines. The plot is
     saved to DEF_OUTPUT_FILE and the figure is closed afterwards so state
     does not leak into subsequent plots.
    """

    # import matplotlib lazily so the rest of the module works without it
    #
    import matplotlib.pyplot as plt

    # globally set the base font size for all matplotlib elements
    #
    plt.rcParams.update({'font.size': 12})

    # start from a fresh figure instead of drawing into whatever implicit
    # figure may already exist
    #
    plt.figure()

    # separate the dataset into class +1 and class -1 for colored plotting
    #
    class_1_x = [X[i][0] for i in range(len(X)) if y[i] == 1]
    class_1_y = [X[i][1] for i in range(len(X)) if y[i] == 1]
    class_minus1_x = [X[i][0] for i in range(len(X)) if y[i] == -1]
    class_minus1_y = [X[i][1] for i in range(len(X)) if y[i] == -1]

    # plot the data points using scatter
    #
    plt.scatter(class_1_x, class_1_y, color='blue', marker='+',
                label='Class +1', s=100)
    plt.scatter(class_minus1_x, class_minus1_y, color='red', marker='_',
                label='Class -1', s=100)

    # plot circles around the support vectors
    #
    if support_vectors:
        sv_x = [sv[0] for sv in support_vectors]
        sv_y = [sv[1] for sv in support_vectors]
        plt.scatter(sv_x, sv_y, s=50, facecolors='none', edgecolors='green',
                    linewidths=1, label='Support Vectors')

    # determine the range for the x-axis with a buffer based on the data
    #
    x_min = min(p[0] for p in X) - 2.0
    x_max = max(p[0] for p in X) + 2.0
    x_vals = [x_min, x_max]

    # calculate the corresponding y values for the decision boundary and
    # margins
    # equation: w0*x + w1*y + b = 0 => y = -(w0*x + b) / w1
    # (skipped when w1 == 0, i.e. a vertical boundary cannot be expressed
    # as y = f(x))
    #
    if w[1] != 0:

        # main decision boundary where wx + b = 0
        #
        y_vals = [-(w[0] * x + b) / w[1] for x in x_vals]
        plt.plot(x_vals, y_vals, 'k-', linewidth=2, label='Decision Boundary')

        # positive margin where wx + b = 1
        #
        margin_plus_y = [-(w[0] * x + b - 1) / w[1] for x in x_vals]
        plt.plot(x_vals, margin_plus_y, 'b--', alpha=0.6, label='+1 Margin')

        # negative margin where wx + b = -1
        #
        margin_minus_y = [-(w[0] * x + b + 1) / w[1] for x in x_vals]
        plt.plot(x_vals, margin_minus_y, 'r--', alpha=0.6, label='-1 Margin')

    # configure the plot aesthetics and labels
    #
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('Support Vector Machine - Decision Boundary')
    plt.legend(loc='lower left', fontsize=11)
    plt.grid(True, linestyle=':', alpha=0.7)

    # ensure the axes are scaled equally so the orthogonal margin
    # distances are visually accurate
    #
    plt.axis('equal')

    # save the final plot and release the figure
    #
    plt.savefig(DEF_OUTPUT_FILE)
    plt.close()

    # exit gracefully
    #
    return True

# function: main
#
def main(argv):
    """
    method: main

    arguments:
     argv: command line arguments

    return:
     True: indicates successful execution

    description:
     Main entry point that demonstrates training an SVM on a simple
     dataset and visualizing the resulting hyperplane using matplotlib.
    """

    # define the initial dataset and parameters
    #
    X = DEF_DATA_X
    y = DEF_DATA_Y

    # print header
    #
    print("Starting Support Vector Machine demonstration...")
    print("Number of samples: %d" % len(X))
    print("-" * 65)

    # run the svm training loop using gradient descent
    #
    print("Executing SVM Stochastic Gradient Descent...")
    w, b = train_svm(X, y)

    # identify and count the support vectors
    #
    svs = get_support_vectors(X, y, w, b)

    # calculate the error rate
    #
    error_rate, error_count = calculate_error_rate(X, y, w, b)
    accuracy = (1.0 - error_rate) * 100

    # print the optimized parameters and metrics
    #
    print("Training Complete.")
    print("-" * 65)
    print("Optimized Weights (w): [%.2f, %.2f]" % (w[0], w[1]))
    print("Optimized Bias (b): %.2f" % b)
    print("Support Vectors : %d" % len(svs))
    print("Misclassified Points : %d out of %d" % (error_count, len(X)))
    print("Model Accuracy : %.2f%%" % accuracy)
    print("-" * 65)

    # render the matplotlib plot
    #
    print("Launching Matplotlib Visualization...")
    plot_decision_boundary(X, y, w, b, support_vectors=svs)
    print("Plot Saved at %s. Execution finished." % DEF_OUTPUT_FILE)
    print("-" * 65)

    # exit gracefully
    #
    return True

# begin gracefully
#
if __name__ == '__main__':
    main(sys.argv[0:])

#
# end of file