#!/usr/bin/env python
#
# file: simple_neural_network_backprop.py
#
# description:
#  This script provides an educational demonstration of a simple Neural
#  Network trained from scratch using backpropagation. It generates a
#  synthetic non-linear dataset, trains a single-hidden-layer network,
#  and visualizes the raw data, the training loss curve, and the final
#  decision boundary.
#
# revision history:
#  20260402 (AM): initial version
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values for the dataset, model, and plotting
#
DEF_N_SAMPLES = 300
DEF_NOISE = 0.15
DEF_RANDOM_SEED = 42
DEF_EPOCHS = 5000
DEF_LEARNING_RATE = 0.5
DEF_HIDDEN_NODES = 6
DEF_OUT_FILE_NAME = "neural_network_3panel.png"
DEF_PLOT_TITLE = "Simple Neural Network (Backpropagation from Scratch)"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# apply sigmoid activation
#
def sigmoid(z):
    """
    method: sigmoid

    arguments:
     z: input array or scalar

    return:
     output: array or scalar with sigmoid applied

    description:
     Applies the sigmoid mathematical function to map values between
     0 and 1.
    """

    # clip the argument so np.exp cannot overflow for large-magnitude
    # negative inputs (the result is unchanged within float64 precision)
    #
    z = np.clip(z, -500.0, 500.0)

    # calculate and return sigmoid
    #
    return 1.0 / (1.0 + np.exp(-z))

# calculate the derivative of the sigmoid function
#
def sigmoid_derivative(a):
    """
    method: sigmoid_derivative

    arguments:
     a: output from the sigmoid function (post-activation)

    return:
     output: derivative of the sigmoid function

    description:
     Calculates the derivative of the sigmoid function for
     backpropagation. Note the argument is the sigmoid OUTPUT, not the
     pre-activation, so the derivative is simply a * (1 - a).
    """

    # calculate and return derivative
    #
    return a * (1.0 - a)

# generate a synthetic 2D non-linear dataset
#
def generate_data(n_samples=DEF_N_SAMPLES, noise=DEF_NOISE,
                  seed=DEF_RANDOM_SEED):
    """
    method: generate_data

    arguments:
     n_samples: total number of points to generate
     noise: standard deviation of Gaussian noise added to the data
     seed: random seed for reproducibility

    return:
     X: feature matrix of shape (n_samples, 2)
     y_true: ground truth binary label array of shape (n_samples, 1)

    description:
     Generates a 2D dataset of interlocking half-moons to demonstrate
     how a neural network learns non-linear decision boundaries.
    """

    # generate the moons dataset using sklearn
    #
    X, y_true = make_moons(n_samples=n_samples, noise=noise,
                           random_state=seed)

    # reshape y_true to be a column vector for matrix math
    #
    y_true = y_true.reshape(-1, 1)

    # exit gracefully
    #
    return X, y_true

# train the neural network using backpropagation
#
def train_neural_network(X, y, hidden_nodes=DEF_HIDDEN_NODES,
                         epochs=DEF_EPOCHS, lr=DEF_LEARNING_RATE):
    """
    method: train_neural_network

    arguments:
     X: input feature matrix (n_samples, n_features)
     y: target labels (n_samples, 1)
     hidden_nodes: number of neurons in the hidden layer
     epochs: number of training iterations
     lr: learning rate for weight updates

    return:
     weights: dictionary containing trained weights and biases
     loss_history: list of loss values per epoch

    description:
     Initializes weights and trains a 2-layer neural network
     (Input -> Hidden -> Output) using Gradient Descent and
     Backpropagation.
    """

    # seed the generator so weight initialization is reproducible
    #
    np.random.seed(DEF_RANDOM_SEED)

    m = X.shape[0]   # number of training examples
    n_x = X.shape[1] # number of input features
    n_y = y.shape[1] # number of output features

    # initialize weights and biases randomly (small values to keep the
    # sigmoid in its responsive region at the start of training)
    #
    W1 = np.random.randn(n_x, hidden_nodes) * 0.1
    b1 = np.zeros((1, hidden_nodes))
    W2 = np.random.randn(hidden_nodes, n_y) * 0.1
    b2 = np.zeros((1, n_y))

    loss_history = []

    # training loop over epochs
    #
    for epoch in range(epochs):

        # --- FORWARD PASS ---
        #
        Z1 = np.dot(X, W1) + b1
        A1 = sigmoid(Z1)
        Z2 = np.dot(A1, W2) + b2
        A2 = sigmoid(Z2)

        # calculate Binary Cross-Entropy loss (1e-8 guards log(0))
        #
        loss = -np.mean(y * np.log(A2 + 1e-8) +
                        (1 - y) * np.log(1 - A2 + 1e-8))
        loss_history.append(loss)

        # --- BACKWARD PASS (Backpropagation) ---
        #
        # derivative of loss with respect to output Z2: for the
        # sigmoid + cross-entropy pairing this simplifies to (A2 - y)
        #
        dZ2 = A2 - y

        # gradients for Output Layer
        #
        dW2 = (1 / m) * np.dot(A1.T, dZ2)
        db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)

        # derivative of loss with respect to hidden layer Z1
        #
        dZ1 = np.dot(dZ2, W2.T) * sigmoid_derivative(A1)

        # gradients for Hidden Layer
        #
        dW1 = (1 / m) * np.dot(X.T, dZ1)
        db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)

        # --- WEIGHT UPDATES ---
        #
        W1 = W1 - lr * dW1
        b1 = b1 - lr * db1
        W2 = W2 - lr * dW2
        b2 = b2 - lr * db2

    # store final weights in a dictionary
    #
    weights = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}

    # exit gracefully
    #
    return weights, loss_history

# predict classes using trained weights
#
def predict(X, weights):
    """
    method: predict

    arguments:
     X: input feature matrix
     weights: dictionary of trained weights and biases

    return:
     predictions: binary class predictions (0 or 1)

    description:
     Performs a forward pass to generate final binary predictions.
    """

    # unpack weights
    #
    W1, b1 = weights["W1"], weights["b1"]
    W2, b2 = weights["W2"], weights["b2"]

    # forward pass
    #
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)

    # convert probabilities to binary classes (threshold 0.5)
    #
    predictions = (A2 > 0.5).astype(int)

    # exit gracefully
    #
    return predictions

# evaluate the model and plot the 3-panel visualization
#
def evaluate_and_plot_nn(X, y_true, weights, loss_history,
                         outfile=DEF_OUT_FILE_NAME):
    """
    method: evaluate_and_plot_nn

    arguments:
     X: feature matrix
     y_true: ground truth labels
     weights: trained network weights
     loss_history: list of loss values per epoch
     outfile: path to save the resulting image

    return:
     status: boolean indicating success

    description:
     Evaluates model accuracy and generates an educational 3-panel plot
     (Raw Data + Loss Curve + Decision Boundary).
    """

    # --- METRICS ---
    #
    # generate predictions and calculate accuracy (ravel both arrays so
    # accuracy_score sees plain 1-d binary label vectors)
    #
    y_pred = predict(X, weights)
    acc = accuracy_score(y_true.ravel(), y_pred.ravel())

    print("\n" + "="*40)
    print("   NEURAL NETWORK EVALUATION METRICS   ")
    print("="*40)
    print("Model: 2-Layer Perceptron (Numpy)")
    print(" Hidden Nodes : %d" % DEF_HIDDEN_NODES)
    print(" Epochs       : %d" % DEF_EPOCHS)
    print(" Learning Rate: %.2f" % DEF_LEARNING_RATE)
    print(" Final Loss   : %.4f" % loss_history[-1])
    print(" Accuracy     : %.2f%%" % (acc * 100))
    print("-" * 40)

    # --- PLOTTING ---
    #
    # setup a 3-panel figure layout
    #
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5.5))

    # flatten y_true for coloring plots
    #
    y_flat = y_true.ravel()

    # Subplot 1: Raw Data
    #
    ax1.scatter(X[:, 0], X[:, 1], c=y_flat, cmap='bwr',
                edgecolor='k', s=50, alpha=0.8)
    ax1.set_title("Raw Data (Moons)", fontsize=14)
    ax1.set_xlabel("Feature 1")
    ax1.set_ylabel("Feature 2")

    # Subplot 2: Training Loss Curve
    #
    ax2.plot(loss_history, color='purple', linewidth=2)
    ax2.set_title("Learning Curve (BCE Loss)", fontsize=14)
    ax2.set_xlabel("Epochs")
    ax2.set_ylabel("Loss")
    ax2.grid(True, linestyle='--', alpha=0.6)

    # Subplot 3: Decision Boundary
    #
    # create a mesh grid over the data range
    #
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    # predict on the mesh grid
    #
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    Z = predict(mesh_points, weights)
    Z = Z.reshape(xx.shape)

    # plot the contour and the original points
    #
    ax3.contourf(xx, yy, Z, cmap='bwr', alpha=0.3)
    ax3.scatter(X[:, 0], X[:, 1], c=y_flat, cmap='bwr',
                edgecolor='k', s=50, alpha=0.8)
    ax3.set_title("Learned Decision Boundary\nAccuracy: %.2f%%" % (acc * 100),
                  fontsize=14)
    ax3.set_xlabel("Feature 1")
    ax3.set_ylabel("Feature 2")

    # add a main title to the figure
    #
    fig.suptitle(DEF_PLOT_TITLE, fontsize=16, y=0.98)

    # adjust layout
    #
    plt.tight_layout(rect=[0, 0, 1, 0.95])

    # save the plot to disk
    #
    try:
        plt.savefig(outfile, dpi=150)
        print("\nSaved visualization to: %s" % outfile)
    except Exception as e:
        print("**> Error saving plot: %s" % str(e))
        return False

    # display the plot to the user
    #
    plt.show()

    # exit gracefully
    #
    return True

# function: main
#
def main(argv):
    """
    method: main

    arguments:
     argv: command line argument list (currently unused)

    return:
     status: boolean indicating success

    description:
     Drives the demonstration: generates data, trains the network, and
     produces the evaluation plot.
    """

    print("--- Starting Neural Network Demonstration ---")

    # 1. Generate the dataset
    #
    print("Generating non-linear synthetic dataset...")
    X, y_true = generate_data()
    print(" Total samples : %d" % len(y_true))

    # 2. Train the Neural Network
    #
    print("\nTraining Neural Network via Backpropagation...")
    weights, loss_history = train_neural_network(X, y_true)

    # 3. Evaluate and plot
    #
    print("Evaluating model and generating visualization...")
    status = evaluate_and_plot_nn(X, y_true, weights, loss_history)

    if not status:
        print("**> Process failed during evaluation/plotting.")
        return False

    print("--- Demonstration Complete ---")

    # exit gracefully
    #
    return True

# begin gracefully
#
if __name__ == '__main__':

    # propagate the boolean status from main to the shell as a process
    # exit code (0 = success, 1 = failure)
    #
    sys.exit(0 if main(sys.argv) else 1)

#
# end of file