#!/usr/bin/env python
#
# file: simple_neural_network_backprop.py
#
# description:
#  This script provides an educational demonstration of a simple Neural
#  Network trained from scratch using backpropagation. It generates a
#  synthetic non-linear dataset, trains a single-hidden-layer network,
#  and visualizes the raw data, the training loss curve, and the final
#  decision boundary.
#
# revision history:
#  20260402 (AM): initial version
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values for the dataset, model, and plotting
#
DEF_N_SAMPLES = 300
DEF_NOISE = 0.15
DEF_RANDOM_SEED = 42
DEF_EPOCHS = 5000
DEF_LEARNING_RATE = 0.5
DEF_HIDDEN_NODES = 6
DEF_OUT_FILE_NAME = "neural_network_3panel.png"
DEF_PLOT_TITLE = "Simple Neural Network (Backpropagation from Scratch)"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# apply sigmoid activation
#
def sigmoid(z):
    """
    method: sigmoid

    arguments:
     z: input array or scalar

    return:
     output: array or scalar with sigmoid applied

    description:
     Applies the sigmoid mathematical function to map values between
     0 and 1.
    """

    # clip the argument so np.exp cannot overflow for large-magnitude
    # negative inputs (the result is unchanged within float64 precision)
    #
    z = np.clip(z, -500.0, 500.0)

    # calculate and return sigmoid
    #
    return 1.0 / (1.0 + np.exp(-z))

# calculate the derivative of the sigmoid function
#
def sigmoid_derivative(a):
    """
    method: sigmoid_derivative

    arguments:
     a: output from the sigmoid function (post-activation)

    return:
     output: derivative of the sigmoid function

    description:
     Calculates the derivative of the sigmoid function for
     backpropagation. Note the argument is the sigmoid OUTPUT, not the
     pre-activation, so the derivative is simply a * (1 - a).
    """

    # calculate and return derivative
    #
    return a * (1.0 - a)

# generate a synthetic 2D non-linear dataset
#
def generate_data(n_samples=DEF_N_SAMPLES, noise=DEF_NOISE,
                  seed=DEF_RANDOM_SEED):
    """
    method: generate_data

    arguments:
     n_samples: total number of points to generate
     noise: standard deviation of Gaussian noise added to the data
     seed: random seed for reproducibility

    return:
     X: feature matrix of shape (n_samples, 2)
     y_true: ground truth binary label array of shape (n_samples, 1)

    description:
     Generates a 2D dataset of interlocking half-moons to demonstrate
     how a neural network learns non-linear decision boundaries.
    """

    # generate the moons dataset using sklearn
    #
    X, y_true = make_moons(n_samples=n_samples, noise=noise,
                           random_state=seed)

    # reshape y_true to be a column vector for matrix math
    #
    y_true = y_true.reshape(-1, 1)

    # exit gracefully
    #
    return X, y_true

# train the neural network using backpropagation
#
def train_neural_network(X, y, hidden_nodes=DEF_HIDDEN_NODES,
                         epochs=DEF_EPOCHS, lr=DEF_LEARNING_RATE):
    """
    method: train_neural_network

    arguments:
     X: input feature matrix (n_samples, n_features)
     y: target labels (n_samples, 1)
     hidden_nodes: number of neurons in the hidden layer
     epochs: number of training iterations
     lr: learning rate for weight updates

    return:
     weights: dictionary containing trained weights and biases
     loss_history: list of loss values per epoch

    description:
     Initializes weights and trains a 2-layer neural network
     (Input -> Hidden -> Output) using Gradient Descent and
     Backpropagation.
    """

    # seed the generator so weight initialization is reproducible
    #
    np.random.seed(DEF_RANDOM_SEED)

    m = X.shape[0]   # number of training examples
    n_x = X.shape[1] # number of input features
    n_y = y.shape[1] # number of output features

    # initialize weights and biases randomly (small values to keep the
    # sigmoid in its responsive region at the start of training)
    #
    W1 = np.random.randn(n_x, hidden_nodes) * 0.1
    b1 = np.zeros((1, hidden_nodes))
    W2 = np.random.randn(hidden_nodes, n_y) * 0.1
    b2 = np.zeros((1, n_y))

    loss_history = []

    # training loop over epochs
    #
    for epoch in range(epochs):

        # --- FORWARD PASS ---
        #
        Z1 = np.dot(X, W1) + b1
        A1 = sigmoid(Z1)
        Z2 = np.dot(A1, W2) + b2
        A2 = sigmoid(Z2)

        # calculate Binary Cross-Entropy loss (1e-8 guards log(0))
        #
        loss = -np.mean(y * np.log(A2 + 1e-8) +
                        (1 - y) * np.log(1 - A2 + 1e-8))
        loss_history.append(loss)

        # --- BACKWARD PASS (Backpropagation) ---
        #
        # derivative of loss with respect to output Z2: for the
        # sigmoid + cross-entropy pairing this simplifies to (A2 - y)
        #
        dZ2 = A2 - y

        # gradients for Output Layer
        #
        dW2 = (1 / m) * np.dot(A1.T, dZ2)
        db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)

        # derivative of loss with respect to hidden layer Z1
        #
        dZ1 = np.dot(dZ2, W2.T) * sigmoid_derivative(A1)

        # gradients for Hidden Layer
        #
        dW1 = (1 / m) * np.dot(X.T, dZ1)
        db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)

        # --- WEIGHT UPDATES ---
        #
        W1 = W1 - lr * dW1
        b1 = b1 - lr * db1
        W2 = W2 - lr * dW2
        b2 = b2 - lr * db2

    # store final weights in a dictionary
    #
    weights = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}

    # exit gracefully
    #
    return weights, loss_history

# predict classes using trained weights
#
def predict(X, weights):
    """
    method: predict

    arguments:
     X: input feature matrix
     weights: dictionary of trained weights and biases

    return:
     predictions: binary class predictions (0 or 1)

    description:
     Performs a forward pass to generate final binary predictions.
    """

    # unpack weights
    #
    W1, b1 = weights["W1"], weights["b1"]
    W2, b2 = weights["W2"], weights["b2"]

    # forward pass
    #
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)

    # convert probabilities to binary classes (threshold 0.5)
    #
    predictions = (A2 > 0.5).astype(int)

    # exit gracefully
    #
    return predictions

# evaluate the model and plot the 3-panel visualization
#
def evaluate_and_plot_nn(X, y_true, weights, loss_history,
                         outfile=DEF_OUT_FILE_NAME):
    """
    method: evaluate_and_plot_nn

    arguments:
     X: feature matrix
     y_true: ground truth labels
     weights: trained network weights
     loss_history: list of loss values per epoch
     outfile: path to save the resulting image

    return:
     status: boolean indicating success

    description:
     Evaluates model accuracy and generates an educational 3-panel plot
     (Raw Data + Loss Curve + Decision Boundary).
    """

    # --- METRICS ---
    #
    # generate predictions and calculate accuracy (ravel both arrays so
    # accuracy_score sees plain 1-d binary label vectors)
    #
    y_pred = predict(X, weights)
    acc = accuracy_score(y_true.ravel(), y_pred.ravel())

    print("\n" + "="*40)
    print("   NEURAL NETWORK EVALUATION METRICS   ")
    print("="*40)
    print("Model: 2-Layer Perceptron (Numpy)")
    print(" Hidden Nodes : %d" % DEF_HIDDEN_NODES)
    print(" Epochs       : %d" % DEF_EPOCHS)
    print(" Learning Rate: %.2f" % DEF_LEARNING_RATE)
    print(" Final Loss   : %.4f" % loss_history[-1])
    print(" Accuracy     : %.2f%%" % (acc * 100))
    print("-" * 40)

    # --- PLOTTING ---
    #
    # setup a 3-panel figure layout
    #
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5.5))

    # flatten y_true for coloring plots
    #
    y_flat = y_true.ravel()

    # Subplot 1: Raw Data
    #
    ax1.scatter(X[:, 0], X[:, 1], c=y_flat, cmap='bwr',
                edgecolor='k', s=50, alpha=0.8)
    ax1.set_title("Raw Data (Moons)", fontsize=14)
    ax1.set_xlabel("Feature 1")
    ax1.set_ylabel("Feature 2")

    # Subplot 2: Training Loss Curve
    #
    ax2.plot(loss_history, color='purple', linewidth=2)
    ax2.set_title("Learning Curve (BCE Loss)", fontsize=14)
    ax2.set_xlabel("Epochs")
    ax2.set_ylabel("Loss")
    ax2.grid(True, linestyle='--', alpha=0.6)

    # Subplot 3: Decision Boundary
    #
    # create a mesh grid over the data range
    #
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    # predict on the mesh grid
    #
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    Z = predict(mesh_points, weights)
    Z = Z.reshape(xx.shape)

    # plot the contour and the original points
    #
    ax3.contourf(xx, yy, Z, cmap='bwr', alpha=0.3)
    ax3.scatter(X[:, 0], X[:, 1], c=y_flat, cmap='bwr',
                edgecolor='k', s=50, alpha=0.8)
    ax3.set_title("Learned Decision Boundary\nAccuracy: %.2f%%" % (acc * 100),
                  fontsize=14)
    ax3.set_xlabel("Feature 1")
    ax3.set_ylabel("Feature 2")

    # add a main title to the figure
    #
    fig.suptitle(DEF_PLOT_TITLE, fontsize=16, y=0.98)

    # adjust layout
    #
    plt.tight_layout(rect=[0, 0, 1, 0.95])

    # save the plot to disk
    #
    try:
        plt.savefig(outfile, dpi=150)
        print("\nSaved visualization to: %s" % outfile)
    except Exception as e:
        print("**> Error saving plot: %s" % str(e))
        return False

    # display the plot to the user
    #
    plt.show()

    # exit gracefully
    #
    return True

# function: main
#
def main(argv):
    """
    method: main

    arguments:
     argv: command line argument list (currently unused)

    return:
     status: boolean indicating success

    description:
     Drives the demonstration: generates data, trains the network, and
     produces the evaluation plot.
    """

    print("--- Starting Neural Network Demonstration ---")

    # 1. Generate the dataset
    #
    print("Generating non-linear synthetic dataset...")
    X, y_true = generate_data()
    print(" Total samples : %d" % len(y_true))

    # 2. Train the Neural Network
    #
    print("\nTraining Neural Network via Backpropagation...")
    weights, loss_history = train_neural_network(X, y_true)

    # 3. Evaluate and plot
    #
    print("Evaluating model and generating visualization...")
    status = evaluate_and_plot_nn(X, y_true, weights, loss_history)

    if not status:
        print("**> Process failed during evaluation/plotting.")
        return False

    print("--- Demonstration Complete ---")

    # exit gracefully
    #
    return True

# begin gracefully
#
if __name__ == '__main__':

    # propagate the boolean status from main to the shell as a process
    # exit code (0 = success, 1 = failure)
    #
    sys.exit(0 if main(sys.argv) else 1)

#
# end of file