#!/usr/bin/env python
#
# file: regression_trio_demo.py
#
# description:
#  This script provides an educational demonstration of three fundamental
#  regression techniques: Linear Regression, Polynomial Regression, and
#  Binary Logistic Regression. It generates appropriate synthetic data for
#  each, fits the models, and visualizes the results side-by-side.
#
# revision history:
#  20260402 (AM): initial version
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import PolynomialFeatures

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values for the dataset, model, and plotting
#
DEF_N_SAMPLES = 100
DEF_RANDOM_SEED = 42
DEF_POLY_DEGREE = 2
DEF_OUT_FILE_NAME = "regression_trio_3panel.png"
DEF_PLOT_TITLE = "Fundamental Regression Models Demonstration"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# generate data for simple linear regression
#
def generate_linear_data(n_samples=DEF_N_SAMPLES, seed=DEF_RANDOM_SEED):
    """
    method: generate_linear_data

    arguments:
     n_samples: total number of points to generate
     seed: random seed for reproducibility

    return:
     X: feature matrix of shape (n_samples, 1)
     y: target array of shape (n_samples,)

    description:
     Generates a 1D dataset with a linear relationship and Gaussian noise.
    """
    np.random.seed(seed)
    X = 10 * np.random.rand(n_samples, 1)

    # y = 2.5x + 1.0 + noise
    #
    y = 2.5 * X.squeeze() + 1.0 + np.random.randn(n_samples) * 2.5

    # exit gracefully
    #
    return X, y

# generate data for polynomial regression
#
def generate_polynomial_data(n_samples=DEF_N_SAMPLES, seed=DEF_RANDOM_SEED):
    """
    method: generate_polynomial_data

    arguments:
     n_samples: total number of points to generate
     seed: random seed for reproducibility

    return:
     X: feature matrix of shape (n_samples, 1)
     y: target array of shape (n_samples,)

    description:
     Generates a 1D dataset with a quadratic (non-linear) relationship and
     Gaussian noise. Uses seed + 1 so the sample differs from the linear set.
    """
    np.random.seed(seed + 1)
    X = 10 * np.random.rand(n_samples, 1) - 5

    # y = 0.8x^2 + 1.5x - 2.0 + noise
    #
    y = (0.8 * (X.squeeze() ** 2) + 1.5 * X.squeeze() - 2.0 +
         np.random.randn(n_samples) * 4.0)

    # exit gracefully
    #
    return X, y

# generate data for binary logistic regression
#
def generate_logistic_data(n_samples=DEF_N_SAMPLES, seed=DEF_RANDOM_SEED):
    """
    method: generate_logistic_data

    arguments:
     n_samples: total number of points to generate
     seed: random seed for reproducibility

    return:
     X: feature matrix of shape (n_samples, 1)
     y: binary target array of shape (n_samples,)

    description:
     Generates a 1D feature that maps to a binary outcome (0 or 1) based on
     a logistic probability curve. Uses seed + 2 to decorrelate from the
     other datasets.
    """
    np.random.seed(seed + 2)
    X = 10 * np.random.rand(n_samples, 1) - 5

    # calculate probability using a sigmoid function
    #
    z = 1.2 * X.squeeze() + 0.5
    prob = 1.0 / (1.0 + np.exp(-z))

    # assign binary classes based on probability and random chance
    #
    y = (np.random.rand(n_samples) < prob).astype(int)

    # exit gracefully
    #
    return X, y

# helper: fit and draw the linear regression panel
#
def _plot_linear_panel(ax):
    """
    method: _plot_linear_panel

    arguments:
     ax: the matplotlib axes to draw on

    return: none

    description:
     Fits a LinearRegression model to synthetic linear data and draws the
     scatter plot plus the fitted line on the given axes.
    """
    X_lin, y_lin = generate_linear_data()
    lin_model = LinearRegression()
    lin_model.fit(X_lin, y_lin)

    # predict over a dense grid so the fitted line is smooth
    #
    X_lin_plot = np.linspace(X_lin.min(), X_lin.max(), 100).reshape(-1, 1)
    y_lin_plot = lin_model.predict(X_lin_plot)

    ax.scatter(X_lin, y_lin, color='skyblue', edgecolor='k', alpha=0.8,
               label='Raw Data')
    ax.plot(X_lin_plot, y_lin_plot, color='red', linewidth=3,
            label='Linear Fit')

    r2_lin = lin_model.score(X_lin, y_lin)
    ax.set_title(f"Linear Regression\n$R^2$: {r2_lin:.2f}", fontsize=14)
    ax.set_xlabel("Feature X")
    ax.set_ylabel("Target y")
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.5)

# helper: fit and draw the polynomial regression panel
#
def _plot_polynomial_panel(ax):
    """
    method: _plot_polynomial_panel

    arguments:
     ax: the matplotlib axes to draw on

    return: none

    description:
     Fits a degree-DEF_POLY_DEGREE polynomial regression (via
     PolynomialFeatures + LinearRegression) to synthetic quadratic data and
     draws the scatter plot plus the fitted curve on the given axes.
    """
    X_poly, y_poly = generate_polynomial_data()

    # transform features to polynomial space (degree=2)
    #
    poly_features = PolynomialFeatures(degree=DEF_POLY_DEGREE)
    X_poly_transformed = poly_features.fit_transform(X_poly)

    poly_model = LinearRegression()
    poly_model.fit(X_poly_transformed, y_poly)

    # transform the plotting grid with the SAME fitted feature expansion
    #
    X_poly_plot = np.linspace(X_poly.min(), X_poly.max(), 100).reshape(-1, 1)
    X_poly_plot_transformed = poly_features.transform(X_poly_plot)
    y_poly_plot = poly_model.predict(X_poly_plot_transformed)

    ax.scatter(X_poly, y_poly, color='lightgreen', edgecolor='k', alpha=0.8,
               label='Raw Data')
    ax.plot(X_poly_plot, y_poly_plot, color='purple', linewidth=3,
            label='Polynomial Fit (d=2)')

    r2_poly = poly_model.score(X_poly_transformed, y_poly)
    ax.set_title(f"Polynomial Regression\n$R^2$: {r2_poly:.2f}", fontsize=14)
    ax.set_xlabel("Feature X")
    ax.set_ylabel("Target y")
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.5)

# helper: fit and draw the logistic regression panel
#
def _plot_logistic_panel(ax):
    """
    method: _plot_logistic_panel

    arguments:
     ax: the matplotlib axes to draw on

    return: none

    description:
     Fits a binary LogisticRegression model to synthetic data and draws the
     class scatter, the probability S-curve, and the 0.5 decision boundary
     on the given axes.
    """
    X_log, y_log = generate_logistic_data()
    log_model = LogisticRegression(solver='lbfgs')
    log_model.fit(X_log, y_log)

    X_log_plot = np.linspace(X_log.min(), X_log.max(), 100).reshape(-1, 1)

    # get probability of class 1
    #
    y_log_prob = log_model.predict_proba(X_log_plot)[:, 1]

    ax.scatter(X_log, y_log, color='salmon', edgecolor='k', alpha=0.8,
               label='Binary Classes (0 or 1)')
    ax.plot(X_log_plot, y_log_prob, color='blue', linewidth=3,
            label='Logistic S-Curve')
    ax.axhline(0.5, color='gray', linestyle='--', alpha=0.7,
               label='Decision Boundary (0.5)')

    acc_log = log_model.score(X_log, y_log)
    ax.set_title("Logistic Regression\nAccuracy: %.2f%%" % (acc_log * 100),
                 fontsize=14)
    ax.set_xlabel("Feature X")
    ax.set_ylabel("Probability of Class 1")
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.5)

# evaluate models and plot the 3-panel visualization
#
def evaluate_and_plot(outfile=DEF_OUT_FILE_NAME):
    """
    method: evaluate_and_plot

    arguments:
     outfile: path to save the resulting image

    return:
     status: boolean indicating success

    description:
     Generates datasets, fits all three regression models, and produces a
     side-by-side 3-panel plot of the results.
    """

    # setup a 3-panel figure layout
    #
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5.5))

    # draw each panel (one helper per model type)
    #
    _plot_linear_panel(ax1)
    _plot_polynomial_panel(ax2)
    _plot_logistic_panel(ax3)

    # add a main title to the figure
    #
    fig.suptitle(DEF_PLOT_TITLE, fontsize=16, y=0.98)

    # adjust layout
    #
    plt.tight_layout(rect=[0, 0, 1, 0.95])

    # save the plot to disk
    #
    try:
        plt.savefig(outfile, dpi=150)
        print(f"\nSaved visualization to: {outfile}")
    except Exception as e:
        print(f"**> Error saving plot: {e}")
        return False

    # display the plot to the user
    #
    plt.show()

    # exit gracefully
    #
    return True

# function: main
#
def main(argv):
    """
    method: main

    arguments:
     argv: command-line arguments (currently unused)

    return:
     status: boolean indicating success

    description:
     Drives the demonstration: fits all three models and produces the plot.
    """
    print("--- Starting Regression Trio Demonstration ---")
    print("Generating datasets and fitting models...")

    status = evaluate_and_plot()
    if not status:
        print("**> Process failed during plotting.")
        return False

    print("--- Demonstration Complete ---")

    # exit gracefully
    #
    return True

# begin gracefully
#
if __name__ == '__main__':

    # propagate success/failure to the shell via the exit code
    # (argv[1:] excludes the script name, which main ignores anyway)
    #
    sys.exit(0 if main(sys.argv[1:]) else 1)

#
# end of file