#!/usr/bin/env python
#
# file: Gaussian Likelihood Demo with Log Probabilities
#
# revision history:
#
# 20260129 (SP): initial version for likelihood/log-likelihood demo
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np
import matplotlib.pyplot as plt

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values
#
DEF_SEED = 22
DEF_N_POINTS = 10
DEF_X_MIN = 0.0
DEF_X_MAX = 10.0

# gaussian model parameters (known variance)
#
DEF_VAR = 1.0
DEF_THETA_MIN = 0.0
DEF_THETA_MAX = 10.0

# grid size for theta in [0,10]
#
DEF_N_THETA = 1001

# output plot
#
DEF_OUT_PLOT = "gaussian_likelihood_demo.png"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

def generate_uniform_data(n_points, x_min, x_max, seed):
    """method: generate_uniform_data

    arguments:
     n_points: number of data points to generate (int)
     x_min: minimum value of uniform range (float)
     x_max: maximum value of uniform range (float)
     seed: random seed for reproducibility (int)

    return:
     data: generated data points (1D numpy array)

    description:
     Generate n_points uniformly at random in the range [x_min, x_max].
     Uses the legacy global seeding API so the same seed reproduces the
     exact sequence the demo was originally built with.
    """

    # seed the legacy global generator for reproducibility
    #
    np.random.seed(seed)

    # draw n_points samples from U[x_min, x_max)
    #
    data = np.random.uniform(low=x_min, high=x_max, size=n_points)

    # exit gracefully
    #
    return data

def gaussian_pdf(x, mu, var):
    """method: gaussian_pdf

    arguments:
     x: value(s) where the pdf is evaluated (float or numpy array)
     mu: mean(s) of the Gaussian (float or numpy array)
     var: variance of the Gaussian (float)

    return:
     pdf: Gaussian pdf evaluated at x (numpy array)

    raises:
     ValueError: if var is not strictly positive

    description:
     Compute the Gaussian probability density function:
      N(x; mu, var) = 1/sqrt(2*pi*var) * exp(-(x-mu)^2/(2*var))
     Works with broadcasting when x and/or mu are arrays.
    """
    x = np.asarray(x)
    mu = np.asarray(mu)

    # guard against non-positive variance
    #
    if var <= 0.0:
        raise ValueError("Variance must be > 0.")

    # compute Gaussian pdf
    #
    coef = 1.0 / np.sqrt(2.0 * np.pi * var)
    expo = -((x - mu) ** 2) / (2.0 * var)
    pdf = coef * np.exp(expo)

    # exit gracefully
    #
    return pdf

def log_likelihood_gaussian(data, theta_values, var):
    """method: log_likelihood_gaussian

    arguments:
     data: observed samples (1D numpy array)
     theta_values: candidate mean values for theta (1D numpy array)
     var: known variance (float)

    return:
     logL: log-likelihood values for each theta (1D numpy array)

    raises:
     ValueError: if data is empty or var is not strictly positive

    description:
     Compute the log-likelihood for i.i.d. Gaussian data:
      log p(D | theta) = sum_i log N(x_i; theta, var)
     where theta is the mean parameter and var is known.
    """
    data = np.asarray(data)
    theta_values = np.asarray(theta_values)

    # guard against empty input
    #
    if data.size == 0:
        raise ValueError("Data array must not be empty.")

    # guard against non-positive variance (consistent with gaussian_pdf):
    # np.log(2*pi*var) would otherwise yield NaN/-inf silently
    #
    if var <= 0.0:
        raise ValueError("Variance must be > 0.")

    # constant term: -0.5 * log(2*pi*var)
    #
    c0 = -0.5 * np.log(2.0 * np.pi * var)

    # compute log-likelihood across the theta grid
    #  logL(theta) = sum_i [ c0 - (x_i - theta)^2 / (2*var) ]
    # broadcasting: diffs has shape (n_points, n_theta)
    #
    diffs = data[:, None] - theta_values[None, :]
    logL = np.sum(c0 - (diffs ** 2) / (2.0 * var), axis=0)

    # exit gracefully
    #
    return logL

def normalize_from_log(log_values):
    """method: normalize_from_log

    arguments:
     log_values: log-domain values (1D numpy array)

    return:
     values: normalized values exp(log_values - max(log_values))
             (1D numpy array)

    description:
     Convert log-values to a numerically stable, peak-normalized linear
     scale. This avoids underflow by subtracting the maximum log value
     first, so the peak maps to exactly 1.0.
    """

    # convert to numpy array
    #
    log_values = np.asarray(log_values)

    # get maximum log value
    #
    m = np.max(log_values)

    # compute normalized values
    #
    values = np.exp(log_values - m)

    # exit gracefully
    #
    return values

def plot_demo(data, theta_values, indiv_likes, like_norm, logL,
              theta_hat, xbar, outfile):
    """method: plot_demo

    arguments:
     data: observed samples (1D numpy array)
     theta_values: theta grid (1D numpy array)
     indiv_likes: per-point likelihood curves p(x_i|theta)
                  (2D numpy array, shape [n, n_theta])
     like_norm: normalized likelihood p(D|theta) up to scaling
                (1D numpy array)
     logL: log-likelihood log p(D|theta) (1D numpy array)
     theta_hat: argmax estimate from grid (float)
     xbar: sample mean (float)
     outfile: output filename for the plot (str)

    return:
     None

    description:
     Create a 3-panel plot resembling the attached figure:
      (1) individual likelihood terms for each data point
      (2) product likelihood p(D|theta) (shown in normalized form)
      (3) log-likelihood log p(D|theta)
     Marks the peak location and compares it to the sample mean.
     The figure is closed after saving so repeated calls do not leak
     matplotlib figure objects.
    """
    fig = plt.figure(figsize=(9, 8))

    # individual likelihood terms
    #
    ax1 = plt.subplot(3, 1, 1)
    for i in range(indiv_likes.shape[0]):
        ax1.plot(theta_values, indiv_likes[i, :], linestyle='--',
                 linewidth=1)

    # draw observed data points along the theta axis (y=0)
    #
    ax1.scatter(data, np.zeros_like(data), s=25)
    ax1.set_xlim(DEF_THETA_MIN, DEF_THETA_MAX)
    ax1.set_ylabel("p(x_i | θ)")
    ax1.set_title("Gaussian likelihood demo (variance=1): individual terms, likelihood, and log-likelihood")
    ax1.grid(True)

    # normalized likelihood
    #
    ax2 = plt.subplot(3, 1, 2)
    ax2.plot(theta_values, like_norm, linewidth=2)
    ax2.axvline(theta_hat, linestyle='-', linewidth=1,
                label=r'$\hat{\theta}$', color='red')
    ax2.set_xlim(DEF_THETA_MIN, DEF_THETA_MAX)
    ax2.set_ylabel("p(D | θ)\n")
    ax2.grid(True)
    ax2.legend()

    # mark theta_hat with a small annotation at the curve's peak
    # (hoist the peak value instead of recomputing argmax twice)
    #
    y_peak = like_norm[np.argmax(like_norm)]
    ax2.annotate(r'$\hat{\theta}$',
                 xy=(theta_hat, y_peak),
                 xytext=(theta_hat, y_peak * 0.8),
                 ha='center')

    # log-likelihood
    #
    ax3 = plt.subplot(3, 1, 3)
    ax3.plot(theta_values, logL, linewidth=2)
    ax3.axvline(theta_hat, linestyle='-', linewidth=1,
                label=r'$\hat{\theta}$', color='red')
    ax3.axvline(xbar, linestyle='--', linewidth=1,
                label=r'$\bar{x}$ (sample mean)', color='blue')
    ax3.set_xlim(DEF_THETA_MIN, DEF_THETA_MAX)
    ax3.set_xlabel("θ (mean parameter)")
    ax3.set_ylabel("log p(D | θ)")
    ax3.legend()
    ax3.grid(True)

    plt.tight_layout()
    plt.savefig(outfile, dpi=200)

    # close the figure so repeated calls do not accumulate open figures
    #
    plt.close(fig)

    print("\nVisualization saved to: %s" % outfile)

# function: main
#
def main():
    """method: main

    arguments:
     none

    return:
     status: True on success (bool)

    description:
     (1) Generate 10 uniform random data points in [0,10].
     (2) Compute and plot the Gaussian log-likelihood for theta in
         [0,10], with variance fixed at 1.
     (3) Show the likelihood/log-likelihood peaks at the sample mean.
    """
    print("Running: %s" % __FILE__)

    # generate data
    #
    data = generate_uniform_data(DEF_N_POINTS, DEF_X_MIN, DEF_X_MAX,
                                 DEF_SEED)

    # calculate sample mean
    #
    xbar = float(np.mean(data))

    print("Generated %d data points in [%.1f, %.1f] (seed=%d):" %
          (DEF_N_POINTS, DEF_X_MIN, DEF_X_MAX, DEF_SEED))
    print("  data =", np.array2string(data, precision=4, separator=", "))
    print("  sample mean (xbar) = %.6f" % xbar)

    # theta grid for plotting in [0,10]
    #
    theta_values = np.linspace(DEF_THETA_MIN, DEF_THETA_MAX, DEF_N_THETA)

    # individual likelihood terms p(x_i | theta) as a function of theta
    # for fixed x_i and var=1:
    #  p(x_i | theta) = N(x_i; theta, 1)
    #
    indiv_likes = np.zeros((data.size, theta_values.size), dtype=float)
    for i, xi in enumerate(data):

        # compute p(xi | theta) across the theta grid
        #
        indiv_likes[i, :] = gaussian_pdf(xi, theta_values, DEF_VAR)

    # compute log-likelihood log p(D|theta)
    #
    logL = log_likelihood_gaussian(data, theta_values, DEF_VAR)

    # convert to a stable, peak-normalized likelihood for plotting
    #
    like_norm = normalize_from_log(logL)

    # show the peak occurs at the sample mean
    #
    idx_hat = int(np.argmax(logL))
    theta_hat = float(theta_values[idx_hat])
    print("\nGrid argmax (theta_hat) = %.6f" % theta_hat)
    print("Difference |theta_hat - xbar| = %.6e" % abs(theta_hat - xbar))

    # analytic MLE: theta_MLE = sample mean for Gaussian with known
    # variance
    #
    theta_mle = xbar
    print("Analytic MLE (theta_MLE) = xbar = %.6f" % theta_mle)

    # plot results
    #
    plot_demo(data, theta_values, indiv_likes, like_norm, logL,
              theta_hat, xbar, DEF_OUT_PLOT)

    # exit gracefully
    #
    return True

if __name__ == '__main__':
    main()