#!/usr/bin/env python
#
# file: Gaussian Likelihood Demo with Log Probabilities
#
# revision history:
#
# 20260129 (SP): initial version for likelihood/log-likelihood demo
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np
import matplotlib.pyplot as plt

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define default values
#
DEF_SEED = 22
DEF_N_POINTS = 10
DEF_X_MIN = 0.0
DEF_X_MAX = 10.0

# gaussian model parameters (known variance)
#
DEF_VAR = 1.0
DEF_THETA_MIN = 0.0
DEF_THETA_MAX = 10.0

# grid size for theta in [0,10]
#
DEF_N_THETA = 1001

# output plot
#
DEF_OUT_PLOT = "gaussian_likelihood_demo.png"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

def generate_uniform_data(n_points, x_min, x_max, seed):
    """method: generate_uniform_data

    arguments:
     n_points: number of data points to generate (int)
     x_min: minimum value of uniform range (float)
     x_max: maximum value of uniform range (float)
     seed: random seed for reproducibility (int)

    return:
     data: generated data points (1D numpy array)

    description:
     Generate n_points uniformly at random in the range [x_min, x_max].
     Uses the legacy global seeding API so the same seed reproduces the
     exact sequence the demo was originally built with.
    """

    # seed the legacy global generator for reproducibility
    #
    np.random.seed(seed)

    # draw n_points samples from U[x_min, x_max)
    #
    data = np.random.uniform(low=x_min, high=x_max, size=n_points)

    # exit gracefully
    #
    return data

def gaussian_pdf(x, mu, var):
    """method: gaussian_pdf

    arguments:
     x: value(s) where the pdf is evaluated (float or numpy array)
     mu: mean(s) of the Gaussian (float or numpy array)
     var: variance of the Gaussian (float)

    return:
     pdf: Gaussian pdf evaluated at x (numpy array)

    raises:
     ValueError: if var is not strictly positive

    description:
     Compute the Gaussian probability density function:
      N(x; mu, var) = 1/sqrt(2*pi*var) * exp(-(x-mu)^2/(2*var))
     Works with broadcasting when x and/or mu are arrays.
    """
    x = np.asarray(x)
    mu = np.asarray(mu)

    # guard against non-positive variance
    #
    if var <= 0.0:
        raise ValueError("Variance must be > 0.")

    # compute Gaussian pdf
    #
    coef = 1.0 / np.sqrt(2.0 * np.pi * var)
    expo = -((x - mu) ** 2) / (2.0 * var)
    pdf = coef * np.exp(expo)

    # exit gracefully
    #
    return pdf

def log_likelihood_gaussian(data, theta_values, var):
    """method: log_likelihood_gaussian

    arguments:
     data: observed samples (1D numpy array)
     theta_values: candidate mean values for theta (1D numpy array)
     var: known variance (float)

    return:
     logL: log-likelihood values for each theta (1D numpy array)

    raises:
     ValueError: if data is empty or var is not strictly positive

    description:
     Compute the log-likelihood for i.i.d. Gaussian data:
      log p(D | theta) = sum_i log N(x_i; theta, var)
     where theta is the mean parameter and var is known.
    """
    data = np.asarray(data)
    theta_values = np.asarray(theta_values)

    # guard against empty input
    #
    if data.size == 0:
        raise ValueError("Data array must not be empty.")

    # guard against non-positive variance (consistent with gaussian_pdf):
    # np.log(2*pi*var) would otherwise yield NaN/-inf silently
    #
    if var <= 0.0:
        raise ValueError("Variance must be > 0.")

    # constant term: -0.5 * log(2*pi*var)
    #
    c0 = -0.5 * np.log(2.0 * np.pi * var)

    # compute log-likelihood across the theta grid
    #  logL(theta) = sum_i [ c0 - (x_i - theta)^2 / (2*var) ]
    # broadcasting: diffs has shape (n_points, n_theta)
    #
    diffs = data[:, None] - theta_values[None, :]
    logL = np.sum(c0 - (diffs ** 2) / (2.0 * var), axis=0)

    # exit gracefully
    #
    return logL

def normalize_from_log(log_values):
    """method: normalize_from_log

    arguments:
     log_values: log-domain values (1D numpy array)

    return:
     values: normalized values exp(log_values - max(log_values))
             (1D numpy array)

    description:
     Convert log-values to a numerically stable, peak-normalized linear
     scale. This avoids underflow by subtracting the maximum log value
     first, so the peak maps to exactly 1.0.
    """

    # convert to numpy array
    #
    log_values = np.asarray(log_values)

    # get maximum log value
    #
    m = np.max(log_values)

    # compute normalized values
    #
    values = np.exp(log_values - m)

    # exit gracefully
    #
    return values

def plot_demo(data, theta_values, indiv_likes, like_norm, logL,
              theta_hat, xbar, outfile):
    """method: plot_demo

    arguments:
     data: observed samples (1D numpy array)
     theta_values: theta grid (1D numpy array)
     indiv_likes: per-point likelihood curves p(x_i|theta)
                  (2D numpy array, shape [n, n_theta])
     like_norm: normalized likelihood p(D|theta) up to scaling
                (1D numpy array)
     logL: log-likelihood log p(D|theta) (1D numpy array)
     theta_hat: argmax estimate from grid (float)
     xbar: sample mean (float)
     outfile: output filename for the plot (str)

    return:
     None

    description:
     Create a 3-panel plot resembling the attached figure:
      (1) individual likelihood terms for each data point
      (2) product likelihood p(D|theta) (shown in normalized form)
      (3) log-likelihood log p(D|theta)
     Marks the peak location and compares it to the sample mean.
     The figure is closed after saving so repeated calls do not leak
     matplotlib figure objects.
    """
    fig = plt.figure(figsize=(9, 8))

    # individual likelihood terms
    #
    ax1 = plt.subplot(3, 1, 1)
    for i in range(indiv_likes.shape[0]):
        ax1.plot(theta_values, indiv_likes[i, :], linestyle='--',
                 linewidth=1)

    # draw observed data points along the theta axis (y=0)
    #
    ax1.scatter(data, np.zeros_like(data), s=25)
    ax1.set_xlim(DEF_THETA_MIN, DEF_THETA_MAX)
    ax1.set_ylabel("p(x_i | θ)")
    ax1.set_title("Gaussian likelihood demo (variance=1): individual terms, likelihood, and log-likelihood")
    ax1.grid(True)

    # normalized likelihood
    #
    ax2 = plt.subplot(3, 1, 2)
    ax2.plot(theta_values, like_norm, linewidth=2)
    ax2.axvline(theta_hat, linestyle='-', linewidth=1,
                label=r'$\hat{\theta}$', color='red')
    ax2.set_xlim(DEF_THETA_MIN, DEF_THETA_MAX)
    ax2.set_ylabel("p(D | θ)\n")
    ax2.grid(True)
    ax2.legend()

    # mark theta_hat with a small annotation at the curve's peak
    # (hoist the peak value instead of recomputing argmax twice)
    #
    y_peak = like_norm[np.argmax(like_norm)]
    ax2.annotate(r'$\hat{\theta}$',
                 xy=(theta_hat, y_peak),
                 xytext=(theta_hat, y_peak * 0.8),
                 ha='center')

    # log-likelihood
    #
    ax3 = plt.subplot(3, 1, 3)
    ax3.plot(theta_values, logL, linewidth=2)
    ax3.axvline(theta_hat, linestyle='-', linewidth=1,
                label=r'$\hat{\theta}$', color='red')
    ax3.axvline(xbar, linestyle='--', linewidth=1,
                label=r'$\bar{x}$ (sample mean)', color='blue')
    ax3.set_xlim(DEF_THETA_MIN, DEF_THETA_MAX)
    ax3.set_xlabel("θ (mean parameter)")
    ax3.set_ylabel("log p(D | θ)")
    ax3.legend()
    ax3.grid(True)

    plt.tight_layout()
    plt.savefig(outfile, dpi=200)

    # close the figure so repeated calls do not accumulate open figures
    #
    plt.close(fig)

    print("\nVisualization saved to: %s" % outfile)

# function: main
#
def main():
    """method: main

    arguments:
     none

    return:
     status: True on success (bool)

    description:
     (1) Generate 10 uniform random data points in [0,10].
     (2) Compute and plot the Gaussian log-likelihood for theta in
         [0,10], with variance fixed at 1.
     (3) Show the likelihood/log-likelihood peaks at the sample mean.
    """
    print("Running: %s" % __FILE__)

    # generate data
    #
    data = generate_uniform_data(DEF_N_POINTS, DEF_X_MIN, DEF_X_MAX,
                                 DEF_SEED)

    # calculate sample mean
    #
    xbar = float(np.mean(data))

    print("Generated %d data points in [%.1f, %.1f] (seed=%d):" %
          (DEF_N_POINTS, DEF_X_MIN, DEF_X_MAX, DEF_SEED))
    print("  data =", np.array2string(data, precision=4, separator=", "))
    print("  sample mean (xbar) = %.6f" % xbar)

    # theta grid for plotting in [0,10]
    #
    theta_values = np.linspace(DEF_THETA_MIN, DEF_THETA_MAX, DEF_N_THETA)

    # individual likelihood terms p(x_i | theta) as a function of theta
    # for fixed x_i and var=1:
    #  p(x_i | theta) = N(x_i; theta, 1)
    #
    indiv_likes = np.zeros((data.size, theta_values.size), dtype=float)
    for i, xi in enumerate(data):

        # compute p(xi | theta) across the theta grid
        #
        indiv_likes[i, :] = gaussian_pdf(xi, theta_values, DEF_VAR)

    # compute log-likelihood log p(D|theta)
    #
    logL = log_likelihood_gaussian(data, theta_values, DEF_VAR)

    # convert to a stable, peak-normalized likelihood for plotting
    #
    like_norm = normalize_from_log(logL)

    # show the peak occurs at the sample mean
    #
    idx_hat = int(np.argmax(logL))
    theta_hat = float(theta_values[idx_hat])
    print("\nGrid argmax (theta_hat) = %.6f" % theta_hat)
    print("Difference |theta_hat - xbar| = %.6e" % abs(theta_hat - xbar))

    # analytic MLE: theta_MLE = sample mean for Gaussian with known
    # variance
    #
    theta_mle = xbar
    print("Analytic MLE (theta_MLE) = xbar = %.6f" % theta_mle)

    # plot results
    #
    plot_demo(data, theta_values, indiv_likes, like_norm, logL,
              theta_hat, xbar, DEF_OUT_PLOT)

    # exit gracefully
    #
    return True

if __name__ == '__main__':
    main()