#!/usr/bin/env python
#
# file: visual_rudimentary_probabilities.py
#
# description:
#  Calculates information theory metrics (Entropy, Mutual Information, etc.)
#  and generates educational plots to visualize the probability distributions
#  and the resulting metric relationships.
#
# revision history:
#  20260225 (AA): added visualization functions
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys

import numpy as np

# NOTE: matplotlib is imported lazily inside plot_information_metrics() so the
# pure metric computations remain usable on headless / minimal installs.

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename
#
__FILE__ = os.path.basename(__file__)

# define numerical stability constant to avoid log2(0)
#
DEF_EPS = 1.0e-12
DEF_OUT_PLOT = "information_theory_metrics.png"

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# function: compute_entropy
#
# arguments:
#  p_x: 1D numpy array holding a probability distribution (entries sum to 1)
#
# return: the Shannon entropy H(X) = -sum p(x) log2 p(x), in bits (float)
#
# Zero-probability entries contribute nothing: only the log argument is
# clamped to DEF_EPS, while the p_x multiplier stays exact, so the term
# is 0 * log2(DEF_EPS) == 0.
#
def compute_entropy(p_x):
    p_safe = np.maximum(p_x, DEF_EPS)
    entropy = -np.sum(p_x * np.log2(p_safe))
    return float(entropy)

# function: compute_joint_entropy
#
# arguments:
#  p_xy: 2D numpy array holding the joint distribution P(X,Y) (sums to 1)
#
# return: the joint entropy H(X,Y) in bits (float)
#
def compute_joint_entropy(p_xy):
    p_safe = np.maximum(p_xy, DEF_EPS)
    j_entropy = -np.sum(p_xy * np.log2(p_safe))
    return float(j_entropy)

# function: compute_conditional_entropy_y_given_x
#
# arguments:
#  p_xy: 2D numpy array holding the joint distribution P(X,Y); rows index X,
#        columns index Y
#
# return: the conditional entropy H(Y|X) in bits (float)
#
# Computed as -sum_{x,y} p(x,y) log2 p(y|x), where p(y|x) = p(x,y) / p(x).
# The marginal p(x) is clamped to DEF_EPS before the division to guard
# against rows with zero mass. H(X|Y) can be obtained by passing p_xy.T.
#
def compute_conditional_entropy_y_given_x(p_xy):
    p_x = np.sum(p_xy, axis=1)
    p_x_col = np.maximum(p_x.reshape(-1, 1), DEF_EPS)
    p_y_given_x = p_xy / p_x_col
    p_y_given_x_safe = np.maximum(p_y_given_x, DEF_EPS)
    cond_entropy = -np.sum(p_xy * np.log2(p_y_given_x_safe))
    return float(cond_entropy)

# function: compute_mutual_information
#
# arguments:
#  p_xy: 2D numpy array holding the joint distribution P(X,Y)
#
# return: the mutual information I(X;Y) in bits (float)
#
# Computed as sum_{x,y} p(x,y) log2( p(x,y) / (p(x) p(y)) ); both the joint
# and the product of marginals are clamped to DEF_EPS so zero cells yield
# a zero contribution instead of NaN.
#
def compute_mutual_information(p_xy):
    p_x = np.sum(p_xy, axis=1)
    p_y = np.sum(p_xy, axis=0)
    p_x_times_p_y = np.outer(p_x, p_y)
    p_xy_safe = np.maximum(p_xy, DEF_EPS)
    p_x_times_p_y_safe = np.maximum(p_x_times_p_y, DEF_EPS)
    mi = np.sum(p_xy * np.log2(p_xy_safe / p_x_times_p_y_safe))
    return float(mi)

# function: plot_information_metrics
#
# arguments:
#  p_xy:    2D numpy array holding the joint distribution P(X,Y)
#  metrics: dict with keys 'h_x', 'h_y', 'h_xy', 'h_y_given_x',
#           'h_x_given_y', 'i_xy' (floats, in bits)
#  outfile: path of the PNG file to write
#
# return: none (writes the plot to outfile as a side effect)
#
def plot_information_metrics(p_xy, metrics, outfile):
    """
    Generates a 3-panel educational plot:
    1. Joint Probability Heatmap
    2. Marginal Probability Bar Charts
    3. Information Metrics Comparison Bar Chart
    """

    # deferred import: only the plotting path needs matplotlib
    #
    import matplotlib.pyplot as plt

    # create figure with 3 subplots
    #
    fig = plt.figure(figsize=(16, 5))

    # --- Panel 1: Joint Probability Heatmap ---
    #
    ax1 = fig.add_subplot(131)
    cax = ax1.matshow(p_xy, cmap='Blues', alpha=0.8)
    fig.colorbar(cax, ax=ax1, fraction=0.046, pad=0.04)

    # annotate heatmap with actual probability values
    #
    for i in range(p_xy.shape[0]):
        for j in range(p_xy.shape[1]):
            ax1.text(j, i, f"{p_xy[i, j]:.2f}",
                     ha='center', va='center', color='black',
                     fontweight='bold', fontsize=12)

    ax1.set_title("Joint Probability P(X,Y)", pad=20, fontsize=14)
    ax1.set_xlabel("Y (Features / Classes)", labelpad=10)
    ax1.set_ylabel("X (Features / Classes)", labelpad=10)
    ax1.set_xticks(range(p_xy.shape[1]))
    ax1.set_yticks(range(p_xy.shape[0]))
    ax1.xaxis.set_ticks_position('bottom')

    # --- Panel 2: Marginal Probabilities ---
    #
    ax2 = fig.add_subplot(132)
    p_x = np.sum(p_xy, axis=1)
    p_y = np.sum(p_xy, axis=0)
    x_indices = np.arange(len(p_x))
    y_indices = np.arange(len(p_y))

    # plot P(X)
    ax2.bar(x_indices - 0.2, p_x, width=0.4, label='P(X)',
            color='skyblue', edgecolor='black')
    # plot P(Y)
    ax2.bar(y_indices + 0.2, p_y, width=0.4, label='P(Y)',
            color='lightcoral', edgecolor='black')

    ax2.set_title("Marginal Probabilities", fontsize=14)
    ax2.set_xlabel("Index", labelpad=10)
    ax2.set_ylabel("Probability", labelpad=10)
    ax2.set_ylim(0, 1.0)
    ax2.legend()
    ax2.grid(True, axis='y', linestyle='--', alpha=0.7)

    # --- Panel 3: Information Metrics ---
    #
    ax3 = fig.add_subplot(133)

    # extract metrics from dictionary
    #
    labels = ['H(X)', 'H(Y)', 'H(X,Y)', 'H(Y|X)', 'H(X|Y)', 'I(X;Y)']
    values = [metrics['h_x'], metrics['h_y'], metrics['h_xy'],
              metrics['h_y_given_x'], metrics['h_x_given_y'],
              metrics['i_xy']]

    # color-code for educational clarity
    # Base entropies = blue/red, Joint = purple, Conditional = grey,
    # Mutual Info = Green
    #
    colors = ['skyblue', 'lightcoral', 'mediumpurple',
              'lightgrey', 'lightgrey', 'mediumseagreen']

    bars = ax3.bar(labels, values, color=colors, edgecolor='black')

    # add text labels on top of bars
    #
    for bar in bars:
        height = bar.get_height()
        ax3.text(bar.get_x() + bar.get_width() / 2., height + 0.02,
                 f"{height:.2f}", ha='center', va='bottom', fontsize=10)

    ax3.set_title("Information Theory Metrics (Bits)", fontsize=14)
    ax3.set_ylabel("Bits", labelpad=10)
    ax3.set_ylim(0, max(values) * 1.15)  # Add headroom for text
    ax3.grid(True, axis='y', linestyle='--', alpha=0.7)

    # save plot and finalize
    #
    plt.tight_layout()
    plt.savefig(outfile, dpi=150)

    # release the figure so repeated calls do not accumulate open figures
    #
    plt.close(fig)

    print(f"\nSaved visualization to: {outfile}")

# function: main
#
# arguments:
#  argv: command-line arguments (currently unused)
#
# return: True on success, False on validation failure
#
def main(argv):

    print("--- Information Theory Visualized ---")

    # 1. Define Joint Probability Matrix P(X,Y)
    #
    p_xy = np.array([[0.1, 0.2, 0.1],
                     [0.3, 0.1, 0.2]])

    # a joint distribution must sum to 1; abort early otherwise
    #
    if not np.isclose(np.sum(p_xy), 1.0):
        print("**> Error: Joint probability matrix does not sum to 1.")
        return False

    p_x = np.sum(p_xy, axis=1)
    p_y = np.sum(p_xy, axis=0)

    # 2. Compute Metrics
    #
    # H(X|Y) reuses the H(Y|X) routine on the transposed joint matrix.
    #
    metrics = {
        'h_x': compute_entropy(p_x),
        'h_y': compute_entropy(p_y),
        'h_xy': compute_joint_entropy(p_xy),
        'h_y_given_x': compute_conditional_entropy_y_given_x(p_xy),
        'h_x_given_y': compute_conditional_entropy_y_given_x(p_xy.T),
        'i_xy': compute_mutual_information(p_xy)
    }

    # 3. Print Results
    #
    print("\nCalculated Metrics:")
    print(" H(X)   = %.4f bits" % metrics['h_x'])
    print(" H(Y)   = %.4f bits" % metrics['h_y'])
    print(" H(X,Y) = %.4f bits" % metrics['h_xy'])
    print(" H(Y|X) = %.4f bits" % metrics['h_y_given_x'])
    print(" H(X|Y) = %.4f bits" % metrics['h_x_given_y'])
    print(" I(X;Y) = %.4f bits" % metrics['i_xy'])

    # 4. Generate Visualizations
    #
    plot_information_metrics(p_xy, metrics, DEF_OUT_PLOT)

    # exit gracefully
    #
    return True

# begin gracefully
#
# sys.argv[1:] drops the script name; main()'s boolean result is mapped to
# a proper process exit status (0 = success, 1 = failure) so validation
# errors are visible to the shell.
#
if __name__ == '__main__':
    sys.exit(0 if main(sys.argv[1:]) else 1)

#
# end of file