#!/usr/bin/env python
#
# file: em_demo.py
#
# description:
#   Demonstrates EM parameter estimation with overlapping Gaussian data.
#   Includes warning suppression for cleaner output.
#
#------------------------------------------------------------------------------

import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from scipy.stats import norm
import warnings
from sklearn.exceptions import ConvergenceWarning

def main():
    # -------------------------------------------------------------------------
    # Suppress "did not converge" warnings because we are INTENTIONALLY 
    # stopping early (max_iter=1) to visualize the steps.
    # -------------------------------------------------------------------------
    warnings.filterwarnings("ignore", category=ConvergenceWarning)

    print("--- EM Parameter Estimation Demo (Robust & Clean) ---")

    # 1. Generate Overlapping Data
    #    True Means: 0 and 3. True Std: 1.
    #
    np.random.seed(42)
    n_samples = 1000
    
    # Cluster 1 (Mean 0)
    blob1 = np.random.normal(loc=0.0, scale=1.0, size=(n_samples // 2, 1))
    # Cluster 2 (Mean 3)
    blob2 = np.random.normal(loc=3.0, scale=1.0, size=(n_samples // 2, 1))
    
    X = np.vstack([blob1, blob2])
    print(f"Generated {len(X)} points. True Means: [0.0, 3.0]")

    # 2. Setup GMM with Manual Initialization
    #    Start means at -2 and 8 to force a long convergence path.
    #
    bad_means_init = np.array([[-2.0], [8.0]])
    
    gmm = GaussianMixture(n_components=2, 
                          means_init=bad_means_init, 
                          tol=1e-6,                  
                          warm_start=True,           
                          max_iter=1,                
                          verbose=0)

    # 3. Run EM Step-by-Step
    #
    print("\nRunning EM Step-by-Step...")
    
    history_log_lik = []
    max_steps = 50
    converged = False
    prev_score = -np.inf
    
    for i in range(max_steps):
        # Taking one EM step
        gmm.fit(X)
        
        # Capture current log likelihood
        current_score = gmm.score(X) * len(X) 
        history_log_lik.append(current_score)
        
        print(f"  Iter {i+1}: Log-Likelihood = {current_score:.2f}, Means = {gmm.means_.flatten()}")
        
        # Check convergence manually
        change = current_score - prev_score
        if i > 0 and change < 1e-3: 
            print(f"  -> Converged at iteration {i+1}")
            converged = True
            break
            
        prev_score = current_score

    # 4. Visualization
    plot_results(X, gmm, history_log_lik)

def plot_results(X, gmm, history):
    """
    Visualizes the histogram, the final fit, and the convergence curve.
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
    plt.subplots_adjust(hspace=0.4)

    # --- Plot 1: Histogram vs Final PDF ---
    ax1.hist(X, bins=50, density=True, alpha=0.4, color='gray', label='Raw Data')

    # Generate PDF points
    x_axis = np.linspace(np.min(X)-1, np.max(X)+1, 1000)
    
    # Extract learned parameters
    means = gmm.means_.flatten()
    stds = np.sqrt(gmm.covariances_.flatten()) 
    weights = gmm.weights_.flatten()

    # Create individual component PDFs
    pdf_1 = norm.pdf(x_axis, means[0], stds[0])
    pdf_2 = norm.pdf(x_axis, means[1], stds[1])
    
    # Weighted Sum (The GMM Fit)
    pdf_total = (weights[0] * pdf_1) + (weights[1] * pdf_2)

    # Plot
    ax1.plot(x_axis, pdf_1, 'g--', linewidth=1, label=f'Comp 1 (Mean={means[0]:.2f})')
    ax1.plot(x_axis, pdf_2, 'b--', linewidth=1, label=f'Comp 2 (Mean={means[1]:.2f})')
    ax1.plot(x_axis, pdf_total, 'r-', linewidth=2, label='Final GMM Fit')
    
    ax1.set_title(f'EM Result: Means converged to {means[0]:.2f} and {means[1]:.2f}')
    ax1.set_xlabel('Data Value')
    ax1.set_ylabel('Density')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # --- Plot 2: Learning Curve ---
    ax2.plot(range(1, len(history) + 1), history, 'o-', linewidth=2)
    ax2.set_title('EM Convergence History (Log-Likelihood Maximization)')
    ax2.set_xlabel('Iteration')
    ax2.set_ylabel('Log Likelihood')
    ax2.grid(True)
    
    # Force integer ticks on x-axis
    ax2.locator_params(axis='x', integer=True)

    print(f"Saving plot to 'em_demo_visualization.png'...")
    plt.savefig('em_demo_visualization.png')
    plt.show()

if __name__ == "__main__":
    main()