#!/usr/bin/env python
#
# file: hmm_eval_decode.py
#
# description:
#  Demonstrates HMM Evaluation and Decoding based on ECE 8527 Lecture 12.
#  1. Isolates the Forward Algorithm for probability evaluation.
#  2. Compares Viterbi (global optimal) vs. Posterior (local optimal) decoding.
#
# revision history:
#  20260213 (AA): Modified Sadia's code to focus on Eval/Decoding comparison
#------------------------------------------------------------------------------

import os
import sys
import numpy as np
import urllib.request
import datetime as dt

#------------------------------------------------------------------------------
#
# global variables / constants
#
#------------------------------------------------------------------------------

# numerical stability constants
#
DEF_EPS = 1.0e-50
DEF_NUM_STATES = 3
DEF_MAX_ITERS = 40
DEF_TOL = 1.0e-4

DEF_DATA_URL = "https://raw.githubusercontent.com/natsunoyuki/Data_Science/refs/heads/master/gold/gold/gold_price_usd.csv"
DEF_DATA_FILE = "gold_price_usd.csv"
DEF_DATA_DIR = "./gold_data"

#------------------------------------------------------------------------------
#
# Helper Functions (Data Loading & Math)
#
#------------------------------------------------------------------------------

def download_file(url, outpath):
    os.makedirs(os.path.dirname(outpath), exist_ok=True)
    if os.path.exists(outpath) and os.path.getsize(outpath) > 0:
        return True
    try:
        print("Downloading dataset...")
        urllib.request.urlretrieve(url, outpath)
        return True
    except Exception as e:
        print(f"Error downloading: {e}")
        return False

def load_gold_csv(csv_path):
    prices = []
    with open(csv_path, "r", encoding="utf-8") as fp:
        header = fp.readline()  # skip the header row
        for line in fp:
            line = line.strip()
            if not line:
                continue
            parts = line.split(",")
            if len(parts) >= 2:
                prices.append(float(parts[1]))
    return np.asarray(prices, dtype=float)

def compute_log_returns(prices):
    # guard against non-positive prices before taking logs
    p0 = np.maximum(prices[:-1], 1e-8)
    p1 = np.maximum(prices[1:], 1e-8)
    return np.log(p1 / p0)

def logsumexp(a, axis=None):
    return np.logaddexp.reduce(a, axis=axis)

def gaussian_logpdf(x, mu, var):
    """
    Log-density of Gaussian emissions. Returns a (T, K) matrix of log b_k(x_t).
    """
    x = x.reshape(-1, 1)
    mu = mu.reshape(1, -1)
    var = np.maximum(var.reshape(1, -1), DEF_EPS)
    return -0.5 * (np.log(2.0 * np.pi * var) + ((x - mu) ** 2) / var)

#------------------------------------------------------------------------------
#
# Core HMM Functions (Lecture 12 Concepts)
#
#------------------------------------------------------------------------------

def hmm_init_params(x, K):
    """
    Simple initialization: quantile-based means, sticky (0.9 diagonal) transitions.
    """
    T = x.size
    pi = np.ones(K) / K

    # Transition matrix: 0.9 diagonal, 0.1 spread elsewhere
    A = np.eye(K) * 0.9 + (0.1 / (K - 1)) * (1 - np.eye(K))

    # Initialize means using quantiles
    qs = np.quantile(x, np.linspace(0, 1, K + 1))
    mu = np.zeros(K)
    var = np.zeros(K)
    for k in range(K):
        # Data in this quantile
        mask = (x >= qs[k]) & (x <= qs[k+1])
        xk = x[mask]
        if len(xk) > 1:
            mu[k] = np.mean(xk)
            var[k] = np.var(xk) + DEF_EPS
        else:
            mu[k] = np.mean(x)
            var[k] = np.var(x) + DEF_EPS

    return pi, A, mu, var

def hmm_forward_score(x, pi, A, mu, var):
    """
    OBJECTIVE 1: ISOLATE THE FORWARD PROBABILITY CALCULATION

    Computes P(V^T | model) using the Forward Algorithm (Lecture 12 Slide 7).
    Returns only the log-likelihood.
    """
    T = x.size
    K = pi.size

    # 1. Precompute log probabilities for stability
    log_pi = np.log(np.maximum(pi, DEF_EPS))
    log_A = np.log(np.maximum(A, DEF_EPS))

    # 2. Emission probabilities (log domain)
    log_B = gaussian_logpdf(x, mu, var)

    # 3. Initialize alpha (forward variable)
    #    alpha[0, i] = pi[i] * b_i(x_0)
    log_alpha = np.zeros((T, K))
    log_alpha[0, :] = log_pi + log_B[0, :]

    # 4. Forward recursion
    #    alpha[t, j] = [ sum_i alpha[t-1, i] * a_ij ] * b_j(x_t)
    for t in range(1, T):
        for j in range(K):
            # Log-sum-exp trick for stable summation of probabilities
            prev_log_sum = logsumexp(log_alpha[t-1, :] + log_A[:, j])
            log_alpha[t, j] = prev_log_sum + log_B[t, j]

    # 5. Termination
    #    P(V^T | model) = sum_i alpha[T, i]
    log_prob = logsumexp(log_alpha[T-1, :])

    return log_prob
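# The helper below is an illustrative sketch, not part of the lecture code: it
# checks the forward recursion on a short sequence by brute force, summing the
# joint probability P(V^T, Q | model) over every possible state sequence Q.
# The helper name (_forward_score_brute_force) and the toy parameters in the
# usage comment are assumptions made for this demonstration only.
#
def _forward_score_brute_force(x, pi, A, mu, var):
    """
    Brute-force evaluation: enumerate all K^T state sequences and sum their
    joint log-probabilities. Only feasible for very small T; intended purely
    as a sanity check on hmm_forward_score().
    """
    import itertools

    T = x.size
    K = pi.size
    log_B = gaussian_logpdf(x, mu, var)

    total = -np.inf
    for q in itertools.product(range(K), repeat=T):
        # log P(V^T, Q) = log pi[q0] + log b_q0(x_0)
        #                 + sum_t [ log a_{q(t-1) qt} + log b_qt(x_t) ]
        lp = np.log(max(pi[q[0]], DEF_EPS)) + log_B[0, q[0]]
        for t in range(1, T):
            lp += np.log(max(A[q[t-1], q[t]], DEF_EPS)) + log_B[t, q[t]]
        total = np.logaddexp(total, lp)
    return total

# Example usage (hypothetical toy parameters, K = 2 states, T = 3 frames):
#
#   x   = np.array([0.1, -0.2, 0.05])
#   pi  = np.array([0.5, 0.5])
#   A   = np.array([[0.9, 0.1], [0.2, 0.8]])
#   mu  = np.array([0.0, 0.1])
#   var = np.array([0.01, 0.02])
#   assert np.isclose(hmm_forward_score(x, pi, A, mu, var),
#                     _forward_score_brute_force(x, pi, A, mu, var))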
def hmm_forward_backward(x, pi, A, mu, var):
    """
    Standard Forward-Backward for Training (returns gamma/xi).
    Includes the same forward pass logic as above but keeps the tables.
    """
    T = x.size
    K = pi.size

    log_pi = np.log(np.maximum(pi, DEF_EPS))
    log_A = np.log(np.maximum(A, DEF_EPS))
    log_B = gaussian_logpdf(x, mu, var)

    # --- Forward ---
    log_alpha = np.zeros((T, K))
    log_alpha[0, :] = log_pi + log_B[0, :]
    for t in range(1, T):
        for j in range(K):
            log_alpha[t, j] = logsumexp(log_alpha[t-1, :] + log_A[:, j]) + log_B[t, j]
    log_lik = logsumexp(log_alpha[T-1, :])

    # --- Backward (Lecture 12 Slide 8) ---
    log_beta = np.zeros((T, K))
    # log_beta[T-1, :] is 0.0 (log(1)) by definition
    for t in range(T-2, -1, -1):
        for i in range(K):
            log_beta[t, i] = logsumexp(log_A[i, :] + log_B[t+1, :] + log_beta[t+1, :])

    # --- Posteriors (Gamma) ---
    # gamma[t, i] = P(q_t = i | V^T, model)
    log_gamma = log_alpha + log_beta - log_lik
    gamma = np.exp(log_gamma)

    # --- Xi (Transitions) ---
    xi_sum = np.zeros((K, K))
    for t in range(T-1):
        # Reshapes allow broadcasting: (K,1) + (K,K) + (1,K) + (1,K)
        log_xi = (log_alpha[t, :].reshape(K, 1)
                  + log_A
                  + log_B[t+1, :].reshape(1, K)
                  + log_beta[t+1, :].reshape(1, K)
                  - log_lik)
        xi_sum += np.exp(log_xi)

    return gamma, xi_sum, log_lik

def hmm_viterbi(x, pi, A, mu, var):
    """
    OBJECTIVE 2: VITERBI DECODING (Lecture 12 Slide 11/13)

    Finds the single best state sequence Q*.
    """
    T = x.size
    K = pi.size

    log_pi = np.log(np.maximum(pi, DEF_EPS))
    log_A = np.log(np.maximum(A, DEF_EPS))
    log_B = gaussian_logpdf(x, mu, var)

    # delta: best score to reach state j at time t
    delta = np.zeros((T, K))
    # psi: backpointers
    psi = np.zeros((T, K), dtype=int)

    # Initialize
    delta[0, :] = log_pi + log_B[0, :]

    # Recursion
    for t in range(1, T):
        for j in range(K):
            # Viterbi maximizes instead of sums
            scores = delta[t-1, :] + log_A[:, j]
            psi[t, j] = np.argmax(scores)
            delta[t, j] = np.max(scores) + log_B[t, j]

    # Backtracking
    path = np.zeros(T, dtype=int)
    path[T-1] = np.argmax(delta[T-1, :])
    for t in range(T-2, -1, -1):
        path[t] = psi[t+1, path[t+1]]

    return path
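# The snippet below is an illustrative sketch, not part of the lecture code: a
# tiny hand-built model used to compare the two decoders side by side. The
# parameters (pi, A, mu, var) and the observation sequence are invented for
# demonstration; with very "sticky" states the per-frame posterior argmax can
# disagree with the Viterbi path, since the posterior sequence need not form a
# probable path through A.
#
def _toy_decoding_demo():
    """
    Compare Viterbi and posterior (forward-backward) decoding on a tiny
    2-state model with strongly self-looping transitions.
    """
    x = np.array([-0.5, -0.4, 0.45, -0.35, -0.5, 0.4, 0.5, 0.45])
    pi = np.array([0.5, 0.5])
    A = np.array([[0.99, 0.01],
                  [0.01, 0.99]])   # state changes are heavily penalized
    mu = np.array([-0.5, 0.5])
    var = np.array([0.2, 0.2])

    viterbi_path = hmm_viterbi(x, pi, A, mu, var)
    gamma, _, _ = hmm_forward_backward(x, pi, A, mu, var)
    posterior_path = np.argmax(gamma, axis=1)

    print("toy Viterbi path   :", viterbi_path)
    print("toy Posterior path :", posterior_path)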
def hmm_em_train(x, K, max_iters=DEF_MAX_ITERS):
    """
    Trains the model (EM / Baum-Welch) so we have good parameters to evaluate.
    """
    pi, A, mu, var = hmm_init_params(x, K)
    prev_ll = -np.inf

    print(f"Training HMM ({K} states) ...")

    for it in range(max_iters):

        # E-Step
        gamma, xi_sum, ll = hmm_forward_backward(x, pi, A, mu, var)

        # Check convergence
        if abs(ll - prev_ll) < DEF_TOL:
            break
        prev_ll = ll

        # M-Step
        # 1. Update Pi
        pi = gamma[0, :]

        # 2. Update A
        gamma_sum_t = np.sum(gamma[:-1, :], axis=0) + DEF_EPS
        A = xi_sum / gamma_sum_t.reshape(-1, 1)
        # Normalize A
        A = A / np.sum(A, axis=1, keepdims=True)

        # 3. Update Mu/Var
        gamma_sum = np.sum(gamma, axis=0) + DEF_EPS
        mu = (gamma.T @ x) / gamma_sum
        var = np.zeros(K)
        for k in range(K):
            diff = x - mu[k]
            var[k] = (gamma[:, k] @ (diff**2)) / gamma_sum[k]
        var = np.maximum(var, DEF_EPS)

    print(f" Converged at iter {it}: LogLik = {ll:.2f}")
    return pi, A, mu, var

#------------------------------------------------------------------------------
#
# Main Script
#
#------------------------------------------------------------------------------

def main():

    # 1. Prepare Data
    data_path = os.path.join(DEF_DATA_DIR, DEF_DATA_FILE)
    if not download_file(DEF_DATA_URL, data_path):
        return
    raw_prices = load_gold_csv(data_path)

    # Use log returns to make data stationary/Gaussian-like
    data = compute_log_returns(raw_prices)

    # 2. Train a model (so we have valid parameters)
    pi, A, mu, var = hmm_em_train(data, K=DEF_NUM_STATES)

    print("\n" + "="*60)
    print("OBJECTIVE 1: ISOLATE FORWARD PROBABILITY CALCULATION")
    print("="*60)

    # Use the isolated function
    log_prob_forward = hmm_forward_score(data, pi, A, mu, var)
    print("Log Probability of Data P(V^T | Model) via Forward Algo:")
    print(f" {log_prob_forward:.4f}")

    # Verify against the full Forward-Backward routine
    _, _, log_prob_check = hmm_forward_backward(data, pi, A, mu, var)
    print("Verification (from Forward-Backward):")
    print(f" {log_prob_check:.4f}")

    if np.isclose(log_prob_forward, log_prob_check):
        print(" >> CALCULATION MATCHES (Success)")
    else:
        print(" >> MISMATCH (Check Implementation)")

    print("\n" + "="*60)
    print("OBJECTIVE 2: COMPARE VITERBI VS FORWARD-BACKWARD DECODING")
    print("="*60)

    # A. Viterbi Decoding (Global Best Path)
    #    Q* = argmax_Q P(Q | V, model)
    viterbi_seq = hmm_viterbi(data, pi, A, mu, var)

    # B. Posterior Decoding (Forward-Backward)
    #    q_t* = argmax_k P(q_t = k | V, model)
    #    This minimizes the expected number of individual state errors,
    #    but may produce a sequence that isn't a valid path in the graph.
    gamma, _, _ = hmm_forward_backward(data, pi, A, mu, var)
    posterior_seq = np.argmax(gamma, axis=1)

    # C. Comparison
    mismatches = np.sum(viterbi_seq != posterior_seq)
    total_len = len(data)
    match_pct = 100.0 * (1.0 - mismatches / total_len)

    print(f"Sequence Length: {total_len}")
    print(f"Mismatches: {mismatches}")
    print(f"Agreement: {match_pct:.2f}%")

    print("\nDetailed Look (First 20 steps):")
    print(f"{'Time':<6} {'Viterbi':<10} {'Posterior (FB)':<15} {'Match?'}")
    print("-" * 45)
    for t in range(min(20, total_len)):
        v = viterbi_seq[t]
        p = posterior_seq[t]
        match = "Yes" if v == p else "NO <--"
        print(f"{t:<6} {v:<10} {p:<15} {match}")

    print("\nSummary:")
    print(" Viterbi finds the single most likely *sequence* of states.")
    print(" Forward-Backward (Posterior) finds the most likely state *at each time*.")
    print(" They usually agree, but differ when the most likely individual states")
    print(" form a transition that is impossible or unlikely (prob ~ 0) in A.")

if __name__ == "__main__":
    main()
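#------------------------------------------------------------------------------
#
# usage (assumed invocation; the CSV is fetched into DEF_DATA_DIR on first run):
#
#   python hmm_eval_decode.py
#
# The script trains the HMM on gold-price log returns, reports the forward
# log-probability twice (isolated forward pass vs. full forward-backward), and
# then prints the Viterbi vs. posterior decoding comparison.
#
#------------------------------------------------------------------------------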