import wfdb
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import csv
import joblib

# Diagnosis labels; order matches the columns of the annotation CSV files.
LABELS = ["1dAVb", "RBBB", "LBBB", "SB", "AF", "ST"]


def read_ecg_signal(file_path):
    """Read a WFDB record and return its signal matrix.

    Args:
        file_path: Path to a WFDB record (without extension).

    Returns:
        numpy array of shape (n_samples, n_channels) as returned by wfdb.rdsamp.
    """
    signals, fields = wfdb.rdsamp(file_path)
    return signals


def preprocess_data(input_files):
    """Load every ECG record listed in the given list files and flatten each
    record into a single feature row.

    Args:
        input_files: Iterable of paths to ".list" files; each line of a list
            file is the path of one WFDB record.

    Returns:
        List of per-record feature rows (each row is all channels of the
        record concatenated end-to-end, as a plain Python list).
    """
    all_flattened_channels = []
    for file_path in input_files:
        with open(file_path, 'r') as f:
            paths = f.readlines()
        for path in paths:
            path = path.strip()
            ecg_signals = read_ecg_signal(path)
            # Transpose to (n_channels, n_samples), then concatenate the
            # channels into one long 1-D feature vector per record.
            ecg_signals_T = ecg_signals.T
            flattened_channels = np.concatenate(ecg_signals_T, axis=0)
            all_flattened_channels.append(flattened_channels.tolist())
    return all_flattened_channels


def read_annotations(annotation_files):
    """Read per-label binary annotations from CSV files.

    Each annotation CSV is expected to have a header row followed by rows of
    comma-separated integers, one column per label in LABELS (same order).

    Args:
        annotation_files: Iterable of annotation CSV paths; their rows are
            concatenated in file order (must match preprocess_data's order).

    Returns:
        Dict mapping each label name to a list of int annotations.
    """
    annotations = {label: [] for label in LABELS}
    for annotation_file in annotation_files:
        with open(annotation_file, 'r') as f:
            next(f)  # Skip the header row.
            for line in f:
                values = line.strip().split(',')
                for i, label in enumerate(LABELS):
                    annotations[label].append(int(values[i]))
    return annotations


def write_predictions_to_csv(filename, predictions):
    """Write per-label prediction arrays to a CSV file, one record per row.

    Args:
        filename: Output CSV path.
        predictions: Sequence of per-label prediction sequences, in LABELS
            order; they are transposed via zip so each output row holds one
            record's predictions across all labels.
    """
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(LABELS)
        for row in zip(*predictions):
            writer.writerow(row)


def main():
    """Train one RandomForest per diagnosis label, write test-set predictions
    to CSV, and persist each trained model to disk."""
    # Path to the list files containing training record paths.
    train_input_files = ["./data/train/data_train_healthy_records.list",
                         "./data/train/data_train_unhealthy_records.list"]
    # Path to the training annotation files.
    train_annotation_files = ["./data/train/data_train_healthy.csv",
                              "./data/train/data_train_unhealthy.csv"]
    # NOTE(review): the "test" paths below are identical to the training
    # paths, so evaluation runs on the training data — presumably a
    # placeholder; confirm the intended dev/test file paths.
    test_input_files = ["./data/train/data_train_healthy_records.list",
                        "./data/train/data_train_unhealthy_records.list"]
    test_annotation_files = ["./data/train/data_train_healthy.csv",
                             "./data/train/data_train_unhealthy.csv"]

    print("Starting training data processing")
    train_data = preprocess_data(train_input_files)
    print("Finished training data processing")

    print("Reading training annotations")
    train_annotations = read_annotations(train_annotation_files)

    print("Starting testing data processing")
    test_data = preprocess_data(test_input_files)
    print("Finished testing data processing")

    print("Reading testing annotations")
    test_annotations = read_annotations(test_annotation_files)

    classifiers = {}
    predictions = {}
    # Train each per-label classifier exactly once, then predict and save.
    # (The original trained every classifier twice: once to predict and a
    # second time, from scratch, just to save the model.)
    for label in train_annotations:
        print(f"Beginning training for {label}")
        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(train_data, train_annotations[label])
        classifiers[label] = clf
        print(f"Finished training for {label}")

        predictions[label] = clf.predict(test_data)

        model_filename = f"{label}_rnf_model.pkl"
        joblib.dump(clf, model_filename)
        print(f"Model for {label} saved as {model_filename}")

    # dict preserves insertion order, so values are in LABELS order.
    write_predictions_to_csv("hyp_dev_rnf.csv", list(predictions.values()))
    # Report the actual output filename (the original message said
    # "predictions.csv", which was wrong).
    print("Predictions written to hyp_dev_rnf.csv")


if __name__ == "__main__":
    main()