import wfdb
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import csv
import joblib

# Diagnosis labels; order matches the columns of the annotation CSV files.
LABELS = ["1dAVb", "RBBB", "LBBB", "SB", "AF", "ST"]


def read_ecg_signal(file_path):
    """Read a WFDB record and return its signal matrix.

    Args:
        file_path: Path to a WFDB record (without extension).

    Returns:
        numpy array of shape (n_samples, n_channels) as returned by wfdb.rdsamp.
    """
    signals, fields = wfdb.rdsamp(file_path)
    return signals


def preprocess_data(input_files):
    """Load every ECG record listed in the given list files and flatten each
    record into a single feature row.

    Args:
        input_files: Iterable of paths to ".list" files; each line of a list
            file is the path of one WFDB record.

    Returns:
        List of per-record feature rows (each row is all channels of the
        record concatenated end-to-end, as a plain Python list).
    """
    all_flattened_channels = []
    for file_path in input_files:
        with open(file_path, 'r') as f:
            paths = f.readlines()
        for path in paths:
            path = path.strip()
            ecg_signals = read_ecg_signal(path)
            # Transpose to (n_channels, n_samples), then concatenate the
            # channels into one long 1-D feature vector per record.
            ecg_signals_T = ecg_signals.T
            flattened_channels = np.concatenate(ecg_signals_T, axis=0)
            all_flattened_channels.append(flattened_channels.tolist())
    return all_flattened_channels


def read_annotations(annotation_files):
    """Read per-label binary annotations from CSV files.

    Each annotation CSV is expected to have a header row followed by rows of
    comma-separated integers, one column per label in LABELS (same order).

    Args:
        annotation_files: Iterable of annotation CSV paths; their rows are
            concatenated in file order (must match preprocess_data's order).

    Returns:
        Dict mapping each label name to a list of int annotations.
    """
    annotations = {label: [] for label in LABELS}
    for annotation_file in annotation_files:
        with open(annotation_file, 'r') as f:
            next(f)  # Skip the header row.
            for line in f:
                values = line.strip().split(',')
                for i, label in enumerate(LABELS):
                    annotations[label].append(int(values[i]))
    return annotations


def write_predictions_to_csv(filename, predictions):
    """Write per-label prediction arrays to a CSV file, one record per row.

    Args:
        filename: Output CSV path.
        predictions: Sequence of per-label prediction sequences, in LABELS
            order; they are transposed via zip so each output row holds one
            record's predictions across all labels.
    """
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(LABELS)
        for row in zip(*predictions):
            writer.writerow(row)


def main():
    """Train one RandomForest per diagnosis label, write test-set predictions
    to CSV, and persist each trained model to disk."""
    # Path to the list files containing training record paths.
    train_input_files = ["./data/train/data_train_healthy_records.list",
                         "./data/train/data_train_unhealthy_records.list"]
    # Path to the training annotation files.
    train_annotation_files = ["./data/train/data_train_healthy.csv",
                              "./data/train/data_train_unhealthy.csv"]
    # NOTE(review): the "test" paths below are identical to the training
    # paths, so evaluation runs on the training data — presumably a
    # placeholder; confirm the intended dev/test file paths.
    test_input_files = ["./data/train/data_train_healthy_records.list",
                        "./data/train/data_train_unhealthy_records.list"]
    test_annotation_files = ["./data/train/data_train_healthy.csv",
                             "./data/train/data_train_unhealthy.csv"]

    print("Starting training data processing")
    train_data = preprocess_data(train_input_files)
    print("Finished training data processing")

    print("Reading training annotations")
    train_annotations = read_annotations(train_annotation_files)

    print("Starting testing data processing")
    test_data = preprocess_data(test_input_files)
    print("Finished testing data processing")

    print("Reading testing annotations")
    test_annotations = read_annotations(test_annotation_files)

    classifiers = {}
    predictions = {}
    # Train each per-label classifier exactly once, then predict and save.
    # (The original trained every classifier twice: once to predict and a
    # second time, from scratch, just to save the model.)
    for label in train_annotations:
        print(f"Beginning training for {label}")
        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(train_data, train_annotations[label])
        classifiers[label] = clf
        print(f"Finished training for {label}")

        predictions[label] = clf.predict(test_data)

        model_filename = f"{label}_rnf_model.pkl"
        joblib.dump(clf, model_filename)
        print(f"Model for {label} saved as {model_filename}")

    # dict preserves insertion order, so values are in LABELS order.
    write_predictions_to_csv("hyp_dev_rnf.csv", list(predictions.values()))
    # Report the actual output filename (the original message said
    # "predictions.csv", which was wrong).
    print("Predictions written to hyp_dev_rnf.csv")


if __name__ == "__main__":
    main()