#!/usr/bin/env python
#
# file: ECE 8527 Cardiology Data Scoring
#
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys
import numpy as np

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# function: compute_conf
#
# note: the matrix is: [TN FP]
#                      [FN TP]
#
def compute_conf(r, h, indx):
    """Build and print the 2x2 confusion matrix for one label position.

    Arguments:
     r: reference vectors (a sequence of sequences of integer labels)
     h: hypothesis vectors, paired element-by-element with r
     indx: the position inside each vector to score

    Return:
     a 2x2 numpy float array indexed as [reference value, hypothesis value]

    The labels at position indx are used directly as array indices, so
    they must be 0 or 1. A value of 2 or more raises IndexError, and a
    negative value silently wraps around (numpy negative indexing).
    """

    conf = np.zeros([2, 2], float)

    # rows are selected by the reference label, columns by the hypothesis
    #
    for (ref_vec, hyp_vec) in zip(r, h):
        conf[ref_vec[indx], hyp_vec[indx]] += 1

    # display the matrix (this is part of the script's normal output)
    #
    print(conf)

    return conf

# function: _load_vectors
#
def _load_vectors(fname):
    """Read a comma-separated file made of a header line and integer rows.

    Arguments:
     fname: the path of the file to read

    Return:
     a tuple (labels, rows): labels is the header split on commas (None
     if the file is completely empty) and rows is a list of integer lists

    Blank lines after the header are skipped. A non-integer field raises
    ValueError.
    """

    labels = None
    rows = []

    # the context manager guarantees the file is closed, even on error
    #
    with open(fname, 'r') as fp:
        for line in fp:
            line = line.rstrip('\n')

            # the first line is always the header
            #
            if labels is None:
                labels = line.split(',')
                continue

            # tolerate blank lines (e.g., a trailing newline at the end)
            #
            if not line.strip():
                continue

            rows.append([int(t) for t in line.split(',')])

    return labels, rows

# function: _safe_div
#
def _safe_div(num, den):
    """Return num / den as a float, or 0.0 when den is zero.

    A precision, recall or f1 score whose denominator is zero is
    undefined; scoring it as 0.0 keeps one empty class from turning
    every average into nan.
    """

    if den == 0:
        return float(0)
    return float(num) / float(den)

# function: main
#
def main(argv):
    """Score a hypothesis file against a reference file.

    Arguments:
     argv: the command line: [program name, reference file, hypothesis file]

    Return:
     True on success, 1 when the arguments or files are not usable

    Note that the two return values compare equal in Python (True == 1);
    they are kept as-is for compatibility with existing callers.

    Three metrics are printed: the simple (exact vector match) accuracy,
    the micro-averaged accuracy / precision / recall / f1, and the
    per-class and macro-averaged accuracy / precision / recall / f1.
    """

    # check the command line: use the argument passed in, not sys.argv
    #
    if len(argv) < 3:
        print("**> usage: %s ref_file hyp_file" %
              (argv[0] if len(argv) > 0 else "score"))
        return 1

    # load the data into a list of lists - skip the header
    #
    (labels, ref) = _load_vectors(argv[1])
    (_, hyp) = _load_vectors(argv[2])

    # a reference file with no header or no data cannot be scored
    #
    if labels is None or len(ref) == 0:
        print("**> reference file contains no data (%s)" % (argv[1]))
        return 1

    # compute the vector length based on the header in the ref file
    #
    vdim = len(labels)

    # error check
    #
    if len(ref) != len(hyp):
        print("**> files are not compatible (%d <> %d)" %
              (len(ref), len(hyp)))
        return 1

    if len(labels) != len(ref[0]):
        print("**> labels are not compatible (%d <> %d)" %
              (len(labels), len(ref[0])))
        return 1

    for (r, h) in zip(ref, hyp):
        if (len(r) != vdim) or (len(h) != vdim):
            print("**> vectors are not compatible [%d] (%d <> %d)" %
                  (vdim, len(r), len(h)))
            return 1

    # metric 1: simple accuracy
    #  a vector counts as an error unless every label matches
    #
    num_errors = sum(1 for (r, h) in zip(ref, hyp) if r != h)
    err = float(num_errors) / float(len(ref))
    acc = float(1) - err
    print("Metric 1: simple accuracy")
    print(" err / acc = %6.4f / %6.4f" % (err, acc))
    print("")

    # metric 2: compute the micro-average precision
    #  counts are pooled over all classes before the ratios are formed
    #
    print("Metric 2: micro accuracy / precision / recall / f1")
    micro_acc_n = float(0)
    micro_acc_d = float(0)
    micro_prec_n = float(0)
    micro_prec_d = float(0)
    micro_rec_n = float(0)
    micro_rec_d = float(0)

    for i in range(vdim):

        # compute the confusion matrix
        #
        conf = compute_conf(ref, hyp, i)

        # compute the accuracy, precision and recall scores
        #
        micro_acc_n += conf[0][0] + conf[1][1]
        micro_acc_d += conf[0][1] + conf[1][0] + conf[0][0] + conf[1][1]
        micro_prec_n += conf[1][1]
        micro_prec_d += conf[0][1] + conf[1][1]
        micro_rec_n += conf[1][1]
        micro_rec_d += conf[1][0] + conf[1][1]

    # compute the final score
    #
    micro_acc = _safe_div(micro_acc_n, micro_acc_d)
    micro_prec = _safe_div(micro_prec_n, micro_prec_d)
    micro_rec = _safe_div(micro_rec_n, micro_rec_d)
    micro_f1 = _safe_div(2 * (micro_prec * micro_rec),
                         micro_prec + micro_rec)
    print(" micro acc / prec / rec / f1 = %6.4f / %6.4f / %6.4f / %6.4f" %
          (micro_acc, micro_prec, micro_rec, micro_f1))
    print("")

    # metric 3: macro accuracy, precision, recall and f1
    #  ratios are formed per class and then averaged over the classes
    #
    print("Metric 3: macro accuracy / precision / recall / f1")
    macro_acc = float(0)
    macro_prec = float(0)
    macro_rec = float(0)
    macro_f1 = float(0)

    for (i, label) in enumerate(labels):

        # compute the confusion matrix
        #
        conf = compute_conf(ref, hyp, i)

        # compute the accuracy, precision, recall and f1 scores
        #
        # note: the matrix is: [TN FP]
        #                      [FN TP]
        #
        acc = _safe_div(conf[0][0] + conf[1][1],
                        conf[0][1] + conf[1][0] + conf[0][0] + conf[1][1])
        prec = _safe_div(conf[1][1], conf[0][1] + conf[1][1])
        rec = _safe_div(conf[1][1], conf[1][0] + conf[1][1])
        f1 = _safe_div(float(2.0) * (prec * rec), prec + rec)

        macro_acc += acc
        macro_prec += prec
        macro_rec += rec
        macro_f1 += f1

        # print the per-class accuracy
        #
        print(" [%s] %s %6.4f / %6.4f / %6.4f / %6.4f" %
              (label, "acc / prec / rec / f1 =", acc, prec, rec, f1))

    # compute the average
    #
    macro_acc /= float(vdim)
    macro_prec /= float(vdim)
    macro_rec /= float(vdim)
    macro_f1 /= float(vdim)
    print(" macro acc / prec / rec / f1 = %6.4f / %6.4f / %6.4f / %6.4f" %
          (macro_acc, macro_prec, macro_rec, macro_f1))
    print("")

    # exit gracefully
    #
    return True

# begin gracefully
#
if __name__ == '__main__':
    main(sys.argv[0:])

#
# end of file