#!/usr/bin/env python
#
# file: ECE 8527 Cardiology Data Scoring
#
#------------------------------------------------------------------------------

# import system modules
#
import os
import sys
import numpy as np

#------------------------------------------------------------------------------
#
# functions are listed here
#
#------------------------------------------------------------------------------

# function: compute_conf
#
# note: rows are indexed by the reference value and columns by the
#       hypothesis value, so for 0/1 labels the matrix is: [TN FP]
#                                                          [FN TP]

def compute_conf(r, h, indx):
    """Build the 2x2 confusion matrix for one position of the vectors.

    Args:
        r: sequence of reference vectors (each indexable by indx).
        h: sequence of hypothesis vectors, paired with r in order.
        indx: position inside each vector to compare.

    Returns:
        A 2x2 float numpy array of counts, where entry [i, j] is the
        number of pairs whose reference value is i and whose hypothesis
        value is j. The matrix is also printed to stdout.
    """
    counts = np.zeros((2, 2), dtype=float)

    # walk the two lists in lockstep and tally each (ref, hyp) outcome
    #
    for ref_vec, hyp_vec in zip(r, h):
        row = ref_vec[indx]
        col = hyp_vec[indx]
        counts[row, col] += 1

    print(counts)
    return counts

# function: main
#
def main(argv):

    # load the data into a list of lists - skip the header
    #
    ref = []
    flag = int(0)
    for l in (open(sys.argv[1], 'r')).readlines():
        if flag > int(0):
            tl = list(l.rstrip('\n').split(","))
            nl = []
            for t in tl:
                nl.append(int(t))
            ref.append(nl)
        else:
            labels = l.rstrip('\n').split(',')
            flag += int(1)

    hyp = []
    flag = int(0)
    for l in (open(sys.argv[2], 'r')).readlines():
        if flag > int(0):
            tl = list(l.rstrip('\n').split(","))
            nl = []
            for t in tl:
                nl.append(int(t))
            hyp.append(nl)
        else:
            flag += int(1)

    # compute the vector length based on the header in the ref file
    #
    vdim = len(labels)

    # error check
    #
    if len(ref) != len(hyp):
        print("**> files are not compatible (%d <> %d)" % (len(ref), len(hyp)))
        return(1)
    if len(labels) != len(ref[0]):
        print("**> labels are not compatible (%d <> %d)" %
              (len(labels), len(ref[0])))
        return(1)
        
    for (r,h) in zip(ref, hyp):
        if (len(r) != vdim) or (len(h) != vdim):
            print("**> vectors are not compatible [%d] (%d <> %d)" %
                  (vdim, len(r), len(h)))
            return(1)

    # metric 1: simple accuracy
    #
    num_errors = int(0)
    for (r,h) in zip(ref, hyp):
        if r != h:
            num_errors += int(1)
    err = float(num_errors) / float(len(ref))
    acc = float(1) - err

    print("Metric 1: simple accuracy")
    print(" err / acc = %6.4f / %6.4f" % (err, acc))
    print("")

    # metric 2: compute the micro-average precision
    #
    print("Metric 2: micro accuracy / precision / recall / f1")
    micro_acc_n = float(0)
    micro_acc_d = float(0)
    micro_prec_n = float(0)
    micro_prec_d = float(0)
    micro_rec_n = float(0)
    micro_rec_d = float(0)
    for i in range(0,vdim):

        # compute the confusion matrix
        #
        conf = compute_conf(ref, hyp, i)

        # compute the accuracy, precision and recall scores
        #
        micro_acc_n += conf[0][0] + conf[1][1]
        micro_acc_d += conf[0][1] + conf[1][0] + conf[0][0] + conf[1][1] 
        micro_prec_n += conf[1][1]
        micro_prec_d += conf[0][1] + conf[1][1]
        micro_rec_n += conf[1][1]
        micro_rec_d += conf[1][0] + conf[1][1]

    # compute the final score
    #
    micro_acc = micro_acc_n / micro_acc_d
    micro_prec = micro_prec_n / micro_prec_d
    micro_rec = micro_rec_n / micro_rec_d
    micro_f1 = 2 * (micro_prec * micro_rec) / (micro_prec + micro_rec)
    print(" micro acc / prec / rec / f1 = %6.4f / %6.4f / %6.4f / %6.4f" %
          (micro_acc, micro_prec, micro_rec, micro_f1))
    print("")
    
    # metric 3: macro accuracy, precision, recall and f1
    #
    print("Metric 3: macro accuracy / precision / recall / f1")
    macro_acc = float(0)
    macro_prec = float(0)
    macro_rec = float(0)
    macro_f1 = float(0)
    for i in range(0,vdim):

        # compute the confusion matrix
        #
        conf = compute_conf(ref, hyp, i)

        # compute the accuracy, precision, recall and f1 scores
        #
        # note: the matrix is: [TN FP]
        #                      [FN TP]
        #
        acc = (conf[0][0] + conf[1][1]) / \
              (conf[0][1] + conf[1][0] + conf[0][0] + conf[1][1])
        prec = conf[1][1] / (conf[0][1] + conf[1][1])
        rec = conf[1][1] / (conf[1][0] + conf[1][1])
        f1 = float(2.0) * (prec * rec) / (prec + rec)

        macro_acc += acc
        macro_prec += prec
        macro_rec += rec
        macro_f1 += f1

        # print the per-class accuracy
        #
        print("  [%s] %s %6.4f / %6.4f / %6.4f / %6.4f" % \
              (labels[i], "acc / prec / rec / f1 =",
              acc, prec, rec, f1))

    # compute the average
    #
    macro_acc /= float(vdim)
    macro_prec /= float(vdim)
    macro_rec /= float(vdim)
    macro_f1 /= float(vdim)
    print(" macro acc / prec / rec / f1 = %6.4f / %6.4f / %6.4f / %6.4f" %
          (macro_acc, macro_prec, macro_rec, macro_f1))
    print("")
    
    # exit gracefully
    #
    return True

# script entry point: hand main a copy of the command line
#
if __name__ == "__main__":
    main(list(sys.argv))

#
# end of file