#!/usr/bin/env python # file: $(ISIP)/exp/tuh_eeg/exp_0022/scripts/rescore_p1_conf.py # # revision history: # 20150619 (JP): initial version # # usage: # rescore_p1_conf.py -i rscdir -r reflist -p pfile -o ofile -d dfile # # arguments: # -i (--input): rescoring directory containing hyp elab files (input) # -r (--ref): reference list containing ref lab files (input) # -p (--param): sweep parameter file (input) # -o (--output): output scoring file (output) # -d (--details): detailed scoring report # # This script generates confusion matrices and error rates. #------------------------------------------------------------------------------ # import isip modules # import autoeeg # import required modules # import os import sys import getopt import subprocess # main: rescore_p1_conf.py # def main(argv): # allocate variables for arguments # rscdir_a = "" reflist_a = "" pfile_a = "" ofile_a = "" dfile_a = "" # define the command line options # try: opts, args = getopt.getopt( argv, "h:i:r:p:o:d:", \ ["input=", "ref=", "param=", "output=", "details="]) # error handling for command line options # except getopt.GetoptError: print "*> %s: the option does not exist" % sys.argv[0] sys.exit(-1) # parse the command line arguments # for opt, arg in opts: # option: help # if opt == '-h': print 'usage: rescore_p1_conf.py -i rsc_dir -r ref_list -o out_file' sys.exit(-1) # the hyp directory # elif opt in ("-i", "--input"): rscdir_a = arg # the reference file # elif opt in ("-r", "--ref"): reflist_a = arg # the parameter file # elif opt in ("-p", "--param"): pfile_a = arg # the output file # elif opt in ("-o", "--output"): ofile_a = arg # the detailed scoring file # elif opt in ("-d", "--details"): dfile_a = arg # unknown option # else: print "illegal option: %s" % (arg) sys.exit(-1) # load the sweep parameter file to get the range: # note that we do this simply to get the number of sweep values # so that we can preallocate space for confusion matrices # prm = autoeeg.ParamSweep().load_sweep_parameters(pfile_a) # load the reference transcriptions into memory: # these are .lab files. we postprocess this to get a model list # ref_keys, ref_labels = autoeeg.isip_get_ref_labels(reflist_a) num_models, model_list = autoeeg.isip_get_model_list(ref_labels) # create confusion matrices # num_mats = int(prm.nswps) cnf = [] for i in range(num_mats): tmp_cnf = [[0 for x in range(num_models)] for x in range(num_models)] cnf.append(tmp_cnf) # load the hypothesis list into memory # cmd = "find %s -name *_ch*.elab | sort" % rscdir_a task = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) res = task.stdout.read() assert task.wait() == 0 hyplist = res.split() # create the output directory: # the report files don't necessarily have to be in the same directory. # odir = os.path.dirname(ofile_a) autoeeg.isip_mkdir(odir); odir = os.path.dirname(dfile_a) autoeeg.isip_mkdir(odir); # create a file to hold detailed scoring information # fd = open(dfile_a, "w") # loop over the hypothesis files # for hyp in hyplist: # grab the group name and the channel name to locate the # unique reference transcription # hyp_parts = hyp.split("/") hlen = len(hyp_parts) key_grp = hyp_parts[len(hyp_parts)-2] base = os.path.basename(hyp_parts[hlen-1]) ind2 = len(base) - len(".elab") ind1 = base.rfind("_ch", 0, ind2) key_chan = base[ind1:ind2] hyp_key = key_grp + key_chan # find the corresponding ref file # ind_ref = ref_keys.index(hyp_key) num_labels = ref_labels[ind_ref].num_labels # read the hypothesis file into memory # hyp_labels = autoeeg.isip_get_hyp_labels(hyp, int(prm.nswps)); # loop over sweep parameters # for i in range(int(prm.nswps)): swp_val = prm.min + prm.inc * i # loop over the reference transcription: # note that scoring is done relative to the reference # transcription. for j in range(ref_labels[ind_ref].num_labels): # only score non-null hypotheses # if ref_labels[ind_ref].label[j] != "(null)": # search for the timestamp of the reference in the # the hypothesis string # ind0 = autoeeg.isip_first_string(hyp_labels[i].start, ref_labels[ind_ref].start[j]) # check the stop time also - both must match # if (ind0 < 0) or (hyp_labels[i].stop[ind0] != ref_labels[ind_ref].stop[j]): print "*> error: label mismatch" print " hyp file = %s" % hyp print " ref file = %s" % ref_labels[ind_ref].model_name print " param sweep = %f (%d)" % (swp_val, i) print " hyp = %s %s %s" % \ (hyp_labels[i].start[ind0], hyp_labels[i].stop[ind0], hyp_labels[i].label[ind0]) print " ref = %s %s %s" % \ (ref_labels[ind_ref].start[j], ref_labels[ind_ref].stop[j], ref_labels[ind_ref].label[j]) sys.exit(-1) # get the index for each label in the model list # ind1 = model_list.index(ref_labels[ind_ref].label[j]) ind2 = model_list.index(hyp_labels[i].label[ind0]) cnf[i][ind1][ind2] += 1 # output some basic detailed scoring information # if ind1 != ind2: fd.write("sweep_value = %f\n" % swp_val) fd.write("ref: %s\n" % ref_labels[ind_ref].model_name) fd.write("hyp: %s\n" % hyp) fd.write("ref: %s %s %s %f\n" % \ (ref_labels[ind_ref].start[j], ref_labels[ind_ref].stop[j], ref_labels[ind_ref].label[j], ref_labels[ind_ref].llk[j])) fd.write("hyp: %s %s %s %f\n" % \ (hyp_labels[i].start[ind0], hyp_labels[i].stop[ind0], hyp_labels[i].label[ind0], hyp_labels[i].llk[ind0])) fd.write("\n") # end of loop over the number of labels # end of loop over sweep parameters # end of file loop # create an output file # fo = open(ofile_a, "w") fo.write("models: %s" % model_list[0]) for i in range(1, num_models): fo.write(", %s" % model_list[i]) fo.write("\n") fo.write("sweep: %f : %f : %f\n" % (prm.min, prm.max, prm.inc)) fo.write("penalty labels: ") for i in range(len(prm.lbl)): fo.write("%s " % prm.lbl[i]) fo.write("\n") fo.write("penalty weights: ") for i in range(len(prm.lbl)): fo.write("%s " % prm.wgt[i]) fo.write("\n\n") # display the results in a table # for i in range(num_mats): swp_val = prm.min + prm.inc * i fo.write("sweep_value = %f\n" % swp_val) fo.write("Ref/Hyp: ",) for j in range(num_models): fo.write("%8s" % model_list[j],) fo.write("\n") for j in range(num_models): fo.write("%8s " % model_list[j],) for k in range(num_models): fo.write("%8d" % cnf[i][j][k],) fo.write("\n") fo.write("\n") # end of loop # clean up # fo.close() fd.close() # exit gracefully # # begin gracefully # if __name__ == "__main__": main(sys.argv[1:]) # # end of file #