"""Generate term (.tse) files from per-epoch hypothesis (.hyp) files.

Each hyp file is read as comma-separated lines of the form
``start,stop,conf_0,conf_1,...``.  For every line the class with the highest
confidence is selected, the lines are sorted by start time, runs of
consecutive epochs carrying the same class are merged into one term, and the
resulting terms are written out in the tse_v1.0.0 text format.
"""

import errno
import os
import sys
import tempfile  # noqa: F401 -- retained so the module namespace is unchanged

PWD = os.getcwd()

## This is unrequired text that only shows up in paths on nedc_000. Passing
## this string to other nodes might crash the job, so it is stripped out of
## every path during term file generation.
#
DISK_RAID_STR = "/dsk0_raid10"

## hyp-file time stamps are divided by this factor before being rounded
## NOTE(review): presumably converts 10-microsecond ticks to seconds - confirm
#
TIME_SCALE = float(100000)


def gen_terms():
    """Locate the hyp files, load the class map and write the term files.

    All locations are hard-coded below.  An unsupported output extension is
    reported and the function returns without generating anything.
    """

    ## imported here (not at module level) so that the pure helpers in this
    ## file remain importable on machines without the NEDC toolkit
    #
    import nedc_eval_tools as ntools

    in_file_ext = "hyp"
    out_file_ext_a = "tse"
    rdir_a = "/data/isip/exp/tuh_eeg/exp_2459/scripts/output/naive_b/03_p1_decode"
    odir_a = os.path.join(PWD, "output", "naive_b", "04_p1_term")
    pfile_a = "/data/isip/exp/tuh_eeg/exp_2459/scripts/hyp_format.txt"

    ## pick the writer first so that a bad extension fails before any work
    #
    writers = {"tse": gen_tse, "ehyp": gen_ehyp}
    if out_file_ext_a not in writers:
        print("Error (%s:%s): Invalid output file extension (%s)"
              % (sys.argv[0], __name__, out_file_ext_a))
        return

    ## collect the resolved path of every hyp file below the input directory
    #
    hyplist = _find_files(rdir_a, in_file_ext)

    ## collect the mappings from the parameter file
    #
    scmap = ntools.nedc_eval_load_params(pfile_a)[0]

    writers[out_file_ext_a](hyplist, rdir_a, odir_a, scmap)
    print("term files generated successfully")


def _find_files(root_a, ext_a):
    """Return the sorted real paths of all ``*.<ext_a>`` files under root_a."""

    suffix = "." + ext_a
    found = []
    for dirpath, _dirnames, fnames in os.walk(root_a):
        for fname in fnames:
            if fname.endswith(suffix):
                found.append(os.path.realpath(os.path.join(dirpath, fname)))
    found.sort()
    return found


def gen_tse(hyplist_a, rdir_a, odir_a, scmap_a):
    """Write one tse file for every hyp file in hyplist_a.

    The output path mirrors the input path with rdir_a replaced by odir_a
    and the extension replaced by ``tse``.  Missing output directories are
    created.

    Args:
        hyplist_a: iterable of hyp file paths.
        rdir_a: root directory of the hyp files.
        odir_a: root directory for the generated tse files.
        scmap_a: class map, indexed as ``scmap_a[str(column)][0]`` to obtain
            the class name of a confidence column.
    """

    for _f in hyplist_a:

        ## if the "disk raid text" exists remove it, see the note on
        ## DISK_RAID_STR for more info
        #
        if DISK_RAID_STR in _f:
            _f = _f.replace(DISK_RAID_STR, "")
            rdir_a = rdir_a.replace(DISK_RAID_STR, "")
            odir_a = odir_a.replace(DISK_RAID_STR, "")

        ## read the file, pick the best class per line and sort by time
        #
        sorted_fields = sort_fields(readflists(_f), scmap_a)

        ## merge consecutive identical events into terms
        #
        start_list, stop_list, event_list, score_list = \
            get_terms_of_fields(*sorted_fields)

        ## create the output file name with its destination
        #
        op_f_name = os.path.splitext(os.path.basename(_f))[0] + ".tse"
        op_dir = os.path.dirname(_f).replace(rdir_a, odir_a)
        create_dirtree(op_dir)

        ## text mode: write_tse emits str, the context manager closes the file
        #
        with open(os.path.join(op_dir, op_f_name), "w") as fout:
            write_tse(fout, op_f_name, start_list, stop_list,
                      event_list, score_list)

        print("%s  generated successfully..." % op_f_name)


def write_tse(fp_a, fname, start_list, stop_list, event_list, score_list):
    """Write a tse_v1.0.0 header followed by one tab-separated row per term.

    Args:
        fp_a: writable text file object.
        fname: file name recorded in the header comment.
        start_list, stop_list: term boundaries, written with 4 decimals.
        event_list: term labels.
        score_list: term scores, written with 6 decimals.
    """

    fp_a.write("# filename: " + fname + "\n")
    fp_a.write("version = tse_v1.0.0\n")
    fp_a.write("# data starts here\n#\n")

    for start, stop, event, score in zip(start_list, stop_list,
                                         event_list, score_list):
        fp_a.write("%.4f\t%.4f\t%s\t%.6f\n"
                   % (float(start), float(stop), str(event), float(score)))


def get_terms_of_fields(sorted_start_list, sorted_stop_list,
                        sorted_event_list, sorted_score_list):
    """Merge runs of consecutive identical events into single terms.

    Events are compared case-insensitively.  A term starts at the start of
    the first entry of the run, stops at the stop of the last entry, and its
    score is the mean of the scores in the run.  Start and stop are divided
    by TIME_SCALE and rounded with round().

    NOTE(review): round() drops all fractional digits of the scaled time;
    confirm that whole-unit resolution is intended.

    Returns:
        (start_list, stop_list, event_list, score_list) of the merged terms;
        the event labels are lower-cased.
    """

    start_list = []
    stop_list = []
    event_list = []
    score_list = []

    total = len(sorted_start_list)
    first = 0
    while first < total:
        label = sorted_event_list[first].lower()

        ## extend the run while the next entry carries the same label
        #
        last = first
        sum_score = float(sorted_score_list[first])
        while last + 1 < total and \
                sorted_event_list[last + 1].lower() == label:
            last += 1
            sum_score += float(sorted_score_list[last])

        start_list.append(round(sorted_start_list[first] / TIME_SCALE))
        stop_list.append(round(sorted_stop_list[last] / TIME_SCALE))
        event_list.append(label)
        score_list.append(sum_score / (last - first + 1))

        first = last + 1

    return start_list, stop_list, event_list, score_list


def sort_fields(f_cont_a, scmap_a):
    """Parse hyp lines and return their fields sorted by start time.

    Each non-blank line is split on "," into an integer start, an integer
    stop and one float confidence per class.  The class with the highest
    confidence is looked up via ``scmap_a[str(column)][0]``, where column is
    the index of that confidence within the full line (hence the +2 offset
    for the start/stop columns).

    Args:
        f_cont_a: iterable of text lines.
        scmap_a: class map as described above.

    Returns:
        (start_list, stop_list, event_list, score_list) as four lists,
        ordered by (start, stop, event, score).  Four empty lists are
        returned when there is nothing to parse.
    """

    rows = []
    for line in f_cont_a:

        ## tolerate blank lines, e.g. a trailing newline at end of file
        #
        if not line.strip():
            continue

        parts = line.split(",")
        class_confs = [float(x) for x in parts[2:]]

        ## collect the class information related to hyp files
        #
        max_class_conf = max(class_confs)
        max_conf_index = class_confs.index(max_class_conf)

        ## +2 because start/stop were removed from the confidence list
        #
        classname = scmap_a[str(max_conf_index + 2)][0]

        rows.append((int(parts[0]), int(parts[1]),
                     classname.lower(), max_class_conf))

    rows.sort()

    ## return the *sorted* columns; an empty input yields four empty lists
    #
    return ([row[0] for row in rows], [row[1] for row in rows],
            [row[2] for row in rows], [row[3] for row in rows])


def gen_ehyp(hyplist, rdir_a, odir_a, scmap_a=None):
    """Placeholder for ehyp generation; currently does nothing.

    scmap_a is accepted (and optional) so that the call made by gen_terms,
    which passes the class map, does not fail.
    """
    pass


def create_dirtree(dirtree_a):
    """Create the directory dirtree_a with its parents if it is missing."""

    try:
        os.makedirs(dirtree_a)
    except OSError as err:

        ## an already existing directory is fine, anything else is an error
        #
        if err.errno != errno.EEXIST or not os.path.isdir(dirtree_a):
            raise


def readflists(f_a):
    """Return the content of the text file f_a as a list of lines."""

    with open(f_a, "r") as fin:
        return fin.read().splitlines()


if __name__ == "__main__":
    gen_terms()