#!/usr/bin/env python
#
# file: $NEDC_NFC/class/python/nedc_sys_tools/nedc_file_tools.py
#
# revision history:
#
# 20220225 (PM): added extract_comments function
# 20200623 (JP): reorganized
# 20200609 (JP): refactored the code and added atof and atoi
# 20170716 (JP): Upgraded to using the new annotation tools.
# 20170709 (JP): generalized some functions and abstracted more file I/O
# 20170706 (NC): refactored eval_tools into file_tools and display_tools
# 20170611 (JP): updated error handling
# 20170521 (JP): initial version
#
# usage:
#  import nedc_file_tools as nft
#
# This module contains a collection of functions that deal with file handling
#------------------------------------------------------------------------------
#
# imports are listed here
#
#------------------------------------------------------------------------------

# import system modules
#
import errno
import os
import re
import sys

# import NEDC modules
#
import nedc_debug_tools as ndt

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using basename:
#  this name is used as the prefix of informational and error messages
#
__FILE__ = os.path.basename(__file__)

# set the default character encoding system
#
DEF_CHAR_ENCODING = "utf-8"

# file processing character constants:
#  DELIM_BLANK is the null character and DELIM_NULL is the empty string
#
DELIM_BLANK = '\x00'
DELIM_BOPEN = '{'
DELIM_BCLOSE = '}'
DELIM_CARRIAGE = '\r'
DELIM_CLOSE = ']'
DELIM_COLON = ':'
DELIM_COMMA = ','
DELIM_COMMENT = '#'
DELIM_DASH = '-'
DELIM_DOT = '.'
DELIM_EQUAL = '='
DELIM_GREATTHAN = '>'
DELIM_LESSTHAN = '<'
DELIM_NEWLINE = '\n'
DELIM_NULL = ''
DELIM_OPEN = '['
DELIM_QUOTE = '"'
DELIM_SEMI = ';'
DELIM_SLASH = '/'
DELIM_SPACE = ' '
DELIM_SQUOTE = '\''
DELIM_TAB = '\t'
DELIM_USCORE = '_'

# define default file extensions
#
DEF_EXT_CSV = "csv"
DEF_EXT_EDF = "edf"
DEF_EXT_LBL = "lbl"
DEF_EXT_REC = "rec"
DEF_EXT_SVS = "svs"
DEF_EXT_TXT = "txt"
DEF_EXT_XML = "xml"

# regular expression constants:
#  this is a template with two %s slots that must be filled in before the
#  pattern is used; it captures two groups
#  NOTE(review): presumably the slots take the comment and assignment
#  delimiters - confirm against extract_comments
#
DEF_REGEX_ASSIGN_COMMENT = '^%s([a-zA-Z:!?" _-]*)%s(.+?(?=\n))'

# file processing string constants
#
STRING_EMPTY = ""
STRING_DASHDASH = "--"

# file processing lists:
#  used to accelerate some functions (atoi and atof cut their input at the
#  first occurrence of any of these characters)
#
LIST_SPECIALS = [DELIM_SPACE, DELIM_BLANK]

# i/o constants
#
MODE_READ_TEXT = "r"
MODE_READ_BINARY = "rb"
MODE_WRITE_TEXT = "w"
MODE_WRITE_BINARY = "wb"

# parameter file constants:
#  load_parameters only accepts files whose version equals PFILE_VERSION
#
DELIM_VERSION = "version"
PFILE_VERSION = "param_v1.0.0"

# define constants for XML tags
#
DEF_XML_HEIGHT = "height"
DEF_XML_WIDTH = "width"
DEF_XML_CONFIDENCE = "confidence"
DEF_XML_COORDS = "coordinates"
DEF_XML_REGION_ID = "region_id"
DEF_XML_TEXT = "text"
DEF_XML_TISSUE_TYPE = "tissue_type"
DEF_XML_LABEL = "label"

# define constants for CSV tags:
#  no CSV tag constants are defined at this point in the file
#

# declare a global debug object so we can use it in functions
#
dbgl = ndt.Dbgl()

#------------------------------------------------------------------------------
#
# functions listed here: general string processing
#
#------------------------------------------------------------------------------

# function: trim_whitespace
#
# arguments:
#  istr: input string
#
# return: an output string that has been trimmed
#
# This function removes leading and trailing whitespace.
# It is needed because text fields in Edf files have all
# sorts of junk in them.
#
def trim_whitespace(istr):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: trimming (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, istr))

    # exit gracefully: return the trimmed string
    #  str.strip() with no argument removes every leading and trailing
    #  whitespace character and yields an empty string when the input is
    #  empty or consists only of whitespace. note that a null character
    #  is not whitespace, so it is left in place.
    #
    return istr.strip()
#
# end of function

# function: first_substring
#
# arguments:
#  strings: list of strings (input)
#  substring: the substring to be matched (input)
#
# return: the index of the match in strings, or -1 if there is no match
#
# This function finds the index of the first string in strings that
# contains the substring. This is similar to running strstr on each
# element of the input list.
#
def first_substring(strings, substring):

    # scan lazily and stop at the first hit:
    #  next() falls back to -1 when nothing matches (or the list is empty)
    #
    try:
        return next((i for i, string in enumerate(strings)
                     if substring in string), int(-1))

    # an argument or element that does not support the "in" test is
    # reported as "no match" rather than raised to the caller
    #
    except TypeError:
        return int(-1)
#
# end of function

# function: first_string
#
# arguments:
#  strings: list of strings (input)
#  tstring: the string to be matched (input)
#
# return: the index of the match in strings, or -1 if there is no match
#
# This function finds the index of the first string in strings that
# is an exact match for tstring. This is similar to running strcmp on
# each element of the input list.
#
def first_string(strings, tstring):

    # scan lazily and stop at the first hit:
    #  next() falls back to -1 when nothing matches (or the list is empty)
    #
    try:
        return next((i for i, string in enumerate(strings)
                     if tstring == string), int(-1))

    # an input that cannot be iterated is reported as "no match"
    #
    except TypeError:
        return int(-1)
#
# end of function

# function: atoi
#
# arguments:
#  value: the value to be converted as a string
#
# return: an integer value (None if the conversion fails)
#
# This function emulates what C++ atoi does by ignoring everything
# from the first space or null character onward before conversion.
# This allows Python's integer conversion function to work properly.
#
def atoi(value):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: converting value (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value))

    # try to convert the input:
    #  the truncation is done inside the try block so that a value that
    #  is not a string is reported as a conversion error too
    #
    tstr = None
    try:

        # keep only the text that precedes the first special character
        # (space or null); if there is none, keep the whole string.
        # NOTE(review): a value that starts with a space is therefore
        # reduced to an empty string and fails to convert.
        #
        ind = min((value.index(x) if (x in value) else len(value))
                  for x in LIST_SPECIALS)
        tstr = value[0:ind]
        ival = int(tstr)

    except (TypeError, ValueError):
        print("Error: %s (line: %s) %s: string conversion error [%s][%s])" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value, tstr))
        return None

    # exit gracefully
    #
    return ival
#
# end of function

# function: atof
#
# arguments:
#  value: the value to be converted as a string
#
# return: a floating point value (None if the conversion fails)
#
# This function emulates what C++ atof does by ignoring everything
# from the first space or null character onward before conversion.
# This allows Python's float conversion function to work properly.
#
def atof(value):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: converting value (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value))

    # try to convert the input:
    #  the truncation is done inside the try block so that a value that
    #  is not a string is reported as a conversion error too
    #
    tstr = None
    try:

        # keep only the text that precedes the first special character
        # (space or null); if there is none, keep the whole string
        #
        ind = min((value.index(x) if (x in value) else len(value))
                  for x in LIST_SPECIALS)
        tstr = value[0:ind]
        fval = float(tstr)

    except (TypeError, ValueError):
        print("Error: %s (line: %s) %s: string conversion error [%s][%s])" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value, tstr))
        return None

    # exit gracefully
    #
    return fval
#
# end of function

#------------------------------------------------------------------------------
#
# functions listed here: manipulate filenames, lists and command line args
#
#------------------------------------------------------------------------------

# function: get_fullpath
#
# arguments:
#  path: path to directory or file
#
# return: the full path to directory/file path argument
#
# This function returns
# the full pathname for a file. It expands
# environment variables.
#
def get_fullpath(path):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: expanding name (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, path))

    # exit gracefully:
    #  expand environment variables first, then "~", then make the
    #  result absolute
    #
    return os.path.abspath(os.path.expanduser(os.path.expandvars(path)))
#
# end of function

# function: create_filename
#
# arguments:
#  iname: input filename (string)
#  odir: output directory (string)
#  oext: output file extension (string)
#  rdir: replace directory (string)
#  cdir: create directory (boolean - true means create the directory)
#
# return: the output filename
#
# This function creates an output file name based on the input arguments. It
# is a Python version of Edf::create_filename().
#
def create_filename(iname, odir, oext, rdir, cdir = False):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, iname))

    # get absolute file name
    #
    abs_name = os.path.abspath(os.path.realpath(os.path.expanduser(iname)))

    # replace extension with ext:
    #  note the basename is cut at its first dot, so everything after
    #  that dot is dropped before the new extension is appended
    #
    if oext is None:
        ofile = os.path.join(os.path.dirname(abs_name),
                             os.path.basename(abs_name))
    else:
        ofile = os.path.join(os.path.dirname(abs_name),
                             os.path.basename(abs_name).split(DELIM_DOT)[0]
                             + DELIM_DOT + oext)

    # get absolute path of odir
    #
    if odir is None:
        odir = DELIM_DOT
    else:
        odir = os.path.abspath(os.path.realpath(os.path.expanduser(odir)))

    # if the replace directory is valid and specified:
    #  NOTE(review): the membership test uses rdir as given, while the
    #  replacement uses its absolute form - confirm this is intended
    #
    if rdir is not None and rdir in ofile:

        # get absolute path of rdir
        #
        rdir = os.path.abspath(os.path.realpath(
            os.path.expanduser(rdir)))

        # replace the replace directory portion of path with
        # the output directory
        #
        ofile = ofile.replace(rdir, odir)

    # if the replace directory is not valid or specified
    #
    else:

        # append basename of ofile to output directory
        #
        ofile = os.path.join(odir, os.path.basename(ofile))

    # create the directory if necessary
    #
    if cdir is True:
        if make_dir(odir) is False:
            print("Error: %s (line: %s) %s: make dir failed (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, odir))
            sys.exit(os.EX_SOFTWARE)

    # exit gracefully
    #
    return ofile
#
# end of function

# function: concat_names
#
# arguments:
#  odir: the output directory that will hold the file
#  fname: the output filename
#
# return:
#  fname: a filename that is a concatenation of odir and fname
#
# This function joins a directory and a filename with a single slash.
# If odir is an empty string, fname is returned unchanged.
#
def concat_names(odir, fname):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: concatenating (%s %s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, odir, fname))

    # an empty directory has nothing to prepend:
    #  this also avoids indexing into an empty string
    #
    if not odir:
        return fname

    # strip any trailing slashes:
    #  a directory of "/" becomes empty here, so the result is "/fname"
    #
    dname = odir.rstrip(DELIM_SLASH)

    # create the full pathname
    #
    new_name = dname + DELIM_SLASH + fname

    # exit gracefully
    #
    return new_name
#
# end of function

# function: get_flist
#
# arguments:
#  fname: full pathname of a filelist file
#
# return:
#  flist: a list of filenames (None if the file cannot be opened or read)
#
# This function opens a file and reads filenames. It ignores comment
# lines and blank lines. Note that all spaces and tabs are removed from
# each line, including those inside a filename.
#
def get_flist(fname):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: opening (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

    # declare local variables
    #
    flist = []

    # open the file
    #
    try:
        fp = open(fname, MODE_READ_TEXT)
    except IOError:
        print("Error: %s (line: %s) %s: file not found (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
        return None

    # iterate over lines:
    #  the with statement closes the file on every exit path
    #
    with fp:
        try:
            for line in fp:

                # remove spaces and newline chars
                #
                line = line.replace(DELIM_SPACE, DELIM_NULL) \
                           .replace(DELIM_NEWLINE, DELIM_NULL) \
                           .replace(DELIM_TAB, DELIM_NULL)

                # keep everything except comment lines and blank lines
                #
                if line and not line.startswith(DELIM_COMMENT):
                    flist.append(line)

        # a read or decoding failure invalidates the whole list
        #
        except (IOError, UnicodeError):
            flist = None

    # exit gracefully
    #
    return flist
#
# end of function

# function: make_fp
#
# arguments:
#  fname: the filename
#
# return:
#  fp: a file pointer opened for writing text (None if the open fails)
#
def make_fp(fname):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

    # open the file:
    #  only failures of open() itself are reported; the caller owns the
    #  returned file pointer and must close it
    #
    try:
        fp = open(fname, MODE_WRITE_TEXT)
    except (IOError, TypeError, ValueError):
        print("Error: %s (line: %s) %s: error opening file (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
        return None

    # exit gracefully
    #
    return fp
#
# end of function

#------------------------------------------------------------------------------
#
# functions listed here: manipulate directories
#
#------------------------------------------------------------------------------

# function: make_dirs
#
# arguments:
#  dirlist - the list of directories to create
#
# return: a boolean value (always True)
#
# This function creates all the directories in a given list
#
def make_dirs(dirlist):

    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, dirlist))

    # loop over the list
    #
    for directory in dirlist:

        # make the directory
        #
        make_dir(directory)

    # exit gracefully
    #
    return True
#
# end of function

# function: make_dir
#
# arguments:
#  path: new directory path (input)
#
# return: a boolean value indicating status
#
# This function emulates the Unix command "mkdir -p". It creates
# a directory tree, recursing through each level automatically.
# If the directory already exists, it continues past that level.
#
def make_dir(path):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, path))

    # use a system call to make a directory
    #
    try:
        os.makedirs(path)

    # if the directory exists, an error is thrown (and caught):
    #  any other failure, including a non-directory that already has
    #  this name, is passed on to the caller
    #
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

    # exit gracefully
    #
    return True
#
# end of function

# function: get_dirs
#
# arguments:
#  flist: list of files
#  odir: output directory
#  rdir: replace directory
#  oext: output extension
#
# return: set of unique directory paths
#
# This function returns a set containing unique directory paths
# from a given file list. This is done by replacing the rdir
# with odir and adding the base directory of the fname to the set
#
def get_dirs(flist, odir=DELIM_NULL, rdir=DELIM_NULL, oext=None):

    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: fetching (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, flist))

    # generate a set of unique directory paths
    #
    unique_dirs = set()

    # for each file name in the list
    #
    for fname in flist:

        # generate the output file name
        #
        ofile = create_filename(fname, odir, oext, rdir)

        # append the base dir of the ofile to the set
        #
        unique_dirs.add(os.path.dirname(ofile))

    # exit gracefully
    #
    return unique_dirs
#
# end of function

#------------------------------------------------------------------------------
#
# functions listed here: manage parameter files
#
#------------------------------------------------------------------------------

# function: load_parameters
#
# arguments:
#  pfile: path of a parameter file
#  keyword: section of the parameter file to load
#
# return: a dict, containing the values in the section (None if the file
#         is not a parameter file, cannot be opened, or has no such section)
#
# This function scans the file for a line that starts with keyword and
# contains an opening brace, then collects "name = value" pairs until a
# line containing a closing brace is found.
#
def load_parameters(pfile, keyword):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: loading (%s %s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, pfile, keyword))

    # declare local variables
    #
    values = {}

    # make sure the file is a parameter file
    #
    if get_version(pfile) != PFILE_VERSION:
        return None

    # open the file
    #
    try:
        fp = open(pfile, MODE_READ_TEXT)
    except IOError:
        print("Error: %s (line: %s) %s: file not found (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, pfile))
        return None

    # loop over all lines in the file:
    #  the with statement closes the file on every exit path, including
    #  the early return and a block that is never closed
    #
    flag_pblock = False
    with fp:
        for line in fp:

            # initialize empty value for each line
            #
            value = ""

            # remove white spaces at the edges of the string
            #
            if DELIM_EQUAL in line:
                value = line.split(DELIM_EQUAL)[1]
                value = value.strip()

            # remove white spaces unless string starts with quotes:
            #  a quoted value keeps its inner spaces and loses the quotes
            #
            if (value.startswith(DELIM_QUOTE) and
                value.endswith(DELIM_QUOTE)):
                tstr = line.replace(DELIM_QUOTE, DELIM_NULL).strip()
            elif (value.startswith(DELIM_SQUOTE) and
                  value.endswith(DELIM_SQUOTE)):
                tstr = line.replace(DELIM_SQUOTE, DELIM_NULL).strip()
            else:
                tstr = line.replace(DELIM_SPACE, DELIM_NULL) \
                           .replace(DELIM_NEWLINE, DELIM_NULL) \
                           .replace(DELIM_TAB, DELIM_NULL)

            # throw away commented and blank lines
            #
            if tstr.startswith(DELIM_COMMENT) or len(tstr) == 0:
                pass

            # the start of the requested block
            #
            elif tstr.startswith(keyword) and (DELIM_BOPEN in tstr):
                flag_pblock = True

            # the end of the requested block: we are done
            #
            elif flag_pblock and (DELIM_BCLOSE in tstr):
                return values

            # a "name = value" line inside the requested block
            #
            elif flag_pblock:
                parts = tstr.split(DELIM_EQUAL)
                values[parts[0].strip()] = parts[1].strip()

    # make sure we found a block
    #
    if not flag_pblock:
        print("Error: %s (line: %s) %s: invalid parameter file (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, pfile))
        return None

    # exit gracefully
    #
    return values
#
# end of function

# function: generate_map
#
# arguments:
#  pblock: a dictionary containing a parameter block
#
# return:
#  pmap: a parameter file map
#
# This function converts a dictionary returned from load_parameters to
# a dictionary containing a parameter map. Note that it lowercases the
# map so that text is normalized.
#
def generate_map(pblock):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: generating a map" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))

    # build the map:
    #  each lowercased key points to the list of lowercased symbols
    #  obtained by splitting its value at the commas
    #
    pmap = {}
    for key, value in pblock.items():
        pmap[key.lower()] = [sym.lower() for sym in value.split(DELIM_COMMA)]

    # exit gracefully
    #
    return pmap
#
# end of function

# function: permute_map
#
# arguments:
#  map: the input map
#
# return:
#  pmap: an inverted map
#
# this function permutes a map so symbol lookups can go fast.
#
def permute_map(map):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: permuting map" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))

    # invert the input map:
    #  every event becomes a key that points back to its symbol. if an
    #  event is listed under several symbols, the last one visited wins.
    #
    pmap = {event: sym for sym in map for event in map[sym]}

    # exit gracefully
    #
    return pmap
#
# end of function

# function: map_events
#
# arguments:
#  elist: a list of events
#  pmap: a permuted map (look up symbols to be converted)
#
# return:
#  mlist: a list of mapped events
#
# this function maps event labels to mapped values.
#
def map_events(elist, pmap):

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: mapping events" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))

    # build the output list:
    #  the first two fields of each event are copied and every label in
    #  the third field is replaced by its mapped symbol
    #
    mlist = [[event[0], event[1],
              {pmap[key]: event[2][key] for key in event[2]}]
             for event in elist]

    # exit gracefully
    #
    return mlist
#
# end of function

# function: get_version
#
# arguments:
#  fname: input filename
#
# return: a string containing the type
#
# this function opens a file, reads the magic sequence and returns
# the string.
# def get_version(fname): # display informational message # if dbgl > ndt.BRIEF: print("%s (line: %s) %s: opening file (%s)" % (__FILE__, ndt.__LINE__, ndt.__NAME__, fname)) # open the file # try: fp = open(fname, MODE_READ_TEXT) except IOError: print("%s (line: %s) %s: file not found (%s)" % (__FILE__, ndt.__LINE__, ndt.__NAME__, fname)) return None # define version value # ver = None # iterate over lines until we find the magic string # for line in fp: # set every character to be lowercase # line = line.lower() # check if string contains "version" # if line.startswith("version") or line.startswith(" ndt.BRIEF: print("%s (line: %s) %s: opening file (%s)" % (__FILE__, ndt.__LINE__, ndt.__NAME__, fname)) # open the file # try: fp = open(fname, MODE_READ_TEXT) except IOError: print("%s (line: %s) %s: file not found (%s)" % (__FILE__, ndt.__LINE__, ndt.__NAME__, fname)) return None # loop through the file # for line in fp: # strip all the spaces within the line # line = line.replace(DELIM_CARRIAGE, DELIM_NULL) # skip all the line that is not a comment # if not line.startswith(cdelim): continue # extract all of the comments # assign_comment = re.findall(regex_assign_comment, line) # append it to the dictionary # if assign_comment: dict_comments[assign_comment[0][0].strip()] \ = assign_comment[0][1].strip() # close the file # fp.close() # exit gracefully # return dict_comments # # end of file