#!/usr/bin/env python # # file: $NEDC_NFC/class/python/nedc_sys_tools/nedc_file_tools.py # # revision history: # 20230710 (SM): modified load_parameters to accept lists # 20230621 (AB): refactored code to new comment style # 20220225 (PM): added extract_comments function # 20200623 (JP): reorganized # 20200609 (JP): refactored the code and added atof and atoi # 20170716 (JP): Upgraded to using the new annotation tools. # 20170709 (JP): generalized some functions and abstracted more file I/O # 20170706 (NC): refactored eval_tools into file_tools and display_tools # 20170611 (JP): updated error handling # 20170521 (JP): initial version # # usage: # import nedc_file_tools as nft # # This class contains a collection of functions that deal with file handling #------------------------------------------------------------------------------ # # imports are listed here # #------------------------------------------------------------------------------ # import system modules # import errno import os import re import sys # import NEDC modules # import nedc_debug_tools as ndt #------------------------------------------------------------------------------ # # global variables are listed here # #------------------------------------------------------------------------------ # set the filename using basename # __FILE__ = os.path.basename(__file__) # set the default character encoding system # DEF_CHAR_ENCODING = "utf-8" # file processing charater constants # DELIM_BLANK = '\x00' DELIM_BOPEN = '{' DELIM_BCLOSE = '}' DELIM_CARRIAGE = '\r' DELIM_CLOSE = ']' DELIM_COLON = ':' DELIM_COMMA = ',' DELIM_COMMENT = '#' DELIM_DASH = '-' DELIM_DOT = '.' 
DELIM_EQUAL = '='
DELIM_GREATTHAN = '>'
DELIM_LESSTHAN = '<'
DELIM_NEWLINE = '\n'
DELIM_NULL = ''
DELIM_OPEN = '['
DELIM_QUOTE = '"'
DELIM_SEMI = ';'
DELIM_SLASH = '/'
DELIM_SPACE = ' '
DELIM_SQUOTE = '\''
DELIM_TAB = '\t'
DELIM_USCORE = '_'

# define default file extensions
#
DEF_EXT_CSV = "csv"
DEF_EXT_EDF = "edf"
DEF_EXT_LBL = "lbl"
DEF_EXT_REC = "rec"
DEF_EXT_SVS = "svs"
DEF_EXT_TXT = "txt"
DEF_EXT_XML = "xml"

# regular expression constants:
#  the two %s fields are filled in with the comment delimiter and the
#  assignment delimiter before the expression is used
#
DEF_REGEX_ASSIGN_COMMENT = '^%s([a-zA-Z:!?" _-]*)%s(.+?(?=\n))'

# file processing string constants
#
STRING_EMPTY = ""
STRING_DASHDASH = "--"

# file processing lists:
#  used to accelerate some functions
#
LIST_SPECIALS = [DELIM_SPACE, DELIM_BLANK]

# i/o constants
#
MODE_READ_TEXT = "r"
MODE_READ_BINARY = "rb"
MODE_WRITE_TEXT = "w"
MODE_WRITE_BINARY = "wb"

# parameter file constants
#
DELIM_VERSION = "version"
PFILE_VERSION = "param_v1.0.0"

# define constants for XML tags
#
DEF_XML_HEIGHT = "height"
DEF_XML_WIDTH = "width"
DEF_XML_CONFIDENCE = "confidence"
DEF_XML_COORDS = "coordinates"
DEF_XML_REGION_ID = "region_id"
DEF_XML_TEXT = "text"
DEF_XML_TISSUE_TYPE = "tissue_type"
DEF_XML_LABEL = "label"

# define constants for CSV tags
#

# define number constants related to string processing
#
DEF_LWIDTH = int(79)

# declare a global debug object so we can use it in functions
#
dbgl = ndt.Dbgl()

#------------------------------------------------------------------------------
#
# functions listed here: general string processing
#
#------------------------------------------------------------------------------

def trim_whitespace(istr):
    """
    function: trim_whitespace

    arguments:
     istr: input string

    return:
     a copy of istr with leading and trailing whitespace removed; an
     empty string is returned when istr is empty or all whitespace

    description:
     This function removes leading and trailing whitespace. It is needed
     because text fields in Edf files have all sorts of junk in them.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: trimming (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, istr))

    # exit gracefully: return the trimmed string
    #  str.strip() uses the same whitespace definition as str.isspace(),
    #  so this matches a character-by-character scan from both ends
    #
    return istr.strip()
#
# end of function

def first_substring(strings, substring):
    """
    function: first_substring

    arguments:
     strings: list of strings (input)
     substring: the substring to be matched (input)

    return:
     the index of the first element of strings that contains substring,
     or -1 if there is no such element

    description:
     This function finds the index of the first string in strings that
     contains the substring. This is similar to running strstr on each
     element of the input list.
    """

    # scan lazily and stop at the first hit:
    #  the default argument of next() supplies -1 when nothing matches.
    #  only TypeError is caught (e.g. strings is None or holds an element
    #  that does not support "in"), so that such inputs still produce -1
    #  without hiding unrelated failures such as KeyboardInterrupt
    #
    try:
        return next((i for i, string in enumerate(strings)
                     if substring in string), int(-1))
    except TypeError:
        return int(-1)
#
# end of function

def first_string(strings, tstring):
    """
    function: first_string

    arguments:
     strings: list of strings (input)
     tstring: the string to be matched (input)

    return:
     the index of the first element of strings that is equal to tstring,
     or -1 if there is no such element

    description:
     This function finds the index of the first string in strings that
     is an exact match for tstring.
    """

    # scan lazily and stop at the first exact match:
    #  see first_substring for why only TypeError is caught
    #
    try:
        return next((i for i, string in enumerate(strings)
                     if tstring == string), int(-1))
    except TypeError:
        return int(-1)
#
# end of function

# function: atoi
#
# arguments:
#  value: the value to be converted as a string
#
# return: an integer value (None if the conversion fails)
#
# This function emulates what C++ atoi does by discarding everything
# from the first space or null character onward before conversion.
# This allows Python's integer conversion function to work properly.
#
def atoi(value):
    """
    function: atoi

    arguments:
     value: the value to be converted as a string

    return:
     an integer value, or None if the conversion fails

    description:
     This function emulates what C++ atoi does: everything from the
     first space or null character onward is discarded and the remaining
     prefix is converted with int(). Note that a value that begins with
     a space therefore yields an empty prefix and returns None.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: converting value (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value))

    # keep only the text before the first space/null character
    #
    tstr = _truncate_at_special(value)

    # try to convert the input
    #
    try:
        ival = int(tstr)
    except (ValueError, TypeError):
        print("Error: %s (line: %s) %s: string conversion error [%s][%s])" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value, tstr))
        return None

    # exit gracefully
    #
    return ival
#
# end of function

def atof(value):
    """
    function: atof

    arguments:
     value: the value to be converted as a string

    return:
     a floating point value, or None if the conversion fails

    description:
     This function emulates what C++ atof does: everything from the
     first space or null character onward is discarded and the remaining
     prefix is converted with float().
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: converting value (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value))

    # keep only the text before the first space/null character
    #
    tstr = _truncate_at_special(value)

    # try to convert the input
    #
    try:
        fval = float(tstr)
    except (ValueError, TypeError):
        print("Error: %s (line: %s) %s: string conversion error [%s][%s])" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, value, tstr))
        return None

    # exit gracefully
    #
    return fval
#
# end of function

def _truncate_at_special(value):
    """
    function: _truncate_at_special

    arguments:
     value: a string

    return:
     the part of value that precedes the first character found in
     LIST_SPECIALS (space or null); all of value if none is present

    description:
     This is a private helper shared by atoi and atof.
    """

    # find the earliest position of any special character:
    #  a character that does not occur contributes len(value), so the
    #  minimum is the cut point (or the full length when none occur)
    #
    ind = min((value.index(ch) if ch in value else len(value))
              for ch in LIST_SPECIALS)

    # exit gracefully
    #
    return value[0:ind]
#
# end of function

#------------------------------------------------------------------------------
#
# functions listed here: manipulate filenames, lists and command line args
#
#------------------------------------------------------------------------------

def get_fullpath(path):
    """
    function: get_fullpath

    arguments:
     path: path to directory or file

    return:
     the full path to directory/file path argument

    description:
     This function returns the full pathname for a file. It expands
     environment variables and a leading "~", then makes the result
     absolute.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: expanding name (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, path))

    # expand in stages: environment variables, then "~", then absolute
    #
    expanded = os.path.expanduser(os.path.expandvars(path))

    # exit gracefully
    #
    return os.path.abspath(expanded)
#
# end of function

def create_filename(iname, odir, oext, rdir, cdir = False):
    """
    function: create_filename

    arguments:
     iname: input filename (string)
     odir: output directory (string); None means DELIM_DOT (".")
     oext: output file extension (string); None keeps the input basename
     rdir: replace directory (string); None disables replacement
     cdir: create directory (boolean - true means create the directory)

    return:
     the output filename

    description:
     This function creates an output file name based on the input
     arguments. It is a Python version of Edf::create_filename().

     When oext is given, the basename is cut at its FIRST dot before the
     new extension is appended (so "a.b.edf" becomes "a.<oext>").

     When rdir occurs in the resulting path, the absolute form of rdir
     is replaced by odir; otherwise the basename is simply placed in
     odir. If cdir is True, odir itself is created with make_dir, and
     the program exits with os.EX_SOFTWARE if make_dir returns False.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, iname))

    # get absolute file name and split it into directory and basename
    #
    abs_name = os.path.abspath(os.path.realpath(os.path.expanduser(iname)))
    idir = os.path.dirname(abs_name)
    ibase = os.path.basename(abs_name)

    # replace extension with ext
    #
    if oext is None:
        ofile = os.path.join(idir, ibase)
    else:
        ofile = os.path.join(idir,
                             ibase.split(DELIM_DOT)[0] + DELIM_DOT + oext)

    # get absolute path of odir
    #
    if odir is None:
        odir = DELIM_DOT
    else:
        odir = os.path.abspath(os.path.realpath(os.path.expanduser(odir)))

    # if the replace directory is valid and specified:
    #  note the membership test uses rdir as given, while the replacement
    #  uses its absolute form
    #
    if rdir is not None and rdir in ofile:

        # get absolute path of rdir
        #
        rdir = os.path.abspath(os.path.realpath(os.path.expanduser(rdir)))

        # replace the replace directory portion of path with
        # the output directory
        #
        ofile = ofile.replace(rdir, odir)

    # if the replace directory is not valid or specified
    #
    else:

        # append basename of ofile to output directory
        #
        ofile = os.path.join(odir, os.path.basename(ofile))

    # create the directory if necessary
    #
    if cdir is True:
        if make_dir(odir) is False:
            print("Error: %s (line: %s) %s: make dir failed (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, odir))
            sys.exit(os.EX_SOFTWARE)

    # exit gracefully
    #
    return ofile
#
# end of function

def concat_names(odir, fname):
    """
    function: concat_names

    arguments:
     odir: the output directory that will hold the file
     fname: the output filename

    return:
     a filename that is a concatenation of odir, DELIM_SLASH and fname

    description:
     This function joins a directory and a filename with a single slash.
     One trailing slash on odir, if present, is removed first. An empty
     odir yields DELIM_SLASH + fname.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: concatenating (%s %s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, odir, fname))

    # strip one trailing slash:
    #  endswith() is used rather than indexing [-1] so that an empty
    #  odir does not raise an IndexError
    #
    base = odir
    if base.endswith(DELIM_SLASH):
        base = base[:-1]

    # create the full pathname
    #
    new_name = base + DELIM_SLASH + fname

    # exit gracefully
    #
    return new_name
#
# end of function

def get_flist(fname):
    """
    function: get_flist

    arguments:
     fname: full pathname of a filelist file

    return:
     flist: a list of filenames; None if the file cannot be opened or
            an error occurs while reading it

    description:
     This function opens a file and reads filenames. It ignores comment
     lines and blank lines. All spaces, tabs and newline characters are
     removed from each line, including any inside a filename.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: opening (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

    # open the file
    #
    try:
        fp = open(fname, MODE_READ_TEXT)
    except IOError:
        print("Error: %s (line: %s) %s: file not found (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
        return None

    # declare local variables
    #
    flist = []

    # iterate over lines:
    #  the with statement guarantees the file is closed on every path
    #
    with fp:
        try:
            for line in fp:

                # remove spaces and newline chars
                #
                line = line.replace(DELIM_SPACE, DELIM_NULL) \
                           .replace(DELIM_NEWLINE, DELIM_NULL) \
                           .replace(DELIM_TAB, DELIM_NULL)

                # keep everything that is not blank and not a comment
                #
                if line and not line.startswith(DELIM_COMMENT):
                    flist.append(line)

        # a read or decoding failure invalidates the whole list
        #
        except (IOError, OSError, UnicodeError):
            flist = None

    # exit gracefully
    #
    return flist
#
# end of function

def make_fp(fname):
    """
    function: make_fp

    arguments:
     fname: the filename

    return:
     fp: a file pointer opened for writing text, or None if the file
         could not be opened

    description:
     This function opens a file for writing (MODE_WRITE_TEXT) and
     returns the open file object. The caller must close it.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

    # open the file:
    #  TypeError/ValueError cover malformed names (e.g. None), so those
    #  still return None, but KeyboardInterrupt and SystemExit now
    #  propagate
    #
    try:
        fp = open(fname, MODE_WRITE_TEXT)
    except (IOError, OSError, TypeError, ValueError):
        print("Error: %s (line: %s) %s: error opening file (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
        return None

    # exit gracefully
    #
    return fp
#
# end of function

#------------------------------------------------------------------------------
#
# functions listed here: manipulate directories
#
#------------------------------------------------------------------------------

def make_dirs(dirlist):
    """
    function: make_dirs

    arguments:
     dirlist - the list of directories to create

    return:
     a boolean value (always True); errors raised by make_dir propagate

    description:
     This function creates all the directories in a given list
    """

    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, dirlist))

    # loop over the list and make each directory
    #
    for directory in dirlist:
        make_dir(directory)

    # exit gracefully
    #
    return True
#
# end of function

def make_dir(path):
    """
    function: make_dir

    arguments:
     path: new directory path (input)

    return:
     a boolean value indicating status (always True); an OSError is
     raised if the directory cannot be created

    description:
     This function emulates the Unix command "mkdir -p". It creates a
     directory tree, recursing through each level automatically.
     If the directory already exists, it continues past that level.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: creating (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, path))

    # use a system call to make a directory
    #
    try:
        os.makedirs(path)

    # if the directory exists, an error is thrown (and caught):
    #  anything else, including path existing as a regular file,
    #  is passed on to the caller
    #
    except OSError as exc:
        already_there = (exc.errno == errno.EEXIST) and os.path.isdir(path)
        if not already_there:
            raise

    # exit gracefully
    #
    return True
#
# end of function

def get_dirs(flist, odir=DELIM_NULL, rdir=DELIM_NULL, oext=None):
    """
    function: get_dirs

    arguments:
     flist: list of files
     odir: output directory
     rdir: replace directory
     oext: output extension

    return:
     set of unique directory paths

    description:
     This function returns a set containing unique directory paths from
     a given file list. Each name is passed through create_filename
     (which replaces rdir with odir) and the directory part of the
     result is added to the set.
    """

    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: fetching (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, flist))

    # generate a set of unique directory paths:
    #  for each file name, build the output file name and keep its
    #  base directory
    #
    unique_dirs = set()
    for fname in flist:
        unique_dirs.add(os.path.dirname(
            create_filename(fname, odir, oext, rdir)))

    # exit gracefully
    #
    return unique_dirs
#
# end of function

#------------------------------------------------------------------------------
#
# functions listed here: manage parameter files
#
#------------------------------------------------------------------------------

def get_kv_pair(input_str):
    """
    function: get_kv_pair

    arguments:
     input_str: the input string to turn into a key:value pair

    return:
     key: the key of the determined key:value pair
     value: the value of the determined key:value pair; a string, or a
            list of strings when the value holds top-level commas

    description:
     This function parses a parameter string (key = value) and turns it
     into a key:value pair. A value wrapped in single or double quotes
     is returned as one literal string with the quotes removed. Any
     other value is split on commas that are not inside parentheses.

     The string is split on the FIRST equal sign only, so a value may
     itself contain equal signs. An IndexError is raised if input_str
     contains no equal sign.
    """

    # split the current line into key and value parts at the first "="
    #
    parts = input_str.split(DELIM_EQUAL, 1)

    # strip whitespace from the key
    #
    key = parts[0].strip()

    # strip whitespace from the value
    #
    raw = parts[1].strip()

    # if the value is surrounded by quotes, determine it as a literal and
    # remove the surrounding quotes
    #
    dquoted = raw.startswith(DELIM_QUOTE) and raw.endswith(DELIM_QUOTE)
    squoted = raw.startswith(DELIM_SQUOTE) and raw.endswith(DELIM_SQUOTE)
    if dquoted or squoted:
        return key, raw.strip("'").strip('"')

    # split the value using regex. this expression will split the value
    # string into a list if there are commas present. commas inside of
    # parentheses are not counted
    #
    items = re.split(r',\s*(?![^()]*\))', raw)

    # if there is only one string in the value list, it is not a list
    # and the key value pair is returned as strings
    #
    if len(items) <= 1:
        return key, items[0].strip()

    # otherwise return the value as a list of strings
    #
    return key, [item.strip() for item in items]
#
# end of function

def _normalize_param_line(line):
    """
    function: _normalize_param_line

    arguments:
     line: one raw line read from a parameter file

    return:
     the line prepared for parsing by load_parameters

    description:
     This is a private helper for load_parameters. If the text after the
     first equal sign is wrapped in quotes, only the edges of the line
     are stripped so that whitespace inside the quotes survives. In all
     other cases every space, tab and newline is removed.
    """

    # isolate the value (if any) so we can test it for quotes
    #
    value = DELIM_NULL
    if DELIM_EQUAL in line:
        value = line.split(DELIM_EQUAL, 1)[1].strip()

    # quoted values keep their interior whitespace:
    #  the edges are stripped so that indented comments, keywords and
    #  closing braces are still recognized by startswith() tests
    #
    dquoted = value.startswith(DELIM_QUOTE) and value.endswith(DELIM_QUOTE)
    squoted = value.startswith(DELIM_SQUOTE) and value.endswith(DELIM_SQUOTE)
    if dquoted or squoted:
        return line.strip()

    # exit gracefully: remove all white space
    #
    return line.replace(DELIM_SPACE, DELIM_NULL) \
               .replace(DELIM_NEWLINE, DELIM_NULL) \
               .replace(DELIM_TAB, DELIM_NULL)
#
# end of function

def load_parameters(pfile, keyword):
    """
    function: load_parameters

    arguments:
     pfile: path of a parameter file
     keyword: a parameter that has a section or a single value

    return:
     values: a dict, containing the value/s of the specified parameter;
             None if the file is not a parameter file, cannot be opened,
             or does not contain a block for the keyword

    description:
     This function reads a specified parameter file and reads the
     specified parameter into a Python dictionary object. This function
     works on parameter 'blocks' as well as single value parameters.

     A line that starts with keyword and contains "{" opens a block;
     the key = value lines that follow are collected until a line
     containing "}" is seen. A line that starts with keyword and
     contains "=" (but no "{") is returned immediately as a one-entry
     dict. Keyword matching is a prefix match (startswith).
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: loading (%s %s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, pfile, keyword))

    # make sure the file is a parameter file
    #
    if get_version(pfile) != PFILE_VERSION:
        return None

    # open the file
    #
    try:
        fp = open(pfile, MODE_READ_TEXT)
    except IOError:
        print("Error: %s (line: %s) %s: file not found (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, pfile))
        return None

    # declare local variables
    #
    values = {}
    flag_pblock = False

    # loop over all lines in the file:
    #  the with statement closes the file on every exit path, including
    #  the early returns below and a block that is never closed
    #
    with fp:
        for line in fp:

            # remove white space (see _normalize_param_line)
            #
            pline = _normalize_param_line(line)

            # throw away commented and blank lines
            #
            if len(pline) == 0 or pline.startswith(DELIM_COMMENT):
                continue

            # if the line starts with the given keyword
            #
            if pline.startswith(keyword):

                # if the keyword contains a block, flag the block
                #
                if DELIM_BOPEN in pline:
                    flag_pblock = True

                # if the keyword is only a single value, return it
                #
                elif DELIM_EQUAL in pline:
                    key, value = get_kv_pair(pline)
                    values[key] = value
                    return values

            # if the block is closed with the "}" character, return the
            # found values
            #
            elif flag_pblock and DELIM_BCLOSE in pline:
                return values

            # if the current block is flagged, add the key value pair
            # for the current line to the "values" dictionary
            #
            elif flag_pblock:
                key, value = get_kv_pair(pline)
                values[key] = value

    # make sure we found a block
    #
    if not flag_pblock:
        print("Error: %s (line: %s) %s: invalid parameter file (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, pfile))
        return None

    # exit gracefully:
    #  reached only when a block was opened but never closed
    #
    return values
#
# end of function

def generate_map(pblock):
    """
    function: generate_map

    arguments:
     pblock: a dictionary containing a parameter block

    return:
     pmap: a parameter file map

    description:
     This function converts a dictionary returned from load_parameters
     to a dictionary containing a parameter map. Each key is lowercased
     and mapped to a list of lowercased strings. A value may be either
     a comma-separated string or a list of strings, since get_kv_pair
     returns a list when a value contains commas.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: generating a map" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))

    # declare local variables
    #
    pmap = {}

    # loop over the input, split the line and assign it to pmap
    #
    for key in pblock:

        # a string still needs to be split; a list is already split
        #
        value = pblock[key]
        if isinstance(value, str):
            items = value.split(DELIM_COMMA)
        else:
            items = list(value)

        # normalize the text to lowercase
        #
        pmap[key.lower()] = [item.lower() for item in items]

    # exit gracefully
    #
    return pmap
#
# end of function

def permute_map(map):
    """
    function: permute_map

    arguments:
     map: the input map (symbol -> list of events)

    return:
     pmap: an inverted map (event -> symbol)

    description:
     this function permutes a map so symbol lookups can go fast. If an
     event is listed under more than one symbol, the symbol visited
     last wins.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: permuting map" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))

    # invert the input map:
    #  note there is some redundancy here, but every event should
    #  have only one output symbol
    #
    pmap = {}
    for sym in map:
        pmap.update((event, sym) for event in map[sym])

    # exit gracefully
    #
    return pmap
#
# end of function

def map_events(elist, pmap):
    """
    function: map_events

    arguments:
     elist: a list of events; each event is indexed as [0], [1] and a
            dictionary at [2] keyed by label
     pmap: a permuted map (look up symbols to be converted)

    return:
     mlist: a list of mapped events

    description:
     this function maps event labels to mapped values. The first two
     fields of each event are copied and every key of the event's
     dictionary is replaced by pmap[key]. A KeyError is raised for a
     label that is not in pmap.
    """

    # display informational message
    #
    if dbgl == ndt.FULL:
        print("%s (line: %s) %s: mapping events" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))

    # loop over the input list
    #
    mlist = []
    for event in elist:

        # change the labels
        #
        mapped = {}
        for key in event[2]:
            mapped[pmap[key]] = event[2][key]

        # copy the event with its relabeled dictionary
        #
        mlist.append([event[0], event[1], mapped])

    # exit gracefully
    #
    return mlist
#
# end of function

def get_version(fname):
    """
    function: get_version

    arguments:
     fname: input filename

    return:
     a string containing the type; None if the file cannot be opened or
     no version line is found

    description:
     this function opens a file, reads the magic sequence and returns
     the string.
    """

    # NOTE(review): the copy of this file under review was corrupted
    #  inside this function: the text from the "<" of the second
    #  startswith() literal through the debug statement of
    #  extract_comments was missing. Everything below the startswith()
    #  test is a reconstruction based on how load_parameters compares
    #  the result with PFILE_VERSION. Confirm against the repository.
    #

    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: opening file (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

    # open the file
    #
    try:
        fp = open(fname, MODE_READ_TEXT)
    except IOError:
        print("%s (line: %s) %s: file not found (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
        return None

    # define version value
    #
    ver = None

    # iterate over lines until we find the magic string
    #
    with fp:
        for line in fp:

            # set every character to be lowercase
            #
            line = line.lower()

            # check if string contains "version"
            #  NOTE(review): the second literal is assumed to be the
            #  start of an XML declaration - TODO confirm
            #
            if line.startswith("version") or \
               line.startswith("<?xml version"):

                # keep the first token after "version", dropping the
                # equal sign and any double quotes around the value
                #
                tail = line.split(DELIM_VERSION, 1)[-1]
                tokens = tail.replace(DELIM_EQUAL, DELIM_SPACE).split()
                if tokens:
                    ver = tokens[0].replace(DELIM_QUOTE, DELIM_NULL)

                # stop at the first version line
                #
                break

    # exit gracefully
    #
    return ver
#
# end of function

def extract_comments(fname, cdelim = "#", cassign = "="):
    """
    function: extract_comments

    arguments:
     fname: the filename
     cdelim: the character that begins a comment line
     cassign: the character that separates a name from its value

    return:
     a dict mapping each comment name to its value; None if the file
     cannot be opened

    description:
     this function extracts name/value pairs from the comment lines of
     a file (e.g. "# version = v1.0.0") and returns them as a
     dictionary. Lines that do not start with cdelim are ignored.
    """

    # NOTE(review): the signature, the docstring and the two local
    #  declarations below were missing from the corrupted copy under
    #  review. They are reconstructed from the names used in the
    #  surviving body (fname, cdelim, dict_comments,
    #  regex_assign_comment) and from the two %s fields of
    #  DEF_REGEX_ASSIGN_COMMENT. Confirm against the repository.
    #

    # declare local variables
    #
    dict_comments = {}
    regex_assign_comment = re.compile(DEF_REGEX_ASSIGN_COMMENT %
                                      (cdelim, cassign))

    # display informational message
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: opening file (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

    # open the file
    #
    try:
        fp = open(fname, MODE_READ_TEXT)
    except IOError:
        print("%s (line: %s) %s: file not found (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
        return None

    # loop through the file
    #
    for line in fp:

        # strip all the carriage returns within the line
        #
        line = line.replace(DELIM_CARRIAGE, DELIM_NULL)

        # skip all the lines that are not a comment
        #
        if not line.startswith(cdelim):
            continue

        # extract all of the comments:
        #  the expression requires a newline after the value, so a
        #  final line without a trailing newline does not match
        #
        assign_comment = re.findall(regex_assign_comment, line)

        # append it to the dictionary
        #
        if assign_comment:
            dict_comments[assign_comment[0][0].strip()] \
                = assign_comment[0][1].strip()

    # close the file
    #
    fp.close()

    # exit gracefully
    #
    return dict_comments
#
# end of file