#!/usr/bin/env python
#
# file: $NEDC_NFC/class/python/nedc_ann_tools/nedc_ann_dpath_tools.py
#
# revision history:
#
# 20250529 (DH): added more helper constants
# 20240922 (DH): fixed csv file loading bug
# 20240723 (DH): rewrote code to include AnnGrDpath internal data structure
# 20230622 (AB): Refactored code to new comment style
# 20220406 (PM): Write function
# 20220303 (PM): Modified AnnDpath XML section to support schema
# 20220128 (ML): added  functions to support csv
# 20220126 (JP): completed another code review
# 20220122 (PM): Updated the API for the return structure of read
# 20220117 (JP): completed the first pass of code review
# 20220112 (PM): Add the CSV class
# 20220106 (PM): modified the XML class
# 20211229 (PM): added DpathChecker Class
# 20210201 (TC): initial version
#
# This class contains a collection of methods that provide
# the infrastructure for processing annotation-related data.
#------------------------------------------------------------------------------
#
# These file types are supported: csv and xml. Each of these
# files has a specific header structure:
#
# CSV:
#  # version = csv_v1.0.0
#  # MicronsPerPixel = microns_per_pixel_value
#  # bname = file_name_value
#  # width = width_value pixels, height = height_value pixels
#  # tissue = tissue_value_list
#
#  ... data follows ...
#
# XML:
#  <Annotations MicronsPerPixel="microns_per_pixel_value">
#   <Annotation [a_lot_of_meta_data_information="a_lot_of_meta_data_values"]>
#    <Attributes>
#     <Attribute Name="Width" Id="0" Value="width_value" />
#     <Attribute Name="Height" Id="0" Value="height_value" />
#    </Attributes>
#     <Regions>
#     <RegionAttributeHeaders>
#      <AttributeHeader [a_lot_of_attr="a_lot_of_attr_values"] />
#          [zero to several lines of ImageScope defines attrs]
#     </RegionAttributeHeaders>
#
#  ... data follows ...
#
#------------------------------------------------------------------------------

# import required modules
#
import copy
import os
import pandas as pd
import numpy as np
import re
import sys
import xml.etree.ElementTree as et
from lxml import etree
from xml.dom import minidom as md

# import required NEDC modules
#
import nedc_debug_tools as ndt
import nedc_file_tools as nft

#------------------------------------------------------------------------------
#
# global variables are listed here
#
#------------------------------------------------------------------------------

# set the filename using base name; __FILE__ is the module name printed
# in this file's debug and error messages
#
__FILE__ = os.path.basename(__file__)

# define default xml schema; the path starts with a literal $NEDC_NFC
# that is not expanded here
# NOTE(review): presumably expanded by whichever code opens the schema -
# confirm against the Xml class
#
DEF_XML_SCHEMA = "$NEDC_NFC/lib/nedc_dpath_xml_schema_v00.xsd"

#******************************************************************************
#
# define the header and graph structure keys for the supported file types
#
#------------------------------------------------------------------------------
# define constants for accessing the AnnGrDpath graph_d data structure;
# every key is taken from nedc_file_tools (nft) so that the three key
# families below (CKEY_*, CSV_KEY_*, XML_KEY_*) resolve to the same values
#
CKEY_REGION_ID = nft.DEF_REGION_ID
CKEY_TEXT = nft.DEF_TEXT
CKEY_COORDINATES = nft.DEF_COORDINATES
CKEY_CONFIDENCE = nft.DEF_CONFIDENCE
CKEY_TISSUE_TYPE = nft.DEF_TISSUE_TYPE
CKEY_MICRON_LENGTH = nft.DEF_MICRON_LENGTH
CKEY_MICRON_AREA = nft.DEF_MICRON_AREA
CKEY_LENGTH = nft.DEF_LENGTH
CKEY_AREA = nft.DEF_AREA
CKEY_GEOM_PROPS = nft.DEF_GEOM_PROPS

# define constants for accessing the AnnGrDpath header_d data structure
#
CKEY_TISSUE = nft.DEF_TISSUE
CKEY_HEIGHT = nft.DEF_HEIGHT
CKEY_WIDTH = nft.DEF_WIDTH
CKEY_MICRONS = nft.DEF_MICRONS
CKEY_BNAME = nft.DEF_BNAME

# define constants for accessing the header and graph of AnnGrDpath
#
CKEY_HEADER = 'header'
CKEY_GRAPH = 'data'

#-------------------------------------------------------------------------------
# define constants for accessing the CSV graph_d data structure
# (same nft values as the CKEY_* graph keys above)
#
CSV_KEY_REGION_ID = nft.DEF_REGION_ID
CSV_KEY_TEXT = nft.DEF_TEXT
CSV_KEY_COORDINATES = nft.DEF_COORDINATES
CSV_KEY_CONFIDENCE = nft.DEF_CONFIDENCE
CSV_KEY_TISSUE_TYPE = nft.DEF_TISSUE_TYPE
CSV_KEY_MICRON_LENGTH = nft.DEF_MICRON_LENGTH
CSV_KEY_MICRON_AREA = nft.DEF_MICRON_AREA
CSV_KEY_LENGTH = nft.DEF_LENGTH
CSV_KEY_AREA = nft.DEF_AREA
CSV_KEY_GEOM_PROPS = nft.DEF_GEOM_PROPS

# define constants for accessing the CSV header_d data structure
# (same nft values as the CKEY_* header keys above)
#
CSV_KEY_TISSUE = nft.DEF_TISSUE
CSV_KEY_HEIGHT = nft.DEF_HEIGHT
CSV_KEY_WIDTH = nft.DEF_WIDTH
CSV_KEY_MICRONS = nft.DEF_MICRONS
CSV_KEY_BNAME = nft.DEF_BNAME

#-------------------------------------------------------------------------------
# define constants for accessing the XML graph_d data structure
# (same nft values as the CKEY_* graph keys above)
#
XML_KEY_REGION_ID = nft.DEF_REGION_ID
XML_KEY_TEXT = nft.DEF_TEXT
XML_KEY_COORDINATES = nft.DEF_COORDINATES
XML_KEY_CONFIDENCE = nft.DEF_CONFIDENCE
XML_KEY_TISSUE_TYPE = nft.DEF_TISSUE_TYPE
XML_KEY_MICRON_LENGTH = nft.DEF_MICRON_LENGTH
XML_KEY_MICRON_AREA = nft.DEF_MICRON_AREA
XML_KEY_LENGTH = nft.DEF_LENGTH
XML_KEY_AREA = nft.DEF_AREA
XML_KEY_GEOM_PROPS = nft.DEF_GEOM_PROPS

# define constants for the XML header_d data structure
# (same nft values as the CKEY_* header keys above)
#
XML_KEY_TISSUE = nft.DEF_TISSUE
XML_KEY_HEIGHT = nft.DEF_HEIGHT
XML_KEY_WIDTH = nft.DEF_WIDTH
XML_KEY_MICRONS = nft.DEF_MICRONS
XML_KEY_BNAME = nft.DEF_BNAME

#******************************************************************************
#
# define constants specific to DPATH's XML class
#
#------------------------------------------------------------------------------
# define constants for accessing xml data and
# writing data to xml files; these are the literal element and
# attribute names, so changing a value changes what is read/written
#
XML_ATTR_ANNOTATIONS = "Annotations"
XML_ATTR_MICRONS_PER_PIXEL = "MicronsPerPixel"
XML_ATTR_ANNOTATION = "Annotation"
XML_ATTR_ATTRIBUTE = "Attribute"
XML_ATTR_VALUE = "Value"
XML_ATTR_REGION_ATTRIBUTE_HEADERS = "RegionAttributeHeaders"
XML_ATTR_ATTRIBUTE_HEADER = "AttributeHeader"
XML_ATTR_COLUMN_WIDTH = "ColumnWidth"
XML_ATTR_LENGTH = "Length"
XML_ATTR_AREA = "Area"
XML_ATTR_LENGTH_MICRONS = "LengthMicrons"
XML_ATTR_AREA_MICRONS = "AreaMicrons"

# the capitalized form of nft.DEF_XML_TEXT
#
XML_ATTR_TEXT = nft.DEF_XML_TEXT.capitalize()
XML_ATTR_REGIONS = "Regions"
XML_ATTR_REGION = "Region"
XML_ATTR_ATTRIBUTES = "Attributes"
XML_ATTR_VERTICES = "Vertices"
XML_ATTR_VERTEX = "Vertex"
XML_ATTR_X = "X"
XML_ATTR_Y = "Y"
XML_ATTR_Z = "Z"
XML_ATTR_PLOTS = "Plots"

# despite the XML_ATTR_ prefix this is the xml declaration line,
# including its trailing newline
#
XML_ATTR_VERSION = '<?xml version="1.0" ?>\n'
XML_ATTR_WIDTH = "Width"
XML_ATTR_HEIGHT = "Height"
XML_ATTR_ID = "Id"
XML_ATTR_NAME = "Name"
XML_ATTR_READ_ONLY = "ReadOnly"
XML_ATTR_NAME_READ_ONLY = "NameReadOnly"

# NOTE(review): the doubled R in this constant's name looks like a typo;
# it is left alone because other code refers to the name as spelled
#
XML_ATTR_LINE_COLORR = "LineColorReadOnly"
XML_ATTR_INCREMENTAL = "Incremental"
XML_ATTR_TYPE = "Type"
XML_ATTR_LINE_COLOR = "LineColor"
XML_ATTR_VISIBLE = "Visible"
XML_ATTR_MARKUP = "MarkupImagePath"
XML_ATTR_MACRO = "MacroName"
XML_ATTR_ZOOM = "Zoom"

# NOTE(review): "Tisse" looks like a misspelling of "Tissue"; confirm
# against existing annotation files before changing this runtime string
#
XML_ATTR_TISSUE = "Tisse"
XML_ATTR_TISSUE_VALUE = "Tissue Value"
XML_ATTR_SELECTED = "Selected"
XML_ATTR_IMAGE_LOCATION = "ImageLocation"
XML_ATTR_IMAGE_FOCUS = "ImageFocus"
XML_ATTR_NEGATIVE_ROA = "NegativeROA"
XML_ATTR_INPUT_REGION_ID = "InputRegionId"
XML_ATTR_ANALYZE = "Analyze"
XML_ATTR_DISPLAY_ID = "DisplayId"

#-------------------------------------------------------------------------------
# define default values for writing xml data metadata/options/permissions;
# every default is kept as a string
#
DEF_BOOL_TRUE = "1"
DEF_BOOL_FALSE = "0"
DEF_ANN_COLOR = "65280"
DEF_IMG_FOCUS = "-1"
DEF_COLUMN_WIDTH = DEF_IMG_FOCUS
DEF_TYPE_OP = "4"
DEF_ID_OP = DEF_BOOL_TRUE
DEF_INT_OP = DEF_BOOL_FALSE
DEF_STR_OP = nft.DELIM_NULL
DEF_FLOAT_OP = "0.0"

#-------------------------------------------------------------------------------
# define generic dictionaries used for writing to xml files
# the variables these dictionaries hold define image scope
# permissions; the key order below is the order of the original
# definitions and is preserved on purpose
#

# default attribute values for an annotation
#
DEF_XML_ANN = {
    XML_ATTR_ID: DEF_ID_OP,
    XML_ATTR_NAME: DEF_STR_OP,
    XML_ATTR_NAME_READ_ONLY: DEF_BOOL_FALSE,
    XML_ATTR_READ_ONLY: DEF_BOOL_FALSE,
    XML_ATTR_LINE_COLORR: DEF_INT_OP,
    XML_ATTR_INCREMENTAL: DEF_BOOL_FALSE,
    XML_ATTR_TYPE: DEF_TYPE_OP,
    XML_ATTR_LINE_COLOR: DEF_ANN_COLOR,
    XML_ATTR_VISIBLE: DEF_BOOL_TRUE,
    XML_ATTR_SELECTED: DEF_BOOL_TRUE,
    XML_ATTR_MARKUP: DEF_STR_OP,
    XML_ATTR_MACRO: DEF_STR_OP,
}

# default attribute values for a region
#
DEF_XML_REG = {
    XML_ATTR_TYPE: DEF_INT_OP,
    XML_ATTR_ZOOM: DEF_FLOAT_OP,
    XML_ATTR_SELECTED: DEF_BOOL_FALSE,
    XML_ATTR_IMAGE_LOCATION: DEF_STR_OP,
    XML_ATTR_IMAGE_FOCUS: DEF_IMG_FOCUS,
    XML_ATTR_NEGATIVE_ROA: DEF_INT_OP,
    XML_ATTR_INPUT_REGION_ID: DEF_INT_OP,
    XML_ATTR_ANALYZE: DEF_BOOL_TRUE,
}

#-------------------------------------------------------------------------------
# define xml parsing-related variables: element paths used to locate
# regions and attributes, and the confidence value used for xml data
#
XML_REGION_PATH = "Annotation/Regions/Region"
XML_REGION_ATTR_PATH = "Attributes/Attribute"
XML_ATTR_PATH = "Annotation/Attributes/Attribute"
XML_CONFIDENCE = 1.0

#******************************************************************************
#
# define constants specific to DPATH's CSV class
#
#------------------------------------------------------------------------------
# define constants for accessing csv data
# and printing to csv files
#
# CSV_ATTR_CONFIDENCE, CSV_ATTR_TEXT and CSV_ATTR_TISSUE_VALUE used to be
# assigned twice in this section; only the assignment that took effect
# (the later one) is kept, so each name has a single definition
#
CSV_ATTR_ROW = "row"
CSV_ATTR_INDEX = "index"
CSV_ATTR_COLUMN = "column"
CSV_ATTR_DEPTH = "depth"
CSV_ATTR_COORDS = nft.DEF_XML_COORDS
CSV_ATTR_CONFIDENCE = nft.DEF_XML_CONFIDENCE

# the capitalized form of nft.DEF_XML_TEXT
#
CSV_ATTR_TEXT = nft.DEF_XML_TEXT.capitalize()
CSV_ATTR_REGION_ID = nft.DEF_XML_REGION_ID
CSV_ATTR_TISSUE_VALUE = "Tissue Value"
CSV_ATTR_TISSUE = 'tissue'
CSV_ATTR_NULL = "null"
CSV_ATTR_LENGTH = XML_ATTR_LENGTH
CSV_ATTR_AREA = XML_ATTR_AREA
CSV_ATTR_AREA_MICRONS = XML_ATTR_AREA_MICRONS
CSV_ATTR_LENGTH_MICRONS = XML_ATTR_LENGTH_MICRONS
CSV_ATTR_BNAME = "bname"
CSV_ATTR_MICRONS = XML_ATTR_MICRONS_PER_PIXEL
CSV_ATTR_VERSION = "# version = csv_v1.0.0"
CSV_ATTR_VERTICES = XML_ATTR_VERTICES

# same value as CSV_ATTR_REGION_ID
#
CSV_ATTR_RINDEX = nft.DEF_XML_REGION_ID
CSV_ATTR_LABEL = nft.DEF_XML_LABEL

#-------------------------------------------------------------------------------
# define the string to check the file header
#
CSV_FMT_VERSION = CSV_ATTR_VERSION
CSV_FMT_EXT = nft.DEF_EXT_CSV

#******************************************************************************
#
# define DPATH format strings for printing/writing
#
#------------------------------------------------------------------------------
# define dpath output formats; each one is a %-style template
#
DEF_FMT_REGION = " Region %s:\n"
DEF_FMT_DIMEN = " width = %s pixels, height = %s pixels\n"
DEF_FMT_HEAD = " %s = %s\n"
DEF_FMT_ITEM = "  %s = %s\n"
DEF_FMT_VERTS = "  %s: min_x = %s, max_x = %s, min_y = %s, max_y = %s\n"

# the column header line (no trailing newline)
#
DEF_FMT_HEADER = (
    "index,region_id,tissue,label,coord_index,"
    "row,column,depth,confidence,Length,Area,LengthMicrons,AreaMicrons"
)

#******************************************************************************
#
# define DPATH regex objects for data extraction
#
#------------------------------------------------------------------------------
# define regular expressions for headers/comments
#
# all patterns are raw strings: "\d" inside a normal (or f-) string is an
# invalid escape sequence that newer Python versions warn about
#

# a comment line: "# " followed by a letter, captured up to (but not
# including) the next newline
#
DEF_REGEX_COMMENT = re.compile(r'(# [a-z].+?(?=\n))', re.IGNORECASE)

# "MicronsPerPixel = <value>"
# NOTE(review): the "." between the digit groups is unescaped and so
# matches any character; kept as is to preserve existing matching behavior
#
DEF_REGEX_MICRON = re.compile(r'(MicronsPerPixel) = (\d+.\d+)')

# "width = <int>" and "height = <int>"
#
DEF_REGEX_WIDTH = re.compile(r'(width) = (\d+)')
DEF_REGEX_HEIGHT = re.compile(r'(height) = (\d+)')

# "# tissue = a, b, c": group 1 is the comma-separated tissue list
#
DEF_REGEX_TISSUE = re.compile(
    r'^\s*#\s*tissue\s*=\s*'
    r'([^,\s]+(?:\s*,\s*[^,\s]+)*)'
    r'\s*$',
    re.IGNORECASE
)

#*******************************************************************************
#
# define default graph/header values here
#
#-------------------------------------------------------------------------------

# define default header values
# NOTE(review): "unkown" is spelled this way in the original; it is a
# runtime string and is kept unchanged
#
DEF_HEADER_TISSUE = "unkown"
DEF_HEADER_MICRONS = 0.5022
DEF_HEADER_WIDTH = 0
DEF_HEADER_HEIGHT = 0

# define default graph values
#
DEF_GRAPH_TISSUE = "unkown"

#*******************************************************************************
#
# define all class types below
#
#-------------------------------------------------------------------------------
# the label strings
#
DEF_BCKG = 'bckg'
DEF_NORM = 'norm'
DEF_NULL = 'null'
DEF_ARTF = 'artf'
DEF_NNEO = 'nneo'
DEF_INFL = 'infl'
DEF_SUSP = 'susp'
DEF_INDC = 'indc'
DEF_DCIS = 'dcis'

# the list of all labels
#
DEF_CLASSES = [
    DEF_BCKG, DEF_NORM, DEF_NULL,
    DEF_ARTF, DEF_NNEO, DEF_INFL,
    DEF_SUSP, DEF_INDC, DEF_DCIS,
]

# define a default parameter to int map: the integer code of a label is
# its position in the ordering below (note that this ordering differs
# from DEF_CLASSES)
#
DEF_CLASS_MAP = {
    label: code for code, label in enumerate((
        DEF_NORM,
        DEF_ARTF,
        DEF_NNEO,
        DEF_INFL,
        DEF_SUSP,
        DEF_DCIS,
        DEF_INDC,
        DEF_NULL,
        DEF_BCKG,
    ))
}

# define keys to access the label map file parameters
#
DPATH_LABEL_MAP = "DPATH_LABEL_MAP"
DPATH_PRIORITY_MAP = "DPATH_PRIORITY_MAP"

# declare a global debug object so we can use it in functions and classes;
# it is compared against the ndt levels (ndt.BRIEF, ndt.FULL) to decide
# whether debug messages are printed
#
dbgl = ndt.Dbgl()

#------------------------------------------------------------------------------
#
# functions listed here
#
#------------------------------------------------------------------------------

def validate_AnnGrDpath(graph, header):
    """
    function: validate_AnnGrDpath

    arguments:
     graph: a dictionary with AnnGrDpath graph data (index -> region dict)
     header: header dict; its width and height are used for bounds checking

    return:
     status: boolean value indicating graph validity

    description:
     This function validates that each region entry has the expected
     fields and types. Coordinates are assumed to be (x, y, z) for
     all file types. Both arguments are required: the function returns
     False if either one is not a dictionary, or if the header width and
     height cannot be converted to integers. Every (x, y) must fall
     inside [0, width] x [0, height]; a vertex lying exactly on the right
     or bottom edge is accepted. An error message is printed for the
     first problem found and validation stops there.
    """

    # display debugging information
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: validating graph" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))

    # graph must be a dict
    #
    if not isinstance(graph, dict):
        print("Error: %s (line: %s) %s: graph is not a dict" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))
        return False

    # header must be a dict
    #
    if not isinstance(header, dict):
        print("Error: %s (line: %s) %s: header is not a dict" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__))
        return False

    # bounds from header: a missing or non-integer width/height makes
    # the bounds check impossible, so the graph cannot be validated
    #
    try:
        w = int(header[CKEY_WIDTH])
        h = int(header[CKEY_HEIGHT])
    except Exception as e:
        print("Error: %s (line: %s) %s: header bounds invalid (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, e))
        return False

    # helper: test that every (x, y) is inside the image bounds
    # (inclusive on both ends); only called on coordinates that have
    # already been checked to be integer-convertible
    #
    def _fits(coords, w, h):
        return all(0 <= int(c[0]) <= w and 0 <= int(c[1]) <= h
                   for c in coords)

    # required per-region keys
    #
    req = [CKEY_REGION_ID, CKEY_TEXT, CKEY_COORDINATES, CKEY_CONFIDENCE,
           CKEY_TISSUE_TYPE, CKEY_GEOM_PROPS]

    # iterate regions
    #
    for idx, reg in graph.items():

        # index must be an int
        #
        if not isinstance(idx, int):
            print("Error: %s (line: %s) %s: index not int (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False

        # region must be a dict with required keys
        #
        if not isinstance(reg, dict):
            print("Error: %s (line: %s) %s: region not dict (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False
        missing = [k for k in req if k not in reg]
        if missing:
            print("Error: %s (line: %s) %s: missing keys %s (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__,
                   ", ".join(str(k) for k in missing), idx))
            return False

        # region_id: must convert to a non-negative integer
        #
        try:
            rid = int(reg[CKEY_REGION_ID])
            if rid < 0:
                raise ValueError("rid < 0")
        except Exception as e:
            print("Error: %s (line: %s) %s: region_id invalid (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, e))
            return False

        # text/label
        #
        if not isinstance(reg[CKEY_TEXT], str):
            print("Error: %s (line: %s) %s: text not str (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False

        # coordinates must be non-empty list of triplets (x, y, z)
        #
        coords = reg[CKEY_COORDINATES]
        if not isinstance(coords, (list, tuple)) or len(coords) == 0:
            print("Error: %s (line: %s) %s: coords empty (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False
        for c in coords:
            if (not isinstance(c, (list, tuple))) or len(c) < 3:
                print("Error: %s (line: %s) %s: coord bad (idx=%d)" %
                      (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
                return False
            try:
                int(c[0]); int(c[1]); int(c[2])
            except Exception:
                print("Error: %s (line: %s) %s: coord types (idx=%d)" %
                      (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
                return False

        # bounds check against the header dimensions
        #
        if not _fits(coords, w, h):
            print("Error: %s (line: %s) %s: coords OOB (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False

        # confidence in [0, 1]
        #
        try:
            conf = float(reg[CKEY_CONFIDENCE])
            if conf < 0.0 or conf > 1.0:
                raise ValueError("confidence out of range")
        except Exception as e:
            print("Error: %s (line: %s) %s: confidence invalid (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, e))
            return False

        # tissue_type
        #
        if not isinstance(reg[CKEY_TISSUE_TYPE], str):
            print("Error: %s (line: %s) %s: tissue not str (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False

        # geom props: must be a dict; each known property that is present
        # must convert to a non-negative float
        #
        gp = reg[CKEY_GEOM_PROPS]
        if gp is None:
            print("Error: %s (line: %s) %s: geom props missing (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False
        if not isinstance(gp, dict):
            print("Error: %s (line: %s) %s: geom not dict (idx=%d)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, idx))
            return False
        for k in [CKEY_LENGTH, CKEY_AREA,
                  CKEY_MICRON_LENGTH, CKEY_MICRON_AREA]:
            if k in gp:
                try:
                    if float(gp[k]) < 0.0:
                        raise ValueError("neg value")
                except Exception as e:
                    print("Error: %s (line: %s) %s: geom bad (%s)" %
                          (__FILE__, ndt.__LINE__, ndt.__NAME__, e))
                    return False

    # exit gracefully
    #
    return True
#
# end of function
def remap_labels(graph, label_map):
    """
    function: remap_labels

    arguments:
      graph: original annotation graph (index -> region-dict)
      label_map: dict that maps each target label to the collection of
                 raw labels that collapse onto it, e.g.
                 {
                   "NULL": ["null", "norm"],
                   "BCKG": ["bckg"],
                   ...
                 }

    return:
      new_graph: deep-copied graph in which the CKEY_TEXT of every region
                 whose label appears in label_map is replaced by its
                 target label (a string)

    description:
      1) Invert label_map into a raw -> target lookup. If a raw label is
         listed under several targets, the last one listed wins.
      2) Walk a deep copy of the graph and replace each region's text
         with its target label. Regions whose text is not a known raw
         label are left untouched. The input graph is never modified.
    """

    # invert the map: raw_label -> target_label
    #
    raw2tgt = {}
    for target, raw_labels in label_map.items():
        for raw_label in raw_labels:
            raw2tgt[raw_label] = target

    # work on a deep copy so the caller's graph is left intact
    #
    new_graph = copy.deepcopy(graph)

    # rewrite the label of every region that has a mapping
    #
    for region in new_graph.values():
        current = region.get(CKEY_TEXT)
        if current in raw2tgt:
            region[CKEY_TEXT] = raw2tgt[current]

    # exit gracefully
    #  return the remapped graph
    #
    return new_graph
#
# end of function

def re_index(graph):
    """
    function: re_index

    arguments:
     graph: graph data structure (a dictionary)

    return:
     a new dictionary holding the same values, keyed 0, 1, 2, ...

    description:
     this function renumbers the entries of a graph consecutively from
     zero, following the iteration order of the input dictionary. The
     values themselves are not copied.
    """

    # assign consecutive indices in iteration order
    #
    new_graph = {}
    for position, region in enumerate(graph.values()):
        new_graph[position] = region

    # return new graph
    #
    return new_graph

#
# end of function

def write_data_to_file(graph, width, height, ofile):
    """
    function: write_data_to_file

    arguments:
     graph: annotation graph to write
     width: the image width
     height: the image height
     ofile: the output file name

    return: None

    description:
     Builds an AnnDpath object of type csv with the provided graph and a
     header made of ofile (as the base name), the default microns per
     pixel, the given width and height, and the default tissue, then
     writes it to ofile.
    """

    # display debug information
    #  (only names that exist at this point may be used here: the
    #   annotation object has not been created yet)
    #
    if dbgl > ndt.BRIEF:
        print("%s (line: %s) %s: preparing output file data (%s)" %
              (__FILE__, ndt.__LINE__, ndt.__NAME__, ofile))

    # instantiate annotation writer
    #
    ann = AnnDpath()

    # set annotation type to csv
    #
    ann.set_type(nft.DEF_EXT_CSV)

    # prepare header dictionary
    #
    header = {
        CKEY_BNAME: ofile,
        CKEY_MICRONS: DEF_HEADER_MICRONS,
        CKEY_WIDTH: width,
        CKEY_HEIGHT: height,
        CKEY_TISSUE: [DEF_HEADER_TISSUE]
    }

    # set header on annotation
    #
    ann.set_header(header)

    # set graph on annotation
    #
    ann.set_graph(graph)

    # write annotation to file
    #
    ann.write(ofile)
#
# end of function

#------------------------------------------------------------------------------
#
# + Classes are listed here:
#   There are four classes in this file arranged in this hierarchy
#   AnnGrDpath -> {Csv, Xml} -> AnnDpath
#
# + Breakdown of Ann_DPATH_Tools:
#
#   AnnGrDpath : The basic data structure that every other class uses
#   Csv        : The class that deals with Csv files
#   Xml        : The class that deals with Xml files
#   AnnDpath   : This is a wrapper for all the other classes.
#                You would ONLY need to instantiate this class.
#
#   The two classes {Csv, Xml} share a set of common methods that have
#   the same names (it is important that the names are the same for
#   AnnDpath to work).
#
#   Here are the common methods:
#    + load()
#    + write()
#    + create()
#    + add()
#    + validate()
#    + print_events_from_files()
#    + delete()
#    + get_graph()
#    + set_graph()
#    + get_header()
#    + set_header()
#    + delete_header()
#    + delete_graph()
#
#   Nedc_dpath_ann_tools works by using the AnnDpath class to automatically
#   call the correct method for the correct file type. So DO NOT REMOVE any
#   of the common methods pointed out above. Additionally, AnnDpath relies on
#   the FTYPE_OBJECTS dictionary at the bottom of this file. Please look into
#   changing this dictionary before changing the AnnDpath class.
#
# + Graphing Object Structure:
#
#   Below is the returning Graphing Object Structure:
#
#    graph = { index : { region_id : val, text : val, coordinates : val_list,
#                        confidence : val, tissue_type : val, geom_props : val_dict
#                       } }
#
#    index: int
#    region_id: int
#    text: string
#    coordinates: list of list of x, y, z components represented as integers
#    confidence:  float
#    tissue_type: string
#    geom_props : dictionary of geometric properties (length, area, ...)
#
#   Ex:
#    graph = { 0 : { region_id : 1, text : bckg, coordinates : [ [1, 2, 3] ],
#                    confidence : 1.0, tissue_type : breast,
#                    geom_props : {Length : 0.0, Area : 0.0,
#                                  LengthMicrons : 0.0, AreaMicrons : 0.0} } }
#
# + header object structure
#
#   Below is the returning header data dictionary
#
#    header = {bname : val,
#              MicronsPerPixel : val
#              width : val
#              height : val
#              tissue : val_list}
#
#    bname: string
#    MicronsPerPixel: float
#    width: int
#    height: int
#    tissue: list of strings naming all annotated tissues
#
#------------------------------------------------------------------------------

class AnnGrDpath:
    """
    Class: AnnGrDpath

    description:
     This class implements the main data structure used to hold an
     annotation: a header dictionary (header_d) and a graph dictionary
     (graph_d) that maps an index to a region dictionary.
    """

    def __init__(self):
        """
        method: constructor

        arguments:
         none

        return:
         none

        description:
         This method initializes the internal data structure class
         with an empty header and an empty graph.
        """

        # set the class name
        #
        AnnGrDpath.__CLASS_NAME__ = self.__class__.__name__

        # declare a dictionary to hold header data
        #
        self.header_d = {}

        # declare a dictionary to hold annotations
        #
        self.graph_d = {}
    #
    # end of method

    @staticmethod
    def _make_region(region_id, text, coordinates, confidence,
                     tissue_type, geom_props):
        """
        method: _make_region

        arguments:
         region_id: the region_id of the new region
         text: the label of the new region
         coordinates: list of a list of x, y, and z coordinates
         confidence: the confidence of the new region
         tissue_type: a string indicating the annotated tissue
         geom_props: a dictionary of the geometric properties

        return:
         a region dictionary holding the six values

        description:
         this method builds one region entry of graph_d. The arguments
         are stored as given (they are not copied).
        """

        # build the entry (the key order is the one used by create)
        #
        return {
            CKEY_REGION_ID: region_id,
            CKEY_TEXT: text,
            CKEY_COORDINATES: coordinates,
            CKEY_CONFIDENCE: confidence,
            CKEY_TISSUE_TYPE: tissue_type,
            CKEY_GEOM_PROPS: geom_props,
        }
    #
    # end of method

    @staticmethod
    def _merge_coordinates(existing, incoming):
        """
        method: _merge_coordinates

        arguments:
         existing: the vertices already stored for a region
         incoming: the vertices to merge in

        return:
         a new list: existing followed by every incoming vertex that
         was not already present, in their original order

        description:
         this method merges two vertex lists without duplicating
         vertices. Vertices are compared through their tuple form, so
         [1, 2, 3] and (1, 2, 3) count as the same vertex. A TypeError
         is raised if a vertex cannot be turned into a hashable tuple.
        """

        # start from a copy of the stored vertices
        #
        merged = list(existing)
        seen = {tuple(vertex) for vertex in merged}

        # append each unseen vertex once
        #
        for vertex in incoming:
            key = tuple(vertex)
            if key not in seen:
                seen.add(key)
                merged.append(vertex)

        # return the merged list
        #
        return merged
    #
    # end of method

    def set_graph(self, graph):
        """
        method: set_graph

        arguments:
         graph: a graph object

        return:
         boolean value indicating status

        description:
         this method sets the graph object
        """

        # set new graph object
        #
        self.graph_d = graph

        # exit gracefully
        #
        return True
    #
    # end of method

    def set_header(self, header):
        """
        method: set_header

        arguments:
         header: a header object

        return:
         boolean value indicating status

        description:
         this method sets the header object
        """

        # set new header object
        #
        self.header_d = header

        # exit gracefully
        #
        return True
    #
    # end of method

    def get_graph(self):
        """
        method: get_graph

        arguments:
         none

        return:
         a graph object

        description:
         this method fetches the graph object
        """

        # exit gracefully
        # return graph object
        #
        return self.graph_d
    #
    # end of method

    def get_header(self):
        """
        method: get_header

        arguments:
         none

        return:
         a header object

        description:
         this method fetches the header object
        """

        # exit gracefully
        # return header object
        #
        return self.header_d
    #
    # end of method

    def delete_graph(self):
        """
        method: delete_graph

        arguments:
         none

        return:
         a boolean value indicating status

        description:
         this method sets the graph object to empty
        """

        # set graph to empty
        #
        self.graph_d = {}

        # exit gracefully
        #
        return True
    #
    # end of method

    def delete_header(self):
        """
        method: delete_header

        arguments:
         none

        return:
         a boolean value indicating status

        description:
         this method sets the header object to empty
        """

        # set header to empty
        #
        self.header_d = {}

        # exit gracefully
        #
        return True
    #
    # end of method

    def delete(self, region):
        """
        method: delete

        arguments:
         region: the region_id of the region to delete

        return:
         boolean value indicating status: True if a region was deleted,
         False if no region carries this region_id

        description:
         this method deletes from graph_d the first region whose
         region_id equals the argument, then re-indexes the graph so
         that its indices are consecutive again
        """

        # find the index of the first matching region; the graph is not
        # modified while it is being iterated
        #
        for index, entry in self.graph_d.items():
            if region == entry[CKEY_REGION_ID]:
                break
        else:

            # exit ungracefully
            #  failed to delete region
            #
            return False

        # remove the region
        #
        del self.graph_d[index]

        # re-index graph after removing
        #
        self.graph_d = re_index(self.graph_d)

        # exit gracefully
        #
        return True

    #
    # end of method

    def add(self,
            index,
            region_id,
            text,
            coordinates,
            confidence,
            tissue_type,
            geom_props):
        """
        method: add

        arguments:
         index: the integer index to add
         region_id: the integer region_id to add
         text: the label to add (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: float value indicating confidence
         tissue_type: a string indicating tissue that is being annotated
         geom_props: a dictionary of the geometric properties

        return:
         a boolean value indicating status

        description:
         this method adds an annotation to the underlying graph_d object.
         If index holds no region yet, a new region is created from the
         arguments. Otherwise the existing region is completed:
          - region_id, text, confidence and tissue_type are only set
            when the region does not have them yet,
          - geom_props is set when it is missing or None,
          - coordinates not already present are appended to the stored
            coordinates, keeping their order.
        """

        # fetch the region stored at this index (if any)
        #
        region = self.graph_d.get(index)

        # nothing usable is stored at this index: start a fresh region
        #
        if not isinstance(region, dict):
            self.graph_d[index] = self._make_region(
                region_id, text, coordinates, confidence,
                tissue_type, geom_props)
            return True

        # complete the scalar fields without overwriting existing values
        #
        region.setdefault(CKEY_REGION_ID, region_id)
        region.setdefault(CKEY_TEXT, text)
        region.setdefault(CKEY_CONFIDENCE, confidence)
        region.setdefault(CKEY_TISSUE_TYPE, tissue_type)

        # geom props: fill in when missing or empty
        #
        if region.get(CKEY_GEOM_PROPS) is None:
            region[CKEY_GEOM_PROPS] = geom_props

        # coordinates: fill in when missing, otherwise merge
        #
        existing = region.get(CKEY_COORDINATES)
        if existing is None:
            region[CKEY_COORDINATES] = coordinates
        elif existing != coordinates:
            try:
                merged = self._merge_coordinates(existing, coordinates)
            except TypeError:

                # the vertex lists are malformed and cannot be merged:
                # keep the new value, as this method always has in
                # that situation
                #
                merged = coordinates
            region[CKEY_COORDINATES] = merged

        # exit gracefully
        #
        return True
    #
    # end of method

    def create(self,
               index,
               region_id,
               text,
               coordinates,
               confidence,
               tissue_type,
               geom_props):
        """
        method: create

        arguments:
         index: the integer index to create
         region_id: the integer region_id to create
         text: the text to add (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: float value indicating confidence
         tissue_type: a string indicating tissue that is being annotated
         geom_props: a dictionary of the geometric properties

        return:
         a boolean value indicating status

        description:
         this method creates an annotation in the underlying graph_d
         object, replacing whatever was stored at index
        """

        # initialize the region at this index
        #
        self.graph_d[index] = self._make_region(
            region_id, text, coordinates, confidence,
            tissue_type, geom_props)

        # exit gracefully
        #
        return True

#
# end of class

class AnnDpath:
    """
    Class: AnnDpath

    description:
     This class is the main class of this file. It contains methods to
     manipulate the set of supported annotation file formats (xml/csv).
    """

    def __init__(self, *, schema = DEF_XML_SCHEMA):
        """
        method: constructor

        arguments:
         schema: a schema file (keyword only); it is stored on this
                 object and passed to every file type object

        return:
         none

        description:
         This method constructs an AnnDpath object. It takes a private
         deep copy of FTYPE_OBJECTS, re-initializes each file type object
         in that copy with the schema, and starts with no annotation
         type selected.
        """

        # set the class name
        #
        AnnDpath.__CLASS_NAME__ = self.__class__.__name__

        # display debug information
        #
        if dbgl == ndt.FULL:
            print("%s (line: %s) %s::%s: constructing an annotation object" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__))

        # remember the schema so that load() can reuse it later
        #
        self.schema = schema

        # take a private copy of the table of file type objects
        #
        self.ftype_obj_d = copy.deepcopy(FTYPE_OBJECTS)

        # re-initialize the object stored in slot 1 of every table entry
        #
        for ftype_entry in self.ftype_obj_d.values():
            ftype_entry[1].__init__(schema = schema)

        # no annotation type is selected until load() or set_type() runs
        #
        self.type_d = None
    #
    # end of method

    def get_type(self, fname):
        """
        method: get_type

        arguments:
         fname: the file name

        return:
         the key in ftype_obj_d of the first file type object whose
         validate method accepts fname, or None if none of them does

        description:
         This method determines the annotation type of fname by asking
         each file type object, in table order, to validate the file.
        """

        # display debug information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: getting ann type (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, fname))

        # walk the table and stop at the first object that accepts fname
        #
        for type_name, ftype_entry in self.ftype_obj_d.items():
            if ftype_entry[1].validate(fname):
                return type_name

        # no file type object accepted the file
        #
        return None
    #
    # end of method
    
    def load(self, fname, schema = DEF_XML_SCHEMA ):
        """
        method: load

        arguments:
         fname: the file to be processed
         schema: optional schema file; when left at DEF_XML_SCHEMA the
                 schema stored on this object is used instead

        return:
         a boolean value indicating status: False if the file type is
         not recognized, otherwise the status returned by the file type
         object's load method

        description:
         This method loads an annotation file from disk. The table of
         file type objects is rebuilt from FTYPE_OBJECTS first, so
         anything previously loaded through this object is discarded.
        """

        # re-instantiate FTYPES variable
        #
        self.ftype_obj_d = copy.deepcopy(FTYPE_OBJECTS)

        # fall back to the schema given at construction time when the
        # caller did not supply one
        #
        if schema == DEF_XML_SCHEMA:
            schema = self.schema

        # re-initialize all file type objects with the chosen schema
        #
        for ftype_entry in self.ftype_obj_d.values():
            ftype_entry[1].__init__(schema = schema)

        # display debug information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: fetching file type %s" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

        # fetch and set type_d
        #
        self.type_d = self.get_type(fname)

        # report failure as False (not None) so the result is a real
        # boolean, matching the documented contract and the other
        # methods of this class
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: unknown file type (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
            return False

        # display debug information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: reading file %s" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

        # load file
        #
        status = self.ftype_obj_d[self.type_d][1].load(fname)

        # exit gracefully
        #
        return status
    #
    # end of method

    def write(self, ofile):
        """
        method: write

        arguments:
         ofile: output file

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's write method

        description:
         This method writes the currently loaded annotation to ofile
         using the file type object for the active annotation type.
        """

        # display debug information
        # (the file name is ofile; there is no fname in this method)
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: writing to file %s" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, ofile))

        # check if data has been loaded
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: no annotations to write" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return False

        # write contents to ofile
        #
        status = self.ftype_obj_d[self.type_d][1].write(ofile)

        # exit gracefully
        #
        return status

    #
    # end of method

    def print_events_from_file(self, fp = sys.stdout):
        """
        method: print_events_from_file

        arguments:
         fp: the output file pointer (default: sys.stdout)

        return:
         a boolean value indicating status

        description:
         This method displays annotation events in a readable format by
         delegating to the file type object for the active annotation
         type. The events are sent to fp.
        """

        # ensure annotation is loaded
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: no events to print" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return False

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: printing events" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))

        # call the lower level class's print_events_from_file method,
        # forwarding the caller's file pointer rather than always
        # using sys.stdout; if it fails, print an error message and
        # return false
        #
        if not self.ftype_obj_d[self.type_d][1] \
                   .print_events_from_file(fp = fp):
            print("Error: %s (line: %s) %s: error printing events" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return False

        # exit gracefully
        #
        return True

    #
    # end of method

    def add(self,
            index,
            region_id,
            text,
            coordinates,
            confidence,
            tissue_type,
            geom_props):
        """
        method: add

        arguments:
         index: the integer index to add
         region_id: the integer region_id to add
         text: the label to add (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: the confidence value to add
         tissue_type: a string indicating tissue that is being annotated
         geom_props: dictionary of geometric properties

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's add method

        description:
         this method adds an annotation through the file type object for
         the active annotation type
        """

        # display debugging information
        #
        # the two literals are joined by adjacency (not "+") so that the
        # % operator applies to the whole message, and there is one
        # placeholder for each of the eleven values
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: addition to annotation "
                  "(%s, %s, %s, %s, %s, %s, %s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, index, region_id, text, coordinates,
                   confidence, tissue_type, geom_props))

        # attempt to add annotation
        #
        if self.type_d is not None:
            status = self.ftype_obj_d[self.type_d][1].add(
                index, region_id, text, coordinates,
                confidence, tissue_type, geom_props
            )

        else:
            print("Error: %s (line: %s) %s: no annotations to add to" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            status = False

        # exit gracefully
        #
        return status
    #
    # end of method

    def create(self,
               index,
               region_id,
               text,
               coordinates,
               confidence,
               tissue_type,
               geom_props):
        """
        method: create

        arguments:
         index: the integer index to create
         region_id: the integer region_id to create
         text: the label to store (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: the confidence value to store
         tissue_type: a string indicating tissue that is being annotated
         geom_props: a dictionary containing geometric properties

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's create method

        description:
         this method creates an annotation through the file type object
         for the active annotation type
        """

        # display debugging information
        #
        # the two literals are joined by adjacency (not "+") so that the
        # % operator formats the whole message instead of only the
        # second literal
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: creating event "
                  "(%s, %s, %s ,%s, %s, %s, %s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, index, region_id, text, coordinates,
                   confidence, tissue_type, geom_props))

        # attempt to create annotation
        #
        if self.type_d is not None:
            status = self.ftype_obj_d[self.type_d][1].create(
                index, region_id, text, coordinates,
                confidence, tissue_type, geom_props
            )
        else:
            print("Error: %s (line: %s) %s: no annotations to create" %
                 (__FILE__, ndt.__LINE__, ndt.__NAME__))
            status = False

        # exit gracefully
        #
        return status
    #
    # end of method

    def delete(self, region):
        """
        method: delete

        arguments:
         region: region to delete

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's delete method

        description:
         This method deletes a region through the file type object for
         the active annotation type.
        """

        # display debugging information
        # (one placeholder per value: five values, five placeholders)
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: deleting (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, region))

        # attempt to delete annotation; when no annotation type is
        # active report an error and fail instead of leaving status
        # undefined
        #
        if self.type_d is not None:
            status = self.ftype_obj_d[self.type_d][1].delete(region)
        else:
            print("Error: %s (line: %s) %s: no annotations to delete" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            status = False

        # exit gracefully
        #
        return status
    #
    # end of method

    def set_schema(self, schema):
        """
        method: set_schema

        arguments:
         schema: schema file to set

        return:
         a boolean value indicating status: the value returned by the
         last file type object whose schema was set, or False if no
         file type object has a schema attribute

        description:
         this method forwards the schema file to every file type object
         that carries a schema attribute
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: setting schema (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, schema))

        # assume failure until some object accepts the schema
        #
        status = False

        # visit the object stored in slot 1 of every table entry
        #
        for ftype_entry in self.ftype_obj_d.values():
            handler = ftype_entry[1]

            # only objects that have the attribute named by
            # nft.DEF_SCHEMA are updated
            #
            if hasattr(handler, nft.DEF_SCHEMA):
                status = handler.set_schema(schema)

        # exit gracefully
        #
        return status
    #
    # end of method
    
    def validate(self, fname, schema = DEF_XML_SCHEMA):
        """
        method: validate

        arguments:
         fname: file to validate
         schema: a schema file

        return:
         a boolean value indicating status: False if the file type is
         not recognized, otherwise the result of the file type object's
         validate method

        description:
         This method determines the annotation type of fname and then
         asks the matching file type object to validate the file
         against schema.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: validating file (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, fname))

        # find out which file type object recognizes the file
        #
        type_s = self.get_type(fname)

        # an unrecognized file cannot be validated; the message is only
        # shown when debugging is enabled
        #
        if type_s is None:
            if dbgl > ndt.BRIEF:
                print("Error: %s (line %s) %s: file type is unsupported" %
                      (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return False

        # delegate validation to the matching file type object
        #
        return self.ftype_obj_d[type_s][1].validate(fname, schema = schema)
    #
    # end of method

    def set_type(self, ann_type):
        """
        method: set_type

        arguments:
         ann_type: the type of ann object to set (case insensitive)

        return:
         a boolean value indicating status (always True); if ann_type
         is not a key of ftype_obj_d the program exits instead

        description:
         This method selects the active annotation type. If a type is
         already active, the graph and header of the object for the
         current type are first handed to the object for the new type.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: setting file type" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__))

        # type names are compared in lowercase
        #
        ann_type = ann_type.lower()

        # an unknown type is fatal: print an error message and exit
        #
        if ann_type not in self.ftype_obj_d:
            print("Error: %s (line: %s) %s: %s (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__,
                   "Annotation type not supported", ann_type))
            sys.exit(os.EX_SOFTWARE)

        # nothing is active yet, so there is no data to carry over:
        # simply record the type
        #
        if self.type_d is None:
            self.type_d = ann_type
            return True

        # a type missing from FTYPE_OBJECTS is reported, but the call
        # still returns True and the active type is left unchanged
        #
        if ann_type not in FTYPE_OBJECTS:
            print("Error: %s (line: %s) %s: ann type not supported (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, ann_type))
            return True

        # pick out the object for the current type and for the new type
        #
        target = self.ftype_obj_d[ann_type][1]
        source = self.ftype_obj_d[self.type_d][1]

        # carry the graph over first, then the header; the statuses
        # returned by the setters are not examined
        #
        target.set_graph(source.get_graph())
        target.set_header(source.get_header())

        # switch the active type
        #
        self.type_d = ann_type

        # exit gracefully
        #
        return True
    #
    # end of method

    def set_graph(self, graph):
        """
        method: set_graph

        arguments:
         graph: the graph to hand to the active file type object

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's set_graph method

        description:
         This method passes graph to the object that handles the active
         annotation type.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: setting graph (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, graph))

        # without an active annotation type there is nowhere to put
        # the graph
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: no graph to set" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return False

        # delegate to the object handling the active type
        #
        return self.ftype_obj_d[self.type_d][1].set_graph(graph)
    #
    # end of method

    def set_header(self, header):
        """
        method: set_header

        arguments:
         header: the header to hand to the active file type object

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's set_header method

        description:
         This method passes header to the object that handles the active
         annotation type.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: setting header (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, header))

        # without an active annotation type there is nowhere to put
        # the header
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: no header to set" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return False

        # delegate to the object handling the active type
        #
        return self.ftype_obj_d[self.type_d][1].set_header(header)
    #
    # end of method

    def delete_graph(self):
        """
        method: delete_graph

        arguments:
         none

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's delete_graph method

        description:
         This method asks the object that handles the active annotation
         type to delete its graph.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: deleting graph" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__))

        # attempt to delete graph
        #
        if self.type_d is not None:
            status = self.ftype_obj_d[self.type_d][1].delete_graph()
        else:
            print("Error: %s (line: %s) %s: no graph to delete" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            status = False

        # exit gracefully
        #
        return status
    #
    # end of method

    def delete_header(self):
        """
        method: delete_header

        arguments:
         none

        return:
         a boolean value indicating status: False if no annotation type
         is active, otherwise the status returned by the file type
         object's delete_header method

        description:
         This method asks the object that handles the active annotation
         type to delete its header.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: deleting header" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__))

        # without an active annotation type there is no header to delete
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: no header to delete" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return False

        # delegate to the object handling the active type
        #
        return self.ftype_obj_d[self.type_d][1].delete_header()
    #
    # end of method

    def get_graph(self):
        """
        method: get_graph

        arguments:
         none

        return:
         the graph returned by the active file type object, or None if
         no annotation type is active

        description:
         This method fetches the annotation graph from the object that
         handles the active annotation type.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: getting graph" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__))

        # without an active annotation type there is no graph to fetch
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: no graph to get" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return None

        # delegate to the object handling the active type
        #
        return self.ftype_obj_d[self.type_d][1].get_graph()
    #
    # end of method

    def get_header(self):
        """
        method: get_header

        arguments:
         none

        return:
         the header returned by the active file type object, or None if
         no annotation type is active

        description:
         This method fetches the file header information from the object
         that handles the active annotation type.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: getting header information" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__))

        # without an active annotation type there is no header to fetch
        #
        if self.type_d is None:
            print("Error: %s (line: %s) %s: no header to get" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))
            return None

        # delegate to the object handling the active type
        #
        return self.ftype_obj_d[self.type_d][1].get_header()
    #
    # end of method

#
# end of class

class Xml:
    """
    Class: Xml

    description:
     This class abstracts xml processing.
    """

    def __init__(self, schema = DEF_XML_SCHEMA):
        """
        method: constructor

        arguments:
         schema: an xml schema file

        return:
         none

        description:
         This method constructs an Xml object: it records the class
         name, stores the full path of the schema file and creates an
         empty AnnGrDpath object to hold annotations.
        """

        # record the class name on the class itself
        #
        Xml.__CLASS_NAME__ = type(self).__name__

        # resolve the schema to a full path before storing it
        #
        schema_path = nft.get_fullpath(schema)
        self.schema = schema_path

        # create the container that stores the annotations
        #
        self.data_d = AnnGrDpath()

    #
    # end of method

    def load(self, fname, confidence=XML_CONFIDENCE):
        """
        method: load

        arguments:
         fname: filename
         confidence: confidence value stored with every region

        return:
         a boolean value indicating status: False if fname fails
         validation, True otherwise

        description:
         This method validates fname, then parses it with an
         incremental parser. The header information is stored in
         self.data_d.header_d and the regions found in the file are
         passed to set_graph as a dictionary keyed by a running index.
        """

        # clear left over graph/header info
        #
        self.data_d.header_d.clear()
        self.data_d.graph_d.clear()

        # refuse to load a file that does not pass validation
        #
        if not self.validate(fname):
            print("Error: %s (line: %s) %s: invalid xml file (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
            return False

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: loading xml file (%s)" %
                (__FILE__, ndt.__LINE__, Xml.__CLASS_NAME__,
                 ndt.__NAME__, fname))

        # create objects to hold data:
        #  graph: region records keyed by a running index
        #  tissues: distinct tissue values in order of first appearance
        #
        graph = {}
        tissues = []

        # create temp helper variables:
        #  width/height stay None unless found in the file
        #  bname is the file name without directory and extension
        #  idx is incremented before use, so the first region gets index 0
        #
        width = height = None
        bname = os.path.splitext(os.path.basename(fname))[0]
        idx = -1


        # iterate over the element tree parser output
        #
        for event, elem in etree.iterparse(fname, events=("start", "end")):

            # fetch header info
            #
            # NOTE(review): microns is parsed here but is not used
            # anywhere below; the header is filled with
            # DEF_HEADER_MICRONS instead - confirm this is intended
            #
            if event == "start" and elem.tag == XML_ATTR_ANNOTATIONS:
                microns = float(elem.get(XML_ATTR_MICRONS_PER_PIXEL))

            # only attributes whose parent is an attributes element
            # directly under an annotation element are examined here;
            # the attribute name is compared in lowercase
            #
            if (
                    event == "end"
                    and elem.tag == XML_ATTR_ATTRIBUTE
                    and elem.getparent().tag == XML_ATTR_ATTRIBUTES
                    and elem.getparent().getparent().tag == XML_ATTR_ANNOTATION
            ):
                name = elem.get("Name", "").lower()
                value = elem.get(XML_ATTR_VALUE)
                if name == "width":
                    width = int(value)
                elif name == "height":
                    height = int(value)

            # fetch region info once the region element is complete
            #
            if event == "end" and elem.tag == XML_ATTR_REGION:
                idx += 1

                # read the region's id, label and geometric properties
                #
                rid = int(elem.get(XML_ATTR_ID))
                text = elem.get(XML_ATTR_TEXT)
                length = float(elem.get(XML_ATTR_LENGTH))
                area = float(elem.get(XML_ATTR_AREA))
                length_m = float(elem.get(XML_ATTR_LENGTH_MICRONS))
                area_m = float(elem.get(XML_ATTR_AREA_MICRONS))

                # the tissue is the value of the first attribute found
                # under the region; fall back to the default tissue
                # when the region has none
                #
                attr_elem = elem.find(f"{XML_ATTR_ATTRIBUTES}/{XML_ATTR_ATTRIBUTE}")
                tissue = (
                    attr_elem.get(XML_ATTR_VALUE)
                    if attr_elem is not None
                    else DEF_HEADER_TISSUE
                )

                # collect vertices present *inside* this Region only;
                # each value goes through float before int, so any
                # fractional part is dropped
                #
                coords = [
                    [
                        int(float(v.get(XML_ATTR_X))),
                        int(float(v.get(XML_ATTR_Y))),
                        int(float(v.get(XML_ATTR_Z))),
                    ]
                    for v in elem.findall(f"{XML_ATTR_VERTICES}/{XML_ATTR_VERTEX}")
                ]

                # store the region record; the confidence is stored as
                # a string formatted to four significant digits
                #
                graph[idx] = {
                    XML_KEY_REGION_ID: rid,
                    XML_KEY_TEXT: text,
                    XML_KEY_COORDINATES: coords,
                    XML_KEY_CONFIDENCE: f"{float(confidence):.4g}",
                    XML_KEY_TISSUE_TYPE: tissue,
                    XML_KEY_GEOM_PROPS: {
                        XML_KEY_LENGTH: length,
                        XML_KEY_AREA: area,
                        XML_KEY_MICRON_LENGTH: length_m,
                        XML_KEY_MICRON_AREA: area_m,
                    },
                }

                # append tissue info (each distinct value only once)
                #
                if tissue not in tissues:
                    tissues.append(tissue)

                # free memory
                #
                elem.clear()

        # create dpath header; default values are used for width and
        # height when the file did not provide them, and for the tissue
        # list when no region was found
        #
        self.data_d.header_d = {
            XML_KEY_MICRONS: DEF_HEADER_MICRONS,
            XML_KEY_BNAME: bname,
            XML_KEY_WIDTH: width if width is not None else DEF_HEADER_WIDTH,
            XML_KEY_HEIGHT: height if height is not None else DEF_HEADER_HEIGHT,
            XML_KEY_TISSUE: tissues or [DEF_HEADER_TISSUE],
        }

        # store the collected regions through set_graph
        #
        self.set_graph(graph)

        # exit gracefully
        #
        return True

    #
    # end of method

    def validate(self, fname, schema = DEF_XML_SCHEMA):
        """
        method: validate

        arguments:
         fname: the file name
         schema: an xml schema; when it resolves to the same path as
                 DEF_XML_SCHEMA the schema stored on this object is
                 used instead

        return:
         a boolean value indicating status: True if fname parses as xml
         and matches the schema, False otherwise

        description:
         This method returns True if the parsed xml matches the schema.
         A file that cannot be opened or is not well-formed xml yields
         False. Errors raised while building the schema object itself
         are not caught here.
        """

        # if schema is the default schema set
        # schema to schema path located in self.schema
        #
        if nft.get_fullpath(schema) == nft.get_fullpath(DEF_XML_SCHEMA):
            schema = self.schema

        # fetch the schema's full file path
        #
        schema = nft.get_fullpath(schema)

        # create xml validator object
        #
        xml_validator = etree.XMLSchema(file = schema)

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: checking for xml (%s)" %
                  (__FILE__, ndt.__LINE__, Xml.__CLASS_NAME__,
                   ndt.__NAME__, fname))

        # attempt to parse the file: an unreadable file is always
        # reported, while a file that is simply not xml is only
        # reported when debugging is enabled
        #
        try:
            tree = etree.parse(fname)
        except OSError:
            print("Error: %s (line: %s) %s::%s: error opening file (%s)" %
                  (__FILE__, ndt.__LINE__, Xml.__CLASS_NAME__,
                   ndt.__NAME__, fname))
            return False
        except etree.XMLSyntaxError:
            if dbgl > ndt.BRIEF:
                print("Error: %s (line: %s) %s::%s: (%s) not an XML file" %
                      (__FILE__, ndt.__LINE__, Xml.__CLASS_NAME__,
                       ndt.__NAME__, fname))
            return False

        # validate the xml file to the schema
        #
        if xml_validator.validate(tree):
            return True

        # the file does not match the schema: list the failure points
        # when debugging is enabled
        #
        if dbgl > ndt.BRIEF:
            for err in xml_validator.error_log:

                # print failure points
                #
                print("Schema validation failed: line %d: %s" %
                      (err.line, err.message))

            # print debug message
            #
            print("Error: %s (line: %s) %s::%s: processing error (%s)" %
                  (__FILE__, ndt.__LINE__,
                   Xml.__CLASS_NAME__, ndt.__NAME__, fname))

        # report failure whether or not debugging is enabled
        #
        return False
    #
    # end of method

    def set_schema(self, new_schema):
        """
        method: set_schema

        arguments:
         new_schema: the new schema file to use

        return:
         a boolean value indicating status (always True)

        description:
         This method stores the full path of new_schema as this
         object's schema.
        """

        # display debug information
        # (Xml.__CLASS_NAME__ is set by this class's own constructor)
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: New Schema = %s" %
                  (__FILE__, ndt.__LINE__, Xml.__CLASS_NAME__,
                   ndt.__NAME__, new_schema))

        # set new schema path
        #
        self.schema = nft.get_fullpath(new_schema)

        # exit gracefully
        #
        return True
    #
    # end of method

    def write(self, ofile):
        """
        method: write

        arguments:
         ofile: output file name

        return:
         boolean value indicating status

        description:
         this method writes the data to ofile in
         the standard schema format
        """

        # display debug information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: writing to file (%s)\n" %
                    (__FILE__, ndt.__LINE__, ndt.__NAME__, ofile))

        # get graph data
        #
        graph_data = self.get_graph()

        # get header data
        #
        header_data = self.get_header()

        # create the root
        #
        root = et.Element(XML_ATTR_ANNOTATIONS,
                          {XML_ATTR_MICRONS_PER_PIXEL :
                           header_data[CKEY_MICRONS]})

        # create blank annotation
        #
        annotation = et.SubElement(root, XML_ATTR_ANNOTATION, DEF_XML_ANN)

        # create attributes element
        #
        attributes = et.SubElement(annotation, XML_ATTR_ATTRIBUTES)

        # create attribute element containing width
        #
        attribute = et.SubElement(attributes, XML_ATTR_ATTRIBUTE,
                                  Name = XML_ATTR_WIDTH, Id = DEF_INT_OP,
                                  Value = str(header_data[XML_KEY_WIDTH]))

        # create attribute element containing height
        #
        attribute = et.SubElement(attributes, XML_ATTR_ATTRIBUTE,
                                  Name = XML_ATTR_HEIGHT, Id = DEF_INT_OP,
                                  Value = str(header_data[XML_KEY_HEIGHT]))

        # create regions element
        #
        regions = et.SubElement(annotation, XML_ATTR_REGIONS)

        # create region attribute header element
        #
        region_attr_headers = et.SubElement(regions,
                                            XML_ATTR_REGION_ATTRIBUTE_HEADERS)

        # create attribute header element
        #
        attr_header = et.SubElement(region_attr_headers,
                                    XML_ATTR_ATTRIBUTE_HEADER,
                                    Id = DEF_ID_OP,
                                    Name = XML_ATTR_TISSUE,
                                    ColumnWidth = DEF_COLUMN_WIDTH)

        # loop to write index specific data
        #
        for index in graph_data.keys():

            # fetch all needed region label data
            #
            region_id = str(graph_data[index][XML_KEY_REGION_ID])
            text = str(graph_data[index][XML_KEY_TEXT])
            geom_props = graph_data[index][XML_KEY_GEOM_PROPS]

            # changes all values in geom_props to strings
            #
            for key in geom_props.keys():
                geom_props[key] = str(geom_props[key])

            # create local copy of default region dictionary
            #
            region_dict = copy.deepcopy(DEF_XML_REG)

            # add all needed region label data to region dict
            #
            region_dict.update({XML_ATTR_ID : str(region_id),
                                XML_ATTR_TEXT : str(text),
                                XML_ATTR_DISPLAY_ID : str(region_id)})

            # update region dict to include geom_props
            #
            region_dict.update(geom_props)

            # create region data
            #
            region = et.SubElement(regions, XML_ATTR_REGION, region_dict)

            # create attributes element
            #
            attributes = et.SubElement(region, XML_ATTR_ATTRIBUTES)

            # create attribute element
            #
            attribute = et.SubElement(attributes,
                                      XML_ATTR_ATTRIBUTE,
                                      Name = DEF_INT_OP,
                                      Id = DEF_INT_OP,
                                      Value = str(graph_data[index] \
                                      [XML_KEY_TISSUE_TYPE]))

            # create vertices element
            #
            vertices = et.SubElement(region, XML_ATTR_VERTICES)

            # create vertex data
            #
            for coordinate in graph_data[index][XML_KEY_COORDINATES]:

                # fetch x, y, z coordinates
                #
                x_coord, y_coord, z_coord = coordinate

                # create vertex element
                #
                vertex = et.SubElement(vertices, XML_ATTR_VERTEX,
                                       X = str(x_coord),
                                       Y = str(y_coord),
                                       Z = str(z_coord))

        # add plot element
        #
        plots = et.SubElement(annotation, XML_ATTR_PLOTS)

        # encode root
        #
        xmlstr = et.tostring(root, encoding = nft.DEF_CHAR_ENCODING)

        # convert the string to a pretty print
        #
        reparsed = md.parseString(
            xmlstr).toprettyxml(indent=nft.DELIM_TAB)

        # remove xml version information
        #
        result = reparsed.replace(XML_ATTR_VERSION, nft.DELIM_NULL)

        # open the output file to write
        #
        with open(ofile, nft.MODE_WRITE_TEXT) as writer:

            # write the xml file
            #
            writer.write(result)

        # exit gracefully
        #
        return True

    #
    # end of method

    def print_events_from_file(self, fp = sys.stdout):
        """
        method: print_events_from_file

        arguments:
         fp: the output destination: sys.stdout (default), an object
             that provides a write() method, or the name of a file
             to be created

        return:
         a boolean value indicating status

        description:
         This method pretty prints the xml annotation information: the
         microns per pixel value and the image dimensions taken from the
         header, followed by one entry per region stored in the graph.
         If a file name is given and it cannot be opened, an error
         message is printed and the program exits.
        """

        # display debugging information:
        #  print() is used because fp may still be a file name here
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: printing events" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__))

        # anything that is neither stdout nor directly writable is
        # treated as a file name that must be opened (and closed) here
        #
        is_owner = (fp is not sys.stdout) and (not hasattr(fp, "write"))
        if is_owner:

            # attempt to open the output file
            #
            try:
                fp = open(fp, nft.MODE_WRITE_TEXT)

            except Exception:

                # the destination is not valid: print an error message
                #
                print("Error: %s (line: %s) %s: %s (%s)" %
                      (__FILE__, ndt.__LINE__, ndt.__NAME__,
                       "Unable to find file", fp))
                sys.exit(os.EX_SOFTWARE)

        # make sure a file opened above is closed even if printing fails
        #
        try:

            # get file data
            #
            graph_data = self.get_graph()
            header_data = self.get_header()

            # get the dimension
            #
            width = header_data[XML_KEY_WIDTH]
            height = header_data[XML_KEY_HEIGHT]

            # print the header
            #
            fp.write(DEF_FMT_HEAD %
                     (XML_ATTR_MICRONS_PER_PIXEL,
                      (header_data[XML_KEY_MICRONS])))
            fp.write(DEF_FMT_DIMEN % (width, height))

            # print the region's information
            #
            for index in graph_data:

                # fetch the data for this region
                #
                region = graph_data[index]
                geom_props = region[XML_KEY_GEOM_PROPS]
                values = header_data[XML_KEY_TISSUE]

                # collect the x and y values of every vertex so that
                # the bounding box can be reported
                #
                coordinates = region[XML_KEY_COORDINATES]
                x_vertices = [vertex[0] for vertex in coordinates]
                y_vertices = [vertex[1] for vertex in coordinates]

                # print the region id, its properties and its extent
                #
                fp.write(DEF_FMT_REGION % (region[XML_KEY_REGION_ID]))
                fp.write(DEF_FMT_ITEM %
                         (XML_ATTR_TISSUE_VALUE, values[0]))
                fp.write(DEF_FMT_ITEM %
                         (XML_ATTR_LENGTH, geom_props[XML_KEY_LENGTH]))
                fp.write(DEF_FMT_ITEM %
                         (XML_ATTR_AREA, geom_props[XML_KEY_AREA]))
                fp.write(DEF_FMT_ITEM %
                         (XML_ATTR_LENGTH_MICRONS,
                          geom_props[XML_KEY_MICRON_LENGTH]))
                fp.write(DEF_FMT_ITEM %
                         (XML_ATTR_AREA_MICRONS,
                          geom_props[XML_KEY_MICRON_AREA]))
                fp.write(DEF_FMT_ITEM %
                         (XML_ATTR_TEXT, region[XML_KEY_TEXT]))
                fp.write(DEF_FMT_VERTS %
                         (XML_ATTR_VERTICES,
                          min(x_vertices), max(x_vertices),
                          min(y_vertices), max(y_vertices)))

        finally:

            # close the output only if it was opened by this method
            #
            if is_owner:
                fp.close()

        # exit gracefully
        #
        return True

    #
    # end of method

    def set_graph(self, graph):
        """
        method: set_graph

        arguments:
         graph: the graph object to store

        return:
         the status reported by the underlying data object

        description:
         this method hands the graph to data_d.set_graph and passes
         its result back to the caller
        """

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.set_graph(graph)

    #
    # end of method

    def set_header(self, header):
        """
        method: set_header

        arguments:
         header: the header object to store

        return:
         the status reported by the underlying data object

        description:
         this method hands the header to data_d.set_header and passes
         its result back to the caller
        """

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.set_header(header)

    #
    # end of method

    def get_graph(self):
        """
        method: get_graph

        arguments:
         none

        return:
         the graph object held by the underlying data object

        description:
         this method returns whatever data_d.get_graph provides
        """

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.get_graph()
    #
    # end of method

    def get_header(self):
        """
        method: get_header

        arguments:
         none

        return:
         the header object held by the underlying data object

        description:
         this method returns whatever data_d.get_header provides
        """

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.get_header()
    #
    # end of method

    def delete_graph(self):
        """
        method: delete_graph

        arguments:
         none

        return:
         the status reported by the underlying data object

        description:
         this method asks data_d.delete_graph to clear the graph and
         passes its result back to the caller
        """

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.delete_graph()
    #
    # end of method

    def delete_header(self):
        """
        method: delete_header

        arguments:
         none

        return:
         the status reported by the underlying data object

        description:
         this method asks data_d.delete_header to clear the header and
         passes its result back to the caller
        """

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.delete_header()
    #
    # end of method

    def delete(self, region):
        """
        method: delete

        arguments:
         region: the region to delete

        return:
         the status reported by the underlying data object

        description:
         this method asks data_d.delete to remove the given region and
         passes its result back to the caller
        """

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.delete(region)
    #
    # end of method

    def add(self,
            index,
            region_id,
            text,
            coordinates,
            confidence,
            tissue_type,
            geom_props):
        """
        method: add

        arguments:
         index: the integer index to add
         region_id: the integer region_id to add
         text: the label to add (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: the confidence value of the annotation
         tissue_type: a string indicating tissue that is being annotated
         geom_props: a dictionary of the geometric properties

        return:
         the status reported by the underlying data object

        description:
         this method forwards all of its arguments, in the same order,
         to data_d.add and passes its result back to the caller
        """

        # collect the annotation fields in the order data_d.add expects
        #
        fields = (index, region_id, text, coordinates,
                  confidence, tissue_type, geom_props)

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.add(*fields)
    #
    # end of method

    def create(self,
               index,
               region_id,
               text,
               coordinates,
               confidence,
               tissue_type,
               geom_props):
        """
        method: create

        arguments:
         index: the integer index to create
         region_id: the integer region_id to create
         text: the label to add (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: the confidence value of the annotation
         tissue_type: a string indicating tissue that is being annotated
         geom_props: a dictionary of the geometric properties

        return:
         the status reported by the underlying data object

        description:
         this method forwards all of its arguments, in the same order,
         to data_d.create and passes its result back to the caller
        """

        # collect the annotation fields in the order data_d.create expects
        #
        fields = (index, region_id, text, coordinates,
                  confidence, tissue_type, geom_props)

        # delegate to the internal data structure and exit gracefully
        #
        return self.data_d.create(*fields)

    #
    # end of method
#
# end of class

class Csv:
    """
    Class: Csv

    description:
     This class abstracts csv processing.
    """

    def __init__(self, schema = None):
        """
        method: constructor

        arguments:
         schema: unused

        return:
         none

        description:
         This is a constructor for the Csv class.
        """

        # set the class name
        #
        Csv.__CLASS_NAME__ = self.__class__.__name__

        # create instance out of internal structure
        #
        self.data_d = AnnGrDpath()
    #
    # end of method

    def load(self, fname):
        """
        method: load

        arguments:
         fname: filename

        return:
         boolean value indicating status

        description:
         This method takes the filename and returns two dictionaries
         containing the csv header and data.
        """

        # load header info
        #
        self.load_header(fname)
        
        # read the csv into a dataframe
        #
        try:
            df = pd.read_csv(
                fname,
                comment="#",
                usecols=[
                    "index","region_id","tissue","label",
                    "row","column","depth","confidence",
                    "Length","Area","LengthMicrons","AreaMicrons",
                ],
                dtype={
                    "index" : "int32",
                    "region_id" : "int32",
                    "tissue" : "category",
                    "label" : "string",
                    "row" : "int32",
                    "column" : "int32",
                    "depth" : "int8",
                    "confidence" : "float32",
                    "Length" : "float32",
                    "Area" : "float32",
                    "LengthMicrons" : "float32",
                    "AreaMicrons" : "float32",
                },
                engine="c",
                memory_map=True,          
                low_memory=False,         
                na_filter = False,        
            )
        except Exception as e:
            print("Error: %s (line: %s) %s: csv schema failure (%s)" %
                   (__FILE__, ndt.__LINE__, ndt.__NAME__, e))
            return False

        # split static vs. coordinate columns
        #
        stat_cols = [
            "region_id","tissue","label","confidence",
            "Length","Area","LengthMicrons","AreaMicrons",
        ]
        static = \
            (df.set_index("index")[stat_cols].groupby(level=0, sort=False)
                  .first())                              
        
        # coordinates as one ndarray per region, still vectorised
        #
        coords = (
            df[["index","row","column","depth"]]
            .to_numpy(dtype="int32")
        )

        # split the coordinate vector by region
        #
        counts = (df["index"].value_counts(sort=False)
                  .sort_index().to_numpy())
        coord_arrays = np.split(coords[:, 1:], counts.cumsum()[:-1])

        # normalize to (x, y, z) = (row, column, depth)
        # csv rows are [row, column, depth].
        #
        coord_arrays = [a[:, [0, 1, 2]] for a in coord_arrays]

        # assemble the graph dict
        #
        graph = {}

        # iterate over static/coordinate info
        #
        for idx, arr in zip(static.index.to_numpy(), coord_arrays):

            # fetch regions static info
            #
            s = static.loc[idx]
            graph[int(idx)] = {
                CSV_KEY_REGION_ID: int(s.region_id),
                CSV_KEY_TEXT: s.label,
                CSV_KEY_COORDINATES: arr.tolist(),      
                CSV_KEY_CONFIDENCE: f"{float(s.confidence):.4g}",
                CSV_KEY_TISSUE_TYPE: s.tissue,
                CSV_KEY_GEOM_PROPS: {
                    CSV_KEY_LENGTH: float(s.Length),
                    CSV_KEY_AREA: float(s.Area),
                    CSV_KEY_MICRON_LENGTH: float(s.LengthMicrons),
                    CSV_KEY_MICRON_AREA: float(s.AreaMicrons),
                },
            }
            
        # set the graph
        #
        self.set_graph(graph)

        # exit gracefully
        #  return trie
        #
        return True

    #
    # end of method

    def load_header(self, fname):
        """
        method: load_header

        arguments:
         fname: file name to load

        return:
         boolean value indicating status

        description:
         this method finds and loads all header data into
         the data_d.header_d data structure
        """

        # open a file
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: opening (%s)\n" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

        fp = open(fname, nft.MODE_READ_TEXT)
        if fp is None:
            print("Error: %s (line: %s) %s: error opening file (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
            return False

        # skip csv version as its already included in FTYPE_OBJECTS
        #
        fp.readline()

        # fetch microns per pixel and store in header_d
        #
        # clean up line and fetch microns_per_pixel value
        #
        micron_per_pixel = re.findall(DEF_REGEX_MICRON, fp.readline())[0][1]

        # add micron per pixel value to header_d
        #
        self.data_d.header_d[CSV_KEY_MICRONS] = micron_per_pixel

        # fetch bname and store in header_d
        #
        # add bname data
        #
        self.data_d.header_d[CSV_KEY_BNAME] = \
            os.path.splitext(os.path.basename(fname))[0]

        # skip bname info
        #
        fp.readline()

        # fetch width and height and store in header_d
        #
        # clean up line and fetch width, height information
        #
        width_str, height_str = fp.readline().split(nft.DELIM_COMMA)

        # further clean up width, height strings and
        # separate them into there own key value pairs
        #
        width_val = re.findall(DEF_REGEX_WIDTH, width_str)
        height_val = re.findall(DEF_REGEX_HEIGHT, height_str)

        # add width and key information to
        # the header_d dictionary
        #
        self.data_d.header_d[CSV_KEY_WIDTH] = width_val[0][1]
        self.data_d.header_d[CSV_KEY_HEIGHT] = height_val[0][1]


        # find the tissue list
        #
        line = fp.readline().rstrip(nft.DELIM_NEWLINE)
        match_d = DEF_REGEX_TISSUE.match(line)

        # store tissue list in header
        #
        tissue_list = [t.strip() for t in match_d.group(1).split(nft.DELIM_COMMA)]
        self.data_d.header_d[CSV_KEY_TISSUE] = tissue_list

        # reset file pointer to beginning
        #
        fp.seek(0,0)

        # display debug information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: closing (%s)\n" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))
            
        # close file pointer
        #
        fp.close()

        # exit gracefully
        #
        return True

    #
    # end of method

    def write(self, ofile):
        """
        method: write

        arguments:
         ofile: the output file name

        return:
         boolean value indicating status

        description:
         this method writes the fname data
         to an output file
        """

        # attempt to write contents contents
        #
        header_status = self.write_header(fp = ofile)
        graph_status = self.write_data(fp = ofile)

        # exit gracefully
        #
        return header_status and graph_status
    #
    # end of method

    def validate(self, fname, schema = None):
        """
        method: validate

        arguments:
         fname: the file name
         schema: unused

        return:
         a boolean value indicating status

        description:
         This method returns True if the metadata is a valid csv header.
        """

        # display debugging information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: checking for csv (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, fname))

        # open the file
        #
        fp = open(fname, nft.MODE_READ_TEXT)
        if fp is None:
            print("Error: %s (line: %s) %s::%s: error opening file (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, fname))
            return False

        # read the first line in the file
        #
        header = fp.readline()
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s::%s: header (%s)" %
                  (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                   ndt.__NAME__, header))

        # First check;
        #   Make sure that it is at least a CSV file.
        #   If the beginning of the file is not the magic sequence
        #   then return
        #
        if CSV_FMT_VERSION not in header.strip():
            if dbgl > ndt.BRIEF:
                print("Error: %s (line: %s) %s::%s: processing error (%s)" %
                      (__FILE__, ndt.__LINE__, AnnDpath.__CLASS_NAME__,
                       ndt.__NAME__, fname))
            return False

        # Second check:
        #   This is needed to distinguished between a DPATH CSV and EEG CSV
        #   We need this as nedc_ann_convert_files has these two libraries
        #   working together
        #
        for line in fp:

            # skip comment
            #
            if line.startswith(nft.DELIM_COMMENT):
                continue

            # found the DPATH Specific header
            #
            if DEF_FMT_HEADER in line.strip():
                fp.close()
                return True

        # close the file pointer
        #
        fp.close()

        # exit ungracefully:
        #  invalid csv dpath file
        #
        return False
    #
    # end of method

    def print_events_from_file(self, fp = sys.stdout):
        """
        method: print_events_from_file

        arguments:
         fp: file pointer

        return:
         a boolean indicating status

        description:
         This method pretty prints the CSV annotation information.
        """

        # retrieve data
        #
        header_data = self.get_header()
        graph_data = self.get_graph()

        # open a file
        #
        if dbgl > ndt.BRIEF:
            fp.write("%s (line: %s) %s: opening (%s)\n" %
                     (__FILE__, ndt.__LINE__, ndt.__NAME__, fname))

        # print the header
        #
        # write the proper CSV header format
        #
        fp.write("%s" % (CSV_ATTR_VERSION) + nft.DELIM_NEWLINE)
        fp.write(nft.DELIM_COMMENT + " %s = %s" %
                 (CSV_ATTR_MICRONS,
                  header_data[CSV_KEY_MICRONS]) + nft.DELIM_NEWLINE)

        fp.write(nft.DELIM_COMMENT + " %s = %s" %
                 (CSV_ATTR_BNAME, header_data[CSV_KEY_BNAME]) +
                 nft.DELIM_NEWLINE)

        fp.write(nft.DELIM_COMMENT + "%s" %
                 (DEF_FMT_DIMEN %
                  (header_data[CSV_KEY_WIDTH], header_data[CSV_KEY_HEIGHT])))

        # fetch list of all different tissue types
        #
        tissue_list = self.data_d.header_d[CSV_KEY_TISSUE]

        fp.write(nft.DELIM_COMMENT + " %s = " % (CSV_ATTR_TISSUE))
        fp.write(', '.join(tissue_list) + nft.DELIM_NEWLINE)

        # pretty print the information
        #
        for val in graph_data.values():

            # we first create a list of tuples of (x, y) values, and
            # then we use the zip function to join all row and column into
            # their respective lists by unpacking the tuple value with the
            # * operator.
            #
            # compute min/max with x first, then y
            #
            column_vertices, row_vertices = zip(
                *[(coord[0], coord[1]) for coord in
                  val[CSV_KEY_COORDINATES]])

            fp.write(DEF_FMT_VERTS %
                     (CSV_ATTR_VERTICES,
                      min(column_vertices), max(column_vertices),
                      min(row_vertices), max(row_vertices)))
            
            fp.write(DEF_FMT_ITEM %
                     (CSV_ATTR_TISSUE_VALUE, val[CSV_KEY_TISSUE_TYPE]))

            fp.write(DEF_FMT_ITEM % (CSV_ATTR_TEXT, val[CSV_KEY_TEXT]))

            fp.write(DEF_FMT_ITEM %
                    (CSV_ATTR_CONFIDENCE, val[CSV_KEY_CONFIDENCE]))

            fp.write(DEF_FMT_VERTS % (CSV_ATTR_VERTICES, min(row_vertices),
                    max(row_vertices), min(column_vertices),
                    max(column_vertices)))

        # exit gracefully
        #
        return True

    #
    # end of method

    def write_header(self, fp = sys.stdout):
        """
        method: write_header

        arguments:
         fp: pointer to output, default is sys.stdout

        return:
         a boolean value representing status

        description:
         This function will get the header dictionary for a CSV
         file and write the header to fp
        """

        # check if there's a file pointer
        #
        if fp is not sys.stdout:

            # check if file pointer is valid
            #
            try:

                # check by attempting to open fp
                #
                fp = open(fp, nft.MODE_WRITE_TEXT)
                
            except Exception as e:

                # file pointer is not valid print error message
                #
                print("Error: %s (line: %s) %s: %s (%s, %s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__,
                   "error in header creation", fp, e))
                sys.exit(os.EX_SOFTWARE)

        header_data = self.get_header()

        # check if data or header is empty
        #
        if not header_data:
            print("Error: %s (line: %s) %s: %s (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__,
                   "No header data available"))
            sys.exit(os.EX_SOFTWARE)

        # display debug information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: printing header API (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, header))
            print("%s (line: %s) %s: printing data API (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, data))

        # write the proper CSV header format
        #
        # write csv version
        #
        fp.write("%s" % (CSV_ATTR_VERSION) + nft.DELIM_NEWLINE)

        # write micron data
        #
        fp.write(nft.DELIM_COMMENT + " %s = %s\n" %
                 (CSV_ATTR_MICRONS,
                  header_data[CSV_KEY_MICRONS]))

        # write file name information
        #
        fp.write(nft.DELIM_COMMENT + " %s = %s\n" %
                 (CSV_ATTR_BNAME, header_data[CSV_KEY_BNAME]))

        # write width/height information
        #
        fp.write(nft.DELIM_COMMENT + "%s" %
                 (DEF_FMT_DIMEN %
                  (header_data[CSV_KEY_WIDTH], header_data[CSV_KEY_HEIGHT])))

        # fetch list of all different tissue types
        #
        tissue_list = self.data_d.header_d[CSV_KEY_TISSUE]

        # write tissue information
        #
        fp.write(nft.DELIM_COMMENT + " %s = " % (CSV_ATTR_TISSUE))
        fp.write(', '.join(tissue_list) + nft.DELIM_NEWLINE)
        fp.write(nft.DELIM_COMMENT + nft.DELIM_NEWLINE)
        fp.write("%s" % (DEF_FMT_HEADER) + nft.DELIM_NEWLINE)

        # close file pointer
        #
        if fp is not sys.stdout:
            fp.close()

        # exit gracefully
        #
        return True

    #
    # end of method

    def write_data(self, fp = sys.stdout):
        """
        method: write_data

        arguments:
         fp: pointer to output, default is sys.stdout

        return:
         a boolean value representing status

        description:
         This function will get the data dictionary containing
         CSV data information and write it to fp
        """

        # check if there's a file pointer
        #
        if fp is not sys.stdout:
            try:
                fp = open(fp, nft.MODE_APPEND_TEXT)
            except:
                print("Error: %s (line: %s) %s: %s (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__,
                   "Unable to find file", fname))
                sys.exit(os.EX_SOFTWARE)

        # fetch needed data
        #
        graph_data = self.get_graph()

        # check if data or header is empty
        #
        if not graph_data:
            print("Error: %s (line: %s) %s: %s" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__,
                   "no graph data"))
            sys.exit(os.EX_SOFTWARE)

        # display debug information
        #
        if dbgl > ndt.BRIEF:
            print("%s (line: %s) %s: printing data API (%s)" %
                  (__FILE__, ndt.__LINE__, ndt.__NAME__, graph_data))

        # loop over the data items and write the data
        #
        for index, region_info in graph_data.items():
            for i in range(len(region_info[CSV_KEY_COORDINATES])):
                fp.write("%s," % (index))
                fp.write("%s," % (region_info[CSV_KEY_REGION_ID]))
                fp.write("%s," % (region_info[CSV_KEY_TISSUE_TYPE]))
                fp.write("%s," % (region_info[CSV_KEY_TEXT]))
                fp.write("%s," % (i))
                fp.write("%s," % (region_info[CSV_KEY_COORDINATES][i][0]))
                fp.write("%s," % (region_info[CSV_KEY_COORDINATES][i][1]))
                fp.write("%s," % (region_info[CSV_KEY_COORDINATES][i][2]))
                fp.write("%s,"  % (region_info[CSV_KEY_CONFIDENCE]))
                geom_props = region_info[CSV_KEY_GEOM_PROPS]
                fp.write("%s," % (geom_props[CSV_KEY_LENGTH]))
                fp.write("%s," % (geom_props[CSV_KEY_AREA]))
                fp.write("%s," % (geom_props[CSV_KEY_MICRON_LENGTH]))
                fp.write("%s\n"  % (geom_props[CSV_KEY_MICRON_AREA]))

        # close file pointer
        #
        if fp is not sys.stdout:
            fp.close()

        # exit gracefully
        #
        return True

    #
    # end of method

    def set_graph(self, graph):
        """
        method: set_graph

        arguments:
         graph: a graph object

        return:
         boolean value indicating status

        description:
         this method sets the graph object
        """

        # attempt to set the graph
        #
        status = self.data_d.set_graph(graph)

        # exit gracefully
        #
        return status

    #
    # end of method

    def set_header(self, header):
        """
        method: set_header

        arguments:
         header: a header object

        return:
         boolean value indicating status

        description:
         this method sets the header object
        """

        # attempt to set header
        #
        status = self.data_d.set_header(header)

        # exit gracefully
        #
        return status

    #
    # end of method

    def get_graph(self):
        """
        method: get_graph

        arguments:
         none

        return:
         a graph object

        description:
         this method fetches the graph object
        """

        # attempt to get graph data
        #
        graph_data = self.data_d.get_graph()

        # exit gracefully
        #
        return graph_data
    #
    # end of method

    def get_header(self):
        """
        method: get_header

        arguments:
         none

        return:
         a header object

        description:
         this method fetches the header object
        """

        # attempt to get header data
        #
        header_data = self.data_d.get_header()

        # exit gracefully
        #
        return header_data
    #
    # end of method

    def delete_graph(self):
        """
        method: delete_graph

        arguments:
         none

        return:
         a boolean value indicating status

        description:
         this method sets the graph object to empty
        """

        # attempt to delete graph
        #
        status = self.data_d.delete_graph()

        # exit gracefully
        #
        return status
    #
    # end of method

    def delete_header(self):
        """
        method: delete_header

        arguments:
         none

        return:
         a boolean value indicating status

        description:
         this method sets the header object to empty
        """

        # attempt to delete header
        #
        status = self.data_d.delete_header()

        # exit gracefully
        #
        return status
    #
    # end of method

    def delete(self, region):
        """
        method: delete

        arguments:
         region: the region to delete

        return:
         boolean value indication status

        description:
         this method deletes from graph_d a region
        """

        # attempt to delete region
        #
        status = self.data_d.delete(region)

        # exit gracefully
        #
        return status
    #
    # end of method

    def add(self,
            index,
            region_id,
            text,
            coordinates,
            confidence,
            tissue_type,
            geom_props):
        """
        method: add

        arguments:
         index: the integer index to add
         region_id: the integer region_id to add
         text: the label to add (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: the confidence value associated with the annotation
         tissue_type: a string indicating tissue that is being annotated
         geom_props: a dictionary of the geometric properties

        return:
         the status reported by data_d.add() (documented as a
         boolean value)

        description:
         this method forwards all of its arguments, in the order they
         were received, to the add method of the underlying data_d
         object and relays the resulting status to the caller
        """

        # grab a handle to the underlying data object
        #
        backend = self.data_d

        # forward the annotation and exit gracefully
        #
        return backend.add(index,
                           region_id,
                           text,
                           coordinates,
                           confidence,
                           tissue_type,
                           geom_props)
    #
    # end of method

    def create(self,
               index,
               region_id,
               text,
               coordinates,
               confidence,
               tissue_type,
               geom_props):
        """
        method: create

        arguments:
         index: the integer index to create
         region_id: the integer region_id to create
         text: the label to add (ex: bckg, indc, dcis, ...)
         coordinates: list of a list of x, y, and z coordinates
         confidence: the confidence value associated with the annotation
         tissue_type: a string indicating tissue that is being annotated
         geom_props: a dictionary of the geometric properties

        return:
         the status reported by data_d.create() (documented as a
         boolean value)

        description:
         this method forwards all of its arguments, in the order they
         were received, to the create method of the underlying data_d
         object and relays the resulting status to the caller
        """

        # collect the arguments in the order the underlying call expects
        #
        fields = (index, region_id, text, coordinates,
                  confidence, tissue_type, geom_props)

        # forward the annotation and exit gracefully
        #
        return self.data_d.create(*fields)

#
# end of class

# -----------------------------------------------------------------------------
#
# Beginning of Most Important Section
#
#------------------------------------------------------------------------------

# define FTYPE_OBJECTS: a lookup table keyed by file type name. each
# entry is a two-element list holding the version constant for that
# file type followed by an instance of the class that handles it. the
# instances are constructed once, when this module is loaded.
#
FTYPE_OBJECTS = {
    nft.CSV_NAME: [nft.CSV_VERSION, Csv()],
    nft.XML_NAME: [nft.XML_VERSION, Xml()],
}

#------------------------------------------------------------------------------
#
# End of Most Important Section
#
#------------------------------------------------------------------------------

#
# end of file
