# file: $NEDC_NFC/util/python/nedc_ml_tools/imld_alg_params.txt
#
# A parameter file that defines the parameters for each ML Tools algorithm
# to be used in IMLD. Contains parameter blocks for each algorithm and presents
# all of the possible options. Also allows users to add their own algorithm to
# IMLD by creating their own parameter block.
#
# Guide:
# The parameters for each algorithm are simple key:value pairs. Each key is
# limited to a single, non-spaced word. Each key must be identically named
# to its corresponding ML Tools parameter name. The values for each key can
# have a few different forms. Before discussing the various value options, it
# is important to understand how IMLD interprets numeric values and how it
# discerns between float and integer values.
#
#
# Numerics:
# In the IMLD parameter file, providing an integer will limit the user to
# strictly whole numbers. Integers are typed into the parameter file as
# whole numbers WITHOUT any decimal points or places:
#
#  example: key = 1
#
# As an alternative to integers, float numbers can also be used. If a float
# number is specified in the parameter file, the user will be able to choose
# decimal or non-whole numbers. An important note about floats in the IMLD
# parameter file is that if a range of numbers is specified as a parameter's
# value, and one of the range values is presented as a float, the user prompt
# for that parameter will be floats, meaning that users will be able to use
# decimal numbers. For this reason it is important to be precise when
# presenting numerics in the parameter file. Floats can be written as decimal
# numbers WITH decimal points:
#
#  example: key = 1.50
#
# If you want to set the parameter value to a whole number but still allow the
# user to use floats, be sure to include decimal points and places even if
# they are just zeros.
#
#  example: key = 1.00
#
# 1. Single Values:
# A single value can be used as the value for a parameter.
# This single value can be either numeric or non-numeric. A numeric value
# would set the default value of the parameter to the given number. If a
# single number is given, the range will be an "infinite" set of positive
# numbers. As mentioned, the numerics can be float or integer types.
#
#  example: key = 1
#  example: key = 1.0
#  example: key = *
#
# If a single non-numeric value is needed for any particular reason, this can
# also be done. Simply place the single word-like value as the only value
# to its key. If this is done, the user will not be able to select anything
# other than the given value for that parameter.
#
#  example: key = gini
#
# 2. List Values:
# If a parameter has specific, predetermined options, a list value should be
# used. A list is simply a list of values that the user can choose as a
# parameter. Lists can be written as comma-delimited values:
#
#  example: scale = none, biased, unbiased, empirical
#
# If the predetermined options are numbers, developers are encouraged to list
# the numbers as words (ex. "one"), or use a range value for that parameter.
#
#  example: key = one, two, three
#
# 3. Numeric-Range Values:
# The third parameter value type is a numeric range. If the developer wants
# the user to be able to specify a numeric value within a specific numeric
# range, this is the value to use. A numeric range is composed of three
# parts: the minimum value, maximum value, and default value. The user will
# only be able to choose a number in between the minimum and maximum value.
# The default value is what the text box displays before the user makes any
# edits. The range can be written as a list of 3 values, with the first being
# the minimum value, the second being the maximum value, and the third being
# the default value:
#
#  example: neighbors = 0, 2, 1
#  example: neighbors = 1, 3, 2
#
# 4. Class-Based Numerics:
# The final parameter value type can be called a class-based numeric.
# A class-based numeric value will automatically populate the parameter box
# with options for each of the classes present in the data. If there are 2
# classes present, there will be 4 numeric-boxes for each class. The numeric
# boxes have float precision and the name of each box will be the parameter
# key's name with the index (ex. weights 2). The class-based numeric returns
# a list of the values of each class to the parameter.
#
#  example: weights = //
#
# The '*' Character:
# The '*', or asterisk character, can be used in single and numeric range
# values. The '*' will automatically populate that value with the number of
# classes present in the training data set. This option is useful when the
# developer wants the parameter to default or limit to the number of classes
# present in the training data. The '*' character can be inserted into single
# and numeric range values just like any number character.
#
#  example: neighbors = 0, *, *
#  example: neighbors = 0, *, 0
#  example: neighbors = *, *, *
#  example: neighbors = *
#
# Spaced Non-Numerics:
# If the developer would like to assign a non-numeric single or list value,
# but the value has spaces in it, the value can be wrapped in quotations to
# encompass the entire value:
#
#  example: name = "Principal Components Analysis (PCA)"
#  example: key = "first value", "second value", "third value"
#
# This feature is particularly useful for the required "name" key for each
# parameter block.
#
# Important Notes:
# There are a couple of requirements for each algorithm and its corresponding
# parameter blocks:
#
#  1. Every parameter block must have a "name" key, preferably a detailed
#     algorithm name. The "name" parameter will not be included when
#     instantiating the ML Tools algorithm.
#
#  2. Every parameter in the parameter block (other than the "name" key) must
#     be named identically to the corresponding ML Tools parameter dictionary
#     keys.
#     If the keys are not exactly the same as the ML Tools dictionary keys,
#     the connection to ML Tools will fail and IMLD will crash.
#
# These are the rules and requirements for the IMLD parameter file. If the
# requirements are not strictly adhered to, IMLD is prone to undefined behavior
# and crashing. Please use this guide and preexisting algorithms as a reference
# when implementing or modifying an algorithm.
#
# DO NOT FORGET TO INCLUDE EVERY ALGORITHM IN THE "ALGS" LIST. If an algorithm
# is not present in that list, it will not be included in IMLD. The sequence
# of the list is the order in which the algorithms are presented in the
# drop-down menu.
#

# set version information
#
version = param_v1.0.0

#------------------------------------------------------------------------------
#
# Section 0: a list of all the algorithms
#
#------------------------------------------------------------------------------

# the order of this list is the order of the IMLD drop-down menu; every
# block defined below must appear here to be included in IMLD
#
ALGS = EUCLIDEAN, PCA, QDA, LDA, QLDA, NB, KMEANS, KNN, RNF, SVM, MLP

#------------------------------------------------------------------------------
#
# Section 1: discriminant-based algorithms
#
#------------------------------------------------------------------------------

# value forms used in the blocks below:
#  single value:        key = 5
#  list of options:     key = a, b, c
#  numeric range:       key = minimum, maximum, default
#  class-based numeric: key = //
#

# "weights" is a class-based numeric (one set of boxes per class)
#
EUCLIDEAN {
  name = "Euclidean Distance (EUCLIDEAN)"
  weights = //
}

# "n_components" is a numeric range: minimum 0, maximum 2, default 2
#
PCA {
  name = "Principal Components Analysis (PCA)"
  prior = ml, map
  ctype = full, diagonal
  center = none, tied, untied
  scale = none, biased, unbiased, empirical
  n_components = 0, 2, 2
}

QDA {
  name = "Quadratic Components Analysis (QDA)"
  prior = ml, map
  ctype = full, diagonal
  center = none, tied, untied
  scale = none, biased, unbiased, empirical
  n_components = 0, 2, 2
}

LDA {
  name = "Linear Discriminant Analysis (LDA)"
  prior = ml, map
  ctype = full, diagonal
  center = none, tied, untied
  scale = none, biased, unbiased, empirical
}

QLDA {
  name = "Quadratic Linear Discriminant Analysis (QLDA)"
  prior = ml, map
  ctype = full, diagonal
  center = none, tied, untied
  scale = none, biased, unbiased, empirical
}

NB {
  name = "Naive Bayes (NB)"
  prior = ml, map
}

#------------------------------------------------------------------------------
#
# Section 2: nonparametric models
#
#------------------------------------------------------------------------------

KNN {
  name = "K Nearest Neighbor (KNN)"
  neighbor = 5
}

RNF {
  name = "Random Forest (RNF)"
  estimator = 100
  max_depth = 5
  criterion = gini, entropy, log_loss
  random_state = 0
}

# "c" and "gamma" are numeric ranges written as minimum, maximum, default;
# the default must lie between the minimum and the maximum
#
SVM {
  name = "Support Vector Machines (SVM)"
  c = 0, 10000.0, 1.0
  gamma = 0, 10000.0, 0.1
  kernel = linear, poly, rbf, sigmoid, precomputed
}

KMEANS {
  name = "K-Means (KMEANS)"
  n_cluster = 8
  n_init = 3
  random_state = 0
  max_iter = 100
}

#------------------------------------------------------------------------------
#
# Section 3: neural network-based models
#
#------------------------------------------------------------------------------

# "learning_rate_init", "momentum", "validation_fraction" and "max_iter" are
# numeric ranges written as minimum, maximum, default; "batch_size" is a
# single non-numeric value, so the user cannot change it
#
MLP {
  name = "Multilayer Perceptron (MLP)"
  hidden_size = 3
  activation = relu, identity, logistic, tanh
  solver = adam, lbfgs, sgd
  batch_size = auto
  learning_rate = constant, invscaling, adaptive
  learning_rate_init = 0, 10000.0, 0.009
  random_state = 0
  momentum = 0, 1.0, 0.9
  validation_fraction = 0, 1.0, 0.1
  max_iter = 0, 10000, 20
  shuffle = False, True
  early_stopping = False, True
}

#------------------------------------------------------------------------------
#
# Section 4: custom algorithms
#
#------------------------------------------------------------------------------

# template for a user-defined algorithm: uncomment every line of the block
# (including the closing brace), rename it, and add its name to ALGS
#
#TEMPLATE {
#  name = "TEMPLATE"
#  param = 0
#}

#------------------------------------------------------------------------------
#
# end of file
#
#------------------------------------------------------------------------------