## import system modules
#
import os

## import ML and datatype modules
#
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from keras.utils import to_categorical
from sklearn import preprocessing
import numpy as np

np.set_printoptions(suppress=True)
np.random.seed(512)

## Default constants
#
NO_OF_CLASSES = 2
BATCH_SIZE = 32
FEAT_DIM = 26
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30


## This method defines the MLP architecture, trains it, and saves the
## model files under odir
#
def train_neural_network(xtrain, ytrain, odir):
    learning_rate = 0.009

    ## Define the network (MLP)
    #
    model = Sequential()
    model.add(Dense(N_nodes_hl1, input_dim=FEAT_DIM, activation="relu"))
    model.add(Dropout(0.3))
    model.add(Dense(N_nodes_hl2, activation="relu"))
    model.add(Dropout(0.1))
    model.add(Dense(N_nodes_hl3, activation="relu"))
    model.add(Dense(NO_OF_CLASSES, activation="softmax"))

    ## optimizer
    #
    sgd = SGD(lr=learning_rate)
    model.compile(loss="categorical_crossentropy", optimizer=sgd,
                  metrics=["accuracy"])
    model.summary()

    ## train the model
    #
    model.fit(x=xtrain, y=ytrain, batch_size=BATCH_SIZE, epochs=100)
    model_json = model.to_json()

    create_dirtree(odir)

    ## save the model architecture as JSON (text, so open in "w" mode)
    #
    with open(os.path.join(odir, "mlp_net.json"), "w") as json_net:
        json_net.write(model_json)

    ## save the model (architecture + weights) in HDF5 format
    #
    model.save(os.path.join(odir, "dnn.nnet.h5"), overwrite=True)
## end of method
#


## This method parses a label/feature file: each line holds an integer
## label followed by FEAT_DIM feature values
#
def extract_data(fp_a):
    dat = readflines(fp_a)

    ## initialize the feature and label lists
    #
    feats = []
    labs = []

    ## collect all the features and labels
    #
    for line in dat:
        l_fields = line.split()

        ## convert strings to int/float datatypes
        #
        feats_m = [float(f) for f in l_fields[1:]]
        labs_m = int(l_fields[0])
        feats.append(feats_m)
        labs.append(labs_m)

    feats = np.asarray(feats)
    labs = np.asarray(labs)

    ## return the features and labels as a tuple
    #
    return (feats, labs)


## This method reads the lines of a file and returns them as a list
#
def readflines(list_a):
    with open(list_a, "r") as fl:
        return fl.read().splitlines()


## create a directory tree if the path doesn't exist
#
def create_dirtree(dirtree_a):
    if not os.path.exists(dirtree_a):
        os.makedirs(dirtree_a)


def main():
    labfeatslist = "../exam_dat/train.txt"
    odir = "../keras_output"
    normalize_f = True

    ## collect training data
    #
    feats, labs = extract_data(labfeatslist)

    ## normalize features
    #
    if normalize_f:
        max_abs_scaler = preprocessing.MaxAbsScaler()
        feats = max_abs_scaler.fit_transform(feats)

    ## one-hot encode the labels; this is necessary when training against
    ## a softmax output with categorical cross-entropy
    #
    labs_cat = to_categorical(labs, num_classes=NO_OF_CLASSES)

    ## train the network and save it to odir
    #
    train_neural_network(feats, labs_cat, odir)


if __name__ == "__main__":
    main()
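
## ---------------------------------------------------------------------
## Minimal inference sketch (an assumption, not part of the original
## pipeline): reload the HDF5 file written by model.save() above and
## score new feature vectors. `feats_new` is a hypothetical
## (n, FEAT_DIM) array that must be scaled the same way as the
## training data.
#
# from keras.models import load_model
#
# model = load_model("../keras_output/dnn.nnet.h5")
# probs = model.predict(feats_new)     # per-class softmax probabilities
# preds = np.argmax(probs, axis=1)     # predicted class indices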