## import system modules
#
import os
import sys

## import ML modules
#
import tensorflow as tf
import numpy as np
from keras.utils import to_categorical
from sklearn import preprocessing

logs_path = '../logs/'

np.random.seed(512)

## Default constants
#
NO_OF_CLASSES = 2
BATCH_SIZE = 32
FEAT_DIM = 26
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30

## define the network architecture
#
## This model is a simple multilayer perceptron network with 3 hidden layers.
## The input to the first layer has dimensions equal to the feature dimension.
## This method builds the complete graph, taking the input placeholder as an
## argument and returning the output layer's tensor.
#
def neural_network_model(data):

    ## dictionaries specifying the weights and biases of each layer
    #
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([FEAT_DIM, N_nodes_hl1]), name='w1'),
                      'biases': tf.Variable(tf.random_normal([N_nodes_hl1]), name='b1')}

    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([N_nodes_hl1, N_nodes_hl2]), name='w2'),
                      'biases': tf.Variable(tf.random_normal([N_nodes_hl2]), name='b2')}

    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([N_nodes_hl2, N_nodes_hl3]), name='w3'),
                      'biases': tf.Variable(tf.random_normal([N_nodes_hl3]), name='b3')}

    output_layer = {'weights': tf.Variable(tf.random_normal([N_nodes_hl3, NO_OF_CLASSES]), name='w4'),
                    'biases': tf.Variable(tf.random_normal([NO_OF_CLASSES]), name='b4')}

    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)  ## add non-linearity to the affine transform

    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)  ## add non-linearity to the affine transform

    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)  ## add non-linearity to the affine transform

    output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases'], name="last_layer")

    ## return the final layer's output
    #
    return output
## end of method
#
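
## For reference, the dimensions flow through the network as
## FEAT_DIM -> N_nodes_hl1 -> N_nodes_hl2 -> N_nodes_hl3 -> NO_OF_CLASSES,
## i.e. 26 -> 300 -> 30 -> 30 -> 2. Each hidden layer computes
## l_k = relu(l_{k-1} W_k + b_k); the output layer is a plain affine transform
## whose logits are turned into class probabilities by the softmax defined in
## train_neural_network() below.
#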

## This method trains the neural network and collects statistics about the
## graph along the way.
#
def train_neural_network(xtrain, ytrain, odir):
    learning_rate = 0.0008
    epoch_iter = 100

    ## input/output placeholders where the data will be fed in
    #
    x = tf.placeholder('float', [None, FEAT_DIM], name="input")
    y_ = tf.placeholder('float', name="output")

    ## define the network
    #
    logits = neural_network_model(x)
    prediction = tf.nn.softmax(logits, name="op_to_restore")  ## softmax normalizes the outputs

    ## define the loss function; the optimizer will minimize the cross-entropy
    ## between the predictions and the labels
    #
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))

    ## Main op of the training procedure. The "train" op defined here tries to minimize the loss.
    #
    with tf.name_scope('ADAM'):
        # Adam optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train = optimizer.minimize(loss)

    ## Op to calculate the gradient of the loss w.r.t. the last trainable variable. The grads
    ## variable is not needed for training; it only demonstrates that you can collect stats
    ## for a single layer.
    #
    grads = tf.gradients(loss, tf.trainable_variables()[-1])

    with tf.name_scope('Accuracy'):
        ## accuracy calculation by comparing the predicted and reference labels
        #
        acc = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        acc = tf.reduce_mean(tf.cast(acc, tf.float32))

    ## summary and display variables
    #
    loss_sum = tf.summary.scalar("loss", loss)
    acc_sum = tf.summary.scalar("accuracy", acc)
    grads_sum = tf.summary.tensor_summary("gradients_last_layer", grads)

    ## Merge all summaries into a single op. These summaries can be displayed with TensorBoard.
    #
    merged_summary_op = tf.summary.merge([loss_sum, acc_sum, grads_sum])

    ## create a session for the graph (graph initialization)
    #
    with tf.Session() as sess:

        ## initialize all the variables. Note that before this point, all the variables are empty buckets!
        #
        sess.run(tf.global_variables_initializer())

        ## initialize the summary writer (for TensorBoard)
        #
        summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

        ## iterate over epochs (a complete forward/backward pass over the entire training set)
        #
        for epoch in range(epoch_iter):

            ## variables to keep track of progress during training
            #
            epoch_loss = 0
            epoch_accuracy = 0

            ## minibatch training; splitting the input data into smaller chunks is better
            #
            for i in range(int(len(xtrain) / BATCH_SIZE)):
                epoch_x = xtrain[i * BATCH_SIZE: i * BATCH_SIZE + BATCH_SIZE]
                epoch_y = ytrain[i * BATCH_SIZE: i * BATCH_SIZE + BATCH_SIZE]

                ## run the session and collect the intermediate stats. The feed_dict kwarg maps the
                ## input/output placeholders to the features/labels.
                #
                _, ac, ls, summary = sess.run([train, acc, loss, merged_summary_op],
                                              feed_dict={x: epoch_x, y_: epoch_y})

                ## write the summary to the logs so it can be visualized later
                #
                summary_writer.add_summary(summary, epoch * int(len(xtrain) / BATCH_SIZE) + i)

                ## update stats
                #
                epoch_loss += ls
                epoch_accuracy += ac

            print("Epoch ", epoch, " completed out of ", epoch_iter, " loss: ", epoch_loss, " accuracy: ", ac)

        ## saver module to save the tf graph variables
        #
        saver = tf.train.Saver()
        create_dirtree(odir)
        save_path = os.path.join(odir, "mlp_mdl")
        saver.save(sess=sess, save_path=save_path)  ## model saved with its weights
    ## end of session
    #
## end of method
#


## collect labels and features from the input file
#
def extract_data(fp_a):
    dat = readflines(fp_a)

    ## initialize the feature and label lists
    #
    feats = []
    labs = []

    ## collect all the features and labels
    #
    for line in dat:
        l_fields = line.split()

        ## convert the strings to int/float datatypes
        #
        feats_m = list(map(float, l_fields[1:]))
        labs_m = int(l_fields[0])

        feats.append(feats_m)
        labs.append(labs_m)

    ## convert the data into numpy arrays
    #
    feats = np.asarray(feats)
    labs = np.asarray(labs)

    ## return the features and labels as a tuple
    #
    return (feats, labs)
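
## Note: each line of the input file is assumed to contain a class label
## followed by FEAT_DIM (= 26) feature values, all whitespace separated, e.g.
##
##     1 0.42 -3.10 0.07 ... (26 feature values in total; values are only illustrative)
##
## The first field is parsed as the integer label and the remaining fields as
## the float feature vector, matching extract_data() above.
#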

## This method reads the lines of a file list and returns them as a list
#
def readflines(list_a):
    with open(list_a, 'r') as fl:
        return fl.read().splitlines()


## create a directory tree if the path doesn't exist
#
def create_dirtree(dirtree_a):
    if not os.path.exists(dirtree_a):
        os.makedirs(dirtree_a)


## main function starts here
#
def main():
    labfeatslist = "../exam_dat/train.txt"
    odir = "../tf_output"
    normalize_f = True

    ## collect the training data
    #
    feats, labs = extract_data(labfeatslist)

    ## normalize the features
    #
    if normalize_f:
        max_abs_scalar = preprocessing.MaxAbsScaler()
        max_abs_scalar.fit(feats)
        feats = max_abs_scalar.transform(feats)

    ## one-hot encode the labels; this is necessary when using softmax
    #
    labs_cat = to_categorical(labs, num_classes=NO_OF_CLASSES)

    ## train the network on the extracted features and labels
    #
    train_neural_network(feats, labs_cat, odir)
## end of main
#

if __name__ == "__main__":
    main()
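
## ---------------------------------------------------------------------------
## The training graph names its tensors ("input", "op_to_restore") and is saved
## with tf.train.Saver, so it can be reloaded later for inference. The function
## below is only a minimal sketch of such a restore step, not part of the
## original training flow; it would typically live in a separate script run in
## a fresh process. It assumes the save location "../tf_output" and checkpoint
## prefix "mlp_mdl" used in train_neural_network(), and it expects features
## scaled the same way as the training data (MaxAbsScaler).
#
def restore_and_predict(feats, mdl_dir="../tf_output"):
    with tf.Session() as sess:
        ## rebuild the saved graph structure and load the trained weights
        #
        saver = tf.train.import_meta_graph(os.path.join(mdl_dir, "mlp_mdl.meta"))
        saver.restore(sess, os.path.join(mdl_dir, "mlp_mdl"))

        ## look up the tensors that were given explicit names during training
        #
        graph = tf.get_default_graph()
        x = graph.get_tensor_by_name("input:0")
        prediction = graph.get_tensor_by_name("op_to_restore:0")

        ## forward pass: softmax probabilities -> predicted class indices
        #
        probs = sess.run(prediction, feed_dict={x: feats})
        return np.argmax(probs, axis=1)

## The summaries written to logs_path during training can be inspected with
## TensorBoard, e.g.:  tensorboard --logdir=../logs/
#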