#!/usr/bin/env python
#
# file: $ISIP_EXP/tuh_dpath/exp_0074/scripts/train.py
#
# revision history:
#  20190925 (TE): first version
#
# usage:
#  python train.py mdir data
#
# arguments:
#  mdir: the directory where the output model is stored
#  data: the input data list
#
# This script trains a simple MLP model
#------------------------------------------------------------------------------

# import pytorch modules
#
import torch
import torch.nn as nn
from torch.optim import Adam

# import the model and all of its variables/functions
#
from model import *

# import modules
#
import sys
import os

#-----------------------------------------------------------------------------
#
# global variables are listed here
#
#-----------------------------------------------------------------------------

# general global values
#
NUM_ARGS = 2
NUM_EPOCHS = 100
BATCH_SIZE = 36
LEARNING_RATE = "lr"
BETAS = "betas"
EPS = "eps"
WEIGHT_DECAY = "weight_decay"

# for reproducibility, we seed the rng
#
set_seed(SEED1)

#------------------------------------------------------------------------------
#
# the main program starts here
#
#------------------------------------------------------------------------------

# function: main
#
# arguments: none
#
# return: none
#
# This method is the main function.
#
def main(argv):

    # ensure we have the correct number of arguments
    #
    if(len(argv) != NUM_ARGS):
        print("usage: python train.py [MDL_PATH] [TRAIN_SET]")
        sys.exit(-1)

    # define local variables
    #
    mdl_path = argv[0]
    fname = argv[1]
    num_feats = DEF_NUM_FEATS
    if("DL_NUM_FEATS" in os.environ):
        num_feats = int(os.environ["DL_NUM_FEATS"])

    # get the output directory name
    #
    odir = os.path.dirname(mdl_path)

    # if the output directory doesn't exist, make it
    # (skip when mdl_path has no directory component)
    #
    if odir and not os.path.exists(odir):
        os.makedirs(odir)

    # set the device to use GPU if available
    #
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # get a file pointer
    #
    try:
        train_fp = open(fname, "r")
    except IOError as e:
        print("[%s]: %s" % (fname, e.strerror))
        sys.exit(-1)

    # get array of the data
    #  data: [[0, 1, ... 26], [27, 28, ...] ...]
    #  labels: [0, 0, 1, ...]
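    #
    #  note: get_data() comes from model.py via the wildcard import above;
    #  it is assumed here to return two parallel Python lists, one of
    #  feature vectors (num_feats values each) and one of integer labels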
    #
    train_data, train_labels = get_data(train_fp, num_feats)

    # close the file
    #
    train_fp.close()

    # instantiate a model
    #
    model = Model(num_feats, NUM_NODES, NUM_CLASSES)

    # move the model to the compute device (gpu if available, otherwise cpu)
    #
    model.to(device)

    # set the adam optimizer parameters
    #
    opt_params = {LEARNING_RATE: 0.005,
                  BETAS: (0.9, 0.999),
                  EPS: 1e-08,
                  WEIGHT_DECAY: 0.00001}

    # set the loss and optimizer
    #
    loss_fx = nn.CrossEntropyLoss()
    loss_fx.to(device)

    # create an optimizer, and pass the model params to it
    #
    adam_opt = Adam(model.parameters(), **opt_params)

    # get the number of epochs to train on
    #
    epochs = NUM_EPOCHS

    # get the batch size
    #
    batch_size = BATCH_SIZE

    # get the number of batches (ceiling of train_data/batch_size)
    #
    num_batches = -(-len(train_data) // batch_size)

    # for each epoch
    #
    for epoch in range(epochs):

        # index represents the batch number
        #
        index = 0

        # for each batch in increments of batch size
        #
        for batch in range(0, len(train_data), batch_size):

            # set all gradients to 0
            #
            adam_opt.zero_grad()

            # collect the samples as a batch
            #
            batch_data = torch.tensor(train_data[batch:batch + batch_size],
                                      dtype=torch.float32).to(device)
            batch_labels = torch.tensor(
                train_labels[batch:batch + batch_size]).long().to(device)

            # feed the network the batch
            #
            output = model(batch_data)

            # get the loss
            #
            loss = loss_fx(output, batch_labels)

            # perform back propagation
            #
            loss.backward()
            adam_opt.step()

            # display an informational message
            #
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, epochs, index + 1, num_batches,
                          loss.item()))

            # increment the batch number
            #
            index += 1

    # save the model
    #
    torch.save(model.state_dict(), mdl_path)

    # exit gracefully
    #
    return True
#
# end of function

# begin gracefully
#
if __name__ == '__main__':
    main(sys.argv[1:])

#
# end of file
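
#
# example: reloading the saved model for inference (a minimal sketch, not
# executed by this script; it assumes the Model class and the DEF_NUM_FEATS,
# NUM_NODES and NUM_CLASSES constants in model.py match the values used at
# training time, and "mdl.pth" and feats are hypothetical placeholders)
#
#  from model import *
#  import torch
#
#  model = Model(DEF_NUM_FEATS, NUM_NODES, NUM_CLASSES)
#  model.load_state_dict(torch.load("mdl.pth"))
#  model.eval()
#
#  with torch.no_grad():
#      output = model(torch.tensor(feats, dtype=torch.float32))
#      preds = torch.argmax(output, dim=1)
#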