#!/usr/bin/env python
#
# file: $ISIP_EXP/tuh_dpath/exp_0074/scripts/nedc_train_mdl.py
#
# revision history:
#  20190925 (TE): first version
#
# usage:
#  python nedc_train_mdl.py odir train_set
#
# This script trains a simple MLP model
#------------------------------------------------------------------------------

# import pytorch modules
#
import torch
import torch.nn as nn
from torch.optim import Adam

# import the model
#
from model import Model

# import modules
#
import numpy as np
import sys
import os
import random

#-----------------------------------------------------------------------------
#
# global variables are listed here
#
#-----------------------------------------------------------------------------

# for reproducibility, we seed the rng
#
SEED1 = 1337

# general global values
#
NUM_FEATS = 26
NUM_NODES = 26
NUM_CLASSES = 2
NUM_ARGS = 2
NUM_EPOCHS = 100
BATCH_SIZE = 36
NEW_LINE = "\n"
LEARNING_RATE = "lr"
BETAS = "betas"
EPS = "eps"
WEIGHT_DECAY = "weight_decay"
MODEL_FILE = "model.pth"

#------------------------------------------------------------------------------
#
# helper functions are listed here
#
#------------------------------------------------------------------------------

# function: set_seed
#
# arguments: seed - the seed for all the rng
#
# returns: none
#
# this method seeds all the random number generators and makes
# the results deterministic
#
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed(SEED1)

# function: get_data
#
# arguments: fp - file pointer
#
# returns: data - the signals/features
#          labels - the correct labels for them
#
# this method takes in a fp and returns the data and labels
#
def get_data(fp):

    # initialize the data and labels
    #
    data = []
    labels = []

    # for each line of the file
    #
    for line in fp.read().split(NEW_LINE):

        # split the string by white space
        #
        temp = line.split()

        # skip the line unless it has exactly one label followed by
        # NUM_FEATS features
        #
        if len(temp) != NUM_FEATS + 1:
            continue

        # append the labels and data
        #
        labels.append(int(temp[0]))
        data.append([float(sample) for sample in temp[1:]])

    # close the file
    #
    fp.close()

    # exit gracefully
    #
    return data, labels
#
# end of function

#------------------------------------------------------------------------------
#
# the main program starts here
#
#------------------------------------------------------------------------------

# function: main
#
# arguments: none
#
# return: none
#
# This method is the main function.
#
def main(argv):

    # ensure we have the correct amount of arguments
    #
    if len(argv) != NUM_ARGS:
        print("usage: python nedc_train_mdl.py [ODIR] [TRAIN_SET]")
        exit(-1)

    # define local variables
    #
    odir = argv[0]
    fname = argv[1]

    # set the device to use GPU if available
    #
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # get a file pointer
    #
    try:
        train_fp = open(fname, "r")
    except IOError as e:
        print("[%s]: %s" % (fname, e.strerror))
        exit(-1)

    # get array of the data
    #  data: [[0, 1, ... 26], [27, 28, ...] ...]
    #  labels: [0, 0, 1, ...]
    #
    train_data, train_labels = get_data(train_fp)

    # instantiate a model
    #
    model = Model(NUM_FEATS, NUM_NODES, NUM_CLASSES)

    # move the model to the device (a no-op when running on the cpu)
    #
    model.to(device)

    # set the adam optimizer parameters
    #
    opt_params = {
        LEARNING_RATE: 0.005,
        BETAS: (0.9, 0.999),
        EPS: 1e-08,
        WEIGHT_DECAY: 0.00001
    }

    # set the loss and optimizer
    #
    loss_fx = nn.CrossEntropyLoss()
    loss_fx.to(device)

    # create an optimizer, and pass the model params to it
    #
    adam_opt = Adam(model.parameters(), **opt_params)

    # get the number of epochs to train on
    #
    epochs = NUM_EPOCHS

    # get the batch size
    #
    batch_size = BATCH_SIZE

    # get the number of batches (ceiling of train_data/batch_size)
    #
    num_batches = -(-len(train_data) // batch_size)

    # for each epoch
    #
    for epoch in range(epochs):

        # index represents the batch number
        #
        index = 0

        # for each batch in increments of batch size
        #
        for batch in range(0, len(train_data), batch_size):

            # set all gradients to 0
            #
            adam_opt.zero_grad()

            # collect the samples as a batch
            #
            batch_data = torch.tensor(train_data[batch:batch + batch_size],
                                      dtype=torch.float32).to(device)
            batch_labels = torch.tensor(
                train_labels[batch:batch + batch_size]).long().to(device)

            # feed the network the batch
            #
            output = model(batch_data)

            # get the loss
            #
            loss = loss_fx(output, batch_labels)

            # perform back propagation
            #
            loss.backward()
            adam_opt.step()

            # display informational message
            #
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, epochs, index + 1, num_batches,
                          loss.item()))

            # increment the batch number
            #
            index += 1

    # generate the fname as odir + model.pth
    #
    fname = os.path.join(odir, MODEL_FILE)

    # save the model
    #
    torch.save(model.state_dict(), fname)

    # exit gracefully
    #
    return True
#
# end of function

# begin gracefully
#
if __name__ == '__main__':
    main(sys.argv[1:])

#
# end of file
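
#------------------------------------------------------------------------------
#
# reference sketch: the Model class used in main() is imported from model.py,
# which is not included in this file.  the class below is a minimal,
# assumption-based sketch of a two-layer MLP that is consistent with how
# Model is constructed (Model(NUM_FEATS, NUM_NODES, NUM_CLASSES)) and used
# (its raw outputs are passed to nn.CrossEntropyLoss).  the actual model.py
# may differ; the name ExampleMLP is hypothetical and this class is not
# referenced anywhere in the training code above.
#
#------------------------------------------------------------------------------

class ExampleMLP(nn.Module):

    # constructor: build two fully connected layers with a ReLU in between
    #
    def __init__(self, num_feats, num_nodes, num_classes):
        super(ExampleMLP, self).__init__()
        self.fc1 = nn.Linear(num_feats, num_nodes)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(num_nodes, num_classes)

    # method: forward
    #
    # return raw class scores (logits); nn.CrossEntropyLoss applies
    # log-softmax internally, so no softmax is needed here
    #
    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))
#
# end of class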