## import system modules
#
import os

## import ML and datatype modules
#
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from keras.utils import to_categorical
from sklearn import preprocessing
import numpy as np

np.set_printoptions(suppress=True)
np.random.seed(512)

## Default constants
#
NO_OF_CLASSES = 2
BATCH_SIZE = 32
FEAT_DIM = 26
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30


## This method defines the MLP architecture, trains it, and saves the
## model files under odir
#
def train_neural_network(xtrain, ytrain, odir):
    learning_rate = 0.009

    ## Define the network (MLP)
    #
    model = Sequential()
    model.add(Dense(N_nodes_hl1, input_dim=FEAT_DIM, activation="relu"))
    model.add(Dropout(0.3))
    model.add(Dense(N_nodes_hl2, activation="relu"))
    model.add(Dropout(0.1))
    model.add(Dense(N_nodes_hl3, activation="relu"))
    model.add(Dense(NO_OF_CLASSES, activation="softmax"))

    ## optimizer
    #
    sgd = SGD(lr=learning_rate)
    model.compile(loss="categorical_crossentropy", optimizer=sgd,
                  metrics=["accuracy"])
    model.summary()

    ## train the model
    #
    model.fit(x=xtrain, y=ytrain, batch_size=BATCH_SIZE, epochs=100)
    model_json = model.to_json()

    create_dirtree(odir)

    ## save the model architecture as JSON (text, so open in "w" mode)
    #
    with open(os.path.join(odir, "mlp_net.json"), "w") as json_net:
        json_net.write(model_json)

    ## save the model (architecture + weights) in HDF5 format
    #
    model.save(os.path.join(odir, "dnn.nnet.h5"), overwrite=True)
## end of method
#


## This method parses a label/feature file: each line holds an integer
## label followed by FEAT_DIM feature values
#
def extract_data(fp_a):
    dat = readflines(fp_a)

    ## initialize the feature and label lists
    #
    feats = []
    labs = []

    ## collect all the features and labels
    #
    for line in dat:
        l_fields = line.split()

        ## convert strings to int/float datatypes
        #
        feats_m = [float(f) for f in l_fields[1:]]
        labs_m = int(l_fields[0])
        feats.append(feats_m)
        labs.append(labs_m)

    feats = np.asarray(feats)
    labs = np.asarray(labs)

    ## return the features and labels as a tuple
    #
    return (feats, labs)


## This method reads the lines of a file and returns them as a list
#
def readflines(list_a):
    with open(list_a, "r") as fl:
        return fl.read().splitlines()


## create a directory tree if the path doesn't exist
#
def create_dirtree(dirtree_a):
    if not os.path.exists(dirtree_a):
        os.makedirs(dirtree_a)


def main():
    labfeatslist = "../exam_dat/train.txt"
    odir = "../keras_output"
    normalize_f = True

    ## collect training data
    #
    feats, labs = extract_data(labfeatslist)

    ## normalize features
    #
    if normalize_f:
        max_abs_scaler = preprocessing.MaxAbsScaler()
        feats = max_abs_scaler.fit_transform(feats)

    ## one-hot encode the labels; this is necessary when training against
    ## a softmax output with categorical cross-entropy
    #
    labs_cat = to_categorical(labs, num_classes=NO_OF_CLASSES)

    ## train the network and save it to odir
    #
    train_neural_network(feats, labs_cat, odir)


if __name__ == "__main__":
    main()
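
## ---------------------------------------------------------------------
## Minimal inference sketch (an assumption, not part of the original
## pipeline): reload the HDF5 file written by model.save() above and
## score new feature vectors. `feats_new` is a hypothetical
## (n, FEAT_DIM) array that must be scaled the same way as the
## training data.
#
# from keras.models import load_model
#
# model = load_model("../keras_output/dnn.nnet.h5")
# probs = model.predict(feats_new)     # per-class softmax probabilities
# preds = np.argmax(probs, axis=1)     # predicted class indices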