## import system modules #
import os
import sys

## import ML and datatype modules #
import tensorflow as tf
import numpy as np
from keras.utils import to_categorical
from sklearn import preprocessing
from tensorflow.examples.tutorials.mnist import input_data

# NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x;
# this script targets the TF 1.x API throughout.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
logs_path = '../logs/'

## Default constants #
NO_OF_CLASSES = 10
BATCH_SIZE = 32
FEAT_DIM = 784
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30


## define the network architecture #
## This model is a simple multilayer perceptron network with 3 hidden layers.
## Input to the layer has the dimensions equal to feature dimensions.
## We create a complete graph in this method with input placeholder as an
## input argument and output tensor as the returning value #
def neural_network_model(data):
    """Build a 3-hidden-layer MLP graph on top of ``data``.

    Args:
        data: input placeholder/tensor of shape (batch, FEAT_DIM).

    Returns:
        The pre-softmax logits tensor, named "last_layer", of shape
        (batch, NO_OF_CLASSES).
    """
    # Layer widths, input to output.  The four copy-pasted parameter
    # dicts of the original are folded into one loop; variable names
    # (w1..w4, b1..b4) are kept identical so saved checkpoints and
    # graph lookups keep working.
    dims = [FEAT_DIM, N_nodes_hl1, N_nodes_hl2, N_nodes_hl3, NO_OF_CLASSES]

    act = data
    n_layers = len(dims) - 1
    for i in range(n_layers):
        w = tf.Variable(tf.random_normal([dims[i], dims[i + 1]]),
                        name='w%d' % (i + 1))
        b = tf.Variable(tf.random_normal([dims[i + 1]]),
                        name='b%d' % (i + 1))
        if i < n_layers - 1:
            # hidden layers: affine transform followed by ReLU
            act = tf.nn.relu(tf.add(tf.matmul(act, w), b))
        else:
            # output layer: raw logits, no activation (softmax is applied
            # by the loss / by the caller)
            act = tf.add(tf.matmul(act, w), b, name="last_layer")
    return act
## This method trains and evaluates the neural network on MNIST data #
def train_neural_network():
    """Train the MLP on MNIST with Adam, then print test-set accuracy.

    Builds the graph via neural_network_model(), optimizes a softmax
    cross-entropy loss, and evaluates on mnist.test at the end.
    """
    learning_rate = 0.002
    epoch_iter = 30

    ## input/output placeholders where data would be plugged in... #
    x = tf.placeholder('float', [None, FEAT_DIM], name="input")
    y_ = tf.placeholder('float', [None, NO_OF_CLASSES], name="output")

    ## define the network #
    logits = neural_network_model(x)
    prediction = tf.nn.softmax(logits, name="op_to_restore")

    ## loss function: cross-entropy between logits and one-hot labels #
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))

    # Gradient-based optimizer (Adam)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train = optimizer.minimize(loss)

    ## create a session for the graph (graph initialization) #
    with tf.Session() as sess:
        ## initialize all the variables; global_variables_initializer()
        ## replaces the long-deprecated initialize_all_variables() #
        sess.run(tf.global_variables_initializer())
        # NOTE(review): this local shadows the module-level BATCH_SIZE (32);
        # kept at 100 to preserve training behavior -- confirm which was
        # intended.
        batch_size = 100

        ## iterate over epochs (complete forward-backward pass over the
        ## entire training set per epoch) #
        for epoch in range(epoch_iter):
            total_batch = int(mnist.train.num_examples / batch_size)
            # running mean of the minibatch losses for this epoch
            epoch_loss = 0
            ## minibatch training: feed the data in smaller chunks #
            for i in range(total_batch):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                ## run one optimization step; feed_dict maps the input/
                ## output placeholders to this minibatch #
                _, ls = sess.run([train, loss],
                                 feed_dict={x: epoch_x, y_: epoch_y})
                epoch_loss += ls / total_batch
            print("Epoch ", epoch, " completed out of ", epoch_iter,
                  " loss: ", epoch_loss)

        # BUG FIX: was a Python 2 `print` statement (syntax error under
        # Python 3 and inconsistent with the print() calls above).
        print("optimization finished...")

        ## score on the test set #
        correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                      tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy: ",
              accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}))


def extract_data(fp_a):
    """Parse a whitespace-delimited feature file into numpy arrays.

    Each line is expected to look like "<label> <feat_1> ... <feat_n>".

    Args:
        fp_a: path to the feature file.

    Returns:
        Tuple (feats, labs): feats is a float array of the per-line
        features; labs holds, per line, the list of integer digits of
        the first field.
    """
    dat = readflines(fp_a)

    ## initialize the feature and label lists #
    feats = []
    labs = []

    ## collect all the features and labels #
    for line in dat:
        l_fields = line.split()
        # BUG FIX: under Python 3, map() returns a lazy iterator, so the
        # original appended map objects instead of numbers.  List
        # comprehensions restore the Python 2 behavior (lists of values).
        # NOTE(review): iterating int over l_fields[0] digit-by-digit
        # looks like it was meant to be a single int(l_fields[0]) --
        # kept as-is to preserve the original label shape; confirm with
        # callers.
        feats.append([float(v) for v in l_fields[1:]])
        labs.append([int(c) for c in l_fields[0]])

    feats = np.asarray(feats)
    labs = np.asarray(labs)

    ## return feats and labels as a tuple #
    return (feats, labs)


## This method reads the lines of a file and returns them as a list #
def readflines(list_a):
    """Return the lines of file ``list_a`` as a list, newlines stripped."""
    with open(list_a, 'r') as fl:
        return fl.read().splitlines()


def create_dirtree(dirtree_a):
    """Create the directory tree ``dirtree_a`` if it does not exist yet."""
    if not os.path.exists(dirtree_a):
        os.makedirs(dirtree_a)


def main():
    ## gather the data along with probs from rf and nb model #
    train_neural_network()


if __name__ == "__main__":
    main()