Nov-26-2018, 04:17 AM
Hello dear forum members,
I am having difficulty understanding how I can feed my own data into a neural network after training and validating it. I am following the example provided here. I have no problems training the model and validating it, but how do I feed other data (a CSV) in for predictions? Any help would be greatly appreciated, as programming is admittedly not my forte. Here is my working code:
import numpy as np               # linear algebra
import pandas as pd              # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

sns.set(style='whitegrid')

# outcome variable: 'target'; predictor variables: feature1..feature95
iris = pd.read_csv('filename.csv')
iris.shape
iris.head()
X = iris.drop(labels=['id'], axis=1).values
y = iris.target.values

# set seed for numpy and tensorflow
# set for reproducible results
seed = 5
np.random.seed(seed)
tf.set_random_seed(seed)

# set replace=False to avoid double sampling
train_index = np.random.choice(len(X), round(len(X) * 0.8), replace=False)
# the remaining indices form the test set
test_index = np.array(list(set(range(len(X))) - set(train_index)))
train_X = X[train_index]
train_y = y[train_index]
test_X = X[test_index]
test_y = y[test_index]

# Define the min-max normalization function
def min_max_normalized(data):
    col_max = np.max(data, axis=0)
    col_min = np.min(data, axis=0)
    return np.divide(data - col_min, col_max - col_min)

# Normalization must be done after the data set is split,
# otherwise the test set would be affected by the training set
train_X = min_max_normalized(train_X)
test_X = min_max_normalized(test_X)

# Begin building the model framework
# Declare the variables that need to be learned and initialize them
A = tf.Variable(tf.random_normal(shape=[91, 1]))
b = tf.Variable(tf.random_normal(shape=[1, 1]))
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Define placeholders
data = tf.placeholder(dtype=tf.float32, shape=[None, 91])
target = tf.placeholder(dtype=tf.float32, shape=[None, 1])

# Declare the model you need to learn
mod = tf.matmul(data, A) + b

# Declare loss function
# Use the sigmoid cross-entropy loss function:
# first a sigmoid on the model output, then the cross-entropy loss
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=mod, labels=target))

# Define the learning rate, batch_size etc.
learning_rate = 0.003
batch_size = 100
iter_num = 15000

# Define the optimizer
opt = tf.train.GradientDescentOptimizer(learning_rate)

# Define the goal
goal = opt.minimize(loss)

# Define the accuracy
# The default threshold is 0.5, rounded off directly
prediction = tf.round(tf.sigmoid(mod))
# Bool into float32 type
correct = tf.cast(tf.equal(prediction, target), dtype=tf.float32)
# Average
accuracy = tf.reduce_mean(correct)

# Start training the model
# Define the variables that store the results
loss_trace = []
train_acc = []
test_acc = []

# training loop
for epoch in range(iter_num):
    # Generate random batch index
    batch_index = np.random.choice(len(train_X), size=batch_size)
    batch_train_X = train_X[batch_index]
    # convert labels into a column matrix so the shape matches the placeholder
    batch_train_y = np.matrix(train_y[batch_index]).T
    sess.run(goal, feed_dict={data: batch_train_X, target: batch_train_y})
    temp_loss = sess.run(loss, feed_dict={data: batch_train_X, target: batch_train_y})
    temp_train_acc = sess.run(accuracy, feed_dict={data: train_X, target: np.matrix(train_y).T})
    temp_test_acc = sess.run(accuracy, feed_dict={data: test_X, target: np.matrix(test_y).T})
    # record the results
    loss_trace.append(temp_loss)
    train_acc.append(temp_train_acc)
    test_acc.append(temp_test_acc)
    # output
    if (epoch + 1) % 300 == 0:
        print('epoch: {:4d} loss: {:5f} train_acc: {:5f} test_acc: {:5f}'.format(
            epoch + 1, temp_loss, temp_train_acc, temp_test_acc))

# Visualization of the results
# loss function
plt.plot(loss_trace)
plt.title('Cross Entropy Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

# accuracy
plt.plot(train_acc, 'b-', label='train accuracy')
plt.plot(test_acc, 'k-', label='test accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Train and Test Accuracy')
plt.legend(loc='best')
plt.show()
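From what I can piece together, the prediction step would look something like the rough sketch below, but I am not sure this is right. (Here 'new_data.csv' is just a placeholder name; I am assuming it has the same 'id' column and the same 91 feature columns as the training file, no 'target' column, and that the session from the training run above is still open.)

# rough sketch only: load the new rows and drop the id column, as in training
new_data = pd.read_csv('new_data.csv')          # placeholder file name
new_X = new_data.drop(labels=['id'], axis=1).values

# normalize with the same min-max function used above
# (strictly, the training-set min/max should probably be reused here)
new_X = min_max_normalized(new_X)

# run the trained graph on the new rows; 'prediction' is the rounded sigmoid,
# so this returns 0/1 labels, while tf.sigmoid(mod) would return probabilities
new_pred = sess.run(prediction, feed_dict={data: new_X})
print(new_pred)

Is calling sess.run on the prediction node with only the data placeholder fed in the right way to do this, or am I missing a step?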