-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrestore_lstm.py
89 lines (74 loc) · 3.22 KB
/
restore_lstm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Code to restore a saved model of lstm and run it
from __future__ import division, print_function, absolute_import
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
import tensorflow as tf
import numpy as np
def inference(model,X,Y, remark=None):
if remark:
print(remark)
labels_prob = model.predict(X)
# Obtain labels from predicted probabilities
my_labels = np.argmax(labels_prob,axis=1)
return my_labels
def accuracy(Y, my_labels, remark=None):
if remark:
print(remark)
actual_labels = np.argmax(Y,axis=1)
correct = np.sum(np.equal(actual_labels,my_labels))
print('Accuracy after voting = %f'%(correct/len(Y))) # This is the accuracy from labels predicted from probabilities
# IMDB Dataset loading
train, val, test = imdb.load_data(path='imdb.pkl', n_words=10000, valid_portion=0.2)
trainX, trainY = train
valX, valY = val
testX, testY = test
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=100, value=0.)
valX = pad_sequences(valX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
valY = to_categorical(valY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Network building
net = tflearn.input_data([None, 100])
net = tflearn.embedding(net, input_dim=10000, output_dim=64)
net = tflearn.lstm(net, 64, dropout=0.8) # edited
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.005, loss='categorical_crossentropy')
N=7 # No. of classifiers
majority=1 # Whether we are employing majority voting or checking one single classifier
if majority:
models = [tflearn.DNN(net) for i in range(N)] # Create space for N models
models[0].load("/tmp/tflearn_logs0/8250") # Loads a model with the given name (cannot be pre-determined)
models[1].load("/tmp/tflearn_logs1/8282")
models[2].load("/tmp/tflearn_logs2/8232")
models[3].load("/tmp/tflearn_logs3/8184")
models[4].load("/tmp/tflearn_logs4/8216")
models[5].load("/tmp/tflearn_logs5/8254")
models[6].load("/tmp/tflearn_logs6/8208")
for i in range(N):
score_val = models[i].evaluate(valX,valY) # Evaluate all the models
score_test = models[i].evaluate(testX,testY)
print('Accuracy of classifier %d on validation set = %f and test set = %f'%(i+1,score_val[0],score_test[0]))
else:
# Used to load and evaluate a single model
model = tflearn.DNN(net)
model.load("/tmp/tflearn_logs4/8072")
score_val = model.evaluate(valX,valY)
score_test = model.evaluate(testX,testY)
print('Accuracy on validation set = %f and test set = %f'%(score_val[0],score_test[0]))
# For majority voting
if majority:
my_labels = [i for i in range(N)]
for i in range(N):
my_labels[i] = inference(models[i],testX,testY) # Obtain the labels of all the models
# Now vote and check accuracy
total_labels = np.zeros(len(testY)); final_labels = np.zeros(len(testY))
for i in range(len(testY)):
for j in range(N):
total_labels[i] = total_labels[i] + (my_labels[j])[i]
final_labels[i] = np.round(total_labels[i]/N)
accuracy(testY, final_labels, 'Employing majority voting on test set...')