Train_Adagrad.py
from ComputeCostAndGradMiniBatch import ComputeCostAndGradMiniBatch
from RNTNModel import RNTNModel
import random
import numpy as np
from Test import Test
import pickle
import time
import sys


class Train_Adagrad:
    def __init__(self, dictionary, X_train, X_dev=None, X_test=None):
        self.X_train = X_train
        self.X_dev = X_dev
        self.X_test = X_test
        self.dictionary = dictionary
        self.costObj = ComputeCostAndGradMiniBatch()

        # Throwaway model instance, used only to read off the initial
        # parameter vector and the training hyperparameters.
        dumb_model = RNTNModel(dictionary)
        self.theta_init = dumb_model.getTheta()
        self.num_data = len(X_train)
        self.num_parameters = dumb_model.num_parameters

        # SGD params
        self.batch_size = dumb_model.batch_size
        self.num_batches = self.num_data // self.batch_size
        self.max_epochs = dumb_model.max_epochs
        self.learning_rate = dumb_model.learning_rate
        self.fudge = 1e-3  # smoothing term that guards against division by zero
        self.epoch_save_freq = 5  # save every 5 epochs

    def costWrapper(self, theta, X_train_mbatch):
        cost, grad = self.costObj.compute(
            theta, self.dictionary, X_train_mbatch, self.X_dev)
        return cost, grad

    def train(self):
        print("[INFO] Training ..")
        theta = self.theta_init

        # Loop over epochs
        for epochid in range(self.max_epochs):
            # Create a shuffled copy of the training data
            X_shuffled = random.sample(self.X_train, self.num_data)

            # Reset the Adagrad gradient history at the start of each epoch
            grad_history = np.zeros(self.num_parameters)

            # Loop over mini-batches; if the next batch would run past the
            # end of the data, the current batch absorbs the leftover examples
            for batch_id in range(self.num_batches):
                start_i = batch_id * self.batch_size
                end_i = (batch_id + 1) * self.batch_size
                if end_i + self.batch_size > self.num_data:
                    end_i = self.num_data
                X_batch = X_shuffled[start_i:end_i]
                theta, grad_history = self.trainOneBatch(
                    theta, X_batch, grad_history, batch_id)

            print("Finished epoch %d." % epochid)

            # Save the model at every 5 epochs
            if epochid % self.epoch_save_freq == 0:
                filename = "optResult-RNTN-" + \
                    time.strftime("%Y%m%d-%H%M%S") + "-epoch-" + str(epochid)
                with open(filename, 'wb') as output:
                    pickle.dump(theta, output, -1)

            # Evaluate on the train set
            testObj_train = Test(self.dictionary, self.X_train)
            tree_accuracy_train, root_accuracy_train = testObj_train.test(theta)
            print("[Train accuracy] tree: %.2f, root: %.2f" %
                  (tree_accuracy_train, root_accuracy_train))

            # Evaluate on the test set
            testObj_test = Test(self.dictionary, self.X_test)
            tree_accuracy_test, root_accuracy_test = testObj_test.test(theta)
            print("[Test accuracy] tree: %.2f, root: %.2f" %
                  (tree_accuracy_test, root_accuracy_test))
            sys.stdout.flush()

        return theta

    def trainOneBatch(self, theta, X, grad_history, batch_id):
        cost, grad = self.costWrapper(theta, X)
        if batch_id % 30 == 0:
            print('%d/%d batch cost: %s' % (batch_id, self.num_batches, cost))
            sys.stdout.flush()

        # Adagrad update: accumulate squared gradients, then scale each
        # parameter's step by the inverse root of its accumulated history
        grad_history_out = grad_history + grad ** 2
        theta_out = theta - self.learning_rate * \
            grad / (np.sqrt(grad_history_out) + self.fudge)
        return theta_out, grad_history_out
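

# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original trainer: the same Adagrad
# update as trainOneBatch, applied to a toy least-squares problem so the
# update rule can be sanity-checked in isolation. The design matrix A, the
# targets b, the step count, and the learning rate are made-up demo values;
# the fudge factor mirrors the one used above. Expected driver usage of the
# class itself (hypothetical, since data loading is project-specific):
#     trainer = Train_Adagrad(dictionary, X_train, X_dev, X_test)
#     theta = trainer.train()
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    A = rng.randn(20, 5)   # toy design matrix (assumed demo data)
    b = rng.randn(20)      # toy targets (assumed demo data)
    theta = np.zeros(5)
    grad_history = np.zeros(5)
    learning_rate, fudge = 0.01, 1e-3
    for step in range(2000):
        # Gradient of the mean squared error 0.5 * ||A.theta - b||^2 / n
        grad = A.T.dot(A.dot(theta) - b) / len(b)
        # Adagrad: accumulate squared gradients, then scale each parameter's
        # step by the inverse root of its history (plus the fudge factor)
        grad_history += grad ** 2
        theta -= learning_rate * grad / (np.sqrt(grad_history) + fudge)
    print('toy Adagrad residual: %.4f' % np.linalg.norm(A.dot(theta) - b))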