diff --git a/pytorchTUT/01_torch_numpy.py b/pytorchTUT/01_torch_numpy.py
new file mode 100644
index 0000000..fbd195b
--- /dev/null
+++ b/pytorchTUT/01_torch_numpy.py
@@ -0,0 +1,44 @@
+import torch
+import numpy as np
+
+# details about math operations in torch can be found at: http://pytorch.org/docs/torch.html#math-operations
+
+# abs
+data = [-1, -2, 1, 2]
+tensor = torch.FloatTensor(data)    # 32-bit floating point
+print(
+    '\nabs',
+    '\nnumpy: ', np.abs(data),          # [1 2 1 2]
+    '\ntorch: ', torch.abs(tensor)      # [1 2 1 2]
+)
+
+# sin
+print(
+    '\nsin',
+    '\nnumpy: ', np.sin(data),      # [-0.84147098 -0.90929743  0.84147098  0.90929743]
+    '\ntorch: ', torch.sin(tensor)  # [-0.8415 -0.9093  0.8415  0.9093]
+)
+
+# mean
+print(
+    '\nmean',
+    '\nnumpy: ', np.mean(data),         # 0.0
+    '\ntorch: ', torch.mean(tensor)     # 0.0
+)
+
+# matrix multiplication
+data = [[1,2], [3,4]]
+tensor = torch.FloatTensor(data)    # 32-bit floating point
+# correct method
+print(
+    '\nmatrix multiplication (matmul)',
+    '\nnumpy: ', np.matmul(data, data),     # [[7, 10], [15, 22]]
+    '\ntorch: ', torch.mm(tensor, tensor)   # [[7, 10], [15, 22]]
+)
+# incorrect method
+data = np.array(data)
+print(
+    '\nmatrix multiplication (dot)',
+    '\nnumpy: ', data.dot(data),        # [[7, 10], [15, 22]]
+    '\ntorch: ', tensor.dot(tensor)     # this flattens the tensor to [1, 2, 3, 4] and returns the scalar 30.0
+)
\ No newline at end of file
diff --git a/pytorchTUT/02_variable.py b/pytorchTUT/02_variable.py
new file mode 100644
index 0000000..1527104
--- /dev/null
+++ b/pytorchTUT/02_variable.py
@@ -0,0 +1,29 @@
+import torch
+from torch.autograd import Variable
+
+# A Variable in torch is used to build a computational graph,
+# but this graph is dynamic, unlike the static graphs in Tensorflow or Theano.
+# So torch has no placeholders; variables are simply passed into the graph as it is built.
+
+tensor = torch.FloatTensor([[1,2],[3,4]])        # build a tensor
+variable = Variable(tensor, requires_grad=True)  # build a variable, usually used to compute gradients
+
+print(tensor)       # [torch.FloatTensor of size 2x2]
+print(variable)     # [torch.FloatTensor of size 2x2]
+
+# so far the tensor and the variable look the same.
+# However, the variable is part of the graph and takes part in automatic gradient computation.
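Side note: in PyTorch 0.4 and later the Variable wrapper is merged into Tensor, so the same graph can be built from a plain tensor created with requires_grad=True. A minimal sketch of the equivalent, assuming a recent PyTorch version:

    import torch
    v = torch.tensor([[1., 2.], [3., 4.]], requires_grad=True)   # plays the role of the Variable below
    out = (v * v).mean()     # same mean(x^2) computed below
    out.backward()           # fills v.grad with d(out)/dv = v/2
    print(v.grad)            # tensor([[0.5000, 1.0000], [1.5000, 2.0000]])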
+
+t_out = torch.mean(tensor*tensor)       # x^2
+v_out = torch.mean(variable*variable)   # x^2
+print(t_out)
+print(v_out)    # 7.5
+
+v_out.backward()    # backpropagation from v_out
+# v_out = 1/4 * sum(variable*variable)
+# the gradients w.r.t. the variable: d(v_out)/d(variable) = 1/4*2*variable = variable/2
+print(variable.grad)
+'''
+ 0.5000  1.0000
+ 1.5000  2.0000
+'''
diff --git a/pytorchTUT/03_activation.py b/pytorchTUT/03_activation.py
new file mode 100644
index 0000000..45f3771
--- /dev/null
+++ b/pytorchTUT/03_activation.py
@@ -0,0 +1,41 @@
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+import matplotlib.pyplot as plt
+
+# fake data
+x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(200,)
+x = Variable(x)
+x_np = x.data.numpy()
+
+# the following are popular activation functions
+y_relu = F.relu(x).data.numpy()
+y_sigmoid = F.sigmoid(x).data.numpy()
+y_tanh = F.tanh(x).data.numpy()
+y_softplus = F.softplus(x).data.numpy()
+# y_softmax = F.softmax(x)  softmax is a special activation function; it outputs a probability distribution
+
+
+# use plt to visualize these activation functions
+plt.figure(1, figsize=(8, 6))
+plt.subplot(221)
+plt.plot(x_np, y_relu, c='red', label='relu')
+plt.ylim((-1, 5))
+plt.legend(loc='best')
+
+plt.subplot(222)
+plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
+plt.ylim((-0.2, 1.2))
+plt.legend(loc='best')
+
+plt.subplot(223)
+plt.plot(x_np, y_tanh, c='red', label='tanh')
+plt.ylim((-1.2, 1.2))
+plt.legend(loc='best')
+
+plt.subplot(224)
+plt.plot(x_np, y_softplus, c='red', label='softplus')
+plt.ylim((-0.2, 6))
+plt.legend(loc='best')
+
+plt.show()
\ No newline at end of file
diff --git a/pytorchTUT/04_regression.py b/pytorchTUT/04_regression.py
new file mode 100644
index 0000000..90f02b2
--- /dev/null
+++ b/pytorchTUT/04_regression.py
@@ -0,0 +1,53 @@
+import torch
+from torch.autograd import Variable
+import torch.nn.functional as F
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+
+class Net(torch.nn.Module):
+    def __init__(self, n_feature, n_hidden, n_output):
+        super(Net, self).__init__()
+        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
+        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer
+
+    def forward(self, x):
+        x = F.relu(self.hidden(x))      # activation function for the hidden layer
+        x = self.predict(x)             # linear output
+        return x
+
+x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
+y = x.pow(2) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape=(100, 1)
+
+# torch can only train on Variables, so convert the tensors to Variables
+x, y = torch.autograd.Variable(x, requires_grad=False), Variable(y, requires_grad=False)
+
+net = Net(n_feature=1, n_hidden=10, n_output=1)     # define the network
+print(net)  # net architecture
+
+optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
+
+plt.ion()   # turn on interactive plotting
+plt.show()
+
+for t in range(100):
+    prediction = net(x)     # input x and predict based on x
+
+    loss_func = torch.nn.MSELoss()      # mean squared error loss, used for regression
+    loss = loss_func(prediction, y)     # arguments must be (1. nn output, 2. target)
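Side note on the zero_grad / backward / step pattern that follows: PyTorch accumulates gradients across backward() calls by default, which is why the gradients are cleared at every iteration. A minimal standalone sketch of the accumulation (not tied to this script):

    w = Variable(torch.ones(1), requires_grad=True)
    (w * 2).backward()
    (w * 2).backward()   # without zeroing in between, gradients add up
    print(w.grad)        # 4 rather than 2, because the two backward passes accumulated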
+
+    optimizer.zero_grad()   # clear gradients for the next training step
+    loss.backward()         # backpropagation, compute gradients
+    optimizer.step()        # apply gradients
+
+    if t % 5 == 0:
+        # plot and show the learning process
+        plt.cla()
+        plt.scatter(x.data.numpy(), y.data.numpy())
+        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
+        plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color': 'red'})
+        plt.pause(0.1)
+
+plt.ioff()
+plt.show()
\ No newline at end of file
diff --git a/pytorchTUT/05_classification.py b/pytorchTUT/05_classification.py
new file mode 100644
index 0000000..64972a8
--- /dev/null
+++ b/pytorchTUT/05_classification.py
@@ -0,0 +1,60 @@
+import torch
+from torch.autograd import Variable
+import torch.nn.functional as F
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+
+class Net(torch.nn.Module):
+    def __init__(self, n_feature, n_hidden, n_output):
+        super(Net, self).__init__()
+        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
+        self.out = torch.nn.Linear(n_hidden, n_output)       # output layer
+
+    def forward(self, x):
+        x = F.relu(self.hidden(x))      # activation function for the hidden layer
+        x = self.out(x)
+        return x
+
+# make fake data
+n_data = torch.ones(100, 2)
+x0 = torch.normal(2*n_data, 1)      # class0 x data (tensor), shape=(100, 2)
+y0 = torch.zeros(100)               # class0 y data (tensor), shape=(100,)
+x1 = torch.normal(-2*n_data, 1)     # class1 x data (tensor), shape=(100, 2)
+y1 = torch.ones(100)                # class1 y data (tensor), shape=(100,)
+x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # FloatTensor = 32-bit floating point
+y = torch.cat((y0, y1), ).type(torch.LongTensor)    # LongTensor = 64-bit integer
+
+# torch can only train on Variables, so convert the tensors to Variables
+x, y = torch.autograd.Variable(x, requires_grad=False), Variable(y, requires_grad=False)
+
+net = Net(n_feature=2, n_hidden=10, n_output=2)     # define the network
+print(net)  # net architecture
+
+optimizer = torch.optim.SGD(net.parameters(), lr=0.02)
+loss_func = torch.nn.CrossEntropyLoss()  # the target labels are class indices, not one-hot vectors
+
+plt.ion()   # turn on interactive plotting
+plt.show()
+
+for t in range(100):
+    prediction = net(x)     # input x and predict based on x
+    loss = loss_func(prediction, y)     # arguments must be (1. nn output, 2. target); the target labels are class indices, not one-hot
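Side note on CrossEntropyLoss: it expects raw scores (logits) of shape (N, n_classes) and integer class labels of shape (N,), not one-hot vectors; the log-softmax is applied internally. A minimal standalone sketch:

    demo_scores = Variable(torch.FloatTensor([[2.0, 0.5]]))   # logits for one sample, 2 classes
    demo_label = Variable(torch.LongTensor([0]))              # the class index 0, not the one-hot [1, 0]
    print(torch.nn.CrossEntropyLoss()(demo_scores, demo_label))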
+
+    optimizer.zero_grad()   # clear gradients for the next training step
+    loss.backward()         # backpropagation, compute gradients
+    optimizer.step()        # apply gradients
+
+    if t % 2 == 0:
+        # plot and show the learning process
+        plt.cla()
+        pred_y = torch.max(F.softmax(prediction), 1)[1].data.numpy().squeeze()
+        target_y = y.data.numpy()
+        plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
+        accuracy = sum(pred_y == target_y)/200.     # float division so this also works under Python 2
+        plt.text(2, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
+        plt.pause(0.1)
+
+plt.ioff()
+plt.show()
\ No newline at end of file
diff --git a/pytorchTUT/06_build_nn_quickly.py b/pytorchTUT/06_build_nn_quickly.py
new file mode 100644
index 0000000..ddfe17c
--- /dev/null
+++ b/pytorchTUT/06_build_nn_quickly.py
@@ -0,0 +1,59 @@
+import torch
+from torch.autograd import Variable
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+# fake data
+x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
+y = x.pow(2) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape=(100, 1)
+# torch can only train on Variables, so convert the tensors to Variables
+x, y = torch.autograd.Variable(x, requires_grad=False), Variable(y, requires_grad=False)
+
+
+# the class-based network below can be replaced with an equivalent, quicker-to-write sequential network
+"""
+class Net(torch.nn.Module):
+    def __init__(self, n_feature, n_hidden, n_output):
+        super(Net, self).__init__()
+        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
+        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer
+
+    def forward(self, x):
+        x = F.relu(self.hidden(x))      # activation function for the hidden layer
+        x = self.predict(x)             # linear output
+        return x
+"""
+net = torch.nn.Sequential(
+    torch.nn.Linear(1, 10),
+    torch.nn.ReLU(),
+    torch.nn.Linear(10, 1)
+)
+print(net)  # net architecture
+
+
+optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
+
+plt.ion()   # turn on interactive plotting
+plt.show()
+
+for t in range(100):
+    prediction = net(x)     # input x and predict based on x
+
+    loss_func = torch.nn.MSELoss()      # mean squared error loss, used for regression
+    loss = loss_func(prediction, y)     # arguments must be (1. nn output, 2. target)
+
+    optimizer.zero_grad()   # clear gradients for the next training step
+    loss.backward()         # backpropagation, compute gradients
+    optimizer.step()        # apply gradients
+
+    if t % 5 == 0:
+        # plot and show the learning process
+        plt.cla()
+        plt.scatter(x.data.numpy(), y.data.numpy())
+        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
+        plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color': 'red'})
+        plt.pause(0.1)
+
+plt.ioff()
+plt.show()
\ No newline at end of file
diff --git a/pytorchTUT/07_save_reload.py b/pytorchTUT/07_save_reload.py
new file mode 100644
index 0000000..a8ca393
--- /dev/null
+++ b/pytorchTUT/07_save_reload.py
@@ -0,0 +1,79 @@
+import torch
+from torch.autograd import Variable
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+# fake data
+x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
+y = x.pow(2) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape=(100, 1)
+x, y = torch.autograd.Variable(x, requires_grad=False), Variable(y, requires_grad=False)
+
+
+def save():
+    # train and save net1
+    net1 = torch.nn.Sequential(
+        torch.nn.Linear(1, 10),
+        torch.nn.ReLU(),
+        torch.nn.Linear(10, 1)
+    )
+    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
+    for t in range(100):
+        prediction = net1(x)
+        loss_func = torch.nn.MSELoss(size_average=True)
+        loss = loss_func(prediction, y)
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+    # plot result
+    plt.figure(1, figsize=(10, 3))
+    plt.subplot(131)
+    plt.title('Net1')
+    plt.scatter(x.data.numpy(), y.data.numpy())
+    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
+
+    # 2 ways to save the net
+    torch.save(net1, 'net.pkl')                      # save the entire net
+    torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters
+
+
+def restore_net():
+    # restore the entire net1 into net2
+    net2 = torch.load('net.pkl')
+    prediction = net2(x)
+
+    # plot result
+    plt.subplot(132)
+    plt.title('Net2')
+    plt.scatter(x.data.numpy(), y.data.numpy())
+    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
+
+
+def restore_params():
+    # restore only the parameters of net1 into net3
+    net3 = torch.nn.Sequential(
+        torch.nn.Linear(1, 10),
+        torch.nn.ReLU(),
+        torch.nn.Linear(10, 1)
+    )
+
+    # copy net1's parameters into net3
+    net3.load_state_dict(torch.load('net_params.pkl'))
+    prediction = net3(x)
+
+    # plot result
+    plt.subplot(133)
+    plt.title('Net3')
+    plt.scatter(x.data.numpy(), y.data.numpy())
+    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
+    plt.show()
+
+# save net1
+save()
+
+# restore the entire net (slow)
+restore_net()
+
+# restore only the net parameters
+restore_params()
diff --git a/pytorchTUT/08_batch_train.py b/pytorchTUT/08_batch_train.py
new file mode 100644
index 0000000..ef53598
--- /dev/null
+++ b/pytorchTUT/08_batch_train.py
@@ -0,0 +1,21 @@
+import torch
+import torch.utils.data as Data
+
+BATCH_SIZE = 8
+
+x = torch.linspace(1, 10, 10)   # this is x data (torch tensor)
+y = torch.linspace(10, 1, 10)   # this is y data (torch tensor)
+
+torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y)
+loader = Data.DataLoader(
+    dataset=torch_dataset,      # torch TensorDataset format
+    batch_size=BATCH_SIZE,      # mini-batch size
+    shuffle=True,               # random shuffle for training
+    num_workers=2,              # subprocesses for loading the data
+)
+
+for epoch in range(3):   # train on the entire dataset 3 times
+    for step, (batch_x, batch_y) in enumerate(loader):  # for each training step
+        # train your data...
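A rough sketch of the training step this placeholder stands for, assuming some net, optimizer, and loss_func had been defined earlier (hypothetical names; none of them exist in this script):

    b_x, b_y = Variable(batch_x), Variable(batch_y)   # wrap the mini-batch in torch.autograd.Variable
    prediction = net(b_x)                 # hypothetical model: forward pass on this mini-batch
    loss = loss_func(prediction, b_y)     # hypothetical loss function: compare with the batch targets
    optimizer.zero_grad()                 # clear old gradients
    loss.backward()                       # backpropagation
    optimizer.step()                      # hypothetical optimizer: update the parameters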
+        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
+              batch_x.numpy(), '| batch y: ', batch_y.numpy())
diff --git a/pytorchTUT/09_CNN.py b/pytorchTUT/09_CNN.py
new file mode 100644
index 0000000..525112e
--- /dev/null
+++ b/pytorchTUT/09_CNN.py
@@ -0,0 +1,93 @@
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+import torch.utils.data as Data
+import torchvision
+
+torch.manual_seed(1)    # reproducible
+
+# Hyper Parameters
+EPOCH = 1               # train the training data n times; to save time we only train for 1 epoch
+BATCH_SIZE = 50
+LR = 0.001              # learning rate
+DOWNLOAD_MNIST = False
+
+
+# Mnist digits dataset
+train_data = torchvision.datasets.MNIST(
+    root='./mnist/',
+    train=True,                                     # this is training data
+    transform=torchvision.transforms.ToTensor(),    # converts a PIL.Image or numpy.ndarray to a
+                                                    # torch.FloatTensor of shape (C x H x W) and normalizes it to the range [0.0, 1.0]
+    download=DOWNLOAD_MNIST,                        # download it if you don't have it
+)
+
+test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
+
+# Data Loader for easy mini-batch return in training; the image batch shape will be (50, 1, 28, 28)
+train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
+
+# convert test data into Variable, pick 2000 samples to speed up testing
+test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1)).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), values in range (0, 1)
+test_y = test_data.test_labels[:2000]
+
+
+class CNN(nn.Module):
+    def __init__(self):
+        super(CNN, self).__init__()
+        self.conv1 = nn.Sequential(         # input shape (1, 28, 28)
+            nn.Conv2d(
+                in_channels=1,              # input channels (grayscale image)
+                out_channels=16,            # n_filters
+                kernel_size=5,              # filter size
+                stride=1,                   # filter movement/step
+                padding=2,                  # to keep the same width and height after Conv2d, use padding=(kernel_size-1)/2 when stride=1
+            ),                              # output shape (16, 28, 28)
+            nn.ReLU(),                      # activation
+            nn.MaxPool2d(kernel_size=2),    # choose the max value in each 2x2 area, output shape (16, 14, 14)
+        )
+        self.conv2 = nn.Sequential(         # input shape (16, 14, 14)
+            nn.Conv2d(16, 32, 5, 1, 2),     # output shape (32, 14, 14)
+            nn.ReLU(),                      # activation
+            nn.MaxPool2d(2),                # output shape (32, 7, 7)
+        )
+        self.out = nn.Linear(32 * 7 * 7, 10)   # fully connected layer, output 10 classes
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = x.view(x.size(0), -1)           # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
+        output = self.out(x)
+        return output
+
+
+cnn = CNN()
+print(cnn)  # net architecture
+
+optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
+loss_func = nn.CrossEntropyLoss()                       # the target labels are class indices, not one-hot vectors
+
+# training and testing
+for epoch in range(EPOCH):
+    for step, (x, y) in enumerate(train_loader):   # gives batch data, normalizes x while iterating over train_loader
+        b_x = Variable(x)   # batch x
+        b_y = Variable(y)   # batch y
+
+        output = cnn(b_x)               # cnn output
+        loss = loss_func(output, b_y)   # cross entropy loss
+        optimizer.zero_grad()           # clear gradients for this training step
+        loss.backward()                 # backpropagation, compute gradients
+        optimizer.step()                # apply gradients
+
+        if step % 50 == 0:
+            test_output = cnn(test_x)
+            pred_y = torch.max(test_output, 1)[1].data.squeeze()
+            accuracy = sum(pred_y == test_y) / test_y.size(0)
+            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| test accuracy: %.2f' % accuracy)
+
+
+# print 10 predictions from test data
+test_output = cnn(test_x[:10])
+pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
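Side note on the torch.max call above: with a dimension argument it returns a (values, indices) pair, and the [1] picks the indices, i.e. the predicted class of each row. A tiny standalone check:

    scores = torch.FloatTensor([[0.1, 2.0, 0.3],
                                [1.5, 0.2, 0.1]])
    values, indices = torch.max(scores, 1)
    print(indices)    # 1 and 0, the argmax (predicted class) of each row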
+print(pred_y, 'prediction number')
+print(test_y[:10].numpy(), 'real number')
diff --git a/pytorchTUT/10_RNN_classifier.py b/pytorchTUT/10_RNN_classifier.py
new file mode 100644
index 0000000..9befc17
--- /dev/null
+++ b/pytorchTUT/10_RNN_classifier.py
@@ -0,0 +1,91 @@
+import torch
+from torch import nn
+from torch.autograd import Variable
+import torchvision.datasets as dsets
+import torchvision.transforms as transforms
+
+torch.manual_seed(1)    # reproducible
+
+# Hyper Parameters
+EPOCH = 1               # train the training data n times; to save time we only train for 1 epoch
+BATCH_SIZE = 64
+TIME_STEP = 28          # rnn time steps / image height
+INPUT_SIZE = 28         # rnn input size / image width
+LR = 0.01               # learning rate
+DOWNLOAD_MNIST = False  # set to True if you haven't downloaded the data yet
+
+
+# Mnist digits dataset
+train_data = dsets.MNIST(
+    root='./mnist/',
+    train=True,                         # this is training data
+    transform=transforms.ToTensor(),    # converts a PIL.Image or numpy.ndarray to a
+                                        # torch.FloatTensor of shape (C x H x W) and normalizes it to the range [0.0, 1.0]
+    download=DOWNLOAD_MNIST,            # download it if you don't have it
+)
+
+test_data = dsets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())
+
+# Data Loader for easy mini-batch return in training
+train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
+
+# convert test data into Variable, pick 2000 samples to speed up testing
+test_x = Variable(test_data.test_data).type(torch.FloatTensor)[:2000]/255.   # shape (2000, 28, 28), values in range (0, 1)
+test_y = test_data.test_labels.numpy().squeeze()[:2000]    # convert to a numpy array
+
+
+class RNN(nn.Module):
+    def __init__(self):
+        super(RNN, self).__init__()
+
+        self.rnn = nn.LSTM(         # if nn.RNN() is used instead, it hardly learns
+            input_size=28,
+            hidden_size=64,         # rnn hidden units
+            num_layers=1,           # number of rnn layers
+            batch_first=True,       # input & output have batch size as the first dimension, e.g. (batch, time_step, input_size)
+        )
+
+        self.out = nn.Linear(64, 10)
+
+    def forward(self, x):
+        # x shape (batch, time_step, input_size)
+        # r_out shape (batch, time_step, output_size)
+        # h_n shape (n_layers, batch, hidden_size)
+        # h_c shape (n_layers, batch, hidden_size)
+        r_out, (h_n, h_c) = self.rnn(x, None)   # None represents a zero initial hidden state
+
+        # choose r_out at the last time step
+        out = self.out(r_out[:, -1, :])
+        return out
+
+
+rnn = RNN()
+print(rnn)
+
+optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
+loss_func = nn.CrossEntropyLoss()                       # the target labels are class indices, not one-hot vectors
+
+# training and testing
+for epoch in range(EPOCH):
+    for step, (x, y) in enumerate(train_loader):   # gives batch data
+        b_x = Variable(x.view(-1, 28, 28))   # reshape x to (batch, time_step, input_size)
+        b_y = Variable(y)                    # batch y
+
+        output = rnn(b_x)               # rnn output
+        loss = loss_func(output, b_y)   # cross entropy loss
+        optimizer.zero_grad()           # clear gradients for this training step
+        loss.backward()                 # backpropagation, compute gradients
+        optimizer.step()                # apply gradients
+
+        if step % 50 == 0:
+            test_output = rnn(test_x)   # (samples, time_step, input_size)
+            pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
+            accuracy = sum(pred_y == test_y) / test_y.size
+            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| test accuracy: %.2f' % accuracy)
+
+# print 10 predictions from test data
+test_output = rnn(test_x[:10].view(-1, 28, 28))
+pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
+print(pred_y, 'prediction number')
+print(test_y[:10], 'real number')
+
diff --git a/pytorchTUT/11_RNN_regressor.py b/pytorchTUT/11_RNN_regressor.py
new file mode 100644
index 0000000..04991c3
--- /dev/null
+++ b/pytorchTUT/11_RNN_regressor.py
@@ -0,0 +1,79 @@
+import torch
+from torch import nn
+from torch.autograd import Variable
+import numpy as np
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+# Hyper Parameters
+BATCH_SIZE = 64
+TIME_STEP = 5           # rnn time steps
+INPUT_SIZE = 1          # rnn input size
+LR = 0.02               # learning rate
+DOWNLOAD_MNIST = False  # unused here; left over from the MNIST examples
+
+
+class RNN(nn.Module):
+    def __init__(self):
+        super(RNN, self).__init__()
+
+        self.rnn = nn.RNN(
+            input_size=1,
+            hidden_size=32,         # rnn hidden units
+            num_layers=1,           # number of rnn layers
+            batch_first=True,       # input & output have batch size as the first dimension, e.g. (batch, time_step, input_size)
+        )
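Side note on batch_first=True: the input and output then carry the batch on the first dimension, i.e. (batch, time_step, input_size) in and (batch, time_step, hidden_size) out, while the hidden state stays (num_layers, batch, hidden_size). A small standalone shape check (the demo_* names are illustrative only):

    demo_rnn = nn.RNN(input_size=1, hidden_size=32, num_layers=1, batch_first=True)
    demo_in = Variable(torch.zeros(2, 5, 1))    # batch=2, time_step=5, input_size=1
    demo_out, demo_h = demo_rnn(demo_in)
    print(demo_out.size())    # (2, 5, 32)  -> (batch, time_step, hidden_size)
    print(demo_h.size())      # (1, 2, 32)  -> (num_layers, batch, hidden_size)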
+        self.out = nn.Linear(32, 1)
+
+    def forward(self, x, h_state):
+        # x (batch, time_step, input_size)
+        # h_state (n_layers, batch, hidden_size)
+        # r_out (batch, time_step, output_size)
+        r_out, h_state = self.rnn(x, h_state)
+
+        outs = []    # this is where you can see that torch is dynamic
+        for time_step in range(r_out.size(1)):    # calculate the output for each time step
+            outs.append(self.out(r_out[:, time_step, :]))
+        return torch.stack(outs, dim=1), h_state
+
+
+rnn = RNN()
+print(rnn)
+
+optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
+loss_func = nn.MSELoss()                                # mean squared error loss, used for regression
+
+h_state = None   # the initial hidden state
+
+plt.figure(1, figsize=(12, 5))
+plt.ion()   # continuously plot
+plt.show()
+
+for step in range(60):
+    start, end = step * np.pi, (step+1)*np.pi
+    # use sin to predict cos
+    steps = np.linspace(start, end, 10, dtype=np.float32)
+    x_np = np.sin(steps)    # float32 so it converts to a torch FloatTensor
+    y_np = np.cos(steps)
+
+    x = Variable(torch.from_numpy(x_np[np.newaxis, :, np.newaxis]))   # shape (batch, time_step, input_size)
+    y = Variable(torch.from_numpy(y_np[np.newaxis, :, np.newaxis]))
+
+    prediction, h_state = rnn(x, h_state)   # rnn output
+    # !! the next step is important !!
+    h_state = Variable(h_state.data)        # repack the hidden state, break the connection from the last iteration
+
+    loss = loss_func(prediction, y)         # mean squared error loss
+    optimizer.zero_grad()                   # clear gradients for this training step
+    loss.backward()                         # backpropagation, compute gradients
+    optimizer.step()                        # apply gradients
+
+    # plotting
+    plt.plot(steps, y_np.flatten(), 'r-')
+    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
+    plt.draw()
+    plt.pause(0.05)
+
+plt.ioff()
+plt.show()
diff --git a/tensorflowTUT/tf20_RNN2/full_code.py b/tensorflowTUT/tf20_RNN2/full_code.py
index 0cd4eed..b46f670 100644
--- a/tensorflowTUT/tf20_RNN2/full_code.py
+++ b/tensorflowTUT/tf20_RNN2/full_code.py
@@ -124,7 +124,7 @@ def RNN(X, weights, biases):
         print(sess.run(accuracy, feed_dict={
             x: batch_xs,
             y: batch_ys,
-        }))
+            }))
        step += 1