
Commit

update torch tutorial code

MorvanZhou committed Apr 28, 2017
1 parent 1879c68 commit f2b9240
Showing 12 changed files with 650 additions and 1 deletion.
44 changes: 44 additions & 0 deletions pytorchTUT/01_torch_numpy.py
@@ -0,0 +1,44 @@
import torch
import numpy as np

# details about math operations in torch can be found at: http://pytorch.org/docs/torch.html#math-operations

# abs
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data) # 32-bit floating point
print(
'\nabs',
'\nnumpy: ', np.abs(data), # [1 2 1 2]
'\ntorch: ', torch.abs(tensor) # [1 2 1 2]
)

# sin
print(
'\nsin',
'\nnumpy: ', np.sin(data), # [-0.84147098 -0.90929743 0.84147098 0.90929743]
'\ntorch: ', torch.sin(tensor) # [-0.8415 -0.9093 0.8415 0.9093]
)

# mean
print(
'\nmean',
'\nnumpy: ', np.mean(data), # 0.0
'\ntorch: ', torch.mean(tensor) # 0.0
)

# matrix multiplication
data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data) # 32-bit floating point
# correct method
print(
'\nmatrix multiplication (matmul)',
'\nnumpy: ', np.matmul(data, data), # [[7, 10], [15, 22]]
'\ntorch: ', torch.mm(tensor, tensor) # [[7, 10], [15, 22]]
)
# incorrect method
data = np.array(data)
print(
'\nmatrix multiplication (dot)',
'\nnumpy: ', data.dot(data), # [[7, 10], [15, 22]]
    '\ntorch: ', tensor.dot(tensor)     # torch flattens the tensor to [1, 2, 3, 4], so this returns 1+4+9+16 = 30.0
)
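
# a quick sanity check (a sketch, not from the original file): flattening the
# tensor by hand reproduces what tensor.dot did above in this torch version
flat = tensor.view(-1)          # [1, 2, 3, 4]
print(torch.dot(flat, flat))    # 1*1 + 2*2 + 3*3 + 4*4 = 30.0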
29 changes: 29 additions & 0 deletions pytorchTUT/02_variable.py
@@ -0,0 +1,29 @@
import torch
from torch.autograd import Variable

# A Variable in torch wraps a tensor and builds a computational graph,
# but the graph is dynamic, unlike the static graphs in Tensorflow or Theano.
# So torch needs no placeholders: you pass Variables straight into the computation.

tensor = torch.FloatTensor([[1,2],[3,4]]) # build a tensor
variable = Variable(tensor, requires_grad=True)     # build a variable, usually used to compute gradients

print(tensor) # [torch.FloatTensor of size 2x2]
print(variable) # [torch.FloatTensor of size 2x2]

# so far the tensor and the variable look the same.
# However, the variable is part of the graph and takes part in automatic differentiation.

t_out = torch.mean(tensor*tensor) # x^2
v_out = torch.mean(variable*variable) # x^2
print(t_out)    # 7.5
print(v_out)    # 7.5

v_out.backward() # backpropagation from v_out
# v_out = 1/4 * sum(variable*variable)
# gradient w.r.t. the variable: d(v_out)/d(variable) = 1/4 * 2 * variable = variable/2
print(variable.grad)
'''
0.5000 1.0000
1.5000 2.0000
'''
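
# a small addition (sketch): a Variable wraps a tensor; .data returns the
# wrapped tensor, and .data.numpy() its numpy form (handy for plotting)
print(variable.data)            # [torch.FloatTensor of size 2x2]
print(variable.data.numpy())    # the same values as a numpy array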
41 changes: 41 additions & 0 deletions pytorchTUT/03_activation.py
@@ -0,0 +1,41 @@
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt

# fake data
x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(200,)
x = Variable(x)
x_np = x.data.numpy()

# following are popular activation functions
y_relu = F.relu(x).data.numpy()
y_sigmoid = F.sigmoid(x).data.numpy()
y_tanh = F.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()
# y_softmax = F.softmax(x)  softmax is a special kind of activation function: it outputs a probability distribution
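# a short illustration (an assumption, not from the original file: in this
# torch version, F.softmax on a 2-D input normalizes over dim 1):
print(F.softmax(Variable(torch.ones(2, 3))))    # every row becomes [0.3333, 0.3333, 0.3333]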


# plt to visualize these activation functions
plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim((-1.2, 1.2))
plt.legend(loc='best')

plt.subplot(224)
plt.plot(x_np, y_softplus, c='red', label='softplus')
plt.ylim((-0.2, 6))
plt.legend(loc='best')

plt.show()
53 changes: 53 additions & 0 deletions pytorchTUT/04_regression.py
@@ -0,0 +1,53 @@
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible


class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer
self.predict = torch.nn.Linear(n_hidden, n_output) # output layer

def forward(self, x):
x = F.relu(self.hidden(x)) # activation function for hidden layer
x = self.predict(x) # linear output
return x

x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)

# torch can only train on Variables, so convert the tensors to Variables
x, y = Variable(x), Variable(y)     # requires_grad defaults to False

net = Net(n_feature=1, n_hidden=10, n_output=1) # define the network
print(net) # net architecture

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()      # mean squared error loss for regression

plt.ion()   # turn interactive plotting on
plt.show()

for t in range(100):
prediction = net(x) # input x and predict based on x

    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

optimizer.zero_grad() # clear gradients for next train
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients

if t % 5 == 0:
# plot and show learning process
plt.cla()
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color': 'red'})
plt.pause(0.1)

plt.ioff()
plt.show()
60 changes: 60 additions & 0 deletions pytorchTUT/05_classification.py
@@ -0,0 +1,60 @@
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible


class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer
self.out = torch.nn.Linear(n_hidden, n_output) # output layer

def forward(self, x):
x = F.relu(self.hidden(x)) # activation function for hidden layer
x = self.out(x)
return x

# make fake data
n_data = torch.ones(100, 2)
x0 = torch.normal(2*n_data, 1) # class0 x data (tensor), shape=(100, 2)
y0 = torch.zeros(100)               # class0 y data (tensor), shape=(100,)
x1 = torch.normal(-2*n_data, 1)     # class1 x data (tensor), shape=(100, 2)
y1 = torch.ones(100)                # class1 y data (tensor), shape=(100,)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor) # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor) # LongTensor = 64-bit integer

# torch can only train on Variables, so convert the tensors to Variables
x, y = Variable(x), Variable(y)     # requires_grad defaults to False

net = Net(n_feature=2, n_hidden=10, n_output=2) # define the network
print(net) # net architecture

optimizer = torch.optim.SGD(net.parameters(), lr=0.02)
loss_func = torch.nn.CrossEntropyLoss() # the target label is not one-hotted
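# note (added for clarity): CrossEntropyLoss expects class indices (here a
# LongTensor of 0s and 1s), not one-hot vectors; it applies log-softmax +
# negative log-likelihood internally, so the net outputs raw scores (logits)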

plt.ion()   # turn interactive plotting on
plt.show()

for t in range(100):
prediction = net(x) # input x and predict based on x
loss = loss_func(prediction, y) # must be (1. nn output, 2. target), the target label is not one-hotted

optimizer.zero_grad() # clear gradients for next train
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients

if t % 2 == 0:
# plot and show learning process
plt.cla()
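        # torch.max returns (max values, their indices) along dimension 1 of
        # the softmax output; [1] keeps the indices, i.e. the predicted classes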
pred_y = torch.max(F.softmax(prediction), 1)[1].data.numpy().squeeze()
target_y = y.data.numpy()
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
        accuracy = sum(pred_y == target_y) / 200.   # float division, so this also works on Python 2
plt.text(2, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
plt.pause(0.1)

plt.ioff()
plt.show()
59 changes: 59 additions & 0 deletions pytorchTUT/06_build_nn_quickly.py
@@ -0,0 +1,59 @@
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible

# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
# torch can only train on Variables, so convert the tensors to Variables
x, y = Variable(x), Variable(y)     # requires_grad defaults to False


# replace the following class with a quicker-to-write Sequential network
"""
class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer
self.predict = torch.nn.Linear(n_hidden, n_output) # output layer
def forward(self, x):
x = F.relu(self.hidden(x)) # activation function for hidden layer
x = self.predict(x) # linear output
return x
"""
net = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)
print(net) # net architecture
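
# a quick sketch (added; uses only calls already shown in these tutorials) of
# what the optimizer below will update:
for param in net.parameters():
    print(param.size())     # weights and biases of the two Linear layers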


optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()      # mean squared error loss for regression

plt.ion()   # turn interactive plotting on
plt.show()

for t in range(100):
prediction = net(x) # input x and predict based on x

    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

optimizer.zero_grad() # clear gradients for next train
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients

if t % 5 == 0:
# plot and show learning process
plt.cla()
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color': 'red'})
plt.pause(0.1)

plt.ioff()
plt.show()
79 changes: 79 additions & 0 deletions pytorchTUT/07_save_reload.py
@@ -0,0 +1,79 @@
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible

# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
x, y = Variable(x), Variable(y)     # requires_grad defaults to False


def save():
# save net1
net1 = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()      # size_average=True is the default
for t in range(100):
prediction = net1(x)
        loss = loss_func(prediction, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()

# plot result
plt.figure(1, figsize=(10, 3))
plt.subplot(131)
plt.title('Net1')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)

# 2 ways to save the net
torch.save(net1, 'net.pkl') # save entire net
torch.save(net1.state_dict(), 'net_params.pkl') # save only the parameters
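
    # note (added): saving the entire net pickles it together with its class's
    # import path, so it is less portable than saving the state_dict alone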


def restore_net():
# restore entire net1 to net2
net2 = torch.load('net.pkl')
prediction = net2(x)

# plot result
plt.subplot(132)
plt.title('Net2')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)


def restore_params():
# restore only the parameters in net1 to net3
net3 = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)

# copy net1's parameters into net3
net3.load_state_dict(torch.load('net_params.pkl'))
prediction = net3(x)

# plot result
plt.subplot(133)
plt.title('Net3')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.show()

# save net1
save()

# restore the entire net (may be slow)
restore_net()

# restore only the net parameters
restore_params()
21 changes: 21 additions & 0 deletions pytorchTUT/08_batch_train.py
@@ -0,0 +1,21 @@
import torch
import torch.utils.data as Data

BATCH_SIZE = 8

x = torch.linspace(1, 10, 10) # this is x data (torch tensor)
y = torch.linspace(10, 1, 10) # this is y data (torch tensor)

torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y)
loader = Data.DataLoader(
dataset=torch_dataset, # torch TensorDataset format
batch_size=BATCH_SIZE, # mini batch size
shuffle=True, # random shuffle for training
    num_workers=2,              # subprocesses for loading data (use 0 if multiprocessing causes trouble on your platform)
)

for epoch in range(3): # train entire dataset 3 times
for step, (batch_x, batch_y) in enumerate(loader): # for each training step
# train your data...
print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
batch_x.numpy(), '| batch y: ', batch_y.numpy())
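
# a minimal sketch (assumption: default DataLoader behavior in this version):
# when batch_size does not evenly divide the dataset, the last batch is smaller
loader2 = Data.DataLoader(dataset=torch_dataset, batch_size=4, shuffle=True)
for step, (batch_x, batch_y) in enumerate(loader2):
    print('Step: ', step, '| batch size: ', batch_x.size(0))    # 4, 4, 2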