
Commit

update torch tutorial code

MorvanZhou committed Apr 28, 2017
1 parent 1879c68 commit f2b9240
Showing 12 changed files with 650 additions and 1 deletion.
44 changes: 44 additions & 0 deletions pytorchTUT/01_torch_numpy.py
@@ -0,0 +1,44 @@
import torch
import numpy as np

# details about math operations in torch can be found at: http://pytorch.org/docs/torch.html#math-operations

# abs
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data) # 32-bit floating point
print(
'\nabs',
'\nnumpy: ', np.abs(data), # [1 2 1 2]
'\ntorch: ', torch.abs(tensor) # [1 2 1 2]
)

# sin
print(
'\nsin',
'\nnumpy: ', np.sin(data), # [-0.84147098 -0.90929743 0.84147098 0.90929743]
'\ntorch: ', torch.sin(tensor) # [-0.8415 -0.9093 0.8415 0.9093]
)

# mean
print(
'\nmean',
'\nnumpy: ', np.mean(data), # 0.0
'\ntorch: ', torch.mean(tensor) # 0.0
)

# matrix multiplication
data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data) # 32-bit floating point
# correct method
print(
'\nmatrix multiplication (matmul)',
'\nnumpy: ', np.matmul(data, data), # [[7, 10], [15, 22]]
'\ntorch: ', torch.mm(tensor, tensor) # [[7, 10], [15, 22]]
)
# incorrect method
data = np.array(data)
print(
'\nmatrix multiplication (dot)',
'\nnumpy: ', data.dot(data), # [[7, 10], [15, 22]]
    '\ntorch: ', tensor.dot(tensor)     # torch flattens the tensor to [1, 2, 3, 4], so this returns 1+4+9+16 = 30.0
)
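
# a quick sanity check (a sketch, not from the original file): flattening the
# tensor by hand reproduces what tensor.dot did above in this torch version
flat = tensor.view(-1)          # [1, 2, 3, 4]
print(torch.dot(flat, flat))    # 1*1 + 2*2 + 3*3 + 4*4 = 30.0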
29 changes: 29 additions & 0 deletions pytorchTUT/02_variable.py
@@ -0,0 +1,29 @@
import torch
from torch.autograd import Variable

# A Variable in torch wraps a tensor and builds a computational graph,
# but the graph is dynamic, unlike the static graphs in Tensorflow or Theano.
# So torch needs no placeholders: you pass Variables straight into the computation.

tensor = torch.FloatTensor([[1,2],[3,4]]) # build a tensor
variable = Variable(tensor, requires_grad=True)     # build a variable, usually used to compute gradients

print(tensor) # [torch.FloatTensor of size 2x2]
print(variable) # [torch.FloatTensor of size 2x2]

# so far the tensor and the variable look the same.
# However, the variable is part of the graph and takes part in automatic differentiation.

t_out = torch.mean(tensor*tensor) # x^2
v_out = torch.mean(variable*variable) # x^2
print(t_out)    # 7.5
print(v_out)    # 7.5

v_out.backward() # backpropagation from v_out
# v_out = 1/4 * sum(variable*variable)
# gradient w.r.t. the variable: d(v_out)/d(variable) = 1/4 * 2 * variable = variable/2
print(variable.grad)
'''
0.5000 1.0000
1.5000 2.0000
'''
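
# a small addition (sketch): a Variable wraps a tensor; .data returns the
# wrapped tensor, and .data.numpy() its numpy form (handy for plotting)
print(variable.data)            # [torch.FloatTensor of size 2x2]
print(variable.data.numpy())    # the same values as a numpy array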
41 changes: 41 additions & 0 deletions pytorchTUT/03_activation.py
@@ -0,0 +1,41 @@
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt

# fake data
x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(200,)
x = Variable(x)
x_np = x.data.numpy()

# following are popular activation functions
y_relu = F.relu(x).data.numpy()
y_sigmoid = F.sigmoid(x).data.numpy()
y_tanh = F.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()
# y_softmax = F.softmax(x)  softmax is a special kind of activation function: it outputs a probability distribution
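# a short illustration (an assumption, not from the original file: in this
# torch version, F.softmax on a 2-D input normalizes over dim 1):
print(F.softmax(Variable(torch.ones(2, 3))))    # every row becomes [0.3333, 0.3333, 0.3333]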


# plt to visualize these activation functions
plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim((-1.2, 1.2))
plt.legend(loc='best')

plt.subplot(224)
plt.plot(x_np, y_softplus, c='red', label='softplus')
plt.ylim((-0.2, 6))
plt.legend(loc='best')

plt.show()
53 changes: 53 additions & 0 deletions pytorchTUT/04_regression.py
@@ -0,0 +1,53 @@
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible


class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer
self.predict = torch.nn.Linear(n_hidden, n_output) # output layer

def forward(self, x):
x = F.relu(self.hidden(x)) # activation function for hidden layer
x = self.predict(x) # linear output
return x

x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)

# torch can only train on Variables, so convert the tensors to Variables
x, y = Variable(x), Variable(y)     # requires_grad defaults to False

net = Net(n_feature=1, n_hidden=10, n_output=1) # define the network
print(net) # net architecture

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()      # mean squared error loss for regression

plt.ion()   # turn interactive plotting on
plt.show()

for t in range(100):
prediction = net(x) # input x and predict based on x

    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

optimizer.zero_grad() # clear gradients for next train
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients

if t % 5 == 0:
# plot and show learning process
plt.cla()
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color': 'red'})
plt.pause(0.1)

plt.ioff()
plt.show()
60 changes: 60 additions & 0 deletions pytorchTUT/05_classification.py
@@ -0,0 +1,60 @@
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible


class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer
self.out = torch.nn.Linear(n_hidden, n_output) # output layer

def forward(self, x):
x = F.relu(self.hidden(x)) # activation function for hidden layer
x = self.out(x)
return x

# make fake data
n_data = torch.ones(100, 2)
x0 = torch.normal(2*n_data, 1) # class0 x data (tensor), shape=(100, 2)
y0 = torch.zeros(100)               # class0 y data (tensor), shape=(100,)
x1 = torch.normal(-2*n_data, 1)     # class1 x data (tensor), shape=(100, 2)
y1 = torch.ones(100)                # class1 y data (tensor), shape=(100,)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor) # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor) # LongTensor = 64-bit integer

# torch can only train on Variables, so convert the tensors to Variables
x, y = Variable(x), Variable(y)     # requires_grad defaults to False

net = Net(n_feature=2, n_hidden=10, n_output=2) # define the network
print(net) # net architecture

optimizer = torch.optim.SGD(net.parameters(), lr=0.02)
loss_func = torch.nn.CrossEntropyLoss() # the target label is not one-hotted
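# note (added for clarity): CrossEntropyLoss expects class indices (here a
# LongTensor of 0s and 1s), not one-hot vectors; it applies log-softmax +
# negative log-likelihood internally, so the net outputs raw scores (logits)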

plt.ion()   # turn interactive plotting on
plt.show()

for t in range(100):
prediction = net(x) # input x and predict based on x
loss = loss_func(prediction, y) # must be (1. nn output, 2. target), the target label is not one-hotted

optimizer.zero_grad() # clear gradients for next train
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients

if t % 2 == 0:
# plot and show learning process
plt.cla()
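        # torch.max returns (max values, their indices) along dimension 1 of
        # the softmax output; [1] keeps the indices, i.e. the predicted classes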
pred_y = torch.max(F.softmax(prediction), 1)[1].data.numpy().squeeze()
target_y = y.data.numpy()
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
        accuracy = sum(pred_y == target_y) / 200.   # float division, so this also works on Python 2
plt.text(2, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
plt.pause(0.1)

plt.ioff()
plt.show()
59 changes: 59 additions & 0 deletions pytorchTUT/06_build_nn_quickly.py
@@ -0,0 +1,59 @@
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible

# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
# torch can only train on Variables, so convert the tensors to Variables
x, y = Variable(x), Variable(y)     # requires_grad defaults to False


# replace the following class with a quicker-to-write Sequential network
"""
class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer
self.predict = torch.nn.Linear(n_hidden, n_output) # output layer
def forward(self, x):
x = F.relu(self.hidden(x)) # activation function for hidden layer
x = self.predict(x) # linear output
return x
"""
net = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)
print(net) # net architecture
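
# a quick sketch (added; uses only calls already shown in these tutorials) of
# what the optimizer below will update:
for param in net.parameters():
    print(param.size())     # weights and biases of the two Linear layers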


optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()      # mean squared error loss for regression

plt.ion()   # turn interactive plotting on
plt.show()

for t in range(100):
prediction = net(x) # input x and predict based on x

    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

optimizer.zero_grad() # clear gradients for next train
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients

if t % 5 == 0:
# plot and show learning process
plt.cla()
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color': 'red'})
plt.pause(0.1)

plt.ioff()
plt.show()
79 changes: 79 additions & 0 deletions pytorchTUT/07_save_reload.py
@@ -0,0 +1,79 @@
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

torch.manual_seed(1) # reproducible

# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
x, y = Variable(x), Variable(y)     # requires_grad defaults to False


def save():
# save net1
net1 = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()      # size_average=True is the default
for t in range(100):
prediction = net1(x)
        loss = loss_func(prediction, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()

# plot result
plt.figure(1, figsize=(10, 3))
plt.subplot(131)
plt.title('Net1')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)

# 2 ways to save the net
torch.save(net1, 'net.pkl') # save entire net
torch.save(net1.state_dict(), 'net_params.pkl') # save only the parameters
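
    # note (added): saving the entire net pickles it together with its class's
    # import path, so it is less portable than saving the state_dict alone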


def restore_net():
# restore entire net1 to net2
net2 = torch.load('net.pkl')
prediction = net2(x)

# plot result
plt.subplot(132)
plt.title('Net2')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)


def restore_params():
# restore only the parameters in net1 to net3
net3 = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)

# copy net1's parameters into net3
net3.load_state_dict(torch.load('net_params.pkl'))
prediction = net3(x)

# plot result
plt.subplot(133)
plt.title('Net3')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.show()

# save net1
save()

# restore the entire net (may be slow)
restore_net()

# restore only the net parameters
restore_params()
21 changes: 21 additions & 0 deletions pytorchTUT/08_batch_train.py
@@ -0,0 +1,21 @@
import torch
import torch.utils.data as Data

BATCH_SIZE = 8

x = torch.linspace(1, 10, 10) # this is x data (torch tensor)
y = torch.linspace(10, 1, 10) # this is y data (torch tensor)

torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y)
loader = Data.DataLoader(
dataset=torch_dataset, # torch TensorDataset format
batch_size=BATCH_SIZE, # mini batch size
shuffle=True, # random shuffle for training
    num_workers=2,              # subprocesses for loading data (use 0 if multiprocessing causes trouble on your platform)
)

for epoch in range(3): # train entire dataset 3 times
for step, (batch_x, batch_y) in enumerate(loader): # for each training step
# train your data...
print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
batch_x.numpy(), '| batch y: ', batch_y.numpy())
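
# a minimal sketch (assumption: default DataLoader behavior in this version):
# when batch_size does not evenly divide the dataset, the last batch is smaller
loader2 = Data.DataLoader(dataset=torch_dataset, batch_size=4, shuffle=True)
for step, (batch_x, batch_y) in enumerate(loader2):
    print('Step: ', step, '| batch size: ', batch_x.size(0))    # 4, 4, 2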