Commit 99c26f8 (parent bc83d96): 3 changed files, 262 additions, 1 deletion.
File 1 of 3 (new file, 143 lines):

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

lin = nn.Linear(5, 3)  # maps from R^5 to R^3, parameters A, b
# data is 2x5. A maps from 5 to 3... can we map "data" under A?
data = torch.randn(2, 5)
print(lin(data))

# In PyTorch, most non-linearities live in torch.nn.functional (imported here as F).
# Note that non-linearities typically don't have parameters the way affine maps do.
# That is, they don't have weights that are updated during training.
data = torch.randn(2, 2)
print(data)
print(F.relu(data))

# Softmax is also in torch.nn.functional
data = torch.randn(5)
print(data)
print(F.softmax(data, dim=0))
print(F.softmax(data, dim=0).sum())  # sums to 1 because it is a distribution!
print(F.log_softmax(data, dim=0))  # there's also log_softmax

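# Extra sketch (not in the original file): on a 2-D tensor the ``dim`` argument
# picks which axis gets normalized. dim=1 makes each row a distribution,
# dim=0 makes each column one. The name ``data2d`` is ours, just for illustration.
data2d = torch.randn(2, 3)
print(F.softmax(data2d, dim=1).sum(dim=1))  # each row sums to 1
print(F.softmax(data2d, dim=0).sum(dim=0))  # each column sums to 1
# log_softmax is the log of softmax, computed in a numerically stabler way:
print(torch.allclose(F.log_softmax(data2d, dim=1),
                     F.softmax(data2d, dim=1).log()))
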
data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
        ("Give it to me".split(), "ENGLISH"),
        ("No creo que sea una buena idea".split(), "SPANISH"),
        ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]

test_data = [("Yo creo que si".split(), "SPANISH"),
             ("it is lost on me".split(), "ENGLISH")]

# word_to_ix maps each word in the vocab to a unique integer, which will be
# its index into the bag-of-words vector
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

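# Aside (a sketch, not part of the original file): the bag-of-words idea is just
# word counting. collections.Counter shows the same information that
# make_bow_vector (defined below) lays out as a fixed-length vector; here every
# count is 1 because no word repeats within the sentence.
from collections import Counter
print(Counter("me gusta comer en la cafeteria".split()))
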
class BoWClassifier(nn.Module):  # inheriting from nn.Module!

    def __init__(self, num_labels, vocab_size):
        # Calls the init function of nn.Module. Don't get confused by the syntax;
        # just always do it in an nn.Module.
        super(BoWClassifier, self).__init__()

        # Define the parameters that you will need. In this case, we need A and b,
        # the parameters of the affine mapping.
        # Torch defines nn.Linear(), which provides the affine map.
        # Make sure you understand why the input dimension is vocab_size
        # and the output is num_labels!
        self.linear = nn.Linear(vocab_size, num_labels)

        # Note! The non-linearity log softmax does not have parameters,
        # so we don't need to worry about it here.

    def forward(self, bow_vec):
        # Pass the input through the linear layer,
        # then pass that through log_softmax.
        # Many non-linearities and other functions are in torch.nn.functional.
        return F.log_softmax(self.linear(bow_vec), dim=1)


def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        vec[word_to_ix[word]] += 1
    return vec.view(1, -1)


def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])

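# Sanity check (a sketch, not in the original file): the BoW vector for the first
# training sentence is a 1 x VOCAB_SIZE row of counts, and the target is a
# one-element LongTensor holding the label index. ``example_vec`` is our name.
example_vec = make_bow_vector(data[0][0], word_to_ix)
print(example_vec.shape)  # torch.Size([1, 26]) with this vocabulary
print(make_target("SPANISH", {"SPANISH": 0, "ENGLISH": 1}))  # tensor([0])
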
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The model knows its parameters. The first output below is A, the second is b.
# Whenever you assign a component to a class variable in the __init__ function
# of a module, as was done with the line
#     self.linear = nn.Linear(...)
# then, through some Python magic from the PyTorch devs, your module
# (in this case, BoWClassifier) stores knowledge of the nn.Linear's parameters.
for param in model.parameters():
    print(param)

# To run the model, pass in a BoW vector.
# Here we don't need to train, so the code is wrapped in torch.no_grad().
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)

label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

# Run on the test data before we train, just to see a before-and-after.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Print the matrix column corresponding to "creo"
print(next(model.parameters())[:, word_to_ix["creo"]])

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 epochs is much bigger than needed on a real data set, but real datasets
# have more than two instances. Usually, somewhere between 5 and 30 epochs is
# reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance.
        model.zero_grad()

        # Step 2. Make our BoW vector, and wrap the target label index in a
        # tensor. For example, if the target is SPANISH, we wrap the integer 0.
        # The loss function then knows that the 0th element of the log
        # probabilities is the log probability corresponding to SPANISH.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Run our forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss and gradients, and update the parameters by
        # calling optimizer.step().
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# The index corresponding to SPANISH goes up, ENGLISH goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])
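
# Follow-up sketch (assumed helper, not part of the original file): turn the
# log probabilities into a readable prediction with argmax. ``predict`` and
# ``ix_to_label`` are names we introduce here for illustration.
ix_to_label = {ix: label for label, ix in label_to_ix.items()}

def predict(sentence):
    with torch.no_grad():
        log_probs = model(make_bow_vector(sentence, word_to_ix))
        return ix_to_label[log_probs.argmax(dim=1).item()]

for instance, label in test_data:
    print(" ".join(instance), "->", predict(instance))
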
File 2 of 3 (new file, 118 lines):

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

# torch.tensor(data) creates a torch.Tensor object with the given data.
V_data = [1., 2., 3.]
V = torch.tensor(V_data)
print(V)

# Creates a matrix
M_data = [[1., 2., 3.], [4., 5., 6.]]
M = torch.tensor(M_data)
print(M)

# Create a 3D tensor of size 2x2x2
T_data = [[[1., 2.], [3., 4.]], [[5., 6.], [7., 8.]]]
T = torch.tensor(T_data)
print(T)

# Index into V and get a scalar (0-dimensional tensor)
print(V[0])
# Get a Python number from it
print(V[0].item())

# Index into M and get a vector
print(M[0])

# Index into T and get a matrix
print(T[0])

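# Extra sketch (not in the original file): standard Python slicing also works,
# and multiple indices drill further into a tensor.
print(V[1:])       # last two entries of the vector
print(T[0, 1])     # second row of the first matrix -> tensor([3., 4.])
print(T[0, 1, 0])  # a single scalar tensor -> tensor(3.)
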
x = torch.randn((3, 4, 5))
print(x)

# Operations with tensors
x = torch.tensor([1., 2., 3.])
y = torch.tensor([4., 5., 6.])
z = x + y
print(z)

# By default, torch.cat concatenates along the first axis (concatenates rows).
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat([x_1, y_1])
print(z_1)

# Concatenate columns:
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
# The second argument specifies which axis to concatenate along.
z_2 = torch.cat([x_2, y_2], 1)
print(z_2)

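# Caution (a sketch, not in the original file): torch.cat requires the
# non-concatenated dimensions to match, otherwise it raises a RuntimeError.
try:
    torch.cat([torch.randn(2, 3), torch.randn(2, 5)])  # mismatched dim 1
except RuntimeError as err:
    print("cat failed:", err)
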
x = torch.randn(2, 3, 4)
print(x)
print(x.view(2, 12))  # Reshape to 2 rows, 12 columns
# Same as above. If one of the dimensions is -1, its size can be inferred.
print(x.view(2, -1))

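# Extra sketch (not in the original file): view() needs contiguous memory; a
# transposed tensor is not contiguous, so reshape() (or .contiguous()) is the
# safer general-purpose choice. ``xt`` is our name, just for illustration.
xt = x.transpose(1, 2)          # shape (2, 4, 3), non-contiguous
print(xt.is_contiguous())       # False
print(xt.reshape(2, 12).shape)  # works: torch.Size([2, 12])
# xt.view(2, 12) would raise a RuntimeError here.
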
# Tensor factory methods have a ``requires_grad`` flag.
x = torch.tensor([1., 2., 3.], requires_grad=True)

# With requires_grad=True, you can still do all the operations you previously could.
y = torch.tensor([4., 5., 6.], requires_grad=True)
z = x + y
print(z)

# But z knows something extra.
print(z.grad_fn)

# Let's sum up all the entries in z.
s = z.sum()
print(s)
print(s.grad_fn)

# Calling .backward() on any tensor will run backprop, starting from it.
s.backward()
print(x.grad)

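# Extra sketch (not in the original file): gradients accumulate across backward
# calls instead of being overwritten, which is why training loops call
# zero_grad(). Back-propagating a second, independent graph adds to x.grad:
s2 = (2 * x).sum()
s2.backward()
print(x.grad)  # tensor([3., 3., 3.]) = previous ones + new twos
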
x = torch.randn(2, 2)
y = torch.randn(2, 2)
# By default, user-created tensors have ``requires_grad=False``.
print(x.requires_grad, y.requires_grad)
z = x + y
# So you can't backprop through z.
print(z.grad_fn)

# ``.requires_grad_( ... )`` changes an existing tensor's ``requires_grad``
# flag in-place. The input flag defaults to ``True`` if not given.
x = x.requires_grad_()
y = y.requires_grad_()
# z contains enough information to compute gradients, as we saw above.
z = x + y
print(z.grad_fn)
# If any input to an operation has ``requires_grad=True``, so will the output.
print(z.requires_grad)

# Now z has the computation history that relates it to x and y.
# Can we just take its values, and **detach** it from its history?
new_z = z.detach()

# ... does new_z have information to backprop to x and y?
# NO!
print(new_z.grad_fn)
# And how could it? ``z.detach()`` returns a tensor that shares the same storage
# as ``z``, but with the computation history forgotten. It doesn't know anything
# about how it was computed.
# In essence, we have broken the tensor away from its past history.

# You can also stop autograd from tracking history on tensors with
# ``requires_grad=True`` by wrapping the code in ``with torch.no_grad():``.
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)
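
# Extra sketch (not in the original file): torch.no_grad() also works as a
# decorator, a common pattern for inference-only helpers. ``squared`` is a
# hypothetical function name used only for this example.
@torch.no_grad()
def squared(t):
    return t ** 2

print(squared(x).requires_grad)  # False
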
File 3 of 3: (contents not shown in this view)