Add pytorch NLP.
Mansterteddy committed Feb 28, 2019
1 parent bc83d96 commit 99c26f8
Showing 3 changed files with 262 additions and 1 deletion.
143 changes: 143 additions & 0 deletions pytorch/NLP/Blocks/blocks.py
@@ -0,0 +1,143 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

lin = nn.Linear(5, 3) # maps from R^5 to R^3, parameters A, b
# data is 2x5. A maps from 5 to 3... can we map "data" under A?
data = torch.randn(2, 5)
print(lin(data))
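
# (Illustrative check) nn.Linear computes data @ A^T + b, with A = lin.weight
# and b = lin.bias, so this reproduces the output above by hand:
print(data @ lin.weight.t() + lin.bias)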

# In PyTorch, most non-linearities are in torch.nn.functional (we have it imported as F).
# Note that non-linearities typically don't have parameters like affine maps do.
# That is, they don't have weights that are updated during training.
data = torch.randn(2, 2)
print(data)
print(F.relu(data))
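
# (Illustrative) relu(x) is just max(0, x) elementwise, so clamping at zero
# gives the same result as F.relu above:
print(data.clamp(min=0))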

# Softmax is also in torch.nn.functional
data = torch.randn(5)
print(data)
print(F.softmax(data, dim=0))
print(F.softmax(data, dim=0).sum()) # Sum to 1 because it is a distribution!
print(F.log_softmax(data, dim=0))  # there's also log_softmax
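
# (Illustrative check) softmax(x)_i = exp(x_i) / sum_j exp(x_j); computing it
# by hand matches F.softmax above:
print(torch.exp(data) / torch.exp(data).sum())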

data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
        ("Give it to me".split(), "ENGLISH"),
        ("No creo que sea una buena idea".split(), "SPANISH"),
        ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]

test_data = [("Yo creo que si".split(), "SPANISH"),
             ("it is lost on me".split(), "ENGLISH")]

# word_to_ix maps each word in the vocab to a unique integer, which will be its index into the Bag of words vector
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

class BoWClassifier(nn.Module):  # inheriting from nn.Module!

    def __init__(self, num_labels, vocab_size):
        # Calls the init function of nn.Module. Don't get confused by the syntax,
        # just always do it in an nn.Module.
        super(BoWClassifier, self).__init__()

        # Define the parameters that you will need. In this case, we need A and b,
        # the parameters of the affine mapping.
        # Torch defines nn.Linear(), which provides the affine map.
        # Make sure you understand why the input dimension is vocab_size
        # and the output is num_labels!
        self.linear = nn.Linear(vocab_size, num_labels)

        # Note! The non-linearity log softmax does not have parameters,
        # so we don't need to worry about that here.

    def forward(self, bow_vec):
        # Pass the input through the linear layer,
        # then pass that through log_softmax.
        # Many non-linearities and other functions are in torch.nn.functional.
        return F.log_softmax(self.linear(bow_vec), dim=1)

def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        vec[word_to_ix[word]] += 1
    return vec.view(1, -1)

def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])
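
# (Illustrative) the BoW vector simply counts word occurrences; a repeated word
# gets a count of 2 at its index:
print(make_bow_vector("me gusta me".split(), word_to_ix))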

model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The model knows its parameters. The first output below is A, the second is b.
# Whenever you assign a component to a class variable in the __init__ function
# of a module, as was done here with the line
#     self.linear = nn.Linear(...)
# then, through some Python magic from the PyTorch devs, your module
# (in this case, BoWClassifier) will store knowledge of the nn.Linear's parameters.
for param in model.parameters():
    print(param)
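
# (Illustrative) concretely, A has shape (NUM_LABELS, VOCAB_SIZE) and b has
# shape (NUM_LABELS,):
for param in model.parameters():
    print(param.shape)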

# To run the model, pass in a BoW vector
# Here we don't need to train, so the code is wrapped in torch.no_grad()
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)

label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

# Run on test data before we train, just to see a before-and-after
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Print the matrix column corresponding to "creo"
print(next(model.parameters())[:, word_to_ix["creo"]])

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
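
# (Illustrative check) NLLLoss on log-probabilities is just the negative
# log-probability of the correct class, averaged over the batch:
example_log_probs = torch.tensor([[-0.2, -1.7]])
example_target = torch.tensor([0])
print(loss_function(example_log_probs, example_target))  # tensor(0.2000)
print(-example_log_probs[0, example_target[0]])          # same value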

# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have more than
# two instances. Usually, somewhere between 5 and 30 epochs is reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance.
        model.zero_grad()

        # Step 2. Make our BoW vector and wrap the target in a Tensor
        # as an integer. For example, if the target is SPANISH, then
        # we wrap the integer 0. The loss function then knows that the 0th
        # element of the log probabilities is the log probability
        # corresponding to SPANISH.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Run our forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, gradients, and update the parameters by
        # calling optimizer.step().
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Index corresponding to Spanish goes up, English goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])
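
# (Illustrative) to turn the log-probabilities into a predicted label, take the
# argmax and map it back through label_to_ix; ix_to_label is a helper
# introduced here only for illustration:
ix_to_label = {ix: label for label, ix in label_to_ix.items()}
with torch.no_grad():
    for instance, label in test_data:
        pred_ix = model(make_bow_vector(instance, word_to_ix)).argmax(dim=1).item()
        print(label, "->", ix_to_label[pred_ix])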
118 changes: 118 additions & 0 deletions pytorch/NLP/Tensor/tensor.py
@@ -0,0 +1,118 @@
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

# torch.tensor(data) creates a torch.Tensor object with the given data
V_data = [1., 2., 3.]
V = torch.tensor(V_data)
print(V)

# Creates a matrix
M_data = [[1., 2., 3.], [4., 5., 6.]]
M = torch.tensor(M_data)
print(M)

# Create a 3D tensor of size 2x2x2
T_data = [[[1., 2.], [3., 4.]], [[5., 6.], [7., 8.]]]
T = torch.tensor(T_data)
print(T)

# Index into V and get a scalar (0 dimensional tensor)
print(V[0])
# Get a Python number from it
print(V[0].item())

# Index into M and get a vector
print(M[0])

# Index into T and get a matrix
print(T[0])

x = torch.randn((3, 4, 5))
print(x)

# Operations with Tensors
x = torch.Tensor([1., 2., 3.])
y = torch.Tensor([4., 5., 6.])
z = x + y
print(z)

# By default, it concatenates along the first axis (concatenates rows)
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat([x_1, y_1])
print(z_1)

# Concatenate columns:
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
# second arg specifies which axis to concat along
z_2 = torch.cat([x_2, y_2], 1)
print(z_2)

x = torch.randn(2, 3, 4)
print(x)
print(x.view(2, 12)) # Reshape to 2 rows, 12 columns
# Same as above. If one of the dimensions is -1, its size can be inferred
print(x.view(2, -1))
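
# (Illustrative) view returns a tensor that shares storage with x, so writing
# through the view is visible in x:
y = x.view(2, -1)
y[0, 0] = 100.
print(x[0, 0, 0])  # tensor(100.)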

# Tensor factory methods have a ``requires_grad`` flag
x = torch.tensor([1., 2., 3.], requires_grad=True)

# With requires_grad=True, you can still do all the operations you previously could
y = torch.tensor([4., 5., 6.], requires_grad=True)
z = x + y
print(z)

# But z knows something extra
print(z.grad_fn)

# Let's sum up all the entries in z
s = z.sum()
print(s)
print(s.grad_fn)

# calling .backward() on any variable will run backprop, starting from it.
s.backward()
print(x.grad)
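
# (Why all ones?) s = sum_i (x_i + y_i), so ds/dx_i = 1 for every i; x.grad is
# therefore a vector of ones:
print(torch.ones_like(x) == x.grad)  # elementwise True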

x = torch.randn(2, 2)
y = torch.randn(2, 2)
# By default, user created Tensors have ``requires_grad=False``
print(x.requires_grad, y.requires_grad)
z = x + y
# So you can't backprop through z
print(z.grad_fn)

# ``.requires_grad_( ... )`` changes an existing Tensor's ``requires_grad``
# flag in-place. The input flag defaults to ``True`` if not given.
x = x.requires_grad_()
y = y.requires_grad_()
# z contains enough information to compute gradients, as we saw above
z = x + y
print(z.grad_fn)
# If any input to an operation has ``requires_grad=True``, so will the output
print(z.requires_grad)


# Now z has the computation history that relates itself to x and y
# Can we just take its values, and **detach** it from its history?
new_z = z.detach()

# ... does new_z have information to backprop to x and y?
# NO!
print(new_z.grad_fn)
# And how could it? ``z.detach()`` returns a tensor that shares the same storage
# as ``z``, but with the computation history forgotten. It doesn't know anything
# about how it was computed.
# In essence, we have broken the Tensor away from its past history
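
# (Illustrative) detach also marks the returned tensor as not requiring
# gradients:
print(new_z.requires_grad)  # False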

print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)
@@ -31,7 +31,7 @@
# Start training
training_iters = 1000
for step in range(training_iters):
-    if step % 20 == 0 or (step + 1)==training_iters:
+    if step % 20 == 0 or (step + 1) == training_iters:
        print(step, sess.run(W), sess.run(b))

    _ = sess.run([train_optimizer])
