Add pytorch NLP.
Mansterteddy committed Feb 28, 2019
1 parent bc83d96 commit 99c26f8
Showing 3 changed files with 262 additions and 1 deletion.
143 changes: 143 additions & 0 deletions pytorch/NLP/Blocks/blocks.py
@@ -0,0 +1,143 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

lin = nn.Linear(5, 3) # maps from R^5 to R^3, parameters A, b
# data is 2x5. A maps from 5 to 3... can we map "data" under A?
data = torch.randn(2, 5)
print(lin(data))
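
# (Illustrative check) nn.Linear computes data @ A^T + b, with A = lin.weight
# and b = lin.bias, so this reproduces the output above by hand:
print(data @ lin.weight.t() + lin.bias)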

# In PyTorch, most non-linearities are in torch.nn.functional (we have it imported as F).
# Note that non-linearities typically don't have parameters like affine maps do.
# That is, they don't have weights that are updated during training.
data = torch.randn(2, 2)
print(data)
print(F.relu(data))
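
# (Illustrative) relu(x) is just max(0, x) elementwise, so clamping at zero
# gives the same result as F.relu above:
print(data.clamp(min=0))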

# Softmax is also in torch.nn.functional
data = torch.randn(5)
print(data)
print(F.softmax(data, dim=0))
print(F.softmax(data, dim=0).sum()) # Sum to 1 because it is a distribution!
print(F.log_softmax(data, dim=0))  # there's also log_softmax
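
# (Illustrative check) softmax(x)_i = exp(x_i) / sum_j exp(x_j); computing it
# by hand matches F.softmax above:
print(torch.exp(data) / torch.exp(data).sum())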

data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
        ("Give it to me".split(), "ENGLISH"),
        ("No creo que sea una buena idea".split(), "SPANISH"),
        ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]

test_data = [("Yo creo que si".split(), "SPANISH"),
             ("it is lost on me".split(), "ENGLISH")]

# word_to_ix maps each word in the vocab to a unique integer, which will be its index into the Bag of words vector
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

class BoWClassifier(nn.Module):  # inheriting from nn.Module!

    def __init__(self, num_labels, vocab_size):
        # Calls the init function of nn.Module. Don't get confused by the syntax,
        # just always do it in an nn.Module.
        super(BoWClassifier, self).__init__()

        # Define the parameters that you will need. In this case, we need A and b,
        # the parameters of the affine mapping.
        # Torch defines nn.Linear(), which provides the affine map.
        # Make sure you understand why the input dimension is vocab_size
        # and the output is num_labels!
        self.linear = nn.Linear(vocab_size, num_labels)

        # Note! The non-linearity log softmax does not have parameters,
        # so we don't need to worry about that here.

    def forward(self, bow_vec):
        # Pass the input through the linear layer,
        # then pass that through log_softmax.
        # Many non-linearities and other functions are in torch.nn.functional.
        return F.log_softmax(self.linear(bow_vec), dim=1)

def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        vec[word_to_ix[word]] += 1
    return vec.view(1, -1)

def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])
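
# (Illustrative) the BoW vector simply counts word occurrences; a repeated word
# gets a count of 2 at its index:
print(make_bow_vector("me gusta me".split(), word_to_ix))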

model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The model knows its parameters. The first output below is A, the second is b.
# Whenever you assign a component to a class variable in the __init__ function
# of a module, as was done here with the line
#     self.linear = nn.Linear(...)
# then, through some Python magic from the PyTorch devs, your module
# (in this case, BoWClassifier) will store knowledge of the nn.Linear's parameters.
for param in model.parameters():
    print(param)
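
# (Illustrative) concretely, A has shape (NUM_LABELS, VOCAB_SIZE) and b has
# shape (NUM_LABELS,):
for param in model.parameters():
    print(param.shape)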

# To run the model, pass in a BoW vector
# Here we don't need to train, so the code is wrapped in torch.no_grad()
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)

label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

# Run on test data before we train, just to see a before-and-after
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Print the matrix column corresponding to "creo"
print(next(model.parameters())[:, word_to_ix["creo"]])

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
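
# (Illustrative check) NLLLoss on log-probabilities is just the negative
# log-probability of the correct class, averaged over the batch:
example_log_probs = torch.tensor([[-0.2, -1.7]])
example_target = torch.tensor([0])
print(loss_function(example_log_probs, example_target))  # tensor(0.2000)
print(-example_log_probs[0, example_target[0]])          # same value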

# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have more than
# two instances. Usually, somewhere between 5 and 30 epochs is reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance.
        model.zero_grad()

        # Step 2. Make our BoW vector and wrap the target in a Tensor
        # as an integer. For example, if the target is SPANISH, then
        # we wrap the integer 0. The loss function then knows that the 0th
        # element of the log probabilities is the log probability
        # corresponding to SPANISH.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Run our forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, gradients, and update the parameters by
        # calling optimizer.step().
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Index corresponding to Spanish goes up, English goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])
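
# (Illustrative) to turn the log-probabilities into a predicted label, take the
# argmax and map it back through label_to_ix; ix_to_label is a helper
# introduced here only for illustration:
ix_to_label = {ix: label for label, ix in label_to_ix.items()}
with torch.no_grad():
    for instance, label in test_data:
        pred_ix = model(make_bow_vector(instance, word_to_ix)).argmax(dim=1).item()
        print(label, "->", ix_to_label[pred_ix])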
118 changes: 118 additions & 0 deletions pytorch/NLP/Tensor/tensor.py
@@ -0,0 +1,118 @@
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

# torch.tensor(data) creates a torch.Tensor object with the given data
V_data = [1., 2., 3.]
V = torch.tensor(V_data)
print(V)

# Creates a matrix
M_data = [[1., 2., 3.], [4., 5., 6.]]
M = torch.tensor(M_data)
print(M)

# Create a 3D tensor of size 2x2x2
T_data = [[[1., 2.], [3., 4.]], [[5., 6.], [7., 8.]]]
T = torch.tensor(T_data)
print(T)

# Index into V and get a scalar (0 dimensional tensor)
print(V[0])
# Get a Python number from it
print(V[0].item())

# Index into M and get a vector
print(M[0])

# Index into T and get a matrix
print(T[0])

x = torch.randn((3, 4, 5))
print(x)

# Operations with Tensors
x = torch.Tensor([1., 2., 3.])
y = torch.Tensor([4., 5., 6.])
z = x + y
print(z)

# By default, it concatenates along the first axis (concatenates rows)
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat([x_1, y_1])
print(z_1)

# Concatenate columns:
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
# second arg specifies which axis to concat along
z_2 = torch.cat([x_2, y_2], 1)
print(z_2)

x = torch.randn(2, 3, 4)
print(x)
print(x.view(2, 12)) # Reshape to 2 rows, 12 columns
# Same as above. If one of the dimensions is -1, its size can be inferred
print(x.view(2, -1))
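
# (Illustrative) view returns a tensor that shares storage with x, so writing
# through the view is visible in x:
y = x.view(2, -1)
y[0, 0] = 100.
print(x[0, 0, 0])  # tensor(100.)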

# Tensor factory methods have a ``requires_grad`` flag
x = torch.tensor([1., 2., 3.], requires_grad=True)

# With requires_grad=True, you can still do all the operations you previously could
y = torch.tensor([4., 5., 6.], requires_grad=True)
z = x + y
print(z)

# But z knows something extra
print(z.grad_fn)

# Let's sum up all the entries in z
s = z.sum()
print(s)
print(s.grad_fn)

# calling .backward() on any variable will run backprop, starting from it.
s.backward()
print(x.grad)
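
# (Why all ones?) s = sum_i (x_i + y_i), so ds/dx_i = 1 for every i; x.grad is
# therefore a vector of ones:
print(torch.ones_like(x) == x.grad)  # elementwise True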

x = torch.randn(2, 2)
y = torch.randn(2, 2)
# By default, user created Tensors have ``requires_grad=False``
print(x.requires_grad, y.requires_grad)
z = x + y
# So you can't backprop through z
print(z.grad_fn)

# ``.requires_grad_( ... )`` changes an existing Tensor's ``requires_grad``
# flag in-place. The input flag defaults to ``True`` if not given.
x = x.requires_grad_()
y = y.requires_grad_()
# z contains enough information to compute gradients, as we saw above
z = x + y
print(z.grad_fn)
# If any input to an operation has ``requires_grad=True``, so will the output
print(z.requires_grad)


# Now z has the computation history that relates itself to x and y
# Can we just take its values, and **detach** it from its history?
new_z = z.detach()

# ... does new_z have information to backprop to x and y?
# NO!
print(new_z.grad_fn)
# And how could it? ``z.detach()`` returns a tensor that shares the same storage
# as ``z``, but with the computation history forgotten. It doesn't know anything
# about how it was computed.
# In essence, we have broken the Tensor away from its past history
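
# (Illustrative) detach also marks the returned tensor as not requiring
# gradients:
print(new_z.requires_grad)  # False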

print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)
@@ -31,7 +31,7 @@
# Start training
training_iters = 1000
for step in range(training_iters):
-    if step % 20 == 0 or (step + 1)==training_iters:
+    if step % 20 == 0 or (step + 1) == training_iters:
        print(step, sess.run(W), sess.run(b))

    _ = sess.run([train_optimizer])
