# practice14.py
import torch
import torch.nn as nn
import torch.optim as optim
torch.manual_seed(1234)
device = ('cuda' if torch.cuda.is_available() else 'cpu')
#Vanilla RNN -> predict the next character.
#Data Preprocessing
string = 'tomato'
# print(string)
string = list(string)
# print(string) #['t', 'o', 'm', 'a', 't', 'o']
# print(string[0])
set_string = set(string) #set: a data type that does not allow duplicate elements
# print(set_string) #{'m', 'o', 't', 'a'}
# print(type(set_string)) #class 'set'
set_string = ['t', 'o', 'm', 'a'] #fix the vocabulary order by hand; iteration order over a raw set is not stable across runs
# print(set_string)
# print(type(set_string)) #list
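#A small sketch (not in the original) of building the vocabulary programmatically instead of
#hard-coding it: sorted() makes the order deterministic, unlike iterating over a raw set.
#Note this gives alphabetical order, not the hand-picked order used below; the helper names
#idx2char / char2idx are illustrative only.
idx2char = sorted(set(string))                    #['a', 'm', 'o', 't']
char2idx = {c: i for i, c in enumerate(idx2char)}
# print(char2idx)                                 #{'a': 0, 'm': 1, 'o': 2, 't': 3}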
#Data -> One-hot Encoding
#Input x drops the last character.
#The extra pair of brackets makes the list convert to a Tensor of shape [batch, seq, feature] (batch_first = True below).
x_train = [[[1, 0, 0, 0],  #t
            [0, 1, 0, 0],  #o
            [0, 0, 1, 0],  #m
            [0, 0, 0, 1],  #a
            [1, 0, 0, 0]]] #t
# print(x_train)
# print(type(x_train)) #list
#Target y drops the first character.
#Target y consists of indices -> a classification problem -> Cross-Entropy Loss (which combines LogSoftmax and NLLLoss internally)
y_train = [[1, 2, 3, 0, 1]]
# print(y_train)
# print(type(y_train)) #list
x_train_tensor = torch.FloatTensor(x_train)
y_train_tensor = torch.LongTensor(y_train)
# print(x_train_tensor)
# print(x_train_tensor.shape) #[1, 5, 4] -> [batch, seq, feature]
# print(y_train_tensor)
# print(y_train_tensor.shape) #[1, 5]
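#A hedged sketch (not in the original): the same tensors could be built automatically from the
#string with torch.nn.functional.one_hot instead of being typed by hand. stoi is a hypothetical
#helper name that follows the hand-picked set_string order, so the result matches exactly.
import torch.nn.functional as F
stoi = {c: i for i, c in enumerate(set_string)}          #{'t': 0, 'o': 1, 'm': 2, 'a': 3}
idx = torch.LongTensor([[stoi[c] for c in string]])      #[[0, 1, 2, 3, 0, 1]] for 'tomato'
x_auto = F.one_hot(idx[:, :-1], num_classes = 4).float() #drop the last char -> [1, 5, 4]
y_auto = idx[:, 1:]                                      #drop the first char -> [1, 5]
assert torch.equal(x_auto, x_train_tensor) and torch.equal(y_auto, y_train_tensor)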
#Hyperparameters
#[batch, seq, feature]
input_size = 4 #the number of input features (here, the vocabulary size)
hidden_size = 4 #dimensionality of the hidden state h, not the number of layers; it need not equal input_size, but here the raw hidden states double as the 4-way class logits, so it matches the vocabulary size
learning_rate = 0.1
nb_epochs = 20
#Model setup
#nn.RNN(input_size, hidden_size, num_layers, nonlinearity, bias, batch_first, dropout, bidirectional)
#input_size : The number of expected features in the input x.
#hidden_size : The number of features in the hidden state h.
#num_layers : Number of recurrent layers. Default: 1
#nonlinearity : The non-linearity to use. Can be either 'tanh' or 'relu'. Default: 'tanh'
#bias : If False, the layer does not use bias weights b_ih and b_hh. Default: True
#batch_first : If True, the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature).
#Note that this does not apply to hidden or cell states. Default: False
#bidirectional : If True, becomes a bidirectional RNN. Default: False
model = nn.RNN(input_size, hidden_size, batch_first = True).to(device)
#batch_first = True -> [batch, seq, feature]
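#Not in the original: a quick look at the learnable parameters a single-layer nn.RNN holds,
#following h_t = tanh(W_ih @ x_t + b_ih + W_hh @ h_(t-1) + b_hh).
# for name, param in model.named_parameters():
#     print(name, tuple(param.shape))
#weight_ih_l0 : (4, 4) -> (hidden_size, input_size)
#weight_hh_l0 : (4, 4) -> (hidden_size, hidden_size)
#bias_ih_l0 : (4,)
#bias_hh_l0 : (4,)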
#Model outputs
#output : tensor of shape (L, D*Hout) for unbatched input, (N, L, D*Hout) for batched input with batch_first = True
#h_n : tensor of shape (D*num_layers, Hout) for unbatched input or (D*num_layers, N, Hout) for batched input
#Hout = hidden_size
#D = 2 if bidirectional else 1
x_train_tensor = x_train_tensor.to(device)
output, h_n = model(x_train_tensor)
# print(output)
# print(type(output)) #Tensor
# print(output.shape) #[1, 5, 4] -> [batch_size, seq, feature]
# print(h_n)
# print(type(h_n)) #Tensor
# print(h_n.shape) #[1, 1, 4]
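#Not in the original: for a single-layer, unidirectional RNN, h_n is simply the hidden state
#of the last time step, so it equals the last slice of output along the sequence dimension.
# print(torch.allclose(output[:, -1, :], h_n[0])) #True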
#Optimizer and criterion setup
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
criterion = nn.CrossEntropyLoss() #in the end, this is a classification problem
#usage: loss = criterion(logits, target)
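#Not in the original: nn.CrossEntropyLoss expects raw logits and applies LogSoftmax + NLLLoss
#internally, so no softmax layer is needed before it. A quick equivalence check as a sketch:
# logits = torch.randn(5, 4)
# target = torch.LongTensor([1, 2, 3, 0, 1])
# print(torch.allclose(criterion(logits, target),
#                      F.nll_loss(F.log_softmax(logits, dim = 1), target))) #True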
#Training (full-batch, no mini-batching)
for epoch in range(nb_epochs):
    output, h_n = model(x_train_tensor)
    # output = output.to(device)
    y_train_tensor = y_train_tensor.to(device)
    # print(output)
    # print(output.shape) #[1, 5, 4]
    # print(y_train_tensor.shape) #[1, 5]
    # print(y_train_tensor.reshape(-1)) #[1, 2, 3, 0, 1]
    # print((y_train_tensor.reshape(-1)).shape)
    output = output.reshape(-1, input_size)
    y_train_tensor = y_train_tensor.reshape(-1)
    loss = criterion(output, y_train_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    #softmax + argmax -> extract the index with the highest probability
    output_softmax = torch.softmax(output, dim = 1)
    # print(output_softmax)
    output_argmax = torch.argmax(output_softmax, dim = 1) #softmax is monotonic, so argmax over the raw logits gives the same result
    output_argmax = output_argmax.detach().cpu().numpy() #move to CPU, then convert to numpy
    # print(output_argmax) #[2, 3, 2, 2, 2]
    result = ''.join([set_string[s] for s in output_argmax])
    print('Epoch : {}, Train Loss : {}, result_argmax : {}, result : {}'.format(epoch + 1, loss.item(), output_argmax, result))
#Test
x_test = torch.FloatTensor([[1, 0, 0, 0],  #t
                            [0, 1, 0, 0]]) #o
print(x_test)
print(x_test.shape) #[2, 4]
x_test = x_test.reshape(1, 2, 4) #[batch, seq, feature]
with torch.no_grad():
    x_test = x_test.to(device)
    output, h_n = model(x_test)
    output = output.reshape(-1, input_size) #input_size = feature
    output_softmax = torch.softmax(output, dim = 1)
    output_argmax = torch.argmax(output_softmax, dim = 1)
    output_argmax = output_argmax.detach().cpu().numpy()
    result = ''.join([set_string[s] for s in output_argmax])
    print('result_argmax : {}, result : {}'.format(output_argmax, result)) #'to' -> 'om'
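#Not in the original: a hedged sketch of autoregressive generation, feeding each predicted
#character back in as the next input and carrying the hidden state across steps. Starting
#from 't', a model that has fit the data should roughly reproduce 'tomato'; the variable
#names below (h, generated) are illustrative only.
with torch.no_grad():
    h = None #nn.RNN initializes the hidden state to zeros when hx is None
    x = torch.FloatTensor([[[1, 0, 0, 0]]]).to(device) #'t' -> [batch = 1, seq = 1, feature = 4]
    generated = 't'
    for _ in range(5):
        out, h = model(x, h)
        idx = torch.argmax(out[0, -1]).item() #index of the most likely next character
        generated += set_string[idx]
        x = torch.zeros(1, 1, 4, device = device)
        x[0, 0, idx] = 1.0 #one-hot of the predicted character becomes the next input
    print(generated) #ideally 'tomato'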