forked from shobrook/sequitur
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcore.py
64 lines (51 loc) · 2.14 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Standard Library (os) and data libraries (pandas, numpy)
import os
import pandas as pd
import numpy as np
# Third Party
import torch
from torch.autograd import Variable
from torch.nn import CrossEntropyLoss, MSELoss
# Local Modules
from .autoencoders import LSTM_AE
###############
# GPU Setting #
###############
# Expose only GPU 0 to this process. NOTE: this assignment overwrites any
# CUDA_VISIBLE_DEVICES value that was already set in the environment.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})  # comment this line if you want to use all of your GPUs
# True when PyTorch reports a usable CUDA device.
use_cuda = torch.cuda.is_available()
# Target device: the first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")
####################
# Data preparation #
####################
def prepare_dataset(sequential_data):
    """Convert a table of sequences into a float tensor with a trailing axis.

    The input is converted to a ``torch.float`` tensor and a new axis of
    size one is inserted at position 2, so a 2-D input of shape ``(a, b)``
    becomes a tensor of shape ``(a, b, 1)``.

    Args:
        sequential_data: A ``pandas.DataFrame``, ``numpy.ndarray``,
            ``torch.Tensor``, ``list`` or ``tuple`` holding numeric data.
            Presumably laid out as (number of sequences, sequence length);
            the code itself only requires at least two dimensions, because
            the new axis is inserted at index 2.

    Returns:
        A tuple ``(unsqueezed_data, seq_len, no_features)`` where
        ``unsqueezed_data`` is the converted tensor, ``seq_len`` is its
        ``shape[1]`` and ``no_features`` is its ``shape[2]``.

    Raises:
        TypeError: If ``sequential_data`` is not one of the supported types.
    """
    if isinstance(sequential_data, pd.DataFrame):
        # Go through NumPy so only the cell values (not index/columns) are used.
        data_in_tensor = torch.tensor(np.array(sequential_data), dtype=torch.float)
    elif isinstance(sequential_data, torch.Tensor):
        # Copy so the caller's tensor is never aliased or modified.
        data_in_tensor = sequential_data.detach().clone().to(torch.float)
    elif isinstance(sequential_data, (np.ndarray, list, tuple)):
        # NOTE: the check must be against np.ndarray (the class); np.array is
        # only a factory function and never equals the type of an array.
        data_in_tensor = torch.tensor(sequential_data, dtype=torch.float)
    else:
        raise TypeError(
            "sequential_data must be a pandas.DataFrame, numpy.ndarray, "
            "torch.Tensor, list or tuple, got "
            f"{type(sequential_data).__name__}"
        )

    # Insert the size-one axis at position 2.
    unsqueezed_data = data_in_tensor.unsqueeze(2)

    # shape[0] is the number of rows (sequences) in the input.
    seq_len = unsqueezed_data.shape[1]
    no_features = unsqueezed_data.shape[2]
    return unsqueezed_data, seq_len, no_features
##################################################
# QuickEncode : Encoding & Decoding & Final_loss #
##################################################
def QuickEncode(input_data,
                embedding_dim,
                learning_rate=1e-3,
                every_epoch_print=100,
                epochs=10000,
                patience=20,
                max_grad_norm=0.005):
    """Fit an LSTM autoencoder on ``input_data`` and return its outputs.

    The input is first normalised by ``prepare_dataset``; an ``LSTM_AE`` is
    then built from the resulting sequence length and feature count plus the
    hyper-parameters given here, fitted, and used to encode and decode the
    same data.

    Args:
        input_data: Raw sequences, in any form ``prepare_dataset`` accepts.
        embedding_dim: Forwarded to ``LSTM_AE``; presumably the size of the
            encoded representation (confirm against ``LSTM_AE``).
        learning_rate: Forwarded to ``LSTM_AE``.
        every_epoch_print: Forwarded to ``LSTM_AE``; presumably the logging
            interval in epochs (confirm against ``LSTM_AE``).
        epochs: Forwarded to ``LSTM_AE``.
        patience: Forwarded to ``LSTM_AE``; presumably an early-stopping
            patience (confirm against ``LSTM_AE``).
        max_grad_norm: Forwarded to ``LSTM_AE``; presumably a gradient
            clipping threshold (confirm against ``LSTM_AE``).

    Returns:
        A tuple ``(embedded, decoded, final_loss)``: the encoder output and
        the decoder output, both moved to the CPU and exposed through
        ``.data``, followed by whatever ``model.fit`` returned.
    """
    tensor_data, sequence_length, feature_count = prepare_dataset(input_data)

    # Hyper-parameters are passed positionally, in the order LSTM_AE is
    # called with in the original code.
    autoencoder = LSTM_AE(
        sequence_length,
        feature_count,
        embedding_dim,
        learning_rate,
        every_epoch_print,
        epochs,
        patience,
        max_grad_norm,
    )
    final_loss = autoencoder.fit(tensor_data)

    # Record results: encode the training data, then reconstruct it from
    # that encoding.
    latent = autoencoder.encode(tensor_data)
    reconstruction = autoencoder.decode(latent)
    return latent.cpu().data, reconstruction.cpu().data, final_loss