-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrainingdataset.py
63 lines (52 loc) · 2.06 KB
/
trainingdataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pickle
import random
import numpy as np
import word_utils
class TrainingDataset:
    """Build shuffled training features and one-hot labels from intents.

    The ``intents`` object passed to the constructor is read through three
    attributes: ``documents`` (an iterable of mappings with ``'words'`` and
    ``'class'`` keys), ``lexicon`` and ``classes`` (a sequence supporting
    ``.index``).

    NOTE(review): the original header comment suggested a separate class for
    test data "because it only needs get_features" -- presumably a pending
    design note; confirm with the author.
    """

    # File used by save_data/load_data when no explicit path is given.
    DEFAULT_PATH = "training_data"

    def __init__(self, intents):
        """Store ``intents`` and immediately derive ``x_train``/``y_train``."""
        self.intents = intents
        self.x_train, self.y_train = self.get_training_data_from_intents()

    def get_training_data_from_intents(self):
        """Return ``(features, labels)`` for all documents, jointly shuffled."""
        features = self.get_features()
        labels = self.get_labels()
        return self.shuffle(features, labels)

    def get_features(self):
        """Return one bag-of-words row per document as a numpy array.

        Each row is whatever ``word_utils.build_bag_of_words`` returns for the
        document's ``'words'`` and the intents' lexicon.
        """
        lexicon = self.intents.lexicon
        return np.array([
            word_utils.build_bag_of_words(doc['words'], lexicon)
            for doc in self.intents.documents
        ])

    def get_labels(self):
        """Return the one-hot encoding of every document's class.

        The result has shape ``(n_documents, n_classes)``; it stays
        two-dimensional even when there are no documents.

        Raises:
            ValueError: if a document's ``'class'`` is not in
                ``intents.classes`` (raised by ``.index``).
        """
        classes = self.intents.classes
        documents = list(self.intents.documents)
        labels = np.zeros((len(documents), len(classes)))
        for row, doc in enumerate(documents):
            # Set the column that corresponds to this document's class to 1;
            # every other column of the row stays 0.
            labels[row, classes.index(doc['class'])] = 1
        return labels

    @staticmethod
    def shuffle(features, labels):
        """Shuffle ``features`` and ``labels`` together, keeping rows paired.

        Uses ``random.shuffle``, so the order is reproducible via
        ``random.seed``. If the inputs differ in length, the extra trailing
        items of the longer one are dropped (``zip`` semantics).

        Returns:
            Tuple ``(x_train, y_train)`` of numpy arrays.
        """
        training_data = list(zip(features, labels))
        if not training_data:
            # zip(*[]) yields nothing to unpack, so handle the empty case
            # explicitly and return empty arrays instead of raising.
            return np.asarray(features)[:0], np.asarray(labels)[:0]
        random.shuffle(training_data)
        x_train, y_train = zip(*training_data)
        return np.array(x_train), np.array(y_train)

    def save_data(self, path=DEFAULT_PATH):
        """Pickle lexicon, classes, ``x_train`` and ``y_train`` to ``path``.

        Args:
            path: Destination file; defaults to ``"training_data"`` in the
                current working directory.
        """
        payload = {
            'lexicon': self.intents.lexicon,
            'classes': self.intents.classes,
            'x_train': self.x_train,
            'y_train': self.y_train,
        }
        with open(path, "wb") as file:
            pickle.dump(payload, file)

    @staticmethod
    def load_data(path=DEFAULT_PATH):
        """Load data previously written by ``save_data``.

        Args:
            path: Source file; defaults to ``"training_data"`` in the current
                working directory.

        Returns:
            Tuple ``(lexicon, classes, x_train, y_train)``.

        Raises:
            FileNotFoundError: if ``path`` does not exist.
            KeyError: if the pickled dict lacks one of the expected keys.

        Warning:
            Unpickling can execute arbitrary code. Only load files that were
            produced by ``save_data`` from a trusted source.
        """
        with open(path, "rb") as file:
            # SECURITY: pickle.load must never be used on untrusted files.
            data = pickle.load(file)
        return data['lexicon'], data['classes'], data['x_train'], data['y_train']