-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathvqa_dataset.py
101 lines (77 loc) · 3.49 KB
/
vqa_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
## This file defines DataSet, the container that stores the per-sample data lists and serves them in batches.
import numpy as np
class DataSet(object):
    """Store parallel per-sample arrays and serve them in fixed-size batches.

    Every list argument is converted with ``np.array`` and is later indexed
    with the same sample indices, so all lists that a phase reads are expected
    to be aligned element-for-element with ``image_id_list``.

    Args:
        image_id_list: Per-sample image ids; its length defines ``count``.
        image_file_list: Per-sample image files (presumably file paths --
            confirm against the caller).
        question_id_list: Per-sample question ids (stored, never batched).
        question_idxs_list: Per-sample question indices; returned in the
            "train" and "test" phases.
        question_masks_list: Per-sample question masks; returned in the
            "train" and "test" phases.
        question_type_list: Per-sample question types (stored, never batched).
        answer_id_list: Per-sample answer ids (stored, never batched).
        answer_idxs_list: Per-sample answer indices; returned in the "train"
            phase.
        answer_masks_list: Per-sample answer masks; returned in the "train"
            phase.
        answer_type_list: Per-sample answer types (stored, never batched).
        batch_size: Number of samples in every batch that is returned.
        phase: One of "train", "test" or "cnn_features"; selects which arrays
            ``next_batch`` returns.
        shuffle: When true, the sample order is reshuffled on every
            ``reset()``.

    Note:
        Optional lists left as ``None`` become 0-d object arrays, so a phase
        that reads them fails at indexing time.
    """

    # Phases that next_batch() knows how to serve.
    _PHASES = ("train", "test", "cnn_features")

    def __init__(self,
                 image_id_list,
                 image_file_list,
                 question_id_list=None,
                 question_idxs_list=None,
                 question_masks_list=None,
                 question_type_list=None,
                 answer_id_list=None,
                 answer_idxs_list=None,
                 answer_masks_list=None,
                 answer_type_list=None,
                 batch_size=1,
                 phase="train",
                 shuffle=False):
        self.image_id_list = np.array(image_id_list)
        self.image_file_list = np.array(image_file_list)
        self.question_id_list = np.array(question_id_list)
        self.question_idxs_list = np.array(question_idxs_list)
        self.question_masks_list = np.array(question_masks_list)
        self.question_type_list = np.array(question_type_list)
        # The attribute has always been spelled ``answer_ids_list``; it is
        # kept for existing readers, and an alias matching the parameter name
        # (and ``question_id_list``) refers to the same array.
        self.answer_ids_list = np.array(answer_id_list)
        self.answer_id_list = self.answer_ids_list
        self.answer_idxs_list = np.array(answer_idxs_list)
        self.answer_masks_list = np.array(answer_masks_list)
        self.answer_type_list = np.array(answer_type_list)
        self.batch_size = batch_size
        self.phase = phase
        self.shuffle = shuffle
        self.setup()

    def setup(self):
        """Compute the sample/batch counts and start the first pass."""
        self.count = len(self.image_id_list)
        self.num_batches = int(np.ceil(self.count * 1.0 / self.batch_size))
        # Number of padding samples needed to fill up the last batch.
        self.fake_count = self.num_batches * self.batch_size - self.count
        self.idxs = list(range(self.count))
        self.reset()

    def reset(self):
        """Rewind to the first batch, reshuffling the order if enabled."""
        self.current_idx = 0
        if self.shuffle:
            np.random.shuffle(self.idxs)

    def next_batch(self):
        """Fetch the next batch and advance the cursor by ``batch_size``.

        A trailing partial batch is padded up to ``batch_size`` with
        ``fake_count`` indices drawn by ``np.random.choice`` from the whole
        dataset, so every batch has the same length.

        Returns:
            A tuple of arrays whose contents depend on ``phase``:
              * "train": ``(image_files, image_ids, question_idxs,
                question_masks, answer_idxs, answer_masks)``
              * "test": ``(image_files, image_ids, question_idxs,
                question_masks)``
              * "cnn_features": ``(image_files, image_ids)``

        Raises:
            AssertionError: If no batch is left (call ``reset()`` first).
            ValueError: If ``phase`` is not one of the supported phases.
        """
        assert self.has_next_batch(), "no batch left; call reset() first"
        if self.phase not in self._PHASES:
            # Previously an unknown phase returned None without advancing the
            # cursor, so a ``while has_next_batch()`` loop never terminated.
            raise ValueError(
                "unknown phase %r; expected one of %r"
                % (self.phase, self._PHASES))

        start = self.current_idx
        if self.has_full_next_batch():
            current_idxs = self.idxs[start:start + self.batch_size]
        else:
            padding = np.random.choice(self.count, self.fake_count)
            current_idxs = self.idxs[start:self.count] + list(padding)

        batch = (self.image_file_list[current_idxs],
                 self.image_id_list[current_idxs])
        if self.phase in ("train", "test"):
            batch += (self.question_idxs_list[current_idxs],
                      self.question_masks_list[current_idxs])
        if self.phase == "train":
            batch += (self.answer_idxs_list[current_idxs],
                      self.answer_masks_list[current_idxs])

        # Advance only after every lookup succeeded, so a failed fetch does
        # not silently skip a batch.
        self.current_idx += self.batch_size
        return batch

    def has_next_batch(self):
        """Return whether at least one sample has not been served yet."""
        return self.current_idx < self.count

    def has_full_next_batch(self):
        """Return whether a whole ``batch_size`` of samples is still left."""
        return self.current_idx + self.batch_size <= self.count