-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathbasic_questions.py
48 lines (38 loc) · 2.11 KB
/
basic_questions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from utilities import *
#---------------------------------------------------------------------------
# Global Variables
#---------------------------------------------------------------------------
# All paths below are relative ('./...') locations used by the question pipeline.

# Folder handed to extract_vqa() as the source of the VQA v1 question files.
vqa_questions_path = './data/questions/vqa_v1/'

# Output files of the extraction, preprocessing and embedding stages.
vqa_questions_file = './data/questions/vqa_v1.json'
vqa_processed_questions_file = './data/questions/vqa_v1_processed.json'
vqa_embedded_questions_file = './data/questions/vqa_v1_embedded.json'

# Output files for the question subsets A, B and C produced by vqa_subset().
A_data_file = './data/A.json'
B_data_file = './data/B.json'
C_data_file = './data/C.json'

# Outputs for the embeddings of subsets A and B; these are referenced only by
# the get_embedding() steps that are currently commented out.
A_file = './models/A.npy'
B_path = './models/B/'
#---------------------------------------------------------------------------
# Pipeline Steps (executed top to bottom when the file runs)
#---------------------------------------------------------------------------
# Flags shared by the three dataset stages below.
# NOTE(review): force=False presumably lets a stage reuse an existing output
# file instead of recomputing it -- confirm against utilities.
_stage_flags = dict(force=False, verbose=True)

# Stage 1: extract the VQA dataset into a parallel-arrays structure.
dataset = extract_vqa(
    folder=vqa_questions_path,
    output_file=vqa_questions_file,
    **_stage_flags
)

# Stage 2: preprocess the dataset and save it into the processed-questions
# file. The return value (if any) is not used; the same `dataset` object is
# handed on to the embedding stage.
preprocess(
    dataset,
    output_file=vqa_processed_questions_file,
    **_stage_flags
)

# Stage 3: embed the dataset of questions using skip-thoughts embedding.
embed(
    dataset=dataset,
    output_file=vqa_embedded_questions_file,
    **_stage_flags
)

# --- Disabled: subset A -----------------------------------------------------
# Get the set of real train+val questions ...
#A_data = vqa_subset(vqa_embedded_questions_file, output_file=A_data_file, force=False, real=True, train=True, validation=True, open_ended=True)
# ... and use them as the entire set of questions.
#A = get_embedding(A_data, chunks=1, output=A_file, force=False)

# --- Disabled: subset B -----------------------------------------------------
# Get the set of real dev-test questions ...
#B_data = vqa_subset(vqa_embedded_questions_file, output_file=B_data_file, force=False,
# abstract=False, real=True,
# train=False, validation=False, test=False, dev=True,
# open_ended=True, multiple_choice=False)
# ... and use them as target questions.
#B = get_embedding(B_data, chunks=800*8, output=B_path, force=False)

# --- Active: subset C -------------------------------------------------------
# Same selection flags as the disabled subset B above, except that it picks
# multiple-choice questions (multiple_choice=True, open_ended=False).
C_data = vqa_subset(
    vqa_embedded_questions_file,
    output_file=C_data_file,
    force=False,
    abstract=False, real=True,
    train=False, validation=False, test=False, dev=True,
    open_ended=False, multiple_choice=True
)