-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcompute_missing.py
59 lines (47 loc) · 1.49 KB
/
compute_missing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import sys
import json
import numpy as np
from datetime import datetime
from solver import LassoSolver
from embedder import SkipThoughtEmbedder
from decomposer import QuestionDecomposer
# Maximum number of (question, score) pairs kept from the decomposition
# result when the output file is written.
top_N = 20
# Value handed to LassoSolver as its `l` argument (presumably the L1
# regularisation weight -- confirm against solver.py).
lmda = 1e-5
# JSON file loaded as the dataset that is given to SkipThoughtEmbedder.
dataset_file = './data/vqa_train_val_questions.json'
# JSON file holding the questions to decompose; indexed as a flat list.
questions_file ='./data/vqa_test_questions.json'
# Root of the results tree; outputs are written to <output_folder><i>/<j>.json.
output_folder = './data/basic_vqa_questions/'
# Passed to SkipThoughtEmbedder as `load` (presumably precomputed embeddings
# of the dataset -- confirm against embedder.py).
embedded_dataset = './models/skipthoughts_vqa_train_val_dataset.npy'
# Index of this task within the SLURM job array; raises KeyError when the
# environment variable is not set.
job_id = int(os.environ['SLURM_ARRAY_TASK_ID'])
# First command-line argument; combined with job_id as job_id*8 + core_id to
# pick this process's entry from incomplete.json.
core_id = int(sys.argv[1])
def load_questions(input_file):
    """Read the JSON document stored at ``input_file`` and return it.

    Args:
        input_file: Path of the JSON file to open for reading.

    Returns:
        The deserialised JSON content (whatever ``json.load`` produces).
    """
    with open(input_file, 'r') as handle:
        parsed = json.load(handle)
    return parsed
# ---------------------------------------------------------------------------
# Recompute one missing decomposition.
#
# incomplete.json is a list of [folder_index, question_index] pairs (written
# by get_missing in this file). Every process of the job array handles exactly
# one entry, selected from its SLURM task id and its core id. An index past
# the end of the list raises IndexError.
# ---------------------------------------------------------------------------

# Number of processes started per array task; core_id is expected to be in
# range(CORES_PER_JOB) so that every list entry is selected once.
CORES_PER_JOB = 8
# Number of questions stored per output sub-folder; used to turn a
# (folder, index) pair back into a position in the flat questions list.
QUESTIONS_PER_FOLDER = 1421

incomplete = load_questions('incomplete.json')[job_id * CORES_PER_JOB + core_id]

output_file = os.path.join(
    output_folder, str(incomplete[0]), str(incomplete[1]) + '.json')

# Nothing to do if another run already produced this result. The check is
# made before the (large) question and dataset files are parsed.
if os.path.isfile(output_file):
    # sys.exit() rather than the bare exit() helper, which is injected by the
    # `site` module and is not guaranteed to exist in every interpreter setup.
    sys.exit()

question = load_questions(questions_file)[
    incomplete[0] * QUESTIONS_PER_FOLDER + incomplete[1]]

dataset = load_questions(dataset_file)
embedder = SkipThoughtEmbedder(dataset, load=embedded_dataset)
solver = LassoSolver(l=lmda)
decomposer = QuestionDecomposer(embedder, solver=solver)

# `basic` is consumed as a sequence of (question, score) pairs; only the
# first top_N entries are kept.
basic = decomposer.decompose(question)

data = {
    'question': question,
    'basic': [{'question': q, 'score': s} for q, s in basic[:top_N]],
}

with open(output_file, 'w') as f:
    json.dump(data, f)
def get_missing(folder='./data/basic_vqa_questions/', num_folders=100,
                questions_per_folder=1421, output_file='incomplete.json'):
    """Record which result files are absent from the output tree.

    For every sub-folder ``<folder>/<i>`` with ``i`` in ``range(num_folders)``
    and every ``j`` in ``range(questions_per_folder)``, the pair ``[i, j]`` is
    collected when ``<j>.json`` is not present in that sub-folder. The
    collected pairs are written as JSON to ``output_file``.

    All parameters default to the values that were previously hard-coded, so
    calling ``get_missing()`` with no arguments behaves as before.

    Args:
        folder: Root directory containing the numbered sub-folders.
        num_folders: How many numbered sub-folders to scan.
        questions_per_folder: How many result files each sub-folder should hold.
        output_file: Path of the JSON file that receives the missing pairs.

    Returns:
        The list of missing ``[folder_index, question_index]`` pairs, in
        ascending order.

    Raises:
        OSError: If one of the numbered sub-folders cannot be listed.
    """
    # Imported locally so the function can be used on its own, as in the
    # original code.
    import json
    import os

    missing = []
    for i in range(num_folders):
        # A set gives O(1) membership tests; the directory listing is a list
        # and was previously scanned once per expected file.
        present = set(os.listdir(os.path.join(folder, str(i))))
        for j in range(questions_per_folder):
            if str(j) + '.json' not in present:
                missing.append([i, j])

    with open(output_file, 'w') as f:
        json.dump(missing, f)
    return missing