from utilities import *

#---------------------------------------------------------------------------
#	Global Variables
#---------------------------------------------------------------------------

#the folder passed to extract_vqa as its input folder
#(presumably holds the raw VQA v1 question files - TODO confirm)
vqa_questions_path = './data/questions/vqa_v1/'
#output file of extract_vqa (the extracted dataset)
vqa_questions_file = './data/questions/vqa_v1.json'
#output file of preprocess (the preprocessed dataset)
vqa_processed_questions_file = './data/questions/vqa_v1_processed.json'
#output file of embed; also the input file of every vqa_subset call
vqa_embedded_questions_file = './data/questions/vqa_v1_embedded.json'
#output of get_embedding for the A subset (only used by the disabled A step)
A_file = './models/A.npy'
#output of get_embedding for the B subset (only used by the disabled B step)
B_path = './models/B/'
#output files of vqa_subset for the A, B and C subsets respectively
A_data_file = './data/A.json'
B_data_file = './data/B.json'
C_data_file = './data/C.json'

#---------------------------------------------------------------------------
#	Pipeline (calls to the utility functions)
#---------------------------------------------------------------------------

#step 1: extract the VQA dataset into a parallel arrays structure
dataset = extract_vqa(
    folder=vqa_questions_path,
    output_file=vqa_questions_file,
    force=False,
    verbose=True,
)

#step 2: preprocess the dataset and save the result into the processed file
#NOTE(review): the return value is discarded, so step 3 presumably relies on
#preprocess updating `dataset` in place - confirm against utilities
preprocess(
    dataset,
    output_file=vqa_processed_questions_file,
    force=False,
    verbose=True,
)

#step 3: embed the questions of the dataset using skip thoughts embedding
embed(
    dataset=dataset,
    output_file=vqa_embedded_questions_file,
    force=False,
    verbose=True,
)

#subset A (disabled): the real train+val questions,
#used as the entire set of questions
#A_data = vqa_subset(vqa_embedded_questions_file, output_file=A_data_file, force=False, real=True, train=True, validation=True, open_ended=True)
#A = get_embedding(A_data, chunks=1, output=A_file, force=False)

#subset B (disabled): the real dev-test questions,
#used as target questions
#B_data = vqa_subset(vqa_embedded_questions_file, output_file=B_data_file, force=False,
#                    abstract=False, real=True,
#                    train=False, validation=False, test=False, dev=True,
#                    open_ended=True, multiple_choice=False)
#B = get_embedding(B_data, chunks=800*8, output=B_path, force=False)

#subset C: same flags as subset B, except that multiple_choice is enabled
#instead of open_ended; the result is written to C_data_file
C_data = vqa_subset(
    vqa_embedded_questions_file,
    output_file=C_data_file,
    force=False,
    abstract=False,
    real=True,
    train=False,
    validation=False,
    test=False,
    dev=True,
    open_ended=False,
    multiple_choice=True,
)