-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgemini_base_evaluation.py
104 lines (95 loc) · 4.77 KB
/
gemini_base_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import time
import os
import json
import argparse
from tqdm import tqdm
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part
import vertexai.preview.generative_models as generative_models
def parse_args(argv=None):
    """Parse the command-line options of the Gemini CVRR-ES evaluation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the process arguments (``sys.argv[1:]``) are used,
            which is the behaviour callers relied on before this parameter
            existed.

    Returns:
        argparse.Namespace with the attributes ``cvrr_dataset_path``,
        ``output_dir``, ``google_cloud_bucket_name`` and
        ``google_cloud_project_name`` (all required).
    """
    parser = argparse.ArgumentParser(
        description="Generate Gemini-Pro-Vision predictions on the CVRR-ES benchmark."
    )
    parser.add_argument(
        "--cvrr_dataset_path",
        required=True,
        help="Path to the CVRR-ES dataset directory (one sub-folder per evaluation dimension).",
    )
    parser.add_argument(
        "--output_dir",
        required=True,
        help="Directory in which one prediction json file per dimension is saved.",
    )
    parser.add_argument(
        "--google_cloud_bucket_name",
        required=True,
        help="Bucket name. For Gemini, CVRR-ES dataset needs to be also uploaded to google cloud bucket.",
    )
    parser.add_argument(
        "--google_cloud_project_name",
        required=True,
        help="Project name. For Gemini, please provide the google cloud project name.",
    )
    return parser.parse_args(argv)
def _ask_gemini(model, video_part, user_question, video_uri):
    """Ask Gemini one question about one video, retrying until it answers or blocks.

    Args:
        model: The generative model object whose ``generate_content`` is called.
        video_part: The video input passed to the model alongside the question.
        user_question: The question text sent to the model.
        video_uri: URI of the video, used only in the "blocked" log message.

    Returns:
        The answer text, or ``None`` when the response's prompt feedback
        carries a block reason (the question is then given up on).
    """
    generation_config = {
        "max_output_tokens": 2048,
        "temperature": 1,
        "top_p": 1,
        "top_k": 32,
    }
    block_threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: block_threshold,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: block_threshold,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: block_threshold,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: block_threshold,
    }
    while True:
        # Reset on every attempt: if the request itself fails, the block check
        # below must not inspect the response of an earlier attempt or of an
        # earlier question.
        result = None
        try:
            result = model.generate_content(
                [video_part, user_question],
                generation_config=generation_config,
                safety_settings=safety_settings,
            )
            return result.text
        except Exception as e:
            print(f"Error: {e}, sleeping for 60 sec")
            time.sleep(60)
        if result is not None:
            # A response came back but its text could not be read; look at the
            # prompt feedback to decide whether retrying is pointless.
            try:
                prompt_feedback = result.to_dict()['prompt_feedback']
                if prompt_feedback['block_reason'] >= 0:
                    print(prompt_feedback)
                    print(f"gemini has blocked qa for video {video_uri}")
                    return None
            except Exception as e:
                print(e)
        print("Retrying for the same prompt")


def evaluate_single_video_dimension(cvrr_dataset_path, single_folder, output_dir, gcp_cloud_bucket_name,
                                    gcp_project_name):
    """Generate Gemini-Pro-Vision answers for every question of one CVRR-ES dimension.

    Reads ``<cvrr_dataset_path>/<single_folder>/annotations_<single_folder>.json``,
    asks the model each question about the corresponding video stored in the
    Google Cloud bucket, and writes the question/answer list to
    ``<output_dir>/<single_folder>.json``. Does nothing when that output file
    already exists.

    Args:
        cvrr_dataset_path: Local directory containing the dimension folders.
        single_folder: Name of the dimension folder to evaluate.
        output_dir: Directory receiving the prediction json file.
        gcp_cloud_bucket_name: Bucket holding the videos under ``CVRR-ES/``.
        gcp_project_name: Google Cloud project used to initialise Vertex AI.

    Returns:
        None. The predictions are written to disk.
    """
    # Skip this dimension if its json file already exists. Checked first so
    # that no Vertex AI client/model is set up for work that is already done.
    json_file_path = os.path.join(output_dir, single_folder + '.json')
    if os.path.exists(json_file_path):
        return

    vertexai.init(project=gcp_project_name, location="us-central1")
    model = GenerativeModel("gemini-1.0-pro-vision-001")

    print(f"Generating Gemini-Pro-Vision predictions on CVRR-ES benchmark for dimension: {single_folder}")
    annotation_path = os.path.join(cvrr_dataset_path, single_folder,
                                   "annotations_" + single_folder + ".json")
    with open(annotation_path, "r") as f:
        qa_pairs = json.load(f)  # list of dictionaries

    # Iterate over each question.
    model_response = []
    for single_dict in tqdm(qa_pairs):
        user_question = single_dict["Q"]
        # The dataset must be additionally uploaded to google cloud bucket.
        my_path = f"gs://{gcp_cloud_bucket_name}/" + "CVRR-ES/" + single_folder + "/" + single_dict['VideoID']
        video_part = Part.from_uri(my_path, mime_type="video/mp4")
        answer = _ask_gemini(model, video_part, user_question, my_path)
        if answer is None:
            # Gemini is not allowing us to get a response for this video/question.
            answer = ""
        model_response.append({"Q": user_question, "A": answer})

    # Save the collected responses into a JSON file.
    with open(json_file_path, "w") as f:
        json.dump(model_response, f)
def main():
    """
    Main function to control the flow of the program.

    Parses the command-line arguments, makes sure the output directory
    exists, and runs the Gemini evaluation once for every sub-directory of
    the dataset path.
    """
    args = parse_args()
    dataset_path = args.cvrr_dataset_path
    # Only directories are evaluation dimensions; stray files in the dataset
    # root would otherwise fail on their missing annotation file. Sorted so
    # the processing order is the same on every run. Listed before the output
    # directory is created so an output directory placed inside the dataset
    # path is not picked up as a dimension on its first creation.
    all_folder_names = sorted(
        name for name in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, name))
    )
    output_dir = args.output_dir
    # Create output directory if not exists.
    os.makedirs(output_dir, exist_ok=True)
    for single_folder in all_folder_names:
        evaluate_single_video_dimension(dataset_path, single_folder, output_dir,
                                        args.google_cloud_bucket_name, args.google_cloud_project_name)
    print("Inference with Gemini-Pro-Vision model completed!")
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()