-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathvision_query.py
86 lines (68 loc) · 2.5 KB
/
vision_query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
import base64
import json
import time
import os
def explain_image(image_path, model, prompt, ollama_url, timeout=None):
    """Send one image plus a text prompt to a chat endpoint and return the reply.

    The image file is read, base64-encoded and posted to
    ``<ollama_url>/api/chat`` as a single user message. The response body is
    consumed as newline-delimited JSON and the ``message.content`` fragment
    of every line is concatenated into one string.

    Args:
        image_path: Path of the image file to read (opened in binary mode).
        model: Value placed in the request's ``model`` field.
        prompt: Text placed in the user message's ``content`` field.
        ollama_url: Base URL of the server; a trailing slash is tolerated.
        timeout: Optional timeout forwarded to ``requests.post``. ``None``
            (the default) keeps the previous behavior of waiting
            indefinitely.

    Returns:
        The concatenated reply text. An empty string is returned when the
        server answers with a non-200 status (the error is printed).

    Raises:
        OSError: If the image file cannot be read.
        requests.RequestException: If the HTTP request itself fails.
        json.JSONDecodeError: If a response line is not valid JSON.
    """
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Strip a trailing slash so "http://host:11434/" does not become "//api/chat".
    url = ollama_url.rstrip("/") + "/api/chat"
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt,
                "images": [encoded_image],
            }
        ],
    }
    headers = {"Content-Type": "application/json"}

    fragments = []
    # stream=True lets the lines be processed as they arrive instead of after
    # the whole body has been buffered; the ``with`` releases the connection.
    with requests.post(
        url, json=payload, headers=headers, stream=True, timeout=timeout
    ) as response:
        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            return ""

        for chunk in response.iter_lines():
            if not chunk:
                # iter_lines can yield empty keep-alive lines.
                continue
            data = json.loads(chunk.decode("utf-8"))
            if "error" in data:
                # Report a failure carried inside the stream rather than
                # crashing with KeyError on the missing "message" field.
                print(f"Error: {data['error']}")
                continue
            message = data.get("message") or {}
            fragments.append(message.get("content") or "")

    return "".join(fragments)
def explain_images_directory(directory, model, prompt, url):
    """Run ``explain_image`` on every image in a directory and save the results.

    Files in ``directory`` whose names end in ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitively) are processed in sorted name order. The results are
    written as a JSON list to ``<epoch>_image_metadata.json`` in the current
    working directory, where ``<epoch>`` is the integer Unix time at which
    this function was called.

    Args:
        directory: Directory to scan (not recursive).
        model: Model name forwarded to ``explain_image``.
        prompt: Prompt text forwarded to ``explain_image``.
        url: Server base URL forwarded to ``explain_image``.

    Returns:
        The name of the JSON file that was written.
    """
    current_epoch = int(time.time())

    # Match extensions case-insensitively so "IMG_0001.JPG" is not skipped,
    # and sort because os.listdir returns entries in arbitrary order.
    image_extensions = (".jpg", ".jpeg", ".png")
    image_files = sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(image_extensions)
    )

    metadata_list = []
    for image_file in image_files:
        drone_picture = os.path.join(directory, image_file)
        image_metadata = explain_image(drone_picture, model, prompt, url)
        # Each entry records when that particular image finished processing.
        metadata_list.append({
            'filename': drone_picture,
            'timestamp': time.time(),
            'metadata': image_metadata,
        })

    metadata_file_name = f'{current_epoch}_image_metadata.json'
    with open(metadata_file_name, 'w') as f:
        json.dump(metadata_list, f)
    return metadata_file_name
def main():
    """Load settings from ``prompt.json`` and describe one sample image.

    ``prompt.json`` (read from the current working directory) must provide
    the keys ``model``, ``prompt`` and ``url``. The reply returned by
    ``explain_image`` for the sample image is printed to stdout.
    """
    # JSON text is UTF-8; be explicit so a non-ASCII prompt is not decoded
    # with a platform-dependent locale encoding.
    with open('prompt.json', 'r', encoding='utf-8') as f:
        prompt_data = json.load(f)

    model = prompt_data['model']
    prompt = prompt_data['prompt']
    url = prompt_data['url']

    image_inference = explain_image(
        "../../docs/images/speech-inference.png", model, prompt, url
    )
    print(image_inference)


if __name__ == "__main__":
    main()