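"""vis.py

Visualization helpers for BOP-format pose estimation results: draws predicted
2D bounding boxes from a results CSV and projects the predicted object
translations onto the corresponding RGB images (YCB-V directory layout).
"""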
import cv2
import numpy as np
import json
import torch
from torchvision.ops import box_convert
import supervision as sv

# def visualize_bop_box_old(data_path, scene_id, im_id, box_format="xywh"):
#     """
#     This function visualizes a bounding box from a BOP detection result file,
#     projecting the corners based on the rotation matrix and camera intrinsics.
#
#     Args:
#         data_path (str): Path to the BOP dataset directory containing images.
#         scene_id (int): Scene ID from the detection result.
#         im_id (int): Image ID from the detection result.
#         box_format (str, optional): Format of the bounding box coordinates in the result file.
#             Defaults to "xywh" (x-min, y-min, width, height). The other option is
#             "xyxy" (x-min, y-min, x-max, y-max).
#     """
#     # Load the image based on scene and image IDs
#     image_path = f"{data_path}/{im_id:06d}.png"
#     image = cv2.imread(image_path)
#
#     # Read detection results from the file
#     with open("output/results.csv", "r") as f:
#         next(f)
#         lines = f.readlines()
#
#     # Find the specific detection line for this image and object
#     for line in lines:
#         data = line.strip().split(",")
#         if int(data[0]) == scene_id and int(data[1]) == im_id:
#             # Extract bounding box coordinates based on format
#             if box_format == "xywh":
#                 x_min, y_min, width, height = [float(val) for val in data[7].split(" ")]
#             elif box_format == "xyxy":
#                 x_min, y_min, x_max, y_max = [float(val) for val in data[7].split(" ")]
#                 width = x_max - x_min
#                 height = y_max - y_min
#             else:
#                 raise ValueError("Unsupported box format. Choose between 'xywh' or 'xyxy'")
#
#             # Define 8 corners of a 3D bounding box (assuming a cuboid) centered
#             # at (x_min + width/2, y_min + height/2)
#             box_corners_3d = np.array([
#                 [-width/2, -height/2,  1],
#                 [ width/2, -height/2,  1],
#                 [ width/2,  height/2,  1],
#                 [-width/2,  height/2,  1],
#                 [-width/2, -height/2, -1],
#                 [ width/2, -height/2, -1],
#                 [ width/2,  height/2, -1],
#                 [-width/2,  height/2, -1],
#             ])
#
#             # Extract the rotation matrix from the CSV row (field 4)
#             rotation_matrix = np.array([[float(val) for val in data[4].split(" ")]])
#
#             # Load camera intrinsic parameters from scene_camera.json
#             camera_path = f"{data_path}/test/{scene_id}/scene_camera.json"
#             with open(camera_path, "r") as f:
#                 camera_data = json.load(f)
#             camera_matrix = np.array(camera_data["cam_K"])  # Assuming the "cam_K" key stores the intrinsic matrix
#
#             # Project bounding box corners onto the image plane
#             projection_matrix = np.concatenate((camera_matrix, np.array([[0], [0], [1]])), axis=1)  # Assuming no distortion
#             projected_corners = np.matmul(projection_matrix, np.matmul(rotation_matrix, box_corners_3d.T).T)
#
#             # Normalize projected points by their z-component
#             projected_corners = projected_corners[:, :2] / projected_corners[:, 2]
#
#             # Extract projected x and y coordinates
#             projected_x = projected_corners[:, 0]
#             projected_y = projected_corners[:, 1]
#
#             # Find minimum and maximum projected points for drawing the box
#             min_x = int(np.min(projected_x))
#             max_x = int(np.max(projected_x))
#             min_y = int(np.min(projected_y))
#             max_y = int(np.max(projected_y))
#
#             # Draw the projected bounding box on the image
#             cv2.rectangle(image, (min_x, min_y), (max_x, max_y), (0, 255, 0), 2)  # Green for visualization
#
#             # Display the image with the bounding box
#             cv2.imshow(f"BOP Detection - Scene {scene_id}, Image {im_id}", image)
#             cv2.waitKey(0)
#             cv2.destroyAllWindows()
#             break  # Exit after finding the specific detection


def visualize_bop_box(path, im_id, scene_id, box_format="xywh"):
    """Visualize a detected 2D bounding box from a BOP results file.

    Args:
        path (str): Directory containing the RGB images.
        im_id (int): Image ID from the detection result.
        scene_id (int): Scene ID from the detection result.
        box_format (str, optional): Format of the box field in results.csv.
            "xywh" is treated as normalized (cx, cy, w, h); "xyxy" as absolute
            (x_min, y_min, x_max, y_max) pixel corners. Defaults to "xywh".
    """
    # Load the image based on scene and image IDs
    image_path = f"{path}/{im_id:06d}.png"
    image = cv2.imread(image_path)
    # Read detection results from the file
    with open("output/results.csv", "r") as f:
        next(f)  # skip the header line
        lines = f.readlines()
    # Find the specific detection line for this image and object
    for line in lines:
        data = line.strip().split(",")
        if int(data[0]) == scene_id and int(data[1]) == im_id:
            # Extract bounding box coordinates based on format
            if box_format == "xywh":
                # Normalized (cx, cy, w, h): scale to pixels, then convert to corners
                boxes = torch.tensor([[float(val) for val in data[7].split(" ")]])
                h, w, _ = image.shape
                boxes = boxes * torch.tensor([w, h, w, h])
                xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
            elif box_format == "xyxy":
                # Corners are already absolute pixel coordinates
                xyxy = np.array([[float(val) for val in data[7].split(" ")]])
            else:
                raise ValueError("Unsupported box format. Choose between 'xywh' or 'xyxy'")
            detections = sv.Detections(xyxy=xyxy)
            # supervision expects one label string per detection; data[3] is the score
            labels = [f"object {float(data[3]):.2f}"]
            box_annotator = sv.BoxAnnotator()
            # cv2.imread already returns BGR, so no color conversion is needed
            annotated_frame = box_annotator.annotate(scene=image.copy(), detections=detections, labels=labels)
            cv2.imshow(f"BOP Detection - Scene {scene_id:06d}, Image {im_id:06d}", annotated_frame)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
            break  # Exit after finding the specific detection
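
# Note: the call above assumes an older supervision release where
# BoxAnnotator.annotate accepts a labels argument. Newer versions (an
# assumption, no specific version pinned here) split label drawing into a
# separate sv.LabelAnnotator; if annotate() rejects labels, draw boxes and
# labels in two passes instead.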

# Edit these values to match your data paths and the box format in results.csv
data_path = "input"
box_format = "xywh"  # change to "xyxy" if your results store absolute corners
visualize_bop_box(data_path, 1, 48, box_format=box_format)

# Read detection results from the file
# with open("/media/sebastian/TEMP/poet/results/backbone_ycbv-test.csv", "r") as f:
with open("./output/bop_gt/ycbv.csv", "r") as f:
    next(f)  # skip the header line
    lines = f.readlines()

for line in lines:
    # BOP result rows: scene_id, im_id, obj_id, score, R, t, time
    data = line.strip().split(",")
    scene_id = int(data[0])
    img_id = int(data[1])
    obj_id = int(data[2])
    score = float(data[3])
    R_obj = data[4]
    t_obj = data[5]
    time = float(data[6])
    # Parse the row-major rotation matrix and the translation vector
    R_values = list(map(float, R_obj.split()))
    R_obj = np.array(R_values).reshape(3, 3)
    t_values = list(map(float, t_obj.split()))
    t_obj = np.array(t_values)
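    # Sanity check (added here, not part of the original pipeline): a valid
    # rotation matrix is orthonormal, so warn if the parsed values look corrupted.
    if not np.allclose(R_obj @ R_obj.T, np.eye(3), atol=1e-3):
        print(f"Warning: non-orthonormal R for scene {scene_id}, image {img_id}")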
    # Load the per-image camera intrinsics from scene_camera.json
    camera_path = f"{data_path}/test_all/{scene_id:06d}/scene_camera.json"
    with open(camera_path, "r") as f:
        camera_data = json.load(f)
    cam_intr = camera_data.get(str(img_id))
    cam_K = np.array(cam_intr.get("cam_K")).reshape(3, 3)
    cam_R = np.array(cam_intr.get("cam_R_w2c")).reshape(3, 3)
    cam_t = np.array(cam_intr.get("cam_t_w2c"))
    tmp = np.hstack((cam_R, cam_t.reshape(3, 1)))
    # C = np.dot(cam_K, tmp)
    # t_obj = np.append(t_obj, 1)
    # t_img = np.dot(cam_K, np.dot(tmp, t_obj))
    # Project the object translation onto the image plane
    t_img = np.dot(cam_K, t_obj)
    t_img = t_img / t_img[2]
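    # In equation form (BOP results give t_obj in the camera frame):
    #   z * [u, v, 1]^T = K @ t_obj
    # so dividing by the third component yields the pixel coordinates (u, v)
    # of the object's center; this marks position only, not orientation.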
    # Load the image based on scene and image IDs and mark the projected center
    image_path = f"{data_path}/test_all/{scene_id:06d}/rgb/{img_id:06d}.png"
    image = cv2.imread(image_path)
    x = int(t_img[0])
    y = int(t_img[1])
    radius = 5
    color = (0, 255, 0)  # Green (BGR format)
    thickness = -1  # Filled circle
    cv2.circle(image, (x, y), radius, color, thickness)
    cv2.imshow(f"BOP Detection - Scene {scene_id:06d}, Image {img_id:06d}", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
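

# A minimal extension sketch, not part of the original script: beyond the
# projected center, the full pose can be visualized by projecting the object's
# coordinate axes with cv2.projectPoints. The helper name draw_pose_axes and
# the axis length (in the model's units, mm for BOP) are placeholders.
def draw_pose_axes(image, R, t, K, length=50.0):
    """Draw the object's x/y/z axes (BGR: blue/green/red) at pose (R, t)."""
    axes = np.float32([[0, 0, 0], [length, 0, 0], [0, length, 0], [0, 0, length]])
    rvec, _ = cv2.Rodrigues(R)  # rotation matrix -> axis-angle vector
    pts, _ = cv2.projectPoints(axes, rvec, t.reshape(3, 1), K, None)

    def _px(p):
        # cv2.line wants plain int pixel coordinates
        return (int(round(p[0])), int(round(p[1])))

    origin = _px(pts[0].ravel())
    for i, color in zip((1, 2, 3), ((255, 0, 0), (0, 255, 0), (0, 0, 255))):
        cv2.line(image, origin, _px(pts[i].ravel()), color, 2)
    return image

# Example use inside the loop above: image = draw_pose_axes(image, R_obj, t_obj, cam_K)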