strange detection result using the pretrained detection model on kitti-car #30
Comments
Sorry, the code is messy; the following is the whole test code:

```python
#!/usr/bin/env python
import numpy as np
import os, sys
import pickle
import timeit, Image, ImageDraw
from google.protobuf import text_format

caffe_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
print('caffe_root is {0}'.format(caffe_root))

CONFIDENCE_THRES = 0.2
kitti_detection_dataset_dir = '/home/hzw/projects/dataset/kitti/detection/training/image_2/'
model_store_dir = os.path.join(caffe_root, 'models/VGGNet/KITTI/RRC_2560x768_kitti_4r4b_max_size/')
model_def_path = os.path.join(model_store_dir, 'deploy.prototxt')
model_weights_path = os.path.join(model_store_dir, 'VGG_KITTI_RRC_2560x768_kitti_4r4b_max_size_iter_60000.caffemodel')
voc_labelmap_file_path = os.path.join(caffe_root, 'data/KITTI-car/labelmap_voc.prototxt')
save_dir = os.path.join(caffe_root, 'models/VGGNet/KITTI/RRC_2560x768_kitti_4r4b_max_size/result-test-kitti_detection_training/')
txt_dir = os.path.join(caffe_root, \
    'models/VGGNet/KITTI/RRC_2560x768_kitti_4r4b_max_size/result-test-kitti_detection_training/conf-{0}/'.format(CONFIDENCE_THRES))

# make sure the working directory is caffe_root
os.chdir(caffe_root)
# add the caffe python module
sys.path.insert(0, 'python')
import caffe
from caffe.proto import caffe_pb2
from _ensemble import *


def get_labelname(labelmap, labels):
    num_labels = len(labelmap.item)
    labelnames = []
    if type(labels) is not list:
        labels = [labels]
    for label in labels:
        found = False
        for i in xrange(0, num_labels):
            if label == labelmap.item[i].label:
                found = True
                labelnames.append(labelmap.item[i].display_name)
                break
        assert found == True
    return labelnames


def get_net_out(image, net, transformer):
    """forward the image and get the detection output"""
    # print("processing image: {0}".format(img_path))
    # image = caffe.io.load_image(img_path)
    transformed_image = transformer.preprocess('data', image)
    net.blobs['data'].data[...] = transformed_image
    # forward pass
    net_out = net.forward()
    return net_out


def parse_net_out(net_out_dict, voc_labelmap, img_size, detection_num=3, conf_thres=CONFIDENCE_THRES):
    """parse the detection output"""
    img_h, img_w = img_size[0:2]
    ensemble_num = 0
    det_total = np.zeros([0, 6], float)
    for out_name, out_val in net_out_dict.items():
        # if out_name == 'x_flip':
        #     continue
        net_out = out_val
        for out_i in range(2, detection_num + 1):
            detection_i = net_out['detection_out%d' % (out_i)].copy()
            # parse the output
            det_label_i = detection_i[0, 0, :, 1]
            det_conf_i = detection_i[0, 0, :, 2]
            # if out_name == 'orin':
            #     det_xmin_i = detection_i[0, 0, :, 3]
            #     det_ymin_i = detection_i[0, 0, :, 4]
            #     det_xmax_i = detection_i[0, 0, :, 5]
            #     det_ymax_i = detection_i[0, 0, :, 6]
            if True:  # out_name == 'x_flip':
                det_xmin_i = 1 - detection_i[0, 0, :, 5]
                det_ymin_i = detection_i[0, 0, :, 4]
                det_xmax_i = 1 - detection_i[0, 0, :, 3]
                det_ymax_i = detection_i[0, 0, :, 6]
            # get detections with confidence higher than thres
            top_indices_i = [j for j, conf in enumerate(det_conf_i) if conf >= conf_thres]
            if len(top_indices_i) == 0:
                det_this_i = np.zeros([0, 6], float)
            else:
                top_conf_i = det_conf_i[top_indices_i]
                top_labels_i = det_label_i[top_indices_i]
                # top_labelnames_i = get_labelname(voc_labelmap, top_labels_i.tolist())
                top_xmin_i = det_xmin_i[top_indices_i] * img_w
                top_ymin_i = det_ymin_i[top_indices_i] * img_h
                top_xmax_i = det_xmax_i[top_indices_i] * img_w
                top_ymax_i = det_ymax_i[top_indices_i] * img_h
                det_this_i = np.concatenate((top_xmin_i.reshape(-1, 1), top_ymin_i.reshape(-1, 1),
                                             top_xmax_i.reshape(-1, 1), top_ymax_i.reshape(-1, 1),
                                             top_conf_i.reshape(-1, 1), top_labels_i.reshape(-1, 1)), axis=1)
            det_total = np.concatenate((det_total, det_this_i), axis=0)
            ensemble_num = ensemble_num + 1
    # print("det_total: ")
    # print(det_total)
    det_results = det_ensemble(det_total, ensemble_num)
    return det_results


def transform_det_results_to_linelist(det_results, voc_labelmap, conf_thres=CONFIDENCE_THRES):
    """convert the ensembled detections to KITTI-style result lines"""
    result_line_list = []
    # print(det_results)
    assert det_results.shape[0] > 0, "det_results.shape is: " + str(det_results.shape)
    idxs = np.where(det_results[:, 4] > conf_thres)[0]
    top_xmin = det_results[idxs, 0]
    top_ymin = det_results[idxs, 1]
    top_xmax = det_results[idxs, 2]
    top_ymax = det_results[idxs, 3]
    top_conf = det_results[idxs, 4]
    top_label = det_results[idxs, 5]
    if top_label.shape[0] > 0:
        top_labelname = get_labelname(voc_labelmap, top_label.tolist())
    for i in range(top_conf.shape[0]):
        label = top_labelname[i]
        xmin = top_xmin[i]
        ymin = top_ymin[i]
        xmax = top_xmax[i]
        ymax = top_ymax[i]
        h = float(ymax - ymin)
        w = float(xmax - xmin)
        if (w == 0) or (h == 0):
            continue
        if (h / w >= 2) and ((xmin < 10) or (xmax > 1230)):
            continue
        score = top_conf[i]
        result_line = "%s -1 -1 -10 %.3f %.3f %3.f %3.f -1 -1 -1 -1000 -1000 -1000 -10 %.8f" % (label, xmin, ymin, xmax, ymax, score)
        result_line_list.append(result_line)
    return result_line_list


if __name__ == "__main__":
    caffe.set_device(0)
    caffe.set_mode_gpu()
    STORE_OUTPUT = False
    PROCESS_OUTPUT = True
    # prepare the store dirs
    if not os.path.exists(txt_dir):
        os.makedirs(txt_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # read the labelmap configuration
    with open(voc_labelmap_file_path, 'r') as fid:
        voc_labelmap = caffe_pb2.LabelMap()
        text_format.Merge(str(fid.read()), voc_labelmap)
    if STORE_OUTPUT:
        # construct the net
        net = caffe.Net(model_def_path,
                        model_weights_path,
                        caffe.TEST)
        # input preprocessing: 'data' is the name of the input blob == net.inputs[0]
        transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
        transformer.set_transpose('data', (2, 0, 1))              # HWC -> CHW
        transformer.set_raw_scale('data', 255)                    # the reference model operates on images in [0,255] range instead of [0,1]
        transformer.set_mean('data', np.array([104, 117, 123]))   # mean pixel value
        transformer.set_channel_swap('data', (2, 1, 0))           # the reference model has channels in BGR order instead of RGB
        image_width, image_height, image_channel, batch_num = 2560, 768, 3, 1
        net.blobs['data'].reshape(batch_num, image_channel, image_height, image_width)
    if PROCESS_OUTPUT:
        write_img_result = True
    dataset_img_dir = kitti_detection_dataset_dir
    for img_name in sorted(os.listdir(dataset_img_dir)):
        img_path = os.path.join(dataset_img_dir, img_name)
        if os.path.isfile(img_path):
            [img_index, img_ext] = img_name.split(".")
            if img_ext == 'png':
                # the path of NN's output
                nn_output_result_path = os.path.join(save_dir, img_index + '.pkl')
                img_number = int(img_index)
                if STORE_OUTPUT:
                    print("Processing image: {0}".format(img_path))
                    orin_img = caffe.io.load_image(img_path)
                    # # multiply 255.0, refer to http://www.cnblogs.com/jianyingzhou/p/4571454.html
                    # orin_img = orin_img * 255.0
                    x_flip_img = orin_img[:, ::-1, :]
                    orin_net_out = get_net_out(orin_img, net, transformer)
                    x_flip_net_out = get_net_out(x_flip_img, net, transformer)
                    cur_net_out_dict = {'orin': orin_net_out, 'x_flip': x_flip_net_out}
                    store_dict = {'output': cur_net_out_dict, 'img_shape': orin_img.shape[0:2]}
                    # store output into pickle
                    with open(nn_output_result_path, 'wb') as fid:
                        pickle.dump(store_dict, fid)
                if PROCESS_OUTPUT:
                    if not STORE_OUTPUT:
                        # read output from pickle
                        with open(nn_output_result_path, 'rb') as fid:
                            store_dict = pickle.load(fid)
                    print("Processing output: {0}".format(nn_output_result_path))
                    cur_net_out_dict, img_shape = store_dict['output'], store_dict['img_shape']
                    cur_det_result = parse_net_out(cur_net_out_dict, voc_labelmap, img_size=img_shape)
                    if len(cur_det_result) > 0:
                        cur_result_line_list = transform_det_results_to_linelist(cur_det_result, voc_labelmap)
                    else:
                        cur_result_line_list = []
                    # print('cur_result_line_list is: ')
                    # print(cur_result_line_list)
                    if write_img_result:
                        img_txt_result_path = os.path.join(txt_dir, img_index + '.txt')
                        with open(img_txt_result_path, 'w') as fid:
                            for line in cur_result_line_list:
                                fid.write(line + '\n')
```

And I do not know why applying the same operation

```python
det_xmin_i = 1 - detection_i[0, 0, :, 5]
det_ymin_i = detection_i[0, 0, :, 4]
det_xmax_i = 1 - detection_i[0, 0, :, 3]
det_ymax_i = detection_i[0, 0, :, 6]
```

to both the original image's output and the horizontally-flipped image's output gives a good result. It's so strange.
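For reference, here is a minimal sketch of the usual test-time horizontal-flip convention for SSD-style `detection_out` blobs with normalized [0, 1] coordinates. This is an assumption about the convention, not something verified against RRC's `deploy.prototxt`, and `unflip_boxes` is a made-up helper name: boxes detected on the flipped image are mirrored back with `x' = 1 - x` (which also swaps the roles of xmin and xmax), while boxes from the original image would normally be kept unchanged.

```python
import numpy as np

def unflip_boxes(det, flipped):
    """Map one detection_out slice det[0, 0, :, :] back to original-image
    coordinates.  Columns 3..6 hold xmin, ymin, xmax, ymax normalized to
    [0, 1]; only output computed on a horizontally flipped input is mirrored."""
    xmin, ymin, xmax, ymax = det[:, 3], det[:, 4], det[:, 5], det[:, 6]
    if flipped:
        # mirroring swaps xmin/xmax: 1 - old xmax becomes the new xmin
        return np.stack([1 - xmax, ymin, 1 - xmin, ymax], axis=1)
    return np.stack([xmin, ymin, xmax, ymax], axis=1)

# toy check, rows are [image_id, label, conf, xmin, ymin, xmax, ymax]
toy = np.array([[0, 1, 0.9, 0.7, 0.2, 0.9, 0.4]])
print(unflip_boxes(toy, flipped=True))   # ~[[0.1 0.2 0.3 0.4]]  box mirrored to the left half
print(unflip_boxes(toy, flipped=False))  #  [[0.7 0.2 0.9 0.4]]  box kept as-is
```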
Hello, I would like to ask you about the test code: how do I see the test results on the image? Thank you very much!
@ZhihongChen123 The code for testing the pretrained model is listed in the 2nd comment. The evaluation code is from the KITTI benchmark.
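In case it helps with the visualization question: below is a minimal sketch (not part of the repo; `draw_kitti_results` is a hypothetical helper) that draws the boxes from one of the per-image result `.txt` files written by the script above, assuming each line follows the KITTI result format (type, truncation, occlusion, alpha, xmin, ymin, xmax, ymax, 3 dimension values, 3 location values, rotation_y, score).

```python
from PIL import Image, ImageDraw

def draw_kitti_results(img_path, txt_path, out_path, conf_thres=0.2):
    """Draw boxes from a KITTI-format result file onto the image."""
    img = Image.open(img_path).convert('RGB')
    draw = ImageDraw.Draw(img)
    with open(txt_path) as fid:
        for line in fid:
            fields = line.split()
            label, score = fields[0], float(fields[15])   # score is the last (16th) field
            xmin, ymin, xmax, ymax = map(float, fields[4:8])
            if score < conf_thres:
                continue
            draw.rectangle([xmin, ymin, xmax, ymax], outline=(255, 0, 0))
            draw.text((xmin, max(ymin - 12, 0)), '%s %.2f' % (label, score), fill=(255, 0, 0))
    img.save(out_path)

# example call (hypothetical paths):
# draw_kitti_results('000123.png', 'conf-0.2/000123.txt', '000123_vis.png')
```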
I have just downloaded the released pretrained detection model and tested it on the training set of the KITTI 2D object detection benchmark.
At the beginning I only modified the KITTI dataset directory in rrc_test.py, but I got a bad result. The PR curve is:
[PR curve screenshot]
When I visualize the detection results on the images, I see the following:
[detection visualization screenshot]
As you can see, the results are bilaterally symmetric (mirrored left-right).
I suppose the coordinate post-processing is wrong, so I just modified it to the current code:
```python
#!/usr/bin/env python
import numpy as np
import os, sys
import pickle
import timeit, Image, ImageDraw
from google.protobuf import text_format

caffe_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
print('caffe_root is {0}'.format(caffe_root))

CONFIDENCE_THRES = 0.2
kitti_detection_dataset_dir = '/home/hzw/projects/dataset/kitti/detection/training/image_2/'
model_store_dir = os.path.join(caffe_root, 'models/VGGNet/KITTI/RRC_2560x768_kitti_4r4b_max_size/')
model_def_path = os.path.join(model_store_dir, 'deploy.prototxt')
model_weights_path = os.path.join(model_store_dir, 'VGG_KITTI_RRC_2560x768_kitti_4r4b_max_size_iter_60000.caffemodel')
voc_labelmap_file_path = os.path.join(caffe_root, 'data/KITTI-car/labelmap_voc.prototxt')
save_dir = os.path.join(caffe_root, 'models/VGGNet/KITTI/RRC_2560x768_kitti_4r4b_max_size/result-test-kitti_detection_training/')
txt_dir = os.path.join(caffe_root,
    'models/VGGNet/KITTI/RRC_2560x768_kitti_4r4b_max_size/result-test-kitti_detection_training/conf-{0}/'.format(CONFIDENCE_THRES))

# make sure the working directory is caffe_root
os.chdir(caffe_root)
# add the caffe python module
sys.path.insert(0, 'python')
import caffe
from caffe.proto import caffe_pb2
from _ensemble import *

def get_labelname(labelmap, labels):
    num_labels = len(labelmap.item)
    labelnames = []
    if type(labels) is not list:
        labels = [labels]
    for label in labels:
        found = False
        for i in xrange(0, num_labels):
            if label == labelmap.item[i].label:
                found = True
                labelnames.append(labelmap.item[i].display_name)
                break
        assert found == True
    return labelnames

def get_net_out(image, net, transformer):
    """forward the image and get the detection output"""
    # print("processing image: {0}".format(img_path))
    # image = caffe.io.loadimg(img_path)
    transformed_image = transformer.preprocess('data', image)
    net.blobs['data'].data[...] = transformed_image

def parse_net_out(net_out_dict, voc_labelmap, img_size, detection_num=3, conf_thres=CONFIDENCE_THRES):
    """parse the detection output"""
    img_h, img_w = img_size[0:2]
    ensemble_num = 0
    det_total = np.zeros([0, 6], float)

def transform_det_results_to_linelist(det_results, voc_labelmap, conf_thres=CONFIDENCE_THRES):
    """"""
    result_line_list = []

if __name__ == "__main__":
    caffe.set_device(0)
    caffe.set_mode_gpu()
```
And I find it works well. For example:
[example detection screenshot]
And the PR curve is:
[PR curve screenshot]
I also tested this model on the KITTI tracking training set; the PR curve is:
[PR curve screenshot]
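As an aside, both listings depend on `det_ensemble` from the repo's `_ensemble` module, which is never shown in this thread. Purely as a guess at its role (not the repo's actual implementation), a stand-in that merges the stacked `[xmin, ymin, xmax, ymax, conf, label]` rows with greedy per-class NMS might look like:

```python
import numpy as np

def det_ensemble_stub(det_total, ensemble_num, iou_thres=0.5):
    """Greedy per-class NMS over stacked detections.
    det_total rows: [xmin, ymin, xmax, ymax, conf, label] in pixels.
    ensemble_num is ignored here; the real det_ensemble may use it, e.g.
    to weight boxes found by several ensemble members."""
    if det_total.shape[0] == 0:
        return det_total
    boxes = det_total[det_total[:, 4].argsort()[::-1]]   # sort by confidence, descending
    keep = []
    for row in boxes:
        x1, y1, x2, y2 = row[0:4]
        suppressed = False
        for kept in keep:
            kx1, ky1, kx2, ky2 = kept[0:4]
            iw = max(0.0, min(x2, kx2) - max(x1, kx1))
            ih = max(0.0, min(y2, ky2) - max(y1, ky1))
            inter = iw * ih
            union = (x2 - x1) * (y2 - y1) + (kx2 - kx1) * (ky2 - ky1) - inter
            # drop a lower-confidence box that overlaps a kept box of the same class
            if union > 0 and inter / union > iou_thres and row[5] == kept[5]:
                suppressed = True
                break
        if not suppressed:
            keep.append(row)
    return np.array(keep)
```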