Does anyone have YOLOv5 inference code using OpenVINO in Python? I found some, but none of them are working #5408

Rajesh-mandal opened this issue Oct 30, 2021 · 9 comments
question Further information is requested


Search before asking


I have some code, but it's not working. I am getting output like the one below when I run inference with the OpenVINO model using Python.



@Rajesh-mandal Rajesh-mandal added the question Further information is requested label Oct 30, 2021
@github-actions github-actions bot added the Stale Stale and schedule for closing soon label Nov 30, 2021
I managed to fix the code. It's working now.

@github-actions github-actions bot removed the Stale Stale and schedule for closing soon label Dec 3, 2021
Divide the image by 255, or otherwise normalize the image after resizing it.

@Rajesh-mandal following OpenVINO export PR #6057 we'll be doing a new PR for OpenVINO inference with

@Rajesh-mandal do you think you could help us with OpenVINO inference now that export is complete? We need to add OpenVINO fields to DetectMultiBackend() for this purpose. I've never used OpenVINO though, so I don't have a good inference example to start from:


Lines 277 to 437 in db6ec66

class DetectMultiBackend(nn.Module):
    """YOLOv5 MultiBackend class for Python inference on various backends.

    The backend is selected from the suffix of the weights path (plus the
    ``dnn`` flag for ONNX files):

        PyTorch:          weights = *.pt
        TorchScript:                *.torchscript
        CoreML:                     *.mlmodel
        TensorFlow:                 *_saved_model
        TensorFlow:                 *.pb
        TensorFlow Lite:            *.tflite
        ONNX Runtime:               *.onnx
        OpenCV DNN:                 *.onnx with dnn=True
        TensorRT:                   *.engine
    """

    def __init__(self, weights='', device=None, dnn=False):
        """Load the model for the backend implied by ``weights``.

        Args:
            weights: Path to the weights file, or a list of paths. For every
                backend except PyTorch only the first list element is used.
            device: Device passed to ``attempt_load`` (PyTorch) and used to
                place the TensorRT binding buffers; also consulted by
                ``warmup``.
            dnn: If true, load ``*.onnx`` weights with OpenCV DNN instead of
                ONNX Runtime.

        Every local variable of this method is copied onto ``self`` at the
        end, so the backend booleans (``self.pt``, ``self.jit``, ...) and the
        loaded handles (``self.session``, ``self.net``, ``self.interpreter``,
        ``self.tf``, ...) are available to ``forward``. Local names here are
        therefore part of the class's behavior and must not be renamed.
        """
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        # nn.Module must be initialised before any submodule (self.model) is assigned
        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        suffix = Path(w).suffix.lower()
        suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel']
        check_suffix(w, suffixes)  # check weights have acceptable suffix
        pt, jit, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
        w = attempt_download(w)  # download if not local

        if jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files)
            if extra_files['config.txt']:
                d = json.loads(extra_files['config.txt'])  # extra_files dict
                stride, names = int(d['stride']), d['names']
        elif pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
            stride = int(model.stride.max())  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            cuda = torch.cuda.is_available()
            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt
            check_version(trt.__version__, '8.0.0', verbose=True)  # version requirement
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            # Pre-allocate one device tensor per engine binding and record its raw pointer
            bindings = OrderedDict()
            for index in range(model.num_bindings):
                name = model.get_binding_name(index)
                dtype = trt.nptype(model.get_binding_dtype(index))
                shape = tuple(model.get_binding_shape(index))
                data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
                bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            context = model.create_execution_context()
            batch_size = bindings['images'].shape[0]
        else:  # TensorFlow model (TFLite, pb, saved_model)
            if pb:
                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
                import tensorflow as tf

                def wrap_frozen_graph(gd, inputs, outputs):
                    # Import the frozen GraphDef and prune it to a callable over the given tensors
                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))

                graph_def = tf.Graph().as_graph_def()
                with open(w, 'rb') as f:  # close the file handle deterministically
                    graph_def.ParseFromString(f.read())
                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
            elif saved_model:
                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
                import tensorflow as tf
                model = tf.keras.models.load_model(w)
            elif tflite:
                if 'edgetpu' in w.lower():
                    LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                    import tflite_runtime.interpreter as tfli
                    # Edge TPU runtime shared library name per operating system
                    delegate = {'Linux': 'libedgetpu.so.1',
                                'Darwin': 'libedgetpu.1.dylib',
                                'Windows': 'edgetpu.dll'}[platform.system()]
                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
                else:
                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                    import tensorflow as tf
                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
                interpreter.allocate_tensors()  # allocate
                input_details = interpreter.get_input_details()  # inputs
                output_details = interpreter.get_output_details()  # outputs
        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False, val=False):
        """Run YOLOv5 inference on ``im`` with the loaded backend.

        Args:
            im: Input tensor; its shape is unpacked as (batch, channel,
                height, width).
            augment: Forwarded to the PyTorch model only.
            visualize: Forwarded to the PyTorch model only.
            val: For PyTorch/TorchScript, return the model's full output
                instead of its first element; for every other backend,
                return ``(y, [])`` instead of ``y``.

        Returns:
            The prediction ``y`` (NumPy results are converted to a torch
            tensor), shaped as described under ``val``.
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.pt or self.jit:  # PyTorch
            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
            return y if val else y[0]
        elif self.coreml:  # CoreML
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
            # builtin float replaces the np.float alias, which NumPy 1.24 removed
            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(float)
            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
        elif self.onnx:  # ONNX
            im = im.cpu().numpy()  # torch to numpy
            if self.dnn:  # ONNX OpenCV DNN
                self.net.setInput(im)
                y = self.net.forward()
            else:  # ONNX Runtime
                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
        elif self.engine:  # TensorRT
            assert im.shape == self.bindings['images'].shape, (im.shape, self.bindings['images'].shape)
            # Point the input binding at the caller's tensor, then run the engine in place
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = self.bindings['output'].data
        else:  # TensorFlow model (TFLite, pb, saved_model)
            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
            if self.pb:
                y = self.frozen_func(x=self.tf.constant(im)).numpy()
            elif self.saved_model:
                y = self.model(im, training=False).numpy()
            elif self.tflite:
                inp, out = self.input_details[0], self.output_details[0]
                int8 = inp['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = inp['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(inp['index'], im)
                self.interpreter.invoke()  # run inference before reading the output tensor
                y = self.interpreter.get_tensor(out['index'])
                if int8:
                    scale, zero_point = out['quantization']
                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
            # Scale the first four output columns by the input width/height
            y[..., 0] *= w  # x
            y[..., 1] *= h  # y
            y[..., 2] *= w  # w
            y[..., 3] *= h  # h
        y = torch.tensor(y) if isinstance(y, np.ndarray) else y
        return (y, []) if val else y

    def warmup(self, imgsz=(1, 3, 640, 640), half=False):
        """Warm up the model by running inference once on a zero image.

        Only the PyTorch, TensorRT and ONNX backends are warmed up, and only
        when ``self.device`` is a non-CPU ``torch.device``.

        Args:
            imgsz: Shape of the dummy input tensor.
            half: Use a ``torch.half`` dummy input instead of ``torch.float``.
        """
        if self.pt or self.engine or self.onnx:  # warmup types
            if isinstance(self.device, torch.device) and self.device.type != 'cpu':  # only warmup GPU models
                im = torch.zeros(*imgsz).to(self.device).type(torch.half if half else torch.float)  # input image
                self.forward(im)  # warmup

Rajesh-mandal commented Dec 23, 2021 via email

@Rajesh-mandal great, thanks!

Divide the image by 255, or otherwise normalize the image after resizing it.

Can you elaborate a bit more with a piece of code? Thanks.

@victorsoyvictor sure, when preparing images for model inference, you can divide the image by 255 to normalize it. Here's an example using Python:

# Load the image (load_image_somehow is a placeholder for your own loader)
image = load_image_somehow()

# Resize the image (resize_image_somehow is likewise a placeholder)
resized_image = resize_image_somehow(image)

# Normalize the image: dividing by 255.0 maps pixel values into the range [0, 1]
# NOTE(review): assumes pixel values start in [0, 255] — confirm for your image source
normalized_image = resized_image / 255.0

This will scale the pixel values into the range [0, 1], which is a common practice for preprocessing images before feeding them into a neural network.

question Further information is requested
