From 83eac13f3c631b0b633ff9518622df1783eda4a7 Mon Sep 17 00:00:00 2001
From: Nikolay Shchegolev
Date: Wed, 24 Mar 2021 11:26:06 +0300
Subject: [PATCH] [CPU] DetectionOutput migration on nGraph. (#41)

---
 .../src/mkldnn_plugin/CMakeLists.txt          |   4 +-
 .../mkldnn_plugin/nodes/detectionoutput.cpp   | 178 +++++++++---------
 .../nodes/detectionoutput_onnx.cpp            |  74 ++++----
 .../src/mkldnn_plugin/nodes/list_tbl.hpp      |   4 +-
 4 files changed, 133 insertions(+), 127 deletions(-)

diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index 15ecc283f5dc81..525c34f5327556 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -54,8 +54,8 @@ set(LAYERS
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_loss.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/depth_to_space.cpp
-#    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput.cpp
-#    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput_onnx.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput_onnx.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/embedding_bag_offset_sum.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/embedding_bag_packed_sum.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/embedding_bag_sum.cpp
diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
index 780bceb8770984..bd3b1da8fc878c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
@@ -10,12 +10,17 @@
 #include
 #include
 #include
+#include "caseless.hpp"
 #include "ie_parallel.hpp"
+#include "common/tensor_desc_creator.h"
+#include
 
 namespace InferenceEngine {
 namespace Extensions {
 namespace Cpu {
 
+using MKLDNNPlugin::TensorDescCreatorTypes;
+
 template <typename T>
 static bool SortScorePairDescend(const std::pair<float, T>& pair1,
                                  const std::pair<float, T>& pair2) {
@@ -24,98 +29,95 @@ static bool SortScorePairDescend(const std::pair<float, T>& pair1,
 
 class DetectionOutputImpl: public ExtLayerBase {
 public:
-    explicit DetectionOutputImpl(const CNNLayer* layer) {
+    bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
         try {
-            if (layer->insData.size() != 3 && layer->insData.size() != 5)
-                IE_THROW() << "Incorrect number of input edges for layer " << layer->name;
-            if (layer->outData.empty())
-                IE_THROW() << "Incorrect number of output edges for layer " << layer->name;
-
-            _num_classes = layer->GetParamAsInt("num_classes");
-            _background_label_id = layer->GetParamAsInt("background_label_id", 0);
-            _top_k = layer->GetParamAsInt("top_k", -1);
-            _variance_encoded_in_target = layer->GetParamAsBool("variance_encoded_in_target", false);
-            _keep_top_k = layer->GetParamAsInt("keep_top_k", -1);
-            _nms_threshold = layer->GetParamAsFloat("nms_threshold");
-            _confidence_threshold = layer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
-            _share_location = layer->GetParamAsBool("share_location", true);
-            _clip_before_nms = layer->GetParamAsBool("clip_before_nms", false) ||
-                               layer->GetParamAsBool("clip", false);  // for backward compatibility
-            _clip_after_nms = layer->GetParamAsBool("clip_after_nms", false);
-            _decrease_label_id = layer->GetParamAsBool("decrease_label_id", false);
-            _normalized = layer->GetParamAsBool("normalized", true);
-            _image_height = layer->GetParamAsInt("input_height", 1);
-            _image_width = layer->GetParamAsInt("input_width", 1);
+            auto doOp = ngraph::as_type_ptr<const ngraph::op::v0::DetectionOutput>(op);
+            if (!doOp) {
+                errorMessage = "Node is not an instance of the DetectionOutput from the operations set v0.";
+                return false;
+            }
+            if (!details::CaselessEq<std::string>()(doOp->get_attrs().code_type, "caffe.PriorBoxParameter.CENTER_SIZE") &&
+                !details::CaselessEq<std::string>()(doOp->get_attrs().code_type, "caffe.PriorBoxParameter.CORNER")) {
+                errorMessage = "Unsupported code_type attribute.";
+                return false;
+            }
+        } catch (...) {
+            return false;
+        }
+        return true;
+    }
+
+    explicit DetectionOutputImpl(const std::shared_ptr<ngraph::Node>& op) {
+        try {
+            std::string errorMessage;
+            if (!isSupportedOperation(op, errorMessage)) {
+                IE_THROW(NotImplemented) << errorMessage;
+            }
+            if (op->get_input_size() != 3 && op->get_input_size() != 5)
+                IE_THROW() << "Invalid number of input edges.";
+
+            if (op->get_output_size() != 1)
+                IE_THROW() << "Invalid number of output edges.";
+
+            auto doOp = ngraph::as_type_ptr<const ngraph::op::v0::DetectionOutput>(op);
+            auto attributes = doOp->get_attrs();
+
+            _num_classes = attributes.num_classes;
+            _background_label_id = attributes.background_label_id;
+            _top_k = attributes.top_k;
+            _variance_encoded_in_target = attributes.variance_encoded_in_target;
+            _keep_top_k = attributes.keep_top_k[0];
+            _nms_threshold = attributes.nms_threshold;
+            _confidence_threshold = attributes.confidence_threshold;
+            _share_location = attributes.share_location;
+            _clip_before_nms = attributes.clip_before_nms;
+            _clip_after_nms = attributes.clip_after_nms;
+            _decrease_label_id = attributes.decrease_label_id;
+            _normalized = attributes.normalized;
+            _image_height = attributes.input_height;
+            _image_width = attributes.input_width;
 
             _prior_size = _normalized ? 4 : 5;
             _offset = _normalized ? 0 : 1;
             _num_loc_classes = _share_location ? 1 : _num_classes;
 
-            with_add_box_pred = layer->insData.size() == 5;
-            _objectness_score = layer->GetParamAsFloat("objectness_score", 0.0f);
+            with_add_box_pred = op->get_input_size() == 5;
+            _objectness_score = attributes.objectness_score;
 
-            std::string code_type_str = layer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
-            _code_type = (code_type_str == "caffe.PriorBoxParameter.CENTER_SIZE" ? CodeType::CENTER_SIZE : CodeType::CORNER);
+            _code_type = (details::CaselessEq<std::string>()(attributes.code_type, "caffe.PriorBoxParameter.CENTER_SIZE") ?
+                          CodeType::CENTER_SIZE : CodeType::CORNER);
 
-            _num_priors = static_cast<int>(layer->insData[idx_priors].lock()->getDims().back() / _prior_size);
-            _priors_batches = layer->insData[idx_priors].lock()->getDims().front() != 1;
+            _num_priors = static_cast<int>(op->get_input_shape(idx_priors).back() / _prior_size);
+            _priors_batches = op->get_input_shape(idx_priors).front() != 1;
 
-            if (_num_priors * _num_loc_classes * 4 != static_cast<int>(layer->insData[idx_location].lock()->getDims()[1]))
+            if (_num_priors * _num_loc_classes * 4 != static_cast<int>(op->get_input_shape(idx_location)[1]))
                 IE_THROW() << "Number of priors must match number of location predictions ("
                            << _num_priors * _num_loc_classes * 4 << " vs "
-                           << layer->insData[idx_location].lock()->getDims()[1] << ")";
+                           << op->get_input_shape(idx_location)[1] << ")";
 
-            if (_num_priors * _num_classes != static_cast<int>(layer->insData[idx_confidence].lock()->getTensorDesc().getDims().back()))
+            if (_num_priors * _num_classes != static_cast<int>(op->get_input_shape(idx_confidence).back()))
                 IE_THROW() << "Number of priors must match number of confidence predictions.";
 
             if (_decrease_label_id && _background_label_id != 0)
                 IE_THROW() << "Cannot use decrease_label_id and background_label_id parameter simultaneously.";
 
-            _num = static_cast<int>(layer->insData[idx_confidence].lock()->getTensorDesc().getDims()[0]);
-
-            InferenceEngine::SizeVector bboxes_size{static_cast<size_t>(_num),
-                                                    static_cast<size_t>(_num_classes),
-                                                    static_cast<size_t>(_num_priors),
-                                                    4};
-            _decoded_bboxes = InferenceEngine::make_shared_blob<float>({Precision::FP32, bboxes_size, NCHW});
-            _decoded_bboxes->allocate();
-
-            InferenceEngine::SizeVector buf_size{static_cast<size_t>(_num),
-                                                 static_cast<size_t>(_num_classes),
-                                                 static_cast<size_t>(_num_priors)};
-            _buffer = InferenceEngine::make_shared_blob<int>({Precision::I32, buf_size, {buf_size, {0, 1, 2}}});
-            _buffer->allocate();
-
-            InferenceEngine::SizeVector indices_size{static_cast<size_t>(_num),
-                                                     static_cast<size_t>(_num_classes),
-                                                     static_cast<size_t>(_num_priors)};
-            _indices = InferenceEngine::make_shared_blob<int>({Precision::I32, indices_size, {indices_size, {0, 1, 2}}});
-            _indices->allocate();
-
-            InferenceEngine::SizeVector detections_size{static_cast<size_t>((size_t)(_num) * _num_classes)};
-            _detections_count = InferenceEngine::make_shared_blob<int>({Precision::I32, detections_size, C});
-            _detections_count->allocate();
-
-            const InferenceEngine::SizeVector &conf_size = layer->insData[idx_confidence].lock()->getTensorDesc().getDims();
-            _reordered_conf = InferenceEngine::make_shared_blob<float>({Precision::FP32, conf_size, ANY});
-            _reordered_conf->allocate();
-
-            InferenceEngine::SizeVector decoded_bboxes_size{static_cast<size_t>(_num),
-                                                            static_cast<size_t>(_num_priors),
-                                                            static_cast<size_t>(_num_classes)};
-            _bbox_sizes = InferenceEngine::make_shared_blob<float>({Precision::FP32, decoded_bboxes_size, {decoded_bboxes_size, {0, 1, 2}}});
-            _bbox_sizes->allocate();
-
-            InferenceEngine::SizeVector num_priors_actual_size{static_cast<size_t>(_num)};
-            _num_priors_actual = InferenceEngine::make_shared_blob<int>({Precision::I32, num_priors_actual_size, C});
-            _num_priors_actual->allocate();
-
-            std::vector<DataConfigurator> in_data_conf(layer->insData.size(), DataConfigurator(ConfLayout::PLN, Precision::FP32));
-            addConfig(layer, in_data_conf, {DataConfigurator(ConfLayout::PLN, Precision::FP32)});
+            _num = static_cast<int>(op->get_input_shape(idx_confidence)[0]);
+
+            _decoded_bboxes.resize(_num * _num_classes * _num_priors * 4);
+            _buffer.resize(_num * _num_classes * _num_priors);
+            _indices.resize(_num * _num_classes * _num_priors);
+            _detections_count.resize(_num * _num_classes);
+            _bbox_sizes.resize(_num * _num_classes * _num_priors);
+            _num_priors_actual.resize(_num);
+
+            const auto &confSize = op->get_input_shape(idx_confidence);
+            _reordered_conf.resize(std::accumulate(confSize.begin(), confSize.end(), 1, std::multiplies<size_t>()));
+
+            std::vector<DataConfigurator> inDataConfigurators(op->get_input_size(), {TensorDescCreatorTypes::ncsp, Precision::FP32});
+            addConfig(op, inDataConfigurators,
+                      {{TensorDescCreatorTypes::ncsp, Precision::FP32}});
         } catch (InferenceEngine::Exception &ex) {
             errorMsg = ex.what();
+            throw;
         }
     }
 
@@ -131,13 +133,13 @@ class DetectionOutputImpl: public ExtLayerBase {
 
         const int N = inputs[idx_confidence]->getTensorDesc().getDims()[0];
 
-        float *decoded_bboxes_data = _decoded_bboxes->buffer().as<float *>();
-        float *reordered_conf_data = _reordered_conf->buffer().as<float *>();
-        float *bbox_sizes_data = _bbox_sizes->buffer().as<float *>();
-        int *detections_data = _detections_count->buffer().as<int *>();
-        int *buffer_data = _buffer->buffer().as<int *>();
-        int *indices_data = _indices->buffer().as<int *>();
-        int *num_priors_actual = _num_priors_actual->buffer().as<int *>();
+        float *decoded_bboxes_data = _decoded_bboxes.data();
+        float *reordered_conf_data = _reordered_conf.data();
+        float *bbox_sizes_data = _bbox_sizes.data();
+        int *detections_data = _detections_count.data();
+        int *buffer_data = _buffer.data();
+        int *indices_data = _indices.data();
+        int *num_priors_actual = _num_priors_actual.data();
 
         for (int n = 0; n < N; ++n) {
             const float *ppriors = prior_data;
@@ -396,13 +398,13 @@ class DetectionOutputImpl: public ExtLayerBase {
     void nms_mx(const float *conf_data, const float *bboxes, const float *sizes,
                 int *buffer, int *indices, int *detections, int num_priors_actual);
 
-    InferenceEngine::Blob::Ptr _decoded_bboxes;
-    InferenceEngine::Blob::Ptr _buffer;
-    InferenceEngine::Blob::Ptr _indices;
-    InferenceEngine::Blob::Ptr _detections_count;
-    InferenceEngine::Blob::Ptr _reordered_conf;
-    InferenceEngine::Blob::Ptr _bbox_sizes;
-    InferenceEngine::Blob::Ptr _num_priors_actual;
+    std::vector<float> _decoded_bboxes;
+    std::vector<int> _buffer;
+    std::vector<int> _indices;
+    std::vector<int> _detections_count;
+    std::vector<float> _reordered_conf;
+    std::vector<float> _bbox_sizes;
+    std::vector<int> _num_priors_actual;
 };
 
 struct ConfidenceComparator {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
index 1b96434b2b94ec..fefcee872cea4f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
@@ -12,6 +12,8 @@
 #include
 #include
 #include "ie_parallel.hpp"
+#include "common/tensor_desc_creator.h"
+#include
 
 namespace {
 
@@ -44,6 +46,8 @@ namespace InferenceEngine {
 namespace Extensions {
 namespace Cpu {
 
+using MKLDNNPlugin::TensorDescCreatorTypes;
+
 static void refine_boxes(const float* boxes, const float* deltas, const float* weights,
                          const float* scores,
                          float* refined_boxes, float* refined_boxes_areas, float* refined_scores,
@@ -235,46 +239,46 @@ class ExperimentalDetectronDetectionOutputImpl: public ExtLayerBase {
     const int OUTPUT_SCORES {2};
 
 public:
-    explicit ExperimentalDetectronDetectionOutputImpl(const CNNLayer* layer) {
+    bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
         try {
-            score_threshold_ = layer->GetParamAsFloat("score_threshold");
-            nms_threshold_ = layer->GetParamAsFloat("nms_threshold");
-            max_delta_log_wh_ = layer->GetParamAsFloat("max_delta_log_wh");
-            classes_num_ = layer->GetParamAsInt("num_classes");
-            max_detections_per_class_ = layer->GetParamAsInt("post_nms_count");
-            max_detections_per_image_ = layer->GetParamAsInt("max_detections_per_image");
-            class_agnostic_box_regression_ = layer->GetParamAsBool("class_agnostic_box_regression", false);
-            deltas_weights_ = layer->GetParamAsFloats("deltas_weights");
-
-
-            LayerConfig config;
-            for (auto in : layer->insData) {
-                auto in_ = in.lock();
-                auto dims = in_->getTensorDesc().getDims();
-                DataConfig data;
-                data.desc = TensorDesc(Precision::FP32, dims, in_->getTensorDesc().getLayoutByDims(dims));
-                config.inConfs.push_back(data);
+            auto doOp = ngraph::as_type_ptr<const ngraph::op::v6::ExperimentalDetectronDetectionOutput>(op);
+            if (!doOp) {
+                errorMessage = "Node is not an instance of the ExperimentalDetectronDetectionOutput from the operations set v6.";
+                return false;
             }
+        } catch (...) {
+            return false;
+        }
+        return true;
+    }
 
-            auto dimsB = layer->outData[OUTPUT_BOXES]->getTensorDesc().getDims();
-            DataConfig dataB;
-            dataB.desc = TensorDesc(Precision::FP32, dimsB,
-                                    layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsB));
-            config.outConfs.push_back(dataB);
-            auto dimsC = layer->outData[OUTPUT_CLASSES]->getTensorDesc().getDims();
-            DataConfig dataC;
-            dataC.desc = TensorDesc(Precision::I32, dimsC,
-                                    layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsC));
-            config.outConfs.push_back(dataC);
-            auto dimsS = layer->outData[OUTPUT_SCORES]->getTensorDesc().getDims();
-            DataConfig dataS;
-            dataS.desc = TensorDesc(Precision::FP32, dimsS,
-                                    layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsS));
-            config.outConfs.push_back(dataS);
-            config.dynBatchSupport = false;
-            confs.push_back(config);
+    explicit ExperimentalDetectronDetectionOutputImpl(const std::shared_ptr<ngraph::Node>& op) {
+        try {
+            std::string errorMessage;
+            if (!isSupportedOperation(op, errorMessage)) {
+                IE_THROW(NotImplemented) << errorMessage;
+            }
+            auto doOp = ngraph::as_type_ptr<const ngraph::op::v6::ExperimentalDetectronDetectionOutput>(op);
+            auto attributes = doOp->get_attrs();
+
+            score_threshold_ = attributes.score_threshold;
+            nms_threshold_ = attributes.nms_threshold;
+            max_delta_log_wh_ = attributes.max_delta_log_wh;
+            classes_num_ = attributes.num_classes;
+            max_detections_per_class_ = attributes.post_nms_count;
+            max_detections_per_image_ = attributes.max_detections_per_image;
+            class_agnostic_box_regression_ = attributes.class_agnostic_box_regression;
+            deltas_weights_ = attributes.deltas_weights;
+
+            std::vector<DataConfigurator> inDataConfigurators(op->get_input_size(), {TensorDescCreatorTypes::ncsp, Precision::FP32});
+
+            addConfig(op, inDataConfigurators,
+                      {{TensorDescCreatorTypes::ncsp, Precision::FP32},
+                       {TensorDescCreatorTypes::ncsp, Precision::I32},
+                       {TensorDescCreatorTypes::ncsp, Precision::FP32}});
         } catch (InferenceEngine::Exception &ex) {
             errorMsg = ex.what();
+            throw;
         }
     }
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
index 9a9baf4868127c..da46a74aaadb4e 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
@@ -39,11 +39,11 @@ MKLDNN_EXTENSION_NODE(MathImpl, Tan);
 //MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);
 //MKLDNN_EXTENSION_NODE(ExtractImagePatchesImpl, ExtractImagePatches);
 //MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence);
-//MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
+MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
 //MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax);
 //MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze);
 //MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice);
-//MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
 MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
 //MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
 MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
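
Note on the constructor pattern used in this patch (illustrative sketch, not part of the change): both migrated nodes now validate the incoming nGraph operation in isSupportedOperation() and throw IE_THROW(NotImplemented) with the collected errorMessage from the constructor when validation fails. A caller that instantiates such extension nodes would typically treat that exception as "this implementation does not support the op" and fall back. The helper below, including its name tryCreateExtensionNode, is hypothetical and only sketches that control flow.

    // Hypothetical helper: construct an extension node and treat the
    // NotImplemented exception thrown by its constructor as "unsupported".
    #include <memory>
    #include <ie_common.h>
    #include <ngraph/node.hpp>

    template <typename Impl>
    std::shared_ptr<Impl> tryCreateExtensionNode(const std::shared_ptr<ngraph::Node>& op) {
        try {
            // Impl's constructor calls isSupportedOperation(op, errorMessage) and
            // throws IE_THROW(NotImplemented) << errorMessage when the op does not match.
            return std::make_shared<Impl>(op);
        } catch (const InferenceEngine::NotImplemented&) {
            return nullptr;  // caller may fall back to another implementation
        }
    }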