wang-xinyu · wang-xinyu · Sep 23, 2024 · Apr 30, 2024 · May 2, 2024 · May 2, 2024
diff --git a/yolov8/CMakeLists.txt b/yolov8/CMakeLists.txt
@@ -57,3 +57,6 @@ target_link_libraries(yolov8_pose nvinfer cudart myplugins ${OpenCV_LIBS})
 
 add_executable(yolov8_cls ${PROJECT_SOURCE_DIR}/yolov8_cls.cpp ${SRCS})
 target_link_libraries(yolov8_cls nvinfer cudart myplugins ${OpenCV_LIBS})
+
+add_executable(yolov8_5u_det ${PROJECT_SOURCE_DIR}/yolov8_5u_det.cpp ${SRCS})
+target_link_libraries(yolov8_5u_det nvinfer cudart myplugins ${OpenCV_LIBS})
diff --git a/yolov8/README.md b/yolov8/README.md
@@ -43,8 +43,16 @@ python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect
 
 // For p2 model
 // download https://github.com/lindsayshuo/yolov8_p2_tensorrtx/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model)
+cd {ultralytics}/ultralytics
 python gen_wts.py -w VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt -o VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts -t detect (only for  10 cls p2 model)
 // a file 'VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts' will be generated.
+
+// For yolov8_5u_det model
+// download https://github.com/ultralytics/assets/releases/yolov5nu.pt
+cd {ultralytics}/ultralytics
+python gen_wts.py -w yolov5nu.pt -o yolov5nu.wts -t detect
+// a file 'yolov5nu.wts' will be generated.
+
 ```
 
 2. build tensorrtx/yolov8 and run
@@ -74,6 +82,11 @@ wget https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_y
 cp -r 0000008_01999_d_0000040.jpg ../images
 sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images c //cpu postprocess
 sudo ./yolov8_det -d VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine ../images g //gpu postprocess
+
+// For the yolov8_5u_det model (YOLOv5u with the anchor-free, objectness-free split head structure based on YOLOv8 features):
+sudo ./yolov8_5u_det -s [.wts] [.engine] [n/s/m/l/x/n6/s6/m6/l6/x6]
+sudo ./yolov8_5u_det -d yolov5xu.engine ../images c //cpu postprocess
+sudo ./yolov8_5u_det -d yolov5xu.engine ../images g //gpu postprocess
 ```
 
 ### Instance Segmentation
@@ -141,6 +154,7 @@ python yolov8_det_trt.py  # Detection
 python yolov8_seg_trt.py  # Segmentation
 python yolov8_cls_trt.py  # Classification
 python yolov8_pose_trt.py  # Pose Estimation
+python yolov8_5u_det_trt.py  # yolov8_5u_det model (YOLOv5u with the anchor-free, objectness-free split head structure based on YOLOv8 features)
 ```
 
 # INT8 Quantization

diff --git a/yolov8/include/block.h b/yolov8/include/block.h
@@ -4,6 +4,8 @@
 #include <vector>
 #include "NvInfer.h"
 
+int calculateP(int ksize);  // Defined in src/block.cpp; returns ksize / 3 (integer division).
+
 std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);
 
 nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network,
@@ -18,6 +20,10 @@ nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network,
                                 std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input, int c1,
                                 int c2, int n, bool shortcut, float e, std::string lname);
 
+nvinfer1::IElementWiseLayer* C3(nvinfer1::INetworkDefinition* network,  // C3 block builder; returns its last layer.
+                                std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
+                                int c2, int n, bool shortcut, float e, std::string lname);  // weightMap is by value.
+
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network,
                                   std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
                                   int c2, int k, std::string lname);

diff --git a/yolov8/include/model.h b/yolov8/include/model.h
@@ -29,3 +29,11 @@ nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfe
 nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                                                nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
                                                int& max_channels);
+
+nvinfer1::IHostMemory* buildEngineYolov8_5uDet(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
+                                               nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
+                                               int& max_channels);  // NOTE(review): presumably the YOLOv5u detect engine builder - confirm against the definition.
+
+nvinfer1::IHostMemory* buildEngineYolov8_5uDetP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
+                                                 nvinfer1::DataType dt, const std::string& wts_path, float& gd,
+                                                 float& gw, int& max_channels);  // NOTE(review): presumably the P6 (n6..x6) variant of the builder above - confirm.
diff --git a/yolov8/src/block.cpp b/yolov8/src/block.cpp
@@ -6,12 +6,18 @@
 #include "config.h"
 #include "yololayer.h"
 
+int calculateP(int ksize) {  // Maps a kernel size to ksize / 3 using integer division (1 -> 0, 3 -> 1).
+    return ksize / 3;  // NOTE(review): used as the 7th convBnSiLU argument in this file - presumably the padding; confirm.
+}
+
 std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) {
     std::cout << "Loading weights: " << file << std::endl;
     std::map<std::string, nvinfer1::Weights> WeightMap;
 
     std::ifstream input(file);
-    assert(input.is_open() && "Unable to load weight file. please check if the .wts file path is right!!!!!!");
+    assert(input.is_open() &&
+           "Unable to load weight file. please check if the "
+           ".wts file path is right!!!!!!");
 
     int32_t count;
     input >> count;
@@ -103,6 +109,20 @@ nvinfer1::ILayer* bottleneck(nvinfer1::INetworkDefinition* network, std::map<std
     return conv2;
 }
 
+static nvinfer1::ILayer* bottleneck_c3(nvinfer1::INetworkDefinition* network,  // File-local: two chained convBnSiLU layers plus an optional add.
+                                       std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input,
+                                       int c1, int c2, bool shortcut, float e, std::string lname) {
+    nvinfer1::IElementWiseLayer* cv1 =  // First conv on `input`; its 4th argument is c2 * e truncated to int.
+            convBnSiLU(network, weightMap, input, (int)((float)c2 * e), 1, 1, calculateP(1), lname + ".cv1");
+    nvinfer1::IElementWiseLayer* cv2 =  // Second conv consumes cv1's output and is called with c2.
+            convBnSiLU(network, weightMap, *cv1->getOutput(0), c2, 3, 1, calculateP(3), lname + ".cv2");
+    if (shortcut && c1 == c2) {  // The add is only built when requested and c1 equals c2.
+        auto ew = network->addElementWise(input, *cv2->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
+        return ew;  // Returns the kSUM layer of `input` and cv2's output.
+    }
+    return cv2;  // Otherwise the second conv layer is returned directly.
+}
+
 nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network,
                                  std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
                                  int c2, int n, bool shortcut, float e, std::string lname) {
@@ -173,6 +193,24 @@ nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network,
     return conv2;
 }
 
+nvinfer1::IElementWiseLayer* C3(nvinfer1::INetworkDefinition* network,  // Adds a C3 block to `network`; returns the ".cv3" layer.
+                                std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
+                                int c2, int n, bool shortcut, float e, std::string lname) {  // c1 is not read in this body.
+    int c_ = (float)c2 * e;  // c2 * e, truncated by the conversion to int.
+    nvinfer1::IElementWiseLayer* cv1 = convBnSiLU(network, weightMap, input, c_, 1, 1, calculateP(1), lname + ".cv1");
+    nvinfer1::IElementWiseLayer* cv2 = convBnSiLU(network, weightMap, input, c_, 1, 1, calculateP(1), lname + ".cv2");
+    nvinfer1::ITensor* y1 = cv1->getOutput(0);  // cv1 feeds the bottleneck chain; cv2 (same input) is kept for the concat.
+    for (int i = 0; i < n; i++) {  // Chain n bottlenecks; c1 == c2 == c_ there, so `shortcut` alone decides the add.
+        auto b = bottleneck_c3(network, weightMap, *y1, c_, c_, shortcut, 1.0, lname + ".m." + std::to_string(i));
+        y1 = b->getOutput(0);  // Output of unit i becomes the input of unit i + 1.
+    }
+    nvinfer1::ITensor* inputTensors[] = {y1, cv2->getOutput(0)};  // Order: bottleneck chain output first, then cv2.
+    nvinfer1::IConcatenationLayer* cat = network->addConcatenation(inputTensors, 2);  // NOTE(review): default concat axis - presumably channels; confirm.
+    nvinfer1::IElementWiseLayer* conv3 =  // Final convBnSiLU over the concatenation, called with c2.
+            convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, calculateP(1), lname + ".cv3");
+    return conv3;
+}
+
 nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network,
                                   std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
                                   int c2, int k, std::string lname) {
@@ -236,7 +274,8 @@ nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network,
     combinedInfo[6] = is_segmentation;
     combinedInfo[7] = is_pose;
 
-    // Copy the contents of px_arry into the combinedInfo vector after the initial 5 elements.
+    // Copy the contents of px_arry into the combinedInfo vector right after the
+    // first netinfo_count entries.
     std::copy(px_arry, px_arry + px_arry_num, combinedInfo.begin() + netinfo_count);
 
     // Now let's create the PluginField object to hold this combined information.