Add support for YOLOv5 v4.0 and v5.0

2022-04-08 17:13:05 -03:00
parent 2aa52a8e8c
commit 7bcc9b62fa
6 changed files with 102 additions and 63 deletions
--- a/nvdsinfer_custom_impl_Yolo/Makefile
+++ b/nvdsinfer_custom_impl_Yolo/Makefile
@@ -61,7 +61,7 @@ SRCFILES:= nvdsinfer_yolo_engine.cpp \
           layers/upsample_layer.cpp \
           layers/maxpool_layer.cpp \
           layers/activation_layer.cpp \
-           layers/reorg_r_layer.cpp \
+           layers/reorgv5_layer.cpp \
           utils.cpp \
           yolo.cpp \
           yoloForward.cu \
--- a/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.cpp
@@ -3,9 +3,9 @@
 * https://www.github.com/marcoslucianops
 */
-#include "reorg_r_layer.h"
+#include "reorgv5_layer.h"
-nvinfer1::ILayer* reorgRLayer(
+nvinfer1::ILayer* reorgV5Layer(
    int layerIdx,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
--- a/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.h
@@ -3,8 +3,8 @@
 * https://www.github.com/marcoslucianops
 */
-#ifndef __REORG_R_LAYER_H__
+#ifndef __REORGV5_LAYER_H__
-#define __REORG_R_LAYER_H__
+#define __REORGV5_LAYER_H__
 #include <map>
 #include <vector>
@@ -12,7 +12,7 @@
 #include "NvInfer.h"
-nvinfer1::ILayer* reorgRLayer(
+nvinfer1::ILayer* reorgV5Layer(
    int layerIdx,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network);
--- a/nvdsinfer_custom_impl_Yolo/yolo.cpp
+++ b/nvdsinfer_custom_impl_Yolo/yolo.cpp
@@ -301,15 +301,15 @@ NvDsInferStatus Yolo::buildYoloNetwork(
        }
        else if (m_ConfigBlocks.at(i).at("type") == "reorg") {
-            if (m_NetworkType.find("yolor") != std::string::npos) {
+            if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolor") != std::string::npos) {
                std::string inputVol = dimsToString(previous->getDimensions());
-                nvinfer1::ILayer* out = reorgRLayer(i, previous, &network);
+                nvinfer1::ILayer* out = reorgV5Layer(i, previous, &network);
                previous = out->getOutput(0);
                assert(previous != nullptr);
                channels = getNumChannels(previous);
                std::string outputVol = dimsToString(previous->getDimensions());
                tensorOutputs.push_back(previous);
-                std::string layerType = "reorgR";
+                std::string layerType = "reorgV5";
                printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
            }
            else {
--- a/nvdsinfer_custom_impl_Yolo/yolo.h
+++ b/nvdsinfer_custom_impl_Yolo/yolo.h
@@ -34,7 +34,7 @@
 #include "layers/route_layer.h"
 #include "layers/upsample_layer.h"
 #include "layers/maxpool_layer.h"
-#include "layers/reorg_r_layer.h"
+#include "layers/reorgv5_layer.h"
 #include "nvdsinfer_custom_impl.h"
--- a/utils/gen_wts_yoloV5.py
+++ b/utils/gen_wts_yoloV5.py
@@ -21,6 +21,9 @@ class YoloLayers():
        return "\n[route]\n" + \
               "layers=%s\n" % layers
    def reorg(self):
        """Return the cfg text for a ``[reorg]`` section header with no options."""
        return "\n[reorg]\n"
    def shortcut(self, route=-1, activation="linear"):
        return "\n[shortcut]\n" + \
               "from=%d\n" % route + \
@@ -120,59 +123,20 @@ model.to(device).eval()
 anchors = ""
 masks = []
-with open(wts_file, "w") as f:
+for k, v in model.state_dict().items():
-    wts_write = ""
+    if "anchor_grid" in k:
-    conv_count = 0
+        vr = v.cpu().numpy().tolist()
-    cv1 = ""
+        a = v.reshape(-1).cpu().numpy().astype(float).tolist()
-    cv3 = ""
+        anchors = str(a)[1:-1]
-    cv3_idx = 0
+        num = 0
-    sppf_idx = 11 if p6 else 9
+        for m in vr:
-    for k, v in model.state_dict().items():
+            mask = []
-        if "num_batches_tracked" not in k and "anchors" not in k and "anchor_grid" not in k:
+            for _ in range(len(m)):
-            vr = v.reshape(-1).cpu().numpy()
+                mask.append(num)
-            idx = int(k.split(".")[1])
+                num += 1
-            if ".cv1." in k and ".m." not in k and idx != sppf_idx:
+            masks.append(mask)
-                cv1 += "{} {} ".format(k, len(vr))
+
-                for vv in vr:
+spp_idx = 0
                    cv1 += " "
                    cv1 += struct.pack(">f", float(vv)).hex()
                cv1 += "\n"
                conv_count += 1
            elif cv1 != "" and ".m." in k:
                wts_write += cv1
                cv1 = ""
            if ".cv3." in k:
                cv3 += "{} {} ".format(k, len(vr))
                for vv in vr:
                    cv3 += " "
                    cv3 += struct.pack(">f", float(vv)).hex()
                cv3 += "\n"
                cv3_idx = idx
                conv_count += 1
            elif cv3 != "" and cv3_idx != idx:
                wts_write += cv3
                cv3 = ""
                cv3_idx = 0
            if ".cv3." not in k and not (".cv1." in k and ".m." not in k and idx != sppf_idx):
                wts_write += "{} {} ".format(k, len(vr))
                for vv in vr:
                    wts_write += " "
                    wts_write += struct.pack(">f", float(vv)).hex()
                wts_write += "\n"
                conv_count += 1
        elif "anchor_grid" in k:
            vr = v.cpu().numpy().tolist()
            a = v.reshape(-1).cpu().numpy().astype(float).tolist()
            anchors = str(a)[1:-1]
            num = 0
            for m in vr:
                mask = []
                for _ in range(len(m)):
                    mask.append(num)
                    num += 1
                masks.append(mask)
    f.write("{}\n".format(conv_count))
    f.write(wts_write)
 with open(cfg_file, "w") as c:
    with open(yaml_file, "r", encoding="utf-8") as f:
@@ -195,6 +159,15 @@ with open(cfg_file, "w") as c:
                width_multiple = f[topic]
            elif topic == "backbone" or topic == "head":
                for v in f[topic]:
                    if v[2] == "Focus":
                        layer = "\n# Focus\n"
                        blocks = 0
                        layer += yoloLayers.reorg()
                        blocks += 1
                        layer += yoloLayers.convolutional(bn=True, filters=get_width(v[3][0], width_multiple), size=v[3][1],
                                                          activation="silu")
                        blocks += 1
                        layers.append([layer, blocks])
                    if v[2] == "Conv":
                        layer = "\n# Conv\n"
                        blocks = 0
@@ -244,7 +217,31 @@ with open(cfg_file, "w") as c:
                                                          activation="silu")
                        blocks += 1
                        layers.append([layer, blocks])
                    elif v[2] == "SPP":
                        spp_idx = len(layers)
                        layer = "\n# SPP\n"
                        blocks = 0
                        layer += yoloLayers.convolutional(bn=True, filters=get_width(v[3][0], width_multiple) / 2,
                                                          activation="silu")
                        blocks += 1
                        layer += yoloLayers.maxpool(size=v[3][1][0])
                        blocks += 1
                        layer += yoloLayers.route(layers="-2")
                        blocks += 1
                        layer += yoloLayers.maxpool(size=v[3][1][1])
                        blocks += 1
                        layer += yoloLayers.route(layers="-4")
                        blocks += 1
                        layer += yoloLayers.maxpool(size=v[3][1][2])
                        blocks += 1
                        layer += yoloLayers.route(layers="-6, -5, -3, -1")
                        blocks += 1
                        layer += yoloLayers.convolutional(bn=True, filters=get_width(v[3][0], width_multiple),
                                                          activation="silu")
                        blocks += 1
                        layers.append([layer, blocks])
                    elif v[2] == "SPPF":
                        spp_idx = len(layers)
                        layer = "\n# SPPF\n"
                        blocks = 0
                        layer += yoloLayers.convolutional(bn=True, filters=get_width(v[3][0], width_multiple) / 2,
@@ -291,3 +288,45 @@ with open(cfg_file, "w") as c:
                            layers.append([layer, blocks])
        for layer in layers:
            c.write(layer[0])
 # Serialize the model's state_dict into the text weights file `wts_file`.
 # Output layout: the first line is the number of serialized tensors; each
 # following line is "<key> <element count> " followed by one " <hex>" token per
 # element, where <hex> is the big-endian 32-bit float encoding of the value.
 # Tensors are not written strictly in state_dict order: ".cv1." and ".cv3."
 # entries are buffered and emitted later (see comments below).
 with open(wts_file, "w") as f:
    # Accumulated body of the file (everything after the count line).
    wts_write = ""
    # Number of tensors serialized (buffered ones are counted when buffered).
    conv_count = 0
    # Pending ".cv1." lines, held back until a ".m." key is encountered.
    cv1 = ""
    # Pending ".cv3." lines, held back until a non-".cv3." key with another index appears.
    cv3 = ""
    # Index (second dotted component of the key) of the buffered ".cv3." entries.
    cv3_idx = 0
    for k, v in model.state_dict().items():
        # Skip BatchNorm bookkeeping counters and the anchor tensors.
        if "num_batches_tracked" not in k and "anchors" not in k and "anchor_grid" not in k:
            # Flatten the tensor to a 1-D NumPy array on the CPU.
            vr = v.reshape(-1).cpu().numpy()
            # Second dotted component of the key, parsed as an integer
            # (presumably the module index within the model — confirm against key naming).
            idx = int(k.split(".")[1])
            # ".cv1." tensors outside ".m." sub-modules are buffered, except for the
            # module whose index equals spp_idx (set outside this block), which is
            # written in place by the generic branch further down.
            if ".cv1." in k and ".m." not in k and idx != spp_idx:
                cv1 += "{} {} ".format(k, len(vr))
                for vv in vr:
                    cv1 += " "
                    cv1 += struct.pack(">f", float(vv)).hex()
                cv1 += "\n"
                conv_count += 1
            # First ".m." key seen while cv1 lines are pending: flush them now, so
            # they land before this key's own line (appended by the generic branch).
            elif cv1 != "" and ".m." in k:
                wts_write += cv1
                cv1 = ""
            # ".cv3." tensors are always buffered; remember which index they belong to.
            if ".cv3." in k:
                cv3 += "{} {} ".format(k, len(vr))
                for vv in vr:
                    cv3 += " "
                    cv3 += struct.pack(">f", float(vv)).hex()
                cv3 += "\n"
                cv3_idx = idx
                conv_count += 1
            # A non-".cv3." key with a different index: flush the pending ".cv3." lines.
            elif cv3 != "" and cv3_idx != idx:
                wts_write += cv3
                cv3 = ""
                cv3_idx = 0
            # Generic branch: every tensor that was not buffered above is appended directly.
            if ".cv3." not in k and not (".cv1." in k and ".m." not in k and idx != spp_idx):
                wts_write += "{} {} ".format(k, len(vr))
                for vv in vr:
                    wts_write += " "
                    wts_write += struct.pack(">f", float(vv)).hex()
                wts_write += "\n"
                conv_count += 1
    # NOTE(review): anything still pending in cv1/cv3 when the loop ends is not
    # appended to wts_write although it was counted in conv_count — confirm that the
    # final state_dict keys always trigger the flush conditions above.
    f.write("{}\n".format(conv_count))
    f.write(wts_write)