New YOLOv5 conversion and support (>= v2.0)
@@ -53,6 +53,7 @@ SRCFILES:= nvdsinfer_yolo_engine.cpp \
 	nvdsparsebbox_Yolo.cpp \
 	yoloPlugins.cpp \
 	layers/convolutional_layer.cpp \
+	layers/batchnorm_layer.cpp \
 	layers/implicit_layer.cpp \
 	layers/channels_layer.cpp \
 	layers/shortcut_layer.cpp \
@@ -12,7 +12,8 @@ nvinfer1::ILayer* activationLayer(
     nvinfer1::ITensor* input,
     nvinfer1::INetworkDefinition* network)
 {
-    if (activation == "linear") {
+    if (activation == "linear")
+    {
         // Pass
     }
     else if (activation == "relu")
@@ -46,8 +47,8 @@ nvinfer1::ILayer* activationLayer(
     {
         nvinfer1::IActivationLayer* leaky = network->addActivation(
            *input, nvinfer1::ActivationType::kLEAKY_RELU);
-        leaky->setAlpha(0.1);
         assert(leaky != nullptr);
+        leaky->setAlpha(0.1);
         std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
         leaky->setName(leakyLayerName.c_str());
         output = leaky;
@@ -74,7 +75,7 @@ nvinfer1::ILayer* activationLayer(
         std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
         tanh->setName(tanhLayerName.c_str());
         nvinfer1::IElementWiseLayer* mish = network->addElementWise(
-            *tanh->getOutput(0), *input,
+            *input, *tanh->getOutput(0),
             nvinfer1::ElementWiseOperation::kPROD);
         assert(mish != nullptr);
         std::string mishLayerName = "mish_" + std::to_string(layerIdx);
@@ -89,14 +90,32 @@ nvinfer1::ILayer* activationLayer(
         std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
         sigmoid->setName(sigmoidLayerName.c_str());
         nvinfer1::IElementWiseLayer* silu = network->addElementWise(
-            *sigmoid->getOutput(0), *input,
+            *input, *sigmoid->getOutput(0),
             nvinfer1::ElementWiseOperation::kPROD);
         assert(silu != nullptr);
         std::string siluLayerName = "silu_" + std::to_string(layerIdx);
         silu->setName(siluLayerName.c_str());
         output = silu;
     }
-    else {
+    else if (activation == "hardswish")
+    {
+        nvinfer1::IActivationLayer* hard_sigmoid = network->addActivation(
+            *input, nvinfer1::ActivationType::kHARD_SIGMOID);
+        assert(hard_sigmoid != nullptr);
+        hard_sigmoid->setAlpha(1.0 / 6.0);
+        hard_sigmoid->setBeta(0.5);
+        std::string hardSigmoidLayerName = "hard_sigmoid_" + std::to_string(layerIdx);
+        hard_sigmoid->setName(hardSigmoidLayerName.c_str());
+        nvinfer1::IElementWiseLayer* hard_swish = network->addElementWise(
+            *input, *hard_sigmoid->getOutput(0),
+            nvinfer1::ElementWiseOperation::kPROD);
+        assert(hard_swish != nullptr);
+        std::string hardSwishLayerName = "hard_swish_" + std::to_string(layerIdx);
+        hard_swish->setName(hardSwishLayerName.c_str());
+        output = hard_swish;
+    }
+    else
+    {
         std::cerr << "Activation not supported: " << activation << std::endl;
         std::abort();
     }
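The operand swaps in the mish and silu products (putting *input first) are cosmetic, since ElementWiseOperation::kPROD is commutative; the functional change in this file is the new hardswish branch. TensorRT has no dedicated hard-swish activation, so the commit composes it as x * hard_sigmoid(x) with alpha = 1/6 and beta = 0.5. A minimal standalone sketch, plain C++ without TensorRT and with function names of my own choosing, that checks this decomposition against the usual relu6 definition of hard-swish:

    #include <algorithm>
    #include <cassert>
    #include <cmath>

    // hard_sigmoid(x) = clip(alpha * x + beta, 0, 1) with alpha = 1/6, beta = 0.5,
    // matching the setAlpha/setBeta calls in the diff above.
    static float hardSigmoid(float x) {
        return std::min(1.0f, std::max(0.0f, x / 6.0f + 0.5f));
    }

    // Reference definition: hardswish(x) = x * relu6(x + 3) / 6.
    static float hardSwishRef(float x) {
        return x * std::min(6.0f, std::max(0.0f, x + 3.0f)) / 6.0f;
    }

    int main() {
        for (float x = -8.0f; x <= 8.0f; x += 0.25f) {
            float composed = x * hardSigmoid(x); // the kPROD of the two layer outputs
            assert(std::fabs(composed - hardSwishRef(x)) < 1e-5f);
        }
        return 0;
    }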
nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp (new file, 114 lines)
@@ -0,0 +1,114 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include <math.h>
+#include "batchnorm_layer.h"
+
+nvinfer1::ILayer* batchnormLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    std::vector<float>& weights,
+    std::vector<nvinfer1::Weights>& trtWeights,
+    int& weightPtr,
+    std::string weightsType,
+    float eps,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "batchnorm");
+    assert(block.find("filters") != block.end());
+
+    int filters = std::stoi(block.at("filters"));
+    std::string activation = block.at("activation");
+
+    std::vector<float> bnBiases;
+    std::vector<float> bnWeights;
+    std::vector<float> bnRunningMean;
+    std::vector<float> bnRunningVar;
+
+    if (weightsType == "weights") {
+        for (int i = 0; i < filters; ++i)
+        {
+            bnBiases.push_back(weights[weightPtr]);
+            weightPtr++;
+        }
+        for (int i = 0; i < filters; ++i)
+        {
+            bnWeights.push_back(weights[weightPtr]);
+            weightPtr++;
+        }
+        for (int i = 0; i < filters; ++i)
+        {
+            bnRunningMean.push_back(weights[weightPtr]);
+            weightPtr++;
+        }
+        for (int i = 0; i < filters; ++i)
+        {
+            bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
+            weightPtr++;
+        }
+    }
+    else {
+        for (int i = 0; i < filters; ++i)
+        {
+            bnWeights.push_back(weights[weightPtr]);
+            weightPtr++;
+        }
+        for (int i = 0; i < filters; ++i)
+        {
+            bnBiases.push_back(weights[weightPtr]);
+            weightPtr++;
+        }
+        for (int i = 0; i < filters; ++i)
+        {
+            bnRunningMean.push_back(weights[weightPtr]);
+            weightPtr++;
+        }
+        for (int i = 0; i < filters; ++i)
+        {
+            bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
+            weightPtr++;
+        }
+    }
+
+    int size = filters;
+    nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* shiftWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        shiftWt[i]
+            = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
+    }
+    shift.values = shiftWt;
+    float* scaleWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
+    }
+    scale.values = scaleWt;
+    float* powerWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        powerWt[i] = 1.0;
+    }
+    power.values = powerWt;
+    trtWeights.push_back(shift);
+    trtWeights.push_back(scale);
+    trtWeights.push_back(power);
+
+    nvinfer1::IScaleLayer* bn = network->addScale(
+        *input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bn != nullptr);
+    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
+    bn->setName(bnLayerName.c_str());
+    nvinfer1::ILayer* output = bn;
+
+    output = activationLayer(layerIdx, activation, output, output->getOutput(0), network);
+    assert(output != nullptr);
+
+    return output;
+}
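What the new file does: it folds batch normalization into a single per-channel affine transform. Note that bnRunningVar is overwritten with sqrt(variance + eps) at load time, so the IScaleLayer computes x * scale + shift per channel (power = 1) with scale = gamma / sqrt(var + eps) and shift = beta - mean * gamma / sqrt(var + eps), which equals the direct form gamma * (x - mean) / sqrt(var + eps) + beta. The two load branches reflect the two weight formats: darknet .weights files store beta (bias), gamma (scale), running mean, running variance in that order, while the other branch expects gamma before beta, apparently matching the PyTorch state_dict order. A small self-check of the fold, plain C++ with made-up per-channel values:

    #include <cassert>
    #include <cmath>

    int main() {
        // Hypothetical statistics for one channel.
        float gamma = 1.7f, beta = -0.3f, mean = 0.9f, var = 2.5f, eps = 1e-5f;
        float x = 4.2f;

        // Direct batch-norm formula.
        float bn = gamma * (x - mean) / std::sqrt(var + eps) + beta;

        // Folded scale/shift exactly as built in batchnormLayer (power = 1).
        float scale = gamma / std::sqrt(var + eps);
        float shift = beta - mean * gamma / std::sqrt(var + eps);
        float folded = x * scale + shift;

        assert(std::fabs(bn - folded) < 1e-5f);
        return 0;
    }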
nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h (new file, 27 lines)
@@ -0,0 +1,27 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#ifndef __BATCHNORM_LAYER_H__
+#define __BATCHNORM_LAYER_H__
+
+#include <map>
+#include <vector>
+
+#include "NvInfer.h"
+
+#include "activation_layer.h"
+
+nvinfer1::ILayer* batchnormLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    std::vector<float>& weights,
+    std::vector<nvinfer1::Weights>& trtWeights,
+    int& weightPtr,
+    std::string weightsType,
+    float eps,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network);
+
+#endif
@@ -44,6 +44,11 @@ nvinfer1::ILayer* convolutionalLayer(
         groups = std::stoi(block.at("groups"));
     }
 
+    if (block.find("bias") != block.end())
+    {
+        bias = std::stoi(block.at("bias"));
+    }
+
     int pad;
     if (padding)
         pad = (kernelSize - 1) / 2;
@@ -61,14 +66,17 @@ nvinfer1::ILayer* convolutionalLayer(
     if (weightsType == "weights") {
         if (batchNormalize == false)
         {
-            float* val = new float[filters];
-            for (int i = 0; i < filters; ++i)
-            {
-                val[i] = weights[weightPtr];
-                weightPtr++;
+            float* val;
+            if (bias != 0) {
+                val = new float[filters];
+                for (int i = 0; i < filters; ++i)
+                {
+                    val[i] = weights[weightPtr];
+                    weightPtr++;
+                }
+                convBias.values = val;
+                trtWeights.push_back(convBias);
             }
-            convBias.values = val;
-            trtWeights.push_back(convBias);
             val = new float[size];
             for (int i = 0; i < size; ++i)
             {
@@ -108,7 +116,8 @@ nvinfer1::ILayer* convolutionalLayer(
             }
             convWt.values = val;
             trtWeights.push_back(convWt);
-            trtWeights.push_back(convBias);
+            if (bias != 0)
+                trtWeights.push_back(convBias);
         }
     }
     else {
@@ -122,14 +131,16 @@ nvinfer1::ILayer* convolutionalLayer(
            }
            convWt.values = val;
            trtWeights.push_back(convWt);
-           val = new float[filters];
-           for (int i = 0; i < filters; ++i)
-           {
-               val[i] = weights[weightPtr];
-               weightPtr++;
+           if (bias != 0) {
+               val = new float[filters];
+               for (int i = 0; i < filters; ++i)
+               {
+                   val[i] = weights[weightPtr];
+                   weightPtr++;
+               }
+               convBias.values = val;
+               trtWeights.push_back(convBias);
            }
-           convBias.values = val;
-           trtWeights.push_back(convBias);
        }
        else
        {
@@ -161,7 +172,8 @@ nvinfer1::ILayer* convolutionalLayer(
                weightPtr++;
            }
            trtWeights.push_back(convWt);
-           trtWeights.push_back(convBias);
+           if (bias != 0)
+               trtWeights.push_back(convBias);
        }
    }
 
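The convolutional changes all serve one feature: the bias term is now optional, driven by an optional "bias" key in the layer's cfg block. When bias is 0 the loader neither consumes bias values from the weight stream nor registers convBias in trtWeights. The hunk context does not show the default, but for the old behavior to be preserved, bias must be initialized nonzero earlier in the function. A sketch of the optional-key pattern used here (simplified types, helper name mine):

    #include <map>
    #include <string>

    // "bias" stays enabled when the cfg block does not mention it.
    int readBiasFlag(const std::map<std::string, std::string>& block) {
        int bias = 1; // assumed default; the real initializer is outside this diff
        auto it = block.find("bias");
        if (it != block.end())
            bias = std::stoi(it->second);
        return bias;
    }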
@@ -19,11 +19,11 @@ nvinfer1::ILayer* maxpoolLayer(
     int stride = std::stoi(block.at("stride"));
 
     nvinfer1::IPoolingLayer* pool
-        = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size});
+        = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {size, size}});
     assert(pool);
     std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
-    pool->setStrideNd(nvinfer1::DimsHW{stride, stride});
-    pool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
+    pool->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
+    pool->setPaddingNd(nvinfer1::Dims{2, {size / 2, size / 2}});
     pool->setName(maxpoolLayerName.c_str());
 
     return pool;
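The maxpool change swaps the implicit kSAME_UPPER padding mode for explicit padding of size / 2, and uses the generic Dims{2, {...}} spelling instead of DimsHW. With explicit symmetric padding, the pooled output length is out = (in + 2 * pad - k) / s + 1, so for an odd kernel k with stride 1 (the SPP-style maxpools in YOLO cfgs), pad = k / 2 keeps the spatial size unchanged, matching darknet's convention. A quick check of that arithmetic, plain C++ (helper name mine):

    #include <cassert>

    // Pooled length of one axis with explicit symmetric padding.
    int pooledDim(int in, int k, int s, int pad) {
        return (in + 2 * pad - k) / s + 1;
    }

    int main() {
        // Odd kernels, stride 1: explicit pad = k / 2 preserves spatial size.
        for (int k : {3, 5, 9, 13})
            assert(pooledDim(64, k, 1, k / 2) == 64);
        return 0;
    }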
@@ -207,6 +207,20 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN
            printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
        }
 
+       else if (m_ConfigBlocks.at(i).at("type") == "batchnorm")
+       {
+           std::string inputVol = dimsToString(previous->getDimensions());
+           nvinfer1::ILayer* out = batchnormLayer(
+               i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network);
+           previous = out->getOutput(0);
+           assert(previous != nullptr);
+           channels = getNumChannels(previous);
+           std::string outputVol = dimsToString(previous->getDimensions());
+           tensorOutputs.push_back(previous);
+           std::string layerType = "bn_" + m_ConfigBlocks.at(i).at("activation");
+           printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
+       }
+
        else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul")
        {
            std::string type;
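With this dispatch in place, the builder accepts a standalone batchnorm block and routes it through the new batchnormLayer, reporting it as bn_<activation>. A sketch of the parsed block such a branch expects, expressed as the std::map the builder dispatches on (the key set comes from the asserts and block.at() calls in batchnorm_layer.cpp; the values and the cfg excerpt are made up for illustration):

    #include <map>
    #include <string>

    int main() {
        // Hypothetical cfg excerpt this map would be parsed from:
        //   [batchnorm]
        //   filters=64
        //   activation=silu
        std::map<std::string, std::string> block{
            {"type", "batchnorm"}, {"filters", "64"}, {"activation", "silu"}};
        return block.at("type") == "batchnorm" ? 0 : 1;
    }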
@@ -27,6 +27,7 @@
 #define _YOLO_H_
 
 #include "layers/convolutional_layer.h"
+#include "layers/batchnorm_layer.h"
 #include "layers/implicit_layer.h"
 #include "layers/channels_layer.h"
 #include "layers/shortcut_layer.h"