Added YOLOv5 6.0 native support

2021-12-09 15:44:17 -03:00
parent dcc44b730c
commit bfd9268a31
8 changed files with 688 additions and 80 deletions
--- a/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
@@ -12,7 +12,10 @@ nvinfer1::ILayer* activationLayer(
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
 {
-    if (activation == "relu")
+    if (activation == "linear") {
+        // Linear activation: no activation layer is added in this branch
+    }
+    else if (activation == "relu")
    {
        nvinfer1::IActivationLayer* relu = network->addActivation(
            *input, nvinfer1::ActivationType::kRELU);
@@ -78,5 +81,24 @@ nvinfer1::ILayer* activationLayer(
        mish->setName(mishLayerName.c_str());
        output = mish;
    }
+    else if (activation == "silu")
+    {
+        nvinfer1::IActivationLayer* sigmoid = network->addActivation(
+            *input, nvinfer1::ActivationType::kSIGMOID);
+        assert(sigmoid != nullptr);
+        std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
+        sigmoid->setName(sigmoidLayerName.c_str());
+        nvinfer1::IElementWiseLayer* silu = network->addElementWise(
+            *sigmoid->getOutput(0), *input,
+            nvinfer1::ElementWiseOperation::kPROD);
+        assert(silu != nullptr);
+        std::string siluLayerName = "silu_" + std::to_string(layerIdx);
+        silu->setName(siluLayerName.c_str());
+        output = silu;
+    }
+    else {
+        std::cerr << "Activation not supported: " << activation << std::endl;
+        std::abort();
+    }
    return output;
 }
--- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp
@@ -12,6 +12,7 @@ nvinfer1::ILayer* convolutionalLayer(
    std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights,
    int& weightPtr,
+    std::string weightsType,
    int& inputChannels,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
@@ -56,57 +57,111 @@ nvinfer1::ILayer* convolutionalLayer(
    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias};

-    if (batchNormalize == false)
-    {
-        float* val = new float[filters];
-        for (int i = 0; i < filters; ++i)
+    if (weightsType == "weights") {
+        if (batchNormalize == false)
        {
-            val[i] = weights[weightPtr];
-            weightPtr++;
+            float* val = new float[filters];
+            for (int i = 0; i < filters; ++i)
+            {
+                val[i] = weights[weightPtr];
+                weightPtr++;
+            }
+            convBias.values = val;
+            trtWeights.push_back(convBias);
+            val = new float[size];
+            for (int i = 0; i < size; ++i)
+            {
+                val[i] = weights[weightPtr];
+                weightPtr++;
+            }
+            convWt.values = val;
+            trtWeights.push_back(convWt);
        }
-        convBias.values = val;
-        trtWeights.push_back(convBias);
-        val = new float[size];
-        for (int i = 0; i < size; ++i)
+        else
        {
-            val[i] = weights[weightPtr];
-            weightPtr++;
+            for (int i = 0; i < filters; ++i)
+            {
+                bnBiases.push_back(weights[weightPtr]);
+                weightPtr++;
+            }
+            for (int i = 0; i < filters; ++i)
+            {
+                bnWeights.push_back(weights[weightPtr]);
+                weightPtr++;
+            }
+            for (int i = 0; i < filters; ++i)
+            {
+                bnRunningMean.push_back(weights[weightPtr]);
+                weightPtr++;
+            }
+            for (int i = 0; i < filters; ++i)
+            {
+                bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
+                weightPtr++;
+            }
+            float* val = new float[size];
+            for (int i = 0; i < size; ++i)
+            {
+                val[i] = weights[weightPtr];
+                weightPtr++;
+            }
+            convWt.values = val;
+            trtWeights.push_back(convWt);
+            trtWeights.push_back(convBias);
        }
-        convWt.values = val;
-        trtWeights.push_back(convWt);
    }
-    else
-    {
-        for (int i = 0; i < filters; ++i)
+    else {
+        if (batchNormalize == false)
        {
-            bnBiases.push_back(weights[weightPtr]);
-            weightPtr++;
+            float* val = new float[size];
+            for (int i = 0; i < size; ++i)
+            {
+                val[i] = weights[weightPtr];
+                weightPtr++;
+            }
+            convWt.values = val;
+            trtWeights.push_back(convWt);
+            val = new float[filters];
+            for (int i = 0; i < filters; ++i)
+            {
+                val[i] = weights[weightPtr];
+                weightPtr++;
+            }
+            convBias.values = val;
+            trtWeights.push_back(convBias);
        }
-
-        for (int i = 0; i < filters; ++i)
+        else
        {
-            bnWeights.push_back(weights[weightPtr]);
-            weightPtr++;
+            float* val = new float[size];
+            for (int i = 0; i < size; ++i)
+            {
+                val[i] = weights[weightPtr];
+                weightPtr++;
+            }
+            convWt.values = val;
+            for (int i = 0; i < filters; ++i)
+            {
+                bnWeights.push_back(weights[weightPtr]);
+                weightPtr++;
+            }
+            for (int i = 0; i < filters; ++i)
+            {
+                bnBiases.push_back(weights[weightPtr]);
+                weightPtr++;
+            }
+            for (int i = 0; i < filters; ++i)
+            {
+                bnRunningMean.push_back(weights[weightPtr]);
+                weightPtr++;
+            }
+            for (int i = 0; i < filters; ++i)
+            {
+                bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
+                weightPtr++;
+            }
+            trtWeights.push_back(convWt);
+            trtWeights.push_back(convBias);
        }
-        for (int i = 0; i < filters; ++i)
-        {
-            bnRunningMean.push_back(weights[weightPtr]);
-            weightPtr++;
-        }
-        for (int i = 0; i < filters; ++i)
-        {
-            bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
-            weightPtr++;
-        }
-        float* val = new float[size];
-        for (int i = 0; i < size; ++i)
-        {
-            val[i] = weights[weightPtr];
-            weightPtr++;
-        }
-        convWt.values = val;
-        trtWeights.push_back(convWt);
-        trtWeights.push_back(convBias);
    }

    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
--- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h
@@ -19,6 +19,7 @@ nvinfer1::ILayer* convolutionalLayer(
    std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights,
    int& weightPtr,
+    std::string weightsType,
    int& inputChannels,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network);
--- a/nvdsinfer_custom_impl_Yolo/utils.cpp
+++ b/nvdsinfer_custom_impl_Yolo/utils.cpp
@@ -67,32 +67,63 @@ std::vector<float> loadWeights(const std::string weightsFilePath, const std::str
 {
    assert(fileExists(weightsFilePath));
    std::cout << "\nLoading pre-trained weights" << std::endl;
-    std::ifstream file(weightsFilePath, std::ios_base::binary);
-    assert(file.good());
-    std::string line;
-
-    if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos)
-    {
-        // Remove 4 int32 bytes of data from the stream belonging to the header
-        file.ignore(4 * 4);
-    }
-    else
-    {
-        // Remove 5 int32 bytes of data from the stream belonging to the header
-        file.ignore(4 * 5);
-    }

    std::vector<float> weights;
-    char floatWeight[4];
-    while (!file.eof())
-    {
-        file.read(floatWeight, 4);
-        assert(file.gcount() == 4);
-        weights.push_back(*reinterpret_cast<float*>(floatWeight));
-        if (file.peek() == std::istream::traits_type::eof()) break;
+
+    if (weightsFilePath.find(".weights") != std::string::npos) {
+        std::ifstream file(weightsFilePath, std::ios_base::binary);
+        assert(file.good());
+        std::string line;
+
+        if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos)
+        {
+            // Skip the 4 int32 header values (4 * 4 = 16 bytes) at the start of the stream
+            file.ignore(4 * 4);
+        }
+        else
+        {
+            // Skip the 5 int32 header values (4 * 5 = 20 bytes) at the start of the stream
+            file.ignore(4 * 5);
+        }
+
+        char floatWeight[4];
+        while (!file.eof())
+        {
+            file.read(floatWeight, 4);
+            assert(file.gcount() == 4);
+            weights.push_back(*reinterpret_cast<float*>(floatWeight));
+            if (file.peek() == std::istream::traits_type::eof()) break;
+        }
    }
+
+    else if (weightsFilePath.find(".wts") != std::string::npos) {
+        std::ifstream file(weightsFilePath);
+        assert(file.good());
+        int32_t count;
+        file >> count;
+        assert(count > 0 && "Invalid .wts file.");
+
+        uint32_t floatWeight;
+        std::string name;
+        uint32_t size;
+
+        while (count--) {
+            file >> name >> std::dec >> size;
+            for (uint32_t x = 0, y = size; x < y; ++x)
+            {
+                file >> std::hex >> floatWeight;
+                weights.push_back(*reinterpret_cast<float *>(&floatWeight));
+            };
+        }
+    }
+
+    else {
+        std::cerr << "File " << weightsFilePath << " is not supported" << std::endl;
+        std::abort();
+    }
+
    std::cout << "Loading weights of " << networkType << " complete"
-              << std::endl;
+            << std::endl;
    std::cout << "Total weights read: " << weights.size() << std::endl;
    return weights;
 }
--- a/nvdsinfer_custom_impl_Yolo/yolo.cpp
+++ b/nvdsinfer_custom_impl_Yolo/yolo.cpp
@@ -73,9 +73,6 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder)
    parseConfigBlocks();
    orderParams(&m_OutputMasks);

-    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
-    std::vector<nvinfer1::Weights> trtWeights;
-
    nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0);
    if (parseModel(*network) != NVDSINFER_SUCCESS) {
        network->destroy();
@@ -134,7 +131,7 @@ NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
    destroyNetworkUtils();

    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
-    std::cout << "Building YOLO network" << std::endl;
+    std::cout << "Building YOLO network\n" << std::endl;
    NvDsInferStatus status = buildYoloNetwork(weights, network);

    if (status == NVDSINFER_SUCCESS) {
@@ -151,6 +148,15 @@ NvDsInferStatus Yolo::buildYoloNetwork(
    int weightPtr = 0;
    int channels = m_InputC;

+    std::string weightsType;
+
+    if (m_WtsFilePath.find(".weights") != std::string::npos) {
+        weightsType = "weights";
+    }
+    else {
+        weightsType = "wts";
+    }
+
    nvinfer1::ITensor* data =
        network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
            nvinfer1::Dims3{static_cast<int>(m_InputC),
@@ -171,7 +177,7 @@ NvDsInferStatus Yolo::buildYoloNetwork(
        
        else if (m_ConfigBlocks.at(i).at("type") == "convolutional") {
            std::string inputVol = dimsToString(previous->getDimensions());
-            nvinfer1::ILayer* out = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network);
+            nvinfer1::ILayer* out = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, previous, &network);
            previous = out->getOutput(0);
            assert(previous != nullptr);
            channels = getNumChannels(previous);
@@ -272,10 +278,10 @@ NvDsInferStatus Yolo::buildYoloNetwork(
                beta_nms = std::stof(m_ConfigBlocks.at(i).at("beta_nms"));
            }
            nvinfer1::IPluginV2* yoloPlugin
-                = new YoloLayer(m_OutputTensors.at(outputTensorCount).numBBoxes,
-                                  m_OutputTensors.at(outputTensorCount).numClasses,
-                                  m_OutputTensors.at(outputTensorCount).gridSizeX,
-                                  m_OutputTensors.at(outputTensorCount).gridSizeY,
+                = new YoloLayer(curYoloTensor.numBBoxes,
+                                  curYoloTensor.numClasses,
+                                  curYoloTensor.gridSizeX,
+                                  curYoloTensor.gridSizeY,
                                  1, new_coords, scale_x_y, beta_nms,
                                  curYoloTensor.anchors,
                                  m_OutputMasks);
@@ -436,7 +442,7 @@ void Yolo::parseConfigBlocks()
                m_LetterBox = 0;
            }
        }
-        else if ((block.at("type") == "region") || (block.at("type") == "yolo"))
+        else if ((block.at("type") == "region") || (block.at("type") == "yolo") || (block.at("type") == "detect"))
        {
            assert((block.find("num") != block.end())
                   && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str());
@@ -466,9 +472,7 @@ void Yolo::parseConfigBlocks()
                }
            }

-            
            if (block.find("mask") != block.end()) {
-
                std::string maskString = block.at("mask");
                std::vector<int> pMASKS;
                while (!maskString.empty())