Add YOLOv6 support
This commit is contained in:
@@ -34,13 +34,16 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
|
||||
batchNormalize = (block.at("batch_normalize") == "1");
|
||||
}
|
||||
|
||||
if (block.find("bias") != block.end()) {
|
||||
bias = std::stoi(block.at("bias"));
|
||||
if (bias == 1)
|
||||
bias = filters;
|
||||
}
|
||||
|
||||
int groups = 1;
|
||||
if (block.find("groups") != block.end())
|
||||
groups = std::stoi(block.at("groups"));
|
||||
|
||||
if (block.find("bias") != block.end())
|
||||
bias = std::stoi(block.at("bias"));
|
||||
|
||||
int pad;
|
||||
if (padding)
|
||||
pad = (kernelSize - 1) / 2;
|
||||
@@ -92,7 +95,16 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
|
||||
++weightPtr;
|
||||
}
|
||||
float* val = new float[size];
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
}
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
@@ -129,6 +141,14 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
|
||||
102
nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp
Normal file
102
nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "deconvolutional_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
nvinfer1::ITensor*
deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels,
    nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName)
{
    nvinfer1::ITensor* output;

    assert(block.at("type") == "deconvolutional");
    assert(block.find("filters") != block.end());
    assert(block.find("pad") != block.end());
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    int filters = std::stoi(block.at("filters"));
    int padding = std::stoi(block.at("pad"));
    int kernelSize = std::stoi(block.at("size"));
    int stride = std::stoi(block.at("stride"));
    // Default: one bias value per output channel.
    int bias = filters;

    int groups = 1;
    if (block.find("groups") != block.end())
        groups = std::stoi(block.at("groups"));

    if (block.find("bias") != block.end()) {
        bias = std::stoi(block.at("bias"));
        // cfg "bias=1" means "biases present" (one per filter) — same normalization
        // as convolutionalLayer. Without it, convBias.count would be 1 while the
        // loader below reads `filters` values.
        if (bias == 1)
            bias = filters;
    }

    int pad;
    if (padding)
        pad = (kernelSize - 1) / 2;
    else
        pad = 0;

    // Number of kernel values stored for this layer.
    int size = filters * inputChannels * kernelSize * kernelSize / groups;
    nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};

    // The two weight formats store the same values in a different order:
    // "weights" files hold the biases first, then the kernel; the other
    // format holds the kernel first, then the biases. The raw arrays are
    // handed to trtWeights, which owns them for the lifetime of the build.
    if (weightsType == "weights") {
        float* val;
        if (bias != 0) {
            val = new float[filters];
            for (int i = 0; i < filters; ++i) {
                val[i] = weights[weightPtr];
                ++weightPtr;
            }
            convBias.values = val;
            trtWeights.push_back(convBias);
        }
        val = new float[size];
        for (int i = 0; i < size; ++i) {
            val[i] = weights[weightPtr];
            ++weightPtr;
        }
        convWt.values = val;
        trtWeights.push_back(convWt);
    }
    else {
        float* val = new float[size];
        for (int i = 0; i < size; ++i) {
            val[i] = weights[weightPtr];
            ++weightPtr;
        }
        convWt.values = val;
        trtWeights.push_back(convWt);
        if (bias != 0) {
            val = new float[filters];
            for (int i = 0; i < filters; ++i) {
                val[i] = weights[weightPtr];
                ++weightPtr;
            }
            convBias.values = val;
            trtWeights.push_back(convBias);
        }
    }

    nvinfer1::IDeconvolutionLayer* conv = network->addDeconvolutionNd(*input, filters,
        nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
    assert(conv != nullptr);
    std::string convLayerName = "deconv_" + layerName + std::to_string(layerIdx);
    conv->setName(convLayerName.c_str());
    conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
    conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});

    if (block.find("groups") != block.end())
        conv->setNbGroups(groups);

    output = conv->getOutput(0);

    return output;
}
|
||||
18
nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h
Normal file
18
nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
 * Created by Marcos Luciano
 * https://www.github.com/marcoslucianops
 */

#ifndef __DECONVOLUTIONAL_LAYER_H__
#define __DECONVOLUTIONAL_LAYER_H__

#include <map>
#include <vector>

#include "NvInfer.h"

// Builds a TensorRT deconvolution layer from a darknet-style config `block`,
// consuming this layer's values from `weights` starting at `weightPtr`
// (advanced past them on return). Raw weight buffers are appended to
// `trtWeights`, which keeps them alive for the engine build. Returns the
// layer's output tensor.
nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels,
    nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = "");

#endif
|
||||
@@ -6,7 +6,7 @@
|
||||
#include "shuffle_layer.h"
|
||||
|
||||
nvinfer1::ITensor*
|
||||
shuffleLayer(int layerIdx, std::string& layer, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
shuffleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
@@ -18,16 +18,8 @@ shuffleLayer(int layerIdx, std::string& layer, std::map<std::string, std::string
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
|
||||
int from = -1;
|
||||
if (block.find("from") != block.end())
|
||||
from = std::stoi(block.at("from"));
|
||||
if (from < 0)
|
||||
from = tensorOutputs.size() + from;
|
||||
|
||||
layer = std::to_string(from);
|
||||
|
||||
if (block.find("reshape") != block.end()) {
|
||||
nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions();
|
||||
nvinfer1::Dims inputTensorDims = input->getDimensions();
|
||||
|
||||
std::string strReshape = block.at("reshape");
|
||||
std::vector<int32_t> reshape;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
#include "../utils.h"
|
||||
|
||||
nvinfer1::ITensor* shuffleLayer(int layerIdx, std::string& layer, std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input, std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* shuffleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -135,8 +135,9 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
weightsType = "weights";
|
||||
|
||||
float eps = 1.0e-5;
|
||||
if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov7") != std::string::npos ||
|
||||
m_NetworkType.find("yolov8") != std::string::npos || m_NetworkType.find("yolox") != std::string::npos)
|
||||
if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov6") != std::string::npos ||
|
||||
m_NetworkType.find("yolov7") != std::string::npos || m_NetworkType.find("yolov8") != std::string::npos ||
|
||||
m_NetworkType.find("yolox") != std::string::npos)
|
||||
eps = 1.0e-3;
|
||||
else if (m_NetworkType.find("yolor") != std::string::npos)
|
||||
eps = 1.0e-4;
|
||||
@@ -169,6 +170,17 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation");
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "deconvolutional") {
|
||||
int channels = getNumChannels(previous);
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels,
|
||||
previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "deconv";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "c2f") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = c2fLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network);
|
||||
@@ -299,13 +311,12 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "shuffle") {
|
||||
std::string layer;
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = shuffleLayer(i, layer, m_ConfigBlocks.at(i), previous, tensorOutputs, &network);
|
||||
previous = shuffleLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "shuffle: " + layer;
|
||||
std::string layerName = "shuffle";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "softmax") {
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "nvdsinfer_custom_impl.h"
|
||||
|
||||
#include "layers/convolutional_layer.h"
|
||||
#include "layers/deconvolutional_layer.h"
|
||||
#include "layers/c2f_layer.h"
|
||||
#include "layers/batchnorm_layer.h"
|
||||
#include "layers/implicit_layer.h"
|
||||
|
||||
Reference in New Issue
Block a user