DeepStream 7.1 + Fixes + New model output format

2024-11-07 11:25:17 -03:00
parent bca9e59d07
commit b451b036b2
75 changed files with 2383 additions and 1113 deletions
--- a/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp
@@ -6,6 +6,7 @@
 #include "deconvolutional_layer.h"

 #include <cassert>
+#include <math.h>

 nvinfer1::ITensor*
 deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
@@ -14,7 +15,7 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
 {
  nvinfer1::ITensor* output;

-  assert(block.at("type") == "deconvolutional");
+  assert(block.at("type") == "deconv" || block.at("type") == "deconvolutional");
  assert(block.find("filters") != block.end());
  assert(block.find("pad") != block.end());
  assert(block.find("size") != block.end());
@@ -24,20 +25,38 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
  int padding = std::stoi(block.at("pad"));
  int kernelSize = std::stoi(block.at("size"));
  int stride = std::stoi(block.at("stride"));
+  std::string activation = block.at("activation");
  int bias = filters;

-  int groups = 1;
-  if (block.find("groups") != block.end())
-    groups = std::stoi(block.at("groups"));
+  int batchNormalize = 0;
+  float eps = 1.0e-5;  // default epsilon; only replaced when both the "batch_normalize" and "eps" keys are present
+  if (block.find("batch_normalize") != block.end()) {
+    bias = 0;  // cleared whenever the key exists, even if its value is not "1"
+    batchNormalize = (block.at("batch_normalize") == "1");  // enabled only by the exact string "1"
+    if (block.find("eps") != block.end()) {
+      eps = std::stof(block.at("eps"));
+    }
+  }

-  if (block.find("bias") != block.end())
+  if (block.find("bias") != block.end()) {
    bias = std::stoi(block.at("bias"));
+    if (bias == 1) {
+      bias = filters;
+    }
+  }
+
+  int groups = 1;
+  if (block.find("groups") != block.end()) {
+    groups = std::stoi(block.at("groups"));
+  }

  int pad;
-  if (padding)
+  if (padding) {
    pad = (kernelSize - 1) / 2;
-  else
+  }
+  else {
    pad = 0;
+  }

  int size = filters * inputChannels * kernelSize * kernelSize / groups;
  std::vector<float> bnBiases;
@@ -47,23 +66,62 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
  nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
  nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};

-  float* val;
-  if (bias != 0) {
-    val = new float[filters];
-    for (int i = 0; i < filters; ++i) {
+  if (batchNormalize == 0) {
+    float* val;
+    if (bias != 0) {
+      val = new float[filters];
+      for (int i = 0; i < filters; ++i) {
+          val[i] = weights[weightPtr];
+          ++weightPtr;
+      }
+      convBias.values = val;
+      trtWeights.push_back(convBias);
+    }
+    val = new float[size];
+    for (int i = 0; i < size; ++i) {
        val[i] = weights[weightPtr];
        ++weightPtr;
    }
-    convBias.values = val;
-    trtWeights.push_back(convBias);
+    convWt.values = val;
+    trtWeights.push_back(convWt);
  }
-  val = new float[size];
-  for (int i = 0; i < size; ++i) {
+  else {
+    for (int i = 0; i < filters; ++i) {  // first run of `filters` values goes to bnBiases
+      bnBiases.push_back(weights[weightPtr]);
+      ++weightPtr;
+    }
+    for (int i = 0; i < filters; ++i) {  // second run of `filters` values goes to bnWeights
+      bnWeights.push_back(weights[weightPtr]);
+      ++weightPtr;
+    }
+    for (int i = 0; i < filters; ++i) {  // third run of `filters` values goes to bnRunningMean
+      bnRunningMean.push_back(weights[weightPtr]);
+      ++weightPtr;
+    }
+    for (int i = 0; i < filters; ++i) {  // fourth run of `filters` values goes to bnRunningVar
+      bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));  // keeps sqrt(value + eps), not the raw value read
+      ++weightPtr;
+    }
+    float* val;
+    if (bias != 0) {
+      val = new float[filters];
+      for (int i = 0; i < filters; ++i) {
+        val[i] = weights[weightPtr];
+        ++weightPtr;
+      }
+      convBias.values = val;
+    }
+    val = new float[size];
+    for (int i = 0; i < size; ++i) {
      val[i] = weights[weightPtr];
      ++weightPtr;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    if (bias != 0) {
+      trtWeights.push_back(convBias);
+    }
  }
-  convWt.values = val;
-  trtWeights.push_back(convWt);

  nvinfer1::IDeconvolutionLayer* conv = network->addDeconvolutionNd(*input, filters,
      nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
@@ -73,10 +131,49 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
  conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
  conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});

-  if (block.find("groups") != block.end())
+  if (block.find("groups") != block.end()) {
    conv->setNbGroups(groups);
+  }

  output = conv->getOutput(0);

+  if (batchNormalize == 1) {  // append a per-channel scale layer built from the batch-norm vectors filled earlier
+    size = filters;  // `size` is reused: from here on it is the channel count, not the kernel weight count
+    nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size};
+
+    float* shiftWt = new float[size];  // raw allocation; the pointer is stored in `shift` and pushed to trtWeights below
+    for (int i = 0; i < size; ++i) {
+      shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));  // bnRunningVar already holds sqrt(value + eps)
+    }
+    shift.values = shiftWt;
+
+    float* scaleWt = new float[size];
+    for (int i = 0; i < size; ++i) {
+      scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];  // NOTE(review): unchecked [] here, while the neighbouring accesses use .at()
+    }
+    scale.values = scaleWt;
+
+    float* powerWt = new float[size];
+    for (int i = 0; i < size; ++i) {
+      powerWt[i] = 1.0;  // every channel gets a power of 1
+    }
+    power.values = powerWt;
+
+    trtWeights.push_back(shift);
+    trtWeights.push_back(scale);
+    trtWeights.push_back(power);
+
+    nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);  // NOTE(review): TensorRT documents this as (x * scale + shift) ^ power - confirm for the version in use
+    assert(batchnorm != nullptr);
+    std::string batchnormLayerName = "batchnorm_" + layerName + std::to_string(layerIdx);
+    batchnorm->setName(batchnormLayerName.c_str());
+    output = batchnorm->getOutput(0);  // the scale layer's output replaces the deconvolution output
+  }
+
+  output = activationLayer(layerIdx, activation, output, network, layerName);
+  assert(output != nullptr);
+
  return output;
 }