DeepStream 7.1 + Fixes + New model output format
@@ -1,5 +1,5 @@
################################################################################
# Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -56,10 +56,15 @@ endif

CUFLAGS:= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include

LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -lnvonnxparser -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
ifeq ($(shell ldconfig -p | grep -q libnvparsers && echo 1 || echo 0), 1)
LIBS+= -lnvparsers
endif

LIBS+= -lnvinfer_plugin -lnvinfer -lnvonnxparser -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group

INCS:= $(wildcard *.h)
INCS:= $(wildcard layers/*.h)
INCS+= $(wildcard *.h)

SRCFILES:= $(filter-out calibrator.cpp, $(wildcard *.cpp))

@@ -8,9 +8,10 @@
#include <fstream>
#include <iterator>

Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width,
const float& scaleFactor, const float* offsets, const std::string& imgPath, const std::string& calibTablePath) :
batchSize(batchSize), inputC(channels), inputH(height), inputW(width), scaleFactor(scaleFactor), offsets(offsets),
Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height,
const int& width, const float& scaleFactor, const float* offsets, const int& inputFormat,
const std::string& imgPath, const std::string& calibTablePath) : batchSize(batchSize), inputC(channels),
inputH(height), inputW(width), scaleFactor(scaleFactor), offsets(offsets), inputFormat(inputFormat),
calibTablePath(calibTablePath), imageIndex(0)
{
inputCount = batchSize * channels * height * width;
@@ -54,7 +55,7 @@ Int8EntropyCalibrator2::getBatch(void** bindings, const char** names, int nbBind
return false;
}

std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, scaleFactor, offsets);
std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, scaleFactor, offsets, inputFormat);

size_t len = inputData.size();
memcpy(ptr, inputData.data(), len * sizeof(float));
@@ -93,32 +94,46 @@ Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t len
}

std::vector<float>
prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, float scaleFactor, const float* offsets)
prepareImage(cv::Mat& img, int inputC, int inputH, int inputW, float scaleFactor, const float* offsets, int inputFormat)
{
cv::Mat out;

cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
if (inputFormat == 0) {
cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
}
else if (inputFormat == 2) {
cv::cvtColor(img, out, cv::COLOR_BGR2GRAY);
}
else {
out = img;
}

int image_w = img.cols;
int image_h = img.rows;
int imageW = img.cols;
int imageH = img.rows;

if (image_w != input_w || image_h != input_h) {
float resizeFactor = std::max(input_w / (float) image_w, input_h / (float) img.rows);
if (imageW != inputW || imageH != inputH) {
float resizeFactor = std::max(inputW / (float) imageW, inputH / (float) imageH);
cv::resize(out, out, cv::Size(0, 0), resizeFactor, resizeFactor, cv::INTER_CUBIC);
cv::Rect crop(cv::Point(0.5 * (out.cols - input_w), 0.5 * (out.rows - input_h)), cv::Size(input_w, input_h));
cv::Rect crop(cv::Point(0.5 * (out.cols - inputW), 0.5 * (out.rows - inputH)), cv::Size(inputW, inputH));
out = out(crop);
}

out.convertTo(out, CV_32F, scaleFactor);
cv::subtract(out, cv::Scalar(offsets[2] / 255, offsets[1] / 255, offsets[0] / 255), out, cv::noArray(), -1);

std::vector<cv::Mat> input_channels(input_c);
cv::split(out, input_channels);
std::vector<float> result(input_h * input_w * input_c);
if (inputFormat == 2) {
cv::subtract(out, cv::Scalar(offsets[0] / 255), out);
}
else {
cv::subtract(out, cv::Scalar(offsets[0] / 255, offsets[1] / 255, offsets[2] / 255), out);
}

std::vector<cv::Mat> inputChannels(inputC);
cv::split(out, inputChannels);
std::vector<float> result(inputH * inputW * inputC);
auto data = result.data();
int channelLength = input_h * input_w;
for (int i = 0; i < input_c; ++i) {
memcpy(data, input_channels[i].data, channelLength * sizeof(float));
int channelLength = inputH * inputW;
for (int i = 0; i < inputC; ++i) {
memcpy(data, inputChannels[i].data, channelLength * sizeof(float));
data += channelLength;
}

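For reference, a minimal usage sketch of the new prepareImage signature; the image path and network dimensions are illustrative assumptions, not part of the commit:

    #include <opencv2/opencv.hpp>
    #include <vector>
    #include "calibrator.h"

    int main()
    {
        cv::Mat img = cv::imread("sample.jpg");  // OpenCV loads images as BGR
        if (img.empty()) {
            return 1;
        }
        float offsets[3] = {0.0, 0.0, 0.0};
        // inputFormat selects the conversion applied above: 0 = BGR -> RGB,
        // 2 = BGR -> GRAY, any other value keeps the BGR image unchanged.
        std::vector<float> chw = prepareImage(img, 3, 640, 640, 1.0 / 255.0, offsets, 0);
        // chw now holds inputC * inputH * inputW floats in planar (CHW) order.
        return 0;
    }
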
@@ -12,18 +12,19 @@
#include "NvInfer.h"
#include "opencv2/opencv.hpp"

#define CUDA_CHECK(status) { \
if (status != 0) { \
std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << __LINE__ << \
std::endl; \
abort(); \
} \
#define CUDA_CHECK(status) { \
if (status != 0) { \
std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << \
__LINE__ << std::endl; \
abort(); \
} \
}

class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width,
const float& scaleFactor, const float* offsets, const std::string& imgPath, const std::string& calibTablePath);
const float& scaleFactor, const float* offsets, const int& inputFormat, const std::string& imgPath,
const std::string& calibTablePath);

virtual ~Int8EntropyCalibrator2();

@@ -43,6 +44,7 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
int letterBox;
float scaleFactor;
const float* offsets;
int inputFormat;
std::string calibTablePath;
size_t imageIndex;
size_t inputCount;
@@ -53,7 +55,7 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
std::vector<char> calibrationCache;
};

std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, float scaleFactor,
const float* offsets);
std::vector<float> prepareImage(cv::Mat& img, int inputC, int inputH, int inputW, float scaleFactor,
const float* offsets, int inputFormat);

#endif //CALIBRATOR_H

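A hedged sketch of how this calibrator is typically attached to a TensorRT INT8 build; the batch size, dimensions, and paths are placeholder assumptions:

    #include <cstdio>
    #include "NvInfer.h"
    #include "calibrator.h"

    class Logger : public nvinfer1::ILogger {
        void log(Severity severity, const char* msg) noexcept override {
            if (severity <= Severity::kWARNING) {
                std::printf("%s\n", msg);
            }
        }
    };

    int main()
    {
        Logger logger;
        nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
        nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
        float offsets[3] = {0.0, 0.0, 0.0};
        // inputFormat = 0 requests RGB calibration batches (see prepareImage).
        Int8EntropyCalibrator2 calibrator(8, 3, 640, 640, 1.0 / 255.0, offsets, 0,
            "calib_images/", "calib.table");
        config->setFlag(nvinfer1::BuilderFlag::kINT8);
        config->setInt8Calibrator(&calibrator);
        // ... network definition and engine build would follow here.
        delete config;
        delete builder;
        return 0;
    }
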
@@ -14,8 +14,9 @@ activationLayer(int layerIdx, std::string activation, nvinfer1::ITensor* input,
{
nvinfer1::ITensor* output;

if (activation == "linear")
if (activation == "linear") {
output = input;
}
else if (activation == "relu") {
nvinfer1::IActivationLayer* relu = network->addActivation(*input, nvinfer1::ActivationType::kRELU);
assert(relu != nullptr);

@@ -21,6 +21,11 @@ batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vec
int filters = std::stoi(block.at("filters"));
std::string activation = block.at("activation");

float eps = 1.0e-5;
if (block.find("eps") != block.end()) {
eps = std::stof(block.at("eps"));
}

std::vector<float> bnBiases;
std::vector<float> bnWeights;
std::vector<float> bnRunningMean;
@@ -39,7 +44,7 @@ batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vec
++weightPtr;
}
for (int i = 0; i < filters; ++i) {
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
++weightPtr;
}

@@ -47,18 +52,25 @@ batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vec
nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size};

float* shiftWt = new float[size];
for (int i = 0; i < size; ++i)
for (int i = 0; i < size; ++i) {
shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
}
shift.values = shiftWt;

float* scaleWt = new float[size];
for (int i = 0; i < size; ++i)
for (int i = 0; i < size; ++i) {
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
}
scale.values = scaleWt;

float* powerWt = new float[size];
for (int i = 0; i < size; ++i)
for (int i = 0; i < size; ++i) {
powerWt[i] = 1.0;
}
power.values = powerWt;

trtWeights.push_back(shift);
trtWeights.push_back(scale);
trtWeights.push_back(power);

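The shift/scale/power triple folds batch normalization into a per-channel TensorRT scale layer: since bnRunningVar already stores sqrt(var + eps), the layer computes y = (x * scale + shift)^power with scale = gamma / sqrt(var + eps), shift = beta - mean * gamma / sqrt(var + eps) and power = 1. A self-contained sketch of the same arithmetic on a single value (the numbers are arbitrary):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        float gamma = 1.2f, beta = 0.5f, mean = 0.3f, var = 0.8f, eps = 1.0e-5f;
        float x = 2.0f;
        // Reference batch normalization.
        float bn = (x - mean) / std::sqrt(var + eps) * gamma + beta;
        // Folded scale-layer form used above.
        float scale = gamma / std::sqrt(var + eps);
        float shift = beta - mean * gamma / std::sqrt(var + eps);
        float folded = x * scale + shift;  // power = 1, so no exponentiation
        std::printf("bn = %f, folded = %f\n", bn, folded);  // both print the same value
        return 0;
    }
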
@@ -15,7 +15,7 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
{
nvinfer1::ITensor* output;

assert(block.at("type") == "convolutional" || block.at("type") == "c2f");
assert(block.at("type") == "conv" || block.at("type") == "convolutional");
assert(block.find("filters") != block.end());
assert(block.find("pad") != block.end());
assert(block.find("size") != block.end());
@@ -28,27 +28,35 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
std::string activation = block.at("activation");
int bias = filters;

bool batchNormalize = false;
int batchNormalize = 0;
float eps = 1.0e-5;
if (block.find("batch_normalize") != block.end()) {
bias = 0;
batchNormalize = (block.at("batch_normalize") == "1");
if (block.find("eps") != block.end()) {
eps = std::stof(block.at("eps"));
}
}

if (block.find("bias") != block.end()) {
bias = std::stoi(block.at("bias"));
if (bias == 1)
if (bias == 1) {
bias = filters;
}
}

int groups = 1;
if (block.find("groups") != block.end())
if (block.find("groups") != block.end()) {
groups = std::stoi(block.at("groups"));
}

int pad;
if (padding)
if (padding) {
pad = (kernelSize - 1) / 2;
else
}
else {
pad = 0;
}

int size = filters * inputChannels * kernelSize * kernelSize / groups;
std::vector<float> bnBiases;
@@ -58,7 +66,7 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};

if (batchNormalize == false) {
if (batchNormalize == 0) {
float* val;
if (bias != 0) {
val = new float[filters];
@@ -91,7 +99,7 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
++weightPtr;
}
for (int i = 0; i < filters; ++i) {
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
++weightPtr;
}
float* val;
@@ -110,40 +118,49 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
}
convWt.values = val;
trtWeights.push_back(convWt);
if (bias != 0)
if (bias != 0) {
trtWeights.push_back(convBias);
}
}

nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}},
convWt, convBias);
nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*input, filters,
nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
assert(conv != nullptr);
std::string convLayerName = "conv_" + layerName + std::to_string(layerIdx);
conv->setName(convLayerName.c_str());
conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});

if (block.find("groups") != block.end())
if (block.find("groups") != block.end()) {
conv->setNbGroups(groups);
}

output = conv->getOutput(0);

if (batchNormalize == true) {
if (batchNormalize == 1) {
size = filters;
nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size};

float* shiftWt = new float[size];
for (int i = 0; i < size; ++i)
for (int i = 0; i < size; ++i) {
shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
}
shift.values = shiftWt;

float* scaleWt = new float[size];
for (int i = 0; i < size; ++i)
for (int i = 0; i < size; ++i) {
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
}
scale.values = scaleWt;

float* powerWt = new float[size];
for (int i = 0; i < size; ++i)
for (int i = 0; i < size; ++i) {
powerWt[i] = 1.0;
}
power.values = powerWt;

trtWeights.push_back(shift);
trtWeights.push_back(scale);
trtWeights.push_back(power);

@@ -13,8 +13,8 @@

#include "activation_layer.h"

nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network, std::string layerName = "");
nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = "");

#endif

@@ -6,6 +6,7 @@
#include "deconvolutional_layer.h"

#include <cassert>
#include <math.h>

nvinfer1::ITensor*
deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
@@ -14,7 +15,7 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
{
nvinfer1::ITensor* output;

assert(block.at("type") == "deconvolutional");
assert(block.at("type") == "deconv" || block.at("type") == "deconvolutional");
assert(block.find("filters") != block.end());
assert(block.find("pad") != block.end());
assert(block.find("size") != block.end());
@@ -24,20 +25,38 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
int padding = std::stoi(block.at("pad"));
int kernelSize = std::stoi(block.at("size"));
int stride = std::stoi(block.at("stride"));
std::string activation = block.at("activation");
int bias = filters;

int groups = 1;
if (block.find("groups") != block.end())
groups = std::stoi(block.at("groups"));
int batchNormalize = 0;
float eps = 1.0e-5;
if (block.find("batch_normalize") != block.end()) {
bias = 0;
batchNormalize = (block.at("batch_normalize") == "1");
if (block.find("eps") != block.end()) {
eps = std::stof(block.at("eps"));
}
}

if (block.find("bias") != block.end())
if (block.find("bias") != block.end()) {
bias = std::stoi(block.at("bias"));
if (bias == 1) {
bias = filters;
}
}

int groups = 1;
if (block.find("groups") != block.end()) {
groups = std::stoi(block.at("groups"));
}

int pad;
if (padding)
if (padding) {
pad = (kernelSize - 1) / 2;
else
}
else {
pad = 0;
}

int size = filters * inputChannels * kernelSize * kernelSize / groups;
std::vector<float> bnBiases;
@@ -47,23 +66,62 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};

float* val;
if (bias != 0) {
val = new float[filters];
for (int i = 0; i < filters; ++i) {
if (batchNormalize == 0) {
float* val;
if (bias != 0) {
val = new float[filters];
for (int i = 0; i < filters; ++i) {
val[i] = weights[weightPtr];
++weightPtr;
}
convBias.values = val;
trtWeights.push_back(convBias);
}
val = new float[size];
for (int i = 0; i < size; ++i) {
val[i] = weights[weightPtr];
++weightPtr;
}
convBias.values = val;
trtWeights.push_back(convBias);
convWt.values = val;
trtWeights.push_back(convWt);
}
val = new float[size];
for (int i = 0; i < size; ++i) {
else {
for (int i = 0; i < filters; ++i) {
bnBiases.push_back(weights[weightPtr]);
++weightPtr;
}
for (int i = 0; i < filters; ++i) {
bnWeights.push_back(weights[weightPtr]);
++weightPtr;
}
for (int i = 0; i < filters; ++i) {
bnRunningMean.push_back(weights[weightPtr]);
++weightPtr;
}
for (int i = 0; i < filters; ++i) {
bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
++weightPtr;
}
float* val;
if (bias != 0) {
val = new float[filters];
for (int i = 0; i < filters; ++i) {
val[i] = weights[weightPtr];
++weightPtr;
}
convBias.values = val;
}
val = new float[size];
for (int i = 0; i < size; ++i) {
val[i] = weights[weightPtr];
++weightPtr;
}
convWt.values = val;
trtWeights.push_back(convWt);
if (bias != 0) {
trtWeights.push_back(convBias);
}
}
convWt.values = val;
trtWeights.push_back(convWt);

nvinfer1::IDeconvolutionLayer* conv = network->addDeconvolutionNd(*input, filters,
nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
@@ -73,10 +131,49 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});

if (block.find("groups") != block.end())
if (block.find("groups") != block.end()) {
conv->setNbGroups(groups);
}

output = conv->getOutput(0);

if (batchNormalize == 1) {
size = filters;
nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size};

float* shiftWt = new float[size];
for (int i = 0; i < size; ++i) {
shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
}
shift.values = shiftWt;

float* scaleWt = new float[size];
for (int i = 0; i < size; ++i) {
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
}
scale.values = scaleWt;

float* powerWt = new float[size];
for (int i = 0; i < size; ++i) {
powerWt[i] = 1.0;
}
power.values = powerWt;

trtWeights.push_back(shift);
trtWeights.push_back(scale);
trtWeights.push_back(power);

nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
assert(batchnorm != nullptr);
std::string batchnormLayerName = "batchnorm_" + layerName + std::to_string(layerIdx);
batchnorm->setName(batchnormLayerName.c_str());
output = batchnorm->getOutput(0);
}

output = activationLayer(layerIdx, activation, output, network, layerName);
assert(output != nullptr);

return output;
}

@@ -8,12 +8,13 @@

#include <map>
#include <vector>
#include <string>

#include "NvInfer.h"

nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network, std::string layerName = "");
#include "activation_layer.h"

nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = "");

#endif

@@ -10,7 +10,7 @@

nvinfer1::ITensor*
reorgLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network, uint batchSize)
nvinfer1::INetworkDefinition* network)
{
nvinfer1::ITensor* output;

@@ -35,17 +35,17 @@ reorgLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::IT
nvinfer1::Dims sizeAll = {4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}};
nvinfer1::Dims strideAll = {4, {1, 1, stride, stride}};

nvinfer1::ITensor* slice1 = sliceLayer(layerIdx, name1, input, start1, sizeAll, strideAll, network, batchSize);
assert(output != nullptr);
nvinfer1::ITensor* slice1 = sliceLayer(layerIdx, name1, input, start1, sizeAll, strideAll, network);
assert(slice1 != nullptr);

nvinfer1::ITensor* slice2 = sliceLayer(layerIdx, name2, input, start2, sizeAll, strideAll, network, batchSize);
assert(output != nullptr);
nvinfer1::ITensor* slice2 = sliceLayer(layerIdx, name2, input, start2, sizeAll, strideAll, network);
assert(slice2 != nullptr);

nvinfer1::ITensor* slice3 = sliceLayer(layerIdx, name3, input, start3, sizeAll, strideAll, network, batchSize);
assert(output != nullptr);
nvinfer1::ITensor* slice3 = sliceLayer(layerIdx, name3, input, start3, sizeAll, strideAll, network);
assert(slice3 != nullptr);

nvinfer1::ITensor* slice4 = sliceLayer(layerIdx, name4, input, start4, sizeAll, strideAll, network, batchSize);
assert(output != nullptr);
nvinfer1::ITensor* slice4 = sliceLayer(layerIdx, name4, input, start4, sizeAll, strideAll, network);
assert(slice4 != nullptr);

std::vector<nvinfer1::ITensor*> concatInputs;
concatInputs.push_back(slice1);

@@ -14,6 +14,6 @@
#include "slice_layer.h"

nvinfer1::ITensor* reorgLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network, uint batchSize);
nvinfer1::INetworkDefinition* network);

#endif

@@ -7,7 +7,7 @@

nvinfer1::ITensor*
routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>& block,
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network, uint batchSize)
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network)
{
nvinfer1::ITensor* output;

@@ -49,7 +49,6 @@ routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>
int axis = 1;
if (block.find("axis") != block.end()) {
axis += std::stoi(block.at("axis"));
std::cout << axis << std::endl;
}
if (axis < 0) {
axis += concatInputs[0]->getDimensions().nbDims;
@@ -75,7 +74,7 @@ routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>
nvinfer1::Dims size = {4, {prevTensorDims.d[0], channelSlice, prevTensorDims.d[2], prevTensorDims.d[3]}};
nvinfer1::Dims stride = {4, {1, 1, 1, 1}};

output = sliceLayer(layerIdx, name, output, start, size, stride, network, batchSize);
output = sliceLayer(layerIdx, name, output, start, size, stride, network);
assert(output != nullptr);
}

@@ -11,6 +11,6 @@
#include "slice_layer.h"

nvinfer1::ITensor* routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>& block,
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network, uint batchSize);
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network);

#endif

@@ -10,7 +10,7 @@
nvinfer1::ITensor*
shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol,
std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcutInput,
nvinfer1::INetworkDefinition* network, uint batchSize)
nvinfer1::INetworkDefinition* network)
{
nvinfer1::ITensor* output;

@@ -20,15 +20,17 @@ shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::s
std::string name = "slice";
nvinfer1::Dims start = {4, {0, 0, 0, 0}};
nvinfer1::Dims size = input->getDimensions();
nvinfer1::Dims stride = nvinfer1::Dims{4, {1, 1, 1, 1}};
nvinfer1::Dims stride = {4, {1, 1, 1, 1}};

output = sliceLayer(layerIdx, name, shortcutInput, start, size, stride, network, batchSize);
output = sliceLayer(layerIdx, name, shortcutInput, start, size, stride, network);
assert(output != nullptr);
}
else
else {
output = shortcutInput;
}

nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, nvinfer1::ElementWiseOperation::kSUM);
nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output,
nvinfer1::ElementWiseOperation::kSUM);
assert(shortcut != nullptr);
std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx);
shortcut->setName(shortcutLayerName.c_str());

@@ -15,6 +15,6 @@

nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol,
std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcut,
nvinfer1::INetworkDefinition* network, uint batchSize);
nvinfer1::INetworkDefinition* network);

#endif

@@ -9,58 +9,72 @@

nvinfer1::ITensor*
sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start, nvinfer1::Dims size,
nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network, uint batchSize)
nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network)
{
nvinfer1::ITensor* output;

int tensorBatch = input->getDimensions().d[0];
nvinfer1::ISliceLayer* slice;

nvinfer1::ISliceLayer* slice = network->addSlice(*input, start, size, stride);
nvinfer1::Dims inputDims = input->getDimensions();

if (inputDims.d[0] == -1) {
slice = network->addSlice(*input, start, nvinfer1::Dims{}, stride);
assert(slice != nullptr);

if (tensorBatch == -1) {
int nbDims = size.nbDims;

nvinfer1::Weights constant1Wt {nvinfer1::DataType::kINT32, nullptr, nbDims};
nvinfer1::IShapeLayer* shape = network->addShape(*input);
assert(shape != nullptr);
std::string shapeLayerName = "shape_" + name + "_" + std::to_string(layerIdx);
shape->setName(shapeLayerName.c_str());
nvinfer1::ITensor* shapeTensor = shape->getOutput(0);
assert(shapeTensor != nullptr);

int* val1 = new int[nbDims];
val1[0] = 1;
for (int i = 1; i < nbDims; ++i) {
val1[i] = size.d[i];
#if NV_TENSORRT_MAJOR >= 10
nvinfer1::ICastLayer* castShape = network->addCast(*shapeTensor, nvinfer1::DataType::kINT32);
assert(castShape != nullptr);
std::string castShapeLayerName = "cast_shape_" + name + "_" + std::to_string(layerIdx);
castShape->setName(castShapeLayerName.c_str());
nvinfer1::ITensor* castShapeTensor = castShape->getOutput(0);
assert(castShapeTensor != nullptr);
shapeTensor = castShapeTensor;
#endif

nvinfer1::Weights constantWt {nvinfer1::DataType::kINT32, nullptr, nbDims};

int* val = new int[nbDims];
for (int i = 0; i < nbDims; ++i) {
if (inputDims.d[i] == size.d[i]) {
val[i] = 0;
}
else {
val[i] = inputDims.d[i] - size.d[i];
}
}
constant1Wt.values = val1;
constantWt.values = val;

nvinfer1::IConstantLayer* constant1 = network->addConstant(nvinfer1::Dims{1, {nbDims}}, constant1Wt);
assert(constant1 != nullptr);
std::string constant1LayerName = "constant1_" + name + "_" + std::to_string(layerIdx);
constant1->setName(constant1LayerName.c_str());
nvinfer1::ITensor* constant1Tensor = constant1->getOutput(0);
nvinfer1::IConstantLayer* constant = network->addConstant(nvinfer1::Dims{1, {nbDims}}, constantWt);
assert(constant != nullptr);
std::string constantLayerName = "constant_" + name + "_" + std::to_string(layerIdx);
constant->setName(constantLayerName.c_str());
nvinfer1::ITensor* constantTensor = constant->getOutput(0);
assert(constantTensor != nullptr);

nvinfer1::Weights constant2Wt {nvinfer1::DataType::kINT32, nullptr, nbDims};
nvinfer1::IElementWiseLayer* divide = network->addElementWise(*shapeTensor, *constantTensor,
nvinfer1::ElementWiseOperation::kSUB);
assert(divide != nullptr);
std::string divideLayerName = "divide_" + name + "_" + std::to_string(layerIdx);
divide->setName(divideLayerName.c_str());
nvinfer1::ITensor* divideTensor = divide->getOutput(0);
assert(divideTensor != nullptr);

int* val2 = new int[nbDims];
val2[0] = batchSize;
for (int i = 1; i < nbDims; ++i) {
val2[i] = 1;
}
constant2Wt.values = val2;

nvinfer1::IConstantLayer* constant2 = network->addConstant(nvinfer1::Dims{1, {nbDims}}, constant2Wt);
assert(constant2 != nullptr);
std::string constant2LayerName = "constant2_" + name + "_" + std::to_string(layerIdx);
constant2->setName(constant2LayerName.c_str());
nvinfer1::ITensor* constant2Tensor = constant2->getOutput(0);

nvinfer1::IElementWiseLayer* newSize = network->addElementWise(*constant1Tensor, *constant2Tensor,
nvinfer1::ElementWiseOperation::kPROD);
assert(newSize != nullptr);
std::string newSizeLayerName = "new_size_" + name + "_" + std::to_string(layerIdx);
newSize->setName(newSizeLayerName.c_str());
nvinfer1::ITensor* newSizeTensor = newSize->getOutput(0);

slice->setInput(2, *newSizeTensor);
slice->setInput(2, *divideTensor);
}
else {
slice = network->addSlice(*input, start, size, stride);
assert(slice != nullptr);
}

assert(slice != nullptr);
std::string sliceLayerName = name + "_" + std::to_string(layerIdx);
slice->setName(sliceLayerName.c_str());
output = slice->getOutput(0);

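The rework above handles a dynamic batch dimension by making the slice size a runtime tensor: the slice is added with an empty size, shape(input) is taken, a constant delta (elements to drop per dimension) is subtracted, and the result is fed to the slice's size input (index 2). A condensed restatement, assuming TensorRT >= 8 and omitting the TensorRT 10 kINT64-to-kINT32 cast shown in the diff:

    #include "NvInfer.h"

    nvinfer1::ITensor* dynamicSlice(nvinfer1::INetworkDefinition* network,
        nvinfer1::ITensor* input, nvinfer1::Dims start, nvinfer1::Dims stride,
        int* delta, int nbDims)
    {
        // Size (input index 2) is supplied later as a runtime tensor.
        nvinfer1::ISliceLayer* slice = network->addSlice(*input, start, nvinfer1::Dims{}, stride);
        nvinfer1::ITensor* shape = network->addShape(*input)->getOutput(0);
        // delta[i] = elements to drop in dimension i, 0 where the dim is kept whole.
        nvinfer1::Weights deltaWt {nvinfer1::DataType::kINT32, delta, nbDims};
        nvinfer1::ITensor* deltaTensor =
            network->addConstant(nvinfer1::Dims{1, {nbDims}}, deltaWt)->getOutput(0);
        nvinfer1::ITensor* newSize = network->addElementWise(*shape, *deltaTensor,
            nvinfer1::ElementWiseOperation::kSUB)->getOutput(0);
        slice->setInput(2, *newSize);  // runtime size = shape(input) - delta
        return slice->getOutput(0);
    }
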
@@ -11,6 +11,6 @@
#include "NvInfer.h"

nvinfer1::ITensor* sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start,
nvinfer1::Dims size, nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network, uint batchSize);
nvinfer1::Dims size, nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network);

#endif

@@ -24,7 +24,13 @@ upsampleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1:
assert(resize != nullptr);
std::string resizeLayerName = "upsample_" + std::to_string(layerIdx);
resize->setName(resizeLayerName.c_str());

#if NV_TENSORRT_MAJOR >= 8 && NV_TENSORRT_MINOR > 0
resize->setResizeMode(nvinfer1::InterpolationMode::kNEAREST);
#else
resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
#endif

resize->setScales(scale, 4);
output = resize->getOutput(0);

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,14 +35,14 @@
static bool
getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* initParams)
{
std::string onnxWtsFilePath = initParams->onnxFilePath;
std::string darknetWtsFilePath = initParams->modelFilePath;
std::string darknetCfgFilePath = initParams->customNetworkConfigFilePath;
std::string onnxFilePath = initParams->onnxFilePath;
std::string wtsFilePath = initParams->modelFilePath;
std::string cfgFilePath = initParams->customNetworkConfigFilePath;

std::string yoloType = onnxWtsFilePath != "" ? "onnx" : "darknet";
std::string yoloType = onnxFilePath != "" ? "onnx" : "darknet";
std::string modelName = yoloType == "onnx" ?
onnxWtsFilePath.substr(0, onnxWtsFilePath.find(".onnx")).substr(onnxWtsFilePath.rfind("/") + 1) :
darknetWtsFilePath.substr(0, darknetWtsFilePath.find(".weights")).substr(darknetWtsFilePath.rfind("/") + 1);
onnxFilePath.substr(0, onnxFilePath.find(".onnx")).substr(onnxFilePath.rfind("/") + 1) :
cfgFilePath.substr(0, cfgFilePath.find(".cfg")).substr(cfgFilePath.rfind("/") + 1);

std::transform(modelName.begin(), modelName.end(), modelName.begin(), [] (uint8_t c) {
return std::tolower(c);
@@ -51,9 +51,9 @@ getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* i
networkInfo.inputBlobName = "input";
networkInfo.networkType = yoloType;
networkInfo.modelName = modelName;
networkInfo.onnxWtsFilePath = onnxWtsFilePath;
networkInfo.darknetWtsFilePath = darknetWtsFilePath;
networkInfo.darknetCfgFilePath = darknetCfgFilePath;
networkInfo.onnxFilePath = onnxFilePath;
networkInfo.wtsFilePath = wtsFilePath;
networkInfo.cfgFilePath = cfgFilePath;
networkInfo.batchSize = initParams->maxBatchSize;
networkInfo.implicitBatch = initParams->forceImplicitBatchDimension;
networkInfo.int8CalibPath = initParams->int8CalibrationFilePath;
@@ -63,26 +63,30 @@ getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* i
networkInfo.scaleFactor = initParams->networkScaleFactor;
networkInfo.offsets = initParams->offsets;
networkInfo.workspaceSize = initParams->workspaceSize;
networkInfo.inputFormat = initParams->networkInputFormat;

if (initParams->networkMode == NvDsInferNetworkMode_FP32)
if (initParams->networkMode == NvDsInferNetworkMode_FP32) {
networkInfo.networkMode = "FP32";
else if (initParams->networkMode == NvDsInferNetworkMode_INT8)
}
else if (initParams->networkMode == NvDsInferNetworkMode_INT8) {
networkInfo.networkMode = "INT8";
else if (initParams->networkMode == NvDsInferNetworkMode_FP16)
}
else if (initParams->networkMode == NvDsInferNetworkMode_FP16) {
networkInfo.networkMode = "FP16";
}

if (yoloType == "onnx") {
if (!fileExists(networkInfo.onnxWtsFilePath)) {
std::cerr << "ONNX model file does not exist\n" << std::endl;
if (!fileExists(networkInfo.onnxFilePath)) {
std::cerr << "ONNX file does not exist\n" << std::endl;
return false;
}
}
else {
if (!fileExists(networkInfo.darknetWtsFilePath)) {
if (!fileExists(networkInfo.wtsFilePath)) {
std::cerr << "Darknet weights file does not exist\n" << std::endl;
return false;
}
else if (!fileExists(networkInfo.darknetCfgFilePath)) {
else if (!fileExists(networkInfo.cfgFilePath)) {
std::cerr << "Darknet cfg file does not exist\n" << std::endl;
return false;
}
@@ -106,7 +110,8 @@ NvDsInferCreateModelParser(const NvDsInferContextInitParams* initParams)
#if NV_TENSORRT_MAJOR >= 8
extern "C" bool
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, nvinfer1::IBuilderConfig* const builderConfig,
const NvDsInferContextInitParams* const initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine*& cudaEngine);
const NvDsInferContextInitParams* const initParams, nvinfer1::DataType dataType,
nvinfer1::ICudaEngine*& cudaEngine);

extern "C" bool
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, nvinfer1::IBuilderConfig* const builderConfig,

@@ -1,38 +0,0 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Edited by Marcos Luciano
* https://www.github.com/marcoslucianops
*/

#include "nvdsinfer_custom_impl.h"

bool
NvDsInferInitializeInputLayers(std::vector<NvDsInferLayerInfo> const& inputLayersInfo,
NvDsInferNetworkInfo const& networkInfo, unsigned int maxBatchSize)
{
float* scaleFactor = (float*) inputLayersInfo[0].buffer;
for (unsigned int i = 0; i < maxBatchSize; i++) {
scaleFactor[i * 2 + 0] = 1.0;
scaleFactor[i * 2 + 1] = 1.0;
}
return true;
}

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,10 +31,6 @@ extern "C" bool
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);

extern "C" bool
NvDsInferParseYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);

static NvDsInferParseObjectInfo
convertBBox(const float& bx1, const float& by1, const float& bx2, const float& by2, const uint& netW, const uint& netH)
{
@@ -65,7 +61,7 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);

if (bbi.width < 1 || bbi.height < 1) {
return;
return;
}

bbi.detectionConfidence = maxProb;
@@ -74,53 +70,23 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
}

static std::vector<NvDsInferParseObjectInfo>
decodeTensorYolo(const float* boxes, const float* scores, const float* classes, const uint& outputSize, const uint& netW,
const uint& netH, const std::vector<float>& preclusterThreshold)
decodeTensorYolo(const float* output, const uint& outputSize, const uint& netW, const uint& netH,
const std::vector<float>& preclusterThreshold)
{
std::vector<NvDsInferParseObjectInfo> binfo;

for (uint b = 0; b < outputSize; ++b) {
float maxProb = scores[b];
int maxIndex = (int) classes[b];
float maxProb = output[b * 6 + 4];
int maxIndex = (int) output[b * 6 + 5];

if (maxProb < preclusterThreshold[maxIndex]) {
continue;
}

float bxc = boxes[b * 4 + 0];
float byc = boxes[b * 4 + 1];
float bw = boxes[b * 4 + 2];
float bh = boxes[b * 4 + 3];

float bx1 = bxc - bw / 2;
float by1 = byc - bh / 2;
float bx2 = bx1 + bw;
float by2 = by1 + bh;

addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
}

return binfo;
}

static std::vector<NvDsInferParseObjectInfo>
decodeTensorYoloE(const float* boxes, const float* scores, const float* classes, const uint& outputSize, const uint& netW,
const uint& netH, const std::vector<float>& preclusterThreshold)
{
std::vector<NvDsInferParseObjectInfo> binfo;

for (uint b = 0; b < outputSize; ++b) {
float maxProb = scores[b];
int maxIndex = (int) classes[b];

if (maxProb < preclusterThreshold[maxIndex]) {
continue;
}

float bx1 = boxes[b * 4 + 0];
float by1 = boxes[b * 4 + 1];
float bx2 = boxes[b * 4 + 2];
float by2 = boxes[b * 4 + 3];
float bx1 = output[b * 6 + 0];
float by1 = output[b * 6 + 1];
float bx2 = output[b * 6 + 2];
float by2 = output[b * 6 + 3];

addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
}
@@ -129,8 +95,9 @@ decodeTensorYoloE(const float* boxes, const float* scores, const float* classes,
}

static bool
NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
if (outputLayersInfo.empty()) {
std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
@@ -139,43 +106,11 @@ NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo

std::vector<NvDsInferParseObjectInfo> objects;

const NvDsInferLayerInfo& boxes = outputLayersInfo[0];
const NvDsInferLayerInfo& scores = outputLayersInfo[1];
const NvDsInferLayerInfo& classes = outputLayersInfo[2];
const NvDsInferLayerInfo& output = outputLayersInfo[0];
const uint outputSize = output.inferDims.d[0];

const uint outputSize = boxes.inferDims.d[0];

std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (boxes.buffer),
(const float*) (scores.buffer), (const float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height,
detectionParams.perClassPreclusterThreshold);

objects.insert(objects.end(), outObjs.begin(), outObjs.end());

objectList = objects;

return true;
}

static bool
NvDsInferParseCustomYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
if (outputLayersInfo.empty()) {
std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
return false;
}

std::vector<NvDsInferParseObjectInfo> objects;

const NvDsInferLayerInfo& boxes = outputLayersInfo[0];
const NvDsInferLayerInfo& scores = outputLayersInfo[1];
const NvDsInferLayerInfo& classes = outputLayersInfo[2];

const uint outputSize = boxes.inferDims.d[0];

std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (boxes.buffer),
(const float*) (scores.buffer), (const float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height,
detectionParams.perClassPreclusterThreshold);
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (output.buffer), outputSize,
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);

objects.insert(objects.end(), outObjs.begin(), outObjs.end());

@@ -191,12 +126,4 @@ NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDs
return NvDsInferParseCustomYolo(outputLayersInfo, networkInfo, detectionParams, objectList);
}

extern "C" bool
NvDsInferParseYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseCustomYoloE(outputLayersInfo, networkInfo, detectionParams, objectList);
}

CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloE);

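The new model output format collapses the former boxes/scores/classes tensors into a single tensor of N detections, each packed as 6 floats: [x1, y1, x2, y2, score, classId]. A self-contained sketch of the indexing the parser relies on (the sample values are made up):

    #include <cstdio>

    struct Det { float x1, y1, x2, y2, score; int cls; };

    static Det readDetection(const float* output, int b)
    {
        Det d;
        d.x1 = output[b * 6 + 0];
        d.y1 = output[b * 6 + 1];
        d.x2 = output[b * 6 + 2];
        d.y2 = output[b * 6 + 3];
        d.score = output[b * 6 + 4];
        d.cls = (int) output[b * 6 + 5];
        return d;
    }

    int main()
    {
        float output[6] = {10.0f, 20.0f, 110.0f, 220.0f, 0.9f, 2.0f};  // one fake detection
        Det d = readDetection(output, 0);
        std::printf("box = (%.0f, %.0f, %.0f, %.0f), score = %.2f, class = %d\n",
            d.x1, d.y1, d.x2, d.y2, d.score, d.cls);
        return 0;
    }
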
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,7 +23,6 @@
* https://www.github.com/marcoslucianops
*/

#include <algorithm>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>

@@ -33,12 +32,8 @@ extern "C" bool
NvDsInferParseYoloCuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);

extern "C" bool
NvDsInferParseYoloECuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);

__global__ void decodeTensorYoloCuda(NvDsInferParseObjectInfo *binfo, float* boxes, float* scores, float* classes,
int outputSize, int netW, int netH, float minPreclusterThreshold)
__global__ void decodeTensorYoloCuda(NvDsInferParseObjectInfo *binfo, const float* output, const uint outputSize,
const uint netW, const uint netH, const float* preclusterThreshold)
{
int x_id = blockIdx.x * blockDim.x + threadIdx.x;

@@ -46,68 +41,28 @@ __global__ void decodeTensorYoloCuda(NvDsInferParseObjectInfo *binfo, float* box
return;
}

float maxProb = scores[x_id];
int maxIndex = (int) classes[x_id];
float maxProb = output[x_id * 6 + 4];
int maxIndex = (int) output[x_id * 6 + 5];

if (maxProb < minPreclusterThreshold) {
if (maxProb < preclusterThreshold[maxIndex]) {
binfo[x_id].detectionConfidence = 0.0;
return;
}

float bxc = boxes[x_id * 4 + 0];
float byc = boxes[x_id * 4 + 1];
float bw = boxes[x_id * 4 + 2];
float bh = boxes[x_id * 4 + 3];
float bx1 = output[x_id * 6 + 0];
float by1 = output[x_id * 6 + 1];
float bx2 = output[x_id * 6 + 2];
float by2 = output[x_id * 6 + 3];

float x0 = bxc - bw / 2;
float y0 = byc - bh / 2;
float x1 = x0 + bw;
float y1 = y0 + bh;
bx1 = fminf(float(netW), fmaxf(float(0.0), bx1));
by1 = fminf(float(netH), fmaxf(float(0.0), by1));
bx2 = fminf(float(netW), fmaxf(float(0.0), bx2));
by2 = fminf(float(netH), fmaxf(float(0.0), by2));

x0 = fminf(float(netW), fmaxf(float(0.0), x0));
y0 = fminf(float(netH), fmaxf(float(0.0), y0));
x1 = fminf(float(netW), fmaxf(float(0.0), x1));
y1 = fminf(float(netH), fmaxf(float(0.0), y1));

binfo[x_id].left = x0;
binfo[x_id].top = y0;
binfo[x_id].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0));
binfo[x_id].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0));
binfo[x_id].detectionConfidence = maxProb;
binfo[x_id].classId = maxIndex;
}

__global__ void decodeTensorYoloECuda(NvDsInferParseObjectInfo *binfo, float* boxes, float* scores, float* classes,
int outputSize, int netW, int netH, float minPreclusterThreshold)
{
int x_id = blockIdx.x * blockDim.x + threadIdx.x;

if (x_id >= outputSize) {
return;
}

float maxProb = scores[x_id];
int maxIndex = (int) classes[x_id];

if (maxProb < minPreclusterThreshold) {
binfo[x_id].detectionConfidence = 0.0;
return;
}

float x0 = boxes[x_id * 4 + 0];
float y0 = boxes[x_id * 4 + 1];
float x1 = boxes[x_id * 4 + 2];
float y1 = boxes[x_id * 4 + 3];

x0 = fminf(float(netW), fmaxf(float(0.0), x0));
y0 = fminf(float(netH), fmaxf(float(0.0), y0));
x1 = fminf(float(netW), fmaxf(float(0.0), x1));
y1 = fminf(float(netH), fmaxf(float(0.0), y1));

binfo[x_id].left = x0;
binfo[x_id].top = y0;
binfo[x_id].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0));
binfo[x_id].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0));
binfo[x_id].left = bx1;
binfo[x_id].top = by1;
binfo[x_id].width = fminf(float(netW), fmaxf(float(0.0), bx2 - bx1));
binfo[x_id].height = fminf(float(netH), fmaxf(float(0.0), by2 - by1));
binfo[x_id].detectionConfidence = maxProb;
binfo[x_id].classId = maxIndex;
}
@@ -121,56 +76,19 @@ static bool NvDsInferParseCustomYoloCuda(std::vector<NvDsInferLayerInfo> const&
return false;
}

const NvDsInferLayerInfo& boxes = outputLayersInfo[0];
const NvDsInferLayerInfo& scores = outputLayersInfo[1];
const NvDsInferLayerInfo& classes = outputLayersInfo[2];
const NvDsInferLayerInfo& output = outputLayersInfo[0];
const uint outputSize = output.inferDims.d[0];

const int outputSize = boxes.inferDims.d[0];
thrust::device_vector<float> perClassPreclusterThreshold = detectionParams.perClassPreclusterThreshold;

thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);

float minPreclusterThreshold = *(std::min_element(detectionParams.perClassPreclusterThreshold.begin(),
detectionParams.perClassPreclusterThreshold.end()));

int threads_per_block = 1024;
int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;
int number_of_blocks = ((outputSize) / threads_per_block) + 1;

decodeTensorYoloCuda<<<number_of_blocks, threads_per_block>>>(
thrust::raw_pointer_cast(objects.data()), (float*) (boxes.buffer), (float*) (scores.buffer),
(float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height, minPreclusterThreshold);

objectList.resize(outputSize);
thrust::copy(objects.begin(), objects.end(), objectList.begin());

return true;
}

static bool NvDsInferParseCustomYoloECuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
if (outputLayersInfo.empty()) {
std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
return false;
}

const NvDsInferLayerInfo& boxes = outputLayersInfo[0];
const NvDsInferLayerInfo& scores = outputLayersInfo[1];
const NvDsInferLayerInfo& classes = outputLayersInfo[2];

const int outputSize = boxes.inferDims.d[0];

thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);

float minPreclusterThreshold = *(std::min_element(detectionParams.perClassPreclusterThreshold.begin(),
detectionParams.perClassPreclusterThreshold.end()));

int threads_per_block = 1024;
int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;

decodeTensorYoloECuda<<<number_of_blocks, threads_per_block>>>(
thrust::raw_pointer_cast(objects.data()), (float*) (boxes.buffer), (float*) (scores.buffer),
(float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height, minPreclusterThreshold);
thrust::raw_pointer_cast(objects.data()), (float*) (output.buffer), outputSize, networkInfo.width,
networkInfo.height, thrust::raw_pointer_cast(perClassPreclusterThreshold.data()));

objectList.resize(outputSize);
thrust::copy(objects.begin(), objects.end(), objectList.begin());
@@ -185,12 +103,4 @@ NvDsInferParseYoloCuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
return NvDsInferParseCustomYoloCuda(outputLayersInfo, networkInfo, detectionParams, objectList);
}

extern "C" bool
NvDsInferParseYoloECuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseCustomYoloECuda(outputLayersInfo, networkInfo, detectionParams, objectList);
}

CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloCuda);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloECuda);

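A note on the changed launch configuration: (outputSize / threads_per_block) + 1 rounds the grid size up but launches one spare block when outputSize is an exact multiple of the block size; the kernel's early return on x_id >= outputSize makes that harmless. A self-contained comparison with the exact ceil-division form:

    #include <cstdio>

    int main()
    {
        int outputSize = 8400;       // e.g. number of candidate detections
        int threadsPerBlock = 1024;
        int blocksNew = (outputSize / threadsPerBlock) + 1;                     // form used above
        int blocksCeil = (outputSize + threadsPerBlock - 1) / threadsPerBlock;  // exact ceiling
        std::printf("new = %d, ceil = %d\n", blocksNew, blocksCeil);  // new = 9, ceil = 9
        return 0;
    }
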
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -69,7 +69,7 @@ fileExists(const std::string fileName, bool verbose)
|
||||
}
|
||||
|
||||
std::vector<float>
|
||||
loadWeights(const std::string weightsFilePath, const std::string& modelName)
|
||||
loadWeights(const std::string weightsFilePath)
|
||||
{
|
||||
assert(fileExists(weightsFilePath));
|
||||
std::cout << "\nLoading pre-trained weights" << std::endl;
|
||||
@@ -81,13 +81,14 @@ loadWeights(const std::string weightsFilePath, const std::string& modelName)
|
||||
assert(file.good());
|
||||
std::string line;
|
||||
|
||||
if (modelName.find("yolov2") != std::string::npos && modelName.find("yolov2-tiny") == std::string::npos) {
|
||||
// Remove 4 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 4);
|
||||
if (weightsFilePath.find("yolov2") != std::string::npos &&
|
||||
weightsFilePath.find("yolov2-tiny") == std::string::npos) {
|
||||
// Remove 4 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 4);
|
||||
}
|
||||
else {
|
||||
// Remove 5 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 5);
|
||||
// Remove 5 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 5);
|
||||
}
|
||||
|
||||
char floatWeight[4];
|
||||
@@ -105,7 +106,7 @@ loadWeights(const std::string weightsFilePath, const std::string& modelName)
|
||||
assert(0);
|
||||
}
|
||||
|
||||
std::cout << "Loading weights of " << modelName << " complete" << std::endl;
|
||||
std::cout << "Loading " << weightsFilePath << " complete" << std::endl;
|
||||
std::cout << "Total weights read: " << weights.size() << std::endl;
|
||||
|
||||
return weights;
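
For reference, the .weights layout this function consumes is a short int32 header (4 values for yolov2-family files, 5 otherwise) followed by raw little-endian float32 values until EOF. A self-contained sketch of that read loop, with illustrative names:

#include <fstream>
#include <string>
#include <vector>

std::vector<float> readDarknetFloats(const std::string& weightsFilePath, size_t headerInts)
{
    std::ifstream file(weightsFilePath, std::ios_base::binary);
    file.ignore(4 * headerInts);  // skip the int32 header, as above

    std::vector<float> weights;
    char floatWeight[4];
    while (file.read(floatWeight, 4)) {
        weights.push_back(*reinterpret_cast<float*>(floatWeight));  // 4 bytes -> one float32
    }
    return weights;
}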

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -41,13 +41,14 @@ float clamp(const float val, const float minVal, const float maxVal);

bool fileExists(const std::string fileName, bool verbose = true);

std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& modelName);
std::vector<float> loadWeights(const std::string weightsFilePath);

std::string dimsToString(const nvinfer1::Dims d);

int getNumChannels(nvinfer1::ITensor* t);

void printLayerInfo(
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr);
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput,
std::string weightPtr);

#endif

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,13 +34,14 @@

Yolo::Yolo(const NetworkInfo& networkInfo) : m_InputBlobName(networkInfo.inputBlobName),
m_NetworkType(networkInfo.networkType), m_ModelName(networkInfo.modelName),
m_OnnxWtsFilePath(networkInfo.onnxWtsFilePath), m_DarknetWtsFilePath(networkInfo.darknetWtsFilePath),
m_DarknetCfgFilePath(networkInfo.darknetCfgFilePath), m_BatchSize(networkInfo.batchSize),
m_OnnxFilePath(networkInfo.onnxFilePath), m_WtsFilePath(networkInfo.wtsFilePath),
m_CfgFilePath(networkInfo.cfgFilePath), m_BatchSize(networkInfo.batchSize),
m_ImplicitBatch(networkInfo.implicitBatch), m_Int8CalibPath(networkInfo.int8CalibPath),
m_DeviceType(networkInfo.deviceType), m_NumDetectedClasses(networkInfo.numDetectedClasses),
m_ClusterMode(networkInfo.clusterMode), m_NetworkMode(networkInfo.networkMode), m_ScaleFactor(networkInfo.scaleFactor),
m_Offsets(networkInfo.offsets), m_WorkspaceSize(networkInfo.workspaceSize), m_InputC(0), m_InputH(0), m_InputW(0),
m_InputSize(0), m_NumClasses(0), m_LetterBox(0), m_NewCoords(0), m_YoloCount(0)
m_ClusterMode(networkInfo.clusterMode), m_NetworkMode(networkInfo.networkMode),
m_ScaleFactor(networkInfo.scaleFactor), m_Offsets(networkInfo.offsets), m_WorkspaceSize(networkInfo.workspaceSize),
m_InputFormat(networkInfo.inputFormat), m_InputC(0), m_InputH(0), m_InputW(0), m_InputSize(0), m_NumClasses(0),
m_LetterBox(0), m_NewCoords(0), m_YoloCount(0)
{
}

@@ -82,8 +83,8 @@ Yolo::createEngine(nvinfer1::IBuilder* builder)
parser = nvonnxparser::createParser(*network, logger);
#endif

if (!parser->parseFromFile(m_OnnxWtsFilePath.c_str(), static_cast<INT>(nvinfer1::ILogger::Severity::kWARNING))) {
std::cerr << "\nCould not parse the ONNX model\n" << std::endl;
if (!parser->parseFromFile(m_OnnxFilePath.c_str(), static_cast<INT>(nvinfer1::ILogger::Severity::kWARNING))) {
std::cerr << "\nCould not parse the ONNX file\n" << std::endl;

#if NV_TENSORRT_MAJOR >= 8
delete parser;
@@ -101,7 +102,7 @@ Yolo::createEngine(nvinfer1::IBuilder* builder)
m_InputW = network->getInput(0)->getDimensions().d[3];
}
else {
m_ConfigBlocks = parseConfigFile(m_DarknetCfgFilePath);
m_ConfigBlocks = parseConfigFile(m_CfgFilePath);
parseConfigBlocks();
if (parseModel(*network) != NVDSINFER_SUCCESS) {

@@ -138,15 +139,16 @@ Yolo::createEngine(nvinfer1::IBuilder* builder)
if (m_NetworkType == "darknet") {
if (m_NumClasses != m_NumDetectedClasses) {
std::cout << "NOTE: Number of classes mismatch, make sure to set num-detected-classes=" << m_NumClasses
<< " in config_infer file\n" << std::endl;
<< " on the config_infer file\n" << std::endl;
}
if (m_LetterBox == 1) {
std::cout << "NOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file"
<< " to get better accuracy\n" << std::endl;
std::cout << "NOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 on the " <<
"config_infer file to get better accuracy\n" << std::endl;
}
}
if (m_ClusterMode != 2) {
std::cout << "NOTE: Wrong cluster-mode is set, make sure to set cluster-mode=2 in config_infer file\n" << std::endl;
std::cout << "NOTE: Wrong cluster-mode is set, make sure to set cluster-mode=2 on the config_infer file\n" <<
std::endl;
}

if (m_NetworkMode == "FP16") {
@@ -156,9 +158,11 @@ Yolo::createEngine(nvinfer1::IBuilder* builder)
else if (m_NetworkMode == "INT8") {
assert(builder->platformHasFastInt8());
config->setFlag(nvinfer1::BuilderFlag::kINT8);
if (m_Int8CalibPath != "" && !fileExists(m_Int8CalibPath)) {
if (m_Int8CalibPath != "") {

#ifdef OPENCV
fileExists(m_Int8CalibPath);

std::string calib_image_list;
int calib_batch_size;
if (getenv("INT8_CALIB_IMG_PATH")) {
@@ -176,25 +180,10 @@ Yolo::createEngine(nvinfer1::IBuilder* builder)
assert(0);
}
nvinfer1::IInt8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(calib_batch_size, m_InputC, m_InputH,
m_InputW, m_ScaleFactor, m_Offsets, calib_image_list, m_Int8CalibPath);
m_InputW, m_ScaleFactor, m_Offsets, m_InputFormat, calib_image_list, m_Int8CalibPath);
config->setInt8Calibrator(calibrator);
#else
std::cerr << "OpenCV is required to run INT8 calibrator\n" << std::endl;

#if NV_TENSORRT_MAJOR >= 8
if (m_NetworkType == "onnx") {
delete parser;
}
delete network;
#else
if (m_NetworkType == "onnx") {
parser->destroy();
}
config->destroy();
network->destroy();
#endif

return nullptr;
assert(0 && "OpenCV is required to run INT8 calibrator\n");
#endif

}
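
For context on what the calibrator object does once registered: TensorRT drives INT8 calibration by repeatedly calling getBatch() until it returns false, and consults the cache callbacks to skip recalibration when a table already exists. A minimal skeleton of that IInt8EntropyCalibrator2 contract (illustrative, not this repo's class):

#include <cstddef>
#include <cstdint>
#include <NvInfer.h>

class MinimalCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
  public:
    int32_t getBatchSize() const noexcept override { return 1; }

    bool getBatch(void* bindings[], const char* names[], int32_t nbBindings) noexcept override {
        return false;  // fill bindings[0] with a preprocessed device batch and return true while data remains
    }

    const void* readCalibrationCache(std::size_t& length) noexcept override {
        length = 0;
        return nullptr;  // returning a cached table here skips the image pass entirely
    }

    void writeCalibrationCache(const void* cache, std::size_t length) noexcept override {}
};
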
@@ -204,7 +193,12 @@ Yolo::createEngine(nvinfer1::IBuilder* builder)
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
#endif

nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(*builder->getLogger());
assert(runtime);

nvinfer1::IHostMemory* serializedEngine = builder->buildSerializedNetwork(*network, *config);

nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size());
if (engine) {
std::cout << "Building complete\n" << std::endl;
}
@@ -212,6 +206,12 @@ Yolo::createEngine(nvinfer1::IBuilder* builder)
std::cerr << "Building engine failed\n" << std::endl;
}

#if NV_TENSORRT_MAJOR >= 8
delete serializedEngine;
#else
serializedEngine->destroy();
#endif
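
The build-path change above in condensed form: newer TensorRT releases deprecate and eventually remove buildEngineWithConfig(), so the engine is now produced by serializing a plan and deserializing it through a runtime. A minimal sketch, assuming builder, network and config already exist as above:

nvinfer1::IHostMemory* plan = builder->buildSerializedNetwork(*network, *config);
if (plan) {
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(*builder->getLogger());
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(plan->data(), plan->size());
    delete plan;  // the serialized plan can be freed (or saved to disk) once deserialized
    // ... use engine ...
}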

#ifdef GRAPH
nvinfer1::IExecutionContext *context = engine->createExecutionContext();
nvinfer1::IEngineInspector *inpector = engine->createEngineInspector();
@@ -252,7 +252,7 @@ NvDsInferStatus
Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
destroyNetworkUtils();

std::vector<float> weights = loadWeights(m_DarknetWtsFilePath, m_ModelName);
std::vector<float> weights = loadWeights(m_WtsFilePath);
std::cout << "Building YOLO network\n" << std::endl;
NvDsInferStatus status = buildYoloNetwork(weights, network);

@@ -292,14 +292,15 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
else if (m_ConfigBlocks.at(i).at("type") == "conv" || m_ConfigBlocks.at(i).at("type") == "convolutional") {
int channels = getNumChannels(previous);
std::string inputVol = dimsToString(previous->getDimensions());
previous = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network);
previous = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous,
&network);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(previous);
std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation");
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
}
else if (m_ConfigBlocks.at(i).at("type") == "deconvolutional") {
else if (m_ConfigBlocks.at(i).at("type") == "deconv" || m_ConfigBlocks.at(i).at("type") == "deconvolutional") {
int channels = getNumChannels(previous);
std::string inputVol = dimsToString(previous->getDimensions());
previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous,
@@ -328,11 +329,13 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
std::string layerName = "implicit";
printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr));
}
else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || m_ConfigBlocks.at(i).at("type") == "control_channels") {
else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" ||
m_ConfigBlocks.at(i).at("type") == "control_channels") {
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
int from = stoi(m_ConfigBlocks.at(i).at("from"));
if (from > 0)
if (from > 0) {
from = from - i + 1;
}
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
assert(i + from - 1 < i - 2);
@@ -348,41 +351,46 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
else if (m_ConfigBlocks.at(i).at("type") == "shortcut") {
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
int from = stoi(m_ConfigBlocks.at(i).at("from"));
if (from > 0)
if (from > 0) {
from = from - i + 1;
}
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
assert(i + from - 1 < i - 2);

std::string activation = "linear";
if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end())
if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) {
activation = m_ConfigBlocks.at(i).at("activation");
}

std::string inputVol = dimsToString(previous->getDimensions());
std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions());
previous = shortcutLayer(i, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous,
tensorOutputs[i + from - 1], &network, m_BatchSize);
tensorOutputs[i + from - 1], &network);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(previous);
std::string layerName = "shortcut_" + activation + ": " + std::to_string(i + from - 1);
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");

if (inputVol != shortcutVol)
if (inputVol != shortcutVol) {
std::cout << inputVol << " +" << shortcutVol << std::endl;
}
}
else if (m_ConfigBlocks.at(i).at("type") == "sam") {
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
int from = stoi(m_ConfigBlocks.at(i).at("from"));
if (from > 0)
if (from > 0) {
from = from - i + 1;
}
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
assert(i + from - 1 < i - 2);

std::string activation = "linear";
if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end())
if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) {
activation = m_ConfigBlocks.at(i).at("activation");
}

std::string inputVol = dimsToString(previous->getDimensions());
previous = samLayer(i, activation, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network);
@@ -394,7 +402,7 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
}
else if (m_ConfigBlocks.at(i).at("type") == "route") {
std::string layers;
previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network, m_BatchSize);
previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(previous);
@@ -422,7 +430,7 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
}
else if (m_ConfigBlocks.at(i).at("type") == "reorg" || m_ConfigBlocks.at(i).at("type") == "reorg3d") {
std::string inputVol = dimsToString(previous->getDimensions());
previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network, m_BatchSize);
previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(previous);
@@ -441,7 +449,7 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
tensorOutputs.push_back(previous);
yoloTensorInputs[yoloCountInputs] = previous;
++yoloCountInputs;
std::string layerName = m_ConfigBlocks.at(i).at("type") == "yolo" ? "yolo" : "region";
std::string layerName = m_ConfigBlocks.at(i).at("type");
printLayerInfo(layerIndex, layerName, inputVol, "-", "-");
}
else if (m_ConfigBlocks.at(i).at("type") == "dropout") {
@@ -465,27 +473,19 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
outputSize += curYoloTensor.numBBoxes * curYoloTensor.gridSizeY * curYoloTensor.gridSizeX;
}

nvinfer1::IPluginV2DynamicExt* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors,
outputSize);
nvinfer1::IPluginV2DynamicExt* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords,
m_YoloTensors, outputSize);
assert(yoloPlugin != nullptr);
nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin);
assert(yolo != nullptr);
std::string yoloLayerName = "yolo";
std::string yoloLayerName = m_WtsFilePath;
yolo->setName(yoloLayerName.c_str());

std::string outputlayerName;
nvinfer1::ITensor* detection_boxes = yolo->getOutput(0);
outputlayerName = "boxes";
detection_boxes->setName(outputlayerName.c_str());
nvinfer1::ITensor* detection_scores = yolo->getOutput(1);
outputlayerName = "scores";
detection_scores->setName(outputlayerName.c_str());
nvinfer1::ITensor* detection_classes = yolo->getOutput(2);
outputlayerName = "classes";
detection_classes->setName(outputlayerName.c_str());
network.markOutput(*detection_boxes);
network.markOutput(*detection_scores);
network.markOutput(*detection_classes);
nvinfer1::ITensor* detection_output = yolo->getOutput(0);
outputlayerName = "output";
detection_output->setName(outputlayerName.c_str());
network.markOutput(*detection_output);
}
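
With the plugin now exposing a single tensor (see getNbOutputs() in yoloPlugins.h below), downstream code sees one "output" blob of shape [batch, outputSize, 6], each row packed as x1, y1, x2, y2, score, classId by the kernels further down. A hedged host-side sketch of walking that layout, assuming the tensor was copied to host memory:

struct Detection { float x1, y1, x2, y2, score; int classId; };

std::vector<Detection> decodeOutput(const float* output, int outputSize, float threshold)
{
    std::vector<Detection> dets;
    for (int i = 0; i < outputSize; ++i) {
        const float* d = output + i * 6;  // one 6-float row per candidate box
        if (d[4] < threshold) {
            continue;  // d[4] is maxProb * objectness
        }
        dets.push_back({d[0], d[1], d[2], d[3], d[4], (int) d[5]});
    }
    return dets;
}
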
else {
std::cerr << "\nError in yolo cfg file" << std::endl;
@@ -493,8 +493,9 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
}

std::cout << "\nOutput YOLO blob names: " << std::endl;
for (auto& tensor : m_YoloTensors)
for (auto& tensor : m_YoloTensors) {
std::cout << tensor.blobName << std::endl;
}

int nbLayers = network.getNbLayers();
std::cout << "\nTotal number of YOLO layers: " << nbLayers << "\n" << std::endl;
@@ -513,8 +514,9 @@ Yolo::parseConfigFile(const std::string cfgFilePath)
std::map<std::string, std::string> block;

while (getline(file, line)) {
if (line.size() == 0 || line.front() == ' ' || line.front() == '#')
if (line.size() == 0 || line.front() == ' ' || line.front() == '#') {
continue;
}

line = trim(line);
if (line.front() == '[') {
@@ -543,20 +545,21 @@ Yolo::parseConfigBlocks()
{
for (auto block : m_ConfigBlocks) {
if (block.at("type") == "net") {
assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg");
assert((block.find("height") != block.end()) && "Missing 'height' param in network cfg");
assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg");
assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg");

m_InputC = std::stoul(block.at("channels"));
m_InputH = std::stoul(block.at("height"));
m_InputW = std::stoul(block.at("width"));
m_InputC = std::stoul(block.at("channels"));

m_InputSize = m_InputC * m_InputH * m_InputW;

if (block.find("letter_box") != block.end())
if (block.find("letter_box") != block.end()) {
m_LetterBox = std::stoul(block.at("letter_box"));
}
}
else if ((block.at("type") == "region") || (block.at("type") == "yolo"))
{
else if ((block.at("type") == "region") || (block.at("type") == "yolo")) {
assert((block.find("num") != block.end()) &&
std::string("Missing 'num' param in " + block.at("type") + " layer").c_str());
assert((block.find("classes") != block.end()) &&
@@ -568,8 +571,9 @@ Yolo::parseConfigBlocks()

m_NumClasses = std::stoul(block.at("classes"));

if (block.find("new_coords") != block.end())
if (block.find("new_coords") != block.end()) {
m_NewCoords = std::stoul(block.at("new_coords"));
}

TensorInfo outputTensor;

@@ -605,12 +609,15 @@ Yolo::parseConfigBlocks()
}
}

if (block.find("scale_x_y") != block.end())
if (block.find("scale_x_y") != block.end()) {
outputTensor.scaleXY = std::stof(block.at("scale_x_y"));
else
}
else {
outputTensor.scaleXY = 1.0;
}

outputTensor.numBBoxes = outputTensor.mask.size() > 0 ? outputTensor.mask.size() : std::stoul(trim(block.at("num")));
outputTensor.numBBoxes = outputTensor.mask.size() > 0 ? outputTensor.mask.size() :
std::stoul(trim(block.at("num")));

m_YoloTensors.push_back(outputTensor);
}
@@ -620,8 +627,10 @@ Yolo::parseConfigBlocks()
void
Yolo::destroyNetworkUtils()
{
for (uint i = 0; i < m_TrtWeights.size(); ++i)
if (m_TrtWeights[i].count > 0)
for (uint i = 0; i < m_TrtWeights.size(); ++i) {
if (m_TrtWeights[i].count > 0) {
free(const_cast<void*>(m_TrtWeights[i].values));
}
}
m_TrtWeights.clear();
}

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -61,9 +61,9 @@ struct NetworkInfo
std::string inputBlobName;
std::string networkType;
std::string modelName;
std::string onnxWtsFilePath;
std::string darknetWtsFilePath;
std::string darknetCfgFilePath;
std::string onnxFilePath;
std::string wtsFilePath;
std::string cfgFilePath;
uint batchSize;
int implicitBatch;
std::string int8CalibPath;
@@ -74,6 +74,7 @@ struct NetworkInfo
float scaleFactor;
const float* offsets;
uint workspaceSize;
int inputFormat;
};

struct TensorInfo
@@ -96,8 +97,7 @@ class Yolo : public IModelParser {
bool hasFullDimsSupported() const override { return false; }

const char* getModelName() const override {
return m_NetworkType == "onnx" ? m_OnnxWtsFilePath.substr(0, m_OnnxWtsFilePath.find(".onnx")).c_str() :
m_DarknetCfgFilePath.substr(0, m_DarknetCfgFilePath.find(".cfg")).c_str();
return m_ModelName.c_str();
}
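
The getModelName() rewrite also fixes a lifetime bug, not just style: substr() returns a temporary std::string, so calling c_str() on it (as the old return statement did) hands the caller a pointer that dangles as soon as the temporary is destroyed. A contrived illustration:

#include <string>

const char* dangling(const std::string& path) {
    return path.substr(0, path.find(".onnx")).c_str();  // pointer into a temporary: invalid after this line
}

const char* stable(const std::string& storedName) {
    return storedName.c_str();  // valid for as long as the referenced string lives, as with m_ModelName
}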

NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
@@ -112,9 +112,9 @@ class Yolo : public IModelParser {
const std::string m_InputBlobName;
const std::string m_NetworkType;
const std::string m_ModelName;
const std::string m_OnnxWtsFilePath;
const std::string m_DarknetWtsFilePath;
const std::string m_DarknetCfgFilePath;
const std::string m_OnnxFilePath;
const std::string m_WtsFilePath;
const std::string m_CfgFilePath;
const uint m_BatchSize;
const int m_ImplicitBatch;
const std::string m_Int8CalibPath;
@@ -125,6 +125,7 @@ class Yolo : public IModelParser {
const float m_ScaleFactor;
const float* m_Offsets;
const uint m_WorkspaceSize;
const int m_InputFormat;

uint m_InputC;
uint m_InputH;

@@ -7,8 +7,8 @@

inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }

__global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, float* classes, const uint netWidth,
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
__global__ void gpuYoloLayer(const float* input, float* output, const uint netWidth, const uint netHeight,
const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
@@ -50,22 +50,22 @@ __global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, fl

int count = numGridCells * z_id + bbindex + lastInputSize;

boxes[count * 4 + 0] = xc;
boxes[count * 4 + 1] = yc;
boxes[count * 4 + 2] = w;
boxes[count * 4 + 3] = h;
scores[count] = maxProb * objectness;
classes[count] = (float) maxIndex;
output[count * 6 + 0] = xc - w * 0.5;
output[count * 6 + 1] = yc - h * 0.5;
output[count * 6 + 2] = xc + w * 0.5;
output[count * 6 + 3] = yc + h * 0.5;
output[count * 6 + 4] = maxProb * objectness;
output[count * 6 + 5] = (float) maxIndex;
}
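
In short, each detection row grows from four floats spread over three tensors to six floats in one: the center-format xc, yc, w, h written by the old kernel becomes corner-format x1 = xc - w/2, y1 = yc - h/2, x2 = xc + w/2, y2 = yc + h/2, followed by the combined confidence maxProb * objectness and the class index.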

cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize,
const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight,
const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);

cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize,
const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight,
const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
@@ -75,9 +75,7 @@ cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* cl
for (unsigned int batch = 0; batch < batchSize; ++batch) {
gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*> (input) + (batch * inputSize),
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
reinterpret_cast<float*> (classes) + (batch * 1 * outputSize),
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
}
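
The grid paired with dim3 threads_per_block(16, 16, 4) is computed outside this hunk; a plausible rounding-up pattern that covers every (x, y, anchor) cell of a gridSizeX x gridSizeY x numBBoxes volume looks like this (illustrative):

dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1,
    (gridSizeY / threads_per_block.y) + 1,
    (numBBoxes / threads_per_block.z) + 1);
// launches at least one thread per cell; the kernels bounds-check the overshoot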

@@ -5,8 +5,8 @@

#include <stdint.h>

__global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores, float* classes, const uint netWidth,
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
__global__ void gpuYoloLayer_nc(const float* input, float* output, const uint netWidth, const uint netHeight,
const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
@@ -29,9 +29,11 @@ __global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores,
float yc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight /
gridSizeY;

float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * anchors[mask[z_id] * 2];
float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) *
anchors[mask[z_id] * 2];

float h = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) * anchors[mask[z_id] * 2 + 1];
float h = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) *
anchors[mask[z_id] * 2 + 1];

const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];

@@ -48,22 +50,22 @@ __global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores,

int count = numGridCells * z_id + bbindex + lastInputSize;

boxes[count * 4 + 0] = xc;
boxes[count * 4 + 1] = yc;
boxes[count * 4 + 2] = w;
boxes[count * 4 + 3] = h;
scores[count] = maxProb * objectness;
classes[count] = (float) maxIndex;
output[count * 6 + 0] = xc - w * 0.5;
output[count * 6 + 1] = yc - h * 0.5;
output[count * 6 + 2] = xc + w * 0.5;
output[count * 6 + 3] = yc + h * 0.5;
output[count * 6 + 4] = maxProb * objectness;
output[count * 6 + 5] = (float) maxIndex;
}

cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize,
const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight,
const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);

cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize,
const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight,
const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
@@ -73,9 +75,7 @@ cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void*
for (unsigned int batch = 0; batch < batchSize; ++batch) {
gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*> (input) + (batch * inputSize),
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
reinterpret_cast<float*> (classes) + (batch * 1 * outputSize),
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
}

@@ -27,9 +27,9 @@ __device__ void softmaxGPU(const float* input, const int bbindex, const int numG
}
}

__global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, float* scores, float* classes,
const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
const uint numBBoxes, const uint64_t lastInputSize, const float* anchors)
__global__ void gpuRegionLayer(const float* input, float* softmax, float* output, const uint netWidth,
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
const uint64_t lastInputSize, const float* anchors)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
@@ -42,15 +42,17 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes,
const int numGridCells = gridSizeX * gridSizeY;
const int bbindex = y_id * gridSizeX + x_id;

float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / gridSizeX;
float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth /
gridSizeX;

float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / gridSizeY;
float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight /
gridSizeY;

float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[z_id * 2] * netWidth /
gridSizeX;

float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[z_id * 2 + 1] * netHeight /
gridSizeY;
float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[z_id * 2 + 1] *
netHeight / gridSizeY;

const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);

@@ -69,22 +71,22 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes,

int count = numGridCells * z_id + bbindex + lastInputSize;

boxes[count * 4 + 0] = xc;
boxes[count * 4 + 1] = yc;
boxes[count * 4 + 2] = w;
boxes[count * 4 + 3] = h;
scores[count] = maxProb * objectness;
classes[count] = (float) maxIndex;
output[count * 6 + 0] = xc - w * 0.5;
output[count * 6 + 1] = yc - h * 0.5;
output[count * 6 + 2] = xc + w * 0.5;
output[count * 6 + 3] = yc + h * 0.5;
output[count * 6 + 4] = maxProb * objectness;
output[count * 6 + 5] = (float) maxIndex;
}

cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize,
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
const uint& numBBoxes, const void* anchors, cudaStream_t stream);

cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize,
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
const uint& numBBoxes, const void* anchors, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
@@ -95,9 +97,7 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void*
gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*> (input) + (batch * inputSize),
reinterpret_cast<float*> (softmax) + (batch * inputSize),
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
reinterpret_cast<float*> (classes) + (batch * 1 * outputSize),
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize,
reinterpret_cast<const float*> (anchors));
}

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -38,19 +38,19 @@ namespace {
}
}

cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize,
const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight,
const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);

cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize,
const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight,
const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);

cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize,
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
const uint& numBBoxes, const void* anchors, cudaStream_t stream);

YoloLayer::YoloLayer(const void* data, size_t length) {
@@ -98,6 +98,8 @@ YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& nu
{
assert(m_NetWidth > 0);
assert(m_NetHeight > 0);
assert(m_NumClasses > 0);
assert(m_OutputSize > 0);
};

nvinfer1::IPluginV2DynamicExt*
@@ -155,13 +157,15 @@ YoloLayer::serialize(void* buffer) const noexcept

uint anchorsSize = curYoloTensor.anchors.size();
write(d, anchorsSize);
for (uint j = 0; j < anchorsSize; ++j)
for (uint j = 0; j < anchorsSize; ++j) {
write(d, curYoloTensor.anchors[j]);
}

uint maskSize = curYoloTensor.mask.size();
write(d, maskSize);
for (uint j = 0; j < maskSize; ++j)
for (uint j = 0; j < maskSize; ++j) {
write(d, curYoloTensor.mask[j]);
}
}
}

@@ -169,17 +173,14 @@ nvinfer1::DimsExprs
YoloLayer::getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT nbInputDims,
nvinfer1::IExprBuilder& exprBuilder)noexcept
{
assert(index < 3);
if (index == 0) {
return nvinfer1::DimsExprs{3, {inputs->d[0], exprBuilder.constant(static_cast<int>(m_OutputSize)),
exprBuilder.constant(4)}};
}
assert(index < 1);
return nvinfer1::DimsExprs{3, {inputs->d[0], exprBuilder.constant(static_cast<int>(m_OutputSize)),
exprBuilder.constant(1)}};
exprBuilder.constant(6)}};
}

bool
YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept
YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs)
noexcept
{
return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && inOut[pos].type == nvinfer1::DataType::kFLOAT;
}
@@ -187,7 +188,7 @@ YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc*
nvinfer1::DataType
YoloLayer::getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept
{
assert(index < 3);
assert(index < 1);
return nvinfer1::DataType::kFLOAT;
}

@@ -206,10 +207,6 @@ YoloLayer::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::
{
INT batchSize = inputDesc[0].dims.d[0];

void* boxes = outputs[0];
void* scores = outputs[1];
void* classes = outputs[2];

uint64_t lastInputSize = 0;

uint yoloTensorsSize = m_YoloTensors.size();
@@ -223,45 +220,47 @@ YoloLayer::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::
const std::vector<float> anchors = curYoloTensor.anchors;
const std::vector<int> mask = curYoloTensor.mask;

void* v_anchors;
void* v_mask;
void* d_anchors;
void* d_mask;
if (anchors.size() > 0) {
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
CUDA_CHECK(cudaMemcpyAsync(v_anchors, anchors.data(), sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream));
CUDA_CHECK(cudaMalloc(&d_anchors, sizeof(float) * anchors.size()));
CUDA_CHECK(cudaMemcpyAsync(d_anchors, anchors.data(), sizeof(float) * anchors.size(), cudaMemcpyHostToDevice,
stream));
}
if (mask.size() > 0) {
CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
CUDA_CHECK(cudaMemcpyAsync(v_mask, mask.data(), sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
CUDA_CHECK(cudaMalloc(&d_mask, sizeof(int) * mask.size()));
CUDA_CHECK(cudaMemcpyAsync(d_mask, mask.data(), sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
}

const uint64_t inputSize = (numBBoxes * (4 + 1 + m_NumClasses)) * gridSizeY * gridSizeX;
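
For scale: with 3 anchors per head, 80 classes and a 19x19 grid, this works out to inputSize = 3 * (4 + 1 + 80) * 19 * 19 = 92055 floats per image for that head.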

if (mask.size() > 0) {
if (m_NewCoords) {
CUDA_CHECK(cudaYoloLayer_nc(inputs[i], boxes, scores, classes, batchSize, inputSize, m_OutputSize, lastInputSize,
m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
CUDA_CHECK(cudaYoloLayer_nc(inputs[i], outputs[0], batchSize, inputSize, m_OutputSize, lastInputSize,
m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, d_anchors, d_mask,
stream));
}
else {
CUDA_CHECK(cudaYoloLayer(inputs[i], boxes, scores, classes, batchSize, inputSize, m_OutputSize, lastInputSize,
m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
CUDA_CHECK(cudaYoloLayer(inputs[i], outputs[0], batchSize, inputSize, m_OutputSize, lastInputSize, m_NetWidth,
m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, d_anchors, d_mask, stream));
}
}
else {
void* softmax;
CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
CUDA_CHECK(cudaMemsetAsync((float*) softmax, 0, sizeof(float) * inputSize * batchSize, stream));
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream));

CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, boxes, scores, classes, batchSize, inputSize, m_OutputSize,
lastInputSize, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, stream));
CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, outputs[0], batchSize, inputSize, m_OutputSize, lastInputSize,
m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, d_anchors, stream));

CUDA_CHECK(cudaFree(softmax));
}

if (anchors.size() > 0) {
CUDA_CHECK(cudaFree(v_anchors));
CUDA_CHECK(cudaFree(d_anchors));
}
if (mask.size() > 0) {
CUDA_CHECK(cudaFree(v_mask));
CUDA_CHECK(cudaFree(d_mask));
}

lastInputSize += numBBoxes * gridSizeY * gridSizeX;
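
This running offset is what lets every head write into its own slice of the shared output tensor. A worked example, assuming a 608x608 model with three heads at strides 32/16/8 and 3 masked anchors each (hypothetical numbers, not taken from this diff):

uint64_t lastInputSize = 0;
const uint grids[3][2] = {{19, 19}, {38, 38}, {76, 76}};
for (int i = 0; i < 3; ++i) {
    // head i fills rows [lastInputSize, lastInputSize + 3 * gy * gx) of the output
    lastInputSize += 3ULL * grids[i][0] * grids[i][1];
}
// total: 3 * (19*19 + 38*38 + 76*76) = 22743 rows, which is the plugin's outputSize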

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,12 +30,12 @@

#include "yolo.h"

#define CUDA_CHECK(status) { \
if (status != 0) { \
std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << __LINE__ << \
std::endl; \
abort(); \
} \
#define CUDA_CHECK(status) { \
if (status != 0) { \
std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << \
__LINE__ << std::endl; \
abort(); \
} \
}

namespace {
@@ -62,7 +62,7 @@ class YoloLayer : public nvinfer1::IPluginV2DynamicExt {

void serialize(void* buffer) const noexcept override;

int getNbOutputs() const noexcept override { return 3; }
int getNbOutputs() const noexcept override { return 1; }

nvinfer1::DimsExprs getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT nbInputDims,
nvinfer1::IExprBuilder& exprBuilder) noexcept override;
@@ -70,8 +70,8 @@ class YoloLayer : public nvinfer1::IPluginV2DynamicExt {
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, INT nbInputs,
const nvinfer1::PluginTensorDesc* outputs, INT nbOutputs) const noexcept override { return 0; }

bool supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept
override;
bool supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs)
noexcept override;

const char* getPluginType() const noexcept override { return YOLOLAYER_PLUGIN_NAME; }

@@ -84,8 +84,8 @@ class YoloLayer : public nvinfer1::IPluginV2DynamicExt {
nvinfer1::DataType getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept
override;

void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator)
noexcept override {}
void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext,
nvinfer1::IGpuAllocator* gpuAllocator) noexcept override {}

void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, INT nbInput,
const nvinfer1::DynamicPluginTensorDesc* out, INT nbOutput) noexcept override;
@@ -126,8 +126,8 @@ class YoloLayerPluginCreator : public nvinfer1::IPluginCreator {
return nullptr;
}

nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept
override {
nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength)
noexcept override {
std::cout << "Deserialize yoloLayer plugin: " << name << std::endl;
return new YoloLayer(serialData, serialLength);
}