Big update
This commit is contained in:
@@ -10,7 +10,7 @@
|
||||
|
||||
nvinfer1::ITensor*
|
||||
batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
@@ -26,41 +26,21 @@ batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vec
|
||||
std::vector<float> bnRunningMean;
|
||||
std::vector<float> bnRunningVar;
|
||||
|
||||
if (weightsType == "weights") {
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
|
||||
++weightPtr;
|
||||
}
|
||||
|
||||
int size = filters;
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "c2f_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "convolutional_layer.h"
|
||||
|
||||
/*
 * Builds the layers for a "c2f" cfg block.
 *
 * The input is sliced in two along axis 0 (each slice spans d[0] / 2 entries).
 * The second slice is then run through `n` pairs of convolutionalLayer() calls,
 * optionally summed with the tensor that entered the pair when `shortcut` is "1".
 * Both slices and every per-iteration result are concatenated along axis 0.
 *
 * Required block keys: "type" (== "c2f"), "n", "shortcut", "filters".
 * weights / trtWeights / weightPtr / weightsType / eps are forwarded unchanged to
 * convolutionalLayer().
 *
 * Returns the output tensor of the final concatenation layer.
 */
nvinfer1::ITensor*
c2fLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "c2f");
  assert(block.find("n") != block.end());
  assert(block.find("shortcut") != block.end());
  assert(block.find("filters") != block.end());

  int repeats = std::stoi(block.at("n"));
  bool useShortcut = (block.at("shortcut") == "1");
  // Passed by reference to convolutionalLayer(), so it must stay a mutable lvalue.
  int filters = std::stoi(block.at("filters"));

  nvinfer1::Dims inDims = input->getDimensions();
  auto halfChannels = inDims.d[0] / 2;
  const std::string idxSuffix = std::to_string(layerIdx);

  nvinfer1::Dims sliceSize {3, {halfChannels, inDims.d[1], inDims.d[2]}};
  nvinfer1::Dims sliceStride {3, {1, 1, 1}};

  // First half of axis 0.
  nvinfer1::ISliceLayer* firstHalf = network->addSlice(*input, nvinfer1::Dims{3, {0, 0, 0}}, sliceSize, sliceStride);
  assert(firstHalf != nullptr);
  std::string firstHalfName = "slice_lt_" + idxSuffix;
  firstHalf->setName(firstHalfName.c_str());

  // Second half of axis 0.
  nvinfer1::ISliceLayer* secondHalf = network->addSlice(*input, nvinfer1::Dims{3, {halfChannels, 0, 0}}, sliceSize,
      sliceStride);
  assert(secondHalf != nullptr);
  std::string secondHalfName = "slice_rb_" + idxSuffix;
  secondHalf->setName(secondHalfName.c_str());

  std::vector<nvinfer1::ITensor*> branches;
  branches.push_back(firstHalf->getOutput(0));
  branches.push_back(secondHalf->getOutput(0));

  // The chain of convolution pairs starts from the second slice.
  nvinfer1::ITensor* current = branches.back();

  for (int step = 1; step <= repeats; ++step) {
    const std::string stepTag = std::to_string(step);

    nvinfer1::ITensor* firstConv = convolutionalLayer(layerIdx, block, weights, trtWeights, weightPtr, weightsType, filters,
        eps, current, network, "c2f_1_" + stepTag + "_");
    assert(firstConv != nullptr);

    nvinfer1::ITensor* secondConv = convolutionalLayer(layerIdx, block, weights, trtWeights, weightPtr, weightsType, filters,
        eps, firstConv, network, "c2f_2_" + stepTag + "_");
    assert(secondConv != nullptr);

    nvinfer1::ITensor* stepResult = secondConv;
    if (useShortcut) {
      // Residual connection: add the tensor that entered this pair.
      nvinfer1::IElementWiseLayer* sum = network->addElementWise(*current, *secondConv,
          nvinfer1::ElementWiseOperation::kSUM);
      assert(sum != nullptr);
      std::string sumName = "shortcut_c2f_" + stepTag + "_" + idxSuffix;
      sum->setName(sumName.c_str());
      stepResult = sum->getOutput(0);
    }

    current = stepResult;
    branches.push_back(current);
  }

  nvinfer1::IConcatenationLayer* merged = network->addConcatenation(branches.data(), branches.size());
  assert(merged != nullptr);
  std::string mergedName = "route_" + idxSuffix;
  merged->setName(mergedName.c_str());
  merged->setAxis(0);

  return merged->getOutput(0);
}
|
||||
@@ -1,18 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __C2F_LAYER_H__
|
||||
#define __C2F_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* c2fLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "cls_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
/*
 * Builds the layer for a "cls" cfg block: a shuffle layer whose first transpose
 * swaps axes 0 and 1 of the input.
 *
 * `block` is only consulted to check that its "type" is "cls"; `layerIdx` is used
 * for the layer name ("shuffle_<layerIdx>").
 *
 * Returns the output tensor of the shuffle layer.
 */
nvinfer1::ITensor*
clsLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "cls");

  nvinfer1::IShuffleLayer* transposeLayer = network->addShuffle(*input);
  assert(transposeLayer != nullptr);

  const std::string transposeName = "shuffle_" + std::to_string(layerIdx);
  transposeLayer->setName(transposeName.c_str());

  // Only the first two entries are set: axis 0 <-> axis 1.
  nvinfer1::Permutation swapFirstTwo;
  swapFirstTwo.order[0] = 1;
  swapFirstTwo.order[1] = 0;
  transposeLayer->setFirstTranspose(swapFirstTwo);

  return transposeLayer->getOutput(0);
}
|
||||
@@ -1,16 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __CLS_LAYER_H__
|
||||
#define __CLS_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* clsLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -10,8 +10,8 @@
|
||||
|
||||
nvinfer1::ITensor*
|
||||
convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, float eps,
|
||||
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName)
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network, std::string layerName)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
@@ -58,117 +58,60 @@ convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std:
|
||||
nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};
|
||||
|
||||
if (weightsType == "weights") {
|
||||
if (batchNormalize == false) {
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (batchNormalize == false) {
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
|
||||
++weightPtr;
|
||||
}
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
}
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0)
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
}
|
||||
else {
|
||||
if (batchNormalize == false) {
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
|
||||
++weightPtr;
|
||||
}
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
convBias.values = val;
|
||||
}
|
||||
else {
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
|
||||
++weightPtr;
|
||||
}
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0)
|
||||
trtWeights.push_back(convBias);
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0)
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
|
||||
nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}},
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, float eps,
|
||||
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = "");
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network, std::string layerName = "");
|
||||
|
||||
#endif
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
|
||||
nvinfer1::ITensor*
|
||||
deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels,
|
||||
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName)
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network, std::string layerName)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
@@ -47,43 +47,23 @@ deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, st
|
||||
nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};
|
||||
|
||||
if (weightsType == "weights") {
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
else {
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
|
||||
nvinfer1::IDeconvolutionLayer* conv = network->addDeconvolutionNd(*input, filters,
|
||||
nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels,
|
||||
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = "");
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network, std::string layerName = "");
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,196 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "detect_v8_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
/*
 * Builds the layers for a "detect_v8" cfg block.
 *
 * The input is sliced as a rank-2 tensor: the first `num` rows of axis 0 are the
 * box part, the next `classes` rows are the class part (d[1] is kept whole).
 *
 * Box part:
 *   1. reshape to (4, reg_max, d[1]) with reg_max = num / 4, then swap the first
 *      two axes;
 *   2. softmax over axis 0 (the reg_max axis after the swap);
 *   3. 1x1 convolution with one output map and weights 0, 1, ..., reg_max - 1,
 *      i.e. a weighted sum over the reg_max axis;
 *   4. reshape to (4, d[1]) and split into rows 0-1 (`lt`) and rows 2-3 (`rb`);
 *   5. x1y1 = anchorPoints - lt, x2y2 = rb + anchorPoints, concatenated on axis 0;
 *   6. multiplied element-wise by the stride points.
 * Class part: sigmoid activation.
 *
 * Box and class results are concatenated on axis 0 and the first two axes are
 * swapped for the returned tensor.
 *
 * Required block keys: "type" (== "detect_v8"), "num", "classes".
 *
 * Consumes 2 * d[1] floats (anchor points) followed by d[1] floats (stride points)
 * from `weights`, starting at `weightPtr`; `weightPtr` is advanced accordingly.
 * Every buffer allocated here is appended to `trtWeights`.
 */
nvinfer1::ITensor*
detectV8Layer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  nvinfer1::ITensor* output;

  assert(block.at("type") == "detect_v8");
  assert(block.find("num") != block.end());
  assert(block.find("classes") != block.end());

  int num = std::stoi(block.at("num"));
  int classes = std::stoi(block.at("classes"));
  int reg_max = num / 4;

  nvinfer1::Dims inputDims = input->getDimensions();

  // Rows [0, num) of axis 0: box part.
  nvinfer1::ISliceLayer* sliceBox = network->addSlice(*input, nvinfer1::Dims{2, {0, 0}},
      nvinfer1::Dims{2, {num, inputDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceBox != nullptr);
  std::string sliceBoxLayerName = "slice_box_" + std::to_string(layerIdx);
  sliceBox->setName(sliceBoxLayerName.c_str());
  nvinfer1::ITensor* box = sliceBox->getOutput(0);

  // Rows [num, num + classes) of axis 0: class part.
  nvinfer1::ISliceLayer* sliceCls = network->addSlice(*input, nvinfer1::Dims{2, {num, 0}},
      nvinfer1::Dims{2, {classes, inputDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceCls != nullptr);
  std::string sliceClsLayerName = "slice_cls_" + std::to_string(layerIdx);
  sliceCls->setName(sliceClsLayerName.c_str());
  nvinfer1::ITensor* cls = sliceCls->getOutput(0);

  // (num, N) -> (4, reg_max, N) -> (reg_max, 4, N)
  nvinfer1::IShuffleLayer* shuffle1Box = network->addShuffle(*box);
  assert(shuffle1Box != nullptr);
  std::string shuffle1BoxLayerName = "shuffle1_box_" + std::to_string(layerIdx);
  shuffle1Box->setName(shuffle1BoxLayerName.c_str());
  nvinfer1::Dims reshape1Dims = {3, {4, reg_max, inputDims.d[1]}};
  shuffle1Box->setReshapeDimensions(reshape1Dims);
  nvinfer1::Permutation permutation1Box;
  permutation1Box.order[0] = 1;
  permutation1Box.order[1] = 0;
  permutation1Box.order[2] = 2;
  shuffle1Box->setSecondTranspose(permutation1Box);
  box = shuffle1Box->getOutput(0);

  // Softmax along axis 0 (bit 0 of the axes mask).
  nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*box);
  assert(softmax != nullptr);
  std::string softmaxLayerName = "softmax_box_" + std::to_string(layerIdx);
  softmax->setName(softmaxLayerName.c_str());
  softmax->setAxes(1 << 0);
  box = softmax->getOutput(0);

  // Fixed 1x1 kernel holding 0, 1, ..., reg_max - 1.
  nvinfer1::Weights dflWt {nvinfer1::DataType::kFLOAT, nullptr, reg_max};

  float* val = new float[reg_max];
  for (int i = 0; i < reg_max; ++i) {
    val[i] = static_cast<float>(i);
  }
  dflWt.values = val;
  // Track this buffer in trtWeights like the anchor/stride buffers below; the
  // original never registered it, so nothing could ever release it.
  // NOTE(review): presumably the owner of trtWeights frees the buffers after the
  // engine is built -- confirm against the caller.
  trtWeights.push_back(dflWt);

  nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*box, 1, nvinfer1::Dims{2, {1, 1}}, dflWt,
      nvinfer1::Weights{});
  assert(conv != nullptr);
  std::string convLayerName = "conv_box_" + std::to_string(layerIdx);
  conv->setName(convLayerName.c_str());
  conv->setStrideNd(nvinfer1::Dims{2, {1, 1}});
  conv->setPaddingNd(nvinfer1::Dims{2, {0, 0}});
  box = conv->getOutput(0);

  // Collapse to (4, N).
  nvinfer1::IShuffleLayer* shuffle2Box = network->addShuffle(*box);
  assert(shuffle2Box != nullptr);
  std::string shuffle2BoxLayerName = "shuffle2_box_" + std::to_string(layerIdx);
  shuffle2Box->setName(shuffle2BoxLayerName.c_str());
  nvinfer1::Dims reshape2Dims = {2, {4, inputDims.d[1]}};
  shuffle2Box->setReshapeDimensions(reshape2Dims);
  box = shuffle2Box->getOutput(0);

  nvinfer1::Dims shuffle2BoxDims = box->getDimensions();

  // Rows 0-1.
  nvinfer1::ISliceLayer* sliceLtBox = network->addSlice(*box, nvinfer1::Dims{2, {0, 0}},
      nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceLtBox != nullptr);
  std::string sliceLtBoxLayerName = "slice_lt_box_" + std::to_string(layerIdx);
  sliceLtBox->setName(sliceLtBoxLayerName.c_str());
  nvinfer1::ITensor* lt = sliceLtBox->getOutput(0);

  // Rows 2-3.
  nvinfer1::ISliceLayer* sliceRbBox = network->addSlice(*box, nvinfer1::Dims{2, {2, 0}},
      nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceRbBox != nullptr);
  std::string sliceRbBoxLayerName = "slice_rb_box_" + std::to_string(layerIdx);
  sliceRbBox->setName(sliceRbBoxLayerName.c_str());
  nvinfer1::ITensor* rb = sliceRbBox->getOutput(0);

  // Anchor points: 2 * N floats read from the weights stream, shaped (2, N).
  int channels = 2 * shuffle2BoxDims.d[1];
  nvinfer1::Weights anchorPointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels};
  val = new float[channels];
  for (int i = 0; i < channels; ++i) {
    val[i] = weights[weightPtr];
    ++weightPtr;
  }
  anchorPointsWt.values = val;
  trtWeights.push_back(anchorPointsWt);

  nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}},
      anchorPointsWt);
  assert(anchorPoints != nullptr);
  std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx);
  anchorPoints->setName(anchorPointsLayerName.c_str());
  nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0);

  // x1y1 = anchorPoints - lt
  nvinfer1::IElementWiseLayer* x1y1 = network->addElementWise(*anchorPointsTensor, *lt,
      nvinfer1::ElementWiseOperation::kSUB);
  assert(x1y1 != nullptr);
  std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx);
  x1y1->setName(x1y1LayerName.c_str());
  nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0);

  // x2y2 = rb + anchorPoints
  nvinfer1::IElementWiseLayer* x2y2 = network->addElementWise(*rb, *anchorPointsTensor,
      nvinfer1::ElementWiseOperation::kSUM);
  assert(x2y2 != nullptr);
  std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx);
  x2y2->setName(x2y2LayerName.c_str());
  nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0);

  std::vector<nvinfer1::ITensor*> concatBoxInputs;
  concatBoxInputs.push_back(x1y1Tensor);
  concatBoxInputs.push_back(x2y2Tensor);

  nvinfer1::IConcatenationLayer* concatBox = network->addConcatenation(concatBoxInputs.data(), concatBoxInputs.size());
  assert(concatBox != nullptr);
  std::string concatBoxLayerName = "concat_box_" + std::to_string(layerIdx);
  concatBox->setName(concatBoxLayerName.c_str());
  concatBox->setAxis(0);
  box = concatBox->getOutput(0);

  // Stride points: N floats read from the weights stream, shaped (1, N).
  channels = shuffle2BoxDims.d[1];
  nvinfer1::Weights stridePointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels};
  val = new float[channels];
  for (int i = 0; i < channels; ++i) {
    val[i] = weights[weightPtr];
    ++weightPtr;
  }
  stridePointsWt.values = val;
  trtWeights.push_back(stridePointsWt);

  nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {1, shuffle2BoxDims.d[1]}},
      stridePointsWt);
  assert(stridePoints != nullptr);
  std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx);
  stridePoints->setName(stridePointsLayerName.c_str());
  nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0);

  // Scale the boxes by the stride points.
  nvinfer1::IElementWiseLayer* pred = network->addElementWise(*box, *stridePointsTensor,
      nvinfer1::ElementWiseOperation::kPROD);
  assert(pred != nullptr);
  std::string predLayerName = "pred_" + std::to_string(layerIdx);
  pred->setName(predLayerName.c_str());
  box = pred->getOutput(0);

  nvinfer1::IActivationLayer* sigmoid = network->addActivation(*cls, nvinfer1::ActivationType::kSIGMOID);
  assert(sigmoid != nullptr);
  std::string sigmoidLayerName = "sigmoid_cls_" + std::to_string(layerIdx);
  sigmoid->setName(sigmoidLayerName.c_str());
  cls = sigmoid->getOutput(0);

  std::vector<nvinfer1::ITensor*> concatInputs;
  concatInputs.push_back(box);
  concatInputs.push_back(cls);

  nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
  assert(concat != nullptr);
  std::string concatLayerName = "concat_" + std::to_string(layerIdx);
  concat->setName(concatLayerName.c_str());
  concat->setAxis(0);
  output = concat->getOutput(0);

  // Swap the first two axes of the concatenated result.
  nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*output);
  assert(shuffle != nullptr);
  std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
  shuffle->setName(shuffleLayerName.c_str());
  nvinfer1::Permutation permutation;
  permutation.order[0] = 1;
  permutation.order[1] = 0;
  shuffle->setFirstTranspose(permutation);
  output = shuffle->getOutput(0);

  return output;
}
|
||||
@@ -1,18 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __DETECT_V8_LAYER_H__
|
||||
#define __DETECT_V8_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* detectV8Layer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -13,7 +13,7 @@ implicitLayer(int layerIdx, std::map<std::string, std::string>& block, std::vect
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul");
|
||||
assert(block.at("type") == "implicit" || block.at("type") == "implicit_add" || block.at("type") == "implicit_mul");
|
||||
assert(block.find("filters") != block.end());
|
||||
|
||||
int filters = std::stoi(block.at("filters"));
|
||||
|
||||
@@ -14,9 +14,10 @@ poolingLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "maxpool" || block.at("type") == "avgpool");
|
||||
assert(block.at("type") == "max" || block.at("type") == "maxpool" || block.at("type") == "avg" ||
|
||||
block.at("type") == "avgpool");
|
||||
|
||||
if (block.at("type") == "maxpool") {
|
||||
if (block.at("type") == "max" || block.at("type") == "maxpool") {
|
||||
assert(block.find("size") != block.end());
|
||||
assert(block.find("stride") != block.end());
|
||||
|
||||
@@ -36,7 +37,7 @@ poolingLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::
|
||||
}
|
||||
output = maxpool->getOutput(0);
|
||||
}
|
||||
else if (block.at("type") == "avgpool") {
|
||||
else if (block.at("type") == "avg" || block.at("type") == "avgpool") {
|
||||
nvinfer1::Dims inputDims = input->getDimensions();
|
||||
nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kAVERAGE,
|
||||
nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}});
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "reduce_layer.h"
|
||||
|
||||
/*
 * Builds the layer for a "reduce" cfg block.
 *
 * Required block keys:
 *   "type" - must be "reduce"
 *   "mode" - reduction to apply; only "mean" (ReduceOperation::kAVG) is handled
 *   "axes" - comma-separated list of axis indices to reduce over
 * Optional block key:
 *   "keep" - "1" keeps the reduced dimensions; anything else (or absent) drops them
 *
 * An unsupported mode trips an assertion; with assertions disabled the layer
 * falls back to kAVG instead of reading an uninitialised value. Axis indices
 * outside [0, 32) cannot be represented in the 32-bit axis mask: they trip an
 * assertion and are otherwise ignored.
 *
 * Returns the output tensor of the reduce layer (named "reduce_<layerIdx>").
 */
nvinfer1::ITensor*
reduceLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  nvinfer1::ITensor* output;

  assert(block.at("type") == "reduce");
  assert(block.find("mode") != block.end());
  assert(block.find("axes") != block.end());

  std::string mode = block.at("mode");

  // Initialise up front so `operation` is never read indeterminate.
  nvinfer1::ReduceOperation operation = nvinfer1::ReduceOperation::kAVG;
  if (mode != "mean") {
    assert(false && "reduce layer: unsupported mode (only \"mean\" is handled)");
  }

  // Parse the comma-separated axis list; each token is trimmed before conversion.
  std::string strAxes = block.at("axes");
  std::vector<int32_t> axes;
  size_t lastPos = 0, pos = 0;
  while ((pos = strAxes.find(',', lastPos)) != std::string::npos) {
    int vL = std::stoi(trim(strAxes.substr(lastPos, pos - lastPos)));
    axes.push_back(vL);
    lastPos = pos + 1;
  }
  if (lastPos < strAxes.length()) {
    std::string lastV = trim(strAxes.substr(lastPos));
    if (!lastV.empty())
      axes.push_back(std::stoi(lastV));
  }
  assert(!axes.empty());

  // One bit per reduced axis. Shift an unsigned value and bounds-check the axis
  // first: shifting by a negative amount or by >= 32 is undefined behaviour.
  uint32_t axisMask = 0;
  for (int axis : axes) {
    if (axis < 0 || axis >= 32) {
      assert(false && "reduce layer: axis index out of range");
      continue;
    }
    axisMask |= 1U << axis;
  }

  bool keepDims = false;
  if (block.find("keep") != block.end())
    keepDims = (std::stoi(block.at("keep")) == 1);

  nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims);
  assert(reduce != nullptr);
  std::string reduceLayerName = "reduce_" + std::to_string(layerIdx);
  reduce->setName(reduceLayerName.c_str());
  output = reduce->getOutput(0);

  return output;
}
|
||||
@@ -1,14 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __REDUCE_LAYER_H__
|
||||
#define __REDUCE_LAYER_H__
|
||||
|
||||
#include "../utils.h"
|
||||
|
||||
nvinfer1::ITensor* reduceLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -1,109 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "reg_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
// Copies `count` consecutive floats from `weights`, starting at `weightPtr`, into a newly
// allocated buffer, advances `weightPtr` past them and records the resulting
// nvinfer1::Weights in `trtWeights`.
// NOTE(review): the buffer is never freed here; presumably whoever owns `trtWeights`
// releases it after the engine is built - confirm against the caller.
static nvinfer1::Weights
regLayerLoadConstant(const std::vector<float>& weights, int& weightPtr, int count,
    std::vector<nvinfer1::Weights>& trtWeights)
{
  // Guard against a truncated weights file: the original code read past the end silently.
  assert(count >= 0 && weightPtr >= 0);
  assert(static_cast<size_t>(weightPtr) + static_cast<size_t>(count) <= weights.size());

  float* values = new float[count];
  for (int i = 0; i < count; ++i) {
    values[i] = weights[weightPtr];
    ++weightPtr;
  }

  nvinfer1::Weights loaded {nvinfer1::DataType::kFLOAT, values, count};
  trtWeights.push_back(loaded);
  return loaded;
}

// Adds the "reg" block to the network.
//
// The input is transposed (first two axes swapped), split along the second axis into
// columns [0, 2) ("lt") and [2, 4) ("rb"), combined with an anchor-point constant
// (anchor - lt, rb + anchor), concatenated on axis 1 and finally multiplied by a
// per-row stride constant.
//
// layerIdx   - index used to build the TensorRT layer names.
// block      - parsed cfg block; its "type" must be "reg".
// weights    - flat weight array; 2 * N anchor values followed by N stride values are
//              consumed, where N is the first dimension of the transposed input.
// trtWeights - receives the two constant weight descriptors created here.
// weightPtr  - read position in `weights`; advanced by 3 * N.
// input      - tensor feeding this block.
// network    - network definition the layers are added to.
//
// Returns the output tensor of the final element-wise product.
nvinfer1::ITensor*
regLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "reg");

  const std::string idx = std::to_string(layerIdx);

  // Swap the first two axes of the input.
  nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
  assert(shuffle != nullptr);
  const std::string shuffleLayerName = "shuffle_" + idx;
  shuffle->setName(shuffleLayerName.c_str());
  nvinfer1::Permutation permutation;
  permutation.order[0] = 1;
  permutation.order[1] = 0;
  shuffle->setFirstTranspose(permutation);
  nvinfer1::ITensor* transposed = shuffle->getOutput(0);
  const nvinfer1::Dims shuffleDims = transposed->getDimensions();
  const int rows = shuffleDims.d[0];

  // Columns [0, 2) of every row.
  nvinfer1::ISliceLayer* sliceLt = network->addSlice(*transposed, nvinfer1::Dims{2, {0, 0}},
      nvinfer1::Dims{2, {rows, 2}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceLt != nullptr);
  const std::string sliceLtLayerName = "slice_lt_" + idx;
  sliceLt->setName(sliceLtLayerName.c_str());
  nvinfer1::ITensor* lt = sliceLt->getOutput(0);

  // Columns [2, 4) of every row.
  nvinfer1::ISliceLayer* sliceRb = network->addSlice(*transposed, nvinfer1::Dims{2, {0, 2}},
      nvinfer1::Dims{2, {rows, 2}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceRb != nullptr);
  const std::string sliceRbLayerName = "slice_rb_" + idx;
  sliceRb->setName(sliceRbLayerName.c_str());
  nvinfer1::ITensor* rb = sliceRb->getOutput(0);

  // Anchor points: a (rows x 2) constant read from the weights.
  const nvinfer1::Weights anchorPointsWt = regLayerLoadConstant(weights, weightPtr, rows * 2, trtWeights);
  nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {rows, 2}}, anchorPointsWt);
  assert(anchorPoints != nullptr);
  const std::string anchorPointsLayerName = "anchor_points_" + idx;
  anchorPoints->setName(anchorPointsLayerName.c_str());
  nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0);

  // x1y1 = anchor - lt
  nvinfer1::IElementWiseLayer* x1y1 = network->addElementWise(*anchorPointsTensor, *lt,
      nvinfer1::ElementWiseOperation::kSUB);
  assert(x1y1 != nullptr);
  const std::string x1y1LayerName = "x1y1_" + idx;
  x1y1->setName(x1y1LayerName.c_str());

  // x2y2 = rb + anchor
  nvinfer1::IElementWiseLayer* x2y2 = network->addElementWise(*rb, *anchorPointsTensor,
      nvinfer1::ElementWiseOperation::kSUM);
  assert(x2y2 != nullptr);
  const std::string x2y2LayerName = "x2y2_" + idx;
  x2y2->setName(x2y2LayerName.c_str());

  // Join both halves along axis 1.
  std::vector<nvinfer1::ITensor*> concatInputs;
  concatInputs.push_back(x1y1->getOutput(0));
  concatInputs.push_back(x2y2->getOutput(0));

  nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
  assert(concat != nullptr);
  const std::string concatLayerName = "concat_" + idx;
  concat->setName(concatLayerName.c_str());
  concat->setAxis(1);
  nvinfer1::ITensor* boxes = concat->getOutput(0);

  // Stride points: a (rows x 1) constant read from the weights, applied as a multiplier.
  const nvinfer1::Weights stridePointsWt = regLayerLoadConstant(weights, weightPtr, rows, trtWeights);
  nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {rows, 1}}, stridePointsWt);
  assert(stridePoints != nullptr);
  const std::string stridePointsLayerName = "stride_points_" + idx;
  stridePoints->setName(stridePointsLayerName.c_str());
  nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0);

  nvinfer1::IElementWiseLayer* pred = network->addElementWise(*boxes, *stridePointsTensor,
      nvinfer1::ElementWiseOperation::kPROD);
  assert(pred != nullptr);
  const std::string predLayerName = "pred_" + idx;
  pred->setName(predLayerName.c_str());

  return pred->getOutput(0);
}
|
||||
@@ -1,18 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __REG_LAYER_H__
|
||||
#define __REG_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* regLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -14,7 +14,7 @@ reorgLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::IT
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "reorg");
|
||||
assert(block.at("type") == "reorg3d");
|
||||
|
||||
nvinfer1::Dims inputDims = input->getDimensions();
|
||||
|
||||
|
||||
28
nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp
Normal file
28
nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "sam_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
// Adds the "sam" block: the element-wise product of `input` and `samInput`, followed by
// the activation named by `activation`.
//
// layerIdx   - index used to build the layer name ("sam_<layerIdx>") and forwarded to
//              activationLayer.
// activation - activation identifier handed to activationLayer unchanged.
// block      - parsed cfg block; its "type" must be "sam".
// input      - first operand of the product.
// samInput   - second operand of the product.
// network    - network definition the layers are added to.
//
// Returns the tensor produced by activationLayer.
nvinfer1::ITensor*
samLayer(int layerIdx, std::string activation, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
    nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "sam");

  nvinfer1::IElementWiseLayer* product =
      network->addElementWise(*input, *samInput, nvinfer1::ElementWiseOperation::kPROD);
  assert(product != nullptr);

  const std::string productName = "sam_" + std::to_string(layerIdx);
  product->setName(productName.c_str());

  nvinfer1::ITensor* activated = activationLayer(layerIdx, activation, product->getOutput(0), network);
  assert(activated != nullptr);

  return activated;
}
|
||||
18
nvdsinfer_custom_impl_Yolo/layers/sam_layer.h
Normal file
18
nvdsinfer_custom_impl_Yolo/layers/sam_layer.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __SAM_LAYER_H__
|
||||
#define __SAM_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* samLayer(int layerIdx, std::string activation, std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input, nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -8,7 +8,7 @@
|
||||
#include <cassert>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
shortcutLayer(int layerIdx, std::string mode, std::string activation, std::string inputVol, std::string shortcutVol,
|
||||
shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol,
|
||||
std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcutInput,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
@@ -16,12 +16,7 @@ shortcutLayer(int layerIdx, std::string mode, std::string activation, std::strin
|
||||
|
||||
assert(block.at("type") == "shortcut");
|
||||
|
||||
nvinfer1::ElementWiseOperation operation = nvinfer1::ElementWiseOperation::kSUM;
|
||||
|
||||
if (mode == "mul")
|
||||
operation = nvinfer1::ElementWiseOperation::kPROD;
|
||||
|
||||
if (mode == "add" && inputVol != shortcutVol) {
|
||||
if (inputVol != shortcutVol) {
|
||||
nvinfer1::ISliceLayer* slice = network->addSlice(*shortcutInput, nvinfer1::Dims{3, {0, 0, 0}}, input->getDimensions(),
|
||||
nvinfer1::Dims{3, {1, 1, 1}});
|
||||
assert(slice != nullptr);
|
||||
@@ -32,7 +27,7 @@ shortcutLayer(int layerIdx, std::string mode, std::string activation, std::strin
|
||||
else
|
||||
output = shortcutInput;
|
||||
|
||||
nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, operation);
|
||||
nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, nvinfer1::ElementWiseOperation::kSUM);
|
||||
assert(shortcut != nullptr);
|
||||
std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx);
|
||||
shortcut->setName(shortcutLayerName.c_str());
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string mode, std::string activation, std::string inputVol,
|
||||
std::string shortcutVol, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* shortcut, nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol,
|
||||
std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcut,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "shuffle_layer.h"
|
||||
|
||||
nvinfer1::ITensor*
|
||||
shuffleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "shuffle");
|
||||
|
||||
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||
assert(shuffle != nullptr);
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
|
||||
if (block.find("reshape") != block.end()) {
|
||||
nvinfer1::Dims inputTensorDims = input->getDimensions();
|
||||
|
||||
std::string strReshape = block.at("reshape");
|
||||
std::vector<int32_t> reshape;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strReshape.find(',', lastPos)) != std::string::npos) {
|
||||
std::string V = trim(strReshape.substr(lastPos, pos - lastPos));
|
||||
if (V == "c")
|
||||
reshape.push_back(inputTensorDims.d[0]);
|
||||
else if (V == "ch")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1]);
|
||||
else if (V == "cw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[2]);
|
||||
else if (V == "h")
|
||||
reshape.push_back(inputTensorDims.d[1]);
|
||||
else if (V == "hw")
|
||||
reshape.push_back(inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else if (V == "w")
|
||||
reshape.push_back(inputTensorDims.d[2]);
|
||||
else if (V == "chw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else
|
||||
reshape.push_back(std::stoi(V));
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strReshape.length()) {
|
||||
std::string lastV = trim(strReshape.substr(lastPos));
|
||||
if (!lastV.empty()) {
|
||||
if (lastV == "c")
|
||||
reshape.push_back(inputTensorDims.d[0]);
|
||||
else if (lastV == "ch")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1]);
|
||||
else if (lastV == "cw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[2]);
|
||||
else if (lastV == "h")
|
||||
reshape.push_back(inputTensorDims.d[1]);
|
||||
else if (lastV == "hw")
|
||||
reshape.push_back(inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else if (lastV == "w")
|
||||
reshape.push_back(inputTensorDims.d[2]);
|
||||
else if (lastV == "chw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else
|
||||
reshape.push_back(std::stoi(lastV));
|
||||
}
|
||||
}
|
||||
assert(!reshape.empty());
|
||||
|
||||
nvinfer1::Dims reshapeDims;
|
||||
reshapeDims.nbDims = reshape.size();
|
||||
|
||||
for (uint i = 0; i < reshape.size(); ++i)
|
||||
reshapeDims.d[i] = reshape[i];
|
||||
|
||||
shuffle->setReshapeDimensions(reshapeDims);
|
||||
}
|
||||
|
||||
if (block.find("transpose1") != block.end()) {
|
||||
std::string strTranspose1 = block.at("transpose1");
|
||||
std::vector<int32_t> transpose1;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos) {
|
||||
int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos)));
|
||||
transpose1.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strTranspose1.length()) {
|
||||
std::string lastV = trim(strTranspose1.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
transpose1.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!transpose1.empty());
|
||||
|
||||
nvinfer1::Permutation permutation1;
|
||||
for (uint i = 0; i < transpose1.size(); ++i)
|
||||
permutation1.order[i] = transpose1[i];
|
||||
|
||||
shuffle->setFirstTranspose(permutation1);
|
||||
}
|
||||
|
||||
if (block.find("transpose2") != block.end()) {
|
||||
std::string strTranspose2 = block.at("transpose2");
|
||||
std::vector<int32_t> transpose2;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos) {
|
||||
int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos)));
|
||||
transpose2.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strTranspose2.length()) {
|
||||
std::string lastV = trim(strTranspose2.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
transpose2.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!transpose2.empty());
|
||||
|
||||
nvinfer1::Permutation permutation2;
|
||||
for (uint i = 0; i < transpose2.size(); ++i)
|
||||
permutation2.order[i] = transpose2[i];
|
||||
|
||||
shuffle->setSecondTranspose(permutation2);
|
||||
}
|
||||
|
||||
output = shuffle->getOutput(0);
|
||||
|
||||
return output;
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __SHUFFLE_LAYER_H__
|
||||
#define __SHUFFLE_LAYER_H__
|
||||
|
||||
#include "../utils.h"
|
||||
|
||||
nvinfer1::ITensor* shuffleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "softmax_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
// Adds the "softmax" block: an ISoftMaxLayer applied along the axis given by the cfg key
// "axes".
//
// layerIdx - index used to build the layer name ("softmax_<layerIdx>").
// block    - parsed cfg block; its "type" must be "softmax" and it must contain "axes".
// input    - tensor feeding this block.
// network  - network definition the layer is added to.
//
// Returns the output tensor of the softmax layer. std::stoi throws if "axes" is not an
// integer.
nvinfer1::ITensor*
softmaxLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "softmax");
  assert(block.find("axes") != block.end());

  const int axes = std::stoi(block.at("axes"));
  // The axis index is turned into a bit position below; shifting by a negative or
  // oversized amount is undefined behaviour, so reject such values up front.
  assert(axes >= 0 && axes < nvinfer1::Dims::MAX_DIMS);

  nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*input);
  assert(softmax != nullptr);
  const std::string softmaxLayerName = "softmax_" + std::to_string(layerIdx);
  softmax->setName(softmaxLayerName.c_str());
  // setAxes takes a bitmask with one bit per dimension.
  softmax->setAxes(1U << axes);

  return softmax->getOutput(0);
}
|
||||
@@ -1,16 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __SOFTMAX_LAYER_H__
|
||||
#define __SOFTMAX_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* softmaxLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -44,7 +44,7 @@ getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* i
|
||||
|
||||
yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg"));
|
||||
|
||||
networkInfo.inputBlobName = "data";
|
||||
networkInfo.inputBlobName = "input";
|
||||
networkInfo.networkType = yoloType;
|
||||
networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
|
||||
networkInfo.wtsFilePath = initParams->modelFilePath;
|
||||
@@ -52,7 +52,6 @@ getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* i
|
||||
networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
|
||||
networkInfo.numDetectedClasses = initParams->numDetectedClasses;
|
||||
networkInfo.clusterMode = initParams->clusterMode;
|
||||
networkInfo.scoreThreshold = initParams->perClassDetectionParams->preClusterThreshold;
|
||||
|
||||
if (initParams->networkMode == 0)
|
||||
networkInfo.networkMode = "FP32";
|
||||
|
||||
@@ -26,12 +26,15 @@
|
||||
#include "nvdsinfer_custom_impl.h"
|
||||
|
||||
#include "utils.h"
|
||||
#include "yoloPlugins.h"
|
||||
|
||||
extern "C" bool
|
||||
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
|
||||
|
||||
extern "C" bool
|
||||
NvDsInferParseYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
|
||||
|
||||
static NvDsInferParseObjectInfo
|
||||
convertBBox(const float& bx1, const float& by1, const float& bx2, const float& by2, const uint& netW, const uint& netH)
|
||||
{
|
||||
@@ -60,7 +63,9 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
|
||||
const int maxIndex, const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
|
||||
{
|
||||
NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);
|
||||
if (bbi.width < 1 || bbi.height < 1) return;
|
||||
|
||||
if (bbi.width < 1 || bbi.height < 1)
|
||||
return;
|
||||
|
||||
bbi.detectionConfidence = maxProb;
|
||||
bbi.classId = maxIndex;
|
||||
@@ -68,23 +73,55 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
|
||||
}
|
||||
|
||||
static std::vector<NvDsInferParseObjectInfo>
|
||||
decodeYoloTensor(const int* counts, const float* boxes, const float* scores, const int* classes, const uint& netW,
|
||||
const uint& netH)
|
||||
decodeTensorYolo(const float* detection, const uint& outputSize, const uint& count, const uint& netW, const uint& netH,
|
||||
const std::vector<float>& preclusterThreshold)
|
||||
{
|
||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
||||
|
||||
uint numBoxes = counts[0];
|
||||
for (uint b = 0; b < numBoxes; ++b) {
|
||||
float bx1 = boxes[b * 4 + 0];
|
||||
float by1 = boxes[b * 4 + 1];
|
||||
float bx2 = boxes[b * 4 + 2];
|
||||
float by2 = boxes[b * 4 + 3];
|
||||
for (uint b = 0; b < outputSize; ++b) {
|
||||
float maxProb = count == 6 ? detection[b * count + 4] : detection[b * count + 4] * detection[b * count + 6];
|
||||
int maxIndex = (int) detection[b * count + 5];
|
||||
|
||||
float maxProb = scores[b];
|
||||
int maxIndex = classes[b];
|
||||
if (maxProb < preclusterThreshold[maxIndex])
|
||||
continue;
|
||||
|
||||
float bxc = detection[b * count + 0];
|
||||
float byc = detection[b * count + 1];
|
||||
float bw = detection[b * count + 2];
|
||||
float bh = detection[b * count + 3];
|
||||
|
||||
float bx1 = bxc - bw / 2;
|
||||
float by1 = byc - bh / 2;
|
||||
float bx2 = bx1 + bw;
|
||||
float by2 = by1 + bh;
|
||||
|
||||
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
|
||||
}
|
||||
|
||||
return binfo;
|
||||
}
|
||||
|
||||
static std::vector<NvDsInferParseObjectInfo>
|
||||
decodeTensorYoloE(const float* detection, const uint& outputSize, const uint& count, const uint& netW, const uint& netH,
|
||||
const std::vector<float>& preclusterThreshold)
|
||||
{
|
||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
||||
|
||||
for (uint b = 0; b < outputSize; ++b) {
|
||||
float maxProb = count == 6 ? detection[b * count + 4] : detection[b * count + 4] * detection[b * count + 6];
|
||||
int maxIndex = (int) detection[b * count + 5];
|
||||
|
||||
if (maxProb < preclusterThreshold[maxIndex])
|
||||
continue;
|
||||
|
||||
float bx1 = detection[b * count + 0];
|
||||
float by1 = detection[b * count + 1];
|
||||
float bx2 = detection[b * count + 2];
|
||||
float by2 = detection[b * count + 3];
|
||||
|
||||
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
|
||||
}
|
||||
|
||||
return binfo;
|
||||
}
|
||||
|
||||
@@ -99,14 +136,39 @@ NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> objects;
|
||||
|
||||
const NvDsInferLayerInfo& counts = outputLayersInfo[0];
|
||||
const NvDsInferLayerInfo& boxes = outputLayersInfo[1];
|
||||
const NvDsInferLayerInfo& scores = outputLayersInfo[2];
|
||||
const NvDsInferLayerInfo& classes = outputLayersInfo[3];
|
||||
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> outObjs = decodeYoloTensor((const int*) (counts.buffer),
|
||||
(const float*) (boxes.buffer), (const float*) (scores.buffer), (const int*) (classes.buffer), networkInfo.width,
|
||||
networkInfo.height);
|
||||
const uint outputSize = layer.inferDims.d[0];
|
||||
const uint count = layer.inferDims.d[1];
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (layer.buffer), outputSize, count,
|
||||
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
|
||||
|
||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
||||
|
||||
objectList = objects;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
NvDsInferParseCustomYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
|
||||
{
|
||||
if (outputLayersInfo.empty()) {
|
||||
std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> objects;
|
||||
|
||||
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
|
||||
|
||||
const uint outputSize = layer.inferDims.d[0];
|
||||
const uint count = layer.inferDims.d[1];
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (layer.buffer), outputSize, count,
|
||||
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
|
||||
|
||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
||||
|
||||
@@ -122,4 +184,11 @@ NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDs
|
||||
return NvDsInferParseCustomYolo(outputLayersInfo, networkInfo, detectionParams, objectList);
|
||||
}
|
||||
|
||||
extern "C" bool
|
||||
NvDsInferParseYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
|
||||
{
|
||||
return NvDsInferParseCustomYoloE(outputLayersInfo, networkInfo, detectionParams, objectList);
|
||||
}
|
||||
|
||||
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
|
||||
|
||||
@@ -1,530 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Edited by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/device_vector.h>
|
||||
|
||||
#include "nvdsinfer_custom_impl.h"
|
||||
|
||||
#include "utils.h"
|
||||
#include "yoloPlugins.h"
|
||||
|
||||
// Decodes one detection per thread from a row-major buffer of (5 + numClasses) floats per
// detection: elements 0..3 are centre x, centre y, width, height; element 4 is the
// objectness; elements 5.. are the class probabilities.
//
// binfo               - output array; must have room for every accepted detection
//                       (at most outputSize).
// detections          - input buffer described above.
// numClasses          - number of class probabilities per detection.
// outputSize          - number of detections.
// netW, netH          - bounds the box corners are clamped to.
// preclusterThreshold - per-class minimum for objectness * best class probability.
// numDetections       - counter incremented atomically to reserve an output slot.
__global__ void decodeTensor_YOLO_ONNX(NvDsInferParseObjectInfo *binfo, const float* detections, const int numClasses,
    const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections)
{
  uint x_id = blockIdx.x * blockDim.x + threadIdx.x;

  if (x_id >= outputSize)
    return;

  const float* row = detections + x_id * (5 + numClasses);

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (int i = 0; i < numClasses; ++i) {
    const float prob = row[5 + i];
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  // No class probability above zero: there is no class to attribute the detection to,
  // and indexing preclusterThreshold with -1 would read out of bounds.
  if (maxIndex < 0)
    return;

  const float objectness = row[4];

  if (objectness * maxProb < preclusterThreshold[maxIndex])
    return;

  const int count = (int)atomicAdd(numDetections, 1);

  const float bxc = row[0];
  const float byc = row[1];
  const float bw = row[2];
  const float bh = row[3];

  // Centre/size -> corners, clamped to the network input area.
  float x0 = bxc - bw / 2;
  float y0 = byc - bh / 2;
  float x1 = x0 + bw;
  float y1 = y0 + bh;
  x0 = fminf(netW, fmaxf(0.0f, x0));
  y0 = fminf(netH, fmaxf(0.0f, y0));
  x1 = fminf(netW, fmaxf(0.0f, x1));
  y1 = fminf(netH, fmaxf(0.0f, y1));

  binfo[count].left = x0;
  binfo[count].top = y0;
  binfo[count].width = fminf(netW, fmaxf(0.0f, x1 - x0));
  binfo[count].height = fminf(netH, fmaxf(0.0f, y1 - y0));
  binfo[count].detectionConfidence = objectness * maxProb;
  binfo[count].classId = maxIndex;
}
|
||||
|
||||
// Decodes one detection per thread from a buffer in which each attribute is stored as a
// plane of outputSize floats: planes 0..3 are centre x, centre y, width, height and
// planes 4.. are the class probabilities (there is no separate objectness value).
//
// binfo               - output array; must have room for every accepted detection
//                       (at most outputSize).
// detections          - input buffer described above.
// numClasses          - number of class planes.
// outputSize          - number of detections (plane length).
// netW, netH          - bounds the box corners are clamped to.
// preclusterThreshold - per-class minimum for the best class probability.
// numDetections       - counter incremented atomically to reserve an output slot.
__global__ void decodeTensor_YOLOV8_ONNX(NvDsInferParseObjectInfo* binfo, const float* detections, const int numClasses,
    const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections)
{
  uint x_id = blockIdx.x * blockDim.x + threadIdx.x;

  if (x_id >= outputSize)
    return;

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (int i = 0; i < numClasses; ++i) {
    const float prob = detections[x_id + outputSize * (i + 4)];
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  // No class probability above zero: there is no class to attribute the detection to,
  // and indexing preclusterThreshold with -1 would read out of bounds.
  if (maxIndex < 0)
    return;

  if (maxProb < preclusterThreshold[maxIndex])
    return;

  const int count = (int)atomicAdd(numDetections, 1);

  const float bxc = detections[x_id + outputSize * 0];
  const float byc = detections[x_id + outputSize * 1];
  const float bw = detections[x_id + outputSize * 2];
  const float bh = detections[x_id + outputSize * 3];

  // Centre/size -> corners, clamped to the network input area.
  float x0 = bxc - bw / 2;
  float y0 = byc - bh / 2;
  float x1 = x0 + bw;
  float y1 = y0 + bh;
  x0 = fminf(netW, fmaxf(0.0f, x0));
  y0 = fminf(netH, fmaxf(0.0f, y0));
  x1 = fminf(netW, fmaxf(0.0f, x1));
  y1 = fminf(netH, fmaxf(0.0f, y1));

  binfo[count].left = x0;
  binfo[count].top = y0;
  binfo[count].width = fminf(netW, fmaxf(0.0f, x1 - x0));
  binfo[count].height = fminf(netH, fmaxf(0.0f, y1 - y0));
  binfo[count].detectionConfidence = maxProb;
  binfo[count].classId = maxIndex;
}
|
||||
|
||||
// Decodes one detection per thread from a row-major buffer of (5 + numClasses) floats per
// detection. Elements 0 and 1 are offsets added to grid0/grid1 and scaled by the stride;
// elements 2 and 3 are exponentiated and scaled by the stride to give width and height;
// element 4 is the objectness; elements 5.. are the class probabilities.
//
// binfo               - output array; must have room for every accepted detection
//                       (at most outputSize).
// detections          - input buffer described above.
// numClasses          - number of class probabilities per detection.
// outputSize          - number of detections.
// netW, netH          - bounds the box corners are clamped to.
// grid0, grid1        - per-detection grid offsets added to elements 0 and 1.
// strides             - per-detection scale factor.
// preclusterThreshold - per-class minimum for objectness * best class probability.
// numDetections       - counter incremented atomically to reserve an output slot.
__global__ void decodeTensor_YOLOX_ONNX(NvDsInferParseObjectInfo *binfo, const float* detections, const int numClasses,
    const int outputSize, float netW, float netH, const int *grid0, const int *grid1, const int *strides,
    const float* preclusterThreshold, int* numDetections)
{
  uint x_id = blockIdx.x * blockDim.x + threadIdx.x;

  if (x_id >= outputSize)
    return;

  const float* row = detections + x_id * (5 + numClasses);

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (int i = 0; i < numClasses; ++i) {
    const float prob = row[5 + i];
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  // No class probability above zero: there is no class to attribute the detection to,
  // and indexing preclusterThreshold with -1 would read out of bounds.
  if (maxIndex < 0)
    return;

  const float objectness = row[4];

  if (objectness * maxProb < preclusterThreshold[maxIndex])
    return;

  const int count = (int)atomicAdd(numDetections, 1);

  const float bxc = (row[0] + grid0[x_id]) * strides[x_id];
  const float byc = (row[1] + grid1[x_id]) * strides[x_id];
  const float bw = __expf(row[2]) * strides[x_id];
  const float bh = __expf(row[3]) * strides[x_id];

  // Centre/size -> corners, clamped to the network input area.
  float x0 = bxc - bw / 2;
  float y0 = byc - bh / 2;
  float x1 = x0 + bw;
  float y1 = y0 + bh;
  x0 = fminf(netW, fmaxf(0.0f, x0));
  y0 = fminf(netH, fmaxf(0.0f, y0));
  x1 = fminf(netW, fmaxf(0.0f, x1));
  y1 = fminf(netH, fmaxf(0.0f, y1));

  binfo[count].left = x0;
  binfo[count].top = y0;
  binfo[count].width = fminf(netW, fmaxf(0.0f, x1 - x0));
  binfo[count].height = fminf(netH, fmaxf(0.0f, y1 - y0));
  binfo[count].detectionConfidence = objectness * maxProb;
  binfo[count].classId = maxIndex;
}
|
||||
|
||||
// Decodes one detection per thread from two buffers: `scores` holds numClasses
// probabilities per detection (row-major) and `boxes` holds four floats per detection
// that are used directly as the corners x0, y0, x1, y1.
//
// binfo               - output array; must have room for every accepted detection
//                       (at most outputSize).
// scores              - class probabilities, numClasses per detection.
// boxes               - box corners, 4 per detection.
// numClasses          - number of class probabilities per detection.
// outputSize          - number of detections.
// netW, netH          - bounds the box corners are clamped to.
// preclusterThreshold - per-class minimum for the best class probability.
// numDetections       - counter incremented atomically to reserve an output slot.
__global__ void decodeTensor_YOLO_NAS_ONNX(NvDsInferParseObjectInfo *binfo, const float* scores, const float* boxes,
    const int numClasses, const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections)
{
  uint x_id = blockIdx.x * blockDim.x + threadIdx.x;

  if (x_id >= outputSize)
    return;

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (int i = 0; i < numClasses; ++i) {
    const float prob = scores[x_id * numClasses + i];
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  // No class probability above zero: there is no class to attribute the detection to,
  // and indexing preclusterThreshold with -1 would read out of bounds.
  if (maxIndex < 0)
    return;

  if (maxProb < preclusterThreshold[maxIndex])
    return;

  const int count = (int)atomicAdd(numDetections, 1);

  // Corners are read as-is and clamped to the network input area.
  const float x0 = fminf(netW, fmaxf(0.0f, boxes[x_id * 4 + 0]));
  const float y0 = fminf(netH, fmaxf(0.0f, boxes[x_id * 4 + 1]));
  const float x1 = fminf(netW, fmaxf(0.0f, boxes[x_id * 4 + 2]));
  const float y1 = fminf(netH, fmaxf(0.0f, boxes[x_id * 4 + 3]));

  binfo[count].left = x0;
  binfo[count].top = y0;
  binfo[count].width = fminf(netW, fmaxf(0.0f, x1 - x0));
  binfo[count].height = fminf(netH, fmaxf(0.0f, y1 - y0));
  binfo[count].detectionConfidence = maxProb;
  binfo[count].classId = maxIndex;
}
|
||||
|
||||
// Decodes one detection per thread from two buffers: `scores` stores each class as a
// plane of outputSize floats, and `boxes` holds four floats per detection that are used
// directly as the corners x0, y0, x1, y1.
//
// binfo               - output array; must have room for every accepted detection
//                       (at most outputSize).
// scores              - class probabilities, one plane of outputSize floats per class.
// boxes               - box corners, 4 per detection.
// numClasses          - number of class planes.
// outputSize          - number of detections (plane length).
// netW, netH          - bounds the box corners are clamped to.
// preclusterThreshold - per-class minimum for the best class probability.
// numDetections       - counter incremented atomically to reserve an output slot.
__global__ void decodeTensor_PPYOLOE_ONNX(NvDsInferParseObjectInfo *binfo, const float* scores, const float* boxes,
    const int numClasses, const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections)
{
  uint x_id = blockIdx.x * blockDim.x + threadIdx.x;

  if (x_id >= outputSize)
    return;

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (int i = 0; i < numClasses; ++i) {
    const float prob = scores[x_id + outputSize * i];
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  // No class probability above zero: there is no class to attribute the detection to,
  // and indexing preclusterThreshold with -1 would read out of bounds.
  if (maxIndex < 0)
    return;

  if (maxProb < preclusterThreshold[maxIndex])
    return;

  const int count = (int)atomicAdd(numDetections, 1);

  // Corners are read as-is and clamped to the network input area.
  const float x0 = fminf(netW, fmaxf(0.0f, boxes[x_id * 4 + 0]));
  const float y0 = fminf(netH, fmaxf(0.0f, boxes[x_id * 4 + 1]));
  const float x1 = fminf(netW, fmaxf(0.0f, boxes[x_id * 4 + 2]));
  const float y1 = fminf(netH, fmaxf(0.0f, boxes[x_id * 4 + 3]));

  binfo[count].left = x0;
  binfo[count].top = y0;
  binfo[count].width = fminf(netW, fmaxf(0.0f, x1 - x0));
  binfo[count].height = fminf(netH, fmaxf(0.0f, y1 - y0));
  binfo[count].detectionConfidence = maxProb;
  binfo[count].classId = maxIndex;
}
|
||||
|
||||
/**
 * Parses the first output layer into detection objects by running decodeTensor_YOLO_ONNX on it.
 *
 * The layer dimensions are read as d[0] = number of candidates and d[1] = 5 + number of classes.
 * A class-count mismatch with the configuration only produces a warning.
 *
 * @param outputLayersInfo Output layers of the network; only element 0 is used.
 * @param networkInfo      Network input size, passed to the kernel as float width and height.
 * @param detectionParams  Supplies the configured class count and the per-class thresholds.
 * @param objectList       Resized to the number of detections and filled with the decoded objects.
 * @return false when no output layer is present, true otherwise.
 */
static bool
NvDsInferParseCustom_YOLO_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
  if (outputLayersInfo.empty()) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  const NvDsInferLayerInfo& layer = outputLayersInfo[0];

  const uint outputSize = layer.inferDims.d[0];
  const uint numClasses = layer.inferDims.d[1] - 5;

  if (numClasses != detectionParams.numClassesConfigured) {
    std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses
        << " in config_infer file\n" << std::endl;
  }

  // Nothing to decode; also keeps the unsigned block-count computation below from wrapping.
  if (outputSize == 0) {
    objectList.clear();
    return true;
  }

  thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);

  std::vector<int> numDetections = { 0 };
  thrust::device_vector<int> d_numDetections(numDetections);

  thrust::device_vector<float> preclusterThreshold(detectionParams.perClassPreclusterThreshold);

  const int threads_per_block = 1024;
  const int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;

  // The execution configuration is <<<grid size, block size>>>: blocks first, threads per block second.
  decodeTensor_YOLO_ONNX<<<number_of_blocks, threads_per_block>>>(
      thrust::raw_pointer_cast(objects.data()), (const float*) (layer.buffer), numClasses, outputSize,
      static_cast<float>(networkInfo.width), static_cast<float>(networkInfo.height),
      thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data()));

  // Read back the detection count, then only that many decoded objects.
  thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin());
  objectList.resize(numDetections[0]);
  thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin());

  return true;
}
|
||||
|
||||
/**
 * Parses the first output layer into detection objects by running decodeTensor_YOLOV8_ONNX on it.
 *
 * The layer dimensions are read as d[0] = 4 + number of classes and d[1] = number of candidates.
 * A class-count mismatch with the configuration only produces a warning.
 *
 * @param outputLayersInfo Output layers of the network; only element 0 is used.
 * @param networkInfo      Network input size, passed to the kernel as float width and height.
 * @param detectionParams  Supplies the configured class count and the per-class thresholds.
 * @param objectList       Resized to the number of detections and filled with the decoded objects.
 * @return false when no output layer is present, true otherwise.
 */
static bool
NvDsInferParseCustom_YOLOV8_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
  if (outputLayersInfo.empty()) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  const NvDsInferLayerInfo& layer = outputLayersInfo[0];

  const uint numClasses = layer.inferDims.d[0] - 4;
  const uint outputSize = layer.inferDims.d[1];

  if (numClasses != detectionParams.numClassesConfigured) {
    std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses
        << " in config_infer file\n" << std::endl;
  }

  // Nothing to decode; also keeps the unsigned block-count computation below from wrapping.
  if (outputSize == 0) {
    objectList.clear();
    return true;
  }

  thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);

  std::vector<int> numDetections = { 0 };
  thrust::device_vector<int> d_numDetections(numDetections);

  thrust::device_vector<float> preclusterThreshold(detectionParams.perClassPreclusterThreshold);

  const int threads_per_block = 1024;
  const int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;

  // The execution configuration is <<<grid size, block size>>>: blocks first, threads per block second.
  decodeTensor_YOLOV8_ONNX<<<number_of_blocks, threads_per_block>>>(
      thrust::raw_pointer_cast(objects.data()), (const float*) (layer.buffer), numClasses, outputSize,
      static_cast<float>(networkInfo.width), static_cast<float>(networkInfo.height),
      thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data()));

  // Read back the detection count, then only that many decoded objects.
  thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin());
  objectList.resize(numDetections[0]);
  thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin());

  return true;
}
|
||||
|
||||
/**
 * Parses the first output layer into detection objects by running decodeTensor_YOLOX_ONNX on it.
 *
 * The layer dimensions are read as d[0] = number of candidates and d[1] = 5 + number of classes.
 * For each stride in {8, 16, 32} the function lists every grid cell (x index, y index, stride) in
 * row-major order and hands the three resulting arrays to the kernel.
 * A class-count mismatch with the configuration only produces a warning.
 *
 * @param outputLayersInfo Output layers of the network; only element 0 is used.
 * @param networkInfo      Network input size; also determines the number of grid cells per stride.
 * @param detectionParams  Supplies the configured class count and the per-class thresholds.
 * @param objectList       Resized to the number of detections and filled with the decoded objects.
 * @return false when no output layer is present, true otherwise.
 */
static bool
NvDsInferParseCustom_YOLOX_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
  if (outputLayersInfo.empty()) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  const NvDsInferLayerInfo& layer = outputLayersInfo[0];

  const uint outputSize = layer.inferDims.d[0];
  const uint numClasses = layer.inferDims.d[1] - 5;

  if (numClasses != detectionParams.numClassesConfigured) {
    std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses
        << " in config_infer file\n" << std::endl;
  }

  // Nothing to decode; also keeps the unsigned block-count computation below from wrapping.
  if (outputSize == 0) {
    objectList.clear();
    return true;
  }

  thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);

  std::vector<int> numDetections = { 0 };
  thrust::device_vector<int> d_numDetections(numDetections);

  thrust::device_vector<float> preclusterThreshold(detectionParams.perClassPreclusterThreshold);

  const std::vector<int> strides = {8, 16, 32};

  std::vector<int> grid0;
  std::vector<int> grid1;
  std::vector<int> gridStrides;

  // Capacity hint only; the actual element count is set by the loops below.
  grid0.reserve(outputSize);
  grid1.reserve(outputSize);
  gridStrides.reserve(outputSize);

  for (const int stride : strides) {
    const int num_grid_y = networkInfo.height / stride;
    const int num_grid_x = networkInfo.width / stride;
    for (int g1 = 0; g1 < num_grid_y; ++g1) {
      for (int g0 = 0; g0 < num_grid_x; ++g0) {
        grid0.push_back(g0);
        grid1.push_back(g1);
        gridStrides.push_back(stride);
      }
    }
  }

  thrust::device_vector<int> d_grid0(grid0);
  thrust::device_vector<int> d_grid1(grid1);
  thrust::device_vector<int> d_gridStrides(gridStrides);

  const int threads_per_block = 1024;
  const int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;

  // The execution configuration is <<<grid size, block size>>>: blocks first, threads per block second.
  decodeTensor_YOLOX_ONNX<<<number_of_blocks, threads_per_block>>>(
      thrust::raw_pointer_cast(objects.data()), (const float*) (layer.buffer), numClasses, outputSize,
      static_cast<float>(networkInfo.width), static_cast<float>(networkInfo.height),
      thrust::raw_pointer_cast(d_grid0.data()), thrust::raw_pointer_cast(d_grid1.data()),
      thrust::raw_pointer_cast(d_gridStrides.data()), thrust::raw_pointer_cast(preclusterThreshold.data()),
      thrust::raw_pointer_cast(d_numDetections.data()));

  // Read back the detection count, then only that many decoded objects.
  thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin());
  objectList.resize(numDetections[0]);
  thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin());

  return true;
}
|
||||
|
||||
/**
 * Parses a scores layer and a boxes layer into detection objects by running decodeTensor_YOLO_NAS_ONNX.
 *
 * Output layer 0 is used as scores and layer 1 as boxes. The scores dimensions are read as
 * d[0] = number of candidates and d[1] = number of classes.
 * A class-count mismatch with the configuration only produces a warning.
 *
 * @param outputLayersInfo Output layers of the network; elements 0 and 1 are used.
 * @param networkInfo      Network input size, passed to the kernel as float width and height.
 * @param detectionParams  Supplies the configured class count and the per-class thresholds.
 * @param objectList       Resized to the number of detections and filled with the decoded objects.
 * @return false when fewer than two output layers are present, true otherwise.
 */
static bool
NvDsInferParseCustom_YOLO_NAS_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
  // Both index 0 and index 1 are read below, so a single layer is not enough.
  if (outputLayersInfo.size() < 2) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  const NvDsInferLayerInfo& scores = outputLayersInfo[0];
  const NvDsInferLayerInfo& boxes = outputLayersInfo[1];

  const uint outputSize = scores.inferDims.d[0];
  const uint numClasses = scores.inferDims.d[1];

  if (numClasses != detectionParams.numClassesConfigured) {
    std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses
        << " in config_infer file\n" << std::endl;
  }

  // Nothing to decode; also keeps the unsigned block-count computation below from wrapping.
  if (outputSize == 0) {
    objectList.clear();
    return true;
  }

  thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);

  std::vector<int> numDetections = { 0 };
  thrust::device_vector<int> d_numDetections(numDetections);

  thrust::device_vector<float> preclusterThreshold(detectionParams.perClassPreclusterThreshold);

  const int threads_per_block = 1024;
  const int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;

  // The execution configuration is <<<grid size, block size>>>: blocks first, threads per block second.
  decodeTensor_YOLO_NAS_ONNX<<<number_of_blocks, threads_per_block>>>(
      thrust::raw_pointer_cast(objects.data()), (const float*) (scores.buffer), (const float*) (boxes.buffer), numClasses,
      outputSize, static_cast<float>(networkInfo.width), static_cast<float>(networkInfo.height),
      thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data()));

  // Read back the detection count, then only that many decoded objects.
  thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin());
  objectList.resize(numDetections[0]);
  thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin());

  return true;
}
|
||||
|
||||
/**
 * Parses a scores layer and a boxes layer into detection objects by running decodeTensor_PPYOLOE_ONNX.
 *
 * Output layer 0 is used as scores and layer 1 as boxes. The scores dimensions are read as
 * d[0] = number of classes and d[1] = number of candidates.
 * A class-count mismatch with the configuration only produces a warning.
 *
 * @param outputLayersInfo Output layers of the network; elements 0 and 1 are used.
 * @param networkInfo      Network input size, passed to the kernel as float width and height.
 * @param detectionParams  Supplies the configured class count and the per-class thresholds.
 * @param objectList       Resized to the number of detections and filled with the decoded objects.
 * @return false when fewer than two output layers are present, true otherwise.
 */
static bool
NvDsInferParseCustom_PPYOLOE_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
  // Both index 0 and index 1 are read below, so a single layer is not enough.
  if (outputLayersInfo.size() < 2) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  const NvDsInferLayerInfo& scores = outputLayersInfo[0];
  const NvDsInferLayerInfo& boxes = outputLayersInfo[1];

  const uint numClasses = scores.inferDims.d[0];
  const uint outputSize = scores.inferDims.d[1];

  if (numClasses != detectionParams.numClassesConfigured) {
    std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses
        << " in config_infer file\n" << std::endl;
  }

  // Nothing to decode; also keeps the unsigned block-count computation below from wrapping.
  if (outputSize == 0) {
    objectList.clear();
    return true;
  }

  thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);

  std::vector<int> numDetections = { 0 };
  thrust::device_vector<int> d_numDetections(numDetections);

  thrust::device_vector<float> preclusterThreshold(detectionParams.perClassPreclusterThreshold);

  const int threads_per_block = 1024;
  const int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;

  // The execution configuration is <<<grid size, block size>>>: blocks first, threads per block second.
  decodeTensor_PPYOLOE_ONNX<<<number_of_blocks, threads_per_block>>>(
      thrust::raw_pointer_cast(objects.data()), (const float*) (scores.buffer), (const float*) (boxes.buffer), numClasses,
      outputSize, static_cast<float>(networkInfo.width), static_cast<float>(networkInfo.height),
      thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data()));

  // Read back the detection count, then only that many decoded objects.
  thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin());
  objectList.resize(numDetections[0]);
  thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin());

  return true;
}
|
||||
|
||||
// C-linkage entry point: forwards all arguments to NvDsInferParseCustom_YOLO_ONNX and returns its result.
extern "C" bool
NvDsInferParse_YOLO_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  const bool parsed = NvDsInferParseCustom_YOLO_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList);
  return parsed;
}
|
||||
|
||||
// C-linkage entry point: forwards all arguments to NvDsInferParseCustom_YOLOV8_ONNX and returns its result.
extern "C" bool
NvDsInferParse_YOLOV8_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  const bool parsed = NvDsInferParseCustom_YOLOV8_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList);
  return parsed;
}
|
||||
|
||||
// C-linkage entry point: forwards all arguments to NvDsInferParseCustom_YOLOX_ONNX and returns its result.
extern "C" bool
NvDsInferParse_YOLOX_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  const bool parsed = NvDsInferParseCustom_YOLOX_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList);
  return parsed;
}
|
||||
|
||||
// C-linkage entry point: forwards all arguments to NvDsInferParseCustom_YOLO_NAS_ONNX and returns its result.
extern "C" bool
NvDsInferParse_YOLO_NAS_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  const bool parsed = NvDsInferParseCustom_YOLO_NAS_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList);
  return parsed;
}
|
||||
|
||||
// C-linkage entry point: forwards all arguments to NvDsInferParseCustom_PPYOLOE_ONNX and returns its result.
extern "C" bool
NvDsInferParse_PPYOLOE_ONNX(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  const bool parsed = NvDsInferParseCustom_PPYOLOE_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList);
  return parsed;
}
|
||||
@@ -98,25 +98,6 @@ loadWeights(const std::string weightsFilePath, const std::string& networkType)
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (weightsFilePath.find(".wts") != std::string::npos) {
|
||||
std::ifstream file(weightsFilePath);
|
||||
assert(file.good());
|
||||
int32_t count;
|
||||
file >> count;
|
||||
assert(count > 0 && "\nInvalid .wts file.");
|
||||
|
||||
uint32_t floatWeight;
|
||||
std::string name;
|
||||
uint32_t size;
|
||||
|
||||
while (count--) {
|
||||
file >> name >> std::dec >> size;
|
||||
for (uint32_t x = 0, y = size; x < y; ++x) {
|
||||
file >> std::hex >> floatWeight;
|
||||
weights.push_back(*reinterpret_cast<float*>(&floatWeight));
|
||||
};
|
||||
}
|
||||
}
|
||||
else {
|
||||
std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl;
|
||||
assert(0);
|
||||
|
||||
@@ -34,8 +34,8 @@ Yolo::Yolo(const NetworkInfo& networkInfo) : m_InputBlobName(networkInfo.inputBl
|
||||
m_NetworkType(networkInfo.networkType), m_ConfigFilePath(networkInfo.configFilePath),
|
||||
m_WtsFilePath(networkInfo.wtsFilePath), m_Int8CalibPath(networkInfo.int8CalibPath), m_DeviceType(networkInfo.deviceType),
|
||||
m_NumDetectedClasses(networkInfo.numDetectedClasses), m_ClusterMode(networkInfo.clusterMode),
|
||||
m_NetworkMode(networkInfo.networkMode), m_ScoreThreshold(networkInfo.scoreThreshold), m_InputH(0), m_InputW(0),
|
||||
m_InputC(0), m_InputSize(0), m_NumClasses(0), m_LetterBox(0), m_NewCoords(0), m_YoloCount(0)
|
||||
m_NetworkMode(networkInfo.networkMode), m_InputH(0), m_InputW(0), m_InputC(0), m_InputSize(0), m_NumClasses(0),
|
||||
m_LetterBox(0), m_NewCoords(0), m_YoloCount(0)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -130,18 +130,6 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
{
|
||||
int weightPtr = 0;
|
||||
|
||||
std::string weightsType = "wts";
|
||||
if (m_WtsFilePath.find(".weights") != std::string::npos)
|
||||
weightsType = "weights";
|
||||
|
||||
float eps = 1.0e-5;
|
||||
if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov6") != std::string::npos ||
|
||||
m_NetworkType.find("yolov7") != std::string::npos || m_NetworkType.find("yolov8") != std::string::npos ||
|
||||
m_NetworkType.find("yolox") != std::string::npos)
|
||||
eps = 1.0e-3;
|
||||
else if (m_NetworkType.find("yolor") != std::string::npos)
|
||||
eps = 1.0e-4;
|
||||
|
||||
nvinfer1::ITensor* data = network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
|
||||
nvinfer1::Dims{3, {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}});
|
||||
assert(data != nullptr && data->getDimensions().nbDims > 0);
|
||||
@@ -152,18 +140,15 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
nvinfer1::ITensor* yoloTensorInputs[m_YoloCount];
|
||||
uint yoloCountInputs = 0;
|
||||
|
||||
int modelType = -1;
|
||||
|
||||
for (uint i = 0; i < m_ConfigBlocks.size(); ++i) {
|
||||
std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";
|
||||
|
||||
if (m_ConfigBlocks.at(i).at("type") == "net")
|
||||
printLayerInfo("", "Layer", "Input Shape", "Output Shape", "WeightPtr");
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "convolutional") {
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "conv" || m_ConfigBlocks.at(i).at("type") == "convolutional") {
|
||||
int channels = getNumChannels(previous);
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, eps,
|
||||
previous, &network);
|
||||
previous = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
@@ -173,39 +158,30 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "deconvolutional") {
|
||||
int channels = getNumChannels(previous);
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels,
|
||||
previous, &network);
|
||||
previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous,
|
||||
&network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "deconv";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "c2f") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = c2fLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "c2f_" + m_ConfigBlocks.at(i).at("activation");
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "batchnorm") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = batchnormLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous,
|
||||
&network);
|
||||
previous = batchnormLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "batchnorm_" + m_ConfigBlocks.at(i).at("activation");
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul") {
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "implicit" || m_ConfigBlocks.at(i).at("type") == "implicit_add" ||
|
||||
m_ConfigBlocks.at(i).at("type") == "implicit_mul") {
|
||||
previous = implicitLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = m_ConfigBlocks.at(i).at("type");
|
||||
std::string layerName = "implicit";
|
||||
printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || m_ConfigBlocks.at(i).at("type") == "control_channels") {
|
||||
@@ -234,27 +210,44 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
||||
assert(i + from - 1 < i - 2);
|
||||
|
||||
std::string mode = "add";
|
||||
if (m_ConfigBlocks.at(i).find("mode") != m_ConfigBlocks.at(i).end())
|
||||
mode = m_ConfigBlocks.at(i).at("mode");
|
||||
|
||||
std::string activation = "linear";
|
||||
if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end())
|
||||
activation = m_ConfigBlocks.at(i).at("activation");
|
||||
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions());
|
||||
previous = shortcutLayer(i, mode, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous,
|
||||
previous = shortcutLayer(i, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous,
|
||||
tensorOutputs[i + from - 1], &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "shortcut_" + mode + "_" + activation + ": " + std::to_string(i + from - 1);
|
||||
std::string layerName = "shortcut_" + activation + ": " + std::to_string(i + from - 1);
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
|
||||
if (mode == "add" && inputVol != shortcutVol)
|
||||
if (inputVol != shortcutVol)
|
||||
std::cout << inputVol << " +" << shortcutVol << std::endl;
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "sam") {
|
||||
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
|
||||
int from = stoi(m_ConfigBlocks.at(i).at("from"));
|
||||
if (from > 0)
|
||||
from = from - i + 1;
|
||||
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
|
||||
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
||||
assert(i + from - 1 < i - 2);
|
||||
|
||||
std::string activation = "linear";
|
||||
if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end())
|
||||
activation = m_ConfigBlocks.at(i).at("activation");
|
||||
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = samLayer(i, activation, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "sam_" + activation + ": " + std::to_string(i + from - 1);
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "route") {
|
||||
std::string layers;
|
||||
previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network);
|
||||
@@ -273,7 +266,8 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
std::string layerName = "upsample";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "maxpool" || m_ConfigBlocks.at(i).at("type") == "avgpool") {
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "max" || m_ConfigBlocks.at(i).at("type") == "maxpool" ||
|
||||
m_ConfigBlocks.at(i).at("type") == "avg" || m_ConfigBlocks.at(i).at("type") == "avgpool") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = poolingLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||
assert(previous != nullptr);
|
||||
@@ -282,62 +276,33 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
std::string layerName = m_ConfigBlocks.at(i).at("type");
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "reorg3d") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "reorg3d";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "reorg") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
if (m_NetworkType.find("yolov2") != std::string::npos) {
|
||||
nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
|
||||
assert(reorgPlugin != nullptr);
|
||||
nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin);
|
||||
assert(reorg != nullptr);
|
||||
std::string reorglayerName = "reorg_" + std::to_string(i);
|
||||
reorg->setName(reorglayerName.c_str());
|
||||
previous = reorg->getOutput(0);
|
||||
}
|
||||
else
|
||||
previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||
nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
|
||||
assert(reorgPlugin != nullptr);
|
||||
nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin);
|
||||
assert(reorg != nullptr);
|
||||
std::string reorglayerName = "reorg_" + std::to_string(i);
|
||||
reorg->setName(reorglayerName.c_str());
|
||||
previous = reorg->getOutput(0);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "reorg";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "reduce") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = reduceLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "reduce";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "shuffle") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = shuffleLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "shuffle";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "softmax") {
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = softmaxLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
std::string layerName = "softmax";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region") {
|
||||
if (m_ConfigBlocks.at(i).at("type") == "yolo")
|
||||
if (m_NetworkType.find("yolor") != std::string::npos)
|
||||
modelType = 2;
|
||||
else
|
||||
modelType = 1;
|
||||
else
|
||||
modelType = 0;
|
||||
|
||||
std::string blobName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i);
|
||||
std::string blobName = m_ConfigBlocks.at(i).at("type") == "yolo" ? "yolo_" + std::to_string(i) :
|
||||
"region_" + std::to_string(i);
|
||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||
curYoloTensor.blobName = blobName;
|
||||
@@ -348,83 +313,11 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
tensorOutputs.push_back(previous);
|
||||
yoloTensorInputs[yoloCountInputs] = previous;
|
||||
++yoloCountInputs;
|
||||
std::string layerName = modelType != 0 ? "yolo" : "region";
|
||||
std::string layerName = m_ConfigBlocks.at(i).at("type") == "yolo" ? "yolo" : "region";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, "-", "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "cls") {
|
||||
modelType = 3;
|
||||
|
||||
std::string blobName = "cls_" + std::to_string(i);
|
||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||
curYoloTensor.blobName = blobName;
|
||||
curYoloTensor.numBBoxes = prevTensorDims.d[1];
|
||||
m_NumClasses = prevTensorDims.d[0];
|
||||
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = clsLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
yoloTensorInputs[yoloCountInputs] = previous;
|
||||
++yoloCountInputs;
|
||||
std::string layerName = "cls";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "reg") {
|
||||
modelType = 3;
|
||||
|
||||
std::string blobName = "reg_" + std::to_string(i);
|
||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||
curYoloTensor.blobName = blobName;
|
||||
curYoloTensor.numBBoxes = prevTensorDims.d[1];
|
||||
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = regLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
yoloTensorInputs[yoloCountInputs] = previous;
|
||||
++yoloCountInputs;
|
||||
std::string layerName = "reg";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "detect_v8") {
|
||||
modelType = 4;
|
||||
|
||||
std::string blobName = "detect_v8_" + std::to_string(i);
|
||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||
curYoloTensor.blobName = blobName;
|
||||
curYoloTensor.numBBoxes = prevTensorDims.d[1];
|
||||
|
||||
std::string inputVol = dimsToString(previous->getDimensions());
|
||||
previous = detectV8Layer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network);
|
||||
assert(previous != nullptr);
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
yoloTensorInputs[yoloCountInputs] = previous;
|
||||
++yoloCountInputs;
|
||||
std::string layerName = "detect_v8";
|
||||
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||
}
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "detect_x") {
|
||||
modelType = 5;
|
||||
|
||||
std::string blobName = "detect_x_" + std::to_string(i);
|
||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||
curYoloTensor.blobName = blobName;
|
||||
curYoloTensor.numBBoxes = prevTensorDims.d[0];
|
||||
m_NumClasses = prevTensorDims.d[1] - 5;
|
||||
|
||||
std::string outputVol = dimsToString(previous->getDimensions());
|
||||
tensorOutputs.push_back(previous);
|
||||
yoloTensorInputs[yoloCountInputs] = previous;
|
||||
++yoloCountInputs;
|
||||
std::string layerName = "detect_x";
|
||||
printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr));
|
||||
else if (m_ConfigBlocks.at(i).at("type") == "dropout") {
|
||||
// pass
|
||||
}
|
||||
else {
|
||||
std::cerr << "\nUnsupported layer type --> \"" << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl;
|
||||
@@ -438,42 +331,24 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
|
||||
}
|
||||
|
||||
if (m_YoloCount == yoloCountInputs) {
|
||||
assert((modelType != -1) && "\nCould not determine model type");
|
||||
|
||||
uint64_t outputSize = 0;
|
||||
for (uint j = 0; j < yoloCountInputs; ++j) {
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(j);
|
||||
if (modelType == 3 || modelType == 4 || modelType == 5)
|
||||
outputSize = curYoloTensor.numBBoxes;
|
||||
else
|
||||
outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes;
|
||||
outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize,
|
||||
modelType, m_ScoreThreshold);
|
||||
nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors,
|
||||
outputSize);
|
||||
assert(yoloPlugin != nullptr);
|
||||
nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin);
|
||||
assert(yolo != nullptr);
|
||||
std::string yoloLayerName = "yolo";
|
||||
yolo->setName(yoloLayerName.c_str());
|
||||
|
||||
std::string outputlayerName;
|
||||
nvinfer1::ITensor* num_detections = yolo->getOutput(0);
|
||||
outputlayerName = "num_detections";
|
||||
num_detections->setName(outputlayerName.c_str());
|
||||
nvinfer1::ITensor* detection_boxes = yolo->getOutput(1);
|
||||
outputlayerName = "detection_boxes";
|
||||
detection_boxes->setName(outputlayerName.c_str());
|
||||
nvinfer1::ITensor* detection_scores = yolo->getOutput(2);
|
||||
outputlayerName = "detection_scores";
|
||||
detection_scores->setName(outputlayerName.c_str());
|
||||
nvinfer1::ITensor* detection_classes = yolo->getOutput(3);
|
||||
outputlayerName = "detection_classes";
|
||||
detection_classes->setName(outputlayerName.c_str());
|
||||
network.markOutput(*num_detections);
|
||||
network.markOutput(*detection_boxes);
|
||||
network.markOutput(*detection_scores);
|
||||
network.markOutput(*detection_classes);
|
||||
nvinfer1::ITensor* outputYolo = yolo->getOutput(0);
|
||||
std::string outputYoloLayerName = "output";
|
||||
outputYolo->setName(outputYoloLayerName.c_str());
|
||||
network.markOutput(*outputYolo);
|
||||
}
|
||||
else {
|
||||
std::cerr << "\nError in yolo cfg file" << std::endl;
|
||||
@@ -600,54 +475,6 @@ Yolo::parseConfigBlocks()
|
||||
|
||||
outputTensor.numBBoxes = outputTensor.mask.size() > 0 ? outputTensor.mask.size() : std::stoul(trim(block.at("num")));
|
||||
|
||||
m_YoloTensors.push_back(outputTensor);
|
||||
}
|
||||
else if ((block.at("type") == "cls") || (block.at("type") == "reg")) {
|
||||
++m_YoloCount;
|
||||
TensorInfo outputTensor;
|
||||
m_YoloTensors.push_back(outputTensor);
|
||||
}
|
||||
else if (block.at("type") == "detect_v8") {
|
||||
++m_YoloCount;
|
||||
|
||||
m_NumClasses = std::stoul(block.at("classes"));
|
||||
|
||||
TensorInfo outputTensor;
|
||||
m_YoloTensors.push_back(outputTensor);
|
||||
}
|
||||
else if (block.at("type") == "detect_x") {
|
||||
++m_YoloCount;
|
||||
TensorInfo outputTensor;
|
||||
|
||||
std::vector<int> strides;
|
||||
|
||||
std::string stridesString = block.at("strides");
|
||||
while (!stridesString.empty()) {
|
||||
int npos = stridesString.find_first_of(',');
|
||||
if (npos != -1) {
|
||||
int stride = std::stof(trim(stridesString.substr(0, npos)));
|
||||
strides.push_back(stride);
|
||||
stridesString.erase(0, npos + 1);
|
||||
}
|
||||
else {
|
||||
int stride = std::stof(trim(stridesString));
|
||||
strides.push_back(stride);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint i = 0; i < strides.size(); ++i) {
|
||||
int num_grid_y = m_InputH / strides[i];
|
||||
int num_grid_x = m_InputW / strides[i];
|
||||
for (int g1 = 0; g1 < num_grid_y; ++g1) {
|
||||
for (int g0 = 0; g0 < num_grid_x; ++g0) {
|
||||
outputTensor.anchors.push_back((float) g0);
|
||||
outputTensor.anchors.push_back((float) g1);
|
||||
outputTensor.mask.push_back(strides[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_YoloTensors.push_back(outputTensor);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,21 +31,15 @@
|
||||
|
||||
#include "layers/convolutional_layer.h"
|
||||
#include "layers/deconvolutional_layer.h"
|
||||
#include "layers/c2f_layer.h"
|
||||
#include "layers/batchnorm_layer.h"
|
||||
#include "layers/implicit_layer.h"
|
||||
#include "layers/channels_layer.h"
|
||||
#include "layers/shortcut_layer.h"
|
||||
#include "layers/sam_layer.h"
|
||||
#include "layers/route_layer.h"
|
||||
#include "layers/upsample_layer.h"
|
||||
#include "layers/pooling_layer.h"
|
||||
#include "layers/reorg_layer.h"
|
||||
#include "layers/reduce_layer.h"
|
||||
#include "layers/shuffle_layer.h"
|
||||
#include "layers/softmax_layer.h"
|
||||
#include "layers/cls_layer.h"
|
||||
#include "layers/reg_layer.h"
|
||||
#include "layers/detect_v8_layer.h"
|
||||
|
||||
struct NetworkInfo
|
||||
{
|
||||
@@ -57,7 +51,6 @@ struct NetworkInfo
|
||||
std::string deviceType;
|
||||
uint numDetectedClasses;
|
||||
int clusterMode;
|
||||
float scoreThreshold;
|
||||
std::string networkMode;
|
||||
};
|
||||
|
||||
@@ -98,7 +91,6 @@ class Yolo : public IModelParser {
|
||||
const uint m_NumDetectedClasses;
|
||||
const int m_ClusterMode;
|
||||
const std::string m_NetworkMode;
|
||||
const float m_ScoreThreshold;
|
||||
|
||||
uint m_InputH;
|
||||
uint m_InputW;
|
||||
|
||||
@@ -4,13 +4,13 @@
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Logistic sigmoid 1 / (1 + e^-x), evaluated with the __expf intrinsic. */
inline __device__ float sigmoidGPU(const float& x)
{
  const float decay = __expf(-x);
  return 1.0f / (1.0f + decay);
}
|
||||
|
||||
__global__ void gpuYoloLayer(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
|
||||
int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX,
|
||||
const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors,
|
||||
const int* mask)
|
||||
__global__ void gpuYoloLayer(const float* input, float* output, int* count, const uint netWidth, const uint netHeight,
|
||||
const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY,
|
||||
const float* anchors, const int* mask)
|
||||
{
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@@ -24,18 +24,13 @@ __global__ void gpuYoloLayer(const float* input, int* num_detections, float* det
|
||||
|
||||
const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
|
||||
|
||||
if (objectness < scoreThreshold)
|
||||
return;
|
||||
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
|
||||
const float alpha = scaleXY;
|
||||
const float beta = -0.5 * (scaleXY - 1);
|
||||
|
||||
float x = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id)
|
||||
float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id)
|
||||
* netWidth / gridSizeX;
|
||||
|
||||
float y = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id)
|
||||
float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id)
|
||||
* netHeight / gridSizeY;
|
||||
|
||||
float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[mask[z_id] * 2];
|
||||
@@ -53,23 +48,26 @@ __global__ void gpuYoloLayer(const float* input, int* num_detections, float* det
|
||||
}
|
||||
}
|
||||
|
||||
detection_boxes[count * 4 + 0] = x - 0.5 * w;
|
||||
detection_boxes[count * 4 + 1] = y - 0.5 * h;
|
||||
detection_boxes[count * 4 + 2] = x + 0.5 * w;
|
||||
detection_boxes[count * 4 + 3] = y + 0.5 * h;
|
||||
detection_scores[count] = objectness * maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
int _count = (int)atomicAdd(count, 1);
|
||||
|
||||
output[_count * 7 + 0] = xc;
|
||||
output[_count * 7 + 1] = yc;
|
||||
output[_count * 7 + 2] = w;
|
||||
output[_count * 7 + 3] = h;
|
||||
output[_count * 7 + 4] = maxProb;
|
||||
output[_count * 7 + 5] = maxIndex;
|
||||
output[_count * 7 + 6] = objectness;
|
||||
}
|
||||
|
||||
cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
|
||||
cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
|
||||
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
dim3 threads_per_block(16, 16, 4);
|
||||
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
|
||||
@@ -77,12 +75,11 @@ cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detecti
|
||||
|
||||
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
||||
gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||
reinterpret_cast<const float*>(input) + (batch * inputSize), reinterpret_cast<int*>(num_detections) + (batch),
|
||||
reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
|
||||
reinterpret_cast<int*>(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX,
|
||||
gridSizeY, numOutputClasses, numBBoxes, scaleXY, reinterpret_cast<const float*>(anchors),
|
||||
reinterpret_cast<const int*>(mask));
|
||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||
reinterpret_cast<float*> (output) + (batch * 7 * outputSize),
|
||||
reinterpret_cast<int*> (count) + (batch),
|
||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
|
||||
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
||||
}
|
||||
return cudaGetLastError();
|
||||
}
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Decodes one candidate per thread from separate class-score (cls) and box (reg) buffers.
 *
 * cls is read as outputSize rows of numOutputClasses scores, reg as outputSize rows of 4 values.
 * The best class is the one with the largest score strictly greater than 0. Candidates whose
 * best score is below scoreThreshold are dropped. Each surviving candidate claims the next free
 * output slot through atomicAdd on *num_detections and copies its 4 box values, best score and
 * best class index into that slot.
 *
 * netWidth and netHeight are part of the signature but are not used by this kernel.
 */
__global__ void gpuYoloLayer_e(const float* cls, const float* reg, int* num_detections, float* detection_boxes,
    float* detection_scores, int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
    const uint numOutputClasses, const uint64_t outputSize)
{
  const uint candidate = blockIdx.x * blockDim.x + threadIdx.x;

  // The launch grid may overshoot the number of candidates.
  if (candidate >= outputSize)
    return;

  const uint scoreBase = candidate * numOutputClasses;

  float bestScore = 0.0f;
  int bestClass = -1;

  uint c = 0;
  while (c < numOutputClasses) {
    const float score = cls[scoreBase + c];
    if (score > bestScore) {
      bestScore = score;
      bestClass = c;
    }
    ++c;
  }

  if (bestScore < scoreThreshold)
    return;

  // Reserve an output slot; slots are handed out in the order threads reach this point.
  const int slot = (int)atomicAdd(num_detections, 1);

  for (int k = 0; k < 4; ++k)
    detection_boxes[slot * 4 + k] = reg[candidate * 4 + k];

  detection_scores[slot] = bestScore;
  detection_classes[slot] = bestClass;
}
|
||||
|
||||
cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes,
    void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize,
    const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses,
    cudaStream_t stream);

/*
 * Host-side launcher for gpuYoloLayer_e.
 *
 * Issues one kernel launch per batch item on `stream`, using 16 threads per block and
 * (outputSize / 16) + 1 blocks. For batch item b the buffers are offset by:
 *   cls               b * numOutputClasses * outputSize floats
 *   reg               b * 4 * outputSize floats
 *   num_detections    b ints
 *   detection_boxes   b * 4 * outputSize floats
 *   detection_scores  b * outputSize floats
 *   detection_classes b * outputSize ints
 *
 * Returns the value of cudaGetLastError() taken after all launches have been issued.
 */
cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes,
    void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize,
    const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses,
    cudaStream_t stream)
{
  const int blockThreads = 16;
  const int gridBlocks = (outputSize / blockThreads) + 1;

  // Typed views of the untyped buffers handed in by the caller.
  const float* clsBase = reinterpret_cast<const float*>(cls);
  const float* regBase = reinterpret_cast<const float*>(reg);
  int* countBase = reinterpret_cast<int*>(num_detections);
  float* boxBase = reinterpret_cast<float*>(detection_boxes);
  float* scoreBase = reinterpret_cast<float*>(detection_scores);
  int* classBase = reinterpret_cast<int*>(detection_classes);

  unsigned int item = 0;
  while (item < batchSize) {
    gpuYoloLayer_e<<<gridBlocks, blockThreads, 0, stream>>>(
        clsBase + (item * numOutputClasses * outputSize),
        regBase + (item * 4 * outputSize),
        countBase + (item),
        boxBase + (item * 4 * outputSize),
        scoreBase + (item * outputSize),
        classBase + (item * outputSize),
        scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize);
    ++item;
  }

  return cudaGetLastError();
}
|
||||
@@ -5,10 +5,9 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
__global__ void gpuYoloLayer_nc(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
|
||||
int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX,
|
||||
const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors,
|
||||
const int* mask)
|
||||
__global__ void gpuYoloLayer_nc(const float* input, float* output, int* count, const uint netWidth, const uint netHeight,
|
||||
const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY,
|
||||
const float* anchors, const int* mask)
|
||||
{
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@@ -22,18 +21,13 @@ __global__ void gpuYoloLayer_nc(const float* input, int* num_detections, float*
|
||||
|
||||
const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];
|
||||
|
||||
if (objectness < scoreThreshold)
|
||||
return;
|
||||
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
|
||||
const float alpha = scaleXY;
|
||||
const float beta = -0.5 * (scaleXY - 1);
|
||||
|
||||
float x = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth /
|
||||
float xc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth /
|
||||
gridSizeX;
|
||||
|
||||
float y = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight /
|
||||
float yc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight /
|
||||
gridSizeY;
|
||||
|
||||
float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * anchors[mask[z_id] * 2];
|
||||
@@ -51,23 +45,26 @@ __global__ void gpuYoloLayer_nc(const float* input, int* num_detections, float*
|
||||
}
|
||||
}
|
||||
|
||||
detection_boxes[count * 4 + 0] = x - 0.5 * w;
|
||||
detection_boxes[count * 4 + 1] = y - 0.5 * h;
|
||||
detection_boxes[count * 4 + 2] = x + 0.5 * w;
|
||||
detection_boxes[count * 4 + 3] = y + 0.5 * h;
|
||||
detection_scores[count] = objectness * maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
int _count = (int)atomicAdd(count, 1);
|
||||
|
||||
output[_count * 7 + 0] = xc;
|
||||
output[_count * 7 + 1] = yc;
|
||||
output[_count * 7 + 2] = w;
|
||||
output[_count * 7 + 3] = h;
|
||||
output[_count * 7 + 4] = maxProb;
|
||||
output[_count * 7 + 5] = maxIndex;
|
||||
output[_count * 7 + 6] = objectness;
|
||||
}
|
||||
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
|
||||
cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
dim3 threads_per_block(16, 16, 4);
|
||||
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
|
||||
@@ -75,12 +72,11 @@ cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* dete
|
||||
|
||||
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
||||
gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||
reinterpret_cast<const float*>(input) + (batch * inputSize), reinterpret_cast<int*>(num_detections) + (batch),
|
||||
reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
|
||||
reinterpret_cast<int*>(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX,
|
||||
gridSizeY, numOutputClasses, numBBoxes, scaleXY, reinterpret_cast<const float*>(anchors),
|
||||
reinterpret_cast<const int*>(mask));
|
||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||
reinterpret_cast<float*> (output) + (batch * 7 * outputSize),
|
||||
reinterpret_cast<int*> (count) + (batch),
|
||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
|
||||
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
||||
}
|
||||
return cudaGetLastError();
|
||||
}
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Logistic sigmoid 1 / (1 + e^-x), evaluated with the __expf intrinsic. */
inline __device__ float sigmoidGPU(const float& x)
{
  const float decay = __expf(-x);
  return 1.0f / (1.0f + decay);
}
|
||||
|
||||
/*
 * Decodes one (grid cell, anchor) prediction per thread.
 *
 * Thread coordinates map to x_id (grid column), y_id (grid row) and z_id (box index within the
 * cell). `input` is indexed as bbindex + numGridCells * channel, where each box owns
 * (5 + numOutputClasses) consecutive channels: 0-1 centre, 2-3 size, 4 objectness, 5.. class
 * logits. Every value read from `input` is passed through sigmoidGPU.
 *
 * Predictions whose objectness is below scoreThreshold are dropped. Each surviving prediction
 * claims the next free output slot through atomicAdd on *num_detections and writes:
 *   detection_boxes   x - w/2, y - h/2, x + w/2, y + h/2
 *   detection_scores  objectness * best class probability
 *   detection_classes index of the best class (-1 if no probability exceeded 0)
 *
 * Width and height are (2 * sigmoid)^2 scaled by the anchor selected through mask[z_id]. The
 * square is computed as a plain product and the 0.5 factors are single-precision literals, so
 * the kernel neither calls an approximate power intrinsic nor promotes to double.
 */
__global__ void gpuYoloLayer_r(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
    int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX,
    const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors,
    const int* mask)
{
  const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
  const uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
  const uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

  // The launch grid may overshoot the tensor in every dimension.
  if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes)
    return;

  const int numGridCells = gridSizeX * gridSizeY;
  const int bbindex = y_id * gridSizeX + x_id;

  // First channel belonging to this box.
  const uint channelBase = z_id * (5 + numOutputClasses);

  const float objectness = sigmoidGPU(input[bbindex + numGridCells * (channelBase + 4)]);

  if (objectness < scoreThreshold)
    return;

  // Reserve an output slot; slots are handed out in the order threads reach this point.
  const int count = (int)atomicAdd(num_detections, 1);

  const float alpha = scaleXY;
  const float beta = -0.5f * (scaleXY - 1);

  const float x = (sigmoidGPU(input[bbindex + numGridCells * (channelBase + 0)]) * alpha + beta + x_id)
      * netWidth / gridSizeX;

  const float y = (sigmoidGPU(input[bbindex + numGridCells * (channelBase + 1)]) * alpha + beta + y_id)
      * netHeight / gridSizeY;

  // (2 * sigmoid)^2 written as a product instead of __powf(v, 2).
  const float wRoot = sigmoidGPU(input[bbindex + numGridCells * (channelBase + 2)]) * 2;
  const float w = wRoot * wRoot * anchors[mask[z_id] * 2];

  const float hRoot = sigmoidGPU(input[bbindex + numGridCells * (channelBase + 3)]) * 2;
  const float h = hRoot * hRoot * anchors[mask[z_id] * 2 + 1];

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (uint i = 0; i < numOutputClasses; ++i) {
    const float prob = sigmoidGPU(input[bbindex + numGridCells * (channelBase + (5 + i))]);
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  const float halfW = 0.5f * w;
  const float halfH = 0.5f * h;

  detection_boxes[count * 4 + 0] = x - halfW;
  detection_boxes[count * 4 + 1] = y - halfH;
  detection_boxes[count * 4 + 2] = x + halfW;
  detection_boxes[count * 4 + 3] = y + halfH;
  detection_scores[count] = objectness * maxProb;
  detection_classes[count] = maxIndex;
}
|
||||
|
||||
cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
    const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
    const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);

/*
 * Host-side launcher for gpuYoloLayer_r.
 *
 * Uses 16 x 16 x 4 thread blocks and a grid of (gridSizeX / 16) + 1 by (gridSizeY / 16) + 1 by
 * (numBBoxes / 4) + 1 blocks, and issues one launch per batch item on `stream`. For batch item b
 * the buffers are offset by:
 *   input             b * inputSize floats
 *   num_detections    b ints
 *   detection_boxes   b * 4 * outputSize floats
 *   detection_scores  b * outputSize floats
 *   detection_classes b * outputSize ints
 * anchors and mask are passed to every launch without an offset.
 *
 * Returns the value of cudaGetLastError() taken after all launches have been issued.
 */
cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
    const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
    const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
  dim3 blockShape(16, 16, 4);
  dim3 gridShape((gridSizeX / blockShape.x) + 1, (gridSizeY / blockShape.y) + 1,
      (numBBoxes / blockShape.z) + 1);

  // Typed views of the untyped buffers handed in by the caller.
  const float* inputBase = reinterpret_cast<const float*>(input);
  int* countBase = reinterpret_cast<int*>(num_detections);
  float* boxBase = reinterpret_cast<float*>(detection_boxes);
  float* scoreBase = reinterpret_cast<float*>(detection_scores);
  int* classBase = reinterpret_cast<int*>(detection_classes);
  const float* anchorTable = reinterpret_cast<const float*>(anchors);
  const int* maskTable = reinterpret_cast<const int*>(mask);

  unsigned int item = 0;
  while (item < batchSize) {
    gpuYoloLayer_r<<<gridShape, blockShape, 0, stream>>>(
        inputBase + (item * inputSize),
        countBase + (item),
        boxBase + (item * 4 * outputSize),
        scoreBase + (item * outputSize),
        classBase + (item * outputSize),
        scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
        anchorTable, maskTable);
    ++item;
  }

  return cudaGetLastError();
}
|
||||
@@ -27,9 +27,9 @@ __device__ void softmaxGPU(const float* input, const int bbindex, const int numG
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void gpuRegionLayer(const float* input, float* softmax, int* num_detections, float* detection_boxes,
|
||||
float* detection_scores, int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
|
||||
const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float* anchors)
|
||||
__global__ void gpuRegionLayer(const float* input, float* softmax, float* output, int* count, const uint netWidth,
|
||||
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
|
||||
const float* anchors)
|
||||
{
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@@ -43,14 +43,9 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, int* num_dete
|
||||
|
||||
const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
|
||||
|
||||
if (objectness < scoreThreshold)
|
||||
return;
|
||||
float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / gridSizeX;
|
||||
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
|
||||
float x = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / gridSizeX;
|
||||
|
||||
float y = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / gridSizeY;
|
||||
float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / gridSizeY;
|
||||
|
||||
float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[z_id * 2] * netWidth /
|
||||
gridSizeX;
|
||||
@@ -71,23 +66,24 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, int* num_dete
|
||||
}
|
||||
}
|
||||
|
||||
detection_boxes[count * 4 + 0] = x - 0.5 * w;
|
||||
detection_boxes[count * 4 + 1] = y - 0.5 * h;
|
||||
detection_boxes[count * 4 + 2] = x + 0.5 * w;
|
||||
detection_boxes[count * 4 + 3] = y + 0.5 * h;
|
||||
detection_scores[count] = objectness * maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
int _count = (int)atomicAdd(count, 1);
|
||||
|
||||
output[_count * 7 + 0] = xc;
|
||||
output[_count * 7 + 1] = yc;
|
||||
output[_count * 7 + 2] = w;
|
||||
output[_count * 7 + 3] = h;
|
||||
output[_count * 7 + 4] = maxProb;
|
||||
output[_count * 7 + 5] = maxIndex;
|
||||
output[_count * 7 + 6] = objectness;
|
||||
}
|
||||
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
|
||||
uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX,
|
||||
const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream)
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
|
||||
uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX,
|
||||
const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads_per_block(16, 16, 4);
|
||||
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
|
||||
@@ -95,12 +91,12 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detectio
|
||||
|
||||
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
||||
gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||
reinterpret_cast<const float*>(input) + (batch * inputSize), reinterpret_cast<float*>(softmax) + (batch * inputSize),
|
||||
reinterpret_cast<int*>(num_detections) + (batch),
|
||||
reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
|
||||
reinterpret_cast<int*>(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX,
|
||||
gridSizeY, numOutputClasses, numBBoxes, reinterpret_cast<const float*>(anchors));
|
||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||
reinterpret_cast<float*> (softmax) + (batch * inputSize),
|
||||
reinterpret_cast<float*> (output) + (batch * 7 * outputSize),
|
||||
reinterpret_cast<int*> (count) + (batch),
|
||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes,
|
||||
reinterpret_cast<const float*> (anchors));
|
||||
}
|
||||
return cudaGetLastError();
|
||||
}
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Decodes one candidate per thread from a single packed buffer.
 *
 * `input` is read as outputSize rows of (4 + numOutputClasses) floats: the first 4 values of a
 * row are copied verbatim as the box, the remaining values are class scores. The best class is
 * the one with the largest score strictly greater than 0. Candidates whose best score is below
 * scoreThreshold are dropped. Each surviving candidate claims the next free output slot through
 * atomicAdd on *num_detections and stores its box, best score and best class index there.
 *
 * netWidth and netHeight are part of the signature but are not used by this kernel.
 */
__global__ void gpuYoloLayer_v8(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
    int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
    const uint numOutputClasses, const uint64_t outputSize)
{
  const uint row = blockIdx.x * blockDim.x + threadIdx.x;

  // The launch grid may overshoot the number of candidates.
  if (row >= outputSize)
    return;

  // Index of the first value belonging to this candidate.
  const uint rowBase = row * (4 + numOutputClasses);

  float bestScore = 0.0f;
  int bestClass = -1;

  uint c = 0;
  while (c < numOutputClasses) {
    const float score = input[rowBase + 4 + c];
    if (score > bestScore) {
      bestScore = score;
      bestClass = c;
    }
    ++c;
  }

  if (bestScore < scoreThreshold)
    return;

  // Reserve an output slot; slots are handed out in the order threads reach this point.
  const int slot = (int)atomicAdd(num_detections, 1);

  for (int k = 0; k < 4; ++k)
    detection_boxes[slot * 4 + k] = input[rowBase + k];

  detection_scores[slot] = bestScore;
  detection_classes[slot] = bestClass;
}
|
||||
|
||||
cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
    const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream);

/**
 * Host-side launcher for gpuYoloLayer_v8.
 *
 * Queues one kernel launch per batch item on `stream`. For batch item b the
 * buffers are advanced by:
 *   input             : b * (4 + numOutputClasses) * outputSize floats
 *   num_detections    : b ints
 *   detection_boxes   : b * 4 * outputSize floats
 *   detection_scores  : b * outputSize floats
 *   detection_classes : b * outputSize ints
 *
 * @return the value of cudaGetLastError() after all launches have been queued.
 */
cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
    const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream)
{
  const int threadsPerBlock = 16;
  // One extra block covers the remainder when outputSize is not a multiple of the block size.
  const int blockCount = (outputSize / threadsPerBlock) + 1;

  // Typed views of the untyped buffers handed in by the caller.
  const float* inputBase = reinterpret_cast<const float*>(input);
  int* countBase = reinterpret_cast<int*>(num_detections);
  float* boxBase = reinterpret_cast<float*>(detection_boxes);
  float* scoreBase = reinterpret_cast<float*>(detection_scores);
  int* classBase = reinterpret_cast<int*>(detection_classes);

  for (unsigned int b = 0; b < batchSize; ++b) {
    gpuYoloLayer_v8<<<blockCount, threadsPerBlock, 0, stream>>>(
        inputBase + (b * (4 + numOutputClasses) * outputSize),
        countBase + (b),
        boxBase + (b * 4 * outputSize),
        scoreBase + (b * outputSize),
        classBase + (b * outputSize),
        scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize);
  }

  return cudaGetLastError();
}
|
||||
@@ -1,73 +0,0 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/**
 * Decode kernel: one thread processes one candidate row of `input`.
 *
 * A row is (5 + numOutputClasses) consecutive floats: x, y, w, h, objectness,
 * then one score per class. Rows whose objectness is below `scoreThreshold`
 * are discarded. A surviving row reserves an output slot by atomically
 * incrementing `*num_detections` and writes:
 *   - a corner-form box (x - w/2, y - h/2, x + w/2, y + h/2), where
 *       x = (row.x + anchors[2 * row]) * mask[row]
 *       y = (row.y + anchors[2 * row + 1]) * mask[row]
 *       w = exp(row.w) * mask[row]
 *       h = exp(row.h) * mask[row]
 *   - the score objectness * (best class score)
 *   - the index of the best class (-1 if no class score exceeds 0)
 *
 * `anchors` and `mask` are indexed by candidate row, not by batch item.
 * `netWidth` and `netHeight` are part of the signature but are not read here.
 */
__global__ void gpuYoloLayer_x(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
    int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
    const uint numOutputClasses, const uint64_t outputSize, const float* anchors, const int* mask)
{
  const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;

  // Threads beyond the last candidate row have no work.
  if (x_id >= outputSize)
    return;

  // Offset of the first float belonging to this thread's row.
  const uint rowStart = x_id * (5 + numOutputClasses);

  const float objectness = input[rowStart + 4];

  if (objectness < scoreThreshold)
    return;

  // Output slots are handed out in the order threads reach this increment.
  const int count = atomicAdd(num_detections, 1);

  // Load the per-row multiplier once instead of re-reading it for every term.
  const float scale = mask[x_id];

  const float x = (input[rowStart + 0] + anchors[x_id * 2]) * scale;
  const float y = (input[rowStart + 1] + anchors[x_id * 2 + 1]) * scale;
  const float w = __expf(input[rowStart + 2]) * scale;
  const float h = __expf(input[rowStart + 3]) * scale;

  // Arg-max over the class scores.
  float maxProb = 0.0f;
  int maxIndex = -1;

  for (uint i = 0; i < numOutputClasses; ++i) {
    const float prob = input[rowStart + 5 + i];
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  // Float literals keep the corner computation in single precision.
  const float halfW = 0.5f * w;
  const float halfH = 0.5f * h;

  detection_boxes[count * 4 + 0] = x - halfW;
  detection_boxes[count * 4 + 1] = y - halfH;
  detection_boxes[count * 4 + 2] = x + halfW;
  detection_boxes[count * 4 + 3] = y + halfH;
  detection_scores[count] = objectness * maxProb;
  detection_classes[count] = maxIndex;
}
|
||||
|
||||
cudaError_t cudaYoloLayer_x(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
    const uint& netHeight, const uint& numOutputClasses, const void* anchors, const void* mask, cudaStream_t stream);

/**
 * Host-side launcher for gpuYoloLayer_x.
 *
 * Queues one kernel launch per batch item on `stream`. For batch item b the
 * buffers are advanced by:
 *   input             : b * (5 + numOutputClasses) * outputSize floats
 *   num_detections    : b ints
 *   detection_boxes   : b * 4 * outputSize floats
 *   detection_scores  : b * outputSize floats
 *   detection_classes : b * outputSize ints
 * `anchors` and `mask` are passed to every launch without a per-batch offset.
 *
 * @return the value of cudaGetLastError() after all launches have been queued.
 */
cudaError_t cudaYoloLayer_x(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
    const uint& netHeight, const uint& numOutputClasses, const void* anchors, const void* mask, cudaStream_t stream)
{
  const int threadsPerBlock = 16;
  // One extra block covers the remainder when outputSize is not a multiple of the block size.
  const int blockCount = (outputSize / threadsPerBlock) + 1;

  // Typed views of the untyped buffers handed in by the caller.
  const float* inputBase = reinterpret_cast<const float*>(input);
  int* countBase = reinterpret_cast<int*>(num_detections);
  float* boxBase = reinterpret_cast<float*>(detection_boxes);
  float* scoreBase = reinterpret_cast<float*>(detection_scores);
  int* classBase = reinterpret_cast<int*>(detection_classes);
  const float* anchorData = reinterpret_cast<const float*>(anchors);
  const int* maskData = reinterpret_cast<const int*>(mask);

  for (unsigned int b = 0; b < batchSize; ++b) {
    gpuYoloLayer_x<<<blockCount, threadsPerBlock, 0, stream>>>(
        inputBase + (b * (5 + numOutputClasses) * outputSize),
        countBase + (b),
        boxBase + (b * 4 * outputSize),
        scoreBase + (b * outputSize),
        classBase + (b * outputSize),
        scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize, anchorData, maskData);
  }

  return cudaGetLastError();
}
|
||||
@@ -38,38 +38,19 @@ namespace {
|
||||
}
|
||||
}
|
||||
|
||||
cudaError_t cudaYoloLayer_x(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& numOutputClasses, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses,
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
|
||||
cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
|
||||
cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
|
||||
uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX,
|
||||
const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
|
||||
YoloLayer::YoloLayer(const void* data, size_t length) {
|
||||
const char* d = static_cast<const char*>(data);
|
||||
@@ -79,45 +60,40 @@ YoloLayer::YoloLayer(const void* data, size_t length) {
|
||||
read(d, m_NumClasses);
|
||||
read(d, m_NewCoords);
|
||||
read(d, m_OutputSize);
|
||||
read(d, m_Type);
|
||||
read(d, m_ScoreThreshold);
|
||||
|
||||
if (m_Type != 3 && m_Type != 4) {
|
||||
uint yoloTensorsSize;
|
||||
read(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
TensorInfo curYoloTensor;
|
||||
read(d, curYoloTensor.gridSizeX);
|
||||
read(d, curYoloTensor.gridSizeY);
|
||||
read(d, curYoloTensor.numBBoxes);
|
||||
read(d, curYoloTensor.scaleXY);
|
||||
uint yoloTensorsSize;
|
||||
read(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
TensorInfo curYoloTensor;
|
||||
read(d, curYoloTensor.gridSizeX);
|
||||
read(d, curYoloTensor.gridSizeY);
|
||||
read(d, curYoloTensor.numBBoxes);
|
||||
read(d, curYoloTensor.scaleXY);
|
||||
|
||||
uint anchorsSize;
|
||||
read(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; ++j) {
|
||||
float result;
|
||||
read(d, result);
|
||||
curYoloTensor.anchors.push_back(result);
|
||||
}
|
||||
|
||||
uint maskSize;
|
||||
read(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; ++j) {
|
||||
int result;
|
||||
read(d, result);
|
||||
curYoloTensor.mask.push_back(result);
|
||||
}
|
||||
|
||||
m_YoloTensors.push_back(curYoloTensor);
|
||||
uint anchorsSize;
|
||||
read(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; ++j) {
|
||||
float result;
|
||||
read(d, result);
|
||||
curYoloTensor.anchors.push_back(result);
|
||||
}
|
||||
|
||||
uint maskSize;
|
||||
read(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; ++j) {
|
||||
int result;
|
||||
read(d, result);
|
||||
curYoloTensor.mask.push_back(result);
|
||||
}
|
||||
|
||||
m_YoloTensors.push_back(curYoloTensor);
|
||||
}
|
||||
};
|
||||
|
||||
YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords,
|
||||
const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize, const uint& modelType,
|
||||
const float& scoreThreshold) : m_NetWidth(netWidth), m_NetHeight(netHeight), m_NumClasses(numClasses),
|
||||
m_NewCoords(newCoords), m_YoloTensors(yoloTensors), m_OutputSize(outputSize), m_Type(modelType),
|
||||
m_ScoreThreshold(scoreThreshold)
|
||||
const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize) : m_NetWidth(netWidth),
|
||||
m_NetHeight(netHeight), m_NumClasses(numClasses), m_NewCoords(newCoords), m_YoloTensors(yoloTensors),
|
||||
m_OutputSize(outputSize)
|
||||
{
|
||||
assert(m_NetWidth > 0);
|
||||
assert(m_NetHeight > 0);
|
||||
@@ -126,12 +102,8 @@ YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& nu
|
||||
nvinfer1::Dims
|
||||
YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept
|
||||
{
|
||||
assert(index <= 4);
|
||||
if (index == 0)
|
||||
return nvinfer1::Dims{1, {1}};
|
||||
else if (index == 1)
|
||||
return nvinfer1::Dims{2, {static_cast<int>(m_OutputSize), 4}};
|
||||
return nvinfer1::Dims{1, {static_cast<int>(m_OutputSize)}};
|
||||
assert(index == 0);
|
||||
return nvinfer1::Dims{2, {static_cast<int>(m_OutputSize), 7}};
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -152,36 +124,56 @@ int32_t
|
||||
YoloLayer::enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
|
||||
noexcept
|
||||
{
|
||||
void* num_detections = outputs[0];
|
||||
void* detection_boxes = outputs[1];
|
||||
void* detection_scores = outputs[2];
|
||||
void* detection_classes = outputs[3];
|
||||
void* output = outputs[0];
|
||||
CUDA_CHECK(cudaMemsetAsync((float*) output, 0, sizeof(float) * m_OutputSize * 7 * batchSize, stream));
|
||||
|
||||
CUDA_CHECK(cudaMemsetAsync((int*)num_detections, 0, sizeof(int) * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)detection_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)detection_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((int*)detection_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream));
|
||||
void* count = workspace;
|
||||
CUDA_CHECK(cudaMemsetAsync((int*) count, 0, sizeof(int) * batchSize, stream));
|
||||
|
||||
if (m_Type == 5) {
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(0);
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
|
||||
uint numBBoxes = curYoloTensor.numBBoxes;
|
||||
float scaleXY = curYoloTensor.scaleXY;
|
||||
uint gridSizeX = curYoloTensor.gridSizeX;
|
||||
uint gridSizeY = curYoloTensor.gridSizeY;
|
||||
std::vector<float> anchors = curYoloTensor.anchors;
|
||||
std::vector<int> mask = curYoloTensor.mask;
|
||||
|
||||
void* v_anchors;
|
||||
void* v_mask;
|
||||
if (anchors.size() > 0) {
|
||||
float* f_anchors = anchors.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_anchors, anchors.data(), sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
if (mask.size() > 0) {
|
||||
int* f_mask = mask.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_mask, mask.data(), sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
CUDA_CHECK(cudaYoloLayer_x(inputs[0], num_detections, detection_boxes, detection_scores, detection_classes, batchSize,
|
||||
m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, v_anchors, v_mask, stream));
|
||||
uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses));
|
||||
|
||||
if (mask.size() > 0) {
|
||||
if (m_NewCoords) {
|
||||
CUDA_CHECK(cudaYoloLayer_nc(inputs[i], output, count, batchSize, inputSize, m_OutputSize, m_NetWidth, m_NetHeight,
|
||||
gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
else {
|
||||
CUDA_CHECK(cudaYoloLayer(inputs[i], output, count, batchSize, inputSize, m_OutputSize, m_NetWidth, m_NetHeight,
|
||||
gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
}
|
||||
else {
|
||||
void* softmax;
|
||||
CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream));
|
||||
|
||||
CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, output, count, batchSize, inputSize, m_OutputSize, m_NetWidth,
|
||||
m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, stream));
|
||||
|
||||
CUDA_CHECK(cudaFree(softmax));
|
||||
}
|
||||
|
||||
if (anchors.size() > 0) {
|
||||
CUDA_CHECK(cudaFree(v_anchors));
|
||||
@@ -190,78 +182,6 @@ YoloLayer::enqueue(int batchSize, void const* const* inputs, void* const* output
|
||||
CUDA_CHECK(cudaFree(v_mask));
|
||||
}
|
||||
}
|
||||
else if (m_Type == 4) {
|
||||
CUDA_CHECK(cudaYoloLayer_v8(inputs[0], num_detections, detection_boxes, detection_scores, detection_classes, batchSize,
|
||||
m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream));
|
||||
}
|
||||
else if (m_Type == 3) {
|
||||
CUDA_CHECK(cudaYoloLayer_e(inputs[0], inputs[1], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream));
|
||||
}
|
||||
else {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
|
||||
uint numBBoxes = curYoloTensor.numBBoxes;
|
||||
float scaleXY = curYoloTensor.scaleXY;
|
||||
uint gridSizeX = curYoloTensor.gridSizeX;
|
||||
uint gridSizeY = curYoloTensor.gridSizeY;
|
||||
std::vector<float> anchors = curYoloTensor.anchors;
|
||||
std::vector<int> mask = curYoloTensor.mask;
|
||||
|
||||
void* v_anchors;
|
||||
void* v_mask;
|
||||
if (anchors.size() > 0) {
|
||||
float* f_anchors = anchors.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
if (mask.size() > 0) {
|
||||
int* f_mask = mask.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses));
|
||||
|
||||
if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0
|
||||
CUDA_CHECK(cudaYoloLayer_r(inputs[i], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, 2.0, v_anchors, v_mask, stream));
|
||||
}
|
||||
else if (m_Type == 1) {
|
||||
if (m_NewCoords) {
|
||||
CUDA_CHECK(cudaYoloLayer_nc( inputs[i], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
else {
|
||||
CUDA_CHECK(cudaYoloLayer(inputs[i], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
}
|
||||
else {
|
||||
void* softmax;
|
||||
CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream));
|
||||
|
||||
CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, v_anchors, stream));
|
||||
|
||||
CUDA_CHECK(cudaFree(softmax));
|
||||
}
|
||||
|
||||
if (anchors.size() > 0) {
|
||||
CUDA_CHECK(cudaFree(v_anchors));
|
||||
}
|
||||
if (mask.size() > 0) {
|
||||
CUDA_CHECK(cudaFree(v_mask));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -276,22 +196,18 @@ YoloLayer::getSerializationSize() const noexcept
|
||||
totalSize += sizeof(m_NumClasses);
|
||||
totalSize += sizeof(m_NewCoords);
|
||||
totalSize += sizeof(m_OutputSize);
|
||||
totalSize += sizeof(m_Type);
|
||||
totalSize += sizeof(m_ScoreThreshold);
|
||||
|
||||
if (m_Type != 3 && m_Type != 4) {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
totalSize += sizeof(yoloTensorsSize);
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
totalSize += sizeof(yoloTensorsSize);
|
||||
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeX);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeY);
|
||||
totalSize += sizeof(curYoloTensor.numBBoxes);
|
||||
totalSize += sizeof(curYoloTensor.scaleXY);
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size();
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size();
|
||||
}
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeX);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeY);
|
||||
totalSize += sizeof(curYoloTensor.numBBoxes);
|
||||
totalSize += sizeof(curYoloTensor.scaleXY);
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size();
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size();
|
||||
}
|
||||
|
||||
return totalSize;
|
||||
@@ -307,37 +223,32 @@ YoloLayer::serialize(void* buffer) const noexcept
|
||||
write(d, m_NumClasses);
|
||||
write(d, m_NewCoords);
|
||||
write(d, m_OutputSize);
|
||||
write(d, m_Type);
|
||||
write(d, m_ScoreThreshold);
|
||||
|
||||
if (m_Type != 3 && m_Type != 4) {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
write(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
write(d, curYoloTensor.gridSizeX);
|
||||
write(d, curYoloTensor.gridSizeY);
|
||||
write(d, curYoloTensor.numBBoxes);
|
||||
write(d, curYoloTensor.scaleXY);
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
write(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
write(d, curYoloTensor.gridSizeX);
|
||||
write(d, curYoloTensor.gridSizeY);
|
||||
write(d, curYoloTensor.numBBoxes);
|
||||
write(d, curYoloTensor.scaleXY);
|
||||
|
||||
uint anchorsSize = curYoloTensor.anchors.size();
|
||||
write(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; ++j)
|
||||
write(d, curYoloTensor.anchors[j]);
|
||||
uint anchorsSize = curYoloTensor.anchors.size();
|
||||
write(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; ++j)
|
||||
write(d, curYoloTensor.anchors[j]);
|
||||
|
||||
uint maskSize = curYoloTensor.mask.size();
|
||||
write(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; ++j)
|
||||
write(d, curYoloTensor.mask[j]);
|
||||
}
|
||||
uint maskSize = curYoloTensor.mask.size();
|
||||
write(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; ++j)
|
||||
write(d, curYoloTensor.mask[j]);
|
||||
}
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2*
|
||||
YoloLayer::clone() const noexcept
|
||||
{
|
||||
return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize, m_Type,
|
||||
m_ScoreThreshold);
|
||||
return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize);
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator);
|
||||
|
||||
@@ -48,14 +48,13 @@ class YoloLayer : public nvinfer1::IPluginV2 {
|
||||
YoloLayer(const void* data, size_t length);
|
||||
|
||||
YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords,
|
||||
const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize, const uint& modelType,
|
||||
const float& scoreThreshold);
|
||||
const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize);
|
||||
|
||||
const char* getPluginType() const noexcept override { return YOLOLAYER_PLUGIN_NAME; }
|
||||
|
||||
const char* getPluginVersion() const noexcept override { return YOLOLAYER_PLUGIN_VERSION; }
|
||||
|
||||
int getNbOutputs() const noexcept override { return 4; }
|
||||
int getNbOutputs() const noexcept override { return 1; }
|
||||
|
||||
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept override;
|
||||
|
||||
@@ -68,7 +67,9 @@ class YoloLayer : public nvinfer1::IPluginV2 {
|
||||
|
||||
void terminate() noexcept override {}
|
||||
|
||||
size_t getWorkspaceSize(int maxBatchSize) const noexcept override { return 0; }
|
||||
size_t getWorkspaceSize(int maxBatchSize) const noexcept override {
|
||||
return maxBatchSize * sizeof(int);
|
||||
}
|
||||
|
||||
int32_t enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
|
||||
noexcept override;
|
||||
@@ -93,8 +94,6 @@ class YoloLayer : public nvinfer1::IPluginV2 {
|
||||
uint m_NewCoords {0};
|
||||
std::vector<TensorInfo> m_YoloTensors;
|
||||
uint64_t m_OutputSize {0};
|
||||
uint m_Type {0};
|
||||
float m_ScoreThreshold {0};
|
||||
};
|
||||
|
||||
class YoloLayerPluginCreator : public nvinfer1::IPluginCreator {
|
||||
|
||||
Reference in New Issue
Block a user