New features and fixes

This commit is contained in:
Marcos Luciano
2023-06-05 14:48:23 -03:00
parent 3f14b0d95d
commit 66a6754b77
57 changed files with 2137 additions and 1534 deletions

View File

@@ -33,9 +33,9 @@ ifeq ($(OPENCV),)
OPENCV=0
endif
LEGACY?=
ifeq ($(LEGACY),)
LEGACY=0
# GRAPH build option: keeps whatever value the caller supplied; when it is unset or empty it
# falls back to 0.
GRAPH?=
ifeq ($(GRAPH),)
GRAPH=0
endif
CC:= g++
@@ -50,13 +50,13 @@ ifeq ($(OPENCV), 1)
LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv)
endif
ifeq ($(LEGACY), 1)
COMMON+= -DLEGACY
# When GRAPH=1, append -DGRAPH to COMMON so the GRAPH macro is defined for the compiler.
# The flag must carry the -D prefix (as -DLEGACY does): a bare "-GRAPH" is not a macro
# definition and is passed to the compiler as an unknown option.
ifeq ($(GRAPH), 1)
COMMON+= -DGRAPH
endif
# Include search paths: DeepStream source headers and the headers of the CUDA toolkit
# selected by CUDA_VER.
CUFLAGS:= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include
# Libraries to link: TensorRT (plugin, core, parsers), the CUDA runtime and cuBLAS from the
# CUDA_VER toolkit, and libstdc++fs.
LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
# This LIBS line differs from the one above only by the additional -lnvonnxparser.
LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -lnvonnxparser -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
# Link as a shared object; LIBS is wrapped in a linker --start-group/--end-group pair.
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group
# Every header in the current directory.
INCS:= $(wildcard *.h)

View File

@@ -8,17 +8,18 @@
#include <fstream>
#include <iterator>
Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchsize, const int& channels, const int& height,
const int& width, const int& letterbox, const std::string& imgPath,
const std::string& calibTablePath) : batchSize(batchsize), inputC(channels), inputH(height), inputW(width),
letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0)
Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width,
const float& scaleFactor, const float* offsets, const std::string& imgPath, const std::string& calibTablePath) :
batchSize(batchSize), inputC(channels), inputH(height), inputW(width), scaleFactor(scaleFactor), offsets(offsets),
calibTablePath(calibTablePath), imageIndex(0)
{
inputCount = batchsize * channels * height * width;
inputCount = batchSize * channels * height * width;
std::fstream f(imgPath);
if (f.is_open()) {
std::string temp;
while (std::getline(f, temp))
while (std::getline(f, temp)) {
imgPaths.push_back(temp);
}
}
batchData = new float[inputCount];
CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
@@ -27,8 +28,9 @@ Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchsize, const int&
// Releases the two buffers the constructor allocates: the device input buffer (cudaMalloc)
// and the host batch buffer (new[]).
Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
{
// Free the device-side buffer; CUDA_CHECK wraps the call's return status.
CUDA_CHECK(cudaFree(deviceInput));
if (batchData)
// Host buffer is released only when the pointer is non-null.
if (batchData) {
delete[] batchData;
}
}
int
@@ -40,24 +42,33 @@ Int8EntropyCalibrator2::getBatchSize() const noexcept
// Supplies the next calibration batch.
//
// Reads batchSize images starting at imageIndex from imgPaths, preprocesses each one with
// prepareImage(), packs the resulting floats back to back into batchData, copies inputCount
// floats to deviceInput and stores that device pointer in bindings[0].
//
// Returns false when fewer than batchSize image paths remain or when an image fails to load;
// returns true after a batch has been uploaded. The names and nbBindings arguments are not
// read in this body.
//
// NOTE(review): this listing appears to show the pre-change and post-change form of several
// statements next to each other (the two imread/prepareImage/len/"Progress" lines); the
// comments below describe the later line of each pair — confirm against the actual file.
bool
Int8EntropyCalibrator2::getBatch(void** bindings, const char** names, int nbBindings) noexcept
{
if (imageIndex + batchSize > uint(imgPaths.size()))
// Not enough images left for a full batch: report that no batch is available.
if (imageIndex + batchSize > uint(imgPaths.size())) {
return false;
}
// Write cursor into the host batch buffer; advanced past each image's data below.
float* ptr = batchData;
for (size_t i = imageIndex; i < imageIndex + batchSize; ++i) {
cv::Mat img = cv::imread(imgPaths[i], cv::IMREAD_COLOR);
std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, letterBox);
cv::Mat img = cv::imread(imgPaths[i]);
// An empty Mat is treated as a load failure; the whole batch is abandoned.
if (img.empty()){
std::cerr << "Failed to read image for calibration" << std::endl;
return false;
}
// Preprocess with the configured scale factor and per-channel offsets.
std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, scaleFactor, offsets);
int len = (int) (inputData.size());
size_t len = inputData.size();
// NOTE(review): assumes prepareImage() returns exactly inputC * inputH * inputW floats so that
// batchSize images fit in batchData (allocated with inputCount floats) — confirm.
memcpy(ptr, inputData.data(), len * sizeof(float));
ptr += inputData.size();
std::cout << "Load image: " << imgPaths[i] << std::endl;
std::cout << "Progress: " << (i + 1)*100. / imgPaths.size() << "%" << std::endl;
// Percentage of the image list consumed so far.
std::cout << "Progress: " << (i + 1) * 100. / imgPaths.size() << "%" << std::endl;
}
// Advance so the next call starts with the following batch.
imageIndex += batchSize;
// Upload the packed host batch to the device buffer.
CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
// Only the first binding is populated.
bindings[0] = deviceInput;
return true;
}
@@ -67,8 +78,9 @@ Int8EntropyCalibrator2::readCalibrationCache(std::size_t &length) noexcept
calibrationCache.clear();
std::ifstream input(calibTablePath, std::ios::binary);
input >> std::noskipws;
if (readCache && input.good())
if (readCache && input.good()) {
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calibrationCache));
}
length = calibrationCache.size();
return length ? calibrationCache.data() : nullptr;
}
@@ -81,43 +93,24 @@ Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t len
}
std::vector<float>
prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box)
prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, float scaleFactor, const float* offsets)
{
cv::Mat out;
cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
int image_w = img.cols;
int image_h = img.rows;
if (image_w != input_w || image_h != input_h) {
if (letter_box == 1) {
float ratio_w = (float) image_w / (float) input_w;
float ratio_h = (float) image_h / (float) input_h;
if (ratio_w > ratio_h) {
int new_width = input_w * ratio_h;
int x = (image_w - new_width) / 2;
cv::Rect roi(abs(x), 0, new_width, image_h);
out = img(roi);
}
else if (ratio_w < ratio_h) {
int new_height = input_h * ratio_w;
int y = (image_h - new_height) / 2;
cv::Rect roi(0, abs(y), image_w, new_height);
out = img(roi);
}
else
out = img;
cv::resize(out, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
}
else {
cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
}
cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
}
else
cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
if (input_c == 3)
out.convertTo(out, CV_32FC3, 1.0 / 255.0);
else
out.convertTo(out, CV_32FC1, 1.0 / 255.0);
if (image_w != input_w || image_h != input_h) {
float resizeFactor = std::max(input_w / (float) image_w, input_h / (float) img.rows);
cv::resize(out, out, cv::Size(0, 0), resizeFactor, resizeFactor, cv::INTER_CUBIC);
cv::Rect crop(cv::Point(0.5 * (out.cols - input_w), 0.5 * (out.rows - input_h)), cv::Size(input_w, input_h));
out = out(crop);
}
out.convertTo(out, CV_32F, scaleFactor);
cv::subtract(out, cv::Scalar(offsets[2] / 255, offsets[1] / 255, offsets[0] / 255), out, cv::noArray(), -1);
std::vector<cv::Mat> input_channels(input_c);
cv::split(out, input_channels);

View File

@@ -22,8 +22,8 @@
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator2(const int& batchsize, const int& channels, const int& height, const int& width,
const int& letterbox, const std::string& imgPath, const std::string& calibTablePath);
Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width,
const float& scaleFactor, const float* offsets, const std::string& imgPath, const std::string& calibTablePath);
virtual ~Int8EntropyCalibrator2();
@@ -41,6 +41,8 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
int inputH;
int inputW;
int letterBox;
float scaleFactor;
const float* offsets;
std::string calibTablePath;
size_t imageIndex;
size_t inputCount;
@@ -51,6 +53,7 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
std::vector<char> calibrationCache;
};
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box);
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, float scaleFactor,
const float* offsets);
#endif //CALIBRATOR_H

View File

@@ -28,7 +28,7 @@ implicitLayer(int layerIdx, std::map<std::string, std::string>& block, std::vect
convWt.values = val;
trtWeights.push_back(convWt);
nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{3, {filters, 1, 1}}, convWt);
nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{4, {1, filters, 1, 1}}, convWt);
assert(implicit != nullptr);
std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx);
implicit->setName(implicitLayerName.c_str());

View File

@@ -14,46 +14,100 @@ reorgLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::IT
{
nvinfer1::ITensor* output;
assert(block.at("type") == "reorg3d");
assert(block.at("type") == "reorg" || block.at("type") == "reorg3d");
int stride = 1;
if(block.find("stride") != block.end()) {
stride = std::stoi(block.at("stride"));
}
nvinfer1::Dims inputDims = input->getDimensions();
nvinfer1::ISliceLayer *slice1 = network->addSlice(*input, nvinfer1::Dims{3, {0, 0, 0}},
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
assert(slice1 != nullptr);
std::string slice1LayerName = "slice1_" + std::to_string(layerIdx);
slice1->setName(slice1LayerName.c_str());
if (block.at("type") == "reorg3d") {
nvinfer1::ISliceLayer* slice1 = network->addSlice(*input, nvinfer1::Dims{4, {0, 0, 0, 0}},
nvinfer1::Dims{4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}},
nvinfer1::Dims{4, {1, 1, stride, stride}});
assert(slice1 != nullptr);
std::string slice1LayerName = "slice1_" + std::to_string(layerIdx);
slice1->setName(slice1LayerName.c_str());
nvinfer1::ISliceLayer *slice2 = network->addSlice(*input, nvinfer1::Dims{3, {0, 1, 0}},
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
assert(slice2 != nullptr);
std::string slice2LayerName = "slice2_" + std::to_string(layerIdx);
slice2->setName(slice2LayerName.c_str());
nvinfer1::ISliceLayer* slice2 = network->addSlice(*input, nvinfer1::Dims{4, {0, 0, 0, 1}},
nvinfer1::Dims{4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}},
nvinfer1::Dims{4, {1, 1, stride, stride}});
assert(slice2 != nullptr);
std::string slice2LayerName = "slice2_" + std::to_string(layerIdx);
slice2->setName(slice2LayerName.c_str());
nvinfer1::ISliceLayer *slice3 = network->addSlice(*input, nvinfer1::Dims{3, {0, 0, 1}},
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
assert(slice3 != nullptr);
std::string slice3LayerName = "slice3_" + std::to_string(layerIdx);
slice3->setName(slice3LayerName.c_str());
nvinfer1::ISliceLayer* slice3 = network->addSlice(*input, nvinfer1::Dims{4, {0, 0, 1, 0}},
nvinfer1::Dims{4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}},
nvinfer1::Dims{4, {1, 1, stride, stride}});
assert(slice3 != nullptr);
std::string slice3LayerName = "slice3_" + std::to_string(layerIdx);
slice3->setName(slice3LayerName.c_str());
nvinfer1::ISliceLayer *slice4 = network->addSlice(*input, nvinfer1::Dims{3, {0, 1, 1}},
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
assert(slice4 != nullptr);
std::string slice4LayerName = "slice4_" + std::to_string(layerIdx);
slice4->setName(slice4LayerName.c_str());
nvinfer1::ISliceLayer* slice4 = network->addSlice(*input, nvinfer1::Dims{4, {0, 0, 1, 1}},
nvinfer1::Dims{4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}},
nvinfer1::Dims{4, {1, 1, stride, stride}});
assert(slice4 != nullptr);
std::string slice4LayerName = "slice4_" + std::to_string(layerIdx);
slice4->setName(slice4LayerName.c_str());
std::vector<nvinfer1::ITensor*> concatInputs;
concatInputs.push_back(slice1->getOutput(0));
concatInputs.push_back(slice2->getOutput(0));
concatInputs.push_back(slice3->getOutput(0));
concatInputs.push_back(slice4->getOutput(0));
std::vector<nvinfer1::ITensor*> concatInputs;
concatInputs.push_back(slice1->getOutput(0));
concatInputs.push_back(slice2->getOutput(0));
concatInputs.push_back(slice3->getOutput(0));
concatInputs.push_back(slice4->getOutput(0));
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
assert(concat != nullptr);
std::string concatLayerName = "concat_" + std::to_string(layerIdx);
concat->setName(concatLayerName.c_str());
concat->setAxis(0);
output = concat->getOutput(0);
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
assert(concat != nullptr);
std::string concatLayerName = "concat_" + std::to_string(layerIdx);
concat->setName(concatLayerName.c_str());
concat->setAxis(0);
output = concat->getOutput(0);
}
else {
nvinfer1::IShuffleLayer* shuffle1 = network->addShuffle(*input);
assert(shuffle1 != nullptr);
std::string shuffle1LayerName = "shuffle1_" + std::to_string(layerIdx);
shuffle1->setName(shuffle1LayerName.c_str());
nvinfer1::Dims reshapeDims1{6, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2], stride,
inputDims.d[3], stride}};
shuffle1->setReshapeDimensions(reshapeDims1);
nvinfer1::Permutation permutation1{{0, 1, 2, 4, 3, 5}};
shuffle1->setSecondTranspose(permutation1);
output = shuffle1->getOutput(0);
nvinfer1::IShuffleLayer* shuffle2 = network->addShuffle(*output);
assert(shuffle2 != nullptr);
std::string shuffle2LayerName = "shuffle2_" + std::to_string(layerIdx);
shuffle2->setName(shuffle2LayerName.c_str());
nvinfer1::Dims reshapeDims2{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2] * inputDims.d[3],
stride * stride}};
shuffle2->setReshapeDimensions(reshapeDims2);
nvinfer1::Permutation permutation2{{0, 1, 3, 2}};
shuffle2->setSecondTranspose(permutation2);
output = shuffle2->getOutput(0);
nvinfer1::IShuffleLayer* shuffle3 = network->addShuffle(*output);
assert(shuffle3 != nullptr);
std::string shuffle3LayerName = "shuffle3_" + std::to_string(layerIdx);
shuffle3->setName(shuffle3LayerName.c_str());
nvinfer1::Dims reshapeDims3{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), stride * stride,
inputDims.d[2] * inputDims.d[3]}};
shuffle3->setReshapeDimensions(reshapeDims3);
nvinfer1::Permutation permutation3{{0, 2, 1, 3}};
shuffle3->setSecondTranspose(permutation3);
output = shuffle3->getOutput(0);
nvinfer1::IShuffleLayer* shuffle4 = network->addShuffle(*output);
assert(shuffle4 != nullptr);
std::string shuffle4LayerName = "shuffle4_" + std::to_string(layerIdx);
shuffle4->setName(shuffle4LayerName.c_str());
nvinfer1::Dims reshapeDims4{4, {inputDims.d[0], inputDims.d[1] * stride * stride, inputDims.d[2] / stride,
inputDims.d[3] / stride}};
shuffle4->setReshapeDimensions(reshapeDims4);
output = shuffle4->getOutput(0);
}
return output;
}

View File

@@ -24,29 +24,36 @@ routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>
}
if (lastPos < strLayers.length()) {
std::string lastV = trim(strLayers.substr(lastPos));
if (!lastV.empty())
if (!lastV.empty()) {
idxLayers.push_back(std::stoi(lastV));
}
}
assert (!idxLayers.empty());
assert(!idxLayers.empty());
std::vector<nvinfer1::ITensor*> concatInputs;
for (uint i = 0; i < idxLayers.size(); ++i) {
if (idxLayers[i] < 0)
if (idxLayers[i] < 0) {
idxLayers[i] = tensorOutputs.size() + idxLayers[i];
assert (idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size());
}
assert(idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size());
concatInputs.push_back(tensorOutputs[idxLayers[i]]);
if (i < idxLayers.size() - 1)
if (i < idxLayers.size() - 1) {
layers += std::to_string(idxLayers[i]) + ", ";
}
}
layers += std::to_string(idxLayers[idxLayers.size() - 1]);
if (concatInputs.size() == 1)
if (concatInputs.size() == 1) {
output = concatInputs[0];
}
else {
int axis = 0;
if (block.find("axis") != block.end())
axis = std::stoi(block.at("axis"));
if (axis < 0)
axis = concatInputs[0]->getDimensions().nbDims + axis;
int axis = 1;
if (block.find("axis") != block.end()) {
axis += std::stoi(block.at("axis"));
std::cout << axis << std::endl;
}
if (axis < 0) {
axis += concatInputs[0]->getDimensions().nbDims;
}
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
assert(concat != nullptr);
@@ -60,10 +67,11 @@ routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>
nvinfer1::Dims prevTensorDims = output->getDimensions();
int groups = stoi(block.at("groups"));
int group_id = stoi(block.at("group_id"));
int startSlice = (prevTensorDims.d[0] / groups) * group_id;
int channelSlice = (prevTensorDims.d[0] / groups);
nvinfer1::ISliceLayer* slice = network->addSlice(*output, nvinfer1::Dims{3, {startSlice, 0, 0}},
nvinfer1::Dims{3, {channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}});
int startSlice = (prevTensorDims.d[1] / groups) * group_id;
int channelSlice = (prevTensorDims.d[1] / groups);
nvinfer1::ISliceLayer* slice = network->addSlice(*output, nvinfer1::Dims{4, {0, startSlice, 0, 0}},
nvinfer1::Dims{4, {prevTensorDims.d[0], channelSlice, prevTensorDims.d[2], prevTensorDims.d[3]}},
nvinfer1::Dims{4, {1, 1, 1, 1}});
assert(slice != nullptr);
std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
slice->setName(sliceLayerName.c_str());

View File

@@ -17,8 +17,8 @@ shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::s
assert(block.at("type") == "shortcut");
if (inputVol != shortcutVol) {
nvinfer1::ISliceLayer* slice = network->addSlice(*shortcutInput, nvinfer1::Dims{3, {0, 0, 0}}, input->getDimensions(),
nvinfer1::Dims{3, {1, 1, 1}});
nvinfer1::ISliceLayer* slice = network->addSlice(*shortcutInput, nvinfer1::Dims{4, {0, 0, 0, 0}}, input->getDimensions(),
nvinfer1::Dims{4, {1, 1, 1, 1}});
assert(slice != nullptr);
std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
slice->setName(sliceLayerName.c_str());

View File

@@ -18,14 +18,14 @@ upsampleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1:
int stride = std::stoi(block.at("stride"));
float scale[3] = {1, static_cast<float>(stride), static_cast<float>(stride)};
float scale[4] = {1, 1, static_cast<float>(stride), static_cast<float>(stride)};
nvinfer1::IResizeLayer* resize = network->addResize(*input);
assert(resize != nullptr);
std::string resizeLayerName = "upsample_" + std::to_string(layerIdx);
resize->setName(resizeLayerName.c_str());
resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
resize->setScales(scale, 3);
resize->setScales(scale, 4);
output = resize->getOutput(0);
return output;

View File

@@ -35,39 +35,56 @@
static bool
getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* initParams)
{
std::string yoloCfg = initParams->customNetworkConfigFilePath;
std::string yoloType;
std::string onnxWtsFilePath = initParams->onnxFilePath;
std::string darknetWtsFilePath = initParams->modelFilePath;
std::string darknetCfgFilePath = initParams->customNetworkConfigFilePath;
std::transform(yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) {
std::string yoloType = onnxWtsFilePath != "" ? "onnx" : "darknet";
std::string modelName = yoloType == "onnx" ?
onnxWtsFilePath.substr(0, onnxWtsFilePath.find(".onnx")).substr(onnxWtsFilePath.rfind("/") + 1) :
darknetWtsFilePath.substr(0, darknetWtsFilePath.find(".weights")).substr(darknetWtsFilePath.rfind("/") + 1);
std::transform(modelName.begin(), modelName.end(), modelName.begin(), [] (uint8_t c) {
return std::tolower(c);
});
yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg"));
networkInfo.inputBlobName = "input";
networkInfo.networkType = yoloType;
networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
networkInfo.wtsFilePath = initParams->modelFilePath;
networkInfo.modelName = modelName;
networkInfo.onnxWtsFilePath = onnxWtsFilePath;
networkInfo.darknetWtsFilePath = darknetWtsFilePath;
networkInfo.darknetCfgFilePath = darknetCfgFilePath;
networkInfo.batchSize = initParams->maxBatchSize;
networkInfo.implicitBatch = initParams->forceImplicitBatchDimension;
networkInfo.int8CalibPath = initParams->int8CalibrationFilePath;
networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
networkInfo.deviceType = initParams->useDLA ? "kDLA" : "kGPU";
networkInfo.numDetectedClasses = initParams->numDetectedClasses;
networkInfo.clusterMode = initParams->clusterMode;
networkInfo.scaleFactor = initParams->networkScaleFactor;
networkInfo.offsets = initParams->offsets;
if (initParams->networkMode == 0)
if (initParams->networkMode == NvDsInferNetworkMode_FP32)
networkInfo.networkMode = "FP32";
else if (initParams->networkMode == 1)
else if (initParams->networkMode == NvDsInferNetworkMode_INT8)
networkInfo.networkMode = "INT8";
else if (initParams->networkMode == 2)
else if (initParams->networkMode == NvDsInferNetworkMode_FP16)
networkInfo.networkMode = "FP16";
if (networkInfo.configFilePath.empty() || networkInfo.wtsFilePath.empty()) {
std::cerr << "YOLO config file or weights file is not specified\n" << std::endl;
return false;
if (yoloType == "onnx") {
if (!fileExists(networkInfo.onnxWtsFilePath)) {
std::cerr << "ONNX model file does not exist\n" << std::endl;
return false;
}
}
if (!fileExists(networkInfo.configFilePath) || !fileExists(networkInfo.wtsFilePath)) {
std::cerr << "YOLO config file or weights file is not exist\n" << std::endl;
return false;
else {
if (!fileExists(networkInfo.darknetWtsFilePath)) {
std::cerr << "Darknet weights file does not exist\n" << std::endl;
return false;
}
else if (!fileExists(networkInfo.darknetCfgFilePath)) {
std::cerr << "Darknet cfg file does not exist\n" << std::endl;
return false;
}
}
return true;
@@ -99,7 +116,7 @@ NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, nvinfer1::IBuilder
Yolo yolo(networkInfo);
cudaEngine = yolo.createEngine(builder, builderConfig);
if (cudaEngine == nullptr) {
std::cerr << "Failed to build CUDA engine on " << networkInfo.configFilePath << std::endl;
std::cerr << "Failed to build CUDA engine" << std::endl;
return false;
}

View File

@@ -26,10 +26,10 @@
#include "nvdsinfer_custom_impl.h"
bool
NvDsInferInitializeInputLayers(std::vector<NvDsInferLayerInfo> const &inputLayersInfo,
NvDsInferNetworkInfo const &networkInfo, unsigned int maxBatchSize)
NvDsInferInitializeInputLayers(std::vector<NvDsInferLayerInfo> const& inputLayersInfo,
NvDsInferNetworkInfo const& networkInfo, unsigned int maxBatchSize)
{
float *scaleFactor = (float *) inputLayersInfo[0].buffer;
float* scaleFactor = (float*) inputLayersInfo[0].buffer;
for (unsigned int i = 0; i < maxBatchSize; i++) {
scaleFactor[i * 2 + 0] = 1.0;
scaleFactor[i * 2 + 1] = 1.0;

View File

@@ -73,22 +73,22 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
}
static std::vector<NvDsInferParseObjectInfo>
decodeTensorYolo(const float* detection, const uint& outputSize, const uint& netW, const uint& netH,
const std::vector<float>& preclusterThreshold)
decodeTensorYolo(const float* boxes, const float* scores, const int* classes, const uint& outputSize, const uint& netW,
const uint& netH, const std::vector<float>& preclusterThreshold)
{
std::vector<NvDsInferParseObjectInfo> binfo;
for (uint b = 0; b < outputSize; ++b) {
float maxProb = detection[b * 6 + 4];
int maxIndex = (int) detection[b * 6 + 5];
float maxProb = scores[b];
int maxIndex = classes[b];
if (maxProb < preclusterThreshold[maxIndex])
continue;
float bxc = detection[b * 6 + 0];
float byc = detection[b * 6 + 1];
float bw = detection[b * 6 + 2];
float bh = detection[b * 6 + 3];
float bxc = boxes[b * 4 + 0];
float byc = boxes[b * 4 + 1];
float bw = boxes[b * 4 + 2];
float bh = boxes[b * 4 + 3];
float bx1 = bxc - bw / 2;
float by1 = byc - bh / 2;
@@ -102,22 +102,22 @@ decodeTensorYolo(const float* detection, const uint& outputSize, const uint& net
}
static std::vector<NvDsInferParseObjectInfo>
decodeTensorYoloE(const float* detection, const uint& outputSize, const uint& netW, const uint& netH,
const std::vector<float>& preclusterThreshold)
decodeTensorYoloE(const float* boxes, const float* scores, const int* classes, const uint& outputSize, const uint& netW,
const uint& netH, const std::vector<float>& preclusterThreshold)
{
std::vector<NvDsInferParseObjectInfo> binfo;
for (uint b = 0; b < outputSize; ++b) {
float maxProb = detection[b * 6 + 4];
int maxIndex = (int) detection[b * 6 + 5];
float maxProb = scores[b];
int maxIndex = classes[b];
if (maxProb < preclusterThreshold[maxIndex])
continue;
float bx1 = detection[b * 6 + 0];
float by1 = detection[b * 6 + 1];
float bx2 = detection[b * 6 + 2];
float by2 = detection[b * 6 + 3];
float bx1 = boxes[b * 4 + 0];
float by1 = boxes[b * 4 + 1];
float bx2 = boxes[b * 4 + 2];
float by2 = boxes[b * 4 + 3];
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
}
@@ -136,12 +136,27 @@ NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo
std::vector<NvDsInferParseObjectInfo> objects;
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
NvDsInferLayerInfo* boxes;
NvDsInferLayerInfo* scores;
NvDsInferLayerInfo* classes;
const uint outputSize = layer.inferDims.d[0];
for (uint i = 0; i < 3; ++i) {
if (outputLayersInfo[i].dataType == NvDsInferDataType::INT32) {
classes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else if (outputLayersInfo[i].inferDims.d[1] == 4) {
boxes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else {
scores = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
}
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (layer.buffer), outputSize,
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
const uint outputSize = boxes->inferDims.d[0];
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (boxes->buffer),
(const float*) (scores->buffer), (const int*) (classes->buffer), outputSize, networkInfo.width, networkInfo.height,
detectionParams.perClassPreclusterThreshold);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
@@ -161,12 +176,27 @@ NvDsInferParseCustomYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInf
std::vector<NvDsInferParseObjectInfo> objects;
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
NvDsInferLayerInfo* boxes;
NvDsInferLayerInfo* scores;
NvDsInferLayerInfo* classes;
const uint outputSize = layer.inferDims.d[0];
for (uint i = 0; i < 3; ++i) {
if (outputLayersInfo[i].dataType == NvDsInferDataType::INT32) {
classes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else if (outputLayersInfo[i].inferDims.d[1] == 4) {
boxes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else {
scores = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
}
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (layer.buffer), outputSize,
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
const uint outputSize = boxes->inferDims.d[0];
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (boxes->buffer),
(const float*) (scores->buffer), (const int*) (classes->buffer), outputSize, networkInfo.width, networkInfo.height,
detectionParams.perClassPreclusterThreshold);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());

View File

@@ -30,33 +30,33 @@
#include "nvdsinfer_custom_impl.h"
// Forward declaration of the C-linkage parser entry point; its definition further down
// forwards to NvDsInferParseCustomYoloCuda(). Detections are written to objectList and
// the bool result reports success.
extern "C" bool
NvDsInferParseYolo_cuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseYoloCuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
// Forward declaration of the C-linkage entry point for the YoloE variant, with the same
// parameter list. NOTE(review): its definition is not visible in this excerpt — presumably it
// forwards to NvDsInferParseCustomYoloECuda(); confirm.
extern "C" bool
NvDsInferParseYoloE_cuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseYoloECuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
__global__ void decodeTensorYolo_cuda(NvDsInferParseObjectInfo *binfo, float* input, int outputSize, int netW, int netH,
float minPreclusterThreshold)
__global__ void decodeTensorYoloCuda(NvDsInferParseObjectInfo *binfo, float* boxes, float* scores, int* classes,
int outputSize, int netW, int netH, float minPreclusterThreshold)
{
int x_id = blockIdx.x * blockDim.x + threadIdx.x;
if (x_id >= outputSize)
return;
float maxProb = input[x_id * 6 + 4];
int maxIndex = (int) input[x_id * 6 + 5];
float maxProb = scores[x_id];
int maxIndex = classes[x_id];
if (maxProb < minPreclusterThreshold) {
binfo[x_id].detectionConfidence = 0.0;
return;
}
float bxc = input[x_id * 6 + 0];
float byc = input[x_id * 6 + 1];
float bw = input[x_id * 6 + 2];
float bh = input[x_id * 6 + 3];
float bxc = boxes[x_id * 4 + 0];
float byc = boxes[x_id * 4 + 1];
float bw = boxes[x_id * 4 + 2];
float bh = boxes[x_id * 4 + 3];
float x0 = bxc - bw / 2;
float y0 = byc - bh / 2;
@@ -76,26 +76,26 @@ __global__ void decodeTensorYolo_cuda(NvDsInferParseObjectInfo *binfo, float* in
binfo[x_id].classId = maxIndex;
}
__global__ void decodeTensorYoloE_cuda(NvDsInferParseObjectInfo *binfo, float* input, int outputSize, int netW, int netH,
float minPreclusterThreshold)
__global__ void decodeTensorYoloECuda(NvDsInferParseObjectInfo *binfo, float* boxes, float* scores, int* classes,
int outputSize, int netW, int netH, float minPreclusterThreshold)
{
int x_id = blockIdx.x * blockDim.x + threadIdx.x;
if (x_id >= outputSize)
return;
float maxProb = input[x_id * 6 + 4];
int maxIndex = (int) input[x_id * 6 + 5];
float maxProb = scores[x_id];
int maxIndex = classes[x_id];
if (maxProb < minPreclusterThreshold) {
binfo[x_id].detectionConfidence = 0.0;
return;
}
float x0 = input[x_id * 6 + 0];
float y0 = input[x_id * 6 + 1];
float x1 = input[x_id * 6 + 2];
float y1 = input[x_id * 6 + 3];
float x0 = boxes[x_id * 4 + 0];
float y0 = boxes[x_id * 4 + 1];
float x1 = boxes[x_id * 4 + 2];
float y1 = boxes[x_id * 4 + 3];
x0 = fminf(float(netW), fmaxf(float(0.0), x0));
y0 = fminf(float(netH), fmaxf(float(0.0), y0));
@@ -110,7 +110,7 @@ __global__ void decodeTensorYoloE_cuda(NvDsInferParseObjectInfo *binfo, float* i
binfo[x_id].classId = maxIndex;
}
static bool NvDsInferParseCustomYolo_cuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
static bool NvDsInferParseCustomYoloCuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
@@ -119,9 +119,23 @@ static bool NvDsInferParseCustomYolo_cuda(std::vector<NvDsInferLayerInfo> const&
return false;
}
const NvDsInferLayerInfo &layer = outputLayersInfo[0];
NvDsInferLayerInfo* boxes;
NvDsInferLayerInfo* scores;
NvDsInferLayerInfo* classes;
const int outputSize = layer.inferDims.d[0];
for (uint i = 0; i < 3; ++i) {
if (outputLayersInfo[i].dataType == NvDsInferDataType::INT32) {
classes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else if (outputLayersInfo[i].inferDims.d[1] == 4) {
boxes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else {
scores = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
}
const int outputSize = boxes->inferDims.d[0];
thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);
@@ -131,9 +145,9 @@ static bool NvDsInferParseCustomYolo_cuda(std::vector<NvDsInferLayerInfo> const&
int threads_per_block = 1024;
int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;
decodeTensorYolo_cuda<<<number_of_blocks, threads_per_block>>>(
thrust::raw_pointer_cast(objects.data()), (float*) layer.buffer, outputSize, networkInfo.width, networkInfo.height,
minPreclusterThreshold);
decodeTensorYoloCuda<<<number_of_blocks, threads_per_block>>>(
thrust::raw_pointer_cast(objects.data()), (float*) (boxes->buffer), (float*) (scores->buffer),
(int*) (classes->buffer), outputSize, networkInfo.width, networkInfo.height, minPreclusterThreshold);
objectList.resize(outputSize);
thrust::copy(objects.begin(), objects.end(), objectList.begin());
@@ -141,7 +155,7 @@ static bool NvDsInferParseCustomYolo_cuda(std::vector<NvDsInferLayerInfo> const&
return true;
}
static bool NvDsInferParseCustomYoloE_cuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
static bool NvDsInferParseCustomYoloECuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
@@ -150,9 +164,23 @@ static bool NvDsInferParseCustomYoloE_cuda(std::vector<NvDsInferLayerInfo> const
return false;
}
const NvDsInferLayerInfo &layer = outputLayersInfo[0];
NvDsInferLayerInfo* boxes;
NvDsInferLayerInfo* scores;
NvDsInferLayerInfo* classes;
const int outputSize = layer.inferDims.d[0];
for (uint i = 0; i < 3; ++i) {
if (outputLayersInfo[i].dataType == NvDsInferDataType::INT32) {
classes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else if (outputLayersInfo[i].inferDims.d[1] == 4) {
boxes = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
else {
scores = (NvDsInferLayerInfo*) &outputLayersInfo[i];
}
}
const int outputSize = boxes->inferDims.d[0];
thrust::device_vector<NvDsInferParseObjectInfo> objects(outputSize);
@@ -162,9 +190,9 @@ static bool NvDsInferParseCustomYoloE_cuda(std::vector<NvDsInferLayerInfo> const
int threads_per_block = 1024;
int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1;
decodeTensorYoloE_cuda<<<number_of_blocks, threads_per_block>>>(
thrust::raw_pointer_cast(objects.data()), (float*) layer.buffer, outputSize, networkInfo.width, networkInfo.height,
minPreclusterThreshold);
decodeTensorYoloECuda<<<number_of_blocks, threads_per_block>>>(
thrust::raw_pointer_cast(objects.data()), (float*) (boxes->buffer), (float*) (scores->buffer),
(int*) (classes->buffer), outputSize, networkInfo.width, networkInfo.height, minPreclusterThreshold);
objectList.resize(outputSize);
thrust::copy(objects.begin(), objects.end(), objectList.begin());
@@ -173,18 +201,18 @@ static bool NvDsInferParseCustomYoloE_cuda(std::vector<NvDsInferLayerInfo> const
}
// C-linkage entry point for the YOLO CUDA bounding-box parser. It forwards its four arguments
// unchanged to the file-local NvDsInferParseCustomYolo* implementation and returns that result.
// NOTE(review): both the `_cuda` and the `Cuda` spelling of the signature line and of the return
// line appear below; this reads like a rename shown as a diff without +/- markers — confirm that
// only the `Cuda` lines are meant to be live.
extern "C" bool
NvDsInferParseYolo_cuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseYoloCuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseCustomYolo_cuda(outputLayersInfo, networkInfo, detectionParams, objectList);
return NvDsInferParseCustomYoloCuda(outputLayersInfo, networkInfo, detectionParams, objectList);
}
// C-linkage entry point for the YOLO-E CUDA bounding-box parser. It forwards its four arguments
// unchanged to the file-local NvDsInferParseCustomYoloE* implementation and returns that result.
// NOTE(review): both the `_cuda` and the `Cuda` spelling of the signature line and of the return
// line appear below; this reads like a rename shown as a diff without +/- markers — confirm that
// only the `Cuda` lines are meant to be live.
extern "C" bool
NvDsInferParseYoloE_cuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseYoloECuda(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseCustomYoloE_cuda(outputLayersInfo, networkInfo, detectionParams, objectList);
return NvDsInferParseCustomYoloECuda(outputLayersInfo, networkInfo, detectionParams, objectList);
}
// Applies CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE to each exported parser. The macro is defined outside
// this view; presumably it verifies that the function matches the expected custom-parser
// signature — TODO confirm against the DeepStream headers.
// NOTE(review): the first two lines use the `_cuda` names and the last two the `Cuda` names;
// they look like the old and new sides of a rename — confirm which pair should remain.
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo_cuda);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloE_cuda);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloCuda);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloECuda);

View File

@@ -60,15 +60,16 @@ bool
// Reports whether `fileName` exists, using std::experimental::filesystem::exists.
//   fileName - path to test.
//   verbose  - when true, a "File does not exist" message is printed to stdout on a miss.
// Returns true if the path exists, false otherwise.
fileExists(const std::string fileName, bool verbose)
{
if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) {
// NOTE(review): the unbraced and the braced `if (verbose)` look like the old and new form of
// the same line (diff rendered without +/- markers) — confirm only the braced one is live.
if (verbose)
if (verbose) {
std::cout << "\nFile does not exist: " << fileName << std::endl;
}
return false;
}
return true;
}
std::vector<float>
loadWeights(const std::string weightsFilePath, const std::string& networkType)
loadWeights(const std::string weightsFilePath, const std::string& modelName)
{
assert(fileExists(weightsFilePath));
std::cout << "\nLoading pre-trained weights" << std::endl;
@@ -80,7 +81,7 @@ loadWeights(const std::string weightsFilePath, const std::string& networkType)
assert(file.good());
std::string line;
if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos) {
if (modelName.find("yolov2") != std::string::npos && modelName.find("yolov2-tiny") == std::string::npos) {
// Remove 4 int32 bytes of data from the stream belonging to the header
file.ignore(4 * 4);
}
@@ -94,8 +95,9 @@ loadWeights(const std::string weightsFilePath, const std::string& networkType)
file.read(floatWeight, 4);
assert(file.gcount() == 4);
weights.push_back(*reinterpret_cast<float*>(floatWeight));
if (file.peek() == std::istream::traits_type::eof())
if (file.peek() == std::istream::traits_type::eof()) {
break;
}
}
}
else {
@@ -103,7 +105,7 @@ loadWeights(const std::string weightsFilePath, const std::string& networkType)
assert(0);
}
std::cout << "Loading weights of " << networkType << " complete" << std::endl;
std::cout << "Loading weights of " << modelName << " complete" << std::endl;
std::cout << "Total weights read: " << weights.size() << std::endl;
return weights;
@@ -116,8 +118,9 @@ dimsToString(const nvinfer1::Dims d)
std::stringstream s;
s << "[";
for (int i = 0; i < d.nbDims - 1; ++i)
for (int i = 1; i < d.nbDims - 1; ++i) {
s << d.d[i] << ", ";
}
s << d.d[d.nbDims - 1] << "]";
return s.str();
@@ -127,16 +130,15 @@ int
// Returns the channel extent of tensor `t`, read from its nvinfer1::Dims.
// Two variants are visible below: a 3-dimension form that returns d[0] and a 4-dimension form
// that returns d[1]. The 4-dimension form matches the {batch, C, H, W} input that
// buildYoloNetwork creates.
// NOTE(review): these look like the old and new revisions of the same two statements — confirm
// that only the nbDims == 4 / d[1] pair is meant to be live.
// The rank check is an assert, so it disappears in builds that define NDEBUG.
getNumChannels(nvinfer1::ITensor* t)
{
nvinfer1::Dims d = t->getDimensions();
assert(d.nbDims == 3);
return d.d[0];
assert(d.nbDims == 4);
return d.d[1];
}
// Prints one row of the layer summary table to stdout: layerIndex, layerName, layerInput and
// layerOutput, each left-aligned in a fixed-width column, followed by weightPtr and a newline.
void
printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput,
std::string weightPtr)
{
// NOTE(review): two column layouts follow (widths 8/30/20/20, then 7/40/19/19). They look like
// the old and new revision of the same two lines — confirm only the second pair is live,
// otherwise the index/name/input/output columns are printed twice per row.
std::cout << std::setw(8) << std::left << layerIndex << std::setw(30) << std::left << layerName;
std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput;
std::cout << std::setw(7) << std::left << layerIndex << std::setw(40) << std::left << layerName;
std::cout << std::setw(19) << std::left << layerInput << std::setw(19) << std::left << layerOutput;
std::cout << weightPtr << std::endl;
}

View File

@@ -40,7 +40,7 @@ float clamp(const float val, const float minVal, const float maxVal);
// Returns whether `fileName` exists; `verbose` (default true) enables a message on a miss.
bool fileExists(const std::string fileName, bool verbose = true);
// Reads the weights file at `weightsFilePath` into a vector of floats. The second argument is a
// model/network name; the definition searches it for "yolov2" / "yolov2-tiny" to decide whether
// extra header bytes are skipped.
// NOTE(review): the two declarations differ only in the parameter name (networkType vs
// modelName) and look like the old and new side of a rename — one of them is redundant.
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& modelName);
// Formats the dimensions `d` as a bracketed, comma-separated string.
std::string dimsToString(const nvinfer1::Dims d);

View File

@@ -23,6 +23,8 @@
* https://www.github.com/marcoslucianops
*/
#include "NvOnnxParser.h"
#include "yolo.h"
#include "yoloPlugins.h"
@@ -31,11 +33,14 @@
#endif
// Constructs a Yolo model builder from `networkInfo`. Every configuration member is copied from
// the matching NetworkInfo field; the derived state (input C/H/W, input size, class count,
// letter-box flag, new-coords flag, yolo layer count) starts at 0. The body is empty.
// NOTE(review): m_Offsets copies only the `const float*` from networkInfo.offsets, not the data
// it points to, so that buffer must outlive this object (createEngine later hands the pointer
// to Int8EntropyCalibrator2) — confirm the caller guarantees this.
// NOTE(review): two initializer lists are interleaved below (one using m_ConfigFilePath /
// m_WtsFilePath, one using m_ModelName / m_OnnxWtsFilePath / ...). They look like the old and
// new revision of the list rendered without +/- markers — confirm only the second is live.
Yolo::Yolo(const NetworkInfo& networkInfo) : m_InputBlobName(networkInfo.inputBlobName),
m_NetworkType(networkInfo.networkType), m_ConfigFilePath(networkInfo.configFilePath),
m_WtsFilePath(networkInfo.wtsFilePath), m_Int8CalibPath(networkInfo.int8CalibPath), m_DeviceType(networkInfo.deviceType),
m_NumDetectedClasses(networkInfo.numDetectedClasses), m_ClusterMode(networkInfo.clusterMode),
m_NetworkMode(networkInfo.networkMode), m_InputH(0), m_InputW(0), m_InputC(0), m_InputSize(0), m_NumClasses(0),
m_LetterBox(0), m_NewCoords(0), m_YoloCount(0)
m_NetworkType(networkInfo.networkType), m_ModelName(networkInfo.modelName),
m_OnnxWtsFilePath(networkInfo.onnxWtsFilePath), m_DarknetWtsFilePath(networkInfo.darknetWtsFilePath),
m_DarknetCfgFilePath(networkInfo.darknetCfgFilePath), m_BatchSize(networkInfo.batchSize),
m_ImplicitBatch(networkInfo.implicitBatch), m_Int8CalibPath(networkInfo.int8CalibPath),
m_DeviceType(networkInfo.deviceType), m_NumDetectedClasses(networkInfo.numDetectedClasses),
m_ClusterMode(networkInfo.clusterMode), m_NetworkMode(networkInfo.networkMode), m_ScaleFactor(networkInfo.scaleFactor),
m_Offsets(networkInfo.offsets), m_InputC(0), m_InputH(0), m_InputW(0), m_InputSize(0), m_NumClasses(0), m_LetterBox(0),
m_NewCoords(0), m_YoloCount(0)
{
}
@@ -47,74 +52,175 @@ Yolo::~Yolo()
nvinfer1::ICudaEngine*
Yolo::createEngine(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config)
{
assert (builder);
assert(builder);
m_ConfigBlocks = parseConfigFile(m_ConfigFilePath);
parseConfigBlocks();
nvinfer1::NetworkDefinitionCreationFlags flags =
(1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0);
if (parseModel(*network) != NVDSINFER_SUCCESS) {
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(flags);
assert(network);
#ifdef LEGACY
network->destroy();
nvonnxparser::IParser* parser;
if (m_NetworkType == "onnx") {
parser = nvonnxparser::createParser(*network, *builder->getLogger());
if (!parser->parseFromFile(m_OnnxWtsFilePath.c_str(), static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING))) {
std::cerr << "\nCould not parse the ONNX model\n" << std::endl;
#if NV_TENSORRT_MAJOR >= 8
delete parser;
delete network;
#else
delete network;
parser->destroy();
network->destroy();
#endif
return nullptr;
return nullptr;
}
m_InputC = network->getInput(0)->getDimensions().d[1];
m_InputH = network->getInput(0)->getDimensions().d[2];
m_InputW = network->getInput(0)->getDimensions().d[3];
}
else {
m_ConfigBlocks = parseConfigFile(m_DarknetCfgFilePath);
parseConfigBlocks();
if (parseModel(*network) != NVDSINFER_SUCCESS) {
#if NV_TENSORRT_MAJOR >= 8
delete network;
#else
network->destroy();
#endif
return nullptr;
}
}
std::cout << "Building the TensorRT Engine\n" << std::endl;
if (m_NumClasses != m_NumDetectedClasses) {
std::cout << "NOTE: Number of classes mismatch, make sure to set num-detected-classes=" << m_NumClasses
<< " in config_infer file\n" << std::endl;
if (!m_ImplicitBatch && network->getInput(0)->getDimensions().d[0] == -1) {
nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
assert(profile);
for (int32_t i = 0; i < network->getNbInputs(); ++i) {
nvinfer1::ITensor* input = network->getInput(i);
nvinfer1::Dims inputDims = input->getDimensions();
nvinfer1::Dims dims = inputDims;
dims.d[0] = 1;
profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, dims);
dims.d[0] = m_BatchSize;
profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, dims);
dims.d[0] = m_BatchSize;
profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, dims);
}
config->addOptimizationProfile(profile);
}
if (m_LetterBox == 1) {
std::cout << "NOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file"
<< " to get better accuracy\n" << std::endl;
std::cout << "\nBuilding the TensorRT Engine\n" << std::endl;
if (m_NetworkType == "darknet") {
if (m_NumClasses != m_NumDetectedClasses) {
std::cout << "NOTE: Number of classes mismatch, make sure to set num-detected-classes=" << m_NumClasses
<< " in config_infer file\n" << std::endl;
}
if (m_LetterBox == 1) {
std::cout << "NOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file"
<< " to get better accuracy\n" << std::endl;
}
}
if (m_ClusterMode != 2) {
std::cout << "NOTE: Wrong cluster-mode is set, make sure to set cluster-mode=2 in config_infer file\n" << std::endl;
}
if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) {
if (m_NetworkMode == "FP16") {
assert(builder->platformHasFastFp16());
config->setFlag(nvinfer1::BuilderFlag::kFP16);
}
else if (m_NetworkMode == "INT8") {
assert(builder->platformHasFastInt8());
#ifdef OPENCV
std::string calib_image_list;
int calib_batch_size;
if (getenv("INT8_CALIB_IMG_PATH"))
calib_image_list = getenv("INT8_CALIB_IMG_PATH");
else {
std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl;
assert(0);
}
if (getenv("INT8_CALIB_BATCH_SIZE"))
calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE"));
else {
std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl;
assert(0);
}
nvinfer1::IInt8EntropyCalibrator2 *calibrator = new Int8EntropyCalibrator2(calib_batch_size, m_InputC, m_InputH,
m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath);
config->setFlag(nvinfer1::BuilderFlag::kINT8);
config->setInt8Calibrator(calibrator);
if (m_Int8CalibPath != "" && !fileExists(m_Int8CalibPath)) {
#ifdef OPENCV
std::string calib_image_list;
int calib_batch_size;
if (getenv("INT8_CALIB_IMG_PATH")) {
calib_image_list = getenv("INT8_CALIB_IMG_PATH");
}
else {
std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl;
assert(0);
}
if (getenv("INT8_CALIB_BATCH_SIZE")) {
calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE"));
}
else {
std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl;
assert(0);
}
nvinfer1::IInt8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(calib_batch_size, m_InputC, m_InputH,
m_InputW, m_ScaleFactor, m_Offsets, calib_image_list, m_Int8CalibPath);
config->setInt8Calibrator(calibrator);
#else
std::cerr << "OpenCV is required to run INT8 calibrator\n" << std::endl;
assert(0);
std::cerr << "OpenCV is required to run INT8 calibrator\n" << std::endl;
#if NV_TENSORRT_MAJOR >= 8
if (m_NetworkType == "onnx") {
delete parser;
}
delete network;
#else
if (m_NetworkType == "onnx") {
parser->destroy();
}
network->destroy();
#endif
return nullptr;
#endif
}
}
nvinfer1::ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
if (engine)
std::cout << "Building complete\n" << std::endl;
else
std::cerr << "Building engine failed\n" << std::endl;
#ifdef GRAPH
config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
#endif
#ifdef LEGACY
network->destroy();
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
if (engine) {
std::cout << "Building complete\n" << std::endl;
}
else {
std::cerr << "Building engine failed\n" << std::endl;
}
#ifdef GRAPH
nvinfer1::IExecutionContext *context = engine->createExecutionContext();
nvinfer1::IEngineInspector *inpector = engine->createEngineInspector();
inpector->setExecutionContext(context);
std::ofstream graph;
graph.open("graph.json");
graph << inpector->getEngineInformation(nvinfer1::LayerInformationFormat::kJSON);
graph.close();
std::cout << "Network graph saved to graph.json\n" << std::endl;
#if NV_TENSORRT_MAJOR >= 8
delete inpector;
delete context;
#else
delete network;
inpector->destroy();
context->destroy();
#endif
#endif
#if NV_TENSORRT_MAJOR >= 8
if (m_NetworkType == "onnx") {
delete parser;
}
delete network;
#else
if (m_NetworkType == "onnx") {
parser->destroy();
}
network->destroy();
#endif
return engine;
@@ -124,14 +230,16 @@ NvDsInferStatus
// Populates `network` with the Darknet model: calls destroyNetworkUtils(), loads the weights
// with loadWeights(), then calls buildYoloNetwork() and logs whether it succeeded.
// Returns the NvDsInferStatus produced by buildYoloNetwork().
Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
destroyNetworkUtils();
// NOTE(review): the two `weights` declarations look like the old and new revision of one line
// (diff rendered without +/- markers); as plain C++ this would be a redefinition. Confirm only
// the m_DarknetWtsFilePath / m_ModelName form is live.
std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
std::vector<float> weights = loadWeights(m_DarknetWtsFilePath, m_ModelName);
std::cout << "Building YOLO network\n" << std::endl;
NvDsInferStatus status = buildYoloNetwork(weights, network);
// NOTE(review): the unbraced and braced if/else lines below are likewise old/new duplicates.
if (status == NVDSINFER_SUCCESS)
if (status == NVDSINFER_SUCCESS) {
std::cout << "Building YOLO network complete" << std::endl;
else
}
else {
std::cerr << "Building YOLO network failed" << std::endl;
}
return status;
}
@@ -141,8 +249,11 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
{
int weightPtr = 0;
uint batchSize = m_ImplicitBatch ? m_BatchSize : -1;
nvinfer1::ITensor* data = network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
nvinfer1::Dims{3, {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}});
nvinfer1::Dims{4, {static_cast<int>(batchSize), static_cast<int>(m_InputC), static_cast<int>(m_InputH),
static_cast<int>(m_InputW)}});
assert(data != nullptr && data->getDimensions().nbDims > 0);
nvinfer1::ITensor* previous = data;
@@ -287,28 +398,13 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
std::string layerName = m_ConfigBlocks.at(i).at("type");
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
}
else if (m_ConfigBlocks.at(i).at("type") == "reorg3d") {
else if (m_ConfigBlocks.at(i).at("type") == "reorg" || m_ConfigBlocks.at(i).at("type") == "reorg3d") {
std::string inputVol = dimsToString(previous->getDimensions());
previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(previous);
std::string layerName = "reorg3d";
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
}
else if (m_ConfigBlocks.at(i).at("type") == "reorg") {
std::string inputVol = dimsToString(previous->getDimensions());
nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
assert(reorgPlugin != nullptr);
nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin);
assert(reorg != nullptr);
std::string reorglayerName = "reorg_" + std::to_string(i);
reorg->setName(reorglayerName.c_str());
previous = reorg->getOutput(0);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(previous);
std::string layerName = "reorg";
std::string layerName = m_ConfigBlocks.at(i).at("type");
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
}
else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region") {
@@ -317,9 +413,8 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
nvinfer1::Dims prevTensorDims = previous->getDimensions();
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
curYoloTensor.blobName = blobName;
curYoloTensor.gridSizeX = prevTensorDims.d[2];
curYoloTensor.gridSizeY = prevTensorDims.d[1];
curYoloTensor.gridSizeY = prevTensorDims.d[2];
curYoloTensor.gridSizeX = prevTensorDims.d[3];
std::string inputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(previous);
yoloTensorInputs[yoloCountInputs] = previous;
@@ -345,10 +440,10 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
uint64_t outputSize = 0;
for (uint j = 0; j < yoloCountInputs; ++j) {
TensorInfo& curYoloTensor = m_YoloTensors.at(j);
outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes;
outputSize += curYoloTensor.numBBoxes * curYoloTensor.gridSizeY * curYoloTensor.gridSizeX;
}
nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors,
nvinfer1::IPluginV2DynamicExt* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors,
outputSize);
assert(yoloPlugin != nullptr);
nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin);
@@ -356,10 +451,19 @@ Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition
std::string yoloLayerName = "yolo";
yolo->setName(yoloLayerName.c_str());
nvinfer1::ITensor* outputYolo = yolo->getOutput(0);
std::string outputYoloLayerName = "output";
outputYolo->setName(outputYoloLayerName.c_str());
network.markOutput(*outputYolo);
std::string outputlayerName;
nvinfer1::ITensor* detection_boxes = yolo->getOutput(0);
outputlayerName = "boxes";
detection_boxes->setName(outputlayerName.c_str());
nvinfer1::ITensor* detection_scores = yolo->getOutput(1);
outputlayerName = "scores";
detection_scores->setName(outputlayerName.c_str());
nvinfer1::ITensor* detection_classes = yolo->getOutput(2);
outputlayerName = "classes";
detection_classes->setName(outputlayerName.c_str());
network.markOutput(*detection_boxes);
network.markOutput(*detection_scores);
network.markOutput(*detection_classes);
}
else {
std::cerr << "\nError in yolo cfg file" << std::endl;

View File

@@ -45,13 +45,19 @@ struct NetworkInfo
{
// Name given to the network input tensor (passed to addInput in Yolo::buildYoloNetwork).
std::string inputBlobName;
// Selects the build path in Yolo::createEngine: "onnx" goes through the ONNX parser, any other
// value goes through the Darknet cfg/weights path.
std::string networkType;
// NOTE(review): configFilePath / wtsFilePath look like the pre-change fields that the four
// fields after them replace (diff rendered without +/- markers) — confirm they are removed.
std::string configFilePath;
std::string wtsFilePath;
// Model name; forwarded to loadWeights(), which searches it for "yolov2" / "yolov2-tiny".
std::string modelName;
// Path of the ONNX model file handed to the ONNX parser.
std::string onnxWtsFilePath;
// Paths of the Darknet weights file (loadWeights) and cfg file (parseConfigFile).
std::string darknetWtsFilePath;
std::string darknetCfgFilePath;
// Batch size: used as the OPT and MAX batch dimension of the optimization profile, and as the
// input batch dimension when implicitBatch is non-zero.
uint batchSize;
// Non-zero: the Darknet input is built with a fixed batch of batchSize and no optimization
// profile is added. Zero: the batch dimension is -1 and a profile is created.
int implicitBatch;
// INT8 calibration table path; calibration only runs when it is non-empty and the file is missing.
std::string int8CalibPath;
std::string deviceType;
// Class count from the inference config; createEngine prints a note if it differs from the
// class count found in the Darknet cfg.
uint numDetectedClasses;
// createEngine prints a note unless this is 2.
int clusterMode;
// Precision mode string; createEngine handles "FP16" and "INT8" specially.
std::string networkMode;
// Scale factor and per-channel offsets forwarded to Int8EntropyCalibrator2.
// `offsets` is a non-owning pointer; presumably it must stay valid while calibration can run
// — TODO confirm the owner.
float scaleFactor;
const float* offsets;
};
struct TensorInfo
@@ -74,7 +80,8 @@ class Yolo : public IModelParser {
bool hasFullDimsSupported() const override { return false; }
// Returns a C string naming the model. The newer form takes the ONNX path (when m_NetworkType is
// "onnx") or the Darknet cfg path and cuts it at the first ".onnx" / ".cfg"; if the extension is
// not found, substr(0, npos) keeps the whole path.
// NOTE(review): the two return statements look like the old and new revision of one statement
// (diff rendered without +/- markers); as plain C++ only the first would ever execute.
// NOTE(review): in the second return, substr() produces a temporary std::string and .c_str() is
// taken from that temporary. The temporary is destroyed when the return expression finishes, so
// the returned pointer dangles and any caller reads freed memory. The trimmed name needs to be
// stored somewhere that outlives the call (e.g. a member computed once in the constructor).
const char* getModelName() const override {
return m_ConfigFilePath.empty() ? m_NetworkType.c_str() : m_ConfigFilePath.c_str();
return m_NetworkType == "onnx" ? m_OnnxWtsFilePath.substr(0, m_OnnxWtsFilePath.find(".onnx")).c_str() :
m_DarknetCfgFilePath.substr(0, m_DarknetCfgFilePath.find(".cfg")).c_str();
}
NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
@@ -84,17 +91,23 @@ class Yolo : public IModelParser {
protected:
const std::string m_InputBlobName;
const std::string m_NetworkType;
const std::string m_ConfigFilePath;
const std::string m_WtsFilePath;
const std::string m_ModelName;
const std::string m_OnnxWtsFilePath;
const std::string m_DarknetWtsFilePath;
const std::string m_DarknetCfgFilePath;
const uint m_BatchSize;
const int m_ImplicitBatch;
const std::string m_Int8CalibPath;
const std::string m_DeviceType;
const uint m_NumDetectedClasses;
const int m_ClusterMode;
const std::string m_NetworkMode;
const float m_ScaleFactor;
const float* m_Offsets;
uint m_InputC;
uint m_InputH;
uint m_InputW;
uint m_InputC;
uint64_t m_InputSize;
uint m_NumClasses;
uint m_LetterBox;

View File

@@ -4,13 +4,12 @@
*/
#include <stdint.h>
#include <stdio.h>
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
__global__ void gpuYoloLayer(const float* input, float* output, int* count, const uint netWidth, const uint netHeight,
const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY,
const float* anchors, const int* mask)
__global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, int* classes, const uint netWidth,
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
@@ -22,8 +21,6 @@ __global__ void gpuYoloLayer(const float* input, float* output, int* count, cons
const int numGridCells = gridSizeX * gridSizeY;
const int bbindex = y_id * gridSizeX + x_id;
const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
const float alpha = scaleXY;
const float beta = -0.5 * (scaleXY - 1);
@@ -37,6 +34,8 @@ __global__ void gpuYoloLayer(const float* input, float* output, int* count, cons
float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[mask[z_id] * 2 + 1];
const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
float maxProb = 0.0f;
int maxIndex = -1;
@@ -48,25 +47,25 @@ __global__ void gpuYoloLayer(const float* input, float* output, int* count, cons
}
}
int _count = (int)atomicAdd(count, 1);
int count = z_id * gridSizeX * gridSizeY + y_id * gridSizeY + x_id + lastInputSize;
output[_count * 6 + 0] = xc;
output[_count * 6 + 1] = yc;
output[_count * 6 + 2] = w;
output[_count * 6 + 3] = h;
output[_count * 6 + 4] = maxProb * objectness;
output[_count * 6 + 5] = maxIndex;
boxes[count * 4 + 0] = xc;
boxes[count * 4 + 1] = yc;
boxes[count * 4 + 2] = w;
boxes[count * 4 + 3] = h;
scores[count] = maxProb * objectness;
classes[count] = maxIndex;
}
// Forward declarations of the host-side launcher for gpuYoloLayer (defined right after them).
// Per the definition: the kernel is launched once per batch item on `stream`, with `input`
// advanced by batch * inputSize floats and boxes / scores / classes advanced by
// batch * 4 * outputSize, batch * outputSize and batch * outputSize elements respectively;
// `lastInputSize` is added to the output index inside the kernel. Returns cudaGetLastError().
// NOTE(review): the first declaration (output/count buffers) and the second (boxes/scores/classes
// plus lastInputSize) look like the old and new revision of the same prototype — confirm the
// first one is no longer present in the real file.
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
cudaStream_t stream);
cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
cudaStream_t stream)
cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
@@ -75,9 +74,10 @@ cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const ui
for (unsigned int batch = 0; batch < batchSize; ++batch) {
gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*> (input) + (batch * inputSize),
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
reinterpret_cast<int*> (count) + (batch),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
reinterpret_cast<int*> (classes) + (batch * 1 * outputSize),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
}
return cudaGetLastError();

View File

@@ -5,9 +5,9 @@
#include <stdint.h>
__global__ void gpuYoloLayer_nc(const float* input, float* output, int* count, const uint netWidth, const uint netHeight,
const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY,
const float* anchors, const int* mask)
__global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores, int* classes, const uint netWidth,
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
@@ -19,8 +19,6 @@ __global__ void gpuYoloLayer_nc(const float* input, float* output, int* count, c
const int numGridCells = gridSizeX * gridSizeY;
const int bbindex = y_id * gridSizeX + x_id;
const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];
const float alpha = scaleXY;
const float beta = -0.5 * (scaleXY - 1);
@@ -34,6 +32,8 @@ __global__ void gpuYoloLayer_nc(const float* input, float* output, int* count, c
float h = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) * anchors[mask[z_id] * 2 + 1];
const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];
float maxProb = 0.0f;
int maxIndex = -1;
@@ -45,25 +45,25 @@ __global__ void gpuYoloLayer_nc(const float* input, float* output, int* count, c
}
}
int _count = (int)atomicAdd(count, 1);
int count = z_id * gridSizeX * gridSizeY + y_id * gridSizeY + x_id + lastInputSize;
output[_count * 6 + 0] = xc;
output[_count * 6 + 1] = yc;
output[_count * 6 + 2] = w;
output[_count * 6 + 3] = h;
output[_count * 6 + 4] = maxProb * objectness;
output[_count * 6 + 5] = maxIndex;
boxes[count * 4 + 0] = xc;
boxes[count * 4 + 1] = yc;
boxes[count * 4 + 2] = w;
boxes[count * 4 + 3] = h;
scores[count] = maxProb * objectness;
classes[count] = maxIndex;
}
// Forward declarations of the host-side launcher for gpuYoloLayer_nc (defined right after them).
// Per the definition: the kernel is launched once per batch item on `stream`, with `input`
// advanced by batch * inputSize floats and boxes / scores / classes advanced by
// batch * 4 * outputSize, batch * outputSize and batch * outputSize elements respectively;
// `lastInputSize` is added to the output index inside the kernel. Returns cudaGetLastError().
// NOTE(review): the first declaration (output/count buffers) and the second (boxes/scores/classes
// plus lastInputSize) look like the old and new revision of the same prototype — confirm the
// first one is no longer present in the real file.
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
cudaStream_t stream);
cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
cudaStream_t stream)
cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
@@ -72,9 +72,10 @@ cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const
for (unsigned int batch = 0; batch < batchSize; ++batch) {
gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*> (input) + (batch * inputSize),
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
reinterpret_cast<int*> (count) + (batch),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
reinterpret_cast<int*> (classes) + (batch * 1 * outputSize),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
}
return cudaGetLastError();

View File

@@ -27,9 +27,9 @@ __device__ void softmaxGPU(const float* input, const int bbindex, const int numG
}
}
__global__ void gpuRegionLayer(const float* input, float* softmax, float* output, int* count, const uint netWidth,
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
const float* anchors)
__global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, float* scores, int* classes,
const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
const uint numBBoxes, const uint64_t lastInputSize, const float* anchors)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
@@ -41,8 +41,6 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* output
const int numGridCells = gridSizeX * gridSizeY;
const int bbindex = y_id * gridSizeX + x_id;
const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / gridSizeX;
float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / gridSizeY;
@@ -53,6 +51,8 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* output
float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[z_id * 2 + 1] * netHeight /
gridSizeY;
const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax);
float maxProb = 0.0f;
@@ -66,23 +66,25 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* output
}
}
int _count = (int)atomicAdd(count, 1);
int count = z_id * gridSizeX * gridSizeY + y_id * gridSizeY + x_id + lastInputSize;
output[_count * 6 + 0] = xc;
output[_count * 6 + 1] = yc;
output[_count * 6 + 2] = w;
output[_count * 6 + 3] = h;
output[_count * 6 + 4] = maxProb * objectness;
output[_count * 6 + 5] = maxIndex;
boxes[count * 4 + 0] = xc;
boxes[count * 4 + 1] = yc;
boxes[count * 4 + 2] = w;
boxes[count * 4 + 3] = h;
scores[count] = maxProb * objectness;
classes[count] = maxIndex;
}
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX,
const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize,
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
const uint& numBBoxes, const void* anchors, cudaStream_t stream);
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX,
const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream)
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize,
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
const uint& numBBoxes, const void* anchors, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
@@ -92,9 +94,10 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void
gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*> (input) + (batch * inputSize),
reinterpret_cast<float*> (softmax) + (batch * inputSize),
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
reinterpret_cast<int*> (count) + (batch),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes,
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
reinterpret_cast<int*> (classes) + (batch * 1 * outputSize),
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize,
reinterpret_cast<const float*> (anchors));
}
return cudaGetLastError();

View File

@@ -38,19 +38,20 @@ namespace {
}
}
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
cudaStream_t stream);
cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask,
cudaStream_t stream);
cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX,
const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize,
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
const uint& numBBoxes, const void* anchors, cudaStream_t stream);
YoloLayer::YoloLayer(const void* data, size_t length) {
const char* d = static_cast<const char*>(data);
@@ -99,96 +100,10 @@ YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& nu
assert(m_NetHeight > 0);
};
nvinfer1::Dims
YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept
nvinfer1::IPluginV2DynamicExt*
YoloLayer::clone() const noexcept
{
assert(index == 0);
return nvinfer1::Dims{2, {static_cast<int>(m_OutputSize), 6}};
}
bool
YoloLayer::supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept {
return (type == nvinfer1::DataType::kFLOAT && format == nvinfer1::PluginFormat::kLINEAR);
}
void
YoloLayer::configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims,
int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept
{
assert(nbInputs > 0);
assert(format == nvinfer1::PluginFormat::kLINEAR);
assert(inputDims != nullptr);
}
#ifdef LEGACY
int
YoloLayer::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)
#else
int32_t
YoloLayer::enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
noexcept
#endif
{
void* output = outputs[0];
CUDA_CHECK(cudaMemsetAsync((float*) output, 0, sizeof(float) * m_OutputSize * 6 * batchSize, stream));
void* count = workspace;
CUDA_CHECK(cudaMemsetAsync((int*) count, 0, sizeof(int) * batchSize, stream));
uint yoloTensorsSize = m_YoloTensors.size();
for (uint i = 0; i < yoloTensorsSize; ++i) {
TensorInfo& curYoloTensor = m_YoloTensors.at(i);
uint numBBoxes = curYoloTensor.numBBoxes;
float scaleXY = curYoloTensor.scaleXY;
uint gridSizeX = curYoloTensor.gridSizeX;
uint gridSizeY = curYoloTensor.gridSizeY;
std::vector<float> anchors = curYoloTensor.anchors;
std::vector<int> mask = curYoloTensor.mask;
void* v_anchors;
void* v_mask;
if (anchors.size() > 0) {
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
CUDA_CHECK(cudaMemcpyAsync(v_anchors, anchors.data(), sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream));
}
if (mask.size() > 0) {
CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
CUDA_CHECK(cudaMemcpyAsync(v_mask, mask.data(), sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
}
uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses));
if (mask.size() > 0) {
if (m_NewCoords) {
CUDA_CHECK(cudaYoloLayer_nc(inputs[i], output, count, batchSize, inputSize, m_OutputSize, m_NetWidth, m_NetHeight,
gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
}
else {
CUDA_CHECK(cudaYoloLayer(inputs[i], output, count, batchSize, inputSize, m_OutputSize, m_NetWidth, m_NetHeight,
gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
}
}
else {
void* softmax;
CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream));
CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, output, count, batchSize, inputSize, m_OutputSize, m_NetWidth,
m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, stream));
CUDA_CHECK(cudaFree(softmax));
}
if (anchors.size() > 0) {
CUDA_CHECK(cudaFree(v_anchors));
}
if (mask.size() > 0) {
CUDA_CHECK(cudaFree(v_mask));
}
}
return 0;
return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize);
}
size_t
@@ -250,10 +165,113 @@ YoloLayer::serialize(void* buffer) const noexcept
}
}
nvinfer1::IPluginV2*
YoloLayer::clone() const noexcept
nvinfer1::DimsExprs
YoloLayer::getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT nbInputDims,
nvinfer1::IExprBuilder& exprBuilder)noexcept
{
return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize);
assert(index < 3);
if (index == 0) {
return nvinfer1::DimsExprs{3, {inputs->d[0], exprBuilder.constant(static_cast<int>(m_OutputSize)),
exprBuilder.constant(4)}};
}
return nvinfer1::DimsExprs{3, {inputs->d[0], exprBuilder.constant(static_cast<int>(m_OutputSize)),
exprBuilder.constant(1)}};
}
// Reports whether the tensor at position `pos` may use the format/type described by inOut[pos].
//
// pos       - index into inOut; inputs occupy [0, nbInputs), outputs follow at [nbInputs, nbInputs + nbOutputs).
// inOut     - candidate descriptors for all inputs and outputs.
// nbInputs  - number of input tensors.
// nbOutputs - number of output tensors.
//
// Every tensor must be kLINEAR. The accepted data type depends on the position so that it agrees with
// getOutputDataType() and with how enqueue() reinterprets the buffers: output index 2 (classes) is INT32,
// every other input/output is FLOAT.
bool
YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept
{
  assert(pos >= 0 && pos < nbInputs + nbOutputs);

  const nvinfer1::PluginTensorDesc& desc = inOut[pos];
  if (desc.format != nvinfer1::TensorFormat::kLINEAR) {
    return false;
  }

  // Only the third output (classes) is written through an int pointer; everything else is read/written as float.
  const bool isClassesOutput = (pos == nbInputs + 2);
  const nvinfer1::DataType expectedType = isClassesOutput ? nvinfer1::DataType::kINT32 : nvinfer1::DataType::kFLOAT;

  return desc.type == expectedType;
}
// Returns the element type of output `index`: INT32 for index 2, FLOAT for any other index.
// `inputTypes` and `nbInputs` are not consulted. Only indices below 3 are valid (asserted).
nvinfer1::DataType
YoloLayer::getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept
{
  assert(index < 3);

  const bool wantsInt32 = (index == 2);
  return wantsInt32 ? nvinfer1::DataType::kINT32 : nvinfer1::DataType::kFLOAT;
}
// Sanity-checks the configuration handed over for this plugin; nothing is stored.
//
// in       - descriptors of the input tensors; only the first one is inspected.
// nbInput  - number of inputs, must be positive.
// out      - descriptors of the output tensors (unused).
// nbOutput - number of outputs (unused).
//
// All checks are asserts, so they compile away when NDEBUG is defined.
void
YoloLayer::configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, INT nbInput,
    const nvinfer1::DynamicPluginTensorDesc* out, INT nbOutput) noexcept
{
  // At least one input must be present before the first descriptor is looked at.
  assert(nbInput > 0);
  // The first input has to be in linear layout.
  assert(in[0].desc.format == nvinfer1::PluginFormat::kLINEAR);
  assert(in[0].desc.dims.d != nullptr);
}
// Runs the decode kernels for every entry of m_YoloTensors and writes the three plugin outputs.
//
// inputDesc  - input descriptors; inputDesc[0].dims.d[0] is taken as the batch size.
// outputDesc - output descriptors (unused).
// inputs     - one device buffer per entry of m_YoloTensors, in the same order.
// outputs    - outputs[0], outputs[1] and outputs[2] are handed to the launchers as boxes, scores and classes.
// workspace  - unused.
// stream     - CUDA stream used for the async copies, the memset and the launcher calls.
//
// Returns 0. Every CUDA call is wrapped in CUDA_CHECK.
//
// For each head the anchors/mask are uploaded to temporary device buffers, the matching launcher is called
// (cudaYoloLayer_nc / cudaYoloLayer when a mask is present, cudaRegionLayer otherwise) and the buffers are freed.
// lastInputSize accumulates numBBoxes * gridSizeY * gridSizeX over the heads already processed and is forwarded
// to each launcher.
//
// NOTE(review): device buffers are allocated and freed on every call; hoisting them into members would avoid
// that, but needs changes outside this function.
INT
YoloLayer::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc,
    void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept
{
  const INT batchSize = inputDesc[0].dims.d[0];

  void* boxes = outputs[0];
  void* scores = outputs[1];
  void* classes = outputs[2];

  uint64_t lastInputSize = 0;

  const uint yoloTensorsSize = m_YoloTensors.size();
  for (uint i = 0; i < yoloTensorsSize; ++i) {
    const TensorInfo& curYoloTensor = m_YoloTensors.at(i);

    const uint numBBoxes = curYoloTensor.numBBoxes;
    const float scaleXY = curYoloTensor.scaleXY;
    const uint gridSizeX = curYoloTensor.gridSizeX;
    const uint gridSizeY = curYoloTensor.gridSizeY;
    // Bind by reference: the host vectors are only read, so copying them on every call is wasted work.
    const std::vector<float>& anchors = curYoloTensor.anchors;
    const std::vector<int>& mask = curYoloTensor.mask;

    // Start from nullptr so a head without anchors/mask never hands an indeterminate pointer to a launcher.
    void* v_anchors = nullptr;
    void* v_mask = nullptr;

    if (!anchors.empty()) {
      CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
      CUDA_CHECK(cudaMemcpyAsync(v_anchors, anchors.data(), sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream));
    }
    if (!mask.empty()) {
      CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
      CUDA_CHECK(cudaMemcpyAsync(v_mask, mask.data(), sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
    }

    // Widen before multiplying so the products are computed in 64 bits.
    const uint64_t numPredictions = static_cast<uint64_t>(numBBoxes) * gridSizeY * gridSizeX;
    const uint64_t inputSize = numPredictions * (4 + 1 + m_NumClasses);

    if (!mask.empty()) {
      if (m_NewCoords) {
        CUDA_CHECK(cudaYoloLayer_nc(inputs[i], boxes, scores, classes, batchSize, inputSize, m_OutputSize, lastInputSize,
            m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
      }
      else {
        CUDA_CHECK(cudaYoloLayer(inputs[i], boxes, scores, classes, batchSize, inputSize, m_OutputSize, lastInputSize,
            m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
      }
    }
    else {
      // The region launcher takes a zero-initialised scratch buffer of batchSize * inputSize floats.
      void* softmax = nullptr;
      CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
      CUDA_CHECK(cudaMemsetAsync(softmax, 0, sizeof(float) * inputSize * batchSize, stream));

      CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, boxes, scores, classes, batchSize, inputSize, m_OutputSize,
          lastInputSize, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, stream));

      CUDA_CHECK(cudaFree(softmax));
    }

    if (v_anchors != nullptr) {
      CUDA_CHECK(cudaFree(v_anchors));
    }
    if (v_mask != nullptr) {
      CUDA_CHECK(cudaFree(v_mask));
    }

    lastInputSize += numPredictions;
  }

  return 0;
}
REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator);

View File

@@ -38,57 +38,68 @@
} \
}
// Integer type used in the YoloLayer plugin method signatures: int32_t when NV_TENSORRT_MAJOR is 8 or
// greater, plain int otherwise.
// NOTE(review): presumably this mirrors the integer type the TensorRT plugin interfaces declare for each
// release line — confirm against the TensorRT headers in use.
#if NV_TENSORRT_MAJOR >= 8
#define INT int32_t
#else
#define INT int
#endif
namespace {
// Version string returned by YoloLayer::getPluginVersion().
const char* YOLOLAYER_PLUGIN_VERSION = "1";
// Plugin type name returned by YoloLayer::getPluginType().
const char* YOLOLAYER_PLUGIN_NAME = "YoloLayer_TRT";
} // namespace
class YoloLayer : public nvinfer1::IPluginV2 {
class YoloLayer : public nvinfer1::IPluginV2DynamicExt {
public:
YoloLayer(const void* data, size_t length);
YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords,
const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize);
const char* getPluginType() const noexcept override { return YOLOLAYER_PLUGIN_NAME; }
const char* getPluginVersion() const noexcept override { return YOLOLAYER_PLUGIN_VERSION; }
int getNbOutputs() const noexcept override { return 1; }
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept override;
bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept override;
void configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs,
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept override;
nvinfer1::IPluginV2DynamicExt* clone() const noexcept override;
int initialize() noexcept override { return 0; }
void terminate() noexcept override {}
size_t getWorkspaceSize(int maxBatchSize) const noexcept override {
return maxBatchSize * sizeof(int);
}
#ifdef LEGACY
int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override;
#else
int32_t enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
noexcept override;
#endif
void destroy() noexcept override { delete this; }
size_t getSerializationSize() const noexcept override;
void serialize(void* buffer) const noexcept override;
void destroy() noexcept override { delete this; }
int getNbOutputs() const noexcept override { return 3; }
nvinfer1::IPluginV2* clone() const noexcept override;
nvinfer1::DimsExprs getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT nbInputDims,
nvinfer1::IExprBuilder& exprBuilder) noexcept override;
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, INT nbInputs,
const nvinfer1::PluginTensorDesc* outputs, INT nbOutputs) const noexcept override { return 0; }
bool supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept
override;
const char* getPluginType() const noexcept override { return YOLOLAYER_PLUGIN_NAME; }
const char* getPluginVersion() const noexcept override { return YOLOLAYER_PLUGIN_VERSION; }
void setPluginNamespace(const char* pluginNamespace) noexcept override { m_Namespace = pluginNamespace; }
virtual const char* getPluginNamespace() const noexcept override { return m_Namespace.c_str(); }
const char* getPluginNamespace() const noexcept override { return m_Namespace.c_str(); }
nvinfer1::DataType getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept
override;
void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator)
noexcept override {}
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, INT nbInput,
const nvinfer1::DynamicPluginTensorDesc* out, INT nbOutput) noexcept override;
void detachFromContext() noexcept override {}
INT enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc,
void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override;
private:
std::string m_Namespace {""};
@@ -115,12 +126,14 @@ class YoloLayerPluginCreator : public nvinfer1::IPluginCreator {
return nullptr;
}
nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override {
nvinfer1::IPluginV2DynamicExt* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept
override {
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented";
return nullptr;
}
nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override {
nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept
override {
std::cout << "Deserialize yoloLayer plugin: " << name << std::endl;
return new YoloLayer(serialData, serialLength);
}