Add YOLOv8 support
This commit is contained in:
@@ -4,139 +4,130 @@
|
||||
*/
|
||||
|
||||
#include "calibrator.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
|
||||
namespace nvinfer1
|
||||
Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchsize, const int& channels, const int& height,
|
||||
const int& width, const int& letterbox, const std::string& imgPath,
|
||||
const std::string& calibTablePath) : batchSize(batchsize), inputC(channels), inputH(height), inputW(width),
|
||||
letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0)
|
||||
{
|
||||
/**
 * Creates the calibrator and allocates its staging buffers.
 *
 * @param batchsize       number of images delivered per getBatch() call
 * @param channels        channel count passed on to prepareImage()
 * @param height          input height passed on to prepareImage()
 * @param width           input width passed on to prepareImage()
 * @param letterbox       forwarded unchanged to prepareImage()
 * @param imgPath         text file listing one calibration image path per line
 * @param calibTablePath  file used to read/write the calibration cache
 *
 * A host buffer and a device buffer of batchsize*channels*height*width floats
 * are allocated here and released in the destructor.
 */
Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath,
        const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0),
        // Multiply in size_t so large batches/resolutions cannot overflow int.
        inputCount(static_cast<size_t>(batchsize) * channels * height * width),
        // readCache is consulted by readCalibrationCache(); give it a defined value.
        readCache(true)
{
    // Read-only stream: std::fstream defaults to in|out and fails on read-only files.
    std::ifstream f(imgPath);
    if (f.is_open())
    {
        std::string temp;
        while (std::getline(f, temp))
        {
            // A blank line is not an image path; keeping it would make imread fail later.
            if (!temp.empty())
                imgPaths.push_back(temp);
        }
    }
    else
    {
        std::cerr << "Unable to open calibration image list: " << imgPath << std::endl;
    }
    batchData = new float[inputCount];
    CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
}
|
||||
|
||||
// Releases the device staging buffer, then the host staging buffer.
Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
{
    CUDA_CHECK(cudaFree(deviceInput));
    // delete[] on a null pointer is a no-op, so no explicit null test is needed.
    delete[] batchData;
}
|
||||
|
||||
int Int8EntropyCalibrator2::getBatchSize() const noexcept
|
||||
{
|
||||
return batchSize;
|
||||
}
|
||||
|
||||
bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int nbBindings) noexcept
|
||||
{
|
||||
if (imageIndex + batchSize > uint(imgPaths.size()))
|
||||
return false;
|
||||
|
||||
float* ptr = batchData;
|
||||
for (size_t j = imageIndex; j < imageIndex + batchSize; ++j)
|
||||
{
|
||||
cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR);
|
||||
std::vector<float>inputData = prepareImage(img, inputC, inputH, inputW, letterBox);
|
||||
|
||||
int len = (int)(inputData.size());
|
||||
memcpy(ptr, inputData.data(), len * sizeof(float));
|
||||
|
||||
ptr += inputData.size();
|
||||
std::cout << "Load image: " << imgPaths[j] << std::endl;
|
||||
std::cout << "Progress: " << (j + 1)*100. / imgPaths.size() << "%" << std::endl;
|
||||
}
|
||||
imageIndex += batchSize;
|
||||
CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
|
||||
bindings[0] = deviceInput;
|
||||
return true;
|
||||
}
|
||||
|
||||
const void* Int8EntropyCalibrator2::readCalibrationCache(std::size_t &length) noexcept
|
||||
{
|
||||
calibrationCache.clear();
|
||||
std::ifstream input(calibTablePath, std::ios::binary);
|
||||
input >> std::noskipws;
|
||||
if (readCache && input.good())
|
||||
{
|
||||
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
|
||||
std::back_inserter(calibrationCache));
|
||||
}
|
||||
length = calibrationCache.size();
|
||||
return length ? calibrationCache.data() : nullptr;
|
||||
}
|
||||
|
||||
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t length) noexcept
|
||||
{
|
||||
std::ofstream output(calibTablePath, std::ios::binary);
|
||||
output.write(reinterpret_cast<const char*>(cache), length);
|
||||
}
|
||||
inputCount = batchsize * channels * height * width;
|
||||
std::fstream f(imgPath);
|
||||
if (f.is_open()) {
|
||||
std::string temp;
|
||||
while (std::getline(f, temp))
|
||||
imgPaths.push_back(temp);
|
||||
}
|
||||
batchData = new float[inputCount];
|
||||
CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
|
||||
}
|
||||
|
||||
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box)
|
||||
Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
|
||||
{
|
||||
cv::Mat out;
|
||||
int image_w = img.cols;
|
||||
int image_h = img.rows;
|
||||
if (image_w != input_w || image_h != input_h)
|
||||
{
|
||||
if (letter_box == 1)
|
||||
{
|
||||
float ratio_w = (float)image_w / (float)input_w;
|
||||
float ratio_h = (float)image_h / (float)input_h;
|
||||
if (ratio_w > ratio_h)
|
||||
{
|
||||
int new_width = input_w * ratio_h;
|
||||
int x = (image_w - new_width) / 2;
|
||||
cv::Rect roi(abs(x), 0, new_width, image_h);
|
||||
out = img(roi);
|
||||
}
|
||||
else if (ratio_w < ratio_h)
|
||||
{
|
||||
int new_height = input_h * ratio_w;
|
||||
int y = (image_h - new_height) / 2;
|
||||
cv::Rect roi(0, abs(y), image_w, new_height);
|
||||
out = img(roi);
|
||||
}
|
||||
else {
|
||||
out = img;
|
||||
}
|
||||
cv::resize(out, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
||||
}
|
||||
cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
|
||||
}
|
||||
if (input_c == 3)
|
||||
{
|
||||
out.convertTo(out, CV_32FC3, 1.0 / 255.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
out.convertTo(out, CV_32FC1, 1.0 / 255.0);
|
||||
}
|
||||
std::vector<cv::Mat> input_channels(input_c);
|
||||
cv::split(out, input_channels);
|
||||
std::vector<float> result(input_h * input_w * input_c);
|
||||
auto data = result.data();
|
||||
int channelLength = input_h * input_w;
|
||||
for (int i = 0; i < input_c; ++i)
|
||||
{
|
||||
memcpy(data, input_channels[i].data, channelLength * sizeof(float));
|
||||
data += channelLength;
|
||||
}
|
||||
return result;
|
||||
CUDA_CHECK(cudaFree(deviceInput));
|
||||
if (batchData)
|
||||
delete[] batchData;
|
||||
}
|
||||
|
||||
int
|
||||
Int8EntropyCalibrator2::getBatchSize() const noexcept
|
||||
{
|
||||
return batchSize;
|
||||
}
|
||||
|
||||
bool
|
||||
Int8EntropyCalibrator2::getBatch(void** bindings, const char** names, int nbBindings) noexcept
|
||||
{
|
||||
if (imageIndex + batchSize > uint(imgPaths.size()))
|
||||
return false;
|
||||
|
||||
float* ptr = batchData;
|
||||
for (size_t i = imageIndex; i < imageIndex + batchSize; ++i) {
|
||||
cv::Mat img = cv::imread(imgPaths[i], cv::IMREAD_COLOR);
|
||||
std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, letterBox);
|
||||
|
||||
int len = (int) (inputData.size());
|
||||
memcpy(ptr, inputData.data(), len * sizeof(float));
|
||||
|
||||
ptr += inputData.size();
|
||||
std::cout << "Load image: " << imgPaths[i] << std::endl;
|
||||
std::cout << "Progress: " << (i + 1)*100. / imgPaths.size() << "%" << std::endl;
|
||||
}
|
||||
imageIndex += batchSize;
|
||||
CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
|
||||
bindings[0] = deviceInput;
|
||||
return true;
|
||||
}
|
||||
|
||||
const void*
|
||||
Int8EntropyCalibrator2::readCalibrationCache(std::size_t &length) noexcept
|
||||
{
|
||||
calibrationCache.clear();
|
||||
std::ifstream input(calibTablePath, std::ios::binary);
|
||||
input >> std::noskipws;
|
||||
if (readCache && input.good())
|
||||
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calibrationCache));
|
||||
length = calibrationCache.size();
|
||||
return length ? calibrationCache.data() : nullptr;
|
||||
}
|
||||
|
||||
void
|
||||
Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t length) noexcept
|
||||
{
|
||||
std::ofstream output(calibTablePath, std::ios::binary);
|
||||
output.write(reinterpret_cast<const char*>(cache), length);
|
||||
}
|
||||
|
||||
std::vector<float>
|
||||
prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box)
|
||||
{
|
||||
cv::Mat out;
|
||||
int image_w = img.cols;
|
||||
int image_h = img.rows;
|
||||
if (image_w != input_w || image_h != input_h) {
|
||||
if (letter_box == 1) {
|
||||
float ratio_w = (float) image_w / (float) input_w;
|
||||
float ratio_h = (float) image_h / (float) input_h;
|
||||
if (ratio_w > ratio_h) {
|
||||
int new_width = input_w * ratio_h;
|
||||
int x = (image_w - new_width) / 2;
|
||||
cv::Rect roi(abs(x), 0, new_width, image_h);
|
||||
out = img(roi);
|
||||
}
|
||||
else if (ratio_w < ratio_h) {
|
||||
int new_height = input_h * ratio_w;
|
||||
int y = (image_h - new_height) / 2;
|
||||
cv::Rect roi(0, abs(y), image_w, new_height);
|
||||
out = img(roi);
|
||||
}
|
||||
else
|
||||
out = img;
|
||||
cv::resize(out, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
||||
}
|
||||
else {
|
||||
cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
||||
}
|
||||
cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
|
||||
}
|
||||
else
|
||||
cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
|
||||
|
||||
if (input_c == 3)
|
||||
out.convertTo(out, CV_32FC3, 1.0 / 255.0);
|
||||
else
|
||||
out.convertTo(out, CV_32FC1, 1.0 / 255.0);
|
||||
|
||||
std::vector<cv::Mat> input_channels(input_c);
|
||||
cv::split(out, input_channels);
|
||||
std::vector<float> result(input_h * input_w * input_c);
|
||||
auto data = result.data();
|
||||
int channelLength = input_h * input_w;
|
||||
for (int i = 0; i < input_c; ++i) {
|
||||
memcpy(data, input_channels[i].data, channelLength * sizeof(float));
|
||||
data += channelLength;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -6,57 +6,50 @@
|
||||
#ifndef CALIBRATOR_H
|
||||
#define CALIBRATOR_H
|
||||
|
||||
#include "opencv2/opencv.hpp"
|
||||
#include "cuda_runtime.h"
|
||||
#include "NvInfer.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call once; on any result other than cudaSuccess it
// prints the error code with file and line to stderr and aborts the process.
// abort() is used instead of assert(0) so failures are not ignored in builds
// compiled with NDEBUG.
#define CUDA_CHECK(callstr) \
    { \
        cudaError_t error_code = (callstr); \
        if (error_code != cudaSuccess) { \
            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__ << std::endl; \
            abort(); \
        } \
    }
#endif
|
||||
#include "NvInfer.h"
|
||||
#include "opencv2/opencv.hpp"
|
||||
|
||||
namespace nvinfer1 {
|
||||
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
|
||||
public:
|
||||
Int8EntropyCalibrator2(const int &batchsize,
|
||||
const int &channels,
|
||||
const int &height,
|
||||
const int &width,
|
||||
const int &letterbox,
|
||||
const std::string &imgPath,
|
||||
const std::string &calibTablePath);
|
||||
|
||||
virtual ~Int8EntropyCalibrator2();
|
||||
int getBatchSize() const noexcept override;
|
||||
bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override;
|
||||
const void* readCalibrationCache(std::size_t& length) noexcept override;
|
||||
void writeCalibrationCache(const void* cache, size_t length) noexcept override;
|
||||
|
||||
private:
|
||||
int batchSize;
|
||||
int inputC;
|
||||
int inputH;
|
||||
int inputW;
|
||||
int letterBox;
|
||||
std::string calibTablePath;
|
||||
size_t imageIndex;
|
||||
size_t inputCount;
|
||||
std::vector<std::string> imgPaths;
|
||||
float *batchData{ nullptr };
|
||||
void *deviceInput{ nullptr };
|
||||
bool readCache;
|
||||
std::vector<char> calibrationCache;
|
||||
};
|
||||
// Checks the result of a CUDA runtime call. The argument is evaluated exactly
// once and stored, so a failing call is not executed a second time while the
// error message is built. On a non-zero status the error string, file and line
// are printed and the process aborts.
#define CUDA_CHECK(status) { \
    const auto cudaCheckStatus_ = (status); \
    if (cudaCheckStatus_ != 0) { \
      std::cout << "CUDA failure: " << cudaGetErrorString(cudaCheckStatus_) << " in file " << __FILE__ << " at line " << \
          __LINE__ << std::endl; \
      abort(); \
    } \
}
|
||||
|
||||
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
|
||||
public:
|
||||
Int8EntropyCalibrator2(const int& batchsize, const int& channels, const int& height, const int& width,
|
||||
const int& letterbox, const std::string& imgPath, const std::string& calibTablePath);
|
||||
|
||||
virtual ~Int8EntropyCalibrator2();
|
||||
|
||||
int getBatchSize() const noexcept override;
|
||||
|
||||
bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override;
|
||||
|
||||
const void* readCalibrationCache(std::size_t& length) noexcept override;
|
||||
|
||||
void writeCalibrationCache(const void* cache, size_t length) noexcept override;
|
||||
|
||||
private:
|
||||
int batchSize;
|
||||
int inputC;
|
||||
int inputH;
|
||||
int inputW;
|
||||
int letterBox;
|
||||
std::string calibTablePath;
|
||||
size_t imageIndex;
|
||||
size_t inputCount;
|
||||
std::vector<std::string> imgPaths;
|
||||
float* batchData {nullptr};
|
||||
void* deviceInput {nullptr};
|
||||
bool readCache;
|
||||
std::vector<char> calibrationCache;
|
||||
};
|
||||
|
||||
// Converts a BGR image to input_c * input_h * input_w floats: resized to
// input_w x input_h (centre-cropped first when letter_box == 1), converted to
// RGB, scaled by 1/255 and stored channel plane after channel plane.
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box);
|
||||
|
||||
#endif //CALIBRATOR_H
|
||||
|
||||
@@ -5,118 +5,107 @@
|
||||
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* activationLayer(
|
||||
int layerIdx,
|
||||
std::string activation,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
if (activation == "linear")
|
||||
{
|
||||
output = input;
|
||||
}
|
||||
else if (activation == "relu")
|
||||
{
|
||||
nvinfer1::IActivationLayer* relu = network->addActivation(*input, nvinfer1::ActivationType::kRELU);
|
||||
assert(relu != nullptr);
|
||||
std::string reluLayerName = "relu_" + std::to_string(layerIdx);
|
||||
relu->setName(reluLayerName.c_str());
|
||||
output = relu->getOutput(0);
|
||||
}
|
||||
else if (activation == "sigmoid" || activation == "logistic")
|
||||
{
|
||||
nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID);
|
||||
assert(sigmoid != nullptr);
|
||||
std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
|
||||
sigmoid->setName(sigmoidLayerName.c_str());
|
||||
output = sigmoid->getOutput(0);
|
||||
}
|
||||
else if (activation == "tanh")
|
||||
{
|
||||
nvinfer1::IActivationLayer* tanh = network->addActivation(*input, nvinfer1::ActivationType::kTANH);
|
||||
assert(tanh != nullptr);
|
||||
std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
|
||||
tanh->setName(tanhLayerName.c_str());
|
||||
output = tanh->getOutput(0);
|
||||
}
|
||||
else if (activation == "leaky")
|
||||
{
|
||||
nvinfer1::IActivationLayer* leaky = network->addActivation(*input, nvinfer1::ActivationType::kLEAKY_RELU);
|
||||
assert(leaky != nullptr);
|
||||
std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
|
||||
leaky->setName(leakyLayerName.c_str());
|
||||
leaky->setAlpha(0.1);
|
||||
output = leaky->getOutput(0);
|
||||
}
|
||||
else if (activation == "softplus")
|
||||
{
|
||||
nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS);
|
||||
assert(softplus != nullptr);
|
||||
std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
|
||||
softplus->setName(softplusLayerName.c_str());
|
||||
output = softplus->getOutput(0);
|
||||
}
|
||||
else if (activation == "mish")
|
||||
{
|
||||
nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS);
|
||||
assert(softplus != nullptr);
|
||||
std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
|
||||
softplus->setName(softplusLayerName.c_str());
|
||||
nvinfer1::IActivationLayer* tanh = network->addActivation(*softplus->getOutput(0), nvinfer1::ActivationType::kTANH);
|
||||
assert(tanh != nullptr);
|
||||
std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
|
||||
tanh->setName(tanhLayerName.c_str());
|
||||
nvinfer1::IElementWiseLayer* mish
|
||||
= network->addElementWise(*input, *tanh->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
|
||||
assert(mish != nullptr);
|
||||
std::string mishLayerName = "mish_" + std::to_string(layerIdx);
|
||||
mish->setName(mishLayerName.c_str());
|
||||
output = mish->getOutput(0);
|
||||
}
|
||||
else if (activation == "silu" || activation == "swish")
|
||||
{
|
||||
nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID);
|
||||
assert(sigmoid != nullptr);
|
||||
std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
|
||||
sigmoid->setName(sigmoidLayerName.c_str());
|
||||
nvinfer1::IElementWiseLayer* silu
|
||||
= network->addElementWise(*input, *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
|
||||
assert(silu != nullptr);
|
||||
std::string siluLayerName = "silu_" + std::to_string(layerIdx);
|
||||
silu->setName(siluLayerName.c_str());
|
||||
output = silu->getOutput(0);
|
||||
}
|
||||
else if (activation == "hardsigmoid")
|
||||
{
|
||||
nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID);
|
||||
assert(hardsigmoid != nullptr);
|
||||
std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx);
|
||||
hardsigmoid->setName(hardsigmoidLayerName.c_str());
|
||||
hardsigmoid->setAlpha(1.0 / 6.0);
|
||||
hardsigmoid->setBeta(0.5);
|
||||
output = hardsigmoid->getOutput(0);
|
||||
}
|
||||
else if (activation == "hardswish")
|
||||
{
|
||||
nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID);
|
||||
assert(hardsigmoid != nullptr);
|
||||
std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx);
|
||||
hardsigmoid->setName(hardsigmoidLayerName.c_str());
|
||||
hardsigmoid->setAlpha(1.0 / 6.0);
|
||||
hardsigmoid->setBeta(0.5);
|
||||
nvinfer1::IElementWiseLayer* hardswish
|
||||
= network->addElementWise(*input, *hardsigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
|
||||
assert(hardswish != nullptr);
|
||||
std::string hardswishLayerName = "hardswish_" + std::to_string(layerIdx);
|
||||
hardswish->setName(hardswishLayerName.c_str());
|
||||
output = hardswish->getOutput(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "Activation not supported: " << activation << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
return output;
|
||||
// Adds an activation layer of the given type on `input`, names it, and returns the layer.
static nvinfer1::IActivationLayer*
addNamedActivation(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* input, nvinfer1::ActivationType type,
    const std::string& name)
{
  nvinfer1::IActivationLayer* layer = network->addActivation(*input, type);
  assert(layer != nullptr);
  layer->setName(name.c_str());
  return layer;
}

// Adds a named element-wise product of `input` and `gate` and returns its output tensor.
static nvinfer1::ITensor*
multiplyWithInput(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* input, nvinfer1::ITensor* gate,
    const std::string& name)
{
  nvinfer1::IElementWiseLayer* product = network->addElementWise(*input, *gate, nvinfer1::ElementWiseOperation::kPROD);
  assert(product != nullptr);
  product->setName(name.c_str());
  return product->getOutput(0);
}

/**
 * Appends the requested activation to the network and returns its output.
 *
 * @param layerIdx    index appended to every generated layer name
 * @param activation  one of: linear, relu, sigmoid/logistic, tanh, leaky,
 *                    softplus, mish, silu/swish, hardsigmoid, hardswish
 * @param input       tensor the activation is applied to
 * @param network     network definition receiving the new layers
 * @param layerName   text inserted between the activation prefix and layerIdx
 * @return output tensor of the activation; `input` itself for "linear".
 *         For an unsupported name an error is printed and assert(0) fires;
 *         when asserts are compiled out, nullptr is returned.
 */
nvinfer1::ITensor*
activationLayer(int layerIdx, std::string activation, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network,
    std::string layerName)
{
  using nvinfer1::ActivationType;

  // Initialised so the unsupported branch cannot return an indeterminate pointer.
  nvinfer1::ITensor* output = nullptr;
  const std::string suffix = layerName + std::to_string(layerIdx);

  if (activation == "linear")
    output = input;
  else if (activation == "relu")
    output = addNamedActivation(network, input, ActivationType::kRELU, "relu_" + suffix)->getOutput(0);
  else if (activation == "sigmoid" || activation == "logistic")
    output = addNamedActivation(network, input, ActivationType::kSIGMOID, "sigmoid_" + suffix)->getOutput(0);
  else if (activation == "tanh")
    output = addNamedActivation(network, input, ActivationType::kTANH, "tanh_" + suffix)->getOutput(0);
  else if (activation == "leaky") {
    nvinfer1::IActivationLayer* leaky = addNamedActivation(network, input, ActivationType::kLEAKY_RELU,
        "leaky_" + suffix);
    leaky->setAlpha(0.1);
    output = leaky->getOutput(0);
  }
  else if (activation == "softplus")
    output = addNamedActivation(network, input, ActivationType::kSOFTPLUS, "softplus_" + suffix)->getOutput(0);
  else if (activation == "mish") {
    // mish(x) = x * tanh(softplus(x))
    nvinfer1::IActivationLayer* softplus = addNamedActivation(network, input, ActivationType::kSOFTPLUS,
        "softplus_" + suffix);
    nvinfer1::IActivationLayer* tanh = addNamedActivation(network, softplus->getOutput(0), ActivationType::kTANH,
        "tanh_" + suffix);
    output = multiplyWithInput(network, input, tanh->getOutput(0), "mish_" + suffix);
  }
  else if (activation == "silu" || activation == "swish") {
    // silu(x) = x * sigmoid(x)
    nvinfer1::IActivationLayer* sigmoid = addNamedActivation(network, input, ActivationType::kSIGMOID,
        "sigmoid_" + suffix);
    output = multiplyWithInput(network, input, sigmoid->getOutput(0), "silu_" + suffix);
  }
  else if (activation == "hardsigmoid") {
    nvinfer1::IActivationLayer* hardsigmoid = addNamedActivation(network, input, ActivationType::kHARD_SIGMOID,
        "hardsigmoid_" + suffix);
    hardsigmoid->setAlpha(1.0 / 6.0);
    hardsigmoid->setBeta(0.5);
    output = hardsigmoid->getOutput(0);
  }
  else if (activation == "hardswish") {
    // hardswish(x) = x * hardsigmoid(x)
    nvinfer1::IActivationLayer* hardsigmoid = addNamedActivation(network, input, ActivationType::kHARD_SIGMOID,
        "hardsigmoid_" + suffix);
    hardsigmoid->setAlpha(1.0 / 6.0);
    hardsigmoid->setBeta(0.5);
    output = multiplyWithInput(network, input, hardsigmoid->getOutput(0), "hardswish_" + suffix);
  }
  else {
    std::cerr << "Activation not supported: " << activation << std::endl;
    assert(0);
  }
  return output;
}
|
||||
|
||||
@@ -6,15 +6,11 @@
|
||||
#ifndef __ACTIVATION_LAYER_H__
|
||||
#define __ACTIVATION_LAYER_H__
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
// Appends the activation named by `activation` to `network`, applied to
// `input`, and returns the resulting tensor. `layerIdx` is appended to the
// generated layer names.
nvinfer1::ITensor* activationLayer(
|
||||
int layerIdx,
|
||||
std::string activation,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
// Appends the activation named by `activation` to `network`, applied to
// `input`, and returns the resulting tensor. Generated layer names are built
// as "<activation>_" + layerName + layerIdx; layerName defaults to empty.
nvinfer1::ITensor* activationLayer(int layerIdx, std::string activation, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network, std::string layerName = "");
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,108 +3,94 @@
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "batchnorm_layer.h"
|
||||
|
||||
nvinfer1::ITensor* batchnormLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
std::string weightsType,
|
||||
float eps,
|
||||
nvinfer1::ITensor* input,
|
||||
#include <cassert>
|
||||
#include <math.h>
|
||||
|
||||
/**
 * Adds a batch-normalisation layer (as a per-channel scale layer) followed by
 * the block's activation.
 *
 * Reads 4 * filters floats from `weights` starting at `weightPtr` and advances
 * `weightPtr` past them. When weightsType == "weights" the order is
 * bias, weight, mean, variance and a fixed epsilon of 1.0e-5 is used;
 * otherwise the order is weight, bias, mean, variance and `eps` is used.
 *
 * @param layerIdx     index used in the generated layer names
 * @param block        layer description; needs "type" == "batchnorm",
 *                     "filters" and "activation"
 * @param weights      flat weight array
 * @param trtWeights   receives the shift, scale and power weights created here
 * @param weightPtr    in/out read position in `weights`
 * @param weightsType  selects the value order and epsilon (see above)
 * @param eps          epsilon added to the variance in the non-"weights" case
 * @param input        tensor to normalise
 * @param network      network definition receiving the new layers
 * @return output tensor of the activation that follows the scale layer
 */
nvinfer1::ITensor*
batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  nvinfer1::ITensor* output;

  assert(block.at("type") == "batchnorm");
  assert(block.find("filters") != block.end());

  int filters = std::stoi(block.at("filters"));
  std::string activation = block.at("activation");

  // The four reads below consume exactly 4 * filters consecutive values.
  assert(filters >= 0 && weightPtr >= 0);
  assert(static_cast<size_t>(weightPtr) + 4 * static_cast<size_t>(filters) <= weights.size());

  std::vector<float> bnBiases;
  std::vector<float> bnWeights;
  std::vector<float> bnRunningMean;
  std::vector<float> bnRunningVar;  // holds sqrt(variance + epsilon), i.e. the divisor

  // Copies the next `filters` values into dst and advances weightPtr.
  auto readChannelValues = [&](std::vector<float>& dst) {
    dst.reserve(filters);
    for (int i = 0; i < filters; ++i) {
      dst.push_back(weights[weightPtr]);
      ++weightPtr;
    }
  };

  if (weightsType == "weights") {
    readChannelValues(bnBiases);
    readChannelValues(bnWeights);
    readChannelValues(bnRunningMean);
    for (int i = 0; i < filters; ++i) {
      bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
      ++weightPtr;
    }
  }
  else {
    readChannelValues(bnWeights);
    readChannelValues(bnBiases);
    readChannelValues(bnRunningMean);
    for (int i = 0; i < filters; ++i) {
      bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
      ++weightPtr;
    }
  }

  int size = filters;
  nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size};
  nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size};
  nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size};

  // The buffers are handed to trtWeights and not freed in this function;
  // presumably the owner of trtWeights releases them - TODO confirm.
  float* shiftWt = new float[size];
  for (int i = 0; i < size; ++i)
    shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
  shift.values = shiftWt;

  float* scaleWt = new float[size];
  for (int i = 0; i < size; ++i)
    scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
  scale.values = scaleWt;

  float* powerWt = new float[size];
  for (int i = 0; i < size; ++i)
    powerWt[i] = 1.0;
  power.values = powerWt;

  trtWeights.push_back(shift);
  trtWeights.push_back(scale);
  trtWeights.push_back(power);

  nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
  assert(batchnorm != nullptr);
  std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx);
  batchnorm->setName(batchnormLayerName.c_str());
  output = batchnorm->getOutput(0);

  output = activationLayer(layerIdx, activation, output, network);
  assert(output != nullptr);

  return output;
}
|
||||
|
||||
@@ -13,15 +13,8 @@
|
||||
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* batchnormLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
std::string weightsType,
|
||||
float eps,
|
||||
nvinfer1::ITensor* input,
|
||||
// Adds a per-channel scale layer implementing batch normalisation, followed by
// the activation named in block["activation"], and returns the resulting
// tensor. Consumes values from `weights` starting at `weightPtr` (advanced in
// place) and appends the created shift/scale/power weights to `trtWeights`.
nvinfer1::ITensor* batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
82
nvdsinfer_custom_impl_Yolo/layers/c2f_layer.cpp
Normal file
82
nvdsinfer_custom_impl_Yolo/layers/c2f_layer.cpp
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "c2f_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "convolutional_layer.h"
|
||||
|
||||
/*
 * Adds a YOLOv8 C2f block to `network` and returns its output tensor.
 *
 * The input is split into two equal halves along dimension 0 ("lt" and "rb").
 * The second half is passed through `n` bottlenecks, each made of two
 * convolutionalLayer() calls and, when the cfg key "shortcut" is "1", an
 * element-wise sum with the bottleneck's own input. Both halves and every
 * bottleneck output are then concatenated along axis 0.
 *
 * layerIdx     index used to build the names of the created layers
 * block        cfg block; must have type "c2f" and the keys "n", "shortcut" and
 *              "filters" (convolutionalLayer() reads further keys from the same block)
 * weights      flat weight array, consumed by convolutionalLayer() from weightPtr on
 * trtWeights   receives the weight buffers created by convolutionalLayer()
 * weightPtr    read cursor into `weights`, advanced by convolutionalLayer()
 * weightsType  forwarded to convolutionalLayer()
 * eps          forwarded to convolutionalLayer()
 * input        3-D tensor to split (dimension 0 is halved)
 * network      network definition the layers are added to
 */
nvinfer1::ITensor*
c2fLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  nvinfer1::ITensor* output;

  assert(block.at("type") == "c2f");
  assert(block.find("n") != block.end());
  assert(block.find("shortcut") != block.end());
  assert(block.find("filters") != block.end());

  int n = std::stoi(block.at("n"));
  bool shortcut = (block.at("shortcut") == "1");
  // Not const: convolutionalLayer() takes its input-channel count by non-const reference.
  int filters = std::stoi(block.at("filters"));

  nvinfer1::Dims inputDims = input->getDimensions();

  // First half of dimension 0.
  nvinfer1::ISliceLayer* sliceLt = network->addSlice(*input, nvinfer1::Dims{3, {0, 0, 0}},
      nvinfer1::Dims{3, {inputDims.d[0] / 2, inputDims.d[1], inputDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}});
  assert(sliceLt != nullptr);
  std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx);
  sliceLt->setName(sliceLtLayerName.c_str());
  nvinfer1::ITensor* lt = sliceLt->getOutput(0);

  // Second half of dimension 0.
  nvinfer1::ISliceLayer* sliceRb = network->addSlice(*input, nvinfer1::Dims{3, {inputDims.d[0] / 2, 0, 0}},
      nvinfer1::Dims{3, {inputDims.d[0] / 2, inputDims.d[1], inputDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}});
  assert(sliceRb != nullptr);
  std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx);
  sliceRb->setName(sliceRbLayerName.c_str());
  nvinfer1::ITensor* rb = sliceRb->getOutput(0);

  std::vector<nvinfer1::ITensor*> concatInputs;
  concatInputs.push_back(lt);
  concatInputs.push_back(rb);
  output = rb;

  for (int i = 0; i < n; ++i) {
    // Each bottleneck consumes the previous stage's output (rb for the first one).
    nvinfer1::ITensor* bottleneckInput = output;

    std::string cv1MlayerName = "c2f_1_" + std::to_string(i + 1) + "_";
    nvinfer1::ITensor* cv1M = convolutionalLayer(layerIdx, block, weights, trtWeights, weightPtr, weightsType, filters, eps,
        bottleneckInput, network, cv1MlayerName);
    assert(cv1M != nullptr);

    std::string cv2MlayerName = "c2f_2_" + std::to_string(i + 1) + "_";
    nvinfer1::ITensor* cv2M = convolutionalLayer(layerIdx, block, weights, trtWeights, weightPtr, weightsType, filters, eps,
        cv1M, network, cv2MlayerName);
    assert(cv2M != nullptr);

    if (shortcut) {
      // Residual connection: x + cv2(cv1(x)), where x is this bottleneck's own input.
      // Adding rb here would only be correct for the first bottleneck.
      nvinfer1::IElementWiseLayer* ew = network->addElementWise(*bottleneckInput, *cv2M,
          nvinfer1::ElementWiseOperation::kSUM);
      assert(ew != nullptr);
      std::string ewLayerName = "shortcut_c2f_" + std::to_string(i + 1) + "_" + std::to_string(layerIdx);
      ew->setName(ewLayerName.c_str());
      output = ew->getOutput(0);
    }
    else {
      output = cv2M;
    }

    concatInputs.push_back(output);
  }

  nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(),
      static_cast<int>(concatInputs.size()));
  assert(concat != nullptr);
  std::string concatLayerName = "route_" + std::to_string(layerIdx);
  concat->setName(concatLayerName.c_str());
  concat->setAxis(0);
  output = concat->getOutput(0);

  return output;
}
|
||||
18
nvdsinfer_custom_impl_Yolo/layers/c2f_layer.h
Normal file
18
nvdsinfer_custom_impl_Yolo/layers/c2f_layer.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __C2F_LAYER_H__
|
||||
#define __C2F_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
// Adds a "c2f" cfg block to `network`: the input is split in two along dimension 0, the
// second half goes through block["n"] bottlenecks built with convolutionalLayer(), and the
// halves plus every bottleneck output are concatenated along axis 0.
// `weightPtr` is the read cursor into `weights` and is advanced as weights are consumed;
// created weight buffers are appended to `trtWeights`. Returns the concatenation output.
nvinfer1::ITensor* c2fLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -5,33 +5,32 @@
|
||||
|
||||
#include "channels_layer.h"
|
||||
|
||||
nvinfer1::ITensor* channelsLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* implicitTensor,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
#include <cassert>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
channelsLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels");
|
||||
assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels");
|
||||
|
||||
if (block.at("type") == "shift_channels") {
|
||||
nvinfer1::IElementWiseLayer* shift
|
||||
= network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kSUM);
|
||||
assert(shift != nullptr);
|
||||
std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx);
|
||||
shift->setName(shiftLayerName.c_str());
|
||||
output = shift->getOutput(0);
|
||||
}
|
||||
else if (block.at("type") == "control_channels") {
|
||||
nvinfer1::IElementWiseLayer* control
|
||||
= network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kPROD);
|
||||
assert(control != nullptr);
|
||||
std::string controlLayerName = "control_channels_" + std::to_string(layerIdx);
|
||||
control->setName(controlLayerName.c_str());
|
||||
output = control->getOutput(0);
|
||||
}
|
||||
if (block.at("type") == "shift_channels") {
|
||||
nvinfer1::IElementWiseLayer* shift = network->addElementWise(*input, *implicitTensor,
|
||||
nvinfer1::ElementWiseOperation::kSUM);
|
||||
assert(shift != nullptr);
|
||||
std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx);
|
||||
shift->setName(shiftLayerName.c_str());
|
||||
output = shift->getOutput(0);
|
||||
}
|
||||
else if (block.at("type") == "control_channels") {
|
||||
nvinfer1::IElementWiseLayer* control = network->addElementWise(*input, *implicitTensor,
|
||||
nvinfer1::ElementWiseOperation::kPROD);
|
||||
assert(control != nullptr);
|
||||
std::string controlLayerName = "control_channels_" + std::to_string(layerIdx);
|
||||
control->setName(controlLayerName.c_str());
|
||||
output = control->getOutput(0);
|
||||
}
|
||||
|
||||
return output;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -7,15 +7,10 @@
|
||||
#define __CHANNELS_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* channelsLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* implicitTensor,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* channelsLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,25 +5,25 @@
|
||||
|
||||
#include "cls_layer.h"
|
||||
|
||||
nvinfer1::ITensor* clsLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
#include <cassert>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
clsLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "cls");
|
||||
assert(block.at("type") == "cls");
|
||||
|
||||
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||
assert(shuffle != nullptr);
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
nvinfer1::Permutation permutation;
|
||||
permutation.order[0] = 1;
|
||||
permutation.order[1] = 0;
|
||||
shuffle->setFirstTranspose(permutation);
|
||||
output = shuffle->getOutput(0);
|
||||
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||
assert(shuffle != nullptr);
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
nvinfer1::Permutation permutation;
|
||||
permutation.order[0] = 1;
|
||||
permutation.order[1] = 0;
|
||||
shuffle->setFirstTranspose(permutation);
|
||||
output = shuffle->getOutput(0);
|
||||
|
||||
return output;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -7,14 +7,10 @@
|
||||
#define __CLS_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* clsLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* clsLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,224 +3,197 @@
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "convolutional_layer.h"
|
||||
|
||||
nvinfer1::ITensor* convolutionalLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
std::string weightsType,
|
||||
int& inputChannels,
|
||||
float eps,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
#include <cassert>
|
||||
#include <math.h>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, float eps,
|
||||
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "convolutional");
|
||||
assert(block.find("filters") != block.end());
|
||||
assert(block.find("pad") != block.end());
|
||||
assert(block.find("size") != block.end());
|
||||
assert(block.find("stride") != block.end());
|
||||
assert(block.at("type") == "convolutional" || block.at("type") == "c2f");
|
||||
assert(block.find("filters") != block.end());
|
||||
assert(block.find("pad") != block.end());
|
||||
assert(block.find("size") != block.end());
|
||||
assert(block.find("stride") != block.end());
|
||||
|
||||
int filters = std::stoi(block.at("filters"));
|
||||
int padding = std::stoi(block.at("pad"));
|
||||
int kernelSize = std::stoi(block.at("size"));
|
||||
int stride = std::stoi(block.at("stride"));
|
||||
std::string activation = block.at("activation");
|
||||
int bias = filters;
|
||||
int filters = std::stoi(block.at("filters"));
|
||||
int padding = std::stoi(block.at("pad"));
|
||||
int kernelSize = std::stoi(block.at("size"));
|
||||
int stride = std::stoi(block.at("stride"));
|
||||
std::string activation = block.at("activation");
|
||||
int bias = filters;
|
||||
|
||||
bool batchNormalize = false;
|
||||
if (block.find("batch_normalize") != block.end())
|
||||
{
|
||||
bias = 0;
|
||||
batchNormalize = (block.at("batch_normalize") == "1");
|
||||
}
|
||||
bool batchNormalize = false;
|
||||
if (block.find("batch_normalize") != block.end()) {
|
||||
bias = 0;
|
||||
batchNormalize = (block.at("batch_normalize") == "1");
|
||||
}
|
||||
|
||||
int groups = 1;
|
||||
if (block.find("groups") != block.end())
|
||||
groups = std::stoi(block.at("groups"));
|
||||
int groups = 1;
|
||||
if (block.find("groups") != block.end())
|
||||
groups = std::stoi(block.at("groups"));
|
||||
|
||||
if (block.find("bias") != block.end())
|
||||
bias = std::stoi(block.at("bias"));
|
||||
if (block.find("bias") != block.end())
|
||||
bias = std::stoi(block.at("bias"));
|
||||
|
||||
int pad;
|
||||
if (padding)
|
||||
pad = (kernelSize - 1) / 2;
|
||||
else
|
||||
pad = 0;
|
||||
int pad;
|
||||
if (padding)
|
||||
pad = (kernelSize - 1) / 2;
|
||||
else
|
||||
pad = 0;
|
||||
|
||||
int size = filters * inputChannels * kernelSize * kernelSize / groups;
|
||||
std::vector<float> bnBiases;
|
||||
std::vector<float> bnWeights;
|
||||
std::vector<float> bnRunningMean;
|
||||
std::vector<float> bnRunningVar;
|
||||
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias};
|
||||
int size = filters * inputChannels * kernelSize * kernelSize / groups;
|
||||
std::vector<float> bnBiases;
|
||||
std::vector<float> bnWeights;
|
||||
std::vector<float> bnRunningMean;
|
||||
std::vector<float> bnRunningVar;
|
||||
nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};
|
||||
|
||||
if (weightsType == "weights")
|
||||
{
|
||||
if (batchNormalize == false)
|
||||
{
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
weightPtr++;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
weightPtr++;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
weightPtr++;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
|
||||
weightPtr++;
|
||||
}
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0)
|
||||
trtWeights.push_back(convBias);
|
||||
if (weightsType == "weights") {
|
||||
if (batchNormalize == false) {
|
||||
float* val;
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (batchNormalize == false)
|
||||
{
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
convWt.values = val;
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
weightPtr++;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
weightPtr++;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
weightPtr++;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
|
||||
weightPtr++;
|
||||
}
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0)
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
|
||||
++weightPtr;
|
||||
}
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0)
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
|
||||
nvinfer1::IConvolutionLayer* conv
|
||||
= network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
|
||||
assert(conv != nullptr);
|
||||
std::string convLayerName = "conv_" + std::to_string(layerIdx);
|
||||
conv->setName(convLayerName.c_str());
|
||||
conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
|
||||
conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});
|
||||
|
||||
if (block.find("groups") != block.end())
|
||||
conv->setNbGroups(groups);
|
||||
|
||||
output = conv->getOutput(0);
|
||||
|
||||
if (batchNormalize == true)
|
||||
{
|
||||
size = filters;
|
||||
nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
float* shiftWt = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
|
||||
shift.values = shiftWt;
|
||||
float* scaleWt = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
|
||||
scale.values = scaleWt;
|
||||
float* powerWt = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
powerWt[i] = 1.0;
|
||||
power.values = powerWt;
|
||||
trtWeights.push_back(shift);
|
||||
trtWeights.push_back(scale);
|
||||
trtWeights.push_back(power);
|
||||
|
||||
nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
|
||||
assert(batchnorm != nullptr);
|
||||
std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx);
|
||||
batchnorm->setName(batchnormLayerName.c_str());
|
||||
output = batchnorm->getOutput(0);
|
||||
}
|
||||
else {
|
||||
if (batchNormalize == false) {
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0) {
|
||||
val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convBias.values = val;
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
}
|
||||
else {
|
||||
float* val = new float[size];
|
||||
for (int i = 0; i < size; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnWeights.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnBiases.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningMean.push_back(weights[weightPtr]);
|
||||
++weightPtr;
|
||||
}
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
bnRunningVar.push_back(sqrt(weights[weightPtr] + eps));
|
||||
++weightPtr;
|
||||
}
|
||||
trtWeights.push_back(convWt);
|
||||
if (bias != 0)
|
||||
trtWeights.push_back(convBias);
|
||||
}
|
||||
}
|
||||
|
||||
output = activationLayer(layerIdx, activation, output, network);
|
||||
assert(output != nullptr);
|
||||
nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}},
|
||||
convWt, convBias);
|
||||
assert(conv != nullptr);
|
||||
std::string convLayerName = "conv_" + layerName + std::to_string(layerIdx);
|
||||
conv->setName(convLayerName.c_str());
|
||||
conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
|
||||
conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});
|
||||
|
||||
return output;
|
||||
if (block.find("groups") != block.end())
|
||||
conv->setNbGroups(groups);
|
||||
|
||||
output = conv->getOutput(0);
|
||||
|
||||
if (batchNormalize == true) {
|
||||
size = filters;
|
||||
nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||
float* shiftWt = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
|
||||
shift.values = shiftWt;
|
||||
float* scaleWt = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
|
||||
scale.values = scaleWt;
|
||||
float* powerWt = new float[size];
|
||||
for (int i = 0; i < size; ++i)
|
||||
powerWt[i] = 1.0;
|
||||
power.values = powerWt;
|
||||
trtWeights.push_back(shift);
|
||||
trtWeights.push_back(scale);
|
||||
trtWeights.push_back(power);
|
||||
|
||||
nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
|
||||
assert(batchnorm != nullptr);
|
||||
std::string batchnormLayerName = "batchnorm_" + layerName + std::to_string(layerIdx);
|
||||
batchnorm->setName(batchnormLayerName.c_str());
|
||||
output = batchnorm->getOutput(0);
|
||||
}
|
||||
|
||||
output = activationLayer(layerIdx, activation, output, network, layerName);
|
||||
assert(output != nullptr);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -13,16 +13,8 @@
|
||||
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* convolutionalLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
std::string weightsType,
|
||||
int& inputChannels,
|
||||
float eps,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, float eps,
|
||||
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = "");
|
||||
|
||||
#endif
|
||||
|
||||
196
nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.cpp
Normal file
196
nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.cpp
Normal file
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include "detect_v8_layer.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
/*
 * Adds the YOLOv8 detection head post-processing to `network` and returns its output.
 *
 * `input` is a 2-D tensor whose first `num` rows hold the box distribution logits and
 * whose next `classes` rows hold the class logits; dimension 1 is the number of
 * anchor points. The layer:
 *   1. reshapes the box rows to {reg_max, 4, anchors}, applies softmax over axis 0 and
 *      reduces it with a 1x1 convolution whose weights are 0..reg_max-1,
 *   2. turns the resulting 4 distances into corners: anchor - lt and rb + anchor,
 *   3. multiplies the corners by the per-anchor stride values,
 *   4. applies a sigmoid to the class rows,
 *   5. concatenates boxes and classes and transposes to {anchors, 4 + classes}.
 *
 * layerIdx    index used to build the names of the created layers
 * block       cfg block; must have type "detect_v8" and the keys "num" and "classes"
 * weights     flat weight array; 2 * anchors anchor-point values followed by
 *             anchors stride values are read starting at weightPtr
 * trtWeights  receives every weight buffer allocated here
 * weightPtr   read cursor into `weights`, advanced by 3 * anchors
 * input       2-D tensor with at least num + classes rows
 * network     network definition the layers are added to
 */
nvinfer1::ITensor*
detectV8Layer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  nvinfer1::ITensor* output;

  assert(block.at("type") == "detect_v8");
  assert(block.find("num") != block.end());
  assert(block.find("classes") != block.end());

  int num = std::stoi(block.at("num"));
  int classes = std::stoi(block.at("classes"));
  // The box rows are reshaped to {4, reg_max, anchors} below, so num must split evenly by 4.
  assert(num > 0 && num % 4 == 0);
  int reg_max = num / 4;

  nvinfer1::Dims inputDims = input->getDimensions();

  // Rows [0, num): box distribution logits.
  nvinfer1::ISliceLayer* sliceBox = network->addSlice(*input, nvinfer1::Dims{2, {0, 0}},
      nvinfer1::Dims{2, {num, inputDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceBox != nullptr);
  std::string sliceBoxLayerName = "slice_box_" + std::to_string(layerIdx);
  sliceBox->setName(sliceBoxLayerName.c_str());
  nvinfer1::ITensor* box = sliceBox->getOutput(0);

  // Rows [num, num + classes): class logits.
  nvinfer1::ISliceLayer* sliceCls = network->addSlice(*input, nvinfer1::Dims{2, {num, 0}},
      nvinfer1::Dims{2, {classes, inputDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceCls != nullptr);
  std::string sliceClsLayerName = "slice_cls_" + std::to_string(layerIdx);
  sliceCls->setName(sliceClsLayerName.c_str());
  nvinfer1::ITensor* cls = sliceCls->getOutput(0);

  // {num, anchors} -> {4, reg_max, anchors} -> {reg_max, 4, anchors}
  nvinfer1::IShuffleLayer* shuffle1Box = network->addShuffle(*box);
  assert(shuffle1Box != nullptr);
  std::string shuffle1BoxLayerName = "shuffle1_box_" + std::to_string(layerIdx);
  shuffle1Box->setName(shuffle1BoxLayerName.c_str());
  nvinfer1::Dims reshape1Dims = {3, {4, reg_max, inputDims.d[1]}};
  shuffle1Box->setReshapeDimensions(reshape1Dims);
  nvinfer1::Permutation permutation1;
  permutation1.order[0] = 1;
  permutation1.order[1] = 0;
  permutation1.order[2] = 2;
  shuffle1Box->setSecondTranspose(permutation1);
  box = shuffle1Box->getOutput(0);

  // Softmax over axis 0, i.e. across the reg_max bins.
  nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*box);
  assert(softmax != nullptr);
  std::string softmaxLayerName = "softmax_box_" + std::to_string(layerIdx);
  softmax->setName(softmaxLayerName.c_str());
  softmax->setAxes(1 << 0);
  box = softmax->getOutput(0);

  // 1x1 convolution with weights 0..reg_max-1: weighted sum of the bin probabilities.
  nvinfer1::Weights dflWt {nvinfer1::DataType::kFLOAT, nullptr, reg_max};

  float* val = new float[reg_max];
  for (int i = 0; i < reg_max; ++i) {
    val[i] = static_cast<float>(i);
  }
  dflWt.values = val;
  // Track this buffer in trtWeights like the anchor/stride buffers below; otherwise
  // nothing keeps a handle to it and it can never be released.
  trtWeights.push_back(dflWt);

  nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*box, 1, nvinfer1::Dims{2, {1, 1}}, dflWt,
      nvinfer1::Weights{});
  assert(conv != nullptr);
  std::string convLayerName = "conv_box_" + std::to_string(layerIdx);
  conv->setName(convLayerName.c_str());
  conv->setStrideNd(nvinfer1::Dims{2, {1, 1}});
  conv->setPaddingNd(nvinfer1::Dims{2, {0, 0}});
  box = conv->getOutput(0);

  // {1, 4, anchors} -> {4, anchors}
  nvinfer1::IShuffleLayer* shuffle2Box = network->addShuffle(*box);
  assert(shuffle2Box != nullptr);
  std::string shuffle2BoxLayerName = "shuffle2_box_" + std::to_string(layerIdx);
  shuffle2Box->setName(shuffle2BoxLayerName.c_str());
  nvinfer1::Dims reshape2Dims = {2, {4, inputDims.d[1]}};
  shuffle2Box->setReshapeDimensions(reshape2Dims);
  box = shuffle2Box->getOutput(0);

  nvinfer1::Dims shuffle2BoxDims = box->getDimensions();

  // Rows 0-1: distances subtracted from the anchor point.
  nvinfer1::ISliceLayer* sliceLtBox = network->addSlice(*box, nvinfer1::Dims{2, {0, 0}},
      nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceLtBox != nullptr);
  std::string sliceLtBoxLayerName = "slice_lt_box_" + std::to_string(layerIdx);
  sliceLtBox->setName(sliceLtBoxLayerName.c_str());
  nvinfer1::ITensor* lt = sliceLtBox->getOutput(0);

  // Rows 2-3: distances added to the anchor point.
  nvinfer1::ISliceLayer* sliceRbBox = network->addSlice(*box, nvinfer1::Dims{2, {2, 0}},
      nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}}, nvinfer1::Dims{2, {1, 1}});
  assert(sliceRbBox != nullptr);
  std::string sliceRbBoxLayerName = "slice_rb_box_" + std::to_string(layerIdx);
  sliceRbBox->setName(sliceRbBoxLayerName.c_str());
  nvinfer1::ITensor* rb = sliceRbBox->getOutput(0);

  // Anchor points: 2 * anchors values read from the weight array.
  int channels = 2 * shuffle2BoxDims.d[1];
  nvinfer1::Weights anchorPointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels};
  val = new float[channels];
  for (int i = 0; i < channels; ++i) {
    val[i] = weights[weightPtr];
    ++weightPtr;
  }
  anchorPointsWt.values = val;
  trtWeights.push_back(anchorPointsWt);

  nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}},
      anchorPointsWt);
  assert(anchorPoints != nullptr);
  std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx);
  anchorPoints->setName(anchorPointsLayerName.c_str());
  nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0);

  nvinfer1::IElementWiseLayer* x1y1 = network->addElementWise(*anchorPointsTensor, *lt,
      nvinfer1::ElementWiseOperation::kSUB);
  assert(x1y1 != nullptr);
  std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx);
  x1y1->setName(x1y1LayerName.c_str());
  nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0);

  nvinfer1::IElementWiseLayer* x2y2 = network->addElementWise(*rb, *anchorPointsTensor,
      nvinfer1::ElementWiseOperation::kSUM);
  assert(x2y2 != nullptr);
  std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx);
  x2y2->setName(x2y2LayerName.c_str());
  nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0);

  std::vector<nvinfer1::ITensor*> concatBoxInputs;
  concatBoxInputs.push_back(x1y1Tensor);
  concatBoxInputs.push_back(x2y2Tensor);

  nvinfer1::IConcatenationLayer* concatBox = network->addConcatenation(concatBoxInputs.data(),
      static_cast<int>(concatBoxInputs.size()));
  assert(concatBox != nullptr);
  std::string concatBoxLayerName = "concat_box_" + std::to_string(layerIdx);
  concatBox->setName(concatBoxLayerName.c_str());
  concatBox->setAxis(0);
  box = concatBox->getOutput(0);

  // Stride values: one per anchor, read from the weight array.
  channels = shuffle2BoxDims.d[1];
  nvinfer1::Weights stridePointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels};
  val = new float[channels];
  for (int i = 0; i < channels; ++i) {
    val[i] = weights[weightPtr];
    ++weightPtr;
  }
  stridePointsWt.values = val;
  trtWeights.push_back(stridePointsWt);

  nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {1, shuffle2BoxDims.d[1]}},
      stridePointsWt);
  assert(stridePoints != nullptr);
  std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx);
  stridePoints->setName(stridePointsLayerName.c_str());
  nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0);

  // Scale the corners by the per-anchor stride.
  nvinfer1::IElementWiseLayer* pred = network->addElementWise(*box, *stridePointsTensor,
      nvinfer1::ElementWiseOperation::kPROD);
  assert(pred != nullptr);
  std::string predLayerName = "pred_" + std::to_string(layerIdx);
  pred->setName(predLayerName.c_str());
  box = pred->getOutput(0);

  nvinfer1::IActivationLayer* sigmoid = network->addActivation(*cls, nvinfer1::ActivationType::kSIGMOID);
  assert(sigmoid != nullptr);
  std::string sigmoidLayerName = "sigmoid_cls_" + std::to_string(layerIdx);
  sigmoid->setName(sigmoidLayerName.c_str());
  cls = sigmoid->getOutput(0);

  std::vector<nvinfer1::ITensor*> concatInputs;
  concatInputs.push_back(box);
  concatInputs.push_back(cls);

  nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(),
      static_cast<int>(concatInputs.size()));
  assert(concat != nullptr);
  std::string concatLayerName = "concat_" + std::to_string(layerIdx);
  concat->setName(concatLayerName.c_str());
  concat->setAxis(0);
  output = concat->getOutput(0);

  // {4 + classes, anchors} -> {anchors, 4 + classes}
  nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*output);
  assert(shuffle != nullptr);
  std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
  shuffle->setName(shuffleLayerName.c_str());
  nvinfer1::Permutation permutation2;
  permutation2.order[0] = 1;
  permutation2.order[1] = 0;
  shuffle->setFirstTranspose(permutation2);
  output = shuffle->getOutput(0);

  return output;
}
|
||||
18
nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.h
Normal file
18
nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __DETECT_V8_LAYER_H__
|
||||
#define __DETECT_V8_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* detectV8Layer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
@@ -5,37 +5,34 @@
|
||||
|
||||
#include "implicit_layer.h"
|
||||
|
||||
nvinfer1::ITensor* implicitLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
#include <cassert>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
implicitLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul");
|
||||
assert(block.find("filters") != block.end());
|
||||
assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul");
|
||||
assert(block.find("filters") != block.end());
|
||||
|
||||
int filters = std::stoi(block.at("filters"));
|
||||
int filters = std::stoi(block.at("filters"));
|
||||
|
||||
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, filters};
|
||||
nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, filters};
|
||||
|
||||
float* val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
float* val = new float[filters];
|
||||
for (int i = 0; i < filters; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
convWt.values = val;
|
||||
trtWeights.push_back(convWt);
|
||||
|
||||
nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{3, {filters, 1, 1}}, convWt);
|
||||
assert(implicit != nullptr);
|
||||
std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx);
|
||||
implicit->setName(implicitLayerName.c_str());
|
||||
output = implicit->getOutput(0);
|
||||
nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{3, {filters, 1, 1}}, convWt);
|
||||
assert(implicit != nullptr);
|
||||
std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx);
|
||||
implicit->setName(implicitLayerName.c_str());
|
||||
output = implicit->getOutput(0);
|
||||
|
||||
return output;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -8,16 +8,10 @@
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* implicitLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* implicitLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,53 +5,50 @@
|
||||
|
||||
#include "pooling_layer.h"
|
||||
|
||||
nvinfer1::ITensor* poolingLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
poolingLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "maxpool" || block.at("type") == "avgpool");
|
||||
assert(block.at("type") == "maxpool" || block.at("type") == "avgpool");
|
||||
|
||||
if (block.at("type") == "maxpool")
|
||||
{
|
||||
assert(block.find("size") != block.end());
|
||||
assert(block.find("stride") != block.end());
|
||||
if (block.at("type") == "maxpool") {
|
||||
assert(block.find("size") != block.end());
|
||||
assert(block.find("stride") != block.end());
|
||||
|
||||
int size = std::stoi(block.at("size"));
|
||||
int stride = std::stoi(block.at("stride"));
|
||||
int size = std::stoi(block.at("size"));
|
||||
int stride = std::stoi(block.at("stride"));
|
||||
|
||||
nvinfer1::IPoolingLayer* maxpool
|
||||
= network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {size, size}});
|
||||
assert(maxpool != nullptr);
|
||||
std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
|
||||
maxpool->setName(maxpoolLayerName.c_str());
|
||||
maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
|
||||
maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}});
|
||||
if (size == 2 && stride == 1)
|
||||
{
|
||||
maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}});
|
||||
maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}});
|
||||
}
|
||||
output = maxpool->getOutput(0);
|
||||
}
|
||||
else if (block.at("type") == "avgpool")
|
||||
{
|
||||
nvinfer1::Dims inputDims = input->getDimensions();
|
||||
nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd(
|
||||
*input, nvinfer1::PoolingType::kAVERAGE, nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}});
|
||||
assert(avgpool != nullptr);
|
||||
std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx);
|
||||
avgpool->setName(avgpoolLayerName.c_str());
|
||||
output = avgpool->getOutput(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "Pooling not supported: " << block.at("type") << std::endl;
|
||||
std::abort();
|
||||
nvinfer1::IPoolingLayer* maxpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX,
|
||||
nvinfer1::Dims{2, {size, size}});
|
||||
assert(maxpool != nullptr);
|
||||
std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
|
||||
maxpool->setName(maxpoolLayerName.c_str());
|
||||
maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
|
||||
maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}});
|
||||
if (size == 2 && stride == 1) {
|
||||
maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}});
|
||||
maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}});
|
||||
}
|
||||
output = maxpool->getOutput(0);
|
||||
}
|
||||
else if (block.at("type") == "avgpool") {
|
||||
nvinfer1::Dims inputDims = input->getDimensions();
|
||||
nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kAVERAGE,
|
||||
nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}});
|
||||
assert(avgpool != nullptr);
|
||||
std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx);
|
||||
avgpool->setName(avgpoolLayerName.c_str());
|
||||
output = avgpool->getOutput(0);
|
||||
}
|
||||
else {
|
||||
std::cerr << "Pooling not supported: " << block.at("type") << std::endl;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return output;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -7,15 +7,10 @@
|
||||
#define __POOLING_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* poolingLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* poolingLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,54 +5,50 @@
|
||||
|
||||
#include "reduce_layer.h"
|
||||
|
||||
nvinfer1::ITensor* reduceLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor*
|
||||
reduceLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "reduce");
|
||||
assert(block.find("mode") != block.end());
|
||||
assert(block.find("axes") != block.end());
|
||||
assert(block.at("type") == "reduce");
|
||||
assert(block.find("mode") != block.end());
|
||||
assert(block.find("axes") != block.end());
|
||||
|
||||
std::string mode = block.at("mode");
|
||||
std::string mode = block.at("mode");
|
||||
|
||||
nvinfer1::ReduceOperation operation;
|
||||
if (mode == "mean")
|
||||
operation = nvinfer1::ReduceOperation::kAVG;
|
||||
nvinfer1::ReduceOperation operation;
|
||||
if (mode == "mean")
|
||||
operation = nvinfer1::ReduceOperation::kAVG;
|
||||
|
||||
std::string strAxes = block.at("axes");
|
||||
std::vector<int32_t> axes;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strAxes.find(',', lastPos)) != std::string::npos)
|
||||
{
|
||||
int vL = std::stoi(trim(strAxes.substr(lastPos, pos - lastPos)));
|
||||
axes.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strAxes.length())
|
||||
{
|
||||
std::string lastV = trim(strAxes.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
axes.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!axes.empty());
|
||||
|
||||
uint32_t axisMask = 0;
|
||||
for (int axis : axes)
|
||||
axisMask |= 1 << axis;
|
||||
|
||||
bool keepDims = false;
|
||||
if (block.find("keep") != block.end())
|
||||
keepDims = std::stoi(block.at("keep")) == 1 ? true : false;
|
||||
std::string strAxes = block.at("axes");
|
||||
std::vector<int32_t> axes;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strAxes.find(',', lastPos)) != std::string::npos) {
|
||||
int vL = std::stoi(trim(strAxes.substr(lastPos, pos - lastPos)));
|
||||
axes.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strAxes.length()) {
|
||||
std::string lastV = trim(strAxes.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
axes.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!axes.empty());
|
||||
|
||||
nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims);
|
||||
assert(reduce != nullptr);
|
||||
std::string reduceLayerName = "reduce_" + std::to_string(layerIdx);
|
||||
reduce->setName(reduceLayerName.c_str());
|
||||
output = reduce->getOutput(0);
|
||||
uint32_t axisMask = 0;
|
||||
for (int axis : axes)
|
||||
axisMask |= 1 << axis;
|
||||
|
||||
return output;
|
||||
bool keepDims = false;
|
||||
if (block.find("keep") != block.end())
|
||||
keepDims = std::stoi(block.at("keep")) == 1 ? true : false;
|
||||
|
||||
nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims);
|
||||
assert(reduce != nullptr);
|
||||
std::string reduceLayerName = "reduce_" + std::to_string(layerIdx);
|
||||
reduce->setName(reduceLayerName.c_str());
|
||||
output = reduce->getOutput(0);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -6,13 +6,9 @@
|
||||
#ifndef __REDUCE_LAYER_H__
|
||||
#define __REDUCE_LAYER_H__
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "../utils.h"
|
||||
|
||||
nvinfer1::ITensor* reduceLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* reduceLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,109 +5,105 @@
|
||||
|
||||
#include "reg_layer.h"
|
||||
|
||||
nvinfer1::ITensor* regLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
nvinfer1::ITensor* input,
|
||||
#include <cassert>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
regLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "reg");
|
||||
assert(block.at("type") == "reg");
|
||||
|
||||
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||
assert(shuffle != nullptr);
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
nvinfer1::Permutation permutation;
|
||||
permutation.order[0] = 1;
|
||||
permutation.order[1] = 0;
|
||||
shuffle->setFirstTranspose(permutation);
|
||||
output = shuffle->getOutput(0);
|
||||
nvinfer1::Dims shuffleDims = output->getDimensions();
|
||||
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||
assert(shuffle != nullptr);
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
nvinfer1::Permutation permutation;
|
||||
permutation.order[0] = 1;
|
||||
permutation.order[1] = 0;
|
||||
shuffle->setFirstTranspose(permutation);
|
||||
output = shuffle->getOutput(0);
|
||||
nvinfer1::Dims shuffleDims = output->getDimensions();
|
||||
|
||||
nvinfer1::ISliceLayer* sliceLt = network->addSlice(
|
||||
*output, nvinfer1::Dims{2, {0, 0}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}});
|
||||
assert(sliceLt != nullptr);
|
||||
std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx);
|
||||
sliceLt->setName(sliceLtLayerName.c_str());
|
||||
nvinfer1::ITensor* lt = sliceLt->getOutput(0);
|
||||
nvinfer1::ISliceLayer* sliceLt = network->addSlice(*output, nvinfer1::Dims{2, {0, 0}},
|
||||
nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}});
|
||||
assert(sliceLt != nullptr);
|
||||
std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx);
|
||||
sliceLt->setName(sliceLtLayerName.c_str());
|
||||
nvinfer1::ITensor* lt = sliceLt->getOutput(0);
|
||||
|
||||
nvinfer1::ISliceLayer* sliceRb = network->addSlice(
|
||||
*output, nvinfer1::Dims{2, {0, 2}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}});
|
||||
assert(sliceRb != nullptr);
|
||||
std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx);
|
||||
sliceRb->setName(sliceRbLayerName.c_str());
|
||||
nvinfer1::ITensor* rb = sliceRb->getOutput(0);
|
||||
nvinfer1::ISliceLayer* sliceRb = network->addSlice(*output, nvinfer1::Dims{2, {0, 2}},
|
||||
nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}});
|
||||
assert(sliceRb != nullptr);
|
||||
std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx);
|
||||
sliceRb->setName(sliceRbLayerName.c_str());
|
||||
nvinfer1::ITensor* rb = sliceRb->getOutput(0);
|
||||
|
||||
int channels = shuffleDims.d[0] * 2;
|
||||
nvinfer1::Weights anchorPointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels};
|
||||
float* val = new float[channels];
|
||||
for (int i = 0; i < channels; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
anchorPointsWt.values = val;
|
||||
trtWeights.push_back(anchorPointsWt);
|
||||
int channels = shuffleDims.d[0] * 2;
|
||||
nvinfer1::Weights anchorPointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels};
|
||||
float* val = new float[channels];
|
||||
for (int i = 0; i < channels; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
anchorPointsWt.values = val;
|
||||
trtWeights.push_back(anchorPointsWt);
|
||||
|
||||
nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, anchorPointsWt);
|
||||
assert(anchorPoints != nullptr);
|
||||
std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx);
|
||||
anchorPoints->setName(anchorPointsLayerName.c_str());
|
||||
nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0);
|
||||
nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, anchorPointsWt);
|
||||
assert(anchorPoints != nullptr);
|
||||
std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx);
|
||||
anchorPoints->setName(anchorPointsLayerName.c_str());
|
||||
nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0);
|
||||
|
||||
nvinfer1::IElementWiseLayer* x1y1
|
||||
= network->addElementWise(*anchorPointsTensor, *lt, nvinfer1::ElementWiseOperation::kSUB);
|
||||
assert(x1y1 != nullptr);
|
||||
std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx);
|
||||
x1y1->setName(x1y1LayerName.c_str());
|
||||
nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0);
|
||||
nvinfer1::IElementWiseLayer* x1y1 = network->addElementWise(*anchorPointsTensor, *lt,
|
||||
nvinfer1::ElementWiseOperation::kSUB);
|
||||
assert(x1y1 != nullptr);
|
||||
std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx);
|
||||
x1y1->setName(x1y1LayerName.c_str());
|
||||
nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0);
|
||||
|
||||
nvinfer1::IElementWiseLayer* x2y2
|
||||
= network->addElementWise(*rb, *anchorPointsTensor, nvinfer1::ElementWiseOperation::kSUM);
|
||||
assert(x2y2 != nullptr);
|
||||
std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx);
|
||||
x2y2->setName(x2y2LayerName.c_str());
|
||||
nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0);
|
||||
nvinfer1::IElementWiseLayer* x2y2 = network->addElementWise(*rb, *anchorPointsTensor,
|
||||
nvinfer1::ElementWiseOperation::kSUM);
|
||||
assert(x2y2 != nullptr);
|
||||
std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx);
|
||||
x2y2->setName(x2y2LayerName.c_str());
|
||||
nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0);
|
||||
|
||||
std::vector<nvinfer1::ITensor*> concatInputs;
|
||||
concatInputs.push_back(x1y1Tensor);
|
||||
concatInputs.push_back(x2y2Tensor);
|
||||
std::vector<nvinfer1::ITensor*> concatInputs;
|
||||
concatInputs.push_back(x1y1Tensor);
|
||||
concatInputs.push_back(x2y2Tensor);
|
||||
|
||||
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
|
||||
assert(concat != nullptr);
|
||||
std::string concatLayerName = "concat_" + std::to_string(layerIdx);
|
||||
concat->setName(concatLayerName.c_str());
|
||||
concat->setAxis(1);
|
||||
output = concat->getOutput(0);
|
||||
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
|
||||
assert(concat != nullptr);
|
||||
std::string concatLayerName = "concat_" + std::to_string(layerIdx);
|
||||
concat->setName(concatLayerName.c_str());
|
||||
concat->setAxis(1);
|
||||
output = concat->getOutput(0);
|
||||
|
||||
channels = shuffleDims.d[0];
|
||||
nvinfer1::Weights stridePointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels};
|
||||
val = new float[channels];
|
||||
for (int i = 0; i < channels; ++i)
|
||||
{
|
||||
val[i] = weights[weightPtr];
|
||||
weightPtr++;
|
||||
}
|
||||
stridePointsWt.values = val;
|
||||
trtWeights.push_back(stridePointsWt);
|
||||
channels = shuffleDims.d[0];
|
||||
nvinfer1::Weights stridePointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels};
|
||||
val = new float[channels];
|
||||
for (int i = 0; i < channels; ++i) {
|
||||
val[i] = weights[weightPtr];
|
||||
++weightPtr;
|
||||
}
|
||||
stridePointsWt.values = val;
|
||||
trtWeights.push_back(stridePointsWt);
|
||||
|
||||
nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 1}}, stridePointsWt);
|
||||
assert(stridePoints != nullptr);
|
||||
std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx);
|
||||
stridePoints->setName(stridePointsLayerName.c_str());
|
||||
nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0);
|
||||
nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 1}}, stridePointsWt);
|
||||
assert(stridePoints != nullptr);
|
||||
std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx);
|
||||
stridePoints->setName(stridePointsLayerName.c_str());
|
||||
nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0);
|
||||
|
||||
nvinfer1::IElementWiseLayer* pred
|
||||
= network->addElementWise(*output, *stridePointsTensor, nvinfer1::ElementWiseOperation::kPROD);
|
||||
assert(pred != nullptr);
|
||||
std::string predLayerName = "pred_" + std::to_string(layerIdx);
|
||||
pred->setName(predLayerName.c_str());
|
||||
output = pred->getOutput(0);
|
||||
nvinfer1::IElementWiseLayer* pred = network->addElementWise(*output, *stridePointsTensor,
|
||||
nvinfer1::ElementWiseOperation::kPROD);
|
||||
assert(pred != nullptr);
|
||||
std::string predLayerName = "pred_" + std::to_string(layerIdx);
|
||||
pred->setName(predLayerName.c_str());
|
||||
output = pred->getOutput(0);
|
||||
|
||||
return output;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -8,17 +8,11 @@
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* regLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights,
|
||||
int& weightPtr,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* regLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
|
||||
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,58 +5,55 @@
|
||||
|
||||
#include "reorg_layer.h"
|
||||
|
||||
nvinfer1::ITensor* reorgLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
nvinfer1::ITensor*
|
||||
reorgLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "reorg");
|
||||
assert(block.at("type") == "reorg");
|
||||
|
||||
nvinfer1::Dims inputDims = input->getDimensions();
|
||||
nvinfer1::Dims inputDims = input->getDimensions();
|
||||
|
||||
nvinfer1::ISliceLayer *slice1 = network->addSlice(
|
||||
*input, nvinfer1::Dims{3, {0, 0, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
|
||||
nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice1 != nullptr);
|
||||
std::string slice1LayerName = "slice1_" + std::to_string(layerIdx);
|
||||
slice1->setName(slice1LayerName.c_str());
|
||||
nvinfer1::ISliceLayer *slice1 = network->addSlice(*input, nvinfer1::Dims{3, {0, 0, 0}},
|
||||
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice1 != nullptr);
|
||||
std::string slice1LayerName = "slice1_" + std::to_string(layerIdx);
|
||||
slice1->setName(slice1LayerName.c_str());
|
||||
|
||||
nvinfer1::ISliceLayer *slice2 = network->addSlice(
|
||||
*input, nvinfer1::Dims{3, {0, 1, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
|
||||
nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice2 != nullptr);
|
||||
std::string slice2LayerName = "slice2_" + std::to_string(layerIdx);
|
||||
slice2->setName(slice2LayerName.c_str());
|
||||
nvinfer1::ISliceLayer *slice2 = network->addSlice(*input, nvinfer1::Dims{3, {0, 1, 0}},
|
||||
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice2 != nullptr);
|
||||
std::string slice2LayerName = "slice2_" + std::to_string(layerIdx);
|
||||
slice2->setName(slice2LayerName.c_str());
|
||||
|
||||
nvinfer1::ISliceLayer *slice3 = network->addSlice(
|
||||
*input, nvinfer1::Dims{3, {0, 0, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
|
||||
nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice3 != nullptr);
|
||||
std::string slice3LayerName = "slice3_" + std::to_string(layerIdx);
|
||||
slice3->setName(slice3LayerName.c_str());
|
||||
nvinfer1::ISliceLayer *slice3 = network->addSlice(*input, nvinfer1::Dims{3, {0, 0, 1}},
|
||||
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice3 != nullptr);
|
||||
std::string slice3LayerName = "slice3_" + std::to_string(layerIdx);
|
||||
slice3->setName(slice3LayerName.c_str());
|
||||
|
||||
nvinfer1::ISliceLayer *slice4 = network->addSlice(
|
||||
*input, nvinfer1::Dims{3, {0, 1, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
|
||||
nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice4 != nullptr);
|
||||
std::string slice4LayerName = "slice4_" + std::to_string(layerIdx);
|
||||
slice4->setName(slice4LayerName.c_str());
|
||||
nvinfer1::ISliceLayer *slice4 = network->addSlice(*input, nvinfer1::Dims{3, {0, 1, 1}},
|
||||
nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, nvinfer1::Dims{3, {1, 2, 2}});
|
||||
assert(slice4 != nullptr);
|
||||
std::string slice4LayerName = "slice4_" + std::to_string(layerIdx);
|
||||
slice4->setName(slice4LayerName.c_str());
|
||||
|
||||
std::vector<nvinfer1::ITensor*> concatInputs;
|
||||
concatInputs.push_back(slice1->getOutput(0));
|
||||
concatInputs.push_back(slice2->getOutput(0));
|
||||
concatInputs.push_back(slice3->getOutput(0));
|
||||
concatInputs.push_back(slice4->getOutput(0));
|
||||
std::vector<nvinfer1::ITensor*> concatInputs;
|
||||
concatInputs.push_back(slice1->getOutput(0));
|
||||
concatInputs.push_back(slice2->getOutput(0));
|
||||
concatInputs.push_back(slice3->getOutput(0));
|
||||
concatInputs.push_back(slice4->getOutput(0));
|
||||
|
||||
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
|
||||
assert(concat != nullptr);
|
||||
std::string concatLayerName = "concat_" + std::to_string(layerIdx);
|
||||
concat->setName(concatLayerName.c_str());
|
||||
concat->setAxis(0);
|
||||
output = concat->getOutput(0);
|
||||
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
|
||||
assert(concat != nullptr);
|
||||
std::string concatLayerName = "concat_" + std::to_string(layerIdx);
|
||||
concat->setName(concatLayerName.c_str());
|
||||
concat->setAxis(0);
|
||||
output = concat->getOutput(0);
|
||||
|
||||
return output;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -3,19 +3,14 @@
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#ifndef __REORGV5_LAYER_H__
|
||||
#define __REORGV5_LAYER_H__
|
||||
#ifndef __REORG_LAYER_H__
|
||||
#define __REORG_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* reorgLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* reorgLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,78 +5,70 @@
|
||||
|
||||
#include "route_layer.h"
|
||||
|
||||
nvinfer1::ITensor* routeLayer(
|
||||
int layerIdx,
|
||||
std::string& layers,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
nvinfer1::ITensor*
|
||||
routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>& block,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "route");
|
||||
assert(block.find("layers") != block.end());
|
||||
assert(block.at("type") == "route");
|
||||
assert(block.find("layers") != block.end());
|
||||
|
||||
std::string strLayers = block.at("layers");
|
||||
std::vector<int> idxLayers;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strLayers.find(',', lastPos)) != std::string::npos)
|
||||
{
|
||||
int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
|
||||
idxLayers.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strLayers.length())
|
||||
{
|
||||
std::string lastV = trim(strLayers.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
idxLayers.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert (!idxLayers.empty());
|
||||
std::vector<nvinfer1::ITensor*> concatInputs;
|
||||
for (uint i = 0; i < idxLayers.size(); ++i)
|
||||
{
|
||||
if (idxLayers[i] < 0)
|
||||
idxLayers[i] = tensorOutputs.size() + idxLayers[i];
|
||||
assert (idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size());
|
||||
concatInputs.push_back(tensorOutputs[idxLayers[i]]);
|
||||
if (i < idxLayers.size() - 1)
|
||||
layers += std::to_string(idxLayers[i]) + ", ";
|
||||
}
|
||||
layers += std::to_string(idxLayers[idxLayers.size() - 1]);
|
||||
std::string strLayers = block.at("layers");
|
||||
std::vector<int> idxLayers;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strLayers.find(',', lastPos)) != std::string::npos) {
|
||||
int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
|
||||
idxLayers.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strLayers.length()) {
|
||||
std::string lastV = trim(strLayers.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
idxLayers.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert (!idxLayers.empty());
|
||||
std::vector<nvinfer1::ITensor*> concatInputs;
|
||||
for (uint i = 0; i < idxLayers.size(); ++i) {
|
||||
if (idxLayers[i] < 0)
|
||||
idxLayers[i] = tensorOutputs.size() + idxLayers[i];
|
||||
assert (idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size());
|
||||
concatInputs.push_back(tensorOutputs[idxLayers[i]]);
|
||||
if (i < idxLayers.size() - 1)
|
||||
layers += std::to_string(idxLayers[i]) + ", ";
|
||||
}
|
||||
layers += std::to_string(idxLayers[idxLayers.size() - 1]);
|
||||
|
||||
if (concatInputs.size() == 1)
|
||||
output = concatInputs[0];
|
||||
else {
|
||||
int axis = 0;
|
||||
if (block.find("axis") != block.end())
|
||||
axis = std::stoi(block.at("axis"));
|
||||
if (axis < 0)
|
||||
axis = concatInputs[0]->getDimensions().nbDims + axis;
|
||||
if (concatInputs.size() == 1)
|
||||
output = concatInputs[0];
|
||||
else {
|
||||
int axis = 0;
|
||||
if (block.find("axis") != block.end())
|
||||
axis = std::stoi(block.at("axis"));
|
||||
if (axis < 0)
|
||||
axis = concatInputs[0]->getDimensions().nbDims + axis;
|
||||
|
||||
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
|
||||
assert(concat != nullptr);
|
||||
std::string concatLayerName = "route_" + std::to_string(layerIdx);
|
||||
concat->setName(concatLayerName.c_str());
|
||||
concat->setAxis(axis);
|
||||
output = concat->getOutput(0);
|
||||
}
|
||||
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
|
||||
assert(concat != nullptr);
|
||||
std::string concatLayerName = "route_" + std::to_string(layerIdx);
|
||||
concat->setName(concatLayerName.c_str());
|
||||
concat->setAxis(axis);
|
||||
output = concat->getOutput(0);
|
||||
}
|
||||
|
||||
if (block.find("groups") != block.end())
|
||||
{
|
||||
nvinfer1::Dims prevTensorDims = output->getDimensions();
|
||||
int groups = stoi(block.at("groups"));
|
||||
int group_id = stoi(block.at("group_id"));
|
||||
int startSlice = (prevTensorDims.d[0] / groups) * group_id;
|
||||
int channelSlice = (prevTensorDims.d[0] / groups);
|
||||
nvinfer1::ISliceLayer* slice = network->addSlice(
|
||||
*output, nvinfer1::Dims{3, {startSlice, 0, 0}},
|
||||
nvinfer1::Dims{3, {channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}});
|
||||
assert(slice != nullptr);
|
||||
std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
|
||||
slice->setName(sliceLayerName.c_str());
|
||||
output = slice->getOutput(0);
|
||||
}
|
||||
if (block.find("groups") != block.end()) {
|
||||
nvinfer1::Dims prevTensorDims = output->getDimensions();
|
||||
int groups = stoi(block.at("groups"));
|
||||
int group_id = stoi(block.at("group_id"));
|
||||
int startSlice = (prevTensorDims.d[0] / groups) * group_id;
|
||||
int channelSlice = (prevTensorDims.d[0] / groups);
|
||||
nvinfer1::ISliceLayer* slice = network->addSlice(*output, nvinfer1::Dims{3, {startSlice, 0, 0}},
|
||||
nvinfer1::Dims{3, {channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}});
|
||||
assert(slice != nullptr);
|
||||
std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
|
||||
slice->setName(sliceLayerName.c_str());
|
||||
output = slice->getOutput(0);
|
||||
}
|
||||
|
||||
return output;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -6,14 +6,9 @@
|
||||
#ifndef __ROUTE_LAYER_H__
|
||||
#define __ROUTE_LAYER_H__
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "../utils.h"
|
||||
|
||||
nvinfer1::ITensor* routeLayer(
|
||||
int layerIdx,
|
||||
std::string& layers,
|
||||
std::map<std::string, std::string>& block,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* routeLayer(int layerIdx, std::string& layers, std::map<std::string, std::string>& block,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,48 +5,41 @@
|
||||
|
||||
#include "shortcut_layer.h"
|
||||
|
||||
#include <cassert>

/**
 * Adds a "shortcut" block to the network: an element-wise combination of
 * `input` and `shortcutInput`, followed by the requested activation.
 *
 * @param layerIdx       Index used to build the layer names ("slice_<idx>", "shortcut_<idx>").
 * @param mode           "mul" selects an element-wise product; any other value selects a sum.
 * @param activation     Activation name forwarded unchanged to activationLayer().
 * @param inputVol       Volume descriptor of `input`; only compared against `shortcutVol`.
 * @param shortcutVol    Volume descriptor of `shortcutInput`.
 * @param block          Parsed config block; its "type" entry must be "shortcut".
 * @param input          First operand of the element-wise layer.
 * @param shortcutInput  Second operand of the element-wise layer.
 * @param network        Network definition that receives the new layers.
 * @return Output tensor of the activation applied to the element-wise result.
 */
nvinfer1::ITensor*
shortcutLayer(int layerIdx, std::string mode, std::string activation, std::string inputVol, std::string shortcutVol,
    std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcutInput,
    nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "shortcut");

  const nvinfer1::ElementWiseOperation operation =
      (mode == "mul") ? nvinfer1::ElementWiseOperation::kPROD : nvinfer1::ElementWiseOperation::kSUM;

  // For "add" with differing volumes, take a slice of the shortcut tensor that
  // starts at the origin and has the dimensions of `input`.
  nvinfer1::ITensor* rhs = shortcutInput;
  if (mode == "add" && inputVol != shortcutVol) {
    nvinfer1::ISliceLayer* slice = network->addSlice(*shortcutInput, nvinfer1::Dims{3, {0, 0, 0}}, input->getDimensions(),
        nvinfer1::Dims{3, {1, 1, 1}});
    assert(slice != nullptr);
    std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
    slice->setName(sliceLayerName.c_str());
    rhs = slice->getOutput(0);
  }

  nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *rhs, operation);
  assert(shortcut != nullptr);
  std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx);
  shortcut->setName(shortcutLayerName.c_str());

  nvinfer1::ITensor* output = activationLayer(layerIdx, activation, shortcut->getOutput(0), network);
  assert(output != nullptr);

  return output;
}
|
||||
|
||||
@@ -12,15 +12,8 @@
|
||||
|
||||
#include "activation_layer.h"
|
||||
|
||||
nvinfer1::ITensor* shortcutLayer(
|
||||
int layerIdx,
|
||||
std::string mode,
|
||||
std::string activation,
|
||||
std::string inputVol,
|
||||
std::string shortcutVol,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* shortcut,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string mode, std::string activation, std::string inputVol,
|
||||
std::string shortcutVol, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* shortcut, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,119 +5,133 @@
|
||||
|
||||
#include "shuffle_layer.h"
|
||||
|
||||
nvinfer1::ITensor* shuffleLayer(
|
||||
int layerIdx,
|
||||
std::string& layer,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||
nvinfer1::INetworkDefinition* network)
|
||||
nvinfer1::ITensor*
|
||||
shuffleLayer(int layerIdx, std::string& layer, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network)
|
||||
{
|
||||
nvinfer1::ITensor* output;
|
||||
nvinfer1::ITensor* output;
|
||||
|
||||
assert(block.at("type") == "shuffle");
|
||||
assert(block.at("type") == "shuffle");
|
||||
|
||||
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||
assert(shuffle != nullptr);
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||
assert(shuffle != nullptr);
|
||||
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||
shuffle->setName(shuffleLayerName.c_str());
|
||||
|
||||
if (block.find("reshape") != block.end())
|
||||
{
|
||||
std::string strReshape = block.at("reshape");
|
||||
std::vector<int32_t> reshape;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strReshape.find(',', lastPos)) != std::string::npos)
|
||||
{
|
||||
int vL = std::stoi(trim(strReshape.substr(lastPos, pos - lastPos)));
|
||||
reshape.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strReshape.length())
|
||||
{
|
||||
std::string lastV = trim(strReshape.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
reshape.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!reshape.empty());
|
||||
if (block.find("reshape") != block.end()) {
|
||||
int from = -1;
|
||||
if (block.find("from") != block.end())
|
||||
from = std::stoi(block.at("from"));
|
||||
|
||||
int from = -1;
|
||||
if (block.find("from") != block.end())
|
||||
from = std::stoi(block.at("from"));
|
||||
if (from < 0)
|
||||
from = tensorOutputs.size() + from;
|
||||
|
||||
if (from < 0)
|
||||
from = tensorOutputs.size() + from;
|
||||
layer = std::to_string(from);
|
||||
|
||||
layer = std::to_string(from);
|
||||
nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions();
|
||||
|
||||
nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions();
|
||||
int32_t l = inputTensorDims.d[1] * inputTensorDims.d[2];
|
||||
|
||||
nvinfer1::Dims reshapeDims;
|
||||
reshapeDims.nbDims = reshape.size();
|
||||
|
||||
for (uint i = 0; i < reshape.size(); ++i)
|
||||
if (reshape[i] == 0)
|
||||
reshapeDims.d[i] = l;
|
||||
else
|
||||
reshapeDims.d[i] = reshape[i];
|
||||
|
||||
shuffle->setReshapeDimensions(reshapeDims);
|
||||
std::string strReshape = block.at("reshape");
|
||||
std::vector<int32_t> reshape;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strReshape.find(',', lastPos)) != std::string::npos) {
|
||||
std::string V = trim(strReshape.substr(lastPos, pos - lastPos));
|
||||
if (V == "c")
|
||||
reshape.push_back(inputTensorDims.d[0]);
|
||||
else if (V == "ch")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1]);
|
||||
else if (V == "cw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[2]);
|
||||
else if (V == "h")
|
||||
reshape.push_back(inputTensorDims.d[1]);
|
||||
else if (V == "hw")
|
||||
reshape.push_back(inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else if (V == "w")
|
||||
reshape.push_back(inputTensorDims.d[2]);
|
||||
else if (V == "chw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else
|
||||
reshape.push_back(std::stoi(V));
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
|
||||
if (block.find("transpose1") != block.end())
|
||||
{
|
||||
std::string strTranspose1 = block.at("transpose1");
|
||||
std::vector<int32_t> transpose1;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos)
|
||||
{
|
||||
int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos)));
|
||||
transpose1.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strTranspose1.length())
|
||||
{
|
||||
std::string lastV = trim(strTranspose1.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
transpose1.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!transpose1.empty());
|
||||
|
||||
nvinfer1::Permutation permutation1;
|
||||
for (uint i = 0; i < transpose1.size(); ++i)
|
||||
permutation1.order[i] = transpose1[i];
|
||||
|
||||
shuffle->setFirstTranspose(permutation1);
|
||||
if (lastPos < strReshape.length()) {
|
||||
std::string lastV = trim(strReshape.substr(lastPos));
|
||||
if (!lastV.empty()) {
|
||||
if (lastV == "c")
|
||||
reshape.push_back(inputTensorDims.d[0]);
|
||||
else if (lastV == "ch")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1]);
|
||||
else if (lastV == "cw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[2]);
|
||||
else if (lastV == "h")
|
||||
reshape.push_back(inputTensorDims.d[1]);
|
||||
else if (lastV == "hw")
|
||||
reshape.push_back(inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else if (lastV == "w")
|
||||
reshape.push_back(inputTensorDims.d[2]);
|
||||
else if (lastV == "chw")
|
||||
reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1] * inputTensorDims.d[2]);
|
||||
else
|
||||
reshape.push_back(std::stoi(lastV));
|
||||
}
|
||||
}
|
||||
assert(!reshape.empty());
|
||||
|
||||
if (block.find("transpose2") != block.end())
|
||||
{
|
||||
std::string strTranspose2 = block.at("transpose2");
|
||||
std::vector<int32_t> transpose2;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos)
|
||||
{
|
||||
int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos)));
|
||||
transpose2.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strTranspose2.length())
|
||||
{
|
||||
std::string lastV = trim(strTranspose2.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
transpose2.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!transpose2.empty());
|
||||
nvinfer1::Dims reshapeDims;
|
||||
reshapeDims.nbDims = reshape.size();
|
||||
|
||||
nvinfer1::Permutation permutation2;
|
||||
for (uint i = 0; i < transpose2.size(); ++i)
|
||||
permutation2.order[i] = transpose2[i];
|
||||
for (uint i = 0; i < reshape.size(); ++i)
|
||||
reshapeDims.d[i] = reshape[i];
|
||||
|
||||
shuffle->setSecondTranspose(permutation2);
|
||||
shuffle->setReshapeDimensions(reshapeDims);
|
||||
}
|
||||
|
||||
if (block.find("transpose1") != block.end()) {
|
||||
std::string strTranspose1 = block.at("transpose1");
|
||||
std::vector<int32_t> transpose1;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos) {
|
||||
int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos)));
|
||||
transpose1.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strTranspose1.length()) {
|
||||
std::string lastV = trim(strTranspose1.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
transpose1.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!transpose1.empty());
|
||||
|
||||
output = shuffle->getOutput(0);
|
||||
nvinfer1::Permutation permutation1;
|
||||
for (uint i = 0; i < transpose1.size(); ++i)
|
||||
permutation1.order[i] = transpose1[i];
|
||||
|
||||
return output;
|
||||
shuffle->setFirstTranspose(permutation1);
|
||||
}
|
||||
|
||||
if (block.find("transpose2") != block.end()) {
|
||||
std::string strTranspose2 = block.at("transpose2");
|
||||
std::vector<int32_t> transpose2;
|
||||
size_t lastPos = 0, pos = 0;
|
||||
while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos) {
|
||||
int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos)));
|
||||
transpose2.push_back(vL);
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
if (lastPos < strTranspose2.length()) {
|
||||
std::string lastV = trim(strTranspose2.substr(lastPos));
|
||||
if (!lastV.empty())
|
||||
transpose2.push_back(std::stoi(lastV));
|
||||
}
|
||||
assert(!transpose2.empty());
|
||||
|
||||
nvinfer1::Permutation permutation2;
|
||||
for (uint i = 0; i < transpose2.size(); ++i)
|
||||
permutation2.order[i] = transpose2[i];
|
||||
|
||||
shuffle->setSecondTranspose(permutation2);
|
||||
}
|
||||
|
||||
output = shuffle->getOutput(0);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -6,15 +6,9 @@
|
||||
#ifndef __SHUFFLE_LAYER_H__
|
||||
#define __SHUFFLE_LAYER_H__
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "../utils.h"
|
||||
|
||||
nvinfer1::ITensor* shuffleLayer(
|
||||
int layerIdx,
|
||||
std::string& layer,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
nvinfer1::ITensor* shuffleLayer(int layerIdx, std::string& layer, std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input, std::vector<nvinfer1::ITensor*> tensorOutputs, nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,25 +5,25 @@
|
||||
|
||||
#include "softmax_layer.h"
|
||||
|
||||
#include <cassert>

/**
 * Adds a "softmax" block on top of `input`.
 *
 * @param layerIdx  Index used to build the layer name ("softmax_<idx>").
 * @param block     Parsed config block; "type" must be "softmax" and "axes" must be
 *                  present. "axes" is read as a single integer and converted to
 *                  the bit mask `1 << axes` before being passed to setAxes().
 * @param input     Tensor fed into the softmax layer.
 * @param network   Network definition that receives the new layer.
 * @return Output tensor of the softmax layer.
 */
nvinfer1::ITensor*
softmaxLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "softmax");
  assert(block.find("axes") != block.end());

  const int axes = std::stoi(block.at("axes"));
  // Shifting by a negative amount or past the mask width is undefined behaviour.
  assert(axes >= 0 && axes < nvinfer1::Dims::MAX_DIMS);

  nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*input);
  assert(softmax != nullptr);
  std::string softmaxLayerName = "softmax_" + std::to_string(layerIdx);
  softmax->setName(softmaxLayerName.c_str());
  softmax->setAxes(1U << axes);

  return softmax->getOutput(0);
}
|
||||
|
||||
@@ -7,14 +7,10 @@
|
||||
#define __SOFTMAX_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* softmaxLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* softmaxLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,28 +5,28 @@
|
||||
|
||||
#include "upsample_layer.h"
|
||||
|
||||
#include <cassert>

/**
 * Adds an "upsample" block: a nearest-neighbour resize of `input`.
 *
 * @param layerIdx  Index used to build the layer name ("upsample_<idx>").
 * @param block     Parsed config block; "type" must be "upsample" and "stride"
 *                  must be present.
 * @param input     Tensor to resize.
 * @param network   Network definition that receives the new layer.
 * @return Output tensor of the resize layer.
 */
nvinfer1::ITensor*
upsampleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
  assert(block.at("type") == "upsample");
  assert(block.find("stride") != block.end());

  const int stride = std::stoi(block.at("stride"));

  // First dimension keeps its size; the next two are scaled by the stride.
  float scale[3] = {1, static_cast<float>(stride), static_cast<float>(stride)};

  nvinfer1::IResizeLayer* resize = network->addResize(*input);
  assert(resize != nullptr);
  std::string resizeLayerName = "upsample_" + std::to_string(layerIdx);
  resize->setName(resizeLayerName.c_str());
  resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
  resize->setScales(scale, 3);

  return resize->getOutput(0);
}
|
||||
|
||||
@@ -7,14 +7,10 @@
|
||||
#define __UPSAMPLE_LAYER_H__
|
||||
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
nvinfer1::ITensor* upsampleLayer(
|
||||
int layerIdx,
|
||||
std::map<std::string, std::string>& block,
|
||||
nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* upsampleLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
|
||||
nvinfer1::INetworkDefinition* network);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -23,94 +23,87 @@
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "nvdsinfer_custom_impl.h"
|
||||
#include "nvdsinfer_context.h"
|
||||
#include "yoloPlugins.h"
|
||||
#include "yolo.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include "yolo.h"
|
||||
|
||||
#define USE_CUDA_ENGINE_GET_API 1
|
||||
|
||||
/**
 * Fills `networkInfo` from the nvinfer init parameters and checks that the
 * config and weights files are specified and exist.
 *
 * The network type is the lower-cased config path up to the first ".cfg"
 * (the whole lower-cased path when ".cfg" does not occur). networkMode values
 * 0, 1 and 2 map to "FP32", "INT8" and "FP16"; any other value leaves
 * `networkInfo.networkMode` untouched.
 *
 * @param networkInfo  Structure to populate.
 * @param initParams   Init parameters supplied by the inference plugin.
 * @return true on success; false (after printing to stderr) when a required
 *         path is empty or the file does not exist.
 */
static bool
getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* initParams)
{
  std::string yoloCfg = initParams->customNetworkConfigFilePath;
  std::transform(yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) {
    return std::tolower(c);
  });

  const std::string yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg"));

  networkInfo.inputBlobName = "data";
  networkInfo.networkType = yoloType;
  networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
  networkInfo.wtsFilePath = initParams->modelFilePath;
  networkInfo.int8CalibPath = initParams->int8CalibrationFilePath;
  networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
  networkInfo.numDetectedClasses = initParams->numDetectedClasses;
  networkInfo.clusterMode = initParams->clusterMode;
  networkInfo.scoreThreshold = initParams->perClassDetectionParams->preClusterThreshold;

  if (initParams->networkMode == 0)
    networkInfo.networkMode = "FP32";
  else if (initParams->networkMode == 1)
    networkInfo.networkMode = "INT8";
  else if (initParams->networkMode == 2)
    networkInfo.networkMode = "FP16";

  if (networkInfo.configFilePath.empty() || networkInfo.wtsFilePath.empty()) {
    std::cerr << "YOLO config file or weights file is not specified\n" << std::endl;
    return false;
  }

  if (!fileExists(networkInfo.configFilePath) || !fileExists(networkInfo.wtsFilePath)) {
    std::cerr << "YOLO config file or weights file is not exist\n" << std::endl;
    return false;
  }

  return true;
}
|
||||
|
||||
#if !USE_CUDA_ENGINE_GET_API
|
||||
/**
 * Creates the custom model parser for the YOLO network described by
 * `initParams`.
 *
 * @param initParams  Init parameters supplied by the inference plugin.
 * @return A newly allocated Yolo parser, or nullptr when the network
 *         information cannot be derived from `initParams`. The object is
 *         created with `new`; presumably the caller takes ownership (confirm
 *         against the plugin API).
 */
IModelParser*
NvDsInferCreateModelParser(const NvDsInferContextInitParams* initParams)
{
  NetworkInfo networkInfo;
  if (!getYoloNetworkInfo(networkInfo, initParams))
    return nullptr;

  return new Yolo(networkInfo);
}
|
||||
#else
|
||||
extern "C"
|
||||
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
|
||||
nvinfer1::IBuilderConfig * const builderConfig,
|
||||
const NvDsInferContextInitParams * const initParams,
|
||||
nvinfer1::DataType dataType,
|
||||
nvinfer1::ICudaEngine *& cudaEngine);
|
||||
extern "C" bool
|
||||
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, nvinfer1::IBuilderConfig* const builderConfig,
|
||||
const NvDsInferContextInitParams* const initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine*& cudaEngine);
|
||||
|
||||
extern "C"
|
||||
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
|
||||
nvinfer1::IBuilderConfig * const builderConfig,
|
||||
const NvDsInferContextInitParams * const initParams,
|
||||
nvinfer1::DataType dataType,
|
||||
nvinfer1::ICudaEngine *& cudaEngine)
|
||||
extern "C" bool
|
||||
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, nvinfer1::IBuilderConfig* const builderConfig,
|
||||
const NvDsInferContextInitParams* const initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine*& cudaEngine)
|
||||
{
|
||||
NetworkInfo networkInfo;
|
||||
if (!getYoloNetworkInfo(networkInfo, initParams))
|
||||
return false;
|
||||
NetworkInfo networkInfo;
|
||||
if (!getYoloNetworkInfo(networkInfo, initParams))
|
||||
return false;
|
||||
|
||||
Yolo yolo(networkInfo);
|
||||
cudaEngine = yolo.createEngine (builder, builderConfig);
|
||||
if (cudaEngine == nullptr)
|
||||
{
|
||||
std::cerr << "Failed to build CUDA engine on " << networkInfo.configFilePath << std::endl;
|
||||
return false;
|
||||
}
|
||||
Yolo yolo(networkInfo);
|
||||
cudaEngine = yolo.createEngine(builder, builderConfig);
|
||||
if (cudaEngine == nullptr) {
|
||||
std::cerr << "Failed to build CUDA engine on " << networkInfo.configFilePath << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -23,118 +23,103 @@
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <sstream>
|
||||
#include "nvdsinfer_custom_impl.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include "utils.h"
|
||||
#include "yoloPlugins.h"
|
||||
|
||||
extern "C" bool NvDsInferParseYolo(
|
||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
extern "C" bool
|
||||
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
|
||||
|
||||
/**
 * Converts corner coordinates into a left/top/width/height object record,
 * clamped to the network input size.
 *
 * @param bx1,by1  First corner (x, y).
 * @param bx2,by2  Second corner (x, y).
 * @param netW     Upper clamp bound for x coordinates and for the width.
 * @param netH     Upper clamp bound for y coordinates and for the height.
 * @return Object info with only left, top, width and height assigned.
 */
static NvDsInferParseObjectInfo
convertBBox(const float& bx1, const float& by1, const float& bx2, const float& by2, const uint& netW, const uint& netH)
{
  NvDsInferParseObjectInfo b;

  const float x1 = clamp(bx1, 0, netW);
  const float y1 = clamp(by1, 0, netH);
  const float x2 = clamp(bx2, 0, netW);
  const float y2 = clamp(by2, 0, netH);

  b.left = x1;
  b.width = clamp(x2 - x1, 0, netW);
  b.top = y1;
  b.height = clamp(y2 - y1, 0, netH);

  return b;
}
|
||||
|
||||
static void addBBoxProposal(
|
||||
const float bx1, const float by1, const float bx2, const float by2, const uint& netW, const uint& netH,
|
||||
static void
|
||||
addBBoxProposal(const float bx1, const float by1, const float bx2, const float by2, const uint& netW, const uint& netH,
|
||||
const int maxIndex, const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
|
||||
{
|
||||
NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);
|
||||
if (bbi.width < 1 || bbi.height < 1) return;
|
||||
NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);
|
||||
if (bbi.width < 1 || bbi.height < 1) return;
|
||||
|
||||
bbi.detectionConfidence = maxProb;
|
||||
bbi.classId = maxIndex;
|
||||
binfo.push_back(bbi);
|
||||
bbi.detectionConfidence = maxProb;
|
||||
bbi.classId = maxIndex;
|
||||
binfo.push_back(bbi);
|
||||
}
|
||||
|
||||
static std::vector<NvDsInferParseObjectInfo> decodeYoloTensor(
|
||||
const int* counts, const float* boxes, const float* scores, const int* classes, const uint& netW, const uint& netH)
|
||||
static std::vector<NvDsInferParseObjectInfo>
|
||||
decodeYoloTensor(const int* counts, const float* boxes, const float* scores, const int* classes, const uint& netW,
|
||||
const uint& netH)
|
||||
{
|
||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
||||
|
||||
uint numBoxes = counts[0];
|
||||
for (uint b = 0; b < numBoxes; ++b)
|
||||
{
|
||||
float bx1 = boxes[b * 4 + 0];
|
||||
float by1 = boxes[b * 4 + 1];
|
||||
float bx2 = boxes[b * 4 + 2];
|
||||
float by2 = boxes[b * 4 + 3];
|
||||
uint numBoxes = counts[0];
|
||||
for (uint b = 0; b < numBoxes; ++b) {
|
||||
float bx1 = boxes[b * 4 + 0];
|
||||
float by1 = boxes[b * 4 + 1];
|
||||
float bx2 = boxes[b * 4 + 2];
|
||||
float by2 = boxes[b * 4 + 3];
|
||||
|
||||
float maxProb = scores[b];
|
||||
int maxIndex = classes[b];
|
||||
float maxProb = scores[b];
|
||||
int maxIndex = classes[b];
|
||||
|
||||
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
|
||||
}
|
||||
return binfo;
|
||||
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
|
||||
}
|
||||
return binfo;
|
||||
}
|
||||
|
||||
static bool NvDsInferParseCustomYolo(
|
||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList,
|
||||
const uint &numClasses)
|
||||
{
|
||||
if (outputLayersInfo.empty())
|
||||
{
|
||||
std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (numClasses != detectionParams.numClassesConfigured)
|
||||
{
|
||||
std::cerr << "WARNING: Num classes mismatch. Configured: " << detectionParams.numClassesConfigured
|
||||
<< ", detected by network: " << numClasses << std::endl;
|
||||
}
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> objects;
|
||||
|
||||
const NvDsInferLayerInfo &counts = outputLayersInfo[0];
|
||||
const NvDsInferLayerInfo &boxes = outputLayersInfo[1];
|
||||
const NvDsInferLayerInfo &scores = outputLayersInfo[2];
|
||||
const NvDsInferLayerInfo &classes = outputLayersInfo[3];
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> outObjs =
|
||||
decodeYoloTensor(
|
||||
(const int*)(counts.buffer), (const float*)(boxes.buffer), (const float*)(scores.buffer),
|
||||
(const int*)(classes.buffer), networkInfo.width, networkInfo.height);
|
||||
|
||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
||||
|
||||
objectList = objects;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
extern "C" bool NvDsInferParseYolo(
|
||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
static bool
|
||||
NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
|
||||
{
|
||||
int num_classes = kNUM_CLASSES;
|
||||
if (outputLayersInfo.empty()) {
|
||||
std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return NvDsInferParseCustomYolo (
|
||||
outputLayersInfo, networkInfo, detectionParams, objectList, num_classes);
|
||||
std::vector<NvDsInferParseObjectInfo> objects;
|
||||
|
||||
const NvDsInferLayerInfo& counts = outputLayersInfo[0];
|
||||
const NvDsInferLayerInfo& boxes = outputLayersInfo[1];
|
||||
const NvDsInferLayerInfo& scores = outputLayersInfo[2];
|
||||
const NvDsInferLayerInfo& classes = outputLayersInfo[3];
|
||||
|
||||
std::vector<NvDsInferParseObjectInfo> outObjs = decodeYoloTensor((const int*) (counts.buffer),
|
||||
(const float*) (boxes.buffer), (const float*) (scores.buffer), (const int*) (classes.buffer), networkInfo.width,
|
||||
networkInfo.height);
|
||||
|
||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
||||
|
||||
objectList = objects;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
extern "C" bool
|
||||
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
|
||||
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
|
||||
{
|
||||
return NvDsInferParseCustomYolo(outputLayersInfo, networkInfo, detectionParams, objectList);
|
||||
}
|
||||
|
||||
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
|
||||
|
||||
@@ -25,133 +25,137 @@
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#include <experimental/filesystem>
|
||||
#include <iomanip>
|
||||
#include <algorithm>
|
||||
#include <math.h>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
static void leftTrim(std::string& s)
|
||||
static void
|
||||
leftTrim(std::string& s)
|
||||
{
|
||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
|
||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
|
||||
}
|
||||
|
||||
static void rightTrim(std::string& s)
|
||||
static void
|
||||
rightTrim(std::string& s)
|
||||
{
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
|
||||
}
|
||||
|
||||
std::string trim(std::string s)
|
||||
std::string
|
||||
trim(std::string s)
|
||||
{
|
||||
leftTrim(s);
|
||||
rightTrim(s);
|
||||
return s;
|
||||
leftTrim(s);
|
||||
rightTrim(s);
|
||||
return s;
|
||||
}
|
||||
|
||||
float clamp(const float val, const float minVal, const float maxVal)
|
||||
float
|
||||
clamp(const float val, const float minVal, const float maxVal)
|
||||
{
|
||||
assert(minVal <= maxVal);
|
||||
return std::min(maxVal, std::max(minVal, val));
|
||||
assert(minVal <= maxVal);
|
||||
return std::min(maxVal, std::max(minVal, val));
|
||||
}
|
||||
|
||||
bool fileExists(const std::string fileName, bool verbose)
|
||||
bool
|
||||
fileExists(const std::string fileName, bool verbose)
|
||||
{
|
||||
if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName)))
|
||||
{
|
||||
if (verbose) std::cout << "\nFile does not exist: " << fileName << std::endl;
|
||||
return false;
|
||||
if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) {
|
||||
if (verbose)
|
||||
std::cout << "\nFile does not exist: " << fileName << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<float>
|
||||
loadWeights(const std::string weightsFilePath, const std::string& networkType)
|
||||
{
|
||||
assert(fileExists(weightsFilePath));
|
||||
std::cout << "\nLoading pre-trained weights" << std::endl;
|
||||
|
||||
std::vector<float> weights;
|
||||
|
||||
if (weightsFilePath.find(".weights") != std::string::npos) {
|
||||
std::ifstream file(weightsFilePath, std::ios_base::binary);
|
||||
assert(file.good());
|
||||
std::string line;
|
||||
|
||||
if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos) {
|
||||
// Remove 4 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 4);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
|
||||
{
|
||||
assert(fileExists(weightsFilePath));
|
||||
std::cout << "\nLoading pre-trained weights" << std::endl;
|
||||
|
||||
std::vector<float> weights;
|
||||
|
||||
if (weightsFilePath.find(".weights") != std::string::npos) {
|
||||
std::ifstream file(weightsFilePath, std::ios_base::binary);
|
||||
assert(file.good());
|
||||
std::string line;
|
||||
|
||||
if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos)
|
||||
{
|
||||
// Remove 4 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Remove 5 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 5);
|
||||
}
|
||||
|
||||
char floatWeight[4];
|
||||
while (!file.eof())
|
||||
{
|
||||
file.read(floatWeight, 4);
|
||||
assert(file.gcount() == 4);
|
||||
weights.push_back(*reinterpret_cast<float*>(floatWeight));
|
||||
if (file.peek() == std::istream::traits_type::eof()) break;
|
||||
}
|
||||
}
|
||||
|
||||
else if (weightsFilePath.find(".wts") != std::string::npos) {
|
||||
std::ifstream file(weightsFilePath);
|
||||
assert(file.good());
|
||||
int32_t count;
|
||||
file >> count;
|
||||
assert(count > 0 && "\nInvalid .wts file.");
|
||||
|
||||
uint32_t floatWeight;
|
||||
std::string name;
|
||||
uint32_t size;
|
||||
|
||||
while (count--) {
|
||||
file >> name >> std::dec >> size;
|
||||
for (uint32_t x = 0, y = size; x < y; ++x)
|
||||
{
|
||||
file >> std::hex >> floatWeight;
|
||||
weights.push_back(*reinterpret_cast<float *>(&floatWeight));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
else {
|
||||
std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl;
|
||||
std::abort();
|
||||
// Remove 5 int32 bytes of data from the stream belonging to the header
|
||||
file.ignore(4 * 5);
|
||||
}
|
||||
|
||||
std::cout << "Loading weights of " << networkType << " complete"
|
||||
<< std::endl;
|
||||
std::cout << "Total weights read: " << weights.size() << std::endl;
|
||||
return weights;
|
||||
char floatWeight[4];
|
||||
while (!file.eof()) {
|
||||
file.read(floatWeight, 4);
|
||||
assert(file.gcount() == 4);
|
||||
weights.push_back(*reinterpret_cast<float*>(floatWeight));
|
||||
if (file.peek() == std::istream::traits_type::eof())
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (weightsFilePath.find(".wts") != std::string::npos) {
|
||||
std::ifstream file(weightsFilePath);
|
||||
assert(file.good());
|
||||
int32_t count;
|
||||
file >> count;
|
||||
assert(count > 0 && "\nInvalid .wts file.");
|
||||
|
||||
uint32_t floatWeight;
|
||||
std::string name;
|
||||
uint32_t size;
|
||||
|
||||
while (count--) {
|
||||
file >> name >> std::dec >> size;
|
||||
for (uint32_t x = 0, y = size; x < y; ++x) {
|
||||
file >> std::hex >> floatWeight;
|
||||
weights.push_back(*reinterpret_cast<float*>(&floatWeight));
|
||||
};
|
||||
}
|
||||
}
|
||||
else {
|
||||
std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
std::cout << "Loading weights of " << networkType << " complete" << std::endl;
|
||||
std::cout << "Total weights read: " << weights.size() << std::endl;
|
||||
|
||||
return weights;
|
||||
}
|
||||
|
||||
std::string dimsToString(const nvinfer1::Dims d)
|
||||
std::string
|
||||
dimsToString(const nvinfer1::Dims d)
|
||||
{
|
||||
std::stringstream s;
|
||||
assert(d.nbDims >= 1);
|
||||
s << "[";
|
||||
for (int i = 0; i < d.nbDims - 1; ++i)
|
||||
s << d.d[i] << ", ";
|
||||
s << d.d[d.nbDims - 1] << "]";
|
||||
assert(d.nbDims >= 1);
|
||||
|
||||
return s.str();
|
||||
std::stringstream s;
|
||||
s << "[";
|
||||
for (int i = 0; i < d.nbDims - 1; ++i)
|
||||
s << d.d[i] << ", ";
|
||||
s << d.d[d.nbDims - 1] << "]";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
|
||||
int getNumChannels(nvinfer1::ITensor* t)
|
||||
int
|
||||
getNumChannels(nvinfer1::ITensor* t)
|
||||
{
|
||||
nvinfer1::Dims d = t->getDimensions();
|
||||
assert(d.nbDims == 3);
|
||||
nvinfer1::Dims d = t->getDimensions();
|
||||
assert(d.nbDims == 3);
|
||||
|
||||
return d.d[0];
|
||||
return d.d[0];
|
||||
}
|
||||
|
||||
void printLayerInfo(
|
||||
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr)
|
||||
void
|
||||
printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput,
|
||||
std::string weightPtr)
|
||||
{
|
||||
std::cout << std::setw(8) << std::left << layerIndex << std::setw(30) << std::left << layerName;
|
||||
std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput;
|
||||
std::cout << weightPtr << std::endl;
|
||||
std::cout << std::setw(8) << std::left << layerIndex << std::setw(30) << std::left << layerName;
|
||||
std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput;
|
||||
std::cout << weightPtr << std::endl;
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __UTILS_H__
|
||||
#define __UTILS_H__
|
||||
|
||||
@@ -36,11 +35,17 @@
|
||||
#include "NvInfer.h"
|
||||
|
||||
std::string trim(std::string s);
|
||||
|
||||
float clamp(const float val, const float minVal, const float maxVal);
|
||||
|
||||
bool fileExists(const std::string fileName, bool verbose = true);
|
||||
|
||||
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
|
||||
|
||||
std::string dimsToString(const nvinfer1::Dims d);
|
||||
|
||||
int getNumChannels(nvinfer1::ITensor* t);
|
||||
|
||||
void printLayerInfo(
|
||||
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -26,7 +26,11 @@
|
||||
#ifndef _YOLO_H_
|
||||
#define _YOLO_H_
|
||||
|
||||
#include "NvInferPlugin.h"
|
||||
#include "nvdsinfer_custom_impl.h"
|
||||
|
||||
#include "layers/convolutional_layer.h"
|
||||
#include "layers/c2f_layer.h"
|
||||
#include "layers/batchnorm_layer.h"
|
||||
#include "layers/implicit_layer.h"
|
||||
#include "layers/channels_layer.h"
|
||||
@@ -40,36 +44,35 @@
|
||||
#include "layers/softmax_layer.h"
|
||||
#include "layers/cls_layer.h"
|
||||
#include "layers/reg_layer.h"
|
||||
|
||||
#include "nvdsinfer_custom_impl.h"
|
||||
#include "layers/detect_v8_layer.h"
|
||||
|
||||
struct NetworkInfo
|
||||
{
|
||||
std::string inputBlobName;
|
||||
std::string networkType;
|
||||
std::string configFilePath;
|
||||
std::string wtsFilePath;
|
||||
std::string int8CalibPath;
|
||||
std::string deviceType;
|
||||
uint numDetectedClasses;
|
||||
int clusterMode;
|
||||
float scoreThreshold;
|
||||
std::string networkMode;
|
||||
std::string inputBlobName;
|
||||
std::string networkType;
|
||||
std::string configFilePath;
|
||||
std::string wtsFilePath;
|
||||
std::string int8CalibPath;
|
||||
std::string deviceType;
|
||||
uint numDetectedClasses;
|
||||
int clusterMode;
|
||||
float scoreThreshold;
|
||||
std::string networkMode;
|
||||
};
|
||||
|
||||
struct TensorInfo
|
||||
{
|
||||
std::string blobName;
|
||||
uint gridSizeX {0};
|
||||
uint gridSizeY {0};
|
||||
uint numBBoxes {0};
|
||||
float scaleXY;
|
||||
std::vector<float> anchors;
|
||||
std::vector<int> mask;
|
||||
std::string blobName;
|
||||
uint gridSizeX {0};
|
||||
uint gridSizeY {0};
|
||||
uint numBBoxes {0};
|
||||
float scaleXY;
|
||||
std::vector<float> anchors;
|
||||
std::vector<int> mask;
|
||||
};
|
||||
|
||||
class Yolo : public IModelParser {
|
||||
public:
|
||||
public:
|
||||
Yolo(const NetworkInfo& networkInfo);
|
||||
|
||||
~Yolo() override;
|
||||
@@ -77,14 +80,14 @@ public:
|
||||
bool hasFullDimsSupported() const override { return false; }
|
||||
|
||||
const char* getModelName() const override {
|
||||
return m_ConfigFilePath.empty() ? m_NetworkType.c_str() : m_ConfigFilePath.c_str();
|
||||
return m_ConfigFilePath.empty() ? m_NetworkType.c_str() : m_ConfigFilePath.c_str();
|
||||
}
|
||||
|
||||
NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
|
||||
|
||||
nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config);
|
||||
nvinfer1::ICudaEngine* createEngine(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config);
|
||||
|
||||
protected:
|
||||
protected:
|
||||
const std::string m_InputBlobName;
|
||||
const std::string m_NetworkType;
|
||||
const std::string m_ConfigFilePath;
|
||||
@@ -109,7 +112,7 @@ protected:
|
||||
std::vector<std::map<std::string, std::string>> m_ConfigBlocks;
|
||||
std::vector<nvinfer1::Weights> m_TrtWeights;
|
||||
|
||||
private:
|
||||
private:
|
||||
NvDsInferStatus buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition& network);
|
||||
|
||||
std::vector<std::map<std::string, std::string>> parseConfigFile(const std::string cfgFilePath);
|
||||
|
||||
@@ -7,98 +7,82 @@
|
||||
|
||||
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
|
||||
|
||||
__global__ void gpuYoloLayer(
|
||||
const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes,
|
||||
const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY,
|
||||
const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask)
|
||||
__global__ void gpuYoloLayer(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
|
||||
int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX,
|
||||
const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors,
|
||||
const int* mask)
|
||||
{
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
|
||||
|
||||
if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes)
|
||||
return;
|
||||
if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes)
|
||||
return;
|
||||
|
||||
const int numGridCells = gridSizeX * gridSizeY;
|
||||
const int bbindex = y_id * gridSizeX + x_id;
|
||||
const int numGridCells = gridSizeX * gridSizeY;
|
||||
const int bbindex = y_id * gridSizeX + x_id;
|
||||
|
||||
const float objectness
|
||||
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
|
||||
const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
|
||||
|
||||
if (objectness < scoreThreshold)
|
||||
return;
|
||||
if (objectness < scoreThreshold)
|
||||
return;
|
||||
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
|
||||
const float alpha = scaleXY;
|
||||
const float beta = -0.5 * (scaleXY - 1);
|
||||
const float alpha = scaleXY;
|
||||
const float beta = -0.5 * (scaleXY - 1);
|
||||
|
||||
float x
|
||||
= (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)])
|
||||
* alpha + beta + x_id) * netWidth / gridSizeX;
|
||||
float x = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id)
|
||||
* netWidth / gridSizeX;
|
||||
|
||||
float y
|
||||
= (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)])
|
||||
* alpha + beta + y_id) * netHeight / gridSizeY;
|
||||
float y = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id)
|
||||
* netHeight / gridSizeY;
|
||||
|
||||
float w
|
||||
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)])
|
||||
* anchors[mask[z_id] * 2];
|
||||
float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[mask[z_id] * 2];
|
||||
|
||||
float h
|
||||
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)])
|
||||
* anchors[mask[z_id] * 2 + 1];
|
||||
float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[mask[z_id] * 2 + 1];
|
||||
|
||||
float maxProb = 0.0f;
|
||||
int maxIndex = -1;
|
||||
float maxProb = 0.0f;
|
||||
int maxIndex = -1;
|
||||
|
||||
for (uint i = 0; i < numOutputClasses; ++i)
|
||||
{
|
||||
float prob
|
||||
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
|
||||
|
||||
if (prob > maxProb)
|
||||
{
|
||||
maxProb = prob;
|
||||
maxIndex = i;
|
||||
}
|
||||
for (uint i = 0; i < numOutputClasses; ++i) {
|
||||
float prob = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
|
||||
if (prob > maxProb) {
|
||||
maxProb = prob;
|
||||
maxIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
detection_boxes[count * 4 + 0] = x - 0.5 * w;
|
||||
detection_boxes[count * 4 + 1] = y - 0.5 * h;
|
||||
detection_boxes[count * 4 + 2] = x + 0.5 * w;
|
||||
detection_boxes[count * 4 + 3] = y + 0.5 * h;
|
||||
detection_scores[count] = objectness * maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
detection_boxes[count * 4 + 0] = x - 0.5 * w;
|
||||
detection_boxes[count * 4 + 1] = y - 0.5 * h;
|
||||
detection_boxes[count * 4 + 2] = x + 0.5 * w;
|
||||
detection_boxes[count * 4 + 3] = y + 0.5 * h;
|
||||
detection_scores[count] = objectness * maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
}
|
||||
|
||||
cudaError_t cudaYoloLayer(
|
||||
const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes,
|
||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
|
||||
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer(
|
||||
const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes,
|
||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
|
||||
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
|
||||
cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads_per_block(16, 16, 4);
|
||||
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1,
|
||||
(gridSizeY / threads_per_block.y) + 1,
|
||||
(numBBoxes / threads_per_block.z) + 1);
|
||||
dim3 threads_per_block(16, 16, 4);
|
||||
dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
|
||||
(numBBoxes / threads_per_block.z) + 1);
|
||||
|
||||
for (unsigned int batch = 0; batch < batchSize; ++batch)
|
||||
{
|
||||
gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||
reinterpret_cast<const float*>(input) + (batch * inputSize),
|
||||
reinterpret_cast<int*>(num_detections) + (batch),
|
||||
reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
|
||||
reinterpret_cast<int*>(detection_classes) + (batch * outputSize),
|
||||
scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
|
||||
reinterpret_cast<const float*>(anchors), reinterpret_cast<const int*>(mask));
|
||||
}
|
||||
return cudaGetLastError();
|
||||
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
||||
gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||
reinterpret_cast<const float*>(input) + (batch * inputSize), reinterpret_cast<int*>(num_detections) + (batch),
|
||||
reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
|
||||
reinterpret_cast<int*>(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX,
|
||||
gridSizeY, numOutputClasses, numBBoxes, scaleXY, reinterpret_cast<const float*>(anchors),
|
||||
reinterpret_cast<const int*>(mask));
|
||||
}
|
||||
return cudaGetLastError();
|
||||
}
|
||||
|
||||
@@ -4,69 +4,61 @@
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
__global__ void gpuYoloLayer_e(
|
||||
const float* cls, const float* reg, int* num_detections, float* detection_boxes, float* detection_scores,
|
||||
int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
|
||||
__global__ void gpuYoloLayer_e(const float* cls, const float* reg, int* num_detections, float* detection_boxes,
|
||||
float* detection_scores, int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
|
||||
const uint numOutputClasses, const uint64_t outputSize)
|
||||
{
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (x_id >= outputSize)
|
||||
return;
|
||||
if (x_id >= outputSize)
|
||||
return;
|
||||
|
||||
float maxProb = 0.0f;
|
||||
int maxIndex = -1;
|
||||
float maxProb = 0.0f;
|
||||
int maxIndex = -1;
|
||||
|
||||
for (uint i = 0; i < numOutputClasses; ++i)
|
||||
{
|
||||
float prob
|
||||
= cls[x_id * numOutputClasses + i];
|
||||
|
||||
if (prob > maxProb)
|
||||
{
|
||||
maxProb = prob;
|
||||
maxIndex = i;
|
||||
}
|
||||
for (uint i = 0; i < numOutputClasses; ++i) {
|
||||
float prob = cls[x_id * numOutputClasses + i];
|
||||
if (prob > maxProb) {
|
||||
maxProb = prob;
|
||||
maxIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (maxProb < scoreThreshold)
|
||||
return;
|
||||
if (maxProb < scoreThreshold)
|
||||
return;
|
||||
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
|
||||
detection_boxes[count * 4 + 0] = reg[x_id * 4 + 0];
|
||||
detection_boxes[count * 4 + 1] = reg[x_id * 4 + 1];
|
||||
detection_boxes[count * 4 + 2] = reg[x_id * 4 + 2];
|
||||
detection_boxes[count * 4 + 3] = reg[x_id * 4 + 3];
|
||||
detection_scores[count] = maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
detection_boxes[count * 4 + 0] = reg[x_id * 4 + 0];
|
||||
detection_boxes[count * 4 + 1] = reg[x_id * 4 + 1];
|
||||
detection_boxes[count * 4 + 2] = reg[x_id * 4 + 2];
|
||||
detection_boxes[count * 4 + 3] = reg[x_id * 4 + 3];
|
||||
detection_scores[count] = maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
}
|
||||
|
||||
cudaError_t cudaYoloLayer_e(
|
||||
const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses,
|
||||
cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_e(
|
||||
const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream)
|
||||
cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
int threads_per_block = 16;
|
||||
int number_of_blocks = (outputSize / threads_per_block) + 1;
|
||||
int threads_per_block = 16;
|
||||
int number_of_blocks = (outputSize / threads_per_block) + 1;
|
||||
|
||||
for (unsigned int batch = 0; batch < batchSize; ++batch)
|
||||
{
|
||||
gpuYoloLayer_e<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||
reinterpret_cast<const float*>(cls) + (batch * numOutputClasses * outputSize),
|
||||
reinterpret_cast<const float*>(reg) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<int*>(num_detections) + (batch),
|
||||
reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
|
||||
reinterpret_cast<int*>(detection_classes) + (batch * outputSize),
|
||||
scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize);
|
||||
}
|
||||
return cudaGetLastError();
|
||||
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
||||
gpuYoloLayer_e<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||
reinterpret_cast<const float*>(cls) + (batch * numOutputClasses * outputSize),
|
||||
reinterpret_cast<const float*>(reg) + (batch * 4 * outputSize), reinterpret_cast<int*>(num_detections) + (batch),
|
||||
reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
|
||||
reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
|
||||
reinterpret_cast<int*>(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight,
|
||||
numOutputClasses, outputSize);
|
||||
}
|
||||
return cudaGetLastError();
|
||||
}
|
||||
|
||||
@@ -5,98 +5,82 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
__global__ void gpuYoloLayer_nc(
|
||||
const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes,
|
||||
const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY,
|
||||
const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask)
|
||||
__global__ void gpuYoloLayer_nc(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
|
||||
int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX,
|
||||
const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors,
|
||||
const int* mask)
|
||||
{
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
|
||||
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
|
||||
|
||||
if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes)
|
||||
return;
|
||||
if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes)
|
||||
return;
|
||||
|
||||
const int numGridCells = gridSizeX * gridSizeY;
|
||||
const int bbindex = y_id * gridSizeX + x_id;
|
||||
const int numGridCells = gridSizeX * gridSizeY;
|
||||
const int bbindex = y_id * gridSizeX + x_id;
|
||||
|
||||
const float objectness
|
||||
= input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];
|
||||
const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];
|
||||
|
||||
if (objectness < scoreThreshold)
|
||||
return;
|
||||
if (objectness < scoreThreshold)
|
||||
return;
|
||||
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
int count = (int)atomicAdd(num_detections, 1);
|
||||
|
||||
const float alpha = scaleXY;
|
||||
const float beta = -0.5 * (scaleXY - 1);
|
||||
const float alpha = scaleXY;
|
||||
const float beta = -0.5 * (scaleXY - 1);
|
||||
|
||||
float x
|
||||
= (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
|
||||
* alpha + beta + x_id) * netWidth / gridSizeX;
|
||||
float x = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth /
|
||||
gridSizeX;
|
||||
|
||||
float y
|
||||
= (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
|
||||
* alpha + beta + y_id) * netHeight / gridSizeY;
|
||||
float y = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight /
|
||||
gridSizeY;
|
||||
|
||||
float w
|
||||
= __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2)
|
||||
* anchors[mask[z_id] * 2];
|
||||
float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * anchors[mask[z_id] * 2];
|
||||
|
||||
float h
|
||||
= __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2)
|
||||
* anchors[mask[z_id] * 2 + 1];
|
||||
float h = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) * anchors[mask[z_id] * 2 + 1];
|
||||
|
||||
float maxProb = 0.0f;
|
||||
int maxIndex = -1;
|
||||
float maxProb = 0.0f;
|
||||
int maxIndex = -1;
|
||||
|
||||
for (uint i = 0; i < numOutputClasses; ++i)
|
||||
{
|
||||
float prob
|
||||
= input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))];
|
||||
|
||||
if (prob > maxProb)
|
||||
{
|
||||
maxProb = prob;
|
||||
maxIndex = i;
|
||||
}
|
||||
for (uint i = 0; i < numOutputClasses; ++i) {
|
||||
float prob = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))];
|
||||
if (prob > maxProb) {
|
||||
maxProb = prob;
|
||||
maxIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
detection_boxes[count * 4 + 0] = x - 0.5 * w;
|
||||
detection_boxes[count * 4 + 1] = y - 0.5 * h;
|
||||
detection_boxes[count * 4 + 2] = x + 0.5 * w;
|
||||
detection_boxes[count * 4 + 3] = y + 0.5 * h;
|
||||
detection_scores[count] = objectness * maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
detection_boxes[count * 4 + 0] = x - 0.5 * w;
|
||||
detection_boxes[count * 4 + 1] = y - 0.5 * h;
|
||||
detection_boxes[count * 4 + 2] = x + 0.5 * w;
|
||||
detection_boxes[count * 4 + 3] = y + 0.5 * h;
|
||||
detection_scores[count] = objectness * maxProb;
|
||||
detection_classes[count] = maxIndex;
|
||||
}
|
||||
|
||||
cudaError_t cudaYoloLayer_nc(
|
||||
const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes,
|
||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
|
||||
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
/**
 * Host-side launcher for gpuYoloLayer_nc.
 *
 * Enqueues one kernel launch per batch item on `stream`. Every launch receives the buffers advanced to that
 * item's slice: `inputSize` floats of input, one detection counter, `4 * outputSize` box floats, `outputSize`
 * scores and `outputSize` class ids.
 *
 * @return the value of cudaGetLastError(), sampled once after all launches have been enqueued.
 */
cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
    const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
    const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
  // 16 x 16 x 4 threads per block. Each grid dimension is the integer quotient plus one, so the launch always
  // spans at least gridSizeX x gridSizeY x numBBoxes threads.
  const dim3 blockShape(16, 16, 4);
  const dim3 gridShape(gridSizeX / blockShape.x + 1, gridSizeY / blockShape.y + 1, numBBoxes / blockShape.z + 1);

  // Typed views of the opaque buffers, resolved once instead of per launch.
  const float* const inputBase = reinterpret_cast<const float*>(input);
  int* const countBase = reinterpret_cast<int*>(num_detections);
  float* const boxBase = reinterpret_cast<float*>(detection_boxes);
  float* const scoreBase = reinterpret_cast<float*>(detection_scores);
  int* const classBase = reinterpret_cast<int*>(detection_classes);
  const float* const anchorValues = reinterpret_cast<const float*>(anchors);
  const int* const maskValues = reinterpret_cast<const int*>(mask);

  for (unsigned int item = 0; item < batchSize; ++item) {
    gpuYoloLayer_nc<<<gridShape, blockShape, 0, stream>>>(inputBase + (item * inputSize), countBase + item,
        boxBase + (item * 4 * outputSize), scoreBase + (item * outputSize), classBase + (item * outputSize),
        scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, anchorValues,
        maskValues);
  }

  return cudaGetLastError();
}
|
||||
|
||||
@@ -7,98 +7,84 @@
|
||||
|
||||
// Logistic sigmoid 1 / (1 + e^-x), evaluated with the __expf intrinsic.
inline __device__ float sigmoidGPU(const float& x)
{
  const float expNegX = __expf(-x);
  return 1.0f / (1.0f + expNegX);
}
|
||||
|
||||
/**
 * Decodes one (grid column, grid row, anchor) prediction per thread and appends it to the detection buffers.
 *
 * Thread (x_id, y_id, z_id) reads the 5 + numOutputClasses channels belonging to anchor z_id at cell
 * (x_id, y_id); consecutive channels are numGridCells elements apart in `input`. Every channel is passed
 * through sigmoidGPU. Predictions whose objectness is below `scoreThreshold` are dropped. Survivors reserve an
 * output slot with atomicAdd on `num_detections` and store:
 *   - detection_boxes[4 * slot .. 4 * slot + 3]: x - w/2, y - h/2, x + w/2, y + h/2
 *   - detection_scores[slot]: objectness * highest class probability
 *   - detection_classes[slot]: index of that class (-1 if no class probability exceeded 0)
 *
 * Width and height are (2 * sigmoid)^2 scaled by the anchor selected through mask[z_id]. The square is a plain
 * multiply rather than __powf(v, 2), and the literals are float so the kernel does no double-precision math.
 */
__global__ void gpuYoloLayer_r(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
    int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX,
    const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors,
    const int* mask)
{
  const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
  const uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
  const uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

  // The launch grid is rounded up, so threads past the edge have nothing to do.
  if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes)
    return;

  const int numGridCells = gridSizeX * gridSizeY;
  const int bbindex = y_id * gridSizeX + x_id;

  // Index of channel 0 for this anchor at this cell; channel c sits c * numGridCells elements further on.
  const uint channel0 = bbindex + numGridCells * (z_id * (5 + numOutputClasses));

  const float objectness = sigmoidGPU(input[channel0 + 4 * numGridCells]);

  if (objectness < scoreThreshold)
    return;

  // Reserve a unique output slot for this detection.
  const int count = atomicAdd(num_detections, 1);

  const float alpha = scaleXY;
  const float beta = -0.5f * (scaleXY - 1);

  const float x = (sigmoidGPU(input[channel0]) * alpha + beta + x_id) * netWidth / gridSizeX;
  const float y = (sigmoidGPU(input[channel0 + numGridCells]) * alpha + beta + y_id) * netHeight / gridSizeY;

  const float wRoot = sigmoidGPU(input[channel0 + 2 * numGridCells]) * 2;
  const float w = wRoot * wRoot * anchors[mask[z_id] * 2];

  const float hRoot = sigmoidGPU(input[channel0 + 3 * numGridCells]) * 2;
  const float h = hRoot * hRoot * anchors[mask[z_id] * 2 + 1];

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (uint i = 0; i < numOutputClasses; ++i) {
    const float prob = sigmoidGPU(input[channel0 + (5 + i) * numGridCells]);
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  detection_boxes[count * 4 + 0] = x - 0.5f * w;
  detection_boxes[count * 4 + 1] = y - 0.5f * h;
  detection_boxes[count * 4 + 2] = x + 0.5f * w;
  detection_boxes[count * 4 + 3] = y + 0.5f * h;
  detection_scores[count] = objectness * maxProb;
  detection_classes[count] = maxIndex;
}
|
||||
|
||||
cudaError_t cudaYoloLayer_r(
|
||||
const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes,
|
||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
|
||||
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
/**
 * Host-side launcher for gpuYoloLayer_r.
 *
 * Issues one kernel launch per batch item on `stream`, handing each launch the slice of every buffer that
 * belongs to that item (`inputSize` input floats, one counter, `4 * outputSize` box floats, `outputSize`
 * scores and `outputSize` class ids per item).
 *
 * @return cudaGetLastError() as observed after the last launch was enqueued.
 */
cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
    const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
    const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
  // One thread per (grid column, grid row, anchor). Block counts are rounded up, and the kernel returns early
  // for threads that fall outside gridSizeX x gridSizeY x numBBoxes.
  const dim3 threadsPerBlock(16, 16, 4);
  const dim3 blockCount(gridSizeX / threadsPerBlock.x + 1, gridSizeY / threadsPerBlock.y + 1,
      numBBoxes / threadsPerBlock.z + 1);

  const float* const in = reinterpret_cast<const float*>(input);
  int* const counts = reinterpret_cast<int*>(num_detections);
  float* const boxes = reinterpret_cast<float*>(detection_boxes);
  float* const scores = reinterpret_cast<float*>(detection_scores);
  int* const classes = reinterpret_cast<int*>(detection_classes);
  const float* const anchorTable = reinterpret_cast<const float*>(anchors);
  const int* const maskTable = reinterpret_cast<const int*>(mask);

  unsigned int batch = 0;
  while (batch < batchSize) {
    gpuYoloLayer_r<<<blockCount, threadsPerBlock, 0, stream>>>(in + (batch * inputSize), counts + batch,
        boxes + (batch * 4 * outputSize), scores + (batch * outputSize), classes + (batch * outputSize), scoreThreshold,
        netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, anchorTable, maskTable);
    ++batch;
  }

  return cudaGetLastError();
}
|
||||
|
||||
@@ -7,119 +7,100 @@
|
||||
|
||||
// Logistic sigmoid 1 / (1 + e^-x), evaluated with the __expf intrinsic.
inline __device__ float sigmoidGPU(const float& x)
{
  const float expNegX = __expf(-x);
  return 1.0f / (1.0f + expNegX);
}
|
||||
|
||||
/**
 * Temperature-scaled softmax over the class channels of one (cell, anchor) prediction.
 *
 * Reads the numOutputClasses class logits of anchor `z_id` at cell `bbindex` from `input` (class i is channel
 * 5 + i, channels are numGridCells elements apart) and writes the normalised probabilities to the same indices
 * of `output`. The largest logit is subtracted before exponentiation.
 *
 * The running maximum is kept as a float: storing each logit in an int truncated it and is undefined for
 * values outside the int range.
 */
__device__ void softmaxGPU(const float* input, const int bbindex, const int numGridCells, uint z_id,
    const uint numOutputClasses, float temp, float* output)
{
  // Index of class 0 for this anchor at this cell; class i is i * numGridCells elements further on.
  const uint class0 = bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 5);

  float largest = -INFINITY;
  for (uint i = 0; i < numOutputClasses; ++i) {
    const float val = input[class0 + i * numGridCells];
    largest = (val > largest) ? val : largest;
  }

  float sum = 0;
  for (uint i = 0; i < numOutputClasses; ++i) {
    const float e = __expf(input[class0 + i * numGridCells] / temp - largest / temp);
    sum += e;
    output[class0 + i * numGridCells] = e;
  }

  for (uint i = 0; i < numOutputClasses; ++i) {
    output[class0 + i * numGridCells] /= sum;
  }
}
|
||||
|
||||
/**
 * Decodes one (grid column, grid row, anchor) region-layer prediction per thread.
 *
 * Objectness and the box centre go through sigmoidGPU; width and height are __expf of the raw value scaled by
 * anchors[2 * z_id] / anchors[2 * z_id + 1] and by the net-to-grid ratio. Class probabilities come from
 * softmaxGPU, which writes them into `softmax` at the same indices as the class channels of `input`.
 * Predictions with objectness below `scoreThreshold` are dropped; the others reserve a slot with atomicAdd on
 * `num_detections` and store corner coordinates, objectness * best class probability, and the best class index
 * (-1 if no probability exceeded 0).
 */
__global__ void gpuRegionLayer(const float* input, float* softmax, int* num_detections, float* detection_boxes,
    float* detection_scores, int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
    const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float* anchors)
{
  const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
  const uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
  const uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

  const bool insideGrid = x_id < gridSizeX && y_id < gridSizeY && z_id < numBBoxes;
  if (!insideGrid)
    return;

  const int numGridCells = gridSizeX * gridSizeY;
  const int bbindex = y_id * gridSizeX + x_id;

  // Index of channel 0 for this anchor at this cell; channel c sits numGridCells * c elements further on.
  const uint channel0 = bbindex + numGridCells * (z_id * (5 + numOutputClasses));

  const float objectness = sigmoidGPU(input[channel0 + numGridCells * 4]);
  if (objectness < scoreThreshold)
    return;

  // Reserve a unique output slot for this detection.
  const int slot = (int)atomicAdd(num_detections, 1);

  const float centreX = (sigmoidGPU(input[channel0]) + x_id) * netWidth / gridSizeX;
  const float centreY = (sigmoidGPU(input[channel0 + numGridCells]) + y_id) * netHeight / gridSizeY;
  const float boxW = __expf(input[channel0 + numGridCells * 2]) * anchors[z_id * 2] * netWidth / gridSizeX;
  const float boxH = __expf(input[channel0 + numGridCells * 3]) * anchors[z_id * 2 + 1] * netHeight / gridSizeY;

  softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax);

  float bestProb = 0.0f;
  int bestClass = -1;
  for (uint cls = 0; cls < numOutputClasses; ++cls) {
    const float prob = softmax[channel0 + numGridCells * (5 + cls)];
    if (prob > bestProb) {
      bestProb = prob;
      bestClass = cls;
    }
  }

  const double halfW = 0.5 * boxW;
  const double halfH = 0.5 * boxH;
  float* const box = detection_boxes + slot * 4;
  box[0] = centreX - halfW;
  box[1] = centreY - halfH;
  box[2] = centreX + halfW;
  box[3] = centreY + halfH;
  detection_scores[slot] = objectness * bestProb;
  detection_classes[slot] = bestClass;
}
|
||||
|
||||
cudaError_t cudaRegionLayer(
|
||||
const void* input, void* softmax, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
|
||||
/**
 * Host-side launcher for gpuRegionLayer.
 *
 * Enqueues one kernel launch per batch item on `stream`. The `input` and `softmax` buffers both advance by
 * `inputSize` floats per item; the counter advances by one, the box buffer by `4 * outputSize` floats and the
 * score and class buffers by `outputSize` entries.
 *
 * @return cudaGetLastError() as observed after all launches were enqueued.
 */
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes,
    void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize,
    const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
    const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream)
{
  // One thread per (grid column, grid row, anchor). Block counts are rounded up, and the kernel returns early
  // for threads outside gridSizeX x gridSizeY x numBBoxes.
  const dim3 blockShape(16, 16, 4);
  const dim3 gridShape(gridSizeX / blockShape.x + 1, gridSizeY / blockShape.y + 1, numBBoxes / blockShape.z + 1);

  const float* const inputBase = reinterpret_cast<const float*>(input);
  float* const softmaxBase = reinterpret_cast<float*>(softmax);
  int* const countBase = reinterpret_cast<int*>(num_detections);
  float* const boxBase = reinterpret_cast<float*>(detection_boxes);
  float* const scoreBase = reinterpret_cast<float*>(detection_scores);
  int* const classBase = reinterpret_cast<int*>(detection_classes);
  const float* const anchorValues = reinterpret_cast<const float*>(anchors);

  for (unsigned int item = 0; item < batchSize; ++item) {
    gpuRegionLayer<<<gridShape, blockShape, 0, stream>>>(inputBase + (item * inputSize),
        softmaxBase + (item * inputSize), countBase + item, boxBase + (item * 4 * outputSize),
        scoreBase + (item * outputSize), classBase + (item * outputSize), scoreThreshold, netWidth, netHeight,
        gridSizeX, gridSizeY, numOutputClasses, numBBoxes, anchorValues);
  }

  return cudaGetLastError();
}
|
||||
|
||||
62
nvdsinfer_custom_impl_Yolo/yoloForward_v8.cu
Normal file
62
nvdsinfer_custom_impl_Yolo/yoloForward_v8.cu
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Created by Marcos Luciano
|
||||
* https://www.github.com/marcoslucianops
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/**
 * Filters one candidate row of `input` per thread and appends it to the detection buffers.
 *
 * Row x_id occupies 4 + numOutputClasses consecutive floats: four box values followed by one score per class.
 * The thread finds the highest class score above 0 and drops the row if that score is below `scoreThreshold`.
 * Otherwise it reserves a slot with atomicAdd on `num_detections`, copies the four box values unchanged and
 * stores the winning score and class index. `netWidth` and `netHeight` are not used by this kernel.
 */
__global__ void gpuYoloLayer_v8(const float* input, int* num_detections, float* detection_boxes, float* detection_scores,
    int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight,
    const uint numOutputClasses, const uint64_t outputSize)
{
  const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;

  // The launch is rounded up to whole blocks; surplus threads have no row to process.
  if (x_id >= outputSize)
    return;

  // Index of the first of the 4 + numOutputClasses values that make up this row.
  const uint rowStart = x_id * (4 + numOutputClasses);

  float bestScore = 0.0f;
  int bestClass = -1;
  for (uint cls = 0; cls < numOutputClasses; ++cls) {
    const float score = input[rowStart + cls + 4];
    if (score > bestScore) {
      bestScore = score;
      bestClass = cls;
    }
  }

  if (bestScore < scoreThreshold)
    return;

  // Reserve a unique output slot for this detection.
  const int slot = (int)atomicAdd(num_detections, 1);

  for (uint k = 0; k < 4; ++k) {
    detection_boxes[slot * 4 + k] = input[rowStart + k];
  }
  detection_scores[slot] = bestScore;
  detection_classes[slot] = bestClass;
}
|
||||
|
||||
cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream);
|
||||
|
||||
/**
 * Host-side launcher for gpuYoloLayer_v8.
 *
 * Enqueues one 1-D kernel launch per batch item on `stream`. Each item owns
 * `(4 + numOutputClasses) * outputSize` input floats, one detection counter, `4 * outputSize` box floats and
 * `outputSize` scores and class ids.
 *
 * The block size is a multiple of the 32-thread warp. The previous value of 16 filled only half of each warp,
 * leaving the other lanes idle. The kernel ignores threads with an index >= outputSize, so the rounded-up grid
 * is safe for any block size.
 *
 * @return cudaGetLastError() as observed after all launches were enqueued.
 */
cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
    void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
    const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream)
{
  const int threads_per_block = 256;
  const int number_of_blocks = (outputSize / threads_per_block) + 1;

  for (unsigned int batch = 0; batch < batchSize; ++batch) {
    gpuYoloLayer_v8<<<number_of_blocks, threads_per_block, 0, stream>>>(
        reinterpret_cast<const float*>(input) + (batch * (4 + numOutputClasses) * outputSize),
        reinterpret_cast<int*>(num_detections) + (batch),
        reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
        reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
        reinterpret_cast<int*>(detection_classes) + (batch * outputSize),
        scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize);
  }

  return cudaGetLastError();
}
|
||||
@@ -24,325 +24,288 @@
|
||||
*/
|
||||
|
||||
#include "yoloPlugins.h"
#include "NvInferPlugin.h"
#include <cassert>
#include <cstring>
#include <iostream>
#include <memory>
|
||||
|
||||
uint kNUM_CLASSES;
|
||||
|
||||
namespace {

/**
 * Copies the bytes of `val` to `buffer` and advances `buffer` by sizeof(T).
 *
 * Uses std::memcpy instead of a store through reinterpret_cast<T*>: the cursor moves in steps of differing
 * sizes, so it is not guaranteed to be suitably aligned for T, and a typed store into a char buffer is an
 * aliasing violation. T must be trivially copyable.
 */
template <typename T>
void write(char*& buffer, const T& val) {
  std::memcpy(buffer, &val, sizeof(T));
  buffer += sizeof(T);
}

/**
 * Copies sizeof(T) bytes from `buffer` into `val` and advances `buffer` by sizeof(T).
 *
 * Counterpart of write(); uses std::memcpy for the same alignment and aliasing reasons. The caller must
 * ensure at least sizeof(T) bytes are readable at `buffer`.
 */
template <typename T>
void read(const char*& buffer, T& val) {
  std::memcpy(&val, buffer, sizeof(T));
  buffer += sizeof(T);
}

}
|
||||
|
||||
cudaError_t cudaYoloLayer_e(
|
||||
const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_r(
|
||||
const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes,
|
||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
|
||||
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses,
|
||||
cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer_nc(
|
||||
const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes,
|
||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
|
||||
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaYoloLayer(
|
||||
const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes,
|
||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||
const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
|
||||
const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
cudaError_t cudaRegionLayer(
|
||||
const void* input, void* softmax, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
YoloLayer::YoloLayer (const void* data, size_t length)
|
||||
{
|
||||
const char *d = static_cast<const char*>(data);
|
||||
cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
read(d, m_NetWidth);
|
||||
read(d, m_NetHeight);
|
||||
read(d, m_NumClasses);
|
||||
read(d, m_NewCoords);
|
||||
read(d, m_OutputSize);
|
||||
read(d, m_Type);
|
||||
read(d, m_ScoreThreshold);
|
||||
cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores,
|
||||
void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold,
|
||||
const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
||||
const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);
|
||||
|
||||
if (m_Type != 3) {
|
||||
uint yoloTensorsSize;
|
||||
read(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i)
|
||||
{
|
||||
TensorInfo curYoloTensor;
|
||||
read(d, curYoloTensor.gridSizeX);
|
||||
read(d, curYoloTensor.gridSizeY);
|
||||
read(d, curYoloTensor.numBBoxes);
|
||||
read(d, curYoloTensor.scaleXY);
|
||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes,
|
||||
void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize,
|
||||
const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY,
|
||||
const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream);
|
||||
|
||||
uint anchorsSize;
|
||||
read(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; j++)
|
||||
{
|
||||
float result;
|
||||
read(d, result);
|
||||
curYoloTensor.anchors.push_back(result);
|
||||
}
|
||||
YoloLayer::YoloLayer(const void* data, size_t length) {
|
||||
const char* d = static_cast<const char*>(data);
|
||||
|
||||
uint maskSize;
|
||||
read(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; j++)
|
||||
{
|
||||
int result;
|
||||
read(d, result);
|
||||
curYoloTensor.mask.push_back(result);
|
||||
}
|
||||
m_YoloTensors.push_back(curYoloTensor);
|
||||
}
|
||||
read(d, m_NetWidth);
|
||||
read(d, m_NetHeight);
|
||||
read(d, m_NumClasses);
|
||||
read(d, m_NewCoords);
|
||||
read(d, m_OutputSize);
|
||||
read(d, m_Type);
|
||||
read(d, m_ScoreThreshold);
|
||||
|
||||
if (m_Type != 3 && m_Type != 4) {
|
||||
uint yoloTensorsSize;
|
||||
read(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
TensorInfo curYoloTensor;
|
||||
read(d, curYoloTensor.gridSizeX);
|
||||
read(d, curYoloTensor.gridSizeY);
|
||||
read(d, curYoloTensor.numBBoxes);
|
||||
read(d, curYoloTensor.scaleXY);
|
||||
|
||||
uint anchorsSize;
|
||||
read(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; ++j) {
|
||||
float result;
|
||||
read(d, result);
|
||||
curYoloTensor.anchors.push_back(result);
|
||||
}
|
||||
|
||||
uint maskSize;
|
||||
read(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; ++j) {
|
||||
int result;
|
||||
read(d, result);
|
||||
curYoloTensor.mask.push_back(result);
|
||||
}
|
||||
|
||||
m_YoloTensors.push_back(curYoloTensor);
|
||||
}
|
||||
|
||||
kNUM_CLASSES = m_NumClasses;
|
||||
}
|
||||
};
|
||||
|
||||
YoloLayer::YoloLayer(
|
||||
const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords,
|
||||
YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords,
|
||||
const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize, const uint& modelType,
|
||||
const float& scoreThreshold) :
|
||||
m_NetWidth(netWidth),
|
||||
m_NetHeight(netHeight),
|
||||
m_NumClasses(numClasses),
|
||||
m_NewCoords(newCoords),
|
||||
m_YoloTensors(yoloTensors),
|
||||
m_OutputSize(outputSize),
|
||||
m_Type(modelType),
|
||||
const float& scoreThreshold) : m_NetWidth(netWidth), m_NetHeight(netHeight), m_NumClasses(numClasses),
|
||||
m_NewCoords(newCoords), m_YoloTensors(yoloTensors), m_OutputSize(outputSize), m_Type(modelType),
|
||||
m_ScoreThreshold(scoreThreshold)
|
||||
{
|
||||
assert(m_NetWidth > 0);
|
||||
assert(m_NetHeight > 0);
|
||||
|
||||
kNUM_CLASSES = m_NumClasses;
|
||||
assert(m_NetWidth > 0);
|
||||
assert(m_NetHeight > 0);
|
||||
};
|
||||
|
||||
nvinfer1::Dims
|
||||
YoloLayer::getOutputDimensions(
|
||||
int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept
|
||||
YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept
|
||||
{
|
||||
assert(index <= 4);
|
||||
if (index == 0) {
|
||||
return nvinfer1::Dims{1, {1}};
|
||||
}
|
||||
else if (index == 1) {
|
||||
return nvinfer1::Dims{2, {static_cast<int>(m_OutputSize), 4}};
|
||||
}
|
||||
return nvinfer1::Dims{1, {static_cast<int>(m_OutputSize)}};
|
||||
assert(index <= 4);
|
||||
if (index == 0)
|
||||
return nvinfer1::Dims{1, {1}};
|
||||
else if (index == 1)
|
||||
return nvinfer1::Dims{2, {static_cast<int>(m_OutputSize), 4}};
|
||||
return nvinfer1::Dims{1, {static_cast<int>(m_OutputSize)}};
|
||||
}
|
||||
|
||||
bool YoloLayer::supportsFormat (
|
||||
nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept {
|
||||
return (type == nvinfer1::DataType::kFLOAT &&
|
||||
format == nvinfer1::PluginFormat::kLINEAR);
|
||||
bool
|
||||
YoloLayer::supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept {
|
||||
return (type == nvinfer1::DataType::kFLOAT && format == nvinfer1::PluginFormat::kLINEAR);
|
||||
}
|
||||
|
||||
void
|
||||
YoloLayer::configureWithFormat (
|
||||
const nvinfer1::Dims* inputDims, int nbInputs,
|
||||
const nvinfer1::Dims* outputDims, int nbOutputs,
|
||||
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept
|
||||
YoloLayer::configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims,
|
||||
int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept
|
||||
{
|
||||
assert(nbInputs > 0);
|
||||
assert(format == nvinfer1::PluginFormat::kLINEAR);
|
||||
assert(inputDims != nullptr);
|
||||
assert(nbInputs > 0);
|
||||
assert(format == nvinfer1::PluginFormat::kLINEAR);
|
||||
assert(inputDims != nullptr);
|
||||
}
|
||||
|
||||
int32_t YoloLayer::enqueue (
|
||||
int batchSize, void const* const* inputs, void* const* outputs, void* workspace,
|
||||
cudaStream_t stream) noexcept
|
||||
int32_t
|
||||
YoloLayer::enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
|
||||
noexcept
|
||||
{
|
||||
void* num_detections = outputs[0];
|
||||
void* detection_boxes = outputs[1];
|
||||
void* detection_scores = outputs[2];
|
||||
void* detection_classes = outputs[3];
|
||||
void* num_detections = outputs[0];
|
||||
void* detection_boxes = outputs[1];
|
||||
void* detection_scores = outputs[2];
|
||||
void* detection_classes = outputs[3];
|
||||
|
||||
CUDA_CHECK(cudaMemsetAsync((int*)num_detections, 0, sizeof(int) * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)detection_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)detection_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((int*)detection_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((int*)num_detections, 0, sizeof(int) * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)detection_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)detection_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream));
|
||||
CUDA_CHECK(cudaMemsetAsync((int*)detection_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream));
|
||||
|
||||
if (m_Type == 3)
|
||||
{
|
||||
CUDA_CHECK(cudaYoloLayer_e(
|
||||
inputs[0], inputs[1], num_detections, detection_boxes, detection_scores, detection_classes, batchSize,
|
||||
m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream));
|
||||
}
|
||||
else
|
||||
{
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i)
|
||||
{
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
if (m_Type == 4) {
|
||||
CUDA_CHECK(cudaYoloLayer_v8(inputs[0], num_detections, detection_boxes, detection_scores, detection_classes, batchSize,
|
||||
m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream));
|
||||
}
|
||||
else if (m_Type == 3) {
|
||||
CUDA_CHECK(cudaYoloLayer_e(inputs[0], inputs[1], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream));
|
||||
}
|
||||
else {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
|
||||
uint numBBoxes = curYoloTensor.numBBoxes;
|
||||
float scaleXY = curYoloTensor.scaleXY;
|
||||
uint gridSizeX = curYoloTensor.gridSizeX;
|
||||
uint gridSizeY = curYoloTensor.gridSizeY;
|
||||
std::vector<float> anchors = curYoloTensor.anchors;
|
||||
std::vector<int> mask = curYoloTensor.mask;
|
||||
uint numBBoxes = curYoloTensor.numBBoxes;
|
||||
float scaleXY = curYoloTensor.scaleXY;
|
||||
uint gridSizeX = curYoloTensor.gridSizeX;
|
||||
uint gridSizeY = curYoloTensor.gridSizeY;
|
||||
std::vector<float> anchors = curYoloTensor.anchors;
|
||||
std::vector<int> mask = curYoloTensor.mask;
|
||||
|
||||
void* v_anchors;
|
||||
void* v_mask;
|
||||
if (anchors.size() > 0) {
|
||||
float* f_anchors = anchors.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice,
|
||||
stream));
|
||||
}
|
||||
if (mask.size() > 0) {
|
||||
int* f_mask = mask.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
void* v_anchors;
|
||||
void* v_mask;
|
||||
if (anchors.size() > 0) {
|
||||
float* f_anchors = anchors.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
if (mask.size() > 0) {
|
||||
int* f_mask = mask.data();
|
||||
CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
|
||||
CUDA_CHECK(cudaMemcpyAsync(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses));
|
||||
uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses));
|
||||
|
||||
if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0
|
||||
CUDA_CHECK(cudaYoloLayer_r(
|
||||
inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, inputSize,
|
||||
m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes,
|
||||
2.0, v_anchors, v_mask, stream));
|
||||
}
|
||||
else if (m_Type == 1) {
|
||||
if (m_NewCoords) {
|
||||
CUDA_CHECK(cudaYoloLayer_nc(
|
||||
inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize,
|
||||
inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
else {
|
||||
CUDA_CHECK(cudaYoloLayer(
|
||||
inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize,
|
||||
inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
}
|
||||
else {
|
||||
void* softmax;
|
||||
CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream));
|
||||
|
||||
CUDA_CHECK(cudaRegionLayer(
|
||||
inputs[i], softmax, num_detections, detection_boxes, detection_scores, detection_classes, batchSize,
|
||||
inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses,
|
||||
numBBoxes, v_anchors, stream));
|
||||
|
||||
CUDA_CHECK(cudaFree(softmax));
|
||||
}
|
||||
|
||||
if (anchors.size() > 0) {
|
||||
CUDA_CHECK(cudaFree(v_anchors));
|
||||
}
|
||||
if (mask.size() > 0) {
|
||||
CUDA_CHECK(cudaFree(v_mask));
|
||||
}
|
||||
if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0
|
||||
CUDA_CHECK(cudaYoloLayer_r(inputs[i], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, 2.0, v_anchors, v_mask, stream));
|
||||
}
|
||||
else if (m_Type == 1) {
|
||||
if (m_NewCoords) {
|
||||
CUDA_CHECK(cudaYoloLayer_nc( inputs[i], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t YoloLayer::getSerializationSize() const noexcept
|
||||
{
|
||||
size_t totalSize = 0;
|
||||
|
||||
totalSize += sizeof(m_NetWidth);
|
||||
totalSize += sizeof(m_NetHeight);
|
||||
totalSize += sizeof(m_NumClasses);
|
||||
totalSize += sizeof(m_NewCoords);
|
||||
totalSize += sizeof(m_OutputSize);
|
||||
totalSize += sizeof(m_Type);
|
||||
totalSize += sizeof(m_ScoreThreshold);
|
||||
|
||||
if (m_Type != 3) {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
totalSize += sizeof(yoloTensorsSize);
|
||||
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i)
|
||||
{
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeX);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeY);
|
||||
totalSize += sizeof(curYoloTensor.numBBoxes);
|
||||
totalSize += sizeof(curYoloTensor.scaleXY);
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size();
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size();
|
||||
else {
|
||||
CUDA_CHECK(cudaYoloLayer(inputs[i], num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream));
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
void* softmax;
|
||||
CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
|
||||
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream));
|
||||
|
||||
return totalSize;
|
||||
CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, num_detections, detection_boxes, detection_scores, detection_classes,
|
||||
batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY,
|
||||
m_NumClasses, numBBoxes, v_anchors, stream));
|
||||
|
||||
CUDA_CHECK(cudaFree(softmax));
|
||||
}
|
||||
|
||||
if (anchors.size() > 0) {
|
||||
CUDA_CHECK(cudaFree(v_anchors));
|
||||
}
|
||||
if (mask.size() > 0) {
|
||||
CUDA_CHECK(cudaFree(v_mask));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void YoloLayer::serialize(void* buffer) const noexcept
|
||||
size_t
|
||||
YoloLayer::getSerializationSize() const noexcept
|
||||
{
|
||||
char *d = static_cast<char*>(buffer);
|
||||
size_t totalSize = 0;
|
||||
|
||||
write(d, m_NetWidth);
|
||||
write(d, m_NetHeight);
|
||||
write(d, m_NumClasses);
|
||||
write(d, m_NewCoords);
|
||||
write(d, m_OutputSize);
|
||||
write(d, m_Type);
|
||||
write(d, m_ScoreThreshold);
|
||||
totalSize += sizeof(m_NetWidth);
|
||||
totalSize += sizeof(m_NetHeight);
|
||||
totalSize += sizeof(m_NumClasses);
|
||||
totalSize += sizeof(m_NewCoords);
|
||||
totalSize += sizeof(m_OutputSize);
|
||||
totalSize += sizeof(m_Type);
|
||||
totalSize += sizeof(m_ScoreThreshold);
|
||||
|
||||
if (m_Type != 3) {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
write(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i)
|
||||
{
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
write(d, curYoloTensor.gridSizeX);
|
||||
write(d, curYoloTensor.gridSizeY);
|
||||
write(d, curYoloTensor.numBBoxes);
|
||||
write(d, curYoloTensor.scaleXY);
|
||||
if (m_Type != 3 && m_Type != 4) {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
totalSize += sizeof(yoloTensorsSize);
|
||||
|
||||
uint anchorsSize = curYoloTensor.anchors.size();
|
||||
write(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; ++j)
|
||||
{
|
||||
write(d, curYoloTensor.anchors[j]);
|
||||
}
|
||||
|
||||
uint maskSize = curYoloTensor.mask.size();
|
||||
write(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; ++j)
|
||||
{
|
||||
write(d, curYoloTensor.mask[j]);
|
||||
}
|
||||
}
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeX);
|
||||
totalSize += sizeof(curYoloTensor.gridSizeY);
|
||||
totalSize += sizeof(curYoloTensor.numBBoxes);
|
||||
totalSize += sizeof(curYoloTensor.scaleXY);
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size();
|
||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size();
|
||||
}
|
||||
}
|
||||
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2* YoloLayer::clone() const noexcept
|
||||
void
|
||||
YoloLayer::serialize(void* buffer) const noexcept
|
||||
{
|
||||
return new YoloLayer (
|
||||
m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize, m_Type, m_ScoreThreshold);
|
||||
char* d = static_cast<char*>(buffer);
|
||||
|
||||
write(d, m_NetWidth);
|
||||
write(d, m_NetHeight);
|
||||
write(d, m_NumClasses);
|
||||
write(d, m_NewCoords);
|
||||
write(d, m_OutputSize);
|
||||
write(d, m_Type);
|
||||
write(d, m_ScoreThreshold);
|
||||
|
||||
if (m_Type != 3 && m_Type != 4) {
|
||||
uint yoloTensorsSize = m_YoloTensors.size();
|
||||
write(d, yoloTensorsSize);
|
||||
for (uint i = 0; i < yoloTensorsSize; ++i) {
|
||||
const TensorInfo& curYoloTensor = m_YoloTensors.at(i);
|
||||
write(d, curYoloTensor.gridSizeX);
|
||||
write(d, curYoloTensor.gridSizeY);
|
||||
write(d, curYoloTensor.numBBoxes);
|
||||
write(d, curYoloTensor.scaleXY);
|
||||
|
||||
uint anchorsSize = curYoloTensor.anchors.size();
|
||||
write(d, anchorsSize);
|
||||
for (uint j = 0; j < anchorsSize; ++j)
|
||||
write(d, curYoloTensor.anchors[j]);
|
||||
|
||||
uint maskSize = curYoloTensor.mask.size();
|
||||
write(d, maskSize);
|
||||
for (uint j = 0; j < maskSize; ++j)
|
||||
write(d, curYoloTensor.mask[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2*
|
||||
YoloLayer::clone() const noexcept
|
||||
{
|
||||
return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize, m_Type,
|
||||
m_ScoreThreshold);
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator);
|
||||
|
||||
@@ -26,88 +26,64 @@
|
||||
#ifndef __YOLO_PLUGINS__
|
||||
#define __YOLO_PLUGINS__
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "NvInferPlugin.h"
|
||||
|
||||
#include "yolo.h"
|
||||
|
||||
#define CUDA_CHECK(status) \
|
||||
{ \
|
||||
if (status != 0) \
|
||||
{ \
|
||||
std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " \
|
||||
<< __LINE__ << std::endl; \
|
||||
abort(); \
|
||||
} \
|
||||
}
|
||||
#define CUDA_CHECK(status) { \
|
||||
if (status != 0) { \
|
||||
std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << __LINE__ << \
|
||||
std::endl; \
|
||||
abort(); \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
const char* YOLOLAYER_PLUGIN_VERSION {"1"};
|
||||
const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"};
|
||||
namespace {
|
||||
const char* YOLOLAYER_PLUGIN_VERSION {"1"};
|
||||
const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"};
|
||||
} // namespace
|
||||
|
||||
class YoloLayer : public nvinfer1::IPluginV2
|
||||
{
|
||||
public:
|
||||
YoloLayer (const void* data, size_t length);
|
||||
class YoloLayer : public nvinfer1::IPluginV2 {
|
||||
public:
|
||||
YoloLayer(const void* data, size_t length);
|
||||
|
||||
YoloLayer (
|
||||
const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords,
|
||||
YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords,
|
||||
const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize, const uint& modelType,
|
||||
const float& scoreThreshold);
|
||||
|
||||
const char* getPluginType () const noexcept override { return YOLOLAYER_PLUGIN_NAME; }
|
||||
const char* getPluginType() const noexcept override { return YOLOLAYER_PLUGIN_NAME; }
|
||||
|
||||
const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; }
|
||||
const char* getPluginVersion() const noexcept override { return YOLOLAYER_PLUGIN_VERSION; }
|
||||
|
||||
int getNbOutputs () const noexcept override { return 4; }
|
||||
int getNbOutputs() const noexcept override { return 4; }
|
||||
|
||||
nvinfer1::Dims getOutputDimensions (
|
||||
int index, const nvinfer1::Dims* inputs,
|
||||
int nbInputDims) noexcept override;
|
||||
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept override;
|
||||
|
||||
bool supportsFormat (
|
||||
nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept override;
|
||||
bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept override;
|
||||
|
||||
void configureWithFormat (
|
||||
const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs,
|
||||
void configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs,
|
||||
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept override;
|
||||
|
||||
int initialize () noexcept override { return 0; }
|
||||
int initialize() noexcept override { return 0; }
|
||||
|
||||
void terminate () noexcept override {}
|
||||
void terminate() noexcept override {}
|
||||
|
||||
size_t getWorkspaceSize (int maxBatchSize) const noexcept override { return 0; }
|
||||
size_t getWorkspaceSize(int maxBatchSize) const noexcept override { return 0; }
|
||||
|
||||
int32_t enqueue (
|
||||
int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
|
||||
int32_t enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
|
||||
noexcept override;
|
||||
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
|
||||
void serialize (void* buffer) const noexcept override;
|
||||
void serialize(void* buffer) const noexcept override;
|
||||
|
||||
void destroy () noexcept override { delete this; }
|
||||
void destroy() noexcept override { delete this; }
|
||||
|
||||
nvinfer1::IPluginV2* clone() const noexcept override;
|
||||
|
||||
void setPluginNamespace (const char* pluginNamespace) noexcept override {
|
||||
m_Namespace = pluginNamespace;
|
||||
}
|
||||
void setPluginNamespace(const char* pluginNamespace) noexcept override { m_Namespace = pluginNamespace; }
|
||||
|
||||
virtual const char* getPluginNamespace () const noexcept override {
|
||||
return m_Namespace.c_str();
|
||||
}
|
||||
virtual const char* getPluginNamespace() const noexcept override { return m_Namespace.c_str(); }
|
||||
|
||||
private:
|
||||
private:
|
||||
std::string m_Namespace {""};
|
||||
uint m_NetWidth {0};
|
||||
uint m_NetHeight {0};
|
||||
@@ -119,47 +95,37 @@ private:
|
||||
float m_ScoreThreshold {0};
|
||||
};
|
||||
|
||||
class YoloLayerPluginCreator : public nvinfer1::IPluginCreator
|
||||
{
|
||||
public:
|
||||
YoloLayerPluginCreator () {}
|
||||
class YoloLayerPluginCreator : public nvinfer1::IPluginCreator {
|
||||
public:
|
||||
YoloLayerPluginCreator() {}
|
||||
|
||||
~YoloLayerPluginCreator () {}
|
||||
~YoloLayerPluginCreator() {}
|
||||
|
||||
const char* getPluginName () const noexcept override { return YOLOLAYER_PLUGIN_NAME; }
|
||||
const char* getPluginName() const noexcept override { return YOLOLAYER_PLUGIN_NAME; }
|
||||
|
||||
const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; }
|
||||
const char* getPluginVersion() const noexcept override { return YOLOLAYER_PLUGIN_VERSION; }
|
||||
|
||||
const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override {
|
||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented" << std::endl;
|
||||
return nullptr;
|
||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented" << std::endl;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2* createPlugin (
|
||||
const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override
|
||||
{
|
||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented";
|
||||
return nullptr;
|
||||
nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override {
|
||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2* deserializePlugin (
|
||||
const char* name, const void* serialData, size_t serialLength) noexcept override
|
||||
{
|
||||
std::cout << "Deserialize yoloLayer plugin: " << name << std::endl;
|
||||
return new YoloLayer(serialData, serialLength);
|
||||
nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override {
|
||||
std::cout << "Deserialize yoloLayer plugin: " << name << std::endl;
|
||||
return new YoloLayer(serialData, serialLength);
|
||||
}
|
||||
|
||||
void setPluginNamespace(const char* libNamespace) noexcept override {
|
||||
m_Namespace = libNamespace;
|
||||
}
|
||||
const char* getPluginNamespace() const noexcept override {
|
||||
return m_Namespace.c_str();
|
||||
}
|
||||
void setPluginNamespace(const char* libNamespace) noexcept override { m_Namespace = libNamespace; }
|
||||
|
||||
private:
|
||||
const char* getPluginNamespace() const noexcept override { return m_Namespace.c_str(); }
|
||||
|
||||
private:
|
||||
std::string m_Namespace {""};
|
||||
};
|
||||
|
||||
extern uint kNUM_CLASSES;
|
||||
|
||||
#endif // __YOLO_PLUGINS__
|
||||
|
||||
Reference in New Issue
Block a user