Add PP-YOLOE support
This commit is contained in:
25
config_infer_primary_ppyoloe.txt
Normal file
25
config_infer_primary_ppyoloe.txt
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
[property]
|
||||||
|
gpu-id=0
|
||||||
|
net-scale-factor=0.0173520735727919486
|
||||||
|
offsets=123.675;116.28;103.53
|
||||||
|
model-color-format=0
|
||||||
|
custom-network-config=ppyoloe_crn_s_400e_coco.cfg
|
||||||
|
model-file=ppyoloe_crn_s_400e_coco.wts
|
||||||
|
model-engine-file=model_b1_gpu0_fp32.engine
|
||||||
|
#int8-calib-file=calib.table
|
||||||
|
labelfile-path=labels.txt
|
||||||
|
batch-size=1
|
||||||
|
network-mode=0
|
||||||
|
num-detected-classes=80
|
||||||
|
interval=0
|
||||||
|
gie-unique-id=1
|
||||||
|
process-mode=1
|
||||||
|
network-type=0
|
||||||
|
cluster-mode=4
|
||||||
|
maintain-aspect-ratio=0
|
||||||
|
parse-bbox-func-name=NvDsInferParseYolo
|
||||||
|
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
|
||||||
|
engine-create-func-name=NvDsInferYoloCudaEngineGet
|
||||||
|
|
||||||
|
[class-attrs-all]
|
||||||
|
pre-cluster-threshold=0
|
||||||
115
docs/PPYOLOE.md
Normal file
115
docs/PPYOLOE.md
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
# PP-YOLOE usage
|
||||||
|
|
||||||
|
* [Convert model](#convert-model)
|
||||||
|
* [Compile the lib](#compile-the-lib)
|
||||||
|
* [Edit the config_infer_primary_ppyoloe file](#edit-the-config_infer_primary_ppyoloe-file)
|
||||||
|
* [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file)
|
||||||
|
* [Testing the model](#testing-the-model)
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
### Convert model
|
||||||
|
|
||||||
|
#### 1. Download the PaddleDetection repo and install the requirements
|
||||||
|
|
||||||
|
https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/docs/tutorials/INSTALL.md
|
||||||
|
|
||||||
|
**NOTE**: It is recommended to use a Python virtualenv.
|
||||||
|
|
||||||
|
#### 2. Copy the converter
|
||||||
|
|
||||||
|
Copy the `gen_wts_ppyoloe.py` file from the `DeepStream-Yolo/utils` directory to the `PaddleDetection` folder.
|
||||||
|
|
||||||
|
#### 3. Download the model
|
||||||
|
|
||||||
|
Download the `pdparams` file from [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe) releases (example for PP-YOLOE-s)
|
||||||
|
|
||||||
|
```
|
||||||
|
wget https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_400e_coco.pdparams
|
||||||
|
```
|
||||||
|
|
||||||
|
**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`ppyoloe_`) in your `cfg` and `weights`/`wts` filenames to generate the engine correctly.
|
||||||
|
|
||||||
|
#### 4. Convert model
|
||||||
|
|
||||||
|
Generate the `cfg` and `wts` files (example for PP-YOLOE-s)
|
||||||
|
|
||||||
|
```
|
||||||
|
python3 gen_wts_ppyoloe.py -w ppyoloe_crn_s_400e_coco.pdparams -c configs/ppyoloe/ppyoloe_crn_s_400e_coco.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 5. Copy generated files
|
||||||
|
|
||||||
|
Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder.
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
### Compile the lib
|
||||||
|
|
||||||
|
Open the `DeepStream-Yolo` folder and compile the lib
|
||||||
|
|
||||||
|
* DeepStream 6.1 on x86 platform
|
||||||
|
|
||||||
|
```
|
||||||
|
CUDA_VER=11.6 make -C nvdsinfer_custom_impl_Yolo
|
||||||
|
```
|
||||||
|
|
||||||
|
* DeepStream 6.0.1 / 6.0 on x86 platform
|
||||||
|
|
||||||
|
```
|
||||||
|
CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo
|
||||||
|
```
|
||||||
|
|
||||||
|
* DeepStream 6.1 on Jetson platform
|
||||||
|
|
||||||
|
```
|
||||||
|
CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo
|
||||||
|
```
|
||||||
|
|
||||||
|
* DeepStream 6.0.1 / 6.0 on Jetson platform
|
||||||
|
|
||||||
|
```
|
||||||
|
CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
|
||||||
|
```
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
### Edit the config_infer_primary_ppyoloe file
|
||||||
|
|
||||||
|
Edit the `config_infer_primary_ppyoloe.txt` file according to your model (example for PP-YOLOE-s)
|
||||||
|
|
||||||
|
```
|
||||||
|
[property]
|
||||||
|
...
|
||||||
|
custom-network-config=ppyoloe_crn_s_400e_coco.cfg
|
||||||
|
model-file=ppyoloe_crn_s_400e_coco.wts
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
**NOTE**: PP-YOLOE applies normalization during image preprocessing. It is important to change the `net-scale-factor` and `offsets` values according to the mean and std values used in training.
|
||||||
|
|
||||||
|
Default: `mean = 0.485, 0.456, 0.406` and `std = 0.229, 0.224, 0.225`, which give `offsets = mean * 255` and `net-scale-factor = 1 / (255 * average(std))`
|
||||||
|
|
||||||
|
```
|
||||||
|
net-scale-factor=0.0173520735727919486
|
||||||
|
offsets=123.675;116.28;103.53
|
||||||
|
```
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
### Edit the deepstream_app_config file
|
||||||
|
|
||||||
|
```
|
||||||
|
...
|
||||||
|
[primary-gie]
|
||||||
|
...
|
||||||
|
config-file=config_infer_primary_ppyoloe.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
### Testing the model
|
||||||
|
|
||||||
|
```
|
||||||
|
deepstream-app -c deepstream_app_config.txt
|
||||||
|
```
|
||||||
@@ -22,7 +22,7 @@ cd yolor
|
|||||||
pip3 install -r requirements.txt
|
pip3 install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
**NOTE**: It is recommended to use a Python virtualenv.
|
**NOTE**: It is recommended to use Python virtualenv.
|
||||||
|
|
||||||
#### 2. Copy conversor
|
#### 2. Copy conversor
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ cd yolov5
|
|||||||
pip3 install -r requirements.txt
|
pip3 install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
**NOTE**: It is recommended to use a Python virtualenv.
|
**NOTE**: It is recommended to use Python virtualenv.
|
||||||
|
|
||||||
#### 2. Copy conversor
|
#### 2. Copy conversor
|
||||||
|
|
||||||
|
|||||||
@@ -59,15 +59,21 @@ SRCFILES:= nvdsinfer_yolo_engine.cpp \
|
|||||||
layers/shortcut_layer.cpp \
|
layers/shortcut_layer.cpp \
|
||||||
layers/route_layer.cpp \
|
layers/route_layer.cpp \
|
||||||
layers/upsample_layer.cpp \
|
layers/upsample_layer.cpp \
|
||||||
layers/maxpool_layer.cpp \
|
layers/pooling_layer.cpp \
|
||||||
layers/activation_layer.cpp \
|
layers/activation_layer.cpp \
|
||||||
layers/reorgv5_layer.cpp \
|
layers/reorg_layer.cpp \
|
||||||
|
layers/reduce_layer.cpp \
|
||||||
|
layers/shuffle_layer.cpp \
|
||||||
|
layers/softmax_layer.cpp \
|
||||||
|
layers/cls_layer.cpp \
|
||||||
|
layers/reg_layer.cpp \
|
||||||
utils.cpp \
|
utils.cpp \
|
||||||
yolo.cpp \
|
yolo.cpp \
|
||||||
yoloForward.cu \
|
yoloForward.cu \
|
||||||
yoloForward_v2.cu \
|
yoloForward_v2.cu \
|
||||||
yoloForward_nc.cu \
|
yoloForward_nc.cu \
|
||||||
yoloForward_r.cu \
|
yoloForward_r.cu \
|
||||||
|
yoloForward_e.cu \
|
||||||
sortDetections.cu
|
sortDetections.cu
|
||||||
|
|
||||||
ifeq ($(OPENCV), 1)
|
ifeq ($(OPENCV), 1)
|
||||||
|
|||||||
@@ -5,114 +5,113 @@
|
|||||||
|
|
||||||
#include "activation_layer.h"
|
#include "activation_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* activationLayer(
|
nvinfer1::ITensor* activationLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::string activation,
|
std::string activation,
|
||||||
nvinfer1::ILayer* output,
|
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
if (activation == "linear")
|
if (activation == "linear")
|
||||||
{
|
{
|
||||||
// Pass
|
output = input;
|
||||||
}
|
}
|
||||||
else if (activation == "relu")
|
else if (activation == "relu")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* relu = network->addActivation(
|
nvinfer1::IActivationLayer* relu = network->addActivation(*input, nvinfer1::ActivationType::kRELU);
|
||||||
*input, nvinfer1::ActivationType::kRELU);
|
|
||||||
assert(relu != nullptr);
|
assert(relu != nullptr);
|
||||||
std::string reluLayerName = "relu_" + std::to_string(layerIdx);
|
std::string reluLayerName = "relu_" + std::to_string(layerIdx);
|
||||||
relu->setName(reluLayerName.c_str());
|
relu->setName(reluLayerName.c_str());
|
||||||
output = relu;
|
output = relu->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (activation == "sigmoid" || activation == "logistic")
|
else if (activation == "sigmoid" || activation == "logistic")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* sigmoid = network->addActivation(
|
nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID);
|
||||||
*input, nvinfer1::ActivationType::kSIGMOID);
|
|
||||||
assert(sigmoid != nullptr);
|
assert(sigmoid != nullptr);
|
||||||
std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
|
std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
|
||||||
sigmoid->setName(sigmoidLayerName.c_str());
|
sigmoid->setName(sigmoidLayerName.c_str());
|
||||||
output = sigmoid;
|
output = sigmoid->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (activation == "tanh")
|
else if (activation == "tanh")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* tanh = network->addActivation(
|
nvinfer1::IActivationLayer* tanh = network->addActivation(*input, nvinfer1::ActivationType::kTANH);
|
||||||
*input, nvinfer1::ActivationType::kTANH);
|
|
||||||
assert(tanh != nullptr);
|
assert(tanh != nullptr);
|
||||||
std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
|
std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
|
||||||
tanh->setName(tanhLayerName.c_str());
|
tanh->setName(tanhLayerName.c_str());
|
||||||
output = tanh;
|
output = tanh->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (activation == "leaky")
|
else if (activation == "leaky")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* leaky = network->addActivation(
|
nvinfer1::IActivationLayer* leaky = network->addActivation(*input, nvinfer1::ActivationType::kLEAKY_RELU);
|
||||||
*input, nvinfer1::ActivationType::kLEAKY_RELU);
|
|
||||||
assert(leaky != nullptr);
|
assert(leaky != nullptr);
|
||||||
leaky->setAlpha(0.1);
|
|
||||||
std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
|
std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
|
||||||
leaky->setName(leakyLayerName.c_str());
|
leaky->setName(leakyLayerName.c_str());
|
||||||
output = leaky;
|
leaky->setAlpha(0.1);
|
||||||
|
output = leaky->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (activation == "softplus")
|
else if (activation == "softplus")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* softplus = network->addActivation(
|
nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS);
|
||||||
*input, nvinfer1::ActivationType::kSOFTPLUS);
|
|
||||||
assert(softplus != nullptr);
|
assert(softplus != nullptr);
|
||||||
std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
|
std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
|
||||||
softplus->setName(softplusLayerName.c_str());
|
softplus->setName(softplusLayerName.c_str());
|
||||||
output = softplus;
|
output = softplus->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (activation == "mish")
|
else if (activation == "mish")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* softplus = network->addActivation(
|
nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS);
|
||||||
*input, nvinfer1::ActivationType::kSOFTPLUS);
|
|
||||||
assert(softplus != nullptr);
|
assert(softplus != nullptr);
|
||||||
std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
|
std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
|
||||||
softplus->setName(softplusLayerName.c_str());
|
softplus->setName(softplusLayerName.c_str());
|
||||||
nvinfer1::IActivationLayer* tanh = network->addActivation(
|
nvinfer1::IActivationLayer* tanh = network->addActivation(*softplus->getOutput(0), nvinfer1::ActivationType::kTANH);
|
||||||
*softplus->getOutput(0), nvinfer1::ActivationType::kTANH);
|
|
||||||
assert(tanh != nullptr);
|
assert(tanh != nullptr);
|
||||||
std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
|
std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
|
||||||
tanh->setName(tanhLayerName.c_str());
|
tanh->setName(tanhLayerName.c_str());
|
||||||
nvinfer1::IElementWiseLayer* mish = network->addElementWise(
|
nvinfer1::IElementWiseLayer* mish
|
||||||
*input, *tanh->getOutput(0),
|
= network->addElementWise(*input, *tanh->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
|
||||||
nvinfer1::ElementWiseOperation::kPROD);
|
|
||||||
assert(mish != nullptr);
|
assert(mish != nullptr);
|
||||||
std::string mishLayerName = "mish_" + std::to_string(layerIdx);
|
std::string mishLayerName = "mish_" + std::to_string(layerIdx);
|
||||||
mish->setName(mishLayerName.c_str());
|
mish->setName(mishLayerName.c_str());
|
||||||
output = mish;
|
output = mish->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (activation == "silu" || activation == "swish")
|
else if (activation == "silu" || activation == "swish")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* sigmoid = network->addActivation(
|
nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID);
|
||||||
*input, nvinfer1::ActivationType::kSIGMOID);
|
|
||||||
assert(sigmoid != nullptr);
|
assert(sigmoid != nullptr);
|
||||||
std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
|
std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
|
||||||
sigmoid->setName(sigmoidLayerName.c_str());
|
sigmoid->setName(sigmoidLayerName.c_str());
|
||||||
nvinfer1::IElementWiseLayer* silu = network->addElementWise(
|
nvinfer1::IElementWiseLayer* silu
|
||||||
*input, *sigmoid->getOutput(0),
|
= network->addElementWise(*input, *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
|
||||||
nvinfer1::ElementWiseOperation::kPROD);
|
|
||||||
assert(silu != nullptr);
|
assert(silu != nullptr);
|
||||||
std::string siluLayerName = "silu_" + std::to_string(layerIdx);
|
std::string siluLayerName = "silu_" + std::to_string(layerIdx);
|
||||||
silu->setName(siluLayerName.c_str());
|
silu->setName(siluLayerName.c_str());
|
||||||
output = silu;
|
output = silu->getOutput(0);
|
||||||
|
}
|
||||||
|
else if (activation == "hardsigmoid")
|
||||||
|
{
|
||||||
|
nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID);
|
||||||
|
assert(hardsigmoid != nullptr);
|
||||||
|
std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx);
|
||||||
|
hardsigmoid->setName(hardsigmoidLayerName.c_str());
|
||||||
|
hardsigmoid->setAlpha(1.0 / 6.0);
|
||||||
|
hardsigmoid->setBeta(0.5);
|
||||||
|
output = hardsigmoid->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (activation == "hardswish")
|
else if (activation == "hardswish")
|
||||||
{
|
{
|
||||||
nvinfer1::IActivationLayer* hard_sigmoid = network->addActivation(
|
nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID);
|
||||||
*input, nvinfer1::ActivationType::kHARD_SIGMOID);
|
assert(hardsigmoid != nullptr);
|
||||||
assert(hard_sigmoid != nullptr);
|
std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx);
|
||||||
hard_sigmoid->setAlpha(1.0 / 6.0);
|
hardsigmoid->setName(hardsigmoidLayerName.c_str());
|
||||||
hard_sigmoid->setBeta(0.5);
|
hardsigmoid->setAlpha(1.0 / 6.0);
|
||||||
std::string hardSigmoidLayerName = "hard_sigmoid_" + std::to_string(layerIdx);
|
hardsigmoid->setBeta(0.5);
|
||||||
hard_sigmoid->setName(hardSigmoidLayerName.c_str());
|
nvinfer1::IElementWiseLayer* hardswish
|
||||||
nvinfer1::IElementWiseLayer* hard_swish = network->addElementWise(
|
= network->addElementWise(*input, *hardsigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
|
||||||
*input, *hard_sigmoid->getOutput(0),
|
assert(hardswish != nullptr);
|
||||||
nvinfer1::ElementWiseOperation::kPROD);
|
std::string hardswishLayerName = "hardswish_" + std::to_string(layerIdx);
|
||||||
assert(hard_swish != nullptr);
|
hardswish->setName(hardswishLayerName.c_str());
|
||||||
std::string hardSwishLayerName = "hard_swish_" + std::to_string(layerIdx);
|
output = hardswish->getOutput(0);
|
||||||
hard_swish->setName(hardSwishLayerName.c_str());
|
|
||||||
output = hard_swish;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -6,18 +6,14 @@
|
|||||||
#ifndef __ACTIVATION_LAYER_H__
|
#ifndef __ACTIVATION_LAYER_H__
|
||||||
#define __ACTIVATION_LAYER_H__
|
#define __ACTIVATION_LAYER_H__
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
|
|
||||||
#include "activation_layer.h"
|
nvinfer1::ITensor* activationLayer(
|
||||||
|
|
||||||
nvinfer1::ILayer* activationLayer(
|
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::string activation,
|
std::string activation,
|
||||||
nvinfer1::ILayer* output,
|
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::INetworkDefinition* network);
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "batchnorm_layer.h"
|
#include "batchnorm_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* batchnormLayer(
|
nvinfer1::ITensor* batchnormLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<float>& weights,
|
std::vector<float>& weights,
|
||||||
@@ -17,6 +17,8 @@ nvinfer1::ILayer* batchnormLayer(
|
|||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
assert(block.at("type") == "batchnorm");
|
assert(block.at("type") == "batchnorm");
|
||||||
assert(block.find("filters") != block.end());
|
assert(block.find("filters") != block.end());
|
||||||
|
|
||||||
@@ -28,7 +30,8 @@ nvinfer1::ILayer* batchnormLayer(
|
|||||||
std::vector<float> bnRunningMean;
|
std::vector<float> bnRunningMean;
|
||||||
std::vector<float> bnRunningVar;
|
std::vector<float> bnRunningVar;
|
||||||
|
|
||||||
if (weightsType == "weights") {
|
if (weightsType == "weights")
|
||||||
|
{
|
||||||
for (int i = 0; i < filters; ++i)
|
for (int i = 0; i < filters; ++i)
|
||||||
{
|
{
|
||||||
bnBiases.push_back(weights[weightPtr]);
|
bnBiases.push_back(weights[weightPtr]);
|
||||||
@@ -50,7 +53,8 @@ nvinfer1::ILayer* batchnormLayer(
|
|||||||
weightPtr++;
|
weightPtr++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
for (int i = 0; i < filters; ++i)
|
for (int i = 0; i < filters; ++i)
|
||||||
{
|
{
|
||||||
bnWeights.push_back(weights[weightPtr]);
|
bnWeights.push_back(weights[weightPtr]);
|
||||||
@@ -79,35 +83,27 @@ nvinfer1::ILayer* batchnormLayer(
|
|||||||
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
|
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||||
float* shiftWt = new float[size];
|
float* shiftWt = new float[size];
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
{
|
shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
|
||||||
shiftWt[i]
|
|
||||||
= bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
|
|
||||||
}
|
|
||||||
shift.values = shiftWt;
|
shift.values = shiftWt;
|
||||||
float* scaleWt = new float[size];
|
float* scaleWt = new float[size];
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
{
|
|
||||||
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
|
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
|
||||||
}
|
|
||||||
scale.values = scaleWt;
|
scale.values = scaleWt;
|
||||||
float* powerWt = new float[size];
|
float* powerWt = new float[size];
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
{
|
|
||||||
powerWt[i] = 1.0;
|
powerWt[i] = 1.0;
|
||||||
}
|
|
||||||
power.values = powerWt;
|
power.values = powerWt;
|
||||||
trtWeights.push_back(shift);
|
trtWeights.push_back(shift);
|
||||||
trtWeights.push_back(scale);
|
trtWeights.push_back(scale);
|
||||||
trtWeights.push_back(power);
|
trtWeights.push_back(power);
|
||||||
|
|
||||||
nvinfer1::IScaleLayer* bn = network->addScale(
|
nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
|
||||||
*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
|
assert(batchnorm != nullptr);
|
||||||
assert(bn != nullptr);
|
std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx);
|
||||||
std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
|
batchnorm->setName(batchnormLayerName.c_str());
|
||||||
bn->setName(bnLayerName.c_str());
|
output = batchnorm->getOutput(0);
|
||||||
nvinfer1::ILayer* output = bn;
|
|
||||||
|
|
||||||
output = activationLayer(layerIdx, activation, output, output->getOutput(0), network);
|
output = activationLayer(layerIdx, activation, output, network);
|
||||||
assert(output != nullptr);
|
assert(output != nullptr);
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
#include "activation_layer.h"
|
#include "activation_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* batchnormLayer(
|
nvinfer1::ITensor* batchnormLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<float>& weights,
|
std::vector<float>& weights,
|
||||||
|
|||||||
@@ -5,27 +5,32 @@
|
|||||||
|
|
||||||
#include "channels_layer.h"
|
#include "channels_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* channelsLayer(
|
nvinfer1::ITensor* channelsLayer(
|
||||||
std::string type,
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::ITensor* implicitTensor,
|
nvinfer1::ITensor* implicitTensor,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
nvinfer1::ILayer* output;
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
if (type == "shift") {
|
assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels");
|
||||||
nvinfer1::IElementWiseLayer* ew = network->addElementWise(
|
|
||||||
*input, *implicitTensor,
|
if (block.at("type") == "shift_channels") {
|
||||||
nvinfer1::ElementWiseOperation::kSUM);
|
nvinfer1::IElementWiseLayer* shift
|
||||||
assert(ew != nullptr);
|
= network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kSUM);
|
||||||
output = ew;
|
assert(shift != nullptr);
|
||||||
|
std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx);
|
||||||
|
shift->setName(shiftLayerName.c_str());
|
||||||
|
output = shift->getOutput(0);
|
||||||
}
|
}
|
||||||
else if (type == "control") {
|
else if (block.at("type") == "control_channels") {
|
||||||
nvinfer1::IElementWiseLayer* ew = network->addElementWise(
|
nvinfer1::IElementWiseLayer* control
|
||||||
*input, *implicitTensor,
|
= network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kPROD);
|
||||||
nvinfer1::ElementWiseOperation::kPROD);
|
assert(control != nullptr);
|
||||||
assert(ew != nullptr);
|
std::string controlLayerName = "control_channels_" + std::to_string(layerIdx);
|
||||||
output = ew;
|
control->setName(controlLayerName.c_str());
|
||||||
|
output = control->getOutput(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
|
|||||||
@@ -11,8 +11,9 @@
|
|||||||
|
|
||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* channelsLayer(
|
nvinfer1::ITensor* channelsLayer(
|
||||||
std::string type,
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::ITensor* implicitTensor,
|
nvinfer1::ITensor* implicitTensor,
|
||||||
nvinfer1::INetworkDefinition* network);
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|||||||
29
nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp
Normal file
29
nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cls_layer.h"
|
||||||
|
|
||||||
|
// Adds a shuffle layer on top of `input` and returns that layer's output tensor.
//
// layerIdx: index used to build the layer name ("shuffle_<layerIdx>").
// block:    parsed cfg block; only its "type" entry is read, and it is asserted to be "cls".
// input:    tensor fed to the shuffle layer.
// network:  network definition the shuffle layer is added to.
//
// The first transpose of the shuffle is set to a permutation with order[0] = 1 and
// order[1] = 0, i.e. the first two dimensions of `input` are swapped.
// NOTE(review): only order[0] and order[1] are assigned; the remaining entries of
// `permutation` are left as declared. Presumably `input` is 2-D here -- confirm.
nvinfer1::ITensor* clsLayer(
|
||||||
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
|
nvinfer1::ITensor* input,
|
||||||
|
nvinfer1::INetworkDefinition* network)
|
||||||
|
{
|
||||||
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
|
assert(block.at("type") == "cls");
|
||||||
|
|
||||||
|
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||||
|
assert(shuffle != nullptr);
|
||||||
|
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||||
|
shuffle->setName(shuffleLayerName.c_str());
|
||||||
|
nvinfer1::Permutation permutation;
|
||||||
|
permutation.order[0] = 1;
|
||||||
|
permutation.order[1] = 0;
|
||||||
|
shuffle->setFirstTranspose(permutation);
|
||||||
|
output = shuffle->getOutput(0);
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
@@ -3,15 +3,15 @@
|
|||||||
* https://www.github.com/marcoslucianops
|
* https://www.github.com/marcoslucianops
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __MAXPOOL_LAYER_H__
|
#ifndef __CLS_LAYER_H__
|
||||||
#define __MAXPOOL_LAYER_H__
|
#define __CLS_LAYER_H__
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* maxpoolLayer(
|
nvinfer1::ITensor* clsLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
@@ -6,7 +6,7 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "convolutional_layer.h"
|
#include "convolutional_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* convolutionalLayer(
|
nvinfer1::ITensor* convolutionalLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<float>& weights,
|
std::vector<float>& weights,
|
||||||
@@ -18,6 +18,8 @@ nvinfer1::ILayer* convolutionalLayer(
|
|||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
assert(block.at("type") == "convolutional");
|
assert(block.at("type") == "convolutional");
|
||||||
assert(block.find("filters") != block.end());
|
assert(block.find("filters") != block.end());
|
||||||
assert(block.find("pad") != block.end());
|
assert(block.find("pad") != block.end());
|
||||||
@@ -40,14 +42,10 @@ nvinfer1::ILayer* convolutionalLayer(
|
|||||||
|
|
||||||
int groups = 1;
|
int groups = 1;
|
||||||
if (block.find("groups") != block.end())
|
if (block.find("groups") != block.end())
|
||||||
{
|
|
||||||
groups = std::stoi(block.at("groups"));
|
groups = std::stoi(block.at("groups"));
|
||||||
}
|
|
||||||
|
|
||||||
if (block.find("bias") != block.end())
|
if (block.find("bias") != block.end())
|
||||||
{
|
|
||||||
bias = std::stoi(block.at("bias"));
|
bias = std::stoi(block.at("bias"));
|
||||||
}
|
|
||||||
|
|
||||||
int pad;
|
int pad;
|
||||||
if (padding)
|
if (padding)
|
||||||
@@ -63,7 +61,8 @@ nvinfer1::ILayer* convolutionalLayer(
|
|||||||
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
|
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||||
nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias};
|
nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias};
|
||||||
|
|
||||||
if (weightsType == "weights") {
|
if (weightsType == "weights")
|
||||||
|
{
|
||||||
if (batchNormalize == false)
|
if (batchNormalize == false)
|
||||||
{
|
{
|
||||||
float* val;
|
float* val;
|
||||||
@@ -120,7 +119,8 @@ nvinfer1::ILayer* convolutionalLayer(
|
|||||||
trtWeights.push_back(convBias);
|
trtWeights.push_back(convBias);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
if (batchNormalize == false)
|
if (batchNormalize == false)
|
||||||
{
|
{
|
||||||
float* val = new float[size];
|
float* val = new float[size];
|
||||||
@@ -177,20 +177,18 @@ nvinfer1::ILayer* convolutionalLayer(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(
|
nvinfer1::IConvolutionLayer* conv
|
||||||
*input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
|
= network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
|
||||||
assert(conv != nullptr);
|
assert(conv != nullptr);
|
||||||
std::string convLayerName = "conv_" + std::to_string(layerIdx);
|
std::string convLayerName = "conv_" + std::to_string(layerIdx);
|
||||||
conv->setName(convLayerName.c_str());
|
conv->setName(convLayerName.c_str());
|
||||||
conv->setStrideNd(nvinfer1::DimsHW{stride, stride});
|
conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
|
||||||
conv->setPaddingNd(nvinfer1::DimsHW{pad, pad});
|
conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});
|
||||||
|
|
||||||
if (block.find("groups") != block.end())
|
if (block.find("groups") != block.end())
|
||||||
{
|
|
||||||
conv->setNbGroups(groups);
|
conv->setNbGroups(groups);
|
||||||
}
|
|
||||||
|
|
||||||
nvinfer1::ILayer* output = conv;
|
output = conv->getOutput(0);
|
||||||
|
|
||||||
if (batchNormalize == true)
|
if (batchNormalize == true)
|
||||||
{
|
{
|
||||||
@@ -200,36 +198,28 @@ nvinfer1::ILayer* convolutionalLayer(
|
|||||||
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
|
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
|
||||||
float* shiftWt = new float[size];
|
float* shiftWt = new float[size];
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
{
|
shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
|
||||||
shiftWt[i]
|
|
||||||
= bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
|
|
||||||
}
|
|
||||||
shift.values = shiftWt;
|
shift.values = shiftWt;
|
||||||
float* scaleWt = new float[size];
|
float* scaleWt = new float[size];
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
{
|
|
||||||
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
|
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
|
||||||
}
|
|
||||||
scale.values = scaleWt;
|
scale.values = scaleWt;
|
||||||
float* powerWt = new float[size];
|
float* powerWt = new float[size];
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
{
|
|
||||||
powerWt[i] = 1.0;
|
powerWt[i] = 1.0;
|
||||||
}
|
|
||||||
power.values = powerWt;
|
power.values = powerWt;
|
||||||
trtWeights.push_back(shift);
|
trtWeights.push_back(shift);
|
||||||
trtWeights.push_back(scale);
|
trtWeights.push_back(scale);
|
||||||
trtWeights.push_back(power);
|
trtWeights.push_back(power);
|
||||||
|
|
||||||
nvinfer1::IScaleLayer* bn = network->addScale(
|
nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
|
||||||
*output->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
|
assert(batchnorm != nullptr);
|
||||||
assert(bn != nullptr);
|
std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx);
|
||||||
std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
|
batchnorm->setName(batchnormLayerName.c_str());
|
||||||
bn->setName(bnLayerName.c_str());
|
output = batchnorm->getOutput(0);
|
||||||
output = bn;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
output = activationLayer(layerIdx, activation, output, output->getOutput(0), network);
|
output = activationLayer(layerIdx, activation, output, network);
|
||||||
assert(output != nullptr);
|
assert(output != nullptr);
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
#include "activation_layer.h"
|
#include "activation_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* convolutionalLayer(
|
nvinfer1::ITensor* convolutionalLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<float>& weights,
|
std::vector<float>& weights,
|
||||||
|
|||||||
@@ -5,17 +5,25 @@
|
|||||||
|
|
||||||
#include "implicit_layer.h"
|
#include "implicit_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* implicitLayer(
|
nvinfer1::ITensor* implicitLayer(
|
||||||
int channels,
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<float>& weights,
|
std::vector<float>& weights,
|
||||||
std::vector<nvinfer1::Weights>& trtWeights,
|
std::vector<nvinfer1::Weights>& trtWeights,
|
||||||
int& weightPtr,
|
int& weightPtr,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, channels};
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
float* val = new float[channels];
|
assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul");
|
||||||
for (int i = 0; i < channels; ++i)
|
assert(block.find("filters") != block.end());
|
||||||
|
|
||||||
|
int filters = std::stoi(block.at("filters"));
|
||||||
|
|
||||||
|
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, filters};
|
||||||
|
|
||||||
|
float* val = new float[filters];
|
||||||
|
for (int i = 0; i < filters; ++i)
|
||||||
{
|
{
|
||||||
val[i] = weights[weightPtr];
|
val[i] = weights[weightPtr];
|
||||||
weightPtr++;
|
weightPtr++;
|
||||||
@@ -23,8 +31,11 @@ nvinfer1::ILayer* implicitLayer(
|
|||||||
convWt.values = val;
|
convWt.values = val;
|
||||||
trtWeights.push_back(convWt);
|
trtWeights.push_back(convWt);
|
||||||
|
|
||||||
nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims3{static_cast<int>(channels), 1, 1}, convWt);
|
nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{3, {filters, 1, 1}}, convWt);
|
||||||
assert(implicit != nullptr);
|
assert(implicit != nullptr);
|
||||||
|
std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx);
|
||||||
|
implicit->setName(implicitLayerName.c_str());
|
||||||
|
output = implicit->getOutput(0);
|
||||||
|
|
||||||
return implicit;
|
return output;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,8 +12,9 @@
|
|||||||
|
|
||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* implicitLayer(
|
nvinfer1::ITensor* implicitLayer(
|
||||||
int channels,
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<float>& weights,
|
std::vector<float>& weights,
|
||||||
std::vector<nvinfer1::Weights>& trtWeights,
|
std::vector<nvinfer1::Weights>& trtWeights,
|
||||||
int& weightPtr,
|
int& weightPtr,
|
||||||
|
|||||||
@@ -1,35 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "maxpool_layer.h"
|
|
||||||
|
|
||||||
/*
 * Adds a max-pooling layer built from a "maxpool" config block.
 *
 * layerIdx  index used to build the layer name ("maxpool_<layerIdx>")
 * block     parsed config section; must contain "type" == "maxpool",
 *           "size" and "stride"
 * input     tensor fed to the pooling layer
 * network   network definition the layer is added to
 *
 * Returns the created pooling layer.
 */
nvinfer1::ILayer* maxpoolLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "maxpool");
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    const int kernel = std::stoi(block.at("size"));
    const int step = std::stoi(block.at("stride"));
    const int halfPad = (kernel - 1) / 2;

    nvinfer1::IPoolingLayer* maxpool = network->addPoolingNd(
        *input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {kernel, kernel}});
    assert(maxpool);

    maxpool->setStrideNd(nvinfer1::Dims{2, {step, step}});
    maxpool->setPaddingNd(nvinfer1::Dims{2, {halfPad, halfPad}});

    // A 2x2 window with stride 1 replaces the symmetric padding with
    // no pre-padding and one element of post-padding.
    const bool useAsymmetricPadding = (kernel == 2) && (step == 1);
    if (useAsymmetricPadding)
    {
        maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}});
        maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}});
    }

    const std::string layerName = "maxpool_" + std::to_string(layerIdx);
    maxpool->setName(layerName.c_str());

    return maxpool;
}
|
|
||||||
57
nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp
Normal file
57
nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "pooling_layer.h"
|
||||||
|
|
||||||
|
/*
 * Adds a pooling layer built from a "maxpool" or "avgpool" config block and
 * returns its output tensor.
 *
 * layerIdx  index used to build the layer name ("maxpool_<n>" / "avgpool_<n>")
 * block     parsed config section; "maxpool" additionally requires "size"
 *           and "stride"
 * input     tensor fed to the pooling layer
 * network   network definition the layer is added to
 *
 * Any other block type prints an error and aborts the process.
 */
nvinfer1::ITensor* poolingLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    const std::string& poolType = block.at("type");
    assert(poolType == "maxpool" || poolType == "avgpool");

    if (poolType == "avgpool")
    {
        // The window spans dimensions 1 and 2 of the input entirely
        // (presumably H and W of a CHW tensor, i.e. a global average pool --
        // TODO confirm against the callers).
        const nvinfer1::Dims inDims = input->getDimensions();
        nvinfer1::IPoolingLayer* avg = network->addPoolingNd(
            *input, nvinfer1::PoolingType::kAVERAGE, nvinfer1::Dims{2, {inDims.d[1], inDims.d[2]}});
        assert(avg != nullptr);
        const std::string avgName = "avgpool_" + std::to_string(layerIdx);
        avg->setName(avgName.c_str());
        return avg->getOutput(0);
    }

    if (poolType != "maxpool")
    {
        std::cerr << "Pooling not supported: " << poolType << std::endl;
        std::abort();
    }

    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    const int kernel = std::stoi(block.at("size"));
    const int step = std::stoi(block.at("stride"));
    const int halfPad = (kernel - 1) / 2;

    nvinfer1::IPoolingLayer* max = network->addPoolingNd(
        *input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {kernel, kernel}});
    assert(max != nullptr);
    const std::string maxName = "maxpool_" + std::to_string(layerIdx);
    max->setName(maxName.c_str());
    max->setStrideNd(nvinfer1::Dims{2, {step, step}});
    max->setPaddingNd(nvinfer1::Dims{2, {halfPad, halfPad}});

    // A 2x2 window with stride 1 replaces the symmetric padding with
    // no pre-padding and one element of post-padding.
    if (kernel == 2 && step == 1)
    {
        max->setPrePadding(nvinfer1::Dims{2, {0, 0}});
        max->setPostPadding(nvinfer1::Dims{2, {1, 1}});
    }

    return max->getOutput(0);
}
|
||||||
21
nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h
Normal file
21
nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __POOLING_LAYER_H__
|
||||||
|
#define __POOLING_LAYER_H__
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "NvInfer.h"
|
||||||
|
|
||||||
|
nvinfer1::ITensor* poolingLayer(
|
||||||
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
|
nvinfer1::ITensor* input,
|
||||||
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
|
#endif
|
||||||
58
nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp
Normal file
58
nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "reduce_layer.h"

#include <cstdlib>
#include <iostream>
|
||||||
|
|
||||||
|
/*
 * Adds a reduce layer built from a "reduce" config block and returns its
 * output tensor.
 *
 * layerIdx  index used to build the layer name ("reduce_<layerIdx>")
 * block     parsed config section with:
 *             "mode"  reduction to apply; only "mean" is supported
 *             "axes"  comma-separated list of axes to reduce; negative
 *                     values count back from the last input dimension
 *             "keep"  optional, "1" keeps the reduced dimensions
 * input     tensor to reduce
 * network   network definition the layer is added to
 *
 * An unsupported mode or an axis outside the input rank prints an error and
 * aborts, instead of handing TensorRT an indeterminate operation or
 * evaluating an undefined shift.
 */
nvinfer1::ITensor* reduceLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "reduce");
    assert(block.find("mode") != block.end());
    assert(block.find("axes") != block.end());

    const std::string mode = block.at("mode");

    // Reject unknown modes explicitly: leaving `operation` unset would be
    // undefined behaviour once it is passed to addReduce().
    nvinfer1::ReduceOperation operation = nvinfer1::ReduceOperation::kAVG;
    if (mode != "mean")
    {
        std::cerr << "Reduce mode not supported: " << mode << std::endl;
        std::abort();
    }

    // Split the comma-separated axis list; a trailing empty token is ignored.
    const std::string strAxes = block.at("axes");
    std::vector<int32_t> axes;
    size_t lastPos = 0, pos = 0;
    while ((pos = strAxes.find(',', lastPos)) != std::string::npos)
    {
        axes.push_back(std::stoi(trim(strAxes.substr(lastPos, pos - lastPos))));
        lastPos = pos + 1;
    }
    if (lastPos < strAxes.length())
    {
        const std::string lastV = trim(strAxes.substr(lastPos));
        if (!lastV.empty())
            axes.push_back(std::stoi(lastV));
    }
    assert(!axes.empty());

    // Build the bitmask handed to addReduce(): bit i is set for every axis i
    // in the list.
    const int nbDims = input->getDimensions().nbDims;
    uint32_t axisMask = 0;
    for (int axis : axes)
    {
        if (axis < 0)
            axis += nbDims;
        if (axis < 0 || axis >= nbDims)
        {
            std::cerr << "Reduce axis out of range: " << axis << std::endl;
            std::abort();
        }
        // Unsigned shift of a validated axis; shifting by a negative or
        // oversized count is undefined.
        axisMask |= 1U << axis;
    }

    bool keepDims = false;
    if (block.find("keep") != block.end())
        keepDims = std::stoi(block.at("keep")) == 1;

    nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims);
    assert(reduce != nullptr);
    const std::string reduceLayerName = "reduce_" + std::to_string(layerIdx);
    reduce->setName(reduceLayerName.c_str());

    return reduce->getOutput(0);
}
|
||||||
18
nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h
Normal file
18
nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __REDUCE_LAYER_H__
|
||||||
|
#define __REDUCE_LAYER_H__
|
||||||
|
|
||||||
|
#include "NvInfer.h"
|
||||||
|
#include "../utils.h"
|
||||||
|
|
||||||
|
nvinfer1::ITensor* reduceLayer(
|
||||||
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
|
nvinfer1::ITensor* input,
|
||||||
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
|
#endif
|
||||||
113
nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp
Normal file
113
nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "reg_layer.h"
|
||||||
|
|
||||||
|
/*
 * Builds the sub-graph for a "reg" config block and returns its output tensor.
 *
 * Steps, in the order the layers are added:
 *   1. transpose the first two dimensions of `input`;
 *   2. slice columns 0-1 ("lt") and columns 2-3 ("rb") of the transposed tensor;
 *   3. load an (N, 2) anchor-point constant from the weight stream;
 *   4. x1y1 = anchor - lt, x2y2 = rb + anchor, concatenated on axis 1;
 *   5. load an (N, 1) stride constant and multiply the concatenation by it.
 * N is dimension 0 of the transposed tensor.
 * (Presumably this decodes per-anchor box distances into corner coordinates
 * in input-image units -- TODO confirm against the model exporter.)
 *
 * layerIdx    index appended to every layer name created here
 * block       parsed config section; "type" must be "reg"
 * weights     flat weight stream; 3 * N floats are consumed
 * trtWeights  receives the two heap-allocated weight buffers created here
 * weightPtr   read position in `weights`, advanced past the consumed floats
 * input       tensor to decode
 * network     network definition the layers are added to
 */
nvinfer1::ITensor* regLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights,
    int& weightPtr,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "reg");

    const std::string suffix = std::to_string(layerIdx);

    // Copies the next `count` floats of the weight stream into a new buffer,
    // advances the read position and records the buffer in trtWeights.
    auto takeWeights = [&weights, &trtWeights, &weightPtr](int count) -> nvinfer1::Weights {
        nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, count};
        float* buffer = new float[count];
        for (int i = 0; i < count; ++i)
        {
            buffer[i] = weights[weightPtr];
            ++weightPtr;
        }
        wt.values = buffer;
        trtWeights.push_back(wt);
        return wt;
    };

    // 1. Swap the first two dimensions.
    nvinfer1::IShuffleLayer* transpose = network->addShuffle(*input);
    assert(transpose != nullptr);
    const std::string transposeName = "shuffle_" + suffix;
    transpose->setName(transposeName.c_str());
    nvinfer1::Permutation order;
    order.order[0] = 1;
    order.order[1] = 0;
    transpose->setFirstTranspose(order);
    nvinfer1::ITensor* transposed = transpose->getOutput(0);
    const nvinfer1::Dims transposedDims = transposed->getDimensions();
    const int numPoints = transposedDims.d[0];

    // 2. Columns 0-1 and columns 2-3.
    nvinfer1::ISliceLayer* ltSlice = network->addSlice(
        *transposed, nvinfer1::Dims{2, {0, 0}}, nvinfer1::Dims{2, {numPoints, 2}}, nvinfer1::Dims{2, {1, 1}});
    assert(ltSlice != nullptr);
    const std::string ltName = "slice_lt_" + suffix;
    ltSlice->setName(ltName.c_str());
    nvinfer1::ITensor* ltTensor = ltSlice->getOutput(0);

    nvinfer1::ISliceLayer* rbSlice = network->addSlice(
        *transposed, nvinfer1::Dims{2, {0, 2}}, nvinfer1::Dims{2, {numPoints, 2}}, nvinfer1::Dims{2, {1, 1}});
    assert(rbSlice != nullptr);
    const std::string rbName = "slice_rb_" + suffix;
    rbSlice->setName(rbName.c_str());
    nvinfer1::ITensor* rbTensor = rbSlice->getOutput(0);

    // 3. Anchor points constant, shape (N, 2).
    const nvinfer1::Weights anchorWt = takeWeights(numPoints * 2);
    nvinfer1::IConstantLayer* anchors = network->addConstant(nvinfer1::Dims{2, {numPoints, 2}}, anchorWt);
    assert(anchors != nullptr);
    const std::string anchorsName = "anchor_points_" + suffix;
    anchors->setName(anchorsName.c_str());
    nvinfer1::ITensor* anchorsTensor = anchors->getOutput(0);

    // 4. Corner computation and concatenation.
    nvinfer1::IElementWiseLayer* topLeft
        = network->addElementWise(*anchorsTensor, *ltTensor, nvinfer1::ElementWiseOperation::kSUB);
    assert(topLeft != nullptr);
    const std::string topLeftName = "x1y1_" + suffix;
    topLeft->setName(topLeftName.c_str());

    nvinfer1::IElementWiseLayer* bottomRight
        = network->addElementWise(*rbTensor, *anchorsTensor, nvinfer1::ElementWiseOperation::kSUM);
    assert(bottomRight != nullptr);
    const std::string bottomRightName = "x2y2_" + suffix;
    bottomRight->setName(bottomRightName.c_str());

    std::vector<nvinfer1::ITensor*> corners{topLeft->getOutput(0), bottomRight->getOutput(0)};
    nvinfer1::IConcatenationLayer* boxes = network->addConcatenation(corners.data(), corners.size());
    assert(boxes != nullptr);
    const std::string boxesName = "concat_" + suffix;
    boxes->setName(boxesName.c_str());
    boxes->setAxis(1);

    // 5. Per-point stride constant, shape (N, 1), multiplied into the boxes.
    const nvinfer1::Weights strideWt = takeWeights(numPoints);
    nvinfer1::IConstantLayer* strides = network->addConstant(nvinfer1::Dims{2, {numPoints, 1}}, strideWt);
    assert(strides != nullptr);
    const std::string stridesName = "stride_points_" + suffix;
    strides->setName(stridesName.c_str());

    nvinfer1::IElementWiseLayer* scaled = network->addElementWise(
        *boxes->getOutput(0), *strides->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
    assert(scaled != nullptr);
    const std::string scaledName = "pred_" + suffix;
    scaled->setName(scaledName.c_str());

    return scaled->getOutput(0);
}
|
||||||
24
nvdsinfer_custom_impl_Yolo/layers/reg_layer.h
Normal file
24
nvdsinfer_custom_impl_Yolo/layers/reg_layer.h
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __REG_LAYER_H__
|
||||||
|
#define __REG_LAYER_H__
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "NvInfer.h"
|
||||||
|
|
||||||
|
nvinfer1::ITensor* regLayer(
|
||||||
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
|
std::vector<float>& weights,
|
||||||
|
std::vector<nvinfer1::Weights>& trtWeights,
|
||||||
|
int& weightPtr,
|
||||||
|
nvinfer1::ITensor* input,
|
||||||
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
|
#endif
|
||||||
62
nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp
Normal file
62
nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "reorg_layer.h"
|
||||||
|
|
||||||
|
/*
 * Builds the sub-graph for a "reorg" config block and returns its output
 * tensor.
 *
 * Four stride-2 slices of `input` are taken over dimensions 1 and 2, starting
 * at offsets (0,0), (0,1), (1,0) and (1,1), each keeping dimension 0 whole and
 * half of dimensions 1 and 2. The slices are concatenated along axis 0 in
 * that order.
 *
 * layerIdx  index appended to the slice and concat layer names
 * block     parsed config section; "type" must be "reorg"
 * input     tensor to rearrange (indexed as a 3-dimensional tensor)
 * network   network definition the layers are added to
 */
nvinfer1::ITensor* reorgLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "reorg");

    const nvinfer1::Dims inDims = input->getDimensions();
    const nvinfer1::Dims sliceSize{3, {inDims.d[0], inDims.d[1] / 2, inDims.d[2] / 2}};
    const nvinfer1::Dims sliceStride{3, {1, 2, 2}};
    const std::string suffix = std::to_string(layerIdx);

    // Start offsets along dimensions 1 and 2 for slice1..slice4.
    const int offsets[4][2] = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};

    std::vector<nvinfer1::ITensor*> pieces;
    for (int i = 0; i < 4; ++i)
    {
        nvinfer1::ISliceLayer* slice = network->addSlice(
            *input, nvinfer1::Dims{3, {0, offsets[i][0], offsets[i][1]}}, sliceSize, sliceStride);
        assert(slice != nullptr);
        const std::string sliceName = "slice" + std::to_string(i + 1) + "_" + suffix;
        slice->setName(sliceName.c_str());
        pieces.push_back(slice->getOutput(0));
    }

    nvinfer1::IConcatenationLayer* merged = network->addConcatenation(pieces.data(), pieces.size());
    assert(merged != nullptr);
    const std::string mergedName = "concat_" + suffix;
    merged->setName(mergedName.c_str());
    merged->setAxis(0);

    return merged->getOutput(0);
}
|
||||||
@@ -12,8 +12,9 @@
|
|||||||
|
|
||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* reorgV5Layer(
|
nvinfer1::ITensor* reorgLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::INetworkDefinition* network);
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "reorgv5_layer.h"
|
|
||||||
|
|
||||||
/*
 * Builds four stride-2 slices of `input` over dimensions 1 and 2, starting at
 * offsets (0,0), (1,0), (0,1) and (1,1), and concatenates them in that order.
 * Each slice keeps dimension 0 whole and half of dimensions 1 and 2.
 *
 * layerIdx  index appended to the slice layer names
 * input     tensor to rearrange (indexed as a 3-dimensional tensor)
 * network   network definition the layers are added to
 *
 * Returns the concatenation layer; its name and axis are left at the
 * defaults.
 */
nvinfer1::ILayer* reorgV5Layer(
    int layerIdx,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    const nvinfer1::Dims inDims = input->getDimensions();
    const std::string suffix = std::to_string(layerIdx);

    // Start offsets along dimensions 1 and 2 for slice1..slice4.
    const int offsets[4][2] = {{0, 0}, {1, 0}, {0, 1}, {1, 1}};

    std::vector<nvinfer1::ITensor*> pieces;
    for (int i = 0; i < 4; ++i)
    {
        nvinfer1::ISliceLayer* slice = network->addSlice(
            *input,
            nvinfer1::Dims3{0, offsets[i][0], offsets[i][1]},
            nvinfer1::Dims3{inDims.d[0], inDims.d[1] / 2, inDims.d[2] / 2},
            nvinfer1::Dims3{1, 2, 2});
        assert(slice != nullptr);
        const std::string sliceName = "slice" + std::to_string(i + 1) + "_" + suffix;
        slice->setName(sliceName.c_str());
        pieces.push_back(slice->getOutput(0));
    }

    nvinfer1::IConcatenationLayer* merged = network->addConcatenation(pieces.data(), pieces.size());
    assert(merged != nullptr);

    return merged;
}
|
|
||||||
@@ -5,58 +5,73 @@
|
|||||||
|
|
||||||
#include "route_layer.h"
|
#include "route_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* routeLayer(
|
nvinfer1::ITensor* routeLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
|
std::string& layers,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
|
assert(block.at("type") == "route");
|
||||||
|
assert(block.find("layers") != block.end());
|
||||||
|
|
||||||
std::string strLayers = block.at("layers");
|
std::string strLayers = block.at("layers");
|
||||||
std::vector<int> idxLayers;
|
std::vector<int> idxLayers;
|
||||||
size_t lastPos = 0, pos = 0;
|
size_t lastPos = 0, pos = 0;
|
||||||
while ((pos = strLayers.find(',', lastPos)) != std::string::npos) {
|
while ((pos = strLayers.find(',', lastPos)) != std::string::npos)
|
||||||
|
{
|
||||||
int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
|
int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
|
||||||
idxLayers.push_back(vL);
|
idxLayers.push_back(vL);
|
||||||
lastPos = pos + 1;
|
lastPos = pos + 1;
|
||||||
}
|
}
|
||||||
if (lastPos < strLayers.length()) {
|
if (lastPos < strLayers.length())
|
||||||
|
{
|
||||||
std::string lastV = trim(strLayers.substr(lastPos));
|
std::string lastV = trim(strLayers.substr(lastPos));
|
||||||
if (!lastV.empty()) {
|
if (!lastV.empty())
|
||||||
idxLayers.push_back(std::stoi(lastV));
|
idxLayers.push_back(std::stoi(lastV));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
assert (!idxLayers.empty());
|
assert (!idxLayers.empty());
|
||||||
std::vector<nvinfer1::ITensor*> concatInputs;
|
std::vector<nvinfer1::ITensor*> concatInputs;
|
||||||
for (int idxLayer : idxLayers) {
|
for (uint i = 0; i < idxLayers.size(); ++i)
|
||||||
if (idxLayer < 0) {
|
{
|
||||||
idxLayer = tensorOutputs.size() + idxLayer;
|
if (idxLayers[i] < 0)
|
||||||
}
|
idxLayers[i] = tensorOutputs.size() + idxLayers[i];
|
||||||
assert (idxLayer >= 0 && idxLayer < (int)tensorOutputs.size());
|
assert (idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size());
|
||||||
concatInputs.push_back (tensorOutputs[idxLayer]);
|
concatInputs.push_back(tensorOutputs[idxLayers[i]]);
|
||||||
|
if (i < idxLayers.size() - 1)
|
||||||
|
layers += std::to_string(idxLayers[i]) + ", ";
|
||||||
}
|
}
|
||||||
|
layers += std::to_string(idxLayers[idxLayers.size() - 1]);
|
||||||
|
|
||||||
nvinfer1::IConcatenationLayer* concat =
|
int axis = 0;
|
||||||
network->addConcatenation(concatInputs.data(), concatInputs.size());
|
if (block.find("axis") != block.end())
|
||||||
|
axis = std::stoi(block.at("axis"));
|
||||||
|
if (axis < 0)
|
||||||
|
axis = concatInputs[0]->getDimensions().nbDims + axis;
|
||||||
|
|
||||||
|
nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
|
||||||
assert(concat != nullptr);
|
assert(concat != nullptr);
|
||||||
std::string concatLayerName = "route_" + std::to_string(layerIdx - 1);
|
std::string concatLayerName = "route_" + std::to_string(layerIdx);
|
||||||
concat->setName(concatLayerName.c_str());
|
concat->setName(concatLayerName.c_str());
|
||||||
concat->setAxis(0);
|
concat->setAxis(axis);
|
||||||
|
output = concat->getOutput(0);
|
||||||
|
|
||||||
nvinfer1::ILayer* output = concat;
|
if (block.find("groups") != block.end())
|
||||||
|
{
|
||||||
if (block.find("groups") != block.end()) {
|
nvinfer1::Dims prevTensorDims = output->getDimensions();
|
||||||
nvinfer1::Dims prevTensorDims = output->getOutput(0)->getDimensions();
|
|
||||||
int groups = stoi(block.at("groups"));
|
int groups = stoi(block.at("groups"));
|
||||||
int group_id = stoi(block.at("group_id"));
|
int group_id = stoi(block.at("group_id"));
|
||||||
int startSlice = (prevTensorDims.d[0] / groups) * group_id;
|
int startSlice = (prevTensorDims.d[0] / groups) * group_id;
|
||||||
int channelSlice = (prevTensorDims.d[0] / groups);
|
int channelSlice = (prevTensorDims.d[0] / groups);
|
||||||
nvinfer1::ISliceLayer* sl = network->addSlice(
|
nvinfer1::ISliceLayer* slice = network->addSlice(
|
||||||
*output->getOutput(0),
|
*output, nvinfer1::Dims{3, {startSlice, 0, 0}},
|
||||||
nvinfer1::Dims3{startSlice, 0, 0},
|
nvinfer1::Dims{3, {channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}});
|
||||||
nvinfer1::Dims3{channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]},
|
assert(slice != nullptr);
|
||||||
nvinfer1::Dims3{1, 1, 1});
|
std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
|
||||||
assert(sl != nullptr);
|
slice->setName(sliceLayerName.c_str());
|
||||||
output = sl;
|
output = slice->getOutput(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
|
|||||||
@@ -9,8 +9,9 @@
|
|||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
#include "../utils.h"
|
#include "../utils.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* routeLayer(
|
nvinfer1::ITensor* routeLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
|
std::string& layers,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||||
nvinfer1::INetworkDefinition* network);
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|||||||
@@ -5,40 +5,47 @@
|
|||||||
|
|
||||||
#include "shortcut_layer.h"
|
#include "shortcut_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* shortcutLayer(
|
nvinfer1::ITensor* shortcutLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
|
std::string mode,
|
||||||
std::string activation,
|
std::string activation,
|
||||||
std::string inputVol,
|
std::string inputVol,
|
||||||
std::string shortcutVol,
|
std::string shortcutVol,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::ITensor* shortcutTensor,
|
nvinfer1::ITensor* shortcutInput,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
nvinfer1::ILayer* output;
|
nvinfer1::ITensor* output;
|
||||||
nvinfer1::ITensor* outputTensor;
|
|
||||||
|
|
||||||
if (inputVol != shortcutVol)
|
assert(block.at("type") == "shortcut");
|
||||||
|
|
||||||
|
nvinfer1::ElementWiseOperation operation = nvinfer1::ElementWiseOperation::kSUM;
|
||||||
|
|
||||||
|
if (mode == "mul")
|
||||||
|
operation = nvinfer1::ElementWiseOperation::kPROD;
|
||||||
|
|
||||||
|
if (mode == "add" && inputVol != shortcutVol)
|
||||||
{
|
{
|
||||||
nvinfer1::ISliceLayer* sl = network->addSlice(
|
nvinfer1::ISliceLayer* slice = network->addSlice(
|
||||||
*shortcutTensor,
|
*shortcutInput, nvinfer1::Dims{3, {0, 0, 0}}, input->getDimensions(), nvinfer1::Dims{3, {1, 1, 1}});
|
||||||
nvinfer1::Dims3{0, 0, 0},
|
assert(slice != nullptr);
|
||||||
input->getDimensions(),
|
std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
|
||||||
nvinfer1::Dims3{1, 1, 1});
|
slice->setName(sliceLayerName.c_str());
|
||||||
assert(sl != nullptr);
|
output = slice->getOutput(0);
|
||||||
outputTensor = sl->getOutput(0);
|
}
|
||||||
assert(outputTensor != nullptr);
|
else
|
||||||
} else
|
|
||||||
{
|
{
|
||||||
outputTensor = shortcutTensor;
|
output = shortcutInput;
|
||||||
assert(outputTensor != nullptr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nvinfer1::IElementWiseLayer* ew = network->addElementWise(
|
nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, operation);
|
||||||
*input, *outputTensor,
|
assert(shortcut != nullptr);
|
||||||
nvinfer1::ElementWiseOperation::kSUM);
|
std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx);
|
||||||
assert(ew != nullptr);
|
shortcut->setName(shortcutLayerName.c_str());
|
||||||
|
output = shortcut->getOutput(0);
|
||||||
|
|
||||||
output = activationLayer(layerIdx, activation, ew, ew->getOutput(0), network);
|
output = activationLayer(layerIdx, activation, output, network);
|
||||||
assert(output != nullptr);
|
assert(output != nullptr);
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
|
|||||||
@@ -6,17 +6,21 @@
|
|||||||
#ifndef __SHORTCUT_LAYER_H__
|
#ifndef __SHORTCUT_LAYER_H__
|
||||||
#define __SHORTCUT_LAYER_H__
|
#define __SHORTCUT_LAYER_H__
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
|
|
||||||
#include "activation_layer.h"
|
#include "activation_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* shortcutLayer(
|
nvinfer1::ITensor* shortcutLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
|
std::string mode,
|
||||||
std::string activation,
|
std::string activation,
|
||||||
std::string inputVol,
|
std::string inputVol,
|
||||||
std::string shortcutVol,
|
std::string shortcutVol,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::ITensor* shortcutTensor,
|
nvinfer1::ITensor* shortcut,
|
||||||
nvinfer1::INetworkDefinition* network);
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
123
nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp
Normal file
123
nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shuffle_layer.h"
|
||||||
|
|
||||||
|
nvinfer1::ITensor* shuffleLayer(
|
||||||
|
int layerIdx,
|
||||||
|
std::string& layer,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
|
nvinfer1::ITensor* input,
|
||||||
|
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||||
|
nvinfer1::INetworkDefinition* network)
|
||||||
|
{
|
||||||
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
|
assert(block.at("type") == "shuffle");
|
||||||
|
|
||||||
|
nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
|
||||||
|
assert(shuffle != nullptr);
|
||||||
|
std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
|
||||||
|
shuffle->setName(shuffleLayerName.c_str());
|
||||||
|
|
||||||
|
if (block.find("reshape") != block.end())
|
||||||
|
{
|
||||||
|
std::string strReshape = block.at("reshape");
|
||||||
|
std::vector<int32_t> reshape;
|
||||||
|
size_t lastPos = 0, pos = 0;
|
||||||
|
while ((pos = strReshape.find(',', lastPos)) != std::string::npos)
|
||||||
|
{
|
||||||
|
int vL = std::stoi(trim(strReshape.substr(lastPos, pos - lastPos)));
|
||||||
|
reshape.push_back(vL);
|
||||||
|
lastPos = pos + 1;
|
||||||
|
}
|
||||||
|
if (lastPos < strReshape.length())
|
||||||
|
{
|
||||||
|
std::string lastV = trim(strReshape.substr(lastPos));
|
||||||
|
if (!lastV.empty())
|
||||||
|
reshape.push_back(std::stoi(lastV));
|
||||||
|
}
|
||||||
|
assert(!reshape.empty());
|
||||||
|
|
||||||
|
int from = -1;
|
||||||
|
if (block.find("from") != block.end())
|
||||||
|
from = std::stoi(block.at("from"));
|
||||||
|
|
||||||
|
if (from < 0)
|
||||||
|
from = tensorOutputs.size() + from;
|
||||||
|
|
||||||
|
layer = std::to_string(from);
|
||||||
|
|
||||||
|
nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions();
|
||||||
|
int32_t l = inputTensorDims.d[1] * inputTensorDims.d[2];
|
||||||
|
|
||||||
|
nvinfer1::Dims reshapeDims;
|
||||||
|
reshapeDims.nbDims = reshape.size();
|
||||||
|
|
||||||
|
for (uint i = 0; i < reshape.size(); ++i)
|
||||||
|
if (reshape[i] == 0)
|
||||||
|
reshapeDims.d[i] = l;
|
||||||
|
else
|
||||||
|
reshapeDims.d[i] = reshape[i];
|
||||||
|
|
||||||
|
shuffle->setReshapeDimensions(reshapeDims);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block.find("transpose1") != block.end())
|
||||||
|
{
|
||||||
|
std::string strTranspose1 = block.at("transpose1");
|
||||||
|
std::vector<int32_t> transpose1;
|
||||||
|
size_t lastPos = 0, pos = 0;
|
||||||
|
while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos)
|
||||||
|
{
|
||||||
|
int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos)));
|
||||||
|
transpose1.push_back(vL);
|
||||||
|
lastPos = pos + 1;
|
||||||
|
}
|
||||||
|
if (lastPos < strTranspose1.length())
|
||||||
|
{
|
||||||
|
std::string lastV = trim(strTranspose1.substr(lastPos));
|
||||||
|
if (!lastV.empty())
|
||||||
|
transpose1.push_back(std::stoi(lastV));
|
||||||
|
}
|
||||||
|
assert(!transpose1.empty());
|
||||||
|
|
||||||
|
nvinfer1::Permutation permutation1;
|
||||||
|
for (uint i = 0; i < transpose1.size(); ++i)
|
||||||
|
permutation1.order[i] = transpose1[i];
|
||||||
|
|
||||||
|
shuffle->setFirstTranspose(permutation1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block.find("transpose2") != block.end())
|
||||||
|
{
|
||||||
|
std::string strTranspose2 = block.at("transpose2");
|
||||||
|
std::vector<int32_t> transpose2;
|
||||||
|
size_t lastPos = 0, pos = 0;
|
||||||
|
while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos)
|
||||||
|
{
|
||||||
|
int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos)));
|
||||||
|
transpose2.push_back(vL);
|
||||||
|
lastPos = pos + 1;
|
||||||
|
}
|
||||||
|
if (lastPos < strTranspose2.length())
|
||||||
|
{
|
||||||
|
std::string lastV = trim(strTranspose2.substr(lastPos));
|
||||||
|
if (!lastV.empty())
|
||||||
|
transpose2.push_back(std::stoi(lastV));
|
||||||
|
}
|
||||||
|
assert(!transpose2.empty());
|
||||||
|
|
||||||
|
nvinfer1::Permutation permutation2;
|
||||||
|
for (uint i = 0; i < transpose2.size(); ++i)
|
||||||
|
permutation2.order[i] = transpose2[i];
|
||||||
|
|
||||||
|
shuffle->setSecondTranspose(permutation2);
|
||||||
|
}
|
||||||
|
|
||||||
|
output = shuffle->getOutput(0);
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
20
nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h
Normal file
20
nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __SHUFFLE_LAYER_H__
|
||||||
|
#define __SHUFFLE_LAYER_H__
|
||||||
|
|
||||||
|
#include "NvInfer.h"
|
||||||
|
#include "../utils.h"
|
||||||
|
|
||||||
|
nvinfer1::ITensor* shuffleLayer(
|
||||||
|
int layerIdx,
|
||||||
|
std::string& layer,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
|
nvinfer1::ITensor* input,
|
||||||
|
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
||||||
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
|
#endif
|
||||||
29
nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp
Normal file
29
nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "softmax_layer.h"
|
||||||
|
|
||||||
|
/*
 * Adds a TensorRT softmax layer named "softmax_<layerIdx>" on top of `input`.
 *
 * The cfg block must contain an "axes" key holding the index of the single
 * dimension along which the softmax is computed; it is converted to the
 * bitmask form expected by ISoftMaxLayer::setAxes (bit i selects dimension i).
 *
 * Returns the output tensor of the softmax layer.
 */
nvinfer1::ITensor* softmaxLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "softmax");
    assert(block.find("axes") != block.end());

    const int axes = std::stoi(block.at("axes"));
    // A negative or out-of-rank axis would make the shift below undefined
    // and select a dimension the input does not have.
    assert(axes >= 0 && axes < input->getDimensions().nbDims);

    nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*input);
    assert(softmax != nullptr);
    std::string softmaxLayerName = "softmax_" + std::to_string(layerIdx);
    softmax->setName(softmaxLayerName.c_str());
    // Unsigned shift: setAxes takes a uint32_t bitmask.
    softmax->setAxes(1U << axes);

    return softmax->getOutput(0);
}
|
||||||
20
nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h
Normal file
20
nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __SOFTMAX_LAYER_H__
|
||||||
|
#define __SOFTMAX_LAYER_H__
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "NvInfer.h"
|
||||||
|
|
||||||
|
nvinfer1::ITensor* softmaxLayer(
|
||||||
|
int layerIdx,
|
||||||
|
std::map<std::string, std::string>& block,
|
||||||
|
nvinfer1::ITensor* input,
|
||||||
|
nvinfer1::INetworkDefinition* network);
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -5,20 +5,28 @@
|
|||||||
|
|
||||||
#include "upsample_layer.h"
|
#include "upsample_layer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* upsampleLayer(
|
nvinfer1::ITensor* upsampleLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
nvinfer1::INetworkDefinition* network)
|
nvinfer1::INetworkDefinition* network)
|
||||||
{
|
{
|
||||||
|
nvinfer1::ITensor* output;
|
||||||
|
|
||||||
assert(block.at("type") == "upsample");
|
assert(block.at("type") == "upsample");
|
||||||
|
assert(block.find("stride") != block.end());
|
||||||
|
|
||||||
int stride = std::stoi(block.at("stride"));
|
int stride = std::stoi(block.at("stride"));
|
||||||
|
|
||||||
nvinfer1::IResizeLayer* resize_layer = network->addResize(*input);
|
|
||||||
resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
|
|
||||||
float scale[3] = {1, static_cast<float>(stride), static_cast<float>(stride)};
|
float scale[3] = {1, static_cast<float>(stride), static_cast<float>(stride)};
|
||||||
resize_layer->setScales(scale, 3);
|
|
||||||
std::string layer_name = "upsample_" + std::to_string(layerIdx);
|
nvinfer1::IResizeLayer* resize = network->addResize(*input);
|
||||||
resize_layer->setName(layer_name.c_str());
|
assert(resize != nullptr);
|
||||||
return resize_layer;
|
std::string resizeLayerName = "upsample_" + std::to_string(layerIdx);
|
||||||
|
resize->setName(resizeLayerName.c_str());
|
||||||
|
resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
|
||||||
|
resize->setScales(scale, 3);
|
||||||
|
output = resize->getOutput(0);
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,12 +7,11 @@
|
|||||||
#define __UPSAMPLE_LAYER_H__
|
#define __UPSAMPLE_LAYER_H__
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
#include "NvInfer.h"
|
#include "NvInfer.h"
|
||||||
|
|
||||||
nvinfer1::ILayer* upsampleLayer(
|
nvinfer1::ITensor* upsampleLayer(
|
||||||
int layerIdx,
|
int layerIdx,
|
||||||
std::map<std::string, std::string>& block,
|
std::map<std::string, std::string>& block,
|
||||||
nvinfer1::ITensor* input,
|
nvinfer1::ITensor* input,
|
||||||
|
|||||||
@@ -37,13 +37,11 @@ cudaError_t sortDetections(
|
|||||||
float* _d_scores = reinterpret_cast<float*>(d_scores) + (batch * outputSize);
|
float* _d_scores = reinterpret_cast<float*>(d_scores) + (batch * outputSize);
|
||||||
|
|
||||||
int* _countData = reinterpret_cast<int*>(countData) + (batch);
|
int* _countData = reinterpret_cast<int*>(countData) + (batch);
|
||||||
int* _count = (int*)malloc(sizeof(int));
|
int count;
|
||||||
cudaMemcpy(_count, (int*)&_countData[0], sizeof(int), cudaMemcpyDeviceToHost);
|
cudaMemcpy(&count, _countData, sizeof(int), cudaMemcpyDeviceToHost);
|
||||||
int count = _count[0];
|
|
||||||
|
|
||||||
if (count == 0)
|
if (count == 0)
|
||||||
{
|
{
|
||||||
free(_count);
|
|
||||||
return cudaGetLastError();
|
return cudaGetLastError();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,13 +70,13 @@ cudaError_t sortDetections(
|
|||||||
|
|
||||||
int _topK = count < topK ? count : topK;
|
int _topK = count < topK ? count : topK;
|
||||||
|
|
||||||
int threads_per_block = 0;
|
int threads_per_block = 16;
|
||||||
int number_of_blocks = 4;
|
int number_of_blocks = 0;
|
||||||
|
|
||||||
if (_topK % 2 == 0 && _topK >= number_of_blocks)
|
if (_topK % 2 == 0 && _topK >= threads_per_block)
|
||||||
threads_per_block = _topK / number_of_blocks;
|
number_of_blocks = _topK / threads_per_block;
|
||||||
else
|
else
|
||||||
threads_per_block = (_topK / number_of_blocks) + 1;
|
number_of_blocks = (_topK / threads_per_block) + 1;
|
||||||
|
|
||||||
sortOutput<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
sortOutput<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||||
_d_indexes, _d_scores, reinterpret_cast<float*>(d_boxes) + (batch * 4 * outputSize),
|
_d_indexes, _d_scores, reinterpret_cast<float*>(d_boxes) + (batch * 4 * outputSize),
|
||||||
@@ -89,8 +87,6 @@ cudaError_t sortDetections(
|
|||||||
cudaFree(d_keys_out);
|
cudaFree(d_keys_out);
|
||||||
cudaFree(d_values_out);
|
cudaFree(d_values_out);
|
||||||
cudaFree(d_temp_storage);
|
cudaFree(d_temp_storage);
|
||||||
|
|
||||||
free(_count);
|
|
||||||
}
|
}
|
||||||
return cudaGetLastError();
|
return cudaGetLastError();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -132,11 +132,10 @@ std::string dimsToString(const nvinfer1::Dims d)
|
|||||||
{
|
{
|
||||||
std::stringstream s;
|
std::stringstream s;
|
||||||
assert(d.nbDims >= 1);
|
assert(d.nbDims >= 1);
|
||||||
|
s << "[";
|
||||||
for (int i = 0; i < d.nbDims - 1; ++i)
|
for (int i = 0; i < d.nbDims - 1; ++i)
|
||||||
{
|
s << d.d[i] << ", ";
|
||||||
s << std::setw(4) << d.d[i] << " x";
|
s << d.d[d.nbDims - 1] << "]";
|
||||||
}
|
|
||||||
s << std::setw(4) << d.d[d.nbDims - 1];
|
|
||||||
|
|
||||||
return s.str();
|
return s.str();
|
||||||
}
|
}
|
||||||
@@ -152,10 +151,9 @@ int getNumChannels(nvinfer1::ITensor* t)
|
|||||||
void printLayerInfo(
|
void printLayerInfo(
|
||||||
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr)
|
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr)
|
||||||
{
|
{
|
||||||
std::cout << std::setw(6) << std::left << layerIndex << std::setw(24) << std::left << layerName;
|
std::cout << std::setw(8) << std::left << layerIndex << std::setw(30) << std::left << layerName;
|
||||||
std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left
|
std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput;
|
||||||
<< layerOutput;
|
std::cout << weightPtr << std::endl;
|
||||||
std::cout << std::setw(7) << std::left << weightPtr << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string getAbsPath(std::string path)
|
std::string getAbsPath(std::string path)
|
||||||
|
|||||||
@@ -158,7 +158,6 @@ NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
|
|||||||
NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition& network)
|
NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition& network)
|
||||||
{
|
{
|
||||||
int weightPtr = 0;
|
int weightPtr = 0;
|
||||||
int channels = m_InputC;
|
|
||||||
|
|
||||||
std::string weightsType;
|
std::string weightsType;
|
||||||
if (m_WtsFilePath.find(".weights") != std::string::npos)
|
if (m_WtsFilePath.find(".weights") != std::string::npos)
|
||||||
@@ -172,81 +171,64 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN
|
|||||||
else if (m_NetworkType.find("yolor") != std::string::npos)
|
else if (m_NetworkType.find("yolor") != std::string::npos)
|
||||||
eps = 1.0e-4;
|
eps = 1.0e-4;
|
||||||
|
|
||||||
nvinfer1::ITensor* data =
|
nvinfer1::ITensor* data = network.addInput(
|
||||||
network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
|
m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
|
||||||
nvinfer1::Dims3{static_cast<int>(m_InputC),
|
nvinfer1::Dims{3, {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}});
|
||||||
static_cast<int>(m_InputH), static_cast<int>(m_InputW)});
|
|
||||||
assert(data != nullptr && data->getDimensions().nbDims > 0);
|
assert(data != nullptr && data->getDimensions().nbDims > 0);
|
||||||
|
|
||||||
nvinfer1::ITensor* previous = data;
|
nvinfer1::ITensor* previous = data;
|
||||||
std::vector<nvinfer1::ITensor*> tensorOutputs;
|
std::vector<nvinfer1::ITensor*> tensorOutputs;
|
||||||
std::vector<nvinfer1::ITensor*> yoloInputs;
|
|
||||||
uint inputYoloCount = 0;
|
nvinfer1::ITensor* yoloTensorInputs[m_YoloCount];
|
||||||
|
uint yoloCountInputs = 0;
|
||||||
|
|
||||||
int modelType = -1;
|
int modelType = -1;
|
||||||
|
|
||||||
for (uint i = 0; i < m_ConfigBlocks.size(); ++i)
|
for (uint i = 0; i < m_ConfigBlocks.size(); ++i)
|
||||||
{
|
{
|
||||||
assert(getNumChannels(previous) == channels);
|
|
||||||
std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";
|
std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";
|
||||||
|
|
||||||
if (m_ConfigBlocks.at(i).at("type") == "net")
|
if (m_ConfigBlocks.at(i).at("type") == "net")
|
||||||
printLayerInfo("", "layer", " input", " output", "weightPtr");
|
printLayerInfo("", "Layer", "Input Shape", "Output Shape", "WeightPtr");
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "convolutional")
|
else if (m_ConfigBlocks.at(i).at("type") == "convolutional")
|
||||||
{
|
{
|
||||||
|
int channels = getNumChannels(previous);
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
nvinfer1::ILayer* out = convolutionalLayer(
|
previous = convolutionalLayer(
|
||||||
i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, eps, previous, &network);
|
i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, eps, previous, &network);
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
std::string layerType = "conv_" + m_ConfigBlocks.at(i).at("activation");
|
std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation");
|
||||||
printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "batchnorm")
|
else if (m_ConfigBlocks.at(i).at("type") == "batchnorm")
|
||||||
{
|
{
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
nvinfer1::ILayer* out = batchnormLayer(
|
previous = batchnormLayer(
|
||||||
i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network);
|
i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network);
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
std::string layerType = "bn_" + m_ConfigBlocks.at(i).at("activation");
|
std::string layerName = "batchnorm_" + m_ConfigBlocks.at(i).at("activation");
|
||||||
printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul")
|
else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul")
|
||||||
{
|
{
|
||||||
std::string type;
|
previous = implicitLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, &network);
|
||||||
if (m_ConfigBlocks.at(i).at("type") == "implicit_add")
|
|
||||||
type = "add";
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "implicit_mul")
|
|
||||||
type = "mul";
|
|
||||||
assert(m_ConfigBlocks.at(i).find("filters") != m_ConfigBlocks.at(i).end());
|
|
||||||
int filters = std::stoi(m_ConfigBlocks.at(i).at("filters"));
|
|
||||||
nvinfer1::ILayer* out = implicitLayer(filters, weights, m_TrtWeights, weightPtr, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
std::string layerType = "implicit_" + type;
|
std::string layerName = m_ConfigBlocks.at(i).at("type");
|
||||||
printLayerInfo(layerIndex, layerType, " -", outputVol, std::to_string(weightPtr));
|
printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr));
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || m_ConfigBlocks.at(i).at("type") == "control_channels")
|
else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" ||
|
||||||
|
m_ConfigBlocks.at(i).at("type") == "control_channels")
|
||||||
{
|
{
|
||||||
std::string type;
|
|
||||||
if (m_ConfigBlocks.at(i).at("type") == "shift_channels")
|
|
||||||
type = "shift";
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "control_channels")
|
|
||||||
type = "control";
|
|
||||||
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
|
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
|
||||||
int from = stoi(m_ConfigBlocks.at(i).at("from"));
|
int from = stoi(m_ConfigBlocks.at(i).at("from"));
|
||||||
if (from > 0)
|
if (from > 0)
|
||||||
@@ -254,140 +236,193 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN
|
|||||||
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
|
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
|
||||||
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
||||||
assert(i + from - 1 < i - 2);
|
assert(i + from - 1 < i - 2);
|
||||||
nvinfer1::ILayer* out = channelsLayer(type, previous, tensorOutputs[i + from - 1], &network);
|
|
||||||
previous = out->getOutput(0);
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
|
previous = channelsLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network);
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
std::string layerType = type + "_channels" + ": " + std::to_string(i + from - 1);
|
std::string layerName = m_ConfigBlocks.at(i).at("type") + ": " + std::to_string(i + from - 1);
|
||||||
printLayerInfo(layerIndex, layerType, " -", outputVol, " -");
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "dropout")
|
|
||||||
{
|
|
||||||
// Skip dropout layer
|
|
||||||
assert(previous != nullptr);
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "dropout", " -", " -", " -");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "shortcut")
|
else if (m_ConfigBlocks.at(i).at("type") == "shortcut")
|
||||||
{
|
{
|
||||||
assert(m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end());
|
|
||||||
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
|
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
|
||||||
std::string activation = m_ConfigBlocks.at(i).at("activation");
|
|
||||||
int from = stoi(m_ConfigBlocks.at(i).at("from"));
|
int from = stoi(m_ConfigBlocks.at(i).at("from"));
|
||||||
if (from > 0)
|
if (from > 0)
|
||||||
from = from - i + 1;
|
from = from - i + 1;
|
||||||
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
|
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
|
||||||
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
||||||
assert(i + from - 1 < i - 2);
|
assert(i + from - 1 < i - 2);
|
||||||
|
|
||||||
|
std::string mode = "add";
|
||||||
|
if (m_ConfigBlocks.at(i).find("mode") != m_ConfigBlocks.at(i).end())
|
||||||
|
mode = m_ConfigBlocks.at(i).at("mode");
|
||||||
|
|
||||||
|
std::string activation = "linear";
|
||||||
|
if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end())
|
||||||
|
activation = m_ConfigBlocks.at(i).at("activation");
|
||||||
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions());
|
std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions());
|
||||||
nvinfer1::ILayer* out = shortcutLayer(i, activation, inputVol, shortcutVol, previous, tensorOutputs[i + from - 1], &network);
|
previous = shortcutLayer(
|
||||||
previous = out->getOutput(0);
|
i, mode, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1],
|
||||||
|
&network);
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
std::string layerType = "shortcut_" + m_ConfigBlocks.at(i).at("activation") + ": " + std::to_string(i + from - 1);
|
std::string layerName = "shortcut_" + mode + "_" + activation + ": " + std::to_string(i + from - 1);
|
||||||
printLayerInfo(layerIndex, layerType, " -", outputVol, " -");
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
if (inputVol != shortcutVol) {
|
|
||||||
|
if (mode == "add" && inputVol != shortcutVol)
|
||||||
std::cout << inputVol << " +" << shortcutVol << std::endl;
|
std::cout << inputVol << " +" << shortcutVol << std::endl;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "route")
|
else if (m_ConfigBlocks.at(i).at("type") == "route")
|
||||||
{
|
{
|
||||||
assert(m_ConfigBlocks.at(i).find("layers") != m_ConfigBlocks.at(i).end());
|
std::string layers;
|
||||||
nvinfer1::ILayer* out = routeLayer(i, m_ConfigBlocks.at(i), tensorOutputs, &network);
|
previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network);
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr));
|
std::string layerName = "route: " + layers;
|
||||||
|
printLayerInfo(layerIndex, layerName, "-", outputVol, "-");
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "upsample")
|
else if (m_ConfigBlocks.at(i).at("type") == "upsample")
|
||||||
{
|
{
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], previous, &network);
|
previous = upsampleLayer(i, m_ConfigBlocks[i], previous, &network);
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -");
|
std::string layerName = "upsample";
|
||||||
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "maxpool")
|
else if (m_ConfigBlocks.at(i).at("type") == "maxpool" || m_ConfigBlocks.at(i).at("type") == "avgpool")
|
||||||
{
|
{
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
nvinfer1::ILayer* out = maxpoolLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
previous = poolingLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
|
std::string layerName = m_ConfigBlocks.at(i).at("type");
|
||||||
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "reorg")
|
else if (m_ConfigBlocks.at(i).at("type") == "reorg")
|
||||||
{
|
|
||||||
if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolor") != std::string::npos)
|
|
||||||
{
|
{
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
nvinfer1::ILayer* out = reorgV5Layer(i, previous, &network);
|
previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
std::string layerType = "reorgV5";
|
std::string layerName = "reorg";
|
||||||
printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
else if (m_ConfigBlocks.at(i).at("type") == "reduce")
|
||||||
{
|
{
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
|
previous = reduceLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||||
assert(reorgPlugin != nullptr);
|
|
||||||
nvinfer1::IPluginV2Layer* reorg =
|
|
||||||
network.addPluginV2(&previous, 1, *reorgPlugin);
|
|
||||||
assert(reorg != nullptr);
|
|
||||||
std::string layerName = "reorg_" + std::to_string(i);
|
|
||||||
reorg->setName(layerName.c_str());
|
|
||||||
previous = reorg->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
assert(previous != nullptr);
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
channels = getNumChannels(previous);
|
tensorOutputs.push_back(previous);
|
||||||
tensorOutputs.push_back(reorg->getOutput(0));
|
std::string layerName = "reduce";
|
||||||
printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr));
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if (m_ConfigBlocks.at(i).at("type") == "shuffle")
|
||||||
|
{
|
||||||
|
std::string layer;
|
||||||
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
|
previous = shuffleLayer(i, layer, m_ConfigBlocks.at(i), previous, tensorOutputs, &network);
|
||||||
|
assert(previous != nullptr);
|
||||||
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
|
tensorOutputs.push_back(previous);
|
||||||
|
std::string layerName = "shuffle: " + layer;
|
||||||
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (m_ConfigBlocks.at(i).at("type") == "softmax")
|
||||||
|
{
|
||||||
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
|
previous = softmaxLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||||
|
assert(previous != nullptr);
|
||||||
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
|
tensorOutputs.push_back(previous);
|
||||||
|
std::string layerName = "softmax";
|
||||||
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region")
|
else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region")
|
||||||
{
|
{
|
||||||
if (m_ConfigBlocks.at(i).at("type") == "yolo")
|
if (m_ConfigBlocks.at(i).at("type") == "yolo")
|
||||||
{
|
|
||||||
if (m_NetworkType.find("yolor") != std::string::npos)
|
if (m_NetworkType.find("yolor") != std::string::npos)
|
||||||
modelType = 2;
|
modelType = 2;
|
||||||
else
|
else
|
||||||
modelType = 1;
|
modelType = 1;
|
||||||
}
|
|
||||||
else
|
else
|
||||||
modelType = 0;
|
modelType = 0;
|
||||||
|
|
||||||
std::string layerName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i);
|
std::string blobName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i);
|
||||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||||
TensorInfo& curYoloTensor = m_YoloTensors.at(inputYoloCount);
|
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||||
curYoloTensor.blobName = layerName;
|
curYoloTensor.blobName = blobName;
|
||||||
curYoloTensor.gridSizeX = prevTensorDims.d[2];
|
curYoloTensor.gridSizeX = prevTensorDims.d[2];
|
||||||
curYoloTensor.gridSizeY = prevTensorDims.d[1];
|
curYoloTensor.gridSizeY = prevTensorDims.d[1];
|
||||||
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
channels = getNumChannels(previous);
|
|
||||||
tensorOutputs.push_back(previous);
|
tensorOutputs.push_back(previous);
|
||||||
yoloInputs.push_back(previous);
|
yoloTensorInputs[yoloCountInputs] = previous;
|
||||||
++inputYoloCount;
|
++yoloCountInputs;
|
||||||
printLayerInfo(layerIndex, modelType != 0 ? "yolo" : "region", inputVol, " -", " -");
|
std::string layerName = modelType != 0 ? "yolo" : "region";
|
||||||
|
printLayerInfo(layerIndex, layerName, inputVol, "-", "-");
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (m_ConfigBlocks.at(i).at("type") == "cls")
|
||||||
|
{
|
||||||
|
modelType = 3;
|
||||||
|
|
||||||
|
std::string blobName = "cls_" + std::to_string(i);
|
||||||
|
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||||
|
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||||
|
curYoloTensor.blobName = blobName;
|
||||||
|
curYoloTensor.numBBoxes = prevTensorDims.d[1];
|
||||||
|
m_NumClasses = prevTensorDims.d[0];
|
||||||
|
|
||||||
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
|
previous = clsLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
||||||
|
assert(previous != nullptr);
|
||||||
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
|
tensorOutputs.push_back(previous);
|
||||||
|
yoloTensorInputs[yoloCountInputs] = previous;
|
||||||
|
++yoloCountInputs;
|
||||||
|
std::string layerName = "cls";
|
||||||
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-");
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (m_ConfigBlocks.at(i).at("type") == "reg")
|
||||||
|
{
|
||||||
|
modelType = 3;
|
||||||
|
|
||||||
|
std::string blobName = "reg_" + std::to_string(i);
|
||||||
|
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
||||||
|
TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs);
|
||||||
|
curYoloTensor.blobName = blobName;
|
||||||
|
curYoloTensor.numBBoxes = prevTensorDims.d[1];
|
||||||
|
|
||||||
|
std::string inputVol = dimsToString(previous->getDimensions());
|
||||||
|
previous = regLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network);
|
||||||
|
assert(previous != nullptr);
|
||||||
|
std::string outputVol = dimsToString(previous->getDimensions());
|
||||||
|
tensorOutputs.push_back(previous);
|
||||||
|
yoloTensorInputs[yoloCountInputs] = previous;
|
||||||
|
++yoloCountInputs;
|
||||||
|
std::string layerName = "reg";
|
||||||
|
printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr));
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
@@ -403,16 +438,17 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_YoloCount == inputYoloCount)
|
if (m_YoloCount == yoloCountInputs)
|
||||||
{
|
{
|
||||||
assert((modelType != -1) && "\nCould not determine model type");
|
assert((modelType != -1) && "\nCould not determine model type");
|
||||||
|
|
||||||
nvinfer1::ITensor* yoloInputTensors[inputYoloCount];
|
|
||||||
uint64_t outputSize = 0;
|
uint64_t outputSize = 0;
|
||||||
for (uint j = 0; j < inputYoloCount; ++j)
|
for (uint j = 0; j < yoloCountInputs; ++j)
|
||||||
{
|
{
|
||||||
yoloInputTensors[j] = yoloInputs[j];
|
|
||||||
TensorInfo& curYoloTensor = m_YoloTensors.at(j);
|
TensorInfo& curYoloTensor = m_YoloTensors.at(j);
|
||||||
|
if (modelType == 3)
|
||||||
|
outputSize = curYoloTensor.numBBoxes;
|
||||||
|
else
|
||||||
outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes;
|
outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -422,21 +458,15 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string layerName = "yolo";
|
|
||||||
nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(
|
nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(
|
||||||
m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize, modelType, m_TopK,
|
m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize, modelType, m_TopK, m_ScoreThreshold);
|
||||||
m_ScoreThreshold);
|
|
||||||
assert(yoloPlugin != nullptr);
|
assert(yoloPlugin != nullptr);
|
||||||
nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloInputTensors, inputYoloCount, *yoloPlugin);
|
nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin);
|
||||||
assert(yolo != nullptr);
|
assert(yolo != nullptr);
|
||||||
yolo->setName(layerName.c_str());
|
std::string yoloLayerName = "yolo";
|
||||||
previous = yolo->getOutput(0);
|
yolo->setName(yoloLayerName.c_str());
|
||||||
assert(previous != nullptr);
|
|
||||||
previous->setName(layerName.c_str());
|
|
||||||
tensorOutputs.push_back(yolo->getOutput(0));
|
|
||||||
|
|
||||||
nvinfer1::ITensor* yoloTensors[] = {yolo->getOutput(0), yolo->getOutput(1)};
|
nvinfer1::ITensor* yoloTensorOutputs[] = {yolo->getOutput(0), yolo->getOutput(1)};
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
|
|
||||||
nvinfer1::plugin::NMSParameters nmsParams;
|
nvinfer1::plugin::NMSParameters nmsParams;
|
||||||
nmsParams.shareLocation = true;
|
nmsParams.shareLocation = true;
|
||||||
@@ -448,28 +478,28 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN
|
|||||||
nmsParams.iouThreshold = m_IouThreshold;
|
nmsParams.iouThreshold = m_IouThreshold;
|
||||||
nmsParams.isNormalized = false;
|
nmsParams.isNormalized = false;
|
||||||
|
|
||||||
layerName = "batchedNMS";
|
std::string nmslayerName = "batchedNMS";
|
||||||
nvinfer1::IPluginV2* batchedNMS = createBatchedNMSPlugin(nmsParams);
|
nvinfer1::IPluginV2* batchedNMS = createBatchedNMSPlugin(nmsParams);
|
||||||
nvinfer1::IPluginV2Layer* nms = network.addPluginV2(yoloTensors, 2, *batchedNMS);
|
nvinfer1::IPluginV2Layer* nms = network.addPluginV2(yoloTensorOutputs, 2, *batchedNMS);
|
||||||
nms->setName(layerName.c_str());
|
nms->setName(nmslayerName.c_str());
|
||||||
nvinfer1::ITensor* num_detections = nms->getOutput(0);
|
nvinfer1::ITensor* num_detections = nms->getOutput(0);
|
||||||
layerName = "num_detections";
|
nmslayerName = "num_detections";
|
||||||
num_detections->setName(layerName.c_str());
|
num_detections->setName(nmslayerName.c_str());
|
||||||
nvinfer1::ITensor* nmsed_boxes = nms->getOutput(1);
|
nvinfer1::ITensor* nmsed_boxes = nms->getOutput(1);
|
||||||
layerName = "nmsed_boxes";
|
nmslayerName = "nmsed_boxes";
|
||||||
nmsed_boxes->setName(layerName.c_str());
|
nmsed_boxes->setName(nmslayerName.c_str());
|
||||||
nvinfer1::ITensor* nmsed_scores = nms->getOutput(2);
|
nvinfer1::ITensor* nmsed_scores = nms->getOutput(2);
|
||||||
layerName = "nmsed_scores";
|
nmslayerName = "nmsed_scores";
|
||||||
nmsed_scores->setName(layerName.c_str());
|
nmsed_scores->setName(nmslayerName.c_str());
|
||||||
nvinfer1::ITensor* nmsed_classes = nms->getOutput(3);
|
nvinfer1::ITensor* nmsed_classes = nms->getOutput(3);
|
||||||
layerName = "nmsed_classes";
|
nmslayerName = "nmsed_classes";
|
||||||
nmsed_classes->setName(layerName.c_str());
|
nmsed_classes->setName(nmslayerName.c_str());
|
||||||
network.markOutput(*num_detections);
|
network.markOutput(*num_detections);
|
||||||
network.markOutput(*nmsed_boxes);
|
network.markOutput(*nmsed_boxes);
|
||||||
network.markOutput(*nmsed_scores);
|
network.markOutput(*nmsed_scores);
|
||||||
network.markOutput(*nmsed_classes);
|
network.markOutput(*nmsed_classes);
|
||||||
|
|
||||||
printLayerInfo("", "batched_nms", " -", outputVol, " -");
|
printLayerInfo("", "batched_nms", "-", "-", "-");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
std::cout << "\nError in yolo cfg file" << std::endl;
|
std::cout << "\nError in yolo cfg file" << std::endl;
|
||||||
@@ -620,6 +650,12 @@ void Yolo::parseConfigBlocks()
|
|||||||
|
|
||||||
m_YoloTensors.push_back(outputTensor);
|
m_YoloTensors.push_back(outputTensor);
|
||||||
}
|
}
|
||||||
|
else if ((block.at("type") == "cls") || (block.at("type") == "reg"))
|
||||||
|
{
|
||||||
|
++m_YoloCount;
|
||||||
|
TensorInfo outputTensor;
|
||||||
|
m_YoloTensors.push_back(outputTensor);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -640,9 +676,7 @@ void Yolo::parseConfigNMSBlocks()
|
|||||||
void Yolo::destroyNetworkUtils()
|
void Yolo::destroyNetworkUtils()
|
||||||
{
|
{
|
||||||
for (uint i = 0; i < m_TrtWeights.size(); ++i)
|
for (uint i = 0; i < m_TrtWeights.size(); ++i)
|
||||||
{
|
|
||||||
if (m_TrtWeights[i].count > 0)
|
if (m_TrtWeights[i].count > 0)
|
||||||
free(const_cast<void*>(m_TrtWeights[i].values));
|
free(const_cast<void*>(m_TrtWeights[i].values));
|
||||||
}
|
|
||||||
m_TrtWeights.clear();
|
m_TrtWeights.clear();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,8 +33,13 @@
|
|||||||
#include "layers/shortcut_layer.h"
|
#include "layers/shortcut_layer.h"
|
||||||
#include "layers/route_layer.h"
|
#include "layers/route_layer.h"
|
||||||
#include "layers/upsample_layer.h"
|
#include "layers/upsample_layer.h"
|
||||||
#include "layers/maxpool_layer.h"
|
#include "layers/pooling_layer.h"
|
||||||
#include "layers/reorgv5_layer.h"
|
#include "layers/reorg_layer.h"
|
||||||
|
#include "layers/reduce_layer.h"
|
||||||
|
#include "layers/shuffle_layer.h"
|
||||||
|
#include "layers/softmax_layer.h"
|
||||||
|
#include "layers/cls_layer.h"
|
||||||
|
#include "layers/reg_layer.h"
|
||||||
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
#include "nvdsinfer_custom_impl.h"
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ __global__ void gpuYoloLayer(
|
|||||||
if (objectness < scoreThreshold)
|
if (objectness < scoreThreshold)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int count = (int)atomicAdd(&countData[0], 1);
|
int count = (int)atomicAdd(countData, 1);
|
||||||
|
|
||||||
const float alpha = scaleXY;
|
const float alpha = scaleXY;
|
||||||
const float beta = -0.5 * (scaleXY - 1);
|
const float beta = -0.5 * (scaleXY - 1);
|
||||||
|
|||||||
73
nvdsinfer_custom_impl_Yolo/yoloForward_e.cu
Normal file
73
nvdsinfer_custom_impl_Yolo/yoloForward_e.cu
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* Created by Marcos Luciano
|
||||||
|
* https://www.github.com/marcoslucianops
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/*
 * Decode kernel for the "cls"/"reg" head pair.
 *
 * Each thread handles one candidate (one row of `cls`, one 4-float row of `reg`):
 * it picks the class with the highest value, drops the candidate when that value
 * is below `scoreThreshold`, and otherwise appends it to the compacted output
 * arrays at a slot reserved through an atomic counter.
 *
 * cls              per-candidate class values, numOutputClasses floats per candidate
 * reg              per-candidate boxes, 4 floats per candidate (copied through unchanged)
 * d_indexes        out: slot index of every kept candidate
 * d_scores         out: best class value of every kept candidate, offset by +1
 * d_boxes          out: 4 floats per kept candidate
 * d_classes        out: best class index of every kept candidate
 * countData        in/out: counter of kept candidates, incremented atomically
 * netWidth/Height  not read by this kernel
 * outputSize       number of candidates to examine
 */
__global__ void gpuYoloLayer_e(
    const float* cls, const float* reg, int* d_indexes, float* d_scores, float* d_boxes, int* d_classes, int* countData,
    const float scoreThreshold, const uint netWidth, const uint netHeight, const uint numOutputClasses,
    const uint64_t outputSize)
{
    const uint candidate = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the last candidate have nothing to do.
    if (candidate >= outputSize)
        return;

    // Arg-max over this candidate's class values; the first maximum wins on ties.
    float bestScore = 0.0f;
    int bestClass = -1;
    uint classIdx = 0;
    while (classIdx < numOutputClasses)
    {
        const float value = cls[candidate * numOutputClasses + classIdx];
        if (value > bestScore)
        {
            bestScore = value;
            bestClass = classIdx;
        }
        ++classIdx;
    }

    if (bestScore < scoreThreshold)
        return;

    // atomicAdd returns the previous counter value, so every kept candidate gets a unique slot.
    const int slot = (int)atomicAdd(countData, 1);

    d_indexes[slot] = slot;
    // NOTE(review): the +1 offset is presumably undone by whatever consumes d_scores - confirm.
    d_scores[slot] = bestScore + 1.f;
    d_classes[slot] = bestClass;

    for (int k = 0; k < 4; ++k)
        d_boxes[slot * 4 + k] = reg[candidate * 4 + k];
}
|
||||||
|
|
||||||
|
cudaError_t cudaYoloLayer_e(
|
||||||
|
const void* cls, const void* reg, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData,
|
||||||
|
const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, const uint& netHeight,
|
||||||
|
const uint& numOutputClasses, cudaStream_t stream);
|
||||||
|
|
||||||
|
/*
 * Host-side launcher for gpuYoloLayer_e.
 *
 * Launches the kernel once per batch item on `stream`, offsetting every buffer
 * to that item's slice: `outputSize` candidates per item, `numOutputClasses`
 * floats per candidate in `cls`, 4 floats per candidate in `reg`/`d_boxes`,
 * and one counter per item in `countData`.
 *
 * Returns the result of cudaGetLastError() after the launches have been queued.
 */
cudaError_t cudaYoloLayer_e(
    const void* cls, const void* reg, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData,
    const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, const uint& netHeight,
    const uint& numOutputClasses, cudaStream_t stream)
{
    // A grid with zero blocks is an invalid launch configuration, so skip the launches.
    if (outputSize == 0)
        return cudaGetLastError();

    const unsigned int threads_per_block = 16;

    // Size the grid from outputSize (rounded up) so every candidate gets a thread.
    // A fixed 525 x 16 grid only covered the first 8400 candidates and silently
    // ignored the rest for larger outputs; the kernel bounds-checks the overshoot.
    const unsigned int number_of_blocks
        = static_cast<unsigned int>((outputSize + threads_per_block - 1) / threads_per_block);

    for (unsigned int batch = 0; batch < batchSize; ++batch)
    {
        gpuYoloLayer_e<<<number_of_blocks, threads_per_block, 0, stream>>>(
            reinterpret_cast<const float*>(cls) + (batch * numOutputClasses * outputSize),
            reinterpret_cast<const float*>(reg) + (batch * 4 * outputSize),
            reinterpret_cast<int*>(d_indexes) + (batch * outputSize),
            reinterpret_cast<float*>(d_scores) + (batch * outputSize),
            reinterpret_cast<float*>(d_boxes) + (batch * 4 * outputSize),
            reinterpret_cast<int*>(d_classes) + (batch * outputSize), reinterpret_cast<int*>(countData) + (batch),
            scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize);
    }
    return cudaGetLastError();
}
|
||||||
@@ -26,7 +26,7 @@ __global__ void gpuYoloLayer_nc(
|
|||||||
if (objectness < scoreThreshold)
|
if (objectness < scoreThreshold)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int count = (int)atomicAdd(&countData[0], 1);
|
int count = (int)atomicAdd(countData, 1);
|
||||||
|
|
||||||
const float alpha = scaleXY;
|
const float alpha = scaleXY;
|
||||||
const float beta = -0.5 * (scaleXY - 1);
|
const float beta = -0.5 * (scaleXY - 1);
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ __global__ void gpuYoloLayer_r(
|
|||||||
if (objectness < scoreThreshold)
|
if (objectness < scoreThreshold)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int count = (int)atomicAdd(&countData[0], 1);
|
int count = (int)atomicAdd(countData, 1);
|
||||||
|
|
||||||
const float alpha = scaleXY;
|
const float alpha = scaleXY;
|
||||||
const float beta = -0.5 * (scaleXY - 1);
|
const float beta = -0.5 * (scaleXY - 1);
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ __global__ void gpuRegionLayer(
|
|||||||
if (objectness < scoreThreshold)
|
if (objectness < scoreThreshold)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int count = (int)atomicAdd(&countData[0], 1);
|
int count = (int)atomicAdd(countData, 1);
|
||||||
|
|
||||||
float x
|
float x
|
||||||
= (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)])
|
= (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)])
|
||||||
|
|||||||
@@ -47,6 +47,11 @@ namespace {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cudaError_t cudaYoloLayer_e(
|
||||||
|
const void* cls, const void* reg, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData,
|
||||||
|
const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, const uint& netHeight,
|
||||||
|
const uint& numOutputClasses, cudaStream_t stream);
|
||||||
|
|
||||||
cudaError_t cudaYoloLayer_r(
|
cudaError_t cudaYoloLayer_r(
|
||||||
const void* input, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData,
|
const void* input, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData,
|
||||||
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth,
|
||||||
@@ -88,6 +93,7 @@ YoloLayer::YoloLayer (const void* data, size_t length)
|
|||||||
read(d, m_TopK);
|
read(d, m_TopK);
|
||||||
read(d, m_ScoreThreshold);
|
read(d, m_ScoreThreshold);
|
||||||
|
|
||||||
|
if (m_Type != 3) {
|
||||||
uint yoloTensorsSize;
|
uint yoloTensorsSize;
|
||||||
read(d, yoloTensorsSize);
|
read(d, yoloTensorsSize);
|
||||||
for (uint i = 0; i < yoloTensorsSize; ++i)
|
for (uint i = 0; i < yoloTensorsSize; ++i)
|
||||||
@@ -117,6 +123,7 @@ YoloLayer::YoloLayer (const void* data, size_t length)
|
|||||||
}
|
}
|
||||||
m_YoloTensors.push_back(curYoloTensor);
|
m_YoloTensors.push_back(curYoloTensor);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
kNUM_CLASSES = m_NumClasses;
|
kNUM_CLASSES = m_NumClasses;
|
||||||
};
|
};
|
||||||
@@ -147,9 +154,9 @@ YoloLayer::getOutputDimensions(
|
|||||||
{
|
{
|
||||||
assert(index < 3);
|
assert(index < 3);
|
||||||
if (index == 0) {
|
if (index == 0) {
|
||||||
return nvinfer1::Dims3(m_TopK, 1, 4);
|
return nvinfer1::Dims{3, {static_cast<int>(m_TopK), 1, 4}};
|
||||||
}
|
}
|
||||||
return nvinfer1::DimsHW(m_TopK, m_NumClasses);
|
return nvinfer1::Dims{2, {static_cast<int>(m_TopK), static_cast<int>(m_NumClasses)}};
|
||||||
}
|
}
|
||||||
|
|
||||||
bool YoloLayer::supportsFormat (
|
bool YoloLayer::supportsFormat (
|
||||||
@@ -173,30 +180,40 @@ int32_t YoloLayer::enqueue (
|
|||||||
int batchSize, void const* const* inputs, void* const* outputs, void* workspace,
|
int batchSize, void const* const* inputs, void* const* outputs, void* workspace,
|
||||||
cudaStream_t stream) noexcept
|
cudaStream_t stream) noexcept
|
||||||
{
|
{
|
||||||
void* countData = workspace;
|
|
||||||
void* bboxData = outputs[0];
|
void* bboxData = outputs[0];
|
||||||
void* scoreData = outputs[1];
|
void* scoreData = outputs[1];
|
||||||
|
|
||||||
CUDA_CHECK(cudaMemsetAsync((int*)countData, 0, sizeof(int) * batchSize, stream));
|
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*)bboxData, 0, sizeof(float) * m_TopK * 4 * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((float*)bboxData, 0, sizeof(float) * m_TopK * 4 * batchSize, stream));
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*)scoreData, 0, sizeof(float) * m_TopK * m_NumClasses * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((float*)scoreData, 0, sizeof(float) * m_TopK * m_NumClasses * batchSize, stream));
|
||||||
|
|
||||||
|
void* countData;
|
||||||
|
CUDA_CHECK(cudaMalloc(&countData, sizeof(int) * batchSize));
|
||||||
|
CUDA_CHECK(cudaMemsetAsync((int*)countData, 0, sizeof(int) * batchSize, stream));
|
||||||
|
|
||||||
void* d_indexes;
|
void* d_indexes;
|
||||||
CUDA_CHECK(cudaMallocHost(&d_indexes, sizeof(int) * m_OutputSize * batchSize));
|
CUDA_CHECK(cudaMalloc(&d_indexes, sizeof(int) * m_OutputSize * batchSize));
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*)d_indexes, 0, sizeof(int) * m_OutputSize * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((int*)d_indexes, 0, sizeof(int) * m_OutputSize * batchSize, stream));
|
||||||
|
|
||||||
void* d_scores;
|
void* d_scores;
|
||||||
CUDA_CHECK(cudaMallocHost(&d_scores, sizeof(float) * m_OutputSize * batchSize));
|
CUDA_CHECK(cudaMalloc(&d_scores, sizeof(float) * m_OutputSize * batchSize));
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*)d_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((float*)d_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream));
|
||||||
|
|
||||||
void* d_boxes;
|
void* d_boxes;
|
||||||
CUDA_CHECK(cudaMallocHost(&d_boxes, sizeof(float) * m_OutputSize * 4 * batchSize));
|
CUDA_CHECK(cudaMalloc(&d_boxes, sizeof(float) * m_OutputSize * 4 * batchSize));
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*)d_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((float*)d_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream));
|
||||||
|
|
||||||
void* d_classes;
|
void* d_classes;
|
||||||
CUDA_CHECK(cudaMallocHost(&d_classes, sizeof(int) * m_OutputSize * batchSize));
|
CUDA_CHECK(cudaMalloc(&d_classes, sizeof(int) * m_OutputSize * batchSize));
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*)d_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((float*)d_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream));
|
||||||
|
|
||||||
|
if (m_Type == 3)
|
||||||
|
{
|
||||||
|
CUDA_CHECK(cudaYoloLayer_e(
|
||||||
|
inputs[0], inputs[1], d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, m_OutputSize,
|
||||||
|
m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
uint yoloTensorsSize = m_YoloTensors.size();
|
uint yoloTensorsSize = m_YoloTensors.size();
|
||||||
for (uint i = 0; i < yoloTensorsSize; ++i)
|
for (uint i = 0; i < yoloTensorsSize; ++i)
|
||||||
{
|
{
|
||||||
@@ -213,12 +230,12 @@ int32_t YoloLayer::enqueue (
|
|||||||
void* v_mask;
|
void* v_mask;
|
||||||
if (anchors.size() > 0) {
|
if (anchors.size() > 0) {
|
||||||
float* f_anchors = anchors.data();
|
float* f_anchors = anchors.data();
|
||||||
CUDA_CHECK(cudaMallocHost(&v_anchors, sizeof(float) * anchors.size()));
|
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size()));
|
||||||
CUDA_CHECK(cudaMemcpy(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice));
|
CUDA_CHECK(cudaMemcpy(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice));
|
||||||
}
|
}
|
||||||
if (mask.size() > 0) {
|
if (mask.size() > 0) {
|
||||||
int* f_mask = mask.data();
|
int* f_mask = mask.data();
|
||||||
CUDA_CHECK(cudaMallocHost(&v_mask, sizeof(int) * mask.size()));
|
CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size()));
|
||||||
CUDA_CHECK(cudaMemcpy(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice));
|
CUDA_CHECK(cudaMemcpy(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -246,7 +263,7 @@ int32_t YoloLayer::enqueue (
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
void* softmax;
|
void* softmax;
|
||||||
CUDA_CHECK(cudaMallocHost(&softmax, sizeof(float) * inputSize * batchSize));
|
CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize));
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize));
|
CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize));
|
||||||
|
|
||||||
CUDA_CHECK(cudaRegionLayer(
|
CUDA_CHECK(cudaRegionLayer(
|
||||||
@@ -254,14 +271,15 @@ int32_t YoloLayer::enqueue (
|
|||||||
m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors,
|
m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors,
|
||||||
stream));
|
stream));
|
||||||
|
|
||||||
CUDA_CHECK(cudaFreeHost(softmax));
|
CUDA_CHECK(cudaFree(softmax));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (anchors.size() > 0) {
|
if (anchors.size() > 0) {
|
||||||
CUDA_CHECK(cudaFreeHost(v_anchors));
|
CUDA_CHECK(cudaFree(v_anchors));
|
||||||
}
|
}
|
||||||
if (mask.size() > 0) {
|
if (mask.size() > 0) {
|
||||||
CUDA_CHECK(cudaFreeHost(v_mask));
|
CUDA_CHECK(cudaFree(v_mask));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -269,10 +287,11 @@ int32_t YoloLayer::enqueue (
|
|||||||
d_indexes, d_scores, d_boxes, d_classes, bboxData, scoreData, countData, batchSize, m_OutputSize, m_TopK,
|
d_indexes, d_scores, d_boxes, d_classes, bboxData, scoreData, countData, batchSize, m_OutputSize, m_TopK,
|
||||||
m_NumClasses, stream));
|
m_NumClasses, stream));
|
||||||
|
|
||||||
CUDA_CHECK(cudaFreeHost(d_indexes));
|
CUDA_CHECK(cudaFree(countData));
|
||||||
CUDA_CHECK(cudaFreeHost(d_scores));
|
CUDA_CHECK(cudaFree(d_indexes));
|
||||||
CUDA_CHECK(cudaFreeHost(d_boxes));
|
CUDA_CHECK(cudaFree(d_scores));
|
||||||
CUDA_CHECK(cudaFreeHost(d_classes));
|
CUDA_CHECK(cudaFree(d_boxes));
|
||||||
|
CUDA_CHECK(cudaFree(d_classes));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -290,6 +309,7 @@ size_t YoloLayer::getSerializationSize() const noexcept
|
|||||||
totalSize += sizeof(m_TopK);
|
totalSize += sizeof(m_TopK);
|
||||||
totalSize += sizeof(m_ScoreThreshold);
|
totalSize += sizeof(m_ScoreThreshold);
|
||||||
|
|
||||||
|
if (m_Type != 3) {
|
||||||
uint yoloTensorsSize = m_YoloTensors.size();
|
uint yoloTensorsSize = m_YoloTensors.size();
|
||||||
totalSize += sizeof(yoloTensorsSize);
|
totalSize += sizeof(yoloTensorsSize);
|
||||||
|
|
||||||
@@ -303,6 +323,7 @@ size_t YoloLayer::getSerializationSize() const noexcept
|
|||||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size();
|
totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size();
|
||||||
totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size();
|
totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return totalSize;
|
return totalSize;
|
||||||
}
|
}
|
||||||
@@ -320,6 +341,7 @@ void YoloLayer::serialize(void* buffer) const noexcept
|
|||||||
write(d, m_TopK);
|
write(d, m_TopK);
|
||||||
write(d, m_ScoreThreshold);
|
write(d, m_ScoreThreshold);
|
||||||
|
|
||||||
|
if (m_Type != 3) {
|
||||||
uint yoloTensorsSize = m_YoloTensors.size();
|
uint yoloTensorsSize = m_YoloTensors.size();
|
||||||
write(d, yoloTensorsSize);
|
write(d, yoloTensorsSize);
|
||||||
for (uint i = 0; i < yoloTensorsSize; ++i)
|
for (uint i = 0; i < yoloTensorsSize; ++i)
|
||||||
@@ -345,6 +367,7 @@ void YoloLayer::serialize(void* buffer) const noexcept
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nvinfer1::IPluginV2* YoloLayer::clone() const noexcept
|
nvinfer1::IPluginV2* YoloLayer::clone() const noexcept
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -85,9 +85,7 @@ public:
|
|||||||
|
|
||||||
void terminate () noexcept override {}
|
void terminate () noexcept override {}
|
||||||
|
|
||||||
size_t getWorkspaceSize (int maxBatchSize) const noexcept override {
|
size_t getWorkspaceSize (int maxBatchSize) const noexcept override { return 0; }
|
||||||
return maxBatchSize * sizeof(int);
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t enqueue (
|
int32_t enqueue (
|
||||||
int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
|
int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream)
|
||||||
|
|||||||
23
readme.md
23
readme.md
@@ -7,7 +7,6 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models
|
|||||||
* Models benchmarks
|
* Models benchmarks
|
||||||
* DeepStream tutorials
|
* DeepStream tutorials
|
||||||
* YOLOX support
|
* YOLOX support
|
||||||
* PP-YOLO support
|
|
||||||
* YOLOv6 support
|
* YOLOv6 support
|
||||||
* YOLOv7 support
|
* YOLOv7 support
|
||||||
* Dynamic batch-size
|
* Dynamic batch-size
|
||||||
@@ -23,18 +22,18 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models
|
|||||||
* Support for INT8 calibration
|
* Support for INT8 calibration
|
||||||
* Support for non square models
|
* Support for non square models
|
||||||
* New documentation for multiple models
|
* New documentation for multiple models
|
||||||
* **YOLOv5 >= 2.0 support**
|
* YOLOv5 support
|
||||||
* **YOLOR support**
|
* YOLOR support
|
||||||
* **GPU YOLO Decoder** [#138](https://github.com/marcoslucianops/DeepStream-Yolo/issues/138)
|
* **GPU YOLO Decoder** [#138](https://github.com/marcoslucianops/DeepStream-Yolo/issues/138)
|
||||||
* **GPU Batched NMS** [#142](https://github.com/marcoslucianops/DeepStream-Yolo/issues/142)
|
* **GPU Batched NMS** [#142](https://github.com/marcoslucianops/DeepStream-Yolo/issues/142)
|
||||||
* **New YOLOv5 conversion**
|
* **PP-YOLOE support**
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|
||||||
### Getting started
|
### Getting started
|
||||||
|
|
||||||
* [Requirements](#requirements)
|
* [Requirements](#requirements)
|
||||||
* [Tested models](#tested-models)
|
* [Supported models](#supported-models)
|
||||||
* [Benchmarks](#benchmarks)
|
* [Benchmarks](#benchmarks)
|
||||||
* [dGPU installation](#dgpu-installation)
|
* [dGPU installation](#dgpu-installation)
|
||||||
* [Basic usage](#basic-usage)
|
* [Basic usage](#basic-usage)
|
||||||
@@ -42,6 +41,7 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models
|
|||||||
* [INT8 calibration](#int8-calibration)
|
* [INT8 calibration](#int8-calibration)
|
||||||
* [YOLOv5 usage](docs/YOLOv5.md)
|
* [YOLOv5 usage](docs/YOLOv5.md)
|
||||||
* [YOLOR usage](docs/YOLOR.md)
|
* [YOLOR usage](docs/YOLOR.md)
|
||||||
|
* [PP-YOLOE usage](docs/PPYOLOE.md)
|
||||||
* [Using your custom model](docs/customModels.md)
|
* [Using your custom model](docs/customModels.md)
|
||||||
* [Multiple YOLO GIEs](docs/multipleGIEs.md)
|
* [Multiple YOLO GIEs](docs/multipleGIEs.md)
|
||||||
|
|
||||||
@@ -81,23 +81,14 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models
|
|||||||
* [NVIDIA DeepStream SDK 6.0.1 / 6.0](https://developer.nvidia.com/embedded/deepstream-on-jetson-downloads-archived)
|
* [NVIDIA DeepStream SDK 6.0.1 / 6.0](https://developer.nvidia.com/embedded/deepstream-on-jetson-downloads-archived)
|
||||||
* [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo)
|
* [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo)
|
||||||
|
|
||||||
### For YOLOv5 and YOLOR
|
|
||||||
|
|
||||||
#### x86 platform
|
|
||||||
|
|
||||||
* [PyTorch >= 1.7.0](https://pytorch.org/get-started/locally/)
|
|
||||||
|
|
||||||
#### Jetson platform
|
|
||||||
|
|
||||||
* [PyTorch >= 1.7.0](https://forums.developer.nvidia.com/t/pytorch-for-jetson-version-1-11-now-available/72048)
|
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|
||||||
### Tested models
|
### Supported models
|
||||||
|
|
||||||
* [Darknet YOLO](https://github.com/AlexeyAB/darknet)
|
* [Darknet YOLO](https://github.com/AlexeyAB/darknet)
|
||||||
* [YOLOv5 >= 2.0](https://github.com/ultralytics/yolov5)
|
* [YOLOv5 >= 2.0](https://github.com/ultralytics/yolov5)
|
||||||
* [YOLOR](https://github.com/WongKinYiu/yolor)
|
* [YOLOR](https://github.com/WongKinYiu/yolor)
|
||||||
|
* [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe)
|
||||||
* [MobileNet-YOLO](https://github.com/dog-qiuqiu/MobileNet-Yolo)
|
* [MobileNet-YOLO](https://github.com/dog-qiuqiu/MobileNet-Yolo)
|
||||||
* [YOLO-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest)
|
* [YOLO-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest)
|
||||||
|
|
||||||
|
|||||||
437
utils/gen_wts_ppyoloe.py
Normal file
437
utils/gen_wts_ppyoloe.py
Normal file
@@ -0,0 +1,437 @@
|
|||||||
|
import os
|
||||||
|
import struct
|
||||||
|
import paddle
|
||||||
|
import numpy as np
|
||||||
|
from ppdet.core.workspace import load_config, merge_config
|
||||||
|
from ppdet.utils.check import check_gpu, check_version, check_config
|
||||||
|
from ppdet.utils.cli import ArgsParser
|
||||||
|
from ppdet.engine import Trainer
|
||||||
|
from ppdet.slim import build_slim_model
|
||||||
|
|
||||||
|
class Layers(object):
    """Emit a Darknet-style ``.cfg`` description and a ``.wts`` weight dump.

    The capitalised methods (``ConvBNLayer``, ``CSPResStage``, ...) each
    translate one high-level model block: they advance ``self.current`` and
    then call one or more lower-case writers (``convolutional``, ``route``,
    ...) which append a single cfg section each.  ``self.blocks[i]`` counts how
    many cfg sections high-level block ``i`` produced, which lets
    ``get_route`` turn a high-level block index into an absolute cfg layer
    index.

    Args:
        size: Sequence holding ``(height, width)``; a one-element sequence is
            used for both dimensions.
        fw: Writable text file object receiving the weights.
        fc: Writable text file object receiving the cfg sections.
        letter_box: When truthy, ``letter_box=1`` is added to the ``[net]``
            section.
    """

    def __init__(self, size, fw, fc, letter_box):
        # Per-block section counters.  The table starts with 300 slots and is
        # grown on demand by _count_layer(), so deeper models do not overflow.
        self.blocks = [0 for _ in range(300)]
        self.current = -1

        # High-level block indices recorded while walking the model.
        self.backbone_outs = []
        self.neck_fpn_feats = []
        self.neck_pan_feats = []
        self.yolo_head_cls = []
        self.yolo_head_reg = []

        self.width = size[0] if len(size) == 1 else size[1]
        self.height = size[0]
        self.letter_box = letter_box

        self.fw = fw
        self.fc = fc
        self.wc = 0  # number of weight records written to fw

        self.net()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _count_layer(self):
        """Record that the current high-level block emitted one more section."""
        missing = self.current + 1 - len(self.blocks)
        if missing > 0:
            self.blocks.extend([0] * missing)
        self.blocks[self.current] += 1

    def _write_values(self, name, values):
        """Write one weight record: ``name count  hex hex ...`` plus newline.

        Every value is stored as the hex form of its big-endian float32
        encoding.  ``values`` must be a flat sequence with a length.
        """
        encoded = ''.join(' ' + struct.pack('>f', float(v)).hex() for v in values)
        self.fw.write('{} {} '.format(name, len(values)) + encoded + '\n')
        self.wc += 1

    # ------------------------------------------------------------------
    # High-level blocks (each one advances self.current)
    # ------------------------------------------------------------------

    def ConvBNLayer(self, child):
        """Emit a single swish-activated convolution for ``child``."""
        self.current += 1

        self.convolutional(child, act='swish')

    def CSPResStage(self, child, ret):
        """Emit the sections of a backbone stage.

        When ``ret`` is ``True`` the block index is remembered in
        ``backbone_outs`` so the neck can route from it.
        """
        self.current += 1

        if child.conv_down is not None:
            self.convolutional(child.conv_down, act='swish')
        self.convolutional(child.conv1, act='swish')
        self.route('-2')
        self.convolutional(child.conv2, act='swish')
        # Relative offset back to the conv1 branch; each residual block below
        # adds three sections (conv, conv, shortcut).
        idx = -3
        for m in child.blocks:
            self.convolutional(m.conv1, act='swish')
            self.convolutional(m.conv2, act='swish')
            self.shortcut(-3)
            idx -= 3
        self.route('%d, -1' % idx)
        if child.attn is not None:
            self.reduce((1, 2), mode='mean', keepdim=True)
            self.convolutional(child.attn.fc, act='hardsigmoid')
            self.shortcut(-3, ew='mul')
        self.convolutional(child.conv3, act='swish')
        if ret is True:
            self.backbone_outs.append(self.current)

    def CSPStage(self, child, stage):
        """Emit the sections of a neck stage.

        ``stage`` selects where the block index is recorded: ``'fpn'`` appends
        to ``neck_fpn_feats``, ``'pan'`` to ``neck_pan_feats``; any other value
        records nothing.
        """
        self.current += 1

        self.convolutional(child.conv1, act='swish')
        self.route('-2')
        self.convolutional(child.conv2, act='swish')
        # Relative offset back to the conv1 branch, widened by the number of
        # sections each sub-module emits (2 for BasicBlock, 7 for SPP).
        idx = -3
        for m in child.convs:
            kind = m.__class__.__name__
            if kind == 'BasicBlock':
                self.convolutional(m.conv1, act='swish')
                self.convolutional(m.conv2, act='swish')
                idx -= 2
            elif kind == 'SPP':
                self.maxpool(m.pool0)
                self.route('-2')
                self.maxpool(m.pool1)
                self.route('-4')
                self.maxpool(m.pool2)
                self.route('-6, -5, -3, -1')
                self.convolutional(m.conv, act='swish')
                idx -= 7
        self.route('%d, -1' % idx)
        self.convolutional(child.conv3, act='swish')
        if stage == 'fpn':
            self.neck_fpn_feats.append(self.current)
        elif stage == 'pan':
            self.neck_pan_feats.append(self.current)

    def Concat(self, route):
        """Concatenate the previous layer with high-level block ``route``."""
        self.current += 1

        r = self.get_route(route)
        self.route('-1, %d' % r)

    def Upsample(self):
        """Emit an upsample section."""
        self.current += 1

        self.upsample()

    def AvgPool2d(self, route=None):
        """Emit an average pool, optionally routing from block ``route`` first."""
        self.current += 1

        if route is not None:
            r = self.get_route(route)
            self.route('%d' % r)
        self.avgpool()

    def ESEAttn(self, child, route=0):
        """Emit the attention block ``child``.

        A negative ``route`` first routes back by that relative offset and
        skips the trailing residual shortcut; ``route == 0`` adds the
        ``shortcut(-5)`` residual at the end.
        """
        self.current += 1

        if route < 0:
            self.route('%d' % route)
        self.convolutional(child.fc, act='sigmoid')
        self.shortcut(route - 3, ew='mul')
        self.convolutional(child.conv, act='swish')
        if route == 0:
            self.shortcut(-5)

    def Conv2D(self, child, act='linear'):
        """Emit a single convolution for ``child`` with activation ``act``."""
        self.current += 1

        self.convolutional(child, act=act)

    def Shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None, output=''):
        """Emit a shuffle section and optionally register it as a head output.

        ``route`` is a high-level block index translated with ``get_route``
        (0 is written when it is ``None``).  ``output`` of ``'cls'`` or
        ``'reg'`` records the block in ``yolo_head_cls`` / ``yolo_head_reg``.
        """
        self.current += 1

        r = 0
        if route is not None:
            r = self.get_route(route)
        self.shuffle(reshape=reshape, transpose1=transpose1, transpose2=transpose2, route=r)
        if output == 'cls':
            self.yolo_head_cls.append(self.current)
        elif output == 'reg':
            self.yolo_head_reg.append(self.current)

    def SoftMax(self, axes):
        """Emit a softmax section over ``axes``."""
        self.current += 1

        self.softmax(axes)

    def Detect(self, output):
        """Concatenate the recorded head outputs and emit the final section.

        ``'cls'`` uses ``yolo_head_cls``; anything else uses ``yolo_head_reg``.
        The chosen list is converted in place from high-level block indices to
        absolute cfg layer indices, so call this once per output kind.
        """
        self.current += 1

        routes = self.yolo_head_cls if output == 'cls' else self.yolo_head_reg

        routes[:] = [self.get_route(route) for route in routes]
        self.route(str(routes)[1:-1], axis=-1)
        self.yolo(output)

    # ------------------------------------------------------------------
    # cfg section writers (each one counts as one layer of the current block)
    # ------------------------------------------------------------------

    def net(self):
        """Write the ``[net]`` header section."""
        lb = 'letter_box=1\n' if self.letter_box else ''

        self.fc.write('[net]\n' +
                      'width=%d\n' % self.width +
                      'height=%d\n' % self.height +
                      'channels=3\n' +
                      lb)

    def convolutional(self, cv, act='linear', detect=False):
        """Dump the weights of ``cv`` and write its ``[convolutional]`` section.

        ``cv`` is either a module whose class is named ``Conv2D`` or a wrapper
        exposing the convolution as ``cv.conv`` (with an optional ``cv.bn``).
        ``detect=True`` forces the activation to ``logistic``.
        """
        self._count_layer()

        self.get_state_dict(cv.state_dict())

        if cv.__class__.__name__ == 'Conv2D':
            conv = cv
            bn = False
        else:
            conv = cv.conv
            bn = True if hasattr(cv, 'bn') else False

        filters = conv._out_channels
        size = conv._kernel_size
        stride = conv._stride
        pad = conv._padding
        groups = conv._groups
        bias = conv.bias

        if detect:
            act = 'logistic'

        b = 'batch_normalize=1\n' if bn is True else ''
        g = 'groups=%d\n' % groups if groups > 1 else ''
        w = 'bias=0\n' if bias is None and bn is False else ''

        self.fc.write('\n[convolutional]\n' +
                      b +
                      'filters=%d\n' % filters +
                      'size=%s\n' % self.get_value(size) +
                      'stride=%s\n' % self.get_value(stride) +
                      'pad=%s\n' % self.get_value(pad) +
                      g +
                      w +
                      'activation=%s\n' % act)

    def route(self, layers, axis=0):
        """Write a ``[route]`` section; ``axis`` is only written when non-zero."""
        self._count_layer()

        a = 'axis=%d\n' % axis if axis != 0 else ''

        self.fc.write('\n[route]\n' +
                      'layers=%s\n' % layers +
                      a)

    def shortcut(self, r, ew='add', act='linear'):
        """Write a ``[shortcut]`` section; ``ew='mul'`` adds ``mode=mul``."""
        self._count_layer()

        m = 'mode=mul\n' if ew == 'mul' else ''

        self.fc.write('\n[shortcut]\n' +
                      'from=%d\n' % r +
                      m +
                      'activation=%s\n' % act)

    def reduce(self, dim, mode='mean', keepdim=False):
        """Write a ``[reduce]`` section over the axes in the sequence ``dim``."""
        self._count_layer()

        self.fc.write('\n[reduce]\n' +
                      'mode=%s\n' % mode +
                      'axes=%s\n' % str(dim)[1:-1] +
                      'keep=%d\n' % keepdim)

    def maxpool(self, m):
        """Write a max-pool section for module ``m``.

        The section is named ``maxpool_up`` when ``m.ceil_mode`` is truthy and
        ``maxpool`` otherwise.
        """
        self._count_layer()

        stride = m.stride
        size = m.ksize
        section = 'maxpool_up' if m.ceil_mode else 'maxpool'

        self.fc.write('\n[%s]\n' % section +
                      'stride=%d\n' % stride +
                      'size=%d\n' % size)

    def upsample(self):
        """Write an ``[upsample]`` section with a fixed stride of 2."""
        self._count_layer()

        stride = 2

        self.fc.write('\n[upsample]\n' +
                      'stride=%d\n' % stride)

    def avgpool(self):
        """Write an ``[avgpool]`` section."""
        self._count_layer()

        self.fc.write('\n[avgpool]\n')

    def shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None):
        """Write a ``[shuffle]`` section, omitting every argument that is None."""
        self._count_layer()

        r = 'reshape=%s\n' % str(reshape)[1:-1] if reshape is not None else ''
        t1 = 'transpose1=%s\n' % str(transpose1)[1:-1] if transpose1 is not None else ''
        t2 = 'transpose2=%s\n' % str(transpose2)[1:-1] if transpose2 is not None else ''
        f = 'from=%d\n' % route if route is not None else ''

        self.fc.write('\n[shuffle]\n' +
                      r +
                      t1 +
                      t2 +
                      f)

    def softmax(self, axes):
        """Write a ``[softmax]`` section."""
        self._count_layer()

        self.fc.write('\n[softmax]\n' +
                      'axes=%d\n' % axes)

    def yolo(self, output):
        """Write the final section, named after ``output`` (e.g. ``[cls]``)."""
        self._count_layer()

        self.fc.write('\n[%s]\n' % output)

    # ------------------------------------------------------------------
    # Weight dumping and index helpers
    # ------------------------------------------------------------------

    def get_state_dict(self, state_dict):
        """Write one weight record per entry of ``state_dict``.

        Each value must offer ``reshape([-1]).numpy()``; it is flattened before
        being written.
        """
        for k, v in state_dict.items():
            self._write_values(k, v.reshape([-1]).numpy())

    def get_anchors(self, anchor_points, stride_tensor):
        """Write the ``anchor_points`` and ``stride_tensor`` weight records.

        Both arguments must offer ``.numpy()``.  The arrays are flattened so
        the written count always equals the number of values, and so that no
        multi-dimensional element is handed to ``float()``.
        """
        self._write_values('anchor_points', anchor_points.numpy().reshape(-1))
        self._write_values('stride_tensor', stride_tensor.numpy().reshape(-1))

    def get_value(self, key):
        """Return the cfg representation of a kernel/stride/padding setting.

        Integers are returned unchanged.  For a sequence, the single value is
        returned when it has one element or when its first two elements are
        equal; otherwise the comma separated text of the sequence is returned.
        """
        if isinstance(key, int):
            return key
        if len(key) == 1:
            return key[0]
        return key[0] if key[0] == key[1] else str(key)[1:-1]

    def get_route(self, n):
        """Return the absolute cfg layer index of the last section of block ``n``.

        This is the number of sections emitted by blocks ``0..n`` minus one
        (``-1`` when ``n`` is negative).
        """
        return sum(self.blocks[:max(n + 1, 0)]) - 1
|
||||||
|
|
||||||
|
|
||||||
|
def export_model():
    """Load the PaddleDetection config and weights and build the model to export.

    Command-line arguments are read through ``parse_args``.  The weights path
    and ``exclude_nms=True`` are injected into the config overrides, and a
    ``sync_bn`` norm type is replaced by ``bn`` before the overrides are
    merged.

    Returns:
        A ``(cfg, static_model)`` tuple: the merged config and the first value
        returned by ``Trainer._get_infer_cfg_and_input_spec``.
    """
    # Local import keeps this function self-contained without touching the
    # module-level import block.
    import tempfile

    paddle.set_device('cpu')

    FLAGS = parse_args()

    cfg = load_config(FLAGS.config)

    FLAGS.opt['weights'] = FLAGS.weights
    FLAGS.opt['exclude_nms'] = True

    if 'norm_type' in cfg and cfg['norm_type'] == 'sync_bn':
        FLAGS.opt['norm_type'] = 'bn'
    merge_config(FLAGS.opt)

    if FLAGS.slim_config:
        cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test')

    # Merged a second time after the optional slim step, as before.
    merge_config(FLAGS.opt)
    check_config(cfg)
    check_gpu(cfg.use_gpu)
    check_version()

    trainer = Trainer(cfg, mode='test')
    trainer.load_weights(cfg.weights)

    trainer.model.eval()

    # The directory handed to the trainer is scratch space that is discarded
    # right after the call.  A real temporary directory is portable (no shell
    # `rm`), is cleaned up even when the call raises, and cannot delete an
    # unrelated `.tmp` folder that already exists in the working directory.
    with tempfile.TemporaryDirectory() as scratch_dir:
        static_model, _ = trainer._get_infer_cfg_and_input_spec(scratch_dir)

    return cfg, static_model
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Build the command-line parser and return the parsed arguments.

    On top of whatever ``ArgsParser`` already defines, this adds the required
    ``-w/--weights`` option and the optional ``--slim_config`` option.
    """
    cli = ArgsParser()
    cli.add_argument(
        '-w',
        '--weights',
        required=True,
        type=str,
        help='Input weights (.pdparams) file path (required)',
    )
    cli.add_argument(
        '--slim_config',
        default=None,
        type=str,
        help='Slim configuration file of slim method',
    )
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
# Conversion entry point: walk the exported PP-YOLOE model and write the
# matching .cfg (network description) and .wts (weights) files.
cfg, model = export_model()

model_name = cfg.filename
inference_size = (cfg.eval_height, cfg.eval_width)
letter_box = False

# The last `Resize` transform found in the eval reader decides letter-boxing.
for sample_transforms in cfg['EvalReader']['sample_transforms']:
    if 'Resize' in sample_transforms:
        letter_box = sample_transforms['Resize']['keep_ratio']

backbone = cfg[cfg.architecture]['backbone']
neck = cfg[cfg.architecture]['neck']
yolo_head = cfg[cfg.architecture]['yolo_head']

# Reject unsupported architectures before any output file is created, so a
# failed run does not leave empty or partial .wts/.cfg files behind.
if backbone != 'CSPResNet' or neck != 'CustomCSPPAN' or yolo_head != 'PPYOLOEHead':
    raise SystemExit('Model not supported')

# Output names always carry a `ppyoloe` marker.
prefix = '' if 'ppyoloe' in model_name else 'ppyoloe_'
wts_file = prefix + model_name + '.wts'
cfg_file = prefix + model_name + '.cfg'

with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc:
    layers = Layers(inference_size, fw, fc, letter_box)

    # Backbone
    layers.fc.write('\n# CSPResNet\n')

    for child in model.backbone.stem:
        layers.ConvBNLayer(child)
    for i, child in enumerate(model.backbone.stages):
        ret = i in model.backbone.return_idx
        layers.CSPResStage(child, ret)

    # Neck
    layers.fc.write('\n# CustomCSPPAN\n')

    # Top-down (FPN) pass, starting from the last recorded backbone output.
    blocks = layers.backbone_outs[::-1]
    for i, block in enumerate(blocks):
        if i > 0:
            layers.Concat(block)
        layers.CSPStage(model.neck.fpn_stages[i][0], 'fpn')
        if i < model.neck.num_blocks - 1:
            layers.ConvBNLayer(model.neck.fpn_routes[i])
            layers.Upsample()
    # Bottom-up (PAN) pass, seeded with the last FPN feature.
    layers.neck_pan_feats = [layers.neck_fpn_feats[-1], ]
    for i in reversed(range(model.neck.num_blocks - 1)):
        layers.ConvBNLayer(model.neck.pan_routes[i])
        layers.Concat(layers.neck_fpn_feats[i])
        layers.CSPStage(model.neck.pan_stages[i][0], 'pan')
    layers.neck_pan_feats = layers.neck_pan_feats[::-1]

    # Head
    layers.fc.write('\n# PPYOLOEHead\n')

    for i, feat in enumerate(layers.neck_pan_feats):
        # The first feature is already the previous layer; later ones must be
        # routed back to explicitly.
        if i > 0:
            layers.AvgPool2d(route=feat)
        else:
            layers.AvgPool2d()
        layers.ESEAttn(model.yolo_head.stem_cls[i])
        layers.Conv2D(model.yolo_head.pred_cls[i], act='sigmoid')
        layers.Shuffle(reshape=[model.yolo_head.num_classes, 0], route=feat, output='cls')
        layers.ESEAttn(model.yolo_head.stem_reg[i], route=-7)
        layers.Conv2D(model.yolo_head.pred_reg[i])
        layers.Shuffle(reshape=[4, model.yolo_head.reg_max + 1, 0], transpose2=[1, 0, 2], route=feat)
        layers.SoftMax(0)
        layers.Conv2D(model.yolo_head.proj_conv)
        layers.Shuffle(reshape=[4, 0], route=feat, output='reg')
    layers.Detect('cls')
    layers.Detect('reg')
    layers.get_anchors(model.yolo_head.anchor_points.reshape([-1]), model.yolo_head.stride_tensor)

# Prepend the number of weight records as the first line of the .wts file.
# This is done in Python rather than through a shell pipeline so it works on
# every platform and with file names containing spaces or shell characters.
tmp_wts_file = wts_file + '.tmp'
with open(wts_file, 'r') as src, open(tmp_wts_file, 'w') as dst:
    dst.write('%d\n' % layers.wc)
    for line in src:
        dst.write(line)
os.replace(tmp_wts_file, wts_file)
|
||||||
@@ -9,11 +9,11 @@ from models.models import Darknet
|
|||||||
def parse_args():
|
def parse_args():
|
||||||
parser = argparse.ArgumentParser(description='PyTorch YOLOR conversion (main branch)')
|
parser = argparse.ArgumentParser(description='PyTorch YOLOR conversion (main branch)')
|
||||||
parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)')
|
parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)')
|
||||||
parser.add_argument('-c', '--cfg', help='Input cfg (.cfg) file path')
|
parser.add_argument('-c', '--cfg', default='', help='Input cfg (.cfg) file path')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if not os.path.isfile(args.weights):
|
if not os.path.isfile(args.weights):
|
||||||
raise SystemExit('Invalid weights file')
|
raise SystemExit('Invalid weights file')
|
||||||
if not os.path.isfile(args.cfg):
|
if args.cfg != '' and not os.path.isfile(args.cfg):
|
||||||
raise SystemExit('Invalid cfg file')
|
raise SystemExit('Invalid cfg file')
|
||||||
return args.weights, args.cfg
|
return args.weights, args.cfg
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user