diff --git a/config_infer_primary_ppyoloe.txt b/config_infer_primary_ppyoloe.txt
new file mode 100644
index 0000000..4dfc950
--- /dev/null
+++ b/config_infer_primary_ppyoloe.txt
@@ -0,0 +1,25 @@
+[property]
+gpu-id=0
+net-scale-factor=0.0173520735727919486
+offsets=123.675;116.28;103.53
+model-color-format=0
+custom-network-config=ppyoloe_crn_s_400e_coco.cfg
+model-file=ppyoloe_crn_s_400e_coco.wts
+model-engine-file=model_b1_gpu0_fp32.engine
+#int8-calib-file=calib.table
+labelfile-path=labels.txt
+batch-size=1
+network-mode=0
+num-detected-classes=80
+interval=0
+gie-unique-id=1
+process-mode=1
+network-type=0
+cluster-mode=4
+maintain-aspect-ratio=0
+parse-bbox-func-name=NvDsInferParseYolo
+custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
+engine-create-func-name=NvDsInferYoloCudaEngineGet
+
+[class-attrs-all]
+pre-cluster-threshold=0
diff --git a/docs/PPYOLOE.md b/docs/PPYOLOE.md
new file mode 100644
index 0000000..9e97d0c
--- /dev/null
+++ b/docs/PPYOLOE.md
@@ -0,0 +1,115 @@
+# PP-YOLOE usage
+
+* [Convert model](#convert-model)
+* [Compile the lib](#compile-the-lib)
+* [Edit the config_infer_primary_ppyoloe file](#edit-the-config_infer_primary_ppyoloe-file)
+* [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file)
+* [Testing the model](#testing-the-model)
+
+##
+
+### Convert model
+
+#### 1. Download the PaddleDetection repo and install the requirements
+
+https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/docs/tutorials/INSTALL.md
+
+**NOTE**: It is recommended to use Python virtualenv.
+
+#### 2. Copy conversor
+
+Copy the `gen_wts_ppyoloe.py` file from the `DeepStream-Yolo/utils` directory to the `PaddleDetection` folder.
+
+#### 3. Download the model
+
+Download the `pdparams` file from [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe) releases (example for PP-YOLOE-s)
+
+```
+wget https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_400e_coco.pdparams
+```
+
+**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`ppyoloe_`) in your `cfg` and `weights`/`wts` filenames to generate the engine correctly.
+
+#### 4. Convert model
+
+Generate the `cfg` and `wts` files (example for PP-YOLOE-s)
+
+```
+python3 gen_wts_ppyoloe.py -w ppyoloe_crn_s_400e_coco.pdparams -c configs/ppyoloe/ppyoloe_crn_s_400e_coco.yml
+```
+
+#### 5. Copy generated files
+
+Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder.
+
+##
+
+### Compile the lib
+
+Open the `DeepStream-Yolo` folder and compile the lib
+
+* DeepStream 6.1 on x86 platform
+
+  ```
+  CUDA_VER=11.6 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.0.1 / 6.0 on x86 platform
+
+  ```
+  CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.1 on Jetson platform
+
+  ```
+  CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.0.1 / 6.0 on Jetson platform
+
+  ```
+  CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+##
+
+### Edit the config_infer_primary_ppyoloe file
+
+Edit the `config_infer_primary_ppyoloe.txt` file according to your model (example for PP-YOLOE-s)
+
+```
+[property]
+...
+custom-network-config=ppyoloe_crn_s_400e_coco.cfg
+model-file=ppyoloe_crn_s_400e_coco.wts
+...
+```
+
+**NOTE**: PP-YOLOE normalizes the image during preprocessing, so it is important to set the `net-scale-factor` and `offsets` according to the values used in training.
+
+Default: `mean = 0.485, 0.456, 0.406` and `std = 0.229, 0.224, 0.225`
+
+```
+net-scale-factor=0.0173520735727919486
+offsets=123.675;116.28;103.53
+```
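+
+The values above come directly from those statistics: `offsets` is `mean * 255` per channel, and `net-scale-factor` approximates `1 / (std * 255)` with the three channel stds averaged to 0.226, since DeepStream applies a single scale factor to every channel. A quick sanity check of that arithmetic (plain Python, not part of this repo):
+
+```
+mean = [0.485, 0.456, 0.406]
+std = [0.229, 0.224, 0.225]
+avg_std = sum(std) / 3                       # 0.226
+print(1.0 / (avg_std * 255.0))               # ~0.0173520735727919486 -> net-scale-factor
+print(";".join(str(round(m * 255.0, 3)) for m in mean))  # 123.675;116.28;103.53 -> offsets
+```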
+
+##
+
+### Edit the deepstream_app_config file
+
+```
+...
+[primary-gie]
+...
+config-file=config_infer_primary_ppyoloe.txt
+```
+
+##
+
+### Testing the model
+
+```
+deepstream-app -c deepstream_app_config.txt
+```
diff --git a/docs/YOLOR.md b/docs/YOLOR.md
index e111a80..efa0037 100644
--- a/docs/YOLOR.md
+++ b/docs/YOLOR.md
@@ -22,7 +22,7 @@ cd yolor
 pip3 install -r requirements.txt
 ```
 
-**NOTE**: It is recommended to use a Python virtualenv.
+**NOTE**: It is recommended to use Python virtualenv.
 
 #### 2. Copy conversor
 
diff --git a/docs/YOLOv5.md b/docs/YOLOv5.md
index 7a5da06..44a5d40 100644
--- a/docs/YOLOv5.md
+++ b/docs/YOLOv5.md
@@ -22,7 +22,7 @@ cd yolov5
 pip3 install -r requirements.txt
 ```
 
-**NOTE**: It is recommended to use a Python virtualenv.
+**NOTE**: It is recommended to use Python virtualenv.
 
 #### 2. Copy conversor
 
diff --git a/nvdsinfer_custom_impl_Yolo/Makefile b/nvdsinfer_custom_impl_Yolo/Makefile
index c5bedbc..d71080a 100644
--- a/nvdsinfer_custom_impl_Yolo/Makefile
+++ b/nvdsinfer_custom_impl_Yolo/Makefile
@@ -59,15 +59,21 @@ SRCFILES:= nvdsinfer_yolo_engine.cpp \
           layers/shortcut_layer.cpp \
           layers/route_layer.cpp \
           layers/upsample_layer.cpp \
-          layers/maxpool_layer.cpp \
+          layers/pooling_layer.cpp \
           layers/activation_layer.cpp \
-          layers/reorgv5_layer.cpp \
+          layers/reorg_layer.cpp \
+          layers/reduce_layer.cpp \
+          layers/shuffle_layer.cpp \
+          layers/softmax_layer.cpp \
+          layers/cls_layer.cpp \
+          layers/reg_layer.cpp \
           utils.cpp \
           yolo.cpp \
           yoloForward.cu \
           yoloForward_v2.cu \
           yoloForward_nc.cu \
           yoloForward_r.cu \
+          yoloForward_e.cu \
           sortDetections.cu
 
 ifeq ($(OPENCV), 1)
diff --git a/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
index 500789c..139b42f 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
@@ -5,114 +5,113 @@
 
 #include "activation_layer.h"
 
-nvinfer1::ILayer* activationLayer(
+nvinfer1::ITensor* activationLayer(
     int layerIdx,
     std::string activation,
-    nvinfer1::ILayer* output,
     nvinfer1::ITensor* input,
     nvinfer1::INetworkDefinition* network)
 {
+    nvinfer1::ITensor* output;
+
     if (activation == "linear")
     {
-        // Pass
+        output = input;
     }
     else if (activation == "relu")
     {
-        nvinfer1::IActivationLayer* relu = network->addActivation(
-            *input, nvinfer1::ActivationType::kRELU);
+        nvinfer1::IActivationLayer* relu = network->addActivation(*input, nvinfer1::ActivationType::kRELU);
         assert(relu != nullptr);
         std::string reluLayerName = "relu_" + std::to_string(layerIdx);
         relu->setName(reluLayerName.c_str());
-        output = relu;
+        output = relu->getOutput(0);
     }
     else if (activation == "sigmoid" || activation == "logistic")
     {
-        nvinfer1::IActivationLayer* sigmoid = network->addActivation(
-            *input, nvinfer1::ActivationType::kSIGMOID);
+        nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID);
         assert(sigmoid != nullptr);
         std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
         sigmoid->setName(sigmoidLayerName.c_str());
-        output = sigmoid;
+        output = sigmoid->getOutput(0);
     }
     else if (activation == "tanh")
    {
-        nvinfer1::IActivationLayer* tanh = network->addActivation(
-            *input, nvinfer1::ActivationType::kTANH);
+        nvinfer1::IActivationLayer* tanh = network->addActivation(*input, nvinfer1::ActivationType::kTANH);
         assert(tanh != nullptr);
         std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
         tanh->setName(tanhLayerName.c_str());
-        output = tanh;
+        output = tanh->getOutput(0);
     }
     else if (activation == "leaky")
     {
-        nvinfer1::IActivationLayer* leaky = network->addActivation(
-            *input, nvinfer1::ActivationType::kLEAKY_RELU);
+        nvinfer1::IActivationLayer* leaky = network->addActivation(*input, nvinfer1::ActivationType::kLEAKY_RELU);
         assert(leaky != nullptr);
-        leaky->setAlpha(0.1);
         std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
         leaky->setName(leakyLayerName.c_str());
-        output = leaky;
+        leaky->setAlpha(0.1);
+        output = leaky->getOutput(0);
     }
     else if (activation == "softplus")
     {
-        nvinfer1::IActivationLayer* softplus = network->addActivation(
-            *input, nvinfer1::ActivationType::kSOFTPLUS);
+        nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS);
         assert(softplus != nullptr);
         std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
         softplus->setName(softplusLayerName.c_str());
-        output = softplus;
+        output = softplus->getOutput(0);
     }
     else if (activation == "mish")
     {
-        nvinfer1::IActivationLayer* softplus = network->addActivation(
-            *input, nvinfer1::ActivationType::kSOFTPLUS);
+        nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS);
         assert(softplus != nullptr);
         std::string softplusLayerName = "softplus_" + std::to_string(layerIdx);
         softplus->setName(softplusLayerName.c_str());
-        nvinfer1::IActivationLayer* tanh = network->addActivation(
-            *softplus->getOutput(0), nvinfer1::ActivationType::kTANH);
+        nvinfer1::IActivationLayer* tanh = network->addActivation(*softplus->getOutput(0), nvinfer1::ActivationType::kTANH);
         assert(tanh != nullptr);
         std::string tanhLayerName = "tanh_" + std::to_string(layerIdx);
         tanh->setName(tanhLayerName.c_str());
-        nvinfer1::IElementWiseLayer* mish = network->addElementWise(
-            *input, *tanh->getOutput(0),
-            nvinfer1::ElementWiseOperation::kPROD);
+        nvinfer1::IElementWiseLayer* mish
+            = network->addElementWise(*input, *tanh->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
         assert(mish != nullptr);
         std::string mishLayerName = "mish_" + std::to_string(layerIdx);
         mish->setName(mishLayerName.c_str());
-        output = mish;
+        output = mish->getOutput(0);
     }
     else if (activation == "silu" || activation == "swish")
     {
-        nvinfer1::IActivationLayer* sigmoid = network->addActivation(
-            *input, nvinfer1::ActivationType::kSIGMOID);
+        nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID);
        assert(sigmoid != nullptr);
         std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx);
         sigmoid->setName(sigmoidLayerName.c_str());
-        nvinfer1::IElementWiseLayer* silu = network->addElementWise(
-            *input, *sigmoid->getOutput(0),
-            nvinfer1::ElementWiseOperation::kPROD);
+        nvinfer1::IElementWiseLayer* silu
+            = network->addElementWise(*input, *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
         assert(silu != nullptr);
         std::string siluLayerName = "silu_" + std::to_string(layerIdx);
         silu->setName(siluLayerName.c_str());
-        output = silu;
+        output = silu->getOutput(0);
+    }
+    else if (activation == "hardsigmoid")
+    {
+        nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID);
+        assert(hardsigmoid != nullptr);
+        std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx);
+        hardsigmoid->setName(hardsigmoidLayerName.c_str());
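+        // TensorRT's kHARD_SIGMOID computes max(0, min(1, alpha * x + beta)), so alpha = 1/6 and
+        // beta = 0.5 give the standard hard sigmoid x / 6 + 0.5 clipped to [0, 1].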
+        hardsigmoid->setAlpha(1.0 / 6.0);
+        hardsigmoid->setBeta(0.5);
+        output = hardsigmoid->getOutput(0);
+    }
     else if (activation == "hardswish")
     {
-        nvinfer1::IActivationLayer* hard_sigmoid = network->addActivation(
-            *input, nvinfer1::ActivationType::kHARD_SIGMOID);
-        assert(hard_sigmoid != nullptr);
-        hard_sigmoid->setAlpha(1.0 / 6.0);
-        hard_sigmoid->setBeta(0.5);
-        std::string hardSigmoidLayerName = "hard_sigmoid_" + std::to_string(layerIdx);
-        hard_sigmoid->setName(hardSigmoidLayerName.c_str());
-        nvinfer1::IElementWiseLayer* hard_swish = network->addElementWise(
-            *input, *hard_sigmoid->getOutput(0),
-            nvinfer1::ElementWiseOperation::kPROD);
-        assert(hard_swish != nullptr);
-        std::string hardSwishLayerName = "hard_swish_" + std::to_string(layerIdx);
-        hard_swish->setName(hardSwishLayerName.c_str());
-        output = hard_swish;
+        nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID);
+        assert(hardsigmoid != nullptr);
+        std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx);
+        hardsigmoid->setName(hardsigmoidLayerName.c_str());
+        hardsigmoid->setAlpha(1.0 / 6.0);
+        hardsigmoid->setBeta(0.5);
+        nvinfer1::IElementWiseLayer* hardswish
+            = network->addElementWise(*input, *hardsigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
+        assert(hardswish != nullptr);
+        std::string hardswishLayerName = "hardswish_" + std::to_string(layerIdx);
+        hardswish->setName(hardswishLayerName.c_str());
+        output = hardswish->getOutput(0);
     }
     else
     {
diff --git a/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h b/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h
index 176a5c2..c5151ac 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h
@@ -6,18 +6,14 @@
 #ifndef __ACTIVATION_LAYER_H__
 #define __ACTIVATION_LAYER_H__
 
-#include <iostream>
 #include <string>
 #include <cassert>
 
 #include "NvInfer.h"
 
-#include "activation_layer.h"
-
-nvinfer1::ILayer* activationLayer(
+nvinfer1::ITensor* activationLayer(
     int layerIdx,
     std::string activation,
-    nvinfer1::ILayer* output,
     nvinfer1::ITensor* input,
     nvinfer1::INetworkDefinition* network);
 
diff --git a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp
index 063f3d1..e6828e7 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp
@@ -6,7 +6,7 @@
 #include <cmath>
 #include "batchnorm_layer.h"
 
-nvinfer1::ILayer* batchnormLayer(
+nvinfer1::ITensor* batchnormLayer(
     int layerIdx,
     std::map<std::string, std::string>& block,
     std::vector<float>& weights,
@@ -17,6 +17,8 @@ nvinfer1::ILayer* batchnormLayer(
     nvinfer1::ITensor* input,
     nvinfer1::INetworkDefinition* network)
 {
+    nvinfer1::ITensor* output;
+
     assert(block.at("type") == "batchnorm");
     assert(block.find("filters") != block.end());
@@ -28,7 +30,8 @@ nvinfer1::ILayer* batchnormLayer(
     std::vector<float> bnRunningMean;
     std::vector<float> bnRunningVar;
 
-    if (weightsType == "weights") {
+    if (weightsType == "weights")
+    {
         for (int i = 0; i < filters; ++i)
         {
             bnBiases.push_back(weights[weightPtr]);
             weightPtr++;
         }
@@ -50,7 +53,8 @@ nvinfer1::ILayer* batchnormLayer(
             weightPtr++;
         }
     }
-    else {
+    else
+    {
         for (int i = 0; i < filters; ++i)
         {
             bnWeights.push_back(weights[weightPtr]);
             weightPtr++;
         }
@@ -79,35 +83,27 @@ nvinfer1::ILayer* batchnormLayer(
     nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
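+    // Fold batchnorm into a single IScaleLayer: out = (in * scale + shift)^power, where
+    // scale = gamma / sqrt(var + eps) and shift = beta - mean * scale; bnRunningVar is assumed
+    // to already hold sqrt(var + eps) from the weight-loading step above.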
     float* shiftWt = new float[size];
     for (int i = 0; i < size; ++i)
-    {
-        shiftWt[i]
-            = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
-    }
+        shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
     shift.values = shiftWt;
 
     float* scaleWt = new float[size];
     for (int i = 0; i < size; ++i)
-    {
         scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
-    }
     scale.values = scaleWt;
 
     float* powerWt = new float[size];
     for (int i = 0; i < size; ++i)
-    {
         powerWt[i] = 1.0;
-    }
     power.values = powerWt;
 
     trtWeights.push_back(shift);
     trtWeights.push_back(scale);
     trtWeights.push_back(power);
 
-    nvinfer1::IScaleLayer* bn = network->addScale(
-        *input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
-    assert(bn != nullptr);
-    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
-    bn->setName(bnLayerName.c_str());
-    nvinfer1::ILayer* output = bn;
+    nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+    assert(batchnorm != nullptr);
+    std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx);
+    batchnorm->setName(batchnormLayerName.c_str());
+    output = batchnorm->getOutput(0);
 
-    output = activationLayer(layerIdx, activation, output, output->getOutput(0), network);
+    output = activationLayer(layerIdx, activation, output, network);
     assert(output != nullptr);
 
     return output;
diff --git a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h
index 514b456..078b7f1 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h
@@ -13,7 +13,7 @@
 
 #include "activation_layer.h"
 
-nvinfer1::ILayer* batchnormLayer(
+nvinfer1::ITensor* batchnormLayer(
     int layerIdx,
     std::map<std::string, std::string>& block,
     std::vector<float>& weights,
diff --git a/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp
index 42d17a1..69e183a 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp
@@ -5,27 +5,32 @@
 
 #include "channels_layer.h"
 
-nvinfer1::ILayer* channelsLayer(
-    std::string type,
+nvinfer1::ITensor* channelsLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
     nvinfer1::ITensor* implicitTensor,
     nvinfer1::INetworkDefinition* network)
 {
-    nvinfer1::ILayer* output;
+    nvinfer1::ITensor* output;
 
-    if (type == "shift") {
-        nvinfer1::IElementWiseLayer* ew = network->addElementWise(
-            *input, *implicitTensor,
-            nvinfer1::ElementWiseOperation::kSUM);
-        assert(ew != nullptr);
-        output = ew;
+    assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels");
+
+    if (block.at("type") == "shift_channels") {
+        nvinfer1::IElementWiseLayer* shift
+            = network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kSUM);
+        assert(shift != nullptr);
+        std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx);
+        shift->setName(shiftLayerName.c_str());
+        output = shift->getOutput(0);
     }
-    else if (type == "control") {
-        nvinfer1::IElementWiseLayer* ew = network->addElementWise(
-            *input, *implicitTensor,
-            nvinfer1::ElementWiseOperation::kPROD);
-        assert(ew != nullptr);
-        output = ew;
+    else if (block.at("type") == "control_channels") {
+        nvinfer1::IElementWiseLayer* control
+            = network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kPROD);
+        assert(control != nullptr);
+        std::string controlLayerName = "control_channels_" + std::to_string(layerIdx);
+        control->setName(controlLayerName.c_str());
+        output = control->getOutput(0);
     }
 
     return output;
diff --git a/nvdsinfer_custom_impl_Yolo/layers/channels_layer.h b/nvdsinfer_custom_impl_Yolo/layers/channels_layer.h
index b22f6b6..4db704c 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/channels_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/channels_layer.h
@@ -11,8 +11,9 @@
 
 #include "NvInfer.h"
 
-nvinfer1::ILayer* channelsLayer(
-    std::string type,
+nvinfer1::ITensor* channelsLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
     nvinfer1::ITensor* implicitTensor,
     nvinfer1::INetworkDefinition* network);
diff --git a/nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp
new file mode 100644
index 0000000..c8eed52
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp
@@ -0,0 +1,29 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "cls_layer.h"
+
+nvinfer1::ITensor* clsLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    nvinfer1::ITensor* output;
+
+    assert(block.at("type") == "cls");
+
+    nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
+    assert(shuffle != nullptr);
+    std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
+    shuffle->setName(shuffleLayerName.c_str());
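+    // cls head: swap the first two axes so the class scores come out as (anchors, classes)
+    // rather than (classes, anchors), the layout the bbox parser reads (assumed from the
+    // PP-YOLOE head export).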
+    nvinfer1::Permutation permutation;
+    permutation.order[0] = 1;
+    permutation.order[1] = 0;
+    shuffle->setFirstTranspose(permutation);
+    output = shuffle->getOutput(0);
+
+    return output;
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h b/nvdsinfer_custom_impl_Yolo/layers/cls_layer.h
similarity index 76%
rename from nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h
rename to nvdsinfer_custom_impl_Yolo/layers/cls_layer.h
index a0d139e..cca342b 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/cls_layer.h
@@ -3,15 +3,15 @@
  * https://www.github.com/marcoslucianops
  */
 
-#ifndef __MAXPOOL_LAYER_H__
-#define __MAXPOOL_LAYER_H__
+#ifndef __CLS_LAYER_H__
+#define __CLS_LAYER_H__
 
 #include <map>
 #include <string>
 
 #include "NvInfer.h"
 
-nvinfer1::ILayer* maxpoolLayer(
+nvinfer1::ITensor* clsLayer(
     int layerIdx,
     std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
diff --git a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp
index 6b502ad..be85379 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp
@@ -6,7 +6,7 @@
 #include <cmath>
 #include "convolutional_layer.h"
 
-nvinfer1::ILayer* convolutionalLayer(
+nvinfer1::ITensor* convolutionalLayer(
     int layerIdx,
     std::map<std::string, std::string>& block,
     std::vector<float>& weights,
@@ -18,6 +18,8 @@ nvinfer1::ILayer* convolutionalLayer(
     nvinfer1::ITensor* input,
     nvinfer1::INetworkDefinition* network)
 {
+    nvinfer1::ITensor* output;
+
     assert(block.at("type") == "convolutional");
     assert(block.find("filters") != block.end());
     assert(block.find("pad") != block.end());
@@ -40,14 +42,10 @@ nvinfer1::ILayer* convolutionalLayer(
     int groups = 1;
     if (block.find("groups") != block.end())
-    {
         groups = std::stoi(block.at("groups"));
-    }
 
     if (block.find("bias") != block.end())
-    {
         bias = std::stoi(block.at("bias"));
-    }
 
     int pad;
     if (padding)
@@ -63,7 +61,8 @@ nvinfer1::ILayer* convolutionalLayer(
     nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
     nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias};
 
-    if (weightsType == "weights") {
+    if (weightsType == "weights")
+    {
         if (batchNormalize == false)
         {
             float* val;
@@ -120,7 +119,8 @@ nvinfer1::ILayer* convolutionalLayer(
             trtWeights.push_back(convBias);
         }
     }
-    else {
+    else
+    {
         if (batchNormalize == false)
         {
             float* val = new float[size];
@@ -177,20 +177,18 @@ nvinfer1::ILayer* convolutionalLayer(
         }
     }
 
-    nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(
-        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
+    nvinfer1::IConvolutionLayer* conv
+        = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias);
     assert(conv != nullptr);
     std::string convLayerName = "conv_" + std::to_string(layerIdx);
     conv->setName(convLayerName.c_str());
-    conv->setStrideNd(nvinfer1::DimsHW{stride, stride});
-    conv->setPaddingNd(nvinfer1::DimsHW{pad, pad});
+    conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
+    conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});
 
     if (block.find("groups") != block.end())
-    {
         conv->setNbGroups(groups);
-    }
 
-    nvinfer1::ILayer* output = conv;
+    output = conv->getOutput(0);
 
     if (batchNormalize == true)
     {
@@ -200,36 +198,28 @@ nvinfer1::ILayer* convolutionalLayer(
         nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
         float* shiftWt = new float[size];
         for (int i = 0; i < size; ++i)
-        {
-            shiftWt[i]
-                = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
-        }
+            shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
         shift.values = shiftWt;
 
         float* scaleWt = new float[size];
         for (int i = 0; i < size; ++i)
-        {
             scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
-        }
         scale.values = scaleWt;
 
         float* powerWt = new float[size];
         for (int i = 0; i < size; ++i)
-        {
             powerWt[i] = 1.0;
-        }
         power.values = powerWt;
 
         trtWeights.push_back(shift);
         trtWeights.push_back(scale);
         trtWeights.push_back(power);
 
-        nvinfer1::IScaleLayer* bn = network->addScale(
-            *output->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
-        assert(bn != nullptr);
-        std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
-        bn->setName(bnLayerName.c_str());
-        output = bn;
+        nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+        assert(batchnorm != nullptr);
+        std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx);
+        batchnorm->setName(batchnormLayerName.c_str());
+        output = batchnorm->getOutput(0);
     }
 
-    output = activationLayer(layerIdx, activation, output, output->getOutput(0), network);
+    output = activationLayer(layerIdx, activation, output, network);
     assert(output != nullptr);
 
     return output;
diff --git a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h
index ba077cb..8df166a 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h
@@ -13,7 +13,7 @@
 
 #include "activation_layer.h"
 
-nvinfer1::ILayer* convolutionalLayer(
+nvinfer1::ITensor* convolutionalLayer(
     int layerIdx,
     std::map<std::string, std::string>& block,
     std::vector<float>& weights,
diff --git a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp
index 0d7e044..25ce603 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp
@@ -5,17 +5,25 @@
 
 #include "implicit_layer.h"
 
-nvinfer1::ILayer* implicitLayer(
-    int channels,
+nvinfer1::ITensor* implicitLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
     std::vector<float>& weights,
     std::vector<nvinfer1::Weights>& trtWeights,
     int& weightPtr,
     nvinfer1::INetworkDefinition* network)
 {
-    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, channels};
+    nvinfer1::ITensor* output;
 
-    float* val = new float[channels];
-    for (int i = 0; i < channels; ++i)
+    assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul");
+    assert(block.find("filters") != block.end());
+
+    int filters = std::stoi(block.at("filters"));
+
+    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, filters};
+
+    float* val = new float[filters];
+    for (int i = 0; i < filters; ++i)
     {
         val[i] = weights[weightPtr];
         weightPtr++;
@@ -23,8 +31,11 @@ nvinfer1::ILayer* implicitLayer(
     convWt.values = val;
     trtWeights.push_back(convWt);
 
-    nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims3{static_cast<int>(channels), 1, 1}, convWt);
+    nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{3, {filters, 1, 1}}, convWt);
     assert(implicit != nullptr);
+    std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx);
+    implicit->setName(implicitLayerName.c_str());
+    output = implicit->getOutput(0);
 
-    return implicit;
+    return output;
 }
diff --git a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h
index e34d738..a4611c9 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h
@@ -12,8 +12,9 @@
 
 #include "NvInfer.h"
 
-nvinfer1::ILayer* implicitLayer(
-    int channels,
+nvinfer1::ITensor* implicitLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
     std::vector<float>& weights,
     std::vector<nvinfer1::Weights>& trtWeights,
     int& weightPtr,
diff --git a/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp
deleted file mode 100644
index 51caab9..0000000
--- a/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Created by Marcos Luciano
- * https://www.github.com/marcoslucianops
- */
-
-#include "maxpool_layer.h"
-
-nvinfer1::ILayer* maxpoolLayer(
-    int layerIdx,
-    std::map<std::string, std::string>& block,
-    nvinfer1::ITensor* input,
-    nvinfer1::INetworkDefinition* network)
-{
-    assert(block.at("type") == "maxpool");
-    assert(block.find("size") != block.end());
-    assert(block.find("stride") != block.end());
-
-    int size = std::stoi(block.at("size"));
-    int stride = std::stoi(block.at("stride"));
-
-    nvinfer1::IPoolingLayer* pool
-        = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {size, size}});
-    assert(pool);
-    std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
-    pool->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
-    pool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}});
-    if (size == 2 && stride == 1)
-    {
-        pool->setPrePadding(nvinfer1::Dims{2, {0, 0}});
-        pool->setPostPadding(nvinfer1::Dims{2, {1, 1}});
-    }
-    pool->setName(maxpoolLayerName.c_str());
-
-    return pool;
-}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp
new file mode 100644
index 0000000..9f4d59c
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp
@@ -0,0 +1,57 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "pooling_layer.h"
+
+nvinfer1::ITensor* poolingLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    nvinfer1::ITensor* output;
+
+    assert(block.at("type") == "maxpool" || block.at("type") == "avgpool");
+
+    if (block.at("type") == "maxpool")
+    {
+        assert(block.find("size") != block.end());
+        assert(block.find("stride") != block.end());
+
+        int size = std::stoi(block.at("size"));
+        int stride = std::stoi(block.at("stride"));
+
+        nvinfer1::IPoolingLayer* maxpool
+            = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {size, size}});
+        assert(maxpool != nullptr);
+        std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
+        maxpool->setName(maxpoolLayerName.c_str());
+        maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
+        maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}});
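+        // A size-2, stride-1 maxpool (as in yolov4-tiny) would shrink H and W with the default
+        // symmetric padding of (size - 1) / 2 = 0, so pad one pixel on the bottom/right only,
+        // keeping the output the same size as the input.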
(block.at("type") == "maxpool") + { + assert(block.find("size") != block.end()); + assert(block.find("stride") != block.end()); + + int size = std::stoi(block.at("size")); + int stride = std::stoi(block.at("stride")); + + nvinfer1::IPoolingLayer* maxpool + = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {size, size}}); + assert(maxpool != nullptr); + std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); + maxpool->setName(maxpoolLayerName.c_str()); + maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); + maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}}); + if (size == 2 && stride == 1) + { + maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}}); + maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}}); + } + output = maxpool->getOutput(0); + } + else if (block.at("type") == "avgpool") + { + nvinfer1::Dims inputDims = input->getDimensions(); + nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd( + *input, nvinfer1::PoolingType::kAVERAGE, nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}}); + assert(avgpool != nullptr); + std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx); + avgpool->setName(avgpoolLayerName.c_str()); + output = avgpool->getOutput(0); + } + else + { + std::cerr << "Pooling not supported: " << block.at("type") << std::endl; + std::abort(); + } + + return output; +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h b/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h new file mode 100644 index 0000000..0e97f24 --- /dev/null +++ b/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h @@ -0,0 +1,21 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __POOLING_LAYER_H__ +#define __POOLING_LAYER_H__ + +#include +#include +#include + +#include "NvInfer.h" + +nvinfer1::ITensor* poolingLayer( + int layerIdx, + std::map& block, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp new file mode 100644 index 0000000..716848b --- /dev/null +++ b/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp @@ -0,0 +1,58 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "reduce_layer.h" + +nvinfer1::ITensor* reduceLayer( + int layerIdx, + std::map& block, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + nvinfer1::ITensor* output; + + assert(block.at("type") == "reduce"); + assert(block.find("mode") != block.end()); + assert(block.find("axes") != block.end()); + + std::string mode = block.at("mode"); + + nvinfer1::ReduceOperation operation; + if (mode == "mean") + operation = nvinfer1::ReduceOperation::kAVG; + + std::string strAxes = block.at("axes"); + std::vector axes; + size_t lastPos = 0, pos = 0; + while ((pos = strAxes.find(',', lastPos)) != std::string::npos) + { + int vL = std::stoi(trim(strAxes.substr(lastPos, pos - lastPos))); + axes.push_back(vL); + lastPos = pos + 1; + } + if (lastPos < strAxes.length()) + { + std::string lastV = trim(strAxes.substr(lastPos)); + if (!lastV.empty()) + axes.push_back(std::stoi(lastV)); + } + assert(!axes.empty()); + + uint32_t axisMask = 0; + for (int axis : axes) + axisMask |= 1 << axis; + + bool keepDims = false; + if (block.find("keep") != block.end()) + keepDims = std::stoi(block.at("keep")) == 1 ? 
+
+    bool keepDims = false;
+    if (block.find("keep") != block.end())
+        keepDims = std::stoi(block.at("keep")) == 1 ? true : false;
+
+    nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims);
+    assert(reduce != nullptr);
+    std::string reduceLayerName = "reduce_" + std::to_string(layerIdx);
+    reduce->setName(reduceLayerName.c_str());
+    output = reduce->getOutput(0);
+
+    return output;
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h b/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h
new file mode 100644
index 0000000..c8330a1
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h
@@ -0,0 +1,18 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#ifndef __REDUCE_LAYER_H__
+#define __REDUCE_LAYER_H__
+
+#include "NvInfer.h"
+#include "../utils.h"
+
+nvinfer1::ITensor* reduceLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network);
+
+#endif
diff --git a/nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp
new file mode 100644
index 0000000..ea9be07
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp
@@ -0,0 +1,113 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "reg_layer.h"
+
+nvinfer1::ITensor* regLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    std::vector<float>& weights,
+    std::vector<nvinfer1::Weights>& trtWeights,
+    int& weightPtr,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    nvinfer1::ITensor* output;
+
+    assert(block.at("type") == "reg");
+
+    nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
+    assert(shuffle != nullptr);
+    std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
+    shuffle->setName(shuffleLayerName.c_str());
+    nvinfer1::Permutation permutation;
+    permutation.order[0] = 1;
+    permutation.order[1] = 0;
+    shuffle->setFirstTranspose(permutation);
+    output = shuffle->getOutput(0);
+    nvinfer1::Dims shuffleDims = output->getDimensions();
+
+    nvinfer1::ISliceLayer* sliceLt = network->addSlice(
+        *output, nvinfer1::Dims{2, {0, 0}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}});
+    assert(sliceLt != nullptr);
+    std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx);
+    sliceLt->setName(sliceLtLayerName.c_str());
+    nvinfer1::ITensor* lt = sliceLt->getOutput(0);
+
+    nvinfer1::ISliceLayer* sliceRb = network->addSlice(
+        *output, nvinfer1::Dims{2, {0, 2}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}});
+    assert(sliceRb != nullptr);
+    std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx);
+    sliceRb->setName(sliceRbLayerName.c_str());
+    nvinfer1::ITensor* rb = sliceRb->getOutput(0);
+
+    int channels = shuffleDims.d[0] * 2;
+    nvinfer1::Weights anchorPointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels};
+    float* val = new float[channels];
+    for (int i = 0; i < channels; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    anchorPointsWt.values = val;
+    trtWeights.push_back(anchorPointsWt);
+
+    nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, anchorPointsWt);
+    assert(anchorPoints != nullptr);
+    std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx);
+    anchorPoints->setName(anchorPointsLayerName.c_str());
+    nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0);
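+    // distance2bbox decode (PP-YOLOE head): lt and rb hold the predicted left-top and
+    // right-bottom distances from each anchor point, so x1y1 = anchor - lt and x2y2 = anchor + rb;
+    // the anchor points and per-anchor strides are constants packed into the wts file.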
+    nvinfer1::IElementWiseLayer* x1y1
+        = network->addElementWise(*anchorPointsTensor, *lt, nvinfer1::ElementWiseOperation::kSUB);
+    assert(x1y1 != nullptr);
+    std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx);
+    x1y1->setName(x1y1LayerName.c_str());
+    nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0);
+
+    nvinfer1::IElementWiseLayer* x2y2
+        = network->addElementWise(*rb, *anchorPointsTensor, nvinfer1::ElementWiseOperation::kSUM);
+    assert(x2y2 != nullptr);
+    std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx);
+    x2y2->setName(x2y2LayerName.c_str());
+    nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0);
+
+    std::vector<nvinfer1::ITensor*> concatInputs;
+    concatInputs.push_back(x1y1Tensor);
+    concatInputs.push_back(x2y2Tensor);
+
+    nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
+    assert(concat != nullptr);
+    std::string concatLayerName = "concat_" + std::to_string(layerIdx);
+    concat->setName(concatLayerName.c_str());
+    concat->setAxis(1);
+    output = concat->getOutput(0);
+
+    channels = shuffleDims.d[0];
+    nvinfer1::Weights stridePointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels};
+    val = new float[channels];
+    for (int i = 0; i < channels; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    stridePointsWt.values = val;
+    trtWeights.push_back(stridePointsWt);
+
+    nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 1}}, stridePointsWt);
+    assert(stridePoints != nullptr);
+    std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx);
+    stridePoints->setName(stridePointsLayerName.c_str());
+    nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0);
+
+    nvinfer1::IElementWiseLayer* pred
+        = network->addElementWise(*output, *stridePointsTensor, nvinfer1::ElementWiseOperation::kPROD);
+    assert(pred != nullptr);
+    std::string predLayerName = "pred_" + std::to_string(layerIdx);
+    pred->setName(predLayerName.c_str());
+    output = pred->getOutput(0);
+
+    return output;
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/reg_layer.h b/nvdsinfer_custom_impl_Yolo/layers/reg_layer.h
new file mode 100644
index 0000000..b8addb3
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/reg_layer.h
@@ -0,0 +1,24 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#ifndef __REG_LAYER_H__
+#define __REG_LAYER_H__
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "NvInfer.h"
+
+nvinfer1::ITensor* regLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    std::vector<float>& weights,
+    std::vector<nvinfer1::Weights>& trtWeights,
+    int& weightPtr,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network);
+
+#endif
diff --git a/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp
new file mode 100644
index 0000000..c126df2
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp
@@ -0,0 +1,62 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "reorg_layer.h"
+
+nvinfer1::ITensor* reorgLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    nvinfer1::ITensor* output;
+
+    assert(block.at("type") == "reorg");
+
+    nvinfer1::Dims inputDims = input->getDimensions();
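+    // Reorg (space-to-depth): four stride-2 slices pick the even/odd row and column phases and
+    // are concatenated on the channel axis, turning (C, H, W) into (4C, H / 2, W / 2).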
+    nvinfer1::ISliceLayer *slice1 = network->addSlice(
+        *input, nvinfer1::Dims{3, {0, 0, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
+        nvinfer1::Dims{3, {1, 2, 2}});
+    assert(slice1 != nullptr);
+    std::string slice1LayerName = "slice1_" + std::to_string(layerIdx);
+    slice1->setName(slice1LayerName.c_str());
+
+    nvinfer1::ISliceLayer *slice2 = network->addSlice(
+        *input, nvinfer1::Dims{3, {0, 0, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
+        nvinfer1::Dims{3, {1, 2, 2}});
+    assert(slice2 != nullptr);
+    std::string slice2LayerName = "slice2_" + std::to_string(layerIdx);
+    slice2->setName(slice2LayerName.c_str());
+
+    nvinfer1::ISliceLayer *slice3 = network->addSlice(
+        *input, nvinfer1::Dims{3, {0, 1, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
+        nvinfer1::Dims{3, {1, 2, 2}});
+    assert(slice3 != nullptr);
+    std::string slice3LayerName = "slice3_" + std::to_string(layerIdx);
+    slice3->setName(slice3LayerName.c_str());
+
+    nvinfer1::ISliceLayer *slice4 = network->addSlice(
+        *input, nvinfer1::Dims{3, {0, 1, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}},
+        nvinfer1::Dims{3, {1, 2, 2}});
+    assert(slice4 != nullptr);
+    std::string slice4LayerName = "slice4_" + std::to_string(layerIdx);
+    slice4->setName(slice4LayerName.c_str());
+
+    std::vector<nvinfer1::ITensor*> concatInputs;
+    concatInputs.push_back(slice1->getOutput(0));
+    concatInputs.push_back(slice2->getOutput(0));
+    concatInputs.push_back(slice3->getOutput(0));
+    concatInputs.push_back(slice4->getOutput(0));
+
+    nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
+    assert(concat != nullptr);
+    std::string concatLayerName = "concat_" + std::to_string(layerIdx);
+    concat->setName(concatLayerName.c_str());
+    concat->setAxis(0);
+    output = concat->getOutput(0);
+
+    return output;
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.h b/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h
similarity index 79%
rename from nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.h
rename to nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h
index 5794d37..fca09fa 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h
@@ -12,8 +12,9 @@
 
 #include "NvInfer.h"
 
-nvinfer1::ILayer* reorgV5Layer(
+nvinfer1::ITensor* reorgLayer(
     int layerIdx,
+    std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
     nvinfer1::INetworkDefinition* network);
 
diff --git a/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.cpp
deleted file mode 100644
index 2e65033..0000000
--- a/nvdsinfer_custom_impl_Yolo/layers/reorgv5_layer.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Created by Marcos Luciano
- * https://www.github.com/marcoslucianops
- */
-
-#include "reorgv5_layer.h"
-
-nvinfer1::ILayer* reorgV5Layer(
-    int layerIdx,
-    nvinfer1::ITensor* input,
-    nvinfer1::INetworkDefinition* network)
-{
-    nvinfer1::Dims prevTensorDims = input->getDimensions();
-
-    nvinfer1::ISliceLayer *slice1 = network->addSlice(
-        *input,
-        nvinfer1::Dims3{0, 0, 0},
-        nvinfer1::Dims3{prevTensorDims.d[0], prevTensorDims.d[1] / 2, prevTensorDims.d[2] / 2},
-        nvinfer1::Dims3{1, 2, 2});
-    assert(slice1 != nullptr);
-    std::string slice1LayerName = "slice1_" + std::to_string(layerIdx);
-    slice1->setName(slice1LayerName.c_str());
-
-    nvinfer1::ISliceLayer *slice2 = network->addSlice(
-        *input,
-        nvinfer1::Dims3{0, 1, 0},
-        nvinfer1::Dims3{prevTensorDims.d[0], prevTensorDims.d[1] / 2, prevTensorDims.d[2] / 2},
-        nvinfer1::Dims3{1, 2, 2});
-    assert(slice2 != nullptr);
-    std::string slice2LayerName = "slice2_" + std::to_string(layerIdx);
-    slice2->setName(slice2LayerName.c_str());
-
-    nvinfer1::ISliceLayer *slice3 = network->addSlice(
-        *input,
-        nvinfer1::Dims3{0, 0, 1},
-        nvinfer1::Dims3{prevTensorDims.d[0], prevTensorDims.d[1] / 2, prevTensorDims.d[2] / 2},
-        nvinfer1::Dims3{1, 2, 2});
-    assert(slice3 != nullptr);
-    std::string slice3LayerName = "slice3_" + std::to_string(layerIdx);
-    slice3->setName(slice3LayerName.c_str());
-
-    nvinfer1::ISliceLayer *slice4 = network->addSlice(
-        *input,
-        nvinfer1::Dims3{0, 1, 1},
-        nvinfer1::Dims3{prevTensorDims.d[0], prevTensorDims.d[1] / 2, prevTensorDims.d[2] / 2},
-        nvinfer1::Dims3{1, 2, 2});
-    assert(slice4 != nullptr);
-    std::string slice4LayerName = "slice4_" + std::to_string(layerIdx);
-    slice4->setName(slice4LayerName.c_str());
-
-    std::vector<nvinfer1::ITensor*> concatInputs;
-    concatInputs.push_back (slice1->getOutput(0));
-    concatInputs.push_back (slice2->getOutput(0));
-    concatInputs.push_back (slice3->getOutput(0));
-    concatInputs.push_back (slice4->getOutput(0));
-
-    nvinfer1::IConcatenationLayer* concat =
-        network->addConcatenation(concatInputs.data(), concatInputs.size());
-    assert(concat != nullptr);
-
-    return concat;
-}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp
index 5ca4844..1fda570 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp
@@ -5,58 +5,73 @@
 
 #include "route_layer.h"
 
-nvinfer1::ILayer* routeLayer(
+nvinfer1::ITensor* routeLayer(
     int layerIdx,
+    std::string& layers,
     std::map<std::string, std::string>& block,
     std::vector<nvinfer1::ITensor*> tensorOutputs,
     nvinfer1::INetworkDefinition* network)
 {
+    nvinfer1::ITensor* output;
+
+    assert(block.at("type") == "route");
+    assert(block.find("layers") != block.end());
+
     std::string strLayers = block.at("layers");
     std::vector<int> idxLayers;
     size_t lastPos = 0, pos = 0;
-    while ((pos = strLayers.find(',', lastPos)) != std::string::npos) {
+    while ((pos = strLayers.find(',', lastPos)) != std::string::npos)
+    {
         int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
-        idxLayers.push_back (vL);
+        idxLayers.push_back(vL);
         lastPos = pos + 1;
     }
-    if (lastPos < strLayers.length()) {
+    if (lastPos < strLayers.length())
+    {
         std::string lastV = trim(strLayers.substr(lastPos));
-        if (!lastV.empty()) {
-            idxLayers.push_back (std::stoi(lastV));
-        }
+        if (!lastV.empty())
+            idxLayers.push_back(std::stoi(lastV));
     }
     assert (!idxLayers.empty());
     std::vector<nvinfer1::ITensor*> concatInputs;
-    for (int idxLayer : idxLayers) {
-        if (idxLayer < 0) {
-            idxLayer = tensorOutputs.size() + idxLayer;
-        }
-        assert (idxLayer >= 0 && idxLayer < (int)tensorOutputs.size());
-        concatInputs.push_back (tensorOutputs[idxLayer]);
+    for (uint i = 0; i < idxLayers.size(); ++i)
+    {
+        if (idxLayers[i] < 0)
+            idxLayers[i] = tensorOutputs.size() + idxLayers[i];
+        assert (idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size());
+        concatInputs.push_back(tensorOutputs[idxLayers[i]]);
+        if (i < idxLayers.size() - 1)
+            layers += std::to_string(idxLayers[i]) + ", ";
     }
+    layers += std::to_string(idxLayers[idxLayers.size() - 1]);
 
+    int axis = 0;
+    if (block.find("axis") != block.end())
+        axis = std::stoi(block.at("axis"));
+    if (axis < 0)
+        axis = concatInputs[0]->getDimensions().nbDims + axis;
+
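+    // 'layers' entries may be negative (relative) indices, resolved above against the outputs
+    // recorded so far; 'axis' defaults to 0 (channels in CHW) and negative values count back
+    // from the tensor rank, the usual concat-axis convention.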
-    nvinfer1::IConcatenationLayer* concat =
-        network->addConcatenation(concatInputs.data(), concatInputs.size());
+    nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size());
     assert(concat != nullptr);
-    std::string concatLayerName = "route_" + std::to_string(layerIdx - 1);
+    std::string concatLayerName = "route_" + std::to_string(layerIdx);
     concat->setName(concatLayerName.c_str());
-    concat->setAxis(0);
+    concat->setAxis(axis);
+    output = concat->getOutput(0);
 
-    nvinfer1::ILayer* output = concat;
-
-    if (block.find("groups") != block.end()) {
-        nvinfer1::Dims prevTensorDims = output->getOutput(0)->getDimensions();
+    if (block.find("groups") != block.end())
+    {
+        nvinfer1::Dims prevTensorDims = output->getDimensions();
         int groups = stoi(block.at("groups"));
         int group_id = stoi(block.at("group_id"));
         int startSlice = (prevTensorDims.d[0] / groups) * group_id;
         int channelSlice = (prevTensorDims.d[0] / groups);
-        nvinfer1::ISliceLayer* sl = network->addSlice(
-            *output->getOutput(0),
-            nvinfer1::Dims3{startSlice, 0, 0},
-            nvinfer1::Dims3{channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]},
-            nvinfer1::Dims3{1, 1, 1});
-        assert(sl != nullptr);
-        output = sl;
+        nvinfer1::ISliceLayer* slice = network->addSlice(
+            *output, nvinfer1::Dims{3, {startSlice, 0, 0}},
+            nvinfer1::Dims{3, {channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}});
+        assert(slice != nullptr);
+        std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
+        slice->setName(sliceLayerName.c_str());
+        output = slice->getOutput(0);
     }
 
     return output;
diff --git a/nvdsinfer_custom_impl_Yolo/layers/route_layer.h b/nvdsinfer_custom_impl_Yolo/layers/route_layer.h
index 409beee..9679365 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/route_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/route_layer.h
@@ -9,8 +9,9 @@
 #include "NvInfer.h"
 #include "../utils.h"
 
-nvinfer1::ILayer* routeLayer(
+nvinfer1::ITensor* routeLayer(
     int layerIdx,
+    std::string& layers,
     std::map<std::string, std::string>& block,
     std::vector<nvinfer1::ITensor*> tensorOutputs,
     nvinfer1::INetworkDefinition* network);
diff --git a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp
index 725d781..3e53cbe 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp
@@ -5,40 +5,47 @@
 
 #include "shortcut_layer.h"
 
-nvinfer1::ILayer* shortcutLayer(
+nvinfer1::ITensor* shortcutLayer(
     int layerIdx,
+    std::string mode,
     std::string activation,
     std::string inputVol,
     std::string shortcutVol,
+    std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
-    nvinfer1::ITensor* shortcutTensor,
+    nvinfer1::ITensor* shortcutInput,
     nvinfer1::INetworkDefinition* network)
 {
-    nvinfer1::ILayer* output;
-    nvinfer1::ITensor* outputTensor;
+    nvinfer1::ITensor* output;
 
-    if (inputVol != shortcutVol)
+    assert(block.at("type") == "shortcut");
+
+    nvinfer1::ElementWiseOperation operation = nvinfer1::ElementWiseOperation::kSUM;
+
+    if (mode == "mul")
+        operation = nvinfer1::ElementWiseOperation::kPROD;
+
+    if (mode == "add" && inputVol != shortcutVol)
     {
-        nvinfer1::ISliceLayer* sl = network->addSlice(
-            *shortcutTensor,
-            nvinfer1::Dims3{0, 0, 0},
-            input->getDimensions(),
-            nvinfer1::Dims3{1, 1, 1});
-        assert(sl != nullptr);
-        outputTensor = sl->getOutput(0);
-        assert(outputTensor != nullptr);
-    } else
+        nvinfer1::ISliceLayer* slice = network->addSlice(
+            *shortcutInput, nvinfer1::Dims{3, {0, 0, 0}}, input->getDimensions(), nvinfer1::Dims{3, {1, 1, 1}});
+        assert(slice != nullptr);
+        std::string sliceLayerName = "slice_" + std::to_string(layerIdx);
+        slice->setName(sliceLayerName.c_str());
+        output = slice->getOutput(0);
+    }
+    else
     {
-        outputTensor = shortcutTensor;
-        assert(outputTensor != nullptr);
+        output = shortcutInput;
     }
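+    // With mismatched volumes in add mode the shortcut input was sliced above to the main
+    // input's dimensions; now apply the element-wise op picked by 'mode' (kSUM or kPROD).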
-    nvinfer1::IElementWiseLayer* ew = network->addElementWise(
-        *input, *outputTensor,
-        nvinfer1::ElementWiseOperation::kSUM);
-    assert(ew != nullptr);
+    nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, operation);
+    assert(shortcut != nullptr);
+    std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx);
+    shortcut->setName(shortcutLayerName.c_str());
+    output = shortcut->getOutput(0);
 
-    output = activationLayer(layerIdx, activation, ew, ew->getOutput(0), network);
+    output = activationLayer(layerIdx, activation, output, network);
     assert(output != nullptr);
 
     return output;
diff --git a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h
index c564d3d..22195e8 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h
@@ -6,17 +6,21 @@
 #ifndef __SHORTCUT_LAYER_H__
 #define __SHORTCUT_LAYER_H__
 
+#include <map>
+
 #include "NvInfer.h"
 
 #include "activation_layer.h"
 
-nvinfer1::ILayer* shortcutLayer(
+nvinfer1::ITensor* shortcutLayer(
     int layerIdx,
+    std::string mode,
     std::string activation,
     std::string inputVol,
     std::string shortcutVol,
+    std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
-    nvinfer1::ITensor* shortcutTensor,
+    nvinfer1::ITensor* shortcut,
     nvinfer1::INetworkDefinition* network);
 
 #endif
diff --git a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp
new file mode 100644
index 0000000..a967e46
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp
@@ -0,0 +1,123 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "shuffle_layer.h"
+
+nvinfer1::ITensor* shuffleLayer(
+    int layerIdx,
+    std::string& layer,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    std::vector<nvinfer1::ITensor*> tensorOutputs,
+    nvinfer1::INetworkDefinition* network)
+{
+    nvinfer1::ITensor* output;
+
+    assert(block.at("type") == "shuffle");
+
+    nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
+    assert(shuffle != nullptr);
+    std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx);
+    shuffle->setName(shuffleLayerName.c_str());
+
+    if (block.find("reshape") != block.end())
+    {
+        std::string strReshape = block.at("reshape");
+        std::vector<int> reshape;
+        size_t lastPos = 0, pos = 0;
+        while ((pos = strReshape.find(',', lastPos)) != std::string::npos)
+        {
+            int vL = std::stoi(trim(strReshape.substr(lastPos, pos - lastPos)));
+            reshape.push_back(vL);
+            lastPos = pos + 1;
+        }
+        if (lastPos < strReshape.length())
+        {
+            std::string lastV = trim(strReshape.substr(lastPos));
+            if (!lastV.empty())
+                reshape.push_back(std::stoi(lastV));
+        }
+        assert(!reshape.empty());
+
+        int from = -1;
+        if (block.find("from") != block.end())
+            from = std::stoi(block.at("from"));
+
+        if (from < 0)
+            from = tensorOutputs.size() + from;
+
+        layer = std::to_string(from);
+
+        nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions();
+        int32_t l = inputTensorDims.d[1] * inputTensorDims.d[2];
+
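+        // A 0 in the cfg's reshape list stands for H * W of the referenced 'from' layer's output,
+        // so a (C, H, W) tensor can be flattened to (C, H*W) without hard-coding the grid size.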
+        nvinfer1::Dims reshapeDims;
+        reshapeDims.nbDims = reshape.size();
+
+        for (uint i = 0; i < reshape.size(); ++i)
+            if (reshape[i] == 0)
+                reshapeDims.d[i] = l;
+            else
+                reshapeDims.d[i] = reshape[i];
+
+        shuffle->setReshapeDimensions(reshapeDims);
+    }
+
+    if (block.find("transpose1") != block.end())
+    {
+        std::string strTranspose1 = block.at("transpose1");
+        std::vector<int> transpose1;
+        size_t lastPos = 0, pos = 0;
+        while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos)
+        {
+            int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos)));
+            transpose1.push_back(vL);
+            lastPos = pos + 1;
+        }
+        if (lastPos < strTranspose1.length())
+        {
+            std::string lastV = trim(strTranspose1.substr(lastPos));
+            if (!lastV.empty())
+                transpose1.push_back(std::stoi(lastV));
+        }
+        assert(!transpose1.empty());
+
+        nvinfer1::Permutation permutation1;
+        for (uint i = 0; i < transpose1.size(); ++i)
+            permutation1.order[i] = transpose1[i];
+
+        shuffle->setFirstTranspose(permutation1);
+    }
+
+    if (block.find("transpose2") != block.end())
+    {
+        std::string strTranspose2 = block.at("transpose2");
+        std::vector<int> transpose2;
+        size_t lastPos = 0, pos = 0;
+        while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos)
+        {
+            int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos)));
+            transpose2.push_back(vL);
+            lastPos = pos + 1;
+        }
+        if (lastPos < strTranspose2.length())
+        {
+            std::string lastV = trim(strTranspose2.substr(lastPos));
+            if (!lastV.empty())
+                transpose2.push_back(std::stoi(lastV));
+        }
+        assert(!transpose2.empty());
+
+        nvinfer1::Permutation permutation2;
+        for (uint i = 0; i < transpose2.size(); ++i)
+            permutation2.order[i] = transpose2[i];
+
+        shuffle->setSecondTranspose(permutation2);
+    }
+
+    output = shuffle->getOutput(0);
+
+    return output;
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h
new file mode 100644
index 0000000..53aa3ce
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h
@@ -0,0 +1,20 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#ifndef __SHUFFLE_LAYER_H__
+#define __SHUFFLE_LAYER_H__
+
+#include "NvInfer.h"
+#include "../utils.h"
+
+nvinfer1::ITensor* shuffleLayer(
+    int layerIdx,
+    std::string& layer,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    std::vector<nvinfer1::ITensor*> tensorOutputs,
+    nvinfer1::INetworkDefinition* network);
+
+#endif
diff --git a/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp
new file mode 100644
index 0000000..cb6348d
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp
@@ -0,0 +1,29 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "softmax_layer.h"
+
+nvinfer1::ITensor* softmaxLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    nvinfer1::ITensor* output;
+
+    assert(block.at("type") == "softmax");
+    assert(block.find("axes") != block.end());
+
+    int axes = std::stoi(block.at("axes"));
+
+    nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*input);
+    assert(softmax != nullptr);
+    std::string softmaxLayerName = "softmax_" + std::to_string(layerIdx);
+    softmax->setName(softmaxLayerName.c_str());
+    softmax->setAxes(1 << axes);
+    output = softmax->getOutput(0);
+
+    return output;
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h b/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h
new file mode 100644
index 0000000..0ca208e
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h
@@ -0,0 +1,20 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#ifndef __SOFTMAX_LAYER_H__
+#define __SOFTMAX_LAYER_H__
+
+#include <map>
+#include <string>
+
+#include "NvInfer.h"
+
+nvinfer1::ITensor* softmaxLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network);
+
+#endif
diff --git a/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp
index 066cce8..4e3614c 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp
@@ -5,20 +5,28 @@
 
 #include "upsample_layer.h"
 
-nvinfer1::ILayer* upsampleLayer(
+nvinfer1::ITensor* upsampleLayer(
     int layerIdx,
     std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
     nvinfer1::INetworkDefinition* network)
 {
+    nvinfer1::ITensor* output;
+
     assert(block.at("type") == "upsample");
+    assert(block.find("stride") != block.end());
+
     int stride = std::stoi(block.at("stride"));
 
-    nvinfer1::IResizeLayer* resize_layer = network->addResize(*input);
-    resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
     float scale[3] = {1, static_cast<float>(stride), static_cast<float>(stride)};
-    resize_layer->setScales(scale, 3);
-    std::string layer_name = "upsample_" + std::to_string(layerIdx);
-    resize_layer->setName(layer_name.c_str());
-    return resize_layer;
+
+    nvinfer1::IResizeLayer* resize = network->addResize(*input);
+    assert(resize != nullptr);
+    std::string resizeLayerName = "upsample_" + std::to_string(layerIdx);
+    resize->setName(resizeLayerName.c_str());
+    resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
+    resize->setScales(scale, 3);
+    output = resize->getOutput(0);
+
+    return output;
 }
diff --git a/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h b/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
index d1b7768..89e69bf 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
+++ b/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
@@ -7,12 +7,11 @@
 #define __UPSAMPLE_LAYER_H__
 
 #include <map>
-#include <iostream>
 #include <string>
 
 #include "NvInfer.h"
 
-nvinfer1::ILayer* upsampleLayer(
+nvinfer1::ITensor* upsampleLayer(
     int layerIdx,
     std::map<std::string, std::string>& block,
     nvinfer1::ITensor* input,
diff --git a/nvdsinfer_custom_impl_Yolo/sortDetections.cu b/nvdsinfer_custom_impl_Yolo/sortDetections.cu
index cda816a..e2a2480 100644
--- a/nvdsinfer_custom_impl_Yolo/sortDetections.cu
+++ b/nvdsinfer_custom_impl_Yolo/sortDetections.cu
@@ -37,13 +37,11 @@ cudaError_t sortDetections(
         float* _d_scores = reinterpret_cast<float*>(d_scores) + (batch * outputSize);
         int* _countData = reinterpret_cast<int*>(countData) + (batch);
 
-        int* _count = (int*)malloc(sizeof(int));
-        cudaMemcpy(_count, (int*)&_countData[0], sizeof(int), cudaMemcpyDeviceToHost);
-        int count = _count[0];
+        int count;
+        cudaMemcpy(&count, _countData, sizeof(int), cudaMemcpyDeviceToHost);
 
         if (count == 0)
         {
-            free(_count);
             return cudaGetLastError();
         }
 
@@ -72,13 +70,13 @@ cudaError_t sortDetections(
 
         int _topK = count < topK ? count : topK;
count : topK; - int threads_per_block = 0; - int number_of_blocks = 4; + int threads_per_block = 16; + int number_of_blocks = 0; - if (_topK % 2 == 0 && _topK >= number_of_blocks) - threads_per_block = _topK / number_of_blocks; + if (_topK % 2 == 0 && _topK >= threads_per_block) + number_of_blocks = _topK / threads_per_block; else - threads_per_block = (_topK / number_of_blocks) + 1; + number_of_blocks = (_topK / threads_per_block) + 1; sortOutput<<<number_of_blocks, threads_per_block, 0, stream>>>( _d_indexes, _d_scores, reinterpret_cast<float*>(d_boxes) + (batch * 4 * outputSize), @@ -89,8 +87,6 @@ cudaError_t sortDetections( cudaFree(d_keys_out); cudaFree(d_values_out); cudaFree(d_temp_storage); - - free(_count); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/utils.cpp b/nvdsinfer_custom_impl_Yolo/utils.cpp index 21d8117..b6431a1 100644 --- a/nvdsinfer_custom_impl_Yolo/utils.cpp +++ b/nvdsinfer_custom_impl_Yolo/utils.cpp @@ -132,11 +132,10 @@ std::string dimsToString(const nvinfer1::Dims d) { std::stringstream s; assert(d.nbDims >= 1); + s << "["; for (int i = 0; i < d.nbDims - 1; ++i) - { - s << std::setw(4) << d.d[i] << " x"; - } - s << std::setw(4) << d.d[d.nbDims - 1]; + s << d.d[i] << ", "; + s << d.d[d.nbDims - 1] << "]"; return s.str(); } @@ -152,10 +151,9 @@ int getNumChannels(nvinfer1::ITensor* t) void printLayerInfo( std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr) { - std::cout << std::setw(6) << std::left << layerIndex << std::setw(24) << std::left << layerName; - std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left - << layerOutput; - std::cout << std::setw(7) << std::left << weightPtr << std::endl; + std::cout << std::setw(8) << std::left << layerIndex << std::setw(30) << std::left << layerName; + std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput; + std::cout << weightPtr << std::endl; } std::string getAbsPath(std::string path) diff --git a/nvdsinfer_custom_impl_Yolo/yolo.cpp b/nvdsinfer_custom_impl_Yolo/yolo.cpp index 2e7fb9f..c784250 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.cpp +++ b/nvdsinfer_custom_impl_Yolo/yolo.cpp @@ -158,7 +158,6 @@ NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) { NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::INetworkDefinition& network) { int weightPtr = 0; - int channels = m_InputC; std::string weightsType; if (m_WtsFilePath.find(".weights") != std::string::npos) @@ -172,81 +171,64 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN else if (m_NetworkType.find("yolor") != std::string::npos) eps = 1.0e-4; - nvinfer1::ITensor* data = - network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, - nvinfer1::Dims3{static_cast<int>(m_InputC), - static_cast<int>(m_InputH), static_cast<int>(m_InputW)}); + nvinfer1::ITensor* data = network.addInput( + m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, + nvinfer1::Dims{3, {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}}); assert(data != nullptr && data->getDimensions().nbDims > 0); nvinfer1::ITensor* previous = data; std::vector<nvinfer1::ITensor*> tensorOutputs; - std::vector<nvinfer1::ITensor*> yoloInputs; - uint inputYoloCount = 0; + + nvinfer1::ITensor* yoloTensorInputs[m_YoloCount]; + uint yoloCountInputs = 0; int modelType = -1; for (uint i = 0; i < m_ConfigBlocks.size(); ++i) { - assert(getNumChannels(previous) == channels); std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; if
(m_ConfigBlocks.at(i).at("type") == "net") - printLayerInfo("", "layer", " input", " output", "weightPtr"); + printLayerInfo("", "Layer", "Input Shape", "Output Shape", "WeightPtr"); else if (m_ConfigBlocks.at(i).at("type") == "convolutional") { + int channels = getNumChannels(previous); std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = convolutionalLayer( + previous = convolutionalLayer( i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, eps, previous, &network); - previous = out->getOutput(0); assert(previous != nullptr); - channels = getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerType = "conv_" + m_ConfigBlocks.at(i).at("activation"); - printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); + std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation"); + printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); } else if (m_ConfigBlocks.at(i).at("type") == "batchnorm") { std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = batchnormLayer( + previous = batchnormLayer( i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network); - previous = out->getOutput(0); assert(previous != nullptr); - channels = getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerType = "bn_" + m_ConfigBlocks.at(i).at("activation"); - printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); + std::string layerName = "batchnorm_" + m_ConfigBlocks.at(i).at("activation"); + printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); } else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul") { - std::string type; - if (m_ConfigBlocks.at(i).at("type") == "implicit_add") - type = "add"; - else if (m_ConfigBlocks.at(i).at("type") == "implicit_mul") - type = "mul"; - assert(m_ConfigBlocks.at(i).find("filters") != m_ConfigBlocks.at(i).end()); - int filters = std::stoi(m_ConfigBlocks.at(i).at("filters")); - nvinfer1::ILayer* out = implicitLayer(filters, weights, m_TrtWeights, weightPtr, &network); - previous = out->getOutput(0); + previous = implicitLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, &network); assert(previous != nullptr); - channels = getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerType = "implicit_" + type; - printLayerInfo(layerIndex, layerType, " -", outputVol, std::to_string(weightPtr)); + std::string layerName = m_ConfigBlocks.at(i).at("type"); + printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr)); } - else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || m_ConfigBlocks.at(i).at("type") == "control_channels") + else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || + m_ConfigBlocks.at(i).at("type") == "control_channels") { - std::string type; - if (m_ConfigBlocks.at(i).at("type") == "shift_channels") - type = "shift"; - else if (m_ConfigBlocks.at(i).at("type") == "control_channels") - type = "control"; assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); int from = stoi(m_ConfigBlocks.at(i).at("from")); if (from > 0) 
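Both the `shift_channels`/`control_channels` branch above and the `shortcut` branch in the next hunk normalize the cfg `from` index before indexing `tensorOutputs`. A minimal sketch of that resolution logic; the helper name `resolveFrom` is illustrative, not part of the patch:

```
// Sketch: how yolo.cpp resolves a cfg "from" index into a tensorOutputs slot.
// A positive value is an absolute block index in the cfg; it is normalized to
// a negative, relative offset before use.
int resolveFrom(int from, int i)
{
  if (from > 0)
    from = from - i + 1; // absolute cfg index -> relative offset
  return i + from - 1;   // slot read from tensorOutputs
}
// e.g. block i = 10 with from = -3 reads tensorOutputs[6]
```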
@@ -254,140 +236,193 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); assert(i + from - 1 < i - 2); - nvinfer1::ILayer* out = channelsLayer(type, previous, tensorOutputs[i + from - 1], &network); - previous = out->getOutput(0); + + std::string inputVol = dimsToString(previous->getDimensions()); + previous = channelsLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerType = type + "_channels" + ": " + std::to_string(i + from - 1); - printLayerInfo(layerIndex, layerType, " -", outputVol, " -"); - } - - else if (m_ConfigBlocks.at(i).at("type") == "dropout") - { - // Skip dropout layer - assert(previous != nullptr); - tensorOutputs.push_back(previous); - printLayerInfo(layerIndex, "dropout", " -", " -", " -"); + std::string layerName = m_ConfigBlocks.at(i).at("type") + ": " + std::to_string(i + from - 1); + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } else if (m_ConfigBlocks.at(i).at("type") == "shortcut") { - assert(m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()); assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); - std::string activation = m_ConfigBlocks.at(i).at("activation"); int from = stoi(m_ConfigBlocks.at(i).at("from")); if (from > 0) from = from - i + 1; assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); assert(i + from - 1 < i - 2); + + std::string mode = "add"; + if (m_ConfigBlocks.at(i).find("mode") != m_ConfigBlocks.at(i).end()) + mode = m_ConfigBlocks.at(i).at("mode"); + + std::string activation = "linear"; + if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) + activation = m_ConfigBlocks.at(i).at("activation"); + std::string inputVol = dimsToString(previous->getDimensions()); std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions()); - nvinfer1::ILayer* out = shortcutLayer(i, activation, inputVol, shortcutVol, previous, tensorOutputs[i + from - 1], &network); - previous = out->getOutput(0); + previous = shortcutLayer( + i, mode, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], + &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerType = "shortcut_" + m_ConfigBlocks.at(i).at("activation") + ": " + std::to_string(i + from - 1); - printLayerInfo(layerIndex, layerType, " -", outputVol, " -"); - if (inputVol != shortcutVol) { + std::string layerName = "shortcut_" + mode + "_" + activation + ": " + std::to_string(i + from - 1); + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); + + if (mode == "add" && inputVol != shortcutVol) std::cout << inputVol << " +" << shortcutVol << std::endl; - } } else if (m_ConfigBlocks.at(i).at("type") == "route") { - assert(m_ConfigBlocks.at(i).find("layers") != m_ConfigBlocks.at(i).end()); - nvinfer1::ILayer* out = routeLayer(i, m_ConfigBlocks.at(i), tensorOutputs, &network); - previous = out->getOutput(0); + std::string layers; + previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network); assert(previous != nullptr); - channels =
getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); + std::string layerName = "route: " + layers; + printLayerInfo(layerIndex, layerName, "-", outputVol, "-"); } else if (m_ConfigBlocks.at(i).at("type") == "upsample") { std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], previous, &network); - previous = out->getOutput(0); + previous = upsampleLayer(i, m_ConfigBlocks[i], previous, &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); + std::string layerName = "upsample"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } - else if (m_ConfigBlocks.at(i).at("type") == "maxpool") + else if (m_ConfigBlocks.at(i).at("type") == "maxpool" || m_ConfigBlocks.at(i).at("type") == "avgpool") { std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = maxpoolLayer(i, m_ConfigBlocks.at(i), previous, &network); - previous = out->getOutput(0); + previous = poolingLayer(i, m_ConfigBlocks.at(i), previous, &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); + std::string layerName = m_ConfigBlocks.at(i).at("type"); + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } else if (m_ConfigBlocks.at(i).at("type") == "reorg") { - if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolor") != std::string::npos) - { - std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = reorgV5Layer(i, previous, &network); - previous = out->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - std::string layerType = "reorgV5"; - printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); - } - else - { - std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2); - assert(reorgPlugin != nullptr); - nvinfer1::IPluginV2Layer* reorg = - network.addPluginV2(&previous, 1, *reorgPlugin); - assert(reorg != nullptr); - std::string layerName = "reorg_" + std::to_string(i); - reorg->setName(layerName.c_str()); - previous = reorg->getOutput(0); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - channels = getNumChannels(previous); - tensorOutputs.push_back(reorg->getOutput(0)); - printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr)); - } + std::string inputVol = dimsToString(previous->getDimensions()); + previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerName = "reorg"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); + } + + else if (m_ConfigBlocks.at(i).at("type") == "reduce") + { + std::string inputVol = dimsToString(previous->getDimensions()); + previous = reduceLayer(i, 
m_ConfigBlocks.at(i), previous, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerName = "reduce"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); + } + + else if (m_ConfigBlocks.at(i).at("type") == "shuffle") + { + std::string layer; + std::string inputVol = dimsToString(previous->getDimensions()); + previous = shuffleLayer(i, layer, m_ConfigBlocks.at(i), previous, tensorOutputs, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerName = "shuffle: " + layer; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); + } + + else if (m_ConfigBlocks.at(i).at("type") == "softmax") + { + std::string inputVol = dimsToString(previous->getDimensions()); + previous = softmaxLayer(i, m_ConfigBlocks.at(i), previous, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerName = "softmax"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region") { if (m_ConfigBlocks.at(i).at("type") == "yolo") - { if (m_NetworkType.find("yolor") != std::string::npos) modelType = 2; else modelType = 1; - } else modelType = 0; - std::string layerName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i); + std::string blobName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i); nvinfer1::Dims prevTensorDims = previous->getDimensions(); - TensorInfo& curYoloTensor = m_YoloTensors.at(inputYoloCount); - curYoloTensor.blobName = layerName; + TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); + curYoloTensor.blobName = blobName; curYoloTensor.gridSizeX = prevTensorDims.d[2]; curYoloTensor.gridSizeY = prevTensorDims.d[1]; std::string inputVol = dimsToString(previous->getDimensions()); - channels = getNumChannels(previous); tensorOutputs.push_back(previous); - yoloInputs.push_back(previous); - ++inputYoloCount; - printLayerInfo(layerIndex, modelType != 0 ? "yolo" : "region", inputVol, " -", " -"); + yoloTensorInputs[yoloCountInputs] = previous; + ++yoloCountInputs; + std::string layerName = modelType != 0 ? 
"yolo" : "region"; + printLayerInfo(layerIndex, layerName, inputVol, "-", "-"); + } + + else if (m_ConfigBlocks.at(i).at("type") == "cls") + { + modelType = 3; + + std::string blobName = "cls_" + std::to_string(i); + nvinfer1::Dims prevTensorDims = previous->getDimensions(); + TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); + curYoloTensor.blobName = blobName; + curYoloTensor.numBBoxes = prevTensorDims.d[1]; + m_NumClasses = prevTensorDims.d[0]; + + std::string inputVol = dimsToString(previous->getDimensions()); + previous = clsLayer(i, m_ConfigBlocks.at(i), previous, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + yoloTensorInputs[yoloCountInputs] = previous; + ++yoloCountInputs; + std::string layerName = "cls"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); + } + + else if (m_ConfigBlocks.at(i).at("type") == "reg") + { + modelType = 3; + + std::string blobName = "reg_" + std::to_string(i); + nvinfer1::Dims prevTensorDims = previous->getDimensions(); + TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); + curYoloTensor.blobName = blobName; + curYoloTensor.numBBoxes = prevTensorDims.d[1]; + + std::string inputVol = dimsToString(previous->getDimensions()); + previous = regLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + yoloTensorInputs[yoloCountInputs] = previous; + ++yoloCountInputs; + std::string layerName = "reg"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); } else @@ -403,17 +438,18 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::IN assert(0); } - if (m_YoloCount == inputYoloCount) + if (m_YoloCount == yoloCountInputs) { assert((modelType != -1) && "\nCould not determine model type"); - nvinfer1::ITensor* yoloInputTensors[inputYoloCount]; uint64_t outputSize = 0; - for (uint j = 0; j < inputYoloCount; ++j) + for (uint j = 0; j < yoloCountInputs; ++j) { - yoloInputTensors[j] = yoloInputs[j]; TensorInfo& curYoloTensor = m_YoloTensors.at(j); - outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes; + if (modelType == 3) + outputSize = curYoloTensor.numBBoxes; + else + outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes; } if (m_TopK > outputSize) { @@ -422,21 +458,15 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::IN assert(0); } - std::string layerName = "yolo"; nvinfer1::IPluginV2* yoloPlugin = new YoloLayer( - m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize, modelType, m_TopK, - m_ScoreThreshold); + m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize, modelType, m_TopK, m_ScoreThreshold); assert(yoloPlugin != nullptr); - nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloInputTensors, inputYoloCount, *yoloPlugin); + nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin); assert(yolo != nullptr); - yolo->setName(layerName.c_str()); - previous = yolo->getOutput(0); - assert(previous != nullptr); - previous->setName(layerName.c_str()); - tensorOutputs.push_back(yolo->getOutput(0)); + std::string yoloLayerName = "yolo"; + yolo->setName(yoloLayerName.c_str()); - nvinfer1::ITensor* yoloTensors[] = {yolo->getOutput(0), 
yolo->getOutput(1)}; - std::string outputVol = dimsToString(previous->getDimensions()); + nvinfer1::ITensor* yoloTensorOutputs[] = {yolo->getOutput(0), yolo->getOutput(1)}; nvinfer1::plugin::NMSParameters nmsParams; nmsParams.shareLocation = true; @@ -448,28 +478,28 @@ NvDsInferStatus Yolo::buildYoloNetwork(std::vector<float>& weights, nvinfer1::IN nmsParams.iouThreshold = m_IouThreshold; nmsParams.isNormalized = false; - layerName = "batchedNMS"; + std::string nmslayerName = "batchedNMS"; nvinfer1::IPluginV2* batchedNMS = createBatchedNMSPlugin(nmsParams); - nvinfer1::IPluginV2Layer* nms = network.addPluginV2(yoloTensors, 2, *batchedNMS); - nms->setName(layerName.c_str()); + nvinfer1::IPluginV2Layer* nms = network.addPluginV2(yoloTensorOutputs, 2, *batchedNMS); + nms->setName(nmslayerName.c_str()); nvinfer1::ITensor* num_detections = nms->getOutput(0); - layerName = "num_detections"; - num_detections->setName(layerName.c_str()); + nmslayerName = "num_detections"; + num_detections->setName(nmslayerName.c_str()); nvinfer1::ITensor* nmsed_boxes = nms->getOutput(1); - layerName = "nmsed_boxes"; - nmsed_boxes->setName(layerName.c_str()); + nmslayerName = "nmsed_boxes"; + nmsed_boxes->setName(nmslayerName.c_str()); nvinfer1::ITensor* nmsed_scores = nms->getOutput(2); - layerName = "nmsed_scores"; - nmsed_scores->setName(layerName.c_str()); + nmslayerName = "nmsed_scores"; + nmsed_scores->setName(nmslayerName.c_str()); nvinfer1::ITensor* nmsed_classes = nms->getOutput(3); - layerName = "nmsed_classes"; - nmsed_classes->setName(layerName.c_str()); + nmslayerName = "nmsed_classes"; + nmsed_classes->setName(nmslayerName.c_str()); network.markOutput(*num_detections); network.markOutput(*nmsed_boxes); network.markOutput(*nmsed_scores); network.markOutput(*nmsed_classes); - printLayerInfo("", "batched_nms", " -", outputVol, " -"); + printLayerInfo("", "batched_nms", "-", "-", "-"); } else { std::cout << "\nError in yolo cfg file" << std::endl; @@ -620,6 +650,12 @@ void Yolo::parseConfigBlocks() m_YoloTensors.push_back(outputTensor); } + else if ((block.at("type") == "cls") || (block.at("type") == "reg")) + { + ++m_YoloCount; + TensorInfo outputTensor; + m_YoloTensors.push_back(outputTensor); + } } } @@ -640,9 +676,7 @@ void Yolo::parseConfigNMSBlocks() void Yolo::destroyNetworkUtils() { for (uint i = 0; i < m_TrtWeights.size(); ++i) - { if (m_TrtWeights[i].count > 0) free(const_cast<void*>(m_TrtWeights[i].values)); - } m_TrtWeights.clear(); } diff --git a/nvdsinfer_custom_impl_Yolo/yolo.h b/nvdsinfer_custom_impl_Yolo/yolo.h index 038a2ec..1dd75f5 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.h +++ b/nvdsinfer_custom_impl_Yolo/yolo.h @@ -33,8 +33,13 @@ #include "layers/shortcut_layer.h" #include "layers/route_layer.h" #include "layers/upsample_layer.h" -#include "layers/maxpool_layer.h" -#include "layers/reorgv5_layer.h" +#include "layers/pooling_layer.h" +#include "layers/reorg_layer.h" +#include "layers/reduce_layer.h" +#include "layers/shuffle_layer.h" +#include "layers/softmax_layer.h" +#include "layers/cls_layer.h" +#include "layers/reg_layer.h" #include "nvdsinfer_custom_impl.h" diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward.cu b/nvdsinfer_custom_impl_Yolo/yoloForward.cu index fb03854..a3d106f 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward.cu @@ -28,7 +28,7 @@ __global__ void gpuYoloLayer( if (objectness < scoreThreshold) return; - int count = (int)atomicAdd(&countData[0], 1); + int count = (int)atomicAdd(countData, 1); const float alpha = scaleXY; const
float beta = -0.5 * (scaleXY - 1); diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_e.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_e.cu new file mode 100644 index 0000000..2d34d39 --- /dev/null +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_e.cu @@ -0,0 +1,73 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include <cuda_runtime.h> +#include <cstdint> + +__global__ void gpuYoloLayer_e( + const float* cls, const float* reg, int* d_indexes, float* d_scores, float* d_boxes, int* d_classes, int* countData, + const float scoreThreshold, const uint netWidth, const uint netHeight, const uint numOutputClasses, + const uint64_t outputSize) +{ + uint x_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (x_id >= outputSize) + return; + + float maxProb = 0.0f; + int maxIndex = -1; + + for (uint i = 0; i < numOutputClasses; ++i) + { + float prob + = cls[x_id * numOutputClasses + i]; + + if (prob > maxProb) + { + maxProb = prob; + maxIndex = i; + } + } + + if (maxProb < scoreThreshold) + return; + + int count = (int)atomicAdd(countData, 1); + + d_indexes[count] = count; + d_scores[count] = maxProb + 1.f; + d_boxes[count * 4 + 0] = reg[x_id * 4 + 0]; + d_boxes[count * 4 + 1] = reg[x_id * 4 + 1]; + d_boxes[count * 4 + 2] = reg[x_id * 4 + 2]; + d_boxes[count * 4 + 3] = reg[x_id * 4 + 3]; + d_classes[count] = maxIndex; +} + +cudaError_t cudaYoloLayer_e( + const void* cls, const void* reg, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData, + const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, const uint& netHeight, + const uint& numOutputClasses, cudaStream_t stream); + +cudaError_t cudaYoloLayer_e( + const void* cls, const void* reg, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData, + const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, const uint& netHeight, + const uint& numOutputClasses, cudaStream_t stream) +{ + int threads_per_block = 16; + int number_of_blocks = 525; + + for (unsigned int batch = 0; batch < batchSize; ++batch) + { + gpuYoloLayer_e<<<number_of_blocks, threads_per_block, 0, stream>>>( + reinterpret_cast<const float*>(cls) + (batch * numOutputClasses * outputSize), + reinterpret_cast<const float*>(reg) + (batch * 4 * outputSize), + reinterpret_cast<int*>(d_indexes) + (batch * outputSize), + reinterpret_cast<float*>(d_scores) + (batch * outputSize), + reinterpret_cast<float*>(d_boxes) + (batch * 4 * outputSize), + reinterpret_cast<int*>(d_classes) + (batch * outputSize), reinterpret_cast<int*>(countData) + (batch), + scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize); + } + return cudaGetLastError(); +} diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu index ff850c3..9b81910 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu @@ -26,7 +26,7 @@ __global__ void gpuYoloLayer_nc( if (objectness < scoreThreshold) return; - int count = (int)atomicAdd(&countData[0], 1); + int count = (int)atomicAdd(countData, 1); const float alpha = scaleXY; const float beta = -0.5 * (scaleXY - 1); diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu index 46bda23..cbc1aaf 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu @@ -28,7 +28,7 @@ __global__ void gpuYoloLayer_r( if (objectness < scoreThreshold) return; - int count = (int)atomicAdd(&countData[0], 1); + int count = (int)atomicAdd(countData, 1); const float alpha = scaleXY;
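`cudaYoloLayer_e` above launches with a hard-coded 16 × 525 = 8400 threads, which matches the 80·80 + 40·40 + 20·20 predictions of a 640×640 PP-YOLOE model. A hedged sketch of deriving the grid from `outputSize` instead (the helper is illustrative, not what the patch does):

```
#include <cstdint>

// Sketch: size-agnostic launch math for gpuYoloLayer_e. 16 * 525 = 8400 is the
// prediction count of a 640x640 PP-YOLOE model; other input sizes would need a
// different block count.
static int blocksFor(uint64_t outputSize, int threadsPerBlock = 16)
{
  return static_cast<int>((outputSize + threadsPerBlock - 1) / threadsPerBlock);
}
// usage: gpuYoloLayer_e<<<blocksFor(outputSize), 16, 0, stream>>>(...);
```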
const float beta = -0.5 * (scaleXY - 1); diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu index a1e0cbc..43c57d9 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu @@ -49,7 +49,7 @@ __global__ void gpuRegionLayer( if (objectness < scoreThreshold) return; - int count = (int)atomicAdd(&countData[0], 1); + int count = (int)atomicAdd(countData, 1); float x = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) diff --git a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp index 504e68c..75dfe25 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp +++ b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp @@ -47,6 +47,11 @@ namespace { } } +cudaError_t cudaYoloLayer_e( + const void* cls, const void* reg, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData, + const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, const uint& netHeight, + const uint& numOutputClasses, cudaStream_t stream); + cudaError_t cudaYoloLayer_r( const void* input, void* d_indexes, void* d_scores, void* d_boxes, void* d_classes, void* countData, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, @@ -88,34 +93,36 @@ YoloLayer::YoloLayer (const void* data, size_t length) read(d, m_TopK); read(d, m_ScoreThreshold); - uint yoloTensorsSize; - read(d, yoloTensorsSize); - for (uint i = 0; i < yoloTensorsSize; ++i) - { - TensorInfo curYoloTensor; - read(d, curYoloTensor.gridSizeX); - read(d, curYoloTensor.gridSizeY); - read(d, curYoloTensor.numBBoxes); - read(d, curYoloTensor.scaleXY); - - uint anchorsSize; - read(d, anchorsSize); - for (uint j = 0; j < anchorsSize; j++) + if (m_Type != 3) { + uint yoloTensorsSize; + read(d, yoloTensorsSize); + for (uint i = 0; i < yoloTensorsSize; ++i) { - float result; - read(d, result); - curYoloTensor.anchors.push_back(result); - } + TensorInfo curYoloTensor; + read(d, curYoloTensor.gridSizeX); + read(d, curYoloTensor.gridSizeY); + read(d, curYoloTensor.numBBoxes); + read(d, curYoloTensor.scaleXY); - uint maskSize; - read(d, maskSize); - for (uint j = 0; j < maskSize; j++) - { - int result; - read(d, result); - curYoloTensor.mask.push_back(result); + uint anchorsSize; + read(d, anchorsSize); + for (uint j = 0; j < anchorsSize; j++) + { + float result; + read(d, result); + curYoloTensor.anchors.push_back(result); + } + + uint maskSize; + read(d, maskSize); + for (uint j = 0; j < maskSize; j++) + { + int result; + read(d, result); + curYoloTensor.mask.push_back(result); + } + m_YoloTensors.push_back(curYoloTensor); } - m_YoloTensors.push_back(curYoloTensor); } kNUM_CLASSES = m_NumClasses; @@ -147,9 +154,9 @@ YoloLayer::getOutputDimensions( { assert(index < 3); if (index == 0) { - return nvinfer1::Dims3(m_TopK, 1, 4); + return nvinfer1::Dims{3, {static_cast<int>(m_TopK), 1, 4}}; } - return nvinfer1::DimsHW(m_TopK, m_NumClasses); + return nvinfer1::Dims{2, {static_cast<int>(m_TopK), static_cast<int>(m_NumClasses)}}; } bool YoloLayer::supportsFormat ( @@ -173,95 +180,106 @@ int32_t YoloLayer::enqueue ( int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept { - void* countData = workspace; void* bboxData = outputs[0]; void* scoreData = outputs[1]; - CUDA_CHECK(cudaMemsetAsync((int*)countData, 0, sizeof(int) * batchSize, stream));
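The `enqueue` rewrite that follows moves the plugin's scratch buffers from pinned host memory (`cudaMallocHost`) to device memory (`cudaMalloc`), and allocates `countData` itself now that `getWorkspaceSize` returns 0. A minimal sketch of the per-call pattern, with error checking elided and an illustrative helper name:

```
#include <cuda_runtime.h>
#include <cstddef>

// Sketch: per-enqueue device scratch buffer, zeroed asynchronously on the
// plugin's stream and freed by the caller once the decode kernels finish.
static float* allocScratch(size_t count, cudaStream_t stream)
{
  float* d = nullptr;
  cudaMalloc(&d, count * sizeof(float));          // device, not pinned host
  cudaMemsetAsync(d, 0, count * sizeof(float), stream);
  return d;                                       // caller: cudaFree(d)
}
```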
CUDA_CHECK(cudaMemsetAsync((float*)bboxData, 0, sizeof(float) * m_TopK * 4 * batchSize, stream)); CUDA_CHECK(cudaMemsetAsync((float*)scoreData, 0, sizeof(float) * m_TopK * m_NumClasses * batchSize, stream)); + void* countData; + CUDA_CHECK(cudaMalloc(&countData, sizeof(int) * batchSize)); + CUDA_CHECK(cudaMemsetAsync((int*)countData, 0, sizeof(int) * batchSize, stream)); + void* d_indexes; - CUDA_CHECK(cudaMallocHost(&d_indexes, sizeof(int) * m_OutputSize * batchSize)); - CUDA_CHECK(cudaMemsetAsync((float*)d_indexes, 0, sizeof(int) * m_OutputSize * batchSize, stream)); + CUDA_CHECK(cudaMalloc(&d_indexes, sizeof(int) * m_OutputSize * batchSize)); + CUDA_CHECK(cudaMemsetAsync((int*)d_indexes, 0, sizeof(int) * m_OutputSize * batchSize, stream)); void* d_scores; - CUDA_CHECK(cudaMallocHost(&d_scores, sizeof(float) * m_OutputSize * batchSize)); + CUDA_CHECK(cudaMalloc(&d_scores, sizeof(float) * m_OutputSize * batchSize)); CUDA_CHECK(cudaMemsetAsync((float*)d_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream)); void* d_boxes; - CUDA_CHECK(cudaMallocHost(&d_boxes, sizeof(float) * m_OutputSize * 4 * batchSize)); + CUDA_CHECK(cudaMalloc(&d_boxes, sizeof(float) * m_OutputSize * 4 * batchSize)); CUDA_CHECK(cudaMemsetAsync((float*)d_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream)); void* d_classes; - CUDA_CHECK(cudaMallocHost(&d_classes, sizeof(int) * m_OutputSize * batchSize)); + CUDA_CHECK(cudaMalloc(&d_classes, sizeof(int) * m_OutputSize * batchSize)); CUDA_CHECK(cudaMemsetAsync((float*)d_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream)); - uint yoloTensorsSize = m_YoloTensors.size(); - for (uint i = 0; i < yoloTensorsSize; ++i) + if (m_Type == 3) { - TensorInfo& curYoloTensor = m_YoloTensors.at(i); + CUDA_CHECK(cudaYoloLayer_e( + inputs[0], inputs[1], d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, m_OutputSize, + m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream)); + } + else + { + uint yoloTensorsSize = m_YoloTensors.size(); + for (uint i = 0; i < yoloTensorsSize; ++i) + { + TensorInfo& curYoloTensor = m_YoloTensors.at(i); - uint numBBoxes = curYoloTensor.numBBoxes; - float scaleXY = curYoloTensor.scaleXY; - uint gridSizeX = curYoloTensor.gridSizeX; - uint gridSizeY = curYoloTensor.gridSizeY; - std::vector<float> anchors = curYoloTensor.anchors; - std::vector<int> mask = curYoloTensor.mask; + uint numBBoxes = curYoloTensor.numBBoxes; + float scaleXY = curYoloTensor.scaleXY; + uint gridSizeX = curYoloTensor.gridSizeX; + uint gridSizeY = curYoloTensor.gridSizeY; + std::vector<float> anchors = curYoloTensor.anchors; + std::vector<int> mask = curYoloTensor.mask; - void* v_anchors; - void* v_mask; - if (anchors.size() > 0) { - float* f_anchors = anchors.data(); - CUDA_CHECK(cudaMallocHost(&v_anchors, sizeof(float) * anchors.size())); - CUDA_CHECK(cudaMemcpy(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice)); - } - if (mask.size() > 0) { - int* f_mask = mask.data(); - CUDA_CHECK(cudaMallocHost(&v_mask, sizeof(int) * mask.size())); - CUDA_CHECK(cudaMemcpy(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice)); - } + void* v_anchors; + void* v_mask; + if (anchors.size() > 0) { + float* f_anchors = anchors.data(); + CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size())); + CUDA_CHECK(cudaMemcpy(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice)); + } + if (mask.size() > 0) { + int* f_mask = mask.data(); + CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size())); +
CUDA_CHECK(cudaMemcpy(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice)); + } - uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses)); + uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses)); - if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0 - CUDA_CHECK(cudaYoloLayer_r( - inputs[i], d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, inputSize, m_OutputSize, - m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, 2.0, v_anchors, - v_mask, stream)); - } - else if (m_Type == 1) { - if (m_NewCoords) { - CUDA_CHECK(cudaYoloLayer_nc( + if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0 + CUDA_CHECK(cudaYoloLayer_r( inputs[i], d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, inputSize, m_OutputSize, - m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, - v_anchors, v_mask, stream)); + m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, 2.0, v_anchors, + v_mask, stream)); + } + else if (m_Type == 1) { + if (m_NewCoords) { + CUDA_CHECK(cudaYoloLayer_nc( + inputs[i], d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, inputSize, m_OutputSize, + m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, + v_anchors, v_mask, stream)); + } + else { + CUDA_CHECK(cudaYoloLayer( + inputs[i], d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, inputSize, m_OutputSize, + m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, + v_anchors, v_mask, stream)); + } } else { - CUDA_CHECK(cudaYoloLayer( - inputs[i], d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, inputSize, m_OutputSize, - m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, - v_anchors, v_mask, stream)); + void* softmax; + CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize)); + CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize)); + + CUDA_CHECK(cudaRegionLayer( + inputs[i], softmax, d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, inputSize, m_OutputSize, + m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, + stream)); + + CUDA_CHECK(cudaFree(softmax)); } - } - else { - void* softmax; - CUDA_CHECK(cudaMallocHost(&softmax, sizeof(float) * inputSize * batchSize)); - CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize)); - CUDA_CHECK(cudaRegionLayer( - inputs[i], softmax, d_indexes, d_scores, d_boxes, d_classes, countData, batchSize, inputSize, m_OutputSize, - m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, - stream)); - - CUDA_CHECK(cudaFreeHost(softmax)); - } - - if (anchors.size() > 0) { - CUDA_CHECK(cudaFreeHost(v_anchors)); - } - if (mask.size() > 0) { - CUDA_CHECK(cudaFreeHost(v_mask)); + if (anchors.size() > 0) { + CUDA_CHECK(cudaFree(v_anchors)); + } + if (mask.size() > 0) { + CUDA_CHECK(cudaFree(v_mask)); + } } } @@ -269,10 +287,11 @@ int32_t YoloLayer::enqueue ( d_indexes, d_scores, d_boxes, d_classes, bboxData, scoreData, countData, batchSize, m_OutputSize, m_TopK, m_NumClasses, stream)); - CUDA_CHECK(cudaFreeHost(d_indexes)); - CUDA_CHECK(cudaFreeHost(d_scores)); - CUDA_CHECK(cudaFreeHost(d_boxes)); - 
CUDA_CHECK(cudaFreeHost(d_classes)); + CUDA_CHECK(cudaFree(countData)); + CUDA_CHECK(cudaFree(d_indexes)); + CUDA_CHECK(cudaFree(d_scores)); + CUDA_CHECK(cudaFree(d_boxes)); + CUDA_CHECK(cudaFree(d_classes)); return 0; } @@ -290,18 +309,20 @@ size_t YoloLayer::getSerializationSize() const noexcept totalSize += sizeof(m_TopK); totalSize += sizeof(m_ScoreThreshold); - uint yoloTensorsSize = m_YoloTensors.size(); - totalSize += sizeof(yoloTensorsSize); + if (m_Type != 3) { + uint yoloTensorsSize = m_YoloTensors.size(); + totalSize += sizeof(yoloTensorsSize); - for (uint i = 0; i < yoloTensorsSize; ++i) - { - const TensorInfo& curYoloTensor = m_YoloTensors.at(i); - totalSize += sizeof(curYoloTensor.gridSizeX); - totalSize += sizeof(curYoloTensor.gridSizeY); - totalSize += sizeof(curYoloTensor.numBBoxes); - totalSize += sizeof(curYoloTensor.scaleXY); - totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size(); - totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size(); + for (uint i = 0; i < yoloTensorsSize; ++i) + { + const TensorInfo& curYoloTensor = m_YoloTensors.at(i); + totalSize += sizeof(curYoloTensor.gridSizeX); + totalSize += sizeof(curYoloTensor.gridSizeY); + totalSize += sizeof(curYoloTensor.numBBoxes); + totalSize += sizeof(curYoloTensor.scaleXY); + totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size(); + totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size(); + } } return totalSize; @@ -320,28 +341,30 @@ void YoloLayer::serialize(void* buffer) const noexcept write(d, m_TopK); write(d, m_ScoreThreshold); - uint yoloTensorsSize = m_YoloTensors.size(); - write(d, yoloTensorsSize); - for (uint i = 0; i < yoloTensorsSize; ++i) - { - const TensorInfo& curYoloTensor = m_YoloTensors.at(i); - write(d, curYoloTensor.gridSizeX); - write(d, curYoloTensor.gridSizeY); - write(d, curYoloTensor.numBBoxes); - write(d, curYoloTensor.scaleXY); - - uint anchorsSize = curYoloTensor.anchors.size(); - write(d, anchorsSize); - for (uint j = 0; j < anchorsSize; ++j) + if (m_Type != 3) { + uint yoloTensorsSize = m_YoloTensors.size(); + write(d, yoloTensorsSize); + for (uint i = 0; i < yoloTensorsSize; ++i) { - write(d, curYoloTensor.anchors[j]); - } + const TensorInfo& curYoloTensor = m_YoloTensors.at(i); + write(d, curYoloTensor.gridSizeX); + write(d, curYoloTensor.gridSizeY); + write(d, curYoloTensor.numBBoxes); + write(d, curYoloTensor.scaleXY); - uint maskSize = curYoloTensor.mask.size(); - write(d, maskSize); - for (uint j = 0; j < maskSize; ++j) - { - write(d, curYoloTensor.mask[j]); + uint anchorsSize = curYoloTensor.anchors.size(); + write(d, anchorsSize); + for (uint j = 0; j < anchorsSize; ++j) + { + write(d, curYoloTensor.anchors[j]); + } + + uint maskSize = curYoloTensor.mask.size(); + write(d, maskSize); + for (uint j = 0; j < maskSize; ++j) + { + write(d, curYoloTensor.mask[j]); + } } } } diff --git a/nvdsinfer_custom_impl_Yolo/yoloPlugins.h b/nvdsinfer_custom_impl_Yolo/yoloPlugins.h index e7ae524..2948e2d 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloPlugins.h +++ b/nvdsinfer_custom_impl_Yolo/yoloPlugins.h @@ -85,9 +85,7 @@ public: void terminate () noexcept override {} - size_t getWorkspaceSize (int maxBatchSize) const noexcept override { - return maxBatchSize * sizeof(int); - } + size_t getWorkspaceSize (int maxBatchSize) const noexcept override { return 0; } int32_t enqueue ( int batchSize, void const* const* inputs, void* const* outputs, void* workspace, 
cudaStream_t stream) diff --git a/readme.md b/readme.md index 1916a8c..e38a7e1 100644 --- a/readme.md +++ b/readme.md @@ -7,7 +7,6 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models * Models benchmarks * DeepStream tutorials * YOLOX support -* PP-YOLO support * YOLOv6 support * YOLOv7 support * Dynamic batch-size @@ -23,18 +22,18 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models * Support for INT8 calibration * Support for non square models * New documentation for multiple models -* **YOLOv5 >= 2.0 support** -* **YOLOR support** +* YOLOv5 support +* YOLOR support * **GPU YOLO Decoder** [#138](https://github.com/marcoslucianops/DeepStream-Yolo/issues/138) * **GPU Batched NMS** [#142](https://github.com/marcoslucianops/DeepStream-Yolo/issues/142) -* **New YOLOv5 conversion** +* **PP-YOLOE support** ## ### Getting started * [Requirements](#requirements) -* [Tested models](#tested-models) +* [Supported models](#supported-models) * [Benchmarks](#benchmarks) * [dGPU installation](#dgpu-installation) * [Basic usage](#basic-usage) @@ -42,6 +41,7 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models * [INT8 calibration](#int8-calibration) * [YOLOv5 usage](docs/YOLOv5.md) * [YOLOR usage](docs/YOLOR.md) +* [PP-YOLOE usage](docs/PPYOLOE.md) * [Using your custom model](docs/customModels.md) * [Multiple YOLO GIEs](docs/multipleGIEs.md) @@ -81,23 +81,14 @@ NVIDIA DeepStream SDK 6.1 / 6.0.1 / 6.0 configuration for YOLO models * [NVIDIA DeepStream SDK 6.0.1 / 6.0](https://developer.nvidia.com/embedded/deepstream-on-jetson-downloads-archived) * [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) -### For YOLOv5 and YOLOR - -#### x86 platform - -* [PyTorch >= 1.7.0](https://pytorch.org/get-started/locally/) - -#### Jetson platform - -* [PyTorch >= 1.7.0](https://forums.developer.nvidia.com/t/pytorch-for-jetson-version-1-11-now-available/72048) - ## -### Tested models +### Supported models * [Darknet YOLO](https://github.com/AlexeyAB/darknet) * [YOLOv5 >= 2.0](https://github.com/ultralytics/yolov5) * [YOLOR](https://github.com/WongKinYiu/yolor) +* [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe) * [MobileNet-YOLO](https://github.com/dog-qiuqiu/MobileNet-Yolo) * [YOLO-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest) diff --git a/utils/gen_wts_ppyoloe.py b/utils/gen_wts_ppyoloe.py new file mode 100644 index 0000000..85fe20d --- /dev/null +++ b/utils/gen_wts_ppyoloe.py @@ -0,0 +1,437 @@ +import os +import struct +import paddle +import numpy as np +from ppdet.core.workspace import load_config, merge_config +from ppdet.utils.check import check_gpu, check_version, check_config +from ppdet.utils.cli import ArgsParser +from ppdet.engine import Trainer +from ppdet.slim import build_slim_model + +class Layers(object): + def __init__(self, size, fw, fc, letter_box): + self.blocks = [0 for _ in range(300)] + self.current = -1 + + self.backbone_outs = [] + self.neck_fpn_feats = [] + self.neck_pan_feats = [] + self.yolo_head_cls = [] + self.yolo_head_reg = [] + + self.width = size[0] if len(size) == 1 else size[1] + self.height = size[0] + self.letter_box = letter_box + + self.fw = fw + self.fc = fc + self.wc = 0 + + self.net() + + def ConvBNLayer(self, child): + self.current += 1 + + self.convolutional(child, act='swish') + + def CSPResStage(self, child, ret): + self.current += 1 + + if child.conv_down is not None: + self.convolutional(child.conv_down, act='swish') +
self.convolutional(child.conv1, act='swish') + self.route('-2') + self.convolutional(child.conv2, act='swish') + idx = -3 + for m in child.blocks: + self.convolutional(m.conv1, act='swish') + self.convolutional(m.conv2, act='swish') + self.shortcut(-3) + idx -= 3 + self.route('%d, -1' % idx) + if child.attn is not None: + self.reduce((1, 2), mode='mean', keepdim=True) + self.convolutional(child.attn.fc, act='hardsigmoid') + self.shortcut(-3, ew='mul') + self.convolutional(child.conv3, act='swish') + if ret is True: + self.backbone_outs.append(self.current) + + def CSPStage(self, child, stage): + self.current += 1 + + self.convolutional(child.conv1, act='swish') + self.route('-2') + self.convolutional(child.conv2, act='swish') + idx = -3 + for m in child.convs: + if m.__class__.__name__ == 'BasicBlock': + self.convolutional(m.conv1, act='swish') + self.convolutional(m.conv2, act='swish') + idx -= 2 + elif m.__class__.__name__ == 'SPP': + self.maxpool(m.pool0) + self.route('-2') + self.maxpool(m.pool1) + self.route('-4') + self.maxpool(m.pool2) + self.route('-6, -5, -3, -1') + self.convolutional(m.conv, act='swish') + idx -= 7 + self.route('%d, -1' % idx) + self.convolutional(child.conv3, act='swish') + if stage == 'fpn': + self.neck_fpn_feats.append(self.current) + elif stage == 'pan': + self.neck_pan_feats.append(self.current) + + def Concat(self, route): + self.current += 1 + + r = self.get_route(route) + self.route('-1, %d' % r) + + def Upsample(self): + self.current += 1 + + self.upsample() + + def AvgPool2d(self, route=None): + self.current += 1 + + if route is not None: + r = self.get_route(route) + self.route('%d' % r) + self.avgpool() + + def ESEAttn(self, child, route=0): + self.current += 1 + + if route < 0: + self.route('%d' % route) + self.convolutional(child.fc, act='sigmoid') + self.shortcut(route - 3, ew='mul') + self.convolutional(child.conv, act='swish') + if route == 0: + self.shortcut(-5) + + def Conv2D(self, child, act='linear'): + self.current += 1 + + self.convolutional(child, act=act) + + def Shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None, output=''): + self.current += 1 + + r = 0 + if route is not None: + r = self.get_route(route) + self.shuffle(reshape=reshape, transpose1=transpose1, transpose2=transpose2, route=r) + if output == 'cls': + self.yolo_head_cls.append(self.current) + elif output == 'reg': + self.yolo_head_reg.append(self.current) + + def SoftMax(self, axes): + self.current += 1 + + self.softmax(axes) + + def Detect(self, output): + self.current += 1 + + routes = self.yolo_head_cls if output == 'cls' else self.yolo_head_reg + + for i, route in enumerate(routes): + routes[i] = self.get_route(route) + self.route(str(routes)[1:-1], axis=-1) + self.yolo(output) + + def net(self): + lb = 'letter_box=1\n' if self.letter_box else '' + + self.fc.write('[net]\n' + + 'width=%d\n' % self.width + + 'height=%d\n' % self.height + + 'channels=3\n' + + lb) + + def convolutional(self, cv, act='linear', detect=False): + self.blocks[self.current] += 1 + + self.get_state_dict(cv.state_dict()) + + if cv.__class__.__name__ == 'Conv2D': + filters = cv._out_channels + size = cv._kernel_size + stride = cv._stride + pad = cv._padding + groups = cv._groups + bias = cv.bias + bn = False + else: + filters = cv.conv._out_channels + size = cv.conv._kernel_size + stride = cv.conv._stride + pad = cv.conv._padding + groups = cv.conv._groups + bias = cv.conv.bias + bn = True if hasattr(cv, 'bn') else False + + if detect: + act = 'logistic' + + b = 
'batch_normalize=1\n' if bn is True else '' + g = 'groups=%d\n' % groups if groups > 1 else '' + w = 'bias=0\n' if bias is None and bn is False else '' + + self.fc.write('\n[convolutional]\n' + + b + + 'filters=%d\n' % filters + + 'size=%s\n' % self.get_value(size) + + 'stride=%s\n' % self.get_value(stride) + + 'pad=%s\n' % self.get_value(pad) + + g + + w + + 'activation=%s\n' % act) + + def route(self, layers, axis=0): + self.blocks[self.current] += 1 + + a = 'axis=%d\n' % axis if axis != 0 else '' + + self.fc.write('\n[route]\n' + + 'layers=%s\n' % layers + + a) + + def shortcut(self, r, ew='add', act='linear'): + self.blocks[self.current] += 1 + + m = 'mode=mul\n' if ew == 'mul' else '' + + self.fc.write('\n[shortcut]\n' + + 'from=%d\n' % r + + m + + 'activation=%s\n' % act) + + def reduce(self, dim, mode='mean', keepdim=False): + self.blocks[self.current] += 1 + + self.fc.write('\n[reduce]\n' + + 'mode=%s\n' % mode + + 'axes=%s\n' % str(dim)[1:-1] + + 'keep=%d\n' % keepdim) + + def maxpool(self, m): + self.blocks[self.current] += 1 + + stride = m.stride + size = m.ksize + mode = m.ceil_mode + + m = 'maxpool_up' if mode else 'maxpool' + + self.fc.write('\n[%s]\n' % m + + 'stride=%d\n' % stride + + 'size=%d\n' % size) + + def upsample(self): + self.blocks[self.current] += 1 + + stride = 2 + + self.fc.write('\n[upsample]\n' + + 'stride=%d\n' % stride) + + def avgpool(self): + self.blocks[self.current] += 1 + + self.fc.write('\n[avgpool]\n') + + def shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None): + self.blocks[self.current] += 1 + + r = 'reshape=%s\n' % str(reshape)[1:-1] if reshape is not None else '' + t1 = 'transpose1=%s\n' % str(transpose1)[1:-1] if transpose1 is not None else '' + t2 = 'transpose2=%s\n' % str(transpose2)[1:-1] if transpose2 is not None else '' + f = 'from=%d\n' % route if route is not None else '' + + self.fc.write('\n[shuffle]\n' + + r + + t1 + + t2 + + f) + + def softmax(self, axes): + self.blocks[self.current] += 1 + + self.fc.write('\n[softmax]\n' + + 'axes=%d\n' % axes) + + def yolo(self, output): + self.blocks[self.current] += 1 + + self.fc.write('\n[%s]\n' % output) + + def get_state_dict(self, state_dict): + for k, v in state_dict.items(): + vr = v.reshape([-1]).numpy() + self.fw.write('{} {} '.format(k, len(vr))) + for vv in vr: + self.fw.write(' ') + self.fw.write(struct.pack('>f', float(vv)).hex()) + self.fw.write('\n') + self.wc += 1 + + def get_anchors(self, anchor_points, stride_tensor): + vr = anchor_points.numpy() + self.fw.write('{} {} '.format('anchor_points', len(vr))) + for vv in vr: + self.fw.write(' ') + self.fw.write(struct.pack('>f', float(vv)).hex()) + self.fw.write('\n') + self.wc += 1 + vr = stride_tensor.numpy() + self.fw.write('{} {} '.format('stride_tensor', len(vr))) + for vv in vr: + self.fw.write(' ') + self.fw.write(struct.pack('>f', float(vv)).hex()) + self.fw.write('\n') + self.wc += 1 + + def get_value(self, key): + if type(key) == int: + return key + return key[0] if key[0] == key[1] else str(key)[1:-1] + + def get_route(self, n): + r = 0 + for i, b in enumerate(self.blocks): + if i <= n: + r += b + else: + break + return r - 1 + + +def export_model(): + paddle.set_device('cpu') + + FLAGS = parse_args() + + cfg = load_config(FLAGS.config) + + FLAGS.opt['weights'] = FLAGS.weights + FLAGS.opt['exclude_nms'] = True + + if 'norm_type' in cfg and cfg['norm_type'] == 'sync_bn': + FLAGS.opt['norm_type'] = 'bn' + merge_config(FLAGS.opt) + + if FLAGS.slim_config: + cfg = build_slim_model(cfg, FLAGS.slim_config, 
mode='test') + + merge_config(FLAGS.opt) + check_config(cfg) + check_gpu(cfg.use_gpu) + check_version() + + trainer = Trainer(cfg, mode='test') + trainer.load_weights(cfg.weights) + + trainer.model.eval() + + if not os.path.exists('.tmp'): + os.makedirs('.tmp') + + static_model, _ = trainer._get_infer_cfg_and_input_spec('.tmp') + + os.system('rm -r .tmp') + + return cfg, static_model + + +def parse_args(): + parser = ArgsParser() + parser.add_argument('-w', '--weights', required=True, type=str, help='Input weights (.pdparams) file path (required)') + parser.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method') + args = parser.parse_args() + return args + + +cfg, model = export_model() + +model_name = cfg.filename +inference_size = (cfg.eval_height, cfg.eval_width) +letter_box = False + +for sample_transforms in cfg['EvalReader']['sample_transforms']: + if 'Resize' in sample_transforms: + letter_box = sample_transforms['Resize']['keep_ratio'] + +backbone = cfg[cfg.architecture]['backbone'] +neck = cfg[cfg.architecture]['neck'] +yolo_head = cfg[cfg.architecture]['yolo_head'] + +wts_file = model_name + '.wts' if 'ppyoloe' in model_name else 'ppyoloe_' + model_name + '.wts' +cfg_file = model_name + '.cfg' if 'ppyoloe' in model_name else 'ppyoloe_' + model_name + '.cfg' + +with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: + layers = Layers(inference_size, fw, fc, letter_box) + + if backbone == 'CSPResNet': + layers.fc.write('\n# CSPResNet\n') + + for child in model.backbone.stem: + layers.ConvBNLayer(child) + for i, child in enumerate(model.backbone.stages): + ret = True if i in model.backbone.return_idx else False + layers.CSPResStage(child, ret) + else: + raise SystemExit('Model not supported') + + if neck == 'CustomCSPPAN': + layers.fc.write('\n# CustomCSPPAN\n') + + blocks = layers.backbone_outs[::-1] + for i, block in enumerate(blocks): + if i > 0: + layers.Concat(block) + layers.CSPStage(model.neck.fpn_stages[i][0], 'fpn') + if i < model.neck.num_blocks - 1: + layers.ConvBNLayer(model.neck.fpn_routes[i]) + layers.Upsample() + layers.neck_pan_feats = [layers.neck_fpn_feats[-1], ] + for i in reversed(range(model.neck.num_blocks - 1)): + layers.ConvBNLayer(model.neck.pan_routes[i]) + layers.Concat(layers.neck_fpn_feats[i]) + layers.CSPStage(model.neck.pan_stages[i][0], 'pan') + layers.neck_pan_feats = layers.neck_pan_feats[::-1] + else: + raise SystemExit('Model not supported') + + if yolo_head == 'PPYOLOEHead': + layers.fc.write('\n# PPYOLOEHead\n') + + for i, feat in enumerate(layers.neck_pan_feats): + if i > 0: + layers.AvgPool2d(route=feat) + else: + layers.AvgPool2d() + layers.ESEAttn(model.yolo_head.stem_cls[i]) + layers.Conv2D(model.yolo_head.pred_cls[i], act='sigmoid') + layers.Shuffle(reshape=[model.yolo_head.num_classes, 0], route=feat, output='cls') + layers.ESEAttn(model.yolo_head.stem_reg[i], route=-7) + layers.Conv2D(model.yolo_head.pred_reg[i]) + layers.Shuffle(reshape=[4, model.yolo_head.reg_max + 1, 0], transpose2=[1, 0, 2], route=feat) + layers.SoftMax(0) + layers.Conv2D(model.yolo_head.proj_conv) + layers.Shuffle(reshape=[4, 0], route=feat, output='reg') + layers.Detect('cls') + layers.Detect('reg') + layers.get_anchors(model.yolo_head.anchor_points.reshape([-1]), model.yolo_head.stride_tensor) + + else: + raise SystemExit('Model not supported') + +os.system('echo "%d" | cat - %s > temp && mv temp %s' % (layers.wc, wts_file, wts_file)) diff --git a/utils/gen_wts_yolor.py b/utils/gen_wts_yolor.py index cfc6043..ffaf427 
100644 --- a/utils/gen_wts_yolor.py +++ b/utils/gen_wts_yolor.py @@ -9,11 +9,11 @@ from models.models import Darknet def parse_args(): parser = argparse.ArgumentParser(description='PyTorch YOLOR conversion (main branch)') parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') - parser.add_argument('-c', '--cfg', help='Input cfg (.cfg) file path') + parser.add_argument('-c', '--cfg', default='', help='Input cfg (.cfg) file path') args = parser.parse_args() if not os.path.isfile(args.weights): raise SystemExit('Invalid weights file') - if not os.path.isfile(args.cfg): + if args.cfg != '' and not os.path.isfile(args.cfg): raise SystemExit('Invalid cfg file') return args.weights, args.cfg
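For reference, `gen_wts_ppyoloe.py` writes each tensor as a line of `name count hex hex ...`, where every value is hex-encoded big-endian IEEE-754 (`struct.pack('>f', v).hex()`), and the final `echo | cat` command prepends the total tensor count. A hedged sketch of decoding one hex word on the C++ side (the helper name is illustrative):

```
#include <cstdint>
#include <cstring>
#include <string>

// Sketch: decode one 8-character hex word of the wts format back into a float.
// std::stoul parses the big-endian hex into the IEEE-754 bit pattern, which is
// then reinterpreted as a float (endianness-agnostic on the host side).
static float wtsHexToFloat(const std::string& hex8)
{
  uint32_t bits = static_cast<uint32_t>(std::stoul(hex8, nullptr, 16));
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}
// e.g. wtsHexToFloat("3f800000") == 1.0f
```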