From 087a41acf6a7f15aec2b98ffb89621aa279d8a06 Mon Sep 17 00:00:00 2001 From: Marcos Luciano Date: Wed, 1 Feb 2023 02:52:01 -0300 Subject: [PATCH] Add YOLOv6 support --- README.md | 26 +- config_infer_primary_yoloV6.txt | 27 + docs/YOLOX.md | 4 +- docs/YOLOv5.md | 6 + docs/YOLOv6.md | 145 +++++ docs/YOLOv7.md | 8 +- .../layers/convolutional_layer.cpp | 28 +- .../layers/deconvolutional_layer.cpp | 102 +++ .../layers/deconvolutional_layer.h | 18 + .../layers/shuffle_layer.cpp | 12 +- .../layers/shuffle_layer.h | 4 +- nvdsinfer_custom_impl_Yolo/yolo.cpp | 21 +- nvdsinfer_custom_impl_Yolo/yolo.h | 1 + utils/gen_wts_ppyoloe.py | 21 +- utils/gen_wts_yoloV5.py | 7 +- utils/gen_wts_yoloV6.py | 588 ++++++++++++++++++ utils/gen_wts_yoloV7.py | 7 +- utils/gen_wts_yoloV8.py | 10 +- utils/gen_wts_yolox.py | 12 +- 19 files changed, 982 insertions(+), 65 deletions(-) create mode 100644 config_infer_primary_yoloV6.txt create mode 100644 docs/YOLOv6.md create mode 100644 nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp create mode 100644 nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h create mode 100644 utils/gen_wts_yoloV6.py diff --git a/README.md b/README.md index dd5d303..d894249 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ NVIDIA DeepStream SDK 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO models ### Future updates * DeepStream tutorials -* YOLOv6 support * Dynamic batch-size +* Segmentation model support +* Classification model support ### Improvements on this repository @@ -19,7 +20,7 @@ NVIDIA DeepStream SDK 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO models * Support for INT8 calibration * Support for non square models * New documentation for multiple models -* YOLOv5 support +* YOLOv5 >= 2.0 support * YOLOR support * GPU YOLO Decoder [#138](https://github.com/marcoslucianops/DeepStream-Yolo/issues/138) * PP-YOLOE support @@ -29,6 +30,7 @@ NVIDIA DeepStream SDK 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO models * **YOLOv8 
support** * **YOLOX support** * **PP-YOLOE+ support** +* **YOLOv6 >= 2.0 support** ## @@ -43,11 +45,12 @@ NVIDIA DeepStream SDK 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO models * [NMS configuration](#nms-configuration) * [INT8 calibration](#int8-calibration) * [YOLOv5 usage](docs/YOLOv5.md) -* [YOLOR usage](docs/YOLOR.md) -* [PP-YOLOE / PP-YOLOE+ usage](docs/PPYOLOE.md) +* [YOLOv6 usage](docs/YOLOv6.md) * [YOLOv7 usage](docs/YOLOv7.md) * [YOLOv8 usage](docs/YOLOv8.md) +* [YOLOR usage](docs/YOLOR.md) * [YOLOX usage](docs/YOLOX.md) +* [PP-YOLOE / PP-YOLOE+ usage](docs/PPYOLOE.md) * [Using your custom model](docs/customModels.md) * [Multiple YOLO GIEs](docs/multipleGIEs.md) @@ -108,14 +111,15 @@ NVIDIA DeepStream SDK 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO models ### Suported models * [Darknet YOLO](https://github.com/AlexeyAB/darknet) -* [YOLOv5 >= 2.0](https://github.com/ultralytics/yolov5) -* [YOLOR](https://github.com/WongKinYiu/yolor) -* [PP-YOLOE / PP-YOLOE+](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/ppyoloe) -* [YOLOv7](https://github.com/WongKinYiu/yolov7) -* [YOLOv8](https://github.com/ultralytics/ultralytics) -* [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) * [MobileNet-YOLO](https://github.com/dog-qiuqiu/MobileNet-Yolo) * [YOLO-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest) +* [YOLOv5 >= 2.0](https://github.com/ultralytics/yolov5) +* [YOLOv6 >= 2.0](https://github.com/meituan/YOLOv6) +* [YOLOv7](https://github.com/WongKinYiu/yolov7) +* [YOLOv8](https://github.com/ultralytics/ultralytics) +* [YOLOR](https://github.com/WongKinYiu/yolor) +* [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) +* [PP-YOLOE / PP-YOLOE+](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/ppyoloe) ## @@ -137,7 +141,7 @@ sample = 1920x1080 video - Eval ``` -nms-iou-threshold = 0.6 (Darknet and YOLOv8) / 0.65 (YOLOR, YOLOv5, YOLOv7 and YOLOX) / 0.7 (Paddle) +nms-iou-threshold = 0.6 (Darknet and 
YOLOv8) / 0.65 (YOLOv5, YOLOv6, YOLOv7, YOLOR and YOLOX) / 0.7 (Paddle) pre-cluster-threshold = 0.001 topk = 300 ``` diff --git a/config_infer_primary_yoloV6.txt b/config_infer_primary_yoloV6.txt new file mode 100644 index 0000000..ffeb800 --- /dev/null +++ b/config_infer_primary_yoloV6.txt @@ -0,0 +1,27 @@ +[property] +gpu-id=0 +net-scale-factor=0.0039215697906911373 +model-color-format=0 +custom-network-config=yolov6s.cfg +model-file=yolov6s.wts +model-engine-file=model_b1_gpu0_fp32.engine +#int8-calib-file=calib.table +labelfile-path=labels.txt +batch-size=1 +network-mode=0 +num-detected-classes=80 +interval=0 +gie-unique-id=1 +process-mode=1 +network-type=0 +cluster-mode=2 +maintain-aspect-ratio=1 +symmetric-padding=1 +parse-bbox-func-name=NvDsInferParseYolo +custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so +engine-create-func-name=NvDsInferYoloCudaEngineGet + +[class-attrs-all] +nms-iou-threshold=0.45 +pre-cluster-threshold=0.25 +topk=300 diff --git a/docs/YOLOX.md b/docs/YOLOX.md index 055a1eb..1fcc053 100644 --- a/docs/YOLOX.md +++ b/docs/YOLOX.md @@ -15,7 +15,7 @@ #### 1. Download the YOLOX repo and install the requirements ``` -git clone https://github.com/Megvii-BaseDetection/YOLOX +git clone https://github.com/Megvii-BaseDetection/YOLOX.git cd YOLOX pip3 install -r requirements.txt ``` @@ -28,7 +28,7 @@ Copy the `gen_wts_yolox.py` file from `DeepStream-Yolo/utils` directory to the ` #### 3. 
Download the model -Download the `pth` file from [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX/releases) releases (example for YOLOX-s standard) +Download the `pth` file from [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX/releases/) releases (example for YOLOX-s standard) ``` wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth diff --git a/docs/YOLOv5.md b/docs/YOLOv5.md index e87fc8b..8d3721a 100644 --- a/docs/YOLOv5.md +++ b/docs/YOLOv5.md @@ -46,6 +46,12 @@ Generate the `cfg` and `wts` files (example for YOLOv5s) python3 gen_wts_yoloV5.py -w yolov5s.pt ``` +**NOTE**: To convert a P6 model + +``` +--p6 +``` + **NOTE**: To change the inference size (defaut: 640) ``` diff --git a/docs/YOLOv6.md b/docs/YOLOv6.md new file mode 100644 index 0000000..ea12052 --- /dev/null +++ b/docs/YOLOv6.md @@ -0,0 +1,145 @@ +# YOLOv6 usage + +**NOTE**: The yaml file is not required. + +* [Convert model](#convert-model) +* [Compile the lib](#compile-the-lib) +* [Edit the config_infer_primary_yoloV6 file](#edit-the-config_infer_primary_yolov6-file) +* [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) +* [Testing the model](#testing-the-model) + +## + +### Convert model + +#### 1. Download the YOLOv6 repo and install the requirements + +``` +git clone https://github.com/meituan/YOLOv6.git +cd YOLOv6 +pip3 install -r requirements.txt +``` + +**NOTE**: It is recommended to use Python virtualenv. + +#### 2. Copy conversor + +Copy the `gen_wts_yoloV6.py` file from `DeepStream-Yolo/utils` directory to the `YOLOv6` folder. + +#### 3. 
Download the model + +Download the `pt` file from [YOLOv6](https://github.com/meituan/YOLOv6/releases/) releases (example for YOLOv6-S 3.0) + +``` +wget https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6s.pt +``` + +**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolov6_`) in your `cfg` and `weights`/`wts` filenames to generate the engine correctly. + +#### 4. Convert model + +Generate the `cfg` and `wts` files (example for YOLOv6-S 3.0) + +``` +python3 gen_wts_yoloV6.py -w yolov6s.pt +``` + +**NOTE**: To convert a P6 model + +``` +--p6 +``` + +**NOTE**: To change the inference size (default: 640) + +``` +-s SIZE +--size SIZE +-s HEIGHT WIDTH +--size HEIGHT WIDTH +``` + +Example for 1280 + +``` +-s 1280 +``` + +or + +``` +-s 1280 1280 +``` + +#### 5. Copy generated files + +Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder. + +## + +### Compile the lib + +Open the `DeepStream-Yolo` folder and compile the lib + +* DeepStream 6.1.1 on x86 platform + + ``` + CUDA_VER=11.7 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.1 on x86 platform + + ``` + CUDA_VER=11.6 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.0.1 / 6.0 on x86 platform + + ``` + CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.1.1 / 6.1 on Jetson platform + + ``` + CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.0.1 / 6.0 on Jetson platform + + ``` + CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo + ``` + +## + +### Edit the config_infer_primary_yoloV6 file + +Edit the `config_infer_primary_yoloV6.txt` file according to your model (example for YOLOv6-S 3.0) + +``` +[property] +... +custom-network-config=yolov6s.cfg +model-file=yolov6s.wts +... +``` + +## + +### Edit the deepstream_app_config file + +``` +... +[primary-gie] +... 
+config-file=config_infer_primary_yoloV6.txt +``` + +## + +### Testing the model + +``` +deepstream-app -c deepstream_app_config.txt +``` diff --git a/docs/YOLOv7.md b/docs/YOLOv7.md index 6c67bbc..5cd64d1 100644 --- a/docs/YOLOv7.md +++ b/docs/YOLOv7.md @@ -31,7 +31,7 @@ Copy the `gen_wts_yoloV7.py` file from `DeepStream-Yolo/utils` directory to the Download the `pt` file from [YOLOv7](https://github.com/WongKinYiu/yolov7/releases/) releases (example for YOLOv7) ``` -wget hhttps://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt +wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt ``` **NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolov7_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. @@ -48,6 +48,12 @@ Generate the `cfg` and `wts` files (example for YOLOv7) python3 gen_wts_yoloV7.py -w yolov7.pt ``` +**NOTE**: To convert a P6 model + +``` +--p6 +``` + **NOTE**: To change the inference size (defaut: 640) ``` diff --git a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp index 8f3ef62..bdec987 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp @@ -34,13 +34,16 @@ convolutionalLayer(int layerIdx, std::map& block, std: batchNormalize = (block.at("batch_normalize") == "1"); } + if (block.find("bias") != block.end()) { + bias = std::stoi(block.at("bias")); + if (bias == 1) + bias = filters; + } + int groups = 1; if (block.find("groups") != block.end()) groups = std::stoi(block.at("groups")); - if (block.find("bias") != block.end()) - bias = std::stoi(block.at("bias")); - int pad; if (padding) pad = (kernelSize - 1) / 2; @@ -92,7 +95,16 @@ convolutionalLayer(int layerIdx, std::map& block, std: bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); ++weightPtr; } - float* val = new float[size]; + float* 
val; + if (bias != 0) { + val = new float[filters]; + for (int i = 0; i < filters; ++i) { + val[i] = weights[weightPtr]; + ++weightPtr; + } + convBias.values = val; + } + val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; ++weightPtr; @@ -129,6 +141,14 @@ convolutionalLayer(int layerIdx, std::map& block, std: ++weightPtr; } convWt.values = val; + if (bias != 0) { + val = new float[filters]; + for (int i = 0; i < filters; ++i) { + val[i] = weights[weightPtr]; + ++weightPtr; + } + convBias.values = val; + } for (int i = 0; i < filters; ++i) { bnWeights.push_back(weights[weightPtr]); ++weightPtr; diff --git a/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp new file mode 100644 index 0000000..79d2f90 --- /dev/null +++ b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp @@ -0,0 +1,102 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "deconvolutional_layer.h" + +#include + +nvinfer1::ITensor* +deconvolutionalLayer(int layerIdx, std::map& block, std::vector& weights, + std::vector& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, + nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName) +{ + nvinfer1::ITensor* output; + + assert(block.at("type") == "deconvolutional"); + assert(block.find("filters") != block.end()); + assert(block.find("pad") != block.end()); + assert(block.find("size") != block.end()); + assert(block.find("stride") != block.end()); + + int filters = std::stoi(block.at("filters")); + int padding = std::stoi(block.at("pad")); + int kernelSize = std::stoi(block.at("size")); + int stride = std::stoi(block.at("stride")); + int bias = filters; + + int groups = 1; + if (block.find("groups") != block.end()) + groups = std::stoi(block.at("groups")); + + if (block.find("bias") != block.end()) + bias = std::stoi(block.at("bias")); + + int pad; + if 
(padding) + pad = (kernelSize - 1) / 2; + else + pad = 0; + + int size = filters * inputChannels * kernelSize * kernelSize / groups; + std::vector bnBiases; + std::vector bnWeights; + std::vector bnRunningMean; + std::vector bnRunningVar; + nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size}; + nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias}; + + if (weightsType == "weights") { + float* val; + if (bias != 0) { + val = new float[filters]; + for (int i = 0; i < filters; ++i) { + val[i] = weights[weightPtr]; + ++weightPtr; + } + convBias.values = val; + trtWeights.push_back(convBias); + } + val = new float[size]; + for (int i = 0; i < size; ++i) { + val[i] = weights[weightPtr]; + ++weightPtr; + } + convWt.values = val; + trtWeights.push_back(convWt); + } + else { + float* val = new float[size]; + for (int i = 0; i < size; ++i) { + val[i] = weights[weightPtr]; + ++weightPtr; + } + convWt.values = val; + trtWeights.push_back(convWt); + if (bias != 0) { + val = new float[filters]; + for (int i = 0; i < filters; ++i) { + val[i] = weights[weightPtr]; + ++weightPtr; + } + convBias.values = val; + trtWeights.push_back(convBias); + } + } + + nvinfer1::IDeconvolutionLayer* conv = network->addDeconvolutionNd(*input, filters, + nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias); + assert(conv != nullptr); + std::string convLayerName = "deconv_" + layerName + std::to_string(layerIdx); + conv->setName(convLayerName.c_str()); + conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); + conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}}); + + if (block.find("groups") != block.end()) + conv->setNbGroups(groups); + + output = conv->getOutput(0); + + return output; +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h new file mode 100644 index 0000000..886a43e --- /dev/null +++ b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h @@ -0,0 +1,18 @@ +/* 
+ * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __DECONVOLUTIONAL_LAYER_H__ +#define __DECONVOLUTIONAL_LAYER_H__ + +#include +#include + +#include "NvInfer.h" + +nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map& block, std::vector& weights, + std::vector& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, + nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = ""); + +#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp index fd8ce4c..e37c522 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp @@ -6,7 +6,7 @@ #include "shuffle_layer.h" nvinfer1::ITensor* -shuffleLayer(int layerIdx, std::string& layer, std::map& block, nvinfer1::ITensor* input, +shuffleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, std::vector tensorOutputs, nvinfer1::INetworkDefinition* network) { nvinfer1::ITensor* output; @@ -18,16 +18,8 @@ shuffleLayer(int layerIdx, std::string& layer, std::mapsetName(shuffleLayerName.c_str()); - int from = -1; - if (block.find("from") != block.end()) - from = std::stoi(block.at("from")); - if (from < 0) - from = tensorOutputs.size() + from; - - layer = std::to_string(from); - if (block.find("reshape") != block.end()) { - nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions(); + nvinfer1::Dims inputTensorDims = input->getDimensions(); std::string strReshape = block.at("reshape"); std::vector reshape; diff --git a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h index 2e5a4ef..24389f5 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h +++ b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h @@ -8,7 +8,7 @@ #include "../utils.h" -nvinfer1::ITensor* shuffleLayer(int layerIdx, std::string& layer, std::map& block, - 
nvinfer1::ITensor* input, std::vector tensorOutputs, nvinfer1::INetworkDefinition* network); +nvinfer1::ITensor* shuffleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, + std::vector tensorOutputs, nvinfer1::INetworkDefinition* network); #endif diff --git a/nvdsinfer_custom_impl_Yolo/yolo.cpp b/nvdsinfer_custom_impl_Yolo/yolo.cpp index a7fa21c..700ec36 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.cpp +++ b/nvdsinfer_custom_impl_Yolo/yolo.cpp @@ -135,8 +135,9 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition weightsType = "weights"; float eps = 1.0e-5; - if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov7") != std::string::npos || - m_NetworkType.find("yolov8") != std::string::npos || m_NetworkType.find("yolox") != std::string::npos) + if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov6") != std::string::npos || + m_NetworkType.find("yolov7") != std::string::npos || m_NetworkType.find("yolov8") != std::string::npos || + m_NetworkType.find("yolox") != std::string::npos) eps = 1.0e-3; else if (m_NetworkType.find("yolor") != std::string::npos) eps = 1.0e-4; @@ -169,6 +170,17 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation"); printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); } + else if (m_ConfigBlocks.at(i).at("type") == "deconvolutional") { + int channels = getNumChannels(previous); + std::string inputVol = dimsToString(previous->getDimensions()); + previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, + previous, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerName = "deconv"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, 
std::to_string(weightPtr)); + } else if (m_ConfigBlocks.at(i).at("type") == "c2f") { std::string inputVol = dimsToString(previous->getDimensions()); previous = c2fLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network); @@ -299,13 +311,12 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } else if (m_ConfigBlocks.at(i).at("type") == "shuffle") { - std::string layer; std::string inputVol = dimsToString(previous->getDimensions()); - previous = shuffleLayer(i, layer, m_ConfigBlocks.at(i), previous, tensorOutputs, &network); + previous = shuffleLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs, &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerName = "shuffle: " + layer; + std::string layerName = "shuffle"; printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } else if (m_ConfigBlocks.at(i).at("type") == "softmax") { diff --git a/nvdsinfer_custom_impl_Yolo/yolo.h b/nvdsinfer_custom_impl_Yolo/yolo.h index c915337..25a2e89 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.h +++ b/nvdsinfer_custom_impl_Yolo/yolo.h @@ -30,6 +30,7 @@ #include "nvdsinfer_custom_impl.h" #include "layers/convolutional_layer.h" +#include "layers/deconvolutional_layer.h" #include "layers/c2f_layer.h" #include "layers/batchnorm_layer.h" #include "layers/implicit_layer.h" diff --git a/utils/gen_wts_ppyoloe.py b/utils/gen_wts_ppyoloe.py index 2b2a6c4..d54b088 100644 --- a/utils/gen_wts_ppyoloe.py +++ b/utils/gen_wts_ppyoloe.py @@ -121,13 +121,10 @@ class Layers(object): self.convolutional(child, act=act) - def Shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None, output=''): + def Shuffle(self, reshape=None, transpose1=None, transpose2=None, output=''): self.current += 1 - r = None - if route is not None: - r = 
self.get_route(route) - self.shuffle(reshape=reshape, transpose1=transpose1, transpose2=transpose2, route=r) + self.shuffle(reshape=reshape, transpose1=transpose1, transpose2=transpose2) if output == 'cls': self.yolo_head_cls.append(self.current) elif output == 'reg': @@ -181,7 +178,7 @@ class Layers(object): b = 'batch_normalize=1\n' if bn is True else '' g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=0\n' if bias is None and bn is False else '' + w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' self.fc.write('\n[convolutional]\n' + b + @@ -246,19 +243,17 @@ class Layers(object): self.fc.write('\n[avgpool]\n') - def shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None): + def shuffle(self, reshape=None, transpose1=None, transpose2=None): self.blocks[self.current] += 1 r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if reshape is not None else '' t1 = 'transpose1=%s\n' % ', '.join(str(x) for x in transpose1) if transpose1 is not None else '' t2 = 'transpose2=%s\n' % ', '.join(str(x) for x in transpose2) if transpose2 is not None else '' - f = 'from=%d\n' % route if route is not None else '' self.fc.write('\n[shuffle]\n' + r + t1 + - t2 + - f) + t2) def softmax(self, axes): self.blocks[self.current] += 1 @@ -418,13 +413,13 @@ with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: layers.AvgPool2d() layers.ESEAttn(model.yolo_head.stem_cls[i]) layers.Conv2D(model.yolo_head.pred_cls[i], act='sigmoid') - layers.Shuffle(reshape=[model.yolo_head.num_classes, 'hw'], route=feat, output='cls') + layers.Shuffle(reshape=[model.yolo_head.num_classes, 'hw'], output='cls') layers.ESEAttn(model.yolo_head.stem_reg[i], route=-7) layers.Conv2D(model.yolo_head.pred_reg[i]) - layers.Shuffle(reshape=[4, model.yolo_head.reg_max + 1, 'hw'], transpose2=[1, 0, 2], route=feat) + layers.Shuffle(reshape=[4, model.yolo_head.reg_max + 1, 'hw'], transpose2=[1, 0, 2]) layers.SoftMax(0) 
layers.Conv2D(model.yolo_head.proj_conv) - layers.Shuffle(reshape=[4, 'hw'], route=feat, output='reg') + layers.Shuffle(reshape=['h', 'w'], output='reg') layers.Detect('cls') layers.Detect('reg') layers.get_anchors(model.yolo_head.anchor_points.reshape([-1]), model.yolo_head.stride_tensor) diff --git a/utils/gen_wts_yoloV5.py b/utils/gen_wts_yoloV5.py index be8d71f..d19a2b8 100644 --- a/utils/gen_wts_yoloV5.py +++ b/utils/gen_wts_yoloV5.py @@ -195,7 +195,7 @@ class Layers(object): b = 'batch_normalize=1\n' if bn is True else '' g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=0\n' if bias is None and bn is False else '' + w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' self.fc.write('\n[convolutional]\n' + b + @@ -335,10 +335,13 @@ def parse_args(): parser = argparse.ArgumentParser(description='PyTorch YOLOv5 conversion') parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') parser.add_argument( - '-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + '-s', '--size', nargs='+', type=int, help='Inference size [H,W] (default [640])') + parser.add_argument("--p6", action="store_true", help="P6 model") args = parser.parse_args() if not os.path.isfile(args.weights): raise SystemExit('Invalid weights file') + if not args.size: + args.size = [1280] if args.p6 else [640] return args.weights, args.size diff --git a/utils/gen_wts_yoloV6.py b/utils/gen_wts_yoloV6.py new file mode 100644 index 0000000..91501b4 --- /dev/null +++ b/utils/gen_wts_yoloV6.py @@ -0,0 +1,588 @@ +import argparse +import os +import struct +import torch +from yolov6.assigners.anchor_generator import generate_anchors + + +class Layers(object): + def __init__(self, size, fw, fc): + self.blocks = [0 for _ in range(300)] + self.current = -1 + + self.width = size[0] if len(size) == 1 else size[1] + self.height = size[0] + + self.backbone_outs 
= [] + self.fpn_feats = [] + self.pan_feats = [] + self.yolo_head_cls = [] + self.yolo_head_reg = [] + + self.fw = fw + self.fc = fc + self.wc = 0 + + self.net() + + def BaseConv(self, child): + self.current += 1 + + if child._get_name() == 'RepVGGBlock': + self.convolutional(child.rbr_reparam, act=self.get_activation(child.nonlinearity._get_name())) + elif child._get_name() == 'ConvWrapper' or child._get_name() == 'SimConvWrapper': + self.convolutional(child.block) + else: + raise SystemExit('Model not supported') + + def RepBlock(self, child, stage=''): + self.current += 1 + + if child.conv1._get_name() == 'RepVGGBlock': + self.convolutional(child.conv1.rbr_reparam, act=self.get_activation(child.conv1.nonlinearity._get_name())) + if child.block is not None: + for m in child.block: + self.convolutional(m.rbr_reparam, act=self.get_activation(m.nonlinearity._get_name())) + elif child.conv1._get_name() == 'ConvWrapper' or child.conv1._get_name() == 'SimConvWrapper': + self.convolutional(child.conv1.block) + if child.block is not None: + for m in child.block: + self.convolutional(m.block) + else: + raise SystemExit('Model not supported') + + if stage == 'backbone': + self.backbone_outs.append(self.current) + elif stage == 'pan': + self.pan_feats.append(self.current) + + def BepC3(self, child, stage=''): + self.current += 1 + + if child.concat is True: + self.convolutional(child.cv2) + self.route('-2') + self.convolutional(child.cv1) + idx = -3 + if child.m.conv1.conv1._get_name() == 'RepVGGBlock': + self.convolutional(child.m.conv1.conv1.rbr_reparam, + act=self.get_activation(child.m.conv1.conv1.nonlinearity._get_name())) + self.convolutional(child.m.conv1.conv2.rbr_reparam, + act=self.get_activation(child.m.conv1.conv2.nonlinearity._get_name())) + idx -= 2 + if child.m.conv1.shortcut: + self.shortcut(-3) + idx -= 1 + if child.m.block is not None: + for m in child.m.block: + self.convolutional(m.conv1.rbr_reparam, 
act=self.get_activation(m.conv1.nonlinearity._get_name())) + self.convolutional(m.conv2.rbr_reparam, act=self.get_activation(m.conv2.nonlinearity._get_name())) + idx -= 2 + if m.shortcut: + self.shortcut(-3) + idx -= 1 + elif child.m.conv1.conv1._get_name() == 'ConvWrapper' or child.m.conv1.conv1._get_name() == 'SimConvWrapper': + self.convolutional(child.m.conv1.conv1.block) + self.convolutional(child.m.conv1.conv2.block) + idx -= 2 + if child.m.conv1.shortcut: + self.shortcut(-3) + idx -= 1 + if child.m.block is not None: + for m in child.m.block: + self.convolutional(m.conv1.block) + self.convolutional(m.conv2.block) + idx -= 2 + if m.shortcut: + self.shortcut(-3) + idx -= 1 + else: + raise SystemExit('Model not supported') + + if child.concat is True: + self.route('-1, %d' % idx) + self.convolutional(child.cv3) + + if stage == 'backbone': + self.backbone_outs.append(self.current) + elif stage == 'pan': + self.pan_feats.append(self.current) + + def CSPSPPF(self, child): + self.current += 1 + + self.convolutional(child.cv2) + self.route('-2') + self.convolutional(child.cv1) + self.convolutional(child.cv3) + self.convolutional(child.cv4) + self.maxpool(child.m) + self.maxpool(child.m) + self.maxpool(child.m) + self.route('-4, -3, -2, -1') + self.convolutional(child.cv5) + self.convolutional(child.cv6) + self.route('-11, -1') + self.convolutional(child.cv7) + self.backbone_outs.append(self.current) + + def SPPF(self, child): + self.current += 1 + + self.convolutional(child.cv1) + self.maxpool(child.m) + self.maxpool(child.m) + self.maxpool(child.m) + self.route('-4, -3, -2, -1') + self.convolutional(child.cv2) + self.backbone_outs.append(self.current) + + def SimConv(self, child, stage=''): + self.current += 1 + + self.convolutional(child) + if stage == 'fpn': + self.fpn_feats.append(self.current) + + def BiFusion(self, child, idx): + self.current += 1 + + self.deconvolutional(child.upsample.upsample_transpose) + r = self.get_route(self.backbone_outs[- idx -2]) + 
self.route('%d' % r) + self.convolutional(child.cv1) + r = self.get_route(self.backbone_outs[- idx -3]) + self.route('%d' % r) + self.convolutional(child.cv2) + self.convolutional(child.downsample) + self.route('-6, -4, -1') + self.convolutional(child.cv3) + + def Upsample(self, child): + self.current += 1 + + self.deconvolutional(child.upsample_transpose) + + def Conv(self, child, act=None): + self.current += 1 + + self.convolutional(child, act=act) + + def Concat(self, route): + self.current += 1 + + r = self.get_route(route) + self.route('-1, %d' % r) + + def Route(self, route): + self.current += 1 + + if route > 0: + r = self.get_route(route) + self.route('%d' % r) + else: + self.route('%d' % route) + + def Shuffle(self, reshape=None, transpose1=None, transpose2=None, output=''): + self.current += 1 + + self.shuffle(reshape=reshape, transpose1=transpose1, transpose2=transpose2) + if output == 'cls': + self.yolo_head_cls.append(self.current) + elif output == 'reg': + self.yolo_head_reg.append(self.current) + + def SoftMax(self, axes): + self.current += 1 + + self.softmax(axes) + + def Detect(self, output): + self.current += 1 + + routes = self.yolo_head_cls if output == 'cls' else self.yolo_head_reg + + for i, route in enumerate(routes): + routes[i] = self.get_route(route) + self.route(str(routes)[1:-1], axis=-1) + self.yolo(output) + + def net(self): + self.fc.write('[net]\n' + + 'width=%d\n' % self.width + + 'height=%d\n' % self.height + + 'channels=3\n' + + 'letter_box=1\n') + + def convolutional(self, cv, act=None, detect=False): + self.blocks[self.current] += 1 + + self.get_state_dict(cv.state_dict()) + + if cv._get_name() == 'Conv2d': + filters = cv.out_channels + size = cv.kernel_size + stride = cv.stride + pad = cv.padding + groups = cv.groups + bias = cv.bias + bn = False + act = act if act is not None else 'linear' + else: + filters = cv.conv.out_channels + size = cv.conv.kernel_size + stride = cv.conv.stride + pad = cv.conv.padding + groups = 
cv.conv.groups + bias = cv.conv.bias + bn = True if hasattr(cv, 'bn') else False + if act is None: + act = self.get_activation(cv.act._get_name()) if hasattr(cv, 'act') else 'linear' + + b = 'batch_normalize=1\n' if bn is True else '' + g = 'groups=%d\n' % groups if groups > 1 else '' + w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' + + self.fc.write('\n[convolutional]\n' + + b + + 'filters=%d\n' % filters + + 'size=%s\n' % self.get_value(size) + + 'stride=%s\n' % self.get_value(stride) + + 'pad=%s\n' % self.get_value(pad) + + g + + w + + 'activation=%s\n' % act) + + def deconvolutional(self, cv): + self.blocks[self.current] += 1 + + self.get_state_dict(cv.state_dict()) + + filters = cv.out_channels + size = cv.kernel_size + stride = cv.stride + pad = cv.padding + groups = cv.groups + bias = cv.bias + + g = 'groups=%d\n' % groups if groups > 1 else '' + w = 'bias=0\n' if bias is None else '' + + self.fc.write('\n[deconvolutional]\n' + + 'filters=%d\n' % filters + + 'size=%s\n' % self.get_value(size) + + 'stride=%s\n' % self.get_value(stride) + + 'pad=%s\n' % self.get_value(pad) + + g + + w) + + def route(self, layers, axis=0): + self.blocks[self.current] += 1 + + a = 'axis=%d\n' % axis if axis != 0 else '' + + self.fc.write('\n[route]\n' + + 'layers=%s\n' % layers + + a) + + def shortcut(self, r, ew='add', act='linear'): + self.blocks[self.current] += 1 + + m = 'mode=mul\n' if ew == 'mul' else '' + + self.fc.write('\n[shortcut]\n' + + 'from=%d\n' % r + + m + + 'activation=%s\n' % act) + + def maxpool(self, m): + self.blocks[self.current] += 1 + + stride = m.stride + size = m.kernel_size + mode = m.ceil_mode + + m = 'maxpool_up' if mode else 'maxpool' + + self.fc.write('\n[%s]\n' % m + + 'stride=%d\n' % stride + + 'size=%d\n' % size) + + def shuffle(self, reshape=None, transpose1=None, transpose2=None): + self.blocks[self.current] += 1 + + r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if 
def softmax(self, axes):
    """Write a ``[softmax]`` section with the integer ``axes`` option."""
    self.blocks[self.current] += 1

    self.fc.write('\n[softmax]\n' +
                  'axes=%d\n' % axes)


def yolo(self, output):
    """Write an option-less section whose name is *output*."""
    self.blocks[self.current] += 1

    self.fc.write('\n[%s]\n' % output)


def get_state_dict(self, state_dict):
    """Append every tensor of *state_dict* to the weights file.

    Each entry becomes one line: the key, the element count, then every
    element as the hex form of its big-endian float32 encoding. Keys that
    contain ``num_batches_tracked`` are skipped. ``self.wc`` is incremented
    once per written line.

    Args:
        state_dict: Mapping of name to a tensor supporting
            ``reshape(-1).numpy()``.
    """
    for k, v in state_dict.items():
        if 'num_batches_tracked' in k:
            continue
        vr = v.reshape(-1).numpy()
        # Build the line once; do not issue two write calls per value.
        values = ''.join(' ' + struct.pack('>f', float(vv)).hex() for vv in vr)
        self.fw.write('{} {} '.format(k, len(vr)) + values + '\n')
        self.wc += 1


def get_anchors(self, anchor_points, stride_tensor):
    """Append the anchor points and strides to the weights file.

    Both tensors are flattened and written by ``get_state_dict`` under the
    names ``anchor_points`` and ``stride_tensor``, in that order, so the line
    format is identical to regular weights.

    Args:
        anchor_points: Tensor of anchor coordinates.
        stride_tensor: Tensor of per-anchor strides.
    """
    # Flattening (done inside get_state_dict) also avoids calling float() on
    # one-element array rows when a tensor has a trailing dimension.
    self.get_state_dict({'anchor_points': anchor_points, 'stride_tensor': stride_tensor})


def get_value(self, key):
    """Return the cfg representation of an int or a sequence of ints.

    An int is returned as is. A one-element sequence, or one whose first two
    entries are equal, collapses to its first entry. Anything else becomes
    the text of the sequence without its surrounding brackets.
    """
    if isinstance(key, int):
        return key
    if len(key) == 1 or key[0] == key[1]:
        return key[0]
    return str(key)[1:-1]


def get_route(self, n):
    """Return the index of the last section emitted up to block *n*.

    This is the sum of the per-block counters for blocks ``0..n`` minus one;
    a negative *n* yields ``-1``.
    """
    return sum(self.blocks[:max(n + 1, 0)]) - 1


def get_activation(self, act):
    """Map a module class name to its cfg activation name (default ``'linear'``)."""
    names = {
        'Hardswish': 'hardswish',
        'LeakyReLU': 'leaky',
        'SiLU': 'silu',
        'ReLU': 'relu',
    }
    return names.get(act, 'linear')


def parse_args():
    """Parse the command line.

    Returns:
        A ``(weights_path, size)`` tuple. ``size`` is the list of integers
        given with ``--size``; when omitted it is ``[1280]`` with ``--p6``
        and ``[640]`` otherwise.

    Raises:
        SystemExit: If the weights path is not an existing file.
    """
    parser = argparse.ArgumentParser(description='PyTorch YOLOv6 conversion')
    parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)')
    parser.add_argument(
        '-s', '--size', nargs='+', type=int, help='Inference size [H,W] (default [640], or [1280] with --p6)')
    parser.add_argument('--p6', action='store_true', help='P6 model')
    args = parser.parse_args()
    if not os.path.isfile(args.weights):
        raise SystemExit('Invalid weights file')
    if not args.size:
        args.size = [1280] if args.p6 else [640]
    return args.weights, args.size
# Conversion script: load a YOLOv6 checkpoint and write the matching .wts
# (weights) and .cfg (network description) files into the working directory.
pt_file, inference_size = parse_args()

model_name = os.path.basename(pt_file).split('.pt')[0]
# Output names always carry a "yolov6" marker.
prefix = '' if 'yolov6' in model_name else 'yolov6_'
wts_file = prefix + model_name + '.wts'
cfg_file = prefix + model_name + '.cfg'

# NOTE(review): torch.load unpickles the checkpoint, which can execute
# arbitrary code; only convert weight files from a trusted source.
model = torch.load(pt_file, map_location='cpu')['model'].float()
model.to('cpu').eval()

for layer in model.modules():
    if layer._get_name() == 'RepVGGBlock':
        layer.switch_to_deploy()

backbones = ['EfficientRep', 'CSPBepBackbone']
necks = ['RepBiFPANNeck', 'CSPRepBiFPANNeck', 'RepPANNeck', 'CSPRepPANNeck']
backbones_p6 = ['EfficientRep6', 'CSPBepBackbone_P6']
necks_p6 = ['RepBiFPANNeck6', 'CSPRepBiFPANNeck_P6', 'RepPANNeck6', 'CSPRepPANNeck_P6']

with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc:
    layers = Layers(inference_size, fw, fc)

    backbone = model.backbone
    backbone_name = backbone._get_name()
    fuse_p2 = hasattr(backbone, 'fuse_P2') and backbone.fuse_P2

    if backbone_name in backbones:
        layers.fc.write('\n# %s\n' % backbone_name)

        if backbone_name == 'EfficientRep':
            block1 = layers.RepBlock
        elif backbone_name == 'CSPBepBackbone':
            block1 = layers.BepC3

        sppf_name = backbone.ERBlock_5[2]._get_name()
        if sppf_name in ('CSPSPPF', 'SimCSPSPPF'):
            block2 = layers.CSPSPPF
        elif sppf_name in ('SPPF', 'SimSPPF'):
            block2 = layers.SPPF
        else:
            raise SystemExit('Model not supported')

        layers.BaseConv(backbone.stem)
        layers.BaseConv(backbone.ERBlock_2[0])
        block1(backbone.ERBlock_2[1], 'backbone' if fuse_p2 else '')
        layers.BaseConv(backbone.ERBlock_3[0])
        block1(backbone.ERBlock_3[1], 'backbone')
        layers.BaseConv(backbone.ERBlock_4[0])
        block1(backbone.ERBlock_4[1], 'backbone')
        layers.BaseConv(backbone.ERBlock_5[0])
        block1(backbone.ERBlock_5[1])
        block2(backbone.ERBlock_5[2])

    elif backbone_name in backbones_p6:
        layers.fc.write('\n# %s\n' % backbone_name)

        if backbone_name == 'EfficientRep6':
            block1 = layers.RepBlock
        elif backbone_name == 'CSPBepBackbone_P6':
            block1 = layers.BepC3

        sppf_name = backbone.ERBlock_6[2]._get_name()
        if sppf_name in ('CSPSPPF', 'SimCSPSPPF'):
            block2 = layers.CSPSPPF
        elif sppf_name in ('SPPF', 'SimSPPF'):
            block2 = layers.SPPF
        else:
            raise SystemExit('Model not supported')

        layers.BaseConv(backbone.stem)
        layers.BaseConv(backbone.ERBlock_2[0])
        block1(backbone.ERBlock_2[1],
               'backbone' if backbone_name == 'CSPBepBackbone_P6' or fuse_p2 else '')
        layers.BaseConv(backbone.ERBlock_3[0])
        block1(backbone.ERBlock_3[1], 'backbone')
        layers.BaseConv(backbone.ERBlock_4[0])
        block1(backbone.ERBlock_4[1], 'backbone')
        layers.BaseConv(backbone.ERBlock_5[0])
        block1(backbone.ERBlock_5[1], 'backbone')
        layers.BaseConv(backbone.ERBlock_6[0])
        block1(backbone.ERBlock_6[1])
        block2(backbone.ERBlock_6[2])

    else:
        raise SystemExit('Model not supported')

    neck = model.neck
    neck_name = neck._get_name()
    bi_fusion = 'Bi' in neck_name

    if neck_name in necks:
        layers.fc.write('\n# %s\n' % neck_name)

        if neck_name in ('RepBiFPANNeck', 'RepPANNeck'):
            block = layers.RepBlock
        elif neck_name in ('CSPRepBiFPANNeck', 'CSPRepPANNeck'):
            block = layers.BepC3

        layers.SimConv(neck.reduce_layer0, 'fpn')
        if bi_fusion:
            layers.BiFusion(neck.Bifusion0, 0)
        else:
            layers.Upsample(neck.upsample0)
            layers.Concat(layers.backbone_outs[-2])
        block(neck.Rep_p4)
        layers.SimConv(neck.reduce_layer1, 'fpn')
        if bi_fusion:
            layers.BiFusion(neck.Bifusion1, 1)
        else:
            layers.Upsample(neck.upsample1)
            layers.Concat(layers.backbone_outs[-3])
        block(neck.Rep_p3, 'pan')
        layers.SimConv(neck.downsample2)
        layers.Concat(layers.fpn_feats[1])
        block(neck.Rep_n3, 'pan')
        layers.SimConv(neck.downsample1)
        layers.Concat(layers.fpn_feats[0])
        block(neck.Rep_n4, 'pan')
        layers.pan_feats = layers.pan_feats[::-1]

    elif neck_name in necks_p6:
        layers.fc.write('\n# %s\n' % neck_name)

        if neck_name in ('RepBiFPANNeck6', 'RepPANNeck6'):
            block = layers.RepBlock
        elif neck_name in ('CSPRepBiFPANNeck_P6', 'CSPRepPANNeck_P6'):
            block = layers.BepC3

        layers.SimConv(neck.reduce_layer0, 'fpn')
        if bi_fusion:
            layers.BiFusion(neck.Bifusion0, 0)
        else:
            layers.Upsample(neck.upsample0)
            layers.Concat(layers.backbone_outs[-2])
        block(neck.Rep_p5)
        layers.SimConv(neck.reduce_layer1, 'fpn')
        if bi_fusion:
            layers.BiFusion(neck.Bifusion1, 1)
        else:
            layers.Upsample(neck.upsample1)
            layers.Concat(layers.backbone_outs[-3])
        block(neck.Rep_p4)
        layers.SimConv(neck.reduce_layer2, 'fpn')
        if bi_fusion:
            layers.BiFusion(neck.Bifusion2, 2)
        else:
            layers.Upsample(neck.upsample2)
            layers.Concat(layers.backbone_outs[-4])
        block(neck.Rep_p3, 'pan')
        layers.SimConv(neck.downsample2)
        layers.Concat(layers.fpn_feats[2])
        block(neck.Rep_n4, 'pan')
        layers.SimConv(neck.downsample1)
        layers.Concat(layers.fpn_feats[1])
        block(neck.Rep_n5, 'pan')
        layers.SimConv(neck.downsample0)
        layers.Concat(layers.fpn_feats[0])
        block(neck.Rep_n6, 'pan')
        layers.pan_feats = layers.pan_feats[::-1]

    else:
        raise SystemExit('Model not supported')

    detect = model.detect

    if detect._get_name() == 'Detect':
        layers.fc.write('\n# Detect\n')

        for i, feat in enumerate(layers.pan_feats):
            # pan_feats was reversed above, so index the head modules from the end.
            idx = len(layers.pan_feats) - i - 1
            if i > 0:
                layers.Route(feat)
            layers.Conv(detect.stems[idx])
            layers.Conv(detect.cls_convs[idx])
            layers.Conv(detect.cls_preds[idx], act='sigmoid')
            layers.Shuffle(reshape=[detect.nc, 'hw'], output='cls')
            layers.Route(-4)
            layers.Conv(detect.reg_convs[idx])
            layers.Conv(detect.reg_preds[idx])
            if detect.use_dfl:
                layers.Shuffle(reshape=[4, detect.reg_max + 1, 'hw'], transpose2=[1, 0, 2])
                layers.SoftMax(0)
                layers.Conv(detect.proj_conv)
                layers.Shuffle(reshape=['h', 'w'], output='reg')
            else:
                layers.Shuffle(reshape=[4, 'hw'], output='reg')
        layers.Detect('cls')
        layers.Detect('reg')

        # One dummy feature map per stride (largest stride first), used only
        # for its spatial size when generating the anchors.
        x = [
            torch.zeros([1, 1, int(layers.height / stride), int(layers.width / stride)], dtype=torch.float32)
            for stride in detect.stride.tolist()[::-1]
        ]
        anchor_points, stride_tensor = generate_anchors(x, detect.stride.flip((0,)), detect.grid_cell_size,
                                                        detect.grid_cell_offset, device='cpu', is_eval=True,
                                                        mode='af')
        layers.get_anchors(anchor_points.reshape([-1]), stride_tensor)

    else:
        raise SystemExit('Model not supported')

# Prepend the number of written weight entries as the first line of the
# weights file. Done in Python, not through a shell pipeline, so file names
# with spaces or shell metacharacters work and no POSIX tools are required.
tmp_file = wts_file + '.tmp'
with open(wts_file, 'r') as src, open(tmp_file, 'w') as dst:
    dst.write('%d\n' % layers.wc)
    for line in src:
        dst.write(line)
os.replace(tmp_file, wts_file)
'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=0\n' if bias is None and bn is False else '' + w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' self.fc.write('\n[convolutional]\n' + b + @@ -299,10 +299,13 @@ def parse_args(): parser = argparse.ArgumentParser(description='PyTorch YOLOv7 conversion') parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') parser.add_argument( - '-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + '-s', '--size', nargs='+', type=int, help='Inference size [H,W] (default [640])') + parser.add_argument("--p6", action="store_true", help="P6 model") args = parser.parse_args() if not os.path.isfile(args.weights): raise SystemExit('Invalid weights file') + if not args.size: + args.size = [1280] if args.p6 else [640] return args.weights, args.size diff --git a/utils/gen_wts_yoloV8.py b/utils/gen_wts_yoloV8.py index 4be6b1f..b583b91 100644 --- a/utils/gen_wts_yoloV8.py +++ b/utils/gen_wts_yoloV8.py @@ -112,7 +112,7 @@ class Layers(object): b = 'batch_normalize=1\n' if bn is True else '' g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=0\n' if bias is None and bn is False else '' + w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' self.fc.write('\n[convolutional]\n' + b + @@ -145,7 +145,7 @@ class Layers(object): b = 'batch_normalize=1\n' if bn is True else '' g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=0\n' if bias is None and bn is False else '' + w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' self.fc.write('\n[c2f]\n' + 'n=%d\n' % n + @@ -199,19 +199,17 @@ class Layers(object): self.fc.write('\n[upsample]\n' + 'stride=%d\n' % stride) - def shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None): + def 
shuffle(self, reshape=None, transpose1=None, transpose2=None): self.blocks[self.current] += 1 r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if reshape is not None else '' t1 = 'transpose1=%s\n' % ', '.join(str(x) for x in transpose1) if transpose1 is not None else '' t2 = 'transpose2=%s\n' % ', '.join(str(x) for x in transpose2) if transpose2 is not None else '' - f = 'from=%d\n' % route if route is not None else '' self.fc.write('\n[shuffle]\n' + r + t1 + - t2 + - f) + t2) def yolo(self, child): self.blocks[self.current] += 1 diff --git a/utils/gen_wts_yolox.py b/utils/gen_wts_yolox.py index c47ff92..179cef1 100644 --- a/utils/gen_wts_yolox.py +++ b/utils/gen_wts_yolox.py @@ -170,7 +170,7 @@ class Layers(object): b = 'batch_normalize=1\n' if bn is True else '' g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=0\n' if bias is None and bn is False else '' + w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' self.fc.write('\n[convolutional]\n' + b + @@ -222,19 +222,17 @@ class Layers(object): self.fc.write('\n[upsample]\n' + 'stride=%d\n' % stride) - def shuffle(self, reshape=None, transpose1=None, transpose2=None, route=None): + def shuffle(self, reshape=None, transpose1=None, transpose2=None): self.blocks[self.current] += 1 r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if reshape is not None else '' t1 = 'transpose1=%s\n' % ', '.join(str(x) for x in transpose1) if transpose1 is not None else '' t2 = 'transpose2=%s\n' % ', '.join(str(x) for x in transpose2) if transpose2 is not None else '' - f = 'from=%d\n' % route if route is not None else '' self.fc.write('\n[shuffle]\n' + r + t1 + - t2 + - f) + t2) def yolo(self, strides): self.blocks[self.current] += 1 @@ -350,14 +348,14 @@ with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: layers.BaseConv(model.head.stems[idx]) layers.Conv(model.head.cls_convs[idx][0]) layers.Conv(model.head.cls_convs[idx][1]) - 
layers.BaseConv(model.head.cls_preds[idx], act='logistic') + layers.BaseConv(model.head.cls_preds[idx], act='sigmoid') if dw: layers.Route(-6) else: layers.Route(-4) layers.Conv(model.head.reg_convs[idx][0]) layers.Conv(model.head.reg_convs[idx][1]) - layers.BaseConv(model.head.obj_preds[idx], act='logistic') + layers.BaseConv(model.head.obj_preds[idx], act='sigmoid') layers.Route(-2) layers.BaseConv(model.head.reg_preds[idx]) if dw: