DeepStream 6.0 update
This commit is contained in:
192
YOLOv5-3.X.md
192
YOLOv5-3.X.md
@@ -1,192 +0,0 @@
|
|||||||
# YOLOv5
|
|
||||||
NVIDIA DeepStream SDK 5.1 configuration for YOLOv5 3.0/3.1 models
|
|
||||||
|
|
||||||
Thanks [DanaHan](https://github.com/DanaHan/Yolov5-in-Deepstream-5.0), [wang-xinyu](https://github.com/wang-xinyu/tensorrtx) and [Ultralytics](https://github.com/ultralytics/yolov5)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* [Requirements](#requirements)
|
|
||||||
* [Convert PyTorch model to wts file](#convert-pytorch-model-to-wts-file)
|
|
||||||
* [Convert wts file to TensorRT model](#convert-wts-file-to-tensorrt-model)
|
|
||||||
* [Compile nvdsinfer_custom_impl_Yolo](#compile-nvdsinfer_custom_impl_yolo)
|
|
||||||
* [Testing model](#testing-model)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Requirements
|
|
||||||
* [TensorRTX](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/install.md)
|
|
||||||
|
|
||||||
* [Ultralytics](https://github.com/ultralytics/yolov5/blob/v3.1/requirements.txt)
|
|
||||||
|
|
||||||
* Matplotlib (for Jetson platform)
|
|
||||||
```
|
|
||||||
sudo apt-get install python3-matplotlib
|
|
||||||
```
|
|
||||||
|
|
||||||
* PyTorch (for Jetson platform)
|
|
||||||
```
|
|
||||||
wget https://nvidia.box.com/shared/static/p57jwntv436lfrd78inwl7iml6p13fzh.whl -O torch-1.8.0-cp36-cp36m-linux_aarch64.whl
|
|
||||||
sudo apt-get install python3-pip libopenblas-base libopenmpi-dev
|
|
||||||
pip3 install Cython
|
|
||||||
pip3 install numpy torch-1.8.0-cp36-cp36m-linux_aarch64.whl
|
|
||||||
```
|
|
||||||
|
|
||||||
* TorchVision (for Jetson platform)
|
|
||||||
```
|
|
||||||
sudo apt-get install libjpeg-dev zlib1g-dev libpython3-dev libavcodec-dev libavformat-dev libswscale-dev
|
|
||||||
git clone --branch v0.9.0 https://github.com/pytorch/vision torchvision
|
|
||||||
cd torchvision
|
|
||||||
export BUILD_VERSION=0.9.0
|
|
||||||
python3 setup.py install --user
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Convert PyTorch model to wts file
|
|
||||||
1. Download repositories
|
|
||||||
```
|
|
||||||
git clone https://github.com/DanaHan/Yolov5-in-Deepstream-5.0.git yolov5converter
|
|
||||||
git clone -b yolov5-v3.1 https://github.com/wang-xinyu/tensorrtx.git
|
|
||||||
git clone -b v3.1 https://github.com/ultralytics/yolov5.git
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Download latest YoloV5 (YOLOv5s, YOLOv5m, YOLOv5l or YOLOv5x) weights to yolov5/weights directory (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
wget https://github.com/ultralytics/yolov5/releases/download/v3.1/yolov5s.pt -P yolov5/weights/
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Copy gen_wts.py file (from tensorrtx/yolov5 folder) to yolov5 (ultralytics) folder
|
|
||||||
```
|
|
||||||
cp tensorrtx/yolov5/gen_wts.py yolov5/gen_wts.py
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Generate wts file
|
|
||||||
```
|
|
||||||
cd yolov5
|
|
||||||
python3 gen_wts.py
|
|
||||||
```
|
|
||||||
|
|
||||||
yolov5s.wts file will be generated in yolov5 folder
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: if you want to generate the wts file for another YOLOv5 model (YOLOv5m, YOLOv5l or YOLOv5x), edit the gen_wts.py file, changing yolov5s to your model name
|
|
||||||
```
|
|
||||||
model = torch.load('weights/yolov5s.pt', map_location=device)['model'].float() # load to FP32
|
|
||||||
model.to(device).eval()
|
|
||||||
|
|
||||||
f = open('yolov5s.wts', 'w')
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Convert wts file to TensorRT model
|
|
||||||
1. Replace the yololayer files in the tensorrtx/yolov5 folder with the yololayer and hardswish files from yolov5converter
|
|
||||||
```
|
|
||||||
mv yolov5converter/yololayer.cu tensorrtx/yolov5/yololayer.cu
|
|
||||||
mv yolov5converter/yololayer.h tensorrtx/yolov5/yololayer.h
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Copy the generated yolov5s.wts file to the tensorrtx/yolov5 folder (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
cp yolov5/yolov5s.wts tensorrtx/yolov5/yolov5s.wts
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Build tensorrtx/yolov5
|
|
||||||
```
|
|
||||||
cd tensorrtx/yolov5
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
make
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Convert to TensorRT model (yolov5s.engine file will be generated in tensorrtx/yolov5/build folder)
|
|
||||||
```
|
|
||||||
sudo ./yolov5 -s
|
|
||||||
```
|
|
||||||
|
|
||||||
5. Create a custom yolo folder and copy generated files (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
mkdir /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
cp yolov5s.engine /opt/nvidia/deepstream/deepstream-5.1/sources/yolo/yolov5s.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: by default, the yolov5 script generates the model with batch size = 1, FP16 mode and the s model.
|
|
||||||
```
|
|
||||||
#define USE_FP16 // comment out this if want to use FP32
|
|
||||||
#define DEVICE 0 // GPU id
|
|
||||||
#define NMS_THRESH 0.4
|
|
||||||
#define CONF_THRESH 0.5
|
|
||||||
#define BATCH_SIZE 1
|
|
||||||
|
|
||||||
#define NET s // s m l x
|
|
||||||
```
|
|
||||||
Edit the yolov5.cpp file before compiling if you want to change these parameters.
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Compile nvdsinfer_custom_impl_Yolo
|
|
||||||
1. Run command
|
|
||||||
```
|
|
||||||
sudo chmod -R 777 /opt/nvidia/deepstream/deepstream-5.1/sources/
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Download [my external/yolov5 folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/external/yolov5-3.X) and move its files to the created yolo folder
|
|
||||||
|
|
||||||
3. Compile lib
|
|
||||||
|
|
||||||
* x86 platform
|
|
||||||
```
|
|
||||||
cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
CUDA_VER=11.1 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
* Jetson platform
|
|
||||||
```
|
|
||||||
cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Testing model
|
|
||||||
Use my edited [deepstream_app_config.txt](https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/master/external/yolov5-3.X/deepstream_app_config.txt) and [config_infer_primary.txt](https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/master/external/yolov5-3.X/config_infer_primary.txt) files available in [my external/yolov5-3.X folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/external/yolov5-3.X)
|
|
||||||
|
|
||||||
Run command
|
|
||||||
```
|
|
||||||
deepstream-app -c deepstream_app_config.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: based on selected model, edit config_infer_primary.txt file
|
|
||||||
|
|
||||||
For example, if you are using YOLOv5x
|
|
||||||
|
|
||||||
```
|
|
||||||
model-engine-file=yolov5s.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
```
|
|
||||||
model-engine-file=yolov5x.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
To change NMS_THRESH, edit nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp file and recompile
|
|
||||||
|
|
||||||
```
|
|
||||||
#define kNMS_THRESH 0.45
|
|
||||||
```
|
|
||||||
|
|
||||||
To change CONF_THRESH, edit config_infer_primary.txt file
|
|
||||||
|
|
||||||
```
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
```
|
|
||||||
183
YOLOv5-4.0.md
183
YOLOv5-4.0.md
@@ -1,183 +0,0 @@
|
|||||||
# YOLOv5
|
|
||||||
NVIDIA DeepStream SDK 5.1 configuration for YOLOv5 4.0 models
|
|
||||||
|
|
||||||
Thanks [wang-xinyu](https://github.com/wang-xinyu/tensorrtx) and [Ultralytics](https://github.com/ultralytics/yolov5)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* [Requirements](#requirements)
|
|
||||||
* [Convert PyTorch model to wts file](#convert-pytorch-model-to-wts-file)
|
|
||||||
* [Convert wts file to TensorRT model](#convert-wts-file-to-tensorrt-model)
|
|
||||||
* [Compile nvdsinfer_custom_impl_Yolo](#compile-nvdsinfer_custom_impl_yolo)
|
|
||||||
* [Testing model](#testing-model)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Requirements
|
|
||||||
* [TensorRTX](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/install.md)
|
|
||||||
|
|
||||||
* [Ultralytics](https://github.com/ultralytics/yolov5/blob/v4.0/requirements.txt)
|
|
||||||
|
|
||||||
* Matplotlib (for Jetson platform)
|
|
||||||
```
|
|
||||||
sudo apt-get install python3-matplotlib
|
|
||||||
```
|
|
||||||
|
|
||||||
* PyTorch (for Jetson platform)
|
|
||||||
```
|
|
||||||
wget https://nvidia.box.com/shared/static/p57jwntv436lfrd78inwl7iml6p13fzh.whl -O torch-1.8.0-cp36-cp36m-linux_aarch64.whl
|
|
||||||
sudo apt-get install python3-pip libopenblas-base libopenmpi-dev
|
|
||||||
pip3 install Cython
|
|
||||||
pip3 install numpy torch-1.8.0-cp36-cp36m-linux_aarch64.whl
|
|
||||||
```
|
|
||||||
|
|
||||||
* TorchVision (for Jetson platform)
|
|
||||||
```
|
|
||||||
sudo apt-get install libjpeg-dev zlib1g-dev libpython3-dev libavcodec-dev libavformat-dev libswscale-dev
|
|
||||||
git clone --branch v0.9.0 https://github.com/pytorch/vision torchvision
|
|
||||||
cd torchvision
|
|
||||||
export BUILD_VERSION=0.9.0
|
|
||||||
python3 setup.py install --user
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Convert PyTorch model to wts file
|
|
||||||
1. Download repositories
|
|
||||||
```
|
|
||||||
git clone -b yolov5-v4.0 https://github.com/wang-xinyu/tensorrtx.git
|
|
||||||
git clone -b v4.0 https://github.com/ultralytics/yolov5.git
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Download latest YoloV5 (YOLOv5s, YOLOv5m, YOLOv5l or YOLOv5x) weights to yolov5/weights directory (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
wget https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt -P yolov5/weights
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Copy gen_wts.py file (from tensorrtx/yolov5 folder) to yolov5 (ultralytics) folder
|
|
||||||
```
|
|
||||||
cp tensorrtx/yolov5/gen_wts.py yolov5/gen_wts.py
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Generate wts file
|
|
||||||
```
|
|
||||||
cd yolov5
|
|
||||||
python3 gen_wts.py
|
|
||||||
```
|
|
||||||
|
|
||||||
yolov5s.wts file will be generated in yolov5 folder
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: if you want to generate the wts file for another YOLOv5 model (YOLOv5m, YOLOv5l or YOLOv5x), edit the gen_wts.py file, changing yolov5s to your model name
|
|
||||||
```
|
|
||||||
model = torch.load('weights/yolov5s.pt', map_location=device)['model'].float() # load to FP32
|
|
||||||
model.to(device).eval()
|
|
||||||
|
|
||||||
f = open('yolov5s.wts', 'w')
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Convert wts file to TensorRT model
|
|
||||||
1. Build tensorrtx/yolov5
|
|
||||||
```
|
|
||||||
cd tensorrtx/yolov5
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
make
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Copy the generated yolov5s.wts file to the tensorrtx/yolov5/build folder (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
cp yolov5/yolov5s.wts tensorrtx/yolov5/build/yolov5s.wts
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Convert to TensorRT model (yolov5s.engine file will be generated in tensorrtx/yolov5/build folder)
|
|
||||||
```
|
|
||||||
sudo ./yolov5 -s yolov5s.wts yolov5s.engine s
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Create a custom yolo folder and copy generated file (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
mkdir /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
cp yolov5s.engine /opt/nvidia/deepstream/deepstream-5.1/sources/yolo/yolov5s.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: by default, the yolov5 script generates the model with batch size = 1 and FP16 mode.
|
|
||||||
```
|
|
||||||
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
|
|
||||||
#define DEVICE 0 // GPU id
|
|
||||||
#define NMS_THRESH 0.4
|
|
||||||
#define CONF_THRESH 0.5
|
|
||||||
#define BATCH_SIZE 1
|
|
||||||
```
|
|
||||||
Edit the yolov5.cpp file before compiling if you want to change these parameters.
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Compile nvdsinfer_custom_impl_Yolo
|
|
||||||
1. Run command
|
|
||||||
```
|
|
||||||
sudo chmod -R 777 /opt/nvidia/deepstream/deepstream-5.1/sources/
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Download [my external/yolov5-4.0 folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/external/yolov5-4.0) and move its files to the created yolo folder
|
|
||||||
|
|
||||||
3. Compile lib
|
|
||||||
|
|
||||||
* x86 platform
|
|
||||||
```
|
|
||||||
cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
CUDA_VER=11.1 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
* Jetson platform
|
|
||||||
```
|
|
||||||
cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Testing model
|
|
||||||
Use my edited [deepstream_app_config.txt](https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/master/external/yolov5-4.0/deepstream_app_config.txt) and [config_infer_primary.txt](https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/master/external/yolov5-4.0/config_infer_primary.txt) files available in [my external/yolov5-4.0 folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/external/yolov5-4.0)
|
|
||||||
|
|
||||||
Run command
|
|
||||||
```
|
|
||||||
deepstream-app -c deepstream_app_config.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: based on selected model, edit config_infer_primary.txt file
|
|
||||||
|
|
||||||
For example, if you are using YOLOv5x
|
|
||||||
|
|
||||||
```
|
|
||||||
model-engine-file=yolov5s.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
```
|
|
||||||
model-engine-file=yolov5x.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
To change NMS_THRESH, edit nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp file and recompile
|
|
||||||
|
|
||||||
```
|
|
||||||
#define kNMS_THRESH 0.45
|
|
||||||
```
|
|
||||||
|
|
||||||
To change CONF_THRESH, edit config_infer_primary.txt file
|
|
||||||
|
|
||||||
```
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
```
|
|
||||||
173
YOLOv5-5.0.md
173
YOLOv5-5.0.md
@@ -1,173 +0,0 @@
|
|||||||
# YOLOv5
|
|
||||||
NVIDIA DeepStream SDK 5.1 configuration for YOLOv5 5.0 models
|
|
||||||
|
|
||||||
Thanks [wang-xinyu](https://github.com/wang-xinyu/tensorrtx) and [Ultralytics](https://github.com/ultralytics/yolov5)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* [Requirements](#requirements)
|
|
||||||
* [Convert PyTorch model to wts file](#convert-pytorch-model-to-wts-file)
|
|
||||||
* [Convert wts file to TensorRT model](#convert-wts-file-to-tensorrt-model)
|
|
||||||
* [Compile nvdsinfer_custom_impl_Yolo](#compile-nvdsinfer_custom_impl_yolo)
|
|
||||||
* [Testing model](#testing-model)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Requirements
|
|
||||||
* [TensorRTX](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/install.md)
|
|
||||||
|
|
||||||
* [Ultralytics](https://github.com/ultralytics/yolov5/blob/master/requirements.txt)
|
|
||||||
|
|
||||||
* Matplotlib (for Jetson platform)
|
|
||||||
```
|
|
||||||
sudo apt-get install python3-matplotlib
|
|
||||||
```
|
|
||||||
|
|
||||||
* PyTorch (for Jetson platform)
|
|
||||||
```
|
|
||||||
wget https://nvidia.box.com/shared/static/p57jwntv436lfrd78inwl7iml6p13fzh.whl -O torch-1.8.0-cp36-cp36m-linux_aarch64.whl
|
|
||||||
sudo apt-get install python3-pip libopenblas-base libopenmpi-dev
|
|
||||||
pip3 install Cython
|
|
||||||
pip3 install numpy torch-1.8.0-cp36-cp36m-linux_aarch64.whl
|
|
||||||
```
|
|
||||||
|
|
||||||
* TorchVision (for Jetson platform)
|
|
||||||
```
|
|
||||||
sudo apt-get install libjpeg-dev zlib1g-dev libpython3-dev libavcodec-dev libavformat-dev libswscale-dev
|
|
||||||
git clone --branch v0.9.0 https://github.com/pytorch/vision torchvision
|
|
||||||
cd torchvision
|
|
||||||
export BUILD_VERSION=0.9.0
|
|
||||||
python3 setup.py install --user
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Convert PyTorch model to wts file
|
|
||||||
1. Download repositories
|
|
||||||
```
|
|
||||||
git clone https://github.com/wang-xinyu/tensorrtx.git
|
|
||||||
git clone https://github.com/ultralytics/yolov5.git
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Download latest YoloV5 (YOLOv5s, YOLOv5m, YOLOv5l or YOLOv5x) weights to yolov5 folder (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
wget https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5s.pt -P yolov5/
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Copy gen_wts.py file (from tensorrtx/yolov5 folder) to yolov5 (ultralytics) folder
|
|
||||||
```
|
|
||||||
cp tensorrtx/yolov5/gen_wts.py yolov5/gen_wts.py
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Generate wts file
|
|
||||||
```
|
|
||||||
cd yolov5
|
|
||||||
python3 gen_wts.py yolov5s.pt
|
|
||||||
```
|
|
||||||
|
|
||||||
yolov5s.wts file will be generated in yolov5 folder
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Convert wts file to TensorRT model
|
|
||||||
1. Build tensorrtx/yolov5
|
|
||||||
```
|
|
||||||
cd tensorrtx/yolov5
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
make
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Copy the generated yolov5s.wts file to the tensorrtx/yolov5/build folder (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
cp yolov5/yolov5s.wts tensorrtx/yolov5/build/yolov5s.wts
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Convert to TensorRT model (yolov5s.engine file will be generated in tensorrtx/yolov5/build folder)
|
|
||||||
```
|
|
||||||
sudo ./yolov5 -s yolov5s.wts yolov5s.engine s
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Create a custom yolo folder and copy generated file (example for YOLOv5s)
|
|
||||||
```
|
|
||||||
mkdir /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
cp yolov5s.engine /opt/nvidia/deepstream/deepstream-5.1/sources/yolo/yolov5s.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: by default, the yolov5 script generates the model with batch size = 1 and FP16 mode.
|
|
||||||
```
|
|
||||||
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
|
|
||||||
#define DEVICE 0 // GPU id
|
|
||||||
#define NMS_THRESH 0.4
|
|
||||||
#define CONF_THRESH 0.5
|
|
||||||
#define BATCH_SIZE 1
|
|
||||||
```
|
|
||||||
Edit the yolov5.cpp file before compiling if you want to change these parameters.
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Compile nvdsinfer_custom_impl_Yolo
|
|
||||||
1. Run command
|
|
||||||
```
|
|
||||||
sudo chmod -R 777 /opt/nvidia/deepstream/deepstream-5.1/sources/
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Download [my external/yolov5-5.0 folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/external/yolov5-5.0) and move its files to the created yolo folder
|
|
||||||
|
|
||||||
3. Compile lib
|
|
||||||
|
|
||||||
* x86 platform
|
|
||||||
```
|
|
||||||
cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
CUDA_VER=11.1 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
* Jetson platform
|
|
||||||
```
|
|
||||||
cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo
|
|
||||||
CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Testing model
|
|
||||||
Use my edited [deepstream_app_config.txt](https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/master/external/yolov5-5.0/deepstream_app_config.txt) and [config_infer_primary.txt](https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/master/external/yolov5-5.0/config_infer_primary.txt) files available in [my external/yolov5-5.0 folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/external/yolov5-5.0)
|
|
||||||
|
|
||||||
Run command
|
|
||||||
```
|
|
||||||
deepstream-app -c deepstream_app_config.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
<br />
|
|
||||||
|
|
||||||
Note: based on selected model, edit config_infer_primary.txt file
|
|
||||||
|
|
||||||
For example, if you are using YOLOv5x
|
|
||||||
|
|
||||||
```
|
|
||||||
model-engine-file=yolov5s.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
```
|
|
||||||
model-engine-file=yolov5x.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
To change NMS_THRESH, edit nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp file and recompile
|
|
||||||
|
|
||||||
```
|
|
||||||
#define kNMS_THRESH 0.45
|
|
||||||
```
|
|
||||||
|
|
||||||
To change CONF_THRESH, edit config_infer_primary.txt file
|
|
||||||
|
|
||||||
```
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
```
|
|
||||||
312
customModels.md
312
customModels.md
@@ -1,312 +0,0 @@
|
|||||||
# Editing default model to your custom model
|
|
||||||
How to edit DeepStream files to your custom model
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* [Requirements](#requirements)
|
|
||||||
* [Editing default model](#editing-default-model)
|
|
||||||
* [Compiling edited model](#compiling-edited-model)
|
|
||||||
* [Understanding and editing deepstream_app_config](#understanding-and-editing-deepstream_app_config)
|
|
||||||
* [Understanding and editing config_infer_primary](#understanding-and-editing-config_infer_primary)
|
|
||||||
* [Testing model](#testing-model)
|
|
||||||
* [Custom functions in your model](#custom-functions-in-your-model)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Requirements
|
|
||||||
* [NVIDIA DeepStream SDK 5.1](https://developer.nvidia.com/deepstream-sdk)
|
|
||||||
* [DeepStream-Yolo Native](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/native)
|
|
||||||
* [Pre-trained YOLO model](https://github.com/AlexeyAB/darknet)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Editing default model
|
|
||||||
1. Run command
|
|
||||||
```
|
|
||||||
sudo chmod -R 777 /opt/nvidia/deepstream/deepstream-5.1/sources/
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Download [my native folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/native), rename to yolo and move to your deepstream/sources folder.
|
|
||||||
3. Copy your obj.names file to the deepstream/sources/yolo directory and rename it to labels.txt
|
|
||||||
4. Copy your yolo.cfg and yolo.weights files to deepstream/sources/yolo directory.
|
|
||||||
5. Edit config_infer_primary.txt for your model
|
|
||||||
```
|
|
||||||
[property]
|
|
||||||
...
|
|
||||||
# CFG
|
|
||||||
custom-network-config=yolo.cfg
|
|
||||||
# Weights
|
|
||||||
model-file=yolo.weights
|
|
||||||
# Model labels file
|
|
||||||
labelfile-path=labels.txt
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: if you want to use YOLOv2 or YOLOv2-Tiny models, change deepstream_app_config.txt
|
|
||||||
```
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=config_infer_primary_yoloV2.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: config_infer_primary.txt uses cluster-mode=4 and NMS = 0.45 (via code) when beta_nms isn't available (when beta_nms is available, NMS = beta_nms), while config_infer_primary_yoloV2.txt uses cluster-mode=2 and nms-iou-threshold=0.45 to set NMS.
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Compiling edited model
|
|
||||||
1. Check your CUDA version (nvcc --version)
|
|
||||||
2. Go to deepstream/sources/yolo directory
|
|
||||||
3. Type command to compile:
|
|
||||||
|
|
||||||
* x86 platform
|
|
||||||
```
|
|
||||||
CUDA_VER=11.1 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
* Jetson platform
|
|
||||||
```
|
|
||||||
CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Understanding and editing deepstream_app_config
|
|
||||||
To understand and edit deepstream_app_config.txt file, read the [DeepStream SDK Development Guide - Configuration Groups](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_ref_app_deepstream.html#configuration-groups)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit tiled-display
|
|
||||||
|
|
||||||
```
|
|
||||||
[tiled-display]
|
|
||||||
enable=1
|
|
||||||
# If you have 1 stream use 1/1 (rows/columns), if you have 4 streams use 2/2 or 4/1 or 1/4 (rows/columns)
|
|
||||||
rows=1
|
|
||||||
columns=1
|
|
||||||
# Resolution of tiled display
|
|
||||||
width=1280
|
|
||||||
height=720
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit source
|
|
||||||
|
|
||||||
Example for 1 source:
|
|
||||||
```
|
|
||||||
[source0]
|
|
||||||
enable=1
|
|
||||||
# 1=Camera (V4L2), 2=URI, 3=MultiURI, 4=RTSP, 5=Camera (CSI; Jetson only)
|
|
||||||
type=3
|
|
||||||
# Stream URL
|
|
||||||
uri=rtsp://192.168.1.2/Streaming/Channels/101/httppreview
|
|
||||||
# Number of copies of the source (if > 1, you need to edit rows/columns in the tiled-display section and batch-size in the streammux section and in config_infer_primary.txt; type=3 is required for more than 1 source)
|
|
||||||
num-sources=1
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for 1 duplicated source:
|
|
||||||
```
|
|
||||||
[source0]
|
|
||||||
enable=1
|
|
||||||
type=3
|
|
||||||
uri=rtsp://192.168.1.2/Streaming/Channels/101/httppreview
|
|
||||||
num-sources=2
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for 2 sources:
|
|
||||||
```
|
|
||||||
[source0]
|
|
||||||
enable=1
|
|
||||||
type=3
|
|
||||||
uri=rtsp://192.168.1.2/Streaming/Channels/101/httppreview
|
|
||||||
num-sources=1
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
|
|
||||||
[source1]
|
|
||||||
enable=1
|
|
||||||
type=3
|
|
||||||
uri=rtsp://192.168.1.3/Streaming/Channels/101/httppreview
|
|
||||||
num-sources=1
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit sink
|
|
||||||
|
|
||||||
Example for 1 source or 1 duplicated source:
|
|
||||||
```
|
|
||||||
[sink0]
|
|
||||||
enable=1
|
|
||||||
# 1=Fakesink, 2=EGL (nveglglessink), 3=Filesink, 4=RTSP, 5=Overlay (Jetson only)
|
|
||||||
type=2
|
|
||||||
# Indicates how fast the stream is to be rendered (0=As fast as possible, 1=Synchronously)
|
|
||||||
sync=0
|
|
||||||
# The ID of the source whose buffers this sink must use
|
|
||||||
source-id=0
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for 2 sources:
|
|
||||||
```
|
|
||||||
[sink0]
|
|
||||||
enable=1
|
|
||||||
type=2
|
|
||||||
sync=0
|
|
||||||
source-id=0
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[sink1]
|
|
||||||
enable=1
|
|
||||||
type=2
|
|
||||||
sync=0
|
|
||||||
source-id=1
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit streammux
|
|
||||||
|
|
||||||
Example for 1 source:
|
|
||||||
```
|
|
||||||
[streammux]
|
|
||||||
gpu-id=0
|
|
||||||
# Boolean property to inform muxer that sources are live
|
|
||||||
live-source=1
|
|
||||||
# Number of sources
|
|
||||||
batch-size=1
|
|
||||||
# Time out in usec, to wait after the first buffer is available to push the batch even if the complete batch is not formed
|
|
||||||
batched-push-timeout=40000
|
|
||||||
# Resolution of streammux
|
|
||||||
width=1920
|
|
||||||
height=1080
|
|
||||||
enable-padding=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for 1 duplicated source or 2 sources:
|
|
||||||
```
|
|
||||||
[streammux]
|
|
||||||
gpu-id=0
|
|
||||||
live-source=0
|
|
||||||
batch-size=2
|
|
||||||
batched-push-timeout=40000
|
|
||||||
width=1920
|
|
||||||
height=1080
|
|
||||||
enable-padding=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit primary-gie
|
|
||||||
```
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=config_infer_primary.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
* You can remove [tracker] section, if you don't use it.
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Understanding and editing config_infer_primary
|
|
||||||
To understand and edit config_infer_primary.txt file, read the [NVIDIA DeepStream Plugin Manual - Gst-nvinfer File Configuration Specifications](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_plugin_gst-nvinfer.html#gst-nvinfer-file-configuration-specifications)
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit model-color-format according to the number of channels in yolo.cfg (1=GRAYSCALE, 3=RGB)
|
|
||||||
|
|
||||||
```
|
|
||||||
# 0=RGB, 1=BGR, 2=GRAYSCALE
|
|
||||||
model-color-format=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit model-engine-file (example for batch-size=1 and network-mode=2)
|
|
||||||
|
|
||||||
```
|
|
||||||
model-engine-file=model_b1_gpu0_fp16.engine
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit batch-size
|
|
||||||
|
|
||||||
```
|
|
||||||
# Number of sources
|
|
||||||
batch-size=1
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit network-mode
|
|
||||||
|
|
||||||
```
|
|
||||||
# 0=FP32, 1=INT8, 2=FP16
|
|
||||||
network-mode=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit num-detected-classes according to the number of classes in yolo.cfg
|
|
||||||
|
|
||||||
```
|
|
||||||
num-detected-classes=80
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit network-type
|
|
||||||
|
|
||||||
```
|
|
||||||
# 0=Detector, 1=Classifier, 2=Segmentation
|
|
||||||
network-type=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Add/edit interval (FPS increase if > 0)
|
|
||||||
|
|
||||||
```
|
|
||||||
# Interval of detection
|
|
||||||
interval=0
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Change pre-cluster-threshold (optional)
|
|
||||||
|
|
||||||
```
|
|
||||||
[class-attrs-all]
|
|
||||||
# CONF_THRESH
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Testing model
|
|
||||||
|
|
||||||
To run your custom YOLO model, use command
|
|
||||||
```
|
|
||||||
deepstream-app -c deepstream_app_config.txt
|
|
||||||
```
|
|
||||||
@@ -14,7 +14,7 @@ nvbuf-memory-type=0
|
|||||||
[source0]
|
[source0]
|
||||||
enable=1
|
enable=1
|
||||||
type=3
|
type=3
|
||||||
uri=file:///opt/nvidia/deepstream/deepstream-5.1/samples/streams/sample_1080p_h264.mp4
|
uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4
|
||||||
num-sources=1
|
num-sources=1
|
||||||
gpu-id=0
|
gpu-id=0
|
||||||
cudadec-memtype=0
|
cudadec-memtype=0
|
||||||
@@ -1,72 +0,0 @@
|
|||||||
[application]
|
|
||||||
enable-perf-measurement=1
|
|
||||||
perf-measurement-interval-sec=5
|
|
||||||
|
|
||||||
[tiled-display]
|
|
||||||
enable=1
|
|
||||||
rows=1
|
|
||||||
columns=1
|
|
||||||
width=1280
|
|
||||||
height=720
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[source0]
|
|
||||||
enable=1
|
|
||||||
type=3
|
|
||||||
uri=file:///opt/nvidia/deepstream/deepstream-5.1/samples/streams/sample_1080p_h264.mp4
|
|
||||||
num-sources=1
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
|
|
||||||
[sink0]
|
|
||||||
enable=1
|
|
||||||
type=2
|
|
||||||
sync=0
|
|
||||||
source-id=0
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[osd]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
border-width=1
|
|
||||||
text-size=15
|
|
||||||
text-color=1;1;1;1;
|
|
||||||
text-bg-color=0.3;0.3;0.3;1
|
|
||||||
font=Serif
|
|
||||||
show-clock=0
|
|
||||||
clock-x-offset=800
|
|
||||||
clock-y-offset=820
|
|
||||||
clock-text-size=12
|
|
||||||
clock-color=1;0;0;0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[streammux]
|
|
||||||
gpu-id=0
|
|
||||||
live-source=0
|
|
||||||
batch-size=1
|
|
||||||
batched-push-timeout=40000
|
|
||||||
width=1920
|
|
||||||
height=1080
|
|
||||||
enable-padding=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=pgie/config_infer_primary.txt
|
|
||||||
|
|
||||||
[secondary-gie0]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=2
|
|
||||||
operate-on-gie-id=1
|
|
||||||
#operate-on-class-ids=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=sgie1/config_infer_secondary1.txt
|
|
||||||
|
|
||||||
[tests]
|
|
||||||
file-loop=0
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
[property]
|
|
||||||
gpu-id=0
|
|
||||||
net-scale-factor=0.0039215697906911373
|
|
||||||
model-color-format=0
|
|
||||||
custom-network-config=pgie/yolo.cfg
|
|
||||||
model-file=yolo.weights
|
|
||||||
model-engine-file=model_b1_gpu0_fp32.engine
|
|
||||||
#int8-calib-file=calib.table
|
|
||||||
labelfile-path=labels.txt
|
|
||||||
batch-size=1
|
|
||||||
network-mode=0
|
|
||||||
num-detected-classes=2
|
|
||||||
interval=0
|
|
||||||
gie-unique-id=1
|
|
||||||
process-mode=1
|
|
||||||
network-type=0
|
|
||||||
cluster-mode=4
|
|
||||||
maintain-aspect-ratio=0
|
|
||||||
parse-bbox-func-name=NvDsInferParseYolo
|
|
||||||
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
|
|
||||||
engine-create-func-name=NvDsInferYoloCudaEngineGet
|
|
||||||
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
# copy of this software and associated documentation files (the "Software"),
|
|
||||||
# to deal in the Software without restriction, including without limitation
|
|
||||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
# and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
# Software is furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included in
|
|
||||||
# all copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
# DEALINGS IN THE SOFTWARE.
|
|
||||||
#
|
|
||||||
# Edited by Marcos Luciano
|
|
||||||
# https://www.github.com/marcoslucianops
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Builds libnvdsinfer_custom_impl_Yolo.so.
# Usage: make CUDA_VER=<cuda version> [OPENCV=1]

# CUDA toolkit version; it selects /usr/local/cuda-$(CUDA_VER) and is mandatory.
CUDA_VER?=
ifeq ($(CUDA_VER),)
$(error "CUDA_VER is not set")
endif

# OPENCV=1 adds -DOPENCV and compiles calibrator.cpp; any unset value means 0.
OPENCV?=
ifeq ($(OPENCV),)
OPENCV=0
endif

CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc

CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I/opt/nvidia/deepstream/deepstream-5.1/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include

ifeq ($(OPENCV), 1)
COMMON= -DOPENCV
CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv)
LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv)
endif

LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group

# Every object depends on every header; layers/*.h is listed too so that
# editing a layer header also triggers a rebuild.
INCS:= $(wildcard *.h layers/*.h)
SRCFILES:= nvdsinfer_yolo_engine.cpp \
           nvdsparsebbox_Yolo.cpp \
           yoloPlugins.cpp \
           layers/convolutional_layer.cpp \
           layers/dropout_layer.cpp \
           layers/shortcut_layer.cpp \
           layers/route_layer.cpp \
           layers/upsample_layer.cpp \
           layers/maxpool_layer.cpp \
           layers/activation_layer.cpp \
           utils.cpp \
           yolo.cpp \
           yoloForward.cu

ifeq ($(OPENCV), 1)
SRCFILES+= calibrator.cpp
endif

TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so

# Map both .cpp and .cu sources to .o object names.
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)

# all and clean name actions, not files; declaring them phony keeps a stray
# file called "all" or "clean" from masking the rule.
.PHONY: all clean

all: $(TARGET_LIB)

%.o: %.cpp $(INCS) Makefile
	$(CC) -c $(COMMON) -o $@ $(CFLAGS) $<

%.o: %.cu $(INCS) Makefile
	$(NVCC) -c -o $@ --compiler-options '-fPIC' $<

$(TARGET_LIB) : $(TARGET_OBJS)
	$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)

clean:
	rm -rf $(TARGET_LIB)
	rm -rf $(TARGET_OBJS)
|
|
||||||
@@ -1,209 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "yoloPlugins.h"
|
|
||||||
#include "NvInferPlugin.h"
|
|
||||||
#include <cassert>
|
|
||||||
#include <iostream>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
int kNUM_CLASSES;
|
|
||||||
float kBETA_NMS;
|
|
||||||
std::vector<float> kANCHORS;
|
|
||||||
std::vector<std::vector<int>> kMASK;
|
|
||||||
|
|
||||||
namespace {

// Appends the raw bytes of `val` at `buffer` and advances `buffer` past them.
// memcpy is used rather than a typed store through reinterpret_cast because
// the byte cursor is not guaranteed to be suitably aligned for T.
template <typename T>
void write(char*& buffer, const T& val)
{
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}

// Fills `val` from the raw bytes at `buffer` and advances `buffer` past them.
// Mirror image of write(); the same alignment reasoning applies.
template <typename T>
void read(const char*& buffer, T& val)
{
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
}

} // namespace
|
|
||||||
|
|
||||||
// Declaration only; this file calls it from YoloLayer::enqueue.
cudaError_t cudaYoloLayer (
    const void* input,
    void* output,
    const uint& batchSize,
    const uint& gridSizeX,
    const uint& gridSizeY,
    const uint& numOutputClasses,
    const uint& numBBoxes,
    uint64_t outputSize,
    cudaStream_t stream,
    const uint modelCoords,
    const float modelScale,
    const uint modelType);
|
|
||||||
|
|
||||||
// Rebuilds a layer from the byte stream produced by serialize().
// Fields are read back in exactly the order serialize() writes them.
//   data   - start of the serialized plugin state
//   length - size of that state in bytes
YoloLayer::YoloLayer (const void* data, size_t length)
{
    const char* const begin = static_cast<const char*>(data);
    const char* d = begin;

    read(d, m_NumBoxes);
    read(d, m_NumClasses);
    read(d, m_GridSizeX);
    read(d, m_GridSizeY);
    read(d, m_OutputSize);

    read(d, m_type);
    read(d, m_new_coords);
    read(d, m_scale_x_y);
    read(d, m_beta_nms);

    // Anchors: element count followed by that many floats.
    uint anchorsSize;
    read(d, anchorsSize);
    for (uint i = 0; i < anchorsSize; i++) {
        float result;
        read(d, result);
        m_Anchors.push_back(result);
    }

    // Masks: group count, then for each group its length and its ints.
    uint maskSize;
    read(d, maskSize);
    for (uint i = 0; i < maskSize; i++) {
        uint nMask;
        read(d, nMask);
        std::vector<int> pMask;
        for (uint f = 0; f < nMask; f++) {
            int result;
            read(d, result);
            pMask.push_back(result);
        }
        m_Mask.push_back(pMask);
    }

    // The whole blob must have been consumed; anything else means the stream
    // does not match the layout written by serialize().
    assert(d == begin + length);
    (void)begin;   // only referenced by the assert when NDEBUG is defined
    (void)length;

    // Mirror the layer parameters into the file-level globals.
    kNUM_CLASSES = m_NumClasses;
    kBETA_NMS = m_beta_nms;
    kANCHORS = m_Anchors;
    kMASK = m_Mask;
}
|
|
||||||
|
|
||||||
// Builds a layer from explicit parameters and derives m_OutputSize from them.
// The grid sizes and the box and class counts must all be non-zero.
YoloLayer::YoloLayer (
    const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector<float> anchors, std::vector<std::vector<int>> mask) :
    m_NumBoxes(numBoxes),
    m_NumClasses(numClasses),
    m_GridSizeX(gridSizeX),
    m_GridSizeY(gridSizeY),
    m_type(model_type),
    m_new_coords(new_coords),
    m_scale_x_y(scale_x_y),
    m_beta_nms(beta_nms),
    m_Anchors(anchors),
    m_Mask(mask)
{
    assert(m_NumBoxes > 0);
    assert(m_NumClasses > 0);
    assert(m_GridSizeX > 0);
    assert(m_GridSizeY > 0);

    // Each box carries 4 + 1 + m_NumClasses values. Widen the first operand
    // so the whole product is computed in 64 bits, matching the type of
    // m_OutputSize, instead of wrapping in 32-bit unsigned arithmetic first.
    const uint64_t valuesPerBox = static_cast<uint64_t>(4) + 1 + m_NumClasses;
    m_OutputSize = static_cast<uint64_t>(m_GridSizeX) * m_GridSizeY * (m_NumBoxes * valuesPerBox);
}
|
|
||||||
|
|
||||||
// Returns the dimensions of the single output, which are those of the single
// input. Only output index 0 and exactly one input are accepted.
nvinfer1::Dims YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims)
{
    assert(index == 0);
    assert(nbInputDims == 1);

    const nvinfer1::Dims& onlyInput = inputs[0];
    return onlyInput;
}
|
|
||||||
|
|
||||||
// Accepts only the kFLOAT data type combined with the kNCHW format.
bool YoloLayer::supportsFormat (nvinfer1::DataType type, nvinfer1::PluginFormat format) const
{
    if (type != nvinfer1::DataType::kFLOAT) {
        return false;
    }
    return format == nvinfer1::PluginFormat::kNCHW;
}
|
|
||||||
|
|
||||||
void
|
|
||||||
YoloLayer::configureWithFormat (
|
|
||||||
const nvinfer1::Dims* inputDims, int nbInputs,
|
|
||||||
const nvinfer1::Dims* outputDims, int nbOutputs,
|
|
||||||
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize)
|
|
||||||
{
|
|
||||||
assert(nbInputs == 1);
|
|
||||||
assert (format == nvinfer1::PluginFormat::kNCHW);
|
|
||||||
assert(inputDims != nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
int YoloLayer::enqueue(
|
|
||||||
int batchSize, const void* const* inputs, void** outputs, void* workspace,
|
|
||||||
cudaStream_t stream)
|
|
||||||
{
|
|
||||||
CHECK(cudaYoloLayer(
|
|
||||||
inputs[0], outputs[0], batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes,
|
|
||||||
m_OutputSize, stream, m_new_coords, m_scale_x_y, m_type));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t YoloLayer::getSerializationSize() const
|
|
||||||
{
|
|
||||||
int anchorsSum = 1;
|
|
||||||
for (uint i = 0; i < m_Anchors.size(); i++) {
|
|
||||||
anchorsSum += 1;
|
|
||||||
}
|
|
||||||
int maskSum = 1;
|
|
||||||
for (uint i = 0; i < m_Mask.size(); i++) {
|
|
||||||
maskSum += 1;
|
|
||||||
for (uint f = 0; f < m_Mask[i].size(); f++) {
|
|
||||||
maskSum += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(m_GridSizeX) + sizeof(m_GridSizeY) + sizeof(m_OutputSize) + sizeof(m_type)
|
|
||||||
+ sizeof(m_new_coords) + sizeof(m_scale_x_y) + sizeof(m_beta_nms) + anchorsSum * sizeof(float) + maskSum * sizeof(int);
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayer::serialize(void* buffer) const
|
|
||||||
{
|
|
||||||
char *d = static_cast<char*>(buffer);
|
|
||||||
write(d, m_NumBoxes);
|
|
||||||
write(d, m_NumClasses);
|
|
||||||
write(d, m_GridSizeX);
|
|
||||||
write(d, m_GridSizeY);
|
|
||||||
write(d, m_OutputSize);
|
|
||||||
|
|
||||||
write(d, m_type);
|
|
||||||
write(d, m_new_coords);
|
|
||||||
write(d, m_scale_x_y);
|
|
||||||
write(d, m_beta_nms);
|
|
||||||
uint anchorsSize = m_Anchors.size();
|
|
||||||
write(d, anchorsSize);
|
|
||||||
for (uint i = 0; i < anchorsSize; i++) {
|
|
||||||
write(d, m_Anchors[i]);
|
|
||||||
}
|
|
||||||
uint maskSize = m_Mask.size();
|
|
||||||
write(d, maskSize);
|
|
||||||
for (uint i = 0; i < maskSize; i++) {
|
|
||||||
uint pMaskSize = m_Mask[i].size();
|
|
||||||
write(d, pMaskSize);
|
|
||||||
for (uint f = 0; f < pMaskSize; f++) {
|
|
||||||
write(d, m_Mask[i][f]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
kNUM_CLASSES = m_NumClasses;
|
|
||||||
kBETA_NMS = m_beta_nms;
|
|
||||||
kANCHORS = m_Anchors;
|
|
||||||
kMASK = m_Mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns a heap-allocated copy of this layer with the same parameters and
// the same plugin namespace. The copy is released through destroy().
nvinfer1::IPluginV2* YoloLayer::clone() const
{
    YoloLayer* copy = new YoloLayer (m_NumBoxes, m_NumClasses, m_GridSizeX, m_GridSizeY, m_type, m_new_coords, m_scale_x_y, m_beta_nms, m_Anchors, m_Mask);
    // The parameter constructor leaves the namespace empty, so carry it over.
    copy->setPluginNamespace(m_Namespace.c_str());
    return copy;
}
|
|
||||||
|
|
||||||
REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator);
|
|
||||||
@@ -1,156 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __YOLO_PLUGINS__
|
|
||||||
#define __YOLO_PLUGINS__
|
|
||||||
|
|
||||||
#include <cassert>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cuda_runtime_api.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "NvInferPlugin.h"
|
|
||||||
|
|
||||||
// Aborts the process with a diagnostic when a CUDA runtime call fails.
// `status` is evaluated exactly once, so a call expression may be passed
// without being executed a second time on the failure path.
#define CHECK(status)                                                                  \
    {                                                                                  \
        const cudaError_t checkStatus_ = (status);                                     \
        if (checkStatus_ != cudaSuccess)                                               \
        {                                                                              \
            std::cout << "CUDA failure: " << cudaGetErrorString(checkStatus_)          \
                      << " in file " << __FILE__                                       \
                      << " at line " << __LINE__ << std::endl;                         \
            abort();                                                                   \
        }                                                                              \
    }
|
|
||||||
|
|
||||||
namespace
{
// Version and type strings reported by YoloLayer and YoloLayerPluginCreator.
// The pointers themselves are const so no including file can re-point them.
const char* const YOLOLAYER_PLUGIN_VERSION {"1"};
const char* const YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"};
} // namespace
|
|
||||||
|
|
||||||
class YoloLayer : public nvinfer1::IPluginV2
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloLayer (const void* data, size_t length);
|
|
||||||
YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY,
|
|
||||||
const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms,
|
|
||||||
const std::vector<float> anchors, const std::vector<std::vector<int>> mask);
|
|
||||||
const char* getPluginType () const override { return YOLOLAYER_PLUGIN_NAME; }
|
|
||||||
const char* getPluginVersion () const override { return YOLOLAYER_PLUGIN_VERSION; }
|
|
||||||
int getNbOutputs () const override { return 1; }
|
|
||||||
|
|
||||||
nvinfer1::Dims getOutputDimensions (
|
|
||||||
int index, const nvinfer1::Dims* inputs,
|
|
||||||
int nbInputDims) override;
|
|
||||||
|
|
||||||
bool supportsFormat (
|
|
||||||
nvinfer1::DataType type, nvinfer1::PluginFormat format) const override;
|
|
||||||
|
|
||||||
void configureWithFormat (
|
|
||||||
const nvinfer1::Dims* inputDims, int nbInputs,
|
|
||||||
const nvinfer1::Dims* outputDims, int nbOutputs,
|
|
||||||
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) override;
|
|
||||||
|
|
||||||
int initialize () override { return 0; }
|
|
||||||
void terminate () override {}
|
|
||||||
size_t getWorkspaceSize (int maxBatchSize) const override { return 0; }
|
|
||||||
int enqueue (
|
|
||||||
int batchSize, const void* const* inputs, void** outputs,
|
|
||||||
void* workspace, cudaStream_t stream) override;
|
|
||||||
size_t getSerializationSize() const override;
|
|
||||||
void serialize (void* buffer) const override;
|
|
||||||
void destroy () override { delete this; }
|
|
||||||
nvinfer1::IPluginV2* clone() const override;
|
|
||||||
|
|
||||||
void setPluginNamespace (const char* pluginNamespace)override {
|
|
||||||
m_Namespace = pluginNamespace;
|
|
||||||
}
|
|
||||||
virtual const char* getPluginNamespace () const override {
|
|
||||||
return m_Namespace.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint m_NumBoxes {0};
|
|
||||||
uint m_NumClasses {0};
|
|
||||||
uint m_GridSizeX {0};
|
|
||||||
uint m_GridSizeY {0};
|
|
||||||
uint64_t m_OutputSize {0};
|
|
||||||
std::string m_Namespace {""};
|
|
||||||
|
|
||||||
uint m_type {0};
|
|
||||||
uint m_new_coords {0};
|
|
||||||
float m_scale_x_y {0};
|
|
||||||
float m_beta_nms {0};
|
|
||||||
std::vector<float> m_Anchors;
|
|
||||||
std::vector<std::vector<int>> m_Mask;
|
|
||||||
};
|
|
||||||
|
|
||||||
class YoloLayerPluginCreator : public nvinfer1::IPluginCreator
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloLayerPluginCreator () {}
|
|
||||||
~YoloLayerPluginCreator () {}
|
|
||||||
|
|
||||||
const char* getPluginName () const override { return YOLOLAYER_PLUGIN_NAME; }
|
|
||||||
const char* getPluginVersion () const override { return YOLOLAYER_PLUGIN_VERSION; }
|
|
||||||
|
|
||||||
const nvinfer1::PluginFieldCollection* getFieldNames() override {
|
|
||||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented" << std::endl;
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
nvinfer1::IPluginV2* createPlugin (
|
|
||||||
const char* name, const nvinfer1::PluginFieldCollection* fc) override
|
|
||||||
{
|
|
||||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented";
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
nvinfer1::IPluginV2* deserializePlugin (
|
|
||||||
const char* name, const void* serialData, size_t serialLength) override
|
|
||||||
{
|
|
||||||
std::cout << "Deserialize yoloLayer plugin: " << name << std::endl;
|
|
||||||
return new YoloLayer(serialData, serialLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* libNamespace) override {
|
|
||||||
m_Namespace = libNamespace;
|
|
||||||
}
|
|
||||||
const char* getPluginNamespace() const override {
|
|
||||||
return m_Namespace.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::string m_Namespace {""};
|
|
||||||
};
|
|
||||||
|
|
||||||
extern int kNUM_CLASSES;
|
|
||||||
extern float kBETA_NMS;
|
|
||||||
extern std::vector<float> kANCHORS;
|
|
||||||
extern std::vector<std::vector<int>> kMASK;
|
|
||||||
|
|
||||||
#endif // __YOLO_PLUGINS__
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
[property]
|
|
||||||
gpu-id=0
|
|
||||||
net-scale-factor=0.0039215697906911373
|
|
||||||
model-color-format=0
|
|
||||||
custom-network-config=sgie1/yolo.cfg
|
|
||||||
model-file=yolo.weights
|
|
||||||
model-engine-file=model_b16_gpu0_fp32.engine
|
|
||||||
#int8-calib-file=calib.table
|
|
||||||
labelfile-path=labels.txt
|
|
||||||
batch-size=16
|
|
||||||
network-mode=0
|
|
||||||
num-detected-classes=10
|
|
||||||
interval=0
|
|
||||||
gie-unique-id=2
|
|
||||||
process-mode=2
|
|
||||||
network-type=0
|
|
||||||
cluster-mode=4
|
|
||||||
maintain-aspect-ratio=0
|
|
||||||
parse-bbox-func-name=NvDsInferParseYolo
|
|
||||||
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
|
|
||||||
engine-create-func-name=NvDsInferYoloCudaEngineGet
|
|
||||||
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
# copy of this software and associated documentation files (the "Software"),
|
|
||||||
# to deal in the Software without restriction, including without limitation
|
|
||||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
# and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
# Software is furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included in
|
|
||||||
# all copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
# DEALINGS IN THE SOFTWARE.
|
|
||||||
#
|
|
||||||
# Edited by Marcos Luciano
|
|
||||||
# https://www.github.com/marcoslucianops
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Builds libnvdsinfer_custom_impl_Yolo.so.
# Usage: make CUDA_VER=<cuda version> [OPENCV=1]

# CUDA toolkit version; it selects /usr/local/cuda-$(CUDA_VER) and is mandatory.
CUDA_VER?=
ifeq ($(CUDA_VER),)
$(error "CUDA_VER is not set")
endif

# OPENCV=1 adds -DOPENCV and compiles calibrator.cpp; any unset value means 0.
OPENCV?=
ifeq ($(OPENCV),)
OPENCV=0
endif

CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc

CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I/opt/nvidia/deepstream/deepstream-5.1/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include

ifeq ($(OPENCV), 1)
COMMON= -DOPENCV
CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv)
LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv)
endif

LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group

# Every object depends on every header; layers/*.h is listed too so that
# editing a layer header also triggers a rebuild.
INCS:= $(wildcard *.h layers/*.h)
SRCFILES:= nvdsinfer_yolo_engine.cpp \
           nvdsparsebbox_Yolo.cpp \
           yoloPlugins.cpp \
           layers/convolutional_layer.cpp \
           layers/dropout_layer.cpp \
           layers/shortcut_layer.cpp \
           layers/route_layer.cpp \
           layers/upsample_layer.cpp \
           layers/maxpool_layer.cpp \
           layers/activation_layer.cpp \
           utils.cpp \
           yolo.cpp \
           yoloForward.cu

ifeq ($(OPENCV), 1)
SRCFILES+= calibrator.cpp
endif

TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so

# Map both .cpp and .cu sources to .o object names.
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)

# all and clean name actions, not files; declaring them phony keeps a stray
# file called "all" or "clean" from masking the rule.
.PHONY: all clean

all: $(TARGET_LIB)

%.o: %.cpp $(INCS) Makefile
	$(CC) -c $(COMMON) -o $@ $(CFLAGS) $<

%.o: %.cu $(INCS) Makefile
	$(NVCC) -c -o $@ --compiler-options '-fPIC' $<

$(TARGET_LIB) : $(TARGET_OBJS)
	$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)

clean:
	rm -rf $(TARGET_LIB)
	rm -rf $(TARGET_OBJS)
|
|
||||||
@@ -1,137 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "calibrator.h"
|
|
||||||
#include <fstream>
|
|
||||||
#include <iterator>
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
// Sets up a calibrator for batches of batchsize x channels x height x width
// floats.
//   imgPath        - text file listing one calibration image path per line
//   calibTablePath - file the calibration cache is read from and written to
// Allocates the host staging buffer and the device input buffer.
int8EntroyCalibrator::int8EntroyCalibrator(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath,
    const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0),
    // readCache was previously left uninitialised although
    // readCalibrationCache() reads it; default to using an existing cache.
    readCache(true)
{
    // Widen before multiplying so the element count is computed in size_t.
    inputCount = static_cast<size_t>(batchsize) * channels * height * width;

    // The list is only read, so open it read-only; std::fstream defaults to
    // read/write and fails on a file that is not writable.
    std::ifstream f(imgPath);
    if (f.is_open())
    {
        std::string temp;
        while (std::getline(f, temp)) imgPaths.push_back(temp);
    }

    batchData = new float[inputCount];
    CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
}
|
|
||||||
|
|
||||||
// Frees the device input buffer first, then the host staging buffer.
int8EntroyCalibrator::~int8EntroyCalibrator()
{
    CUDA_CHECK(cudaFree(deviceInput));

    // delete[] on a null pointer is a no-op, so no explicit null test is needed.
    delete[] batchData;
}
|
|
||||||
|
|
||||||
bool int8EntroyCalibrator::getBatch(void **bindings, const char **names, int nbBindings)
|
|
||||||
{
|
|
||||||
if (imageIndex + batchSize > uint(imgPaths.size()))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
float* ptr = batchData;
|
|
||||||
for (size_t j = imageIndex; j < imageIndex + batchSize; ++j)
|
|
||||||
{
|
|
||||||
cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR);
|
|
||||||
std::vector<float>inputData = prepareImage(img, inputC, inputH, inputW, letterBox);
|
|
||||||
|
|
||||||
int len = (int)(inputData.size());
|
|
||||||
memcpy(ptr, inputData.data(), len * sizeof(float));
|
|
||||||
|
|
||||||
ptr += inputData.size();
|
|
||||||
std::cout << "Load image: " << imgPaths[j] << std::endl;
|
|
||||||
std::cout << "Progress: " << (j + 1)*100. / imgPaths.size() << "%" << std::endl;
|
|
||||||
}
|
|
||||||
imageIndex += batchSize;
|
|
||||||
CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
|
|
||||||
bindings[0] = deviceInput;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const void* int8EntroyCalibrator::readCalibrationCache(std::size_t &length)
|
|
||||||
{
|
|
||||||
calibrationCache.clear();
|
|
||||||
std::ifstream input(calibTablePath, std::ios::binary);
|
|
||||||
input >> std::noskipws;
|
|
||||||
if (readCache && input.good())
|
|
||||||
{
|
|
||||||
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
|
|
||||||
std::back_inserter(calibrationCache));
|
|
||||||
}
|
|
||||||
length = calibrationCache.size();
|
|
||||||
return length ? calibrationCache.data() : nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length)
|
|
||||||
{
|
|
||||||
std::ofstream output(calibTablePath, std::ios::binary);
|
|
||||||
output.write(reinterpret_cast<const char*>(cache), length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box)
|
|
||||||
{
|
|
||||||
cv::Mat out;
|
|
||||||
int image_w = img.cols;
|
|
||||||
int image_h = img.rows;
|
|
||||||
if (image_w != input_w || image_h != input_h)
|
|
||||||
{
|
|
||||||
if (letter_box == 1)
|
|
||||||
{
|
|
||||||
float ratio_w = (float)image_w / (float)input_w;
|
|
||||||
float ratio_h = (float)image_h / (float)input_h;
|
|
||||||
if (ratio_w > ratio_h)
|
|
||||||
{
|
|
||||||
int new_width = input_w * ratio_h;
|
|
||||||
int x = (image_w - new_width) / 2;
|
|
||||||
cv::Rect roi(abs(x), 0, new_width, image_h);
|
|
||||||
out = img(roi);
|
|
||||||
}
|
|
||||||
else if (ratio_w < ratio_h)
|
|
||||||
{
|
|
||||||
int new_height = input_h * ratio_w;
|
|
||||||
int y = (image_h - new_height) / 2;
|
|
||||||
cv::Rect roi(0, abs(y), image_w, new_height);
|
|
||||||
out = img(roi);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
out = img;
|
|
||||||
}
|
|
||||||
cv::resize(out, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
|
||||||
}
|
|
||||||
cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
|
|
||||||
}
|
|
||||||
if (input_c == 3)
|
|
||||||
{
|
|
||||||
out.convertTo(out, CV_32FC3, 1.0 / 255.0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
out.convertTo(out, CV_32FC1, 1.0 / 255.0);
|
|
||||||
}
|
|
||||||
std::vector<cv::Mat> input_channels(input_c);
|
|
||||||
cv::split(out, input_channels);
|
|
||||||
std::vector<float> result(input_h * input_w * input_c);
|
|
||||||
auto data = result.data();
|
|
||||||
int channelLength = input_h * input_w;
|
|
||||||
for (int i = 0; i < input_c; ++i)
|
|
||||||
{
|
|
||||||
memcpy(data, input_channels[i].data, channelLength * sizeof(float));
|
|
||||||
data += channelLength;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef CALIBRATOR_H
|
|
||||||
#define CALIBRATOR_H
|
|
||||||
|
|
||||||
#include "opencv2/opencv.hpp"
|
|
||||||
#include "cuda_runtime.h"
|
|
||||||
#include "NvInfer.h"
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call once. On failure it prints the numeric code,
// its description and the call site, then trips assert(0).
// Note: assert is compiled out when NDEBUG is defined, in which case
// execution continues after the message.
#define CUDA_CHECK(callstr)                                                          \
    {                                                                                \
        cudaError_t error_code = (callstr);                                          \
        if (error_code != cudaSuccess) {                                             \
            std::cerr << "CUDA error " << error_code << " ("                         \
                      << cudaGetErrorString(error_code) << ") at "                   \
                      << __FILE__ << ":" << __LINE__ << std::endl;                   \
            assert(0);                                                               \
        }                                                                            \
    }
#endif
|
|
||||||
|
|
||||||
namespace nvinfer1 {
|
|
||||||
class int8EntroyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
|
|
||||||
public:
|
|
||||||
int8EntroyCalibrator(const int &batchsize,
|
|
||||||
const int &channels,
|
|
||||||
const int &height,
|
|
||||||
const int &width,
|
|
||||||
const int &letterbox,
|
|
||||||
const std::string &imgPath,
|
|
||||||
const std::string &calibTablePath);
|
|
||||||
|
|
||||||
virtual ~int8EntroyCalibrator();
|
|
||||||
int getBatchSize() const override { return batchSize; }
|
|
||||||
bool getBatch(void *bindings[], const char *names[], int nbBindings) override;
|
|
||||||
const void *readCalibrationCache(std::size_t &length) override;
|
|
||||||
void writeCalibrationCache(const void *ptr, std::size_t length) override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
int batchSize;
|
|
||||||
int inputC;
|
|
||||||
int inputH;
|
|
||||||
int inputW;
|
|
||||||
int letterBox;
|
|
||||||
std::string calibTablePath;
|
|
||||||
size_t imageIndex;
|
|
||||||
size_t inputCount;
|
|
||||||
std::vector<std::string> imgPaths;
|
|
||||||
float *batchData{ nullptr };
|
|
||||||
void *deviceInput{ nullptr };
|
|
||||||
bool readCache;
|
|
||||||
std::vector<char> calibrationCache;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box);
|
|
||||||
|
|
||||||
#endif //CALIBRATOR_H
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "activation_layer.h"
|
|
||||||
|
|
||||||
/**
 * Adds one TensorRT activation layer of the given type on `input` and names it
 * "<prefix><layerIdx>". The layer pointer is asserted non-null before it is
 * touched, so callers may configure the returned layer safely.
 */
static nvinfer1::IActivationLayer* addNamedActivation(
    nvinfer1::INetworkDefinition* network,
    nvinfer1::ITensor* input,
    nvinfer1::ActivationType type,
    const std::string& prefix,
    int layerIdx)
{
    nvinfer1::IActivationLayer* layer = network->addActivation(*input, type);
    assert(layer != nullptr);
    const std::string layerName = prefix + std::to_string(layerIdx);
    layer->setName(layerName.c_str());
    return layer;
}

/**
 * Appends the activation named by `activation` to `network`.
 *
 * Recognised names: "relu", "sigmoid"/"logistic", "tanh", "leaky" (alpha 0.1),
 * "softplus" and "mish" (built as input * tanh(softplus(input))).
 *
 * @param layerIdx   Index used as the numeric suffix of the layer names.
 * @param activation Activation name taken from the network configuration.
 * @param output     Layer returned unchanged when `activation` is not one of
 *                   the recognised names.
 * @param input      Tensor the activation is applied to.
 * @param network    Network definition the layers are added to.
 * @return The last layer added, or `output` for an unrecognised name.
 */
nvinfer1::ILayer* activationLayer(
    int layerIdx,
    std::string activation,
    nvinfer1::ILayer* output,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    using nvinfer1::ActivationType;

    if (activation == "relu")
    {
        output = addNamedActivation(network, input, ActivationType::kRELU, "relu_", layerIdx);
    }
    else if (activation == "sigmoid" || activation == "logistic")
    {
        output = addNamedActivation(network, input, ActivationType::kSIGMOID, "sigmoid_", layerIdx);
    }
    else if (activation == "tanh")
    {
        output = addNamedActivation(network, input, ActivationType::kTANH, "tanh_", layerIdx);
    }
    else if (activation == "leaky")
    {
        // The helper asserts the layer is non-null *before* setAlpha() is
        // called; the previous code dereferenced it first and checked after.
        nvinfer1::IActivationLayer* leaky
            = addNamedActivation(network, input, ActivationType::kLEAKY_RELU, "leaky_", layerIdx);
        leaky->setAlpha(0.1);
        output = leaky;
    }
    else if (activation == "softplus")
    {
        output = addNamedActivation(network, input, ActivationType::kSOFTPLUS, "softplus_", layerIdx);
    }
    else if (activation == "mish")
    {
        // mish(x) = x * tanh(softplus(x)), assembled from three layers.
        nvinfer1::IActivationLayer* softplus
            = addNamedActivation(network, input, ActivationType::kSOFTPLUS, "softplus_", layerIdx);
        nvinfer1::IActivationLayer* tanhLayer = addNamedActivation(
            network, softplus->getOutput(0), ActivationType::kTANH, "tanh_", layerIdx);

        nvinfer1::IElementWiseLayer* mish = network->addElementWise(
            *tanhLayer->getOutput(0), *input, nvinfer1::ElementWiseOperation::kPROD);
        assert(mish != nullptr);
        const std::string mishLayerName = "mish_" + std::to_string(layerIdx);
        mish->setName(mishLayerName.c_str());
        output = mish;
    }

    // Any other name leaves `output` exactly as it was passed in.
    return output;
}
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __ACTIVATION_LAYER_H__
#define __ACTIVATION_LAYER_H__

#include <string>
#include <cassert>

#include "NvInfer.h"

// The header previously included itself here; the include guard made that a
// no-op, so the self-include has been removed.

/**
 * Appends the activation named by `activation` to `network`, applied to
 * `input`, and returns the resulting layer.
 *
 * @param layerIdx   Index used as the numeric suffix of the layer names.
 * @param activation Activation name ("relu", "sigmoid"/"logistic", "tanh",
 *                   "leaky", "softplus" or "mish").
 * @param output     Layer returned unchanged for any other activation name.
 * @param input      Tensor the activation is applied to.
 * @param network    Network definition the layers are added to.
 */
nvinfer1::ILayer* activationLayer(
    int layerIdx,
    std::string activation,
    nvinfer1::ILayer* output,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network);

#endif
|
|
||||||
@@ -1,168 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <math.h>
|
|
||||||
#include "convolutional_layer.h"
|
|
||||||
|
|
||||||
/**
 * Builds one "convolutional" block: a convolution, an optional batch-norm
 * (expressed as a per-channel scale layer) and the configured activation.
 *
 * Values are consumed from `weights` starting at `weightPtr`, which is
 * advanced past everything read:
 *   - without batch norm: `filters` biases, then the kernel weights;
 *   - with batch norm:    `filters` each of bn biases, bn weights, running
 *                         mean and running variance, then the kernel weights.
 *
 * Every Weights struct created here is also appended to `trtWeights`.
 * NOTE(review): the `new float[]` buffers are presumably released later by the
 * owner of `trtWeights` - confirm against the caller.
 *
 * @param layerIdx      Index used as the numeric suffix of the layer names.
 * @param block         Parsed configuration block; must contain "type",
 *                      "filters", "pad", "size", "stride" and "activation".
 * @param weights       Flat weight array read sequentially.
 * @param trtWeights    Receives the Weights structs created for this block.
 * @param weightPtr     In/out cursor into `weights`.
 * @param inputChannels Channel count of `input`; read only, not modified here.
 * @param input         Input tensor of the convolution.
 * @param network       Network definition the layers are added to.
 * @return The last layer of the block (activation, batch norm or convolution).
 */
nvinfer1::ILayer* convolutionalLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights,
    int& weightPtr,
    int& inputChannels,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "convolutional");
    assert(block.find("filters") != block.end());
    assert(block.find("pad") != block.end());
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    const int filters = std::stoi(block.at("filters"));
    const int padding = std::stoi(block.at("pad"));
    const int kernelSize = std::stoi(block.at("size"));
    const int stride = std::stoi(block.at("stride"));
    const std::string activation = block.at("activation");

    const auto bnIt = block.find("batch_normalize");
    const bool batchNormalize = (bnIt != block.end() && bnIt->second == "1");

    // The convolution carries its own bias only when batch norm is NOT applied.
    // Previously the mere presence of the "batch_normalize" key zeroed the bias
    // count, so "batch_normalize=0" read the biases but registered none.
    const int bias = batchNormalize ? 0 : filters;

    const auto groupsIt = block.find("groups");
    const bool hasGroups = (groupsIt != block.end());
    const int groups = hasGroups ? std::stoi(groupsIt->second) : 1;
    assert(groups > 0);

    const int pad = padding ? (kernelSize - 1) / 2 : 0;

    int size = filters * inputChannels * kernelSize * kernelSize / groups;

    // Refuse to read past the end of the weight array.
    const long long needed
        = (batchNormalize ? 4LL * filters : static_cast<long long>(filters)) + size;
    assert(weightPtr >= 0);
    assert(weightPtr + needed <= static_cast<long long>(weights.size()));

    // Copies the next `count` values into a freshly allocated array.
    auto takeWeights = [&weights, &weightPtr](int count) {
        float* values = new float[count];
        for (int i = 0; i < count; ++i)
        {
            values[i] = weights[weightPtr];
            weightPtr++;
        }
        return values;
    };

    std::vector<float> bnBiases;
    std::vector<float> bnWeights;
    std::vector<float> bnRunningMean;
    std::vector<float> bnRunningVar;
    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias};

    if (!batchNormalize)
    {
        convBias.values = takeWeights(filters);
        trtWeights.push_back(convBias);
        convWt.values = takeWeights(size);
        trtWeights.push_back(convWt);
    }
    else
    {
        bnBiases.reserve(filters);
        bnWeights.reserve(filters);
        bnRunningMean.reserve(filters);
        bnRunningVar.reserve(filters);

        for (int i = 0; i < filters; ++i)
        {
            bnBiases.push_back(weights[weightPtr]);
            weightPtr++;
        }
        for (int i = 0; i < filters; ++i)
        {
            bnWeights.push_back(weights[weightPtr]);
            weightPtr++;
        }
        for (int i = 0; i < filters; ++i)
        {
            bnRunningMean.push_back(weights[weightPtr]);
            weightPtr++;
        }
        for (int i = 0; i < filters; ++i)
        {
            // Stored as sqrt(variance + epsilon) so it can be divided by directly.
            bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
            weightPtr++;
        }

        convWt.values = takeWeights(size);
        trtWeights.push_back(convWt);
        trtWeights.push_back(convBias);
    }

    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
    assert(conv != nullptr);
    const std::string convLayerName = "conv_" + std::to_string(layerIdx);
    conv->setName(convLayerName.c_str());
    conv->setStride(nvinfer1::DimsHW{stride, stride});
    conv->setPadding(nvinfer1::DimsHW{pad, pad});

    if (hasGroups)
    {
        conv->setNbGroups(groups);
    }

    nvinfer1::ILayer* output = conv;

    if (batchNormalize)
    {
        // Fold batch norm into y = x * scale + shift with a power of 1.
        size = filters;
        nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
        nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
        nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};

        float* shiftWt = new float[size];
        for (int i = 0; i < size; ++i)
        {
            shiftWt[i]
                = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
        }
        shift.values = shiftWt;

        float* scaleWt = new float[size];
        for (int i = 0; i < size; ++i)
        {
            scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
        }
        scale.values = scaleWt;

        float* powerWt = new float[size];
        for (int i = 0; i < size; ++i)
        {
            powerWt[i] = 1.0;
        }
        power.values = powerWt;

        trtWeights.push_back(shift);
        trtWeights.push_back(scale);
        trtWeights.push_back(power);

        nvinfer1::IScaleLayer* bn = network->addScale(
            *output->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
        assert(bn != nullptr);
        const std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
        bn->setName(bnLayerName.c_str());
        output = bn;
    }

    output = activationLayer(layerIdx, activation, output, output->getOutput(0), network);
    assert(output != nullptr);

    return output;
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __CONVOLUTIONAL_LAYER_H__
#define __CONVOLUTIONAL_LAYER_H__

#include <map>
#include <vector>

#include "NvInfer.h"

#include "activation_layer.h"

/**
 * Adds the layers for one "convolutional" configuration block to `network`
 * and returns the last layer added.
 *
 * `weights` is read sequentially starting at `weightPtr`, which is advanced
 * past the values consumed; the Weights structs created along the way are
 * appended to `trtWeights`.
 */
nvinfer1::ILayer* convolutionalLayer(int layerIdx,
                                     std::map<std::string, std::string>& block,
                                     std::vector<float>& weights,
                                     std::vector<nvinfer1::Weights>& trtWeights,
                                     int& weightPtr,
                                     int& inputChannels,
                                     nvinfer1::ITensor* input,
                                     nvinfer1::INetworkDefinition* network);

#endif
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "dropout_layer.h"
|
|
||||||
|
|
||||||
/**
 * Represents a dropout block in the inference network.
 *
 * Dropout does nothing at inference time, so the block is materialised as an
 * identity layer on `input`. The previous implementation returned an
 * uninitialised pointer, which is undefined behaviour for any caller that
 * uses the result.
 *
 * @param probability Dropout probability from the configuration; unused.
 * @param input       Tensor to pass through.
 * @param network     Network definition the layer is added to.
 * @return The identity layer, or nullptr when `input` or `network` is null or
 *         the layer could not be created.
 */
nvinfer1::ILayer* dropoutLayer(
    float probability,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    (void)probability;

    if (input == nullptr || network == nullptr)
    {
        return nullptr;
    }

    return network->addIdentity(*input);
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __DROPOUT_LAYER_H__
#define __DROPOUT_LAYER_H__

#include "NvInfer.h"

/**
 * Entry point for "dropout" configuration blocks.
 *
 * Declaration only; see the matching source file for the definition.
 *
 * @param probability Dropout probability from the configuration.
 * @param input       Tensor feeding the block.
 * @param network     Network definition being built.
 */
nvinfer1::ILayer* dropoutLayer(float probability,
                               nvinfer1::ITensor* input,
                               nvinfer1::INetworkDefinition* network);

#endif
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "maxpool_layer.h"
|
|
||||||
|
|
||||||
/**
 * Adds a max-pooling layer described by a "maxpool" configuration block.
 *
 * The block must provide "size" (square window edge) and "stride". The layer
 * uses PaddingMode::kSAME_UPPER and is named "maxpool_<layerIdx>".
 *
 * @return The pooling layer that was added.
 */
nvinfer1::ILayer* maxpoolLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "maxpool");
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    const int size = std::stoi(block.at("size"));
    const int stride = std::stoi(block.at("stride"));
    const nvinfer1::DimsHW window{size, size};
    const nvinfer1::DimsHW step{stride, stride};

    nvinfer1::IPoolingLayer* pool
        = network->addPooling(*input, nvinfer1::PoolingType::kMAX, window);
    assert(pool);

    const std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
    pool->setStride(step);
    pool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
    pool->setName(maxpoolLayerName.c_str());

    return pool;
}
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __MAXPOOL_LAYER_H__
#define __MAXPOOL_LAYER_H__

#include <map>
#include <string>  // std::string is used in the declaration below
#include <cassert>

#include "NvInfer.h"

/**
 * Adds a max-pooling layer described by a "maxpool" configuration block
 * (keys "size" and "stride") to `network` and returns it.
 *
 * @param layerIdx Index used as the numeric suffix of the layer name.
 * @param block    Parsed configuration block.
 * @param input    Tensor to pool.
 * @param network  Network definition the layer is added to.
 */
nvinfer1::ILayer* maxpoolLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network);

#endif
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "route_layer.h"
|
|
||||||
|
|
||||||
/**
 * Builds a "route" block: concatenates earlier layer outputs along axis 0 and,
 * when the block has a "groups" key, keeps only one channel group of the
 * result.
 *
 * "layers" is a comma-separated list of indices into `tensorOutputs`;
 * negative values count back from the end of that vector.
 *
 * @param layerIdx      Block index; the concat layer is named
 *                      "route_<layerIdx - 1>".
 * @param block         Parsed configuration block ("layers", optionally
 *                      "groups" together with "group_id").
 * @param tensorOutputs Outputs of the layers built so far.
 * @param network       Network definition the layers are added to.
 * @return The slice layer when "groups" is present, otherwise the concat layer.
 */
nvinfer1::ILayer* routeLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    std::vector<nvinfer1::ITensor*> tensorOutputs,
    nvinfer1::INetworkDefinition* network)
{
    const std::string strLayers = block.at("layers");
    std::vector<int> idxLayers;
    size_t lastPos = 0, pos = 0;
    while ((pos = strLayers.find(',', lastPos)) != std::string::npos) {
        const int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
        idxLayers.push_back(vL);
        lastPos = pos + 1;
    }
    if (lastPos < strLayers.length()) {
        // A trailing token may be blank (e.g. "1, 2, "); skip it in that case.
        const std::string lastV = trim(strLayers.substr(lastPos));
        if (!lastV.empty()) {
            idxLayers.push_back(std::stoi(lastV));
        }
    }
    assert(!idxLayers.empty());

    // Resolve indices in signed arithmetic instead of mixing size_t with int.
    const int numOutputs = static_cast<int>(tensorOutputs.size());
    std::vector<nvinfer1::ITensor*> concatInputs;
    concatInputs.reserve(idxLayers.size());
    for (int idxLayer : idxLayers) {
        if (idxLayer < 0) {
            idxLayer += numOutputs;
        }
        assert(idxLayer >= 0 && idxLayer < numOutputs);
        concatInputs.push_back(tensorOutputs[idxLayer]);
    }

    nvinfer1::IConcatenationLayer* concat = network->addConcatenation(
        concatInputs.data(), static_cast<int>(concatInputs.size()));
    assert(concat != nullptr);
    const std::string concatLayerName = "route_" + std::to_string(layerIdx - 1);
    concat->setName(concatLayerName.c_str());
    concat->setAxis(0);

    nvinfer1::ILayer* output = concat;

    const auto groupsIt = block.find("groups");
    if (groupsIt != block.end()) {
        assert(block.find("group_id") != block.end());

        const nvinfer1::Dims prevTensorDims = output->getOutput(0)->getDimensions();
        const int groups = std::stoi(groupsIt->second);
        const int group_id = std::stoi(block.at("group_id"));

        // groups == 0 would divide by zero; an out-of-range group_id would
        // slice outside the concatenated tensor.
        assert(groups > 0);
        assert(group_id >= 0 && group_id < groups);

        const int channelSlice = prevTensorDims.d[0] / groups;
        const int startSlice = channelSlice * group_id;
        nvinfer1::ISliceLayer* sl = network->addSlice(
            *output->getOutput(0),
            nvinfer1::Dims3{startSlice, 0, 0},
            nvinfer1::Dims3{channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]},
            nvinfer1::Dims3{1, 1, 1});
        assert(sl != nullptr);
        output = sl;
    }

    return output;
}
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __ROUTE_LAYER_H__
#define __ROUTE_LAYER_H__

// Standard types used directly in the declaration below; included here so the
// header does not depend on what "../utils.h" happens to pull in.
#include <map>
#include <string>
#include <vector>

#include "NvInfer.h"
#include "../utils.h"

/**
 * Builds a "route" configuration block: concatenates the outputs selected by
 * the block's "layers" list and, when "groups" is present, slices out one
 * channel group.
 *
 * @param layerIdx      Index of the block within the network configuration.
 * @param block         Parsed configuration block.
 * @param tensorOutputs Outputs of the layers built so far.
 * @param network       Network definition the layers are added to.
 */
nvinfer1::ILayer* routeLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    std::vector<nvinfer1::ITensor*> tensorOutputs,
    nvinfer1::INetworkDefinition* network);

#endif
|
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "shortcut_layer.h"
|
|
||||||
|
|
||||||
/**
 * Builds a "shortcut" (residual) block: element-wise sum of `input` and
 * `shortcutTensor`, followed by the requested activation.
 *
 * When the two volume strings differ, `shortcutTensor` is first sliced from
 * the origin to the dimensions of `input` before the sum.
 *
 * @return The layer produced by activationLayer() for the summed tensor.
 */
nvinfer1::ILayer* shortcutLayer(
    int layerIdx,
    std::string activation,
    std::string inputVol,
    std::string shortcutVol,
    nvinfer1::ITensor* input,
    nvinfer1::ITensor* shortcutTensor,
    nvinfer1::INetworkDefinition* network)
{
    // Default to the shortcut tensor as-is; replaced by a slice when the
    // volumes do not match.
    nvinfer1::ITensor* outputTensor = shortcutTensor;

    if (inputVol != shortcutVol)
    {
        nvinfer1::ISliceLayer* sl = network->addSlice(
            *shortcutTensor,
            nvinfer1::Dims3{0, 0, 0},
            input->getDimensions(),
            nvinfer1::Dims3{1, 1, 1});
        assert(sl != nullptr);
        outputTensor = sl->getOutput(0);
    }
    assert(outputTensor != nullptr);

    nvinfer1::IElementWiseLayer* ew = network->addElementWise(
        *input, *outputTensor, nvinfer1::ElementWiseOperation::kSUM);
    assert(ew != nullptr);

    nvinfer1::ILayer* output
        = activationLayer(layerIdx, activation, ew, ew->getOutput(0), network);
    assert(output != nullptr);

    return output;
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __SHORTCUT_LAYER_H__
#define __SHORTCUT_LAYER_H__

#include "NvInfer.h"

#include "activation_layer.h"

/**
 * Builds a "shortcut" configuration block: sums `input` with `shortcutTensor`
 * and applies `activation` to the result.
 *
 * `inputVol` and `shortcutVol` are compared as strings to decide whether the
 * shortcut tensor has to be sliced before the sum.
 */
nvinfer1::ILayer* shortcutLayer(int layerIdx,
                                std::string activation,
                                std::string inputVol,
                                std::string shortcutVol,
                                nvinfer1::ITensor* input,
                                nvinfer1::ITensor* shortcutTensor,
                                nvinfer1::INetworkDefinition* network);

#endif
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "upsample_layer.h"
|
|
||||||
|
|
||||||
/**
 * Builds an "upsample" block as a nearest-neighbour resize layer that scales
 * the last two dimensions by the block's "stride" and leaves the first
 * dimension unchanged. The layer is named "upsample_<layerIdx>".
 *
 * @param layerIdx Index used as the numeric suffix of the layer name.
 * @param block    Parsed configuration block; must contain "stride".
 * @param input    Tensor to upsample.
 * @param network  Network definition the layer is added to.
 * @return The resize layer that was added.
 */
nvinfer1::ILayer* upsampleLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "upsample");
    assert(block.find("stride") != block.end());
    const int stride = std::stoi(block.at("stride"));

    nvinfer1::IResizeLayer* resize_layer = network->addResize(*input);
    // Check before use, as the other layer builders do.
    assert(resize_layer != nullptr);
    resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST);

    // Explicit conversion: an int inside a braced float initialiser is a
    // narrowing conversion.
    const float factor = static_cast<float>(stride);
    const float scale[3] = {1.0f, factor, factor};
    resize_layer->setScales(scale, 3);

    const std::string layer_name = "upsample_" + std::to_string(layerIdx);
    resize_layer->setName(layer_name.c_str());
    return resize_layer;
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __UPSAMPLE_LAYER_H__
#define __UPSAMPLE_LAYER_H__

#include <map>
#include <string>  // std::string is used in the declaration below
#include <vector>
#include <cassert>

#include "NvInfer.h"

/**
 * Builds an "upsample" configuration block as a nearest-neighbour resize
 * layer scaled by the block's "stride", and returns that layer.
 *
 * @param layerIdx Index used as the numeric suffix of the layer name.
 * @param block    Parsed configuration block.
 * @param input    Tensor to upsample.
 * @param network  Network definition the layer is added to.
 */
nvinfer1::ILayer* upsampleLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network);

#endif
|
|
||||||
@@ -1,118 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
#include "nvdsinfer_context.h"
|
|
||||||
#include "yoloPlugins.h"
|
|
||||||
#include "yolo.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#define USE_CUDA_ENGINE_GET_API 1
|
|
||||||
|
|
||||||
/**
 * Fills `networkInfo` from the DeepStream init parameters.
 *
 * The network type is the lower-cased config path up to the first ".cfg".
 * networkMode 0/1/2 maps to "FP32"/"INT8"/"FP16"; any other value leaves
 * networkInfo.networkMode untouched.
 *
 * @return false (after logging to std::cerr) when the config or weights path
 *         is empty or fileExists() rejects it; true otherwise.
 */
static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams)
{
    std::string yoloCfg = initParams->customNetworkConfigFilePath;
    for (char& ch : yoloCfg) {
        const uint8_t c = ch;
        ch = std::tolower (c);
    }

    const std::string yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg"));

    networkInfo.networkType = yoloType;
    networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
    networkInfo.wtsFilePath = initParams->modelFilePath;
    networkInfo.int8CalibPath = initParams->int8CalibrationFilePath;
    networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
    networkInfo.inputBlobName = "data";

    switch (static_cast<int>(initParams->networkMode)) {
        case 0:
            networkInfo.networkMode = "FP32";
            break;
        case 1:
            networkInfo.networkMode = "INT8";
            break;
        case 2:
            networkInfo.networkMode = "FP16";
            break;
        default:
            break;
    }

    const bool pathsGiven
        = !networkInfo.configFilePath.empty() && !networkInfo.wtsFilePath.empty();
    if (!pathsGiven) {
        std::cerr << "YOLO config file or weights file is not specified"
                  << std::endl;
        return false;
    }

    // Short-circuits exactly like the original: the weights file is only
    // probed when the config file exists.
    const bool filesPresent
        = fileExists(networkInfo.configFilePath) && fileExists(networkInfo.wtsFilePath);
    if (!filesPresent) {
        std::cerr << "YOLO config file or weights file is not exist"
                  << std::endl;
        return false;
    }

    return true;
}
|
|
||||||
|
|
||||||
#if !USE_CUDA_ENGINE_GET_API
|
|
||||||
/**
 * Model-parser factory, compiled only when USE_CUDA_ENGINE_GET_API is 0.
 *
 * @return A heap-allocated Yolo built from the init parameters, or nullptr
 *         when getYoloNetworkInfo() rejects them. The caller owns the object.
 */
IModelParser* NvDsInferCreateModelParser(
    const NvDsInferContextInitParams* initParams) {
    NetworkInfo networkInfo;
    const bool infoOk = getYoloNetworkInfo(networkInfo, initParams);
    return infoOk ? new Yolo(networkInfo) : nullptr;
}
|
|
||||||
#else
|
|
||||||
extern "C"
|
|
||||||
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
|
|
||||||
const NvDsInferContextInitParams * const initParams,
|
|
||||||
nvinfer1::DataType dataType,
|
|
||||||
nvinfer1::ICudaEngine *& cudaEngine);
|
|
||||||
|
|
||||||
extern "C"
|
|
||||||
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
|
|
||||||
const NvDsInferContextInitParams * const initParams,
|
|
||||||
nvinfer1::DataType dataType,
|
|
||||||
nvinfer1::ICudaEngine *& cudaEngine)
|
|
||||||
{
|
|
||||||
NetworkInfo networkInfo;
|
|
||||||
if (!getYoloNetworkInfo(networkInfo, initParams)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
Yolo yolo(networkInfo);
|
|
||||||
cudaEngine = yolo.createEngine (builder);
|
|
||||||
if (cudaEngine == nullptr)
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to build CUDA engine on "
|
|
||||||
<< networkInfo.configFilePath << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@@ -1,378 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cmath>
|
|
||||||
#include <sstream>
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
#include "yoloPlugins.h"
|
|
||||||
|
|
||||||
extern "C" bool NvDsInferParseYolo(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList);
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
nonMaximumSuppression(const float nmsThresh, std::vector<NvDsInferParseObjectInfo> binfo)
|
|
||||||
{
|
|
||||||
auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float {
|
|
||||||
if (x1min > x2min)
|
|
||||||
{
|
|
||||||
std::swap(x1min, x2min);
|
|
||||||
std::swap(x1max, x2max);
|
|
||||||
}
|
|
||||||
return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min;
|
|
||||||
};
|
|
||||||
auto computeIoU
|
|
||||||
= [&overlap1D](NvDsInferParseObjectInfo& bbox1, NvDsInferParseObjectInfo& bbox2) -> float {
|
|
||||||
float overlapX
|
|
||||||
= overlap1D(bbox1.left, bbox1.left + bbox1.width, bbox2.left, bbox2.left + bbox2.width);
|
|
||||||
float overlapY
|
|
||||||
= overlap1D(bbox1.top, bbox1.top + bbox1.height, bbox2.top, bbox2.top + bbox2.height);
|
|
||||||
float area1 = (bbox1.width) * (bbox1.height);
|
|
||||||
float area2 = (bbox2.width) * (bbox2.height);
|
|
||||||
float overlap2D = overlapX * overlapY;
|
|
||||||
float u = area1 + area2 - overlap2D;
|
|
||||||
return u == 0 ? 0 : overlap2D / u;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::stable_sort(binfo.begin(), binfo.end(),
|
|
||||||
[](const NvDsInferParseObjectInfo& b1, const NvDsInferParseObjectInfo& b2) {
|
|
||||||
return b1.detectionConfidence > b2.detectionConfidence;
|
|
||||||
});
|
|
||||||
std::vector<NvDsInferParseObjectInfo> out;
|
|
||||||
for (auto i : binfo)
|
|
||||||
{
|
|
||||||
bool keep = true;
|
|
||||||
for (auto j : out)
|
|
||||||
{
|
|
||||||
if (keep)
|
|
||||||
{
|
|
||||||
float overlap = computeIoU(i, j);
|
|
||||||
keep = overlap <= nmsThresh;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (keep) out.push_back(i);
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
nmsAllClasses(const float nmsThresh,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& binfo,
|
|
||||||
const uint numClasses)
|
|
||||||
{
|
|
||||||
std::vector<NvDsInferParseObjectInfo> result;
|
|
||||||
std::vector<std::vector<NvDsInferParseObjectInfo>> splitBoxes(numClasses);
|
|
||||||
for (auto& box : binfo)
|
|
||||||
{
|
|
||||||
splitBoxes.at(box.classId).push_back(box);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto& boxes : splitBoxes)
|
|
||||||
{
|
|
||||||
boxes = nonMaximumSuppression(nmsThresh, boxes);
|
|
||||||
result.insert(result.end(), boxes.begin(), boxes.end());
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Converts a centre/size box into left/top/width/height.
 *
 * The centre (bx, by) is multiplied by `stride`; the corners are then clamped
 * to [0, netW] and [0, netH] with clamp(), and the resulting width and height
 * are clamped to the same ranges. Only left, top, width and height of the
 * returned struct are assigned here.
 */
static NvDsInferParseObjectInfo convertBBox(const float& bx, const float& by, const float& bw,
                                            const float& bh, const int& stride, const uint& netW,
                                            const uint& netH)
{
    const float centreX = bx * stride;
    const float centreY = by * stride;

    // Unclamped corners.
    float left = centreX - bw / 2;
    float top = centreY - bh / 2;
    float right = left + bw;
    float bottom = top + bh;

    left = clamp(left, 0, netW);
    top = clamp(top, 0, netH);
    right = clamp(right, 0, netW);
    bottom = clamp(bottom, 0, netH);

    NvDsInferParseObjectInfo b;
    b.left = left;
    b.width = clamp(right - left, 0, netW);
    b.top = top;
    b.height = clamp(bottom - top, 0, netH);

    return b;
}
|
|
||||||
|
|
||||||
static void addBBoxProposal(const float bx, const float by, const float bw, const float bh,
|
|
||||||
const uint stride, const uint& netW, const uint& netH, const int maxIndex,
|
|
||||||
const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
|
|
||||||
{
|
|
||||||
NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH);
|
|
||||||
if (bbi.width < 1 || bbi.height < 1) return;
|
|
||||||
|
|
||||||
bbi.detectionConfidence = maxProb;
|
|
||||||
bbi.classId = maxIndex;
|
|
||||||
binfo.push_back(bbi);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
decodeYoloTensor(
|
|
||||||
const float* detections, const std::vector<int> &mask, const std::vector<float> &anchors,
|
|
||||||
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
|
|
||||||
const uint numOutputClasses, const uint& netW,
|
|
||||||
const uint& netH,
|
|
||||||
const float confThresh)
|
|
||||||
{
|
|
||||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
|
||||||
for (uint y = 0; y < gridSizeH; ++y) {
|
|
||||||
for (uint x = 0; x < gridSizeW; ++x) {
|
|
||||||
for (uint b = 0; b < numBBoxes; ++b)
|
|
||||||
{
|
|
||||||
const float pw = anchors[mask[b] * 2];
|
|
||||||
const float ph = anchors[mask[b] * 2 + 1];
|
|
||||||
|
|
||||||
const int numGridCells = gridSizeH * gridSizeW;
|
|
||||||
const int bbindex = y * gridSizeW + x;
|
|
||||||
const float bx
|
|
||||||
= x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
|
|
||||||
const float by
|
|
||||||
= y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
|
|
||||||
const float bw
|
|
||||||
= pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)];
|
|
||||||
const float bh
|
|
||||||
= ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)];
|
|
||||||
|
|
||||||
const float objectness
|
|
||||||
= detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
|
|
||||||
|
|
||||||
float maxProb = 0.0f;
|
|
||||||
int maxIndex = -1;
|
|
||||||
|
|
||||||
for (uint i = 0; i < numOutputClasses; ++i)
|
|
||||||
{
|
|
||||||
float prob
|
|
||||||
= (detections[bbindex
|
|
||||||
+ numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
|
|
||||||
|
|
||||||
if (prob > maxProb)
|
|
||||||
{
|
|
||||||
maxProb = prob;
|
|
||||||
maxIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
maxProb = objectness * maxProb;
|
|
||||||
|
|
||||||
if (maxProb > confThresh)
|
|
||||||
{
|
|
||||||
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return binfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
decodeYoloV2Tensor(
|
|
||||||
const float* detections, const std::vector<float> &anchors,
|
|
||||||
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
|
|
||||||
const uint numOutputClasses, const uint& netW,
|
|
||||||
const uint& netH)
|
|
||||||
{
|
|
||||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
|
||||||
for (uint y = 0; y < gridSizeH; ++y) {
|
|
||||||
for (uint x = 0; x < gridSizeW; ++x) {
|
|
||||||
for (uint b = 0; b < numBBoxes; ++b)
|
|
||||||
{
|
|
||||||
const float pw = anchors[b * 2];
|
|
||||||
const float ph = anchors[b * 2 + 1];
|
|
||||||
|
|
||||||
const int numGridCells = gridSizeH * gridSizeW;
|
|
||||||
const int bbindex = y * gridSizeW + x;
|
|
||||||
const float bx
|
|
||||||
= x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
|
|
||||||
const float by
|
|
||||||
= y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
|
|
||||||
const float bw
|
|
||||||
= pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)];
|
|
||||||
const float bh
|
|
||||||
= ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)];
|
|
||||||
|
|
||||||
const float objectness
|
|
||||||
= detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
|
|
||||||
|
|
||||||
float maxProb = 0.0f;
|
|
||||||
int maxIndex = -1;
|
|
||||||
|
|
||||||
for (uint i = 0; i < numOutputClasses; ++i)
|
|
||||||
{
|
|
||||||
float prob
|
|
||||||
= (detections[bbindex
|
|
||||||
+ numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
|
|
||||||
|
|
||||||
if (prob > maxProb)
|
|
||||||
{
|
|
||||||
maxProb = prob;
|
|
||||||
maxIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
maxProb = objectness * maxProb;
|
|
||||||
|
|
||||||
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return binfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline std::vector<const NvDsInferLayerInfo*>
|
|
||||||
SortLayers(const std::vector<NvDsInferLayerInfo> & outputLayersInfo)
|
|
||||||
{
|
|
||||||
std::vector<const NvDsInferLayerInfo*> outLayers;
|
|
||||||
for (auto const &layer : outputLayersInfo) {
|
|
||||||
outLayers.push_back (&layer);
|
|
||||||
}
|
|
||||||
std::sort(outLayers.begin(), outLayers.end(),
|
|
||||||
[](const NvDsInferLayerInfo* a, const NvDsInferLayerInfo* b) {
|
|
||||||
return a->inferDims.d[1] < b->inferDims.d[1];
|
|
||||||
});
|
|
||||||
return outLayers;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool NvDsInferParseYolo(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList,
|
|
||||||
const std::vector<float> &anchors,
|
|
||||||
const std::vector<std::vector<int>> &masks,
|
|
||||||
const uint &num_classes,
|
|
||||||
const float &beta_nms)
|
|
||||||
{
|
|
||||||
const float kCONF_THRESH = detectionParams.perClassThreshold[0];
|
|
||||||
|
|
||||||
const std::vector<const NvDsInferLayerInfo*> sortedLayers =
|
|
||||||
SortLayers (outputLayersInfo);
|
|
||||||
|
|
||||||
if (sortedLayers.size() != masks.size()) {
|
|
||||||
std::cerr << "ERROR: YOLO output layer.size: " << sortedLayers.size()
|
|
||||||
<< " does not match mask.size: " << masks.size() << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (num_classes != detectionParams.numClassesConfigured)
|
|
||||||
{
|
|
||||||
std::cerr << "WARNING: Num classes mismatch. Configured: "
|
|
||||||
<< detectionParams.numClassesConfigured
|
|
||||||
<< ", detected by network: " << num_classes << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<NvDsInferParseObjectInfo> objects;
|
|
||||||
|
|
||||||
for (uint idx = 0; idx < masks.size(); ++idx) {
|
|
||||||
const NvDsInferLayerInfo &layer = *sortedLayers[idx]; // 255 x Grid x Grid
|
|
||||||
|
|
||||||
assert(layer.inferDims.numDims == 3);
|
|
||||||
const uint gridSizeH = layer.inferDims.d[1];
|
|
||||||
const uint gridSizeW = layer.inferDims.d[2];
|
|
||||||
const uint stride = DIVUP(networkInfo.width, gridSizeW);
|
|
||||||
|
|
||||||
std::vector<NvDsInferParseObjectInfo> outObjs =
|
|
||||||
decodeYoloTensor((const float*)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, masks[idx].size(),
|
|
||||||
num_classes, networkInfo.width, networkInfo.height, kCONF_THRESH);
|
|
||||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
objectList.clear();
|
|
||||||
objectList = nmsAllClasses(beta_nms, objects, num_classes);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool NvDsInferParseYoloV2(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList,
|
|
||||||
std::vector<float> &anchors,
|
|
||||||
const uint &num_classes)
|
|
||||||
{
|
|
||||||
if (outputLayersInfo.empty()) {
|
|
||||||
std::cerr << "Could not find output layer in bbox parsing" << std::endl;;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const uint kNUM_BBOXES = anchors.size() / 2;
|
|
||||||
|
|
||||||
const NvDsInferLayerInfo &layer = outputLayersInfo[0];
|
|
||||||
|
|
||||||
if (num_classes != detectionParams.numClassesConfigured)
|
|
||||||
{
|
|
||||||
std::cerr << "WARNING: Num classes mismatch. Configured: "
|
|
||||||
<< detectionParams.numClassesConfigured
|
|
||||||
<< ", detected by network: " << num_classes << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(layer.inferDims.numDims == 3);
|
|
||||||
const uint gridSizeH = layer.inferDims.d[1];
|
|
||||||
const uint gridSizeW = layer.inferDims.d[2];
|
|
||||||
const uint stride = DIVUP(networkInfo.width, gridSizeW);
|
|
||||||
for (auto& anchor : anchors) {
|
|
||||||
anchor *= stride;
|
|
||||||
}
|
|
||||||
std::vector<NvDsInferParseObjectInfo> objects =
|
|
||||||
decodeYoloV2Tensor((const float*)(layer.buffer), anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES,
|
|
||||||
num_classes, networkInfo.width, networkInfo.height);
|
|
||||||
|
|
||||||
objectList = objects;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" bool NvDsInferParseYolo(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList)
|
|
||||||
{
|
|
||||||
|
|
||||||
int num_classes = kNUM_CLASSES;
|
|
||||||
float beta_nms = kBETA_NMS;
|
|
||||||
std::vector<float> anchors = kANCHORS;
|
|
||||||
std::vector<std::vector<int>> mask = kMASK;
|
|
||||||
|
|
||||||
if (mask.size() > 0) {
|
|
||||||
return NvDsInferParseYolo (outputLayersInfo, networkInfo, detectionParams, objectList, anchors, mask, num_classes, beta_nms);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return NvDsInferParseYoloV2 (outputLayersInfo, networkInfo, detectionParams, objectList, anchors, num_classes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
|
|
||||||
@@ -1,150 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
#include <experimental/filesystem>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
/**
 * Remove leading whitespace from @p s in place.
 *
 * @param s String to trim; unchanged if it has no leading whitespace.
 */
static void leftTrim(std::string& s)
{
    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so each character is converted to unsigned char first; a
    // plain (possibly signed) char with the high bit set would be negative.
    const auto firstKept = std::find_if(s.begin(), s.end(),
                                        [](unsigned char ch) { return !isspace(ch); });
    s.erase(s.begin(), firstKept);
}
|
|
||||||
|
|
||||||
/**
 * Remove trailing whitespace from @p s in place.
 *
 * @param s String to trim; unchanged if it has no trailing whitespace.
 */
static void rightTrim(std::string& s)
{
    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so each character is converted to unsigned char first; a
    // plain (possibly signed) char with the high bit set would be negative.
    const auto lastKept = std::find_if(s.rbegin(), s.rend(),
                                       [](unsigned char ch) { return !isspace(ch); });
    // base() points one past the last non-space character.
    s.erase(lastKept.base(), s.end());
}
|
|
||||||
|
|
||||||
std::string trim(std::string s)
|
|
||||||
{
|
|
||||||
leftTrim(s);
|
|
||||||
rightTrim(s);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Limit @p val to the closed interval [minVal, maxVal].
 *
 * @param val     Value to limit.
 * @param minVal  Lower bound; must not exceed @p maxVal (asserted).
 * @param maxVal  Upper bound.
 * @return minVal if val is below it, maxVal if val is above it, otherwise val.
 */
float clamp(const float val, const float minVal, const float maxVal)
{
    assert(minVal <= maxVal);

    // Raise to the lower bound first, then cap at the upper bound.
    const float raised = (minVal < val) ? val : minVal;
    return (raised < maxVal) ? raised : maxVal;
}
|
|
||||||
|
|
||||||
/**
 * Check whether a path exists on the file system.
 *
 * @param fileName  Path to test.
 * @param verbose   When true, a message is printed to stdout if the path is missing.
 * @return true if the path exists, false otherwise.
 */
bool fileExists(const std::string fileName, bool verbose)
{
    namespace fs = std::experimental::filesystem;

    const bool found = fs::exists(fs::path(fileName));
    if (!found && verbose)
    {
        std::cout << "File does not exist: " << fileName << std::endl;
    }
    return found;
}
|
|
||||||
|
|
||||||
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
|
|
||||||
{
|
|
||||||
assert(fileExists(weightsFilePath));
|
|
||||||
std::cout << "\nLoading pre-trained weights" << std::endl;
|
|
||||||
std::ifstream file(weightsFilePath, std::ios_base::binary);
|
|
||||||
assert(file.good());
|
|
||||||
std::string line;
|
|
||||||
|
|
||||||
if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos)
|
|
||||||
{
|
|
||||||
// Remove 4 int32 bytes of data from the stream belonging to the header
|
|
||||||
file.ignore(4 * 4);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Remove 5 int32 bytes of data from the stream belonging to the header
|
|
||||||
file.ignore(4 * 5);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> weights;
|
|
||||||
char floatWeight[4];
|
|
||||||
while (!file.eof())
|
|
||||||
{
|
|
||||||
file.read(floatWeight, 4);
|
|
||||||
assert(file.gcount() == 4);
|
|
||||||
weights.push_back(*reinterpret_cast<float*>(floatWeight));
|
|
||||||
if (file.peek() == std::istream::traits_type::eof()) break;
|
|
||||||
}
|
|
||||||
std::cout << "Loading weights of " << networkType << " complete"
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << "Total weights read: " << weights.size() << std::endl;
|
|
||||||
return weights;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Format tensor dimensions as text, e.g. "   3 x 416 x 416".
 *
 * Each dimension is right-aligned in a field of width 4 and consecutive
 * dimensions are joined with " x".
 *
 * @param d Dimensions to format; must have at least one dimension (asserted).
 * @return The formatted string.
 */
std::string dimsToString(const nvinfer1::Dims d)
{
    assert(d.nbDims >= 1);

    std::stringstream text;
    const int lastIndex = d.nbDims - 1;
    for (int i = 0; i <= lastIndex; ++i)
    {
        text << std::setw(4) << d.d[i];
        if (i != lastIndex)
        {
            text << " x";
        }
    }

    return text.str();
}
|
|
||||||
|
|
||||||
/**
 * Print the number of dimensions followed by the type name of each
 * dimension to stdout, ending with a newline.
 *
 * Dimension types other than the four handled below print nothing.
 *
 * @param d Dimensions whose per-axis types are printed.
 */
void displayDimType(const nvinfer1::Dims d)
{
    std::cout << "(" << d.nbDims << ") ";
    for (int axis = 0; axis < d.nbDims; ++axis)
    {
        const char* label = nullptr;
        switch (d.type[axis])
        {
        case nvinfer1::DimensionType::kSPATIAL: label = "kSPATIAL "; break;
        case nvinfer1::DimensionType::kCHANNEL: label = "kCHANNEL "; break;
        case nvinfer1::DimensionType::kINDEX: label = "kINDEX "; break;
        case nvinfer1::DimensionType::kSEQUENCE: label = "kSEQUENCE "; break;
        }
        if (label != nullptr)
        {
            std::cout << label;
        }
    }
    std::cout << std::endl;
}
|
|
||||||
|
|
||||||
/**
 * Return the first dimension of a 3-D tensor, which this file treats as the
 * channel count.
 *
 * @param t Tensor to query; must report exactly three dimensions (asserted).
 * @return The extent of dimension 0.
 */
int getNumChannels(nvinfer1::ITensor* t)
{
    const nvinfer1::Dims dims = t->getDimensions();
    assert(dims.nbDims == 3);
    return dims.d[0];
}
|
|
||||||
|
|
||||||
/**
 * Compute the number of elements of a 3-D tensor.
 *
 * @param inputDims Dimensions of the tensor; must have exactly three (asserted).
 * @return d[0] * d[1] * d[2] as a 64-bit value.
 */
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
{
    assert(inputDims.nbDims == 3);
    // Widen before multiplying so the product is computed in 64 bits instead
    // of overflowing the narrower dimension type.
    return static_cast<uint64_t>(inputDims.d[0])
        * static_cast<uint64_t>(inputDims.d[1])
        * static_cast<uint64_t>(inputDims.d[2]);
}
|
|
||||||
|
|
||||||
/**
 * Print one left-aligned row of the layer table to stdout.
 *
 * Column widths are 6, 24, 20, 20 and 7 characters; the row ends with a
 * newline and a flush. std::cout is left in left-aligned mode afterwards.
 *
 * @param layerIndex   Text for the index column.
 * @param layerName    Text for the layer name column.
 * @param layerInput   Text for the input dimensions column.
 * @param layerOutput  Text for the output dimensions column.
 * @param weightPtr    Text for the weight pointer column.
 */
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
                    std::string layerOutput, std::string weightPtr)
{
    // std::left is sticky, so setting it once covers every column.
    std::cout << std::left
              << std::setw(6) << layerIndex
              << std::setw(24) << layerName
              << std::setw(20) << layerInput
              << std::setw(20) << layerOutput
              << std::setw(7) << weightPtr
              << std::endl;
}
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __UTILS_H__
|
|
||||||
#define __UTILS_H__
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
#include <cassert>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
#define UNUSED(expr) (void)(expr)
|
|
||||||
// Integer division of n by d, rounded up. The whole expansion is
// parenthesised so the macro can be used inside larger expressions
// (e.g. `x / DIVUP(a, b)`) without operator-precedence surprises.
#define DIVUP(n, d) (((n) + (d) - 1) / (d))
|
|
||||||
|
|
||||||
std::string trim(std::string s);
|
|
||||||
float clamp(const float val, const float minVal, const float maxVal);
|
|
||||||
bool fileExists(const std::string fileName, bool verbose = true);
|
|
||||||
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
|
|
||||||
std::string dimsToString(const nvinfer1::Dims d);
|
|
||||||
void displayDimType(const nvinfer1::Dims d);
|
|
||||||
int getNumChannels(nvinfer1::ITensor* t);
|
|
||||||
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims);
|
|
||||||
|
|
||||||
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
|
|
||||||
std::string layerOutput, std::string weightPtr);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,507 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "yolo.h"
|
|
||||||
#include "yoloPlugins.h"
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#ifdef OPENCV
|
|
||||||
#include "calibrator.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
 * Reorder the mask lists so that their first elements are in descending order.
 *
 * Works on a copy and writes the result back through the pointer. Every inner
 * vector must have at least one element.
 *
 * @param maskVector List of mask lists to reorder in place.
 */
void orderParams(std::vector<std::vector<int>> *maskVector) {
    std::vector<std::vector<int>> ordered = *maskVector;
    const size_t count = ordered.size();

    // Exchange sort: after pass `slot`, position `slot` holds the entry with
    // the largest leading value among the remaining ones (ties are swapped too).
    for (size_t slot = 0; slot < count; ++slot) {
        for (size_t later = slot + 1; later < count; ++later) {
            if (ordered[later][0] >= ordered[slot][0]) {
                ordered[slot].swap(ordered[later]);
            }
        }
    }

    *maskVector = ordered;
}
|
|
||||||
|
|
||||||
/**
 * Copy the user-supplied network description into the object.
 *
 * The input dimensions and input size start at zero; they are not taken
 * from @p networkInfo.
 *
 * @param networkInfo Paths, network mode, device type and input blob name.
 */
Yolo::Yolo(const NetworkInfo& networkInfo)
    : m_NetworkType(networkInfo.networkType),     // YOLO variant name
      m_ConfigFilePath(networkInfo.configFilePath), // path of the .cfg file
      m_WtsFilePath(networkInfo.wtsFilePath),       // path of the weights file
      m_Int8CalibPath(networkInfo.int8CalibPath),   // path of the INT8 calibration file
      m_NetworkMode(networkInfo.networkMode),       // FP32, INT8 or FP16
      m_DeviceType(networkInfo.deviceType),         // kDLA or kGPU
      m_InputBlobName(networkInfo.inputBlobName),   // name of the input tensor
      m_InputH(0),
      m_InputW(0),
      m_InputC(0),
      m_InputSize(0)
{
}
|
|
||||||
|
|
||||||
/** Release the network helper resources via destroyNetworkUtils(). */
Yolo::~Yolo()
{
    this->destroyNetworkUtils();
}
|
|
||||||
|
|
||||||
/**
 * Parse the configuration, build the network and compile it into an engine.
 *
 * When the network mode is "INT8" and the calibration file does not exist,
 * an INT8 calibrator is attached to the builder. This needs an OPENCV build
 * and the INT8_CALIB_IMG_PATH and INT8_CALIB_BATCH_SIZE environment
 * variables; the process is aborted if either requirement is missing.
 *
 * The weights file is loaded once, inside parseModel().
 *
 * @param builder TensorRT builder; must not be null (asserted).
 * @return The built engine, or nullptr if the network could not be created,
 *         parsed or built.
 */
nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder)
{
    assert (builder);

    m_ConfigBlocks = parseConfigFile(m_ConfigFilePath);
    parseConfigBlocks();
    orderParams(&m_OutputMasks);

    if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) {
        assert(builder->platformHasFastInt8());
#ifdef OPENCV
        std::string calib_image_list;
        int calib_batch_size;

        const char* calibImgEnv = getenv("INT8_CALIB_IMG_PATH");
        if (calibImgEnv) {
            calib_image_list = calibImgEnv;
        }
        else {
            std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl;
            std::abort();
        }

        const char* calibBatchEnv = getenv("INT8_CALIB_BATCH_SIZE");
        if (calibBatchEnv) {
            // std::stoi throws if the value is not a valid integer.
            calib_batch_size = std::stoi(calibBatchEnv);
        }
        else {
            std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl;
            std::abort();
        }

        // NOTE(review): the calibrator is never deleted in this function;
        // presumably it has to stay alive until the engine is built - confirm
        // who owns it afterwards.
        nvinfer1::int8EntroyCalibrator *calibrator = new nvinfer1::int8EntroyCalibrator(calib_batch_size, m_InputC, m_InputH, m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath);
        builder->setInt8Mode(true);
        builder->setInt8Calibrator(calibrator);
#else
        std::cerr << "OpenCV is required to run INT8 calibrator" << std::endl;
        std::abort();
#endif
    }

    nvinfer1::INetworkDefinition *network = builder->createNetwork();
    if (network == nullptr) {
        // Avoid dereferencing a null network below.
        std::cerr << "Failed to create TensorRT network definition" << std::endl;
        return nullptr;
    }

    if (parseModel(*network) != NVDSINFER_SUCCESS) {
        network->destroy();
        return nullptr;
    }

    std::cout << "Building the TensorRT Engine" << std::endl;

    if (m_LetterBox == 1) {
        std::cout << "\nNOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file to get better accuracy\n" << std::endl;
    }

    nvinfer1::ICudaEngine * engine = builder->buildCudaEngine(*network);
    if (engine) {
        std::cout << "Building complete\n" << std::endl;
    } else {
        std::cerr << "Building engine failed\n" << std::endl;
    }

    // The network definition is no longer needed once the build has finished.
    network->destroy();
    return engine;
}
|
|
||||||
|
|
||||||
/**
 * Load the weights and populate @p network with the YOLO layers.
 *
 * Any previously created network helper resources are released first.
 *
 * @param network Empty TensorRT network definition to fill.
 * @return The status reported by buildYoloNetwork().
 */
NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
    destroyNetworkUtils();

    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);

    std::cout << "Building YOLO network" << std::endl;
    const NvDsInferStatus status = buildYoloNetwork(weights, network);

    if (status != NVDSINFER_SUCCESS) {
        std::cerr << "Building YOLO network failed" << std::endl;
        return status;
    }

    std::cout << "Building YOLO network complete" << std::endl;
    return status;
}
|
|
||||||
|
|
||||||
NvDsInferStatus Yolo::buildYoloNetwork(
|
|
||||||
std::vector<float>& weights, nvinfer1::INetworkDefinition& network) {
|
|
||||||
int weightPtr = 0;
|
|
||||||
int channels = m_InputC;
|
|
||||||
|
|
||||||
nvinfer1::ITensor* data =
|
|
||||||
network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
|
|
||||||
nvinfer1::DimsCHW{static_cast<int>(m_InputC),
|
|
||||||
static_cast<int>(m_InputH), static_cast<int>(m_InputW)});
|
|
||||||
assert(data != nullptr && data->getDimensions().nbDims > 0);
|
|
||||||
|
|
||||||
nvinfer1::ITensor* previous = data;
|
|
||||||
std::vector<nvinfer1::ITensor*> tensorOutputs;
|
|
||||||
uint outputTensorCount = 0;
|
|
||||||
|
|
||||||
for (uint i = 0; i < m_ConfigBlocks.size(); ++i) {
|
|
||||||
assert(getNumChannels(previous) == channels);
|
|
||||||
std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";
|
|
||||||
|
|
||||||
if (m_ConfigBlocks.at(i).at("type") == "net") {
|
|
||||||
printLayerInfo("", "layer", " input", " outup", "weightPtr");
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "convolutional") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
std::string layerType = "conv_" + m_ConfigBlocks.at(i).at("activation");
|
|
||||||
printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "dropout") {
|
|
||||||
assert(m_ConfigBlocks.at(i).find("probability") != m_ConfigBlocks.at(i).end());
|
|
||||||
//float probability = std::stof(m_ConfigBlocks.at(i).at("probability"));
|
|
||||||
//nvinfer1::ILayer* out = dropoutLayer(probability, previous, &network);
|
|
||||||
//previous = out->getOutput(0);
|
|
||||||
//Skip dropout layer
|
|
||||||
assert(previous != nullptr);
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "dropout", " -", " -", " -");
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "shortcut") {
|
|
||||||
assert(m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end());
|
|
||||||
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
|
|
||||||
std::string activation = m_ConfigBlocks.at(i).at("activation");
|
|
||||||
int from = stoi(m_ConfigBlocks.at(i).at("from"));
|
|
||||||
if (from > 0) {
|
|
||||||
from = from - i + 1;
|
|
||||||
}
|
|
||||||
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
|
|
||||||
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
|
||||||
assert(i + from - 1 < i - 2);
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = shortcutLayer(i, activation, inputVol, shortcutVol, previous, tensorOutputs[i + from - 1], &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
std::string layerType = "shortcut_" + m_ConfigBlocks.at(i).at("activation") + ": " + std::to_string(i + from - 1);
|
|
||||||
printLayerInfo(layerIndex, layerType, " -", outputVol, " -");
|
|
||||||
if (inputVol != shortcutVol) {
|
|
||||||
std::cout << inputVol << " +" << shortcutVol << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "route") {
|
|
||||||
assert(m_ConfigBlocks.at(i).find("layers") != m_ConfigBlocks.at(i).end());
|
|
||||||
nvinfer1::ILayer* out = routeLayer(i, m_ConfigBlocks.at(i), tensorOutputs, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "upsample") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], previous, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -");
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "maxpool") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = maxpoolLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "yolo") {
|
|
||||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
|
||||||
TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
|
|
||||||
curYoloTensor.gridSizeY = prevTensorDims.d[1];
|
|
||||||
curYoloTensor.gridSizeX = prevTensorDims.d[2];
|
|
||||||
curYoloTensor.stride = m_InputH / curYoloTensor.gridSizeY;
|
|
||||||
m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSizeY
|
|
||||||
* curYoloTensor.gridSizeX
|
|
||||||
* (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
|
|
||||||
std::string layerName = "yolo_" + std::to_string(i);
|
|
||||||
curYoloTensor.blobName = layerName;
|
|
||||||
int new_coords = 0;
|
|
||||||
float scale_x_y = 1;
|
|
||||||
float beta_nms = 0.45;
|
|
||||||
if (m_ConfigBlocks.at(i).find("new_coords") != m_ConfigBlocks.at(i).end()) {
|
|
||||||
new_coords = std::stoi(m_ConfigBlocks.at(i).at("new_coords"));
|
|
||||||
}
|
|
||||||
if (m_ConfigBlocks.at(i).find("scale_x_y") != m_ConfigBlocks.at(i).end()) {
|
|
||||||
scale_x_y = std::stof(m_ConfigBlocks.at(i).at("scale_x_y"));
|
|
||||||
}
|
|
||||||
if (m_ConfigBlocks.at(i).find("beta_nms") != m_ConfigBlocks.at(i).end()) {
|
|
||||||
beta_nms = std::stof(m_ConfigBlocks.at(i).at("beta_nms"));
|
|
||||||
}
|
|
||||||
nvinfer1::IPluginV2* yoloPlugin
|
|
||||||
= new YoloLayer(m_OutputTensors.at(outputTensorCount).numBBoxes,
|
|
||||||
m_OutputTensors.at(outputTensorCount).numClasses,
|
|
||||||
m_OutputTensors.at(outputTensorCount).gridSizeX,
|
|
||||||
m_OutputTensors.at(outputTensorCount).gridSizeY,
|
|
||||||
1, new_coords, scale_x_y, beta_nms,
|
|
||||||
curYoloTensor.anchors,
|
|
||||||
m_OutputMasks);
|
|
||||||
assert(yoloPlugin != nullptr);
|
|
||||||
nvinfer1::IPluginV2Layer* yolo =
|
|
||||||
network.addPluginV2(&previous, 1, *yoloPlugin);
|
|
||||||
assert(yolo != nullptr);
|
|
||||||
yolo->setName(layerName.c_str());
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
previous = yolo->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
previous->setName(layerName.c_str());
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
network.markOutput(*previous);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
tensorOutputs.push_back(yolo->getOutput(0));
|
|
||||||
printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
++outputTensorCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
//YOLOv2 support
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "region") {
|
|
||||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
|
||||||
TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount);
|
|
||||||
curRegionTensor.gridSizeY = prevTensorDims.d[1];
|
|
||||||
curRegionTensor.gridSizeX = prevTensorDims.d[2];
|
|
||||||
curRegionTensor.stride = m_InputH / curRegionTensor.gridSizeY;
|
|
||||||
m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSizeY
|
|
||||||
* curRegionTensor.gridSizeX
|
|
||||||
* (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses));
|
|
||||||
std::string layerName = "region_" + std::to_string(i);
|
|
||||||
curRegionTensor.blobName = layerName;
|
|
||||||
std::vector<std::vector<int>> mask;
|
|
||||||
nvinfer1::IPluginV2* regionPlugin
|
|
||||||
= new YoloLayer(curRegionTensor.numBBoxes,
|
|
||||||
curRegionTensor.numClasses,
|
|
||||||
curRegionTensor.gridSizeX,
|
|
||||||
curRegionTensor.gridSizeY,
|
|
||||||
0, 0, 1.0, 0,
|
|
||||||
curRegionTensor.anchors,
|
|
||||||
mask);
|
|
||||||
assert(regionPlugin != nullptr);
|
|
||||||
nvinfer1::IPluginV2Layer* region =
|
|
||||||
network.addPluginV2(&previous, 1, *regionPlugin);
|
|
||||||
assert(region != nullptr);
|
|
||||||
region->setName(layerName.c_str());
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
previous = region->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
previous->setName(layerName.c_str());
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
network.markOutput(*previous);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
tensorOutputs.push_back(region->getOutput(0));
|
|
||||||
printLayerInfo(layerIndex, "region", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
++outputTensorCount;
|
|
||||||
}
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "reorg") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
|
|
||||||
assert(reorgPlugin != nullptr);
|
|
||||||
nvinfer1::IPluginV2Layer* reorg =
|
|
||||||
network.addPluginV2(&previous, 1, *reorgPlugin);
|
|
||||||
assert(reorg != nullptr);
|
|
||||||
std::string layerName = "reorg_" + std::to_string(i);
|
|
||||||
reorg->setName(layerName.c_str());
|
|
||||||
previous = reorg->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
tensorOutputs.push_back(reorg->getOutput(0));
|
|
||||||
printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::cout << "Unsupported layer type --> \""
|
|
||||||
<< m_ConfigBlocks.at(i).at("type") << "\"" << std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((int)weights.size() != weightPtr)
|
|
||||||
{
|
|
||||||
std::cout << "Number of unused weights left: " << weights.size() - weightPtr << std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Output YOLO blob names: " << std::endl;
|
|
||||||
for (auto& tensor : m_OutputTensors) {
|
|
||||||
std::cout << tensor.blobName << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
int nbLayers = network.getNbLayers();
|
|
||||||
std::cout << "Total number of YOLO layers: " << nbLayers << std::endl;
|
|
||||||
|
|
||||||
return NVDSINFER_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::map<std::string, std::string>>
|
|
||||||
Yolo::parseConfigFile (const std::string cfgFilePath)
|
|
||||||
{
|
|
||||||
assert(fileExists(cfgFilePath));
|
|
||||||
std::ifstream file(cfgFilePath);
|
|
||||||
assert(file.good());
|
|
||||||
std::string line;
|
|
||||||
std::vector<std::map<std::string, std::string>> blocks;
|
|
||||||
std::map<std::string, std::string> block;
|
|
||||||
|
|
||||||
while (getline(file, line))
|
|
||||||
{
|
|
||||||
if (line.size() == 0) continue;
|
|
||||||
if (line.front() == '#') continue;
|
|
||||||
line = trim(line);
|
|
||||||
if (line.front() == '[')
|
|
||||||
{
|
|
||||||
if (block.size() > 0)
|
|
||||||
{
|
|
||||||
blocks.push_back(block);
|
|
||||||
block.clear();
|
|
||||||
}
|
|
||||||
std::string key = "type";
|
|
||||||
std::string value = trim(line.substr(1, line.size() - 2));
|
|
||||||
block.insert(std::pair<std::string, std::string>(key, value));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int cpos = line.find('=');
|
|
||||||
std::string key = trim(line.substr(0, cpos));
|
|
||||||
std::string value = trim(line.substr(cpos + 1));
|
|
||||||
block.insert(std::pair<std::string, std::string>(key, value));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
blocks.push_back(block);
|
|
||||||
return blocks;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Yolo::parseConfigBlocks()
|
|
||||||
{
|
|
||||||
for (auto block : m_ConfigBlocks) {
|
|
||||||
if (block.at("type") == "net")
|
|
||||||
{
|
|
||||||
assert((block.find("height") != block.end())
|
|
||||||
&& "Missing 'height' param in network cfg");
|
|
||||||
assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg");
|
|
||||||
assert((block.find("channels") != block.end())
|
|
||||||
&& "Missing 'channels' param in network cfg");
|
|
||||||
|
|
||||||
m_InputH = std::stoul(block.at("height"));
|
|
||||||
m_InputW = std::stoul(block.at("width"));
|
|
||||||
m_InputC = std::stoul(block.at("channels"));
|
|
||||||
m_InputSize = m_InputC * m_InputH * m_InputW;
|
|
||||||
|
|
||||||
if (block.find("letter_box") != block.end()) {
|
|
||||||
m_LetterBox = std::stoul(block.at("letter_box"));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
m_LetterBox = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if ((block.at("type") == "region") || (block.at("type") == "yolo"))
|
|
||||||
{
|
|
||||||
assert((block.find("num") != block.end())
|
|
||||||
&& std::string("Missing 'num' param in " + block.at("type") + " layer").c_str());
|
|
||||||
assert((block.find("classes") != block.end())
|
|
||||||
&& std::string("Missing 'classes' param in " + block.at("type") + " layer")
|
|
||||||
.c_str());
|
|
||||||
assert((block.find("anchors") != block.end())
|
|
||||||
&& std::string("Missing 'anchors' param in " + block.at("type") + " layer")
|
|
||||||
.c_str());
|
|
||||||
|
|
||||||
TensorInfo outputTensor;
|
|
||||||
std::string anchorString = block.at("anchors");
|
|
||||||
while (!anchorString.empty())
|
|
||||||
{
|
|
||||||
int npos = anchorString.find_first_of(',');
|
|
||||||
if (npos != -1)
|
|
||||||
{
|
|
||||||
float anchor = std::stof(trim(anchorString.substr(0, npos)));
|
|
||||||
outputTensor.anchors.push_back(anchor);
|
|
||||||
anchorString.erase(0, npos + 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
float anchor = std::stof(trim(anchorString));
|
|
||||||
outputTensor.anchors.push_back(anchor);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (block.find("mask") != block.end()) {
|
|
||||||
|
|
||||||
std::string maskString = block.at("mask");
|
|
||||||
std::vector<int> pMASKS;
|
|
||||||
while (!maskString.empty())
|
|
||||||
{
|
|
||||||
int npos = maskString.find_first_of(',');
|
|
||||||
if (npos != -1)
|
|
||||||
{
|
|
||||||
int mask = std::stoul(trim(maskString.substr(0, npos)));
|
|
||||||
pMASKS.push_back(mask);
|
|
||||||
outputTensor.masks.push_back(mask);
|
|
||||||
maskString.erase(0, npos + 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int mask = std::stoul(trim(maskString));
|
|
||||||
pMASKS.push_back(mask);
|
|
||||||
outputTensor.masks.push_back(mask);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_OutputMasks.push_back(pMASKS);
|
|
||||||
}
|
|
||||||
|
|
||||||
outputTensor.numBBoxes = outputTensor.masks.size() > 0
|
|
||||||
? outputTensor.masks.size()
|
|
||||||
: std::stoul(trim(block.at("num")));
|
|
||||||
outputTensor.numClasses = std::stoul(block.at("classes"));
|
|
||||||
m_OutputTensors.push_back(outputTensor);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Yolo::destroyNetworkUtils() {
|
|
||||||
for (uint i = 0; i < m_TrtWeights.size(); ++i) {
|
|
||||||
if (m_TrtWeights[i].count > 0)
|
|
||||||
free(const_cast<void*>(m_TrtWeights[i].values));
|
|
||||||
}
|
|
||||||
m_TrtWeights.clear();
|
|
||||||
}
|
|
||||||
@@ -1,105 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _YOLO_H_
|
|
||||||
#define _YOLO_H_
|
|
||||||
|
|
||||||
#include "layers/convolutional_layer.h"
|
|
||||||
#include "layers/dropout_layer.h"
|
|
||||||
#include "layers/shortcut_layer.h"
|
|
||||||
#include "layers/route_layer.h"
|
|
||||||
#include "layers/upsample_layer.h"
|
|
||||||
#include "layers/maxpool_layer.h"
|
|
||||||
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
|
|
||||||
// Plain bundle of string settings passed to the Yolo constructor.
// NOTE(review): the meaning of each field is inferred from its name and the
// matching m_* members of Yolo - confirm against the code that fills it.
struct NetworkInfo
{
    std::string networkType;    // model family / name
    std::string configFilePath; // network cfg file
    std::string wtsFilePath;    // weights file
    std::string int8CalibPath;  // INT8 calibration file
    std::string networkMode;    // precision mode
    std::string deviceType;     // target device
    std::string inputBlobName;  // name of the network input tensor
};
|
|
||||||
|
|
||||||
// Description of one detection output of the network. Every numeric field
// starts at zero, bindingIndex at -1 and hostBuffer at nullptr.
struct TensorInfo
{
    std::string blobName;        // name given to the output tensor
    uint stride = 0;             // input height divided by gridSizeY
    uint gridSizeY = 0;          // grid rows
    uint gridSizeX = 0;          // grid columns
    uint numClasses = 0;         // "classes" value of the layer
    uint numBBoxes = 0;          // boxes per grid cell
    uint64_t volume = 0;         // gridSizeY * gridSizeX * numBBoxes * (5 + numClasses)
    std::vector<uint> masks;     // "mask" entries of the layer, if any
    std::vector<float> anchors;  // "anchors" entries of the layer
    int bindingIndex = -1;       // -1 until assigned
    float* hostBuffer = nullptr; // nullptr until assigned
};
|
|
||||||
|
|
||||||
class Yolo : public IModelParser {
|
|
||||||
public:
|
|
||||||
Yolo(const NetworkInfo& networkInfo);
|
|
||||||
~Yolo() override;
|
|
||||||
bool hasFullDimsSupported() const override { return false; }
|
|
||||||
const char* getModelName() const override {
|
|
||||||
return m_ConfigFilePath.empty() ? m_NetworkType.c_str()
|
|
||||||
: m_ConfigFilePath.c_str();
|
|
||||||
}
|
|
||||||
NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
|
|
||||||
|
|
||||||
nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
const std::string m_NetworkType;
|
|
||||||
const std::string m_ConfigFilePath;
|
|
||||||
const std::string m_WtsFilePath;
|
|
||||||
const std::string m_Int8CalibPath;
|
|
||||||
const std::string m_NetworkMode;
|
|
||||||
const std::string m_DeviceType;
|
|
||||||
const std::string m_InputBlobName;
|
|
||||||
std::vector<TensorInfo> m_OutputTensors;
|
|
||||||
std::vector<std::vector<int>> m_OutputMasks;
|
|
||||||
std::vector<std::map<std::string, std::string>> m_ConfigBlocks;
|
|
||||||
uint m_InputH;
|
|
||||||
uint m_InputW;
|
|
||||||
uint m_InputC;
|
|
||||||
uint64_t m_InputSize;
|
|
||||||
uint m_LetterBox;
|
|
||||||
|
|
||||||
std::vector<nvinfer1::Weights> m_TrtWeights;
|
|
||||||
|
|
||||||
private:
|
|
||||||
NvDsInferStatus buildYoloNetwork(
|
|
||||||
std::vector<float>& weights, nvinfer1::INetworkDefinition& network);
|
|
||||||
std::vector<std::map<std::string, std::string>> parseConfigFile(
|
|
||||||
const std::string cfgFilePath);
|
|
||||||
void parseConfigBlocks();
|
|
||||||
void destroyNetworkUtils();
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // _YOLO_H_
|
|
||||||
@@ -1,166 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved.
|
|
||||||
*
|
|
||||||
* NVIDIA Corporation and its licensors retain all intellectual property
|
|
||||||
* and proprietary rights in and to this software, related documentation
|
|
||||||
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
||||||
* distribution of this software and related documentation without an express
|
|
||||||
* license agreement from NVIDIA Corporation is strictly prohibited.
|
|
||||||
*
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <cuda.h>
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
// Logistic function 1 / (1 + e^-x), evaluated with the __expf intrinsic.
inline __device__ float sigmoidGPU(const float& x)
{
    const float expNegX = __expf(-x);
    return 1.0f / (1.0f + expNegX);
}
|
|
||||||
|
|
||||||
// Decodes one raw YOLO output tensor in place-compatible fashion (input and
// output use the same indexing). One thread handles one (x, y, box) triple;
// threads outside the grid return immediately.
//
// Channel k of box z at cell (x, y) lives at
//   (y * gridSizeX + x) + gridSizeX * gridSizeY * (z * (5 + numOutputClasses) + k)
// with k = 0,1 -> x,y  k = 2,3 -> w,h  k = 4 -> objectness  k >= 5 -> class scores.
//
// new_coords == 1: x,y are scaled/shifted, w,h become (2*v)^2, objectness and
//                  class scores are copied unchanged.
// otherwise:       x,y go through sigmoid then scale/shift, w,h through exp,
//                  objectness and class scores through sigmoid.
__global__ void gpuYoloLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
    const uint numBBoxes, const uint new_coords, const float scale_x_y)
{
    const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
    const uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
    const uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

    if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes))
    {
        return;
    }

    const uint numGridCells = gridSizeX * gridSizeY;
    const uint bbindex = y_id * gridSizeX + x_id;

    // Index of channel 0 for this box; channel k is k * numGridCells further.
    const uint boxBase = bbindex + numGridCells * (z_id * (5 + numOutputClasses));
    const uint xIdx = boxBase;
    const uint yIdx = boxBase + numGridCells;
    const uint wIdx = boxBase + 2 * numGridCells;
    const uint hIdx = boxBase + 3 * numGridCells;
    const uint objIdx = boxBase + 4 * numGridCells;

    // Single-precision constants keep the arithmetic out of double precision.
    const float alpha = scale_x_y;
    const float beta = -0.5f * (scale_x_y - 1.0f);

    if (new_coords == 1) {
        output[xIdx] = input[xIdx] * alpha + beta;
        output[yIdx] = input[yIdx] * alpha + beta;

        // (2 * v)^2 written as a product instead of a pow() call.
        const float doubledW = input[wIdx] * 2.0f;
        const float doubledH = input[hIdx] * 2.0f;
        output[wIdx] = doubledW * doubledW;
        output[hIdx] = doubledH * doubledH;

        output[objIdx] = input[objIdx];

        for (uint i = 0; i < numOutputClasses; ++i)
        {
            const uint clsIdx = boxBase + (5 + i) * numGridCells;
            output[clsIdx] = input[clsIdx];
        }
    }
    else {
        output[xIdx] = sigmoidGPU(input[xIdx]) * alpha + beta;
        output[yIdx] = sigmoidGPU(input[yIdx]) * alpha + beta;

        output[wIdx] = __expf(input[wIdx]);
        output[hIdx] = __expf(input[hIdx]);

        output[objIdx] = sigmoidGPU(input[objIdx]);

        for (uint i = 0; i < numOutputClasses; ++i)
        {
            const uint clsIdx = boxBase + (5 + i) * numGridCells;
            output[clsIdx] = sigmoidGPU(input[clsIdx]);
        }
    }
}
|
|
||||||
|
|
||||||
// Decodes one raw region (YOLOv2-style) output tensor. One thread handles one
// (x, y, box) triple; threads outside the grid return immediately.
//
// Channel k of box z at cell (x, y) lives at
//   (y * gridSizeX + x) + gridSizeX * gridSizeY * (z * (5 + numOutputClasses) + k).
// x,y and objectness go through sigmoid, w,h through exp, and the class
// scores through a softmax over the numOutputClasses channels.
__global__ void gpuRegionLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
    const uint numBBoxes)
{
    const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
    const uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
    const uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

    if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes))
    {
        return;
    }

    const uint numGridCells = gridSizeX * gridSizeY;
    const uint bbindex = y_id * gridSizeX + x_id;

    // Index of channel 0 for this box; channel k is k * numGridCells further.
    const uint boxBase = bbindex + numGridCells * (z_id * (5 + numOutputClasses));
    const uint xIdx = boxBase;
    const uint yIdx = boxBase + numGridCells;
    const uint wIdx = boxBase + 2 * numGridCells;
    const uint hIdx = boxBase + 3 * numGridCells;
    const uint objIdx = boxBase + 4 * numGridCells;

    output[xIdx] = sigmoidGPU(input[xIdx]);
    output[yIdx] = sigmoidGPU(input[yIdx]);

    output[wIdx] = __expf(input[wIdx]);
    output[hIdx] = __expf(input[hIdx]);

    output[objIdx] = sigmoidGPU(input[objIdx]);

    // Softmax over the class scores, shifted by the largest logit so the
    // exponent never exceeds zero. The maximum must be tracked as a float:
    // storing it in an int truncates the logit and defeats the shift.
    const float temp = 1.0f; // softmax temperature
    float largest = -INFINITY;
    for (uint i = 0; i < numOutputClasses; ++i)
    {
        const float val = input[boxBase + (5 + i) * numGridCells];
        largest = (val > largest) ? val : largest;
    }

    float sum = 0.0f;
    for (uint i = 0; i < numOutputClasses; ++i)
    {
        const uint clsIdx = boxBase + (5 + i) * numGridCells;
        const float e = expf(input[clsIdx] / temp - largest / temp);
        sum += e;
        output[clsIdx] = e;
    }

    for (uint i = 0; i < numOutputClasses; ++i)
    {
        output[boxBase + (5 + i) * numGridCells] /= sum;
    }
}
|
|
||||||
|
|
||||||
cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY,
    const uint& numOutputClasses, const uint& numBBoxes,
    uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType);

// Launches the decoding kernel once per batch item on `stream`.
//
// modelType == 1 selects gpuYoloLayer (using modelCoords / modelScale),
// modelType == 0 selects gpuRegionLayer; any other value launches nothing.
// Batch item b reads and writes at an offset of b * outputSize floats.
// Returns cudaGetLastError() after the launches have been queued.
cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY,
    const uint& numOutputClasses, const uint& numBBoxes,
    uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType)
{
    const dim3 blockShape(16, 16, 4);
    // One block more than the integer quotient in every dimension; threads
    // beyond the grid exit early inside the kernels.
    const dim3 gridShape((gridSizeX / blockShape.x) + 1,
                         (gridSizeY / blockShape.y) + 1,
                         (numBBoxes / blockShape.z) + 1);

    const float* const inputBase = static_cast<const float*>(input);
    float* const outputBase = static_cast<float*>(output);

    for (unsigned int batch = 0; batch < batchSize; ++batch)
    {
        const float* const batchIn = inputBase + (batch * outputSize);
        float* const batchOut = outputBase + (batch * outputSize);

        if (modelType == 1) {
            gpuYoloLayer<<<gridShape, blockShape, 0, stream>>>(
                batchIn, batchOut, gridSizeX, gridSizeY, numOutputClasses,
                numBBoxes, modelCoords, modelScale);
        }
        else if (modelType == 0) {
            gpuRegionLayer<<<gridShape, blockShape, 0, stream>>>(
                batchIn, batchOut, gridSizeX, gridSizeY, numOutputClasses,
                numBBoxes);
        }
    }
    return cudaGetLastError();
}
|
|
||||||
@@ -1,156 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __YOLO_PLUGINS__
|
|
||||||
#define __YOLO_PLUGINS__
|
|
||||||
|
|
||||||
#include <cassert>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cuda_runtime_api.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "NvInferPlugin.h"
|
|
||||||
|
|
||||||
// Aborts the process with a diagnostic when a CUDA call does not return 0.
// The argument is evaluated exactly once and stored, so passing the call
// itself (e.g. CHECK(cudaMalloc(...))) never runs it a second time while the
// error text is built. The expansion stays a plain braced block so existing
// call sites, with or without a trailing semicolon, keep compiling.
#define CHECK(status)                                                                     \
    {                                                                                     \
        const auto checkedStatus_ = (status);                                             \
        if (checkedStatus_ != 0)                                                          \
        {                                                                                 \
            std::cout << "CUDA failure: " << cudaGetErrorString(checkedStatus_)           \
                      << " in file " << __FILE__                                          \
                      << " at line " << __LINE__ << std::endl;                            \
            abort();                                                                      \
        }                                                                                 \
    }
|
|
||||||
|
|
||||||
namespace
{
// Identification strings returned by YoloLayer and YoloLayerPluginCreator.
// constexpr makes the pointers themselves immutable, so no translation unit
// can re-point them after start-up.
constexpr const char* YOLOLAYER_PLUGIN_VERSION {"2"};
constexpr const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"};
} // namespace
|
|
||||||
|
|
||||||
class YoloLayer : public nvinfer1::IPluginV2
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloLayer (const void* data, size_t length);
|
|
||||||
YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY,
|
|
||||||
const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms,
|
|
||||||
const std::vector<float> anchors, const std::vector<std::vector<int>> mask);
|
|
||||||
const char* getPluginType () const override { return YOLOLAYER_PLUGIN_NAME; }
|
|
||||||
const char* getPluginVersion () const override { return YOLOLAYER_PLUGIN_VERSION; }
|
|
||||||
int getNbOutputs () const override { return 1; }
|
|
||||||
|
|
||||||
nvinfer1::Dims getOutputDimensions (
|
|
||||||
int index, const nvinfer1::Dims* inputs,
|
|
||||||
int nbInputDims) override;
|
|
||||||
|
|
||||||
bool supportsFormat (
|
|
||||||
nvinfer1::DataType type, nvinfer1::PluginFormat format) const override;
|
|
||||||
|
|
||||||
void configureWithFormat (
|
|
||||||
const nvinfer1::Dims* inputDims, int nbInputs,
|
|
||||||
const nvinfer1::Dims* outputDims, int nbOutputs,
|
|
||||||
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) override;
|
|
||||||
|
|
||||||
int initialize () override { return 0; }
|
|
||||||
void terminate () override {}
|
|
||||||
size_t getWorkspaceSize (int maxBatchSize) const override { return 0; }
|
|
||||||
int enqueue (
|
|
||||||
int batchSize, const void* const* inputs, void** outputs,
|
|
||||||
void* workspace, cudaStream_t stream) override;
|
|
||||||
size_t getSerializationSize() const override;
|
|
||||||
void serialize (void* buffer) const override;
|
|
||||||
void destroy () override { delete this; }
|
|
||||||
nvinfer1::IPluginV2* clone() const override;
|
|
||||||
|
|
||||||
void setPluginNamespace (const char* pluginNamespace)override {
|
|
||||||
m_Namespace = pluginNamespace;
|
|
||||||
}
|
|
||||||
virtual const char* getPluginNamespace () const override {
|
|
||||||
return m_Namespace.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint m_NumBoxes {0};
|
|
||||||
uint m_NumClasses {0};
|
|
||||||
uint m_GridSizeX {0};
|
|
||||||
uint m_GridSizeY {0};
|
|
||||||
uint64_t m_OutputSize {0};
|
|
||||||
std::string m_Namespace {""};
|
|
||||||
|
|
||||||
uint m_type {0};
|
|
||||||
uint m_new_coords {0};
|
|
||||||
float m_scale_x_y {0};
|
|
||||||
float m_beta_nms {0};
|
|
||||||
std::vector<float> m_Anchors;
|
|
||||||
std::vector<std::vector<int>> m_Mask;
|
|
||||||
};
|
|
||||||
|
|
||||||
class YoloLayerPluginCreator : public nvinfer1::IPluginCreator
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloLayerPluginCreator () {}
|
|
||||||
~YoloLayerPluginCreator () {}
|
|
||||||
|
|
||||||
const char* getPluginName () const override { return YOLOLAYER_PLUGIN_NAME; }
|
|
||||||
const char* getPluginVersion () const override { return YOLOLAYER_PLUGIN_VERSION; }
|
|
||||||
|
|
||||||
const nvinfer1::PluginFieldCollection* getFieldNames() override {
|
|
||||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented" << std::endl;
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
nvinfer1::IPluginV2* createPlugin (
|
|
||||||
const char* name, const nvinfer1::PluginFieldCollection* fc) override
|
|
||||||
{
|
|
||||||
std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented";
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
nvinfer1::IPluginV2* deserializePlugin (
|
|
||||||
const char* name, const void* serialData, size_t serialLength) override
|
|
||||||
{
|
|
||||||
std::cout << "Deserialize yoloLayer plugin: " << name << std::endl;
|
|
||||||
return new YoloLayer(serialData, serialLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* libNamespace) override {
|
|
||||||
m_Namespace = libNamespace;
|
|
||||||
}
|
|
||||||
const char* getPluginNamespace() const override {
|
|
||||||
return m_Namespace.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::string m_Namespace {""};
|
|
||||||
};
|
|
||||||
|
|
||||||
// Globals defined in another translation unit.
// NOTE(review): presumably filled by the plugin so the bounding-box parser can
// read the layer parameters - confirm where they are written.
extern int kNUM_CLASSES;                     // number of classes
extern float kBETA_NMS;                      // beta_nms value
extern std::vector<float> kANCHORS;          // anchor values
extern std::vector<std::vector<int>> kMASK;  // per-layer masks
|
|
||||||
|
|
||||||
#endif // __YOLO_PLUGINS__
|
|
||||||
18
external/yolov5-3.X/config_infer_primary.txt
vendored
18
external/yolov5-3.X/config_infer_primary.txt
vendored
@@ -1,18 +0,0 @@
|
|||||||
[property]
|
|
||||||
gpu-id=0
|
|
||||||
net-scale-factor=0.0039215697906911373
|
|
||||||
model-color-format=0
|
|
||||||
model-engine-file=yolov5s.engine
|
|
||||||
labelfile-path=labels.txt
|
|
||||||
num-detected-classes=80
|
|
||||||
interval=0
|
|
||||||
gie-unique-id=1
|
|
||||||
process-mode=1
|
|
||||||
network-type=0
|
|
||||||
cluster-mode=4
|
|
||||||
maintain-aspect-ratio=0
|
|
||||||
parse-bbox-func-name=NvDsInferParseCustomYoloV5
|
|
||||||
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
|
|
||||||
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
63
external/yolov5-3.X/deepstream_app_config.txt
vendored
63
external/yolov5-3.X/deepstream_app_config.txt
vendored
@@ -1,63 +0,0 @@
|
|||||||
[application]
|
|
||||||
enable-perf-measurement=1
|
|
||||||
perf-measurement-interval-sec=1
|
|
||||||
|
|
||||||
[tiled-display]
|
|
||||||
enable=1
|
|
||||||
rows=1
|
|
||||||
columns=1
|
|
||||||
width=1280
|
|
||||||
height=720
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[source0]
|
|
||||||
enable=1
|
|
||||||
type=3
|
|
||||||
uri=file://../../samples/streams/sample_1080p_h264.mp4
|
|
||||||
num-sources=1
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
|
|
||||||
[sink0]
|
|
||||||
enable=1
|
|
||||||
type=2
|
|
||||||
sync=0
|
|
||||||
source-id=0
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[osd]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
border-width=1
|
|
||||||
text-size=15
|
|
||||||
text-color=1;1;1;1;
|
|
||||||
text-bg-color=0.3;0.3;0.3;1
|
|
||||||
font=Serif
|
|
||||||
show-clock=0
|
|
||||||
clock-x-offset=800
|
|
||||||
clock-y-offset=820
|
|
||||||
clock-text-size=12
|
|
||||||
clock-color=1;0;0;0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[streammux]
|
|
||||||
gpu-id=0
|
|
||||||
live-source=0
|
|
||||||
batch-size=1
|
|
||||||
batched-push-timeout=40000
|
|
||||||
width=1920
|
|
||||||
height=1080
|
|
||||||
enable-padding=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=config_infer_primary.txt
|
|
||||||
|
|
||||||
[tests]
|
|
||||||
file-loop=0
|
|
||||||
80
external/yolov5-3.X/labels.txt
vendored
80
external/yolov5-3.X/labels.txt
vendored
@@ -1,80 +0,0 @@
|
|||||||
person
|
|
||||||
bicycle
|
|
||||||
car
|
|
||||||
motorbike
|
|
||||||
aeroplane
|
|
||||||
bus
|
|
||||||
train
|
|
||||||
truck
|
|
||||||
boat
|
|
||||||
traffic light
|
|
||||||
fire hydrant
|
|
||||||
stop sign
|
|
||||||
parking meter
|
|
||||||
bench
|
|
||||||
bird
|
|
||||||
cat
|
|
||||||
dog
|
|
||||||
horse
|
|
||||||
sheep
|
|
||||||
cow
|
|
||||||
elephant
|
|
||||||
bear
|
|
||||||
zebra
|
|
||||||
giraffe
|
|
||||||
backpack
|
|
||||||
umbrella
|
|
||||||
handbag
|
|
||||||
tie
|
|
||||||
suitcase
|
|
||||||
frisbee
|
|
||||||
skis
|
|
||||||
snowboard
|
|
||||||
sports ball
|
|
||||||
kite
|
|
||||||
baseball bat
|
|
||||||
baseball glove
|
|
||||||
skateboard
|
|
||||||
surfboard
|
|
||||||
tennis racket
|
|
||||||
bottle
|
|
||||||
wine glass
|
|
||||||
cup
|
|
||||||
fork
|
|
||||||
knife
|
|
||||||
spoon
|
|
||||||
bowl
|
|
||||||
banana
|
|
||||||
apple
|
|
||||||
sandwich
|
|
||||||
orange
|
|
||||||
broccoli
|
|
||||||
carrot
|
|
||||||
hot dog
|
|
||||||
pizza
|
|
||||||
donut
|
|
||||||
cake
|
|
||||||
chair
|
|
||||||
sofa
|
|
||||||
pottedplant
|
|
||||||
bed
|
|
||||||
diningtable
|
|
||||||
toilet
|
|
||||||
tvmonitor
|
|
||||||
laptop
|
|
||||||
mouse
|
|
||||||
remote
|
|
||||||
keyboard
|
|
||||||
cell phone
|
|
||||||
microwave
|
|
||||||
oven
|
|
||||||
toaster
|
|
||||||
sink
|
|
||||||
refrigerator
|
|
||||||
book
|
|
||||||
clock
|
|
||||||
vase
|
|
||||||
scissors
|
|
||||||
teddy bear
|
|
||||||
hair drier
|
|
||||||
toothbrush
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CUDA toolkit version used to locate nvcc, headers and libraries under
# /usr/local/cuda-<version>. It must be supplied by the caller.
CUDA_VER?=
ifeq ($(CUDA_VER),)
  $(error "CUDA_VER is not set")
endif
CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc

CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include

LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group

# Every header is a prerequisite of every object, so editing any header rebuilds all.
INCS:= $(wildcard *.h)
SRCFILES:= nvdsparsebbox_Yolo.cpp \
           yololayer.cu

TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so

# Map both .cpp and .cu sources onto .o object names.
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)

# all and clean name actions, not files; without this a stray file called
# "all" or "clean" in the directory would silently disable the target.
.PHONY: all clean

all: $(TARGET_LIB)

%.o: %.cpp $(INCS) Makefile
	$(CC) -c -o $@ $(CFLAGS) $<

%.o: %.cu $(INCS) Makefile
	$(NVCC) -c -o $@ --compiler-options '-fPIC' $<

$(TARGET_LIB) : $(TARGET_OBJS)
	$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)

clean:
	rm -rf $(TARGET_LIB)
	rm -rf $(TARGET_OBJS)
|
|
||||||
@@ -1,122 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cassert>
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstring>
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
#define kNMS_THRESH 0.45
|
|
||||||
|
|
||||||
// Number of floats that describe a box: center_x, center_y, w, h.
static constexpr int LOCATIONS = 4;

// One detection record: four box floats, one confidence, one class id.
struct alignas(float) Detection
{
    float bbox[LOCATIONS];  // center_x center_y w h
    float conf;             // bbox_conf * cls_conf
    float class_id;         // class index stored as a float
};
|
|
||||||
|
|
||||||
/*
 * Intersection-over-union of two boxes given as {center_x, center_y, w, h}.
 *
 * Returns 0 when the boxes do not overlap, and also when the union area is
 * not positive (e.g. two zero-sized boxes), where the plain ratio would be
 * 0/0 = NaN. The boxes are only read, hence the const pointers.
 */
float iou(const float lbox[4], const float rbox[4]) {
    const float left   = std::max(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f);
    const float right  = std::min(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f);
    const float top    = std::max(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f);
    const float bottom = std::min(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f);

    // Disjoint along either axis: no intersection.
    if (top > bottom || left > right)
        return 0.0f;

    const float interArea = (right - left) * (bottom - top);
    const float unionArea = lbox[2] * lbox[3] + rbox[2] * rbox[3] - interArea;

    // Degenerate boxes: avoid dividing by zero.
    if (unionArea <= 0.0f)
        return 0.0f;

    return interArea / unionArea;
}
|
|
||||||
|
|
||||||
bool cmp(Detection& a, Detection& b) {
|
|
||||||
return a.conf > b.conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
void nms(std::vector<Detection>& res, float *output, float conf_thresh, float nms_thresh) {
|
|
||||||
int det_size = sizeof(Detection) / sizeof(float);
|
|
||||||
std::map<float, std::vector<Detection>> m;
|
|
||||||
for (int i = 0; i < output[0] && i < 1000; i++) {
|
|
||||||
if (output[1 + det_size * i + 4] <= conf_thresh) continue;
|
|
||||||
Detection det;
|
|
||||||
memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float));
|
|
||||||
if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
|
|
||||||
m[det.class_id].push_back(det);
|
|
||||||
}
|
|
||||||
for (auto it = m.begin(); it != m.end(); it++) {
|
|
||||||
auto& dets = it->second;
|
|
||||||
std::sort(dets.begin(), dets.end(), cmp);
|
|
||||||
for (size_t m = 0; m < dets.size(); ++m) {
|
|
||||||
auto& item = dets[m];
|
|
||||||
res.push_back(item);
|
|
||||||
for (size_t n = m + 1; n < dets.size(); ++n) {
|
|
||||||
if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
|
|
||||||
dets.erase(dets.begin()+n);
|
|
||||||
--n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This is a sample bounding box parsing function for the sample YoloV5 detector model */
|
|
||||||
static bool NvDsInferParseYoloV5(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList)
|
|
||||||
{
|
|
||||||
const float kCONF_THRESH = detectionParams.perClassThreshold[0];
|
|
||||||
|
|
||||||
std::vector<Detection> res;
|
|
||||||
|
|
||||||
nms(res, (float*)(outputLayersInfo[0].buffer), kCONF_THRESH, kNMS_THRESH);
|
|
||||||
|
|
||||||
for(auto& r : res) {
|
|
||||||
NvDsInferParseObjectInfo oinfo;
|
|
||||||
|
|
||||||
oinfo.classId = r.class_id;
|
|
||||||
oinfo.left = static_cast<unsigned int>(r.bbox[0]-r.bbox[2]*0.5f);
|
|
||||||
oinfo.top = static_cast<unsigned int>(r.bbox[1]-r.bbox[3]*0.5f);
|
|
||||||
oinfo.width = static_cast<unsigned int>(r.bbox[2]);
|
|
||||||
oinfo.height = static_cast<unsigned int>(r.bbox[3]);
|
|
||||||
oinfo.detectionConfidence = r.conf;
|
|
||||||
objectList.push_back(oinfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" bool NvDsInferParseCustomYoloV5(
|
|
||||||
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const &networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const &detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo> &objectList)
|
|
||||||
{
|
|
||||||
return NvDsInferParseYoloV5(
|
|
||||||
outputLayersInfo, networkInfo, detectionParams, objectList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check that the custom function has been defined correctly */
|
|
||||||
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV5);
|
|
||||||
@@ -1,94 +0,0 @@
|
|||||||
#ifndef __TRT_UTILS_H_
|
|
||||||
#define __TRT_UTILS_H_
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cudnn.h>
|
|
||||||
|
|
||||||
#ifndef CUDA_CHECK
|
|
||||||
|
|
||||||
// Evaluate a CUDA runtime call once; on failure print the error code, its
// text and the call site to stderr, then assert. The do/while(0) wrapper
// makes the macro a single statement, so `if (x) CUDA_CHECK(f()); else ...`
// parses as intended. The argument is parenthesised so any expression can
// be passed.
#define CUDA_CHECK(callstr)                                                        \
    do {                                                                           \
        const cudaError_t error_code = (callstr);                                  \
        if (error_code != cudaSuccess) {                                           \
            std::cerr << "CUDA error " << error_code << " ("                       \
                      << cudaGetErrorString(error_code) << ") at " << __FILE__     \
                      << ":" << __LINE__ << std::endl;                             \
            assert(0);                                                             \
        }                                                                          \
    } while (0)
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace Tn
|
|
||||||
{
|
|
||||||
class Profiler : public nvinfer1::IProfiler
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
void printLayerTimes(int itrationsTimes)
|
|
||||||
{
|
|
||||||
float totalTime = 0;
|
|
||||||
for (size_t i = 0; i < mProfile.size(); i++)
|
|
||||||
{
|
|
||||||
printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
|
|
||||||
totalTime += mProfile[i].second;
|
|
||||||
}
|
|
||||||
printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
typedef std::pair<std::string, float> Record;
|
|
||||||
std::vector<Record> mProfile;
|
|
||||||
|
|
||||||
virtual void reportLayerTime(const char* layerName, float ms)
|
|
||||||
{
|
|
||||||
auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
|
|
||||||
if (record == mProfile.end())
|
|
||||||
mProfile.push_back(std::make_pair(layerName, ms));
|
|
||||||
else
|
|
||||||
record->second += ms;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//Logger for TensorRT info/warning/errors
|
|
||||||
class Logger : public nvinfer1::ILogger
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
|
|
||||||
Logger(): Logger(Severity::kWARNING) {}
|
|
||||||
|
|
||||||
Logger(Severity severity): reportableSeverity(severity) {}
|
|
||||||
|
|
||||||
void log(Severity severity, const char* msg) override
|
|
||||||
{
|
|
||||||
// suppress messages with severity enum value greater than the reportable
|
|
||||||
if (severity > reportableSeverity) return;
|
|
||||||
|
|
||||||
switch (severity)
|
|
||||||
{
|
|
||||||
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
|
|
||||||
case Severity::kERROR: std::cerr << "ERROR: "; break;
|
|
||||||
case Severity::kWARNING: std::cerr << "WARNING: "; break;
|
|
||||||
case Severity::kINFO: std::cerr << "INFO: "; break;
|
|
||||||
default: std::cerr << "UNKNOWN: "; break;
|
|
||||||
}
|
|
||||||
std::cerr << msg << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
Severity reportableSeverity{Severity::kWARNING};
|
|
||||||
};
|
|
||||||
|
|
||||||
// Copy the object representation of val into buffer and advance buffer past
// it. The bytes are copied one by one rather than stored through a T*,
// because the cursor may sit at an address that is not suitably aligned
// for T.
template<typename T>
void write(char*& buffer, const T& val)
{
    const char* bytes = reinterpret_cast<const char*>(&val);
    std::copy_n(bytes, sizeof(T), buffer);
    buffer += sizeof(T);
}
|
|
||||||
|
|
||||||
// Fill val from the next sizeof(T) bytes at buffer and advance buffer past
// them. The bytes are copied one by one rather than loaded through a
// const T*, because the cursor may sit at an address that is not suitably
// aligned for T.
template<typename T>
void read(const char*& buffer, T& val)
{
    std::copy_n(buffer, sizeof(T), reinterpret_cast<char*>(&val));
    buffer += sizeof(T);
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,270 +0,0 @@
|
|||||||
#include <assert.h>
|
|
||||||
#include "yololayer.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
using namespace Yolo;
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
// Build the plugin from the compile-time configuration: CLASS_NUM classes and
// the three kernels yolo1..yolo3. One device buffer per kernel receives that
// kernel's anchors; the pointers to those buffers live in a table allocated
// with cudaMallocHost.
YoloLayerPlugin::YoloLayerPlugin()
{
    mClassCount = CLASS_NUM;

    mYoloKernel.clear();
    mYoloKernel.push_back(yolo1);
    mYoloKernel.push_back(yolo2);
    mYoloKernel.push_back(yolo3);
    mKernelCount = mYoloKernel.size();

    CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));

    // CHECK_COUNT anchors per kernel, two floats each.
    const size_t anchorBytes = sizeof(float) * CHECK_COUNT * 2;
    for (int layer = 0; layer < mKernelCount; ++layer)
    {
        CUDA_CHECK(cudaMalloc(&mAnchor[layer], anchorBytes));
        CUDA_CHECK(cudaMemcpy(mAnchor[layer], mYoloKernel[layer].anchors, anchorBytes, cudaMemcpyHostToDevice));
    }
}
|
|
||||||
|
|
||||||
// Release what both constructors allocate: one cudaMalloc'ed anchor buffer
// per kernel and the cudaMallocHost'ed table that holds their pointers.
// Previously nothing was freed, so every plugin instance (including each
// clone()) leaked this memory. Return codes are deliberately ignored: a
// destructor must not abort, and frees can fail harmlessly while the CUDA
// runtime is shutting down.
YoloLayerPlugin::~YoloLayerPlugin()
{
    if (mAnchor == nullptr)
        return;

    for (int ii = 0; ii < mKernelCount; ++ii)
    {
        (void)cudaFree(mAnchor[ii]);
    }
    (void)cudaFreeHost(mAnchor);
    mAnchor = nullptr;
}
|
|
||||||
|
|
||||||
// create the plugin at runtime from a byte stream
|
|
||||||
YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
|
|
||||||
{
|
|
||||||
using namespace Tn;
|
|
||||||
const char *d = reinterpret_cast<const char *>(data), *a = d;
|
|
||||||
read(d, mClassCount);
|
|
||||||
read(d, mThreadCount);
|
|
||||||
read(d, mKernelCount);
|
|
||||||
mYoloKernel.resize(mKernelCount);
|
|
||||||
auto kernelSize = mKernelCount*sizeof(YoloKernel);
|
|
||||||
memcpy(mYoloKernel.data(),d,kernelSize);
|
|
||||||
d += kernelSize;
|
|
||||||
|
|
||||||
CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
|
|
||||||
size_t AnchorLen = sizeof(float)* CHECK_COUNT*2;
|
|
||||||
for(int ii = 0; ii < mKernelCount; ii ++)
|
|
||||||
{
|
|
||||||
CUDA_CHECK(cudaMalloc(&mAnchor[ii],AnchorLen));
|
|
||||||
const auto& yolo = mYoloKernel[ii];
|
|
||||||
CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(d == a + length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::serialize(void* buffer) const
|
|
||||||
{
|
|
||||||
using namespace Tn;
|
|
||||||
char* d = static_cast<char*>(buffer), *a = d;
|
|
||||||
write(d, mClassCount);
|
|
||||||
write(d, mThreadCount);
|
|
||||||
write(d, mKernelCount);
|
|
||||||
auto kernelSize = mKernelCount*sizeof(YoloKernel);
|
|
||||||
memcpy(d,mYoloKernel.data(),kernelSize);
|
|
||||||
d += kernelSize;
|
|
||||||
|
|
||||||
assert(d == a + getSerializationSize());
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t YoloLayerPlugin::getSerializationSize() const
|
|
||||||
{
|
|
||||||
return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount) + sizeof(Yolo::YoloKernel) * mYoloKernel.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Nothing is set up here; always returns 0.
int YoloLayerPlugin::initialize()
{
    constexpr int kStatus = 0;
    return kStatus;
}
|
|
||||||
|
|
||||||
// The output is a flat channel of floats: one leading float followed by room
// for MAX_OUTPUT_BBOX_COUNT Detection records. The arguments are not used.
Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
{
    const int detectionFloats = MAX_OUTPUT_BBOX_COUNT * sizeof(Detection) / sizeof(float);
    const int channelLength = detectionFloats + 1;
    return Dims3(channelLength, 1, 1);
}
|
|
||||||
|
|
||||||
// Set plugin namespace
|
|
||||||
void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
|
|
||||||
{
|
|
||||||
mPluginNamespace = pluginNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginNamespace() const
|
|
||||||
{
|
|
||||||
return mPluginNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the DataType of the plugin output at the requested index
|
|
||||||
DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
|
|
||||||
{
|
|
||||||
return DataType::kFLOAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if output tensor is broadcast across a batch.
|
|
||||||
bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if plugin can use input that is broadcast across batch without replication.
|
|
||||||
bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Intentionally empty: nothing is derived from the tensor descriptions.
void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
{
    // no-op
}
|
|
||||||
|
|
||||||
// Attach the plugin object to an execution context and grant the plugin the access to some context resource.
|
|
||||||
void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detach the plugin object from its execution context.
|
|
||||||
void YoloLayerPlugin::detachFromContext() {}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginType() const
|
|
||||||
{
|
|
||||||
return "YoloLayer_TRT";
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginVersion() const
|
|
||||||
{
|
|
||||||
return "1";
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::destroy()
|
|
||||||
{
|
|
||||||
delete this;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clone the plugin
|
|
||||||
IPluginV2IOExt* YoloLayerPlugin::clone() const
|
|
||||||
{
|
|
||||||
YoloLayerPlugin *p = new YoloLayerPlugin();
|
|
||||||
p->setPluginNamespace(mPluginNamespace);
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logistic sigmoid, 1 / (1 + e^-data), evaluated on the device.
__device__ float Logist(float data)
{
    const float denominator = 1.0f + expf(-data);
    return 1.0f / denominator;
};
|
|
||||||
|
|
||||||
/*
 * Decode one YOLO output scale into Detection records.
 *
 * Each thread handles one grid cell of one batch item (noElements =
 * yoloWidth * yoloHeight * batch). For anchor k, channel c of a cell is read
 * at curInput[idx + k * info_len_i * total_grid + c * total_grid], with
 * channels 0..3 the box, 4 the objectness and 5.. the class scores.
 *
 * Each batch item owns outputElem floats of `output`: the first float is a
 * record counter bumped with atomicAdd, and Detection records follow it.
 * Once MAX_OUTPUT_BBOX_COUNT records exist, the thread stops writing.
 *
 * The anchor loop runs over CHECK_COUNT, the same constant that sizes the
 * `anchors` array and the per-batch input stride, instead of a literal 3.
 */
__global__ void CalDetection(const float *input, float *output,int noElements,
        int yoloWidth,int yoloHeight,const float anchors[CHECK_COUNT*2],int classes,int outputElem) {

    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= noElements) return;

    const int total_grid = yoloWidth * yoloHeight;
    const int bnIdx = idx / total_grid;           // batch item this thread belongs to
    idx = idx - total_grid*bnIdx;                 // cell index inside that batch item
    const int info_len_i = 5 + classes;
    const float* curInput = input + bnIdx * (info_len_i * total_grid * CHECK_COUNT);

    for (int k = 0; k < CHECK_COUNT; ++k) {
        // Channel 0 of anchor k for this cell; further channels are total_grid floats apart.
        const float* cell = curInput + idx + k * info_len_i * total_grid;

        float box_prob = Logist(cell[4 * total_grid]);
        if (box_prob < IGNORE_THRESH) continue;

        // Arg-max over the class scores.
        int class_id = 0;
        float max_cls_prob = 0.0;
        for (int i = 5; i < info_len_i; ++i) {
            float p = Logist(cell[i * total_grid]);
            if (p > max_cls_prob) {
                max_cls_prob = p;
                class_id = i - 5;
            }
        }

        // Reserve a record slot; the counter keeps counting past the cap.
        float *res_count = output + bnIdx*outputElem;
        int count = (int)atomicAdd(res_count, 1);
        if (count >= MAX_OUTPUT_BBOX_COUNT) return;
        char* data = (char *)res_count + sizeof(float) + count * sizeof(Detection);
        Detection* det = (Detection*)(data);

        int row = idx / yoloWidth;
        int col = idx % yoloWidth;

        //Location
        det->bbox[0] = (col - 0.5f + 2.0f * Logist(cell[0 * total_grid])) * INPUT_W / yoloWidth;
        det->bbox[1] = (row - 0.5f + 2.0f * Logist(cell[1 * total_grid])) * INPUT_H / yoloHeight;
        det->bbox[2] = 2.0f * Logist(cell[2 * total_grid]);
        det->bbox[2] = det->bbox[2] * det->bbox[2] * anchors[2*k];
        det->bbox[3] = 2.0f * Logist(cell[3 * total_grid]);
        det->bbox[3] = det->bbox[3] * det->bbox[3] * anchors[2*k + 1];
        det->conf = box_prob * max_cls_prob;
        det->class_id = class_id;
    }
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) {
|
|
||||||
|
|
||||||
int outputElem = 1 + MAX_OUTPUT_BBOX_COUNT * sizeof(Detection) / sizeof(float);
|
|
||||||
|
|
||||||
for(int idx = 0 ; idx < batchSize; ++idx) {
|
|
||||||
CUDA_CHECK(cudaMemset(output + idx*outputElem, 0, sizeof(float)));
|
|
||||||
}
|
|
||||||
int numElem = 0;
|
|
||||||
for (unsigned int i = 0; i < mYoloKernel.size(); ++i)
|
|
||||||
{
|
|
||||||
const auto& yolo = mYoloKernel[i];
|
|
||||||
numElem = yolo.width*yolo.height*batchSize;
|
|
||||||
if (numElem < mThreadCount)
|
|
||||||
mThreadCount = numElem;
|
|
||||||
CalDetection<<< (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
|
|
||||||
(inputs[i], output, numElem, yolo.width, yolo.height, (float *)mAnchor[i], mClassCount, outputElem);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PluginFieldCollection YoloPluginCreator::mFC{};
|
|
||||||
std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
|
|
||||||
|
|
||||||
YoloPluginCreator::YoloPluginCreator()
|
|
||||||
{
|
|
||||||
mPluginAttributes.clear();
|
|
||||||
|
|
||||||
mFC.nbFields = mPluginAttributes.size();
|
|
||||||
mFC.fields = mPluginAttributes.data();
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloPluginCreator::getPluginName() const
|
|
||||||
{
|
|
||||||
return "YoloLayer_TRT";
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloPluginCreator::getPluginVersion() const
|
|
||||||
{
|
|
||||||
return "1";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Address of the shared static field collection filled in by the constructor.
const PluginFieldCollection* YoloPluginCreator::getFieldNames()
{
    const PluginFieldCollection* const fields = &mFC;
    return fields;
}
|
|
||||||
|
|
||||||
// Build a default-configured YoloLayerPlugin and give it this creator's
// namespace. Neither `name` nor `fc` is consulted.
IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
{
    YoloLayerPlugin* const plugin = new YoloLayerPlugin();
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}
|
|
||||||
|
|
||||||
// Rebuild a YoloLayerPlugin from serialized bytes and give it this creator's
// namespace. `name` is not consulted. The object is allocated with new;
// YoloLayerPlugin::destroy() is what deletes it.
IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
{
    YoloLayerPlugin* const plugin = new YoloLayerPlugin(serialData, serialLength);
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,152 +0,0 @@
|
|||||||
#ifndef _YOLO_LAYER_H
|
|
||||||
#define _YOLO_LAYER_H
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
// Compile-time configuration of the YOLO decode layer.
namespace Yolo
{
    static constexpr int CHECK_COUNT = 3;             // anchors per kernel
    static constexpr float IGNORE_THRESH = 0.1f;      // objectness below this is skipped
    static constexpr int MAX_OUTPUT_BBOX_COUNT = 1000;
    static constexpr int CLASS_NUM = 80;
    static constexpr int INPUT_H = 608;
    static constexpr int INPUT_W = 608;

    // One output scale: its grid size and CHECK_COUNT (w, h) anchor pairs.
    struct YoloKernel
    {
        int width;
        int height;
        float anchors[CHECK_COUNT*2];
    };

    // Input downscaled by 32.
    static constexpr YoloKernel yolo1 = {
        INPUT_W / 32,
        INPUT_H / 32,
        {116,90,  156,198,  373,326}
    };

    // Input downscaled by 16.
    static constexpr YoloKernel yolo2 = {
        INPUT_W / 16,
        INPUT_H / 16,
        {30,61,  62,45,  59,119}
    };

    // Input downscaled by 8.
    static constexpr YoloKernel yolo3 = {
        INPUT_W / 8,
        INPUT_H / 8,
        {10,13,  16,30,  33,23}
    };

    static constexpr int LOCATIONS = 4;

    // One detection record: four box floats, one confidence, one class id.
    struct alignas(float) Detection{
        float bbox[LOCATIONS];  // center_x center_y w h
        float conf;             // bbox_conf * cls_conf
        float class_id;
    };
}
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
class YoloLayerPlugin: public IPluginV2IOExt
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
explicit YoloLayerPlugin();
|
|
||||||
YoloLayerPlugin(const void* data, size_t length);
|
|
||||||
|
|
||||||
~YoloLayerPlugin();
|
|
||||||
|
|
||||||
int getNbOutputs() const override
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
|
|
||||||
|
|
||||||
int initialize() override;
|
|
||||||
|
|
||||||
virtual void terminate() override {};
|
|
||||||
|
|
||||||
virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;}
|
|
||||||
|
|
||||||
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
|
|
||||||
|
|
||||||
virtual size_t getSerializationSize() const override;
|
|
||||||
|
|
||||||
virtual void serialize(void* buffer) const override;
|
|
||||||
|
|
||||||
bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
|
|
||||||
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* getPluginType() const override;
|
|
||||||
|
|
||||||
const char* getPluginVersion() const override;
|
|
||||||
|
|
||||||
void destroy() override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* clone() const override;
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* pluginNamespace) override;
|
|
||||||
|
|
||||||
const char* getPluginNamespace() const override;
|
|
||||||
|
|
||||||
DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
|
|
||||||
|
|
||||||
bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
|
|
||||||
|
|
||||||
bool canBroadcastInputAcrossBatch(int inputIndex) const override;
|
|
||||||
|
|
||||||
void attachToContext(
|
|
||||||
cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
|
|
||||||
|
|
||||||
void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
|
|
||||||
|
|
||||||
void detachFromContext() override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void forwardGpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1);
|
|
||||||
int mClassCount;
|
|
||||||
int mKernelCount;
|
|
||||||
std::vector<Yolo::YoloKernel> mYoloKernel;
|
|
||||||
int mThreadCount = 256;
|
|
||||||
void** mAnchor;
|
|
||||||
const char* mPluginNamespace;
|
|
||||||
};
|
|
||||||
|
|
||||||
class YoloPluginCreator : public IPluginCreator
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloPluginCreator();
|
|
||||||
|
|
||||||
~YoloPluginCreator() override = default;
|
|
||||||
|
|
||||||
const char* getPluginName() const override;
|
|
||||||
|
|
||||||
const char* getPluginVersion() const override;
|
|
||||||
|
|
||||||
const PluginFieldCollection* getFieldNames() override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* libNamespace) override
|
|
||||||
{
|
|
||||||
mNamespace = libNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* getPluginNamespace() const override
|
|
||||||
{
|
|
||||||
return mNamespace.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::string mNamespace;
|
|
||||||
static PluginFieldCollection mFC;
|
|
||||||
static std::vector<PluginField> mPluginAttributes;
|
|
||||||
};
|
|
||||||
REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
18
external/yolov5-4.0/config_infer_primary.txt
vendored
18
external/yolov5-4.0/config_infer_primary.txt
vendored
@@ -1,18 +0,0 @@
|
|||||||
[property]
|
|
||||||
gpu-id=0
|
|
||||||
net-scale-factor=0.0039215697906911373
|
|
||||||
model-color-format=0
|
|
||||||
model-engine-file=yolov5s.engine
|
|
||||||
labelfile-path=labels.txt
|
|
||||||
num-detected-classes=80
|
|
||||||
interval=0
|
|
||||||
gie-unique-id=1
|
|
||||||
process-mode=1
|
|
||||||
network-type=0
|
|
||||||
cluster-mode=4
|
|
||||||
maintain-aspect-ratio=0
|
|
||||||
parse-bbox-func-name=NvDsInferParseCustomYoloV5
|
|
||||||
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
|
|
||||||
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
63
external/yolov5-4.0/deepstream_app_config.txt
vendored
63
external/yolov5-4.0/deepstream_app_config.txt
vendored
@@ -1,63 +0,0 @@
|
|||||||
[application]
|
|
||||||
enable-perf-measurement=1
|
|
||||||
perf-measurement-interval-sec=1
|
|
||||||
|
|
||||||
[tiled-display]
|
|
||||||
enable=1
|
|
||||||
rows=1
|
|
||||||
columns=1
|
|
||||||
width=1280
|
|
||||||
height=720
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[source0]
|
|
||||||
enable=1
|
|
||||||
type=3
|
|
||||||
uri=file://../../samples/streams/sample_1080p_h264.mp4
|
|
||||||
num-sources=1
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
|
|
||||||
[sink0]
|
|
||||||
enable=1
|
|
||||||
type=2
|
|
||||||
sync=0
|
|
||||||
source-id=0
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[osd]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
border-width=1
|
|
||||||
text-size=15
|
|
||||||
text-color=1;1;1;1;
|
|
||||||
text-bg-color=0.3;0.3;0.3;1
|
|
||||||
font=Serif
|
|
||||||
show-clock=0
|
|
||||||
clock-x-offset=800
|
|
||||||
clock-y-offset=820
|
|
||||||
clock-text-size=12
|
|
||||||
clock-color=1;0;0;0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[streammux]
|
|
||||||
gpu-id=0
|
|
||||||
live-source=0
|
|
||||||
batch-size=1
|
|
||||||
batched-push-timeout=40000
|
|
||||||
width=1920
|
|
||||||
height=1080
|
|
||||||
enable-padding=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=config_infer_primary.txt
|
|
||||||
|
|
||||||
[tests]
|
|
||||||
file-loop=0
|
|
||||||
80
external/yolov5-4.0/labels.txt
vendored
80
external/yolov5-4.0/labels.txt
vendored
@@ -1,80 +0,0 @@
|
|||||||
person
|
|
||||||
bicycle
|
|
||||||
car
|
|
||||||
motorbike
|
|
||||||
aeroplane
|
|
||||||
bus
|
|
||||||
train
|
|
||||||
truck
|
|
||||||
boat
|
|
||||||
traffic light
|
|
||||||
fire hydrant
|
|
||||||
stop sign
|
|
||||||
parking meter
|
|
||||||
bench
|
|
||||||
bird
|
|
||||||
cat
|
|
||||||
dog
|
|
||||||
horse
|
|
||||||
sheep
|
|
||||||
cow
|
|
||||||
elephant
|
|
||||||
bear
|
|
||||||
zebra
|
|
||||||
giraffe
|
|
||||||
backpack
|
|
||||||
umbrella
|
|
||||||
handbag
|
|
||||||
tie
|
|
||||||
suitcase
|
|
||||||
frisbee
|
|
||||||
skis
|
|
||||||
snowboard
|
|
||||||
sports ball
|
|
||||||
kite
|
|
||||||
baseball bat
|
|
||||||
baseball glove
|
|
||||||
skateboard
|
|
||||||
surfboard
|
|
||||||
tennis racket
|
|
||||||
bottle
|
|
||||||
wine glass
|
|
||||||
cup
|
|
||||||
fork
|
|
||||||
knife
|
|
||||||
spoon
|
|
||||||
bowl
|
|
||||||
banana
|
|
||||||
apple
|
|
||||||
sandwich
|
|
||||||
orange
|
|
||||||
broccoli
|
|
||||||
carrot
|
|
||||||
hot dog
|
|
||||||
pizza
|
|
||||||
donut
|
|
||||||
cake
|
|
||||||
chair
|
|
||||||
sofa
|
|
||||||
pottedplant
|
|
||||||
bed
|
|
||||||
diningtable
|
|
||||||
toilet
|
|
||||||
tvmonitor
|
|
||||||
laptop
|
|
||||||
mouse
|
|
||||||
remote
|
|
||||||
keyboard
|
|
||||||
cell phone
|
|
||||||
microwave
|
|
||||||
oven
|
|
||||||
toaster
|
|
||||||
sink
|
|
||||||
refrigerator
|
|
||||||
book
|
|
||||||
clock
|
|
||||||
vase
|
|
||||||
scissors
|
|
||||||
teddy bear
|
|
||||||
hair drier
|
|
||||||
toothbrush
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CUDA toolkit version used to locate nvcc, headers and libraries under
# /usr/local/cuda-<version>. It must be supplied by the caller.
CUDA_VER?=
ifeq ($(CUDA_VER),)
  $(error "CUDA_VER is not set")
endif
CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc

CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include

LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group

# Every header is a prerequisite of every object, so editing any header rebuilds all.
INCS:= $(wildcard *.h)
SRCFILES:= nvdsparsebbox_Yolo.cpp \
           yololayer.cu

TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so

# Map both .cpp and .cu sources onto .o object names.
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)

# all and clean name actions, not files; without this a stray file called
# "all" or "clean" in the directory would silently disable the target.
.PHONY: all clean

all: $(TARGET_LIB)

%.o: %.cpp $(INCS) Makefile
	$(CC) -c -o $@ $(CFLAGS) $<

%.o: %.cu $(INCS) Makefile
	$(NVCC) -c -o $@ --compiler-options '-fPIC' $<

$(TARGET_LIB) : $(TARGET_OBJS)
	$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)

clean:
	rm -rf $(TARGET_LIB)
	rm -rf $(TARGET_OBJS)
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
#ifndef TRTX_CUDA_UTILS_H_
#define TRTX_CUDA_UTILS_H_

#include <cuda_runtime_api.h>

// The macro below expands to std::cerr and assert, so this header must pull
// in their declarations itself instead of relying on the includer.
#include <cassert>
#include <iostream>

#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call once; on failure logs the numeric code, the
// runtime's description and the call site, then trips assert(0).
// NOTE: when NDEBUG is defined the assert compiles away, so the failure is
// only logged and execution continues.
// Wrapped in do { } while (0) so `CUDA_CHECK(x);` behaves as one statement,
// including inside an unbraced if/else.
#define CUDA_CHECK(callstr)\
    do {\
        const cudaError_t error_code = (callstr);\
        if (error_code != cudaSuccess) {\
            std::cerr << "CUDA error " << error_code << " (" << cudaGetErrorString(error_code) << ") at " << __FILE__ << ":" << __LINE__ << std::endl;\
            assert(0);\
        }\
    } while (0)
#endif  // CUDA_CHECK

#endif  // TRTX_CUDA_UTILS_H_
|
|
||||||
|
|
||||||
@@ -1,122 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cassert>
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstring>
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
#define kNMS_THRESH 0.45
|
|
||||||
|
|
||||||
// Number of box coordinates stored per detection.
static constexpr int LOCATIONS = 4;

// One detection record: six consecutive floats. nms() copies these records
// verbatim out of the raw output buffer, so the field order matters.
struct alignas(float) Detection {
    float bbox[LOCATIONS];  // center_x, center_y, w, h
    float conf;             // bbox_conf * cls_conf
    float class_id;         // class index, stored as a float
};
|
|
||||||
|
|
||||||
// Intersection-over-union of two boxes given as {center_x, center_y, w, h}.
// Returns 0 when the boxes do not overlap, and also when the union area is
// not positive (e.g. two zero-sized boxes), instead of dividing by zero.
float iou(float lbox[4], float rbox[4]) {
    const float left   = std::max(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f);
    const float right  = std::min(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f);
    const float top    = std::max(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f);
    const float bottom = std::min(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f);

    if (top > bottom || left > right)
        return 0.0f;

    const float interArea = (right - left) * (bottom - top);
    const float unionArea = lbox[2] * lbox[3] + rbox[2] * rbox[3] - interArea;

    // Written as !(x > 0) so a NaN union is rejected as well.
    if (!(unionArea > 0.0f))
        return 0.0f;

    return interArea / unionArea;
}
|
|
||||||
|
|
||||||
bool cmp(Detection& a, Detection& b) {
|
|
||||||
return a.conf > b.conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
void nms(std::vector<Detection>& res, float *output, float conf_thresh, float nms_thresh) {
|
|
||||||
int det_size = sizeof(Detection) / sizeof(float);
|
|
||||||
std::map<float, std::vector<Detection>> m;
|
|
||||||
for (int i = 0; i < output[0] && i < 1000; i++) {
|
|
||||||
if (output[1 + det_size * i + 4] <= conf_thresh) continue;
|
|
||||||
Detection det;
|
|
||||||
memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float));
|
|
||||||
if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
|
|
||||||
m[det.class_id].push_back(det);
|
|
||||||
}
|
|
||||||
for (auto it = m.begin(); it != m.end(); it++) {
|
|
||||||
auto& dets = it->second;
|
|
||||||
std::sort(dets.begin(), dets.end(), cmp);
|
|
||||||
for (size_t m = 0; m < dets.size(); ++m) {
|
|
||||||
auto& item = dets[m];
|
|
||||||
res.push_back(item);
|
|
||||||
for (size_t n = m + 1; n < dets.size(); ++n) {
|
|
||||||
if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
|
|
||||||
dets.erase(dets.begin()+n);
|
|
||||||
--n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This is a sample bounding box parsing function for the sample YoloV5 detector model */
|
|
||||||
static bool NvDsInferParseYoloV5(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList)
|
|
||||||
{
|
|
||||||
const float kCONF_THRESH = detectionParams.perClassThreshold[0];
|
|
||||||
|
|
||||||
std::vector<Detection> res;
|
|
||||||
|
|
||||||
nms(res, (float*)(outputLayersInfo[0].buffer), kCONF_THRESH, kNMS_THRESH);
|
|
||||||
|
|
||||||
for(auto& r : res) {
|
|
||||||
NvDsInferParseObjectInfo oinfo;
|
|
||||||
|
|
||||||
oinfo.classId = r.class_id;
|
|
||||||
oinfo.left = static_cast<unsigned int>(r.bbox[0]-r.bbox[2]*0.5f);
|
|
||||||
oinfo.top = static_cast<unsigned int>(r.bbox[1]-r.bbox[3]*0.5f);
|
|
||||||
oinfo.width = static_cast<unsigned int>(r.bbox[2]);
|
|
||||||
oinfo.height = static_cast<unsigned int>(r.bbox[3]);
|
|
||||||
oinfo.detectionConfidence = r.conf;
|
|
||||||
objectList.push_back(oinfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" bool NvDsInferParseCustomYoloV5(
|
|
||||||
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const &networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const &detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo> &objectList)
|
|
||||||
{
|
|
||||||
return NvDsInferParseYoloV5(
|
|
||||||
outputLayersInfo, networkInfo, detectionParams, objectList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check that the custom function has been defined correctly */
|
|
||||||
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV5);
|
|
||||||
@@ -1,333 +0,0 @@
|
|||||||
#include <assert.h>
|
|
||||||
#include <vector>
|
|
||||||
#include <iostream>
|
|
||||||
#include "yololayer.h"
|
|
||||||
#include "cuda_utils.h"
|
|
||||||
|
|
||||||
namespace Tn
{
    // Copies the bytes of `val` to `buffer` and advances `buffer` past them.
    // memcpy is used instead of a reinterpret_cast store so the write is valid
    // even when `buffer` is not suitably aligned for T.
    template<typename T>
    void write(char*& buffer, const T& val)
    {
        memcpy(buffer, &val, sizeof(T));
        buffer += sizeof(T);
    }

    // Copies sizeof(T) bytes from `buffer` into `val` and advances `buffer`
    // past them; alignment-safe for the same reason as write().
    template<typename T>
    void read(const char*& buffer, T& val)
    {
        memcpy(&val, buffer, sizeof(T));
        buffer += sizeof(T);
    }
}
|
|
||||||
|
|
||||||
using namespace Yolo;
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
// Builds the plugin from explicit parameters and uploads each kernel's
// anchors to device memory. mAnchor is a host array holding one device
// pointer per YOLO kernel.
YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector<Yolo::YoloKernel>& vYoloKernel)
    : mKernelCount(vYoloKernel.size()),
      mClassCount(classCount),
      mYoloV5NetWidth(netWidth),
      mYoloV5NetHeight(netHeight),
      mMaxOutObject(maxOut),
      mYoloKernel(vYoloKernel)
{
    const size_t anchorBytes = sizeof(float) * CHECK_COUNT * 2;

    CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
    for (int k = 0; k < mKernelCount; k++)
    {
        CUDA_CHECK(cudaMalloc(&mAnchor[k], anchorBytes));
        CUDA_CHECK(cudaMemcpy(mAnchor[k], mYoloKernel[k].anchors, anchorBytes, cudaMemcpyHostToDevice));
    }
}
|
|
||||||
// Frees every per-kernel device anchor buffer, then the host array that
// holds the device pointers.
YoloLayerPlugin::~YoloLayerPlugin()
{
    for (int k = 0; k < mKernelCount; k++)
    {
        void* deviceAnchors = mAnchor[k];
        CUDA_CHECK(cudaFree(deviceAnchors));
    }
    CUDA_CHECK(cudaFreeHost(mAnchor));
}
|
|
||||||
|
|
||||||
// create the plugin at runtime from a byte stream
|
|
||||||
YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
|
|
||||||
{
|
|
||||||
using namespace Tn;
|
|
||||||
const char *d = reinterpret_cast<const char *>(data), *a = d;
|
|
||||||
read(d, mClassCount);
|
|
||||||
read(d, mThreadCount);
|
|
||||||
read(d, mKernelCount);
|
|
||||||
read(d, mYoloV5NetWidth);
|
|
||||||
read(d, mYoloV5NetHeight);
|
|
||||||
read(d, mMaxOutObject);
|
|
||||||
mYoloKernel.resize(mKernelCount);
|
|
||||||
auto kernelSize = mKernelCount * sizeof(YoloKernel);
|
|
||||||
memcpy(mYoloKernel.data(), d, kernelSize);
|
|
||||||
d += kernelSize;
|
|
||||||
CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
|
|
||||||
size_t AnchorLen = sizeof(float)* CHECK_COUNT * 2;
|
|
||||||
for (int ii = 0; ii < mKernelCount; ii++)
|
|
||||||
{
|
|
||||||
CUDA_CHECK(cudaMalloc(&mAnchor[ii], AnchorLen));
|
|
||||||
const auto& yolo = mYoloKernel[ii];
|
|
||||||
CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
|
|
||||||
}
|
|
||||||
assert(d == a + length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::serialize(void* buffer) const
|
|
||||||
{
|
|
||||||
using namespace Tn;
|
|
||||||
char* d = static_cast<char*>(buffer), *a = d;
|
|
||||||
write(d, mClassCount);
|
|
||||||
write(d, mThreadCount);
|
|
||||||
write(d, mKernelCount);
|
|
||||||
write(d, mYoloV5NetWidth);
|
|
||||||
write(d, mYoloV5NetHeight);
|
|
||||||
write(d, mMaxOutObject);
|
|
||||||
auto kernelSize = mKernelCount * sizeof(YoloKernel);
|
|
||||||
memcpy(d, mYoloKernel.data(), kernelSize);
|
|
||||||
d += kernelSize;
|
|
||||||
|
|
||||||
assert(d == a + getSerializationSize());
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t YoloLayerPlugin::getSerializationSize() const
|
|
||||||
{
|
|
||||||
return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount) + sizeof(Yolo::YoloKernel) * mYoloKernel.size() + sizeof(mYoloV5NetWidth) + sizeof(mYoloV5NetHeight) + sizeof(mMaxOutObject);
|
|
||||||
}
|
|
||||||
|
|
||||||
// No work is done here; always returns 0.
int YoloLayerPlugin::initialize()
{
    return 0;
}
|
|
||||||
|
|
||||||
// The single output is a flat float buffer: one leading element followed by
// room for mMaxOutObject Detection records, shaped as (N, 1, 1).
Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
{
    //output the result to channel
    const int detectionFloats = mMaxOutObject * sizeof(Detection) / sizeof(float);
    return Dims3(detectionFloats + 1, 1, 1);
}
|
|
||||||
|
|
||||||
// Set plugin namespace
|
|
||||||
void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
|
|
||||||
{
|
|
||||||
mPluginNamespace = pluginNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginNamespace() const
|
|
||||||
{
|
|
||||||
return mPluginNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the DataType of the plugin output at the requested index
|
|
||||||
DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
|
|
||||||
{
|
|
||||||
return DataType::kFLOAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if output tensor is broadcast across a batch.
|
|
||||||
bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if plugin can use input that is broadcast across batch without replication.
|
|
||||||
bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Intentionally empty: the tensor descriptions are not used.
void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
{
}
|
|
||||||
|
|
||||||
// Attach the plugin object to an execution context and grant the plugin the access to some context resource.
|
|
||||||
void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detach the plugin object from its execution context.
|
|
||||||
void YoloLayerPlugin::detachFromContext() {}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginType() const
|
|
||||||
{
|
|
||||||
return "YoloLayer_TRT";
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginVersion() const
|
|
||||||
{
|
|
||||||
return "1";
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::destroy()
|
|
||||||
{
|
|
||||||
delete this;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clone the plugin
|
|
||||||
IPluginV2IOExt* YoloLayerPlugin::clone() const
|
|
||||||
{
|
|
||||||
YoloLayerPlugin* p = new YoloLayerPlugin(mClassCount, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, mYoloKernel);
|
|
||||||
p->setPluginNamespace(mPluginNamespace);
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logistic sigmoid 1 / (1 + e^-x), callable from device code.
__device__ float Logist(float data)
{
    const float expNeg = expf(-data);
    return 1.0f / (1.0f + expNeg);
}
|
|
||||||
|
|
||||||
// Decodes one YOLO output scale. Each thread handles one grid cell of one
// batch item and, for every anchor, appends a Detection to that item's
// output slot when the objectness is at least IGNORE_THRESH.
//
// Input indexing used below, per batch item:
//   value(anchor k, channel c, cell idx) = curInput[idx + (k * info_len_i + c) * total_grid]
// with channels 0-3 = box, 4 = objectness, 5.. = per-class scores.
// Each batch item's output slot starts with a float counter followed by the
// Detection records.
__global__ void CalDetection(const float *input, float *output, int noElements,
    const int netwidth, const int netheight, int maxoutobject, int yoloWidth, int yoloHeight, const float anchors[CHECK_COUNT * 2], int classes, int outputElem)
{
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= noElements) return;

    const int total_grid = yoloWidth * yoloHeight;
    const int bnIdx = idx / total_grid;
    idx = idx - total_grid * bnIdx;
    const int info_len_i = 5 + classes;
    const float* curInput = input + bnIdx * (info_len_i * total_grid * CHECK_COUNT);

    // One pass per anchor; CHECK_COUNT is the same constant that sizes the
    // anchors array and the per-item input stride above.
    for (int k = 0; k < CHECK_COUNT; ++k) {
        // Channel 0 of anchor k at this cell; channel c is cell[c * total_grid].
        const float* cell = curInput + idx + k * info_len_i * total_grid;

        float box_prob = Logist(cell[4 * total_grid]);
        if (box_prob < IGNORE_THRESH) continue;

        int class_id = 0;
        float max_cls_prob = 0.0;
        for (int i = 5; i < info_len_i; ++i) {
            float p = Logist(cell[i * total_grid]);
            if (p > max_cls_prob) {
                max_cls_prob = p;
                class_id = i - 5;
            }
        }

        // Reserve a record slot; the counter is the float at the start of this batch item's output.
        float *res_count = output + bnIdx * outputElem;
        int count = (int)atomicAdd(res_count, 1);
        // The counter keeps growing past maxoutobject, so readers must cap the count themselves.
        if (count >= maxoutobject) return;
        char* data = (char *)res_count + sizeof(float) + count * sizeof(Detection);
        Detection* det = (Detection*)(data);

        int row = idx / yoloWidth;
        int col = idx % yoloWidth;

        //Location
        // pytorch:
        //  y = x[i].sigmoid()
        //  y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
        //  y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
        det->bbox[0] = (col - 0.5f + 2.0f * Logist(cell[0 * total_grid])) * netwidth / yoloWidth;
        det->bbox[1] = (row - 0.5f + 2.0f * Logist(cell[1 * total_grid])) * netheight / yoloHeight;

        // v5: https://github.com/ultralytics/yolov5/issues/471
        // w/h = (2 * sigmoid(t))^2 * anchor
        det->bbox[2] = 2.0f * Logist(cell[2 * total_grid]);
        det->bbox[2] = det->bbox[2] * det->bbox[2] * anchors[2 * k];
        det->bbox[3] = 2.0f * Logist(cell[3 * total_grid]);
        det->bbox[3] = det->bbox[3] * det->bbox[3] * anchors[2 * k + 1];
        det->conf = box_prob * max_cls_prob;
        det->class_id = class_id;
    }
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize)
|
|
||||||
{
|
|
||||||
int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float);
|
|
||||||
for (int idx = 0; idx < batchSize; ++idx) {
|
|
||||||
CUDA_CHECK(cudaMemset(output + idx * outputElem, 0, sizeof(float)));
|
|
||||||
}
|
|
||||||
int numElem = 0;
|
|
||||||
for (unsigned int i = 0; i < mYoloKernel.size(); ++i)
|
|
||||||
{
|
|
||||||
const auto& yolo = mYoloKernel[i];
|
|
||||||
numElem = yolo.width*yolo.height*batchSize;
|
|
||||||
if (numElem < mThreadCount)
|
|
||||||
mThreadCount = numElem;
|
|
||||||
|
|
||||||
//printf("Net: %d %d \n", mYoloV5NetWidth, mYoloV5NetHeight);
|
|
||||||
CalDetection << < (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream >> >
|
|
||||||
(inputs[i], output, numElem, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, yolo.width, yolo.height, (float *)mAnchor[i], mClassCount, outputElem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PluginFieldCollection YoloPluginCreator::mFC{};
|
|
||||||
std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
|
|
||||||
|
|
||||||
YoloPluginCreator::YoloPluginCreator()
|
|
||||||
{
|
|
||||||
mPluginAttributes.clear();
|
|
||||||
|
|
||||||
mFC.nbFields = mPluginAttributes.size();
|
|
||||||
mFC.fields = mPluginAttributes.data();
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloPluginCreator::getPluginName() const
|
|
||||||
{
|
|
||||||
return "YoloLayer_TRT";
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloPluginCreator::getPluginVersion() const
|
|
||||||
{
|
|
||||||
return "1";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the shared field collection, which the constructor leaves empty.
const PluginFieldCollection* YoloPluginCreator::getFieldNames()
{
    return &mFC;
}
|
|
||||||
|
|
||||||
// Builds a YoloLayerPlugin from a field collection.
//
// Recognised fields (both are read as arrays of int):
//   "netdata"      -> {class_count, input_w, input_h, max_output_object_count}
//   "*yolodata*"   -> {width, height, anchors...}; the character at name[8]
//                     ('1'..'3') selects the kernel slot, '1' mapping to the last slot.
// `name` is not used.
IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
{
    assert(fc != nullptr);

    int class_count = -1;
    int input_w = -1;
    int input_h = -1;
    int max_output_object_count = -1;
    std::vector<Yolo::YoloKernel> yolo_kernels(3);
    const int kernel_slots = static_cast<int>(yolo_kernels.size());

    const PluginField* fields = fc->fields;
    for (int i = 0; i < fc->nbFields; i++) {
        if (strcmp(fields[i].name, "netdata") == 0) {
            assert(fields[i].type == PluginFieldType::kFLOAT32);
            const int* tmp = static_cast<const int*>(fields[i].data);
            class_count = tmp[0];
            input_w = tmp[1];
            input_h = tmp[2];
            max_output_object_count = tmp[3];
        } else if (strstr(fields[i].name, "yolodata") != nullptr) {
            assert(fields[i].type == PluginFieldType::kFLOAT32);
            const int* tmp = static_cast<const int*>(fields[i].data);
            YoloKernel kernel{};  // zero-initialised so unused anchors are defined
            kernel.width = tmp[0];
            kernel.height = tmp[1];

            // Never copy more anchors than the kernel can hold.
            const int max_anchor_values = CHECK_COUNT * 2;
            int anchor_values = fields[i].length - 2;
            if (anchor_values > max_anchor_values) anchor_values = max_anchor_values;
            for (int j = 0; j < anchor_values; j++) {
                kernel.anchors[j] = tmp[j + 2];
            }

            // Reject names whose digit does not map to an existing slot
            // rather than writing outside yolo_kernels.
            const int slot = 2 - (fields[i].name[8] - '1');
            assert(slot >= 0 && slot < kernel_slots);
            if (slot < 0 || slot >= kernel_slots) continue;
            yolo_kernels[slot] = kernel;
        }
    }

    // The values start at -1, so they must be tested for > 0: a plain
    // truthiness test would also accept the "never set" sentinel.
    assert(class_count > 0 && input_w > 0 && input_h > 0 && max_output_object_count > 0);

    YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, yolo_kernels);
    obj->setPluginNamespace(mNamespace.c_str());
    return obj;
}
|
|
||||||
|
|
||||||
// Recreates a plugin from serialized bytes and gives it this creator's
// namespace. `name` is not used.
IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
{
    // This object will be deleted when the network is destroyed, which will
    // call YoloLayerPlugin::destroy()
    auto* plugin = new YoloLayerPlugin(serialData, serialLength);
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,137 +0,0 @@
|
|||||||
#ifndef _YOLO_LAYER_H
|
|
||||||
#define _YOLO_LAYER_H
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
namespace Yolo
{
    // ---- constants ----
    static constexpr int CHECK_COUNT = 3;                // anchors per YOLO kernel (anchors[] holds 2 values each)
    static constexpr float IGNORE_THRESH = 0.1f;
    static constexpr int MAX_OUTPUT_BBOX_COUNT = 1000;
    static constexpr int CLASS_NUM = 80;
    static constexpr int INPUT_H = 640;  // yolov5's input height and width must be divisible by 32.
    static constexpr int INPUT_W = 640;
    static constexpr int LOCATIONS = 4;                  // box coordinates per detection

    // Grid size and anchor values of one YOLO output scale.
    struct YoloKernel
    {
        int width;
        int height;
        float anchors[CHECK_COUNT * 2];
    };

    // One detection record: six consecutive floats.
    struct alignas(float) Detection {
        float bbox[LOCATIONS];  // center_x, center_y, w, h
        float conf;             // bbox_conf * cls_conf
        float class_id;
    };
}
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
class YoloLayerPlugin : public IPluginV2IOExt
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector<Yolo::YoloKernel>& vYoloKernel);
|
|
||||||
YoloLayerPlugin(const void* data, size_t length);
|
|
||||||
~YoloLayerPlugin();
|
|
||||||
|
|
||||||
int getNbOutputs() const override
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
|
|
||||||
|
|
||||||
int initialize() override;
|
|
||||||
|
|
||||||
virtual void terminate() override {};
|
|
||||||
|
|
||||||
virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }
|
|
||||||
|
|
||||||
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
|
|
||||||
|
|
||||||
virtual size_t getSerializationSize() const override;
|
|
||||||
|
|
||||||
virtual void serialize(void* buffer) const override;
|
|
||||||
|
|
||||||
bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
|
|
||||||
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* getPluginType() const override;
|
|
||||||
|
|
||||||
const char* getPluginVersion() const override;
|
|
||||||
|
|
||||||
void destroy() override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* clone() const override;
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* pluginNamespace) override;
|
|
||||||
|
|
||||||
const char* getPluginNamespace() const override;
|
|
||||||
|
|
||||||
DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
|
|
||||||
|
|
||||||
bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
|
|
||||||
|
|
||||||
bool canBroadcastInputAcrossBatch(int inputIndex) const override;
|
|
||||||
|
|
||||||
void attachToContext(
|
|
||||||
cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
|
|
||||||
|
|
||||||
void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
|
|
||||||
|
|
||||||
void detachFromContext() override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void forwardGpu(const float *const * inputs, float * output, cudaStream_t stream, int batchSize = 1);
|
|
||||||
int mThreadCount = 256;
|
|
||||||
const char* mPluginNamespace;
|
|
||||||
int mKernelCount;
|
|
||||||
int mClassCount;
|
|
||||||
int mYoloV5NetWidth;
|
|
||||||
int mYoloV5NetHeight;
|
|
||||||
int mMaxOutObject;
|
|
||||||
std::vector<Yolo::YoloKernel> mYoloKernel;
|
|
||||||
void** mAnchor;
|
|
||||||
};
|
|
||||||
|
|
||||||
class YoloPluginCreator : public IPluginCreator
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloPluginCreator();
|
|
||||||
|
|
||||||
~YoloPluginCreator() override = default;
|
|
||||||
|
|
||||||
const char* getPluginName() const override;
|
|
||||||
|
|
||||||
const char* getPluginVersion() const override;
|
|
||||||
|
|
||||||
const PluginFieldCollection* getFieldNames() override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* libNamespace) override
|
|
||||||
{
|
|
||||||
mNamespace = libNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* getPluginNamespace() const override
|
|
||||||
{
|
|
||||||
return mNamespace.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::string mNamespace;
|
|
||||||
static PluginFieldCollection mFC;
|
|
||||||
static std::vector<PluginField> mPluginAttributes;
|
|
||||||
};
|
|
||||||
REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
18
external/yolov5-5.0/config_infer_primary.txt
vendored
18
external/yolov5-5.0/config_infer_primary.txt
vendored
@@ -1,18 +0,0 @@
|
|||||||
[property]
|
|
||||||
gpu-id=0
|
|
||||||
net-scale-factor=0.0039215697906911373
|
|
||||||
model-color-format=0
|
|
||||||
model-engine-file=yolov5s.engine
|
|
||||||
labelfile-path=labels.txt
|
|
||||||
num-detected-classes=80
|
|
||||||
interval=0
|
|
||||||
gie-unique-id=1
|
|
||||||
process-mode=1
|
|
||||||
network-type=0
|
|
||||||
cluster-mode=4
|
|
||||||
maintain-aspect-ratio=0
|
|
||||||
parse-bbox-func-name=NvDsInferParseCustomYoloV5
|
|
||||||
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
|
|
||||||
|
|
||||||
[class-attrs-all]
|
|
||||||
pre-cluster-threshold=0.25
|
|
||||||
63
external/yolov5-5.0/deepstream_app_config.txt
vendored
63
external/yolov5-5.0/deepstream_app_config.txt
vendored
@@ -1,63 +0,0 @@
|
|||||||
[application]
|
|
||||||
enable-perf-measurement=1
|
|
||||||
perf-measurement-interval-sec=1
|
|
||||||
|
|
||||||
[tiled-display]
|
|
||||||
enable=1
|
|
||||||
rows=1
|
|
||||||
columns=1
|
|
||||||
width=1280
|
|
||||||
height=720
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[source0]
|
|
||||||
enable=1
|
|
||||||
type=3
|
|
||||||
uri=file://../../samples/streams/sample_1080p_h264.mp4
|
|
||||||
num-sources=1
|
|
||||||
gpu-id=0
|
|
||||||
cudadec-memtype=0
|
|
||||||
|
|
||||||
[sink0]
|
|
||||||
enable=1
|
|
||||||
type=2
|
|
||||||
sync=0
|
|
||||||
source-id=0
|
|
||||||
gpu-id=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[osd]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
border-width=1
|
|
||||||
text-size=15
|
|
||||||
text-color=1;1;1;1;
|
|
||||||
text-bg-color=0.3;0.3;0.3;1
|
|
||||||
font=Serif
|
|
||||||
show-clock=0
|
|
||||||
clock-x-offset=800
|
|
||||||
clock-y-offset=820
|
|
||||||
clock-text-size=12
|
|
||||||
clock-color=1;0;0;0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[streammux]
|
|
||||||
gpu-id=0
|
|
||||||
live-source=0
|
|
||||||
batch-size=1
|
|
||||||
batched-push-timeout=40000
|
|
||||||
width=1920
|
|
||||||
height=1080
|
|
||||||
enable-padding=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=config_infer_primary.txt
|
|
||||||
|
|
||||||
[tests]
|
|
||||||
file-loop=0
|
|
||||||
80
external/yolov5-5.0/labels.txt
vendored
80
external/yolov5-5.0/labels.txt
vendored
@@ -1,80 +0,0 @@
|
|||||||
person
|
|
||||||
bicycle
|
|
||||||
car
|
|
||||||
motorbike
|
|
||||||
aeroplane
|
|
||||||
bus
|
|
||||||
train
|
|
||||||
truck
|
|
||||||
boat
|
|
||||||
traffic light
|
|
||||||
fire hydrant
|
|
||||||
stop sign
|
|
||||||
parking meter
|
|
||||||
bench
|
|
||||||
bird
|
|
||||||
cat
|
|
||||||
dog
|
|
||||||
horse
|
|
||||||
sheep
|
|
||||||
cow
|
|
||||||
elephant
|
|
||||||
bear
|
|
||||||
zebra
|
|
||||||
giraffe
|
|
||||||
backpack
|
|
||||||
umbrella
|
|
||||||
handbag
|
|
||||||
tie
|
|
||||||
suitcase
|
|
||||||
frisbee
|
|
||||||
skis
|
|
||||||
snowboard
|
|
||||||
sports ball
|
|
||||||
kite
|
|
||||||
baseball bat
|
|
||||||
baseball glove
|
|
||||||
skateboard
|
|
||||||
surfboard
|
|
||||||
tennis racket
|
|
||||||
bottle
|
|
||||||
wine glass
|
|
||||||
cup
|
|
||||||
fork
|
|
||||||
knife
|
|
||||||
spoon
|
|
||||||
bowl
|
|
||||||
banana
|
|
||||||
apple
|
|
||||||
sandwich
|
|
||||||
orange
|
|
||||||
broccoli
|
|
||||||
carrot
|
|
||||||
hot dog
|
|
||||||
pizza
|
|
||||||
donut
|
|
||||||
cake
|
|
||||||
chair
|
|
||||||
sofa
|
|
||||||
pottedplant
|
|
||||||
bed
|
|
||||||
diningtable
|
|
||||||
toilet
|
|
||||||
tvmonitor
|
|
||||||
laptop
|
|
||||||
mouse
|
|
||||||
remote
|
|
||||||
keyboard
|
|
||||||
cell phone
|
|
||||||
microwave
|
|
||||||
oven
|
|
||||||
toaster
|
|
||||||
sink
|
|
||||||
refrigerator
|
|
||||||
book
|
|
||||||
clock
|
|
||||||
vase
|
|
||||||
scissors
|
|
||||||
teddy bear
|
|
||||||
hair drier
|
|
||||||
toothbrush
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Builds libnvdsinfer_custom_impl_Yolo.so from the bbox parser (.cpp) and the
# YOLO layer plugin (.cu). CUDA_VER must be supplied by the caller, e.g.
#   CUDA_VER=10.2 make
CUDA_VER?=
ifeq ($(CUDA_VER),)
  $(error "CUDA_VER is not set")
endif
CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc

CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include

LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group

INCS:= $(wildcard *.h)
SRCFILES:= nvdsparsebbox_Yolo.cpp \
           yololayer.cu

TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so

# Map every source file (.cpp and .cu) to its object file.
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)

# 'all' and 'clean' are commands, not files: without .PHONY a stray file with
# one of these names would make the target look up to date and skip it.
.PHONY: all clean

all: $(TARGET_LIB)

# Objects are rebuilt when any header or this Makefile changes.
%.o: %.cpp $(INCS) Makefile
	$(CC) -c -o $@ $(CFLAGS) $<

%.o: %.cu $(INCS) Makefile
	$(NVCC) -c -o $@ --compiler-options '-fPIC' $<

$(TARGET_LIB) : $(TARGET_OBJS)
	$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)

clean:
	rm -rf $(TARGET_LIB)
	rm -rf $(TARGET_OBJS)
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
#ifndef TRTX_CUDA_UTILS_H_
#define TRTX_CUDA_UTILS_H_

#include <cuda_runtime_api.h>

// The macro below expands to std::cerr and assert, so this header must pull
// in their declarations itself instead of relying on the includer.
#include <cassert>
#include <iostream>

#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call once; on failure logs the numeric code, the
// runtime's description and the call site, then trips assert(0).
// NOTE: when NDEBUG is defined the assert compiles away, so the failure is
// only logged and execution continues.
// Wrapped in do { } while (0) so `CUDA_CHECK(x);` behaves as one statement,
// including inside an unbraced if/else.
#define CUDA_CHECK(callstr)\
    do {\
        const cudaError_t error_code = (callstr);\
        if (error_code != cudaSuccess) {\
            std::cerr << "CUDA error " << error_code << " (" << cudaGetErrorString(error_code) << ") at " << __FILE__ << ":" << __LINE__ << std::endl;\
            assert(0);\
        }\
    } while (0)
#endif  // CUDA_CHECK

#endif  // TRTX_CUDA_UTILS_H_
|
|
||||||
|
|
||||||
@@ -1,122 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cassert>
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstring>
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
#define kNMS_THRESH 0.45
|
|
||||||
|
|
||||||
// Number of box coordinates stored for each detection.
static constexpr int LOCATIONS = 4;

// One detection record: six consecutive floats.
struct alignas(float) Detection {
    float bbox[LOCATIONS];  // center_x, center_y, w, h
    float conf;             // bbox_conf * cls_conf
    float class_id;         // class index, stored as a float
};
|
|
||||||
|
|
||||||
// Intersection-over-union of two boxes given as {center_x, center_y, w, h}.
//
// Returns 0 when the boxes do not overlap, and also when their union has no
// area (e.g. two zero-sized boxes), where the plain ratio would be 0/0 = NaN.
float iou(float lbox[4], float rbox[4]) {
    // Edges of the intersection rectangle.
    const float left   = std::max(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f);
    const float right  = std::min(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f);
    const float top    = std::max(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f);
    const float bottom = std::min(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f);

    if (top > bottom || left > right)
        return 0.0f;

    const float interArea = (right - left) * (bottom - top);
    const float unionArea = lbox[2] * lbox[3] + rbox[2] * rbox[3] - interArea;
    if (!(unionArea > 0.0f))
        return 0.0f;  // degenerate boxes: avoid dividing by zero

    return interArea / unionArea;
}
|
|
||||||
|
|
||||||
bool cmp(Detection& a, Detection& b) {
|
|
||||||
return a.conf > b.conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
void nms(std::vector<Detection>& res, float *output, float conf_thresh, float nms_thresh) {
|
|
||||||
int det_size = sizeof(Detection) / sizeof(float);
|
|
||||||
std::map<float, std::vector<Detection>> m;
|
|
||||||
for (int i = 0; i < output[0] && i < 1000; i++) {
|
|
||||||
if (output[1 + det_size * i + 4] <= conf_thresh) continue;
|
|
||||||
Detection det;
|
|
||||||
memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float));
|
|
||||||
if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
|
|
||||||
m[det.class_id].push_back(det);
|
|
||||||
}
|
|
||||||
for (auto it = m.begin(); it != m.end(); it++) {
|
|
||||||
auto& dets = it->second;
|
|
||||||
std::sort(dets.begin(), dets.end(), cmp);
|
|
||||||
for (size_t m = 0; m < dets.size(); ++m) {
|
|
||||||
auto& item = dets[m];
|
|
||||||
res.push_back(item);
|
|
||||||
for (size_t n = m + 1; n < dets.size(); ++n) {
|
|
||||||
if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
|
|
||||||
dets.erase(dets.begin()+n);
|
|
||||||
--n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This is a sample bounding box parsing function for the sample YoloV5 detector model */
|
|
||||||
static bool NvDsInferParseYoloV5(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList)
|
|
||||||
{
|
|
||||||
const float kCONF_THRESH = detectionParams.perClassThreshold[0];
|
|
||||||
|
|
||||||
std::vector<Detection> res;
|
|
||||||
|
|
||||||
nms(res, (float*)(outputLayersInfo[0].buffer), kCONF_THRESH, kNMS_THRESH);
|
|
||||||
|
|
||||||
for(auto& r : res) {
|
|
||||||
NvDsInferParseObjectInfo oinfo;
|
|
||||||
|
|
||||||
oinfo.classId = r.class_id;
|
|
||||||
oinfo.left = static_cast<unsigned int>(r.bbox[0]-r.bbox[2]*0.5f);
|
|
||||||
oinfo.top = static_cast<unsigned int>(r.bbox[1]-r.bbox[3]*0.5f);
|
|
||||||
oinfo.width = static_cast<unsigned int>(r.bbox[2]);
|
|
||||||
oinfo.height = static_cast<unsigned int>(r.bbox[3]);
|
|
||||||
oinfo.detectionConfidence = r.conf;
|
|
||||||
objectList.push_back(oinfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" bool NvDsInferParseCustomYoloV5(
|
|
||||||
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const &networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const &detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo> &objectList)
|
|
||||||
{
|
|
||||||
return NvDsInferParseYoloV5(
|
|
||||||
outputLayersInfo, networkInfo, detectionParams, objectList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check that the custom function has been defined correctly */
|
|
||||||
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV5);
|
|
||||||
@@ -1,313 +0,0 @@
|
|||||||
#include <assert.h>
#include <cstring>
#include <iostream>
#include <vector>
#include "yololayer.h"
#include "cuda_utils.h"
|
|
||||||
|
|
||||||
// Helpers for (de)serializing trivially copyable values to/from a byte buffer.
namespace Tn
{
    // Copies val into buffer and advances buffer by sizeof(T).
    // memcpy is used rather than a reinterpret_cast store because the byte
    // buffer is not guaranteed to be suitably aligned for T.
    template<typename T>
    void write(char*& buffer, const T& val)
    {
        memcpy(buffer, &val, sizeof(T));
        buffer += sizeof(T);
    }

    // Copies sizeof(T) bytes from buffer into val and advances buffer.
    template<typename T>
    void read(const char*& buffer, T& val)
    {
        memcpy(&val, buffer, sizeof(T));
        buffer += sizeof(T);
    }
}
|
|
||||||
|
|
||||||
using namespace Yolo;
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
// Builds a plugin from explicit parameters and uploads each kernel's anchors
// to device memory. mAnchor is a pinned host array holding one device pointer
// per kernel.
YoloLayerPlugin::YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector<Yolo::YoloKernel>& vYoloKernel)
{
    mClassCount = classCount;
    mYoloV5NetWidth = netWidth;
    mYoloV5NetHeight = netHeight;
    mMaxOutObject = maxOut;
    mYoloKernel = vYoloKernel;
    mKernelCount = vYoloKernel.size();

    const size_t anchorBytes = sizeof(float) * CHECK_COUNT * 2;
    CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
    for (int k = 0; k < mKernelCount; ++k)
    {
        CUDA_CHECK(cudaMalloc(&mAnchor[k], anchorBytes));
        CUDA_CHECK(cudaMemcpy(mAnchor[k], mYoloKernel[k].anchors, anchorBytes, cudaMemcpyHostToDevice));
    }
}
|
|
||||||
// Frees every per-kernel device anchor buffer, then the pinned host array
// that held the pointers.
YoloLayerPlugin::~YoloLayerPlugin()
{
    int k = 0;
    while (k < mKernelCount)
    {
        CUDA_CHECK(cudaFree(mAnchor[k]));
        ++k;
    }
    CUDA_CHECK(cudaFreeHost(mAnchor));
}
|
|
||||||
|
|
||||||
// create the plugin at runtime from a byte stream
|
|
||||||
YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
|
|
||||||
{
|
|
||||||
using namespace Tn;
|
|
||||||
const char *d = reinterpret_cast<const char *>(data), *a = d;
|
|
||||||
read(d, mClassCount);
|
|
||||||
read(d, mThreadCount);
|
|
||||||
read(d, mKernelCount);
|
|
||||||
read(d, mYoloV5NetWidth);
|
|
||||||
read(d, mYoloV5NetHeight);
|
|
||||||
read(d, mMaxOutObject);
|
|
||||||
mYoloKernel.resize(mKernelCount);
|
|
||||||
auto kernelSize = mKernelCount * sizeof(YoloKernel);
|
|
||||||
memcpy(mYoloKernel.data(), d, kernelSize);
|
|
||||||
d += kernelSize;
|
|
||||||
CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
|
|
||||||
size_t AnchorLen = sizeof(float)* CHECK_COUNT * 2;
|
|
||||||
for (int ii = 0; ii < mKernelCount; ii++)
|
|
||||||
{
|
|
||||||
CUDA_CHECK(cudaMalloc(&mAnchor[ii], AnchorLen));
|
|
||||||
const auto& yolo = mYoloKernel[ii];
|
|
||||||
CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
|
|
||||||
}
|
|
||||||
assert(d == a + length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::serialize(void* buffer) const
|
|
||||||
{
|
|
||||||
using namespace Tn;
|
|
||||||
char* d = static_cast<char*>(buffer), *a = d;
|
|
||||||
write(d, mClassCount);
|
|
||||||
write(d, mThreadCount);
|
|
||||||
write(d, mKernelCount);
|
|
||||||
write(d, mYoloV5NetWidth);
|
|
||||||
write(d, mYoloV5NetHeight);
|
|
||||||
write(d, mMaxOutObject);
|
|
||||||
auto kernelSize = mKernelCount * sizeof(YoloKernel);
|
|
||||||
memcpy(d, mYoloKernel.data(), kernelSize);
|
|
||||||
d += kernelSize;
|
|
||||||
|
|
||||||
assert(d == a + getSerializationSize());
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t YoloLayerPlugin::getSerializationSize() const
|
|
||||||
{
|
|
||||||
return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount) + sizeof(Yolo::YoloKernel) * mYoloKernel.size() + sizeof(mYoloV5NetWidth) + sizeof(mYoloV5NetHeight) + sizeof(mMaxOutObject);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Nothing to set up here; always reports success (0).
int YoloLayerPlugin::initialize()
{
    constexpr int kSuccess = 0;
    return kSuccess;
}
|
|
||||||
|
|
||||||
// The single output is a flat buffer: one leading float for the detection
// count followed by room for mMaxOutObject Detection records.
Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
{
    const int detectionFloats = mMaxOutObject * sizeof(Detection) / sizeof(float);
    return Dims3(1 + detectionFloats, 1, 1);
}
|
|
||||||
|
|
||||||
// Set plugin namespace
|
|
||||||
void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
|
|
||||||
{
|
|
||||||
mPluginNamespace = pluginNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginNamespace() const
|
|
||||||
{
|
|
||||||
return mPluginNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the DataType of the plugin output at the requested index
|
|
||||||
DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
|
|
||||||
{
|
|
||||||
return DataType::kFLOAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if output tensor is broadcast across a batch.
|
|
||||||
bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return true if plugin can use input that is broadcast across batch without replication.
|
|
||||||
bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Intentionally empty: no configuration is derived from the tensor descriptions.
void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
{
    // no-op
}
|
|
||||||
|
|
||||||
// Attach the plugin object to an execution context and grant the plugin the access to some context resource.
|
|
||||||
void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detach the plugin object from its execution context.
|
|
||||||
void YoloLayerPlugin::detachFromContext() {}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginType() const
|
|
||||||
{
|
|
||||||
return "YoloLayer_TRT";
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloLayerPlugin::getPluginVersion() const
|
|
||||||
{
|
|
||||||
return "1";
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::destroy()
|
|
||||||
{
|
|
||||||
delete this;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clone the plugin
|
|
||||||
IPluginV2IOExt* YoloLayerPlugin::clone() const
|
|
||||||
{
|
|
||||||
YoloLayerPlugin* p = new YoloLayerPlugin(mClassCount, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, mYoloKernel);
|
|
||||||
p->setPluginNamespace(mPluginNamespace);
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logistic (sigmoid) function: 1 / (1 + e^-x).
__device__ float Logist(float data)
{
    return 1.0f / (1.0f + expf(-data));
}
|
|
||||||
|
|
||||||
// Decodes one YOLO output scale into Detection records.
//
// Each thread handles one grid cell of one batch item (noElements threads do
// work) and loops over the CHECK_COUNT anchors of that cell. The input is read
// channel-major: channel c of anchor k for cell idx lives at
// idx + k * (5 + classes) * total_grid + c * total_grid.
// Channels 0-3 are box terms, 4 is objectness, 5.. are class scores.
//
// For each batch item the output starts with a float counter followed by the
// Detection records. A slot is claimed with atomicAdd on the counter; once the
// claimed index reaches maxoutobject the thread stops writing (the counter
// itself keeps increasing).
__global__ void CalDetection(const float *input, float *output, int noElements,
    const int netwidth, const int netheight, int maxoutobject, int yoloWidth, int yoloHeight, const float anchors[CHECK_COUNT * 2], int classes, int outputElem)
{
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= noElements) return;

    // Split the flat thread index into (batch item, cell within the grid).
    const int total_grid = yoloWidth * yoloHeight;
    const int bnIdx = idx / total_grid;
    idx = idx - total_grid * bnIdx;

    const int info_len_i = 5 + classes;
    const float* curInput = input + bnIdx * (info_len_i * total_grid * CHECK_COUNT);
    float* res_count = output + bnIdx * outputElem;

    const int row = idx / yoloWidth;
    const int col = idx % yoloWidth;

    for (int k = 0; k < CHECK_COUNT; ++k) {
        // Offset of channel 0 for this cell and anchor.
        const int base = idx + k * info_len_i * total_grid;

        const float box_prob = Logist(curInput[base + 4 * total_grid]);
        if (box_prob < IGNORE_THRESH) continue;

        // Pick the class with the highest sigmoid score.
        int class_id = 0;
        float max_cls_prob = 0.0;
        for (int i = 5; i < info_len_i; ++i) {
            const float p = Logist(curInput[base + i * total_grid]);
            if (p > max_cls_prob) {
                max_cls_prob = p;
                class_id = i - 5;
            }
        }

        // Claim an output slot; give up once the buffer is full.
        const int count = (int)atomicAdd(res_count, 1);
        if (count >= maxoutobject) return;
        char* data = (char*)res_count + sizeof(float) + count * sizeof(Detection);
        Detection* det = (Detection*)(data);

        // Box decoding, mirroring the PyTorch reference:
        //   y = x[i].sigmoid()
        //   y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * stride      # xy
        //   y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid          # wh
        // (v5: https://github.com/ultralytics/yolov5/issues/471)
        det->bbox[0] = (col - 0.5f + 2.0f * Logist(curInput[base + 0 * total_grid])) * netwidth / yoloWidth;
        det->bbox[1] = (row - 0.5f + 2.0f * Logist(curInput[base + 1 * total_grid])) * netheight / yoloHeight;

        const float w = 2.0f * Logist(curInput[base + 2 * total_grid]);
        det->bbox[2] = w * w * anchors[2 * k];
        const float h = 2.0f * Logist(curInput[base + 3 * total_grid]);
        det->bbox[3] = h * h * anchors[2 * k + 1];

        det->conf = box_prob * max_cls_prob;
        det->class_id = class_id;
    }
}
|
|
||||||
|
|
||||||
void YoloLayerPlugin::forwardGpu(const float* const* inputs, float *output, cudaStream_t stream, int batchSize)
|
|
||||||
{
|
|
||||||
int outputElem = 1 + mMaxOutObject * sizeof(Detection) / sizeof(float);
|
|
||||||
for (int idx = 0; idx < batchSize; ++idx) {
|
|
||||||
CUDA_CHECK(cudaMemset(output + idx * outputElem, 0, sizeof(float)));
|
|
||||||
}
|
|
||||||
int numElem = 0;
|
|
||||||
for (unsigned int i = 0; i < mYoloKernel.size(); ++i) {
|
|
||||||
const auto& yolo = mYoloKernel[i];
|
|
||||||
numElem = yolo.width * yolo.height * batchSize;
|
|
||||||
if (numElem < mThreadCount) mThreadCount = numElem;
|
|
||||||
|
|
||||||
//printf("Net: %d %d \n", mYoloV5NetWidth, mYoloV5NetHeight);
|
|
||||||
CalDetection << < (numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream >> >
|
|
||||||
(inputs[i], output, numElem, mYoloV5NetWidth, mYoloV5NetHeight, mMaxOutObject, yolo.width, yolo.height, (float*)mAnchor[i], mClassCount, outputElem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int YoloLayerPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PluginFieldCollection YoloPluginCreator::mFC{};
|
|
||||||
std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
|
|
||||||
|
|
||||||
// Resets the shared attribute list to empty and points the static field
// collection at it.
YoloPluginCreator::YoloPluginCreator()
{
    mPluginAttributes.clear();

    mFC.fields = mPluginAttributes.data();
    mFC.nbFields = mPluginAttributes.size();
}
|
|
||||||
|
|
||||||
const char* YoloPluginCreator::getPluginName() const
|
|
||||||
{
|
|
||||||
return "YoloLayer_TRT";
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* YoloPluginCreator::getPluginVersion() const
|
|
||||||
{
|
|
||||||
return "1";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the address of the static field collection set up in the constructor.
const PluginFieldCollection* YoloPluginCreator::getFieldNames()
{
    const PluginFieldCollection* const fields = &mFC;
    return fields;
}
|
|
||||||
|
|
||||||
// Creates a YoloLayerPlugin from a field collection.
//
// Expects exactly two fields, in this order:
//   fields[0] "netinfo": four ints {class count, input width, input height,
//                        max output object count}
//   fields[1] "kernels": `length` Yolo::YoloKernel records
//
// The layout is validated at runtime (the previous assert-only checks vanish
// under NDEBUG); on a malformed collection an error is printed and nullptr is
// returned. The returned plugin is heap-allocated and carries this creator's
// namespace.
IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
{
    const bool valid =
        fc != nullptr && fc->fields != nullptr && fc->nbFields == 2
        && fc->fields[0].name != nullptr && strcmp(fc->fields[0].name, "netinfo") == 0
        && fc->fields[1].name != nullptr && strcmp(fc->fields[1].name, "kernels") == 0
        && fc->fields[0].data != nullptr
        && fc->fields[1].length >= 0
        && (fc->fields[1].length == 0 || fc->fields[1].data != nullptr);
    if (!valid) {
        std::cerr << "YoloLayer_TRT: createPlugin expects the fields \"netinfo\" and \"kernels\"" << std::endl;
        return nullptr;
    }

    const int* p_netinfo = static_cast<const int*>(fc->fields[0].data);
    const int class_count = p_netinfo[0];
    const int input_w = p_netinfo[1];
    const int input_h = p_netinfo[2];
    const int max_output_object_count = p_netinfo[3];

    std::vector<Yolo::YoloKernel> kernels(fc->fields[1].length);
    if (!kernels.empty()) {
        // data() instead of &kernels[0]: indexing an empty vector is undefined.
        memcpy(kernels.data(), fc->fields[1].data, kernels.size() * sizeof(Yolo::YoloKernel));
    }

    YoloLayerPlugin* obj = new YoloLayerPlugin(class_count, input_w, input_h, max_output_object_count, kernels);
    obj->setPluginNamespace(mNamespace.c_str());
    return obj;
}
|
|
||||||
|
|
||||||
// Recreates a plugin from serialized bytes and assigns this creator's
// namespace. The object is released later via YoloLayerPlugin::destroy()
// when the network is destroyed.
IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
{
    auto* plugin = new YoloLayerPlugin(serialData, serialLength);
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,137 +0,0 @@
|
|||||||
#ifndef _YOLO_LAYER_H
|
|
||||||
#define _YOLO_LAYER_H
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
// Constants and plain data records for the YOLO layer.
namespace Yolo
{
    static constexpr int CHECK_COUNT = 3;          // anchors per grid cell
    static constexpr float IGNORE_THRESH = 0.1f;   // minimum objectness kept by the kernel

    // Geometry of one output scale: grid size and CHECK_COUNT (w, h) anchor pairs.
    struct YoloKernel
    {
        int width;
        int height;
        float anchors[CHECK_COUNT * 2];
    };

    static constexpr int MAX_OUTPUT_BBOX_COUNT = 1000;
    static constexpr int CLASS_NUM = 80;
    static constexpr int INPUT_H = 640;  // yolov5's input height and width must be divisible by 32.
    static constexpr int INPUT_W = 640;

    static constexpr int LOCATIONS = 4;  // box coordinates per detection

    // One detection record: six consecutive floats.
    struct alignas(float) Detection {
        float bbox[LOCATIONS];  // center_x, center_y, w, h
        float conf;             // bbox_conf * cls_conf
        float class_id;
    };
}
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
class YoloLayerPlugin : public IPluginV2IOExt
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloLayerPlugin(int classCount, int netWidth, int netHeight, int maxOut, const std::vector<Yolo::YoloKernel>& vYoloKernel);
|
|
||||||
YoloLayerPlugin(const void* data, size_t length);
|
|
||||||
~YoloLayerPlugin();
|
|
||||||
|
|
||||||
int getNbOutputs() const override
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
|
|
||||||
|
|
||||||
int initialize() override;
|
|
||||||
|
|
||||||
virtual void terminate() override {};
|
|
||||||
|
|
||||||
virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }
|
|
||||||
|
|
||||||
virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override;
|
|
||||||
|
|
||||||
virtual size_t getSerializationSize() const override;
|
|
||||||
|
|
||||||
virtual void serialize(void* buffer) const override;
|
|
||||||
|
|
||||||
bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
|
|
||||||
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* getPluginType() const override;
|
|
||||||
|
|
||||||
const char* getPluginVersion() const override;
|
|
||||||
|
|
||||||
void destroy() override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* clone() const override;
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* pluginNamespace) override;
|
|
||||||
|
|
||||||
const char* getPluginNamespace() const override;
|
|
||||||
|
|
||||||
DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
|
|
||||||
|
|
||||||
bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
|
|
||||||
|
|
||||||
bool canBroadcastInputAcrossBatch(int inputIndex) const override;
|
|
||||||
|
|
||||||
void attachToContext(
|
|
||||||
cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
|
|
||||||
|
|
||||||
void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
|
|
||||||
|
|
||||||
void detachFromContext() override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void forwardGpu(const float* const* inputs, float *output, cudaStream_t stream, int batchSize = 1);
|
|
||||||
int mThreadCount = 256;
|
|
||||||
const char* mPluginNamespace;
|
|
||||||
int mKernelCount;
|
|
||||||
int mClassCount;
|
|
||||||
int mYoloV5NetWidth;
|
|
||||||
int mYoloV5NetHeight;
|
|
||||||
int mMaxOutObject;
|
|
||||||
std::vector<Yolo::YoloKernel> mYoloKernel;
|
|
||||||
void** mAnchor;
|
|
||||||
};
|
|
||||||
|
|
||||||
class YoloPluginCreator : public IPluginCreator
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
YoloPluginCreator();
|
|
||||||
|
|
||||||
~YoloPluginCreator() override = default;
|
|
||||||
|
|
||||||
const char* getPluginName() const override;
|
|
||||||
|
|
||||||
const char* getPluginVersion() const override;
|
|
||||||
|
|
||||||
const PluginFieldCollection* getFieldNames() override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
|
|
||||||
|
|
||||||
IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
|
|
||||||
|
|
||||||
void setPluginNamespace(const char* libNamespace) override
|
|
||||||
{
|
|
||||||
mNamespace = libNamespace;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* getPluginNamespace() const override
|
|
||||||
{
|
|
||||||
return mNamespace.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::string mNamespace;
|
|
||||||
static PluginFieldCollection mFC;
|
|
||||||
static std::vector<PluginField> mPluginAttributes;
|
|
||||||
};
|
|
||||||
REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,191 +0,0 @@
|
|||||||
# Multiple YOLO inferences
|
|
||||||
How to use multiple GIEs on DeepStream
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
1. Download [my native folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/native), rename to yolo and move to your deepstream/sources folder.
|
|
||||||
2. Make a folder, in deepstream/sources/yolo directory, named pgie (where you will put files of primary inference).
|
|
||||||
3. Make a folder for each secondary inference in the deepstream/sources/yolo directory, named sgie* (* = 1, 2, 3, etc., depending on the number of secondary inferences), where you will put the files of the other inferences.
|
|
||||||
4. Copy and rename each obj.names file to labels.txt in each inference directory (pgie, sgie*), according to each inference type.
|
|
||||||
5. Copy your yolo.cfg and yolo.weights files to each inference directory (pgie, sgie*), according to each inference type.
|
|
||||||
6. Move nvdsinfer_custom_impl_Yolo folder and config_infer_primary.txt file to each inference directory (pgie, sgie*; for sgie's, rename config_infer_primary to config_infer_secondary*; * = 1, 2, 3, etc.)
|
|
||||||
7. Edit DeepStream for your custom model, according to each yolo.cfg file: https://github.com/marcoslucianops/DeepStream-Yolo/blob/master/customModels.md
|
|
||||||
|
|
||||||
**The example folder in this repository contains all the example files for multiple YOLO inferences.**
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Editing Makefile
|
|
||||||
To compile nvdsinfer_custom_impl_Yolo without errors, it is necessary to edit the Makefile (line 34) in the nvdsinfer_custom_impl_Yolo folder of each inference directory.
|
|
||||||
```
|
|
||||||
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include
|
|
||||||
```
|
|
||||||
To:
|
|
||||||
```
|
|
||||||
CFLAGS+= -I../../../includes -I/usr/local/cuda-$(CUDA_VER)/include
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Editing yoloPlugins.h
|
|
||||||
To run deepstream-app without errors, it is necessary to edit yoloPlugins.h (line 51) in the nvdsinfer_custom_impl_Yolo folder of each secondary inference directory.
|
|
||||||
```
|
|
||||||
const char* YOLOLAYER_PLUGIN_VERSION {"1"};
|
|
||||||
```
|
|
||||||
To:
|
|
||||||
```
|
|
||||||
const char* YOLOLAYER_PLUGIN_VERSION {"2"};
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: 2 = sgie1, 3 = sgie2, 4 = sgie3, etc
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Compiling edited models
|
|
||||||
1. Check your CUDA version (nvcc --version)
|
|
||||||
2. Go to inference directory.
|
|
||||||
3. Type command to compile:
|
|
||||||
|
|
||||||
* x86 platform
|
|
||||||
```
|
|
||||||
CUDA_VER=11.1 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
* Jetson platform
|
|
||||||
```
|
|
||||||
CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
|
|
||||||
```
|
|
||||||
|
|
||||||
**Do this for each GIE!**
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Add secondary-gie to deepstream_app_config after primary-gie
|
|
||||||
|
|
||||||
Example for 1 secondary-gie (2 inferences):
|
|
||||||
```
|
|
||||||
[secondary-gie0]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=2
|
|
||||||
operate-on-gie-id=1
|
|
||||||
# If you want the secondary inference to operate only on specific class ids of the GIE (class ids you want to operate on: 1, 1;2, 2;3;4, 3 etc; comment it out if you don't want to use it)
|
|
||||||
operate-on-class-ids=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=sgie1/config_infer_secondary1.txt
|
|
||||||
```
|
|
||||||
Example for 2 secondary-gie (3 inferences):
|
|
||||||
```
|
|
||||||
[secondary-gie0]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=2
|
|
||||||
operate-on-gie-id=1
|
|
||||||
operate-on-class-ids=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=sgie1/config_infer_secondary1.txt
|
|
||||||
|
|
||||||
[secondary-gie1]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=3
|
|
||||||
operate-on-gie-id=1
|
|
||||||
operate-on-class-ids=0
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=sgie2/config_infer_secondary2.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: remember to edit primary-gie
|
|
||||||
```
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=config_infer_primary.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
to
|
|
||||||
```
|
|
||||||
[primary-gie]
|
|
||||||
enable=1
|
|
||||||
gpu-id=0
|
|
||||||
gie-unique-id=1
|
|
||||||
nvbuf-memory-type=0
|
|
||||||
config-file=pgie/config_infer_primary.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
### Editing config_infer
|
|
||||||
|
|
||||||
* Edit path of config (config_infer_primary, config_infer_secondary1, etc.) files
|
|
||||||
|
|
||||||
Example for primary
|
|
||||||
|
|
||||||
```
|
|
||||||
custom-network-config=pgie/yolo.cfg
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for secondary1
|
|
||||||
|
|
||||||
```
|
|
||||||
custom-network-config=sgie1/yolo.cfg
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for secondary2
|
|
||||||
|
|
||||||
```
|
|
||||||
custom-network-config=sgie2/yolo.cfg
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit gie-unique-id
|
|
||||||
|
|
||||||
Example for primary
|
|
||||||
|
|
||||||
```
|
|
||||||
gie-unique-id=1
|
|
||||||
process-mode=1
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for secondary1
|
|
||||||
|
|
||||||
```
|
|
||||||
gie-unique-id=2
|
|
||||||
process-mode=2
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for secondary2
|
|
||||||
|
|
||||||
```
|
|
||||||
gie-unique-id=3
|
|
||||||
process-mode=2
|
|
||||||
```
|
|
||||||
|
|
||||||
##
|
|
||||||
|
|
||||||
* Edit batch-size
|
|
||||||
|
|
||||||
Example for primary
|
|
||||||
|
|
||||||
```
|
|
||||||
# Number of sources
|
|
||||||
batch-size=1
|
|
||||||
```
|
|
||||||
|
|
||||||
Example for all secondary:
|
|
||||||
|
|
||||||
```
|
|
||||||
batch-size=16
|
|
||||||
```
|
|
||||||
|
|
||||||
### Testing model
|
|
||||||
To run your custom YOLO model, use this command
|
|
||||||
|
|
||||||
```
|
|
||||||
deepstream-app -c deepstream_app_config.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
**During the test process, the engine file will be generated. When the engine build process is done, move the engine file to the respective GIE folder (pgie, sgie1, etc.)**
|
|
||||||
@@ -1,137 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "calibrator.h"
|
|
||||||
#include <fstream>
|
|
||||||
#include <iterator>
|
|
||||||
|
|
||||||
namespace nvinfer1
|
|
||||||
{
|
|
||||||
// Sets up the calibrator: stores the batch geometry, loads one image path per
// line from the text file imgPath (left empty if the file cannot be opened),
// and allocates a host staging buffer plus a device input buffer sized for one
// batch of floats.
int8EntroyCalibrator::int8EntroyCalibrator(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath,
    const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0)
{
    inputCount = batchsize * channels * height * width;

    std::fstream listFile(imgPath);
    if (listFile.is_open())
    {
        for (std::string line; std::getline(listFile, line); )
        {
            imgPaths.push_back(line);
        }
    }

    batchData = new float[inputCount];
    CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
}
|
|
||||||
|
|
||||||
// Releases the device input buffer and the host staging buffer.
int8EntroyCalibrator::~int8EntroyCalibrator()
{
    CUDA_CHECK(cudaFree(deviceInput));
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] batchData;
}
|
|
||||||
|
|
||||||
bool int8EntroyCalibrator::getBatch(void **bindings, const char **names, int nbBindings)
|
|
||||||
{
|
|
||||||
if (imageIndex + batchSize > uint(imgPaths.size()))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
float* ptr = batchData;
|
|
||||||
for (size_t j = imageIndex; j < imageIndex + batchSize; ++j)
|
|
||||||
{
|
|
||||||
cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR);
|
|
||||||
std::vector<float>inputData = prepareImage(img, inputC, inputH, inputW, letterBox);
|
|
||||||
|
|
||||||
int len = (int)(inputData.size());
|
|
||||||
memcpy(ptr, inputData.data(), len * sizeof(float));
|
|
||||||
|
|
||||||
ptr += inputData.size();
|
|
||||||
std::cout << "Load image: " << imgPaths[j] << std::endl;
|
|
||||||
std::cout << "Progress: " << (j + 1)*100. / imgPaths.size() << "%" << std::endl;
|
|
||||||
}
|
|
||||||
imageIndex += batchSize;
|
|
||||||
CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
|
|
||||||
bindings[0] = deviceInput;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const void* int8EntroyCalibrator::readCalibrationCache(std::size_t &length)
|
|
||||||
{
|
|
||||||
calibrationCache.clear();
|
|
||||||
std::ifstream input(calibTablePath, std::ios::binary);
|
|
||||||
input >> std::noskipws;
|
|
||||||
if (readCache && input.good())
|
|
||||||
{
|
|
||||||
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
|
|
||||||
std::back_inserter(calibrationCache));
|
|
||||||
}
|
|
||||||
length = calibrationCache.size();
|
|
||||||
return length ? calibrationCache.data() : nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length)
|
|
||||||
{
|
|
||||||
std::ofstream output(calibTablePath, std::ios::binary);
|
|
||||||
output.write(reinterpret_cast<const char*>(cache), length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box)
|
|
||||||
{
|
|
||||||
cv::Mat out;
|
|
||||||
int image_w = img.cols;
|
|
||||||
int image_h = img.rows;
|
|
||||||
if (image_w != input_w || image_h != input_h)
|
|
||||||
{
|
|
||||||
if (letter_box == 1)
|
|
||||||
{
|
|
||||||
float ratio_w = (float)image_w / (float)input_w;
|
|
||||||
float ratio_h = (float)image_h / (float)input_h;
|
|
||||||
if (ratio_w > ratio_h)
|
|
||||||
{
|
|
||||||
int new_width = input_w * ratio_h;
|
|
||||||
int x = (image_w - new_width) / 2;
|
|
||||||
cv::Rect roi(abs(x), 0, new_width, image_h);
|
|
||||||
out = img(roi);
|
|
||||||
}
|
|
||||||
else if (ratio_w < ratio_h)
|
|
||||||
{
|
|
||||||
int new_height = input_h * ratio_w;
|
|
||||||
int y = (image_h - new_height) / 2;
|
|
||||||
cv::Rect roi(0, abs(y), image_w, new_height);
|
|
||||||
out = img(roi);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
out = img;
|
|
||||||
}
|
|
||||||
cv::resize(out, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
|
|
||||||
}
|
|
||||||
cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cv::cvtColor(img, out, cv::COLOR_BGR2RGB);
|
|
||||||
}
|
|
||||||
if (input_c == 3)
|
|
||||||
{
|
|
||||||
out.convertTo(out, CV_32FC3, 1.0 / 255.0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
out.convertTo(out, CV_32FC1, 1.0 / 255.0);
|
|
||||||
}
|
|
||||||
std::vector<cv::Mat> input_channels(input_c);
|
|
||||||
cv::split(out, input_channels);
|
|
||||||
std::vector<float> result(input_h * input_w * input_c);
|
|
||||||
auto data = result.data();
|
|
||||||
int channelLength = input_h * input_w;
|
|
||||||
for (int i = 0; i < input_c; ++i)
|
|
||||||
{
|
|
||||||
memcpy(data, input_channels[i].data, channelLength * sizeof(float));
|
|
||||||
data += channelLength;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef CALIBRATOR_H
|
|
||||||
#define CALIBRATOR_H
|
|
||||||
|
|
||||||
#include "opencv2/opencv.hpp"
|
|
||||||
#include "cuda_runtime.h"
|
|
||||||
#include "NvInfer.h"
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call once; on failure prints the error code with
// the source location to std::cerr and triggers assert(0).
// NOTE: when NDEBUG is defined, assert(0) expands to nothing and execution
// continues after the message is printed.
// Wrapped in do { } while (0) so that `CUDA_CHECK(x);` is a single statement
// and is safe inside unbraced if/else.
#define CUDA_CHECK(callstr)                                                                    \
    do {                                                                                       \
        cudaError_t error_code = (callstr);                                                    \
        if (error_code != cudaSuccess) {                                                       \
            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \
            assert(0);                                                                         \
        }                                                                                      \
    } while (0)
#endif
|
|
||||||
|
|
||||||
namespace nvinfer1 {
|
|
||||||
class int8EntroyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
|
|
||||||
public:
|
|
||||||
int8EntroyCalibrator(const int &batchsize,
|
|
||||||
const int &channels,
|
|
||||||
const int &height,
|
|
||||||
const int &width,
|
|
||||||
const int &letterbox,
|
|
||||||
const std::string &imgPath,
|
|
||||||
const std::string &calibTablePath);
|
|
||||||
|
|
||||||
virtual ~int8EntroyCalibrator();
|
|
||||||
int getBatchSize() const override { return batchSize; }
|
|
||||||
bool getBatch(void *bindings[], const char *names[], int nbBindings) override;
|
|
||||||
const void *readCalibrationCache(std::size_t &length) override;
|
|
||||||
void writeCalibrationCache(const void *ptr, std::size_t length) override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
int batchSize;
|
|
||||||
int inputC;
|
|
||||||
int inputH;
|
|
||||||
int inputW;
|
|
||||||
int letterBox;
|
|
||||||
std::string calibTablePath;
|
|
||||||
size_t imageIndex;
|
|
||||||
size_t inputCount;
|
|
||||||
std::vector<std::string> imgPaths;
|
|
||||||
float *batchData{ nullptr };
|
|
||||||
void *deviceInput{ nullptr };
|
|
||||||
bool readCache;
|
|
||||||
std::vector<char> calibrationCache;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box);
|
|
||||||
|
|
||||||
#endif //CALIBRATOR_H
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "activation_layer.h"
|
|
||||||
|
|
||||||
// Adds one activation layer of the given type on `input`, asserts that the
// layer was created and names it "<prefix><layerIdx>".
static nvinfer1::IActivationLayer* addNamedActivation(
    nvinfer1::INetworkDefinition* network,
    nvinfer1::ITensor* input,
    nvinfer1::ActivationType type,
    const std::string& prefix,
    int layerIdx)
{
    nvinfer1::IActivationLayer* layer = network->addActivation(*input, type);
    assert(layer != nullptr);
    const std::string layerName = prefix + std::to_string(layerIdx);
    layer->setName(layerName.c_str());
    return layer;
}

// Appends the activation named by `activation` to the network.
//
// layerIdx   - index used to build the layer names.
// activation - "relu", "sigmoid"/"logistic", "tanh", "leaky", "softplus" or
//              "mish"; any other value adds nothing.
// output     - layer returned unchanged when no activation is added.
// input      - tensor the activation is applied to.
// network    - network definition receiving the new layer(s).
//
// Returns the last layer added, or `output` for an unrecognised activation.
nvinfer1::ILayer* activationLayer(
    int layerIdx,
    std::string activation,
    nvinfer1::ILayer* output,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    if (activation == "relu")
    {
        output = addNamedActivation(
            network, input, nvinfer1::ActivationType::kRELU, "relu_", layerIdx);
    }
    else if (activation == "sigmoid" || activation == "logistic")
    {
        output = addNamedActivation(
            network, input, nvinfer1::ActivationType::kSIGMOID, "sigmoid_", layerIdx);
    }
    else if (activation == "tanh")
    {
        output = addNamedActivation(
            network, input, nvinfer1::ActivationType::kTANH, "tanh_", layerIdx);
    }
    else if (activation == "leaky")
    {
        // The helper asserts the layer is valid before it is configured.
        nvinfer1::IActivationLayer* leaky = addNamedActivation(
            network, input, nvinfer1::ActivationType::kLEAKY_RELU, "leaky_", layerIdx);
        leaky->setAlpha(0.1);
        output = leaky;
    }
    else if (activation == "softplus")
    {
        output = addNamedActivation(
            network, input, nvinfer1::ActivationType::kSOFTPLUS, "softplus_", layerIdx);
    }
    else if (activation == "mish")
    {
        // mish(x) = x * tanh(softplus(x)), built from three layers.
        nvinfer1::IActivationLayer* softplus = addNamedActivation(
            network, input, nvinfer1::ActivationType::kSOFTPLUS, "softplus_", layerIdx);
        nvinfer1::IActivationLayer* tanh = addNamedActivation(
            network, softplus->getOutput(0), nvinfer1::ActivationType::kTANH, "tanh_", layerIdx);
        nvinfer1::IElementWiseLayer* mish = network->addElementWise(
            *tanh->getOutput(0), *input,
            nvinfer1::ElementWiseOperation::kPROD);
        assert(mish != nullptr);
        const std::string mishLayerName = "mish_" + std::to_string(layerIdx);
        mish->setName(mishLayerName.c_str());
        output = mish;
    }
    return output;
}
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __ACTIVATION_LAYER_H__
|
|
||||||
#define __ACTIVATION_LAYER_H__
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <cassert>
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
#include "activation_layer.h"
|
|
||||||
|
|
||||||
nvinfer1::ILayer* activationLayer(
|
|
||||||
int layerIdx,
|
|
||||||
std::string activation,
|
|
||||||
nvinfer1::ILayer* output,
|
|
||||||
nvinfer1::ITensor* input,
|
|
||||||
nvinfer1::INetworkDefinition* network);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,168 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <math.h>
|
|
||||||
#include "convolutional_layer.h"
|
|
||||||
|
|
||||||
// Reads `count` consecutive floats from `weights` starting at `weightPtr`
// and advances `weightPtr` past them. Asserts that the range is available.
static std::vector<float> readWeightRun(
    const std::vector<float>& weights, int& weightPtr, int count)
{
    typedef std::vector<float>::size_type Index;
    assert(weightPtr >= 0 && count >= 0);
    assert(static_cast<Index>(weightPtr) + static_cast<Index>(count) <= weights.size());

    std::vector<float> values(weights.begin() + weightPtr, weights.begin() + weightPtr + count);
    weightPtr += count;
    return values;
}

// Copies `values` into a new[] array. The caller stores the array in a
// nvinfer1::Weights entry of trtWeights.
static float* copyToHeap(const std::vector<float>& values)
{
    float* buffer = new float[values.size()];
    for (std::vector<float>::size_type i = 0; i < values.size(); ++i)
    {
        buffer[i] = values[i];
    }
    return buffer;
}

// Builds a convolution (optionally followed by batch normalisation) plus its
// activation from a "convolutional" config block.
//
// layerIdx      - index used to build the layer names.
// block         - config section; needs "type", "filters", "pad", "size",
//                 "stride" and "activation"; "batch_normalize" and "groups"
//                 are optional.
// weights       - flat weight array of the model.
// trtWeights    - receives every Weights structure created here.
// weightPtr     - read position in `weights`; advanced past consumed values.
// inputChannels - channel count of `input`.
// input         - tensor to convolve.
// network       - network definition receiving the layers.
//
// Returns the last layer added (convolution, scale or activation).
nvinfer1::ILayer* convolutionalLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights,
    int& weightPtr,
    int& inputChannels,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "convolutional");
    assert(block.find("filters") != block.end());
    assert(block.find("pad") != block.end());
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    const int filters = std::stoi(block.at("filters"));
    const int padding = std::stoi(block.at("pad"));
    const int kernelSize = std::stoi(block.at("size"));
    const int stride = std::stoi(block.at("stride"));
    const std::string activation = block.at("activation");

    const bool batchNormalize = block.find("batch_normalize") != block.end()
        && block.at("batch_normalize") == "1";
    // The convolution carries its own bias only when batch normalisation is
    // off. This also covers an explicit "batch_normalize=0", whose bias
    // values are read below.
    const int bias = batchNormalize ? 0 : filters;

    const bool hasGroups = block.find("groups") != block.end();
    const int groups = hasGroups ? std::stoi(block.at("groups")) : 1;
    assert(groups > 0);

    const int pad = padding ? (kernelSize - 1) / 2 : 0;
    const int size = filters * inputChannels * kernelSize * kernelSize / groups;

    std::vector<float> bnBiases;
    std::vector<float> bnWeights;
    std::vector<float> bnRunningMean;
    std::vector<float> bnRunningVar;
    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias};

    if (!batchNormalize)
    {
        // Layout: biases, then convolution weights.
        convBias.values = copyToHeap(readWeightRun(weights, weightPtr, filters));
        trtWeights.push_back(convBias);
        convWt.values = copyToHeap(readWeightRun(weights, weightPtr, size));
        trtWeights.push_back(convWt);
    }
    else
    {
        // Layout: BN biases, BN weights, running mean, running variance,
        // then convolution weights.
        bnBiases = readWeightRun(weights, weightPtr, filters);
        bnWeights = readWeightRun(weights, weightPtr, filters);
        bnRunningMean = readWeightRun(weights, weightPtr, filters);
        bnRunningVar = readWeightRun(weights, weightPtr, filters);
        for (float& variance : bnRunningVar)
        {
            // Stored as sqrt(variance + eps), i.e. the divisor used below.
            variance = sqrt(variance + 1.0e-5);
        }
        convWt.values = copyToHeap(readWeightRun(weights, weightPtr, size));
        trtWeights.push_back(convWt);
        trtWeights.push_back(convBias);
    }

    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
    assert(conv != nullptr);
    const std::string convLayerName = "conv_" + std::to_string(layerIdx);
    conv->setName(convLayerName.c_str());
    conv->setStride(nvinfer1::DimsHW{stride, stride});
    conv->setPadding(nvinfer1::DimsHW{pad, pad});

    if (hasGroups)
    {
        conv->setNbGroups(groups);
    }

    nvinfer1::ILayer* output = conv;

    if (batchNormalize)
    {
        // Batch normalisation folded into a per-channel scale layer:
        //   y = x * (gamma / std) + (beta - mean * gamma / std)
        nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, filters};
        nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, filters};
        nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, filters};

        float* shiftWt = new float[filters];
        float* scaleWt = new float[filters];
        float* powerWt = new float[filters];
        for (int i = 0; i < filters; ++i)
        {
            shiftWt[i]
                = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
            scaleWt[i] = bnWeights.at(i) / bnRunningVar.at(i);
            powerWt[i] = 1.0;
        }
        shift.values = shiftWt;
        scale.values = scaleWt;
        power.values = powerWt;
        trtWeights.push_back(shift);
        trtWeights.push_back(scale);
        trtWeights.push_back(power);

        nvinfer1::IScaleLayer* bn = network->addScale(
            *output->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
        assert(bn != nullptr);
        const std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
        bn->setName(bnLayerName.c_str());
        output = bn;
    }

    output = activationLayer(layerIdx, activation, output, output->getOutput(0), network);
    assert(output != nullptr);

    return output;
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __CONVOLUTIONAL_LAYER_H__
|
|
||||||
#define __CONVOLUTIONAL_LAYER_H__
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
#include "activation_layer.h"
|
|
||||||
|
|
||||||
nvinfer1::ILayer* convolutionalLayer(
|
|
||||||
int layerIdx,
|
|
||||||
std::map<std::string, std::string>& block,
|
|
||||||
std::vector<float>& weights,
|
|
||||||
std::vector<nvinfer1::Weights>& trtWeights,
|
|
||||||
int& weightPtr,
|
|
||||||
int& inputChannels,
|
|
||||||
nvinfer1::ITensor* input,
|
|
||||||
nvinfer1::INetworkDefinition* network);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "dropout_layer.h"
|
|
||||||
|
|
||||||
// Adds the inference-time equivalent of a dropout layer.
//
// Dropout only alters activations during training, so it is represented here
// by an identity layer that passes `input` through unchanged.
//
// probability - dropout probability from the config; unused.
// input       - tensor entering the layer.
// network     - network definition receiving the layer.
//
// Returns the identity layer, or nullptr when `input` or `network` is null
// or the layer could not be created. The previous implementation returned an
// uninitialised pointer.
nvinfer1::ILayer* dropoutLayer(
    float probability,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    (void)probability;

    if (input == nullptr || network == nullptr)
    {
        return nullptr;
    }

    nvinfer1::ILayer* output = network->addIdentity(*input);
    return output;
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __DROPOUT_LAYER_H__
|
|
||||||
#define __DROPOUT_LAYER_H__
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
nvinfer1::ILayer* dropoutLayer(
|
|
||||||
float probability,
|
|
||||||
nvinfer1::ITensor* input,
|
|
||||||
nvinfer1::INetworkDefinition* network);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "maxpool_layer.h"
|
|
||||||
|
|
||||||
// Creates a max-pooling layer from a "maxpool" config block.
//
// layerIdx - index used to build the layer name ("maxpool_<idx>").
// block    - config section; must provide "type", "size" and "stride".
// input    - tensor to pool.
// network  - network definition receiving the layer.
//
// Returns the pooling layer, configured with a square window, a square
// stride and kSAME_UPPER padding.
nvinfer1::ILayer* maxpoolLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "maxpool");
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    const int windowSize = std::stoi(block.at("size"));
    const int step = std::stoi(block.at("stride"));
    const nvinfer1::DimsHW window{windowSize, windowSize};

    nvinfer1::IPoolingLayer* maxpool
        = network->addPooling(*input, nvinfer1::PoolingType::kMAX, window);
    assert(maxpool);

    const std::string layerName = "maxpool_" + std::to_string(layerIdx);
    maxpool->setStride(nvinfer1::DimsHW{step, step});
    maxpool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
    maxpool->setName(layerName.c_str());

    return maxpool;
}
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __MAXPOOL_LAYER_H__
|
|
||||||
#define __MAXPOOL_LAYER_H__
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <cassert>
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
nvinfer1::ILayer* maxpoolLayer(
|
|
||||||
int layerIdx,
|
|
||||||
std::map<std::string, std::string>& block,
|
|
||||||
nvinfer1::ITensor* input,
|
|
||||||
nvinfer1::INetworkDefinition* network);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "route_layer.h"
|
|
||||||
|
|
||||||
// Builds a "route" layer: concatenates the outputs of the layers listed in
// block["layers"] along axis 0 and, when block["groups"] is present, keeps
// only the channel slice selected by block["group_id"].
//
// layerIdx      - index of this layer; the concatenation is named
//                 "route_<layerIdx - 1>".
// block         - config section; "layers" is a comma-separated list of
//                 indices, where negative values count back from the end of
//                 tensorOutputs.
// tensorOutputs - output tensors of the layers built so far.
// network       - network definition receiving the layers.
//
// Returns the concatenation layer, or the slice layer when groups are used.
nvinfer1::ILayer* routeLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    std::vector<nvinfer1::ITensor*> tensorOutputs,
    nvinfer1::INetworkDefinition* network)
{
    const std::string layerSpec = block.at("layers");

    // Split the comma-separated list into integers.
    std::vector<int> layerIndices;
    size_t tokenStart = 0;
    for (size_t comma = layerSpec.find(',', tokenStart);
         comma != std::string::npos;
         comma = layerSpec.find(',', tokenStart))
    {
        const int value = std::stoi(trim(layerSpec.substr(tokenStart, comma - tokenStart)));
        layerIndices.push_back(value);
        tokenStart = comma + 1;
    }
    if (tokenStart < layerSpec.length())
    {
        const std::string tail = trim(layerSpec.substr(tokenStart));
        if (!tail.empty())
        {
            layerIndices.push_back(std::stoi(tail));
        }
    }
    assert(!layerIndices.empty());

    // Resolve every index to the tensor it refers to.
    std::vector<nvinfer1::ITensor*> sources;
    for (int index : layerIndices)
    {
        const int resolved
            = (index < 0) ? static_cast<int>(tensorOutputs.size() + index) : index;
        assert(resolved >= 0 && resolved < (int)tensorOutputs.size());
        sources.push_back(tensorOutputs[resolved]);
    }

    nvinfer1::IConcatenationLayer* concat =
        network->addConcatenation(sources.data(), sources.size());
    assert(concat != nullptr);
    const std::string concatName = "route_" + std::to_string(layerIdx - 1);
    concat->setName(concatName.c_str());
    concat->setAxis(0);

    if (block.find("groups") == block.end())
    {
        return concat;
    }

    // Grouped route: keep one of `groups` equal slices along dimension 0.
    const nvinfer1::Dims dims = concat->getOutput(0)->getDimensions();
    const int groups = std::stoi(block.at("groups"));
    const int groupId = std::stoi(block.at("group_id"));
    const int sliceChannels = dims.d[0] / groups;
    const int sliceStart = sliceChannels * groupId;

    nvinfer1::ISliceLayer* slice = network->addSlice(
        *concat->getOutput(0),
        nvinfer1::Dims3{sliceStart, 0, 0},
        nvinfer1::Dims3{sliceChannels, dims.d[1], dims.d[2]},
        nvinfer1::Dims3{1, 1, 1});
    assert(slice != nullptr);

    return slice;
}
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __ROUTE_LAYER_H__
|
|
||||||
#define __ROUTE_LAYER_H__
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
#include "../utils.h"
|
|
||||||
|
|
||||||
nvinfer1::ILayer* routeLayer(
|
|
||||||
int layerIdx,
|
|
||||||
std::map<std::string, std::string>& block,
|
|
||||||
std::vector<nvinfer1::ITensor*> tensorOutputs,
|
|
||||||
nvinfer1::INetworkDefinition* network);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "shortcut_layer.h"
|
|
||||||
|
|
||||||
// Builds a "shortcut" layer: element-wise sum of `input` and
// `shortcutTensor`, followed by the requested activation.
//
// layerIdx       - index used to name the activation layer.
// activation     - activation name forwarded to activationLayer().
// inputVol       - volume description of `input`.
// shortcutVol    - volume description of `shortcutTensor`; when it differs
//                  from inputVol, the shortcut tensor is sliced from the
//                  origin to the dimensions of `input` before the sum.
// input          - first operand.
// shortcutTensor - second operand.
// network        - network definition receiving the layers.
//
// Returns the layer produced by activationLayer().
nvinfer1::ILayer* shortcutLayer(
    int layerIdx,
    std::string activation,
    std::string inputVol,
    std::string shortcutVol,
    nvinfer1::ITensor* input,
    nvinfer1::ITensor* shortcutTensor,
    nvinfer1::INetworkDefinition* network)
{
    nvinfer1::ITensor* addend = shortcutTensor;

    const bool volumesDiffer = (inputVol != shortcutVol);
    if (volumesDiffer)
    {
        nvinfer1::ISliceLayer* slice = network->addSlice(
            *shortcutTensor,
            nvinfer1::Dims3{0, 0, 0},
            input->getDimensions(),
            nvinfer1::Dims3{1, 1, 1});
        assert(slice != nullptr);
        addend = slice->getOutput(0);
    }
    assert(addend != nullptr);

    nvinfer1::IElementWiseLayer* sum = network->addElementWise(
        *input, *addend,
        nvinfer1::ElementWiseOperation::kSUM);
    assert(sum != nullptr);

    nvinfer1::ILayer* activated
        = activationLayer(layerIdx, activation, sum, sum->getOutput(0), network);
    assert(activated != nullptr);

    return activated;
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __SHORTCUT_LAYER_H__
|
|
||||||
#define __SHORTCUT_LAYER_H__
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
#include "activation_layer.h"
|
|
||||||
|
|
||||||
nvinfer1::ILayer* shortcutLayer(
|
|
||||||
int layerIdx,
|
|
||||||
std::string activation,
|
|
||||||
std::string inputVol,
|
|
||||||
std::string shortcutVol,
|
|
||||||
nvinfer1::ITensor* input,
|
|
||||||
nvinfer1::ITensor* shortcutTensor,
|
|
||||||
nvinfer1::INetworkDefinition* network);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "upsample_layer.h"
|
|
||||||
|
|
||||||
// Creates a nearest-neighbour upsampling layer from an "upsample" config block.
//
// layerIdx - index used to build the layer name ("upsample_<idx>").
// block    - config section; must provide "type" and "stride".
// input    - tensor to upsample.
// network  - network definition receiving the layer.
//
// Returns the resize layer; the first dimension is kept and the other two
// are scaled by the stride.
nvinfer1::ILayer* upsampleLayer(
    int layerIdx,
    std::map<std::string, std::string>& block,
    nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "upsample");
    assert(block.find("stride") != block.end());

    // Converted explicitly: an int inside a braced float initialiser is a
    // narrowing conversion.
    const float stride = static_cast<float>(std::stoi(block.at("stride")));

    nvinfer1::IResizeLayer* resize_layer = network->addResize(*input);
    assert(resize_layer != nullptr);
    resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST);

    const float scale[3] = {1.0f, stride, stride};
    resize_layer->setScales(scale, 3);

    const std::string layer_name = "upsample_" + std::to_string(layerIdx);
    resize_layer->setName(layer_name.c_str());
    return resize_layer;
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
/*
|
|
||||||
* Created by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __UPSAMPLE_LAYER_H__
|
|
||||||
#define __UPSAMPLE_LAYER_H__
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
#include <cassert>
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
nvinfer1::ILayer* upsampleLayer(
|
|
||||||
int layerIdx,
|
|
||||||
std::map<std::string, std::string>& block,
|
|
||||||
nvinfer1::ITensor* input,
|
|
||||||
nvinfer1::INetworkDefinition* network);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,118 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
#include "nvdsinfer_context.h"
|
|
||||||
#include "yoloPlugins.h"
|
|
||||||
#include "yolo.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#define USE_CUDA_ENGINE_GET_API 1
|
|
||||||
|
|
||||||
// Fills `networkInfo` from the DeepStream init parameters.
//
// The network type is the lower-cased config path up to ".cfg". The paths,
// the device type ("kDLA"/"kGPU"), the input blob name ("data") and the
// precision (0 -> FP32, 1 -> INT8, 2 -> FP16) are copied or derived from
// `initParams`.
//
// Returns false, after logging to std::cerr, when the config or weights path
// is empty or does not refer to an existing file; true otherwise.
static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams)
{
    std::string loweredCfg = initParams->customNetworkConfigFilePath;
    for (char& ch : loweredCfg)
    {
        ch = std::tolower (static_cast<uint8_t>(ch));
    }

    networkInfo.networkType = loweredCfg.substr(0, loweredCfg.find(".cfg"));
    networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
    networkInfo.wtsFilePath = initParams->modelFilePath;
    networkInfo.int8CalibPath = initParams->int8CalibrationFilePath;
    networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
    networkInfo.inputBlobName = "data";

    switch (static_cast<int>(initParams->networkMode))
    {
    case 0:
        networkInfo.networkMode = "FP32";
        break;
    case 1:
        networkInfo.networkMode = "INT8";
        break;
    case 2:
        networkInfo.networkMode = "FP16";
        break;
    default:
        // Any other mode leaves networkMode untouched.
        break;
    }

    const bool pathsGiven
        = !networkInfo.configFilePath.empty() && !networkInfo.wtsFilePath.empty();
    if (!pathsGiven)
    {
        std::cerr << "YOLO config file or weights file is not specified"
                  << std::endl;
        return false;
    }

    const bool filesPresent
        = fileExists(networkInfo.configFilePath) && fileExists(networkInfo.wtsFilePath);
    if (!filesPresent)
    {
        std::cerr << "YOLO config file or weights file is not exist"
                  << std::endl;
        return false;
    }

    return true;
}
|
|
||||||
|
|
||||||
#if !USE_CUDA_ENGINE_GET_API
|
|
||||||
// Creates the YOLO model parser for the given init parameters.
// Returns nullptr when the network information cannot be derived from them.
IModelParser* NvDsInferCreateModelParser(
    const NvDsInferContextInitParams* initParams) {
    NetworkInfo info;
    const bool infoValid = getYoloNetworkInfo(info, initParams);
    if (infoValid) {
        return new Yolo(info);
    }
    return nullptr;
}
|
|
||||||
#else
|
|
||||||
extern "C"
|
|
||||||
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
|
|
||||||
const NvDsInferContextInitParams * const initParams,
|
|
||||||
nvinfer1::DataType dataType,
|
|
||||||
nvinfer1::ICudaEngine *& cudaEngine);
|
|
||||||
|
|
||||||
extern "C"
|
|
||||||
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
|
|
||||||
const NvDsInferContextInitParams * const initParams,
|
|
||||||
nvinfer1::DataType dataType,
|
|
||||||
nvinfer1::ICudaEngine *& cudaEngine)
|
|
||||||
{
|
|
||||||
NetworkInfo networkInfo;
|
|
||||||
if (!getYoloNetworkInfo(networkInfo, initParams)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
Yolo yolo(networkInfo);
|
|
||||||
cudaEngine = yolo.createEngine (builder);
|
|
||||||
if (cudaEngine == nullptr)
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to build CUDA engine on "
|
|
||||||
<< networkInfo.configFilePath << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@@ -1,378 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cmath>
|
|
||||||
#include <sstream>
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
#include "yoloPlugins.h"
|
|
||||||
|
|
||||||
extern "C" bool NvDsInferParseYolo(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList);
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
nonMaximumSuppression(const float nmsThresh, std::vector<NvDsInferParseObjectInfo> binfo)
|
|
||||||
{
|
|
||||||
auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float {
|
|
||||||
if (x1min > x2min)
|
|
||||||
{
|
|
||||||
std::swap(x1min, x2min);
|
|
||||||
std::swap(x1max, x2max);
|
|
||||||
}
|
|
||||||
return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min;
|
|
||||||
};
|
|
||||||
auto computeIoU
|
|
||||||
= [&overlap1D](NvDsInferParseObjectInfo& bbox1, NvDsInferParseObjectInfo& bbox2) -> float {
|
|
||||||
float overlapX
|
|
||||||
= overlap1D(bbox1.left, bbox1.left + bbox1.width, bbox2.left, bbox2.left + bbox2.width);
|
|
||||||
float overlapY
|
|
||||||
= overlap1D(bbox1.top, bbox1.top + bbox1.height, bbox2.top, bbox2.top + bbox2.height);
|
|
||||||
float area1 = (bbox1.width) * (bbox1.height);
|
|
||||||
float area2 = (bbox2.width) * (bbox2.height);
|
|
||||||
float overlap2D = overlapX * overlapY;
|
|
||||||
float u = area1 + area2 - overlap2D;
|
|
||||||
return u == 0 ? 0 : overlap2D / u;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::stable_sort(binfo.begin(), binfo.end(),
|
|
||||||
[](const NvDsInferParseObjectInfo& b1, const NvDsInferParseObjectInfo& b2) {
|
|
||||||
return b1.detectionConfidence > b2.detectionConfidence;
|
|
||||||
});
|
|
||||||
std::vector<NvDsInferParseObjectInfo> out;
|
|
||||||
for (auto i : binfo)
|
|
||||||
{
|
|
||||||
bool keep = true;
|
|
||||||
for (auto j : out)
|
|
||||||
{
|
|
||||||
if (keep)
|
|
||||||
{
|
|
||||||
float overlap = computeIoU(i, j);
|
|
||||||
keep = overlap <= nmsThresh;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (keep) out.push_back(i);
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
nmsAllClasses(const float nmsThresh,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& binfo,
|
|
||||||
const uint numClasses)
|
|
||||||
{
|
|
||||||
std::vector<NvDsInferParseObjectInfo> result;
|
|
||||||
std::vector<std::vector<NvDsInferParseObjectInfo>> splitBoxes(numClasses);
|
|
||||||
for (auto& box : binfo)
|
|
||||||
{
|
|
||||||
splitBoxes.at(box.classId).push_back(box);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto& boxes : splitBoxes)
|
|
||||||
{
|
|
||||||
boxes = nonMaximumSuppression(nmsThresh, boxes);
|
|
||||||
result.insert(result.end(), boxes.begin(), boxes.end());
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Builds a box from a centre given in grid units and a width/height.
 *
 * The centre (bx, by) is multiplied by stride; the resulting corners are
 * clamped to [0, netW] x [0, netH]. Only left/top/width/height are set on the
 * returned object; the caller fills in class and confidence.
 */
static NvDsInferParseObjectInfo convertBBox(const float& bx, const float& by, const float& bw,
                                            const float& bh, const int& stride, const uint& netW,
                                            const uint& netH)
{
    // Centre scaled by the stride.
    const float centerX = bx * stride;
    const float centerY = by * stride;

    // Corners before clamping.
    const float rawLeft = centerX - bw / 2;
    const float rawTop = centerY - bh / 2;
    const float rawRight = rawLeft + bw;
    const float rawBottom = rawTop + bh;

    const float left = clamp(rawLeft, 0, netW);
    const float top = clamp(rawTop, 0, netH);
    const float right = clamp(rawRight, 0, netW);
    const float bottom = clamp(rawBottom, 0, netH);

    NvDsInferParseObjectInfo box;
    box.left = left;
    box.width = clamp(right - left, 0, netW);
    box.top = top;
    box.height = clamp(bottom - top, 0, netH);

    return box;
}
|
|
||||||
|
|
||||||
static void addBBoxProposal(const float bx, const float by, const float bw, const float bh,
|
|
||||||
const uint stride, const uint& netW, const uint& netH, const int maxIndex,
|
|
||||||
const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
|
|
||||||
{
|
|
||||||
NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH);
|
|
||||||
if (bbi.width < 1 || bbi.height < 1) return;
|
|
||||||
|
|
||||||
bbi.detectionConfidence = maxProb;
|
|
||||||
bbi.classId = maxIndex;
|
|
||||||
binfo.push_back(bbi);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
decodeYoloTensor(
|
|
||||||
const float* detections, const std::vector<int> &mask, const std::vector<float> &anchors,
|
|
||||||
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
|
|
||||||
const uint numOutputClasses, const uint& netW,
|
|
||||||
const uint& netH,
|
|
||||||
const float confThresh)
|
|
||||||
{
|
|
||||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
|
||||||
for (uint y = 0; y < gridSizeH; ++y) {
|
|
||||||
for (uint x = 0; x < gridSizeW; ++x) {
|
|
||||||
for (uint b = 0; b < numBBoxes; ++b)
|
|
||||||
{
|
|
||||||
const float pw = anchors[mask[b] * 2];
|
|
||||||
const float ph = anchors[mask[b] * 2 + 1];
|
|
||||||
|
|
||||||
const int numGridCells = gridSizeH * gridSizeW;
|
|
||||||
const int bbindex = y * gridSizeW + x;
|
|
||||||
const float bx
|
|
||||||
= x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
|
|
||||||
const float by
|
|
||||||
= y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
|
|
||||||
const float bw
|
|
||||||
= pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)];
|
|
||||||
const float bh
|
|
||||||
= ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)];
|
|
||||||
|
|
||||||
const float objectness
|
|
||||||
= detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
|
|
||||||
|
|
||||||
float maxProb = 0.0f;
|
|
||||||
int maxIndex = -1;
|
|
||||||
|
|
||||||
for (uint i = 0; i < numOutputClasses; ++i)
|
|
||||||
{
|
|
||||||
float prob
|
|
||||||
= (detections[bbindex
|
|
||||||
+ numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
|
|
||||||
|
|
||||||
if (prob > maxProb)
|
|
||||||
{
|
|
||||||
maxProb = prob;
|
|
||||||
maxIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
maxProb = objectness * maxProb;
|
|
||||||
|
|
||||||
if (maxProb > confThresh)
|
|
||||||
{
|
|
||||||
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return binfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
|
||||||
decodeYoloV2Tensor(
|
|
||||||
const float* detections, const std::vector<float> &anchors,
|
|
||||||
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
|
|
||||||
const uint numOutputClasses, const uint& netW,
|
|
||||||
const uint& netH)
|
|
||||||
{
|
|
||||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
|
||||||
for (uint y = 0; y < gridSizeH; ++y) {
|
|
||||||
for (uint x = 0; x < gridSizeW; ++x) {
|
|
||||||
for (uint b = 0; b < numBBoxes; ++b)
|
|
||||||
{
|
|
||||||
const float pw = anchors[b * 2];
|
|
||||||
const float ph = anchors[b * 2 + 1];
|
|
||||||
|
|
||||||
const int numGridCells = gridSizeH * gridSizeW;
|
|
||||||
const int bbindex = y * gridSizeW + x;
|
|
||||||
const float bx
|
|
||||||
= x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
|
|
||||||
const float by
|
|
||||||
= y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
|
|
||||||
const float bw
|
|
||||||
= pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)];
|
|
||||||
const float bh
|
|
||||||
= ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)];
|
|
||||||
|
|
||||||
const float objectness
|
|
||||||
= detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
|
|
||||||
|
|
||||||
float maxProb = 0.0f;
|
|
||||||
int maxIndex = -1;
|
|
||||||
|
|
||||||
for (uint i = 0; i < numOutputClasses; ++i)
|
|
||||||
{
|
|
||||||
float prob
|
|
||||||
= (detections[bbindex
|
|
||||||
+ numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
|
|
||||||
|
|
||||||
if (prob > maxProb)
|
|
||||||
{
|
|
||||||
maxProb = prob;
|
|
||||||
maxIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
maxProb = objectness * maxProb;
|
|
||||||
|
|
||||||
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return binfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline std::vector<const NvDsInferLayerInfo*>
|
|
||||||
SortLayers(const std::vector<NvDsInferLayerInfo> & outputLayersInfo)
|
|
||||||
{
|
|
||||||
std::vector<const NvDsInferLayerInfo*> outLayers;
|
|
||||||
for (auto const &layer : outputLayersInfo) {
|
|
||||||
outLayers.push_back (&layer);
|
|
||||||
}
|
|
||||||
std::sort(outLayers.begin(), outLayers.end(),
|
|
||||||
[](const NvDsInferLayerInfo* a, const NvDsInferLayerInfo* b) {
|
|
||||||
return a->inferDims.d[1] < b->inferDims.d[1];
|
|
||||||
});
|
|
||||||
return outLayers;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool NvDsInferParseYolo(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList,
|
|
||||||
const std::vector<float> &anchors,
|
|
||||||
const std::vector<std::vector<int>> &masks,
|
|
||||||
const uint &num_classes,
|
|
||||||
const float &beta_nms)
|
|
||||||
{
|
|
||||||
const float kCONF_THRESH = detectionParams.perClassThreshold[0];
|
|
||||||
|
|
||||||
const std::vector<const NvDsInferLayerInfo*> sortedLayers =
|
|
||||||
SortLayers (outputLayersInfo);
|
|
||||||
|
|
||||||
if (sortedLayers.size() != masks.size()) {
|
|
||||||
std::cerr << "ERROR: YOLO output layer.size: " << sortedLayers.size()
|
|
||||||
<< " does not match mask.size: " << masks.size() << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (num_classes != detectionParams.numClassesConfigured)
|
|
||||||
{
|
|
||||||
std::cerr << "WARNING: Num classes mismatch. Configured: "
|
|
||||||
<< detectionParams.numClassesConfigured
|
|
||||||
<< ", detected by network: " << num_classes << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<NvDsInferParseObjectInfo> objects;
|
|
||||||
|
|
||||||
for (uint idx = 0; idx < masks.size(); ++idx) {
|
|
||||||
const NvDsInferLayerInfo &layer = *sortedLayers[idx]; // 255 x Grid x Grid
|
|
||||||
|
|
||||||
assert(layer.inferDims.numDims == 3);
|
|
||||||
const uint gridSizeH = layer.inferDims.d[1];
|
|
||||||
const uint gridSizeW = layer.inferDims.d[2];
|
|
||||||
const uint stride = DIVUP(networkInfo.width, gridSizeW);
|
|
||||||
|
|
||||||
std::vector<NvDsInferParseObjectInfo> outObjs =
|
|
||||||
decodeYoloTensor((const float*)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, masks[idx].size(),
|
|
||||||
num_classes, networkInfo.width, networkInfo.height, kCONF_THRESH);
|
|
||||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
objectList.clear();
|
|
||||||
objectList = nmsAllClasses(beta_nms, objects, num_classes);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool NvDsInferParseYoloV2(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList,
|
|
||||||
std::vector<float> &anchors,
|
|
||||||
const uint &num_classes)
|
|
||||||
{
|
|
||||||
if (outputLayersInfo.empty()) {
|
|
||||||
std::cerr << "Could not find output layer in bbox parsing" << std::endl;;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const uint kNUM_BBOXES = anchors.size() / 2;
|
|
||||||
|
|
||||||
const NvDsInferLayerInfo &layer = outputLayersInfo[0];
|
|
||||||
|
|
||||||
if (num_classes != detectionParams.numClassesConfigured)
|
|
||||||
{
|
|
||||||
std::cerr << "WARNING: Num classes mismatch. Configured: "
|
|
||||||
<< detectionParams.numClassesConfigured
|
|
||||||
<< ", detected by network: " << num_classes << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(layer.inferDims.numDims == 3);
|
|
||||||
const uint gridSizeH = layer.inferDims.d[1];
|
|
||||||
const uint gridSizeW = layer.inferDims.d[2];
|
|
||||||
const uint stride = DIVUP(networkInfo.width, gridSizeW);
|
|
||||||
for (auto& anchor : anchors) {
|
|
||||||
anchor *= stride;
|
|
||||||
}
|
|
||||||
std::vector<NvDsInferParseObjectInfo> objects =
|
|
||||||
decodeYoloV2Tensor((const float*)(layer.buffer), anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES,
|
|
||||||
num_classes, networkInfo.width, networkInfo.height);
|
|
||||||
|
|
||||||
objectList = objects;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" bool NvDsInferParseYolo(
|
|
||||||
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
|
|
||||||
NvDsInferNetworkInfo const& networkInfo,
|
|
||||||
NvDsInferParseDetectionParams const& detectionParams,
|
|
||||||
std::vector<NvDsInferParseObjectInfo>& objectList)
|
|
||||||
{
|
|
||||||
|
|
||||||
int num_classes = kNUM_CLASSES;
|
|
||||||
float beta_nms = kBETA_NMS;
|
|
||||||
std::vector<float> anchors = kANCHORS;
|
|
||||||
std::vector<std::vector<int>> mask = kMASK;
|
|
||||||
|
|
||||||
if (mask.size() > 0) {
|
|
||||||
return NvDsInferParseYolo (outputLayersInfo, networkInfo, detectionParams, objectList, anchors, mask, num_classes, beta_nms);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return NvDsInferParseYoloV2 (outputLayersInfo, networkInfo, detectionParams, objectList, anchors, num_classes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
|
|
||||||
@@ -1,150 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
#include <experimental/filesystem>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
// Removes leading whitespace (as classified by isspace) from `s` in place.
static void leftTrim(std::string& s)
{
    // The lambda takes unsigned char: calling isspace() with a negative value
    // (a plain char holding a byte >= 0x80) is undefined behaviour.
    s.erase(s.begin(),
            std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !isspace(ch); }));
}
|
|
||||||
|
|
||||||
// Removes trailing whitespace (as classified by isspace) from `s` in place.
static void rightTrim(std::string& s)
{
    // The lambda takes unsigned char: calling isspace() with a negative value
    // (a plain char holding a byte >= 0x80) is undefined behaviour.
    // base() converts the reverse iterator to the position just past the last
    // non-space character.
    s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !isspace(ch); }).base(),
            s.end());
}
|
|
||||||
|
|
||||||
std::string trim(std::string s)
|
|
||||||
{
|
|
||||||
leftTrim(s);
|
|
||||||
rightTrim(s);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Limits `val` to the closed range [minVal, maxVal].
// Precondition (checked by assert): minVal <= maxVal.
float clamp(const float val, const float minVal, const float maxVal)
{
    assert(minVal <= maxVal);

    // Raise to the lower bound first, then cap at the upper bound.
    const float raised = (minVal < val) ? val : minVal;
    return (raised < maxVal) ? raised : maxVal;
}
|
|
||||||
|
|
||||||
// Reports whether `fileName` exists on disk.
// When it does not and `verbose` is true, a notice is printed to stdout.
bool fileExists(const std::string fileName, bool verbose)
{
    namespace fs = std::experimental::filesystem;

    const bool found = fs::exists(fs::path(fileName));
    if (!found && verbose)
    {
        std::cout << "File does not exist: " << fileName << std::endl;
    }
    return found;
}
|
|
||||||
|
|
||||||
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
|
|
||||||
{
|
|
||||||
assert(fileExists(weightsFilePath));
|
|
||||||
std::cout << "\nLoading pre-trained weights" << std::endl;
|
|
||||||
std::ifstream file(weightsFilePath, std::ios_base::binary);
|
|
||||||
assert(file.good());
|
|
||||||
std::string line;
|
|
||||||
|
|
||||||
if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos)
|
|
||||||
{
|
|
||||||
// Remove 4 int32 bytes of data from the stream belonging to the header
|
|
||||||
file.ignore(4 * 4);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Remove 5 int32 bytes of data from the stream belonging to the header
|
|
||||||
file.ignore(4 * 5);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> weights;
|
|
||||||
char floatWeight[4];
|
|
||||||
while (!file.eof())
|
|
||||||
{
|
|
||||||
file.read(floatWeight, 4);
|
|
||||||
assert(file.gcount() == 4);
|
|
||||||
weights.push_back(*reinterpret_cast<float*>(floatWeight));
|
|
||||||
if (file.peek() == std::istream::traits_type::eof()) break;
|
|
||||||
}
|
|
||||||
std::cout << "Loading weights of " << networkType << " complete"
|
|
||||||
<< std::endl;
|
|
||||||
std::cout << "Total weights read: " << weights.size() << std::endl;
|
|
||||||
return weights;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Formats dimensions as width-4 right-aligned numbers joined by " x",
// e.g. "   3 x 416 x 416". Requires at least one dimension.
std::string dimsToString(const nvinfer1::Dims d)
{
    assert(d.nbDims >= 1);

    std::stringstream text;
    for (int axis = 0; axis < d.nbDims; ++axis)
    {
        if (axis != 0)
        {
            text << " x";
        }
        text << std::setw(4) << d.d[axis];
    }
    return text.str();
}
|
|
||||||
|
|
||||||
// Prints "(<nbDims>) " followed by the DimensionType name of every dimension
// and a newline. Types other than the four handled below print nothing.
void displayDimType(const nvinfer1::Dims d)
{
    std::cout << "(" << d.nbDims << ") ";
    for (int axis = 0; axis < d.nbDims; ++axis)
    {
        const nvinfer1::DimensionType kind = d.type[axis];
        if (kind == nvinfer1::DimensionType::kSPATIAL)
        {
            std::cout << "kSPATIAL ";
        }
        else if (kind == nvinfer1::DimensionType::kCHANNEL)
        {
            std::cout << "kCHANNEL ";
        }
        else if (kind == nvinfer1::DimensionType::kINDEX)
        {
            std::cout << "kINDEX ";
        }
        else if (kind == nvinfer1::DimensionType::kSEQUENCE)
        {
            std::cout << "kSEQUENCE ";
        }
    }
    std::cout << std::endl;
}
|
|
||||||
|
|
||||||
// Returns the first dimension of a tensor that is asserted to have exactly
// three dimensions.
int getNumChannels(nvinfer1::ITensor* t)
{
    const nvinfer1::Dims dims = t->getDimensions();
    assert(dims.nbDims == 3);
    return dims.d[0];
}
|
|
||||||
|
|
||||||
// Returns the element count (d[0] * d[1] * d[2]) of a tensor that is asserted
// to have exactly three dimensions.
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
{
    assert(inputDims.nbDims == 3);
    // Widen before multiplying so the product is formed in 64 bits instead of
    // overflowing the narrower dimension type.
    return static_cast<uint64_t>(inputDims.d[0]) * inputDims.d[1] * inputDims.d[2];
}
|
|
||||||
|
|
||||||
// Prints one left-aligned table row to stdout with column widths
// 6 / 24 / 20 / 20 / 7, followed by a newline and flush.
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
                    std::string layerOutput, std::string weightPtr)
{
    // std::left is sticky, so it is set once; setw applies to the next field only.
    std::cout << std::left;
    std::cout << std::setw(6) << layerIndex;
    std::cout << std::setw(24) << layerName;
    std::cout << std::setw(20) << layerInput;
    std::cout << std::setw(20) << layerOutput;
    std::cout << std::setw(7) << weightPtr << std::endl;
}
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __UTILS_H__
|
|
||||||
#define __UTILS_H__
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
#include <cassert>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
#define UNUSED(expr) (void)(expr)
|
|
||||||
// Integer division of n by d, rounded up (for positive operands). The whole
// expansion is parenthesized so it stays one operand inside larger expressions.
#define DIVUP(n, d) (((n) + (d) - 1) / (d))
|
|
||||||
|
|
||||||
std::string trim(std::string s);
|
|
||||||
float clamp(const float val, const float minVal, const float maxVal);
|
|
||||||
bool fileExists(const std::string fileName, bool verbose = true);
|
|
||||||
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
|
|
||||||
std::string dimsToString(const nvinfer1::Dims d);
|
|
||||||
void displayDimType(const nvinfer1::Dims d);
|
|
||||||
int getNumChannels(nvinfer1::ITensor* t);
|
|
||||||
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims);
|
|
||||||
|
|
||||||
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
|
|
||||||
std::string layerOutput, std::string weightPtr);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,507 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "yolo.h"
|
|
||||||
#include "yoloPlugins.h"
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#ifdef OPENCV
|
|
||||||
#include "calibrator.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Reorders the mask rows in place so that their first elements are in
// descending order. Every row must hold at least one element.
void orderParams(std::vector<std::vector<int>> *maskVector) {
    std::vector<std::vector<int>>& rows = *maskVector;
    const std::size_t rowCount = rows.size();

    for (std::size_t lhs = 0; lhs < rowCount; ++lhs) {
        for (std::size_t rhs = lhs + 1; rhs < rowCount; ++rhs) {
            // "<=" (not "<"): rows with equal leading values are exchanged too.
            if (rows[lhs][0] <= rows[rhs][0]) {
                rows[lhs].swap(rows[rhs]);
            }
        }
    }
}
|
|
||||||
|
|
||||||
// Copies the network description out of `networkInfo` and zeroes the input
// geometry. NOTE(review): m_InputH/W/C/Size are presumably filled in when the
// cfg file is parsed - confirm against parseConfigBlocks().
Yolo::Yolo(const NetworkInfo& networkInfo)
    // YOLO type
    : m_NetworkType(networkInfo.networkType),
      // YOLO cfg
      m_ConfigFilePath(networkInfo.configFilePath),
      // YOLO weights
      m_WtsFilePath(networkInfo.wtsFilePath),
      // INT8 calibration path
      m_Int8CalibPath(networkInfo.int8CalibPath),
      // FP32, INT8, FP16
      m_NetworkMode(networkInfo.networkMode),
      // kDLA, kGPU
      m_DeviceType(networkInfo.deviceType),
      // data
      m_InputBlobName(networkInfo.inputBlobName),
      m_InputH(0),
      m_InputW(0),
      m_InputC(0),
      m_InputSize(0)
{
}
|
|
||||||
|
|
||||||
// Cleans up through destroyNetworkUtils().
Yolo::~Yolo()
{
    this->destroyNetworkUtils();
}
|
|
||||||
|
|
||||||
/**
 * Builds a TensorRT engine from the configured cfg and weights files.
 *
 * Steps: parse the cfg file, order the output masks, optionally attach an
 * INT8 calibrator (only when INT8 is requested and no calibration file exists
 * yet), build the network through parseModel() and finally build the engine.
 *
 * The process is aborted when INT8 calibration is needed but OpenCV support
 * is not compiled in, or when INT8_CALIB_IMG_PATH / INT8_CALIB_BATCH_SIZE are
 * not set.
 *
 * @param builder TensorRT builder; must not be null.
 * @return The built engine, or nullptr when the network could not be created
 *         or parsed, or when the engine build failed.
 */
nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder)
{
    assert (builder);

    m_ConfigBlocks = parseConfigFile(m_ConfigFilePath);
    parseConfigBlocks();
    orderParams(&m_OutputMasks);

    if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) {
        assert(builder->platformHasFastInt8());
#ifdef OPENCV
        std::string calib_image_list;
        int calib_batch_size;
        if (getenv("INT8_CALIB_IMG_PATH")) {
            calib_image_list = getenv("INT8_CALIB_IMG_PATH");
        }
        else {
            std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl;
            std::abort();
        }
        if (getenv("INT8_CALIB_BATCH_SIZE")) {
            calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE"));
        }
        else {
            std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl;
            std::abort();
        }
        // NOTE(review): the calibrator is never deleted in this function. It has
        // to stay alive at least until the engine is built; confirm who owns it
        // afterwards.
        nvinfer1::int8EntroyCalibrator *calibrator = new nvinfer1::int8EntroyCalibrator(calib_batch_size, m_InputC, m_InputH, m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath);
        builder->setInt8Mode(true);
        builder->setInt8Calibrator(calibrator);
#else
        std::cerr << "OpenCV is required to run INT8 calibrator" << std::endl;
        std::abort();
#endif
    }

    // The weights are loaded inside parseModel(); loading them here as well
    // only read the file a second time into locals that were never used.
    nvinfer1::INetworkDefinition *network = builder->createNetwork();
    if (network == nullptr) {
        std::cerr << "Creating network definition failed" << std::endl;
        return nullptr;
    }
    if (parseModel(*network) != NVDSINFER_SUCCESS) {
        network->destroy();
        return nullptr;
    }

    std::cout << "Building the TensorRT Engine" << std::endl;

    if (m_LetterBox == 1) {
        std::cout << "\nNOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file to get better accuracy\n" << std::endl;
    }

    nvinfer1::ICudaEngine * engine = builder->buildCudaEngine(*network);
    if (engine) {
        std::cout << "Building complete\n" << std::endl;
    } else {
        std::cerr << "Building engine failed\n" << std::endl;
    }

    network->destroy();
    return engine;
}
|
|
||||||
|
|
||||||
// Resets the network helper state, loads the weights file and populates
// `network` through buildYoloNetwork(). Returns that call's status unchanged
// and logs whether it succeeded.
NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
    destroyNetworkUtils();

    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
    std::cout << "Building YOLO network" << std::endl;

    const NvDsInferStatus status = buildYoloNetwork(weights, network);
    if (status != NVDSINFER_SUCCESS) {
        std::cerr << "Building YOLO network failed" << std::endl;
        return status;
    }

    std::cout << "Building YOLO network complete" << std::endl;
    return status;
}
|
|
||||||
|
|
||||||
NvDsInferStatus Yolo::buildYoloNetwork(
|
|
||||||
std::vector<float>& weights, nvinfer1::INetworkDefinition& network) {
|
|
||||||
int weightPtr = 0;
|
|
||||||
int channels = m_InputC;
|
|
||||||
|
|
||||||
nvinfer1::ITensor* data =
|
|
||||||
network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
|
|
||||||
nvinfer1::DimsCHW{static_cast<int>(m_InputC),
|
|
||||||
static_cast<int>(m_InputH), static_cast<int>(m_InputW)});
|
|
||||||
assert(data != nullptr && data->getDimensions().nbDims > 0);
|
|
||||||
|
|
||||||
nvinfer1::ITensor* previous = data;
|
|
||||||
std::vector<nvinfer1::ITensor*> tensorOutputs;
|
|
||||||
uint outputTensorCount = 0;
|
|
||||||
|
|
||||||
for (uint i = 0; i < m_ConfigBlocks.size(); ++i) {
|
|
||||||
assert(getNumChannels(previous) == channels);
|
|
||||||
std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";
|
|
||||||
|
|
||||||
if (m_ConfigBlocks.at(i).at("type") == "net") {
|
|
||||||
printLayerInfo("", "layer", " input", " outup", "weightPtr");
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "convolutional") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
std::string layerType = "conv_" + m_ConfigBlocks.at(i).at("activation");
|
|
||||||
printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "dropout") {
|
|
||||||
assert(m_ConfigBlocks.at(i).find("probability") != m_ConfigBlocks.at(i).end());
|
|
||||||
//float probability = std::stof(m_ConfigBlocks.at(i).at("probability"));
|
|
||||||
//nvinfer1::ILayer* out = dropoutLayer(probability, previous, &network);
|
|
||||||
//previous = out->getOutput(0);
|
|
||||||
//Skip dropout layer
|
|
||||||
assert(previous != nullptr);
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "dropout", " -", " -", " -");
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "shortcut") {
|
|
||||||
assert(m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end());
|
|
||||||
assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
|
|
||||||
std::string activation = m_ConfigBlocks.at(i).at("activation");
|
|
||||||
int from = stoi(m_ConfigBlocks.at(i).at("from"));
|
|
||||||
if (from > 0) {
|
|
||||||
from = from - i + 1;
|
|
||||||
}
|
|
||||||
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
|
|
||||||
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
|
|
||||||
assert(i + from - 1 < i - 2);
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = shortcutLayer(i, activation, inputVol, shortcutVol, previous, tensorOutputs[i + from - 1], &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
std::string layerType = "shortcut_" + m_ConfigBlocks.at(i).at("activation") + ": " + std::to_string(i + from - 1);
|
|
||||||
printLayerInfo(layerIndex, layerType, " -", outputVol, " -");
|
|
||||||
if (inputVol != shortcutVol) {
|
|
||||||
std::cout << inputVol << " +" << shortcutVol << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "route") {
|
|
||||||
assert(m_ConfigBlocks.at(i).find("layers") != m_ConfigBlocks.at(i).end());
|
|
||||||
nvinfer1::ILayer* out = routeLayer(i, m_ConfigBlocks.at(i), tensorOutputs, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "upsample") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], previous, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -");
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "maxpool") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::ILayer* out = maxpoolLayer(i, m_ConfigBlocks.at(i), previous, &network);
|
|
||||||
previous = out->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
tensorOutputs.push_back(previous);
|
|
||||||
printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "yolo") {
|
|
||||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
|
||||||
TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
|
|
||||||
curYoloTensor.gridSizeY = prevTensorDims.d[1];
|
|
||||||
curYoloTensor.gridSizeX = prevTensorDims.d[2];
|
|
||||||
curYoloTensor.stride = m_InputH / curYoloTensor.gridSizeY;
|
|
||||||
m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSizeY
|
|
||||||
* curYoloTensor.gridSizeX
|
|
||||||
* (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
|
|
||||||
std::string layerName = "yolo_" + std::to_string(i);
|
|
||||||
curYoloTensor.blobName = layerName;
|
|
||||||
int new_coords = 0;
|
|
||||||
float scale_x_y = 1;
|
|
||||||
float beta_nms = 0.45;
|
|
||||||
if (m_ConfigBlocks.at(i).find("new_coords") != m_ConfigBlocks.at(i).end()) {
|
|
||||||
new_coords = std::stoi(m_ConfigBlocks.at(i).at("new_coords"));
|
|
||||||
}
|
|
||||||
if (m_ConfigBlocks.at(i).find("scale_x_y") != m_ConfigBlocks.at(i).end()) {
|
|
||||||
scale_x_y = std::stof(m_ConfigBlocks.at(i).at("scale_x_y"));
|
|
||||||
}
|
|
||||||
if (m_ConfigBlocks.at(i).find("beta_nms") != m_ConfigBlocks.at(i).end()) {
|
|
||||||
beta_nms = std::stof(m_ConfigBlocks.at(i).at("beta_nms"));
|
|
||||||
}
|
|
||||||
nvinfer1::IPluginV2* yoloPlugin
|
|
||||||
= new YoloLayer(m_OutputTensors.at(outputTensorCount).numBBoxes,
|
|
||||||
m_OutputTensors.at(outputTensorCount).numClasses,
|
|
||||||
m_OutputTensors.at(outputTensorCount).gridSizeX,
|
|
||||||
m_OutputTensors.at(outputTensorCount).gridSizeY,
|
|
||||||
1, new_coords, scale_x_y, beta_nms,
|
|
||||||
curYoloTensor.anchors,
|
|
||||||
m_OutputMasks);
|
|
||||||
assert(yoloPlugin != nullptr);
|
|
||||||
nvinfer1::IPluginV2Layer* yolo =
|
|
||||||
network.addPluginV2(&previous, 1, *yoloPlugin);
|
|
||||||
assert(yolo != nullptr);
|
|
||||||
yolo->setName(layerName.c_str());
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
previous = yolo->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
previous->setName(layerName.c_str());
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
network.markOutput(*previous);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
tensorOutputs.push_back(yolo->getOutput(0));
|
|
||||||
printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
++outputTensorCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
//YOLOv2 support
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "region") {
|
|
||||||
nvinfer1::Dims prevTensorDims = previous->getDimensions();
|
|
||||||
TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount);
|
|
||||||
curRegionTensor.gridSizeY = prevTensorDims.d[1];
|
|
||||||
curRegionTensor.gridSizeX = prevTensorDims.d[2];
|
|
||||||
curRegionTensor.stride = m_InputH / curRegionTensor.gridSizeY;
|
|
||||||
m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSizeY
|
|
||||||
* curRegionTensor.gridSizeX
|
|
||||||
* (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses));
|
|
||||||
std::string layerName = "region_" + std::to_string(i);
|
|
||||||
curRegionTensor.blobName = layerName;
|
|
||||||
std::vector<std::vector<int>> mask;
|
|
||||||
nvinfer1::IPluginV2* regionPlugin
|
|
||||||
= new YoloLayer(curRegionTensor.numBBoxes,
|
|
||||||
curRegionTensor.numClasses,
|
|
||||||
curRegionTensor.gridSizeX,
|
|
||||||
curRegionTensor.gridSizeY,
|
|
||||||
0, 0, 1.0, 0,
|
|
||||||
curRegionTensor.anchors,
|
|
||||||
mask);
|
|
||||||
assert(regionPlugin != nullptr);
|
|
||||||
nvinfer1::IPluginV2Layer* region =
|
|
||||||
network.addPluginV2(&previous, 1, *regionPlugin);
|
|
||||||
assert(region != nullptr);
|
|
||||||
region->setName(layerName.c_str());
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
previous = region->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
previous->setName(layerName.c_str());
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
network.markOutput(*previous);
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
tensorOutputs.push_back(region->getOutput(0));
|
|
||||||
printLayerInfo(layerIndex, "region", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
++outputTensorCount;
|
|
||||||
}
|
|
||||||
else if (m_ConfigBlocks.at(i).at("type") == "reorg") {
|
|
||||||
std::string inputVol = dimsToString(previous->getDimensions());
|
|
||||||
nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
|
|
||||||
assert(reorgPlugin != nullptr);
|
|
||||||
nvinfer1::IPluginV2Layer* reorg =
|
|
||||||
network.addPluginV2(&previous, 1, *reorgPlugin);
|
|
||||||
assert(reorg != nullptr);
|
|
||||||
std::string layerName = "reorg_" + std::to_string(i);
|
|
||||||
reorg->setName(layerName.c_str());
|
|
||||||
previous = reorg->getOutput(0);
|
|
||||||
assert(previous != nullptr);
|
|
||||||
std::string outputVol = dimsToString(previous->getDimensions());
|
|
||||||
channels = getNumChannels(previous);
|
|
||||||
tensorOutputs.push_back(reorg->getOutput(0));
|
|
||||||
printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr));
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::cout << "Unsupported layer type --> \""
|
|
||||||
<< m_ConfigBlocks.at(i).at("type") << "\"" << std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((int)weights.size() != weightPtr)
|
|
||||||
{
|
|
||||||
std::cout << "Number of unused weights left: " << weights.size() - weightPtr << std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Output YOLO blob names: " << std::endl;
|
|
||||||
for (auto& tensor : m_OutputTensors) {
|
|
||||||
std::cout << tensor.blobName << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
int nbLayers = network.getNbLayers();
|
|
||||||
std::cout << "Total number of YOLO layers: " << nbLayers << std::endl;
|
|
||||||
|
|
||||||
return NVDSINFER_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::map<std::string, std::string>>
|
|
||||||
Yolo::parseConfigFile (const std::string cfgFilePath)
|
|
||||||
{
|
|
||||||
assert(fileExists(cfgFilePath));
|
|
||||||
std::ifstream file(cfgFilePath);
|
|
||||||
assert(file.good());
|
|
||||||
std::string line;
|
|
||||||
std::vector<std::map<std::string, std::string>> blocks;
|
|
||||||
std::map<std::string, std::string> block;
|
|
||||||
|
|
||||||
while (getline(file, line))
|
|
||||||
{
|
|
||||||
if (line.size() == 0) continue;
|
|
||||||
if (line.front() == '#') continue;
|
|
||||||
line = trim(line);
|
|
||||||
if (line.front() == '[')
|
|
||||||
{
|
|
||||||
if (block.size() > 0)
|
|
||||||
{
|
|
||||||
blocks.push_back(block);
|
|
||||||
block.clear();
|
|
||||||
}
|
|
||||||
std::string key = "type";
|
|
||||||
std::string value = trim(line.substr(1, line.size() - 2));
|
|
||||||
block.insert(std::pair<std::string, std::string>(key, value));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int cpos = line.find('=');
|
|
||||||
std::string key = trim(line.substr(0, cpos));
|
|
||||||
std::string value = trim(line.substr(cpos + 1));
|
|
||||||
block.insert(std::pair<std::string, std::string>(key, value));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
blocks.push_back(block);
|
|
||||||
return blocks;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Yolo::parseConfigBlocks()
|
|
||||||
{
|
|
||||||
for (auto block : m_ConfigBlocks) {
|
|
||||||
if (block.at("type") == "net")
|
|
||||||
{
|
|
||||||
assert((block.find("height") != block.end())
|
|
||||||
&& "Missing 'height' param in network cfg");
|
|
||||||
assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg");
|
|
||||||
assert((block.find("channels") != block.end())
|
|
||||||
&& "Missing 'channels' param in network cfg");
|
|
||||||
|
|
||||||
m_InputH = std::stoul(block.at("height"));
|
|
||||||
m_InputW = std::stoul(block.at("width"));
|
|
||||||
m_InputC = std::stoul(block.at("channels"));
|
|
||||||
m_InputSize = m_InputC * m_InputH * m_InputW;
|
|
||||||
|
|
||||||
if (block.find("letter_box") != block.end()) {
|
|
||||||
m_LetterBox = std::stoul(block.at("letter_box"));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
m_LetterBox = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if ((block.at("type") == "region") || (block.at("type") == "yolo"))
|
|
||||||
{
|
|
||||||
assert((block.find("num") != block.end())
|
|
||||||
&& std::string("Missing 'num' param in " + block.at("type") + " layer").c_str());
|
|
||||||
assert((block.find("classes") != block.end())
|
|
||||||
&& std::string("Missing 'classes' param in " + block.at("type") + " layer")
|
|
||||||
.c_str());
|
|
||||||
assert((block.find("anchors") != block.end())
|
|
||||||
&& std::string("Missing 'anchors' param in " + block.at("type") + " layer")
|
|
||||||
.c_str());
|
|
||||||
|
|
||||||
TensorInfo outputTensor;
|
|
||||||
std::string anchorString = block.at("anchors");
|
|
||||||
while (!anchorString.empty())
|
|
||||||
{
|
|
||||||
int npos = anchorString.find_first_of(',');
|
|
||||||
if (npos != -1)
|
|
||||||
{
|
|
||||||
float anchor = std::stof(trim(anchorString.substr(0, npos)));
|
|
||||||
outputTensor.anchors.push_back(anchor);
|
|
||||||
anchorString.erase(0, npos + 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
float anchor = std::stof(trim(anchorString));
|
|
||||||
outputTensor.anchors.push_back(anchor);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (block.find("mask") != block.end()) {
|
|
||||||
|
|
||||||
std::string maskString = block.at("mask");
|
|
||||||
std::vector<int> pMASKS;
|
|
||||||
while (!maskString.empty())
|
|
||||||
{
|
|
||||||
int npos = maskString.find_first_of(',');
|
|
||||||
if (npos != -1)
|
|
||||||
{
|
|
||||||
int mask = std::stoul(trim(maskString.substr(0, npos)));
|
|
||||||
pMASKS.push_back(mask);
|
|
||||||
outputTensor.masks.push_back(mask);
|
|
||||||
maskString.erase(0, npos + 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int mask = std::stoul(trim(maskString));
|
|
||||||
pMASKS.push_back(mask);
|
|
||||||
outputTensor.masks.push_back(mask);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_OutputMasks.push_back(pMASKS);
|
|
||||||
}
|
|
||||||
|
|
||||||
outputTensor.numBBoxes = outputTensor.masks.size() > 0
|
|
||||||
? outputTensor.masks.size()
|
|
||||||
: std::stoul(trim(block.at("num")));
|
|
||||||
outputTensor.numClasses = std::stoul(block.at("classes"));
|
|
||||||
m_OutputTensors.push_back(outputTensor);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Yolo::destroyNetworkUtils() {
|
|
||||||
for (uint i = 0; i < m_TrtWeights.size(); ++i) {
|
|
||||||
if (m_TrtWeights[i].count > 0)
|
|
||||||
free(const_cast<void*>(m_TrtWeights[i].values));
|
|
||||||
}
|
|
||||||
m_TrtWeights.clear();
|
|
||||||
}
|
|
||||||
@@ -1,105 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _YOLO_H_
|
|
||||||
#define _YOLO_H_
|
|
||||||
|
|
||||||
#include "layers/convolutional_layer.h"
|
|
||||||
#include "layers/dropout_layer.h"
|
|
||||||
#include "layers/shortcut_layer.h"
|
|
||||||
#include "layers/route_layer.h"
|
|
||||||
#include "layers/upsample_layer.h"
|
|
||||||
#include "layers/maxpool_layer.h"
|
|
||||||
|
|
||||||
#include "nvdsinfer_custom_impl.h"
|
|
||||||
|
|
||||||
// Bundle of string settings handed to Yolo's constructor
// (Yolo(const NetworkInfo&)); class Yolo keeps a matching m_* string member
// for each field below.
struct NetworkInfo
|
|
||||||
{
|
|
||||||
// Returned by Yolo::getModelName() when the config file path is empty.
std::string networkType;
|
|
||||||
// NOTE(review): presumably the path of the Darknet .cfg file — confirm.
std::string configFilePath;
|
|
||||||
// NOTE(review): presumably the path of the weights file — confirm.
std::string wtsFilePath;
|
|
||||||
// NOTE(review): presumably the INT8 calibration table path — confirm.
std::string int8CalibPath;
|
|
||||||
// NOTE(review): presumably the precision mode — confirm.
std::string networkMode;
|
|
||||||
// NOTE(review): presumably the target device selector — confirm.
std::string deviceType;
|
|
||||||
// NOTE(review): presumably the name of the network input tensor — confirm.
std::string inputBlobName;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Description of one output ("yolo" / "region") layer. Anchors, masks and the
// box/class counts are filled by Yolo::parseConfigBlocks(); the grid size,
// stride, volume and blob name are filled while the network is built.
struct TensorInfo
|
|
||||||
{
|
|
||||||
// Output tensor name, set to "yolo_<i>" or "region_<i>" during network build.
std::string blobName;
|
|
||||||
// Network input height divided by gridSizeY.
uint stride{0};
|
|
||||||
// Taken from dimension index 1 of the tensor feeding the layer.
uint gridSizeY{0};
|
|
||||||
// Taken from dimension index 2 of the tensor feeding the layer.
uint gridSizeX{0};
|
|
||||||
// Value of the "classes" key of the config block.
uint numClasses{0};
|
|
||||||
// Number of mask entries when a "mask" key exists, otherwise the "num" key.
uint numBBoxes{0};
|
|
||||||
// gridSizeY * gridSizeX * numBBoxes * (5 + numClasses).
uint64_t volume{0};
|
|
||||||
// Entries of the "mask" key; empty when the block has none.
std::vector<uint> masks;
|
|
||||||
// Entries of the "anchors" key in file order.
std::vector<float> anchors;
|
|
||||||
// NOTE(review): not written anywhere in the visible code; presumably the
// engine binding slot, -1 meaning "unassigned" — confirm.
int bindingIndex{-1};
|
|
||||||
// NOTE(review): not written anywhere in the visible code; ownership unknown.
float* hostBuffer{nullptr};
|
|
||||||
};
|
|
||||||
|
|
||||||
class Yolo : public IModelParser {
|
|
||||||
public:
|
|
||||||
Yolo(const NetworkInfo& networkInfo);
|
|
||||||
~Yolo() override;
|
|
||||||
bool hasFullDimsSupported() const override { return false; }
|
|
||||||
const char* getModelName() const override {
|
|
||||||
return m_ConfigFilePath.empty() ? m_NetworkType.c_str()
|
|
||||||
: m_ConfigFilePath.c_str();
|
|
||||||
}
|
|
||||||
NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
|
|
||||||
|
|
||||||
nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
const std::string m_NetworkType;
|
|
||||||
const std::string m_ConfigFilePath;
|
|
||||||
const std::string m_WtsFilePath;
|
|
||||||
const std::string m_Int8CalibPath;
|
|
||||||
const std::string m_NetworkMode;
|
|
||||||
const std::string m_DeviceType;
|
|
||||||
const std::string m_InputBlobName;
|
|
||||||
std::vector<TensorInfo> m_OutputTensors;
|
|
||||||
std::vector<std::vector<int>> m_OutputMasks;
|
|
||||||
std::vector<std::map<std::string, std::string>> m_ConfigBlocks;
|
|
||||||
uint m_InputH;
|
|
||||||
uint m_InputW;
|
|
||||||
uint m_InputC;
|
|
||||||
uint64_t m_InputSize;
|
|
||||||
uint m_LetterBox;
|
|
||||||
|
|
||||||
std::vector<nvinfer1::Weights> m_TrtWeights;
|
|
||||||
|
|
||||||
private:
|
|
||||||
NvDsInferStatus buildYoloNetwork(
|
|
||||||
std::vector<float>& weights, nvinfer1::INetworkDefinition& network);
|
|
||||||
std::vector<std::map<std::string, std::string>> parseConfigFile(
|
|
||||||
const std::string cfgFilePath);
|
|
||||||
void parseConfigBlocks();
|
|
||||||
void destroyNetworkUtils();
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // _YOLO_H_
|
|
||||||
@@ -1,166 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved.
|
|
||||||
*
|
|
||||||
* NVIDIA Corporation and its licensors retain all intellectual property
|
|
||||||
* and proprietary rights in and to this software, related documentation
|
|
||||||
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
||||||
* distribution of this software and related documentation without an express
|
|
||||||
* license agreement from NVIDIA Corporation is strictly prohibited.
|
|
||||||
*
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <cuda.h>
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
// Logistic function 1 / (1 + e^-x), evaluated with the __expf intrinsic.
inline __device__ float sigmoidGPU(const float& x)
{
    const float expNeg = __expf(-x);
    return 1.0f / (1.0f + expNeg);
}
|
|
||||||
|
|
||||||
// Applies the [yolo] layer activations to one (x, y, box) cell per thread.
//
// Element k of box z at cell (x, y) sits at
//   bbindex + numGridCells * (z * (5 + numOutputClasses) + k)
// with k = 0,1 the centre offsets, k = 2,3 the size terms, k = 4 the
// objectness and k >= 5 the class scores.
//
// new_coords == 1: centres are scaled by alpha/beta, sizes become (2v)^2,
//                  objectness and class scores are copied unchanged.
// otherwise:       centres are sigmoid-ed then scaled, sizes are
//                  exponentiated, objectness and class scores are sigmoid-ed.
__global__ void gpuYoloLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
                             const uint numBBoxes, const uint new_coords, const float scale_x_y)
{
    const uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
    const uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
    const uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

    // The launch grid is padded, so surplus threads simply exit.
    const bool insideGrid = (x_id < gridSizeX) && (y_id < gridSizeY) && (z_id < numBBoxes);
    if (!insideGrid)
    {
        return;
    }

    const int numGridCells = gridSizeX * gridSizeY;
    const int bbindex = y_id * gridSizeX + x_id;
    const uint valuesPerBox = 5 + numOutputClasses;
    const uint firstValue = z_id * valuesPerBox;

    const float alpha = scale_x_y;
    const float beta = -0.5 * (scale_x_y - 1);

    for (uint k = 0; k < valuesPerBox; ++k)
    {
        const uint at = bbindex + numGridCells * (firstValue + k);
        const float raw = input[at];

        if (new_coords == 1) {
            if (k < 2) {
                output[at] = raw * alpha + beta;
            }
            else if (k < 4) {
                output[at] = pow(raw * 2, 2);
            }
            else {
                output[at] = raw;
            }
        }
        else {
            if (k < 2) {
                output[at] = sigmoidGPU(raw) * alpha + beta;
            }
            else if (k < 4) {
                output[at] = __expf(raw);
            }
            else {
                output[at] = sigmoidGPU(raw);
            }
        }
    }
}
|
|
||||||
|
|
||||||
// Applies the [region] (YOLOv2) activations to one (x, y, box) cell per thread.
//
// Element k of box z at cell (x, y) sits at
//   bbindex + numGridCells * (z * (5 + numOutputClasses) + k).
// Centres (k = 0,1) and objectness (k = 4) are sigmoid-ed, sizes (k = 2,3)
// are exponentiated, and the class scores (k >= 5) go through a softmax.
__global__ void gpuRegionLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
                               const uint numBBoxes)
{
    uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
    uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
    uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

    // The launch grid is padded, so surplus threads simply exit.
    if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes))
    {
        return;
    }

    const int numGridCells = gridSizeX * gridSizeY;
    const int bbindex = y_id * gridSizeX + x_id;

    output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
        = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]);

    output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
        = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]);

    output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]
        = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]);

    output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]
        = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]);

    output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]
        = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);

    // Softmax over the class scores, shifted by the largest score.
    float temp = 1.0;
    float sum = 0;
    float largest = -INFINITY;

    for (uint i = 0; i < numOutputClasses; ++i) {
        // Keep the score as float: storing it in an int truncated it and
        // produced a wrong maximum for fractional or negative scores.
        const float val = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))];
        largest = (val > largest) ? val : largest;
    }
    for (uint i = 0; i < numOutputClasses; ++i) {
        float e = exp(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] / temp - largest / temp);
        sum += e;
        output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] = e;
    }
    for (uint i = 0; i < numOutputClasses; ++i) {
        output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] /= sum;
    }
}
|
|
||||||
|
|
||||||
cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY,
                          const uint& numOutputClasses, const uint& numBBoxes,
                          uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType);

// Launches the activation kernel once per batch item on `stream`.
//
// modelType 1 launches gpuYoloLayer, modelType 0 launches gpuRegionLayer; any
// other value launches nothing. Batch item b starts b * outputSize floats
// into both buffers. Returns cudaGetLastError() after the launches.
cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY,
                          const uint& numOutputClasses, const uint& numBBoxes,
                          uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType)
{
    dim3 threads_per_block(16, 16, 4);
    // One extra block per axis covers the remainder; the kernels bounds-check.
    dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1,
                          (gridSizeY / threads_per_block.y) + 1,
                          (numBBoxes / threads_per_block.z) + 1);

    const float* const inputBase = reinterpret_cast<const float*>(input);
    float* const outputBase = reinterpret_cast<float*>(output);

    for (unsigned int batch = 0; batch < batchSize; ++batch)
    {
        const uint64_t offset = batch * outputSize;

        if (modelType == 1) {
            gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
                inputBase + offset, outputBase + offset, gridSizeX, gridSizeY, numOutputClasses,
                numBBoxes, modelCoords, modelScale);
        }
        else if (modelType == 0) {
            gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
                inputBase + offset, outputBase + offset, gridSizeX, gridSizeY, numOutputClasses,
                numBBoxes);
        }
    }
    return cudaGetLastError();
}
|
|
||||||
@@ -1,209 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
* Edited by Marcos Luciano
|
|
||||||
* https://www.github.com/marcoslucianops
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "yoloPlugins.h"
#include "NvInferPlugin.h"
#include <cassert>
#include <cstring>
#include <iostream>
#include <memory>
|
|
||||||
|
|
||||||
// Namespace-scope copies of a layer's parameters. They are assigned at the end
// of YoloLayer(const void* data, size_t length), so they hold the values of
// the most recently deserialized layer.
// NOTE(review): presumably read from another translation unit (e.g. the
// bounding-box parser) — confirm before changing linkage or names.
int kNUM_CLASSES;
|
|
||||||
float kBETA_NMS;
|
|
||||||
std::vector<float> kANCHORS;
|
|
||||||
std::vector<std::vector<int>> kMASK;
|
|
||||||
|
|
||||||
namespace {

// Copies the bytes of `val` to `buffer` and advances `buffer` past them.
// std::memcpy is used instead of dereferencing a reinterpret_cast'ed pointer
// so the store is valid for any buffer alignment.
template <typename T>
void write(char*& buffer, const T& val)
{
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}

// Fills `val` from the bytes at `buffer` and advances `buffer` past them.
// Counterpart of write(); safe for unaligned buffers for the same reason.
template <typename T>
void read(const char*& buffer, T& val)
{
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
}

}
|
|
||||||
|
|
||||||
// Forward declaration of the kernel launcher called from YoloLayer::enqueue().
// The arguments are passed there in this order: input/output buffers, batch
// size, grid width/height, class count, box count, per-batch element count,
// stream, m_new_coords, m_scale_x_y and m_type.
// NOTE(review): the definition is outside this translation unit (presumably
// the CUDA source) — keep both signatures in sync.
cudaError_t cudaYoloLayer (
|
|
||||||
const void* input, void* output, const uint& batchSize,
|
|
||||||
const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses,
|
|
||||||
const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType);
|
|
||||||
|
|
||||||
// Rebuilds a layer from a serialized byte buffer.
//
// Fields are consumed in this fixed order: box count, class count, grid X,
// grid Y, output size, type, new_coords, scale_x_y, beta_nms, then the anchor
// list (count + floats) and the mask lists (list count, then for each list
// its length + ints). `length` is not inspected. Finally the class count,
// beta_nms, anchors and masks are copied into the k* globals.
YoloLayer::YoloLayer (const void* data, size_t length)
{
    const char* cursor = static_cast<const char*>(data);

    read(cursor, m_NumBoxes);
    read(cursor, m_NumClasses);
    read(cursor, m_GridSizeX);
    read(cursor, m_GridSizeY);
    read(cursor, m_OutputSize);

    read(cursor, m_type);
    read(cursor, m_new_coords);
    read(cursor, m_scale_x_y);
    read(cursor, m_beta_nms);

    uint anchorCount;
    read(cursor, anchorCount);
    for (uint a = 0; a < anchorCount; ++a) {
        float anchor;
        read(cursor, anchor);
        m_Anchors.push_back(anchor);
    }

    uint maskListCount;
    read(cursor, maskListCount);
    for (uint m = 0; m < maskListCount; ++m) {
        uint entryCount;
        read(cursor, entryCount);

        std::vector<int> maskList;
        for (uint e = 0; e < entryCount; ++e) {
            int entry;
            read(cursor, entry);
            maskList.push_back(entry);
        }
        m_Mask.push_back(maskList);
    }

    // Publish the deserialized parameters through the namespace-scope globals.
    kNUM_CLASSES = m_NumClasses;
    kBETA_NMS = m_beta_nms;
    kANCHORS = m_Anchors;
    kMASK = m_Mask;
}
|
|
||||||
|
|
||||||
// Builds a layer from explicit parameters.
//
// Box count, class count and both grid sizes are asserted to be positive.
// m_OutputSize is set to gridX * gridY * numBoxes * (4 + 1 + numClasses).
YoloLayer::YoloLayer (
    const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector<float> anchors, std::vector<std::vector<int>> mask) :
    m_NumBoxes(numBoxes),
    m_NumClasses(numClasses),
    m_GridSizeX(gridSizeX),
    m_GridSizeY(gridSizeY),
    m_type(model_type),
    m_new_coords(new_coords),
    m_scale_x_y(scale_x_y),
    m_beta_nms(beta_nms),
    m_Anchors(anchors),
    m_Mask(mask)
{
    assert(m_NumBoxes > 0);
    assert(m_NumClasses > 0);
    assert(m_GridSizeX > 0);
    assert(m_GridSizeY > 0);

    // Values per grid cell: every box carries 4 coordinates, 1 objectness
    // score and one score per class.
    const auto valuesPerCell = m_NumBoxes * (4 + 1 + m_NumClasses);
    m_OutputSize = m_GridSizeX * m_GridSizeY * valuesPerCell;
}
|
|
||||||
|
|
||||||
// The single output has the same dimensions as the single input.
// Asserts that output 0 is requested and exactly one input is described.
nvinfer1::Dims
YoloLayer::getOutputDimensions(
    int index, const nvinfer1::Dims* inputs, int nbInputDims)
{
    assert(index == 0);
    assert(nbInputDims == 1);

    const nvinfer1::Dims& onlyInput = inputs[0];
    return onlyInput;
}
|
|
||||||
|
|
||||||
// Accepts only the combination of float data and NCHW layout.
bool YoloLayer::supportsFormat (
    nvinfer1::DataType type, nvinfer1::PluginFormat format) const {
    if (type != nvinfer1::DataType::kFLOAT) {
        return false;
    }
    return format == nvinfer1::PluginFormat::kNCHW;
}
|
|
||||||
|
|
||||||
void
|
|
||||||
YoloLayer::configureWithFormat (
|
|
||||||
const nvinfer1::Dims* inputDims, int nbInputs,
|
|
||||||
const nvinfer1::Dims* outputDims, int nbOutputs,
|
|
||||||
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize)
|
|
||||||
{
|
|
||||||
assert(nbInputs == 1);
|
|
||||||
assert (format == nvinfer1::PluginFormat::kNCHW);
|
|
||||||
assert(inputDims != nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
int YoloLayer::enqueue(
|
|
||||||
int batchSize, const void* const* inputs, void** outputs, void* workspace,
|
|
||||||
cudaStream_t stream)
|
|
||||||
{
|
|
||||||
CHECK(cudaYoloLayer(
|
|
||||||
inputs[0], outputs[0], batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes,
|
|
||||||
m_OutputSize, stream, m_new_coords, m_scale_x_y, m_type));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t YoloLayer::getSerializationSize() const
|
|
||||||
{
|
|
||||||
int anchorsSum = 1;
|
|
||||||
for (uint i = 0; i < m_Anchors.size(); i++) {
|
|
||||||
anchorsSum += 1;
|
|
||||||
}
|
|
||||||
int maskSum = 1;
|
|
||||||
for (uint i = 0; i < m_Mask.size(); i++) {
|
|
||||||
maskSum += 1;
|
|
||||||
for (uint f = 0; f < m_Mask[i].size(); f++) {
|
|
||||||
maskSum += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(m_GridSizeX) + sizeof(m_GridSizeY) + sizeof(m_OutputSize) + sizeof(m_type)
|
|
||||||
+ sizeof(m_new_coords) + sizeof(m_scale_x_y) + sizeof(m_beta_nms) + anchorsSum * sizeof(float) + maskSum * sizeof(int);
|
|
||||||
}
|
|
||||||
|
|
||||||
void YoloLayer::serialize(void* buffer) const
|
|
||||||
{
|
|
||||||
char *d = static_cast<char*>(buffer);
|
|
||||||
write(d, m_NumBoxes);
|
|
||||||
write(d, m_NumClasses);
|
|
||||||
write(d, m_GridSizeX);
|
|
||||||
write(d, m_GridSizeY);
|
|
||||||
write(d, m_OutputSize);
|
|
||||||
|
|
||||||
write(d, m_type);
|
|
||||||
write(d, m_new_coords);
|
|
||||||
write(d, m_scale_x_y);
|
|
||||||
write(d, m_beta_nms);
|
|
||||||
uint anchorsSize = m_Anchors.size();
|
|
||||||
write(d, anchorsSize);
|
|
||||||
for (uint i = 0; i < anchorsSize; i++) {
|
|
||||||
write(d, m_Anchors[i]);
|
|
||||||
}
|
|
||||||
uint maskSize = m_Mask.size();
|
|
||||||
write(d, maskSize);
|
|
||||||
for (uint i = 0; i < maskSize; i++) {
|
|
||||||
uint pMaskSize = m_Mask[i].size();
|
|
||||||
write(d, pMaskSize);
|
|
||||||
for (uint f = 0; f < pMaskSize; f++) {
|
|
||||||
write(d, m_Mask[i][f]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
kNUM_CLASSES = m_NumClasses;
|
|
||||||
kBETA_NMS = m_beta_nms;
|
|
||||||
kANCHORS = m_Anchors;
|
|
||||||
kMASK = m_Mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Creates a new heap-allocated YoloLayer configured with the same values as
// this one. The copy is built through the parameterized constructor, so its
// output size is recomputed rather than copied. The returned pointer is a
// raw `new` allocation that this function does not release.
nvinfer1::IPluginV2* YoloLayer::clone() const
{
    nvinfer1::IPluginV2* const duplicate = new YoloLayer(
        m_NumBoxes, m_NumClasses, m_GridSizeX, m_GridSizeY,
        m_type, m_new_coords, m_scale_x_y, m_beta_nms,
        m_Anchors, m_Mask);
    return duplicate;
}
|
|
||||||
|
|
||||||
// Registers YoloLayerPluginCreator through TensorRT's registration macro;
// presumably this makes the plugin discoverable via the plugin registry
// when an engine is deserialized -- confirm against the TensorRT headers.
REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator);
|
|
||||||
@@ -37,7 +37,7 @@ CC:= g++
|
|||||||
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc
|
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc
|
||||||
|
|
||||||
CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
|
CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
|
||||||
CFLAGS+= -I/opt/nvidia/deepstream/deepstream-5.1/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include
|
CFLAGS+= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include
|
||||||
|
|
||||||
ifeq ($(OPENCV), 1)
|
ifeq ($(OPENCV), 1)
|
||||||
COMMON= -DOPENCV
|
COMMON= -DOPENCV
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user