diff --git a/README.md b/README.md index 379bd7c..7d00486 100644 --- a/README.md +++ b/README.md @@ -2,39 +2,25 @@ NVIDIA DeepStream SDK 6.2 / 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO models -### **I will be back with updates soon, I'm full of work from my jobs right now. Sorry for the delay.** +------------------------------------- +### **Big update on DeepStream-Yolo** +------------------------------------- ### Future updates +* Models benchmarks * DeepStream tutorials * Dynamic batch-size -* Segmentation model support -* Classification model support +* Updated INT8 calibration +* Support for segmentation models +* Support for classification models ### Improvements on this repository -* Darknet cfg params parser (no need to edit `nvdsparsebbox_Yolo.cpp` or other files) -* Support for `new_coords` and `scale_x_y` params -* Support for new models -* Support for new layers -* Support for new activations -* Support for convolutional groups * Support for INT8 calibration * Support for non square models -* New documentation for multiple models -* YOLOv5 >= 2.0 support -* YOLOR support -* GPU YOLO Decoder [#138](https://github.com/marcoslucianops/DeepStream-Yolo/issues/138) -* PP-YOLOE support -* YOLOv7 support -* Optimized NMS [#142](https://github.com/marcoslucianops/DeepStream-Yolo/issues/142) -* Models benchmarks -* YOLOv8 support -* YOLOX support -* PP-YOLOE+ support -* YOLOv6 >= 2.0 support -* **ONNX model support with GPU post-processing** -* **YOLO-NAS support (ONNX)** +* **Support for Darknet YOLO models (YOLOv4, etc) using cfg and weights conversion with GPU post-processing** +* **Support for YOLO-NAS, PPYOLOE+, PPYOLOE, YOLOX, YOLOR, YOLOv8, YOLOv7, YOLOv6 and YOLOv5 using ONNX conversion with GPU post-processing** ## @@ -55,6 +41,7 @@ NVIDIA DeepStream SDK 6.2 / 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO mod * [YOLOR usage](docs/YOLOR.md) * [YOLOX usage](docs/YOLOX.md) * [PP-YOLOE / PP-YOLOE+ usage](docs/PPYOLOE.md) +* [YOLO-NAS usage](docs/YOLONAS.md) * [Using your custom model](docs/customModels.md) * [Multiple YOLO GIEs](docs/multipleGIEs.md) @@ -133,13 +120,14 @@ NVIDIA DeepStream SDK 6.2 / 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO mod * [Darknet YOLO](https://github.com/AlexeyAB/darknet) * [MobileNet-YOLO](https://github.com/dog-qiuqiu/MobileNet-Yolo) * [YOLO-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest) -* [YOLOv5 >= 2.0](https://github.com/ultralytics/yolov5) -* [YOLOv6 >= 2.0](https://github.com/meituan/YOLOv6) +* [YOLOv5](https://github.com/ultralytics/yolov5) +* [YOLOv6](https://github.com/meituan/YOLOv6) * [YOLOv7](https://github.com/WongKinYiu/yolov7) * [YOLOv8](https://github.com/ultralytics/ultralytics) * [YOLOR](https://github.com/WongKinYiu/yolor) * [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) -* [PP-YOLOE / PP-YOLOE+](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/ppyoloe) +* [PP-YOLOE / PP-YOLOE+](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe) +* [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md) ## @@ -161,7 +149,7 @@ sample = 1920x1080 video - Eval ``` -nms-iou-threshold = 0.6 (Darknet and YOLOv8) / 0.65 (YOLOv5, YOLOv6, YOLOv7, YOLOR and YOLOX) / 0.7 (Paddle) +nms-iou-threshold = 0.6 (Darknet) / 0.65 (YOLOv5, YOLOv6, YOLOv7, YOLOR and YOLOX) / 0.7 (Paddle, YOLO-NAS and YOLOv8) pre-cluster-threshold = 0.001 topk = 300 ``` @@ -169,7 +157,7 @@ topk = 300 - Test ``` -nms-iou-threshold = 0.45 / 0.7 (Paddle) +nms-iou-threshold = 0.45 
pre-cluster-threshold = 0.25 topk = 300 ``` @@ -182,30 +170,7 @@ topk = 300 | DeepStream | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(without display) | |:------------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:--------------------------:| -| PP-YOLOE-x | FP16 | 640 | 0.506 | 0.681 | 0.551 | 116.54 | -| PP-YOLOE-l | FP16 | 640 | 0.498 | 0.674 | 0.545 | 187.93 | -| PP-YOLOE-m | FP16 | 640 | 0.476 | 0.646 | 0.522 | 257.42 | -| PP-YOLOE-s (400) | FP16 | 640 | 0.422 | 0.589 | 0.463 | 465.23 | -| YOLOv7-E6E | FP16 | 1280 | 0.476 | 0.648 | 0.521 | 47.82 | -| YOLOv7-D6 | FP16 | 1280 | 0.479 | 0.648 | 0.520 | 60.66 | -| YOLOv7-E6 | FP16 | 1280 | 0.471 | 0.640 | 0.516 | 73.05 | -| YOLOv7-W6 | FP16 | 1280 | 0.444 | 0.610 | 0.483 | 110.29 | -| YOLOv7-X* | FP16 | 640 | 0.496 | 0.679 | 0.536 | 162.31 | -| YOLOv7* | FP16 | 640 | 0.476 | 0.660 | 0.518 | 237.79 | -| YOLOv7-Tiny Leaky* | FP16 | 640 | 0.345 | 0.516 | 0.372 | 611.36 | -| YOLOv7-Tiny Leaky* | FP16 | 416 | 0.328 | 0.493 | 0.348 | 633.73 | -| YOLOv5x6 6.1 | FP16 | 1280 | 0.508 | 0.683 | 0.554 | 54.88 | -| YOLOv5l6 6.1 | FP16 | 1280 | 0.494 | 0.668 | 0.540 | 87.86 | -| YOLOv5m6 6.1 | FP16 | 1280 | 0.469 | 0.644 | 0.514 | 142.68 | -| YOLOv5s6 6.1 | FP16 | 1280 | 0.399 | 0.581 | 0.438 | 271.19 | -| YOLOv5n6 6.1 | FP16 | 1280 | 0.317 | 0.487 | 0.344 | 392.20 | -| YOLOv5x 6.1 | FP16 | 640 | 0.470 | 0.652 | 0.513 | 152.99 | -| YOLOv5l 6.1 | FP16 | 640 | 0.454 | 0.636 | 0.496 | 247.60 | -| YOLOv5m 6.1 | FP16 | 640 | 0.421 | 0.604 | 0.458 | 375.06 | -| YOLOv5s 6.1 | FP16 | 640 | 0.344 | 0.528 | 0.371 | 602.44 | -| YOLOv5n 6.1 | FP16 | 640 | 0.247 | 0.413 | 0.256 | 629.04 | -| YOLOv4** | FP16 | 608 | 0.497 | 0.739 | 0.549 | 206.23 | -| YOLOv4-Tiny | FP16 | 416 | 0.215 | 0.402 | 0.205 | 634.69 | +| Coming soon | FP16 | 640 | | | | | ## @@ -326,7 +291,7 @@ sudo prime-select nvidia * Run ``` - sudo sh NVIDIA-Linux-x86_64-510.47.03.run --no-cc-version-check --silent --disable-nouveau --dkms --install-libglvnd --run-nvidia-xconfig + sudo sh NVIDIA-Linux-x86_64-525.105.17.run --no-cc-version-check --silent --disable-nouveau --dkms --install-libglvnd --run-nvidia-xconfig ``` @@ -1005,7 +970,7 @@ config-file=config_infer_primary_yoloV2.txt ### NMS Configuration -To change the `nms-iou-threshold`, `pre-cluster-threshold` and `topk` values, modify the config_infer file and regenerate the model engine file +To change the `nms-iou-threshold`, `pre-cluster-threshold` and `topk` values, modify the config_infer file ``` [class-attrs-all] @@ -1014,16 +979,14 @@ pre-cluster-threshold=0.25 topk=300 ``` -**NOTE**: It is important to regenerate the engine to get the max detection speed based on `pre-cluster-threshold` you set. - -**NOTE**: Lower `topk` values will result in more performance. - **NOTE**: Make sure to set `cluster-mode=2` in the config_infer file. ## ### INT8 calibration +**NOTE**: For now, Only for Darknet YOLO model. + #### 1. Install OpenCV ``` @@ -1123,7 +1086,7 @@ sudo apt-get install libopencv-dev deepstream-app -c deepstream_app_config.txt ``` -**NOTE**: NVIDIA recommends at least 500 images to get a good accuracy. On this example, I used 1000 images to get better accuracy (more images = more accuracy). Higher `INT8_CALIB_BATCH_SIZE` values will result in more accuracy and faster calibration speed. Set it according to you GPU memory. This process can take a long time. +**NOTE**: NVIDIA recommends at least 500 images to get a good accuracy. On this example, I recommend to use 1000 images to get better accuracy (more images = more accuracy). Higher `INT8_CALIB_BATCH_SIZE` values will result in more accuracy and faster calibration speed. 
Set it according to your GPU memory. This process may take a long time. ## diff --git a/config_infer_primary_ppyoloe.txt b/config_infer_primary_ppyoloe.txt index 99a096f..4060360 100644 --- a/config_infer_primary_ppyoloe.txt +++ b/config_infer_primary_ppyoloe.txt @@ -3,9 +3,8 @@ gpu-id=0 net-scale-factor=0.0173520735727919486 offsets=123.675;116.28;103.53 model-color-format=0 -custom-network-config=ppyoloe_crn_s_400e_coco.cfg -model-file=ppyoloe_crn_s_400e_coco.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=ppyoloe_crn_s_400e_coco.onnx +model-engine-file=ppyoloe_crn_s_400e_coco.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -17,11 +16,10 @@ process-mode=1 network-type=0 cluster-mode=2 maintain-aspect-ratio=0 -parse-bbox-func-name=NvDsInferParseYolo +parse-bbox-func-name=NvDsInferParseYoloE custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] -nms-iou-threshold=0.7 +nms-iou-threshold=0.45 pre-cluster-threshold=0.25 topk=300 diff --git a/config_infer_primary_ppyoloe_onnx.txt b/config_infer_primary_ppyoloe_onnx.txt deleted file mode 100644 index f5c0036..0000000 --- a/config_infer_primary_ppyoloe_onnx.txt +++ /dev/null @@ -1,25 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0.0173520735727919486 -offsets=123.675;116.28;103.53 -model-color-format=0 -onnx-file=ppyoloe_crn_s_400e_coco.onnx -model-engine-file=ppyoloe_crn_s_400e_coco.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 -process-mode=1 -network-type=0 -cluster-mode=2 -maintain-aspect-ratio=0 -parse-bbox-func-name=NvDsInferParse_PPYOLOE_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.7 -pre-cluster-threshold=0.25 -topk=300 diff --git a/config_infer_primary_ppyoloe_plus.txt b/config_infer_primary_ppyoloe_plus.txt index b7a6838..5b5b172 100644 --- a/config_infer_primary_ppyoloe_plus.txt +++ b/config_infer_primary_ppyoloe_plus.txt @@ -2,9 +2,8 @@ gpu-id=0 net-scale-factor=0.0039215697906911373 model-color-format=0 -custom-network-config=ppyoloe_plus_crn_s_80e_coco.cfg -model-file=ppyoloe_plus_crn_s_80e_coco.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=ppyoloe_plus_crn_s_80e_coco.onnx +model-engine-file=ppyoloe_plus_crn_s_80e_coco.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -16,11 +15,10 @@ process-mode=1 network-type=0 cluster-mode=2 maintain-aspect-ratio=0 -parse-bbox-func-name=NvDsInferParseYolo +parse-bbox-func-name=NvDsInferParseYoloE custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] -nms-iou-threshold=0.7 +nms-iou-threshold=0.45 pre-cluster-threshold=0.25 topk=300 diff --git a/config_infer_primary_ppyoloe_plus_onnx.txt b/config_infer_primary_ppyoloe_plus_onnx.txt deleted file mode 100644 index 0baa131..0000000 --- a/config_infer_primary_ppyoloe_plus_onnx.txt +++ /dev/null @@ -1,24 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0.0039215697906911373 -model-color-format=0 -onnx-file=ppyoloe_plus_crn_s_80e_coco.onnx -model-engine-file=ppyoloe_plus_crn_s_80e_coco.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 
-process-mode=1 -network-type=0 -cluster-mode=2 -maintain-aspect-ratio=0 -parse-bbox-func-name=NvDsInferParse_PPYOLOE_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.7 -pre-cluster-threshold=0.25 -topk=300 diff --git a/config_infer_primary_yoloV5.txt b/config_infer_primary_yoloV5.txt index 601ffb4..f294ef6 100644 --- a/config_infer_primary_yoloV5.txt +++ b/config_infer_primary_yoloV5.txt @@ -2,9 +2,8 @@ gpu-id=0 net-scale-factor=0.0039215697906911373 model-color-format=0 -custom-network-config=yolov5s.cfg -model-file=yolov5s.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=yolov5s.onnx +model-engine-file=yolov5s.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -19,7 +18,6 @@ maintain-aspect-ratio=1 symmetric-padding=1 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] nms-iou-threshold=0.45 diff --git a/config_infer_primary_yoloV5_onnx.txt b/config_infer_primary_yoloV5_onnx.txt deleted file mode 100644 index a059d17..0000000 --- a/config_infer_primary_yoloV5_onnx.txt +++ /dev/null @@ -1,25 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0.0039215697906911373 -model-color-format=0 -onnx-file=yolov5s.onnx -model-engine-file=yolov5s.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 -process-mode=1 -network-type=0 -cluster-mode=2 -maintain-aspect-ratio=1 -symmetric-padding=1 -parse-bbox-func-name=NvDsInferParse_YOLO_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.45 -pre-cluster-threshold=0.25 -topk=300 diff --git a/config_infer_primary_yoloV6.txt b/config_infer_primary_yoloV6.txt index ffeb800..98a487c 100644 --- a/config_infer_primary_yoloV6.txt +++ b/config_infer_primary_yoloV6.txt @@ -2,9 +2,8 @@ gpu-id=0 net-scale-factor=0.0039215697906911373 model-color-format=0 -custom-network-config=yolov6s.cfg -model-file=yolov6s.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=yolov6s.onnx +model-engine-file=yolov6s.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -19,7 +18,6 @@ maintain-aspect-ratio=1 symmetric-padding=1 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] nms-iou-threshold=0.45 diff --git a/config_infer_primary_yoloV6_onnx.txt b/config_infer_primary_yoloV6_onnx.txt deleted file mode 100644 index 7b0dde6..0000000 --- a/config_infer_primary_yoloV6_onnx.txt +++ /dev/null @@ -1,25 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0.0039215697906911373 -model-color-format=0 -onnx-file=yolov6s.onnx -model-engine-file=yolov6s.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 -process-mode=1 -network-type=0 -cluster-mode=2 -maintain-aspect-ratio=1 -symmetric-padding=1 -parse-bbox-func-name=NvDsInferParse_YOLO_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.45 -pre-cluster-threshold=0.25 -topk=300 diff --git a/config_infer_primary_yoloV7.txt 
b/config_infer_primary_yoloV7.txt index 0e35f08..1a16f1d 100644 --- a/config_infer_primary_yoloV7.txt +++ b/config_infer_primary_yoloV7.txt @@ -2,9 +2,8 @@ gpu-id=0 net-scale-factor=0.0039215697906911373 model-color-format=0 -custom-network-config=yolov7.cfg -model-file=yolov7.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=yolov7.onnx +model-engine-file=yolov7.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -19,7 +18,6 @@ maintain-aspect-ratio=1 symmetric-padding=1 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] nms-iou-threshold=0.45 diff --git a/config_infer_primary_yoloV8.txt b/config_infer_primary_yoloV8.txt index 3214bd3..25fabd4 100644 --- a/config_infer_primary_yoloV8.txt +++ b/config_infer_primary_yoloV8.txt @@ -2,9 +2,8 @@ gpu-id=0 net-scale-factor=0.0039215697906911373 model-color-format=0 -custom-network-config=yolov8s.cfg -model-file=yolov8s.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=yolov8s.onnx +model-engine-file=yolov8s.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -19,7 +18,6 @@ maintain-aspect-ratio=1 symmetric-padding=1 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] nms-iou-threshold=0.45 diff --git a/config_infer_primary_yoloV8_onnx.txt b/config_infer_primary_yoloV8_onnx.txt deleted file mode 100644 index 2d85b28..0000000 --- a/config_infer_primary_yoloV8_onnx.txt +++ /dev/null @@ -1,25 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0.0039215697906911373 -model-color-format=0 -onnx-file=yolov8s.onnx -model-engine-file=yolov8s.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 -process-mode=1 -network-type=0 -cluster-mode=2 -maintain-aspect-ratio=1 -symmetric-padding=1 -parse-bbox-func-name=NvDsInferParse_YOLOV8_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.45 -pre-cluster-threshold=0.25 -topk=300 diff --git a/config_infer_primary_yolo_nas_onnx.txt b/config_infer_primary_yolo_nas_onnx.txt deleted file mode 100644 index 5364ad7..0000000 --- a/config_infer_primary_yolo_nas_onnx.txt +++ /dev/null @@ -1,25 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0.0039215697906911373 -model-color-format=0 -onnx-file=yolo_nas_s.onnx -model-engine-file=yolo_nas_s.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 -process-mode=1 -network-type=0 -cluster-mode=2 -maintain-aspect-ratio=1 -symmetric-padding=1 -parse-bbox-func-name=NvDsInferParse_YOLO_NAS_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.45 -pre-cluster-threshold=0.25 -topk=300 diff --git a/config_infer_primary_yoloV7_onnx.txt b/config_infer_primary_yolonas.txt similarity index 74% rename from config_infer_primary_yoloV7_onnx.txt rename to config_infer_primary_yolonas.txt index c940736..fdf55b6 100644 --- a/config_infer_primary_yoloV7_onnx.txt +++ b/config_infer_primary_yolonas.txt @@ -2,8 +2,8 @@ gpu-id=0 net-scale-factor=0.0039215697906911373 
model-color-format=0 -onnx-file=yolov7.onnx -model-engine-file=yolov7.onnx_b1_gpu0_fp32.engine +onnx-file=yolo_nas_s_coco.onnx +model-engine-file=yolo_nas_s_coco.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -15,8 +15,8 @@ process-mode=1 network-type=0 cluster-mode=2 maintain-aspect-ratio=1 -symmetric-padding=1 -parse-bbox-func-name=NvDsInferParse_YOLO_ONNX +symmetric-padding=0 +parse-bbox-func-name=NvDsInferParseYoloE custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so [class-attrs-all] diff --git a/config_infer_primary_yolor.txt b/config_infer_primary_yolor.txt index 4e178de..4883e34 100644 --- a/config_infer_primary_yolor.txt +++ b/config_infer_primary_yolor.txt @@ -2,9 +2,8 @@ gpu-id=0 net-scale-factor=0.0039215697906911373 model-color-format=0 -custom-network-config=yolor_csp.cfg -model-file=yolor_csp.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=yolor_csp.onnx +model-engine-file=yolor_csp.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -19,7 +18,6 @@ maintain-aspect-ratio=1 symmetric-padding=1 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] nms-iou-threshold=0.45 diff --git a/config_infer_primary_yolox.txt b/config_infer_primary_yolox.txt index e006344..339b317 100644 --- a/config_infer_primary_yolox.txt +++ b/config_infer_primary_yolox.txt @@ -2,9 +2,8 @@ gpu-id=0 net-scale-factor=0 model-color-format=0 -custom-network-config=yolox_s.cfg -model-file=yolox_s.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=yolox_s.onnx +model-engine-file=yolox_s.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -19,7 +18,6 @@ maintain-aspect-ratio=1 symmetric-padding=0 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] nms-iou-threshold=0.45 diff --git a/config_infer_primary_yolox_legacy.txt b/config_infer_primary_yolox_legacy.txt index 5c078ce..cc3c3b6 100644 --- a/config_infer_primary_yolox_legacy.txt +++ b/config_infer_primary_yolox_legacy.txt @@ -3,9 +3,8 @@ gpu-id=0 net-scale-factor=0.0173520735727919486 offsets=123.675;116.28;103.53 model-color-format=0 -custom-network-config=yolox_s.cfg -model-file=yolox_s.wts -model-engine-file=model_b1_gpu0_fp32.engine +onnx-file=yolox_s.onnx +model-engine-file=yolox_s.onnx_b1_gpu0_fp32.engine #int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 @@ -20,7 +19,6 @@ maintain-aspect-ratio=1 symmetric-padding=0 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so -engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] nms-iou-threshold=0.45 diff --git a/config_infer_primary_yolox_legacy_onnx.txt b/config_infer_primary_yolox_legacy_onnx.txt deleted file mode 100644 index 521a59c..0000000 --- a/config_infer_primary_yolox_legacy_onnx.txt +++ /dev/null @@ -1,26 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0.0173520735727919486 -offsets=123.675;116.28;103.53 -model-color-format=0 -onnx-file=yolox_s.onnx -model-engine-file=yolox_s.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 -process-mode=1 
-network-type=0 -cluster-mode=2 -maintain-aspect-ratio=1 -symmetric-padding=0 -parse-bbox-func-name=NvDsInferParse_YOLOX_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.45 -pre-cluster-threshold=0.25 -topk=300 diff --git a/config_infer_primary_yolox_onnx.txt b/config_infer_primary_yolox_onnx.txt deleted file mode 100644 index a7120e3..0000000 --- a/config_infer_primary_yolox_onnx.txt +++ /dev/null @@ -1,25 +0,0 @@ -[property] -gpu-id=0 -net-scale-factor=0 -model-color-format=0 -onnx-file=yolox_s.onnx -model-engine-file=yolox_s.onnx_b1_gpu0_fp32.engine -#int8-calib-file=calib.table -labelfile-path=labels.txt -batch-size=1 -network-mode=0 -num-detected-classes=80 -interval=0 -gie-unique-id=1 -process-mode=1 -network-type=0 -cluster-mode=2 -maintain-aspect-ratio=1 -symmetric-padding=0 -parse-bbox-func-name=NvDsInferParse_YOLOX_ONNX -custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so - -[class-attrs-all] -nms-iou-threshold=0.45 -pre-cluster-threshold=0.25 -topk=300 diff --git a/docs/PPYOLOE.md b/docs/PPYOLOE.md index 4dc744d..478d61c 100644 --- a/docs/PPYOLOE.md +++ b/docs/PPYOLOE.md @@ -1,5 +1,7 @@ # PP-YOLOE / PP-YOLOE+ usage +**NOTE**: You can use the release/2.6 branch of the PPYOLOE repo to convert all model versions. + * [Convert model](#convert-model) * [Compile the lib](#compile-the-lib) * [Edit the config_infer_primary_ppyoloe_plus file](#edit-the-config_infer_primary_ppyoloe_plus-file) @@ -12,35 +14,36 @@ #### 1. Download the PaddleDetection repo and install the requirements -https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/docs/tutorials/INSTALL.md +https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/docs/tutorials/INSTALL.md **NOTE**: It is recommended to use Python virtualenv. #### 2. Copy conversor -Copy the `gen_wts_ppyoloe.py` file from `DeepStream-Yolo/utils` directory to the `PaddleDetection` folder. +Copy the `export_ppyoloe.py` file from `DeepStream-Yolo/utils` directory to the `PaddleDetection` folder. #### 3. Download the model -Download the `pdparams` file from [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/ppyoloe) releases (example for PP-YOLOE+_s) +Download the `pdparams` file from [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe) releases (example for PP-YOLOE+_s) ``` wget https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco.pdparams ``` -**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`ppyoloe_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +**NOTE**: You can use your custom model. #### 4. Convert model -Generate the `cfg` and `wts` files (example for PP-YOLOE+_s) +Generate the ONNX model file (example for PP-YOLOE+_s) ``` -python3 gen_wts_ppyoloe.py -w ppyoloe_plus_crn_s_80e_coco.pdparams -c configs/ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml +pip3 install onnx onnxsim onnxruntime +python3 export_ppyoloe.py -w ppyoloe_plus_crn_s_80e_coco.pdparams -c configs/ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml --simplify ``` #### 5. Copy generated files -Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder. +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. ## @@ -93,11 +96,13 @@ Edit the `config_infer_primary_ppyoloe_plus.txt` file according to your model (e ``` [property] ... 
-custom-network-config=ppyoloe_plus_crn_s_80e_coco.cfg -model-file=ppyoloe_plus_crn_s_80e_coco.wts +onnx-file=ppyoloe_plus_crn_s_80e_coco.onnx +model-engine-file=ppyoloe_plus_crn_s_80e_coco.onnx_b1_gpu0_fp32.engine ... num-detected-classes=80 ... +parse-bbox-func-name=NvDsInferParseYoloE +... ``` **NOTE**: If you use the **legacy** model, you should edit the `config_infer_primary_ppyoloe.txt` file. diff --git a/docs/YOLONAS.md b/docs/YOLONAS.md new file mode 100644 index 0000000..14d2ff0 --- /dev/null +++ b/docs/YOLONAS.md @@ -0,0 +1,171 @@ +# YOLONAS usage + +**NOTE**: The yaml file is not required. + +* [Convert model](#convert-model) +* [Compile the lib](#compile-the-lib) +* [Edit the config_infer_primary_yolonas file](#edit-the-config_infer_primary_yolonas-file) +* [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) +* [Testing the model](#testing-the-model) + +## + +### Convert model + +#### 1. Download the YOLO-NAS repo and install the requirements + +``` +git clone https://github.com/Deci-AI/super-gradients.git +cd super-gradients +pip3 install -r requirements.txt +python3 setup.py install +pip3 install onnx onnxsim onnxruntime +``` + +**NOTE**: It is recommended to use Python virtualenv. + +#### 2. Copy conversor + +Copy the `export_yolonas.py` file from `DeepStream-Yolo/utils` directory to the `super-gradients` folder. + +#### 3. Download the model + +Download the `pth` file from [YOLO-NAS](https://sghub.deci.ai/) website (example for YOLO-NAS S) + +``` +wget https://sghub.deci.ai/models/yolo_nas_s_coco.pth +``` + +**NOTE**: You can use your custom model. + +#### 4. Convert model + +Generate the ONNX model file (example for YOLO-NAS S) + +``` +python3 export_yolonas.py -m yolo_nas_s -w yolo_nas_s_coco.pth --simplify +``` + +**NOTE**: Model names + +``` +-m yolo_nas_s +``` + +or + +``` +-m yolo_nas_m +``` + +or + +``` +-m yolo_nas_l +``` + +**NOTE**: To change the inference size (defaut: 640) + +``` +-s SIZE +--size SIZE +-s HEIGHT WIDTH +--size HEIGHT WIDTH +``` + +Example for 1280 + +``` +-s 1280 +``` + +or + +``` +-s 1280 1280 +``` + +#### 5. Copy generated files + +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. + +## + +### Compile the lib + +Open the `DeepStream-Yolo` folder and compile the lib + +* DeepStream 6.2 on x86 platform + + ``` + CUDA_VER=11.8 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.1.1 on x86 platform + + ``` + CUDA_VER=11.7 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.1 on x86 platform + + ``` + CUDA_VER=11.6 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.0.1 / 6.0 on x86 platform + + ``` + CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.2 / 6.1.1 / 6.1 on Jetson platform + + ``` + CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo + ``` + +* DeepStream 6.0.1 / 6.0 on Jetson platform + + ``` + CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo + ``` + +## + +### Edit the config_infer_primary_yolonas file + +Edit the `config_infer_primary_yolonas.txt` file according to your model (example for YOLO-NAS S with 80 classes) + +``` +[property] +... +onnx-file=yolo_nas_s_coco.onnx +model-engine-file=yolo_nas_s_coco.onnx_b1_gpu0_fp32.engine +... +num-detected-classes=80 +... +parse-bbox-func-name=NvDsInferParseYoloE +... +``` + +## + +### Edit the deepstream_app_config file + +``` +... +[primary-gie] +... 
+config-file=config_infer_primary_yolonas.txt +``` + +## + +### Testing the model + +``` +deepstream-app -c deepstream_app_config.txt +``` + +**NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. diff --git a/docs/YOLOR.md b/docs/YOLOR.md index ec416b3..f4ece0a 100644 --- a/docs/YOLOR.md +++ b/docs/YOLOR.md @@ -1,8 +1,8 @@ # YOLOR usage -**NOTE**: You need to use the main branch of the YOLOR repo to convert the model. +**NOTE**: Select the correct branch of the YOLOR repo before the conversion. -**NOTE**: The cfg file is required. +**NOTE**: The cfg file is required for the main branch. * [Convert model](#convert-model) * [Compile the lib](#compile-the-lib) @@ -20,31 +20,71 @@ git clone https://github.com/WongKinYiu/yolor.git cd yolor pip3 install -r requirements.txt +pip3 install onnx onnxsim onnxruntime ``` **NOTE**: It is recommended to use Python virtualenv. #### 2. Copy conversor -Copy the `gen_wts_yolor.py` file from `DeepStream-Yolo/utils` directory to the `yolor` folder. +Copy the `export_yolor.py` file from `DeepStream-Yolo/utils` directory to the `yolor` folder. #### 3. Download the model Download the `pt` file from [YOLOR](https://github.com/WongKinYiu/yolor) repo. -**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolor_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +**NOTE**: You can use your custom model. #### 4. Convert model -Generate the `cfg` and `wts` files (example for YOLOR-CSP) +Generate the ONNX model file + +- Main branch + + Example for YOLOR-CSP + + ``` + python3 export_yolor.py -w yolor_csp.pt -c cfg/yolor_csp.cfg --simplify + ``` + +- Paper branch + + Example for YOLOR-P6 + + ``` + python3 export_yolor.py -w yolor-p6.pt --simplify + ``` + +**NOTE**: To convert a P6 model ``` -python3 gen_wts_yolor.py -w yolor_csp.pt -c cfg/yolor_csp.cfg +--p6 +``` + +**NOTE**: To change the inference size (default: 640) + +``` +-s SIZE +--size SIZE +-s HEIGHT WIDTH +--size HEIGHT WIDTH +``` + +Example for 1280 + +``` +-s 1280 +``` + +or + +``` +-s 1280 1280 ``` #### 5. Copy generated files -Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. ## @@ -97,11 +137,13 @@ Edit the `config_infer_primary_yolor.txt` file according to your model (example ``` [property] ... -custom-network-config=yolor_csp.cfg -model-file=yolor_csp.wts +onnx-file=yolor_csp.onnx +model-engine-file=yolor_csp.onnx_b1_gpu0_fp32.engine ... num-detected-classes=80 ... +parse-bbox-func-name=NvDsInferParseYolo +... ``` ## diff --git a/docs/YOLOX.md b/docs/YOLOX.md index d1f3337..4571c2f 100644 --- a/docs/YOLOX.md +++ b/docs/YOLOX.md @@ -1,5 +1,7 @@ # YOLOX usage +**NOTE**: You can use the main branch of the YOLOX repo to convert all model versions. + **NOTE**: The yaml file is not required. * [Convert model](#convert-model) @@ -18,13 +20,15 @@ git clone https://github.com/Megvii-BaseDetection/YOLOX.git cd YOLOX pip3 install -r requirements.txt +python3 setup.py develop +pip3 install onnx onnxsim onnxruntime ``` **NOTE**: It is recommended to use Python virtualenv. #### 2. Copy conversor -Copy the `gen_wts_yolox.py` file from `DeepStream-Yolo/utils` directory to the `YOLOX` folder. +Copy the `export_yolox.py` file from `DeepStream-Yolo/utils` directory to the `YOLOX` folder. #### 3. 
Download the model @@ -34,19 +38,19 @@ Download the `pth` file from [YOLOX](https://github.com/Megvii-BaseDetection/YOL wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth ``` -**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolox_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +**NOTE**: You can use your custom model. #### 4. Convert model -Generate the `cfg` and `wts` files (example for YOLOX-s standard) +Generate the ONNX model file (example for YOLOX-s standard) ``` -python3 gen_wts_yolox.py -w yolox_s.pth -e exps/default/yolox_s.py +python3 export_yolox.py -w yolox_s.pth -c exps/default/yolox_s.py --simplify ``` #### 5. Copy generated files -Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder. +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. ## @@ -99,11 +103,13 @@ Edit the `config_infer_primary_yolox.txt` file according to your model (example ``` [property] ... -custom-network-config=yolox_s.cfg -model-file=yolox_s.wts +onnx-file=yolox_s.onnx +model-engine-file=yolox_s.onnx_b1_gpu0_fp32.engine ... num-detected-classes=80 ... +parse-bbox-func-name=NvDsInferParseYolo +... ``` **NOTE**: If you use the **legacy** model, you should edit the `config_infer_primary_yolox_legacy.txt` file. diff --git a/docs/YOLOv5.md b/docs/YOLOv5.md index ee7c7b7..bdd6c0a 100644 --- a/docs/YOLOv5.md +++ b/docs/YOLOv5.md @@ -1,6 +1,6 @@ # YOLOv5 usage -**NOTE**: You can use the main branch of the YOLOv5 repo to convert all model versions. +**NOTE**: You can use the master branch of the YOLOv5 repo to convert all model versions. **NOTE**: The yaml file is not required. @@ -20,30 +20,31 @@ git clone https://github.com/ultralytics/yolov5.git cd yolov5 pip3 install -r requirements.txt +pip3 install onnx onnxsim onnxruntime ``` **NOTE**: It is recommended to use Python virtualenv. #### 2. Copy conversor -Copy the `gen_wts_yoloV5.py` file from `DeepStream-Yolo/utils` directory to the `yolov5` folder. +Copy the `export_yoloV5.py` file from `DeepStream-Yolo/utils` directory to the `yolov5` folder. #### 3. Download the model -Download the `pt` file from [YOLOv5](https://github.com/ultralytics/yolov5/releases/) releases (example for YOLOv5s 6.1) +Download the `pt` file from [YOLOv5](https://github.com/ultralytics/yolov5/releases/) releases (example for YOLOv5s 7.0) ``` -wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt +wget https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt ``` -**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolov5_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +**NOTE**: You can use your custom model. #### 4. Convert model -Generate the `cfg` and `wts` files (example for YOLOv5s) +Generate the ONNX model file (example for YOLOv5s) ``` -python3 gen_wts_yoloV5.py -w yolov5s.pt +python3 export_yoloV5.py -w yolov5s.pt --simplify ``` **NOTE**: To convert a P6 model @@ -75,7 +76,7 @@ or #### 5. Copy generated files -Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder. +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. ## @@ -128,11 +129,13 @@ Edit the `config_infer_primary_yoloV5.txt` file according to your model (example ``` [property] ... -custom-network-config=yolov5s.cfg -model-file=yolov5s.wts +onnx-file=yolov5s.onnx +model-engine-file=yolov5s.onnx_b1_gpu0_fp32.engine ... 
num-detected-classes=80 ... +parse-bbox-func-name=NvDsInferParseYolo +... ``` ## diff --git a/docs/YOLOv6.md b/docs/YOLOv6.md index 4f46261..e0c3ef9 100644 --- a/docs/YOLOv6.md +++ b/docs/YOLOv6.md @@ -18,13 +18,14 @@ git clone https://github.com/meituan/YOLOv6.git cd YOLOv6 pip3 install -r requirements.txt +pip3 install onnx onnxsim onnxruntime ``` **NOTE**: It is recommended to use Python virtualenv. #### 2. Copy conversor -Copy the `gen_wts_yoloV6.py` file from `DeepStream-Yolo/utils` directory to the `YOLOv6` folder. +Copy the `export_yoloV6.py` file from `DeepStream-Yolo/utils` directory to the `YOLOv6` folder. #### 3. Download the model @@ -34,14 +35,14 @@ Download the `pt` file from [YOLOv6](https://github.com/meituan/YOLOv6/releases/ wget https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6s.pt ``` -**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolov6_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +**NOTE**: You can use your custom model. #### 4. Convert model -Generate the `cfg` and `wts` files (example for YOLOv6-S 3.0) +Generate the ONNX model file (example for YOLOv6-S 3.0) ``` -python3 gen_wts_yoloV6.py -w yolov6s.pt +python3 export_yoloV6.py -w yolov6s.pt --simplify ``` **NOTE**: To convert a P6 model @@ -73,7 +74,7 @@ or #### 5. Copy generated files -Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder. +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. ## @@ -126,11 +127,13 @@ Edit the `config_infer_primary_yoloV6.txt` file according to your model (example ``` [property] ... -custom-network-config=yolov6s.cfg -model-file=yolov6s.wts +onnx-file=yolov6s.onnx +model-engine-file=yolov6s.onnx_b1_gpu0_fp32.engine ... num-detected-classes=80 ... +parse-bbox-func-name=NvDsInferParseYolo +... ``` ## diff --git a/docs/YOLOv7.md b/docs/YOLOv7.md index 4274e77..e5bbb66 100644 --- a/docs/YOLOv7.md +++ b/docs/YOLOv7.md @@ -18,13 +18,14 @@ git clone https://github.com/WongKinYiu/yolov7.git cd yolov7 pip3 install -r requirements.txt +pip3 install onnx onnxsim onnxruntime ``` **NOTE**: It is recommended to use Python virtualenv. #### 2. Copy conversor -Copy the `gen_wts_yoloV7.py` file from `DeepStream-Yolo/utils` directory to the `yolov7` folder. +Copy the `export_yoloV7.py` file from `DeepStream-Yolo/utils` directory to the `yolov7` folder. #### 3. Download the model @@ -34,18 +35,18 @@ Download the `pt` file from [YOLOv7](https://github.com/WongKinYiu/yolov7/releas wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt ``` -**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolov7_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +**NOTE**: You can use your custom model. #### 4. Reparameterize your model -[YOLOv7](https://github.com/WongKinYiu/yolov7/releases/) and it's variants can't be directly converted to engine file. Therefore, you will have to reparameterize your model using the code [here](https://github.com/WongKinYiu/yolov7/blob/main/tools/reparameterization.ipynb). Make sure to convert your checkpoints in yolov7 repository, and then save your reparmeterized checkpoints for conversion in the next step. +[YOLOv7](https://github.com/WongKinYiu/yolov7/releases/) and its variants cannot be directly converted to engine file. 
Therefore, you will have to reparameterize your model using the code [here](https://github.com/WongKinYiu/yolov7/blob/main/tools/reparameterization.ipynb). Make sure to convert your custom checkpoints in the yolov7 repository, and then save your reparameterized checkpoints for conversion in the next step. #### 5. Convert model -Generate the `cfg` and `wts` files (example for YOLOv7) +Generate the ONNX model file (example for YOLOv7) ``` -python3 gen_wts_yoloV7.py -w yolov7.pt +python3 export_yoloV7.py -w yolov7.pt --simplify ``` **NOTE**: To convert a P6 model @@ -77,7 +78,7 @@ or #### 6. Copy generated files -Copy the generated `cfg` and `wts` files to the `DeepStream-Yolo` folder. +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. ## @@ -130,11 +131,13 @@ Edit the `config_infer_primary_yoloV7.txt` file according to your model (example ``` [property] ... -custom-network-config=yolov7.cfg -model-file=yolov7.wts +onnx-file=yolov7.onnx +model-engine-file=yolov7.onnx_b1_gpu0_fp32.engine ... num-detected-classes=80 ... +parse-bbox-func-name=NvDsInferParseYolo +... ``` ## diff --git a/docs/YOLOv8.md b/docs/YOLOv8.md index b6e5152..0ebce79 100644 --- a/docs/YOLOv8.md +++ b/docs/YOLOv8.md @@ -18,13 +18,15 @@ git clone https://github.com/ultralytics/ultralytics.git cd ultralytics pip3 install -r requirements.txt +python3 setup.py install +pip3 install onnx onnxsim onnxruntime ``` **NOTE**: It is recommended to use Python virtualenv. #### 2. Copy conversor -Copy the `gen_wts_yoloV8.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. +Copy the `export_yoloV8.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. #### 3. Download the model @@ -34,14 +36,14 @@ Download the `pt` file from [YOLOv8](https://github.com/ultralytics/assets/relea wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt ``` -**NOTE**: You can use your custom model, but it is important to keep the YOLO model reference (`yolov8_`) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +**NOTE**: You can use your custom model. #### 4. Convert model -Generate the `cfg`, `wts` and `labels.txt` (if available) files (example for YOLOv8s) +Generate the ONNX model file (example for YOLOv8s) ``` -python3 gen_wts_yoloV8.py -w yolov8s.pt +python3 export_yoloV8.py -w yolov8s.pt --simplify ``` **NOTE**: To change the inference size (defaut: 640) @@ -67,7 +69,7 @@ or #### 5. Copy generated files -Copy the generated `cfg`, `wts` and `labels.txt` (if generated), files to the `DeepStream-Yolo` folder. +Copy the generated ONNX model file to the `DeepStream-Yolo` folder. ## @@ -120,11 +122,13 @@ Edit the `config_infer_primary_yoloV8.txt` file according to your model (example ``` [property] ... -custom-network-config=yolov8s.cfg -model-file=yolov8s.wts +onnx-file=yolov8s.onnx +model-engine-file=yolov8s.onnx_b1_gpu0_fp32.engine ... num-detected-classes=80 ... +parse-bbox-func-name=NvDsInferParseYolo +... ``` ## diff --git a/docs/customModels.md b/docs/customModels.md index d79ca5e..f1e8a3a 100644 --- a/docs/customModels.md +++ b/docs/customModels.md @@ -19,9 +19,7 @@ cd DeepStream-Yolo #### 2. Copy the class names file to DeepStream-Yolo folder and remane it to `labels.txt` -#### 3. Copy the `cfg` and `weights`/`wts` files to DeepStream-Yolo folder - -**NOTE**: It is important to keep the YOLO model reference (`yolov4_`, `yolov5_`, `yolor_`, etc) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +#### 3. 
Copy the `onnx` or `cfg` and `weights` files to DeepStream-Yolo folder ## @@ -189,24 +187,25 @@ To understand and edit `config_infer_primary.txt` file, read the [DeepStream Plu model-color-format=0 ``` - **NOTE**: Set it according to the number of channels in the `cfg` file (1=GRAYSCALE, 3=RGB). + **NOTE**: Set it according to the number of channels in the `cfg` file (1=GRAYSCALE, 3=RGB for Darknet YOLO) or your model configuration (ONNX). -* custom-network-config +* custom-network-config and model-file (Darknet YOLO) * Example for custom YOLOv4 model ``` custom-network-config=yolov4_custom.cfg - ``` - -* model-file - - * Example for custom YOLOv4 model - - ``` model-file=yolov4_custom.weights ``` +* onnx-file (ONNX) + + * Example for custom YOLOv8 model + + ``` + onnx-file=yolov8s_custom.onnx + ``` + * model-engine-file * Example for `batch-size=1` and `network-mode=2` @@ -233,7 +232,7 @@ To understand and edit `config_infer_primary.txt` file, read the [DeepStream Plu model-engine-file=model_b2_gpu0_fp32.engine ``` - **NOTE**: To change the generated engine filename, you need to edit and rebuild the `nvdsinfer_model_builder.cpp` file (`/opt/nvidia/deepstream/deepstream/sources/libs/nvdsinfer/nvdsinfer_model_builder.cpp`, lines 825-827) + **NOTE**: To change the generated engine filename (Darknet YOLO), you need to edit and rebuild the `nvdsinfer_model_builder.cpp` file (`/opt/nvidia/deepstream/deepstream/sources/libs/nvdsinfer/nvdsinfer_model_builder.cpp`, lines 825-827) ``` suggestedPathName = @@ -260,7 +259,7 @@ To understand and edit `config_infer_primary.txt` file, read the [DeepStream Plu num-detected-classes=80 ``` - **NOTE**: Set it according to number of classes in `cfg` file. + **NOTE**: Set it according to number of classes in `cfg` file (Darknet YOLO) or your model configuration (ONNX). * interval diff --git a/docs/multipleGIEs.md b/docs/multipleGIEs.md index 184cdce..511b4b5 100644 --- a/docs/multipleGIEs.md +++ b/docs/multipleGIEs.md @@ -26,9 +26,7 @@ cd DeepStream-Yolo #### 3. Copy the class names file to each GIE folder and remane it to `labels.txt` -#### 4. Copy the `cfg` and `weights`/`wts` files to each GIE folder - -**NOTE**: It is important to keep the YOLO model reference (`yolov4_`, `yolov5_`, `yolor_`, etc) in you `cfg` and `weights`/`wts` filenames to generate the engine correctly. +#### 4. Copy the `onnx` or `cfg` and `weights` files to each GIE folder ## @@ -92,22 +90,36 @@ const char* YOLOLAYER_PLUGIN_VERSION {"2"}; ### Edit the config_infer_primary files -**NOTE**: Edit the files according to the model you will use (YOLOv4, YOLOv5, YOLOR, etc). +**NOTE**: Edit the files according to the model you will use (YOLOv8, YOLOv5, YOLOv4, etc). **NOTE**: Do it for each GIE folder. 
* Edit the path of the `cfg` file - Example for gie1 + Example for gie1 (Darknet YOLO) ``` custom-network-config=gie1/yolo.cfg - ``` + model-file=yolo.weights + ``` - Example for gie2 + Example for gie2 (Darknet YOLO) ``` custom-network-config=gie2/yolo.cfg + model-file=yolo.weights + ``` + + Example for gie1 (ONNX) + + ``` + onnx-file=yolo.onnx + ``` + + Example for gie2 (ONNX) + + ``` + onnx-file=yolo.onnx ``` * Edit the gie-unique-id diff --git a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp index 084b22b..0b1fce2 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp @@ -10,7 +10,7 @@ nvinfer1::ITensor* batchnormLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input, + std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { nvinfer1::ITensor* output; @@ -26,41 +26,21 @@ batchnormLayer(int layerIdx, std::map& block, std::vec std::vector bnRunningMean; std::vector bnRunningVar; - if (weightsType == "weights") { - for (int i = 0; i < filters; ++i) { - bnBiases.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnWeights.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningMean.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); - ++weightPtr; - } + for (int i = 0; i < filters; ++i) { + bnBiases.push_back(weights[weightPtr]); + ++weightPtr; } - else { - for (int i = 0; i < filters; ++i) { - bnWeights.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnBiases.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningMean.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); - ++weightPtr; - } + for (int i = 0; i < filters; ++i) { + bnWeights.push_back(weights[weightPtr]); + ++weightPtr; + } + for (int i = 0; i < filters; ++i) { + bnRunningMean.push_back(weights[weightPtr]); + ++weightPtr; + } + for (int i = 0; i < filters; ++i) { + bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); + ++weightPtr; } int size = filters; diff --git a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h index c3bfffc..fda7fd8 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h +++ b/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h @@ -14,7 +14,7 @@ #include "activation_layer.h" nvinfer1::ITensor* batchnormLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input, + std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); #endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/c2f_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/c2f_layer.cpp deleted file mode 100644 index c0cf780..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/c2f_layer.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "c2f_layer.h" - -#include - -#include "convolutional_layer.h" - -nvinfer1::ITensor* 
-c2fLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - nvinfer1::ITensor* output; - - assert(block.at("type") == "c2f"); - assert(block.find("n") != block.end()); - assert(block.find("shortcut") != block.end()); - assert(block.find("filters") != block.end()); - - int n = std::stoi(block.at("n")); - bool shortcut = (block.at("shortcut") == "1"); - int filters = std::stoi(block.at("filters")); - - nvinfer1::Dims inputDims = input->getDimensions(); - - nvinfer1::ISliceLayer* sliceLt = network->addSlice(*input,nvinfer1::Dims{3, {0, 0, 0}}, - nvinfer1::Dims{3, {inputDims.d[0] / 2, inputDims.d[1], inputDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}}); - assert(sliceLt != nullptr); - std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx); - sliceLt->setName(sliceLtLayerName.c_str()); - nvinfer1::ITensor* lt = sliceLt->getOutput(0); - - nvinfer1::ISliceLayer* sliceRb = network->addSlice(*input,nvinfer1::Dims{3, {inputDims.d[0] / 2, 0, 0}}, - nvinfer1::Dims{3, {inputDims.d[0] / 2, inputDims.d[1], inputDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}}); - assert(sliceRb != nullptr); - std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx); - sliceRb->setName(sliceRbLayerName.c_str()); - nvinfer1::ITensor* rb = sliceRb->getOutput(0); - - std::vector concatInputs; - concatInputs.push_back(lt); - concatInputs.push_back(rb); - output = rb; - - for (int i = 0; i < n; ++i) { - std::string cv1MlayerName = "c2f_1_" + std::to_string(i + 1) + "_"; - nvinfer1::ITensor* cv1M = convolutionalLayer(layerIdx, block, weights, trtWeights, weightPtr, weightsType, filters, eps, - output, network, cv1MlayerName); - assert(cv1M != nullptr); - - std::string cv2MlayerName = "c2f_2_" + std::to_string(i + 1) + "_"; - nvinfer1::ITensor* cv2M = convolutionalLayer(layerIdx, block, weights, trtWeights, weightPtr, weightsType, filters, eps, - cv1M, network, cv2MlayerName); - assert(cv2M != nullptr); - - if (shortcut) { - nvinfer1::IElementWiseLayer* ew = network->addElementWise(*output, *cv2M, nvinfer1::ElementWiseOperation::kSUM); - assert(ew != nullptr); - std::string ewLayerName = "shortcut_c2f_" + std::to_string(i + 1) + "_" + std::to_string(layerIdx); - ew->setName(ewLayerName.c_str()); - output = ew->getOutput(0); - concatInputs.push_back(output); - } - else { - output = cv2M; - concatInputs.push_back(output); - } - } - - nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); - assert(concat != nullptr); - std::string concatLayerName = "route_" + std::to_string(layerIdx); - concat->setName(concatLayerName.c_str()); - concat->setAxis(0); - output = concat->getOutput(0); - - return output; -} diff --git a/nvdsinfer_custom_impl_Yolo/layers/c2f_layer.h b/nvdsinfer_custom_impl_Yolo/layers/c2f_layer.h deleted file mode 100644 index 28f373f..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/c2f_layer.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __C2F_LAYER_H__ -#define __C2F_LAYER_H__ - -#include -#include - -#include "NvInfer.h" - -nvinfer1::ITensor* c2fLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, float eps, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); - -#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp 
b/nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp deleted file mode 100644 index 4a6a93b..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "cls_layer.h" - -#include - -nvinfer1::ITensor* -clsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - nvinfer1::ITensor* output; - - assert(block.at("type") == "cls"); - - nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); - assert(shuffle != nullptr); - std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); - shuffle->setName(shuffleLayerName.c_str()); - nvinfer1::Permutation permutation; - permutation.order[0] = 1; - permutation.order[1] = 0; - shuffle->setFirstTranspose(permutation); - output = shuffle->getOutput(0); - - return output; -} diff --git a/nvdsinfer_custom_impl_Yolo/layers/cls_layer.h b/nvdsinfer_custom_impl_Yolo/layers/cls_layer.h deleted file mode 100644 index 3179590..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/cls_layer.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __CLS_LAYER_H__ -#define __CLS_LAYER_H__ - -#include - -#include "NvInfer.h" - -nvinfer1::ITensor* clsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); - -#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp index bdec987..65fc65a 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp @@ -10,8 +10,8 @@ nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, float eps, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName) + std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network, std::string layerName) { nvinfer1::ITensor* output; @@ -58,117 +58,60 @@ convolutionalLayer(int layerIdx, std::map& block, std: nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias}; - if (weightsType == "weights") { - if (batchNormalize == false) { - float* val; - if (bias != 0) { - val = new float[filters]; - for (int i = 0; i < filters; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - convBias.values = val; - trtWeights.push_back(convBias); - } - val = new float[size]; - for (int i = 0; i < size; ++i) { + if (batchNormalize == false) { + float* val; + if (bias != 0) { + val = new float[filters]; + for (int i = 0; i < filters; ++i) { val[i] = weights[weightPtr]; ++weightPtr; } - convWt.values = val; - trtWeights.push_back(convWt); + convBias.values = val; + trtWeights.push_back(convBias); } - else { - for (int i = 0; i < filters; ++i) { - bnBiases.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnWeights.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningMean.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); - ++weightPtr; - } - float* val; - if (bias != 0) { - val = new 
float[filters]; - for (int i = 0; i < filters; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - convBias.values = val; - } - val = new float[size]; - for (int i = 0; i < size; ++i) { + val = new float[size]; + for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; ++weightPtr; - } - convWt.values = val; - trtWeights.push_back(convWt); - if (bias != 0) - trtWeights.push_back(convBias); } + convWt.values = val; + trtWeights.push_back(convWt); } else { - if (batchNormalize == false) { - float* val = new float[size]; - for (int i = 0; i < size; ++i) { + for (int i = 0; i < filters; ++i) { + bnBiases.push_back(weights[weightPtr]); + ++weightPtr; + } + for (int i = 0; i < filters; ++i) { + bnWeights.push_back(weights[weightPtr]); + ++weightPtr; + } + for (int i = 0; i < filters; ++i) { + bnRunningMean.push_back(weights[weightPtr]); + ++weightPtr; + } + for (int i = 0; i < filters; ++i) { + bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); + ++weightPtr; + } + float* val; + if (bias != 0) { + val = new float[filters]; + for (int i = 0; i < filters; ++i) { val[i] = weights[weightPtr]; ++weightPtr; } - convWt.values = val; - trtWeights.push_back(convWt); - if (bias != 0) { - val = new float[filters]; - for (int i = 0; i < filters; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - convBias.values = val; - trtWeights.push_back(convBias); - } + convBias.values = val; } - else { - float* val = new float[size]; - for (int i = 0; i < size; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - convWt.values = val; - if (bias != 0) { - val = new float[filters]; - for (int i = 0; i < filters; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - convBias.values = val; - } - for (int i = 0; i < filters; ++i) { - bnWeights.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnBiases.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningMean.push_back(weights[weightPtr]); - ++weightPtr; - } - for (int i = 0; i < filters; ++i) { - bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); - ++weightPtr; - } - trtWeights.push_back(convWt); - if (bias != 0) - trtWeights.push_back(convBias); + val = new float[size]; + for (int i = 0; i < size; ++i) { + val[i] = weights[weightPtr]; + ++weightPtr; } + convWt.values = val; + trtWeights.push_back(convWt); + if (bias != 0) + trtWeights.push_back(convBias); } nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}}, diff --git a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h index 4652bcb..7329eb9 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h +++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h @@ -14,7 +14,7 @@ #include "activation_layer.h" nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, float eps, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = ""); + std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network, std::string layerName = ""); #endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp index 79d2f90..5c6db36 100644 --- 
a/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp @@ -9,8 +9,8 @@ nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName) + std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network, std::string layerName) { nvinfer1::ITensor* output; @@ -47,43 +47,23 @@ deconvolutionalLayer(int layerIdx, std::map& block, st nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias}; - if (weightsType == "weights") { - float* val; - if (bias != 0) { - val = new float[filters]; - for (int i = 0; i < filters; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - convBias.values = val; - trtWeights.push_back(convBias); - } - val = new float[size]; - for (int i = 0; i < size; ++i) { + float* val; + if (bias != 0) { + val = new float[filters]; + for (int i = 0; i < filters; ++i) { val[i] = weights[weightPtr]; ++weightPtr; } - convWt.values = val; - trtWeights.push_back(convWt); + convBias.values = val; + trtWeights.push_back(convBias); } - else { - float* val = new float[size]; - for (int i = 0; i < size; ++i) { + val = new float[size]; + for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; ++weightPtr; - } - convWt.values = val; - trtWeights.push_back(convWt); - if (bias != 0) { - val = new float[filters]; - for (int i = 0; i < filters; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - convBias.values = val; - trtWeights.push_back(convBias); - } } + convWt.values = val; + trtWeights.push_back(convWt); nvinfer1::IDeconvolutionLayer* conv = network->addDeconvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias); diff --git a/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h index 886a43e..ae46e94 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h +++ b/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h @@ -12,7 +12,7 @@ #include "NvInfer.h" nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, std::string weightsType, int& inputChannels, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = ""); + std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network, std::string layerName = ""); #endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.cpp deleted file mode 100644 index ed3e7ad..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "detect_v8_layer.h" - -#include - -nvinfer1::ITensor* -detectV8Layer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - nvinfer1::ITensor* output; - - assert(block.at("type") == "detect_v8"); - assert(block.find("num") != block.end()); - assert(block.find("classes") != block.end()); - - int 
num = std::stoi(block.at("num")); - int classes = std::stoi(block.at("classes")); - int reg_max = num / 4; - - nvinfer1::Dims inputDims = input->getDimensions(); - - nvinfer1::ISliceLayer* sliceBox = network->addSlice(*input, nvinfer1::Dims{2, {0, 0}}, - nvinfer1::Dims{2, {num, inputDims.d[1]}}, nvinfer1::Dims{2, {1, 1}}); - assert(sliceBox != nullptr); - std::string sliceBoxLayerName = "slice_box_" + std::to_string(layerIdx); - sliceBox->setName(sliceBoxLayerName.c_str()); - nvinfer1::ITensor* box = sliceBox->getOutput(0); - - nvinfer1::ISliceLayer* sliceCls = network->addSlice(*input, nvinfer1::Dims{2, {num, 0}}, - nvinfer1::Dims{2, {classes, inputDims.d[1]}}, nvinfer1::Dims{2, {1, 1}}); - assert(sliceCls != nullptr); - std::string sliceClsLayerName = "slice_cls_" + std::to_string(layerIdx); - sliceCls->setName(sliceClsLayerName.c_str()); - nvinfer1::ITensor* cls = sliceCls->getOutput(0); - - nvinfer1::IShuffleLayer* shuffle1Box = network->addShuffle(*box); - assert(shuffle1Box != nullptr); - std::string shuffle1BoxLayerName = "shuffle1_box_" + std::to_string(layerIdx); - shuffle1Box->setName(shuffle1BoxLayerName.c_str()); - nvinfer1::Dims reshape1Dims = {3, {4, reg_max, inputDims.d[1]}}; - shuffle1Box->setReshapeDimensions(reshape1Dims); - nvinfer1::Permutation permutation1Box; - permutation1Box.order[0] = 1; - permutation1Box.order[1] = 0; - permutation1Box.order[2] = 2; - shuffle1Box->setSecondTranspose(permutation1Box); - box = shuffle1Box->getOutput(0); - - nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*box); - assert(softmax != nullptr); - std::string softmaxLayerName = "softmax_box_" + std::to_string(layerIdx); - softmax->setName(softmaxLayerName.c_str()); - softmax->setAxes(1 << 0); - box = softmax->getOutput(0); - - nvinfer1::Weights dflWt {nvinfer1::DataType::kFLOAT, nullptr, reg_max}; - - float* val = new float[reg_max]; - for (int i = 0; i < reg_max; ++i) { - val[i] = i; - } - dflWt.values = val; - - nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*box, 1, nvinfer1::Dims{2, {1, 1}}, dflWt, - nvinfer1::Weights{}); - assert(conv != nullptr); - std::string convLayerName = "conv_box_" + std::to_string(layerIdx); - conv->setName(convLayerName.c_str()); - conv->setStrideNd(nvinfer1::Dims{2, {1, 1}}); - conv->setPaddingNd(nvinfer1::Dims{2, {0, 0}}); - box = conv->getOutput(0); - - nvinfer1::IShuffleLayer* shuffle2Box = network->addShuffle(*box); - assert(shuffle2Box != nullptr); - std::string shuffle2BoxLayerName = "shuffle2_box_" + std::to_string(layerIdx); - shuffle2Box->setName(shuffle2BoxLayerName.c_str()); - nvinfer1::Dims reshape2Dims = {2, {4, inputDims.d[1]}}; - shuffle2Box->setReshapeDimensions(reshape2Dims); - box = shuffle2Box->getOutput(0); - - nvinfer1::Dims shuffle2BoxDims = box->getDimensions(); - - nvinfer1::ISliceLayer* sliceLtBox = network->addSlice(*box, nvinfer1::Dims{2, {0, 0}}, - nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}}, nvinfer1::Dims{2, {1, 1}}); - assert(sliceLtBox != nullptr); - std::string sliceLtBoxLayerName = "slice_lt_box_" + std::to_string(layerIdx); - sliceLtBox->setName(sliceLtBoxLayerName.c_str()); - nvinfer1::ITensor* lt = sliceLtBox->getOutput(0); - - nvinfer1::ISliceLayer* sliceRbBox = network->addSlice(*box, nvinfer1::Dims{2, {2, 0}}, - nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}}, nvinfer1::Dims{2, {1, 1}}); - assert(sliceRbBox != nullptr); - std::string sliceRbBoxLayerName = "slice_rb_box_" + std::to_string(layerIdx); - sliceRbBox->setName(sliceRbBoxLayerName.c_str()); - nvinfer1::ITensor* rb = 
sliceRbBox->getOutput(0); - - int channels = 2 * shuffle2BoxDims.d[1]; - nvinfer1::Weights anchorPointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels}; - val = new float[channels]; - for (int i = 0; i < channels; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - anchorPointsWt.values = val; - trtWeights.push_back(anchorPointsWt); - - nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {2, shuffle2BoxDims.d[1]}}, - anchorPointsWt); - assert(anchorPoints != nullptr); - std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx); - anchorPoints->setName(anchorPointsLayerName.c_str()); - nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0); - - nvinfer1::IElementWiseLayer* x1y1 = network->addElementWise(*anchorPointsTensor, *lt, - nvinfer1::ElementWiseOperation::kSUB); - assert(x1y1 != nullptr); - std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx); - x1y1->setName(x1y1LayerName.c_str()); - nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0); - - nvinfer1::IElementWiseLayer* x2y2 = network->addElementWise(*rb, *anchorPointsTensor, - nvinfer1::ElementWiseOperation::kSUM); - assert(x2y2 != nullptr); - std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx); - x2y2->setName(x2y2LayerName.c_str()); - nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0); - - std::vector concatBoxInputs; - concatBoxInputs.push_back(x1y1Tensor); - concatBoxInputs.push_back(x2y2Tensor); - - nvinfer1::IConcatenationLayer* concatBox = network->addConcatenation(concatBoxInputs.data(), concatBoxInputs.size()); - assert(concatBox != nullptr); - std::string concatBoxLayerName = "concat_box_" + std::to_string(layerIdx); - concatBox->setName(concatBoxLayerName.c_str()); - concatBox->setAxis(0); - box = concatBox->getOutput(0); - - channels = shuffle2BoxDims.d[1]; - nvinfer1::Weights stridePointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels}; - val = new float[channels]; - for (int i = 0; i < channels; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - stridePointsWt.values = val; - trtWeights.push_back(stridePointsWt); - - nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {1, shuffle2BoxDims.d[1]}}, - stridePointsWt); - assert(stridePoints != nullptr); - std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx); - stridePoints->setName(stridePointsLayerName.c_str()); - nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0); - - nvinfer1::IElementWiseLayer* pred = network->addElementWise(*box, *stridePointsTensor, - nvinfer1::ElementWiseOperation::kPROD); - assert(pred != nullptr); - std::string predLayerName = "pred_" + std::to_string(layerIdx); - pred->setName(predLayerName.c_str()); - box = pred->getOutput(0); - - nvinfer1::IActivationLayer* sigmoid = network->addActivation(*cls, nvinfer1::ActivationType::kSIGMOID); - assert(sigmoid != nullptr); - std::string sigmoidLayerName = "sigmoid_cls_" + std::to_string(layerIdx); - sigmoid->setName(sigmoidLayerName.c_str()); - cls = sigmoid->getOutput(0); - - std::vector concatInputs; - concatInputs.push_back(box); - concatInputs.push_back(cls); - - nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); - assert(concat != nullptr); - std::string concatLayerName = "concat_" + std::to_string(layerIdx); - concat->setName(concatLayerName.c_str()); - concat->setAxis(0); - output = concat->getOutput(0); - - nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*output); - 
assert(shuffle != nullptr); - std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); - shuffle->setName(shuffleLayerName.c_str()); - nvinfer1::Permutation permutation; - permutation.order[0] = 1; - permutation.order[1] = 0; - shuffle->setFirstTranspose(permutation); - output = shuffle->getOutput(0); - - return output; -} diff --git a/nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.h b/nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.h deleted file mode 100644 index 9cd9443..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/detect_v8_layer.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __DETECT_V8_LAYER_H__ -#define __DETECT_V8_LAYER_H__ - -#include -#include - -#include "NvInfer.h" - -nvinfer1::ITensor* detectV8Layer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); - -#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp index 5553ac7..b02eba9 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp @@ -13,7 +13,7 @@ implicitLayer(int layerIdx, std::map& block, std::vect { nvinfer1::ITensor* output; - assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul"); + assert(block.at("type") == "implicit" || block.at("type") == "implicit_add" || block.at("type") == "implicit_mul"); assert(block.find("filters") != block.end()); int filters = std::stoi(block.at("filters")); diff --git a/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp index 7ebe0f3..e5e8c99 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp @@ -14,9 +14,10 @@ poolingLayer(int layerIdx, std::map& block, nvinfer1:: { nvinfer1::ITensor* output; - assert(block.at("type") == "maxpool" || block.at("type") == "avgpool"); + assert(block.at("type") == "max" || block.at("type") == "maxpool" || block.at("type") == "avg" || + block.at("type") == "avgpool"); - if (block.at("type") == "maxpool") { + if (block.at("type") == "max" || block.at("type") == "maxpool") { assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); @@ -36,7 +37,7 @@ poolingLayer(int layerIdx, std::map& block, nvinfer1:: } output = maxpool->getOutput(0); } - else if (block.at("type") == "avgpool") { + else if (block.at("type") == "avg" || block.at("type") == "avgpool") { nvinfer1::Dims inputDims = input->getDimensions(); nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kAVERAGE, nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}}); diff --git a/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp deleted file mode 100644 index 9d91178..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "reduce_layer.h" - -nvinfer1::ITensor* -reduceLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - nvinfer1::ITensor* output; - - assert(block.at("type") == "reduce"); - assert(block.find("mode") != block.end()); - assert(block.find("axes") != block.end()); - - std::string mode = 
block.at("mode"); - - nvinfer1::ReduceOperation operation; - if (mode == "mean") - operation = nvinfer1::ReduceOperation::kAVG; - - std::string strAxes = block.at("axes"); - std::vector axes; - size_t lastPos = 0, pos = 0; - while ((pos = strAxes.find(',', lastPos)) != std::string::npos) { - int vL = std::stoi(trim(strAxes.substr(lastPos, pos - lastPos))); - axes.push_back(vL); - lastPos = pos + 1; - } - if (lastPos < strAxes.length()) { - std::string lastV = trim(strAxes.substr(lastPos)); - if (!lastV.empty()) - axes.push_back(std::stoi(lastV)); - } - assert(!axes.empty()); - - uint32_t axisMask = 0; - for (int axis : axes) - axisMask |= 1 << axis; - - bool keepDims = false; - if (block.find("keep") != block.end()) - keepDims = std::stoi(block.at("keep")) == 1 ? true : false; - - nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims); - assert(reduce != nullptr); - std::string reduceLayerName = "reduce_" + std::to_string(layerIdx); - reduce->setName(reduceLayerName.c_str()); - output = reduce->getOutput(0); - - return output; -} diff --git a/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h b/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h deleted file mode 100644 index e68bca2..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __REDUCE_LAYER_H__ -#define __REDUCE_LAYER_H__ - -#include "../utils.h" - -nvinfer1::ITensor* reduceLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); - -#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp deleted file mode 100644 index 7d339e2..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "reg_layer.h" - -#include - -nvinfer1::ITensor* -regLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - nvinfer1::ITensor* output; - - assert(block.at("type") == "reg"); - - nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); - assert(shuffle != nullptr); - std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); - shuffle->setName(shuffleLayerName.c_str()); - nvinfer1::Permutation permutation; - permutation.order[0] = 1; - permutation.order[1] = 0; - shuffle->setFirstTranspose(permutation); - output = shuffle->getOutput(0); - nvinfer1::Dims shuffleDims = output->getDimensions(); - - nvinfer1::ISliceLayer* sliceLt = network->addSlice(*output, nvinfer1::Dims{2, {0, 0}}, - nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}}); - assert(sliceLt != nullptr); - std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx); - sliceLt->setName(sliceLtLayerName.c_str()); - nvinfer1::ITensor* lt = sliceLt->getOutput(0); - - nvinfer1::ISliceLayer* sliceRb = network->addSlice(*output, nvinfer1::Dims{2, {0, 2}}, - nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}}); - assert(sliceRb != nullptr); - std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx); - sliceRb->setName(sliceRbLayerName.c_str()); - nvinfer1::ITensor* rb = sliceRb->getOutput(0); - - int channels = shuffleDims.d[0] * 2; - nvinfer1::Weights anchorPointsWt {nvinfer1::DataType::kFLOAT, nullptr, 
channels}; - float* val = new float[channels]; - for (int i = 0; i < channels; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - anchorPointsWt.values = val; - trtWeights.push_back(anchorPointsWt); - - nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, anchorPointsWt); - assert(anchorPoints != nullptr); - std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx); - anchorPoints->setName(anchorPointsLayerName.c_str()); - nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0); - - nvinfer1::IElementWiseLayer* x1y1 = network->addElementWise(*anchorPointsTensor, *lt, - nvinfer1::ElementWiseOperation::kSUB); - assert(x1y1 != nullptr); - std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx); - x1y1->setName(x1y1LayerName.c_str()); - nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0); - - nvinfer1::IElementWiseLayer* x2y2 = network->addElementWise(*rb, *anchorPointsTensor, - nvinfer1::ElementWiseOperation::kSUM); - assert(x2y2 != nullptr); - std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx); - x2y2->setName(x2y2LayerName.c_str()); - nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0); - - std::vector concatInputs; - concatInputs.push_back(x1y1Tensor); - concatInputs.push_back(x2y2Tensor); - - nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); - assert(concat != nullptr); - std::string concatLayerName = "concat_" + std::to_string(layerIdx); - concat->setName(concatLayerName.c_str()); - concat->setAxis(1); - output = concat->getOutput(0); - - channels = shuffleDims.d[0]; - nvinfer1::Weights stridePointsWt {nvinfer1::DataType::kFLOAT, nullptr, channels}; - val = new float[channels]; - for (int i = 0; i < channels; ++i) { - val[i] = weights[weightPtr]; - ++weightPtr; - } - stridePointsWt.values = val; - trtWeights.push_back(stridePointsWt); - - nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 1}}, stridePointsWt); - assert(stridePoints != nullptr); - std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx); - stridePoints->setName(stridePointsLayerName.c_str()); - nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0); - - nvinfer1::IElementWiseLayer* pred = network->addElementWise(*output, *stridePointsTensor, - nvinfer1::ElementWiseOperation::kPROD); - assert(pred != nullptr); - std::string predLayerName = "pred_" + std::to_string(layerIdx); - pred->setName(predLayerName.c_str()); - output = pred->getOutput(0); - - return output; -} diff --git a/nvdsinfer_custom_impl_Yolo/layers/reg_layer.h b/nvdsinfer_custom_impl_Yolo/layers/reg_layer.h deleted file mode 100644 index 270c659..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/reg_layer.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __REG_LAYER_H__ -#define __REG_LAYER_H__ - -#include -#include - -#include "NvInfer.h" - -nvinfer1::ITensor* regLayer(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); - -#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp index e5688c4..7404776 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp @@ -14,7 +14,7 @@ reorgLayer(int layerIdx, 
std::map& block, nvinfer1::IT
 {
   nvinfer1::ITensor* output;
 
-  assert(block.at("type") == "reorg");
+  assert(block.at("type") == "reorg3d");
 
   nvinfer1::Dims inputDims = input->getDimensions();
 
diff --git a/nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp
new file mode 100644
index 0000000..73ba068
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp
@@ -0,0 +1,28 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "sam_layer.h"
+
+#include
+
+nvinfer1::ITensor*
+samLayer(int layerIdx, std::string activation, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
+    nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network)
+{
+  nvinfer1::ITensor* output;
+
+  assert(block.at("type") == "sam");
+
+  nvinfer1::IElementWiseLayer* sam = network->addElementWise(*input, *samInput, nvinfer1::ElementWiseOperation::kPROD);
+  assert(sam != nullptr);
+  std::string samLayerName = "sam_" + std::to_string(layerIdx);
+  sam->setName(samLayerName.c_str());
+  output = sam->getOutput(0);
+
+  output = activationLayer(layerIdx, activation, output, network);
+  assert(output != nullptr);
+
+  return output;
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/sam_layer.h b/nvdsinfer_custom_impl_Yolo/layers/sam_layer.h
new file mode 100644
index 0000000..1564fb0
--- /dev/null
+++ b/nvdsinfer_custom_impl_Yolo/layers/sam_layer.h
@@ -0,0 +1,18 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#ifndef __SAM_LAYER_H__
+#define __SAM_LAYER_H__
+
+#include
+
+#include "NvInfer.h"
+
+#include "activation_layer.h"
+
+nvinfer1::ITensor* samLayer(int layerIdx, std::string activation, std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input, nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network);
+
+#endif
diff --git a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp
index 929f037..3e58e72 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp
@@ -8,7 +8,7 @@
 #include
 
 nvinfer1::ITensor*
-shortcutLayer(int layerIdx, std::string mode, std::string activation, std::string inputVol, std::string shortcutVol,
+shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol,
     std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcutInput,
     nvinfer1::INetworkDefinition* network)
 {
@@ -16,12 +16,7 @@ shortcutLayer(int layerIdx, std::string mode, std::string activation, std::strin
 
   assert(block.at("type") == "shortcut");
 
-  nvinfer1::ElementWiseOperation operation = nvinfer1::ElementWiseOperation::kSUM;
-
-  if (mode == "mul")
-    operation = nvinfer1::ElementWiseOperation::kPROD;
-
-  if (mode == "add" && inputVol != shortcutVol) {
+  if (inputVol != shortcutVol) {
     nvinfer1::ISliceLayer* slice = network->addSlice(*shortcutInput, nvinfer1::Dims{3, {0, 0, 0}},
         input->getDimensions(), nvinfer1::Dims{3, {1, 1, 1}});
     assert(slice != nullptr);
@@ -32,7 +27,7 @@ shortcutLayer(int layerIdx, std::string mode, std::string activation, std::strin
   else
     output = shortcutInput;
 
-  nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, operation);
+  nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, nvinfer1::ElementWiseOperation::kSUM);
   assert(shortcut != nullptr);
   std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx);
   shortcut->setName(shortcutLayerName.c_str());
diff --git
a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h index c7b2bcf..f1556eb 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h +++ b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h @@ -12,8 +12,8 @@ #include "activation_layer.h" -nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string mode, std::string activation, std::string inputVol, - std::string shortcutVol, std::map& block, nvinfer1::ITensor* input, - nvinfer1::ITensor* shortcut, nvinfer1::INetworkDefinition* network); +nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol, + std::map& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcut, + nvinfer1::INetworkDefinition* network); #endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp deleted file mode 100644 index e37c522..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "shuffle_layer.h" - -nvinfer1::ITensor* -shuffleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, - std::vector tensorOutputs, nvinfer1::INetworkDefinition* network) -{ - nvinfer1::ITensor* output; - - assert(block.at("type") == "shuffle"); - - nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); - assert(shuffle != nullptr); - std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); - shuffle->setName(shuffleLayerName.c_str()); - - if (block.find("reshape") != block.end()) { - nvinfer1::Dims inputTensorDims = input->getDimensions(); - - std::string strReshape = block.at("reshape"); - std::vector reshape; - size_t lastPos = 0, pos = 0; - while ((pos = strReshape.find(',', lastPos)) != std::string::npos) { - std::string V = trim(strReshape.substr(lastPos, pos - lastPos)); - if (V == "c") - reshape.push_back(inputTensorDims.d[0]); - else if (V == "ch") - reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1]); - else if (V == "cw") - reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[2]); - else if (V == "h") - reshape.push_back(inputTensorDims.d[1]); - else if (V == "hw") - reshape.push_back(inputTensorDims.d[1] * inputTensorDims.d[2]); - else if (V == "w") - reshape.push_back(inputTensorDims.d[2]); - else if (V == "chw") - reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1] * inputTensorDims.d[2]); - else - reshape.push_back(std::stoi(V)); - lastPos = pos + 1; - } - if (lastPos < strReshape.length()) { - std::string lastV = trim(strReshape.substr(lastPos)); - if (!lastV.empty()) { - if (lastV == "c") - reshape.push_back(inputTensorDims.d[0]); - else if (lastV == "ch") - reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1]); - else if (lastV == "cw") - reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[2]); - else if (lastV == "h") - reshape.push_back(inputTensorDims.d[1]); - else if (lastV == "hw") - reshape.push_back(inputTensorDims.d[1] * inputTensorDims.d[2]); - else if (lastV == "w") - reshape.push_back(inputTensorDims.d[2]); - else if (lastV == "chw") - reshape.push_back(inputTensorDims.d[0] * inputTensorDims.d[1] * inputTensorDims.d[2]); - else - reshape.push_back(std::stoi(lastV)); - } - } - assert(!reshape.empty()); - - nvinfer1::Dims reshapeDims; - reshapeDims.nbDims = reshape.size(); - - for (uint i = 0; i < reshape.size(); ++i) - reshapeDims.d[i] = reshape[i]; - - 
shuffle->setReshapeDimensions(reshapeDims); - } - - if (block.find("transpose1") != block.end()) { - std::string strTranspose1 = block.at("transpose1"); - std::vector transpose1; - size_t lastPos = 0, pos = 0; - while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos) { - int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos))); - transpose1.push_back(vL); - lastPos = pos + 1; - } - if (lastPos < strTranspose1.length()) { - std::string lastV = trim(strTranspose1.substr(lastPos)); - if (!lastV.empty()) - transpose1.push_back(std::stoi(lastV)); - } - assert(!transpose1.empty()); - - nvinfer1::Permutation permutation1; - for (uint i = 0; i < transpose1.size(); ++i) - permutation1.order[i] = transpose1[i]; - - shuffle->setFirstTranspose(permutation1); - } - - if (block.find("transpose2") != block.end()) { - std::string strTranspose2 = block.at("transpose2"); - std::vector transpose2; - size_t lastPos = 0, pos = 0; - while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos) { - int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos))); - transpose2.push_back(vL); - lastPos = pos + 1; - } - if (lastPos < strTranspose2.length()) { - std::string lastV = trim(strTranspose2.substr(lastPos)); - if (!lastV.empty()) - transpose2.push_back(std::stoi(lastV)); - } - assert(!transpose2.empty()); - - nvinfer1::Permutation permutation2; - for (uint i = 0; i < transpose2.size(); ++i) - permutation2.order[i] = transpose2[i]; - - shuffle->setSecondTranspose(permutation2); - } - - output = shuffle->getOutput(0); - - return output; -} diff --git a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h b/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h deleted file mode 100644 index 24389f5..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __SHUFFLE_LAYER_H__ -#define __SHUFFLE_LAYER_H__ - -#include "../utils.h" - -nvinfer1::ITensor* shuffleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, - std::vector tensorOutputs, nvinfer1::INetworkDefinition* network); - -#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp deleted file mode 100644 index da73810..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "softmax_layer.h" - -#include - -nvinfer1::ITensor* -softmaxLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - nvinfer1::ITensor* output; - - assert(block.at("type") == "softmax"); - assert(block.find("axes") != block.end()); - - int axes = std::stoi(block.at("axes")); - - nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*input); - assert(softmax != nullptr); - std::string softmaxLayerName = "softmax_" + std::to_string(layerIdx); - softmax->setName(softmaxLayerName.c_str()); - softmax->setAxes(1 << axes); - output = softmax->getOutput(0); - - return output; -} diff --git a/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h b/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h deleted file mode 100644 index 62ddf50..0000000 --- a/nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __SOFTMAX_LAYER_H__ -#define __SOFTMAX_LAYER_H__ - 
-#include
-
-#include "NvInfer.h"
-
-nvinfer1::ITensor* softmaxLayer(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input,
-    nvinfer1::INetworkDefinition* network);
-
-#endif
diff --git a/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp b/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp
index 3510feb..63175c3 100644
--- a/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp
+++ b/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp
@@ -44,7 +44,7 @@ getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* i
 
   yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg"));
 
-  networkInfo.inputBlobName = "data";
+  networkInfo.inputBlobName = "input";
   networkInfo.networkType = yoloType;
   networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
   networkInfo.wtsFilePath = initParams->modelFilePath;
@@ -52,7 +52,6 @@ getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* i
   networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
   networkInfo.numDetectedClasses = initParams->numDetectedClasses;
   networkInfo.clusterMode = initParams->clusterMode;
-  networkInfo.scoreThreshold = initParams->perClassDetectionParams->preClusterThreshold;
 
   if (initParams->networkMode == 0)
     networkInfo.networkMode = "FP32";
diff --git a/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp b/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp
index eefa76d..dfa929f 100644
--- a/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp
+++ b/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp
@@ -26,12 +26,15 @@
 #include "nvdsinfer_custom_impl.h"
 
 #include "utils.h"
-#include "yoloPlugins.h"
 
 extern "C" bool
 NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
     NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
 
+extern "C" bool
+NvDsInferParseYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
+    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
+
 static NvDsInferParseObjectInfo
 convertBBox(const float& bx1, const float& by1, const float& bx2, const float& by2, const uint& netW, const uint& netH)
 {
@@ -60,7 +63,9 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
     const int maxIndex, const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
 {
   NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);
-  if (bbi.width < 1 || bbi.height < 1) return;
+
+  if (bbi.width < 1 || bbi.height < 1)
+    return;
 
   bbi.detectionConfidence = maxProb;
   bbi.classId = maxIndex;
@@ -68,23 +73,55 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
 }
 
 static std::vector<NvDsInferParseObjectInfo>
-decodeYoloTensor(const int* counts, const float* boxes, const float* scores, const int* classes, const uint& netW,
-    const uint& netH)
+decodeTensorYolo(const float* detection, const uint& outputSize, const uint& count, const uint& netW, const uint& netH,
+    const std::vector<float>& preclusterThreshold)
 {
   std::vector<NvDsInferParseObjectInfo> binfo;
 
-  uint numBoxes = counts[0];
-  for (uint b = 0; b < numBoxes; ++b) {
-    float bx1 = boxes[b * 4 + 0];
-    float by1 = boxes[b * 4 + 1];
-    float bx2 = boxes[b * 4 + 2];
-    float by2 = boxes[b * 4 + 3];
+  for (uint b = 0; b < outputSize; ++b) {
+    float maxProb = count == 6 ? detection[b * count + 4] : detection[b * count + 4] * detection[b * count + 6];
+    int maxIndex = (int) detection[b * count + 5];
 
-    float maxProb = scores[b];
-    int maxIndex = classes[b];
+    if (maxProb < preclusterThreshold[maxIndex])
+      continue;
+
+    float bxc = detection[b * count + 0];
+    float byc = detection[b * count + 1];
+    float bw = detection[b * count + 2];
+    float bh = detection[b * count + 3];
+
+    float bx1 = bxc - bw / 2;
+    float by1 = byc - bh / 2;
+    float bx2 = bx1 + bw;
+    float by2 = by1 + bh;
 
     addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
   }
+
+  return binfo;
+}
+
+static std::vector<NvDsInferParseObjectInfo>
+decodeTensorYoloE(const float* detection, const uint& outputSize, const uint& count, const uint& netW, const uint& netH,
+    const std::vector<float>& preclusterThreshold)
+{
+  std::vector<NvDsInferParseObjectInfo> binfo;
+
+  for (uint b = 0; b < outputSize; ++b) {
+    float maxProb = count == 6 ? detection[b * count + 4] : detection[b * count + 4] * detection[b * count + 6];
+    int maxIndex = (int) detection[b * count + 5];
+
+    if (maxProb < preclusterThreshold[maxIndex])
+      continue;
+
+    float bx1 = detection[b * count + 0];
+    float by1 = detection[b * count + 1];
+    float bx2 = detection[b * count + 2];
+    float by2 = detection[b * count + 3];
+
+    addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
+  }
 
   return binfo;
 }
@@ -99,14 +136,39 @@ NvDsInferParseCustomYolo(std::vector const& outputLayersInfo
 
   std::vector<NvDsInferParseObjectInfo> objects;
 
-  const NvDsInferLayerInfo& counts = outputLayersInfo[0];
-  const NvDsInferLayerInfo& boxes = outputLayersInfo[1];
-  const NvDsInferLayerInfo& scores = outputLayersInfo[2];
-  const NvDsInferLayerInfo& classes = outputLayersInfo[3];
+  const NvDsInferLayerInfo& layer = outputLayersInfo[0];
 
-  std::vector<NvDsInferParseObjectInfo> outObjs = decodeYoloTensor((const int*) (counts.buffer),
-      (const float*) (boxes.buffer), (const float*) (scores.buffer), (const int*) (classes.buffer), networkInfo.width,
-      networkInfo.height);
+  const uint outputSize = layer.inferDims.d[0];
+  const uint count = layer.inferDims.d[1];
+
+  std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (layer.buffer), outputSize, count,
+      networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
+
+  objects.insert(objects.end(), outObjs.begin(), outObjs.end());
+
+  objectList = objects;
+
+  return true;
+}
+
+static bool
+NvDsInferParseCustomYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
+    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
+{
+  if (outputLayersInfo.empty()) {
+    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
+    return false;
+  }
+
+  std::vector<NvDsInferParseObjectInfo> objects;
+
+  const NvDsInferLayerInfo& layer = outputLayersInfo[0];
+
+  const uint outputSize = layer.inferDims.d[0];
+  const uint count = layer.inferDims.d[1];
+
+  std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (layer.buffer), outputSize, count,
+      networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
 
   objects.insert(objects.end(), outObjs.begin(), outObjs.end());
@@ -122,4 +184,11 @@ NvDsInferParseYolo(std::vector const& outputLayersInfo, NvDs
   return NvDsInferParseCustomYolo(outputLayersInfo, networkInfo, detectionParams, objectList);
 }
 
+extern "C" bool
+NvDsInferParseYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
+    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
+{
+  return NvDsInferParseCustomYoloE(outputLayersInfo, networkInfo, detectionParams,
objectList); +} + CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); diff --git a/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo_cuda.cu b/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo_cuda.cu deleted file mode 100644 index d586a09..0000000 --- a/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo_cuda.cu +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Edited by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include -#include - -#include "nvdsinfer_custom_impl.h" - -#include "utils.h" -#include "yoloPlugins.h" - -__global__ void decodeTensor_YOLO_ONNX(NvDsInferParseObjectInfo *binfo, const float* detections, const int numClasses, - const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= outputSize) - return; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numClasses; ++i) { - float prob = detections[x_id * (5 + numClasses) + 5 + i]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - const float objectness = detections[x_id * (5 + numClasses) + 4]; - - if (objectness * maxProb < preclusterThreshold[maxIndex]) - return; - - int count = (int)atomicAdd(numDetections, 1); - - const float bxc = detections[x_id * (5 + numClasses) + 0]; - const float byc = detections[x_id * (5 + numClasses) + 1]; - const float bw = detections[x_id * (5 + numClasses) + 2]; - const float bh = detections[x_id * (5 + numClasses) + 3]; - - float x0 = bxc - bw / 2; - float y0 = byc - bh / 2; - float x1 = x0 + bw; - float y1 = y0 + bh; - x0 = fminf(float(netW), fmaxf(float(0.0), x0)); - y0 = fminf(float(netH), fmaxf(float(0.0), y0)); - x1 = fminf(float(netW), fmaxf(float(0.0), x1)); - y1 = fminf(float(netH), fmaxf(float(0.0), y1)); - - binfo[count].left = x0; - binfo[count].top = y0; - binfo[count].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0)); - binfo[count].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0)); - binfo[count].detectionConfidence = objectness * maxProb; - binfo[count].classId = maxIndex; -} - -__global__ void decodeTensor_YOLOV8_ONNX(NvDsInferParseObjectInfo* binfo, const float* detections, const int numClasses, - const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= 
outputSize) - return; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numClasses; ++i) { - float prob = detections[x_id + outputSize * (i + 4)]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - if (maxProb < preclusterThreshold[maxIndex]) - return; - - int count = (int)atomicAdd(numDetections, 1); - - const float bxc = detections[x_id + outputSize * 0]; - const float byc = detections[x_id + outputSize * 1]; - const float bw = detections[x_id + outputSize * 2]; - const float bh = detections[x_id + outputSize * 3]; - - float x0 = bxc - bw / 2; - float y0 = byc - bh / 2; - float x1 = x0 + bw; - float y1 = y0 + bh; - x0 = fminf(float(netW), fmaxf(float(0.0), x0)); - y0 = fminf(float(netH), fmaxf(float(0.0), y0)); - x1 = fminf(float(netW), fmaxf(float(0.0), x1)); - y1 = fminf(float(netH), fmaxf(float(0.0), y1)); - - binfo[count].left = x0; - binfo[count].top = y0; - binfo[count].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0)); - binfo[count].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0)); - binfo[count].detectionConfidence = maxProb; - binfo[count].classId = maxIndex; -} - -__global__ void decodeTensor_YOLOX_ONNX(NvDsInferParseObjectInfo *binfo, const float* detections, const int numClasses, - const int outputSize, float netW, float netH, const int *grid0, const int *grid1, const int *strides, - const float* preclusterThreshold, int* numDetections) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= outputSize) - return; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numClasses; ++i) { - float prob = detections[x_id * (5 + numClasses) + 5 + i]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - const float objectness = detections[x_id * (5 + numClasses) + 4]; - - if (objectness * maxProb < preclusterThreshold[maxIndex]) - return; - - int count = (int)atomicAdd(numDetections, 1); - - const float bxc = (detections[x_id * (5 + numClasses) + 0] + grid0[x_id]) * strides[x_id]; - const float byc = (detections[x_id * (5 + numClasses) + 1] + grid1[x_id]) * strides[x_id]; - const float bw = __expf(detections[x_id * (5 + numClasses) + 2]) * strides[x_id]; - const float bh = __expf(detections[x_id * (5 + numClasses) + 3]) * strides[x_id]; - - float x0 = bxc - bw / 2; - float y0 = byc - bh / 2; - float x1 = x0 + bw; - float y1 = y0 + bh; - x0 = fminf(float(netW), fmaxf(float(0.0), x0)); - y0 = fminf(float(netH), fmaxf(float(0.0), y0)); - x1 = fminf(float(netW), fmaxf(float(0.0), x1)); - y1 = fminf(float(netH), fmaxf(float(0.0), y1)); - - binfo[count].left = x0; - binfo[count].top = y0; - binfo[count].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0)); - binfo[count].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0)); - binfo[count].detectionConfidence = objectness * maxProb; - binfo[count].classId = maxIndex; -} - -__global__ void decodeTensor_YOLO_NAS_ONNX(NvDsInferParseObjectInfo *binfo, const float* scores, const float* boxes, - const int numClasses, const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= outputSize) - return; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numClasses; ++i) { - float prob = scores[x_id * numClasses + i]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - if (maxProb < preclusterThreshold[maxIndex]) - return; - - int count = (int)atomicAdd(numDetections, 1); - - float x0 = boxes[x_id 
* 4 + 0]; - float y0 = boxes[x_id * 4 + 1]; - float x1 = boxes[x_id * 4 + 2]; - float y1 = boxes[x_id * 4 + 3]; - - x0 = fminf(float(netW), fmaxf(float(0.0), x0)); - y0 = fminf(float(netH), fmaxf(float(0.0), y0)); - x1 = fminf(float(netW), fmaxf(float(0.0), x1)); - y1 = fminf(float(netH), fmaxf(float(0.0), y1)); - - binfo[count].left = x0; - binfo[count].top = y0; - binfo[count].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0)); - binfo[count].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0)); - binfo[count].detectionConfidence = maxProb; - binfo[count].classId = maxIndex; -} - -__global__ void decodeTensor_PPYOLOE_ONNX(NvDsInferParseObjectInfo *binfo, const float* scores, const float* boxes, - const int numClasses, const int outputSize, float netW, float netH, const float* preclusterThreshold, int* numDetections) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= outputSize) - return; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numClasses; ++i) { - float prob = scores[x_id + outputSize * i]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - if (maxProb < preclusterThreshold[maxIndex]) - return; - - int count = (int)atomicAdd(numDetections, 1); - - float x0 = boxes[x_id * 4 + 0]; - float y0 = boxes[x_id * 4 + 1]; - float x1 = boxes[x_id * 4 + 2]; - float y1 = boxes[x_id * 4 + 3]; - - x0 = fminf(float(netW), fmaxf(float(0.0), x0)); - y0 = fminf(float(netH), fmaxf(float(0.0), y0)); - x1 = fminf(float(netW), fmaxf(float(0.0), x1)); - y1 = fminf(float(netH), fmaxf(float(0.0), y1)); - - binfo[count].left = x0; - binfo[count].top = y0; - binfo[count].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0)); - binfo[count].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0)); - binfo[count].detectionConfidence = maxProb; - binfo[count].classId = maxIndex; -} - -static bool -NvDsInferParseCustom_YOLO_ONNX(std::vector const& outputLayersInfo, - NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, - std::vector& objectList) -{ - if (outputLayersInfo.empty()) { - std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; - return false; - } - - const NvDsInferLayerInfo& layer = outputLayersInfo[0]; - - const uint outputSize = layer.inferDims.d[0]; - const uint numClasses = layer.inferDims.d[1] - 5; - - if (numClasses != detectionParams.numClassesConfigured) { - std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses - << " in config_infer file\n" << std::endl; - } - - thrust::device_vector objects(outputSize); - - std::vector numDetections = { 0 }; - thrust::device_vector d_numDetections(numDetections); - - thrust::device_vector preclusterThreshold(detectionParams.perClassPreclusterThreshold); - - int threads_per_block = 1024; - int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1; - - decodeTensor_YOLO_ONNX<<>>( - thrust::raw_pointer_cast(objects.data()), (const float*) (layer.buffer), numClasses, outputSize, - static_cast(networkInfo.width), static_cast(networkInfo.height), - thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data())); - - thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin()); - objectList.resize(numDetections[0]); - thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin()); - - return true; -} - -static bool -NvDsInferParseCustom_YOLOV8_ONNX(std::vector const& outputLayersInfo, 
- NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, - std::vector& objectList) -{ - if (outputLayersInfo.empty()) { - std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; - return false; - } - - const NvDsInferLayerInfo& layer = outputLayersInfo[0]; - - const uint numClasses = layer.inferDims.d[0] - 4; - const uint outputSize = layer.inferDims.d[1]; - - if (numClasses != detectionParams.numClassesConfigured) { - std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses - << " in config_infer file\n" << std::endl; - } - - thrust::device_vector objects(outputSize); - - std::vector numDetections = { 0 }; - thrust::device_vector d_numDetections(numDetections); - - thrust::device_vector preclusterThreshold(detectionParams.perClassPreclusterThreshold); - - int threads_per_block = 1024; - int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1; - - decodeTensor_YOLOV8_ONNX<<>>( - thrust::raw_pointer_cast(objects.data()), (const float*) (layer.buffer), numClasses, outputSize, - static_cast(networkInfo.width), static_cast(networkInfo.height), - thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data())); - - thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin()); - objectList.resize(numDetections[0]); - thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin()); - - return true; -} - -static bool -NvDsInferParseCustom_YOLOX_ONNX(std::vector const& outputLayersInfo, - NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, - std::vector& objectList) -{ - if (outputLayersInfo.empty()) { - std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; - return false; - } - - const NvDsInferLayerInfo& layer = outputLayersInfo[0]; - - const uint outputSize = layer.inferDims.d[0]; - const uint numClasses = layer.inferDims.d[1] - 5; - - if (numClasses != detectionParams.numClassesConfigured) { - std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses - << " in config_infer file\n" << std::endl; - } - - thrust::device_vector objects(outputSize); - - std::vector numDetections = { 0 }; - thrust::device_vector d_numDetections(numDetections); - - thrust::device_vector preclusterThreshold(detectionParams.perClassPreclusterThreshold); - - std::vector strides = {8, 16, 32}; - - std::vector grid0; - std::vector grid1; - std::vector gridStrides; - - for (uint s = 0; s < strides.size(); ++s) { - int num_grid_y = networkInfo.height / strides[s]; - int num_grid_x = networkInfo.width / strides[s]; - for (int g1 = 0; g1 < num_grid_y; ++g1) { - for (int g0 = 0; g0 < num_grid_x; ++g0) { - grid0.push_back(g0); - grid1.push_back(g1); - gridStrides.push_back(strides[s]); - } - } - } - - thrust::device_vector d_grid0(grid0); - thrust::device_vector d_grid1(grid1); - thrust::device_vector d_gridStrides(gridStrides); - - int threads_per_block = 1024; - int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1; - - decodeTensor_YOLOX_ONNX<<>>( - thrust::raw_pointer_cast(objects.data()), (const float*) (layer.buffer), numClasses, outputSize, - static_cast(networkInfo.width), static_cast(networkInfo.height), - thrust::raw_pointer_cast(d_grid0.data()), thrust::raw_pointer_cast(d_grid1.data()), - thrust::raw_pointer_cast(d_gridStrides.data()), thrust::raw_pointer_cast(preclusterThreshold.data()), 
- thrust::raw_pointer_cast(d_numDetections.data())); - - thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin()); - objectList.resize(numDetections[0]); - thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin()); - - return true; -} - -static bool -NvDsInferParseCustom_YOLO_NAS_ONNX(std::vector const& outputLayersInfo, - NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, - std::vector& objectList) -{ - if (outputLayersInfo.empty()) { - std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; - return false; - } - - const NvDsInferLayerInfo& scores = outputLayersInfo[0]; - const NvDsInferLayerInfo& boxes = outputLayersInfo[1]; - - const uint outputSize = scores.inferDims.d[0]; - const uint numClasses = scores.inferDims.d[1]; - - if (numClasses != detectionParams.numClassesConfigured) { - std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses - << " in config_infer file\n" << std::endl; - } - - thrust::device_vector objects(outputSize); - - std::vector numDetections = { 0 }; - thrust::device_vector d_numDetections(numDetections); - - thrust::device_vector preclusterThreshold(detectionParams.perClassPreclusterThreshold); - - int threads_per_block = 1024; - int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1; - - decodeTensor_YOLO_NAS_ONNX<<>>( - thrust::raw_pointer_cast(objects.data()), (const float*) (scores.buffer), (const float*) (boxes.buffer), numClasses, - outputSize, static_cast(networkInfo.width), static_cast(networkInfo.height), - thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data())); - - thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin()); - objectList.resize(numDetections[0]); - thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin()); - - return true; -} - -static bool -NvDsInferParseCustom_PPYOLOE_ONNX(std::vector const& outputLayersInfo, - NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, - std::vector& objectList) -{ - if (outputLayersInfo.empty()) { - std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; - return false; - } - - const NvDsInferLayerInfo& scores = outputLayersInfo[0]; - const NvDsInferLayerInfo& boxes = outputLayersInfo[1]; - - const uint numClasses = scores.inferDims.d[0]; - const uint outputSize = scores.inferDims.d[1]; - - if (numClasses != detectionParams.numClassesConfigured) { - std::cerr << "WARNING: Number of classes mismatch, make sure to set num-detected-classes=" << numClasses - << " in config_infer file\n" << std::endl; - } - - thrust::device_vector objects(outputSize); - - std::vector numDetections = { 0 }; - thrust::device_vector d_numDetections(numDetections); - - thrust::device_vector preclusterThreshold(detectionParams.perClassPreclusterThreshold); - - int threads_per_block = 1024; - int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1; - - decodeTensor_PPYOLOE_ONNX<<>>( - thrust::raw_pointer_cast(objects.data()), (const float*) (scores.buffer), (const float*) (boxes.buffer), numClasses, - outputSize, static_cast(networkInfo.width), static_cast(networkInfo.height), - thrust::raw_pointer_cast(preclusterThreshold.data()), thrust::raw_pointer_cast(d_numDetections.data())); - - thrust::copy(d_numDetections.begin(), d_numDetections.end(), numDetections.begin()); - 
objectList.resize(numDetections[0]); - thrust::copy(objects.begin(), objects.begin() + numDetections[0], objectList.begin()); - - return true; -} - -extern "C" bool -NvDsInferParse_YOLO_ONNX(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, - NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) -{ - return NvDsInferParseCustom_YOLO_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList); -} - -extern "C" bool -NvDsInferParse_YOLOV8_ONNX(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, - NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) -{ - return NvDsInferParseCustom_YOLOV8_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList); -} - -extern "C" bool -NvDsInferParse_YOLOX_ONNX(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, - NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) -{ - return NvDsInferParseCustom_YOLOX_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList); -} - -extern "C" bool -NvDsInferParse_YOLO_NAS_ONNX(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, - NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) -{ - return NvDsInferParseCustom_YOLO_NAS_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList); -} - -extern "C" bool -NvDsInferParse_PPYOLOE_ONNX(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, - NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) -{ - return NvDsInferParseCustom_PPYOLOE_ONNX(outputLayersInfo, networkInfo, detectionParams, objectList); -} diff --git a/nvdsinfer_custom_impl_Yolo/utils.cpp b/nvdsinfer_custom_impl_Yolo/utils.cpp index 1e1689f..b3ff68f 100644 --- a/nvdsinfer_custom_impl_Yolo/utils.cpp +++ b/nvdsinfer_custom_impl_Yolo/utils.cpp @@ -98,25 +98,6 @@ loadWeights(const std::string weightsFilePath, const std::string& networkType) break; } } - else if (weightsFilePath.find(".wts") != std::string::npos) { - std::ifstream file(weightsFilePath); - assert(file.good()); - int32_t count; - file >> count; - assert(count > 0 && "\nInvalid .wts file."); - - uint32_t floatWeight; - std::string name; - uint32_t size; - - while (count--) { - file >> name >> std::dec >> size; - for (uint32_t x = 0, y = size; x < y; ++x) { - file >> std::hex >> floatWeight; - weights.push_back(*reinterpret_cast(&floatWeight)); - }; - } - } else { std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl; assert(0); diff --git a/nvdsinfer_custom_impl_Yolo/yolo.cpp b/nvdsinfer_custom_impl_Yolo/yolo.cpp index 700ec36..7ed490a 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.cpp +++ b/nvdsinfer_custom_impl_Yolo/yolo.cpp @@ -34,8 +34,8 @@ Yolo::Yolo(const NetworkInfo& networkInfo) : m_InputBlobName(networkInfo.inputBl m_NetworkType(networkInfo.networkType), m_ConfigFilePath(networkInfo.configFilePath), m_WtsFilePath(networkInfo.wtsFilePath), m_Int8CalibPath(networkInfo.int8CalibPath), m_DeviceType(networkInfo.deviceType), m_NumDetectedClasses(networkInfo.numDetectedClasses), m_ClusterMode(networkInfo.clusterMode), - m_NetworkMode(networkInfo.networkMode), m_ScoreThreshold(networkInfo.scoreThreshold), m_InputH(0), m_InputW(0), - m_InputC(0), m_InputSize(0), m_NumClasses(0), m_LetterBox(0), m_NewCoords(0), m_YoloCount(0) + m_NetworkMode(networkInfo.networkMode), m_InputH(0), m_InputW(0), m_InputC(0), m_InputSize(0), m_NumClasses(0), + m_LetterBox(0), 
m_NewCoords(0), m_YoloCount(0) { } @@ -130,18 +130,6 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition { int weightPtr = 0; - std::string weightsType = "wts"; - if (m_WtsFilePath.find(".weights") != std::string::npos) - weightsType = "weights"; - - float eps = 1.0e-5; - if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov6") != std::string::npos || - m_NetworkType.find("yolov7") != std::string::npos || m_NetworkType.find("yolov8") != std::string::npos || - m_NetworkType.find("yolox") != std::string::npos) - eps = 1.0e-3; - else if (m_NetworkType.find("yolor") != std::string::npos) - eps = 1.0e-4; - nvinfer1::ITensor* data = network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, nvinfer1::Dims{3, {static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW)}}); assert(data != nullptr && data->getDimensions().nbDims > 0); @@ -152,18 +140,15 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition nvinfer1::ITensor* yoloTensorInputs[m_YoloCount]; uint yoloCountInputs = 0; - int modelType = -1; - for (uint i = 0; i < m_ConfigBlocks.size(); ++i) { std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; if (m_ConfigBlocks.at(i).at("type") == "net") printLayerInfo("", "Layer", "Input Shape", "Output Shape", "WeightPtr"); - else if (m_ConfigBlocks.at(i).at("type") == "convolutional") { + else if (m_ConfigBlocks.at(i).at("type") == "conv" || m_ConfigBlocks.at(i).at("type") == "convolutional") { int channels = getNumChannels(previous); std::string inputVol = dimsToString(previous->getDimensions()); - previous = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, eps, - previous, &network); + previous = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); @@ -173,39 +158,30 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition else if (m_ConfigBlocks.at(i).at("type") == "deconvolutional") { int channels = getNumChannels(previous); std::string inputVol = dimsToString(previous->getDimensions()); - previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, - previous, &network); + previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, + &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); std::string layerName = "deconv"; printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); } - else if (m_ConfigBlocks.at(i).at("type") == "c2f") { - std::string inputVol = dimsToString(previous->getDimensions()); - previous = c2fLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - std::string layerName = "c2f_" + m_ConfigBlocks.at(i).at("activation"); - printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); - } else if (m_ConfigBlocks.at(i).at("type") == "batchnorm") { std::string inputVol = dimsToString(previous->getDimensions()); - previous = batchnormLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, 
weightPtr, weightsType, eps, previous, - &network); + previous = batchnormLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); std::string layerName = "batchnorm_" + m_ConfigBlocks.at(i).at("activation"); printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); } - else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul") { + else if (m_ConfigBlocks.at(i).at("type") == "implicit" || m_ConfigBlocks.at(i).at("type") == "implicit_add" || + m_ConfigBlocks.at(i).at("type") == "implicit_mul") { previous = implicitLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerName = m_ConfigBlocks.at(i).at("type"); + std::string layerName = "implicit"; printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr)); } else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || m_ConfigBlocks.at(i).at("type") == "control_channels") { @@ -234,27 +210,44 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); assert(i + from - 1 < i - 2); - std::string mode = "add"; - if (m_ConfigBlocks.at(i).find("mode") != m_ConfigBlocks.at(i).end()) - mode = m_ConfigBlocks.at(i).at("mode"); - std::string activation = "linear"; if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) activation = m_ConfigBlocks.at(i).at("activation"); std::string inputVol = dimsToString(previous->getDimensions()); std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions()); - previous = shortcutLayer(i, mode, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous, + previous = shortcutLayer(i, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); - std::string layerName = "shortcut_" + mode + "_" + activation + ": " + std::to_string(i + from - 1); + std::string layerName = "shortcut_" + activation + ": " + std::to_string(i + from - 1); printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); - if (mode == "add" && inputVol != shortcutVol) + if (inputVol != shortcutVol) std::cout << inputVol << " +" << shortcutVol << std::endl; } + else if (m_ConfigBlocks.at(i).at("type") == "sam") { + assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); + int from = stoi(m_ConfigBlocks.at(i).at("from")); + if (from > 0) + from = from - i + 1; + assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); + assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); + assert(i + from - 1 < i - 2); + + std::string activation = "linear"; + if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) + activation = m_ConfigBlocks.at(i).at("activation"); + + std::string inputVol = dimsToString(previous->getDimensions()); + previous = samLayer(i, activation, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + 
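The `shortcut` branch above and the new `sam` branch resolve the Darknet `from=` index the same way: a positive `from` is first rebased with `from = from - i + 1`, and the referenced tensor is then `tensorOutputs[i + from - 1]`. A small standalone sketch of that raw index math (it deliberately ignores the leading `[net]` block offset, so it is only the arithmetic from the code above, not a statement about Darknet semantics):

```
#include <cassert>
#include <cstdio>

// Mirrors the index math used by the shortcut/sam branches:
// positive `from` ends up as an absolute slot, negative `from` is relative
// to the current cfg block index `i`.
int resolveFrom(int i, int from)
{
  if (from > 0)
    from = from - i + 1;
  return i + from - 1;
}

int main()
{
  assert(resolveFrom(10, 6) == 6);   // positive: absolute slot 6
  assert(resolveFrom(10, -3) == 6);  // negative: three-and-one slots back from block 10
  printf("from=6 and from=-3 at block 10 both resolve to slot %d\n", resolveFrom(10, -3));
  return 0;
}
```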
std::string layerName = "sam_" + activation + ": " + std::to_string(i + from - 1); + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); + } else if (m_ConfigBlocks.at(i).at("type") == "route") { std::string layers; previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network); @@ -273,7 +266,8 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition std::string layerName = "upsample"; printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } - else if (m_ConfigBlocks.at(i).at("type") == "maxpool" || m_ConfigBlocks.at(i).at("type") == "avgpool") { + else if (m_ConfigBlocks.at(i).at("type") == "max" || m_ConfigBlocks.at(i).at("type") == "maxpool" || + m_ConfigBlocks.at(i).at("type") == "avg" || m_ConfigBlocks.at(i).at("type") == "avgpool") { std::string inputVol = dimsToString(previous->getDimensions()); previous = poolingLayer(i, m_ConfigBlocks.at(i), previous, &network); assert(previous != nullptr); @@ -282,62 +276,33 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition std::string layerName = m_ConfigBlocks.at(i).at("type"); printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } + else if (m_ConfigBlocks.at(i).at("type") == "reorg3d") { + std::string inputVol = dimsToString(previous->getDimensions()); + previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerName = "reorg3d"; + printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); + } else if (m_ConfigBlocks.at(i).at("type") == "reorg") { std::string inputVol = dimsToString(previous->getDimensions()); - if (m_NetworkType.find("yolov2") != std::string::npos) { - nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2); - assert(reorgPlugin != nullptr); - nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin); - assert(reorg != nullptr); - std::string reorglayerName = "reorg_" + std::to_string(i); - reorg->setName(reorglayerName.c_str()); - previous = reorg->getOutput(0); - } - else - previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network); + nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2); + assert(reorgPlugin != nullptr); + nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin); + assert(reorg != nullptr); + std::string reorglayerName = "reorg_" + std::to_string(i); + reorg->setName(reorglayerName.c_str()); + previous = reorg->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(previous); std::string layerName = "reorg"; printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); } - else if (m_ConfigBlocks.at(i).at("type") == "reduce") { - std::string inputVol = dimsToString(previous->getDimensions()); - previous = reduceLayer(i, m_ConfigBlocks.at(i), previous, &network); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - std::string layerName = "reduce"; - printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); - } - else if (m_ConfigBlocks.at(i).at("type") == "shuffle") { - std::string inputVol = dimsToString(previous->getDimensions()); - previous = shuffleLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs, &network); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - 
tensorOutputs.push_back(previous); - std::string layerName = "shuffle"; - printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); - } - else if (m_ConfigBlocks.at(i).at("type") == "softmax") { - std::string inputVol = dimsToString(previous->getDimensions()); - previous = softmaxLayer(i, m_ConfigBlocks.at(i), previous, &network); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - std::string layerName = "softmax"; - printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); - } else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region") { - if (m_ConfigBlocks.at(i).at("type") == "yolo") - if (m_NetworkType.find("yolor") != std::string::npos) - modelType = 2; - else - modelType = 1; - else - modelType = 0; - - std::string blobName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i); + std::string blobName = m_ConfigBlocks.at(i).at("type") == "yolo" ? "yolo_" + std::to_string(i) : + "region_" + std::to_string(i); nvinfer1::Dims prevTensorDims = previous->getDimensions(); TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); curYoloTensor.blobName = blobName; @@ -348,83 +313,11 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition tensorOutputs.push_back(previous); yoloTensorInputs[yoloCountInputs] = previous; ++yoloCountInputs; - std::string layerName = modelType != 0 ? "yolo" : "region"; + std::string layerName = m_ConfigBlocks.at(i).at("type") == "yolo" ? "yolo" : "region"; printLayerInfo(layerIndex, layerName, inputVol, "-", "-"); } - else if (m_ConfigBlocks.at(i).at("type") == "cls") { - modelType = 3; - - std::string blobName = "cls_" + std::to_string(i); - nvinfer1::Dims prevTensorDims = previous->getDimensions(); - TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); - curYoloTensor.blobName = blobName; - curYoloTensor.numBBoxes = prevTensorDims.d[1]; - m_NumClasses = prevTensorDims.d[0]; - - std::string inputVol = dimsToString(previous->getDimensions()); - previous = clsLayer(i, m_ConfigBlocks.at(i), previous, &network); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - yoloTensorInputs[yoloCountInputs] = previous; - ++yoloCountInputs; - std::string layerName = "cls"; - printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); - } - else if (m_ConfigBlocks.at(i).at("type") == "reg") { - modelType = 3; - - std::string blobName = "reg_" + std::to_string(i); - nvinfer1::Dims prevTensorDims = previous->getDimensions(); - TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); - curYoloTensor.blobName = blobName; - curYoloTensor.numBBoxes = prevTensorDims.d[1]; - - std::string inputVol = dimsToString(previous->getDimensions()); - previous = regLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - yoloTensorInputs[yoloCountInputs] = previous; - ++yoloCountInputs; - std::string layerName = "reg"; - printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); - } - else if (m_ConfigBlocks.at(i).at("type") == "detect_v8") { - modelType = 4; - - std::string blobName = "detect_v8_" + std::to_string(i); - nvinfer1::Dims prevTensorDims = previous->getDimensions(); - TensorInfo& curYoloTensor = 
m_YoloTensors.at(yoloCountInputs); - curYoloTensor.blobName = blobName; - curYoloTensor.numBBoxes = prevTensorDims.d[1]; - - std::string inputVol = dimsToString(previous->getDimensions()); - previous = detectV8Layer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - yoloTensorInputs[yoloCountInputs] = previous; - ++yoloCountInputs; - std::string layerName = "detect_v8"; - printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); - } - else if (m_ConfigBlocks.at(i).at("type") == "detect_x") { - modelType = 5; - - std::string blobName = "detect_x_" + std::to_string(i); - nvinfer1::Dims prevTensorDims = previous->getDimensions(); - TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); - curYoloTensor.blobName = blobName; - curYoloTensor.numBBoxes = prevTensorDims.d[0]; - m_NumClasses = prevTensorDims.d[1] - 5; - - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(previous); - yoloTensorInputs[yoloCountInputs] = previous; - ++yoloCountInputs; - std::string layerName = "detect_x"; - printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr)); + else if (m_ConfigBlocks.at(i).at("type") == "dropout") { + // pass } else { std::cerr << "\nUnsupported layer type --> \"" << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl; @@ -438,42 +331,24 @@ Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition } if (m_YoloCount == yoloCountInputs) { - assert((modelType != -1) && "\nCould not determine model type"); - uint64_t outputSize = 0; for (uint j = 0; j < yoloCountInputs; ++j) { TensorInfo& curYoloTensor = m_YoloTensors.at(j); - if (modelType == 3 || modelType == 4 || modelType == 5) - outputSize = curYoloTensor.numBBoxes; - else - outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes; + outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes; } - nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize, - modelType, m_ScoreThreshold); + nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, + outputSize); assert(yoloPlugin != nullptr); nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin); assert(yolo != nullptr); std::string yoloLayerName = "yolo"; yolo->setName(yoloLayerName.c_str()); - std::string outputlayerName; - nvinfer1::ITensor* num_detections = yolo->getOutput(0); - outputlayerName = "num_detections"; - num_detections->setName(outputlayerName.c_str()); - nvinfer1::ITensor* detection_boxes = yolo->getOutput(1); - outputlayerName = "detection_boxes"; - detection_boxes->setName(outputlayerName.c_str()); - nvinfer1::ITensor* detection_scores = yolo->getOutput(2); - outputlayerName = "detection_scores"; - detection_scores->setName(outputlayerName.c_str()); - nvinfer1::ITensor* detection_classes = yolo->getOutput(3); - outputlayerName = "detection_classes"; - detection_classes->setName(outputlayerName.c_str()); - network.markOutput(*num_detections); - network.markOutput(*detection_boxes); - network.markOutput(*detection_scores); - network.markOutput(*detection_classes); + nvinfer1::ITensor* outputYolo = yolo->getOutput(0); + std::string outputYoloLayerName = "output"; + 
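With the per-model branches removed, `outputSize` is simply the sum of `gridSizeX * gridSizeY * numBBoxes` over all YOLO heads. A worked example with illustrative numbers (a 608x608 Darknet-style model with strides 8/16/32 and three anchors per head, not taken from any particular cfg):

```
#include <cstdio>

int main()
{
  // Illustrative head shapes: 608/8=76, 608/16=38, 608/32=19, 3 anchors each.
  const int grids[3]  = {76, 38, 19};
  const int numBBoxes = 3;

  long long outputSize = 0;
  for (int i = 0; i < 3; ++i)
    outputSize += (long long)grids[i] * grids[i] * numBBoxes;  // gridSizeX * gridSizeY * numBBoxes

  // 76*76*3 + 38*38*3 + 19*19*3 = 22743 candidate boxes per image.
  printf("outputSize = %lld\n", outputSize);
  return 0;
}
```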
outputYolo->setName(outputYoloLayerName.c_str()); + network.markOutput(*outputYolo); } else { std::cerr << "\nError in yolo cfg file" << std::endl; @@ -600,54 +475,6 @@ Yolo::parseConfigBlocks() outputTensor.numBBoxes = outputTensor.mask.size() > 0 ? outputTensor.mask.size() : std::stoul(trim(block.at("num"))); - m_YoloTensors.push_back(outputTensor); - } - else if ((block.at("type") == "cls") || (block.at("type") == "reg")) { - ++m_YoloCount; - TensorInfo outputTensor; - m_YoloTensors.push_back(outputTensor); - } - else if (block.at("type") == "detect_v8") { - ++m_YoloCount; - - m_NumClasses = std::stoul(block.at("classes")); - - TensorInfo outputTensor; - m_YoloTensors.push_back(outputTensor); - } - else if (block.at("type") == "detect_x") { - ++m_YoloCount; - TensorInfo outputTensor; - - std::vector strides; - - std::string stridesString = block.at("strides"); - while (!stridesString.empty()) { - int npos = stridesString.find_first_of(','); - if (npos != -1) { - int stride = std::stof(trim(stridesString.substr(0, npos))); - strides.push_back(stride); - stridesString.erase(0, npos + 1); - } - else { - int stride = std::stof(trim(stridesString)); - strides.push_back(stride); - break; - } - } - - for (uint i = 0; i < strides.size(); ++i) { - int num_grid_y = m_InputH / strides[i]; - int num_grid_x = m_InputW / strides[i]; - for (int g1 = 0; g1 < num_grid_y; ++g1) { - for (int g0 = 0; g0 < num_grid_x; ++g0) { - outputTensor.anchors.push_back((float) g0); - outputTensor.anchors.push_back((float) g1); - outputTensor.mask.push_back(strides[i]); - } - } - } - m_YoloTensors.push_back(outputTensor); } } diff --git a/nvdsinfer_custom_impl_Yolo/yolo.h b/nvdsinfer_custom_impl_Yolo/yolo.h index 25a2e89..5ce4ed3 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.h +++ b/nvdsinfer_custom_impl_Yolo/yolo.h @@ -31,21 +31,15 @@ #include "layers/convolutional_layer.h" #include "layers/deconvolutional_layer.h" -#include "layers/c2f_layer.h" #include "layers/batchnorm_layer.h" #include "layers/implicit_layer.h" #include "layers/channels_layer.h" #include "layers/shortcut_layer.h" +#include "layers/sam_layer.h" #include "layers/route_layer.h" #include "layers/upsample_layer.h" #include "layers/pooling_layer.h" #include "layers/reorg_layer.h" -#include "layers/reduce_layer.h" -#include "layers/shuffle_layer.h" -#include "layers/softmax_layer.h" -#include "layers/cls_layer.h" -#include "layers/reg_layer.h" -#include "layers/detect_v8_layer.h" struct NetworkInfo { @@ -57,7 +51,6 @@ struct NetworkInfo std::string deviceType; uint numDetectedClasses; int clusterMode; - float scoreThreshold; std::string networkMode; }; @@ -98,7 +91,6 @@ class Yolo : public IModelParser { const uint m_NumDetectedClasses; const int m_ClusterMode; const std::string m_NetworkMode; - const float m_ScoreThreshold; uint m_InputH; uint m_InputW; diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward.cu b/nvdsinfer_custom_impl_Yolo/yoloForward.cu index 9d0d613..e455425 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward.cu @@ -4,13 +4,13 @@ */ #include +#include inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } -__global__ void gpuYoloLayer(const float* input, int* num_detections, float* detection_boxes, float* detection_scores, - int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, - const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, - 
const int* mask) +__global__ void gpuYoloLayer(const float* input, float* output, int* count, const uint netWidth, const uint netHeight, + const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, + const float* anchors, const int* mask) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; @@ -24,18 +24,13 @@ __global__ void gpuYoloLayer(const float* input, int* num_detections, float* det const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); - if (objectness < scoreThreshold) - return; - - int count = (int)atomicAdd(num_detections, 1); - const float alpha = scaleXY; const float beta = -0.5 * (scaleXY - 1); - float x = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id) + float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id) * netWidth / gridSizeX; - float y = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id) + float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id) * netHeight / gridSizeY; float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[mask[z_id] * 2]; @@ -53,23 +48,26 @@ __global__ void gpuYoloLayer(const float* input, int* num_detections, float* det } } - detection_boxes[count * 4 + 0] = x - 0.5 * w; - detection_boxes[count * 4 + 1] = y - 0.5 * h; - detection_boxes[count * 4 + 2] = x + 0.5 * w; - detection_boxes[count * 4 + 3] = y + 0.5 * h; - detection_scores[count] = objectness * maxProb; - detection_classes[count] = maxIndex; + int _count = (int)atomicAdd(count, 1); + + output[_count * 7 + 0] = xc; + output[_count * 7 + 1] = yc; + output[_count * 7 + 2] = w; + output[_count * 7 + 3] = h; + output[_count * 7 + 4] = maxProb; + output[_count * 7 + 5] = maxIndex; + output[_count * 7 + 6] = objectness; } -cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); +cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize, + uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, + cudaStream_t stream); -cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) +cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize, + uint64_t& outputSize, const uint& netWidth, const uint& 
netHeight, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, + cudaStream_t stream) { dim3 threads_per_block(16, 16, 4); dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, @@ -77,12 +75,11 @@ cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detecti for (unsigned int batch = 0; batch < batchSize; ++batch) { gpuYoloLayer<<>>( - reinterpret_cast(input) + (batch * inputSize), reinterpret_cast(num_detections) + (batch), - reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), - reinterpret_cast(detection_scores) + (batch * outputSize), - reinterpret_cast(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX, - gridSizeY, numOutputClasses, numBBoxes, scaleXY, reinterpret_cast(anchors), - reinterpret_cast(mask)); + reinterpret_cast (input) + (batch * inputSize), + reinterpret_cast (output) + (batch * 7 * outputSize), + reinterpret_cast (count) + (batch), + netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, + reinterpret_cast (anchors), reinterpret_cast (mask)); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_e.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_e.cu deleted file mode 100644 index 9b7596a..0000000 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_e.cu +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include - -__global__ void gpuYoloLayer_e(const float* cls, const float* reg, int* num_detections, float* detection_boxes, - float* detection_scores, int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, - const uint numOutputClasses, const uint64_t outputSize) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= outputSize) - return; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numOutputClasses; ++i) { - float prob = cls[x_id * numOutputClasses + i]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - if (maxProb < scoreThreshold) - return; - - int count = (int)atomicAdd(num_detections, 1); - - detection_boxes[count * 4 + 0] = reg[x_id * 4 + 0]; - detection_boxes[count * 4 + 1] = reg[x_id * 4 + 1]; - detection_boxes[count * 4 + 2] = reg[x_id * 4 + 2]; - detection_boxes[count * 4 + 3] = reg[x_id * 4 + 3]; - detection_scores[count] = maxProb; - detection_classes[count] = maxIndex; -} - -cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes, - void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize, - const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses, - cudaStream_t stream); - -cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes, - void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize, - const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses, - cudaStream_t stream) -{ - int threads_per_block = 16; - int number_of_blocks = (outputSize / threads_per_block) + 1; - - for (unsigned int batch = 0; batch < batchSize; ++batch) { - gpuYoloLayer_e<<>>( - reinterpret_cast(cls) + (batch * numOutputClasses * outputSize), - reinterpret_cast(reg) + (batch * 4 * outputSize), 
reinterpret_cast(num_detections) + (batch), - reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), - reinterpret_cast(detection_scores) + (batch * outputSize), - reinterpret_cast(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, - numOutputClasses, outputSize); - } - return cudaGetLastError(); -} diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu index 45b8ca7..125cee3 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu @@ -5,10 +5,9 @@ #include -__global__ void gpuYoloLayer_nc(const float* input, int* num_detections, float* detection_boxes, float* detection_scores, - int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, - const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, - const int* mask) +__global__ void gpuYoloLayer_nc(const float* input, float* output, int* count, const uint netWidth, const uint netHeight, + const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, + const float* anchors, const int* mask) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; @@ -22,18 +21,13 @@ __global__ void gpuYoloLayer_nc(const float* input, int* num_detections, float* const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; - if (objectness < scoreThreshold) - return; - - int count = (int)atomicAdd(num_detections, 1); - const float alpha = scaleXY; const float beta = -0.5 * (scaleXY - 1); - float x = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth / + float xc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth / gridSizeX; - float y = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight / + float yc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight / gridSizeY; float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * anchors[mask[z_id] * 2]; @@ -51,23 +45,26 @@ __global__ void gpuYoloLayer_nc(const float* input, int* num_detections, float* } } - detection_boxes[count * 4 + 0] = x - 0.5 * w; - detection_boxes[count * 4 + 1] = y - 0.5 * h; - detection_boxes[count * 4 + 2] = x + 0.5 * w; - detection_boxes[count * 4 + 3] = y + 0.5 * h; - detection_scores[count] = objectness * maxProb; - detection_classes[count] = maxIndex; + int _count = (int)atomicAdd(count, 1); + + output[_count * 7 + 0] = xc; + output[_count * 7 + 1] = yc; + output[_count * 7 + 2] = w; + output[_count * 7 + 3] = h; + output[_count * 7 + 4] = maxProb; + output[_count * 7 + 5] = maxIndex; + output[_count * 7 + 6] = objectness; } -cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); +cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, 
const uint& batchSize, uint64_t& inputSize, + uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, + cudaStream_t stream); -cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) +cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize, + uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, + cudaStream_t stream) { dim3 threads_per_block(16, 16, 4); dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, @@ -75,12 +72,11 @@ cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* dete for (unsigned int batch = 0; batch < batchSize; ++batch) { gpuYoloLayer_nc<<>>( - reinterpret_cast(input) + (batch * inputSize), reinterpret_cast(num_detections) + (batch), - reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), - reinterpret_cast(detection_scores) + (batch * outputSize), - reinterpret_cast(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX, - gridSizeY, numOutputClasses, numBBoxes, scaleXY, reinterpret_cast(anchors), - reinterpret_cast(mask)); + reinterpret_cast (input) + (batch * inputSize), + reinterpret_cast (output) + (batch * 7 * outputSize), + reinterpret_cast (count) + (batch), + netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, + reinterpret_cast (anchors), reinterpret_cast (mask)); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu deleted file mode 100644 index 6a0327e..0000000 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include - -inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } - -__global__ void gpuYoloLayer_r(const float* input, int* num_detections, float* detection_boxes, float* detection_scores, - int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, - const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, - const int* mask) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - uint y_id = blockIdx.y * blockDim.y + threadIdx.y; - uint z_id = blockIdx.z * blockDim.z + threadIdx.z; - - if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) - return; - - const int numGridCells = gridSizeX * gridSizeY; - const int bbindex = y_id * gridSizeX + x_id; - - const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); - - if (objectness < scoreThreshold) - return; - - int count = (int)atomicAdd(num_detections, 1); 
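The two kernel variants above decode boxes differently: `yoloForward.cu` applies a sigmoid to tx/ty and `__expf` to tw/th, while the `new_coords` variant in `yoloForward_nc.cu` consumes already-activated values and uses the squared `(t * 2)^2` form. A host-side mirror of the two width decodes, purely for illustration (the kernels themselves use `__expf`/`__powf` on the device):

```
#include <cmath>
#include <cstdio>

// Classic Darknet/YOLOv3-style decode: w = exp(tw) * anchor_w
float decodeWidthClassic(float tw, float anchorW)
{
  return std::exp(tw) * anchorW;
}

// new_coords-style decode as in yoloForward_nc.cu: w = (tw * 2)^2 * anchor_w,
// assuming tw has already been squashed to [0, 1] earlier in the network.
float decodeWidthNewCoords(float tw, float anchorW)
{
  float s = tw * 2.0f;
  return s * s * anchorW;
}

int main()
{
  const float anchorW = 30.0f;
  printf("classic:    tw=0.5 -> w=%.2f\n", decodeWidthClassic(0.5f, anchorW));
  printf("new_coords: tw=0.5 -> w=%.2f\n", decodeWidthNewCoords(0.5f, anchorW));
  return 0;
}
```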
- - const float alpha = scaleXY; - const float beta = -0.5 * (scaleXY - 1); - - float x = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id) - * netWidth / gridSizeX; - - float y = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id) - * netHeight / gridSizeY; - - float w = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * 2, 2) - * anchors[mask[z_id] * 2]; - - float h = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * 2, 2) - * anchors[mask[z_id] * 2 + 1]; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numOutputClasses; ++i) { - float prob = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - detection_boxes[count * 4 + 0] = x - 0.5 * w; - detection_boxes[count * 4 + 1] = y - 0.5 * h; - detection_boxes[count * 4 + 2] = x + 0.5 * w; - detection_boxes[count * 4 + 3] = y + 0.5 * h; - detection_scores[count] = objectness * maxProb; - detection_classes[count] = maxIndex; -} - -cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); - -cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) -{ - dim3 threads_per_block(16, 16, 4); - dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, - (numBBoxes / threads_per_block.z) + 1); - - for (unsigned int batch = 0; batch < batchSize; ++batch) { - gpuYoloLayer_r<<>>( - reinterpret_cast(input) + (batch * inputSize), reinterpret_cast(num_detections) + (batch), - reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), - reinterpret_cast(detection_scores) + (batch * outputSize), - reinterpret_cast(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX, - gridSizeY, numOutputClasses, numBBoxes, scaleXY, reinterpret_cast(anchors), - reinterpret_cast(mask)); - } - return cudaGetLastError(); -} diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu index 93f12da..5fb74de 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu @@ -27,9 +27,9 @@ __device__ void softmaxGPU(const float* input, const int bbindex, const int numG } } -__global__ void gpuRegionLayer(const float* input, float* softmax, int* num_detections, float* detection_boxes, - float* detection_scores, int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, - const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const 
float* anchors) +__global__ void gpuRegionLayer(const float* input, float* softmax, float* output, int* count, const uint netWidth, + const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, + const float* anchors) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; @@ -43,14 +43,9 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, int* num_dete const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); - if (objectness < scoreThreshold) - return; + float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / gridSizeX; - int count = (int)atomicAdd(num_detections, 1); - - float x = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / gridSizeX; - - float y = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / gridSizeY; + float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / gridSizeY; float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[z_id * 2] * netWidth / gridSizeX; @@ -71,23 +66,24 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, int* num_dete } } - detection_boxes[count * 4 + 0] = x - 0.5 * w; - detection_boxes[count * 4 + 1] = y - 0.5 * h; - detection_boxes[count * 4 + 2] = x + 0.5 * w; - detection_boxes[count * 4 + 3] = y + 0.5 * h; - detection_scores[count] = objectness * maxProb; - detection_classes[count] = maxIndex; + int _count = (int)atomicAdd(count, 1); + + output[_count * 7 + 0] = xc; + output[_count * 7 + 1] = yc; + output[_count * 7 + 2] = w; + output[_count * 7 + 3] = h; + output[_count * 7 + 4] = maxProb; + output[_count * 7 + 5] = maxIndex; + output[_count * 7 + 6] = objectness; } -cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes, - void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, - const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream); +cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize, + uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, + const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream); -cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes, - void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, - const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream) +cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize, + uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, + const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, 
cudaStream_t stream) { dim3 threads_per_block(16, 16, 4); dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, @@ -95,12 +91,12 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detectio for (unsigned int batch = 0; batch < batchSize; ++batch) { gpuRegionLayer<<>>( - reinterpret_cast(input) + (batch * inputSize), reinterpret_cast(softmax) + (batch * inputSize), - reinterpret_cast(num_detections) + (batch), - reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), - reinterpret_cast(detection_scores) + (batch * outputSize), - reinterpret_cast(detection_classes) + (batch * outputSize), scoreThreshold, netWidth, netHeight, gridSizeX, - gridSizeY, numOutputClasses, numBBoxes, reinterpret_cast(anchors)); + reinterpret_cast (input) + (batch * inputSize), + reinterpret_cast (softmax) + (batch * inputSize), + reinterpret_cast (output) + (batch * 7 * outputSize), + reinterpret_cast (count) + (batch), + netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, + reinterpret_cast (anchors)); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_v8.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_v8.cu deleted file mode 100644 index 696cf14..0000000 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_v8.cu +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include - -__global__ void gpuYoloLayer_v8(const float* input, int* num_detections, float* detection_boxes, float* detection_scores, - int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, - const uint numOutputClasses, const uint64_t outputSize) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= outputSize) - return; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numOutputClasses; ++i) { - float prob = input[x_id * (4 + numOutputClasses) + 4 + i]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - if (maxProb < scoreThreshold) - return; - - int count = (int)atomicAdd(num_detections, 1); - - detection_boxes[count * 4 + 0] = input[x_id * (4 + numOutputClasses) + 0]; - detection_boxes[count * 4 + 1] = input[x_id * (4 + numOutputClasses) + 1]; - detection_boxes[count * 4 + 2] = input[x_id * (4 + numOutputClasses) + 2]; - detection_boxes[count * 4 + 3] = input[x_id * (4 + numOutputClasses) + 3]; - detection_scores[count] = maxProb; - detection_classes[count] = maxIndex; -} - -cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, - const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream); - -cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, - const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream) -{ - int threads_per_block = 16; - int number_of_blocks = (outputSize / threads_per_block) + 1; - - for (unsigned int batch = 0; batch < batchSize; ++batch) { - gpuYoloLayer_v8<<>>( - reinterpret_cast(input) + (batch * (4 + numOutputClasses) * outputSize), - reinterpret_cast(num_detections) + (batch), - reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), - 
reinterpret_cast(detection_scores) + (batch * outputSize), - reinterpret_cast(detection_classes) + (batch * outputSize), - scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize); - } - return cudaGetLastError(); -} diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_x.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_x.cu deleted file mode 100644 index 966a669..0000000 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_x.cu +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include - -__global__ void gpuYoloLayer_x(const float* input, int* num_detections, float* detection_boxes, float* detection_scores, - int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, - const uint numOutputClasses, const uint64_t outputSize, const float* anchors, const int* mask) -{ - uint x_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (x_id >= outputSize) - return; - - const float objectness = input[x_id * (5 + numOutputClasses) + 4]; - - if (objectness < scoreThreshold) - return; - - int count = (int)atomicAdd(num_detections, 1); - - float x = (input[x_id * (5 + numOutputClasses) + 0] + anchors[x_id * 2]) * mask[x_id]; - - float y = (input[x_id * (5 + numOutputClasses) + 1] + anchors[x_id * 2 + 1]) * mask[x_id]; - - float w = __expf(input[x_id * (5 + numOutputClasses) + 2]) * mask[x_id]; - - float h = __expf(input[x_id * (5 + numOutputClasses) + 3]) * mask[x_id]; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint i = 0; i < numOutputClasses; ++i) { - float prob = input[x_id * (5 + numOutputClasses) + 5 + i]; - if (prob > maxProb) { - maxProb = prob; - maxIndex = i; - } - } - - detection_boxes[count * 4 + 0] = x - 0.5 * w; - detection_boxes[count * 4 + 1] = y - 0.5 * h; - detection_boxes[count * 4 + 2] = x + 0.5 * w; - detection_boxes[count * 4 + 3] = y + 0.5 * h; - detection_scores[count] = objectness * maxProb; - detection_classes[count] = maxIndex; -} - -cudaError_t cudaYoloLayer_x(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, - const uint& netHeight, const uint& numOutputClasses, const void* anchors, const void* mask, cudaStream_t stream); - -cudaError_t cudaYoloLayer_x(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, - const uint& netHeight, const uint& numOutputClasses, const void* anchors, const void* mask, cudaStream_t stream) -{ - int threads_per_block = 16; - int number_of_blocks = (outputSize / threads_per_block) + 1; - - for (unsigned int batch = 0; batch < batchSize; ++batch) { - gpuYoloLayer_x<<>>( - reinterpret_cast(input) + (batch * (5 + numOutputClasses) * outputSize), - reinterpret_cast(num_detections) + (batch), - reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), - reinterpret_cast(detection_scores) + (batch * outputSize), - reinterpret_cast(detection_classes) + (batch * outputSize), - scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize, reinterpret_cast(anchors), - reinterpret_cast(mask)); - } - return cudaGetLastError(); -} diff --git a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp index 88c6dbd..6633d10 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp +++ 
b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp @@ -38,38 +38,19 @@ namespace { } } -cudaError_t cudaYoloLayer_x(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, - const uint& netHeight, const uint& numOutputClasses, const void* anchors, const void* mask, cudaStream_t stream); - -cudaError_t cudaYoloLayer_v8(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, - const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream); - -cudaError_t cudaYoloLayer_e(const void* cls, const void* reg, void* num_detections, void* detection_boxes, - void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& outputSize, - const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& numOutputClasses, +cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize, + uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); -cudaError_t cudaYoloLayer_r(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); +cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize, + uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, + cudaStream_t stream); -cudaError_t cudaYoloLayer_nc(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); - -cudaError_t cudaYoloLayer(const void* input, void* num_detections, void* detection_boxes, void* detection_scores, - void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, - const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); - -cudaError_t cudaRegionLayer(const void* input, void* softmax, void* num_detections, void* detection_boxes, - void* detection_scores, void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, - const float& scoreThreshold, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, 
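The deserializing constructor above walks a flat byte buffer with `read`/`write` helpers. Their definitions are not part of this hunk; a minimal sketch of the form such helpers usually take in TensorRT plugins (an assumption, and using `memcpy` rather than the raw casts often seen in sample code):

```
#include <cstring>
#include <cstdio>

// Assumed shape of the helpers used by YoloLayer serialization: copy a
// trivially-copyable value into or out of a byte buffer and advance the cursor.
template <typename T>
void write(char*& buffer, const T& val)
{
  std::memcpy(buffer, &val, sizeof(T));
  buffer += sizeof(T);
}

template <typename T>
void read(const char*& buffer, T& val)
{
  std::memcpy(&val, buffer, sizeof(T));
  buffer += sizeof(T);
}

int main()
{
  char storage[sizeof(unsigned int) + sizeof(float)];
  char* w = storage;
  write(w, 19u);    // e.g. gridSizeX
  write(w, 2.0f);   // e.g. scaleXY

  const char* r = storage;
  unsigned int gridSizeX = 0;
  float scaleXY = 0.f;
  read(r, gridSizeX);
  read(r, scaleXY);
  printf("gridSizeX=%u scaleXY=%.1f\n", gridSizeX, scaleXY);
  return 0;
}
```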
const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream); +cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize, + uint64_t& inputSize, uint64_t& outputSize, const uint& netWidth, const uint& netHeight, const uint& gridSizeX, + const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, const void* anchors, cudaStream_t stream); YoloLayer::YoloLayer(const void* data, size_t length) { const char* d = static_cast(data); @@ -79,45 +60,40 @@ YoloLayer::YoloLayer(const void* data, size_t length) { read(d, m_NumClasses); read(d, m_NewCoords); read(d, m_OutputSize); - read(d, m_Type); - read(d, m_ScoreThreshold); - if (m_Type != 3 && m_Type != 4) { - uint yoloTensorsSize; - read(d, yoloTensorsSize); - for (uint i = 0; i < yoloTensorsSize; ++i) { - TensorInfo curYoloTensor; - read(d, curYoloTensor.gridSizeX); - read(d, curYoloTensor.gridSizeY); - read(d, curYoloTensor.numBBoxes); - read(d, curYoloTensor.scaleXY); + uint yoloTensorsSize; + read(d, yoloTensorsSize); + for (uint i = 0; i < yoloTensorsSize; ++i) { + TensorInfo curYoloTensor; + read(d, curYoloTensor.gridSizeX); + read(d, curYoloTensor.gridSizeY); + read(d, curYoloTensor.numBBoxes); + read(d, curYoloTensor.scaleXY); - uint anchorsSize; - read(d, anchorsSize); - for (uint j = 0; j < anchorsSize; ++j) { - float result; - read(d, result); - curYoloTensor.anchors.push_back(result); - } - - uint maskSize; - read(d, maskSize); - for (uint j = 0; j < maskSize; ++j) { - int result; - read(d, result); - curYoloTensor.mask.push_back(result); - } - - m_YoloTensors.push_back(curYoloTensor); + uint anchorsSize; + read(d, anchorsSize); + for (uint j = 0; j < anchorsSize; ++j) { + float result; + read(d, result); + curYoloTensor.anchors.push_back(result); } + + uint maskSize; + read(d, maskSize); + for (uint j = 0; j < maskSize; ++j) { + int result; + read(d, result); + curYoloTensor.mask.push_back(result); + } + + m_YoloTensors.push_back(curYoloTensor); } }; YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, - const std::vector& yoloTensors, const uint64_t& outputSize, const uint& modelType, - const float& scoreThreshold) : m_NetWidth(netWidth), m_NetHeight(netHeight), m_NumClasses(numClasses), - m_NewCoords(newCoords), m_YoloTensors(yoloTensors), m_OutputSize(outputSize), m_Type(modelType), - m_ScoreThreshold(scoreThreshold) + const std::vector& yoloTensors, const uint64_t& outputSize) : m_NetWidth(netWidth), + m_NetHeight(netHeight), m_NumClasses(numClasses), m_NewCoords(newCoords), m_YoloTensors(yoloTensors), + m_OutputSize(outputSize) { assert(m_NetWidth > 0); assert(m_NetHeight > 0); @@ -126,12 +102,8 @@ YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& nu nvinfer1::Dims YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept { - assert(index <= 4); - if (index == 0) - return nvinfer1::Dims{1, {1}}; - else if (index == 1) - return nvinfer1::Dims{2, {static_cast(m_OutputSize), 4}}; - return nvinfer1::Dims{1, {static_cast(m_OutputSize)}}; + assert(index == 0); + return nvinfer1::Dims{2, {static_cast(m_OutputSize), 7}}; } bool @@ -152,36 +124,56 @@ int32_t YoloLayer::enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept { - void* num_detections = outputs[0]; - void* detection_boxes = outputs[1]; - void* detection_scores 
= outputs[2]; - void* detection_classes = outputs[3]; + void* output = outputs[0]; + CUDA_CHECK(cudaMemsetAsync((float*) output, 0, sizeof(float) * m_OutputSize * 7 * batchSize, stream)); - CUDA_CHECK(cudaMemsetAsync((int*)num_detections, 0, sizeof(int) * batchSize, stream)); - CUDA_CHECK(cudaMemsetAsync((float*)detection_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream)); - CUDA_CHECK(cudaMemsetAsync((float*)detection_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream)); - CUDA_CHECK(cudaMemsetAsync((int*)detection_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream)); + void* count = workspace; + CUDA_CHECK(cudaMemsetAsync((int*) count, 0, sizeof(int) * batchSize, stream)); - if (m_Type == 5) { - TensorInfo& curYoloTensor = m_YoloTensors.at(0); + uint yoloTensorsSize = m_YoloTensors.size(); + for (uint i = 0; i < yoloTensorsSize; ++i) { + TensorInfo& curYoloTensor = m_YoloTensors.at(i); + + uint numBBoxes = curYoloTensor.numBBoxes; + float scaleXY = curYoloTensor.scaleXY; + uint gridSizeX = curYoloTensor.gridSizeX; + uint gridSizeY = curYoloTensor.gridSizeY; std::vector anchors = curYoloTensor.anchors; std::vector mask = curYoloTensor.mask; void* v_anchors; void* v_mask; if (anchors.size() > 0) { - float* f_anchors = anchors.data(); CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size())); - CUDA_CHECK(cudaMemcpyAsync(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream)); + CUDA_CHECK(cudaMemcpyAsync(v_anchors, anchors.data(), sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream)); } if (mask.size() > 0) { - int* f_mask = mask.data(); CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size())); - CUDA_CHECK(cudaMemcpyAsync(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream)); + CUDA_CHECK(cudaMemcpyAsync(v_mask, mask.data(), sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream)); } - CUDA_CHECK(cudaYoloLayer_x(inputs[0], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, - m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, v_anchors, v_mask, stream)); + uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses)); + + if (mask.size() > 0) { + if (m_NewCoords) { + CUDA_CHECK(cudaYoloLayer_nc(inputs[i], output, count, batchSize, inputSize, m_OutputSize, m_NetWidth, m_NetHeight, + gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); + } + else { + CUDA_CHECK(cudaYoloLayer(inputs[i], output, count, batchSize, inputSize, m_OutputSize, m_NetWidth, m_NetHeight, + gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); + } + } + else { + void* softmax; + CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize)); + CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream)); + + CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, output, count, batchSize, inputSize, m_OutputSize, m_NetWidth, + m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, stream)); + + CUDA_CHECK(cudaFree(softmax)); + } if (anchors.size() > 0) { CUDA_CHECK(cudaFree(v_anchors)); @@ -190,78 +182,6 @@ YoloLayer::enqueue(int batchSize, void const* const* inputs, void* const* output CUDA_CHECK(cudaFree(v_mask)); } } - else if (m_Type == 4) { - CUDA_CHECK(cudaYoloLayer_v8(inputs[0], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, - m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, 
m_NumClasses, stream)); - } - else if (m_Type == 3) { - CUDA_CHECK(cudaYoloLayer_e(inputs[0], inputs[1], num_detections, detection_boxes, detection_scores, detection_classes, - batchSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream)); - } - else { - uint yoloTensorsSize = m_YoloTensors.size(); - for (uint i = 0; i < yoloTensorsSize; ++i) { - TensorInfo& curYoloTensor = m_YoloTensors.at(i); - - uint numBBoxes = curYoloTensor.numBBoxes; - float scaleXY = curYoloTensor.scaleXY; - uint gridSizeX = curYoloTensor.gridSizeX; - uint gridSizeY = curYoloTensor.gridSizeY; - std::vector anchors = curYoloTensor.anchors; - std::vector mask = curYoloTensor.mask; - - void* v_anchors; - void* v_mask; - if (anchors.size() > 0) { - float* f_anchors = anchors.data(); - CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size())); - CUDA_CHECK(cudaMemcpyAsync(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream)); - } - if (mask.size() > 0) { - int* f_mask = mask.data(); - CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size())); - CUDA_CHECK(cudaMemcpyAsync(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream)); - } - - uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses)); - - if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0 - CUDA_CHECK(cudaYoloLayer_r(inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, - batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, - m_NumClasses, numBBoxes, 2.0, v_anchors, v_mask, stream)); - } - else if (m_Type == 1) { - if (m_NewCoords) { - CUDA_CHECK(cudaYoloLayer_nc( inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, - batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, - m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); - } - else { - CUDA_CHECK(cudaYoloLayer(inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, - batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, - m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); - } - } - else { - void* softmax; - CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize)); - CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize, stream)); - - CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, num_detections, detection_boxes, detection_scores, detection_classes, - batchSize, inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, - m_NumClasses, numBBoxes, v_anchors, stream)); - - CUDA_CHECK(cudaFree(softmax)); - } - - if (anchors.size() > 0) { - CUDA_CHECK(cudaFree(v_anchors)); - } - if (mask.size() > 0) { - CUDA_CHECK(cudaFree(v_mask)); - } - } - } return 0; } @@ -276,22 +196,18 @@ YoloLayer::getSerializationSize() const noexcept totalSize += sizeof(m_NumClasses); totalSize += sizeof(m_NewCoords); totalSize += sizeof(m_OutputSize); - totalSize += sizeof(m_Type); - totalSize += sizeof(m_ScoreThreshold); - if (m_Type != 3 && m_Type != 4) { - uint yoloTensorsSize = m_YoloTensors.size(); - totalSize += sizeof(yoloTensorsSize); + uint yoloTensorsSize = m_YoloTensors.size(); + totalSize += sizeof(yoloTensorsSize); - for (uint i = 0; i < yoloTensorsSize; ++i) { - const TensorInfo& curYoloTensor = m_YoloTensors.at(i); - totalSize += sizeof(curYoloTensor.gridSizeX); - totalSize += 
sizeof(curYoloTensor.gridSizeY); - totalSize += sizeof(curYoloTensor.numBBoxes); - totalSize += sizeof(curYoloTensor.scaleXY); - totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size(); - totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size(); - } + for (uint i = 0; i < yoloTensorsSize; ++i) { + const TensorInfo& curYoloTensor = m_YoloTensors.at(i); + totalSize += sizeof(curYoloTensor.gridSizeX); + totalSize += sizeof(curYoloTensor.gridSizeY); + totalSize += sizeof(curYoloTensor.numBBoxes); + totalSize += sizeof(curYoloTensor.scaleXY); + totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size(); + totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size(); } return totalSize; @@ -307,37 +223,32 @@ YoloLayer::serialize(void* buffer) const noexcept write(d, m_NumClasses); write(d, m_NewCoords); write(d, m_OutputSize); - write(d, m_Type); - write(d, m_ScoreThreshold); - if (m_Type != 3 && m_Type != 4) { - uint yoloTensorsSize = m_YoloTensors.size(); - write(d, yoloTensorsSize); - for (uint i = 0; i < yoloTensorsSize; ++i) { - const TensorInfo& curYoloTensor = m_YoloTensors.at(i); - write(d, curYoloTensor.gridSizeX); - write(d, curYoloTensor.gridSizeY); - write(d, curYoloTensor.numBBoxes); - write(d, curYoloTensor.scaleXY); + uint yoloTensorsSize = m_YoloTensors.size(); + write(d, yoloTensorsSize); + for (uint i = 0; i < yoloTensorsSize; ++i) { + const TensorInfo& curYoloTensor = m_YoloTensors.at(i); + write(d, curYoloTensor.gridSizeX); + write(d, curYoloTensor.gridSizeY); + write(d, curYoloTensor.numBBoxes); + write(d, curYoloTensor.scaleXY); - uint anchorsSize = curYoloTensor.anchors.size(); - write(d, anchorsSize); - for (uint j = 0; j < anchorsSize; ++j) - write(d, curYoloTensor.anchors[j]); + uint anchorsSize = curYoloTensor.anchors.size(); + write(d, anchorsSize); + for (uint j = 0; j < anchorsSize; ++j) + write(d, curYoloTensor.anchors[j]); - uint maskSize = curYoloTensor.mask.size(); - write(d, maskSize); - for (uint j = 0; j < maskSize; ++j) - write(d, curYoloTensor.mask[j]); - } + uint maskSize = curYoloTensor.mask.size(); + write(d, maskSize); + for (uint j = 0; j < maskSize; ++j) + write(d, curYoloTensor.mask[j]); } } nvinfer1::IPluginV2* YoloLayer::clone() const noexcept { - return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize, m_Type, - m_ScoreThreshold); + return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize); } REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); diff --git a/nvdsinfer_custom_impl_Yolo/yoloPlugins.h b/nvdsinfer_custom_impl_Yolo/yoloPlugins.h index 1293796..e2e711f 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloPlugins.h +++ b/nvdsinfer_custom_impl_Yolo/yoloPlugins.h @@ -48,14 +48,13 @@ class YoloLayer : public nvinfer1::IPluginV2 { YoloLayer(const void* data, size_t length); YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, - const std::vector& yoloTensors, const uint64_t& outputSize, const uint& modelType, - const float& scoreThreshold); + const std::vector& yoloTensors, const uint64_t& outputSize); const char* getPluginType() const noexcept override { return YOLOLAYER_PLUGIN_NAME; } const char* getPluginVersion() const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } - int getNbOutputs() const noexcept override { return 4; } + int getNbOutputs() const noexcept override { return 1; } 
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept override; @@ -68,7 +67,9 @@ class YoloLayer : public nvinfer1::IPluginV2 { void terminate() noexcept override {} - size_t getWorkspaceSize(int maxBatchSize) const noexcept override { return 0; } + size_t getWorkspaceSize(int maxBatchSize) const noexcept override { + return maxBatchSize * sizeof(int); + } int32_t enqueue(int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; @@ -93,8 +94,6 @@ class YoloLayer : public nvinfer1::IPluginV2 { uint m_NewCoords {0}; std::vector m_YoloTensors; uint64_t m_OutputSize {0}; - uint m_Type {0}; - float m_ScoreThreshold {0}; }; class YoloLayerPluginCreator : public nvinfer1::IPluginCreator { diff --git a/utils/export_ppyoloe.py b/utils/export_ppyoloe.py new file mode 100644 index 0000000..1c2fca6 --- /dev/null +++ b/utils/export_ppyoloe.py @@ -0,0 +1,79 @@ +import os +import sys +import onnx +import paddle +import paddle.nn as nn +from ppdet.core.workspace import load_config, merge_config +from ppdet.utils.check import check_version, check_config +from ppdet.utils.cli import ArgsParser +from ppdet.engine import Trainer +from ppdet.slim import build_slim_model + + +class DeepStreamOutput(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + boxes = x['bbox'] + x['bbox_num'] = x['bbox_num'].transpose([0, 2, 1]) + scores = paddle.max(x['bbox_num'], 2, keepdim=True) + classes = paddle.cast(paddle.argmax(x['bbox_num'], 2, keepdim=True), dtype='float32') + return paddle.concat((boxes, scores, classes), axis=2) + + +def ppyoloe_export(FLAGS): + cfg = load_config(FLAGS.config) + FLAGS.opt['weights'] = FLAGS.weights + FLAGS.opt['exclude_nms'] = True + merge_config(FLAGS.opt) + if FLAGS.slim_config: + cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test') + merge_config(FLAGS.opt) + check_config(cfg) + check_version() + trainer = Trainer(cfg, mode='test') + trainer.load_weights(cfg.weights) + trainer.model.eval() + if not os.path.exists('.tmp'): + os.makedirs('.tmp') + static_model, _ = trainer._get_infer_cfg_and_input_spec('.tmp') + os.system('rm -r .tmp') + return cfg, static_model + + +def main(FLAGS): + paddle.set_device('cpu') + cfg, model = ppyoloe_export(FLAGS) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = [cfg.eval_height, cfg.eval_width] + + onnx_input_im = {} + onnx_input_im['image'] = paddle.static.InputSpec(shape=[None, 3, *img_size], dtype='float32', name='image') + onnx_input_im['scale_factor'] = paddle.static.InputSpec(shape=[None, 2], dtype='float32', name='scale_factor') + onnx_output_file = cfg.filename + '.onnx' + + paddle.onnx.export(model, cfg.filename, input_spec=[onnx_input_im], opset_version=FLAGS.opset) + + if FLAGS.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = ArgsParser() + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pdparams) file path (required)') + parser.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method') + parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + FLAGS = parse_args() + sys.exit(main(FLAGS)) 
diff --git a/utils/export_yoloV5.py b/utils/export_yoloV5.py new file mode 100644 index 0000000..fe403e5 --- /dev/null +++ b/utils/export_yoloV5.py @@ -0,0 +1,82 @@ +import os +import sys +import argparse +import warnings +import onnx +import torch +import torch.nn as nn +from models.experimental import attempt_load +from utils.torch_utils import select_device +from models.yolo import Detect + + +class DeepStreamOutput(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + x = x[0] + boxes = x[:, :, :4] + objectness = x[:, :, 4:5] + scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True) + return torch.cat((boxes, scores, classes, objectness), dim=2) + + +def suppress_warnings(): + warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) + warnings.filterwarnings('ignore', category=UserWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def yolov5_export(weights, device): + model = attempt_load(weights, device=device, inplace=True, fuse=True) + model.eval() + for k, m in model.named_modules(): + if isinstance(m, Detect): + m.inplace = False + m.dynamic = False + m.export = True + return model + + +def main(args): + suppress_warnings() + device = select_device('cpu') + model = yolov5_export(args.weights, device) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = args.size * 2 if len(args.size) == 1 else args.size + + if img_size == [640, 640] and args.p6: + img_size = [1280] * 2 + + onnx_input_im = torch.zeros(1, 3, *img_size).to(device) + onnx_output_file = os.path.basename(args.weights).split('.pt')[0] + '.onnx' + + torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, + do_constant_folding=True, input_names=['input'], output_names=['output'], dynamic_axes=None) + + if args.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = argparse.ArgumentParser(description='DeepStream YOLOv5 conversion') + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') + parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + parser.add_argument('--p6', action='store_true', help='P6 model') + parser.add_argument('--opset', type=int, default=17, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + if not os.path.isfile(args.weights): + raise SystemExit('Invalid weights file') + return args + + +if __name__ == '__main__': + args = parse_args() + sys.exit(main(args)) diff --git a/utils/export_yoloV6.py b/utils/export_yoloV6.py new file mode 100644 index 0000000..b4c593b --- /dev/null +++ b/utils/export_yoloV6.py @@ -0,0 +1,88 @@ +import os +import sys +import argparse +import warnings +import onnx +import torch +import torch.nn as nn +from yolov6.utils.checkpoint import load_checkpoint +from yolov6.layers.common import RepVGGBlock, ConvModule, SiLU +from yolov6.models.effidehead import Detect + + +class DeepStreamOutput(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + print(x) + boxes = x[:, :, :4] + objectness = x[:, :, 4:5] + scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True) + return torch.cat((boxes, scores, classes, objectness), dim=2) + + +def suppress_warnings(): + warnings.filterwarnings('ignore', 
category=torch.jit.TracerWarning) + warnings.filterwarnings('ignore', category=UserWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def yolov6_export(weights, device): + model = load_checkpoint(weights, map_location=device, inplace=True, fuse=True) + for layer in model.modules(): + if isinstance(layer, RepVGGBlock): + layer.switch_to_deploy() + elif isinstance(layer, nn.Upsample) and not hasattr(layer, 'recompute_scale_factor'): + layer.recompute_scale_factor = None + model.eval() + for k, m in model.named_modules(): + if isinstance(m, ConvModule): + if hasattr(m, 'act') and isinstance(m.act, nn.SiLU): + m.act = SiLU() + elif isinstance(m, Detect): + m.inplace = False + return model + + +def main(args): + suppress_warnings() + device = torch.device('cpu') + model = yolov6_export(args.weights, device) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = args.size * 2 if len(args.size) == 1 else args.size + + if img_size == [640, 640] and args.p6: + img_size = [1280] * 2 + + onnx_input_im = torch.zeros(1, 3, *img_size).to(device) + onnx_output_file = os.path.basename(args.weights).split('.pt')[0] + '.onnx' + + torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, + do_constant_folding=True, input_names=['input'], output_names=['output'], dynamic_axes=None) + + if args.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = argparse.ArgumentParser(description='DeepStream YOLOv6 conversion') + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') + parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + parser.add_argument('--p6', action='store_true', help='P6 model') + parser.add_argument('--opset', type=int, default=13, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + if not os.path.isfile(args.weights): + raise SystemExit('Invalid weights file') + return args + + +if __name__ == '__main__': + args = parse_args() + sys.exit(main(args)) diff --git a/utils/export_yoloV7.py b/utils/export_yoloV7.py new file mode 100644 index 0000000..73961ac --- /dev/null +++ b/utils/export_yoloV7.py @@ -0,0 +1,86 @@ +import os +import sys +import argparse +import warnings +import onnx +import torch +import torch.nn as nn +import models +from models.experimental import attempt_load +from utils.torch_utils import select_device +from utils.activations import Hardswish, SiLU + + +class DeepStreamOutput(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + boxes = x[:, :, :4] + objectness = x[:, :, 4:5] + scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True) + return torch.cat((boxes, scores, classes, objectness), dim=2) + + +def suppress_warnings(): + warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) + warnings.filterwarnings('ignore', category=UserWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def yolov7_export(weights, device): + model = attempt_load(weights, map_location=device) + for k, m in model.named_modules(): + m._non_persistent_buffers_set = set() + if isinstance(m, models.common.Conv): + if isinstance(m.act, nn.Hardswish): + m.act = Hardswish() + elif isinstance(m.act, nn.SiLU): + m.act = SiLU() + 
model.model[-1].export = False + model.model[-1].concat = True + model.eval() + return model + + +def main(args): + suppress_warnings() + device = select_device('cpu') + model = yolov7_export(args.weights, device) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = args.size * 2 if len(args.size) == 1 else args.size + + if img_size == [640, 640] and args.p6: + img_size = [1280] * 2 + + onnx_input_im = torch.zeros(1, 3, *img_size).to(device) + onnx_output_file = os.path.basename(args.weights).split('.pt')[0] + '.onnx' + + torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, + do_constant_folding=True, input_names=['input'], output_names=['output'], dynamic_axes=None) + + if args.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = argparse.ArgumentParser(description='DeepStream YOLOv7 conversion') + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') + parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + parser.add_argument('--p6', action='store_true', help='P6 model') + parser.add_argument('--opset', type=int, default=12, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + if not os.path.isfile(args.weights): + raise SystemExit('Invalid weights file') + return args + + +if __name__ == '__main__': + args = parse_args() + sys.exit(main(args)) diff --git a/utils/export_yoloV8.py b/utils/export_yoloV8.py new file mode 100644 index 0000000..907f36c --- /dev/null +++ b/utils/export_yoloV8.py @@ -0,0 +1,85 @@ +import os +import sys +import argparse +import warnings +import onnx +import torch +import torch.nn as nn +from copy import deepcopy +from ultralytics import YOLO +from ultralytics.yolo.utils.torch_utils import select_device +from ultralytics.nn.modules import C2f, Detect + + +class DeepStreamOutput(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + x = x.transpose(1, 2) + boxes = x[:, :, :4] + scores, classes = torch.max(x[:, :, 4:], 2, keepdim=True) + return torch.cat((boxes, scores, classes), dim=2) + + +def suppress_warnings(): + warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) + warnings.filterwarnings('ignore', category=UserWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def yolov8_export(weights, device): + model = YOLO(weights) + model = deepcopy(model.model).to(device) + for p in model.parameters(): + p.requires_grad = False + model.eval() + model.float() + model = model.fuse() + for k, m in model.named_modules(): + if isinstance(m, Detect): + m.dynamic = False + m.export = True + m.format = 'onnx' + elif isinstance(m, C2f): + m.forward = m.forward_split + return model + + +def main(args): + suppress_warnings() + device = select_device('cpu') + model = yolov8_export(args.weights, device) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = args.size * 2 if len(args.size) == 1 else args.size + + onnx_input_im = torch.zeros(1, 3, *img_size).to(device) + onnx_output_file = os.path.basename(args.weights).split('.pt')[0] + '.onnx' + + torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, + do_constant_folding=True, input_names=['input'], 
output_names=['output'], dynamic_axes=None) + + if args.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = argparse.ArgumentParser(description='DeepStream YOLOv8 conversion') + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') + parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + parser.add_argument('--opset', type=int, default=16, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + if not os.path.isfile(args.weights): + raise SystemExit('Invalid weights file') + return args + + +if __name__ == '__main__': + args = parse_args() + sys.exit(main(args)) diff --git a/utils/export_yolonas.py b/utils/export_yolonas.py new file mode 100644 index 0000000..3cf37b0 --- /dev/null +++ b/utils/export_yolonas.py @@ -0,0 +1,75 @@ +import os +import sys +import argparse +import warnings +import onnx +import torch +import torch.nn as nn +from super_gradients.training import models + + +class DeepStreamOutput(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + boxes = x[0] + scores, classes = torch.max(x[1], 2, keepdim=True) + return torch.cat((boxes, scores, classes), dim=2) + + +def suppress_warnings(): + warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) + warnings.filterwarnings('ignore', category=UserWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def yolonas_export(model_name, weights, num_classes, size): + img_size = size * 2 if len(size) == 1 else size + model = models.get(model_name, num_classes=num_classes, checkpoint_path=weights) + model.eval() + model.prep_model_for_conversion(input_size=[1, 3, *img_size]) + return model + + +def main(args): + suppress_warnings() + device = torch.device('cpu') + model = yolonas_export(args.model, args.weights, args.classes, args.size) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = args.size * 2 if len(args.size) == 1 else args.size + + onnx_input_im = torch.zeros(1, 3, *img_size).to(device) + onnx_output_file = os.path.basename(args.weights).split('.pt')[0] + '.onnx' + + torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, + do_constant_folding=True, input_names=['input'], output_names=['output'], dynamic_axes=None) + + if args.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = argparse.ArgumentParser(description='DeepStream YOLO-NAS conversion') + parser.add_argument('-m', '--model', required=True, help='Model name (required)') + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') + parser.add_argument('-n', '--classes', type=int, default=80, help='Number of trained classes (default 80)') + parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + parser.add_argument('--opset', type=int, default=14, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + if args.model == '': + raise SystemExit('Invalid model name') + if not 
os.path.isfile(args.weights): + raise SystemExit('Invalid weights file') + return args + + +if __name__ == '__main__': + args = parse_args() + sys.exit(main(args)) diff --git a/utils/export_yolor.py b/utils/export_yolor.py new file mode 100644 index 0000000..f1b3125 --- /dev/null +++ b/utils/export_yolor.py @@ -0,0 +1,99 @@ +import os +import sys +import argparse +import warnings +import onnx +import torch +import torch.nn as nn + + +class DeepStreamOutput(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + x = x[0] + boxes = x[:, :, :4] + objectness = x[:, :, 4:5] + scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True) + return torch.cat((boxes, scores, classes, objectness), dim=2) + + +def suppress_warnings(): + warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) + warnings.filterwarnings('ignore', category=UserWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def yolor_export(weights, cfg, size, device): + if os.path.isfile('models/experimental.py'): + import models + from models.experimental import attempt_load + from utils.activations import Hardswish + model = attempt_load(weights, map_location=device) + for k, m in model.named_modules(): + m._non_persistent_buffers_set = set() + if isinstance(m, models.common.Conv) and isinstance(m.act, nn.Hardswish): + m.act = Hardswish() + elif isinstance(m, nn.Upsample) and not hasattr(m, 'recompute_scale_factor'): + m.recompute_scale_factor = None + model.model[-1].training = False + model.model[-1].export = False + else: + from models.models import Darknet + model_name = os.path.basename(weights).split('.pt')[0] + if cfg == '': + cfg = 'cfg/' + model_name + '.cfg' + if not os.path.isfile(cfg): + raise SystemExit('CFG file not found') + model = Darknet(cfg, img_size=size[::-1]).to(device) + model.load_state_dict(torch.load(weights, map_location=device)['model']) + model.float() + model.fuse() + model.eval() + model.module_list[-1].training = False + return model + + +def main(args): + suppress_warnings() + device = torch.device('cpu') + model = yolor_export(args.weights, args.cfg, args.size, device) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = args.size * 2 if len(args.size) == 1 else args.size + + if img_size == [640, 640] and args.p6: + img_size = [1280] * 2 + + onnx_input_im = torch.zeros(1, 3, *img_size).to(device) + onnx_output_file = os.path.basename(args.weights).split('.pt')[0] + '.onnx' + + torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, + do_constant_folding=True, input_names=['input'], output_names=['output'], dynamic_axes=None) + + if args.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = argparse.ArgumentParser(description='DeepStream YOLOR conversion') + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') + parser.add_argument('-c', '--cfg', default='', help='Input cfg (.cfg) file path') + parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') + parser.add_argument('--p6', action='store_true', help='P6 model') + parser.add_argument('--opset', type=int, default=12, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + if not 
os.path.isfile(args.weights): + raise SystemExit('Invalid weights file') + return args + + +if __name__ == '__main__': + args = parse_args() + sys.exit(main(args)) diff --git a/utils/export_yolox.py b/utils/export_yolox.py new file mode 100644 index 0000000..0c08e40 --- /dev/null +++ b/utils/export_yolox.py @@ -0,0 +1,81 @@ +import os +import sys +import argparse +import warnings +import onnx +import torch +import torch.nn as nn +from yolox.exp import get_exp +from yolox.utils import replace_module +from yolox.models.network_blocks import SiLU + + +class DeepStreamOutput(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + boxes = x[:, :, :4] + objectness = x[:, :, 4:5] + scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True) + return torch.cat((boxes, scores, classes, objectness), dim=2) + + +def suppress_warnings(): + warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) + warnings.filterwarnings('ignore', category=UserWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def yolox_export(weights, exp_file): + exp = get_exp(exp_file) + model = exp.get_model() + ckpt = torch.load(weights, map_location='cpu') + model.eval() + if 'model' in ckpt: + ckpt = ckpt['model'] + model.load_state_dict(ckpt) + model = replace_module(model, nn.SiLU, SiLU) + model.head.decode_in_inference = True + return model, exp + + +def main(args): + suppress_warnings() + device = torch.device('cpu') + model, exp = yolox_export(args.weights, args.exp) + + model = nn.Sequential(model, DeepStreamOutput()) + + img_size = [exp.input_size[1], exp.input_size[0]] + + onnx_input_im = torch.zeros(1, 3, *img_size).to(device) + onnx_output_file = os.path.basename(args.weights).split('.pt')[0] + '.onnx' + + torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, + do_constant_folding=True, input_names=['input'], output_names=['output'], dynamic_axes=None) + + if args.simplify: + import onnxsim + model_onnx = onnx.load(onnx_output_file) + model_onnx, _ = onnxsim.simplify(model_onnx) + onnx.save(model_onnx, onnx_output_file) + + +def parse_args(): + parser = argparse.ArgumentParser(description='DeepStream YOLOX conversion') + parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') + parser.add_argument('-c', '--exp', required=True, help='Input exp (.py) file path (required)') + parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') + parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') + args = parser.parse_args() + if not os.path.isfile(args.weights): + raise SystemExit('Invalid weights file') + if not os.path.isfile(args.exp): + raise SystemExit('Invalid exp file') + return args + + +if __name__ == '__main__': + args = parse_args() + sys.exit(main(args)) diff --git a/utils/gen_wts_ppyoloe.py b/utils/gen_wts_ppyoloe.py deleted file mode 100644 index 1c10008..0000000 --- a/utils/gen_wts_ppyoloe.py +++ /dev/null @@ -1,432 +0,0 @@ -import os -import struct -import paddle -import numpy as np -from ppdet.core.workspace import load_config, merge_config -from ppdet.utils.check import check_version, check_config -from ppdet.utils.cli import ArgsParser -from ppdet.engine import Trainer -from ppdet.slim import build_slim_model - - -class Layers(object): - def __init__(self, size, fw, fc, letter_box): - self.blocks = [0 for _ in range(300)] - self.current = -1 - - self.backbone_outs = [] - self.neck_fpn_feats = [] - 
self.neck_pan_feats = [] - self.yolo_head_cls = [] - self.yolo_head_reg = [] - - self.width = size[0] if len(size) == 1 else size[1] - self.height = size[0] - self.letter_box = letter_box - - self.fw = fw - self.fc = fc - self.wc = 0 - - self.net() - - def ConvBNLayer(self, child): - self.current += 1 - - self.convolutional(child, act='swish') - - def CSPResStage(self, child, ret): - self.current += 1 - - if child.conv_down is not None: - self.convolutional(child.conv_down, act='swish') - self.convolutional(child.conv1, act='swish') - self.route('-2') - self.convolutional(child.conv2, act='swish') - idx = -3 - for m in child.blocks: - self.convolutional(m.conv1, act='swish') - self.convolutional(m.conv2, act='swish') - self.shortcut(-3) - idx -= 3 - self.route('%d, -1' % idx) - if child.attn is not None: - self.reduce((1, 2), mode='mean', keepdim=True) - self.convolutional(child.attn.fc, act='hardsigmoid') - self.shortcut(-3, ew='mul') - self.convolutional(child.conv3, act='swish') - if ret is True: - self.backbone_outs.append(self.current) - - def CSPStage(self, child, stage): - self.current += 1 - - self.convolutional(child.conv1, act='swish') - self.route('-2') - self.convolutional(child.conv2, act='swish') - idx = -3 - for m in child.convs: - if m.__class__.__name__ == 'BasicBlock': - self.convolutional(m.conv1, act='swish') - self.convolutional(m.conv2, act='swish') - idx -= 2 - elif m.__class__.__name__ == 'SPP': - self.maxpool(m.pool0) - self.route('-2') - self.maxpool(m.pool1) - self.route('-4') - self.maxpool(m.pool2) - self.route('-6, -5, -3, -1') - self.convolutional(m.conv, act='swish') - idx -= 7 - self.route('%d, -1' % idx) - self.convolutional(child.conv3, act='swish') - if stage == 'fpn': - self.neck_fpn_feats.append(self.current) - elif stage == 'pan': - self.neck_pan_feats.append(self.current) - - def Concat(self, route): - self.current += 1 - - r = self.get_route(route) - self.route('-1, %d' % r) - - def Upsample(self): - self.current += 1 - - self.upsample() - - def AvgPool2d(self, route=None): - self.current += 1 - - if route is not None: - r = self.get_route(route) - self.route('%d' % r) - self.avgpool() - - def ESEAttn(self, child, route=0): - self.current += 1 - - if route < 0: - self.route('%d' % route) - self.convolutional(child.fc, act='sigmoid') - self.shortcut(route - 3, ew='mul') - self.convolutional(child.conv, act='swish') - if route == 0: - self.shortcut(-5) - - def Conv2D(self, child, act='linear'): - self.current += 1 - - self.convolutional(child, act=act) - - def Shuffle(self, reshape=None, transpose1=None, transpose2=None, output=''): - self.current += 1 - - self.shuffle(reshape=reshape, transpose1=transpose1, transpose2=transpose2) - if output == 'cls': - self.yolo_head_cls.append(self.current) - elif output == 'reg': - self.yolo_head_reg.append(self.current) - - def SoftMax(self, axes): - self.current += 1 - - self.softmax(axes) - - def Detect(self, output): - self.current += 1 - - routes = self.yolo_head_cls if output == 'cls' else self.yolo_head_reg - - for i, route in enumerate(routes): - routes[i] = self.get_route(route) - self.route(str(routes)[1:-1], axis=-1) - self.yolo(output) - - def net(self): - lb = 'letter_box=1\n' if self.letter_box else '' - - self.fc.write('[net]\n' + - 'width=%d\n' % self.width + - 'height=%d\n' % self.height + - 'channels=3\n' + - lb) - - def convolutional(self, cv, act='linear'): - self.blocks[self.current] += 1 - - self.get_state_dict(cv.state_dict()) - - if cv.__class__.__name__ == 'Conv2D': - filters = 
cv._out_channels - size = cv._kernel_size - stride = cv._stride - pad = cv._padding - groups = cv._groups - bias = cv.bias - bn = False - else: - filters = cv.conv._out_channels - size = cv.conv._kernel_size - stride = cv.conv._stride - pad = cv.conv._padding - groups = cv.conv._groups - bias = cv.conv.bias - bn = True if hasattr(cv, 'bn') else False - - b = 'batch_normalize=1\n' if bn is True else '' - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' - - self.fc.write('\n[convolutional]\n' + - b + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w + - 'activation=%s\n' % act) - - def route(self, layers, axis=0): - self.blocks[self.current] += 1 - - a = 'axis=%d\n' % axis if axis != 0 else '' - - self.fc.write('\n[route]\n' + - 'layers=%s\n' % layers + - a) - - def shortcut(self, r, ew='add', act='linear'): - self.blocks[self.current] += 1 - - m = 'mode=mul\n' if ew == 'mul' else '' - - self.fc.write('\n[shortcut]\n' + - 'from=%d\n' % r + - m + - 'activation=%s\n' % act) - - def reduce(self, dim, mode='mean', keepdim=False): - self.blocks[self.current] += 1 - - self.fc.write('\n[reduce]\n' + - 'mode=%s\n' % mode + - 'axes=%s\n' % str(dim)[1:-1] + - 'keep=%d\n' % keepdim) - - def maxpool(self, m): - self.blocks[self.current] += 1 - - stride = m.stride - size = m.ksize - mode = m.ceil_mode - - m = 'maxpool_up' if mode else 'maxpool' - - self.fc.write('\n[%s]\n' % m + - 'stride=%d\n' % stride + - 'size=%d\n' % size) - - def upsample(self): - self.blocks[self.current] += 1 - - stride = 2 - - self.fc.write('\n[upsample]\n' + - 'stride=%d\n' % stride) - - def avgpool(self): - self.blocks[self.current] += 1 - - self.fc.write('\n[avgpool]\n') - - def shuffle(self, reshape=None, transpose1=None, transpose2=None): - self.blocks[self.current] += 1 - - r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if reshape is not None else '' - t1 = 'transpose1=%s\n' % ', '.join(str(x) for x in transpose1) if transpose1 is not None else '' - t2 = 'transpose2=%s\n' % ', '.join(str(x) for x in transpose2) if transpose2 is not None else '' - - self.fc.write('\n[shuffle]\n' + - r + - t1 + - t2) - - def softmax(self, axes): - self.blocks[self.current] += 1 - - self.fc.write('\n[softmax]\n' + - 'axes=%d\n' % axes) - - def yolo(self, output): - self.blocks[self.current] += 1 - - self.fc.write('\n[%s]\n' % output) - - def get_state_dict(self, state_dict): - for k, v in state_dict.items(): - if 'alpha' not in k: - vr = v.reshape([-1]).numpy() - self.fw.write('{} {} '.format(k, len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_anchors(self, anchor_points, stride_tensor): - vr = anchor_points.numpy() - self.fw.write('{} {} '.format('anchor_points', len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - vr = stride_tensor.numpy() - self.fw.write('{} {} '.format('stride_tensor', len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_value(self, key): - if type(key) == int: - return key - return key[0] if key[0] == key[1] else str(key)[1:-1] - - def get_route(self, n): - r = 0 - for i, b in enumerate(self.blocks): - if i <= n: - r += b 
- else: - break - return r - 1 - - -def export_model(): - paddle.set_device('cpu') - - FLAGS = parse_args() - - cfg = load_config(FLAGS.config) - - FLAGS.opt['weights'] = FLAGS.weights - FLAGS.opt['exclude_nms'] = True - - if 'norm_type' in cfg and cfg['norm_type'] == 'sync_bn': - FLAGS.opt['norm_type'] = 'bn' - merge_config(FLAGS.opt) - - if FLAGS.slim_config: - cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test') - - merge_config(FLAGS.opt) - check_config(cfg) - check_version() - - trainer = Trainer(cfg, mode='test') - trainer.load_weights(cfg.weights) - - trainer.model.eval() - - if not os.path.exists('.tmp'): - os.makedirs('.tmp') - - static_model, _ = trainer._get_infer_cfg_and_input_spec('.tmp') - - os.system('rm -r .tmp') - - return cfg, static_model - - -def parse_args(): - parser = ArgsParser() - parser.add_argument('-w', '--weights', required=True, type=str, help='Input weights (.pdparams) file path (required)') - parser.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method') - args = parser.parse_args() - return args - - -cfg, model = export_model() - -model_name = cfg.filename -inference_size = (cfg.eval_height, cfg.eval_width) -letter_box = False - -for sample_transforms in cfg['EvalReader']['sample_transforms']: - if 'Resize' in sample_transforms: - letter_box = sample_transforms['Resize']['keep_ratio'] - -backbone = cfg[cfg.architecture]['backbone'] -neck = cfg[cfg.architecture]['neck'] -yolo_head = cfg[cfg.architecture]['yolo_head'] - -wts_file = model_name + '.wts' if 'ppyoloe' in model_name else 'ppyoloe_' + model_name + '.wts' -cfg_file = model_name + '.cfg' if 'ppyoloe' in model_name else 'ppyoloe_' + model_name + '.cfg' - -with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: - layers = Layers(inference_size, fw, fc, letter_box) - - if backbone == 'CSPResNet': - layers.fc.write('\n# CSPResNet\n') - - for child in model.backbone.stem: - layers.ConvBNLayer(child) - for i, child in enumerate(model.backbone.stages): - ret = True if i in model.backbone.return_idx else False - layers.CSPResStage(child, ret) - else: - raise SystemExit('Model not supported') - - if neck == 'CustomCSPPAN': - layers.fc.write('\n# CustomCSPPAN\n') - - blocks = layers.backbone_outs[::-1] - for i, block in enumerate(blocks): - if i > 0: - layers.Concat(block) - layers.CSPStage(model.neck.fpn_stages[i][0], 'fpn') - if i < model.neck.num_blocks - 1: - layers.ConvBNLayer(model.neck.fpn_routes[i]) - layers.Upsample() - layers.neck_pan_feats = [layers.neck_fpn_feats[-1], ] - for i in reversed(range(model.neck.num_blocks - 1)): - layers.ConvBNLayer(model.neck.pan_routes[i]) - layers.Concat(layers.neck_fpn_feats[i]) - layers.CSPStage(model.neck.pan_stages[i][0], 'pan') - layers.neck_pan_feats = layers.neck_pan_feats[::-1] - else: - raise SystemExit('Model not supported') - - if yolo_head == 'PPYOLOEHead': - layers.fc.write('\n# PPYOLOEHead\n') - - reg_max = model.yolo_head.reg_max + 1 if hasattr(model.yolo_head, 'reg_max') else model.yolo_head.reg_range[1] - - for i, feat in enumerate(layers.neck_pan_feats): - if i > 0: - layers.AvgPool2d(route=feat) - else: - layers.AvgPool2d() - layers.ESEAttn(model.yolo_head.stem_cls[i]) - layers.Conv2D(model.yolo_head.pred_cls[i], act='sigmoid') - layers.Shuffle(reshape=[model.yolo_head.num_classes, 'hw'], output='cls') - layers.ESEAttn(model.yolo_head.stem_reg[i], route=-7) - layers.Conv2D(model.yolo_head.pred_reg[i]) - layers.Shuffle(reshape=[4, reg_max, 'hw'], transpose2=[1, 0, 2]) - layers.SoftMax(0) - 
layers.Conv2D(model.yolo_head.proj_conv) - layers.Shuffle(reshape=['h', 'w'], output='reg') - layers.Detect('cls') - layers.Detect('reg') - layers.get_anchors(model.yolo_head.anchor_points.reshape([-1]), model.yolo_head.stride_tensor) - - else: - raise SystemExit('Model not supported') - -os.system('echo "%d" | cat - %s > temp && mv temp %s' % (layers.wc, wts_file, wts_file)) diff --git a/utils/gen_wts_yoloV5.py b/utils/gen_wts_yoloV5.py deleted file mode 100644 index d19a2b8..0000000 --- a/utils/gen_wts_yoloV5.py +++ /dev/null @@ -1,394 +0,0 @@ -import argparse -import os -import struct -import torch -from utils.torch_utils import select_device - - -class Layers(object): - def __init__(self, n, size, fw, fc): - self.blocks = [0 for _ in range(n)] - self.current = 0 - - self.width = size[0] if len(size) == 1 else size[1] - self.height = size[0] - - self.num = 0 - self.nc = 0 - self.anchors = '' - self.masks = [] - - self.fw = fw - self.fc = fc - self.wc = 0 - - self.net() - - def Focus(self, child): - self.current = child.i - self.fc.write('\n# Focus\n') - - self.reorg() - self.convolutional(child.conv) - - def Conv(self, child): - self.current = child.i - self.fc.write('\n# Conv\n') - - self.convolutional(child) - - def BottleneckCSP(self, child): - self.current = child.i - self.fc.write('\n# BottleneckCSP\n') - - self.convolutional(child.cv2) - self.route('-2') - self.convolutional(child.cv1) - idx = -3 - for m in child.m: - if m.add: - self.convolutional(m.cv1) - self.convolutional(m.cv2) - self.shortcut(-3) - idx -= 3 - else: - self.convolutional(m.cv1) - self.convolutional(m.cv2) - idx -= 2 - self.convolutional(child.cv3) - self.route('-1, %d' % (idx - 1)) - self.batchnorm(child.bn, child.act) - self.convolutional(child.cv4) - - def C3(self, child): - self.current = child.i - self.fc.write('\n# C3\n') - - self.convolutional(child.cv2) - self.route('-2') - self.convolutional(child.cv1) - idx = -3 - for m in child.m: - if m.add: - self.convolutional(m.cv1) - self.convolutional(m.cv2) - self.shortcut(-3) - idx -= 3 - else: - self.convolutional(m.cv1) - self.convolutional(m.cv2) - idx -= 2 - self.route('-1, %d' % idx) - self.convolutional(child.cv3) - - def SPP(self, child): - self.current = child.i - self.fc.write('\n# SPP\n') - - self.convolutional(child.cv1) - self.maxpool(child.m[0]) - self.route('-2') - self.maxpool(child.m[1]) - self.route('-4') - self.maxpool(child.m[2]) - self.route('-6, -5, -3, -1') - self.convolutional(child.cv2) - - def SPPF(self, child): - self.current = child.i - self.fc.write('\n# SPPF\n') - - self.convolutional(child.cv1) - self.maxpool(child.m) - self.maxpool(child.m) - self.maxpool(child.m) - self.route('-4, -3, -2, -1') - self.convolutional(child.cv2) - - def Upsample(self, child): - self.current = child.i - self.fc.write('\n# Upsample\n') - - self.upsample(child) - - def Concat(self, child): - self.current = child.i - self.fc.write('\n# Concat\n') - - r = [] - for i in range(1, len(child.f)): - r.append(self.get_route(child.f[i])) - self.route('-1, %s' % str(r)[1:-1]) - - def Detect(self, child): - self.current = child.i - self.fc.write('\n# Detect\n') - - self.get_anchors(child.state_dict(), child.m[0].out_channels) - - for i, m in enumerate(child.m): - r = self.get_route(child.f[i]) - self.route('%d' % r) - self.convolutional(m, detect=True) - self.yolo(i) - - def net(self): - self.fc.write('[net]\n' + - 'width=%d\n' % self.width + - 'height=%d\n' % self.height + - 'channels=3\n' + - 'letter_box=1\n') - - def CBH(self, child): - self.current = child.i 
- self.fc.write('\n# CBH\n') - - self.convolutional(child.conv, act='hardswish') - - def LC_Block(self, child): - self.current = child.i - self.fc.write('\n# LC_Block\n') - - self.convolutional(child.dw_conv, act='hardswish') - if child.use_se: - self.avgpool() - self.convolutional(child.se.conv1, act='relu') - self.convolutional(child.se.conv2, act='silu') - self.shortcut(-4, ew='mul') - self.convolutional(child.pw_conv, act='hardswish') - - def Dense(self, child): - self.current = child.i - self.fc.write('\n# Dense\n') - - self.convolutional(child.dense_conv, act='hardswish') - - def reorg(self): - self.blocks[self.current] += 1 - - self.fc.write('\n[reorg]\n') - - def convolutional(self, cv, act=None, detect=False): - self.blocks[self.current] += 1 - - self.get_state_dict(cv.state_dict()) - - if cv._get_name() == 'Conv2d': - filters = cv.out_channels - size = cv.kernel_size - stride = cv.stride - pad = cv.padding - groups = cv.groups - bias = cv.bias - bn = False - act = 'linear' if not detect else 'logistic' - else: - filters = cv.conv.out_channels - size = cv.conv.kernel_size - stride = cv.conv.stride - pad = cv.conv.padding - groups = cv.conv.groups - bias = cv.conv.bias - bn = True if hasattr(cv, 'bn') else False - if act is None: - act = self.get_activation(cv.act._get_name()) if hasattr(cv, 'act') else 'linear' - - b = 'batch_normalize=1\n' if bn is True else '' - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' - - self.fc.write('\n[convolutional]\n' + - b + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w + - 'activation=%s\n' % act) - - def batchnorm(self, bn, act): - self.blocks[self.current] += 1 - - self.get_state_dict(bn.state_dict()) - - filters = bn.num_features - act = self.get_activation(act._get_name()) - - self.fc.write('\n[batchnorm]\n' + - 'filters=%d\n' % filters + - 'activation=%s\n' % act) - - def route(self, layers): - self.blocks[self.current] += 1 - - self.fc.write('\n[route]\n' + - 'layers=%s\n' % layers) - - def shortcut(self, r, ew='add', act='linear'): - self.blocks[self.current] += 1 - - m = 'mode=mul\n' if ew == 'mul' else '' - - self.fc.write('\n[shortcut]\n' + - 'from=%d\n' % r + - m + - 'activation=%s\n' % act) - - def maxpool(self, m): - self.blocks[self.current] += 1 - - stride = m.stride - size = m.kernel_size - mode = m.ceil_mode - - m = 'maxpool_up' if mode else 'maxpool' - - self.fc.write('\n[%s]\n' % m + - 'stride=%d\n' % stride + - 'size=%d\n' % size) - - def upsample(self, child): - self.blocks[self.current] += 1 - - stride = child.scale_factor - - self.fc.write('\n[upsample]\n' + - 'stride=%d\n' % stride) - - def avgpool(self): - self.blocks[self.current] += 1 - - self.fc.write('\n[avgpool]\n') - - def yolo(self, i): - self.blocks[self.current] += 1 - - self.fc.write('\n[yolo]\n' + - 'mask=%s\n' % self.masks[i] + - 'anchors=%s\n' % self.anchors + - 'classes=%d\n' % self.nc + - 'num=%d\n' % self.num + - 'scale_x_y=2.0\n' + - 'new_coords=1\n') - - def get_state_dict(self, state_dict): - for k, v in state_dict.items(): - if 'num_batches_tracked' not in k: - vr = v.reshape(-1).numpy() - self.fw.write('{} {} '.format(k, len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_anchors(self, state_dict, out_channels): - anchor_grid = 
state_dict['anchor_grid'] - aa = anchor_grid.reshape(-1).tolist() - am = anchor_grid.tolist() - - self.num = (len(aa) / 2) - self.nc = int((out_channels / (self.num / len(am))) - 5) - self.anchors = str(aa)[1:-1] - - n = 0 - for m in am: - mask = [] - for _ in range(len(m)): - mask.append(n) - n += 1 - self.masks.append(str(mask)[1:-1]) - - def get_value(self, key): - if type(key) == int: - return key - return key[0] if key[0] == key[1] else str(key)[1:-1] - - def get_route(self, n): - r = 0 - if n < 0: - for i, b in enumerate(self.blocks[self.current-1::-1]): - if i < abs(n) - 1: - r -= b - else: - break - else: - for i, b in enumerate(self.blocks): - if i <= n: - r += b - else: - break - return r - 1 - - def get_activation(self, act): - if act == 'Hardswish': - return 'hardswish' - elif act == 'LeakyReLU': - return 'leaky' - elif act == 'SiLU': - return 'silu' - return 'linear' - - -def parse_args(): - parser = argparse.ArgumentParser(description='PyTorch YOLOv5 conversion') - parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') - parser.add_argument( - '-s', '--size', nargs='+', type=int, help='Inference size [H,W] (default [640])') - parser.add_argument("--p6", action="store_true", help="P6 model") - args = parser.parse_args() - if not os.path.isfile(args.weights): - raise SystemExit('Invalid weights file') - if not args.size: - args.size = [1280] if args.p6 else [640] - return args.weights, args.size - - -pt_file, inference_size = parse_args() - -model_name = os.path.basename(pt_file).split('.pt')[0] -wts_file = model_name + '.wts' if 'yolov5' in model_name else 'yolov5_' + model_name + '.wts' -cfg_file = model_name + '.cfg' if 'yolov5' in model_name else 'yolov5_' + model_name + '.cfg' - -device = select_device('cpu') -model = torch.load(pt_file, map_location=device)['model'].float() - -anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None] -delattr(model.model[-1], 'anchor_grid') -model.model[-1].register_buffer('anchor_grid', anchor_grid) - -model.to(device).eval() - -with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: - layers = Layers(len(model.model), inference_size, fw, fc) - - for child in model.model.children(): - if child._get_name() == 'Focus': - layers.Focus(child) - elif child._get_name() == 'Conv': - layers.Conv(child) - elif child._get_name() == 'BottleneckCSP': - layers.BottleneckCSP(child) - elif child._get_name() == 'C3': - layers.C3(child) - elif child._get_name() == 'SPP': - layers.SPP(child) - elif child._get_name() == 'SPPF': - layers.SPPF(child) - elif child._get_name() == 'Upsample': - layers.Upsample(child) - elif child._get_name() == 'Concat': - layers.Concat(child) - elif child._get_name() == 'Detect': - layers.Detect(child) - elif child._get_name() == 'CBH': - layers.CBH(child) - elif child._get_name() == 'LC_Block': - layers.LC_Block(child) - elif child._get_name() == 'Dense': - layers.Dense(child) - else: - raise SystemExit('Model not supported') - -os.system('echo "%d" | cat - %s > temp && mv temp %s' % (layers.wc, wts_file, wts_file)) diff --git a/utils/gen_wts_yoloV6.py b/utils/gen_wts_yoloV6.py deleted file mode 100644 index 91501b4..0000000 --- a/utils/gen_wts_yoloV6.py +++ /dev/null @@ -1,588 +0,0 @@ -import argparse -import os -import struct -import torch -from yolov6.assigners.anchor_generator import generate_anchors - - -class Layers(object): - def __init__(self, size, fw, fc): - self.blocks = [0 for _ in range(300)] - self.current = -1 - - self.width = size[0] if 
len(size) == 1 else size[1] - self.height = size[0] - - self.backbone_outs = [] - self.fpn_feats = [] - self.pan_feats = [] - self.yolo_head_cls = [] - self.yolo_head_reg = [] - - self.fw = fw - self.fc = fc - self.wc = 0 - - self.net() - - def BaseConv(self, child): - self.current += 1 - - if child._get_name() == 'RepVGGBlock': - self.convolutional(child.rbr_reparam, act=self.get_activation(child.nonlinearity._get_name())) - elif child._get_name() == 'ConvWrapper' or child._get_name() == 'SimConvWrapper': - self.convolutional(child.block) - else: - raise SystemExit('Model not supported') - - def RepBlock(self, child, stage=''): - self.current += 1 - - if child.conv1._get_name() == 'RepVGGBlock': - self.convolutional(child.conv1.rbr_reparam, act=self.get_activation(child.conv1.nonlinearity._get_name())) - if child.block is not None: - for m in child.block: - self.convolutional(m.rbr_reparam, act=self.get_activation(m.nonlinearity._get_name())) - elif child.conv1._get_name() == 'ConvWrapper' or child.conv1._get_name() == 'SimConvWrapper': - self.convolutional(child.conv1.block) - if child.block is not None: - for m in child.block: - self.convolutional(m.block) - else: - raise SystemExit('Model not supported') - - if stage == 'backbone': - self.backbone_outs.append(self.current) - elif stage == 'pan': - self.pan_feats.append(self.current) - - def BepC3(self, child, stage=''): - self.current += 1 - - if child.concat is True: - self.convolutional(child.cv2) - self.route('-2') - self.convolutional(child.cv1) - idx = -3 - if child.m.conv1.conv1._get_name() == 'RepVGGBlock': - self.convolutional(child.m.conv1.conv1.rbr_reparam, - act=self.get_activation(child.m.conv1.conv1.nonlinearity._get_name())) - self.convolutional(child.m.conv1.conv2.rbr_reparam, - act=self.get_activation(child.m.conv1.conv2.nonlinearity._get_name())) - idx -= 2 - if child.m.conv1.shortcut: - self.shortcut(-3) - idx -= 1 - if child.m.block is not None: - for m in child.m.block: - self.convolutional(m.conv1.rbr_reparam, act=self.get_activation(m.conv1.nonlinearity._get_name())) - self.convolutional(m.conv2.rbr_reparam, act=self.get_activation(m.conv2.nonlinearity._get_name())) - idx -= 2 - if m.shortcut: - self.shortcut(-3) - idx -= 1 - elif child.m.conv1.conv1._get_name() == 'ConvWrapper' or child.m.conv1.conv1._get_name() == 'SimConvWrapper': - self.convolutional(child.m.conv1.conv1.block) - self.convolutional(child.m.conv1.conv2.block) - idx -= 2 - if child.m.conv1.shortcut: - self.shortcut(-3) - idx -= 1 - if child.m.block is not None: - for m in child.m.block: - self.convolutional(m.conv1.block) - self.convolutional(m.conv2.block) - idx -= 2 - if m.shortcut: - self.shortcut(-3) - idx -= 1 - else: - raise SystemExit('Model not supported') - - if child.concat is True: - self.route('-1, %d' % idx) - self.convolutional(child.cv3) - - if stage == 'backbone': - self.backbone_outs.append(self.current) - elif stage == 'pan': - self.pan_feats.append(self.current) - - def CSPSPPF(self, child): - self.current += 1 - - self.convolutional(child.cv2) - self.route('-2') - self.convolutional(child.cv1) - self.convolutional(child.cv3) - self.convolutional(child.cv4) - self.maxpool(child.m) - self.maxpool(child.m) - self.maxpool(child.m) - self.route('-4, -3, -2, -1') - self.convolutional(child.cv5) - self.convolutional(child.cv6) - self.route('-11, -1') - self.convolutional(child.cv7) - self.backbone_outs.append(self.current) - - def SPPF(self, child): - self.current += 1 - - self.convolutional(child.cv1) - self.maxpool(child.m) - 
self.maxpool(child.m) - self.maxpool(child.m) - self.route('-4, -3, -2, -1') - self.convolutional(child.cv2) - self.backbone_outs.append(self.current) - - def SimConv(self, child, stage=''): - self.current += 1 - - self.convolutional(child) - if stage == 'fpn': - self.fpn_feats.append(self.current) - - def BiFusion(self, child, idx): - self.current += 1 - - self.deconvolutional(child.upsample.upsample_transpose) - r = self.get_route(self.backbone_outs[- idx -2]) - self.route('%d' % r) - self.convolutional(child.cv1) - r = self.get_route(self.backbone_outs[- idx -3]) - self.route('%d' % r) - self.convolutional(child.cv2) - self.convolutional(child.downsample) - self.route('-6, -4, -1') - self.convolutional(child.cv3) - - def Upsample(self, child): - self.current += 1 - - self.deconvolutional(child.upsample_transpose) - - def Conv(self, child, act=None): - self.current += 1 - - self.convolutional(child, act=act) - - def Concat(self, route): - self.current += 1 - - r = self.get_route(route) - self.route('-1, %d' % r) - - def Route(self, route): - self.current += 1 - - if route > 0: - r = self.get_route(route) - self.route('%d' % r) - else: - self.route('%d' % route) - - def Shuffle(self, reshape=None, transpose1=None, transpose2=None, output=''): - self.current += 1 - - self.shuffle(reshape=reshape, transpose1=transpose1, transpose2=transpose2) - if output == 'cls': - self.yolo_head_cls.append(self.current) - elif output == 'reg': - self.yolo_head_reg.append(self.current) - - def SoftMax(self, axes): - self.current += 1 - - self.softmax(axes) - - def Detect(self, output): - self.current += 1 - - routes = self.yolo_head_cls if output == 'cls' else self.yolo_head_reg - - for i, route in enumerate(routes): - routes[i] = self.get_route(route) - self.route(str(routes)[1:-1], axis=-1) - self.yolo(output) - - def net(self): - self.fc.write('[net]\n' + - 'width=%d\n' % self.width + - 'height=%d\n' % self.height + - 'channels=3\n' + - 'letter_box=1\n') - - def convolutional(self, cv, act=None, detect=False): - self.blocks[self.current] += 1 - - self.get_state_dict(cv.state_dict()) - - if cv._get_name() == 'Conv2d': - filters = cv.out_channels - size = cv.kernel_size - stride = cv.stride - pad = cv.padding - groups = cv.groups - bias = cv.bias - bn = False - act = act if act is not None else 'linear' - else: - filters = cv.conv.out_channels - size = cv.conv.kernel_size - stride = cv.conv.stride - pad = cv.conv.padding - groups = cv.conv.groups - bias = cv.conv.bias - bn = True if hasattr(cv, 'bn') else False - if act is None: - act = self.get_activation(cv.act._get_name()) if hasattr(cv, 'act') else 'linear' - - b = 'batch_normalize=1\n' if bn is True else '' - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' - - self.fc.write('\n[convolutional]\n' + - b + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w + - 'activation=%s\n' % act) - - def deconvolutional(self, cv): - self.blocks[self.current] += 1 - - self.get_state_dict(cv.state_dict()) - - filters = cv.out_channels - size = cv.kernel_size - stride = cv.stride - pad = cv.padding - groups = cv.groups - bias = cv.bias - - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=0\n' if bias is None else '' - - self.fc.write('\n[deconvolutional]\n' + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 
'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w) - - def route(self, layers, axis=0): - self.blocks[self.current] += 1 - - a = 'axis=%d\n' % axis if axis != 0 else '' - - self.fc.write('\n[route]\n' + - 'layers=%s\n' % layers + - a) - - def shortcut(self, r, ew='add', act='linear'): - self.blocks[self.current] += 1 - - m = 'mode=mul\n' if ew == 'mul' else '' - - self.fc.write('\n[shortcut]\n' + - 'from=%d\n' % r + - m + - 'activation=%s\n' % act) - - def maxpool(self, m): - self.blocks[self.current] += 1 - - stride = m.stride - size = m.kernel_size - mode = m.ceil_mode - - m = 'maxpool_up' if mode else 'maxpool' - - self.fc.write('\n[%s]\n' % m + - 'stride=%d\n' % stride + - 'size=%d\n' % size) - - def shuffle(self, reshape=None, transpose1=None, transpose2=None): - self.blocks[self.current] += 1 - - r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if reshape is not None else '' - t1 = 'transpose1=%s\n' % ', '.join(str(x) for x in transpose1) if transpose1 is not None else '' - t2 = 'transpose2=%s\n' % ', '.join(str(x) for x in transpose2) if transpose2 is not None else '' - - self.fc.write('\n[shuffle]\n' + - r + - t1 + - t2) - - def softmax(self, axes): - self.blocks[self.current] += 1 - - self.fc.write('\n[softmax]\n' + - 'axes=%d\n' % axes) - - def yolo(self, output): - self.blocks[self.current] += 1 - - self.fc.write('\n[%s]\n' % output) - - def get_state_dict(self, state_dict): - for k, v in state_dict.items(): - if 'num_batches_tracked' not in k: - vr = v.reshape(-1).numpy() - self.fw.write('{} {} '.format(k, len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_anchors(self, anchor_points, stride_tensor): - vr = anchor_points.numpy() - self.fw.write('{} {} '.format('anchor_points', len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - vr = stride_tensor.numpy() - self.fw.write('{} {} '.format('stride_tensor', len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_value(self, key): - if type(key) == int: - return key - return key[0] if key[0] == key[1] else str(key)[1:-1] - - def get_route(self, n): - r = 0 - for i, b in enumerate(self.blocks): - if i <= n: - r += b - else: - break - return r - 1 - - def get_activation(self, act): - if act == 'Hardswish': - return 'hardswish' - elif act == 'LeakyReLU': - return 'leaky' - elif act == 'SiLU': - return 'silu' - elif act == 'ReLU': - return 'relu' - return 'linear' - - -def parse_args(): - parser = argparse.ArgumentParser(description='PyTorch YOLOv6 conversion') - parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') - parser.add_argument( - '-s', '--size', nargs='+', type=int, help='Inference size [H,W] (default [640])') - parser.add_argument("--p6", action="store_true", help="P6 model") - args = parser.parse_args() - if not os.path.isfile(args.weights): - raise SystemExit('Invalid weights file') - if not args.size: - args.size = [1280] if args.p6 else [640] - return args.weights, args.size - - -pt_file, inference_size = parse_args() - -model_name = os.path.basename(pt_file).split('.pt')[0] -wts_file = model_name + '.wts' if 'yolov6' in model_name else 'yolov6_' + model_name + '.wts' -cfg_file = model_name + '.cfg' if 'yolov6' in model_name else 'yolov6_' + model_name + 
'.cfg' - -model = torch.load(pt_file, map_location='cpu')['model'].float() -model.to('cpu').eval() - -for layer in model.modules(): - if layer._get_name() == 'RepVGGBlock': - layer.switch_to_deploy() - -backbones = ['EfficientRep', 'CSPBepBackbone'] -necks = ['RepBiFPANNeck', 'CSPRepBiFPANNeck', 'RepPANNeck', 'CSPRepPANNeck'] -backbones_p6 = ['EfficientRep6', 'CSPBepBackbone_P6'] -necks_p6 = ['RepBiFPANNeck6', 'CSPRepBiFPANNeck_P6', 'RepPANNeck6', 'CSPRepPANNeck_P6'] - -with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: - layers = Layers(inference_size, fw, fc) - - if model.backbone._get_name() in backbones: - layers.fc.write('\n# %s\n' % model.backbone._get_name()) - - if model.backbone._get_name() == 'EfficientRep': - block1 = layers.RepBlock - elif model.backbone._get_name() == 'CSPBepBackbone': - block1 = layers.BepC3 - - if model.backbone.ERBlock_5[2]._get_name() == 'CSPSPPF' or model.backbone.ERBlock_5[2]._get_name() == 'SimCSPSPPF': - block2 = layers.CSPSPPF - elif model.backbone.ERBlock_5[2]._get_name() == 'SPPF' or model.backbone.ERBlock_5[2]._get_name() == 'SimSPPF': - block2 = layers.SPPF - else: - raise SystemExit('Model not supported') - - layers.BaseConv(model.backbone.stem) - layers.BaseConv(model.backbone.ERBlock_2[0]) - block1(model.backbone.ERBlock_2[1], 'backbone' if hasattr(model.backbone, 'fuse_P2') and - model.backbone.fuse_P2 else '') - layers.BaseConv(model.backbone.ERBlock_3[0]) - block1(model.backbone.ERBlock_3[1], 'backbone') - layers.BaseConv(model.backbone.ERBlock_4[0]) - block1(model.backbone.ERBlock_4[1], 'backbone') - layers.BaseConv(model.backbone.ERBlock_5[0]) - block1(model.backbone.ERBlock_5[1]) - block2(model.backbone.ERBlock_5[2]) - - elif model.backbone._get_name() in backbones_p6: - layers.fc.write('\n# %s\n' % model.backbone._get_name()) - - if model.backbone._get_name() == 'EfficientRep6': - block1 = layers.RepBlock - elif model.backbone._get_name() == 'CSPBepBackbone_P6': - block1 = layers.BepC3 - - if model.backbone.ERBlock_6[2]._get_name() == 'CSPSPPF' or model.backbone.ERBlock_6[2]._get_name() == 'SimCSPSPPF': - block2 = layers.CSPSPPF - elif model.backbone.ERBlock_6[2]._get_name() == 'SPPF' or model.backbone.ERBlock_6[2]._get_name() == 'SimSPPF': - block2 = layers.SPPF - else: - raise SystemExit('Model not supported') - - layers.BaseConv(model.backbone.stem) - layers.BaseConv(model.backbone.ERBlock_2[0]) - block1(model.backbone.ERBlock_2[1], 'backbone' if model.backbone._get_name() == 'CSPBepBackbone_P6' or - (hasattr(model.backbone, 'fuse_P2') and model.backbone.fuse_P2) else '') - layers.BaseConv(model.backbone.ERBlock_3[0]) - block1(model.backbone.ERBlock_3[1], 'backbone') - layers.BaseConv(model.backbone.ERBlock_4[0]) - block1(model.backbone.ERBlock_4[1], 'backbone') - layers.BaseConv(model.backbone.ERBlock_5[0]) - block1(model.backbone.ERBlock_5[1], 'backbone') - layers.BaseConv(model.backbone.ERBlock_6[0]) - block1(model.backbone.ERBlock_6[1]) - block2(model.backbone.ERBlock_6[2]) - - else: - raise SystemExit('Model not supported') - - if model.neck._get_name() in necks: - layers.fc.write('\n# %s\n' % model.neck._get_name()) - - if model.neck._get_name() == 'RepBiFPANNeck' or model.neck._get_name() == 'RepPANNeck': - block = layers.RepBlock - elif model.neck._get_name() == 'CSPRepBiFPANNeck' or model.neck._get_name() == 'CSPRepPANNeck': - block = layers.BepC3 - - layers.SimConv(model.neck.reduce_layer0, 'fpn') - if 'Bi' in model.neck._get_name(): - layers.BiFusion(model.neck.Bifusion0, 0) - else: - 
layers.Upsample(model.neck.upsample0) - layers.Concat(layers.backbone_outs[-2]) - block(model.neck.Rep_p4) - layers.SimConv(model.neck.reduce_layer1, 'fpn') - if 'Bi' in model.neck._get_name(): - layers.BiFusion(model.neck.Bifusion1, 1) - else: - layers.Upsample(model.neck.upsample1) - layers.Concat(layers.backbone_outs[-3]) - block(model.neck.Rep_p3, 'pan') - layers.SimConv(model.neck.downsample2) - layers.Concat(layers.fpn_feats[1]) - block(model.neck.Rep_n3, 'pan') - layers.SimConv(model.neck.downsample1) - layers.Concat(layers.fpn_feats[0]) - block(model.neck.Rep_n4, 'pan') - layers.pan_feats = layers.pan_feats[::-1] - - elif model.neck._get_name() in necks_p6: - layers.fc.write('\n# %s\n' % model.neck._get_name()) - - if model.neck._get_name() == 'RepBiFPANNeck6' or model.neck._get_name() == 'RepPANNeck6': - block = layers.RepBlock - elif model.neck._get_name() == 'CSPRepBiFPANNeck_P6' or model.neck._get_name() == 'CSPRepPANNeck_P6': - block = layers.BepC3 - - layers.SimConv(model.neck.reduce_layer0, 'fpn') - if 'Bi' in model.neck._get_name(): - layers.BiFusion(model.neck.Bifusion0, 0) - else: - layers.Upsample(model.neck.upsample0) - layers.Concat(layers.backbone_outs[-2]) - block(model.neck.Rep_p5) - layers.SimConv(model.neck.reduce_layer1, 'fpn') - if 'Bi' in model.neck._get_name(): - layers.BiFusion(model.neck.Bifusion1, 1) - else: - layers.Upsample(model.neck.upsample1) - layers.Concat(layers.backbone_outs[-3]) - block(model.neck.Rep_p4) - layers.SimConv(model.neck.reduce_layer2, 'fpn') - if 'Bi' in model.neck._get_name(): - layers.BiFusion(model.neck.Bifusion2, 2) - else: - layers.Upsample(model.neck.upsample2) - layers.Concat(layers.backbone_outs[-4]) - block(model.neck.Rep_p3, 'pan') - layers.SimConv(model.neck.downsample2) - layers.Concat(layers.fpn_feats[2]) - block(model.neck.Rep_n4, 'pan') - layers.SimConv(model.neck.downsample1) - layers.Concat(layers.fpn_feats[1]) - block(model.neck.Rep_n5, 'pan') - layers.SimConv(model.neck.downsample0) - layers.Concat(layers.fpn_feats[0]) - block(model.neck.Rep_n6, 'pan') - layers.pan_feats = layers.pan_feats[::-1] - - else: - raise SystemExit('Model not supported') - - if model.detect._get_name() == 'Detect': - layers.fc.write('\n# Detect\n') - - for i, feat in enumerate(layers.pan_feats): - idx = len(layers.pan_feats) - i - 1 - if i > 0: - layers.Route(feat) - layers.Conv(model.detect.stems[idx]) - layers.Conv(model.detect.cls_convs[idx]) - layers.Conv(model.detect.cls_preds[idx], act='sigmoid') - layers.Shuffle(reshape=[model.detect.nc, 'hw'], output='cls') - layers.Route(-4) - layers.Conv(model.detect.reg_convs[idx]) - layers.Conv(model.detect.reg_preds[idx]) - if model.detect.use_dfl: - layers.Shuffle(reshape=[4, model.detect.reg_max + 1, 'hw'], transpose2=[1, 0, 2]) - layers.SoftMax(0) - layers.Conv(model.detect.proj_conv) - layers.Shuffle(reshape=['h', 'w'], output='reg') - else: - layers.Shuffle(reshape=[4, 'hw'], output='reg') - layers.Detect('cls') - layers.Detect('reg') - - x = [] - for stride in model.detect.stride.tolist()[::-1]: - x.append(torch.zeros([1, 1, int(layers.height / stride), int(layers.width / stride)], dtype=torch.float32)) - anchor_points, stride_tensor = generate_anchors(x, model.detect.stride.flip((0,)), model.detect.grid_cell_size, - model.detect.grid_cell_offset, device='cpu', is_eval=True, mode='af') - layers.get_anchors(anchor_points.reshape([-1]), stride_tensor) - - else: - raise SystemExit('Model not supported') - -os.system('echo "%d" | cat - %s > temp && mv temp %s' % (layers.wc, wts_file, 
wts_file)) diff --git a/utils/gen_wts_yoloV7.py b/utils/gen_wts_yoloV7.py deleted file mode 100644 index afbcc3c..0000000 --- a/utils/gen_wts_yoloV7.py +++ /dev/null @@ -1,357 +0,0 @@ -import argparse -import os -import struct -import torch -from utils.torch_utils import select_device - - -class Layers(object): - def __init__(self, n, size, fw, fc): - self.blocks = [0 for _ in range(n)] - self.current = 0 - - self.width = size[0] if len(size) == 1 else size[1] - self.height = size[0] - - self.num = 0 - self.nc = 0 - self.anchors = '' - self.masks = [] - - self.fw = fw - self.fc = fc - self.wc = 0 - - self.net() - - def ReOrg(self, child): - self.current = child.i - self.fc.write('\n# ReOrg\n') - - self.reorg() - - def Conv(self, child): - self.current = child.i - self.fc.write('\n# Conv\n') - - if child.f != -1: - r = self.get_route(child.f) - self.route('%d' % r) - self.convolutional(child) - - def DownC(self, child): - self.current = child.i - self.fc.write('\n# DownC\n') - - self.maxpool(child.mp) - self.convolutional(child.cv3) - self.route('-3') - self.convolutional(child.cv1) - self.convolutional(child.cv2) - self.route('-1, -4') - - def MP(self, child): - self.current = child.i - self.fc.write('\n# MP\n') - - self.maxpool(child.m) - - def SP(self, child): - self.current = child.i - self.fc.write('\n# SP\n') - - if child.f != -1: - r = self.get_route(child.f) - self.route('%d' % r) - self.maxpool(child.m) - - def SPPCSPC(self, child): - self.current = child.i - self.fc.write('\n# SPPCSPC\n') - - self.convolutional(child.cv2) - self.route('-2') - self.convolutional(child.cv1) - self.convolutional(child.cv3) - self.convolutional(child.cv4) - self.maxpool(child.m[0]) - self.route('-2') - self.maxpool(child.m[1]) - self.route('-4') - self.maxpool(child.m[2]) - self.route('-6, -5, -3, -1') - self.convolutional(child.cv5) - self.convolutional(child.cv6) - self.route('-1, -13') - self.convolutional(child.cv7) - - def RepConv(self, child): - self.current = child.i - self.fc.write('\n# RepConv\n') - - if child.f != -1: - r = self.get_route(child.f) - self.route('%d' % r) - self.convolutional(child.rbr_1x1) - self.route('-2') - self.convolutional(child.rbr_dense) - self.shortcut(-3, act=self.get_activation(child.act._get_name())) - - def Upsample(self, child): - self.current = child.i - self.fc.write('\n# Upsample\n') - - self.upsample(child) - - def Concat(self, child): - self.current = child.i - self.fc.write('\n# Concat\n') - - r = [] - for i in range(1, len(child.f)): - r.append(self.get_route(child.f[i])) - self.route('-1, %s' % str(r)[1:-1]) - - def Shortcut(self, child): - self.current = child.i - self.fc.write('\n# Shortcut\n') - - r = self.get_route(child.f[1]) - self.shortcut(r) - - def Detect(self, child): - self.current = child.i - self.fc.write('\n# Detect\n') - - self.get_anchors(child.state_dict(), child.m[0].out_channels) - - for i, m in enumerate(child.m): - r = self.get_route(child.f[i]) - self.route('%d' % r) - self.convolutional(m, detect=True) - self.yolo(i) - - def net(self): - self.fc.write('[net]\n' + - 'width=%d\n' % self.width + - 'height=%d\n' % self.height + - 'channels=3\n' + - 'letter_box=1\n') - - def reorg(self): - self.blocks[self.current] += 1 - - self.fc.write('\n[reorg]\n') - - def convolutional(self, cv, act=None, detect=False): - self.blocks[self.current] += 1 - - self.get_state_dict(cv.state_dict()) - - if cv._get_name() == 'Conv2d': - filters = cv.out_channels - size = cv.kernel_size - stride = cv.stride - pad = cv.padding - groups = cv.groups - bias = 
cv.bias - bn = False - act = 'linear' if not detect else 'logistic' - elif cv._get_name() == 'Sequential': - filters = cv[0].out_channels - size = cv[0].kernel_size - stride = cv[0].stride - pad = cv[0].padding - groups = cv[0].groups - bias = cv[0].bias - bn = True if cv[1]._get_name() == 'BatchNorm2d' else False - act = 'linear' - else: - filters = cv.conv.out_channels - size = cv.conv.kernel_size - stride = cv.conv.stride - pad = cv.conv.padding - groups = cv.conv.groups - bias = cv.conv.bias - bn = True if hasattr(cv, 'bn') else False - if act is None: - act = self.get_activation(cv.act._get_name()) if hasattr(cv, 'act') else 'linear' - - b = 'batch_normalize=1\n' if bn is True else '' - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' - - self.fc.write('\n[convolutional]\n' + - b + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w + - 'activation=%s\n' % act) - - def route(self, layers): - self.blocks[self.current] += 1 - - self.fc.write('\n[route]\n' + - 'layers=%s\n' % layers) - - def shortcut(self, r, act='linear'): - self.blocks[self.current] += 1 - - self.fc.write('\n[shortcut]\n' + - 'from=%d\n' % r + - 'activation=%s\n' % act) - - def maxpool(self, m): - self.blocks[self.current] += 1 - - stride = m.stride - size = m.kernel_size - mode = m.ceil_mode - - m = 'maxpool_up' if mode else 'maxpool' - - self.fc.write('\n[%s]\n' % m + - 'stride=%d\n' % stride + - 'size=%d\n' % size) - - def upsample(self, child): - self.blocks[self.current] += 1 - - stride = child.scale_factor - - self.fc.write('\n[upsample]\n' + - 'stride=%d\n' % stride) - - def yolo(self, i): - self.blocks[self.current] += 1 - - self.fc.write('\n[yolo]\n' + - 'mask=%s\n' % self.masks[i] + - 'anchors=%s\n' % self.anchors + - 'classes=%d\n' % self.nc + - 'num=%d\n' % self.num + - 'scale_x_y=2.0\n' + - 'new_coords=1\n') - - def get_state_dict(self, state_dict): - for k, v in state_dict.items(): - if 'num_batches_tracked' not in k: - vr = v.reshape(-1).numpy() - self.fw.write('{} {} '.format(k, len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_anchors(self, state_dict, out_channels): - anchor_grid = state_dict['anchor_grid'] - aa = anchor_grid.reshape(-1).tolist() - am = anchor_grid.tolist() - - self.num = (len(aa) / 2) - self.nc = int((out_channels / (self.num / len(am))) - 5) - self.anchors = str(aa)[1:-1] - - n = 0 - for m in am: - mask = [] - for _ in range(len(m)): - mask.append(n) - n += 1 - self.masks.append(str(mask)[1:-1]) - - def get_value(self, key): - if type(key) == int: - return key - return key[0] if key[0] == key[1] else str(key)[1:-1] - - def get_route(self, n): - r = 0 - if n < 0: - for i, b in enumerate(self.blocks[self.current-1::-1]): - if i < abs(n) - 1: - r -= b - else: - break - else: - for i, b in enumerate(self.blocks): - if i <= n: - r += b - else: - break - return r - 1 - - def get_activation(self, act): - if act == 'Hardswish': - return 'hardswish' - elif act == 'LeakyReLU': - return 'leaky' - elif act == 'SiLU': - return 'silu' - return 'linear' - - -def parse_args(): - parser = argparse.ArgumentParser(description='PyTorch YOLOv7 conversion') - parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') - parser.add_argument( - '-s', 
'--size', nargs='+', type=int, help='Inference size [H,W] (default [640])') - parser.add_argument("--p6", action="store_true", help="P6 model") - args = parser.parse_args() - if not os.path.isfile(args.weights): - raise SystemExit('Invalid weights file') - if not args.size: - args.size = [1280] if args.p6 else [640] - return args.weights, args.size - - -pt_file, inference_size = parse_args() - -model_name = os.path.basename(pt_file).split('.pt')[0] -wts_file = model_name + '.wts' if 'yolov7' in model_name else 'yolov7_' + model_name + '.wts' -cfg_file = model_name + '.cfg' if 'yolov7' in model_name else 'yolov7_' + model_name + '.cfg' - -device = select_device('cpu') -model = torch.load(pt_file, map_location=device) -model = model['ema' if model.get('ema') else 'model'].float() - -anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None] -delattr(model.model[-1], 'anchor_grid') -model.model[-1].register_buffer('anchor_grid', anchor_grid) - -model.to(device).eval() - -with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: - layers = Layers(len(model.model), inference_size, fw, fc) - - for child in model.model.children(): - if child._get_name() == 'ReOrg': - layers.ReOrg(child) - elif child._get_name() == 'Conv': - layers.Conv(child) - elif child._get_name() == 'DownC': - layers.DownC(child) - elif child._get_name() == 'MP': - layers.MP(child) - elif child._get_name() == 'SP': - layers.SP(child) - elif child._get_name() == 'SPPCSPC': - layers.SPPCSPC(child) - elif child._get_name() == 'RepConv': - layers.RepConv(child) - elif child._get_name() == 'Upsample': - layers.Upsample(child) - elif child._get_name() == 'Concat': - layers.Concat(child) - elif child._get_name() == 'Shortcut': - layers.Shortcut(child) - elif child._get_name() == 'Detect': - layers.Detect(child) - else: - raise SystemExit('Model not supported') - -os.system('echo "%d" | cat - %s > temp && mv temp %s' % (layers.wc, wts_file, wts_file)) diff --git a/utils/gen_wts_yoloV8.py b/utils/gen_wts_yoloV8.py deleted file mode 100644 index eaeb11f..0000000 --- a/utils/gen_wts_yoloV8.py +++ /dev/null @@ -1,323 +0,0 @@ -import argparse -import os -import struct -import torch -from ultralytics.yolo.utils.torch_utils import select_device -from ultralytics.yolo.utils.tal import make_anchors - - -class Layers(object): - def __init__(self, n, size, fw, fc): - self.blocks = [0 for _ in range(n)] - self.current = -1 - - self.width = size[0] if len(size) == 1 else size[1] - self.height = size[0] - - self.fw = fw - self.fc = fc - self.wc = 0 - - self.net() - - def Conv(self, child): - self.current = child.i - self.fc.write('\n# Conv\n') - - self.convolutional(child) - - def C2f(self, child): - self.current = child.i - self.fc.write('\n# C2f\n') - - self.convolutional(child.cv1) - self.c2f(child.m) - self.convolutional(child.cv2) - - def SPPF(self, child): - self.current = child.i - self.fc.write('\n# SPPF\n') - - self.convolutional(child.cv1) - self.maxpool(child.m) - self.maxpool(child.m) - self.maxpool(child.m) - self.route('-4, -3, -2, -1') - self.convolutional(child.cv2) - - def Upsample(self, child): - self.current = child.i - self.fc.write('\n# Upsample\n') - - self.upsample(child) - - def Concat(self, child): - self.current = child.i - self.fc.write('\n# Concat\n') - - r = [] - for i in range(1, len(child.f)): - r.append(self.get_route(child.f[i])) - self.route('-1, %s' % str(r)[1:-1]) - - def Detect(self, child): - self.current = child.i - self.fc.write('\n# Detect\n') - - output_idxs = [0 for _ in 
range(child.nl)] - for i in range(child.nl): - r = self.get_route(child.f[i]) - self.route('%d' % r) - for j in range(len(child.cv3[i])): - self.convolutional(child.cv3[i][j]) - self.route('%d' % (-1 - len(child.cv3[i]))) - for j in range(len(child.cv2[i])): - self.convolutional(child.cv2[i][j]) - self.route('-1, %d' % (-2 - len(child.cv2[i]))) - self.shuffle(reshape=[child.no, -1]) - output_idxs[i] = (-1 + i * (-4 - len(child.cv3[i]) - len(child.cv2[i]))) - self.route('%s' % str(output_idxs[::-1])[1:-1], axis=1) - self.yolo(child) - - def net(self): - self.fc.write('[net]\n' + - 'width=%d\n' % self.width + - 'height=%d\n' % self.height + - 'channels=3\n' + - 'letter_box=1\n') - - def convolutional(self, cv, act=None, detect=False): - self.blocks[self.current] += 1 - - self.get_state_dict(cv.state_dict()) - - if cv._get_name() == 'Conv2d': - filters = cv.out_channels - size = cv.kernel_size - stride = cv.stride - pad = cv.padding - groups = cv.groups - bias = cv.bias - bn = False - act = 'linear' if not detect else 'logistic' - else: - filters = cv.conv.out_channels - size = cv.conv.kernel_size - stride = cv.conv.stride - pad = cv.conv.padding - groups = cv.conv.groups - bias = cv.conv.bias - bn = True if hasattr(cv, 'bn') else False - if act is None: - act = self.get_activation(cv.act._get_name()) if hasattr(cv, 'act') else 'linear' - - b = 'batch_normalize=1\n' if bn is True else '' - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' - - self.fc.write('\n[convolutional]\n' + - b + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w + - 'activation=%s\n' % act) - - def c2f(self, m): - self.blocks[self.current] += 1 - - for x in m: - self.get_state_dict(x.state_dict()) - - n = len(m) - shortcut = 1 if m[0].add else 0 - filters = m[0].cv1.conv.out_channels - size = m[0].cv1.conv.kernel_size - stride = m[0].cv1.conv.stride - pad = m[0].cv1.conv.padding - groups = m[0].cv1.conv.groups - bias = m[0].cv1.conv.bias - bn = True if hasattr(m[0].cv1, 'bn') else False - act = 'linear' - if hasattr(m[0].cv1, 'act'): - act = self.get_activation(m[0].cv1.act._get_name()) - - b = 'batch_normalize=1\n' if bn is True else '' - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' - - self.fc.write('\n[c2f]\n' + - 'n=%d\n' % n + - 'shortcut=%d\n' % shortcut + - b + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w + - 'activation=%s\n' % act) - - def route(self, layers, axis=0): - self.blocks[self.current] += 1 - - a = 'axis=%d\n' % axis if axis != 0 else '' - - self.fc.write('\n[route]\n' + - 'layers=%s\n' % layers + - a) - - def shortcut(self, r, ew='add', act='linear'): - self.blocks[self.current] += 1 - - m = 'mode=mul\n' if ew == 'mul' else '' - - self.fc.write('\n[shortcut]\n' + - 'from=%d\n' % r + - m + - 'activation=%s\n' % act) - - def maxpool(self, m): - self.blocks[self.current] += 1 - - stride = m.stride - size = m.kernel_size - mode = m.ceil_mode - - m = 'maxpool_up' if mode else 'maxpool' - - self.fc.write('\n[%s]\n' % m + - 'stride=%d\n' % stride + - 'size=%d\n' % size) - - def upsample(self, child): - self.blocks[self.current] += 1 - - stride = child.scale_factor - 
- self.fc.write('\n[upsample]\n' + - 'stride=%d\n' % stride) - - def shuffle(self, reshape=None, transpose1=None, transpose2=None): - self.blocks[self.current] += 1 - - r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if reshape is not None else '' - t1 = 'transpose1=%s\n' % ', '.join(str(x) for x in transpose1) if transpose1 is not None else '' - t2 = 'transpose2=%s\n' % ', '.join(str(x) for x in transpose2) if transpose2 is not None else '' - - self.fc.write('\n[shuffle]\n' + - r + - t1 + - t2) - - def yolo(self, child): - self.blocks[self.current] += 1 - - self.fc.write('\n[detect_v8]\n' + - 'num=%d\n' % (child.reg_max * 4) + - 'classes=%d\n' % child.nc) - - def get_state_dict(self, state_dict): - for k, v in state_dict.items(): - if 'num_batches_tracked' not in k: - vr = v.reshape(-1).numpy() - self.fw.write('{} {} '.format(k, len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_anchors(self, anchor_points, stride_tensor): - vr = anchor_points.numpy() - self.fw.write('{} {} '.format('anchor_points', len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - vr = stride_tensor.numpy() - self.fw.write('{} {} '.format('stride_tensor', len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_value(self, key): - if type(key) == int: - return key - return key[0] if key[0] == key[1] else str(key)[1:-1] - - def get_route(self, n): - r = 0 - for i, b in enumerate(self.blocks): - if i <= n: - r += b - else: - break - return r - 1 - - def get_activation(self, act): - if act == 'Hardswish': - return 'hardswish' - elif act == 'LeakyReLU': - return 'leaky' - elif act == 'SiLU': - return 'silu' - return 'linear' - - -def parse_args(): - parser = argparse.ArgumentParser(description='PyTorch YOLOv8 conversion') - parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') - parser.add_argument( - '-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') - args = parser.parse_args() - if not os.path.isfile(args.weights): - raise SystemExit('Invalid weights file') - return args.weights, args.size - - -pt_file, inference_size = parse_args() - -model_name = os.path.basename(pt_file).split('.pt')[0] -wts_file = model_name + '.wts' if 'yolov8' in model_name else 'yolov8_' + model_name + '.wts' -cfg_file = model_name + '.cfg' if 'yolov8' in model_name else 'yolov8_' + model_name + '.cfg' - -device = select_device('cpu') -model = torch.load(pt_file, map_location=device)['model'].float() -model.to(device).eval() - -if model.names and model.nc: - with open("labels.txt", 'w') as fw: - for i in range(model.nc): - fw.write(model.names[i] + '\n') - -with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: - layers = Layers(len(model.model), inference_size, fw, fc) - - for child in model.model.children(): - if child._get_name() == 'Conv': - layers.Conv(child) - elif child._get_name() == 'C2f': - layers.C2f(child) - elif child._get_name() == 'SPPF': - layers.SPPF(child) - elif child._get_name() == 'Upsample': - layers.Upsample(child) - elif child._get_name() == 'Concat': - layers.Concat(child) - elif child._get_name() == 'Detect': - layers.Detect(child) - x = [] - for stride in model.stride.tolist(): - x.append(torch.zeros([1, 1, int(layers.height / stride), 
int(layers.width / stride)], dtype=torch.float32)) - anchor_points, stride_tensor = (x.transpose(0, 1) for x in make_anchors(x, child.stride, 0.5)) - layers.get_anchors(anchor_points.reshape([-1]), stride_tensor.reshape([-1])) - else: - raise SystemExit('Model not supported') - -os.system('echo "%d" | cat - %s > temp && mv temp %s' % (layers.wc, wts_file, wts_file)) diff --git a/utils/gen_wts_yolor.py b/utils/gen_wts_yolor.py deleted file mode 100644 index ffaf427..0000000 --- a/utils/gen_wts_yolor.py +++ /dev/null @@ -1,56 +0,0 @@ -import argparse -import os -import struct -import torch -from utils.torch_utils import select_device -from models.models import Darknet - - -def parse_args(): - parser = argparse.ArgumentParser(description='PyTorch YOLOR conversion (main branch)') - parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') - parser.add_argument('-c', '--cfg', default='', help='Input cfg (.cfg) file path') - args = parser.parse_args() - if not os.path.isfile(args.weights): - raise SystemExit('Invalid weights file') - if args.cfg != '' and not os.path.isfile(args.cfg): - raise SystemExit('Invalid cfg file') - return args.weights, args.cfg - - -pt_file, cfg_file = parse_args() - - -model_name = os.path.basename(pt_file).split('.pt')[0] -wts_file = model_name + '.wts' if 'yolor' in model_name else 'yolor_' + model_name + '.wts' -new_cfg_file = model_name + '.cfg' if 'yolor' in model_name else 'yolor_' + model_name + '.cfg' - -if cfg_file == '': - cfg_file = 'cfg/' + model_name + '.cfg' - if not os.path.isfile(cfg_file): - raise SystemExit('CFG file not found') -elif not os.path.isfile(cfg_file): - raise SystemExit('Invalid CFG file') - -device = select_device('cpu') -model = Darknet(cfg_file).to(device) -model.load_state_dict(torch.load(pt_file, map_location=device)['model']) -model.to(device).eval() - -with open(wts_file, 'w') as f: - wts_write = '' - conv_count = 0 - for k, v in model.state_dict().items(): - if 'num_batches_tracked' not in k: - vr = v.reshape(-1).cpu().numpy() - wts_write += '{} {} '.format(k, len(vr)) - for vv in vr: - wts_write += ' ' - wts_write += struct.pack('>f', float(vv)).hex() - wts_write += '\n' - conv_count += 1 - f.write('{}\n'.format(conv_count)) - f.write(wts_write) - -if not os.path.isfile(new_cfg_file): - os.system('cp %s %s' % (cfg_file, new_cfg_file)) diff --git a/utils/gen_wts_yolox.py b/utils/gen_wts_yolox.py deleted file mode 100644 index 179cef1..0000000 --- a/utils/gen_wts_yolox.py +++ /dev/null @@ -1,370 +0,0 @@ -import argparse -import os -import struct -import torch -from yolox.exp import get_exp - - -class Layers(object): - def __init__(self, size, fw, fc): - self.blocks = [0 for _ in range(300)] - self.current = -1 - - self.width = size[0] if len(size) == 1 else size[1] - self.height = size[0] - - self.backbone_outs = [] - self.fpn_feats = [] - self.pan_feats = [] - self.yolo_head = [] - - self.fw = fw - self.fc = fc - self.wc = 0 - - self.net() - - def Conv(self, child): - self.current += 1 - - if child._get_name() == 'DWConv': - self.convolutional(child.dconv) - self.convolutional(child.pconv) - else: - self.convolutional(child) - - def Focus(self, child): - self.current += 1 - - self.reorg() - self.convolutional(child.conv) - - def BaseConv(self, child, stage='', act=None): - self.current += 1 - - self.convolutional(child, act=act) - if stage == 'fpn': - self.fpn_feats.append(self.current) - - def CSPLayer(self, child, stage=''): - self.current += 1 - - self.convolutional(child.conv2) - 
self.route('-2') - self.convolutional(child.conv1) - idx = -3 - for m in child.m: - if m.use_add: - self.convolutional(m.conv1) - if m.conv2._get_name() == 'DWConv': - self.convolutional(m.conv2.dconv) - self.convolutional(m.conv2.pconv) - self.shortcut(-4) - idx -= 4 - else: - self.convolutional(m.conv2) - self.shortcut(-3) - idx -= 3 - else: - self.convolutional(m.conv1) - if m.conv2._get_name() == 'DWConv': - self.convolutional(m.conv2.dconv) - self.convolutional(m.conv2.pconv) - idx -= 3 - else: - self.convolutional(m.conv2) - idx -= 2 - self.route('-1, %d' % idx) - self.convolutional(child.conv3) - if stage == 'backbone': - self.backbone_outs.append(self.current) - elif stage == 'pan': - self.pan_feats.append(self.current) - - def SPPBottleneck(self, child): - self.current += 1 - - self.convolutional(child.conv1) - self.maxpool(child.m[0]) - self.route('-2') - self.maxpool(child.m[1]) - self.route('-4') - self.maxpool(child.m[2]) - self.route('-6, -5, -3, -1') - self.convolutional(child.conv2) - - def Upsample(self, child): - self.current += 1 - - self.upsample(child) - - def Concat(self, route): - self.current += 1 - - r = self.get_route(route) - self.route('-1, %d' % r) - - def Route(self, route): - self.current += 1 - - if route > 0: - r = self.get_route(route) - self.route('%d' % r) - else: - self.route('%d' % route) - - def RouteShuffleOut(self, route): - self.current += 1 - - self.route(route) - self.shuffle(reshape=['c', 'hw']) - self.yolo_head.append(self.current) - - def Detect(self, strides): - self.current += 1 - - routes = self.yolo_head[::-1] - - for i, route in enumerate(routes): - routes[i] = self.get_route(route) - self.route(str(routes)[1:-1], axis=1) - self.shuffle(transpose1=[1, 0]) - self.yolo(strides) - - def net(self): - self.fc.write('[net]\n' + - 'width=%d\n' % self.width + - 'height=%d\n' % self.height + - 'channels=3\n' + - 'letter_box=1\n') - - def reorg(self): - self.blocks[self.current] += 1 - - self.fc.write('\n[reorg]\n') - - def convolutional(self, cv, act=None, detect=False): - self.blocks[self.current] += 1 - - self.get_state_dict(cv.state_dict()) - - if cv._get_name() == 'Conv2d': - filters = cv.out_channels - size = cv.kernel_size - stride = cv.stride - pad = cv.padding - groups = cv.groups - bias = cv.bias - bn = False - act = act if act is not None else 'linear' - else: - filters = cv.conv.out_channels - size = cv.conv.kernel_size - stride = cv.conv.stride - pad = cv.conv.padding - groups = cv.conv.groups - bias = cv.conv.bias - bn = True if hasattr(cv, 'bn') else False - if act is None: - act = self.get_activation(cv.act._get_name()) if hasattr(cv, 'act') else 'linear' - - b = 'batch_normalize=1\n' if bn is True else '' - g = 'groups=%d\n' % groups if groups > 1 else '' - w = 'bias=1\n' if bias is not None and bn is not False else 'bias=0\n' if bias is None and bn is False else '' - - self.fc.write('\n[convolutional]\n' + - b + - 'filters=%d\n' % filters + - 'size=%s\n' % self.get_value(size) + - 'stride=%s\n' % self.get_value(stride) + - 'pad=%s\n' % self.get_value(pad) + - g + - w + - 'activation=%s\n' % act) - - def route(self, layers, axis=0): - self.blocks[self.current] += 1 - - a = 'axis=%d\n' % axis if axis != 0 else '' - - self.fc.write('\n[route]\n' + - 'layers=%s\n' % layers + - a) - - def shortcut(self, r, ew='add', act='linear'): - self.blocks[self.current] += 1 - - m = 'mode=mul\n' if ew == 'mul' else '' - - self.fc.write('\n[shortcut]\n' + - 'from=%d\n' % r + - m + - 'activation=%s\n' % act) - - def maxpool(self, m): - 
self.blocks[self.current] += 1 - - stride = m.stride - size = m.kernel_size - mode = m.ceil_mode - - m = 'maxpool_up' if mode else 'maxpool' - - self.fc.write('\n[%s]\n' % m + - 'stride=%d\n' % stride + - 'size=%d\n' % size) - - def upsample(self, child): - self.blocks[self.current] += 1 - - stride = child.scale_factor - - self.fc.write('\n[upsample]\n' + - 'stride=%d\n' % stride) - - def shuffle(self, reshape=None, transpose1=None, transpose2=None): - self.blocks[self.current] += 1 - - r = 'reshape=%s\n' % ', '.join(str(x) for x in reshape) if reshape is not None else '' - t1 = 'transpose1=%s\n' % ', '.join(str(x) for x in transpose1) if transpose1 is not None else '' - t2 = 'transpose2=%s\n' % ', '.join(str(x) for x in transpose2) if transpose2 is not None else '' - - self.fc.write('\n[shuffle]\n' + - r + - t1 + - t2) - - def yolo(self, strides): - self.blocks[self.current] += 1 - - self.fc.write('\n[detect_x]\n' + - 'strides=%s\n' % str(strides)[1:-1]) - - def get_state_dict(self, state_dict): - for k, v in state_dict.items(): - if 'num_batches_tracked' not in k: - vr = v.reshape(-1).numpy() - self.fw.write('{} {} '.format(k, len(vr))) - for vv in vr: - self.fw.write(' ') - self.fw.write(struct.pack('>f', float(vv)).hex()) - self.fw.write('\n') - self.wc += 1 - - def get_value(self, key): - if type(key) == int: - return key - return key[0] if key[0] == key[1] else str(key)[1:-1] - - def get_route(self, n): - r = 0 - for i, b in enumerate(self.blocks): - if i <= n: - r += b - else: - break - return r - 1 - - def get_activation(self, act): - if act == 'Hardswish': - return 'hardswish' - elif act == 'LeakyReLU': - return 'leaky' - elif act == 'SiLU': - return 'silu' - return 'linear' - - -def parse_args(): - parser = argparse.ArgumentParser(description='PyTorch YOLOX conversion') - parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') - parser.add_argument('-e', '--exp', required=True, help='Input exp (.py) file path (required)') - args = parser.parse_args() - if not os.path.isfile(args.weights): - raise SystemExit('Invalid weights file') - if not os.path.isfile(args.exp): - raise SystemExit('Invalid exp file') - return args.weights, args.exp - - -pth_file, exp_file = parse_args() - -exp = get_exp(exp_file) -model = exp.get_model() -model.load_state_dict(torch.load(pth_file, map_location='cpu')['model']) -model.to('cpu').eval() - -model_name = exp.exp_name -inference_size = (exp.input_size[1], exp.input_size[0]) - -backbone = model.backbone._get_name() -head = model.head._get_name() - -wts_file = model_name + '.wts' if 'yolox' in model_name else 'yolox_' + model_name + '.wts' -cfg_file = model_name + '.cfg' if 'yolox' in model_name else 'yolox_' + model_name + '.cfg' - -with open(wts_file, 'w') as fw, open(cfg_file, 'w') as fc: - layers = Layers(inference_size, fw, fc) - - if backbone == 'YOLOPAFPN': - layers.fc.write('\n# YOLOPAFPN\n') - - layers.Focus(model.backbone.backbone.stem) - layers.Conv(model.backbone.backbone.dark2[0]) - layers.CSPLayer(model.backbone.backbone.dark2[1]) - layers.Conv(model.backbone.backbone.dark3[0]) - layers.CSPLayer(model.backbone.backbone.dark3[1], 'backbone') - layers.Conv(model.backbone.backbone.dark4[0]) - layers.CSPLayer(model.backbone.backbone.dark4[1], 'backbone') - layers.Conv(model.backbone.backbone.dark5[0]) - layers.SPPBottleneck(model.backbone.backbone.dark5[1]) - layers.CSPLayer(model.backbone.backbone.dark5[2], 'backbone') - layers.BaseConv(model.backbone.lateral_conv0, 'fpn') - 
layers.Upsample(model.backbone.upsample) - layers.Concat(layers.backbone_outs[1]) - layers.CSPLayer(model.backbone.C3_p4) - layers.BaseConv(model.backbone.reduce_conv1, 'fpn') - layers.Upsample(model.backbone.upsample) - layers.Concat(layers.backbone_outs[0]) - layers.CSPLayer(model.backbone.C3_p3, 'pan') - layers.Conv(model.backbone.bu_conv2) - layers.Concat(layers.fpn_feats[1]) - layers.CSPLayer(model.backbone.C3_n3, 'pan') - layers.Conv(model.backbone.bu_conv1) - layers.Concat(layers.fpn_feats[0]) - layers.CSPLayer(model.backbone.C3_n4, 'pan') - layers.pan_feats = layers.pan_feats[::-1] - else: - raise SystemExit('Model not supported') - - if head == 'YOLOXHead': - layers.fc.write('\n# YOLOXHead\n') - - for i, feat in enumerate(layers.pan_feats): - idx = len(layers.pan_feats) - i - 1 - dw = True if model.head.cls_convs[idx][0]._get_name() == 'DWConv' else False - if i > 0: - layers.Route(feat) - layers.BaseConv(model.head.stems[idx]) - layers.Conv(model.head.cls_convs[idx][0]) - layers.Conv(model.head.cls_convs[idx][1]) - layers.BaseConv(model.head.cls_preds[idx], act='sigmoid') - if dw: - layers.Route(-6) - else: - layers.Route(-4) - layers.Conv(model.head.reg_convs[idx][0]) - layers.Conv(model.head.reg_convs[idx][1]) - layers.BaseConv(model.head.obj_preds[idx], act='sigmoid') - layers.Route(-2) - layers.BaseConv(model.head.reg_preds[idx]) - if dw: - layers.RouteShuffleOut('-1, -3, -9') - else: - layers.RouteShuffleOut('-1, -3, -7') - layers.Detect(model.head.strides) - - else: - raise SystemExit('Model not supported') - -os.system('echo "%d" | cat - %s > temp && mv temp %s' % (layers.wc, wts_file, wts_file))
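All of the removed `gen_wts_*` converters above share the same on-disk weight layout: each tensor of the model's `state_dict` (except `num_batches_tracked` buffers) is written as a line containing the parameter name, its element count, and one big-endian float32 hex string per value, and the total number of tensors is prepended as the first line (the scripts do this afterwards via `echo "%d" | cat`). The snippet below is a minimal standalone sketch of that serialization for reference, not code from the repository; the `write_wts` helper and the toy layer names are illustrative only.

```
# Sketch (assumed helper, not repository code) of the .wts format produced by the
# removed gen_wts_* scripts: "<name> <num_elements>  <hex> <hex> ..." per tensor,
# big-endian float32 hex per value, tensor count on the first line.
import struct
import torch


def write_wts(state_dict, path):
    lines = []
    for name, tensor in state_dict.items():
        if 'num_batches_tracked' in name:  # skipped by the original scripts as well
            continue
        values = tensor.reshape(-1).float().numpy()
        parts = ['{} {} '.format(name, len(values))]
        for v in values:
            parts.append(' ' + struct.pack('>f', float(v)).hex())
        lines.append(''.join(parts))
    with open(path, 'w') as f:
        f.write('{}\n'.format(len(lines)))  # tensor count, done via "echo | cat" in the scripts
        f.write('\n'.join(lines) + '\n')


# Toy example with hypothetical layer names:
write_wts({'conv.weight': torch.ones(2, 2), 'conv.bias': torch.zeros(2)}, 'example.wts')
```

Reading the format back is the mirror operation: parse the leading tensor count, then for each line split off the name and element count and decode each 8-character hex chunk with `struct.unpack('>f', bytes.fromhex(h))[0]`.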