From 9bda315ee0834ca0fb2d7f6b5f34c0a69ddc24e0 Mon Sep 17 00:00:00 2001
From: Marcos Luciano <marcoslucianops@gmail.com>
Date: Fri, 24 Nov 2023 01:47:14 -0300
Subject: [PATCH] Add RT-DETR Paddle

---
 README.md                             |   6 +-
 docs/PPYOLOE.md                       |   2 +-
 docs/RTDETR_Paddle.md                 | 179 ++++++++++++++++++++++++++
 docs/{RTDETR.md => RTDETR_PyTorch.md} |   4 +-
 utils/export_rtdetr_paddle.py         | 104 +++++++++++++++
 5 files changed, 290 insertions(+), 5 deletions(-)
 create mode 100644 docs/RTDETR_Paddle.md
 rename docs/{RTDETR.md => RTDETR_PyTorch.md} (95%)
 create mode 100755 utils/export_rtdetr_paddle.py

diff --git a/README.md b/README.md
index 0cd4ad4..7f7a780 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,8 @@ NVIDIA DeepStream SDK 6.3 / 6.2 / 6.1.1 / 6.1 / 6.0.1 / 6.0 / 5.1  configuration
 * Dynamic batch-size for Darknet and ONNX exported models
 * INT8 calibration (PTQ) for Darknet and ONNX exported models
 * New output structure (fix wrong output on DeepStream < 6.2) - it need to export the ONNX model with the new export file, generate the TensorRT engine again with the updated files, and use the new config_infer_primary file according to your model
-* **RT-DETR (https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch)**
+* **RT-DETR PyTorch (https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch)**
+* **RT-DETR Paddle (https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_paddle)**
 * **RT-DETR Ultralytics (https://docs.ultralytics.com/models/rtdetr)**
 
 ##
@@ -53,7 +54,8 @@ NVIDIA DeepStream SDK 6.3 / 6.2 / 6.1.1 / 6.1 / 6.0.1 / 6.0 / 5.1  configuration
 * [DAMO-YOLO usage](docs/DAMOYOLO.md)
 * [PP-YOLOE / PP-YOLOE+ usage](docs/PPYOLOE.md)
 * [YOLO-NAS usage](docs/YOLONAS.md)
-* [RT-DETR usage](docs/RTDETR.md)
+* [RT-DETR PyTorch usage](docs/RTDETR_PyTorch.md)
+* [RT-DETR Paddle usage](docs/RTDETR_Paddle.md)
 * [RT-DETR Ultralytics usage](docs/RTDETR_Ultralytics.md)
 * [Using your custom model](docs/customModels.md)
 * [Multiple YOLO GIEs](docs/multipleGIEs.md)
diff --git a/docs/PPYOLOE.md b/docs/PPYOLOE.md
index 9f57f21..93ac811 100644
--- a/docs/PPYOLOE.md
+++ b/docs/PPYOLOE.md
@@ -14,7 +14,7 @@
 
 #### 1. Download the PaddleDetection repo and install the requirements
 
-https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/docs/tutorials/INSTALL.md
+https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.7/docs/tutorials/INSTALL.md
 
 **NOTE**: It is recommended to use Python virtualenv.
 
diff --git a/docs/RTDETR_Paddle.md b/docs/RTDETR_Paddle.md
new file mode 100644
index 0000000..d19fc8e
--- /dev/null
+++ b/docs/RTDETR_Paddle.md
@@ -0,0 +1,179 @@
+# RT-DETR Paddle usage
+
+**NOTE**: https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_paddle version.
+
+* [Convert model](#convert-model)
+* [Compile the lib](#compile-the-lib)
+* [Edit the config_infer_primary_rtdetr file](#edit-the-config_infer_primary_rtdetr-file)
+* [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file)
+* [Testing the model](#testing-the-model)
+
+##
+
+### Convert model
+
+#### 1. Download the RT-DETR repo and install the requirements
+
+https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.7/docs/tutorials/INSTALL.md
+
+```
+git clone https://github.com/lyuwenyu/RT-DETR.git
+cd RT-DETR/rtdetr_paddle
+pip3 install -r requirements.txt
+pip3 install onnx onnxsim onnxruntime paddle2onnx
+```
+
+**NOTE**: It is recommended to use Python virtualenv.
+
+#### 2. Copy converter
+
+Copy the `export_rtdetr_paddle.py` file from `DeepStream-Yolo/utils` directory to the `RT-DETR/rtdetr_paddle` folder.
+
+#### 3. Download the model
+
+Download the `pdparams` file from [RT-DETR Paddle](https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_paddle) releases (example for RT-DETR-R50)
+
+```
+wget https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams
+```
+
+**NOTE**: You can use your custom model.
+
+#### 4. Convert model
+
+Generate the ONNX model file (example for RT-DETR-R50)
+
+```
+python3 export_rtdetr_paddle.py -w rtdetr_r50vd_6x_coco.pdparams -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --dynamic
+```
+
+**NOTE**: To simplify the ONNX model (DeepStream >= 6.0)
+
+```
+--simplify
+```
+
+**NOTE**: To use dynamic batch-size (DeepStream >= 6.1)
+
+```
+--dynamic
+```
+
+**NOTE**: To use static batch-size (example for batch-size = 4)
+
+```
+--batch 4
+```
+
+**NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16.
+
+```
+--opset 12
+```
+
+#### 5. Copy generated files
+
+Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder.
+
+##
+
+### Compile the lib
+
+Open the `DeepStream-Yolo` folder and compile the lib
+
+* DeepStream 6.3 on x86 platform
+
+  ```
+  CUDA_VER=12.1 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.2 on x86 platform
+
+  ```
+  CUDA_VER=11.8 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.1.1 on x86 platform
+
+  ```
+  CUDA_VER=11.7 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.1 on x86 platform
+
+  ```
+  CUDA_VER=11.6 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.0.1 / 6.0 on x86 platform
+
+  ```
+  CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 5.1 on x86 platform
+
+  ```
+  CUDA_VER=11.1 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 on Jetson platform
+
+  ```
+  CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+* DeepStream 6.0.1 / 6.0 / 5.1 on Jetson platform
+
+  ```
+  CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo
+  ```
+
+##
+
+### Edit the config_infer_primary_rtdetr file
+
+Edit the `config_infer_primary_rtdetr.txt` file according to your model (example for RT-DETR-R50 with 80 classes)
+
+```
+[property]
+...
+onnx-file=rtdetr_r50vd_6x_coco.onnx
+...
+num-detected-classes=80
+...
+parse-bbox-func-name=NvDsInferParseYolo
+...
+```
+
+**NOTE**: **RT-DETR** does not resize the input with padding. To get better accuracy, use
+
+```
+[property]
+...
+maintain-aspect-ratio=0
+...
+```
+
+##
+
+### Edit the deepstream_app_config file
+
+```
+...
+[primary-gie]
+...
+config-file=config_infer_primary_rtdetr.txt
+```
+
+##
+
+### Testing the model
+
+```
+deepstream-app -c deepstream_app_config.txt
+```
+
+**NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes).
+
+**NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file.
diff --git a/docs/RTDETR.md b/docs/RTDETR_PyTorch.md
similarity index 95%
rename from docs/RTDETR.md
rename to docs/RTDETR_PyTorch.md
index 2f19a15..07937f9 100644
--- a/docs/RTDETR.md
+++ b/docs/RTDETR_PyTorch.md
@@ -1,4 +1,4 @@
-# RT-DETR usage
+# RT-DETR PyTorch usage
 
 **NOTE**: https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch version.
 
@@ -29,7 +29,7 @@ Copy the `export_rtdetr_pytorch.py` file from `DeepStream-Yolo/utils` directory
 
 #### 3. Download the model
 
-Download the `pth` file from [RT-DETR](https://github.com/lyuwenyu/storage/releases) releases (example for RT-DETR-R50)
+Download the `pth` file from [RT-DETR PyTorch](https://github.com/lyuwenyu/storage/releases/tag/v0.1) releases (example for RT-DETR-R50)
 
 ```
 wget https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth
diff --git a/utils/export_rtdetr_paddle.py b/utils/export_rtdetr_paddle.py
new file mode 100755
index 0000000..d642eae
--- /dev/null
+++ b/utils/export_rtdetr_paddle.py
@@ -0,0 +1,104 @@
+import os
+import sys
+import warnings
+import onnx
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from ppdet.core.workspace import load_config, merge_config
+from ppdet.utils.check import check_version, check_config
+from ppdet.utils.cli import ArgsParser
+from ppdet.engine import Trainer
+
+
+class DeepStreamOutput(nn.Layer):
+    """Convert the wrapped model's output dict into (boxes, scores, classes) tensors."""
+    def __init__(self, img_size, use_focal_loss):
+        super().__init__()  # initialise the Layer before attaching attributes to it
+        self.img_size, self.use_focal_loss = img_size, use_focal_loss
+
+    def forward(self, x):
+        # scale factor: img_size reversed and repeated twice (presumably matching 4 normalised box coords)
+        out_shape = paddle.to_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1)
+        boxes = x['bbox'] * out_shape  # out-of-place, so the incoming tensor is never mutated
+        probs = F.sigmoid(x['bbox_num']) if self.use_focal_loss else F.softmax(x['bbox_num'])[:, :, :-1]
+        scores = paddle.max(probs, 2, keepdim=True)
+        classes = paddle.cast(paddle.argmax(probs, 2, keepdim=True), dtype='float32')
+        return boxes, scores, classes
+
+
+def suppress_warnings():
+    warnings.simplefilter('ignore')  # drop every warning raised from here on
+
+
+def rtdetr_paddle_export(FLAGS):
+    """Load config and weights from FLAGS; return (trainer.cfg, static model from the trainer)."""
+    import tempfile  # function-scope import: only needed for the scratch directory below
+    cfg = load_config(FLAGS.config)
+    FLAGS.opt['weights'] = FLAGS.weights
+    FLAGS.opt['exclude_nms'] = True
+    FLAGS.opt['exclude_post_process'] = True
+    merge_config(FLAGS.opt)  # one merge is enough; repeating it with the same options was redundant
+    check_config(cfg)
+    check_version()
+    trainer = Trainer(cfg, mode='test')
+    trainer.load_weights(cfg.weights)
+    trainer.model.eval()
+    # a TemporaryDirectory replaces `.tmp` + `rm -r`: portable, and cleaned up even if the call raises
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        static_model, _ = trainer._get_infer_cfg_and_input_spec(tmp_dir)
+    return trainer.cfg, static_model
+
+
+def main(FLAGS):
+    """Build the RT-DETR Paddle model described by FLAGS and write it out as an ONNX file."""
+    suppress_warnings()
+    print('\nStarting: %s' % FLAGS.weights)
+    print('\nOpening RT-DETR Paddle model\n')
+
+    paddle.set_device('cpu')
+    cfg, detector = rtdetr_paddle_export(FLAGS)
+
+    # the two eval_size entries are swapped here; DeepStreamOutput reverses them again for scaling
+    img_size = [cfg.eval_size[1], cfg.eval_size[0]]
+    head = DeepStreamOutput(img_size, cfg.use_focal_loss)
+    wrapped = nn.Sequential(detector, head)
+
+    # FLAGS.batch is None when --dynamic was given, so the batch dimension is then left open
+    image_spec = paddle.static.InputSpec(shape=[FLAGS.batch, 3, *img_size], dtype='float32', name='image')
+    onnx_output_file = cfg.filename + '.onnx'
+
+    print('\nExporting the model to ONNX\n')
+    # export receives the path without extension; the '.onnx' file is the one re-read below
+    paddle.onnx.export(wrapped, cfg.filename, input_spec=[{'image': image_spec}], opset_version=FLAGS.opset)
+
+    if FLAGS.simplify:
+        print('\nSimplifying the ONNX model')
+        import onnxsim
+        simplified, _ = onnxsim.simplify(onnx.load(onnx_output_file))
+        onnx.save(simplified, onnx_output_file)
+
+    print('\nDone: %s\n' % onnx_output_file)
+
+
+def parse_args():
+    """Parse the command line, validate it and return the namespace (batch is None if --dynamic)."""
+    cli = ArgsParser()
+    cli.add_argument('-w', '--weights', required=True, help='Input weights (.pdparams) file path (required)')
+    cli.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method')
+    cli.add_argument('--opset', type=int, default=16, help='ONNX opset version')
+    cli.add_argument('--simplify', action='store_true', help='ONNX simplify model')
+    cli.add_argument('--dynamic', action='store_true', help='Dynamic batch-size')
+    cli.add_argument('--batch', type=int, default=1, help='Static batch-size')
+    args = cli.parse_args()
+    if not os.path.isfile(args.weights):
+        raise SystemExit('\nInvalid weights file')
+    if args.dynamic and args.batch > 1:
+        raise SystemExit('\nCannot set dynamic batch-size and static batch-size at same time')
+    args.batch = None if args.dynamic else args.batch  # None marks the batch dimension as dynamic
+    return args
+
+
+if __name__ == '__main__':
+    # script entry point: parse the CLI, run the export, exit with whatever main() returns
+    sys.exit(main(parse_args()))