Update Benchmarks + Add YOLOv7-u6 + Fixes
This commit is contained in:
37
README.md
37
README.md
@@ -5,6 +5,8 @@ NVIDIA DeepStream SDK 6.2 / 6.1.1 / 6.1 / 6.0.1 / 6.0 configuration for YOLO mod
|
|||||||
-------------------------------------
|
-------------------------------------
|
||||||
### **Big update on DeepStream-Yolo**
|
### **Big update on DeepStream-Yolo**
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
### Important: please generate the ONNX model and the TensorRT engine again with the updated files
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
### Future updates
|
### Future updates
|
||||||
|
|
||||||
@@ -149,7 +151,7 @@ sample = 1920x1080 video
|
|||||||
- Eval
|
- Eval
|
||||||
|
|
||||||
```
|
```
|
||||||
nms-iou-threshold = 0.6 (Darknet) / 0.65 (YOLOv5, YOLOv6, YOLOv7, YOLOR and YOLOX) / 0.7 (Paddle, YOLO-NAS and YOLOv8)
|
nms-iou-threshold = 0.6 (Darknet) / 0.65 (YOLOv5, YOLOv6, YOLOv7, YOLOR and YOLOX) / 0.7 (Paddle, YOLO-NAS, YOLOv8 and YOLOv7-u6)
|
||||||
pre-cluster-threshold = 0.001
|
pre-cluster-threshold = 0.001
|
||||||
topk = 300
|
topk = 300
|
||||||
```
|
```
|
||||||
@@ -164,20 +166,20 @@ topk = 300
|
|||||||
|
|
||||||
#### Results
|
#### Results
|
||||||
|
|
||||||
**NOTE**: * = PyTorch
|
**NOTE**: * = PyTorch.
|
||||||
|
|
||||||
**NOTE**: ** = The YOLOv4 is trained with the trainvalno5k set, so the mAP is high on val2017 test
|
**NOTE**: ** = The YOLOv4 is trained with the trainvalno5k set, so the mAP is high on val2017 test.
|
||||||
|
|
||||||
**NOTE**: The p3.2xlarge instance (AWS) seems to max out at 625-635 FPS on DeepStream even using lighter models
|
**NOTE**: The p3.2xlarge instance (AWS) seems to max out at 625-635 FPS on DeepStream even using lighter models.
|
||||||
|
|
||||||
| DeepStream | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS<br />(without display) |
|
| DeepStream | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS<br />(without display) |
|
||||||
|:----------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:--------------------------:|
|
|:------------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:--------------------------:|
|
||||||
| YOLO-NAS L | FP16 | 640 | 0.484 | 0.658 | 0.532 | 235.27 |
|
| YOLO-NAS L | FP16 | 640 | 0.484 | 0.658 | 0.532 | 235.27 |
|
||||||
| YOLO-NAS M | FP16 | 640 | 0.480 | 0.651 | 0.524 | 287.39 |
|
| YOLO-NAS M | FP16 | 640 | 0.480 | 0.651 | 0.524 | 287.39 |
|
||||||
| YOLO-NAS S | FP16 | 640 | 0.442 | 0.614 | 0.485 | 478.52 |
|
| YOLO-NAS S | FP16 | 640 | 0.442 | 0.614 | 0.485 | 478.52 |
|
||||||
| PP-YOLOE+_x | FP16 | 640 | 0. | 0. | 0. | |
|
| PP-YOLOE+_x | FP16 | 640 | 0.528 | 0.705 | 0.579 | 121.17 |
|
||||||
| PP-YOLOE+_l | FP16 | 640 | 0. | 0. | 0. | |
|
| PP-YOLOE+_l | FP16 | 640 | 0.511 | 0.686 | 0.557 | 191.82 |
|
||||||
| PP-YOLOE+_m | FP16 | 640 | 0. | 0. | 0. | |
|
| PP-YOLOE+_m | FP16 | 640 | 0.483 | 0.658 | 0.528 | 264.39 |
|
||||||
| PP-YOLOE+_s | FP16 | 640 | 0.424 | 0.594 | 0.464 | 476.13 |
|
| PP-YOLOE+_s | FP16 | 640 | 0.424 | 0.594 | 0.464 | 476.13 |
|
||||||
| PP-YOLOE-s (400) | FP16 | 640 | 0.423 | 0.589 | 0.463 | 461.23 |
|
| PP-YOLOE-s (400) | FP16 | 640 | 0.423 | 0.589 | 0.463 | 461.23 |
|
||||||
| YOLOX-x | FP16 | 640 | 0.447 | 0.616 | 0.483 | 125.40 |
|
| YOLOX-x | FP16 | 640 | 0.447 | 0.616 | 0.483 | 125.40 |
|
||||||
@@ -193,11 +195,20 @@ topk = 300
|
|||||||
| YOLOv8m | FP16 | 640 | 0.468 | 0.637 | 0.510 | 278.08 |
|
| YOLOv8m | FP16 | 640 | 0.468 | 0.637 | 0.510 | 278.08 |
|
||||||
| YOLOv8s | FP16 | 640 | 0.415 | 0.578 | 0.453 | 493.45 |
|
| YOLOv8s | FP16 | 640 | 0.415 | 0.578 | 0.453 | 493.45 |
|
||||||
| YOLOv8n | FP16 | 640 | 0.343 | 0.492 | 0.373 | 627.43 |
|
| YOLOv8n | FP16 | 640 | 0.343 | 0.492 | 0.373 | 627.43 |
|
||||||
| YOLOv7 | FP16 | 640 | 0. | 0. | 0. | |
|
| YOLOv7-u6 | FP16 | 640 | 0.484 | 0.652 | 0.530 | 193.54 |
|
||||||
| YOLOv6s 3.0 | FP16 | 640 | 0. | 0. | 0. | |
|
| YOLOv7x* | FP16 | 640 | 0.496 | 0.679 | 0.536 | 155.07 |
|
||||||
| YOLOv5s 7.0 | FP16 | 640 | 0. | 0. | 0. | |
|
| YOLOv7* | FP16 | 640 | 0.476 | 0.660 | 0.518 | 226.01 |
|
||||||
| YOLOv4 | FP16 | 640 | 0. | 0. | 0. | |
|
| YOLOv7-Tiny Leaky* | FP16 | 640 | 0.345 | 0.516 | 0.372 | 626.23 |
|
||||||
| YOLOv3 | FP16 | 640 | 0. | 0. | 0. | |
|
| YOLOv7-Tiny Leaky* | FP16 | 416 | 0.328 | 0.493 | 0.349 | 633.90 |
|
||||||
|
| YOLOv6-L 4.0 | FP16 | 640 | 0.490 | 0.671 | 0.535 | 178.41 |
|
||||||
|
| YOLOv6-M 4.0 | FP16 | 640 | 0.460 | 0.635 | 0.502 | 293.39 |
|
||||||
|
| YOLOv6-S 4.0 | FP16 | 640 | 0.416 | 0.585 | 0.453 | 513.90 |
|
||||||
|
| YOLOv6-N 4.0 | FP16 | 640 | 0.349 | 0.503 | 0.378 | 633.37 |
|
||||||
|
| YOLOv5x 7.0 | FP16 | 640 | 0.471 | 0.652 | 0.513 | 149.93 |
|
||||||
|
| YOLOv5l 7.0 | FP16 | 640 | 0.455 | 0.637 | 0.497 | 235.55 |
|
||||||
|
| YOLOv5m 7.0 | FP16 | 640 | 0.421 | 0.604 | 0.459 | 351.69 |
|
||||||
|
| YOLOv5s 7.0 | FP16 | 640 | 0.344 | 0.529 | 0.372 | 618.13 |
|
||||||
|
| YOLOv5n 7.0 | FP16 | 640 | 0.247 | 0.414 | 0.257 | 629.66 |
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
# YOLOv6 usage
|
# YOLOv6 usage
|
||||||
|
|
||||||
|
**NOTE**: You need to change the branch of the YOLOv6 repo according to the version of the model you want to convert.
|
||||||
|
|
||||||
**NOTE**: The yaml file is not required.
|
**NOTE**: The yaml file is not required.
|
||||||
|
|
||||||
* [Convert model](#convert-model)
|
* [Convert model](#convert-model)
|
||||||
@@ -29,17 +31,17 @@ Copy the `export_yoloV6.py` file from `DeepStream-Yolo/utils` directory to the `
|
|||||||
|
|
||||||
#### 3. Download the model
|
#### 3. Download the model
|
||||||
|
|
||||||
Download the `pt` file from [YOLOv6](https://github.com/meituan/YOLOv6/releases/) releases (example for YOLOv6-S 3.0)
|
Download the `pt` file from [YOLOv6](https://github.com/meituan/YOLOv6/releases/) releases (example for YOLOv6-S 4.0)
|
||||||
|
|
||||||
```
|
```
|
||||||
wget https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6s.pt
|
wget https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt
|
||||||
```
|
```
|
||||||
|
|
||||||
**NOTE**: You can use your custom model.
|
**NOTE**: You can use your custom model.
|
||||||
|
|
||||||
#### 4. Convert model
|
#### 4. Convert model
|
||||||
|
|
||||||
Generate the ONNX model file (example for YOLOv6-S 3.0)
|
Generate the ONNX model file (example for YOLOv6-S 4.0)
|
||||||
|
|
||||||
```
|
```
|
||||||
python3 export_yoloV6.py -w yolov6s.pt --simplify
|
python3 export_yoloV6.py -w yolov6s.pt --simplify
|
||||||
@@ -122,7 +124,7 @@ Open the `DeepStream-Yolo` folder and compile the lib
|
|||||||
|
|
||||||
### Edit the config_infer_primary_yoloV6 file
|
### Edit the config_infer_primary_yoloV6 file
|
||||||
|
|
||||||
Edit the `config_infer_primary_yoloV6.txt` file according to your model (example for YOLOv6-S 3.0 with 80 classes)
|
Edit the `config_infer_primary_yoloV6.txt` file according to your model (example for YOLOv6-S 4.0 with 80 classes)
|
||||||
|
|
||||||
```
|
```
|
||||||
[property]
|
[property]
|
||||||
|
|||||||
@@ -73,22 +73,22 @@ addBBoxProposal(const float bx1, const float by1, const float bx2, const float b
|
|||||||
}
|
}
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
static std::vector<NvDsInferParseObjectInfo>
|
||||||
decodeTensorYolo(const float* detection, const uint& outputSize, const uint& count, const uint& netW, const uint& netH,
|
decodeTensorYolo(const float* detection, const uint& outputSize, const uint& netW, const uint& netH,
|
||||||
const std::vector<float>& preclusterThreshold)
|
const std::vector<float>& preclusterThreshold)
|
||||||
{
|
{
|
||||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
std::vector<NvDsInferParseObjectInfo> binfo;
|
||||||
|
|
||||||
for (uint b = 0; b < outputSize; ++b) {
|
for (uint b = 0; b < outputSize; ++b) {
|
||||||
float maxProb = count == 6 ? detection[b * count + 4] : detection[b * count + 4] * detection[b * count + 6];
|
float maxProb = detection[b * 6 + 4];
|
||||||
int maxIndex = (int) detection[b * count + 5];
|
int maxIndex = (int) detection[b * 6 + 5];
|
||||||
|
|
||||||
if (maxProb < preclusterThreshold[maxIndex])
|
if (maxProb < preclusterThreshold[maxIndex])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
float bxc = detection[b * count + 0];
|
float bxc = detection[b * 6 + 0];
|
||||||
float byc = detection[b * count + 1];
|
float byc = detection[b * 6 + 1];
|
||||||
float bw = detection[b * count + 2];
|
float bw = detection[b * 6 + 2];
|
||||||
float bh = detection[b * count + 3];
|
float bh = detection[b * 6 + 3];
|
||||||
|
|
||||||
float bx1 = bxc - bw / 2;
|
float bx1 = bxc - bw / 2;
|
||||||
float by1 = byc - bh / 2;
|
float by1 = byc - bh / 2;
|
||||||
@@ -102,22 +102,22 @@ decodeTensorYolo(const float* detection, const uint& outputSize, const uint& cou
|
|||||||
}
|
}
|
||||||
|
|
||||||
static std::vector<NvDsInferParseObjectInfo>
|
static std::vector<NvDsInferParseObjectInfo>
|
||||||
decodeTensorYoloE(const float* detection, const uint& outputSize, const uint& count, const uint& netW, const uint& netH,
|
decodeTensorYoloE(const float* detection, const uint& outputSize, const uint& netW, const uint& netH,
|
||||||
const std::vector<float>& preclusterThreshold)
|
const std::vector<float>& preclusterThreshold)
|
||||||
{
|
{
|
||||||
std::vector<NvDsInferParseObjectInfo> binfo;
|
std::vector<NvDsInferParseObjectInfo> binfo;
|
||||||
|
|
||||||
for (uint b = 0; b < outputSize; ++b) {
|
for (uint b = 0; b < outputSize; ++b) {
|
||||||
float maxProb = count == 6 ? detection[b * count + 4] : detection[b * count + 4] * detection[b * count + 6];
|
float maxProb = detection[b * 6 + 4];
|
||||||
int maxIndex = (int) detection[b * count + 5];
|
int maxIndex = (int) detection[b * 6 + 5];
|
||||||
|
|
||||||
if (maxProb < preclusterThreshold[maxIndex])
|
if (maxProb < preclusterThreshold[maxIndex])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
float bx1 = detection[b * count + 0];
|
float bx1 = detection[b * 6 + 0];
|
||||||
float by1 = detection[b * count + 1];
|
float by1 = detection[b * 6 + 1];
|
||||||
float bx2 = detection[b * count + 2];
|
float bx2 = detection[b * 6 + 2];
|
||||||
float by2 = detection[b * count + 3];
|
float by2 = detection[b * 6 + 3];
|
||||||
|
|
||||||
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
|
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
|
||||||
}
|
}
|
||||||
@@ -139,9 +139,8 @@ NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo
|
|||||||
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
|
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
|
||||||
|
|
||||||
const uint outputSize = layer.inferDims.d[0];
|
const uint outputSize = layer.inferDims.d[0];
|
||||||
const uint count = layer.inferDims.d[1];
|
|
||||||
|
|
||||||
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (layer.buffer), outputSize, count,
|
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (layer.buffer), outputSize,
|
||||||
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
|
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
|
||||||
|
|
||||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
||||||
@@ -165,9 +164,8 @@ NvDsInferParseCustomYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInf
|
|||||||
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
|
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
|
||||||
|
|
||||||
const uint outputSize = layer.inferDims.d[0];
|
const uint outputSize = layer.inferDims.d[0];
|
||||||
const uint count = layer.inferDims.d[1];
|
|
||||||
|
|
||||||
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (layer.buffer), outputSize, count,
|
std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (layer.buffer), outputSize,
|
||||||
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
|
networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
|
||||||
|
|
||||||
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
|
||||||
|
|||||||
@@ -50,13 +50,12 @@ __global__ void gpuYoloLayer(const float* input, float* output, int* count, cons
|
|||||||
|
|
||||||
int _count = (int)atomicAdd(count, 1);
|
int _count = (int)atomicAdd(count, 1);
|
||||||
|
|
||||||
output[_count * 7 + 0] = xc;
|
output[_count * 6 + 0] = xc;
|
||||||
output[_count * 7 + 1] = yc;
|
output[_count * 6 + 1] = yc;
|
||||||
output[_count * 7 + 2] = w;
|
output[_count * 6 + 2] = w;
|
||||||
output[_count * 7 + 3] = h;
|
output[_count * 6 + 3] = h;
|
||||||
output[_count * 7 + 4] = maxProb;
|
output[_count * 6 + 4] = maxProb * objectness;
|
||||||
output[_count * 7 + 5] = maxIndex;
|
output[_count * 6 + 5] = maxIndex;
|
||||||
output[_count * 7 + 6] = objectness;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||||
@@ -76,7 +75,7 @@ cudaError_t cudaYoloLayer(const void* input, void* output, void* count, const ui
|
|||||||
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
||||||
gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||||
reinterpret_cast<float*> (output) + (batch * 7 * outputSize),
|
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
|
||||||
reinterpret_cast<int*> (count) + (batch),
|
reinterpret_cast<int*> (count) + (batch),
|
||||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
|
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
|
||||||
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
||||||
|
|||||||
@@ -47,13 +47,12 @@ __global__ void gpuYoloLayer_nc(const float* input, float* output, int* count, c
|
|||||||
|
|
||||||
int _count = (int)atomicAdd(count, 1);
|
int _count = (int)atomicAdd(count, 1);
|
||||||
|
|
||||||
output[_count * 7 + 0] = xc;
|
output[_count * 6 + 0] = xc;
|
||||||
output[_count * 7 + 1] = yc;
|
output[_count * 6 + 1] = yc;
|
||||||
output[_count * 7 + 2] = w;
|
output[_count * 6 + 2] = w;
|
||||||
output[_count * 7 + 3] = h;
|
output[_count * 6 + 3] = h;
|
||||||
output[_count * 7 + 4] = maxProb;
|
output[_count * 6 + 4] = maxProb * objectness;
|
||||||
output[_count * 7 + 5] = maxIndex;
|
output[_count * 6 + 5] = maxIndex;
|
||||||
output[_count * 7 + 6] = objectness;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const uint& batchSize, uint64_t& inputSize,
|
||||||
@@ -73,7 +72,7 @@ cudaError_t cudaYoloLayer_nc(const void* input, void* output, void* count, const
|
|||||||
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
for (unsigned int batch = 0; batch < batchSize; ++batch) {
|
||||||
gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||||
reinterpret_cast<float*> (output) + (batch * 7 * outputSize),
|
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
|
||||||
reinterpret_cast<int*> (count) + (batch),
|
reinterpret_cast<int*> (count) + (batch),
|
||||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
|
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
|
||||||
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
||||||
|
|||||||
@@ -68,13 +68,12 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* output
|
|||||||
|
|
||||||
int _count = (int)atomicAdd(count, 1);
|
int _count = (int)atomicAdd(count, 1);
|
||||||
|
|
||||||
output[_count * 7 + 0] = xc;
|
output[_count * 6 + 0] = xc;
|
||||||
output[_count * 7 + 1] = yc;
|
output[_count * 6 + 1] = yc;
|
||||||
output[_count * 7 + 2] = w;
|
output[_count * 6 + 2] = w;
|
||||||
output[_count * 7 + 3] = h;
|
output[_count * 6 + 3] = h;
|
||||||
output[_count * 7 + 4] = maxProb;
|
output[_count * 6 + 4] = maxProb * objectness;
|
||||||
output[_count * 7 + 5] = maxIndex;
|
output[_count * 6 + 5] = maxIndex;
|
||||||
output[_count * 7 + 6] = objectness;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
|
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
|
||||||
@@ -93,7 +92,7 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void
|
|||||||
gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
|
||||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||||
reinterpret_cast<float*> (softmax) + (batch * inputSize),
|
reinterpret_cast<float*> (softmax) + (batch * inputSize),
|
||||||
reinterpret_cast<float*> (output) + (batch * 7 * outputSize),
|
reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
|
||||||
reinterpret_cast<int*> (count) + (batch),
|
reinterpret_cast<int*> (count) + (batch),
|
||||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes,
|
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes,
|
||||||
reinterpret_cast<const float*> (anchors));
|
reinterpret_cast<const float*> (anchors));
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ nvinfer1::Dims
|
|||||||
YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept
|
YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept
|
||||||
{
|
{
|
||||||
assert(index == 0);
|
assert(index == 0);
|
||||||
return nvinfer1::Dims{2, {static_cast<int>(m_OutputSize), 7}};
|
return nvinfer1::Dims{2, {static_cast<int>(m_OutputSize), 6}};
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@@ -125,7 +125,7 @@ YoloLayer::enqueue(int batchSize, void const* const* inputs, void* const* output
|
|||||||
noexcept
|
noexcept
|
||||||
{
|
{
|
||||||
void* output = outputs[0];
|
void* output = outputs[0];
|
||||||
CUDA_CHECK(cudaMemsetAsync((float*) output, 0, sizeof(float) * m_OutputSize * 7 * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((float*) output, 0, sizeof(float) * m_OutputSize * 6 * batchSize, stream));
|
||||||
|
|
||||||
void* count = workspace;
|
void* count = workspace;
|
||||||
CUDA_CHECK(cudaMemsetAsync((int*) count, 0, sizeof(int) * batchSize, stream));
|
CUDA_CHECK(cudaMemsetAsync((int*) count, 0, sizeof(int) * batchSize, stream));
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class DeepStreamOutput(nn.Module):
|
|||||||
boxes = x[:, :, :4]
|
boxes = x[:, :, :4]
|
||||||
objectness = x[:, :, 4:5]
|
objectness = x[:, :, 4:5]
|
||||||
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
||||||
return torch.cat((boxes, scores, classes, objectness), dim=2)
|
return torch.cat((boxes, scores * objectness, classes), dim=2)
|
||||||
|
|
||||||
|
|
||||||
def suppress_warnings():
|
def suppress_warnings():
|
||||||
|
|||||||
@@ -6,20 +6,24 @@ import onnx
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from yolov6.utils.checkpoint import load_checkpoint
|
from yolov6.utils.checkpoint import load_checkpoint
|
||||||
from yolov6.layers.common import RepVGGBlock, ConvModule, SiLU
|
from yolov6.layers.common import RepVGGBlock, SiLU
|
||||||
from yolov6.models.effidehead import Detect
|
from yolov6.models.effidehead import Detect
|
||||||
|
|
||||||
|
try:
|
||||||
|
from yolov6.layers.common import ConvModule
|
||||||
|
except ImportError:
|
||||||
|
from yolov6.layers.common import Conv as ConvModule
|
||||||
|
|
||||||
|
|
||||||
class DeepStreamOutput(nn.Module):
|
class DeepStreamOutput(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
print(x)
|
|
||||||
boxes = x[:, :, :4]
|
boxes = x[:, :, :4]
|
||||||
objectness = x[:, :, 4:5]
|
objectness = x[:, :, 4:5]
|
||||||
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
||||||
return torch.cat((boxes, scores, classes, objectness), dim=2)
|
return torch.cat((boxes, scores * objectness, classes), dim=2)
|
||||||
|
|
||||||
|
|
||||||
def suppress_warnings():
|
def suppress_warnings():
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class DeepStreamOutput(nn.Module):
|
|||||||
boxes = x[:, :, :4]
|
boxes = x[:, :, :4]
|
||||||
objectness = x[:, :, 4:5]
|
objectness = x[:, :, 4:5]
|
||||||
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
||||||
return torch.cat((boxes, scores, classes, objectness), dim=2)
|
return torch.cat((boxes, scores * objectness, classes), dim=2)
|
||||||
|
|
||||||
|
|
||||||
def suppress_warnings():
|
def suppress_warnings():
|
||||||
|
|||||||
77
utils/export_yoloV7_u6.py
Normal file
77
utils/export_yoloV7_u6.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import warnings
|
||||||
|
import onnx
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from models.experimental import attempt_load
|
||||||
|
from models.yolo import Detect, V6Detect, IV6Detect
|
||||||
|
from utils.torch_utils import select_device
|
||||||
|
|
||||||
|
|
||||||
|
class DeepStreamOutput(nn.Module):
    """Pack the detector output into one fixed-width row per candidate box.

    The incoming tensor must be 3-D. After dims 1 and 2 are swapped, the first
    four values of the last dimension are kept as the box and the maximum of
    the remaining values, together with its index, is appended.
    NOTE(review): the box layout (e.g. xc, yc, w, h) is not visible from this
    module -- confirm against the parser that consumes the exported model.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return a tensor whose last dimension is ``[box (4), score, class]``.

        Args:
            x: 3-D tensor; presumably ``(batch, 4 + num_classes, num_boxes)``
                -- TODO confirm against the wrapped model's head.

        Returns:
            Tensor with the same first dimension as ``x``, the original last
            dimension as dim 1, and 6 values along the last dimension.
        """
        # Swap dims 1 and 2 so the per-box values lie along the last dimension.
        x = x.transpose(1, 2)
        boxes = x[:, :, :4]
        # Largest value (and its index) among everything after the box values.
        scores, classes = torch.max(x[:, :, 4:], 2, keepdim=True)
        # torch.max yields int64 indices; cast them explicitly so the
        # concatenation does not depend on implicit dtype promotion.
        return torch.cat((boxes, scores, classes.to(boxes.dtype)), dim=2)
|
||||||
|
|
||||||
|
|
||||||
|
def suppress_warnings():
    """Install 'ignore' filters for the warning categories listed below.

    Affects the process-wide ``warnings`` filter list; nothing is returned.
    """
    # Presumably these are the categories emitted while tracing/exporting the
    # model -- the filters only hide them, they do not change the export.
    for category in (torch.jit.TracerWarning, UserWarning, DeprecationWarning):
        warnings.filterwarnings('ignore', category=category)
|
||||||
|
|
||||||
|
|
||||||
|
def yolov7_u6_export(weights, device):
    """Load the checkpoint and switch its detection heads to export settings.

    Args:
        weights: path of the ``.pt`` checkpoint handed to ``attempt_load``.
        device: device object handed to ``attempt_load``.

    Returns:
        The loaded model, put in eval mode, with every ``Detect`` /
        ``V6Detect`` / ``IV6Detect`` module set to ``inplace = False``,
        ``dynamic = False`` and ``export = True``.
    """
    model = attempt_load(weights, device=device, inplace=True, fuse=True)
    model.eval()
    # Module names are not used, so walk modules() rather than named_modules().
    # NOTE(review): assumes attempt_load returns a torch.nn.Module -- confirm.
    for m in model.modules():
        if isinstance(m, (Detect, V6Detect, IV6Detect)):
            m.inplace = False
            m.dynamic = False
            m.export = True
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Export the checkpoint named by ``args.weights`` to an ONNX file.

    The ONNX file is written to the current working directory and is named
    after the weights file with its extension replaced by ``.onnx``.

    Args:
        args: namespace with ``weights`` (path), ``size`` (list of one or two
            ints), ``opset`` (int) and ``simplify`` (bool) attributes.
    """
    suppress_warnings()

    device = select_device('cpu')
    model = yolov7_u6_export(args.weights, device)

    # Append the output-packing module so it becomes part of the exported graph.
    model = nn.Sequential(model, DeepStreamOutput())

    # A single size value means a square input: [S] -> [S, S].
    img_size = args.size * 2 if len(args.size) == 1 else args.size

    onnx_input_im = torch.zeros(1, 3, *img_size).to(device)
    # splitext removes only the final extension; splitting on '.pt' would cut
    # the name at the first '.pt' occurring anywhere in it.
    onnx_output_file = os.path.splitext(os.path.basename(args.weights))[0] + '.onnx'

    torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset,
                      do_constant_folding=True, input_names=['input'], output_names=['output'], dynamic_axes=None)

    if args.simplify:
        # Imported lazily so onnxsim is only required when --simplify is used.
        import onnxsim
        model_onnx = onnx.load(onnx_output_file)
        model_onnx, _ = onnxsim.simplify(model_onnx)
        onnx.save(model_onnx, onnx_output_file)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse and validate the command-line options of the export script.

    Returns:
        argparse.Namespace with ``weights``, ``size``, ``opset`` and
        ``simplify`` attributes.

    Raises:
        SystemExit: if ``--weights`` is not an existing file, or if more than
            two ``--size`` values are given.
    """
    parser = argparse.ArgumentParser(description='DeepStream YOLOv7-u6 conversion')
    parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)')
    parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])')
    parser.add_argument('--opset', type=int, default=12, help='ONNX opset version')
    parser.add_argument('--simplify', action='store_true', help='ONNX simplify model')
    args = parser.parse_args()
    if not os.path.isfile(args.weights):
        raise SystemExit('Invalid weights file')
    # The size is unpacked into the spatial dims of the dummy input, so more
    # than two values (H, W) would produce a tensor of the wrong rank.
    if len(args.size) > 2:
        raise SystemExit('Invalid size: expected 1 or 2 values (H W)')
    return args
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: parse the CLI options, run the export, and use main()'s
# return value as the process exit status (None maps to exit code 0).
if __name__ == '__main__':
    args = parse_args()
    sys.exit(main(args))
|
||||||
@@ -16,7 +16,7 @@ class DeepStreamOutput(nn.Module):
|
|||||||
boxes = x[:, :, :4]
|
boxes = x[:, :, :4]
|
||||||
objectness = x[:, :, 4:5]
|
objectness = x[:, :, 4:5]
|
||||||
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
||||||
return torch.cat((boxes, scores, classes, objectness), dim=2)
|
return torch.cat((boxes, scores * objectness, classes), dim=2)
|
||||||
|
|
||||||
|
|
||||||
def suppress_warnings():
|
def suppress_warnings():
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class DeepStreamOutput(nn.Module):
|
|||||||
boxes = x[:, :, :4]
|
boxes = x[:, :, :4]
|
||||||
objectness = x[:, :, 4:5]
|
objectness = x[:, :, 4:5]
|
||||||
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
scores, classes = torch.max(x[:, :, 5:], 2, keepdim=True)
|
||||||
return torch.cat((boxes, scores, classes, objectness), dim=2)
|
return torch.cat((boxes, scores * objectness, classes), dim=2)
|
||||||
|
|
||||||
|
|
||||||
def suppress_warnings():
|
def suppress_warnings():
|
||||||
|
|||||||
Reference in New Issue
Block a user