Update Benchmarks + Add YOLOv7-u6 + Fixes

2023-05-21 02:12:09 -03:00
parent af20c2f72c
commit 79d22283c1
13 changed files with 176 additions and 87 deletions
--- a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu
+++ b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu
@@ -68,13 +68,12 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* output

  int _count = (int)atomicAdd(count, 1);

-  output[_count * 7 + 0] = xc;
-  output[_count * 7 + 1] = yc;
-  output[_count * 7 + 2] = w;
-  output[_count * 7 + 3] = h;
-  output[_count * 7 + 4] = maxProb;
-  output[_count * 7 + 5] = maxIndex;
-  output[_count * 7 + 6] = objectness;
+  output[_count * 6 + 0] = xc;
+  output[_count * 6 + 1] = yc;
+  output[_count * 6 + 2] = w;
+  output[_count * 6 + 3] = h;
+  output[_count * 6 + 4] = maxProb * objectness;
+  output[_count * 6 + 5] = maxIndex;
 }

 cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void* count, const uint& batchSize,
@@ -93,7 +92,7 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, void
    gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
        reinterpret_cast<const float*> (input) + (batch * inputSize),
        reinterpret_cast<float*> (softmax) + (batch * inputSize),
-        reinterpret_cast<float*> (output) + (batch * 7 * outputSize),
+        reinterpret_cast<float*> (output) + (batch * 6 * outputSize),
        reinterpret_cast<int*> (count) + (batch),
        netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes,
        reinterpret_cast<const float*> (anchors));