GPU Batched NMS

This commit is contained in:
Marcos Luciano
2022-06-19 12:12:04 -03:00
parent f621c0f429
commit f80aa10cf2
6 changed files with 47 additions and 56 deletions

View File

@@ -14,10 +14,8 @@ __global__ void gpuYoloLayer_nc(
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
-if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes))
-{
+if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes)
return;
-}
const int numGridCells = gridSizeX * gridSizeY;
const int bbindex = y_id * gridSizeX + x_id;
@@ -25,7 +23,8 @@ __global__ void gpuYoloLayer_nc(
const float objectness
= input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];
-if (objectness < scoreThreshold) return;
+if (objectness < scoreThreshold)
+return;
int count = (int)atomicAdd(&countData[0], 1);