Fix Darknet models / Small fixes
This commit is contained in:
@@ -190,7 +190,7 @@ parse-bbox-func-name=NvDsInferParseYoloE
|
|||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
**NOTE**: If you are using a custom model, you should edit the `config_infer_primary_yolonas_custom.txt` file.
|
**NOTE**: If you are using a **custom** model, you should edit the `config_infer_primary_yolonas_custom.txt` file.
|
||||||
|
|
||||||
**NOTE**: The **YOLO-NAS** resizes the input with left/top padding. To get better accuracy, use
|
**NOTE**: The **YOLO-NAS** resizes the input with left/top padding. To get better accuracy, use
|
||||||
|
|
||||||
@@ -201,6 +201,22 @@ symmetric-padding=0
|
|||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**NOTE**: The **pre-trained YOLO-NAS** uses zero-mean normalization in its image preprocessing. It is important to set the `net-scale-factor` to match the values used in training.
|
||||||
|
|
||||||
|
```
|
||||||
|
...
|
||||||
|
net-scale-factor=0.0039215697906911373
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
**NOTE**: The **custom YOLO-NAS** uses no normalization in its image preprocessing. It is important to set the `net-scale-factor` to match the values used in training.
|
||||||
|
|
||||||
|
```
|
||||||
|
...
|
||||||
|
net-scale-factor=1
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
**NOTE**: By default, the dynamic batch-size is set. To use implicit batch-size, uncomment the line
|
**NOTE**: By default, the dynamic batch-size is set. To use implicit batch-size, uncomment the line
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
|
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
|
||||||
|
|
||||||
__global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, int* classes, const uint netWidth,
|
__global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, float* classes, const uint netWidth,
|
||||||
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
|
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
|
||||||
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
|
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
|
||||||
{
|
{
|
||||||
@@ -54,7 +54,7 @@ __global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, in
|
|||||||
boxes[count * 4 + 2] = w;
|
boxes[count * 4 + 2] = w;
|
||||||
boxes[count * 4 + 3] = h;
|
boxes[count * 4 + 3] = h;
|
||||||
scores[count] = maxProb * objectness;
|
scores[count] = maxProb * objectness;
|
||||||
classes[count] = maxIndex;
|
classes[count] = (float) maxIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
|
cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
|
||||||
@@ -76,7 +76,7 @@ cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* cl
|
|||||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||||
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
|
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
|
||||||
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
|
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
|
||||||
reinterpret_cast<int*> (classes) + (batch * 1 * outputSize),
|
reinterpret_cast<float*> (classes) + (batch * 1 * outputSize),
|
||||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
|
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
|
||||||
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
__global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores, int* classes, const uint netWidth,
|
__global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores, float* classes, const uint netWidth,
|
||||||
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
|
const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
|
||||||
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
|
const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
|
||||||
{
|
{
|
||||||
@@ -52,7 +52,7 @@ __global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores,
|
|||||||
boxes[count * 4 + 2] = w;
|
boxes[count * 4 + 2] = w;
|
||||||
boxes[count * 4 + 3] = h;
|
boxes[count * 4 + 3] = h;
|
||||||
scores[count] = maxProb * objectness;
|
scores[count] = maxProb * objectness;
|
||||||
classes[count] = maxIndex;
|
classes[count] = (float) maxIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
|
cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
|
||||||
@@ -74,7 +74,7 @@ cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void*
|
|||||||
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
reinterpret_cast<const float*> (input) + (batch * inputSize),
|
||||||
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
|
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
|
||||||
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
|
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
|
||||||
reinterpret_cast<int*> (classes) + (batch * 1 * outputSize),
|
reinterpret_cast<float*> (classes) + (batch * 1 * outputSize),
|
||||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
|
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
|
||||||
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ __device__ void softmaxGPU(const float* input, const int bbindex, const int numG
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, float* scores, int* classes,
|
__global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, float* scores, float* classes,
|
||||||
const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
|
const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses,
|
||||||
const uint numBBoxes, const uint64_t lastInputSize, const float* anchors)
|
const uint numBBoxes, const uint64_t lastInputSize, const float* anchors)
|
||||||
{
|
{
|
||||||
@@ -73,7 +73,7 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes,
|
|||||||
boxes[count * 4 + 2] = w;
|
boxes[count * 4 + 2] = w;
|
||||||
boxes[count * 4 + 3] = h;
|
boxes[count * 4 + 3] = h;
|
||||||
scores[count] = maxProb * objectness;
|
scores[count] = maxProb * objectness;
|
||||||
classes[count] = maxIndex;
|
classes[count] = (float) maxIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
|
cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes,
|
||||||
@@ -96,7 +96,7 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void*
|
|||||||
reinterpret_cast<float*> (softmax) + (batch * inputSize),
|
reinterpret_cast<float*> (softmax) + (batch * inputSize),
|
||||||
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
|
reinterpret_cast<float*> (boxes) + (batch * 4 * outputSize),
|
||||||
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
|
reinterpret_cast<float*> (scores) + (batch * 1 * outputSize),
|
||||||
reinterpret_cast<int*> (classes) + (batch * 1 * outputSize),
|
reinterpret_cast<float*> (classes) + (batch * 1 * outputSize),
|
||||||
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize,
|
netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize,
|
||||||
reinterpret_cast<const float*> (anchors));
|
reinterpret_cast<const float*> (anchors));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -181,17 +181,13 @@ YoloLayer::getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT
|
|||||||
bool
|
bool
|
||||||
YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept
|
YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept
|
||||||
{
|
{
|
||||||
return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && (inOut[pos].type == nvinfer1::DataType::kFLOAT ||
|
return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && inOut[pos].type == nvinfer1::DataType::kFLOAT;
|
||||||
inOut[pos].type == nvinfer1::DataType::kINT32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nvinfer1::DataType
|
nvinfer1::DataType
|
||||||
YoloLayer::getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept
|
YoloLayer::getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept
|
||||||
{
|
{
|
||||||
assert(index < 3);
|
assert(index < 3);
|
||||||
if (index == 2) {
|
|
||||||
return nvinfer1::DataType::kINT32;
|
|
||||||
}
|
|
||||||
return nvinfer1::DataType::kFLOAT;
|
return nvinfer1::DataType::kFLOAT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user