diff --git a/docs/YOLONAS.md b/docs/YOLONAS.md index 13ca78e..81f68cf 100644 --- a/docs/YOLONAS.md +++ b/docs/YOLONAS.md @@ -190,7 +190,7 @@ parse-bbox-func-name=NvDsInferParseYoloE ... ``` -**NOTE**: If you are using a custom model, you should edit the `config_infer_primary_yolonas_custom.txt` file. +**NOTE**: If you are using a **custom** model, you should edit the `config_infer_primary_yolonas_custom.txt` file. **NOTE**: The **YOLO-NAS** resizes the input with left/top padding. To get better accuracy, use @@ -201,6 +201,22 @@ symmetric-padding=0 ... ``` +**NOTE**: The **pre-trained YOLO-NAS** uses zero mean normalization in the image preprocessing. It is important to set the `net-scale-factor` to match the values used in training. + +``` +... +net-scale-factor=0.0039215697906911373 +... +``` + +**NOTE**: The **custom YOLO-NAS** uses no normalization in the image preprocessing. It is important to set the `net-scale-factor` to match the values used in training. + +``` +... +net-scale-factor=1 +... +``` + **NOTE**: By default, the dynamic batch-size is set. 
To use implicit batch-size, uncomment the line ``` diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward.cu b/nvdsinfer_custom_impl_Yolo/yoloForward.cu index be6ebcb..b353f86 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward.cu @@ -7,7 +7,7 @@ inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } -__global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, int* classes, const uint netWidth, +__global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, float* classes, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask) { @@ -54,7 +54,7 @@ __global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, in boxes[count * 4 + 2] = w; boxes[count * 4 + 3] = h; scores[count] = maxProb * objectness; - classes[count] = maxIndex; + classes[count] = (float) maxIndex; } cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize, @@ -76,7 +76,7 @@ cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* cl reinterpret_cast (input) + (batch * inputSize), reinterpret_cast (boxes) + (batch * 4 * outputSize), reinterpret_cast (scores) + (batch * 1 * outputSize), - reinterpret_cast (classes) + (batch * 1 * outputSize), + reinterpret_cast (classes) + (batch * 1 * outputSize), netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY, reinterpret_cast (anchors), reinterpret_cast (mask)); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu index 03c01f0..e8418b4 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu @@ -5,7 +5,7 @@ #include -__global__ void 
gpuYoloLayer_nc(const float* input, float* boxes, float* scores, int* classes, const uint netWidth, +__global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores, float* classes, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask) { @@ -52,7 +52,7 @@ __global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores, boxes[count * 4 + 2] = w; boxes[count * 4 + 3] = h; scores[count] = maxProb * objectness; - classes[count] = maxIndex; + classes[count] = (float) maxIndex; } cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize, @@ -74,7 +74,7 @@ cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* reinterpret_cast (input) + (batch * inputSize), reinterpret_cast (boxes) + (batch * 4 * outputSize), reinterpret_cast (scores) + (batch * 1 * outputSize), - reinterpret_cast (classes) + (batch * 1 * outputSize), + reinterpret_cast (classes) + (batch * 1 * outputSize), netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY, reinterpret_cast (anchors), reinterpret_cast (mask)); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu index c5ebc4a..66fff49 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu @@ -27,7 +27,7 @@ __device__ void softmaxGPU(const float* input, const int bbindex, const int numG } } -__global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, float* scores, int* classes, +__global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, float* scores, float* classes, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, 
const uint numBBoxes, const uint64_t lastInputSize, const float* anchors) { @@ -73,7 +73,7 @@ __global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, boxes[count * 4 + 2] = w; boxes[count * 4 + 3] = h; scores[count] = maxProb * objectness; - classes[count] = maxIndex; + classes[count] = (float) maxIndex; } cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes, @@ -96,7 +96,7 @@ cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* reinterpret_cast (softmax) + (batch * inputSize), reinterpret_cast (boxes) + (batch * 4 * outputSize), reinterpret_cast (scores) + (batch * 1 * outputSize), - reinterpret_cast (classes) + (batch * 1 * outputSize), + reinterpret_cast (classes) + (batch * 1 * outputSize), netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, reinterpret_cast (anchors)); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp index 5c921b9..967cb4e 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp +++ b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp @@ -181,17 +181,13 @@ YoloLayer::getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT bool YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept { - return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && (inOut[pos].type == nvinfer1::DataType::kFLOAT || - inOut[pos].type == nvinfer1::DataType::kINT32); + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && inOut[pos].type == nvinfer1::DataType::kFLOAT; } nvinfer1::DataType YoloLayer::getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept { assert(index < 3); - if (index == 2) { - return nvinfer1::DataType::kINT32; - } return nvinfer1::DataType::kFLOAT; }