From 555152064e05d913179ac1a273f6913b6259b87b Mon Sep 17 00:00:00 2001 From: Marcos Luciano Date: Mon, 21 Feb 2022 23:46:29 -0300 Subject: [PATCH] Minor fixes --- config_infer_primary.txt | 3 +- config_infer_primary_yoloV5.txt | 3 +- config_infer_primary_yolor.txt | 3 +- docs/customModels.md | 82 +++--- nvdsinfer_custom_impl_Yolo/calibrator.h | 2 +- .../layers/activation_layer.cpp | 2 +- .../layers/channels_layer.cpp | 2 +- .../layers/convolutional_layer.cpp | 2 +- .../layers/dropout_layer.cpp | 2 +- .../layers/implicit_layer.cpp | 2 +- .../layers/maxpool_layer.cpp | 2 +- .../layers/reorg_r_layer.cpp | 2 +- .../layers/route_layer.cpp | 2 +- .../layers/route_layer.h | 2 +- .../layers/shortcut_layer.cpp | 2 +- .../layers/upsample_layer.cpp | 2 +- .../nvdsinfer_yolo_engine.cpp | 17 +- .../nvdsparsebbox_Yolo.cpp | 254 +++--------------- nvdsinfer_custom_impl_Yolo/utils.cpp | 12 +- nvdsinfer_custom_impl_Yolo/utils.h | 9 +- nvdsinfer_custom_impl_Yolo/yolo.cpp | 110 ++++---- nvdsinfer_custom_impl_Yolo/yolo.h | 37 +-- nvdsinfer_custom_impl_Yolo/yoloForward.cu | 38 ++- nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu | 38 ++- nvdsinfer_custom_impl_Yolo/yoloForward_r.cu | 38 ++- nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu | 45 ++-- nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp | 185 +++++++------ nvdsinfer_custom_impl_Yolo/yoloPlugins.h | 33 ++- readme.md | 26 +- 29 files changed, 416 insertions(+), 541 deletions(-) diff --git a/config_infer_primary.txt b/config_infer_primary.txt index 11ab7aa..c661e95 100644 --- a/config_infer_primary.txt +++ b/config_infer_primary.txt @@ -14,11 +14,12 @@ interval=0 gie-unique-id=1 process-mode=1 network-type=0 -cluster-mode=4 +cluster-mode=2 maintain-aspect-ratio=0 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] +nms-iou-threshold=0.6 pre-cluster-threshold=0.25 diff --git a/config_infer_primary_yoloV5.txt b/config_infer_primary_yoloV5.txt index bc408b1..e1744ba 100644 --- a/config_infer_primary_yoloV5.txt +++ b/config_infer_primary_yoloV5.txt @@ -14,11 +14,12 @@ interval=0 gie-unique-id=1 process-mode=1 network-type=0 -cluster-mode=4 +cluster-mode=2 maintain-aspect-ratio=1 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] +nms-iou-threshold=0.6 pre-cluster-threshold=0.25 diff --git a/config_infer_primary_yolor.txt b/config_infer_primary_yolor.txt index 7db5111..ad082be 100644 --- a/config_infer_primary_yolor.txt +++ b/config_infer_primary_yolor.txt @@ -14,11 +14,12 @@ interval=0 gie-unique-id=1 process-mode=1 network-type=0 -cluster-mode=4 +cluster-mode=2 maintain-aspect-ratio=1 parse-bbox-func-name=NvDsInferParseYolo custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so engine-create-func-name=NvDsInferYoloCudaEngineGet [class-attrs-all] +nms-iou-threshold=0.6 pre-cluster-threshold=0.25 diff --git a/docs/customModels.md b/docs/customModels.md index 88346bc..894869f 100644 --- a/docs/customModels.md +++ b/docs/customModels.md @@ -12,7 +12,7 @@ ### Requirements * [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) -* [Pre-treined YOLO model](https://github.com/AlexeyAB/darknet) +* Pre-treined YOLO model in Darknet or PyTorch ## @@ -25,36 +25,11 @@ git clone https://github.com/marcoslucianops/DeepStream-Yolo.git cd DeepStream-Yolo ``` -#### 2. 
Remane the obj.names file to labels.txt and copy it to DeepStream-Yolo directory
+#### 2. Copy your labels file to DeepStream-Yolo directory and rename it to labels.txt
 
-#### 3. Copy the yolo.cfg and yolo.weights files to DeepStream-Yolo directory
+#### 3. Copy the yolo.cfg and yolo.weights/yolo.wts files to DeepStream-Yolo directory
 
-#### 4. Edit config_infer_primary.txt for your model
-
-```
-[property]
-...
-# CFG
-custom-network-config=yolo.cfg
-# Weights
-model-file=yolo.weights
-# Model labels file
-labelfile-path=labels.txt
-...
-```
-
-**NOTE**: If you want to use YOLOv2 or YOLOv2-Tiny models, change the deepstream_app_config.txt file before run it
-
-```
-[primary-gie]
-enable=1
-gpu-id=0
-gie-unique-id=1
-nvbuf-memory-type=0
-config-file=config_infer_primary_yoloV2.txt
-```
-
-**NOTE**: The config_infer_primary.txt file uses cluster-mode=4 and NMS = 0.45 (via code) when beta_nms isn't available (when beta_nms is available, NMS = beta_nms), while the config_infer_primary_yoloV2.txt file uses cluster-mode=2 and nms-iou-threshold=0.45 to set NMS.
+**NOTE**: It's important to keep the YOLO model reference (yolov4_, yolov5_, yolor_, etc.) in your cfg and weights/wts file to generate the engine correctly.
 
 ##
 
@@ -192,6 +167,8 @@ nvbuf-memory-type=0
 config-file=config_infer_primary.txt
 ```
 
+**NOTE**: Choose the correct config_infer_primary based on your YOLO model.
+
 ##
 
 ### Understanding and editing config_infer_primary
@@ -211,6 +188,26 @@ model-color-format=0
 
 ##
 
+#### custom-network-config
+
+* Example for custom YOLOv4 model
+
+```
+custom-network-config=yolov4_custom.cfg
+```
+
+##
+
+#### model-file
+
+* Example for custom YOLOv4 model
+
+```
+model-file=yolov4_custom.weights
+```
+
+##
+
 #### model-engine-file
 
 * Example for batch-size=1 and network-mode=2
@@ -219,6 +216,12 @@ model-color-format=0
 model-engine-file=model_b1_gpu0_fp16.engine
 ```
 
+* Example for batch-size=1 and network-mode=1
+
+```
+model-engine-file=model_b1_gpu0_int8.engine
+```
+
 * Example for batch-size=1 and network-mode=0
 
 ```
 model-engine-file=model_b1_gpu0_fp32.engine
 ```
@@ -260,15 +263,6 @@ num-detected-classes=80
 
 ##
 
-#### network-type
-
-```
-# 0=Detector, 1=Classifier, 2=Segmentation
-network-type=0
-```
-
-##
-
 #### interval
 
 ```
@@ -278,11 +272,19 @@ interval=0
 ```
 
 ##
 
+#### nms-iou-threshold
+
+```
+# IOU threshold
+nms-iou-threshold=0.6
+```
+
+##
+
 #### pre-cluster-threshold
 
 ```
-[class-attrs-all]
-# CONF_THRESH
+# Score threshold
 pre-cluster-threshold=0.25
 ```
diff --git a/nvdsinfer_custom_impl_Yolo/calibrator.h b/nvdsinfer_custom_impl_Yolo/calibrator.h
index f912761..3d06865 100644
--- a/nvdsinfer_custom_impl_Yolo/calibrator.h
+++ b/nvdsinfer_custom_impl_Yolo/calibrator.h
@@ -59,4 +59,4 @@ namespace nvinfer1 {
 
 std::vector prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box);
 
-#endif //CALIBRATOR_H
\ No newline at end of file
+#endif //CALIBRATOR_H
diff --git a/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
index 5d516c7..b42b4de 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
@@ -101,4 +101,4 @@ nvinfer1::ILayer* activationLayer(
         std::abort();
     }
     return output;
-}
\ No newline at end of file
+}
diff --git a/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp
index af61bac..42d17a1 100644
--- a/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp
+++ b/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp
@@ -29,4 +29,4 @@ 
nvinfer1::ILayer* channelsLayer( } return output; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp index c0135c8..2bcb00a 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp @@ -221,4 +221,4 @@ nvinfer1::ILayer* convolutionalLayer( assert(output != nullptr); return output; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp index 112af16..467aa0d 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp @@ -12,4 +12,4 @@ nvinfer1::ILayer* dropoutLayer( { nvinfer1::ILayer* output; return output; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp index 555119e..0d7e044 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp @@ -27,4 +27,4 @@ nvinfer1::ILayer* implicitLayer( assert(implicit != nullptr); return implicit; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp index e5e53bf..38efa06 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp @@ -27,4 +27,4 @@ nvinfer1::ILayer* maxpoolLayer( pool->setName(maxpoolLayerName.c_str()); return pool; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/reorg_r_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/reorg_r_layer.cpp index 5f252ba..8b50384 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/reorg_r_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/reorg_r_layer.cpp @@ -59,4 +59,4 @@ nvinfer1::ILayer* reorgRLayer( assert(concat != nullptr); return concat; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp index d4bf5dc..5ca4844 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp @@ -60,4 +60,4 @@ nvinfer1::ILayer* routeLayer( } return output; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/route_layer.h b/nvdsinfer_custom_impl_Yolo/layers/route_layer.h index 63c6bd6..409beee 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/route_layer.h +++ b/nvdsinfer_custom_impl_Yolo/layers/route_layer.h @@ -15,4 +15,4 @@ nvinfer1::ILayer* routeLayer( std::vector tensorOutputs, nvinfer1::INetworkDefinition* network); -#endif \ No newline at end of file +#endif diff --git a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp index 87ba3f8..725d781 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp @@ -42,4 +42,4 @@ nvinfer1::ILayer* shortcutLayer( assert(output != nullptr); return output; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp b/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp index f268bd2..066cce8 100644 --- a/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp +++ b/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp @@ -21,4 +21,4 @@ 
nvinfer1::ILayer* upsampleLayer( std::string layer_name = "upsample_" + std::to_string(layerIdx); resize_layer->setName(layer_name.c_str()); return resize_layer; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp b/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp index 37391fa..bd2e32c 100644 --- a/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp +++ b/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -42,12 +42,15 @@ static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContext yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg")); - networkInfo.networkType = yoloType; - networkInfo.configFilePath = initParams->customNetworkConfigFilePath; - networkInfo.wtsFilePath = initParams->modelFilePath; - networkInfo.int8CalibPath = initParams->int8CalibrationFilePath; - networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU"); - networkInfo.inputBlobName = "data"; + networkInfo.inputBlobName = "data"; + networkInfo.networkType = yoloType; + networkInfo.configFilePath = initParams->customNetworkConfigFilePath; + networkInfo.wtsFilePath = initParams->modelFilePath; + networkInfo.int8CalibPath = initParams->int8CalibrationFilePath; + networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU"); + networkInfo.numDetectedClasses = initParams->numDetectedClasses; + networkInfo.clusterMode = initParams->clusterMode; + networkInfo.iouThreshold = initParams->perClassDetectionParams->nmsIOUThreshold; if(initParams->networkMode == 0) { networkInfo.networkMode = "FP32"; diff --git a/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp b/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp index 6c6df56..e31888a 100644 --- a/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp +++ b/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -37,103 +37,36 @@ extern "C" bool NvDsInferParseYolo( NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); -static std::vector -nonMaximumSuppression(const float nmsThresh, std::vector binfo) -{ - auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float { - if (x1min > x2min) - { - std::swap(x1min, x2min); - std::swap(x1max, x2max); - } - return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min; - }; - auto computeIoU - = [&overlap1D](NvDsInferParseObjectInfo& bbox1, NvDsInferParseObjectInfo& bbox2) -> float { - float overlapX - = overlap1D(bbox1.left, bbox1.left + bbox1.width, bbox2.left, bbox2.left + bbox2.width); - float overlapY - = overlap1D(bbox1.top, bbox1.top + bbox1.height, bbox2.top, bbox2.top + bbox2.height); - float area1 = (bbox1.width) * (bbox1.height); - float area2 = (bbox2.width) * (bbox2.height); - float overlap2D = overlapX * overlapY; - float u = area1 + area2 - overlap2D; - return u == 0 ? 
0 : overlap2D / u; - }; - - std::stable_sort(binfo.begin(), binfo.end(), - [](const NvDsInferParseObjectInfo& b1, const NvDsInferParseObjectInfo& b2) { - return b1.detectionConfidence > b2.detectionConfidence; - }); - std::vector out; - for (auto i : binfo) - { - bool keep = true; - for (auto j : out) - { - if (keep) - { - float overlap = computeIoU(i, j); - keep = overlap <= nmsThresh; - } - else - break; - } - if (keep) out.push_back(i); - } - return out; -} - -static std::vector -nmsAllClasses(const float nmsThresh, - std::vector& binfo, - const uint numClasses) -{ - std::vector result; - std::vector> splitBoxes(numClasses); - for (auto& box : binfo) - { - splitBoxes.at(box.classId).push_back(box); - } - - for (auto& boxes : splitBoxes) - { - boxes = nonMaximumSuppression(nmsThresh, boxes); - result.insert(result.end(), boxes.begin(), boxes.end()); - } - return result; -} - -static NvDsInferParseObjectInfo convertBBox(const float& bx, const float& by, const float& bw, - const float& bh, const int& stride, const uint& netW, - const uint& netH) +static NvDsInferParseObjectInfo convertBBox( + const float& bx, const float& by, const float& bw, + const float& bh, const uint& netW, const uint& netH) { NvDsInferParseObjectInfo b; - float xCenter = bx * stride; - float yCenter = by * stride; - float x0 = xCenter - bw / 2; - float y0 = yCenter - bh / 2; - float x1 = x0 + bw; - float y1 = y0 + bh; - x0 = clamp(x0, 0, netW); - y0 = clamp(y0, 0, netH); + float x1 = bx - bw / 2; + float y1 = by - bh / 2; + float x2 = x1 + bw; + float y2 = y1 + bh; + x1 = clamp(x1, 0, netW); y1 = clamp(y1, 0, netH); + x2 = clamp(x2, 0, netW); + y2 = clamp(y2, 0, netH); - b.left = x0; - b.width = clamp(x1 - x0, 0, netW); - b.top = y0; - b.height = clamp(y1 - y0, 0, netH); + b.left = x1; + b.width = clamp(x2 - x1, 0, netW); + b.top = y1; + b.height = clamp(y2 - y1, 0, netH); return b; } -static void addBBoxProposal(const float bx, const float by, const float bw, const float bh, - const uint stride, const uint& netW, const uint& netH, const int maxIndex, - const float maxProb, std::vector& binfo) +static void addBBoxProposal( + const float bx, const float by, const float bw, const float bh, + const uint& netW, const uint& netH, const int maxIndex, + const float maxProb, std::vector& binfo) { - NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH); + NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, netW, netH); if (bbi.width < 1 || bbi.height < 1) return; bbi.detectionConfidence = maxProb; @@ -141,11 +74,10 @@ static void addBBoxProposal(const float bx, const float by, const float bw, cons binfo.push_back(bbi); } -static std::vector -decodeYoloTensor( +static std::vector decodeYoloTensor( const float* detections, - const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes, - const uint numOutputClasses, const uint& netW, const uint& netH, const float confThresh) + const uint gridSizeW, const uint gridSizeH, const uint numBBoxes, + const uint numOutputClasses, const uint& netW, const uint& netH) { std::vector binfo; for (uint y = 0; y < gridSizeH; ++y) { @@ -163,92 +95,32 @@ decodeYoloTensor( = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)]; const float bh = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)]; - const float maxProb = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)]; - const int maxIndex = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 5)]; - if (maxProb > 
confThresh) - { - addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo); - } + addBBoxProposal(bx, by, bw, bh, netW, netH, maxIndex, maxProb, binfo); } } } return binfo; } -static std::vector -decodeYoloV2Tensor( - const float* detections, - const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes, - const uint numOutputClasses, const uint& netW, const uint& netH) -{ - std::vector binfo; - for (uint y = 0; y < gridSizeH; ++y) { - for (uint x = 0; x < gridSizeW; ++x) { - for (uint b = 0; b < numBBoxes; ++b) - { - const int numGridCells = gridSizeH * gridSizeW; - const int bbindex = y * gridSizeW + x; - - const float bx - = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)]; - const float by - = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)]; - const float bw - = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)] * stride; - const float bh - = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)] * stride; - - const float maxProb - = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)]; - - const int maxIndex - = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 5)]; - - addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo); - } - } - } - return binfo; -} - -static inline std::vector -SortLayers(const std::vector & outputLayersInfo) -{ - std::vector outLayers; - for (auto const &layer : outputLayersInfo) { - outLayers.push_back (&layer); - } - std::sort(outLayers.begin(), outLayers.end(), - [](const NvDsInferLayerInfo* a, const NvDsInferLayerInfo* b) { - return a->inferDims.d[1] < b->inferDims.d[1]; - }); - return outLayers; -} - -static bool NvDsInferParseYolo( +static bool NvDsInferParseCustomYolo( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList, const uint &numBBoxes, - const uint &numClasses, - const float &betaNMS) + const uint &numClasses) { - if (outputLayersInfo.empty()) { - std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;; + if (outputLayersInfo.empty()) + { + std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; return false; } - const float kCONF_THRESH = detectionParams.perClassThreshold[0]; - - const std::vector sortedLayers = - SortLayers (outputLayersInfo); - if (numClasses != detectionParams.numClassesConfigured) { std::cerr << "WARNING: Num classes mismatch. 
Configured: " @@ -258,57 +130,23 @@ static bool NvDsInferParseYolo( std::vector objects; - for (uint idx = 0; idx < sortedLayers.size(); ++idx) { - const NvDsInferLayerInfo &layer = *sortedLayers[idx]; // 255 x Grid x Grid + for (uint idx = 0; idx < outputLayersInfo.size(); ++idx) + { + const NvDsInferLayerInfo &layer = outputLayersInfo[idx]; assert(layer.inferDims.numDims == 3); const uint gridSizeH = layer.inferDims.d[1]; const uint gridSizeW = layer.inferDims.d[2]; - const uint stride = DIVUP(networkInfo.width, gridSizeW); std::vector outObjs = - decodeYoloTensor((const float*)(layer.buffer), gridSizeW, gridSizeH, stride, numBBoxes, - numClasses, networkInfo.width, networkInfo.height, kCONF_THRESH); + decodeYoloTensor( + (const float*)(layer.buffer), + gridSizeW, gridSizeH, numBBoxes, numClasses, + networkInfo.width, networkInfo.height); + objects.insert(objects.end(), outObjs.begin(), outObjs.end()); } - objectList.clear(); - objectList = nmsAllClasses(betaNMS, objects, numClasses); - - return true; -} - -static bool NvDsInferParseYoloV2( - std::vector const& outputLayersInfo, - NvDsInferNetworkInfo const& networkInfo, - NvDsInferParseDetectionParams const& detectionParams, - std::vector& objectList, - const uint &numBBoxes, - const uint &numClasses) -{ - if (outputLayersInfo.empty()) { - std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;; - return false; - } - - const NvDsInferLayerInfo &layer = outputLayersInfo[0]; - - if (numClasses != detectionParams.numClassesConfigured) - { - std::cerr << "WARNING: Num classes mismatch. Configured: " - << detectionParams.numClassesConfigured - << ", detected by network: " << numClasses << std::endl; - } - - assert(layer.inferDims.numDims == 3); - const uint gridSizeH = layer.inferDims.d[1]; - const uint gridSizeW = layer.inferDims.d[2]; - const uint stride = DIVUP(networkInfo.width, gridSizeW); - - std::vector objects = - decodeYoloV2Tensor((const float*)(layer.buffer), gridSizeW, gridSizeH, stride, numBBoxes, - numClasses, networkInfo.width, networkInfo.height); - objectList = objects; return true; @@ -320,19 +158,11 @@ extern "C" bool NvDsInferParseYolo( NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { - int model_type = kMODEL_TYPE; - int num_bboxes = kNUM_BBOXES; - int num_classes = kNUM_CLASSES; - float beta_nms = kBETA_NMS; + uint numBBoxes = kNUM_BBOXES; + uint numClasses = kNUM_CLASSES; - if (model_type != 0) { - return NvDsInferParseYolo (outputLayersInfo, networkInfo, detectionParams, objectList, - num_bboxes, num_classes, beta_nms); - } - else { - return NvDsInferParseYoloV2 (outputLayersInfo, networkInfo, detectionParams, objectList, - num_bboxes, num_classes); - } + return NvDsInferParseCustomYolo ( + outputLayersInfo, networkInfo, detectionParams, objectList, numBBoxes, numClasses); } -CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); \ No newline at end of file +CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); diff --git a/nvdsinfer_custom_impl_Yolo/utils.cpp b/nvdsinfer_custom_impl_Yolo/utils.cpp index 2fd7911..8830a0f 100644 --- a/nvdsinfer_custom_impl_Yolo/utils.cpp +++ b/nvdsinfer_custom_impl_Yolo/utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -149,12 +149,6 @@ int getNumChannels(nvinfer1::ITensor* t) return d.d[0]; } -uint64_t get3DTensorVolume(nvinfer1::Dims inputDims) -{ - assert(inputDims.nbDims == 3); - return inputDims.d[0] * inputDims.d[1] * inputDims.d[2]; -} - void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr) { @@ -162,4 +156,4 @@ void printLayerInfo(std::string layerIndex, std::string layerName, std::string l std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput; std::cout << std::setw(7) << std::left << weightPtr << std::endl; -} \ No newline at end of file +} diff --git a/nvdsinfer_custom_impl_Yolo/utils.h b/nvdsinfer_custom_impl_Yolo/utils.h index 2e479f7..5013763 100644 --- a/nvdsinfer_custom_impl_Yolo/utils.h +++ b/nvdsinfer_custom_impl_Yolo/utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -35,17 +35,12 @@ #include "NvInfer.h" -#define UNUSED(expr) (void)(expr) -#define DIVUP(n, d) ((n) + (d)-1) / (d) - std::string trim(std::string s); float clamp(const float val, const float minVal, const float maxVal); bool fileExists(const std::string fileName, bool verbose = true); std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType); std::string dimsToString(const nvinfer1::Dims d); -void displayDimType(const nvinfer1::Dims d); int getNumChannels(nvinfer1::ITensor* t); -uint64_t get3DTensorVolume(nvinfer1::Dims inputDims); void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr); diff --git a/nvdsinfer_custom_impl_Yolo/yolo.cpp b/nvdsinfer_custom_impl_Yolo/yolo.cpp index d79530f..de343ec 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.cpp +++ b/nvdsinfer_custom_impl_Yolo/yolo.cpp @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
- + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -32,17 +32,23 @@ #endif Yolo::Yolo(const NetworkInfo& networkInfo) - : m_NetworkType(networkInfo.networkType), // YOLO type - m_ConfigFilePath(networkInfo.configFilePath), // YOLO cfg - m_WtsFilePath(networkInfo.wtsFilePath), // YOLO weights - m_Int8CalibPath(networkInfo.int8CalibPath), // INT8 calibration path - m_NetworkMode(networkInfo.networkMode), // FP32, INT8, FP16 - m_DeviceType(networkInfo.deviceType), // kDLA, kGPU - m_InputBlobName(networkInfo.inputBlobName), // data + : m_InputBlobName(networkInfo.inputBlobName), + m_NetworkType(networkInfo.networkType), + m_ConfigFilePath(networkInfo.configFilePath), + m_WtsFilePath(networkInfo.wtsFilePath), + m_Int8CalibPath(networkInfo.int8CalibPath), + m_DeviceType(networkInfo.deviceType), + m_NumDetectedClasses(networkInfo.numDetectedClasses), + m_ClusterMode(networkInfo.clusterMode), + m_IouThreshold(networkInfo.iouThreshold), + m_NetworkMode(networkInfo.networkMode), m_InputH(0), m_InputW(0), m_InputC(0), - m_InputSize(0) + m_InputSize(0), + m_NumClasses(0), + m_LetterBox(0), + m_BetaNMS(networkInfo.iouThreshold) {} Yolo::~Yolo() @@ -65,9 +71,19 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder, nvinfer1 std::cout << "Building the TensorRT Engine" << std::endl; - if (m_LetterBox == 1) { - std::cout << "\nNOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file to get better accuracy\n" << std::endl; + if (m_NumClasses != m_NumDetectedClasses) { + std::cout << "\nNOTE: Number of classes mismatch, make sure to set num-detected-classes=" << m_NumClasses << " in config_infer file" << std::endl; } + if (m_LetterBox == 1) { + std::cout << "\nNOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file to get better accuracy" << std::endl; + } + if (m_BetaNMS != m_IouThreshold) { + std::cout << "\nNOTE: beta_nms is set in cfg file, make sure to set nms-iou-threshold=" << m_BetaNMS << " in config_infer file to get better accuracy" << std::endl; + } + if (m_ClusterMode != 2) { + std::cout << "\nNOTE: Wrong cluster-mode is set, make sure to set cluster-mode=2 in config_infer file" << std::endl; + } + std::cout << "" << std::endl; if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) { assert(builder->platformHasFastInt8()); @@ -314,43 +330,31 @@ NvDsInferStatus Yolo::buildYoloNetwork( } else if (m_ConfigBlocks.at(i).at("type") == "yolo") { - uint model_type; + uint modelType = 1; + uint newCoords = 0; + float scaleXY = 1.0; if (m_NetworkType.find("yolor") != std::string::npos) { - model_type = 2; + modelType = 2; } - else { - model_type = 1; - } - nvinfer1::Dims prevTensorDims = previous->getDimensions(); - TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); - curYoloTensor.gridSizeY = prevTensorDims.d[1]; - curYoloTensor.gridSizeX = prevTensorDims.d[2]; - curYoloTensor.stride = m_InputH / curYoloTensor.gridSizeY; - m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSizeY - * curYoloTensor.gridSizeX - * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); - std::string layerName = "yolo_" + std::to_string(i); - curYoloTensor.blobName = layerName; - int new_coords = 0; - float scale_x_y = 1; - float beta_nms = 0.45; if (m_ConfigBlocks.at(i).find("new_coords") != m_ConfigBlocks.at(i).end()) { - new_coords = std::stoi(m_ConfigBlocks.at(i).at("new_coords")); + newCoords = std::stoi(m_ConfigBlocks.at(i).at("new_coords")); } if 
(m_ConfigBlocks.at(i).find("scale_x_y") != m_ConfigBlocks.at(i).end()) { - scale_x_y = std::stof(m_ConfigBlocks.at(i).at("scale_x_y")); + scaleXY = std::stof(m_ConfigBlocks.at(i).at("scale_x_y")); } if (m_ConfigBlocks.at(i).find("beta_nms") != m_ConfigBlocks.at(i).end()) { - beta_nms = std::stof(m_ConfigBlocks.at(i).at("beta_nms")); + m_BetaNMS = std::stof(m_ConfigBlocks.at(i).at("beta_nms")); } + + std::string layerName = "yolo_" + std::to_string(i); + nvinfer1::Dims prevTensorDims = previous->getDimensions(); + TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); + m_NumClasses = curYoloTensor.numClasses; + curYoloTensor.blobName = layerName; nvinfer1::IPluginV2* yoloPlugin - = new YoloLayer(curYoloTensor.numBBoxes, - curYoloTensor.numClasses, - curYoloTensor.gridSizeX, - curYoloTensor.gridSizeY, - model_type, new_coords, scale_x_y, beta_nms, - curYoloTensor.anchors, - curYoloTensor.masks); + = new YoloLayer(curYoloTensor.numBBoxes, curYoloTensor.numClasses, m_InputW, m_InputH, + prevTensorDims.d[2], prevTensorDims.d[1], modelType, newCoords, scaleXY, + curYoloTensor.anchors, curYoloTensor.mask); assert(yoloPlugin != nullptr); nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(&previous, 1, *yoloPlugin); @@ -368,26 +372,16 @@ NvDsInferStatus Yolo::buildYoloNetwork( ++outputTensorCount; } - //YOLOv2 support else if (m_ConfigBlocks.at(i).at("type") == "region") { + std::vector mask; + std::string layerName = "region_" + std::to_string(i); nvinfer1::Dims prevTensorDims = previous->getDimensions(); TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount); - curRegionTensor.gridSizeY = prevTensorDims.d[1]; - curRegionTensor.gridSizeX = prevTensorDims.d[2]; - curRegionTensor.stride = m_InputH / curRegionTensor.gridSizeY; - m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSizeY - * curRegionTensor.gridSizeX - * (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses)); - std::string layerName = "region_" + std::to_string(i); + m_NumClasses = curRegionTensor.numClasses; curRegionTensor.blobName = layerName; - std::vector mask; nvinfer1::IPluginV2* regionPlugin - = new YoloLayer(curRegionTensor.numBBoxes, - curRegionTensor.numClasses, - curRegionTensor.gridSizeX, - curRegionTensor.gridSizeY, - 0, 0, 1.0, 0, - curRegionTensor.anchors, + = new YoloLayer(curRegionTensor.numBBoxes, curRegionTensor.numClasses, m_InputW, m_InputH, + prevTensorDims.d[2], prevTensorDims.d[1], 0, 0, 1.0, curRegionTensor.anchors, mask); assert(regionPlugin != nullptr); nvinfer1::IPluginV2Layer* region = @@ -531,20 +525,20 @@ void Yolo::parseConfigBlocks() if (npos != -1) { int mask = std::stoul(trim(maskString.substr(0, npos))); - outputTensor.masks.push_back(mask); + outputTensor.mask.push_back(mask); maskString.erase(0, npos + 1); } else { int mask = std::stoul(trim(maskString)); - outputTensor.masks.push_back(mask); + outputTensor.mask.push_back(mask); break; } } } - outputTensor.numBBoxes = outputTensor.masks.size() > 0 - ? outputTensor.masks.size() + outputTensor.numBBoxes = outputTensor.mask.size() > 0 + ? 
outputTensor.mask.size() : std::stoul(trim(block.at("num"))); outputTensor.numClasses = std::stoul(block.at("classes")); m_OutputTensors.push_back(outputTensor); diff --git a/nvdsinfer_custom_impl_Yolo/yolo.h b/nvdsinfer_custom_impl_Yolo/yolo.h index 25fd2f9..122badc 100644 --- a/nvdsinfer_custom_impl_Yolo/yolo.h +++ b/nvdsinfer_custom_impl_Yolo/yolo.h @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -40,28 +40,25 @@ struct NetworkInfo { + std::string inputBlobName; std::string networkType; std::string configFilePath; std::string wtsFilePath; std::string int8CalibPath; - std::string networkMode; std::string deviceType; - std::string inputBlobName; + uint numDetectedClasses; + int clusterMode; + float iouThreshold; + std::string networkMode; }; struct TensorInfo { std::string blobName; - uint stride{0}; - uint gridSizeY{0}; - uint gridSizeX{0}; - uint numClasses{0}; - uint numBBoxes{0}; - uint64_t volume{0}; - std::vector masks; + uint numBBoxes {0}; + uint numClasses {0}; std::vector anchors; - int bindingIndex{-1}; - float* hostBuffer{nullptr}; + std::vector mask; }; class Yolo : public IModelParser { @@ -78,21 +75,27 @@ public: nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config); protected: + const std::string m_InputBlobName; const std::string m_NetworkType; const std::string m_ConfigFilePath; const std::string m_WtsFilePath; const std::string m_Int8CalibPath; - const std::string m_NetworkMode; const std::string m_DeviceType; - const std::string m_InputBlobName; - std::vector m_OutputTensors; - std::vector> m_ConfigBlocks; + const uint m_NumDetectedClasses; + const int m_ClusterMode; + const float m_IouThreshold; + const std::string m_NetworkMode; + uint m_InputH; uint m_InputW; uint m_InputC; uint64_t m_InputSize; + uint m_NumClasses; uint m_LetterBox; + float m_BetaNMS; + std::vector m_OutputTensors; + std::vector> m_ConfigBlocks; std::vector m_TrtWeights; private: @@ -104,4 +107,4 @@ private: void destroyNetworkUtils(); }; -#endif // _YOLO_H_ \ No newline at end of file +#endif // _YOLO_H_ diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward.cu b/nvdsinfer_custom_impl_Yolo/yoloForward.cu index d534c28..3e15fa6 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward.cu @@ -11,8 +11,10 @@ inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } -__global__ void gpuYoloLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, - const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) +__global__ void gpuYoloLayer( + const float* input, float* output, const uint netWidth, const uint netHeight, const uint gridSizeX, + const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, + const float* anchors, const int* mask) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; @@ -30,16 +32,20 @@ __global__ void gpuYoloLayer(const float* input, float* output, const uint gridS const float beta = -0.5 * (scaleXY - 1); output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id; + = 
(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + * alpha + beta + x_id) * netWidth / gridSizeX; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id; + = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + * alpha + beta + y_id) * netHeight / gridSizeY; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] - = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[mask[z_id] * 2]; + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) + * anchors[mask[z_id] * 2]; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] - = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[mask[z_id] * 2 + 1]; + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) + * anchors[mask[z_id] * 2 + 1]; const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); @@ -66,24 +72,28 @@ __global__ void gpuYoloLayer(const float* input, float* output, const uint gridS = maxIndex; } -cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const float scaleXY, const void* anchors, const void* mask); +cudaError_t cudaYoloLayer( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); -cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const float scaleXY, const void* anchors, const void* mask) +cudaError_t cudaYoloLayer( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) { dim3 threads_per_block(16, 16, 4); dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, (numBBoxes / threads_per_block.z) + 1); + for (unsigned int batch = 0; batch < batchSize; ++batch) { gpuYoloLayer<<>>( reinterpret_cast(input) + (batch * outputSize), - reinterpret_cast(output) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, - numBBoxes, scaleXY, reinterpret_cast(anchors), reinterpret_cast(mask)); + reinterpret_cast(output) + (batch * outputSize), + netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, + reinterpret_cast(anchors), reinterpret_cast(mask)); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu index e2a2a90..313bb78 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu @@ -9,8 +9,10 @@ #include #include -__global__ void gpuYoloLayer_nc(const float* input, float* output, const uint gridSizeX, const uint 
gridSizeY, const uint numOutputClasses, - const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) +__global__ void gpuYoloLayer_nc( + const float* input, float* output, const uint netWidth, const uint netHeight, const uint gridSizeX, + const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, + const float* anchors, const int* mask) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; @@ -28,16 +30,20 @@ __global__ void gpuYoloLayer_nc(const float* input, float* output, const uint gr const float beta = -0.5 * (scaleXY - 1); output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] - = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id; + = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] + * alpha + beta + x_id) * netWidth / gridSizeX; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] - = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id; + = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] + * alpha + beta + y_id) * netHeight / gridSizeY; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] - = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * anchors[mask[z_id] * 2]; + = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) + * anchors[mask[z_id] * 2]; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] - = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) * anchors[mask[z_id] * 2 + 1]; + = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) + * anchors[mask[z_id] * 2 + 1]; const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; @@ -64,24 +70,28 @@ __global__ void gpuYoloLayer_nc(const float* input, float* output, const uint gr = maxIndex; } -cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const float scaleXY, const void* anchors, const void* mask); +cudaError_t cudaYoloLayer_nc( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); -cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const float scaleXY, const void* anchors, const void* mask) +cudaError_t cudaYoloLayer_nc( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) { dim3 threads_per_block(16, 16, 4); dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, (numBBoxes / threads_per_block.z) + 1); + for (unsigned int batch = 0; batch < batchSize; ++batch) { 
gpuYoloLayer_nc<<>>( reinterpret_cast(input) + (batch * outputSize), - reinterpret_cast(output) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, - numBBoxes, scaleXY, reinterpret_cast(anchors), reinterpret_cast(mask)); + reinterpret_cast(output) + (batch * outputSize), + netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, + reinterpret_cast(anchors), reinterpret_cast(mask)); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu index 5b659c1..7142e61 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_r.cu @@ -11,8 +11,10 @@ inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } -__global__ void gpuYoloLayer_r(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, - const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) +__global__ void gpuYoloLayer_r( + const float* input, float* output, const uint netWidth, const uint netHeight, const uint gridSizeX, + const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float scaleXY, + const float* anchors, const int* mask) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; @@ -30,16 +32,20 @@ __global__ void gpuYoloLayer_r(const float* input, float* output, const uint gri const float beta = -0.5 * (scaleXY - 1); output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id; + = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + * alpha + beta + x_id) * netWidth / gridSizeX; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id; + = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + * alpha + beta + y_id) * netHeight / gridSizeY; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] - = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * 2, 2) * anchors[mask[z_id] * 2]; + = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * 2, 2) + * anchors[mask[z_id] * 2]; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] - = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * 2, 2) * anchors[mask[z_id] * 2 + 1]; + = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * 2, 2) + * anchors[mask[z_id] * 2 + 1]; const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); @@ -66,24 +72,28 @@ __global__ void gpuYoloLayer_r(const float* input, float* output, const uint gri = maxIndex; } -cudaError_t cudaYoloLayer_r(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const float scaleXY, const void* anchors, const void* mask); +cudaError_t cudaYoloLayer_r( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& 
numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); -cudaError_t cudaYoloLayer_r(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const float scaleXY, const void* anchors, const void* mask) +cudaError_t cudaYoloLayer_r( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) { dim3 threads_per_block(16, 16, 4); dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, (numBBoxes / threads_per_block.z) + 1); + for (unsigned int batch = 0; batch < batchSize; ++batch) { gpuYoloLayer_r<<>>( reinterpret_cast(input) + (batch * outputSize), - reinterpret_cast(output) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, - numBBoxes, scaleXY, reinterpret_cast(anchors), reinterpret_cast(mask)); + reinterpret_cast(output) + (batch * outputSize), + netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, + reinterpret_cast(anchors), reinterpret_cast(mask)); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu index 1f1b28e..186cb80 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu +++ b/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu @@ -11,8 +11,9 @@ inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } -__device__ void softmaxGPU(const float* input, const int bbindex, const int numGridCells, - uint z_id, const uint numOutputClasses, float temp, float* output) +__device__ void softmaxGPU( + const float* input, const int bbindex, const int numGridCells, uint z_id, + const uint numOutputClasses, float temp, float* output) { int i; float sum = 0; @@ -31,8 +32,10 @@ __device__ void softmaxGPU(const float* input, const int bbindex, const int numG } } -__global__ void gpuRegionLayer(const float* input, float* output, float* softmax, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, - const uint numBBoxes, const float* anchors) +__global__ void gpuRegionLayer( + const float* input, float* output, float* softmax, const uint netWidth, const uint netHeight, + const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, + const float* anchors) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; @@ -47,16 +50,20 @@ __global__ void gpuRegionLayer(const float* input, float* output, float* softmax const int bbindex = y_id * gridSizeX + x_id; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id; + = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + + x_id) * netWidth / gridSizeX; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id; + = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + + y_id) * netHeight / gridSizeY; output[bbindex + numGridCells * (z_id * (5 + 
numOutputClasses) + 2)] - = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[z_id * 2]; + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) + * anchors[z_id * 2] * netWidth / gridSizeX; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] - = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[z_id * 2 + 1]; + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) + * anchors[z_id * 2 + 1] * netHeight / gridSizeY; softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax); @@ -85,25 +92,29 @@ __global__ void gpuRegionLayer(const float* input, float* output, float* softmax = maxIndex; } -cudaError_t cudaYoloLayer_v2(const void* input, void* output, void* softmax, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const void* anchors); +cudaError_t cudaRegionLayer( + const void* input, void* output, void* softmax, const uint& batchSize, const uint& netWidth, + const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, + const uint& numBBoxes, uint64_t& outputSize, const void* anchors, cudaStream_t stream); -cudaError_t cudaYoloLayer_v2(const void* input, void* output, void* softmax, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, - const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, - const void* anchors) +cudaError_t cudaRegionLayer( + const void* input, void* output, void* softmax, const uint& batchSize, const uint& netWidth, + const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, + const uint& numBBoxes, uint64_t& outputSize, const void* anchors, cudaStream_t stream) { dim3 threads_per_block(16, 16, 4); dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, (numBBoxes / threads_per_block.z) + 1); + for (unsigned int batch = 0; batch < batchSize; ++batch) { gpuRegionLayer<<>>( reinterpret_cast(input) + (batch * outputSize), - reinterpret_cast(output) + (batch * outputSize), - reinterpret_cast(softmax) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, - numBBoxes, reinterpret_cast(anchors)); + reinterpret_cast(output) + (batch * outputSize), + reinterpret_cast(softmax) + (batch * outputSize), + netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, + reinterpret_cast(anchors)); } return cudaGetLastError(); } diff --git a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp index 69de790..735c527 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp +++ b/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
- + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -29,10 +29,8 @@ #include #include -int kMODEL_TYPE; -int kNUM_BBOXES; -int kNUM_CLASSES; -float kBETA_NMS; +uint kNUM_BBOXES; +uint kNUM_CLASSES; namespace { template @@ -50,42 +48,40 @@ namespace { } } -cudaError_t cudaYoloLayer_r ( - const void* input, void* output, const uint& batchSize, - const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const float scaleXY, - const void* anchors, const void* mask); +cudaError_t cudaYoloLayer_r( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); -cudaError_t cudaYoloLayer_nc ( - const void* input, void* output, const uint& batchSize, - const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const float scaleXY, - const void* anchors, const void* mask); +cudaError_t cudaYoloLayer_nc( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); -cudaError_t cudaYoloLayer ( - const void* input, void* output, const uint& batchSize, - const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const float scaleXY, - const void* anchors, const void* mask); +cudaError_t cudaYoloLayer( + const void* input, void* output, const uint& batchSize, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, + uint64_t& outputSize, const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); -cudaError_t cudaYoloLayer_v2 ( - const void* input, void* output, void* softmax, const uint& batchSize, - const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, - const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const void* anchors); +cudaError_t cudaRegionLayer( + const void* input, void* output, void* softmax, const uint& batchSize, const uint& netWidth, + const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, + const uint& numBBoxes, uint64_t& outputSize, const void* anchors, cudaStream_t stream); YoloLayer::YoloLayer (const void* data, size_t length) { const char *d = static_cast(data); - read(d, m_NumBoxes); + + read(d, m_NumBBoxes); read(d, m_NumClasses); + read(d, m_NetWidth); + read(d, m_NetHeight); read(d, m_GridSizeX); read(d, m_GridSizeY); - read(d, m_OutputSize); - read(d, m_Type); read(d, m_NewCoords); read(d, m_ScaleXY); - read(d, m_BetaNMS); + read(d, m_OutputSize); uint anchorsSize; read(d, anchorsSize); @@ -103,63 +99,71 @@ YoloLayer::YoloLayer (const void* data, size_t length) m_Mask.push_back(result); } - kMODEL_TYPE = m_Type; - kNUM_BBOXES = m_NumBoxes; - kNUM_CLASSES = m_NumClasses; - kBETA_NMS = m_BetaNMS; - if (m_Anchors.size() > 0) { - float* m_anchors = m_Anchors.data(); - CHECK(cudaMallocHost(&mAnchors, m_Anchors.size() * 
sizeof(float))); - CHECK(cudaMemcpy(mAnchors, m_anchors, m_Anchors.size() * sizeof(float), cudaMemcpyHostToDevice)); + float* anchors = m_Anchors.data(); + CUDA_CHECK(cudaMallocHost(&p_Anchors, m_Anchors.size() * sizeof(float))); + CUDA_CHECK(cudaMemcpy(p_Anchors, anchors, m_Anchors.size() * sizeof(float), cudaMemcpyHostToDevice)); } if (m_Mask.size() > 0) { - int* m_mask = m_Mask.data(); - CHECK(cudaMallocHost(&mMask, m_Mask.size() * sizeof(int))); - CHECK(cudaMemcpy(mMask, m_mask, m_Mask.size() * sizeof(int), cudaMemcpyHostToDevice)); + int* mask = m_Mask.data(); + CUDA_CHECK(cudaMallocHost(&p_Mask, m_Mask.size() * sizeof(int))); + CUDA_CHECK(cudaMemcpy(p_Mask, mask, m_Mask.size() * sizeof(int), cudaMemcpyHostToDevice)); } + + kNUM_BBOXES = m_NumBBoxes; + kNUM_CLASSES = m_NumClasses; }; YoloLayer::YoloLayer ( - const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint modelType, const uint newCoords, const float scaleXY, const float betaNMS, const std::vector anchors, std::vector mask) : - m_NumBoxes(numBoxes), + const uint& numBBoxes, const uint& numClasses, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& modelType, const uint& newCoords, + const float& scaleXY, const std::vector anchors, + const std::vector mask) : + m_NumBBoxes(numBBoxes), m_NumClasses(numClasses), + m_NetWidth(netWidth), + m_NetHeight(netHeight), m_GridSizeX(gridSizeX), m_GridSizeY(gridSizeY), m_Type(modelType), m_NewCoords(newCoords), m_ScaleXY(scaleXY), - m_BetaNMS(betaNMS), m_Anchors(anchors), m_Mask(mask) { - assert(m_NumBoxes > 0); + assert(m_NumBBoxes > 0); assert(m_NumClasses > 0); + assert(m_NetWidth > 0); + assert(m_NetHeight > 0); assert(m_GridSizeX > 0); assert(m_GridSizeY > 0); - m_OutputSize = m_GridSizeX * m_GridSizeY * (m_NumBoxes * (4 + 1 + m_NumClasses)); + + m_OutputSize = m_GridSizeX * m_GridSizeY * (m_NumBBoxes * (4 + 1 + m_NumClasses)); if (m_Anchors.size() > 0) { - float* m_anchors = m_Anchors.data(); - CHECK(cudaMallocHost(&mAnchors, m_Anchors.size() * sizeof(float))); - CHECK(cudaMemcpy(mAnchors, m_anchors, m_Anchors.size() * sizeof(float), cudaMemcpyHostToDevice)); + float* anchors = m_Anchors.data(); + CUDA_CHECK(cudaMallocHost(&p_Anchors, m_Anchors.size() * sizeof(float))); + CUDA_CHECK(cudaMemcpy(p_Anchors, anchors, m_Anchors.size() * sizeof(float), cudaMemcpyHostToDevice)); } if (m_Mask.size() > 0) { - int* m_mask = m_Mask.data(); - CHECK(cudaMallocHost(&mMask, m_Mask.size() * sizeof(int))); - CHECK(cudaMemcpy(mMask, m_mask, m_Mask.size() * sizeof(int), cudaMemcpyHostToDevice)); + int* mask = m_Mask.data(); + CUDA_CHECK(cudaMallocHost(&p_Mask, m_Mask.size() * sizeof(int))); + CUDA_CHECK(cudaMemcpy(p_Mask, mask, m_Mask.size() * sizeof(int), cudaMemcpyHostToDevice)); } + + kNUM_BBOXES = m_NumBBoxes; + kNUM_CLASSES = m_NumClasses; }; YoloLayer::~YoloLayer() { if (m_Anchors.size() > 0) { - CHECK(cudaFreeHost(mAnchors)); + CUDA_CHECK(cudaFreeHost(p_Anchors)); } if (m_Mask.size() > 0) { - CHECK(cudaFreeHost(mMask)); + CUDA_CHECK(cudaFreeHost(p_Mask)); } } @@ -185,73 +189,79 @@ YoloLayer::configureWithFormat ( nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept { assert(nbInputs == 1); - assert (format == nvinfer1::PluginFormat::kLINEAR); + assert(format == nvinfer1::PluginFormat::kLINEAR); assert(inputDims != nullptr); } -int YoloLayer::enqueue( +int YoloLayer::enqueue ( int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t 
stream) noexcept { if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0 - CHECK(cudaYoloLayer_r( - inputs[0], outputs[0], batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes, - m_OutputSize, stream, 2.0, mAnchors, mMask)); + CUDA_CHECK(cudaYoloLayer_r( + inputs[0], outputs[0], batchSize, m_NetWidth, m_NetHeight, m_GridSizeX, m_GridSizeY, + m_NumClasses, m_NumBBoxes, m_OutputSize, 2.0, p_Anchors, p_Mask, stream)); } else if (m_Type == 1) { if (m_NewCoords) { - CHECK(cudaYoloLayer_nc( - inputs[0], outputs[0], batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes, - m_OutputSize, stream, m_ScaleXY, mAnchors, mMask)); + CUDA_CHECK(cudaYoloLayer_nc( + inputs[0], outputs[0], batchSize, m_NetWidth, m_NetHeight, m_GridSizeX, m_GridSizeY, + m_NumClasses, m_NumBBoxes, m_OutputSize, m_ScaleXY, p_Anchors, p_Mask, stream)); } else { - CHECK(cudaYoloLayer( - inputs[0], outputs[0], batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes, - m_OutputSize, stream, m_ScaleXY, mAnchors, mMask)); + CUDA_CHECK(cudaYoloLayer( + inputs[0], outputs[0], batchSize, m_NetWidth, m_NetHeight, m_GridSizeX, m_GridSizeY, + m_NumClasses, m_NumBBoxes, m_OutputSize, m_ScaleXY, p_Anchors, p_Mask, stream)); } } else { void* softmax; - CHECK(cudaMallocHost(&softmax, sizeof(outputs[0]))); - CHECK(cudaMemcpy(softmax, outputs[0], sizeof(outputs[0]), cudaMemcpyHostToDevice)); + cudaMallocHost(&softmax, sizeof(outputs[0])); + cudaMemcpy(softmax, outputs[0], sizeof(outputs[0]), cudaMemcpyHostToDevice); - CHECK(cudaYoloLayer_v2( - inputs[0], outputs[0], softmax, batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes, - m_OutputSize, stream, mAnchors)); + CUDA_CHECK(cudaRegionLayer( + inputs[0], outputs[0], softmax, batchSize, m_NetWidth, m_NetHeight, m_GridSizeX, m_GridSizeY, + m_NumClasses, m_NumBBoxes, m_OutputSize, p_Anchors, stream)); - CHECK(cudaFreeHost(softmax)); + CUDA_CHECK(cudaFreeHost(softmax)); } return 0; } size_t YoloLayer::getSerializationSize() const noexcept { - int anchorsSum = 1; - for (uint i = 0; i < m_Anchors.size(); i++) { - anchorsSum += 1; - } - int maskSum = 1; - for (uint i = 0; i < m_Mask.size(); i++) { - maskSum += 1; - } + size_t totalSize = 0; - return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(m_GridSizeX) + sizeof(m_GridSizeY) + sizeof(m_OutputSize) + sizeof(m_Type) - + sizeof(m_NewCoords) + sizeof(m_ScaleXY) + sizeof(m_BetaNMS) + anchorsSum * sizeof(float) + maskSum * sizeof(int); + totalSize += sizeof(m_NumBBoxes); + totalSize += sizeof(m_NumClasses); + totalSize += sizeof(m_NetWidth); + totalSize += sizeof(m_NetHeight); + totalSize += sizeof(m_GridSizeX); + totalSize += sizeof(m_GridSizeY); + totalSize += sizeof(m_Type); + totalSize += sizeof(m_NewCoords); + totalSize += sizeof(m_ScaleXY); + totalSize += sizeof(m_OutputSize); + totalSize += sizeof(uint) + sizeof(m_Anchors[0]) * m_Anchors.size(); + totalSize += sizeof(uint) + sizeof(m_Mask[0]) * m_Mask.size(); + + return totalSize; } void YoloLayer::serialize(void* buffer) const noexcept { char *d = static_cast(buffer); - write(d, m_NumBoxes); + + write(d, m_NumBBoxes); write(d, m_NumClasses); + write(d, m_NetWidth); + write(d, m_NetHeight); write(d, m_GridSizeX); write(d, m_GridSizeY); - write(d, m_OutputSize); - write(d, m_Type); write(d, m_NewCoords); write(d, m_ScaleXY); - write(d, m_BetaNMS); + write(d, m_OutputSize); uint anchorsSize = m_Anchors.size(); write(d, anchorsSize); @@ -264,16 +274,13 @@ void YoloLayer::serialize(void* buffer) const noexcept for (uint i = 0; i < maskSize; i++) { write(d, 
m_Mask[i]); } - - kMODEL_TYPE = m_Type; - kNUM_BBOXES = m_NumBoxes; - kNUM_CLASSES = m_NumClasses; - kBETA_NMS = m_BetaNMS; } nvinfer1::IPluginV2* YoloLayer::clone() const noexcept { - return new YoloLayer (m_NumBoxes, m_NumClasses, m_GridSizeX, m_GridSizeY, m_Type, m_NewCoords, m_ScaleXY, m_BetaNMS, m_Anchors, m_Mask); + return new YoloLayer ( + m_NumBBoxes, m_NumClasses, m_NetWidth, m_NetHeight, m_GridSizeX, m_GridSizeY, m_Type, + m_NewCoords, m_ScaleXY, m_Anchors, m_Mask); } REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); diff --git a/nvdsinfer_custom_impl_Yolo/yoloPlugins.h b/nvdsinfer_custom_impl_Yolo/yoloPlugins.h index 52fd8da..77ee3cb 100644 --- a/nvdsinfer_custom_impl_Yolo/yoloPlugins.h +++ b/nvdsinfer_custom_impl_Yolo/yoloPlugins.h @@ -18,7 +18,7 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - + * * Edited by Marcos Luciano * https://www.github.com/marcoslucianops */ @@ -36,7 +36,7 @@ #include "NvInferPlugin.h" -#define CHECK(status) \ +#define CUDA_CHECK(status) \ { \ if (status != 0) \ { \ @@ -56,9 +56,11 @@ class YoloLayer : public nvinfer1::IPluginV2 { public: YoloLayer (const void* data, size_t length); - YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, - const uint modelType, const uint newCoords, const float scaleXY, const float betaNMS, - const std::vector anchors, const std::vector mask); + YoloLayer ( + const uint& numBBoxes, const uint& numClasses, const uint& netWidth, const uint& netHeight, + const uint& gridSizeX, const uint& gridSizeY, const uint& modelType, const uint& newCoords, + const float& scaleXY, const std::vector anchors, + const std::vector mask); ~YoloLayer (); const char* getPluginType () const noexcept override { return YOLOLAYER_PLUGIN_NAME; } const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } @@ -95,22 +97,21 @@ public: } private: - uint m_NumBoxes {0}; + std::string m_Namespace {""}; + uint m_NumBBoxes {0}; uint m_NumClasses {0}; + uint m_NetWidth {0}; + uint m_NetHeight {0}; uint m_GridSizeX {0}; uint m_GridSizeY {0}; - uint64_t m_OutputSize {0}; - std::string m_Namespace {""}; - uint m_Type {0}; uint m_NewCoords {0}; float m_ScaleXY {0}; - float m_BetaNMS {0}; std::vector m_Anchors; std::vector m_Mask; - - void* mAnchors; - void* mMask; + uint64_t m_OutputSize {0}; + void* p_Anchors; + void* p_Mask; }; class YoloLayerPluginCreator : public nvinfer1::IPluginCreator @@ -152,9 +153,7 @@ private: std::string m_Namespace {""}; }; -extern int kMODEL_TYPE; -extern int kNUM_BBOXES; -extern int kNUM_CLASSES; -extern float kBETA_NMS; +extern uint kNUM_BBOXES; +extern uint kNUM_CLASSES; #endif // __YOLO_PLUGINS__ diff --git a/readme.md b/readme.md index 2eb889a..e8d1533 100644 --- a/readme.md +++ b/readme.md @@ -2,12 +2,11 @@ NVIDIA DeepStream SDK 6.0 configuration for YOLO models -### Future updates (comming soon, stay tuned) +### Future updates * New documentation for multiple models * DeepStream tutorials * Native PP-YOLO support -* GPU NMS [#71](https://github.com/marcoslucianops/DeepStream-Yolo/issues/71#issuecomment-1046270722]) * Dynamic batch-size ### Improvements on this repository @@ -23,8 +22,9 @@ NVIDIA DeepStream SDK 6.0 configuration for YOLO models * Support for reorg, implicit and channel layers (YOLOR) * YOLOv5 6.0 native support * YOLOR native support -* Models benchmarks +* Models benchmarks (**outdated**) * **GPU 
YOLO Decoder (moved from CPU to GPU to get better performance)** [#138](https://github.com/marcoslucianops/DeepStream-Yolo/issues/138) +* **Improved NMS** [#142](https://github.com/marcoslucianops/DeepStream-Yolo/issues/142) ## @@ -316,9 +316,9 @@ CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo ... # 0=RGB, 1=BGR, 2=GRAYSCALE model-color-format=0 -# CFG +# YOLO cfg custom-network-config=yolov4.cfg -# Weights +# YOLO weights model-file=yolov4.weights # Generated TensorRT model (will be created if it doesn't exist) model-engine-file=model_b1_gpu0_fp32.engine @@ -332,7 +332,9 @@ network-mode=0 num-detected-classes=80 ... [class-attrs-all] -# CONF_THRESH +# IOU threshold +nms-iou-threshold=0.6 +# Score threshold pre-cluster-threshold=0.25 ``` @@ -354,8 +356,6 @@ nvbuf-memory-type=0 config-file=config_infer_primary_yoloV2.txt ``` -**NOTE**: The config_infer_primary.txt file uses cluster-mode=4 and NMS = 0.45 (via code) when beta_nms isn't available (when beta_nms is available, NMS = beta_nms), while the config_infer_primary_yoloV2.txt file uses cluster-mode=2 and nms-iou-threshold=0.45 to set NMS. - ## ### YOLOv5 usage @@ -417,7 +417,9 @@ network-mode=0 num-detected-classes=80 ... [class-attrs-all] -# CONF_THRESH +# IOU threshold +nms-iou-threshold=0.6 +# Score threshold pre-cluster-threshold=0.25 ``` @@ -534,7 +536,9 @@ network-mode=0 num-detected-classes=80 ... [class-attrs-all] -# CONF_THRESH +# IOU threshold +nms-iou-threshold=0.6 +# Score threshold pre-cluster-threshold=0.25 ``` @@ -652,4 +656,4 @@ In C++/Python deepstream-test application, your code need be in osd_sink_pad_buf ## -My projects: https://www.youtube.com/MarcosLucianoTV (new videos and tutorials comming soon) +My projects: https://www.youtube.com/MarcosLucianoTV
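With cluster-mode=2, non-maximum suppression is handled by DeepStream's clustering stage and tuned per class through nms-iou-threshold in [class-attrs-all], instead of being set in the bounding-box parser code. As a rough sketch of what that threshold controls (illustrative only, not code from this repository; the Box struct and the iou/nms helpers are hypothetical names):

```
// Minimal greedy NMS sketch: keep the highest-confidence box, drop any box
// whose IoU with an already-kept box exceeds the threshold (e.g. 0.6).
#include <algorithm>
#include <vector>

struct Box {
  float left, top, width, height, confidence;
};

static float iou(const Box& a, const Box& b)
{
  // Intersection rectangle
  float x1 = std::max(a.left, b.left);
  float y1 = std::max(a.top, b.top);
  float x2 = std::min(a.left + a.width, b.left + b.width);
  float y2 = std::min(a.top + a.height, b.top + b.height);
  float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
  float uni = a.width * a.height + b.width * b.height - inter;
  return uni > 0.0f ? inter / uni : 0.0f;
}

static std::vector<Box> nms(std::vector<Box> boxes, float iouThreshold)
{
  // Sort by confidence, highest first
  std::sort(boxes.begin(), boxes.end(),
            [](const Box& a, const Box& b) { return a.confidence > b.confidence; });
  std::vector<Box> kept;
  for (const Box& candidate : boxes) {
    bool keep = true;
    for (const Box& k : kept) {
      if (iou(candidate, k) > iouThreshold) {
        keep = false;
        break;
      }
    }
    if (keep)
      kept.push_back(candidate);
  }
  return kept;
}
```

DeepStream applies this per class, so a lower nms-iou-threshold suppresses overlapping detections more aggressively, while the 0.6 used in the updated config files only drops a box when it overlaps a higher-confidence detection by more than 60% IoU.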