diff --git a/examples/multiple_inferences/deepstream_app_config.txt b/examples/multiple_inferences/deepstream_app_config.txt index d359a3c..3740377 100644 --- a/examples/multiple_inferences/deepstream_app_config.txt +++ b/examples/multiple_inferences/deepstream_app_config.txt @@ -14,7 +14,7 @@ nvbuf-memory-type=0 [source0] enable=1 type=3 -uri=rtsp://192.168.1.2/Streaming/Channels/101/httppreview +uri=file:///opt/nvidia/deepstream/deepstream-5.1/samples/streams/sample_1080p_h264.mp4 num-sources=1 gpu-id=0 cudadec-memtype=0 diff --git a/examples/multiple_inferences/pgie/config_infer_primary.txt b/examples/multiple_inferences/pgie/config_infer_primary.txt index 2a7711d..e59d5c9 100644 --- a/examples/multiple_inferences/pgie/config_infer_primary.txt +++ b/examples/multiple_inferences/pgie/config_infer_primary.txt @@ -4,10 +4,11 @@ net-scale-factor=0.0039215697906911373 model-color-format=0 custom-network-config=pgie/yolo.cfg model-file=yolo.weights -model-engine-file=model_b1_gpu0_fp16.engine +model-engine-file=model_b1_gpu0_fp32.engine +#int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 -network-mode=2 +network-mode=0 num-detected-classes=2 interval=0 gie-unique-id=1 diff --git a/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/Makefile b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/Makefile index a7884da..f2474bc 100644 --- a/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/Makefile +++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/Makefile @@ -27,13 +27,25 @@ CUDA_VER?= ifeq ($(CUDA_VER),) $(error "CUDA_VER is not set") endif + +OPENCV?= +ifeq ($(OPENCV),) + OPENCV=0 +endif + CC:= g++ NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations -CFLAGS+= -I../../../includes -I/usr/local/cuda-$(CUDA_VER)/include +CFLAGS+= -I/opt/nvidia/deepstream/deepstream-5.1/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include -LIBS:= 
-lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs +ifeq ($(OPENCV), 1) +COMMON= -DOPENCV +CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) +LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) +endif + +LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group INCS:= $(wildcard *.h) @@ -50,6 +62,11 @@ SRCFILES:= nvdsinfer_yolo_engine.cpp \ utils.cpp \ yolo.cpp \ yoloForward.cu + +ifeq ($(OPENCV), 1) +SRCFILES+= calibrator.cpp +endif + TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so TARGET_OBJS:= $(SRCFILES:.cpp=.o) @@ -58,7 +75,7 @@ TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) all: $(TARGET_LIB) %.o: %.cpp $(INCS) Makefile - $(CC) -c -o $@ $(CFLAGS) $< + $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< %.o: %.cu $(INCS) Makefile $(NVCC) -c -o $@ --compiler-options '-fPIC' $<
diff --git a/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/calibrator.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/calibrator.cpp
new file mode 100644
index 0000000..0c5dd63
--- /dev/null
+++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/calibrator.cpp
@@ -0,0 +1,196 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "calibrator.h"
+#include <algorithm>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+
+namespace nvinfer1
+{
+    /**
+     * Creates an INT8 entropy calibrator that feeds TensorRT from a list of images.
+     *
+     * @param batchsize      images per getBatch() call
+     * @param channels       network input channels
+     * @param height         network input height
+     * @param width          network input width
+     * @param letterbox      letter-box mode forwarded to prepareImage()
+     * @param imgPath        text file holding one image path per line
+     * @param calibTablePath file the calibration cache is read from and written to
+     */
+    int8EntroyCalibrator::int8EntroyCalibrator(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath,
+        const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0), readCache(true)
+    {
+        // Widen before multiplying so large shapes cannot overflow int.
+        inputCount = static_cast<size_t>(batchsize) * channels * height * width;
+        // Read-only stream: std::fstream defaults to in|out and fails on read-only lists.
+        std::ifstream f(imgPath);
+        if (f.is_open())
+        {
+            std::string temp;
+            while (std::getline(f, temp))
+            {
+                // Tolerate CRLF lists and skip blank lines; neither is a usable path.
+                if (!temp.empty() && temp.back() == '\r')
+                    temp.pop_back();
+                if (!temp.empty())
+                    imgPaths.push_back(temp);
+            }
+        }
+        else
+        {
+            std::cerr << "Could not open calibration image list: " << imgPath << std::endl;
+        }
+        batchData = new float[inputCount];
+        CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
+    }
+
+    /** Releases the device buffer and the host staging buffer. */
+    int8EntroyCalibrator::~int8EntroyCalibrator()
+    {
+        CUDA_CHECK(cudaFree(deviceInput));
+        delete[] batchData;
+    }
+
+    /**
+     * Loads the next batchSize images, preprocesses them and uploads them to the device.
+     *
+     * @param bindings   output; bindings[0] receives the device input buffer
+     * @param names      unused
+     * @param nbBindings unused
+     * @return false when fewer than batchSize images remain or an image cannot be read
+     */
+    bool int8EntroyCalibrator::getBatch(void **bindings, const char **names, int nbBindings)
+    {
+        const size_t batchEnd = imageIndex + static_cast<size_t>(batchSize);
+        if (batchEnd > imgPaths.size())
+            return false;
+
+        float* ptr = batchData;
+        for (size_t j = imageIndex; j < batchEnd; ++j)
+        {
+            cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR);
+            if (img.empty())
+            {
+                // cv::imread returns an empty Mat on failure; resizing it would throw.
+                std::cerr << "Could not read calibration image: " << imgPaths[j] << std::endl;
+                return false;
+            }
+            std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, letterBox);
+
+            memcpy(ptr, inputData.data(), inputData.size() * sizeof(float));
+
+            ptr += inputData.size();
+            std::cout << "Load image: " << imgPaths[j] << std::endl;
+            std::cout << "Progress: " << (j + 1)*100. / imgPaths.size() << "%" << std::endl;
+        }
+        imageIndex += batchSize;
+        CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
+        bindings[0] = deviceInput;
+        return true;
+    }
+
+    /**
+     * Reads the calibration table from calibTablePath into calibrationCache.
+     *
+     * @param length output; number of bytes read (0 when nothing was read)
+     * @return pointer to the cached bytes, or nullptr when the cache is empty
+     */
+    const void* int8EntroyCalibrator::readCalibrationCache(std::size_t &length)
+    {
+        calibrationCache.clear();
+        std::ifstream input(calibTablePath, std::ios::binary);
+        input >> std::noskipws;
+        if (readCache && input.good())
+        {
+            std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
+                std::back_inserter(calibrationCache));
+        }
+        length = calibrationCache.size();
+        return length ? calibrationCache.data() : nullptr;
+    }
+
+    /** Writes the calibration table produced by TensorRT to calibTablePath. */
+    void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length)
+    {
+        std::ofstream output(calibTablePath, std::ios::binary);
+        output.write(reinterpret_cast<const char*>(cache), length);
+        if (!output)
+            std::cerr << "Could not write calibration table: " << calibTablePath << std::endl;
+    }
+}
+
+/**
+ * Converts a BGR image into a planar (one plane per channel) float buffer scaled by 1/255.
+ *
+ * @param img        source image as loaded by cv::imread (BGR)
+ * @param input_c    network input channels (1 = grayscale, otherwise RGB)
+ * @param input_h    network input height
+ * @param input_w    network input width
+ * @param letter_box 2 = keep aspect ratio and zero-pad bottom/right, otherwise stretch
+ * @return input_c * input_h * input_w floats, one plane per channel
+ */
+std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box)
+{
+    cv::Mat out;
+    // NOTE(review): yolo.cpp prints its maintain-aspect-ratio hint for letter_box == 1,
+    // while padding here only happens for 2 - confirm which value is intended.
+    if (letter_box == 2)
+    {
+        int image_w = img.cols;
+        int image_h = img.rows;
+        int resize_w = 0;
+        int resize_h = 0;
+        int offset_top = 0;
+        int offset_bottom = 0;
+        int offset_left = 0;
+        int offset_right = 0;
+        if (static_cast<float>(input_h) / image_h > static_cast<float>(input_w) / image_w)
+        {
+            resize_w = input_w;
+            resize_h = (input_w * image_h) / image_w;
+            offset_bottom = input_h - resize_h;
+        }
+        else
+        {
+            resize_h = input_h;
+            resize_w = (input_h * image_w) / image_h;
+            offset_right = input_w - resize_w;
+        }
+        cv::resize(img, out, cv::Size(resize_w, resize_h), 0, 0, cv::INTER_CUBIC);
+        cv::copyMakeBorder(out, out, offset_top, offset_bottom, offset_left, offset_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
+    }
+    else
+    {
+        cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
+    }
+    if (input_c == 1)
+    {
+        // convertTo() only changes depth, so the channel count must be reduced here.
+        cv::cvtColor(out, out, cv::COLOR_BGR2GRAY);
+        out.convertTo(out, CV_32FC1, 1.0 / 255.0);
+    }
+    else
+    {
+        cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
+        out.convertTo(out, CV_32FC3, 1.0 / 255.0);
+    }
+    std::vector<cv::Mat> input_channels;
+    cv::split(out, input_channels);
+    std::vector<float> result(static_cast<size_t>(input_h) * input_w * input_c);
+    float* data = result.data();
+    const size_t channelLength = static_cast<size_t>(input_h) * input_w;
+    // Never read past the planes cv::split() produced; any extra planes stay zero.
+    const size_t planes = std::min(static_cast<size_t>(input_c), input_channels.size());
+    for (size_t i = 0; i < planes; ++i)
+    {
+        memcpy(data, input_channels[i].data, channelLength * sizeof(float));
+        data += channelLength;
+    }
+    return result;
+}
diff --git a/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/calibrator.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/calibrator.h
new file mode 100644
index 0000000..a78e062
--- /dev/null
+++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/calibrator.h
@@ -0,0 +1,75 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#ifndef CALIBRATOR_H
+#define CALIBRATOR_H
+
+#include "opencv2/opencv.hpp"
+#include "cuda_runtime.h"
+#include "NvInfer.h"
+#include <cassert>
+#include <iostream>
+#include <string>
+#include <vector>
+
+// Runs a CUDA runtime call; on failure logs the error code with file/line and asserts.
+#ifndef CUDA_CHECK
+#define CUDA_CHECK(callstr) \
+    { \
+        cudaError_t error_code = callstr; \
+        if (error_code != cudaSuccess) { \
+            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \
+            assert(0); \
+        } \
+    }
+#endif
+
+namespace nvinfer1 {
+    /**
+     * INT8 entropy calibrator that reads calibration images from a list file,
+     * preprocesses them with prepareImage() and hands them to TensorRT in batches.
+     */
+    class int8EntroyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
+    public:
+        int8EntroyCalibrator(const int &batchsize,
+            const int &channels,
+            const int &height,
+            const int &width,
+            const int &letterbox,
+            const std::string &imgPath,
+            const std::string &calibTablePath);
+
+        virtual ~int8EntroyCalibrator();
+
+        // Owns batchData and deviceInput through raw pointers, so a copy would double-free.
+        int8EntroyCalibrator(const int8EntroyCalibrator &) = delete;
+        int8EntroyCalibrator &operator=(const int8EntroyCalibrator &) = delete;
+
+        int getBatchSize() const override { return batchSize; }
+        bool getBatch(void *bindings[], const char *names[], int nbBindings) override;
+        const void *readCalibrationCache(std::size_t &length) override;
+        void writeCalibrationCache(const void *ptr, std::size_t length) override;
+
+    private:
+        int batchSize;                       // images per getBatch() call
+        int inputC;                          // network input channels
+        int inputH;                          // network input height
+        int inputW;                          // network input width
+        int letterBox;                       // letter-box mode passed to prepareImage()
+        std::string calibTablePath;          // calibration cache file
+        size_t imageIndex;                   // index of the next unread entry of imgPaths
+        size_t inputCount;                   // floats per batch (batch * C * H * W)
+        std::vector<std::string> imgPaths;   // calibration image paths
+        float *batchData{ nullptr };         // host staging buffer, inputCount floats
+        void *deviceInput{ nullptr };        // device buffer, inputCount floats
+        bool readCache{ true };              // when true, readCalibrationCache() loads an existing table
+        std::vector<char> calibrationCache;  // bytes of the calibration table
+    };
+}
+
+/** Converts a BGR image into the planar float layout of the network input. */
+std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box);
+
+#endif //CALIBRATOR_H
\ No newline at end of file
diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
similarity index 100%
rename from non_square/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp
diff --git
a/non_square/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.h similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.h diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp similarity index 100% 
rename from non_square/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/route_layer.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/route_layer.h similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/route_layer.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/route_layer.h diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h diff --git 
a/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp
new file mode 100644
index 0000000..eb49011
--- /dev/null
+++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp
@@ -0,0 +1,38 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "upsample_layer.h"
+#include <cassert>
+#include <string>
+
+/**
+ * Adds a nearest-neighbour resize layer for a Darknet "upsample" block.
+ *
+ * @param layerIdx index used to name the layer ("upsample_" + layerIdx)
+ * @param block    parsed cfg block; must have type "upsample" and an integer "stride"
+ * @param input    tensor to upsample
+ * @param network  network definition the layer is added to
+ * @return the created resize layer
+ */
+nvinfer1::ILayer* upsampleLayer(
+    int layerIdx,
+    std::map<std::string, std::string>& block,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "upsample");
+    const int stride = std::stoi(block.at("stride"));
+
+    nvinfer1::IResizeLayer* resize_layer = network->addResize(*input);
+    assert(resize_layer != nullptr);
+    resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST);
+    // First dimension keeps its size; the other two are multiplied by stride.
+    // Explicit casts: int -> float inside braces is a narrowing conversion.
+    const float scale[3] = {1.0f, static_cast<float>(stride), static_cast<float>(stride)};
+    resize_layer->setScales(scale, 3);
+    std::string layer_name = "upsample_" + std::to_string(layerIdx);
+    resize_layer->setName(layer_name.c_str());
+    return resize_layer;
+}
\ No newline at end of file
diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
similarity index 78%
rename from non_square/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
index 6b0224a..d1b7768 100644
--- a/non_square/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
+++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h
@@ -15,9 +15,6 @@ nvinfer1::ILayer* upsampleLayer( int layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, - int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
diff --git a/non_square/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp
similarity index 91% rename from non_square/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp index 29306cd..03a4820 100644 --- a/non_square/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp +++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp @@ -45,9 +45,20 @@ static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContext networkInfo.networkType = yoloType; networkInfo.configFilePath = initParams->customNetworkConfigFilePath; networkInfo.wtsFilePath = initParams->modelFilePath; + networkInfo.int8CalibPath = initParams->int8CalibrationFilePath; networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU"); networkInfo.inputBlobName = "data"; + if(initParams->networkMode == 0) { + networkInfo.networkMode = "FP32"; + } + else if(initParams->networkMode == 1) { + networkInfo.networkMode = "INT8"; + } + else if(initParams->networkMode == 2) { + networkInfo.networkMode = "FP16"; + } + if (networkInfo.configFilePath.empty() || networkInfo.wtsFilePath.empty()) { std::cerr << "YOLO config file or weights file is not specified" diff --git a/non_square/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp similarity index 99% rename from non_square/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp index 5c9801b..1cc8d39 100644 --- a/non_square/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp +++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp @@ -302,7 +302,6 @@ static bool NvDsInferParseYolo( const uint gridSizeH = layer.inferDims.d[1]; const uint gridSizeW = layer.inferDims.d[2]; const uint stride = DIVUP(networkInfo.width, gridSizeW); - //assert(stride == DIVUP(networkInfo.height, gridSizeH)); 
std::vector outObjs = decodeYoloTensor((const float*)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, masks[idx].size(), @@ -344,7 +343,6 @@ static bool NvDsInferParseYoloV2( const uint gridSizeH = layer.inferDims.d[1]; const uint gridSizeW = layer.inferDims.d[2]; const uint stride = DIVUP(networkInfo.width, gridSizeW); - //assert(stride == DIVUP(networkInfo.height, gridSizeH)); for (auto& anchor : anchors) { anchor *= stride; } diff --git a/non_square/nvdsinfer_custom_impl_Yolo/utils.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/utils.cpp similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/utils.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/utils.cpp diff --git a/non_square/nvdsinfer_custom_impl_Yolo/utils.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/utils.h similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/utils.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/utils.h diff --git a/non_square/nvdsinfer_custom_impl_Yolo/yolo.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yolo.cpp similarity index 92% rename from non_square/nvdsinfer_custom_impl_Yolo/yolo.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yolo.cpp index b601524..035ed11 100644 --- a/non_square/nvdsinfer_custom_impl_Yolo/yolo.cpp +++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yolo.cpp @@ -25,6 +25,11 @@ #include "yolo.h" #include "yoloPlugins.h" +#include + +#ifdef OPENCV +#include "calibrator.h" +#endif void orderParams(std::vector> *maskVector) { std::vector> maskinput = *maskVector; @@ -45,6 +50,8 @@ Yolo::Yolo(const NetworkInfo& networkInfo) : m_NetworkType(networkInfo.networkType), // YOLO type m_ConfigFilePath(networkInfo.configFilePath), // YOLO cfg m_WtsFilePath(networkInfo.wtsFilePath), // YOLO weights + m_Int8CalibPath(networkInfo.int8CalibPath), // INT8 calibration path + 
m_NetworkMode(networkInfo.networkMode), // FP32, INT8, FP16 m_DeviceType(networkInfo.deviceType), // kDLA, kGPU m_InputBlobName(networkInfo.inputBlobName), // data m_InputH(0), @@ -62,6 +69,38 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) { assert (builder); + m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); + parseConfigBlocks(); + orderParams(&m_OutputMasks); + + if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) { + assert(builder->platformHasFastInt8()); +#ifdef OPENCV + std::string calib_image_list; + int calib_batch_size; + if (getenv("INT8_CALIB_IMG_PATH")) { + calib_image_list = getenv("INT8_CALIB_IMG_PATH"); + } + else { + std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl; + std::abort(); + } + if (getenv("INT8_CALIB_BATCH_SIZE")) { + calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE")); + } + else { + std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl; + std::abort(); + } + nvinfer1::int8EntroyCalibrator *calibrator = new nvinfer1::int8EntroyCalibrator(calib_batch_size, m_InputC, m_InputH, m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath); + builder->setInt8Mode(true); + builder->setInt8Calibrator(calibrator); +#else + std::cerr << "OpenCV is required to run INT8 calibrator" << std::endl; + std::abort(); +#endif + } + std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); std::vector trtWeights; @@ -71,8 +110,12 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) return nullptr; } - // Build the engine std::cout << "Building the TensorRT Engine" << std::endl; + + if (m_LetterBox == 1) { + std::cout << "\nNOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file to get better accuracy\n" << std::endl; + } + nvinfer1::ICudaEngine * engine = builder->buildCudaEngine(*network); if (engine) { std::cout << "Building complete\n" << std::endl; @@ -80,7 +123,6 @@ nvinfer1::ICudaEngine *Yolo::createEngine 
(nvinfer1::IBuilder* builder) std::cerr << "Building engine failed\n" << std::endl; } - // destroy network->destroy(); return engine; } @@ -88,12 +130,7 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) { destroyNetworkUtils(); - m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); - parseConfigBlocks(); - orderParams(&m_OutputMasks); - std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); - // build yolo network std::cout << "Building YOLO network" << std::endl; NvDsInferStatus status = buildYoloNetwork(weights, network); @@ -121,9 +158,7 @@ NvDsInferStatus Yolo::buildYoloNetwork( std::vector tensorOutputs; uint outputTensorCount = 0; - // build the network using the network API for (uint i = 0; i < m_ConfigBlocks.size(); ++i) { - // check if num. of channels is correct assert(getNumChannels(previous) == channels); std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; @@ -192,7 +227,7 @@ NvDsInferStatus Yolo::buildYoloNetwork( else if (m_ConfigBlocks.at(i).at("type") == "upsample") { std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], weights, m_TrtWeights, channels, previous, &network); + nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], previous, &network); previous = out->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); @@ -212,7 +247,6 @@ NvDsInferStatus Yolo::buildYoloNetwork( else if (m_ConfigBlocks.at(i).at("type") == "yolo") { nvinfer1::Dims prevTensorDims = previous->getDimensions(); - //assert(prevTensorDims.d[1] == prevTensorDims.d[2]); TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); curYoloTensor.gridSizeY = prevTensorDims.d[1]; curYoloTensor.gridSizeX = prevTensorDims.d[2]; @@ -262,7 +296,6 @@ NvDsInferStatus Yolo::buildYoloNetwork( //YOLOv2 support else if 
(m_ConfigBlocks.at(i).at("type") == "region") { nvinfer1::Dims prevTensorDims = previous->getDimensions(); - //assert(prevTensorDims.d[1] == prevTensorDims.d[2]); TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount); curRegionTensor.gridSizeY = prevTensorDims.d[1]; curRegionTensor.gridSizeX = prevTensorDims.d[2]; @@ -391,8 +424,14 @@ void Yolo::parseConfigBlocks() m_InputH = std::stoul(block.at("height")); m_InputW = std::stoul(block.at("width")); m_InputC = std::stoul(block.at("channels")); - //assert(m_InputW == m_InputH); m_InputSize = m_InputC * m_InputH * m_InputW; + + if (block.find("letter_box") != block.end()) { + m_LetterBox = std::stoul(block.at("letter_box")); + } + else { + m_LetterBox = 0; + } } else if ((block.at("type") == "region") || (block.at("type") == "yolo")) { @@ -460,7 +499,6 @@ void Yolo::parseConfigBlocks() } void Yolo::destroyNetworkUtils() { - // deallocate the weights for (uint i = 0; i < m_TrtWeights.size(); ++i) { if (m_TrtWeights[i].count > 0) free(const_cast(m_TrtWeights[i].values)); diff --git a/non_square/nvdsinfer_custom_impl_Yolo/yolo.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yolo.h similarity index 95% rename from non_square/nvdsinfer_custom_impl_Yolo/yolo.h rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yolo.h index ce1c3be..053e9c7 100644 --- a/non_square/nvdsinfer_custom_impl_Yolo/yolo.h +++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yolo.h @@ -40,6 +40,8 @@ struct NetworkInfo std::string networkType; std::string configFilePath; std::string wtsFilePath; + std::string int8CalibPath; + std::string networkMode; std::string deviceType; std::string inputBlobName; }; @@ -76,6 +78,8 @@ protected: const std::string m_NetworkType; const std::string m_ConfigFilePath; const std::string m_WtsFilePath; + const std::string m_Int8CalibPath; + const std::string m_NetworkMode; const std::string m_DeviceType; const std::string m_InputBlobName; std::vector 
m_OutputTensors; @@ -85,6 +89,7 @@ protected: uint m_InputW; uint m_InputC; uint64_t m_InputSize; + uint m_LetterBox; std::vector m_TrtWeights; diff --git a/non_square/nvdsinfer_custom_impl_Yolo/yoloForward.cu b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloForward.cu similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/yoloForward.cu rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloForward.cu diff --git a/non_square/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp similarity index 100% rename from non_square/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp rename to examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp diff --git a/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloPlugins.h b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloPlugins.h index ebf5661..177ca10 100644 --- a/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloPlugins.h +++ b/examples/multiple_inferences/pgie/nvdsinfer_custom_impl_Yolo/yoloPlugins.h @@ -56,7 +56,7 @@ class YoloLayer : public nvinfer1::IPluginV2 { public: YoloLayer (const void* data, size_t length); - YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSize, + YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector anchors, const std::vector> mask); const char* getPluginType () const override { return YOLOLAYER_PLUGIN_NAME; } @@ -96,7 +96,8 @@ public: private: uint m_NumBoxes {0}; uint m_NumClasses {0}; - uint m_GridSize {0}; + uint m_GridSizeX {0}; + uint m_GridSizeY {0}; uint64_t m_OutputSize {0}; std::string m_Namespace {""}; @@ -152,4 +153,4 @@ extern float kBETA_NMS; extern std::vector kANCHORS; extern std::vector> kMASK; -#endif // __YOLO_PLUGINS__ 
\ No newline at end of file +#endif // __YOLO_PLUGINS__ diff --git a/examples/multiple_inferences/sgie1/config_infer_secondary1.txt b/examples/multiple_inferences/sgie1/config_infer_secondary1.txt index 632f387..076c937 100644 --- a/examples/multiple_inferences/sgie1/config_infer_secondary1.txt +++ b/examples/multiple_inferences/sgie1/config_infer_secondary1.txt @@ -4,10 +4,11 @@ net-scale-factor=0.0039215697906911373 model-color-format=0 custom-network-config=sgie1/yolo.cfg model-file=yolo.weights -model-engine-file=model_b16_gpu0_fp16.engine +model-engine-file=model_b16_gpu0_fp32.engine +#int8-calib-file=calib.table labelfile-path=labels.txt batch-size=16 -network-mode=2 +network-mode=0 num-detected-classes=10 interval=0 gie-unique-id=2 diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/Makefile b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/Makefile index a7884da..f2474bc 100644 --- a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/Makefile +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/Makefile @@ -27,13 +27,25 @@ CUDA_VER?= ifeq ($(CUDA_VER),) $(error "CUDA_VER is not set") endif + +OPENCV?= +ifeq ($(OPENCV),) + OPENCV=0 +endif + CC:= g++ NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations -CFLAGS+= -I../../../includes -I/usr/local/cuda-$(CUDA_VER)/include +CFLAGS+= -I/opt/nvidia/deepstream/deepstream-5.1/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include -LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs +ifeq ($(OPENCV), 1) +COMMON= -DOPENCV +CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) +LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) +endif + +LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs LFLAGS:= -shared 
-Wl,--start-group $(LIBS) -Wl,--end-group INCS:= $(wildcard *.h) @@ -50,6 +62,11 @@ SRCFILES:= nvdsinfer_yolo_engine.cpp \ utils.cpp \ yolo.cpp \ yoloForward.cu + +ifeq ($(OPENCV), 1) +SRCFILES+= calibrator.cpp +endif + TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so TARGET_OBJS:= $(SRCFILES:.cpp=.o) @@ -58,7 +75,7 @@ TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) all: $(TARGET_LIB) %.o: %.cpp $(INCS) Makefile - $(CC) -c -o $@ $(CFLAGS) $< + $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< %.o: %.cu $(INCS) Makefile $(NVCC) -c -o $@ --compiler-options '-fPIC' $<
diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/calibrator.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/calibrator.cpp
new file mode 100644
index 0000000..0c5dd63
--- /dev/null
+++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/calibrator.cpp
@@ -0,0 +1,196 @@
+/*
+ * Created by Marcos Luciano
+ * https://www.github.com/marcoslucianops
+ */
+
+#include "calibrator.h"
+#include <algorithm>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+
+namespace nvinfer1
+{
+    /**
+     * Creates an INT8 entropy calibrator that feeds TensorRT from a list of images.
+     *
+     * @param batchsize      images per getBatch() call
+     * @param channels       network input channels
+     * @param height         network input height
+     * @param width          network input width
+     * @param letterbox      letter-box mode forwarded to prepareImage()
+     * @param imgPath        text file holding one image path per line
+     * @param calibTablePath file the calibration cache is read from and written to
+     */
+    int8EntroyCalibrator::int8EntroyCalibrator(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath,
+        const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0), readCache(true)
+    {
+        // Widen before multiplying so large shapes cannot overflow int.
+        inputCount = static_cast<size_t>(batchsize) * channels * height * width;
+        // Read-only stream: std::fstream defaults to in|out and fails on read-only lists.
+        std::ifstream f(imgPath);
+        if (f.is_open())
+        {
+            std::string temp;
+            while (std::getline(f, temp))
+            {
+                // Tolerate CRLF lists and skip blank lines; neither is a usable path.
+                if (!temp.empty() && temp.back() == '\r')
+                    temp.pop_back();
+                if (!temp.empty())
+                    imgPaths.push_back(temp);
+            }
+        }
+        else
+        {
+            std::cerr << "Could not open calibration image list: " << imgPath << std::endl;
+        }
+        batchData = new float[inputCount];
+        CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
+    }
+
+    /** Releases the device buffer and the host staging buffer. */
+    int8EntroyCalibrator::~int8EntroyCalibrator()
+    {
+        CUDA_CHECK(cudaFree(deviceInput));
+        delete[] batchData;
+    }
+
+    /**
+     * Loads the next batchSize images, preprocesses them and uploads them to the device.
+     *
+     * @param bindings   output; bindings[0] receives the device input buffer
+     * @param names      unused
+     * @param nbBindings unused
+     * @return false when fewer than batchSize images remain or an image cannot be read
+     */
+    bool int8EntroyCalibrator::getBatch(void **bindings, const char **names, int nbBindings)
+    {
+        const size_t batchEnd = imageIndex + static_cast<size_t>(batchSize);
+        if (batchEnd > imgPaths.size())
+            return false;
+
+        float* ptr = batchData;
+        for (size_t j = imageIndex; j < batchEnd; ++j)
+        {
+            cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR);
+            if (img.empty())
+            {
+                // cv::imread returns an empty Mat on failure; resizing it would throw.
+                std::cerr << "Could not read calibration image: " << imgPaths[j] << std::endl;
+                return false;
+            }
+            std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, letterBox);
+
+            memcpy(ptr, inputData.data(), inputData.size() * sizeof(float));
+
+            ptr += inputData.size();
+            std::cout << "Load image: " << imgPaths[j] << std::endl;
+            std::cout << "Progress: " << (j + 1)*100. / imgPaths.size() << "%" << std::endl;
+        }
+        imageIndex += batchSize;
+        CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
+        bindings[0] = deviceInput;
+        return true;
+    }
+
+    /**
+     * Reads the calibration table from calibTablePath into calibrationCache.
+     *
+     * @param length output; number of bytes read (0 when nothing was read)
+     * @return pointer to the cached bytes, or nullptr when the cache is empty
+     */
+    const void* int8EntroyCalibrator::readCalibrationCache(std::size_t &length)
+    {
+        calibrationCache.clear();
+        std::ifstream input(calibTablePath, std::ios::binary);
+        input >> std::noskipws;
+        if (readCache && input.good())
+        {
+            std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
+                std::back_inserter(calibrationCache));
+        }
+        length = calibrationCache.size();
+        return length ? calibrationCache.data() : nullptr;
+    }
+
+    /** Writes the calibration table produced by TensorRT to calibTablePath. */
+    void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length)
+    {
+        std::ofstream output(calibTablePath, std::ios::binary);
+        output.write(reinterpret_cast<const char*>(cache), length);
+        if (!output)
+            std::cerr << "Could not write calibration table: " << calibTablePath << std::endl;
+    }
+}
+
+/**
+ * Converts a BGR image into a planar (one plane per channel) float buffer scaled by 1/255.
+ *
+ * @param img        source image as loaded by cv::imread (BGR)
+ * @param input_c    network input channels (1 = grayscale, otherwise RGB)
+ * @param input_h    network input height
+ * @param input_w    network input width
+ * @param letter_box 2 = keep aspect ratio and zero-pad bottom/right, otherwise stretch
+ * @return input_c * input_h * input_w floats, one plane per channel
+ */
+std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box)
+{
+    cv::Mat out;
+    // NOTE(review): yolo.cpp prints its maintain-aspect-ratio hint for letter_box == 1,
+    // while padding here only happens for 2 - confirm which value is intended.
+    if (letter_box == 2)
+    {
+        int image_w = img.cols;
+        int image_h = img.rows;
+        int resize_w = 0;
+        int resize_h = 0;
+        int offset_top = 0;
+        int offset_bottom = 0;
+        int offset_left = 0;
+        int offset_right = 0;
+        if (static_cast<float>(input_h) / image_h > static_cast<float>(input_w) / image_w)
+        {
+            resize_w = input_w;
+            resize_h = (input_w * image_h) / image_w;
+            offset_bottom = input_h - resize_h;
+        }
+        else
+        {
+            resize_h = input_h;
+            resize_w = (input_h * image_w) / image_h;
+            offset_right = input_w - resize_w;
+        }
+        cv::resize(img, out, cv::Size(resize_w, resize_h), 0, 0, cv::INTER_CUBIC);
+        cv::copyMakeBorder(out, out, offset_top, offset_bottom, offset_left, offset_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
+    }
+    else
+    {
+        cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
+    }
+    if (input_c == 1)
+    {
+        // convertTo() only changes depth, so the channel count must be reduced here.
+        cv::cvtColor(out, out, cv::COLOR_BGR2GRAY);
+        out.convertTo(out, CV_32FC1, 1.0 / 255.0);
+    }
+    else
+    {
+        cv::cvtColor(out, out, cv::COLOR_BGR2RGB);
+        out.convertTo(out, CV_32FC3, 1.0 / 255.0);
+    }
+    std::vector<cv::Mat> input_channels;
+    cv::split(out, input_channels);
+    std::vector<float> result(static_cast<size_t>(input_h) * input_w * input_c);
+    float* data = result.data();
+    const size_t channelLength = static_cast<size_t>(input_h) * input_w;
+    // Never read past the planes cv::split() produced; any extra planes stay zero.
+    const size_t planes = std::min(static_cast<size_t>(input_c), input_channels.size());
+    for (size_t i = 0; i < planes; ++i)
+    {
+        memcpy(data, input_channels[i].data, channelLength * sizeof(float));
+        data += channelLength;
+    }
+    return result;
+}
diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/calibrator.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/calibrator.h
new file mode 100644
index 0000000..a78e062
--- /dev/null
+++
b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/calibrator.h @@ -0,0 +1,62 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef CALIBRATOR_H +#define CALIBRATOR_H + +#include "opencv2/opencv.hpp" +#include "cuda_runtime.h" +#include "NvInfer.h" +#include +#include + +#ifndef CUDA_CHECK +#define CUDA_CHECK(callstr) \ + { \ + cudaError_t error_code = callstr; \ + if (error_code != cudaSuccess) { \ + std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ + assert(0); \ + } \ + } +#endif + +namespace nvinfer1 { + class int8EntroyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { + public: + int8EntroyCalibrator(const int &batchsize, + const int &channels, + const int &height, + const int &width, + const int &letterbox, + const std::string &imgPath, + const std::string &calibTablePath); + + virtual ~int8EntroyCalibrator(); + int getBatchSize() const override { return batchSize; } + bool getBatch(void *bindings[], const char *names[], int nbBindings) override; + const void *readCalibrationCache(std::size_t &length) override; + void writeCalibrationCache(const void *ptr, std::size_t length) override; + + private: + int batchSize; + int inputC; + int inputH; + int inputW; + int letterBox; + std::string calibTablePath; + size_t imageIndex; + size_t inputCount; + std::vector imgPaths; + float *batchData{ nullptr }; + void *deviceInput{ nullptr }; + bool readCache; + std::vector calibrationCache; + }; +} + +std::vector prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box); + +#endif //CALIBRATOR_H \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp new file mode 100644 index 0000000..d730fd2 --- /dev/null +++ 
b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp @@ -0,0 +1,82 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "activation_layer.h" + +nvinfer1::ILayer* activationLayer( + int layerIdx, + std::string activation, + nvinfer1::ILayer* output, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + if (activation == "relu") + { + nvinfer1::IActivationLayer* relu = network->addActivation( + *input, nvinfer1::ActivationType::kRELU); + assert(relu != nullptr); + std::string reluLayerName = "relu_" + std::to_string(layerIdx); + relu->setName(reluLayerName.c_str()); + output = relu; + } + else if (activation == "sigmoid" || activation == "logistic") + { + nvinfer1::IActivationLayer* sigmoid = network->addActivation( + *input, nvinfer1::ActivationType::kSIGMOID); + assert(sigmoid != nullptr); + std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx); + sigmoid->setName(sigmoidLayerName.c_str()); + output = sigmoid; + } + else if (activation == "tanh") + { + nvinfer1::IActivationLayer* tanh = network->addActivation( + *input, nvinfer1::ActivationType::kTANH); + assert(tanh != nullptr); + std::string tanhLayerName = "tanh_" + std::to_string(layerIdx); + tanh->setName(tanhLayerName.c_str()); + output = tanh; + } + else if (activation == "leaky") + { + nvinfer1::IActivationLayer* leaky = network->addActivation( + *input, nvinfer1::ActivationType::kLEAKY_RELU); + leaky->setAlpha(0.1); + assert(leaky != nullptr); + std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); + leaky->setName(leakyLayerName.c_str()); + output = leaky; + } + else if (activation == "softplus") + { + nvinfer1::IActivationLayer* softplus = network->addActivation( + *input, nvinfer1::ActivationType::kSOFTPLUS); + assert(softplus != nullptr); + std::string softplusLayerName = "softplus_" + std::to_string(layerIdx); + softplus->setName(softplusLayerName.c_str()); + output = 
softplus; + } + else if (activation == "mish") + { + nvinfer1::IActivationLayer* softplus = network->addActivation( + *input, nvinfer1::ActivationType::kSOFTPLUS); + assert(softplus != nullptr); + std::string softplusLayerName = "softplus_" + std::to_string(layerIdx); + softplus->setName(softplusLayerName.c_str()); + nvinfer1::IActivationLayer* tanh = network->addActivation( + *softplus->getOutput(0), nvinfer1::ActivationType::kTANH); + assert(tanh != nullptr); + std::string tanhLayerName = "tanh_" + std::to_string(layerIdx); + tanh->setName(tanhLayerName.c_str()); + nvinfer1::IElementWiseLayer* mish = network->addElementWise( + *tanh->getOutput(0), *input, + nvinfer1::ElementWiseOperation::kPROD); + assert(mish != nullptr); + std::string mishLayerName = "mish_" + std::to_string(layerIdx); + mish->setName(mishLayerName.c_str()); + output = mish; + } + return output; +} \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h new file mode 100644 index 0000000..e6081e6 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h @@ -0,0 +1,23 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __ACTIVATION_LAYER_H__ +#define __ACTIVATION_LAYER_H__ + +#include +#include + +#include "NvInfer.h" + +#include "activation_layer.h" + +nvinfer1::ILayer* activationLayer( + int layerIdx, + std::string activation, + nvinfer1::ILayer* output, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp new file mode 100644 index 0000000..abb0d32 --- /dev/null +++ 
b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp @@ -0,0 +1,168 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include +#include "convolutional_layer.h" + +nvinfer1::ILayer* convolutionalLayer( + int layerIdx, + std::map& block, + std::vector& weights, + std::vector& trtWeights, + int& weightPtr, + int& inputChannels, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + assert(block.at("type") == "convolutional"); + assert(block.find("filters") != block.end()); + assert(block.find("pad") != block.end()); + assert(block.find("size") != block.end()); + assert(block.find("stride") != block.end()); + + int filters = std::stoi(block.at("filters")); + int padding = std::stoi(block.at("pad")); + int kernelSize = std::stoi(block.at("size")); + int stride = std::stoi(block.at("stride")); + std::string activation = block.at("activation"); + int bias = filters; + + bool batchNormalize = false; + if (block.find("batch_normalize") != block.end()) + { + bias = 0; + batchNormalize = (block.at("batch_normalize") == "1"); + } + + int groups = 1; + if (block.find("groups") != block.end()) + { + groups = std::stoi(block.at("groups")); + } + + int pad; + if (padding) + pad = (kernelSize - 1) / 2; + else + pad = 0; + + int size = filters * inputChannels * kernelSize * kernelSize / groups; + std::vector bnBiases; + std::vector bnWeights; + std::vector bnRunningMean; + std::vector bnRunningVar; + nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; + nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias}; + + if (batchNormalize == false) + { + float* val = new float[filters]; + for (int i = 0; i < filters; ++i) + { + val[i] = weights[weightPtr]; + weightPtr++; + } + convBias.values = val; + trtWeights.push_back(convBias); + val = new float[size]; + for (int i = 0; i < size; ++i) + { + val[i] = weights[weightPtr]; + weightPtr++; + } + convWt.values = 
val; + trtWeights.push_back(convWt); + } + else + { + for (int i = 0; i < filters; ++i) + { + bnBiases.push_back(weights[weightPtr]); + weightPtr++; + } + + for (int i = 0; i < filters; ++i) + { + bnWeights.push_back(weights[weightPtr]); + weightPtr++; + } + for (int i = 0; i < filters; ++i) + { + bnRunningMean.push_back(weights[weightPtr]); + weightPtr++; + } + for (int i = 0; i < filters; ++i) + { + bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); + weightPtr++; + } + float* val = new float[size]; + for (int i = 0; i < size; ++i) + { + val[i] = weights[weightPtr]; + weightPtr++; + } + convWt.values = val; + trtWeights.push_back(convWt); + trtWeights.push_back(convBias); + } + + nvinfer1::IConvolutionLayer* conv = network->addConvolution( + *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); + assert(conv != nullptr); + std::string convLayerName = "conv_" + std::to_string(layerIdx); + conv->setName(convLayerName.c_str()); + conv->setStride(nvinfer1::DimsHW{stride, stride}); + conv->setPadding(nvinfer1::DimsHW{pad, pad}); + + if (block.find("groups") != block.end()) + { + conv->setNbGroups(groups); + } + + nvinfer1::ILayer* output = conv; + + if (batchNormalize == true) + { + size = filters; + nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; + nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; + nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; + float* shiftWt = new float[size]; + for (int i = 0; i < size; ++i) + { + shiftWt[i] + = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); + } + shift.values = shiftWt; + float* scaleWt = new float[size]; + for (int i = 0; i < size; ++i) + { + scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; + } + scale.values = scaleWt; + float* powerWt = new float[size]; + for (int i = 0; i < size; ++i) + { + powerWt[i] = 1.0; + } + power.values = powerWt; + trtWeights.push_back(shift); + trtWeights.push_back(scale); + 
trtWeights.push_back(power); + + nvinfer1::IScaleLayer* bn = network->addScale( + *output->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); + assert(bn != nullptr); + std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); + bn->setName(bnLayerName.c_str()); + output = bn; + } + + output = activationLayer(layerIdx, activation, output, output->getOutput(0), network); + assert(output != nullptr); + + return output; +} \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h new file mode 100644 index 0000000..b114493 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h @@ -0,0 +1,26 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __CONVOLUTIONAL_LAYER_H__ +#define __CONVOLUTIONAL_LAYER_H__ + +#include +#include + +#include "NvInfer.h" + +#include "activation_layer.h" + +nvinfer1::ILayer* convolutionalLayer( + int layerIdx, + std::map& block, + std::vector& weights, + std::vector& trtWeights, + int& weightPtr, + int& inputChannels, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp new file mode 100644 index 0000000..112af16 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.cpp @@ -0,0 +1,15 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "dropout_layer.h" + +nvinfer1::ILayer* dropoutLayer( + float probability, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + nvinfer1::ILayer* output; + return output; +} \ No newline at end of file 
diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.h new file mode 100644 index 0000000..3e8a1d4 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/dropout_layer.h @@ -0,0 +1,16 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __DROPOUT_LAYER_H__ +#define __DROPOUT_LAYER_H__ + +#include "NvInfer.h" + +nvinfer1::ILayer* dropoutLayer( + float probability, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp new file mode 100644 index 0000000..06948dc --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.cpp @@ -0,0 +1,30 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "maxpool_layer.h" + +nvinfer1::ILayer* maxpoolLayer( + int layerIdx, + std::map& block, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + assert(block.at("type") == "maxpool"); + assert(block.find("size") != block.end()); + assert(block.find("stride") != block.end()); + + int size = std::stoi(block.at("size")); + int stride = std::stoi(block.at("stride")); + + nvinfer1::IPoolingLayer* pool + = network->addPooling(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size}); + assert(pool); + std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); + pool->setStride(nvinfer1::DimsHW{stride, stride}); + pool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER); + pool->setName(maxpoolLayerName.c_str()); + + return pool; +} \ No newline at end of file diff --git 
a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h new file mode 100644 index 0000000..a0d139e --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/maxpool_layer.h @@ -0,0 +1,20 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __MAXPOOL_LAYER_H__ +#define __MAXPOOL_LAYER_H__ + +#include +#include + +#include "NvInfer.h" + +nvinfer1::ILayer* maxpoolLayer( + int layerIdx, + std::map& block, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp new file mode 100644 index 0000000..d4bf5dc --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp @@ -0,0 +1,63 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "route_layer.h" + +nvinfer1::ILayer* routeLayer( + int layerIdx, + std::map& block, + std::vector tensorOutputs, + nvinfer1::INetworkDefinition* network) +{ + std::string strLayers = block.at("layers"); + std::vector idxLayers; + size_t lastPos = 0, pos = 0; + while ((pos = strLayers.find(',', lastPos)) != std::string::npos) { + int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); + idxLayers.push_back (vL); + lastPos = pos + 1; + } + if (lastPos < strLayers.length()) { + std::string lastV = trim(strLayers.substr(lastPos)); + if (!lastV.empty()) { + idxLayers.push_back (std::stoi(lastV)); + } + } + assert (!idxLayers.empty()); + std::vector concatInputs; + for (int idxLayer : idxLayers) { + if (idxLayer < 0) { + idxLayer = tensorOutputs.size() + idxLayer; + } + assert (idxLayer >= 0 && idxLayer < (int)tensorOutputs.size()); + concatInputs.push_back 
(tensorOutputs[idxLayer]); + } + + nvinfer1::IConcatenationLayer* concat = + network->addConcatenation(concatInputs.data(), concatInputs.size()); + assert(concat != nullptr); + std::string concatLayerName = "route_" + std::to_string(layerIdx - 1); + concat->setName(concatLayerName.c_str()); + concat->setAxis(0); + + nvinfer1::ILayer* output = concat; + + if (block.find("groups") != block.end()) { + nvinfer1::Dims prevTensorDims = output->getOutput(0)->getDimensions(); + int groups = stoi(block.at("groups")); + int group_id = stoi(block.at("group_id")); + int startSlice = (prevTensorDims.d[0] / groups) * group_id; + int channelSlice = (prevTensorDims.d[0] / groups); + nvinfer1::ISliceLayer* sl = network->addSlice( + *output->getOutput(0), + nvinfer1::Dims3{startSlice, 0, 0}, + nvinfer1::Dims3{channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}, + nvinfer1::Dims3{1, 1, 1}); + assert(sl != nullptr); + output = sl; + } + + return output; +} \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/route_layer.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/route_layer.h new file mode 100644 index 0000000..63c6bd6 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/route_layer.h @@ -0,0 +1,18 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __ROUTE_LAYER_H__ +#define __ROUTE_LAYER_H__ + +#include "NvInfer.h" +#include "../utils.h" + +nvinfer1::ILayer* routeLayer( + int layerIdx, + std::map& block, + std::vector tensorOutputs, + nvinfer1::INetworkDefinition* network); + +#endif \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp new file mode 100644 index 0000000..87ba3f8 --- /dev/null +++ 
b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp @@ -0,0 +1,45 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "shortcut_layer.h" + +nvinfer1::ILayer* shortcutLayer( + int layerIdx, + std::string activation, + std::string inputVol, + std::string shortcutVol, + nvinfer1::ITensor* input, + nvinfer1::ITensor* shortcutTensor, + nvinfer1::INetworkDefinition* network) +{ + nvinfer1::ILayer* output; + nvinfer1::ITensor* outputTensor; + + if (inputVol != shortcutVol) + { + nvinfer1::ISliceLayer* sl = network->addSlice( + *shortcutTensor, + nvinfer1::Dims3{0, 0, 0}, + input->getDimensions(), + nvinfer1::Dims3{1, 1, 1}); + assert(sl != nullptr); + outputTensor = sl->getOutput(0); + assert(outputTensor != nullptr); + } else + { + outputTensor = shortcutTensor; + assert(outputTensor != nullptr); + } + + nvinfer1::IElementWiseLayer* ew = network->addElementWise( + *input, *outputTensor, + nvinfer1::ElementWiseOperation::kSUM); + assert(ew != nullptr); + + output = activationLayer(layerIdx, activation, ew, ew->getOutput(0), network); + assert(output != nullptr); + + return output; +} \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h new file mode 100644 index 0000000..c564d3d --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h @@ -0,0 +1,22 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __SHORTCUT_LAYER_H__ +#define __SHORTCUT_LAYER_H__ + +#include "NvInfer.h" + +#include "activation_layer.h" + +nvinfer1::ILayer* shortcutLayer( + int layerIdx, + std::string activation, + std::string inputVol, + std::string shortcutVol, + nvinfer1::ITensor* input, + nvinfer1::ITensor* shortcutTensor, + nvinfer1::INetworkDefinition* network); + 
+#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp new file mode 100644 index 0000000..eb49011 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp @@ -0,0 +1,24 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "upsample_layer.h" + +nvinfer1::ILayer* upsampleLayer( + int layerIdx, + std::map& block, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + assert(block.at("type") == "upsample"); + int stride = std::stoi(block.at("stride")); + + nvinfer1::IResizeLayer* resize_layer = network->addResize(*input); + resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + float scale[3] = {1, stride, stride}; + resize_layer->setScales(scale, 3); + std::string layer_name = "upsample_" + std::to_string(layerIdx); + resize_layer->setName(layer_name.c_str()); + return resize_layer; +} \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h new file mode 100644 index 0000000..d1b7768 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h @@ -0,0 +1,21 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef __UPSAMPLE_LAYER_H__ +#define __UPSAMPLE_LAYER_H__ + +#include +#include +#include + +#include "NvInfer.h" + +nvinfer1::ILayer* upsampleLayer( + int layerIdx, + std::map& block, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp new file mode 
100644 index 0000000..03a4820 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "nvdsinfer_custom_impl.h" +#include "nvdsinfer_context.h" +#include "yoloPlugins.h" +#include "yolo.h" + +#include + +#define USE_CUDA_ENGINE_GET_API 1 + +static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams) +{ + std::string yoloCfg = initParams->customNetworkConfigFilePath; + std::string yoloType; + + std::transform (yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) { + return std::tolower (c);}); + + yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg")); + + networkInfo.networkType = yoloType; + networkInfo.configFilePath = initParams->customNetworkConfigFilePath; + networkInfo.wtsFilePath = initParams->modelFilePath; + networkInfo.int8CalibPath = initParams->int8CalibrationFilePath; + networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU"); + networkInfo.inputBlobName = "data"; + + if(initParams->networkMode == 0) { + networkInfo.networkMode = "FP32"; + } + else if(initParams->networkMode == 1) { + networkInfo.networkMode = "INT8"; + } + else if(initParams->networkMode == 2) { + networkInfo.networkMode = "FP16"; + } + + if (networkInfo.configFilePath.empty() || + networkInfo.wtsFilePath.empty()) { + std::cerr << "YOLO config file or weights file is not specified" + << std::endl; + return false; + } + + if (!fileExists(networkInfo.configFilePath) || + !fileExists(networkInfo.wtsFilePath)) { + std::cerr << "YOLO config file or weights file is not exist" + << std::endl; + return false; + } + + return true; +} + +#if !USE_CUDA_ENGINE_GET_API +IModelParser* NvDsInferCreateModelParser( + const NvDsInferContextInitParams* initParams) { + NetworkInfo networkInfo; + if (!getYoloNetworkInfo(networkInfo, initParams)) { + return nullptr; + } + + return new Yolo(networkInfo); +} +#else +extern "C" +bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder, + const NvDsInferContextInitParams * const 
initParams, + nvinfer1::DataType dataType, + nvinfer1::ICudaEngine *& cudaEngine); + +extern "C" +bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder, + const NvDsInferContextInitParams * const initParams, + nvinfer1::DataType dataType, + nvinfer1::ICudaEngine *& cudaEngine) +{ + NetworkInfo networkInfo; + if (!getYoloNetworkInfo(networkInfo, initParams)) { + return false; + } + + Yolo yolo(networkInfo); + cudaEngine = yolo.createEngine (builder); + if (cudaEngine == nullptr) + { + std::cerr << "Failed to build CUDA engine on " + << networkInfo.configFilePath << std::endl; + return false; + } + + return true; +} +#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp new file mode 100644 index 0000000..1cc8d39 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include +#include +#include +#include "nvdsinfer_custom_impl.h" +#include "utils.h" + +#include "yoloPlugins.h" + +extern "C" bool NvDsInferParseYolo( + std::vector const& outputLayersInfo, + NvDsInferNetworkInfo const& networkInfo, + NvDsInferParseDetectionParams const& detectionParams, + std::vector& objectList); + +static std::vector +nonMaximumSuppression(const float nmsThresh, std::vector binfo) +{ + auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float { + if (x1min > x2min) + { + std::swap(x1min, x2min); + std::swap(x1max, x2max); + } + return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min; + }; + auto computeIoU + = [&overlap1D](NvDsInferParseObjectInfo& bbox1, NvDsInferParseObjectInfo& bbox2) -> float { + float overlapX + = overlap1D(bbox1.left, bbox1.left + bbox1.width, bbox2.left, bbox2.left + bbox2.width); + float overlapY + = overlap1D(bbox1.top, bbox1.top + bbox1.height, bbox2.top, bbox2.top + bbox2.height); + float area1 = (bbox1.width) * (bbox1.height); + float area2 = (bbox2.width) * (bbox2.height); + float overlap2D = overlapX * overlapY; + float u = area1 + area2 - overlap2D; + return u == 0 ? 
0 : overlap2D / u; + }; + + std::stable_sort(binfo.begin(), binfo.end(), + [](const NvDsInferParseObjectInfo& b1, const NvDsInferParseObjectInfo& b2) { + return b1.detectionConfidence > b2.detectionConfidence; + }); + std::vector out; + for (auto i : binfo) + { + bool keep = true; + for (auto j : out) + { + if (keep) + { + float overlap = computeIoU(i, j); + keep = overlap <= nmsThresh; + } + else + break; + } + if (keep) out.push_back(i); + } + return out; +} + +static std::vector +nmsAllClasses(const float nmsThresh, + std::vector& binfo, + const uint numClasses) +{ + std::vector result; + std::vector> splitBoxes(numClasses); + for (auto& box : binfo) + { + splitBoxes.at(box.classId).push_back(box); + } + + for (auto& boxes : splitBoxes) + { + boxes = nonMaximumSuppression(nmsThresh, boxes); + result.insert(result.end(), boxes.begin(), boxes.end()); + } + return result; +} + +static NvDsInferParseObjectInfo convertBBox(const float& bx, const float& by, const float& bw, + const float& bh, const int& stride, const uint& netW, + const uint& netH) +{ + NvDsInferParseObjectInfo b; + float xCenter = bx * stride; + float yCenter = by * stride; + float x0 = xCenter - bw / 2; + float y0 = yCenter - bh / 2; + float x1 = x0 + bw; + float y1 = y0 + bh; + + x0 = clamp(x0, 0, netW); + y0 = clamp(y0, 0, netH); + x1 = clamp(x1, 0, netW); + y1 = clamp(y1, 0, netH); + + b.left = x0; + b.width = clamp(x1 - x0, 0, netW); + b.top = y0; + b.height = clamp(y1 - y0, 0, netH); + + return b; +} + +static void addBBoxProposal(const float bx, const float by, const float bw, const float bh, + const uint stride, const uint& netW, const uint& netH, const int maxIndex, + const float maxProb, std::vector& binfo) +{ + NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH); + if (bbi.width < 1 || bbi.height < 1) return; + + bbi.detectionConfidence = maxProb; + bbi.classId = maxIndex; + binfo.push_back(bbi); +} + +static std::vector +decodeYoloTensor( + const float* 
detections, const std::vector &mask, const std::vector &anchors, + const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes, + const uint numOutputClasses, const uint& netW, + const uint& netH, + const float confThresh) +{ + std::vector binfo; + for (uint y = 0; y < gridSizeH; ++y) { + for (uint x = 0; x < gridSizeW; ++x) { + for (uint b = 0; b < numBBoxes; ++b) + { + const float pw = anchors[mask[b] * 2]; + const float ph = anchors[mask[b] * 2 + 1]; + + const int numGridCells = gridSizeH * gridSizeW; + const int bbindex = y * gridSizeW + x; + const float bx + = x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)]; + const float by + = y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)]; + const float bw + = pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)]; + const float bh + = ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)]; + + const float objectness + = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)]; + + float maxProb = 0.0f; + int maxIndex = -1; + + for (uint i = 0; i < numOutputClasses; ++i) + { + float prob + = (detections[bbindex + + numGridCells * (b * (5 + numOutputClasses) + (5 + i))]); + + if (prob > maxProb) + { + maxProb = prob; + maxIndex = i; + } + } + maxProb = objectness * maxProb; + + if (maxProb > confThresh) + { + addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo); + } + } + } + } + return binfo; +} + +static std::vector +decodeYoloV2Tensor( + const float* detections, const std::vector &anchors, + const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes, + const uint numOutputClasses, const uint& netW, + const uint& netH) +{ + std::vector binfo; + for (uint y = 0; y < gridSizeH; ++y) { + for (uint x = 0; x < gridSizeW; ++x) { + for (uint b = 0; b < numBBoxes; ++b) + { + const float pw = anchors[b * 2]; + const float ph = anchors[b * 2 + 1]; + + 
const int numGridCells = gridSizeH * gridSizeW; + const int bbindex = y * gridSizeW + x; + const float bx + = x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)]; + const float by + = y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)]; + const float bw + = pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)]; + const float bh + = ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)]; + + const float objectness + = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)]; + + float maxProb = 0.0f; + int maxIndex = -1; + + for (uint i = 0; i < numOutputClasses; ++i) + { + float prob + = (detections[bbindex + + numGridCells * (b * (5 + numOutputClasses) + (5 + i))]); + + if (prob > maxProb) + { + maxProb = prob; + maxIndex = i; + } + } + maxProb = objectness * maxProb; + + addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo); + } + } + } + return binfo; +} + +static inline std::vector +SortLayers(const std::vector & outputLayersInfo) +{ + std::vector outLayers; + for (auto const &layer : outputLayersInfo) { + outLayers.push_back (&layer); + } + std::sort(outLayers.begin(), outLayers.end(), + [](const NvDsInferLayerInfo* a, const NvDsInferLayerInfo* b) { + return a->inferDims.d[1] < b->inferDims.d[1]; + }); + return outLayers; +} + +static bool NvDsInferParseYolo( + std::vector const& outputLayersInfo, + NvDsInferNetworkInfo const& networkInfo, + NvDsInferParseDetectionParams const& detectionParams, + std::vector& objectList, + const std::vector &anchors, + const std::vector> &masks, + const uint &num_classes, + const float &beta_nms) +{ + const float kCONF_THRESH = detectionParams.perClassThreshold[0]; + + const std::vector sortedLayers = + SortLayers (outputLayersInfo); + + if (sortedLayers.size() != masks.size()) { + std::cerr << "ERROR: YOLO output layer.size: " << sortedLayers.size() + << " does not match mask.size: " << masks.size() << 
std::endl; + return false; + } + + if (num_classes != detectionParams.numClassesConfigured) + { + std::cerr << "WARNING: Num classes mismatch. Configured: " + << detectionParams.numClassesConfigured + << ", detected by network: " << num_classes << std::endl; + } + + std::vector objects; + + for (uint idx = 0; idx < masks.size(); ++idx) { + const NvDsInferLayerInfo &layer = *sortedLayers[idx]; // 255 x Grid x Grid + + assert(layer.inferDims.numDims == 3); + const uint gridSizeH = layer.inferDims.d[1]; + const uint gridSizeW = layer.inferDims.d[2]; + const uint stride = DIVUP(networkInfo.width, gridSizeW); + + std::vector outObjs = + decodeYoloTensor((const float*)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, masks[idx].size(), + num_classes, networkInfo.width, networkInfo.height, kCONF_THRESH); + objects.insert(objects.end(), outObjs.begin(), outObjs.end()); + } + + + objectList.clear(); + objectList = nmsAllClasses(beta_nms, objects, num_classes); + + return true; +} + +static bool NvDsInferParseYoloV2( + std::vector const& outputLayersInfo, + NvDsInferNetworkInfo const& networkInfo, + NvDsInferParseDetectionParams const& detectionParams, + std::vector& objectList, + std::vector &anchors, + const uint &num_classes) +{ + if (outputLayersInfo.empty()) { + std::cerr << "Could not find output layer in bbox parsing" << std::endl;; + return false; + } + const uint kNUM_BBOXES = anchors.size() / 2; + + const NvDsInferLayerInfo &layer = outputLayersInfo[0]; + + if (num_classes != detectionParams.numClassesConfigured) + { + std::cerr << "WARNING: Num classes mismatch. 
Configured: " + << detectionParams.numClassesConfigured + << ", detected by network: " << num_classes << std::endl; + } + + assert(layer.inferDims.numDims == 3); + const uint gridSizeH = layer.inferDims.d[1]; + const uint gridSizeW = layer.inferDims.d[2]; + const uint stride = DIVUP(networkInfo.width, gridSizeW); + for (auto& anchor : anchors) { + anchor *= stride; + } + std::vector objects = + decodeYoloV2Tensor((const float*)(layer.buffer), anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES, + num_classes, networkInfo.width, networkInfo.height); + + objectList = objects; + + return true; +} + +extern "C" bool NvDsInferParseYolo( + std::vector const& outputLayersInfo, + NvDsInferNetworkInfo const& networkInfo, + NvDsInferParseDetectionParams const& detectionParams, + std::vector& objectList) +{ + + int num_classes = kNUM_CLASSES; + float beta_nms = kBETA_NMS; + std::vector anchors = kANCHORS; + std::vector> mask = kMASK; + + if (mask.size() > 0) { + return NvDsInferParseYolo (outputLayersInfo, networkInfo, detectionParams, objectList, anchors, mask, num_classes, beta_nms); + } + else { + return NvDsInferParseYoloV2 (outputLayersInfo, networkInfo, detectionParams, objectList, anchors, num_classes); + } +} + +CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/utils.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/utils.cpp new file mode 100644 index 0000000..5afe57e --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/utils.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "utils.h" + +#include +#include +#include +#include +// Removes leading whitespace from s in place. +static void leftTrim(std::string& s) +{ + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !isspace(ch); })); // unsigned char: isspace is undefined for negative values +} +// Removes trailing whitespace from s in place. +static void rightTrim(std::string& s) +{ + s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !isspace(ch); }).base(), s.end()); // unsigned char: isspace is undefined for negative values +} + +std::string trim(std::string s) +{ + leftTrim(s); + rightTrim(s); + return s; +} + +float clamp(const float val, const float minVal, const float maxVal) +{ + assert(minVal <= maxVal); + return std::min(maxVal, std::max(minVal, val)); +} + +bool fileExists(const std::string fileName, bool verbose) +{ + if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) + { + if (verbose) std::cout << "File does not exist: " << fileName << std::endl; + return false; + } + return true; +} + +std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType) +{ + assert(fileExists(weightsFilePath)); + std::cout << "\nLoading pre-trained weights" << std::endl; + std::ifstream file(weightsFilePath, std::ios_base::binary); + assert(file.good()); + std::string line; + + if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos) + { + // Remove 4 int32 bytes of data from the stream belonging to the header + file.ignore(4 * 4); + } + else + { + // Remove 5 int32 bytes of data from the stream belonging to the header + file.ignore(4 * 5); + } + + std::vector weights; + char floatWeight[4]; + while (!file.eof()) + { + file.read(floatWeight, 4); + assert(file.gcount() == 4); + weights.push_back(*reinterpret_cast(floatWeight)); + if (file.peek() == std::istream::traits_type::eof()) break; + } + std::cout << "Loading weights of " << networkType << " complete" + << std::endl; + std::cout << "Total weights read: " << weights.size() << std::endl; + return weights; 
+} + +std::string dimsToString(const nvinfer1::Dims d) +{ + std::stringstream s; + assert(d.nbDims >= 1); + for (int i = 0; i < d.nbDims - 1; ++i) + { + s << std::setw(4) << d.d[i] << " x"; + } + s << std::setw(4) << d.d[d.nbDims - 1]; + + return s.str(); +} + +void displayDimType(const nvinfer1::Dims d) +{ + std::cout << "(" << d.nbDims << ") "; + for (int i = 0; i < d.nbDims; ++i) + { + switch (d.type[i]) + { + case nvinfer1::DimensionType::kSPATIAL: std::cout << "kSPATIAL "; break; + case nvinfer1::DimensionType::kCHANNEL: std::cout << "kCHANNEL "; break; + case nvinfer1::DimensionType::kINDEX: std::cout << "kINDEX "; break; + case nvinfer1::DimensionType::kSEQUENCE: std::cout << "kSEQUENCE "; break; + } + } + std::cout << std::endl; +} + +int getNumChannels(nvinfer1::ITensor* t) +{ + nvinfer1::Dims d = t->getDimensions(); + assert(d.nbDims == 3); + + return d.d[0]; +} +// Returns the element count of a 3-D tensor; the product is formed in 64 bits. +uint64_t get3DTensorVolume(nvinfer1::Dims inputDims) +{ + assert(inputDims.nbDims == 3); + return static_cast<uint64_t>(inputDims.d[0]) * inputDims.d[1] * inputDims.d[2]; // widen before multiplying so the product cannot overflow int +} + +void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, + std::string layerOutput, std::string weightPtr) +{ + std::cout << std::setw(6) << std::left << layerIndex << std::setw(24) << std::left << layerName; + std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left + << layerOutput; + std::cout << std::setw(7) << std::left << weightPtr << std::endl; +} \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/utils.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/utils.h new file mode 100644 index 0000000..2e479f7 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/utils.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + + +#ifndef __UTILS_H__ +#define __UTILS_H__ + +#include +#include +#include +#include +#include + +#include "NvInfer.h" + +#define UNUSED(expr) (void)(expr) +#define DIVUP(n, d) (((n) + (d) - 1) / (d)) // ceiling division; outer parentheses keep the expansion intact inside larger expressions + +std::string trim(std::string s); +float clamp(const float val, const float minVal, const float maxVal); +bool fileExists(const std::string fileName, bool verbose = true); +std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType); +std::string dimsToString(const nvinfer1::Dims d); +void displayDimType(const nvinfer1::Dims d); +int getNumChannels(nvinfer1::ITensor* t); +uint64_t get3DTensorVolume(nvinfer1::Dims inputDims); + +void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, + std::string layerOutput, std::string weightPtr); + +#endif diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yolo.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yolo.cpp new file mode 100644 index 0000000..035ed11 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yolo.cpp @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "yolo.h" +#include "yoloPlugins.h" +#include + +#ifdef OPENCV +#include "calibrator.h" +#endif +// Reorders the mask groups so that their first entries are in descending order. +void orderParams(std::vector> *maskVector) { + std::vector> maskinput = *maskVector; + std::vector maskPartial; + for (uint i = 0; i < maskinput.size(); i++) { + for (uint j = i + 1; j < maskinput.size(); j++) { + if (maskinput[i][0] <= maskinput[j][0]) { + maskPartial = maskinput[i]; + maskinput[i] = maskinput[j]; + maskinput[j] = maskPartial; + } + } + } + *maskVector = maskinput; +} + +Yolo::Yolo(const NetworkInfo& networkInfo) + : m_NetworkType(networkInfo.networkType), // YOLO type + m_ConfigFilePath(networkInfo.configFilePath), // YOLO cfg + m_WtsFilePath(networkInfo.wtsFilePath), // YOLO weights + m_Int8CalibPath(networkInfo.int8CalibPath), // INT8 calibration path + m_NetworkMode(networkInfo.networkMode), // FP32, INT8, FP16 + m_DeviceType(networkInfo.deviceType), // kDLA, kGPU + m_InputBlobName(networkInfo.inputBlobName), // data + m_InputH(0), + m_InputW(0), + m_InputC(0), + m_InputSize(0) +{} + +Yolo::~Yolo() +{ + destroyNetworkUtils(); +} +// Parses the cfg, installs the INT8 calibrator when required, builds the network and returns the engine (nullptr when the model cannot be parsed). +nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) +{ + assert (builder); + + m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); + parseConfigBlocks(); + orderParams(&m_OutputMasks); + + if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) { + assert(builder->platformHasFastInt8()); +#ifdef OPENCV + std::string 
calib_image_list; + int calib_batch_size; + if (getenv("INT8_CALIB_IMG_PATH")) { + calib_image_list = getenv("INT8_CALIB_IMG_PATH"); + } + else { + std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl; + std::abort(); + } + if (getenv("INT8_CALIB_BATCH_SIZE")) { + calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE")); + } + else { + std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl; + std::abort(); + } + nvinfer1::int8EntroyCalibrator *calibrator = new nvinfer1::int8EntroyCalibrator(calib_batch_size, m_InputC, m_InputH, m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath); + builder->setInt8Mode(true); + builder->setInt8Calibrator(calibrator); +#else + std::cerr << "OpenCV is required to run INT8 calibrator" << std::endl; + std::abort(); +#endif + } + + // The weights are loaded by parseModel() below; reading them here as well + // only parsed the whole weights file a second time into an unused local. + + nvinfer1::INetworkDefinition *network = builder->createNetwork(); + if (parseModel(*network) != NVDSINFER_SUCCESS) { + network->destroy(); + return nullptr; + } + + std::cout << "Building the TensorRT Engine" << std::endl; + + if (m_LetterBox == 1) { + std::cout << "\nNOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file to get better accuracy\n" << std::endl; + } + + nvinfer1::ICudaEngine * engine = builder->buildCudaEngine(*network); + if (engine) { + std::cout << "Building complete\n" << std::endl; + } else { + std::cerr << "Building engine failed\n" << std::endl; + } + + network->destroy(); + return engine; +} + +NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) { + destroyNetworkUtils(); + + std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); + std::cout << "Building YOLO network" << std::endl; + NvDsInferStatus status = buildYoloNetwork(weights, network); + + if (status == NVDSINFER_SUCCESS) { + std::cout << "Building YOLO network complete" << std::endl; + } else { + std::cerr << "Building YOLO network failed" << 
std::endl; + } + + return status; +} + +NvDsInferStatus Yolo::buildYoloNetwork( + std::vector& weights, nvinfer1::INetworkDefinition& network) { + int weightPtr = 0; + int channels = m_InputC; + + nvinfer1::ITensor* data = + network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, + nvinfer1::DimsCHW{static_cast(m_InputC), + static_cast(m_InputH), static_cast(m_InputW)}); + assert(data != nullptr && data->getDimensions().nbDims > 0); + + nvinfer1::ITensor* previous = data; + std::vector tensorOutputs; + uint outputTensorCount = 0; + + for (uint i = 0; i < m_ConfigBlocks.size(); ++i) { + assert(getNumChannels(previous) == channels); + std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; + + if (m_ConfigBlocks.at(i).at("type") == "net") { + printLayerInfo("", "layer", " input", " outup", "weightPtr"); + } + + else if (m_ConfigBlocks.at(i).at("type") == "convolutional") { + std::string inputVol = dimsToString(previous->getDimensions()); + nvinfer1::ILayer* out = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network); + previous = out->getOutput(0); + assert(previous != nullptr); + channels = getNumChannels(previous); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerType = "conv_" + m_ConfigBlocks.at(i).at("activation"); + printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); + } + + else if (m_ConfigBlocks.at(i).at("type") == "dropout") { + assert(m_ConfigBlocks.at(i).find("probability") != m_ConfigBlocks.at(i).end()); + //float probability = std::stof(m_ConfigBlocks.at(i).at("probability")); + //nvinfer1::ILayer* out = dropoutLayer(probability, previous, &network); + //previous = out->getOutput(0); + //Skip dropout layer + assert(previous != nullptr); + tensorOutputs.push_back(previous); + printLayerInfo(layerIndex, "dropout", " -", " -", " -"); + } + + else if 
(m_ConfigBlocks.at(i).at("type") == "shortcut") { + assert(m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()); + assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); + std::string activation = m_ConfigBlocks.at(i).at("activation"); + int from = stoi(m_ConfigBlocks.at(i).at("from")); + if (from > 0) { + from = from - i + 1; + } + assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); + assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); + assert(i + from - 1 < i - 2); + std::string inputVol = dimsToString(previous->getDimensions()); + std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions()); + nvinfer1::ILayer* out = shortcutLayer(i, activation, inputVol, shortcutVol, previous, tensorOutputs[i + from - 1], &network); + previous = out->getOutput(0); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + std::string layerType = "shortcut_" + m_ConfigBlocks.at(i).at("activation") + ": " + std::to_string(i + from - 1); + printLayerInfo(layerIndex, layerType, " -", outputVol, " -"); + if (inputVol != shortcutVol) { + std::cout << inputVol << " +" << shortcutVol << std::endl; + } + } + + else if (m_ConfigBlocks.at(i).at("type") == "route") { + assert(m_ConfigBlocks.at(i).find("layers") != m_ConfigBlocks.at(i).end()); + nvinfer1::ILayer* out = routeLayer(i, m_ConfigBlocks.at(i), tensorOutputs, &network); + previous = out->getOutput(0); + assert(previous != nullptr); + channels = getNumChannels(previous); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); + } + + else if (m_ConfigBlocks.at(i).at("type") == "upsample") { + std::string inputVol = dimsToString(previous->getDimensions()); + nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], previous, &network); + 
previous = out->getOutput(0); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); + } + + else if (m_ConfigBlocks.at(i).at("type") == "maxpool") { + std::string inputVol = dimsToString(previous->getDimensions()); + nvinfer1::ILayer* out = maxpoolLayer(i, m_ConfigBlocks.at(i), previous, &network); + previous = out->getOutput(0); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(previous); + printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); + } + + else if (m_ConfigBlocks.at(i).at("type") == "yolo") { + nvinfer1::Dims prevTensorDims = previous->getDimensions(); + TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); + curYoloTensor.gridSizeY = prevTensorDims.d[1]; + curYoloTensor.gridSizeX = prevTensorDims.d[2]; + curYoloTensor.stride = m_InputH / curYoloTensor.gridSizeY; + m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSizeY + * curYoloTensor.gridSizeX + * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); + std::string layerName = "yolo_" + std::to_string(i); + curYoloTensor.blobName = layerName; + int new_coords = 0; + float scale_x_y = 1; + float beta_nms = 0.45; + if (m_ConfigBlocks.at(i).find("new_coords") != m_ConfigBlocks.at(i).end()) { + new_coords = std::stoi(m_ConfigBlocks.at(i).at("new_coords")); + } + if (m_ConfigBlocks.at(i).find("scale_x_y") != m_ConfigBlocks.at(i).end()) { + scale_x_y = std::stof(m_ConfigBlocks.at(i).at("scale_x_y")); + } + if (m_ConfigBlocks.at(i).find("beta_nms") != m_ConfigBlocks.at(i).end()) { + beta_nms = std::stof(m_ConfigBlocks.at(i).at("beta_nms")); + } + nvinfer1::IPluginV2* yoloPlugin + = new YoloLayer(m_OutputTensors.at(outputTensorCount).numBBoxes, + m_OutputTensors.at(outputTensorCount).numClasses, + 
m_OutputTensors.at(outputTensorCount).gridSizeX, + m_OutputTensors.at(outputTensorCount).gridSizeY, + 1, new_coords, scale_x_y, beta_nms, + curYoloTensor.anchors, + m_OutputMasks); + assert(yoloPlugin != nullptr); + nvinfer1::IPluginV2Layer* yolo = + network.addPluginV2(&previous, 1, *yoloPlugin); + assert(yolo != nullptr); + yolo->setName(layerName.c_str()); + std::string inputVol = dimsToString(previous->getDimensions()); + previous = yolo->getOutput(0); + assert(previous != nullptr); + previous->setName(layerName.c_str()); + std::string outputVol = dimsToString(previous->getDimensions()); + network.markOutput(*previous); + channels = getNumChannels(previous); + tensorOutputs.push_back(yolo->getOutput(0)); + printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr)); + ++outputTensorCount; + } + + //YOLOv2 support + else if (m_ConfigBlocks.at(i).at("type") == "region") { + nvinfer1::Dims prevTensorDims = previous->getDimensions(); + TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount); + curRegionTensor.gridSizeY = prevTensorDims.d[1]; + curRegionTensor.gridSizeX = prevTensorDims.d[2]; + curRegionTensor.stride = m_InputH / curRegionTensor.gridSizeY; + m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSizeY + * curRegionTensor.gridSizeX + * (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses)); + std::string layerName = "region_" + std::to_string(i); + curRegionTensor.blobName = layerName; + std::vector> mask; + nvinfer1::IPluginV2* regionPlugin + = new YoloLayer(curRegionTensor.numBBoxes, + curRegionTensor.numClasses, + curRegionTensor.gridSizeX, + curRegionTensor.gridSizeY, + 0, 0, 1.0, 0, + curRegionTensor.anchors, + mask); + assert(regionPlugin != nullptr); + nvinfer1::IPluginV2Layer* region = + network.addPluginV2(&previous, 1, *regionPlugin); + assert(region != nullptr); + region->setName(layerName.c_str()); + std::string inputVol = dimsToString(previous->getDimensions()); + previous = 
region->getOutput(0); + assert(previous != nullptr); + previous->setName(layerName.c_str()); + std::string outputVol = dimsToString(previous->getDimensions()); + network.markOutput(*previous); + channels = getNumChannels(previous); + tensorOutputs.push_back(region->getOutput(0)); + printLayerInfo(layerIndex, "region", inputVol, outputVol, std::to_string(weightPtr)); + ++outputTensorCount; + } + else if (m_ConfigBlocks.at(i).at("type") == "reorg") { + std::string inputVol = dimsToString(previous->getDimensions()); + nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2); + assert(reorgPlugin != nullptr); + nvinfer1::IPluginV2Layer* reorg = + network.addPluginV2(&previous, 1, *reorgPlugin); + assert(reorg != nullptr); + std::string layerName = "reorg_" + std::to_string(i); + reorg->setName(layerName.c_str()); + previous = reorg->getOutput(0); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + channels = getNumChannels(previous); + tensorOutputs.push_back(reorg->getOutput(0)); + printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr)); + } + + else + { + std::cout << "Unsupported layer type --> \"" + << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl; + assert(0); + } + } + + if ((int)weights.size() != weightPtr) + { + std::cout << "Number of unused weights left: " << weights.size() - weightPtr << std::endl; + assert(0); + } + + std::cout << "Output YOLO blob names: " << std::endl; + for (auto& tensor : m_OutputTensors) { + std::cout << tensor.blobName << std::endl; + } + + int nbLayers = network.getNbLayers(); + std::cout << "Total number of YOLO layers: " << nbLayers << std::endl; + + return NVDSINFER_SUCCESS; +} +// Reads the cfg file into one key/value map per [section]; the section name is stored under the key "type". +std::vector> +Yolo::parseConfigFile (const std::string cfgFilePath) +{ + assert(fileExists(cfgFilePath)); + std::ifstream file(cfgFilePath); + assert(file.good()); + std::string line; + std::vector> blocks; + std::map block; + + while (getline(file, line)) + { + line = trim(line); // trim first so whitespace-only lines and indented comments are recognised below + if 
(line.empty()) continue; + if (line.front() == '#') continue; + if (line.front() == '[') + { + if (block.size() > 0) + { + blocks.push_back(block); + block.clear(); + } + std::string key = "type"; + std::string value = trim(line.substr(1, line.size() - 2)); + block.insert(std::pair(key, value)); + } + else + { + int cpos = line.find('='); + std::string key = trim(line.substr(0, cpos)); + std::string value = trim(line.substr(cpos + 1)); + block.insert(std::pair(key, value)); + } + } + blocks.push_back(block); + return blocks; +} +// Takes the input dimensions from the [net] block and the anchors, masks, box count and class count from every [region] or [yolo] block. +void Yolo::parseConfigBlocks() +{ + for (const auto& block : m_ConfigBlocks) { + if (block.at("type") == "net") + { + assert((block.find("height") != block.end()) + && "Missing 'height' param in network cfg"); + assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); + assert((block.find("channels") != block.end()) + && "Missing 'channels' param in network cfg"); + + m_InputH = std::stoul(block.at("height")); + m_InputW = std::stoul(block.at("width")); + m_InputC = std::stoul(block.at("channels")); + m_InputSize = m_InputC * m_InputH * m_InputW; + + if (block.find("letter_box") != block.end()) { + m_LetterBox = std::stoul(block.at("letter_box")); + } + else { + m_LetterBox = 0; + } + } + else if ((block.at("type") == "region") || (block.at("type") == "yolo")) + { + assert((block.find("num") != block.end()) + && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); + assert((block.find("classes") != block.end()) + && std::string("Missing 'classes' param in " + block.at("type") + " layer") + .c_str()); + assert((block.find("anchors") != block.end()) + && std::string("Missing 'anchors' param in " + block.at("type") + " layer") + .c_str()); + + TensorInfo outputTensor; + std::string anchorString = block.at("anchors"); + while (!anchorString.empty()) + { + int npos = anchorString.find_first_of(','); + if (npos != -1) + { + float anchor = 
std::stof(trim(anchorString.substr(0, npos))); + outputTensor.anchors.push_back(anchor); + anchorString.erase(0, npos + 1); + } + else + { + float anchor = std::stof(trim(anchorString)); + outputTensor.anchors.push_back(anchor); + break; + } + } + + + if (block.find("mask") != block.end()) { + + std::string maskString = block.at("mask"); + std::vector pMASKS; + while (!maskString.empty()) + { + int npos = maskString.find_first_of(','); + if (npos != -1) + { + int mask = std::stoul(trim(maskString.substr(0, npos))); + pMASKS.push_back(mask); + outputTensor.masks.push_back(mask); + maskString.erase(0, npos + 1); + } + else + { + int mask = std::stoul(trim(maskString)); + pMASKS.push_back(mask); + outputTensor.masks.push_back(mask); + break; + } + } + m_OutputMasks.push_back(pMASKS); + } + + outputTensor.numBBoxes = outputTensor.masks.size() > 0 + ? outputTensor.masks.size() + : std::stoul(trim(block.at("num"))); + outputTensor.numClasses = std::stoul(block.at("classes")); + m_OutputTensors.push_back(outputTensor); + } + } +} + +void Yolo::destroyNetworkUtils() { + for (uint i = 0; i < m_TrtWeights.size(); ++i) { + if (m_TrtWeights[i].count > 0) + free(const_cast(m_TrtWeights[i].values)); + } + m_TrtWeights.clear(); +} diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yolo.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yolo.h new file mode 100644 index 0000000..053e9c7 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yolo.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef _YOLO_H_ +#define _YOLO_H_ + +#include "layers/convolutional_layer.h" +#include "layers/dropout_layer.h" +#include "layers/shortcut_layer.h" +#include "layers/route_layer.h" +#include "layers/upsample_layer.h" +#include "layers/maxpool_layer.h" + +#include "nvdsinfer_custom_impl.h" + +struct NetworkInfo +{ + std::string networkType; + std::string configFilePath; + std::string wtsFilePath; + std::string int8CalibPath; + std::string networkMode; + std::string deviceType; + std::string inputBlobName; +}; + +struct TensorInfo +{ + std::string blobName; + uint stride{0}; + uint gridSizeY{0}; + uint gridSizeX{0}; + uint numClasses{0}; + uint numBBoxes{0}; + uint64_t volume{0}; + std::vector masks; + std::vector anchors; + int bindingIndex{-1}; + float* hostBuffer{nullptr}; +}; + +class Yolo : public IModelParser { +public: + Yolo(const NetworkInfo& networkInfo); + ~Yolo() override; + bool hasFullDimsSupported() const override { return false; } + const char* getModelName() const override { + return m_ConfigFilePath.empty() ? 
m_NetworkType.c_str() + : m_ConfigFilePath.c_str(); + } + NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override; + + nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder); + +protected: + const std::string m_NetworkType; + const std::string m_ConfigFilePath; + const std::string m_WtsFilePath; + const std::string m_Int8CalibPath; + const std::string m_NetworkMode; + const std::string m_DeviceType; + const std::string m_InputBlobName; + std::vector m_OutputTensors; + std::vector> m_OutputMasks; + std::vector> m_ConfigBlocks; + uint m_InputH; + uint m_InputW; + uint m_InputC; + uint64_t m_InputSize; + uint m_LetterBox{0}; // stays 0 unless the cfg [net] block supplies letter_box, so it is never read uninitialized + + std::vector m_TrtWeights; + +private: + NvDsInferStatus buildYoloNetwork( + std::vector& weights, nvinfer1::INetworkDefinition& network); + std::vector> parseConfigFile( + const std::string cfgFilePath); + void parseConfigBlocks(); + void destroyNetworkUtils(); +}; + +#endif // _YOLO_H_ \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloForward.cu b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloForward.cu new file mode 100644 index 0000000..dcc4b95 --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloForward.cu @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA Corporation is strictly prohibited. 
+ * + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + * + */ + +#include +#include +#include +#include +#include + +inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } + +__global__ void gpuYoloLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, + const uint numBBoxes, const uint new_coords, const float scale_x_y) +{ + uint x_id = blockIdx.x * blockDim.x + threadIdx.x; + uint y_id = blockIdx.y * blockDim.y + threadIdx.y; + uint z_id = blockIdx.z * blockDim.z + threadIdx.z; + + if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes)) + { + return; + } + + const int numGridCells = gridSizeX * gridSizeY; + const int bbindex = y_id * gridSizeX + x_id; + + float alpha = scale_x_y; + float beta = -0.5 * (scale_x_y - 1); + + if (new_coords == 1) { + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] + = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta; + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] + = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta; + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] + = pow(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] + = pow(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)] + = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; + + for (uint i = 0; i < numOutputClasses; ++i) + { + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] + = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; + } + } + else { + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] + = sigmoidGPU(input[bbindex + 
numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta; + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] + = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta; + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)] + = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); + + for (uint i = 0; i < numOutputClasses; ++i) + { + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] + = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); + } + } +} + +__global__ void gpuRegionLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, + const uint numBBoxes) +{ + uint x_id = blockIdx.x * blockDim.x + threadIdx.x; + uint y_id = blockIdx.y * blockDim.y + threadIdx.y; + uint z_id = blockIdx.z * blockDim.z + threadIdx.z; + + if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes)) + { + return; + } + + const int numGridCells = gridSizeX * gridSizeY; + const int bbindex = y_id * gridSizeX + x_id; + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] + = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] + = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 
3)] + = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]); + + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)] + = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); + + float temp = 1.0; + int i; + float sum = 0; + float largest = -INFINITY; + for(i = 0; i < numOutputClasses; ++i){ + int val = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; + largest = (val>largest) ? val : largest; + } + for(i = 0; i < numOutputClasses; ++i){ + float e = exp(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] / temp - largest / temp); + sum += e; + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] = e; + } + for(i = 0; i < numOutputClasses; ++i){ + output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] /= sum; + } +} + +cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, + uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType); + +cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, + const uint& numOutputClasses, const uint& numBBoxes, + uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType) +{ + dim3 threads_per_block(16, 16, 4); + dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, + (gridSizeY / threads_per_block.y) + 1, + (numBBoxes / threads_per_block.z) + 1); + if (modelType == 1) { + for (unsigned int batch = 0; batch < batchSize; ++batch) + { + gpuYoloLayer<<>>( + reinterpret_cast(input) + (batch * outputSize), + reinterpret_cast(output) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, + numBBoxes, modelCoords, modelScale); + } + } + else if (modelType == 0) { + for 
(unsigned int batch = 0; batch < batchSize; ++batch) + { + gpuRegionLayer<<>>( + reinterpret_cast(input) + (batch * outputSize), + reinterpret_cast(output) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, + numBBoxes); + } + } + return cudaGetLastError(); +} diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp new file mode 100644 index 0000000..0ae7cbb --- /dev/null +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ + * Edited by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#include "yoloPlugins.h" +#include "NvInferPlugin.h" +#include +#include +#include + +int kNUM_CLASSES; +float kBETA_NMS; +std::vector kANCHORS; +std::vector> kMASK; + +namespace { +template +void write(char*& buffer, const T& val) +{ + *reinterpret_cast(buffer) = val; + buffer += sizeof(T); +} + +template +void read(const char*& buffer, T& val) +{ + val = *reinterpret_cast(buffer); + buffer += sizeof(T); +} +} + +cudaError_t cudaYoloLayer ( + const void* input, void* output, const uint& batchSize, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, + const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType); + +YoloLayer::YoloLayer (const void* data, size_t length) +{ + const char *d = static_cast(data); + read(d, m_NumBoxes); + read(d, m_NumClasses); + read(d, m_GridSizeX); + read(d, m_GridSizeY); + read(d, m_OutputSize); + + read(d, m_type); + read(d, m_new_coords); + read(d, m_scale_x_y); + read(d, m_beta_nms); + uint anchorsSize; + read(d, anchorsSize); + for (uint i = 0; i < anchorsSize; i++) { + float result; + read(d, result); + m_Anchors.push_back(result); + } + uint maskSize; + read(d, maskSize); + for (uint i = 0; i < maskSize; i++) { + uint nMask; + read(d, nMask); + std::vector pMask; + for (uint f = 0; f < nMask; f++) { + int result; + read(d, result); + pMask.push_back(result); + } + m_Mask.push_back(pMask); + } + kNUM_CLASSES = m_NumClasses; + kBETA_NMS = m_beta_nms; + kANCHORS = m_Anchors; + kMASK = m_Mask; +}; + +YoloLayer::YoloLayer ( + const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector anchors, std::vector> mask) : + m_NumBoxes(numBoxes), + m_NumClasses(numClasses), + m_GridSizeX(gridSizeX), + m_GridSizeY(gridSizeY), 
+ m_type(model_type), + m_new_coords(new_coords), + m_scale_x_y(scale_x_y), + m_beta_nms(beta_nms), + m_Anchors(anchors), + m_Mask(mask) +{ + assert(m_NumBoxes > 0); + assert(m_NumClasses > 0); + assert(m_GridSizeX > 0); + assert(m_GridSizeY > 0); + m_OutputSize = m_GridSizeX * m_GridSizeY * (m_NumBoxes * (4 + 1 + m_NumClasses)); +}; + +nvinfer1::Dims +YoloLayer::getOutputDimensions( + int index, const nvinfer1::Dims* inputs, int nbInputDims) +{ + assert(index == 0); + assert(nbInputDims == 1); + return inputs[0]; +} + +bool YoloLayer::supportsFormat ( + nvinfer1::DataType type, nvinfer1::PluginFormat format) const { + return (type == nvinfer1::DataType::kFLOAT && + format == nvinfer1::PluginFormat::kNCHW); +} + +void +YoloLayer::configureWithFormat ( + const nvinfer1::Dims* inputDims, int nbInputs, + const nvinfer1::Dims* outputDims, int nbOutputs, + nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) +{ + assert(nbInputs == 1); + assert (format == nvinfer1::PluginFormat::kNCHW); + assert(inputDims != nullptr); +} + +int YoloLayer::enqueue( + int batchSize, const void* const* inputs, void** outputs, void* workspace, + cudaStream_t stream) +{ + CHECK(cudaYoloLayer( + inputs[0], outputs[0], batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes, + m_OutputSize, stream, m_new_coords, m_scale_x_y, m_type)); + return 0; +} + +size_t YoloLayer::getSerializationSize() const +{ + int anchorsSum = 1; + for (uint i = 0; i < m_Anchors.size(); i++) { + anchorsSum += 1; + } + int maskSum = 1; + for (uint i = 0; i < m_Mask.size(); i++) { + maskSum += 1; + for (uint f = 0; f < m_Mask[i].size(); f++) { + maskSum += 1; + } + } + + return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(m_GridSizeX) + sizeof(m_GridSizeY) + sizeof(m_OutputSize) + sizeof(m_type) + + sizeof(m_new_coords) + sizeof(m_scale_x_y) + sizeof(m_beta_nms) + anchorsSum * sizeof(float) + maskSum * sizeof(int); +} + +void YoloLayer::serialize(void* buffer) const +{ + char *d = 
static_cast(buffer); + write(d, m_NumBoxes); + write(d, m_NumClasses); + write(d, m_GridSizeX); + write(d, m_GridSizeY); + write(d, m_OutputSize); + + write(d, m_type); + write(d, m_new_coords); + write(d, m_scale_x_y); + write(d, m_beta_nms); + uint anchorsSize = m_Anchors.size(); + write(d, anchorsSize); + for (uint i = 0; i < anchorsSize; i++) { + write(d, m_Anchors[i]); + } + uint maskSize = m_Mask.size(); + write(d, maskSize); + for (uint i = 0; i < maskSize; i++) { + uint pMaskSize = m_Mask[i].size(); + write(d, pMaskSize); + for (uint f = 0; f < pMaskSize; f++) { + write(d, m_Mask[i][f]); + } + } + kNUM_CLASSES = m_NumClasses; + kBETA_NMS = m_beta_nms; + kANCHORS = m_Anchors; + kMASK = m_Mask; +} + +nvinfer1::IPluginV2* YoloLayer::clone() const +{ + return new YoloLayer (m_NumBoxes, m_NumClasses, m_GridSizeX, m_GridSizeY, m_type, m_new_coords, m_scale_x_y, m_beta_nms, m_Anchors, m_Mask); +} + +REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); \ No newline at end of file diff --git a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloPlugins.h b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloPlugins.h index 94812ae..58cb0d5 100644 --- a/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloPlugins.h +++ b/examples/multiple_inferences/sgie1/nvdsinfer_custom_impl_Yolo/yoloPlugins.h @@ -56,7 +56,7 @@ class YoloLayer : public nvinfer1::IPluginV2 { public: YoloLayer (const void* data, size_t length); - YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSize, + YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector anchors, const std::vector> mask); const char* getPluginType () const override { return YOLOLAYER_PLUGIN_NAME; } @@ -96,7 +96,8 @@ public: private: uint m_NumBoxes {0}; uint m_NumClasses {0}; - uint m_GridSize {0}; + uint m_GridSizeX 
{0}; + uint m_GridSizeY {0}; uint64_t m_OutputSize {0}; std::string m_Namespace {""}; @@ -152,4 +153,4 @@ extern float kBETA_NMS; extern std::vector kANCHORS; extern std::vector> kMASK; -#endif // __YOLO_PLUGINS__ \ No newline at end of file +#endif // __YOLO_PLUGINS__ diff --git a/native/config_infer_primary.txt b/native/config_infer_primary.txt index 1c6f9e7..11ab7aa 100644 --- a/native/config_infer_primary.txt +++ b/native/config_infer_primary.txt @@ -4,10 +4,11 @@ net-scale-factor=0.0039215697906911373 model-color-format=0 custom-network-config=yolov4.cfg model-file=yolov4.weights -model-engine-file=model_b1_gpu0_fp16.engine +model-engine-file=model_b1_gpu0_fp32.engine +#int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 -network-mode=2 +network-mode=0 num-detected-classes=80 interval=0 gie-unique-id=1 diff --git a/native/config_infer_primary_yoloV2.txt b/native/config_infer_primary_yoloV2.txt index d80a68d..8d939d6 100644 --- a/native/config_infer_primary_yoloV2.txt +++ b/native/config_infer_primary_yoloV2.txt @@ -4,10 +4,11 @@ net-scale-factor=0.0039215697906911373 model-color-format=0 custom-network-config=yolov2.cfg model-file=yolov2.weights -model-engine-file=model_b1_gpu0_fp16.engine +model-engine-file=model_b1_gpu0_fp32.engine +#int8-calib-file=calib.table labelfile-path=labels.txt batch-size=1 -network-mode=2 +network-mode=0 num-detected-classes=80 interval=0 gie-unique-id=1 diff --git a/native/deepstream_app_config.txt b/native/deepstream_app_config.txt index cd2a411..b811b6e 100644 --- a/native/deepstream_app_config.txt +++ b/native/deepstream_app_config.txt @@ -1,6 +1,6 @@ [application] enable-perf-measurement=1 -perf-measurement-interval-sec=1 +perf-measurement-interval-sec=5 [tiled-display] enable=1 @@ -14,7 +14,7 @@ nvbuf-memory-type=0 [source0] enable=1 type=3 -uri=file://../../samples/streams/sample_1080p_h264.mp4 +uri=file:///opt/nvidia/deepstream/deepstream-5.1/samples/streams/sample_1080p_h264.mp4 num-sources=1 gpu-id=0 
cudadec-memtype=0 diff --git a/native/nvdsinfer_custom_impl_Yolo/Makefile b/native/nvdsinfer_custom_impl_Yolo/Makefile index 1780210..f2474bc 100644 --- a/native/nvdsinfer_custom_impl_Yolo/Makefile +++ b/native/nvdsinfer_custom_impl_Yolo/Makefile @@ -27,13 +27,25 @@ CUDA_VER?= ifeq ($(CUDA_VER),) $(error "CUDA_VER is not set") endif + +OPENCV?= +ifeq ($(OPENCV),) + OPENCV=0 +endif + CC:= g++ NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations -CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include +CFLAGS+= -I/opt/nvidia/deepstream/deepstream-5.1/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include -LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs +ifeq ($(OPENCV), 1) +COMMON= -DOPENCV +CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) +LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) +endif + +LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group INCS:= $(wildcard *.h) @@ -50,6 +62,11 @@ SRCFILES:= nvdsinfer_yolo_engine.cpp \ utils.cpp \ yolo.cpp \ yoloForward.cu + +ifeq ($(OPENCV), 1) +SRCFILES+= calibrator.cpp +endif + TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so TARGET_OBJS:= $(SRCFILES:.cpp=.o) @@ -58,7 +75,7 @@ TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) all: $(TARGET_LIB) %.o: %.cpp $(INCS) Makefile - $(CC) -c -o $@ $(CFLAGS) $< + $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< %.o: %.cu $(INCS) Makefile $(NVCC) -c -o $@ --compiler-options '-fPIC' $< diff --git a/native/nvdsinfer_custom_impl_Yolo/calibrator.cpp b/native/nvdsinfer_custom_impl_Yolo/calibrator.cpp new file mode 100644 index 0000000..0c5dd63 --- /dev/null +++ b/native/nvdsinfer_custom_impl_Yolo/calibrator.cpp @@ -0,0 +1,130 @@ +/* + * Created by Marcos Luciano + * 
https://www.github.com/marcoslucianops + */ + +#include "calibrator.h" +#include +#include + +namespace nvinfer1 +{ + int8EntroyCalibrator::int8EntroyCalibrator(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath, + const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0) + { + inputCount = batchsize * channels * height * width; + std::fstream f(imgPath); + if (f.is_open()) + { + std::string temp; + while (std::getline(f, temp)) imgPaths.push_back(temp); + } + batchData = new float[inputCount]; + CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float))); + } + + int8EntroyCalibrator::~int8EntroyCalibrator() + { + CUDA_CHECK(cudaFree(deviceInput)); + if (batchData) + delete[] batchData; + } + + bool int8EntroyCalibrator::getBatch(void **bindings, const char **names, int nbBindings) + { + if (imageIndex + batchSize > uint(imgPaths.size())) + return false; + + float* ptr = batchData; + for (size_t j = imageIndex; j < imageIndex + batchSize; ++j) + { + cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR); + std::vectorinputData = prepareImage(img, inputC, inputH, inputW, letterBox); + + int len = (int)(inputData.size()); + memcpy(ptr, inputData.data(), len * sizeof(float)); + + ptr += inputData.size(); + std::cout << "Load image: " << imgPaths[j] << std::endl; + std::cout << "Progress: " << (j + 1)*100. 
/ imgPaths.size() << "%" << std::endl; + } + imageIndex += batchSize; + CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice)); + bindings[0] = deviceInput; + return true; + } + + const void* int8EntroyCalibrator::readCalibrationCache(std::size_t &length) + { + calibrationCache.clear(); + std::ifstream input(calibTablePath, std::ios::binary); + input >> std::noskipws; + if (readCache && input.good()) + { + std::copy(std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(calibrationCache)); + } + length = calibrationCache.size(); + return length ? calibrationCache.data() : nullptr; + } + + void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length) + { + std::ofstream output(calibTablePath, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } +} + +std::vector prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box) +{ + cv::Mat out; + if (letter_box == 2) + { + int image_w = img.cols; + int image_h = img.rows; + int resize_w = 0; + int resize_h = 0; + int offset_top = 0; + int offset_bottom = 0; + int offset_left = 0; + int offset_right = 0; + if ((float)input_h / image_h > (float)input_w / image_w) + { + resize_w = input_w; + resize_h = (input_w * image_h) / image_w; + offset_bottom = input_h - resize_h; + } + else + { + resize_h = input_h; + resize_w = (input_h * image_w) / image_h; + offset_right = input_w - resize_w; + } + cv::resize(img, out, cv::Size(resize_w, resize_h), 0, 0, cv::INTER_CUBIC); + cv::copyMakeBorder(out, out, offset_top, offset_bottom, offset_left, offset_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + } + else + { + cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC); + } + cv::cvtColor(out, out, cv::COLOR_BGR2RGB); + if (input_c == 3) + { + out.convertTo(out, CV_32FC3, 1.0 / 255.0); + } + else + { + out.convertTo(out, CV_32FC1, 1.0 / 255.0); + } + std::vector input_channels(input_c); 
+ cv::split(out, input_channels); + std::vector result(input_h * input_w * input_c); + auto data = result.data(); + int channelLength = input_h * input_w; + for (int i = 0; i < input_c; ++i) + { + memcpy(data, input_channels[i].data, channelLength * sizeof(float)); + data += channelLength; + } + return result; +} diff --git a/native/nvdsinfer_custom_impl_Yolo/calibrator.h b/native/nvdsinfer_custom_impl_Yolo/calibrator.h new file mode 100644 index 0000000..a78e062 --- /dev/null +++ b/native/nvdsinfer_custom_impl_Yolo/calibrator.h @@ -0,0 +1,62 @@ +/* + * Created by Marcos Luciano + * https://www.github.com/marcoslucianops + */ + +#ifndef CALIBRATOR_H +#define CALIBRATOR_H + +#include "opencv2/opencv.hpp" +#include "cuda_runtime.h" +#include "NvInfer.h" +#include +#include + +#ifndef CUDA_CHECK +#define CUDA_CHECK(callstr) \ + { \ + cudaError_t error_code = callstr; \ + if (error_code != cudaSuccess) { \ + std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ + assert(0); \ + } \ + } +#endif + +namespace nvinfer1 { + class int8EntroyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { + public: + int8EntroyCalibrator(const int &batchsize, + const int &channels, + const int &height, + const int &width, + const int &letterbox, + const std::string &imgPath, + const std::string &calibTablePath); + + virtual ~int8EntroyCalibrator(); + int getBatchSize() const override { return batchSize; } + bool getBatch(void *bindings[], const char *names[], int nbBindings) override; + const void *readCalibrationCache(std::size_t &length) override; + void writeCalibrationCache(const void *ptr, std::size_t length) override; + + private: + int batchSize; + int inputC; + int inputH; + int inputW; + int letterBox; + std::string calibTablePath; + size_t imageIndex; + size_t inputCount; + std::vector imgPaths; + float *batchData{ nullptr }; + void *deviceInput{ nullptr }; + bool readCache; + std::vector calibrationCache; + }; +} + +std::vector 
prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box); + +#endif //CALIBRATOR_H \ No newline at end of file diff --git a/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp b/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp index e0c7752..eb49011 100644 --- a/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp +++ b/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp @@ -8,79 +8,17 @@ nvinfer1::ILayer* upsampleLayer( int layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, - int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "upsample"); - nvinfer1::Dims inpDims = input->getDimensions(); - assert(inpDims.nbDims == 3); - assert(inpDims.d[1] == inpDims.d[2]); - int h = inpDims.d[1]; - int w = inpDims.d[2]; int stride = std::stoi(block.at("stride")); - nvinfer1::Dims preDims{3, - {1, stride * h, w}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; - int size = stride * h * w; - nvinfer1::Weights preMul{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* preWt = new float[size]; - - for (int i = 0, idx = 0; i < h; ++i) - { - for (int s = 0; s < stride; ++s) - { - for (int j = 0; j < w; ++j, ++idx) - { - preWt[idx] = (i == j) ? 
1.0 : 0.0; - } - } - } - preMul.values = preWt; - trtWeights.push_back(preMul); - nvinfer1::IConstantLayer* preM = network->addConstant(preDims, preMul); - assert(preM != nullptr); - std::string preLayerName = "preMul_" + std::to_string(layerIdx); - preM->setName(preLayerName.c_str()); - - nvinfer1::Dims postDims{3, - {1, h, stride * w}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; - size = stride * h * w; - nvinfer1::Weights postMul{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* postWt = new float[size]; - - for (int i = 0, idx = 0; i < h; ++i) - { - for (int j = 0; j < stride * w; ++j, ++idx) - { - postWt[idx] = (j / stride == i) ? 1.0 : 0.0; - } - } - postMul.values = postWt; - trtWeights.push_back(postMul); - nvinfer1::IConstantLayer* post_m = network->addConstant(postDims, postMul); - assert(post_m != nullptr); - std::string postLayerName = "postMul_" + std::to_string(layerIdx); - post_m->setName(postLayerName.c_str()); - - nvinfer1::IMatrixMultiplyLayer* mm1 - = network->addMatrixMultiply(*preM->getOutput(0), nvinfer1::MatrixOperation::kNONE, *input, - nvinfer1::MatrixOperation::kNONE); - assert(mm1 != nullptr); - std::string mm1LayerName = "mm1_" + std::to_string(layerIdx); - mm1->setName(mm1LayerName.c_str()); - nvinfer1::IMatrixMultiplyLayer* mm2 - = network->addMatrixMultiply(*mm1->getOutput(0), nvinfer1::MatrixOperation::kNONE, - *post_m->getOutput(0), nvinfer1::MatrixOperation::kNONE); - assert(mm2 != nullptr); - std::string mm2LayerName = "mm2_" + std::to_string(layerIdx); - mm2->setName(mm2LayerName.c_str()); - - return mm2; + nvinfer1::IResizeLayer* resize_layer = network->addResize(*input); + resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST); + float scale[3] = {1, stride, stride}; + resize_layer->setScales(scale, 3); + std::string layer_name = "upsample_" + std::to_string(layerIdx); + resize_layer->setName(layer_name.c_str()); + return resize_layer; } \ No newline at 
end of file diff --git a/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h b/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h index 6b0224a..d1b7768 100644 --- a/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h +++ b/native/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h @@ -15,9 +15,6 @@ nvinfer1::ILayer* upsampleLayer( int layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, - int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); diff --git a/native/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp b/native/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp index 29306cd..03a4820 100644 --- a/native/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp +++ b/native/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp @@ -45,9 +45,20 @@ static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContext networkInfo.networkType = yoloType; networkInfo.configFilePath = initParams->customNetworkConfigFilePath; networkInfo.wtsFilePath = initParams->modelFilePath; + networkInfo.int8CalibPath = initParams->int8CalibrationFilePath; networkInfo.deviceType = (initParams->useDLA ? 
"kDLA" : "kGPU"); networkInfo.inputBlobName = "data"; + if(initParams->networkMode == 0) { + networkInfo.networkMode = "FP32"; + } + else if(initParams->networkMode == 1) { + networkInfo.networkMode = "INT8"; + } + else if(initParams->networkMode == 2) { + networkInfo.networkMode = "FP16"; + } + if (networkInfo.configFilePath.empty() || networkInfo.wtsFilePath.empty()) { std::cerr << "YOLO config file or weights file is not specified" diff --git a/native/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp b/native/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp index f3128cd..1cc8d39 100644 --- a/native/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp +++ b/native/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp @@ -302,7 +302,6 @@ static bool NvDsInferParseYolo( const uint gridSizeH = layer.inferDims.d[1]; const uint gridSizeW = layer.inferDims.d[2]; const uint stride = DIVUP(networkInfo.width, gridSizeW); - assert(stride == DIVUP(networkInfo.height, gridSizeH)); std::vector outObjs = decodeYoloTensor((const float*)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, masks[idx].size(), @@ -344,7 +343,6 @@ static bool NvDsInferParseYoloV2( const uint gridSizeH = layer.inferDims.d[1]; const uint gridSizeW = layer.inferDims.d[2]; const uint stride = DIVUP(networkInfo.width, gridSizeW); - assert(stride == DIVUP(networkInfo.height, gridSizeH)); for (auto& anchor : anchors) { anchor *= stride; } diff --git a/native/nvdsinfer_custom_impl_Yolo/yolo.cpp b/native/nvdsinfer_custom_impl_Yolo/yolo.cpp index d3596c6..035ed11 100644 --- a/native/nvdsinfer_custom_impl_Yolo/yolo.cpp +++ b/native/nvdsinfer_custom_impl_Yolo/yolo.cpp @@ -25,6 +25,11 @@ #include "yolo.h" #include "yoloPlugins.h" +#include + +#ifdef OPENCV +#include "calibrator.h" +#endif void orderParams(std::vector> *maskVector) { std::vector> maskinput = *maskVector; @@ -45,6 +50,8 @@ Yolo::Yolo(const NetworkInfo& networkInfo) : m_NetworkType(networkInfo.networkType), // YOLO type 
m_ConfigFilePath(networkInfo.configFilePath), // YOLO cfg m_WtsFilePath(networkInfo.wtsFilePath), // YOLO weights + m_Int8CalibPath(networkInfo.int8CalibPath), // INT8 calibration path + m_NetworkMode(networkInfo.networkMode), // FP32, INT8, FP16 m_DeviceType(networkInfo.deviceType), // kDLA, kGPU m_InputBlobName(networkInfo.inputBlobName), // data m_InputH(0), @@ -62,6 +69,38 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) { assert (builder); + m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); + parseConfigBlocks(); + orderParams(&m_OutputMasks); + + if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) { + assert(builder->platformHasFastInt8()); +#ifdef OPENCV + std::string calib_image_list; + int calib_batch_size; + if (getenv("INT8_CALIB_IMG_PATH")) { + calib_image_list = getenv("INT8_CALIB_IMG_PATH"); + } + else { + std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl; + std::abort(); + } + if (getenv("INT8_CALIB_BATCH_SIZE")) { + calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE")); + } + else { + std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl; + std::abort(); + } + nvinfer1::int8EntroyCalibrator *calibrator = new nvinfer1::int8EntroyCalibrator(calib_batch_size, m_InputC, m_InputH, m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath); + builder->setInt8Mode(true); + builder->setInt8Calibrator(calibrator); +#else + std::cerr << "OpenCV is required to run INT8 calibrator" << std::endl; + std::abort(); +#endif + } + std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); std::vector trtWeights; @@ -71,8 +110,12 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) return nullptr; } - // Build the engine std::cout << "Building the TensorRT Engine" << std::endl; + + if (m_LetterBox == 1) { + std::cout << "\nNOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file to get better accuracy\n" << std::endl; + } + nvinfer1::ICudaEngine 
* engine = builder->buildCudaEngine(*network); if (engine) { std::cout << "Building complete\n" << std::endl; @@ -80,7 +123,6 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) std::cerr << "Building engine failed\n" << std::endl; } - // destroy network->destroy(); return engine; } @@ -88,12 +130,7 @@ nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) { destroyNetworkUtils(); - m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); - parseConfigBlocks(); - orderParams(&m_OutputMasks); - std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); - // build yolo network std::cout << "Building YOLO network" << std::endl; NvDsInferStatus status = buildYoloNetwork(weights, network); @@ -121,9 +158,7 @@ NvDsInferStatus Yolo::buildYoloNetwork( std::vector tensorOutputs; uint outputTensorCount = 0; - // build the network using the network API for (uint i = 0; i < m_ConfigBlocks.size(); ++i) { - // check if num. 
of channels is correct assert(getNumChannels(previous) == channels); std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; @@ -192,7 +227,7 @@ NvDsInferStatus Yolo::buildYoloNetwork( else if (m_ConfigBlocks.at(i).at("type") == "upsample") { std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], weights, m_TrtWeights, channels, previous, &network); + nvinfer1::ILayer* out = upsampleLayer(i - 1, m_ConfigBlocks[i], previous, &network); previous = out->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); @@ -212,12 +247,12 @@ NvDsInferStatus Yolo::buildYoloNetwork( else if (m_ConfigBlocks.at(i).at("type") == "yolo") { nvinfer1::Dims prevTensorDims = previous->getDimensions(); - assert(prevTensorDims.d[1] == prevTensorDims.d[2]); TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); - curYoloTensor.gridSize = prevTensorDims.d[1]; - curYoloTensor.stride = m_InputW / curYoloTensor.gridSize; - m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSize - * curYoloTensor.gridSize + curYoloTensor.gridSizeY = prevTensorDims.d[1]; + curYoloTensor.gridSizeX = prevTensorDims.d[2]; + curYoloTensor.stride = m_InputH / curYoloTensor.gridSizeY; + m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSizeY + * curYoloTensor.gridSizeX * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); std::string layerName = "yolo_" + std::to_string(i); curYoloTensor.blobName = layerName; @@ -236,7 +271,8 @@ NvDsInferStatus Yolo::buildYoloNetwork( nvinfer1::IPluginV2* yoloPlugin = new YoloLayer(m_OutputTensors.at(outputTensorCount).numBBoxes, m_OutputTensors.at(outputTensorCount).numClasses, - m_OutputTensors.at(outputTensorCount).gridSize, + m_OutputTensors.at(outputTensorCount).gridSizeX, + m_OutputTensors.at(outputTensorCount).gridSizeY, 1, new_coords, scale_x_y, beta_nms, curYoloTensor.anchors, 
m_OutputMasks); @@ -260,12 +296,12 @@ NvDsInferStatus Yolo::buildYoloNetwork( //YOLOv2 support else if (m_ConfigBlocks.at(i).at("type") == "region") { nvinfer1::Dims prevTensorDims = previous->getDimensions(); - assert(prevTensorDims.d[1] == prevTensorDims.d[2]); TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount); - curRegionTensor.gridSize = prevTensorDims.d[1]; - curRegionTensor.stride = m_InputW / curRegionTensor.gridSize; - m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSize - * curRegionTensor.gridSize + curRegionTensor.gridSizeY = prevTensorDims.d[1]; + curRegionTensor.gridSizeX = prevTensorDims.d[2]; + curRegionTensor.stride = m_InputH / curRegionTensor.gridSizeY; + m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSizeY + * curRegionTensor.gridSizeX * (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses)); std::string layerName = "region_" + std::to_string(i); curRegionTensor.blobName = layerName; @@ -273,7 +309,8 @@ NvDsInferStatus Yolo::buildYoloNetwork( nvinfer1::IPluginV2* regionPlugin = new YoloLayer(curRegionTensor.numBBoxes, curRegionTensor.numClasses, - curRegionTensor.gridSize, + curRegionTensor.gridSizeX, + curRegionTensor.gridSizeY, 0, 0, 1.0, 0, curRegionTensor.anchors, mask); @@ -387,8 +424,14 @@ void Yolo::parseConfigBlocks() m_InputH = std::stoul(block.at("height")); m_InputW = std::stoul(block.at("width")); m_InputC = std::stoul(block.at("channels")); - assert(m_InputW == m_InputH); m_InputSize = m_InputC * m_InputH * m_InputW; + + if (block.find("letter_box") != block.end()) { + m_LetterBox = std::stoul(block.at("letter_box")); + } + else { + m_LetterBox = 0; + } } else if ((block.at("type") == "region") || (block.at("type") == "yolo")) { @@ -456,10 +499,9 @@ void Yolo::parseConfigBlocks() } void Yolo::destroyNetworkUtils() { - // deallocate the weights for (uint i = 0; i < m_TrtWeights.size(); ++i) { if (m_TrtWeights[i].count > 0) free(const_cast(m_TrtWeights[i].values)); } 
m_TrtWeights.clear(); -} \ No newline at end of file +} diff --git a/native/nvdsinfer_custom_impl_Yolo/yolo.h b/native/nvdsinfer_custom_impl_Yolo/yolo.h index 77d6127..053e9c7 100644 --- a/native/nvdsinfer_custom_impl_Yolo/yolo.h +++ b/native/nvdsinfer_custom_impl_Yolo/yolo.h @@ -40,6 +40,8 @@ struct NetworkInfo std::string networkType; std::string configFilePath; std::string wtsFilePath; + std::string int8CalibPath; + std::string networkMode; std::string deviceType; std::string inputBlobName; }; @@ -48,7 +50,8 @@ struct TensorInfo { std::string blobName; uint stride{0}; - uint gridSize{0}; + uint gridSizeY{0}; + uint gridSizeX{0}; uint numClasses{0}; uint numBBoxes{0}; uint64_t volume{0}; @@ -75,6 +78,8 @@ protected: const std::string m_NetworkType; const std::string m_ConfigFilePath; const std::string m_WtsFilePath; + const std::string m_Int8CalibPath; + const std::string m_NetworkMode; const std::string m_DeviceType; const std::string m_InputBlobName; std::vector m_OutputTensors; @@ -84,6 +89,7 @@ protected: uint m_InputW; uint m_InputC; uint64_t m_InputSize; + uint m_LetterBox; std::vector m_TrtWeights; diff --git a/native/nvdsinfer_custom_impl_Yolo/yoloForward.cu b/native/nvdsinfer_custom_impl_Yolo/yoloForward.cu index a9310a5..dcc4b95 100644 --- a/native/nvdsinfer_custom_impl_Yolo/yoloForward.cu +++ b/native/nvdsinfer_custom_impl_Yolo/yoloForward.cu @@ -20,20 +20,20 @@ inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } -__global__ void gpuYoloLayer(const float* input, float* output, const uint gridSize, const uint numOutputClasses, +__global__ void gpuYoloLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const uint new_coords, const float scale_x_y) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; uint z_id = blockIdx.z * blockDim.z + threadIdx.z; - if ((x_id >= gridSize) || (y_id 
>= gridSize) || (z_id >= numBBoxes)) + if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes)) { return; } - const int numGridCells = gridSize * gridSize; - const int bbindex = y_id * gridSize + x_id; + const int numGridCells = gridSizeX * gridSizeY; + const int bbindex = y_id * gridSizeX + x_id; float alpha = scale_x_y; float beta = -0.5 * (scale_x_y - 1); @@ -84,20 +84,20 @@ __global__ void gpuYoloLayer(const float* input, float* output, const uint gridS } } -__global__ void gpuRegionLayer(const float* input, float* output, const uint gridSize, const uint numOutputClasses, +__global__ void gpuRegionLayer(const float* input, float* output, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes) { uint x_id = blockIdx.x * blockDim.x + threadIdx.x; uint y_id = blockIdx.y * blockDim.y + threadIdx.y; uint z_id = blockIdx.z * blockDim.z + threadIdx.z; - if ((x_id >= gridSize) || (y_id >= gridSize) || (z_id >= numBBoxes)) + if ((x_id >= gridSizeX) || (y_id >= gridSizeY) || (z_id >= numBBoxes)) { return; } - const int numGridCells = gridSize * gridSize; - const int bbindex = y_id * gridSize + x_id; + const int numGridCells = gridSizeX * gridSizeY; + const int bbindex = y_id * gridSizeX + x_id; output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]); @@ -132,24 +132,24 @@ __global__ void gpuRegionLayer(const float* input, float* output, const uint gri } } -cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSize, +cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType); -cudaError_t cudaYoloLayer(const void* input, void* output, const uint& 
batchSize, const uint& gridSize, +cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType) { dim3 threads_per_block(16, 16, 4); - dim3 number_of_blocks((gridSize / threads_per_block.x) + 1, - (gridSize / threads_per_block.y) + 1, + dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, + (gridSizeY / threads_per_block.y) + 1, (numBBoxes / threads_per_block.z) + 1); if (modelType == 1) { for (unsigned int batch = 0; batch < batchSize; ++batch) { gpuYoloLayer<<>>( reinterpret_cast(input) + (batch * outputSize), - reinterpret_cast(output) + (batch * outputSize), gridSize, numOutputClasses, + reinterpret_cast(output) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, numBBoxes, modelCoords, modelScale); } } @@ -158,7 +158,7 @@ cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize { gpuRegionLayer<<>>( reinterpret_cast(input) + (batch * outputSize), - reinterpret_cast(output) + (batch * outputSize), gridSize, numOutputClasses, + reinterpret_cast(output) + (batch * outputSize), gridSizeX, gridSizeY, numOutputClasses, numBBoxes); } } diff --git a/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp b/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp index c7184e0..0ae7cbb 100644 --- a/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp +++ b/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp @@ -52,7 +52,7 @@ void read(const char*& buffer, T& val) cudaError_t cudaYoloLayer ( const void* input, void* output, const uint& batchSize, - const uint& gridSize, const uint& numOutputClasses, + const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream, const uint modelCoords, const float modelScale, const uint modelType); 
YoloLayer::YoloLayer (const void* data, size_t length) @@ -60,7 +60,8 @@ YoloLayer::YoloLayer (const void* data, size_t length) const char *d = static_cast(data); read(d, m_NumBoxes); read(d, m_NumClasses); - read(d, m_GridSize); + read(d, m_GridSizeX); + read(d, m_GridSizeY); read(d, m_OutputSize); read(d, m_type); @@ -94,10 +95,11 @@ YoloLayer::YoloLayer (const void* data, size_t length) }; YoloLayer::YoloLayer ( - const uint& numBoxes, const uint& numClasses, const uint& gridSize, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector anchors, std::vector> mask) : + const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector anchors, std::vector> mask) : m_NumBoxes(numBoxes), m_NumClasses(numClasses), - m_GridSize(gridSize), + m_GridSizeX(gridSizeX), + m_GridSizeY(gridSizeY), m_type(model_type), m_new_coords(new_coords), m_scale_x_y(scale_x_y), @@ -107,8 +109,9 @@ YoloLayer::YoloLayer ( { assert(m_NumBoxes > 0); assert(m_NumClasses > 0); - assert(m_GridSize > 0); - m_OutputSize = m_GridSize * m_GridSize * (m_NumBoxes * (4 + 1 + m_NumClasses)); + assert(m_GridSizeX > 0); + assert(m_GridSizeY > 0); + m_OutputSize = m_GridSizeX * m_GridSizeY * (m_NumBoxes * (4 + 1 + m_NumClasses)); }; nvinfer1::Dims @@ -142,7 +145,7 @@ int YoloLayer::enqueue( cudaStream_t stream) { CHECK(cudaYoloLayer( - inputs[0], outputs[0], batchSize, m_GridSize, m_NumClasses, m_NumBoxes, + inputs[0], outputs[0], batchSize, m_GridSizeX, m_GridSizeY, m_NumClasses, m_NumBoxes, m_OutputSize, stream, m_new_coords, m_scale_x_y, m_type)); return 0; } @@ -161,7 +164,7 @@ size_t YoloLayer::getSerializationSize() const } } - return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(m_GridSize) + sizeof(m_OutputSize) + sizeof(m_type) + return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(m_GridSizeX) + 
sizeof(m_GridSizeY) + sizeof(m_OutputSize) + sizeof(m_type) + sizeof(m_new_coords) + sizeof(m_scale_x_y) + sizeof(m_beta_nms) + anchorsSum * sizeof(float) + maskSum * sizeof(int); } @@ -170,7 +173,8 @@ void YoloLayer::serialize(void* buffer) const char *d = static_cast(buffer); write(d, m_NumBoxes); write(d, m_NumClasses); - write(d, m_GridSize); + write(d, m_GridSizeX); + write(d, m_GridSizeY); write(d, m_OutputSize); write(d, m_type); @@ -199,7 +203,7 @@ void YoloLayer::serialize(void* buffer) const nvinfer1::IPluginV2* YoloLayer::clone() const { - return new YoloLayer (m_NumBoxes, m_NumClasses, m_GridSize, m_type, m_new_coords, m_scale_x_y, m_beta_nms, m_Anchors, m_Mask); + return new YoloLayer (m_NumBoxes, m_NumClasses, m_GridSizeX, m_GridSizeY, m_type, m_new_coords, m_scale_x_y, m_beta_nms, m_Anchors, m_Mask); } REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); \ No newline at end of file diff --git a/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.h b/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.h index ebf5661..177ca10 100644 --- a/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.h +++ b/native/nvdsinfer_custom_impl_Yolo/yoloPlugins.h @@ -56,7 +56,7 @@ class YoloLayer : public nvinfer1::IPluginV2 { public: YoloLayer (const void* data, size_t length); - YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSize, + YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, const std::vector anchors, const std::vector> mask); const char* getPluginType () const override { return YOLOLAYER_PLUGIN_NAME; } @@ -96,7 +96,8 @@ public: private: uint m_NumBoxes {0}; uint m_NumClasses {0}; - uint m_GridSize {0}; + uint m_GridSizeX {0}; + uint m_GridSizeY {0}; uint64_t m_OutputSize {0}; std::string m_Namespace {""}; @@ -152,4 +153,4 @@ extern float kBETA_NMS; extern std::vector kANCHORS; extern std::vector> kMASK; -#endif // 
__YOLO_PLUGINS__ \ No newline at end of file +#endif // __YOLO_PLUGINS__ diff --git a/non_square/nvdsinfer_custom_impl_Yolo/Makefile b/non_square/nvdsinfer_custom_impl_Yolo/Makefile deleted file mode 100644 index 1780210..0000000 --- a/non_square/nvdsinfer_custom_impl_Yolo/Makefile +++ /dev/null @@ -1,71 +0,0 @@ -################################################################################ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
-# -# Edited by Marcos Luciano -# https://www.github.com/marcoslucianops -################################################################################ - -CUDA_VER?= -ifeq ($(CUDA_VER),) - $(error "CUDA_VER is not set") -endif -CC:= g++ -NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc - -CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations -CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include - -LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs -LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group - -INCS:= $(wildcard *.h) -SRCFILES:= nvdsinfer_yolo_engine.cpp \ - nvdsparsebbox_Yolo.cpp \ - yoloPlugins.cpp \ - layers/convolutional_layer.cpp \ - layers/dropout_layer.cpp \ - layers/shortcut_layer.cpp \ - layers/route_layer.cpp \ - layers/upsample_layer.cpp \ - layers/maxpool_layer.cpp \ - layers/activation_layer.cpp \ - utils.cpp \ - yolo.cpp \ - yoloForward.cu -TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so - -TARGET_OBJS:= $(SRCFILES:.cpp=.o) -TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) - -all: $(TARGET_LIB) - -%.o: %.cpp $(INCS) Makefile - $(CC) -c -o $@ $(CFLAGS) $< - -%.o: %.cu $(INCS) Makefile - $(NVCC) -c -o $@ --compiler-options '-fPIC' $< - -$(TARGET_LIB) : $(TARGET_OBJS) - $(CC) -o $@ $(TARGET_OBJS) $(LFLAGS) - -clean: - rm -rf $(TARGET_LIB) - rm -rf $(TARGET_OBJS) diff --git a/non_square/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp b/non_square/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp deleted file mode 100644 index 4642b62..0000000 --- a/non_square/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Created by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#include "upsample_layer.h" - -nvinfer1::ILayer* upsampleLayer( - int layerIdx, - std::map& block, - std::vector& weights, - std::vector& trtWeights, - int& inputChannels, - nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* 
network) -{ - assert(block.at("type") == "upsample"); - nvinfer1::Dims inpDims = input->getDimensions(); - assert(inpDims.nbDims == 3); - //assert(inpDims.d[1] == inpDims.d[2]); - int h = inpDims.d[1]; - int w = inpDims.d[2]; - int stride = std::stoi(block.at("stride")); - - /*nvinfer1::Dims preDims{3, - {1, stride * h, h}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; - int size = stride * h * w; - nvinfer1::Weights preMul{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* preWt = new float[size]; - - for (int i = 0, idx = 0; i < h; ++i) - { - for (int j = 0; j < h * stride; ++j, ++idx) - { - preWt[idx] = (i == j) ? 1.0 : 0.0; - } - } - preMul.values = preWt; - trtWeights.push_back(preMul); - nvinfer1::IConstantLayer* preM = network->addConstant(preDims, preMul); - assert(preM != nullptr); - std::string preLayerName = "preMul_" + std::to_string(layerIdx); - preM->setName(preLayerName.c_str()); - - nvinfer1::Dims postDims{3, - {1, w, stride * w}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; - size = stride * w * w; - nvinfer1::Weights postMul{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* postWt = new float[size]; - - for (int i = 0, idx = 0; i < w; ++i) - { - for (int j = 0; j < stride * w; ++j, ++idx) - { - postWt[idx] = (j / stride == i) ? 
1.0 : 0.0; - } - } - postMul.values = postWt; - trtWeights.push_back(postMul); - nvinfer1::IConstantLayer* post_m = network->addConstant(postDims, postMul); - assert(post_m != nullptr); - std::string postLayerName = "postMul_" + std::to_string(layerIdx); - post_m->setName(postLayerName.c_str()); - - nvinfer1::IMatrixMultiplyLayer* mm1 - = network->addMatrixMultiply(*preM->getOutput(0), nvinfer1::MatrixOperation::kNONE, *input, - nvinfer1::MatrixOperation::kNONE); - assert(mm1 != nullptr); - std::string mm1LayerName = "mm1_" + std::to_string(layerIdx); - mm1->setName(mm1LayerName.c_str()); - nvinfer1::IMatrixMultiplyLayer* mm2 - = network->addMatrixMultiply(*mm1->getOutput(0), nvinfer1::MatrixOperation::kNONE, - *post_m->getOutput(0), nvinfer1::MatrixOperation::kNONE); - assert(mm2 != nullptr); - std::string mm2LayerName = "mm2_" + std::to_string(layerIdx); - mm2->setName(mm2LayerName.c_str()); - - return mm2;*/ - nvinfer1::IResizeLayer* resize_layer = network->addResize(*input); - resize_layer->setResizeMode(nvinfer1::ResizeMode::kNEAREST); - float scale[3] = {1, stride, stride}; - resize_layer->setScales(scale, 3); - std::string layer_name = "upsample_" + std::to_string(layerIdx); - resize_layer->setName(layer_name.c_str()); - return resize_layer; -} diff --git a/non_square/nvdsinfer_custom_impl_Yolo/yoloPlugins.h b/non_square/nvdsinfer_custom_impl_Yolo/yoloPlugins.h deleted file mode 100644 index 177ca10..0000000 --- a/non_square/nvdsinfer_custom_impl_Yolo/yoloPlugins.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- - * Edited by Marcos Luciano - * https://www.github.com/marcoslucianops - */ - -#ifndef __YOLO_PLUGINS__ -#define __YOLO_PLUGINS__ - -#include -#include -#include -#include -#include - -#include - -#include "NvInferPlugin.h" - -#define CHECK(status) \ - { \ - if (status != 0) \ - { \ - std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ - << " at line " << __LINE__ << std::endl; \ - abort(); \ - } \ - } - -namespace -{ -const char* YOLOLAYER_PLUGIN_VERSION {"1"}; -const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"}; -} // namespace - -class YoloLayer : public nvinfer1::IPluginV2 -{ -public: - YoloLayer (const void* data, size_t length); - YoloLayer (const uint& numBoxes, const uint& numClasses, const uint& gridSizeX, const uint& gridSizeY, - const uint model_type, const uint new_coords, const float scale_x_y, const float beta_nms, - const std::vector anchors, const std::vector> mask); - const char* getPluginType () const override { return YOLOLAYER_PLUGIN_NAME; } - const char* getPluginVersion () const override { return YOLOLAYER_PLUGIN_VERSION; } - int getNbOutputs () const override { return 1; } - - nvinfer1::Dims getOutputDimensions ( - int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; - - bool supportsFormat ( - nvinfer1::DataType type, nvinfer1::PluginFormat format) const override; - - void configureWithFormat ( - const nvinfer1::Dims* inputDims, int nbInputs, - const nvinfer1::Dims* outputDims, int nbOutputs, - nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) override; - - int initialize () override { return 0; } - void terminate () override {} - size_t getWorkspaceSize (int maxBatchSize) const override { return 0; } - int enqueue ( - int batchSize, const void* const* inputs, void** outputs, - void* workspace, cudaStream_t stream) override; - size_t getSerializationSize() const override; - void serialize (void* buffer) const override; - void destroy () override { delete this; 
} - nvinfer1::IPluginV2* clone() const override; - - void setPluginNamespace (const char* pluginNamespace)override { - m_Namespace = pluginNamespace; - } - virtual const char* getPluginNamespace () const override { - return m_Namespace.c_str(); - } - -private: - uint m_NumBoxes {0}; - uint m_NumClasses {0}; - uint m_GridSizeX {0}; - uint m_GridSizeY {0}; - uint64_t m_OutputSize {0}; - std::string m_Namespace {""}; - - uint m_type {0}; - uint m_new_coords {0}; - float m_scale_x_y {0}; - float m_beta_nms {0}; - std::vector m_Anchors; - std::vector> m_Mask; -}; - -class YoloLayerPluginCreator : public nvinfer1::IPluginCreator -{ -public: - YoloLayerPluginCreator () {} - ~YoloLayerPluginCreator () {} - - const char* getPluginName () const override { return YOLOLAYER_PLUGIN_NAME; } - const char* getPluginVersion () const override { return YOLOLAYER_PLUGIN_VERSION; } - - const nvinfer1::PluginFieldCollection* getFieldNames() override { - std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented" << std::endl; - return nullptr; - } - - nvinfer1::IPluginV2* createPlugin ( - const char* name, const nvinfer1::PluginFieldCollection* fc) override - { - std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented"; - return nullptr; - } - - nvinfer1::IPluginV2* deserializePlugin ( - const char* name, const void* serialData, size_t serialLength) override - { - std::cout << "Deserialize yoloLayer plugin: " << name << std::endl; - return new YoloLayer(serialData, serialLength); - } - - void setPluginNamespace(const char* libNamespace) override { - m_Namespace = libNamespace; - } - const char* getPluginNamespace() const override { - return m_Namespace.c_str(); - } - -private: - std::string m_Namespace {""}; -}; - -extern int kNUM_CLASSES; -extern float kBETA_NMS; -extern std::vector kANCHORS; -extern std::vector> kMASK; - -#endif // __YOLO_PLUGINS__ diff --git a/readme.md b/readme.md index 3e9bedc..d4d2c7f 100644 --- a/readme.md +++ b/readme.md @@ -6,45 +6,46 
@@ NVIDIA DeepStream SDK 5.1 configuration for YOLO models ### Improvements on this repository * Darknet CFG params parser (not need to edit nvdsparsebbox_Yolo.cpp or another file for native models) -* Support to new_coords, beta_nms and scale_x_y params -* Support to new models not supported in official DeepStream SDK YOLO. -* Support to layers not supported in official DeepStream SDK YOLO. -* Support to activations not supported in official DeepStream SDK YOLO. -* Support to Convolutional groups +* Support for new_coords, beta_nms and scale_x_y params +* Support for new models not supported in official DeepStream SDK YOLO. +* Support for layers not supported in official DeepStream SDK YOLO. +* Support for activations not supported in official DeepStream SDK YOLO. +* Support for Convolutional groups +* **Support for INT8 calibration** (not available for YOLOv5 models) +* **Support for non square models** ## Tutorial +* [Basic usage](#basic-usage) +* [INT8 calibration](#int8-calibration) * [Configuring to your custom model](https://github.com/marcoslucianops/DeepStream-Yolo/blob/master/customModels.md) * [Multiple YOLO inferences](https://github.com/marcoslucianops/DeepStream-Yolo/blob/master/multipleInferences.md) -Benchmark -* [mAP/FPS comparison between models](#mapfps-comparison-between-models) - TensorRT conversion -* [Native](#native-tensorrt-conversion) (tested models below) - * YOLOv4x-Mish - * YOLOv4-CSP - * YOLOv4 - * YOLOv4-Tiny - * YOLOv3-SSP - * YOLOv3 - * YOLOv3-Tiny-PRN - * YOLOv3-Tiny - * YOLOv3-Lite - * YOLOv3-Nano - * YOLO-Fastest - * YOLO-Fastest-XL - * YOLOv2 - * YOLOv2-Tiny +* Native (tested models below) + * [YOLOv4x-Mish](https://github.com/AlexeyAB/darknet) [[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights)] + * [YOLOv4-CSP](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp) 
[[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights)] + * [YOLOv4](https://github.com/AlexeyAB/darknet) [[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights)] + * [YOLOv4-Tiny](https://github.com/AlexeyAB/darknet) [[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights)] + * [YOLOv3-SPP](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3-spp.cfg)] [[weights](https://pjreddie.com/media/files/yolov3-spp.weights)] + * [YOLOv3](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg)] [[weights](https://pjreddie.com/media/files/yolov3.weights)] + * [YOLOv3-Tiny-PRN](https://github.com/WongKinYiu/PartialResidualNetworks) [[cfg](https://raw.githubusercontent.com/WongKinYiu/PartialResidualNetworks/master/cfg/yolov3-tiny-prn.cfg)] [[weights](https://github.com/WongKinYiu/PartialResidualNetworks/raw/master/model/yolov3-tiny-prn.weights)] + * [YOLOv3-Tiny](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3-tiny.cfg)] [[weights](https://pjreddie.com/media/files/yolov3-tiny.weights)] + * [YOLOv3-Lite](https://github.com/dog-qiuqiu/MobileNet-Yolo) [[cfg](https://raw.githubusercontent.com/dog-qiuqiu/MobileNet-Yolo/master/MobileNetV2-YOLOv3-Lite/COCO/MobileNetV2-YOLOv3-Lite-coco.cfg)] [[weights](https://github.com/dog-qiuqiu/MobileNet-Yolo/raw/master/MobileNetV2-YOLOv3-Lite/COCO/MobileNetV2-YOLOv3-Lite-coco.weights)] + * [YOLOv3-Nano](https://github.com/dog-qiuqiu/MobileNet-Yolo) 
[[cfg](https://raw.githubusercontent.com/dog-qiuqiu/MobileNet-Yolo/master/MobileNetV2-YOLOv3-Nano/COCO/MobileNetV2-YOLOv3-Nano-coco.cfg)] [[weights](https://github.com/dog-qiuqiu/MobileNet-Yolo/raw/master/MobileNetV2-YOLOv3-Nano/COCO/MobileNetV2-YOLOv3-Nano-coco.weights)] + * [YOLO-Fastest 1.1](https://github.com/dog-qiuqiu/Yolo-Fastest) [[cfg](https://raw.githubusercontent.com/dog-qiuqiu/Yolo-Fastest/master/ModelZoo/yolo-fastest-1.1_coco/yolo-fastest-1.1.cfg)] [[weights](https://github.com/dog-qiuqiu/Yolo-Fastest/raw/master/ModelZoo/yolo-fastest-1.1_coco/yolo-fastest-1.1.weights)] + * [YOLO-Fastest-XL 1.1](https://github.com/dog-qiuqiu/Yolo-Fastest) [[cfg](https://raw.githubusercontent.com/dog-qiuqiu/Yolo-Fastest/master/ModelZoo/yolo-fastest-1.1_coco/yolo-fastest-1.1-xl.cfg)] [[weights](https://github.com/dog-qiuqiu/Yolo-Fastest/raw/master/ModelZoo/yolo-fastest-1.1_coco/yolo-fastest-1.1-xl.weights)] + * [YOLOv2](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2.cfg)] [[weights](https://pjreddie.com/media/files/yolov2.weights)] + * [YOLOv2-Tiny](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2-tiny.cfg)] [[weights](https://pjreddie.com/media/files/yolov2-tiny.weights)] * External * [YOLOv5 5.0](https://github.com/marcoslucianops/DeepStream-Yolo/blob/master/YOLOv5-5.0.md) * [YOLOv5 4.0](https://github.com/marcoslucianops/DeepStream-Yolo/blob/master/YOLOv5-4.0.md) * [YOLOv5 3.X (3.0/3.1)](https://github.com/marcoslucianops/DeepStream-Yolo/blob/master/YOLOv5-3.X.md) -Request -* [Request native TensorRT conversion for your YOLO-based model](#request-native-tensorrt-conversion-for-your-yolo-based-model) +Benchmark +* [mAP/FPS comparison between models](#mapfps-comparison-between-models) ## @@ -55,202 +56,24 @@ Request ## -### mAP/FPS comparison between models (OUTDATED) - -DeepStream SDK YOLOv4: https://youtu.be/Qi_F_IYpuFQ - -Darknet YOLOv4: 
https://youtu.be/AxJJ9fnJ7Xk - -
NVIDIA GTX 1050 (4GB Mobile) +### Basic usage ``` -CUDA 10.2 -Driver 440.33 -TensorRT 7.2.1 -cuDNN 8.0.5 -OpenCV 3.2.0 (libopencv-dev) -OpenCV Python 4.4.0 (opencv-python) -PyTorch 1.7.0 -Torchvision 0.8.1 +git clone https://github.com/marcoslucianops/DeepStream-Yolo.git +cd DeepStream-Yolo/native ``` -| TensorRT | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(with display) | FPS
(without display) | -|:---------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:-----------------------:|:--------------------------:| -| YOLOv5x | FP32 | 608 | 0.406 | 0.562 | 0.441 | 7.91 | 7.99 | -| YOLOv5l | FP32 | 608 | 0.385 | 0.540 | 0.419 | 12.82 | 12.97 | -| YOLOv5m | FP32 | 608 | 0.354 | 0.507 | 0.388 | 25.09 | 25.97 | -| YOLOv5s | FP32 | 608 | 0.281 | 0.430 | 0.307 | 52.02 | 56.21 | -| YOLOv4x-MISH | FP32 | 640 | 0.454 | 0.644 | 0.491 | 7.45 | 7.56 | -| YOLOv4x-MISH | FP32 | 608 | 0.450 | 0.644 | 0.482 | 7.93 | 8.05 | -| YOLOv4-CSP | FP32 | 608 | 0.434 | 0.628 | 0.465 | 13.74 | 14.11 | -| YOLOv4-CSP | FP32 | 512 | 0.427 | 0.618 | 0.459 | 21.69 | 22.75 | -| YOLOv4 | FP32 | 608 | 0.490 | 0.734 | 0.538 | 11.72 | 12.09 | -| YOLOv4 | FP32 | 512 | 0.484 | 0.725 | 0.533 | 19.00 | 19.70 | -| YOLOv4 | FP32 | 416 | 0.456 | 0.693 | 0.491 | 22.63 | 23.81 | -| YOLOv4 | FP32 | 320 | 0.400 | 0.623 | 0.424 | 32.46 | 35.07 | -| YOLOv3-SPP | FP32 | 608 | 0.411 | 0.680 | 0.436 | 11.85 | 12.12 | -| YOLOv3 | FP32 | 608 | 0.374 | 0.654 | 0.387 | 12.00 | 12.33 | -| YOLOv3 | FP32 | 416 | 0.369 | 0.651 | 0.379 | 23.19 | 24.55 | -| YOLOv4-Tiny | FP32 | 416 | 0.195 | 0.382 | 0.175 | 144.55 | 176.31 | -| YOLOv3-Tiny-PRN | FP32 | 416 | 0.168 | 0.369 | 0.130 | 181.71 | 244.47 | -| YOLOv3-Tiny | FP32 | 416 | 0.165 | 0.357 | 0.128 | 154.19 | 190.42 | -| YOLOv3-Lite | FP32 | 416 | 0.165 | 0.350 | 0.131 | 122.40 | 146.19 | -| YOLOv3-Lite | FP32 | 320 | 0.155 | 0.324 | 0.128 | 163.76 | 204.21 | -| YOLOv3-Nano | FP32 | 416 | 0.127 | 0.277 | 0.098 | 191.77 | 264.59 | -| YOLOv3-Nano | FP32 | 320 | 0.122 | 0.258 | 0.099 | 207.04 | 269.89 | -| YOLO-Fastest | FP32 | 416 | 0.092 | 0.213 | 0.062 | 174.26 | 221.05 | -| YOLO-Fastest | FP32 | 320 | 0.090 | 0.201 | 0.068 | 199.48 | 258.56 | -| YOLO-FastestXL | FP32 | 416 | 0.144 | 0.306 | 0.115 | 121.89 | 145.13 | -| YOLO-FastestXL | FP32 | 320 | 0.136 | 0.279 | 0.117 | 162.65 | 199.75 | -| YOLOv2 | FP32 | 608 | 0.286 | 0.534 | 
0.274 | 23.92 | 25.47 | -| YOLOv2-Tiny | FP32 | 416 | 0.103 | 0.251 | 0.064 | 165.01 | 203.02 | - -| Darknet | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(with display) | FPS
(without display) | -|:---------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:-----------------------:|:--------------------------:| -| YOLOv4x-MISH | FP32 | 640 | 0.495 | 0.682 | 0.538 | 5.3 | 5.5 | -| YOLOv4x-MISH | FP32 | 608 | 0.493 | 0.680 | 0.535 | 5.4 | 5.6 | -| YOLOv4-CSP | FP32 | 608 | 0.473 | 0.661 | 0.515 | 9.2 | 9.5 | -| YOLOv4-CSP | FP32 | 512 | 0.458 | 0.645 | 0.496 | 13.6 | 14.0 | -| YOLOv4 | FP32 | 608 | 0.513 | 0.748 | 0.574 | 7.3 | 7.5 | -| YOLOv4 | FP32 | 512 | 0.506 | 0.738 | 0.564 | 11.8 | 12.3 | -| YOLOv4 | FP32 | 416 | 0.479 | 0.709 | 0.527 | 15.4 | 15.8 | -| YOLOv4 | FP32 | 320 | 0.421 | 0.638 | 0.454 | 21.0 | 21.7 | -| YOLOv3-SPP | FP32 | 608 | 0.432 | 0.701 | 0.465 | 6.9 | 7.1 | -| YOLOv3 | FP32 | 608 | 0.391 | 0.672 | 0.412 | 7.0 | 7.3 | -| YOLOv3 | FP32 | 416 | 0.384 | 0.668 | 0.402 | 16.3 | 16.9 | -| YOLOv4-Tiny | FP32 | 416 | 0.203 | 0.388 | 0.189 | 68.0 | 112.5 | -| YOLOv3-Tiny-PRN | FP32 | 416 | 0.172 | 0.378 | 0.133 | 71.6 | 143.9 | -| YOLOv3-Tiny | FP32 | 416 | 0.171 | 0.367 | 0.137 | 71.5 | 117.9 | -| YOLOv3-Lite | FP32 | 416 | 0.169 | 0.349 | 0.144 | 53.8 | 63.4 | -| YOLOv3-Lite | FP32 | 320 | 0.159 | 0.326 | 0.139 | 55.2 | 97.5 | -| YOLOv3-Nano | FP32 | 416 | 0.129 | 0.275 | 0.102 | 58.0 | 113.1 | -| YOLOv3-Nano | FP32 | 320 | 0.124 | 0.259 | 0.106 | 61.6 | 156.8 | -| YOLO-Fastest | FP32 | 416 | 0.095 | 0.213 | 0.068 | 61.7 | 104.1 | -| YOLO-Fastest | FP32 | 320 | 0.093 | 0.202 | 0.074 | 65.8 | 143.3 | -| YOLO-FastestXL | FP32 | 416 | 0.148 | 0.308 | 0.125 | 62.0 | 75.9 | -| YOLO-FastestXL | FP32 | 320 | 0.141 | 0.284 | 0.125 | 63.9 | 112.3 | -| YOLOv2 | FP32 | 608 | 0.297 | 0.548 | 0.291 | 12.1 | 12.1 | -| YOLOv2-Tiny | FP32 | 416 | 0.105 | 0.255 | 0.068 | 34.5 | 40.7 | - -| PyTorch | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(with output) | FPS
(without output) | -|:-------:|:---------:|:----------:|:------------:|:-------:|:--------:|:----------------------:|:-------------------------:| -| YOLOv5x | FP32 | 608 | 0.487 | 0.676 | 0.527 | 8.25 | 9.49 | -| YOLOv5l | FP32 | 608 | 0.471 | 0.662 | 0.512 | 12.67 | 15.77 | -| YOLOv5m | FP32 | 608 | 0.439 | 0.631 | 0.474 | 18.13 | 24.80 | -| YOLOv5s | FP32 | 608 | 0.369 | 0.567 | 0.395 | 28.03 | 49.52 | - -
- -
- -
NVIDIA Jetson Nano (4GB) - -``` -JetPack 4.4.1 -CUDA 10.2 -TensorRT 7.1.3 -cuDNN 8.0 -OpenCV 4.1.1 -``` - -| TensorRT | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(with display) | FPS
(without display) | -|:---------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:-----------------------:|:--------------------------:| -| YOLOv4 | FP32 | 416 | 0.462 | 0.694 | 0.503 | 2.97 | 2.99 | -| YOLOv4 | FP16 | 416 | 0.462 | 0.694 | 0.504 | 4.89 | 4.96 | -| YOLOv4 | FP32 | 320 | 0.407 | 0.625 | 0.434 | | | -| YOLOv4 | FP16 | 320 | 0.408 | 0.625 | 0.435 | | | -| YOLOv3 | FP32 | 416 | 0.370 | 0.664 | 0.379 | | | -| YOLOv3 | FP16 | 416 | 0.370 | 0.664 | 0.378 | | | -| YOLOv4-Tiny | FP32 | 416 | 0.194 | 0.378 | 0.177 | 21.79 | 23.23 | -| YOLOv4-Tiny | FP16 | 416 | 0.194 | 0.378 | 0.177 | 24.76 | 26.18 | -| YOLOv3-Tiny-PRN | FP32 | 416 | 0.163 | 0.375 | 0.120 | 23.79 | 25.18 | -| YOLOv3-Tiny-PRN | FP16 | 416 | 0.163 | 0.375 | 0.119 | 26.08 | 27.96 | -| YOLOv3-Tiny | FP32 | 416 | 0.162 | 0.363 | 0.122 | 22.84 | 24.28 | -| YOLOv3-Tiny | FP16 | 416 | 0.162 | 0.363 | 0.122 | 25.47 | 27.18 | - -| Darknet | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(with display) | FPS
(without display) | -|:---------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:-----------------------:|:--------------------------:| -| YOLOv4 | FP32 | 416 | | | | | | -| YOLOv4 | FP32 | 320 | | | | | | -| YOLOv3 | FP32 | 416 | | | | | | -| YOLOv4-Tiny | FP32 | 416 | | | | | | -| YOLOv3-Tiny-PRN | FP32 | 416 | | | | | | -| YOLOv3-Tiny | FP32 | 416 | | | | | | -| YOLOv2 | FP32 | 608 | | | | | | -| YOLOv2-Tiny | FP32 | 416 | | | | | | - -| PyTorch | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(with output) | FPS
(without output) | -|:-------:|:---------:|:----------:|:------------:|:-------:|:--------:|:----------------------:|:-------------------------:| -| YOLOv5s | FP32 | 416 | | | | | | -| YOLOv5s | FP16 | 416 | | | | | | - -
- -
- -#### DeepStream settings - -* General -``` -width = 1920 -height = 1080 -maintain-aspect-ratio = 0 -batch-size = 1 -``` - -* Evaluate mAP -``` -valid = val2017 (COCO) -nms-iou-threshold = 0.6 -pre-cluster-threshold = 0.001 (CONF_THRESH) -``` - -* Evaluate FPS and Demo -``` -nms-iou-threshold = 0.45 (NMS; changed to beta_nms when available) -pre-cluster-threshold = 0.25 (CONF_THRESH) -``` - -## - -### Native TensorRT conversion - -Run command -``` -sudo chmod -R 777 /opt/nvidia/deepstream/deepstream-5.1/sources/ -``` - -Download [my native folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/native), rename to yolo and move to your deepstream/sources folder. - -Download cfg and weights files from your model and move to deepstream/sources/yolo folder. - -* [YOLOv4x-Mish](https://github.com/AlexeyAB/darknet) [[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights)] -* [YOLOv4-CSP](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp) [[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights)] -* [YOLOv4](https://github.com/AlexeyAB/darknet) [[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights)] -* [YOLOv4-Tiny](https://github.com/AlexeyAB/darknet) [[cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg)] [[weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights)] -* [YOLOv3-SPP](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3-spp.cfg)] [[weights](https://pjreddie.com/media/files/yolov3-spp.weights)] -* 
[YOLOv3](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg)] [[weights](https://pjreddie.com/media/files/yolov3.weights)] -* [YOLOv3-Tiny-PRN](https://github.com/WongKinYiu/PartialResidualNetworks) [[cfg](https://raw.githubusercontent.com/WongKinYiu/PartialResidualNetworks/master/cfg/yolov3-tiny-prn.cfg)] [[weights](https://github.com/WongKinYiu/PartialResidualNetworks/raw/master/model/yolov3-tiny-prn.weights)] -* [YOLOv3-Tiny](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3-tiny.cfg)] [[weights](https://pjreddie.com/media/files/yolov3-tiny.weights)] -* [YOLOv3-Lite](https://github.com/dog-qiuqiu/MobileNet-Yolo) [[cfg](https://raw.githubusercontent.com/dog-qiuqiu/MobileNet-Yolo/master/MobileNetV2-YOLOv3-Lite/COCO/MobileNetV2-YOLOv3-Lite-coco.cfg)] [[weights](https://github.com/dog-qiuqiu/MobileNet-Yolo/raw/master/MobileNetV2-YOLOv3-Lite/COCO/MobileNetV2-YOLOv3-Lite-coco.weights)] -* [YOLOv3-Nano](https://github.com/dog-qiuqiu/MobileNet-Yolo) [[cfg](https://raw.githubusercontent.com/dog-qiuqiu/MobileNet-Yolo/master/MobileNetV2-YOLOv3-Nano/COCO/MobileNetV2-YOLOv3-Nano-coco.cfg)] [[weights](https://github.com/dog-qiuqiu/MobileNet-Yolo/raw/master/MobileNetV2-YOLOv3-Nano/COCO/MobileNetV2-YOLOv3-Nano-coco.weights)] -* [YOLO-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest) [[cfg](https://raw.githubusercontent.com/dog-qiuqiu/Yolo-Fastest/master/Yolo-Fastest/COCO/yolo-fastest.cfg)] [[weights](https://github.com/dog-qiuqiu/Yolo-Fastest/raw/master/Yolo-Fastest/COCO/yolo-fastest.weights)] -* [YOLO-Fastest-XL](https://github.com/dog-qiuqiu/Yolo-Fastest) [[cfg](https://raw.githubusercontent.com/dog-qiuqiu/Yolo-Fastest/master/Yolo-Fastest/COCO/yolo-fastest-xl.cfg)] [[weights](https://github.com/dog-qiuqiu/Yolo-Fastest/raw/master/Yolo-Fastest/COCO/yolo-fastest-xl.weights)] -* [YOLOv2](https://github.com/pjreddie/darknet) 
[[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2.cfg)] [[weights](https://pjreddie.com/media/files/yolov2.weights)] -* [YOLOv2-Tiny](https://github.com/pjreddie/darknet) [[cfg](https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2-tiny.cfg)] [[weights](https://pjreddie.com/media/files/yolov2-tiny.weights)] - +Download cfg and weights files from your model and move to DeepStream-Yolo/native folder Compile * x86 platform ``` -cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo CUDA_VER=11.1 make -C nvdsinfer_custom_impl_Yolo ``` * Jetson platform ``` -cd /opt/nvidia/deepstream/deepstream-5.1/sources/yolo CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo ``` @@ -299,12 +122,131 @@ Note: config_infer_primary.txt uses cluster-mode=4 and NMS = 0.45 (via code) whe ## -### Request native TensorRT conversion for your YOLO-based model -To request moded files for native TensorRT conversion to use in DeepStream SDK, send me the model cfg and weights files via Issues tab. +### INT8 calibration -
+Install OpenCV +``` +sudo apt-get install libopencv-dev +``` -Note: If your model are listed in native tab, you can use [my native folder](https://github.com/marcoslucianops/DeepStream-Yolo/tree/master/native) to run your model in DeepStream. +Compile/recompile the nvdsinfer_custom_impl_Yolo lib with OpenCV support + +* x86 platform +``` +cd DeepStream-Yolo/native +CUDA_VER=11.1 OPENCV=1 make -C nvdsinfer_custom_impl_Yolo +``` + +* Jetson platform +``` +cd DeepStream-Yolo/native +CUDA_VER=10.2 OPENCV=1 make -C nvdsinfer_custom_impl_Yolo +``` + +For COCO dataset, download the [val2017](https://drive.google.com/file/d/1gbvfn7mcsGDRZ_luJwtITL-ru2kK99aK/view?usp=sharing), extract, and move to DeepStream-Yolo/native folder + +Select 1000 random images from COCO dataset to run calibration +``` +mkdir calibration +for jpg in $(ls -1 val2017/*.jpg | sort -R | head -1000); do \ + cp ${jpg} calibration/; \ +done +``` + +Create the calibration.txt file with all selected images +``` +realpath calibration/*jpg > calibration.txt +``` + +Set environment variables +``` +export INT8_CALIB_IMG_PATH=calibration.txt +export INT8_CALIB_BATCH_SIZE=1 +``` + +Change config_infer_primary.txt file +``` +... +model-engine-file=model_b1_gpu0_fp32.engine +#int8-calib-file=calib.table +... +network-mode=0 +... +``` +To +``` +... +model-engine-file=model_b1_gpu0_int8.engine +int8-calib-file=calib.table +... +network-mode=1 +... +``` + +Run +``` +deepstream-app -c deepstream_app_config.txt +``` + +Note: NVIDIA recommends at least 500 images to get good accuracy. In this example I used 1000 images to get better accuracy (more images = more accuracy). Higher INT8_CALIB_BATCH_SIZE values will increase the accuracy and calibration speed. Set it according to your GPU memory. This process can take a long time. The calibration isn't available for YOLOv5 models. + +### + +### mAP/FPS comparison between models + +
Open + +``` +valid = val2017 (COCO) +NMS = 0.45 (changed to beta_nms when used in Darknet cfg file) / 0.6 (YOLOv5 models) +pre-cluster-threshold = 0.001 (mAP eval) / 0.25 (FPS measurement) +batch-size = 1 +FPS measurement display width = 1920 +FPS measurement display height = 1080 +NOTE: Used NVIDIA GTX 1050 (4GB Mobile) for evaluation. Used maintain-aspect-ratio=1 in config_infer file for YOLOv4 (with letter_box=1) and YOLOv5 models. For INT8 calibration, 1000 random images from val2017 (COCO) and INT8_CALIB_BATCH_SIZE=1 were used. +``` + +| TensorRT | Precision | Resolution | IoU=0.5:0.95 | IoU=0.5 | IoU=0.75 | FPS
(with display) | FPS
(without display) | +|:---------------:|:---------:|:----------:|:------------:|:-------:|:--------:|:-----------------------:|:--------------------------:| +| YOLOv5x 5.0 | FP32 | 640 | 0. | 0. | 0. | . | . | +| YOLOv5l 5.0 | FP32 | 640 | 0. | 0. | 0. | . | . | +| YOLOv5m 5.0 | FP32 | 640 | 0. | 0. | 0. | . | . | +| YOLOv5s 5.0 | FP32 | 640 | 0. | 0. | 0. | . | . | +| YOLOv5s 5.0 | FP32 | 416 | 0. | 0. | 0. | . | . | +| YOLOv4x-MISH | FP32 | 640 | 0.461 | 0.649 | 0.499 | . | . | +| YOLOv4x-MISH | **INT8** | 640 | 0.443 | 0.629 | 0.479 | . | . | +| YOLOv4x-MISH | FP32 | 608 | 0.461 | 0.650 | 0.496 | . | . | +| YOLOv4-CSP | FP32 | 640 | 0.443 | 0.632 | 0.477 | . | . | +| YOLOv4-CSP | FP32 | 608 | 0.443 | 0.632 | 0.477 | . | . | +| YOLOv4-CSP | FP32 | 512 | 0.437 | 0.625 | 0.471 | . | . | +| YOLOv4-CSP | **INT8** | 512 | 0.414 | 0.601 | 0.447 | . | . | +| YOLOv4 | FP32 | 640 | 0.492 | 0.729 | 0.547 | . | . | +| YOLOv4 | FP32 | 608 | 0.499 | 0.739 | 0.551 | . | . | +| YOLOv4 | **INT8** | 608 | 0.483 | 0.728 | 0.534 | . | . | +| YOLOv4 | FP32 | 512 | 0.492 | 0.730 | 0.542 | . | . | +| YOLOv4 | FP32 | 416 | 0.468 | 0.702 | 0.507 | . | . | +| YOLOv3-SPP | FP32 | 608 | 0.412 | 0.687 | 0.434 | . | . | +| YOLOv3 | FP32 | 608 | 0.378 | 0.674 | 0.389 | . | . | +| YOLOv3 | **INT8** | 608 | 0.381 | 0.677 | 0.388 | . | . | +| YOLOv3 | FP32 | 416 | 0.373 | 0.669 | 0.379 | . | . | +| YOLOv2 | FP32 | 608 | 0.211 | 0.365 | 0.220 | . | . | +| YOLOv2 | FP32 | 416 | 0.207 | 0.362 | 0.211 | . | . | +| YOLOv4-Tiny | FP32 | 416 | 0.216 | 0.403 | 0.207 | . | . | +| YOLOv4-Tiny | **INT8** | 416 | 0.203 | 0.385 | 0.192 | . | . | +| YOLOv3-Tiny-PRN | FP32 | 416 | 0.168 | 0.381 | 0.126 | . | . | +| YOLOv3-Tiny-PRN | **INT8** | 416 | 0.155 | 0.358 | 0.113 | . | . | +| YOLOv3-Tiny | FP32 | 416 | 0.096 | 0.203 | 0.080 | . | . | +| YOLOv2-Tiny | FP32 | 416 | 0.084 | 0.194 | 0.062 | . | . | +| YOLOv3-Lite | FP32 | 416 | 0.169 | 0.356 | 0.137 | . | . 
| +| YOLOv3-Lite | FP32 | 320 | 0.158 | 0.328 | 0.132 | . | . | +| YOLOv3-Nano | FP32 | 416 | 0.128 | 0.278 | 0.099 | . | . | +| YOLOv3-Nano | FP32 | 320 | 0.122 | 0.260 | 0.099 | . | . | +| YOLO-Fastest-XL | FP32 | 416 | 0.160 | 0.342 | 0.130 | . | . | +| YOLO-Fastest-XL | FP32 | 320 | 0.158 | 0.329 | 0.135 | . | . | +| YOLO-Fastest | FP32 | 416 | 0.101 | 0.230 | 0.072 | . | . | +| YOLO-Fastest | FP32 | 320 | 0.102 | 0.232 | 0.073 | . | . | + +
##