YOLOv5 files updated

* Added supported version information
* Not needed to use libmyplugins.so anymore
This commit is contained in:
Marcos Luciano
2021-01-10 11:08:20 -03:00
parent 5b45057821
commit 470ed82658
6 changed files with 539 additions and 7 deletions

View File

@@ -13,7 +13,6 @@ cluster-mode=4
maintain-aspect-ratio=0
parse-bbox-func-name=NvDsInferParseCustomYoloV5
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
engine-create-func-name=NvDsInferYoloCudaEngineGet
[class-attrs-all]
pre-cluster-threshold=0.25

View File

@@ -28,7 +28,8 @@ LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib6
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group
INCS:= $(wildcard *.h)
SRCFILES:= nvdsparsebbox_Yolo.cpp
SRCFILES:= nvdsparsebbox_Yolo.cpp \
yololayer.cu
TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so
@@ -48,3 +49,4 @@ $(TARGET_LIB) : $(TARGET_OBJS)
clean:
rm -rf $(TARGET_LIB)
rm -rf $(TARGET_OBJS)

View File

@@ -0,0 +1,94 @@
#ifndef __TRT_UTILS_H_
#define __TRT_UTILS_H_
#include <iostream>
#include <vector>
#include <algorithm>
#include <cudnn.h>
#ifndef CUDA_CHECK
#define CUDA_CHECK(callstr) \
{ \
cudaError_t error_code = callstr; \
if (error_code != cudaSuccess) { \
std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \
assert(0); \
} \
}
#endif
namespace Tn
{
class Profiler : public nvinfer1::IProfiler
{
public:
void printLayerTimes(int itrationsTimes)
{
float totalTime = 0;
for (size_t i = 0; i < mProfile.size(); i++)
{
printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
totalTime += mProfile[i].second;
}
printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
}
private:
typedef std::pair<std::string, float> Record;
std::vector<Record> mProfile;
virtual void reportLayerTime(const char* layerName, float ms)
{
auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
if (record == mProfile.end())
mProfile.push_back(std::make_pair(layerName, ms));
else
record->second += ms;
}
};
//Logger for TensorRT info/warning/errors
class Logger : public nvinfer1::ILogger
{
public:
Logger(): Logger(Severity::kWARNING) {}
Logger(Severity severity): reportableSeverity(severity) {}
void log(Severity severity, const char* msg) override
{
// suppress messages with severity enum value greater than the reportable
if (severity > reportableSeverity) return;
switch (severity)
{
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
case Severity::kERROR: std::cerr << "ERROR: "; break;
case Severity::kWARNING: std::cerr << "WARNING: "; break;
case Severity::kINFO: std::cerr << "INFO: "; break;
default: std::cerr << "UNKNOWN: "; break;
}
std::cerr << msg << std::endl;
}
Severity reportableSeverity{Severity::kWARNING};
};
template<typename T>
void write(char*& buffer, const T& val)
{
*reinterpret_cast<T*>(buffer) = val;
buffer += sizeof(T);
}
template<typename T>
void read(const char*& buffer, T& val)
{
val = *reinterpret_cast<const T*>(buffer);
buffer += sizeof(T);
}
}
#endif

View File

@@ -0,0 +1,270 @@
#include <assert.h>
#include "yololayer.h"
#include "utils.h"
using namespace Yolo;
namespace nvinfer1
{
YoloLayerPlugin::YoloLayerPlugin()
{
mClassCount = CLASS_NUM;
mYoloKernel.clear();
mYoloKernel.push_back(yolo1);
mYoloKernel.push_back(yolo2);
mYoloKernel.push_back(yolo3);
mKernelCount = mYoloKernel.size();
CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
size_t AnchorLen = sizeof(float)* CHECK_COUNT*2;
for(int ii = 0; ii < mKernelCount; ii ++)
{
CUDA_CHECK(cudaMalloc(&mAnchor[ii],AnchorLen));
const auto& yolo = mYoloKernel[ii];
CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
}
}
YoloLayerPlugin::~YoloLayerPlugin()
{
}
// create the plugin at runtime from a byte stream
YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
{
using namespace Tn;
const char *d = reinterpret_cast<const char *>(data), *a = d;
read(d, mClassCount);
read(d, mThreadCount);
read(d, mKernelCount);
mYoloKernel.resize(mKernelCount);
auto kernelSize = mKernelCount*sizeof(YoloKernel);
memcpy(mYoloKernel.data(),d,kernelSize);
d += kernelSize;
CUDA_CHECK(cudaMallocHost(&mAnchor, mKernelCount * sizeof(void*)));
size_t AnchorLen = sizeof(float)* CHECK_COUNT*2;
for(int ii = 0; ii < mKernelCount; ii ++)
{
CUDA_CHECK(cudaMalloc(&mAnchor[ii],AnchorLen));
const auto& yolo = mYoloKernel[ii];
CUDA_CHECK(cudaMemcpy(mAnchor[ii], yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
}
assert(d == a + length);
}
void YoloLayerPlugin::serialize(void* buffer) const
{
using namespace Tn;
char* d = static_cast<char*>(buffer), *a = d;
write(d, mClassCount);
write(d, mThreadCount);
write(d, mKernelCount);
auto kernelSize = mKernelCount*sizeof(YoloKernel);
memcpy(d,mYoloKernel.data(),kernelSize);
d += kernelSize;
assert(d == a + getSerializationSize());
}
size_t YoloLayerPlugin::getSerializationSize() const
{
return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount) + sizeof(Yolo::YoloKernel) * mYoloKernel.size();
}
int YoloLayerPlugin::initialize()
{
return 0;
}
Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
{
//output the result to channel
int totalsize = MAX_OUTPUT_BBOX_COUNT * sizeof(Detection) / sizeof(float);
return Dims3(totalsize + 1, 1, 1);
}
// Set plugin namespace
void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
{
mPluginNamespace = pluginNamespace;
}
const char* YoloLayerPlugin::getPluginNamespace() const
{
return mPluginNamespace;
}
// Return the DataType of the plugin output at the requested index
DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
{
return DataType::kFLOAT;
}
// Return true if output tensor is broadcast across a batch.
bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
{
return false;
}
// Return true if plugin can use input that is broadcast across batch without replication.
bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
{
return false;
}
void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
{
}
// Attach the plugin object to an execution context and grant the plugin the access to some context resource.
void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
{
}
// Detach the plugin object from its execution context.
void YoloLayerPlugin::detachFromContext() {}
const char* YoloLayerPlugin::getPluginType() const
{
return "YoloLayer_TRT";
}
const char* YoloLayerPlugin::getPluginVersion() const
{
return "1";
}
void YoloLayerPlugin::destroy()
{
delete this;
}
// Clone the plugin
IPluginV2IOExt* YoloLayerPlugin::clone() const
{
YoloLayerPlugin *p = new YoloLayerPlugin();
p->setPluginNamespace(mPluginNamespace);
return p;
}
__device__ float Logist(float data){ return 1.0f / (1.0f + expf(-data)); };
__global__ void CalDetection(const float *input, float *output,int noElements,
int yoloWidth,int yoloHeight,const float anchors[CHECK_COUNT*2],int classes,int outputElem) {
int idx = threadIdx.x + blockDim.x * blockIdx.x;
if (idx >= noElements) return;
int total_grid = yoloWidth * yoloHeight;
int bnIdx = idx / total_grid;
idx = idx - total_grid*bnIdx;
int info_len_i = 5 + classes;
const float* curInput = input + bnIdx * (info_len_i * total_grid * CHECK_COUNT);
for (int k = 0; k < 3; ++k) {
float box_prob = Logist(curInput[idx + k * info_len_i * total_grid + 4 * total_grid]);
if (box_prob < IGNORE_THRESH) continue;
int class_id = 0;
float max_cls_prob = 0.0;
for (int i = 5; i < info_len_i; ++i) {
float p = Logist(curInput[idx + k * info_len_i * total_grid + i * total_grid]);
if (p > max_cls_prob) {
max_cls_prob = p;
class_id = i - 5;
}
}
float *res_count = output + bnIdx*outputElem;
int count = (int)atomicAdd(res_count, 1);
if (count >= MAX_OUTPUT_BBOX_COUNT) return;
char* data = (char *)res_count + sizeof(float) + count * sizeof(Detection);
Detection* det = (Detection*)(data);
int row = idx / yoloWidth;
int col = idx % yoloWidth;
//Location
det->bbox[0] = (col - 0.5f + 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 0 * total_grid])) * INPUT_W / yoloWidth;
det->bbox[1] = (row - 0.5f + 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 1 * total_grid])) * INPUT_H / yoloHeight;
det->bbox[2] = 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 2 * total_grid]);
det->bbox[2] = det->bbox[2] * det->bbox[2] * anchors[2*k];
det->bbox[3] = 2.0f * Logist(curInput[idx + k * info_len_i * total_grid + 3 * total_grid]);
det->bbox[3] = det->bbox[3] * det->bbox[3] * anchors[2*k + 1];
det->conf = box_prob * max_cls_prob;
det->class_id = class_id;
}
}
void YoloLayerPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) {
int outputElem = 1 + MAX_OUTPUT_BBOX_COUNT * sizeof(Detection) / sizeof(float);
for(int idx = 0 ; idx < batchSize; ++idx) {
CUDA_CHECK(cudaMemset(output + idx*outputElem, 0, sizeof(float)));
}
int numElem = 0;
for (unsigned int i = 0; i < mYoloKernel.size(); ++i)
{
const auto& yolo = mYoloKernel[i];
numElem = yolo.width*yolo.height*batchSize;
if (numElem < mThreadCount)
mThreadCount = numElem;
CalDetection<<< (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
(inputs[i], output, numElem, yolo.width, yolo.height, (float *)mAnchor[i], mClassCount, outputElem);
}
}
int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
{
forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
return 0;
}
PluginFieldCollection YoloPluginCreator::mFC{};
std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
YoloPluginCreator::YoloPluginCreator()
{
mPluginAttributes.clear();
mFC.nbFields = mPluginAttributes.size();
mFC.fields = mPluginAttributes.data();
}
const char* YoloPluginCreator::getPluginName() const
{
return "YoloLayer_TRT";
}
const char* YoloPluginCreator::getPluginVersion() const
{
return "1";
}
const PluginFieldCollection* YoloPluginCreator::getFieldNames()
{
return &mFC;
}
IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
{
YoloLayerPlugin* obj = new YoloLayerPlugin();
obj->setPluginNamespace(mNamespace.c_str());
return obj;
}
IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
{
// This object will be deleted when the network is destroyed, which will
// call MishPlugin::destroy()
YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength);
obj->setPluginNamespace(mNamespace.c_str());
return obj;
}
}

View File

@@ -0,0 +1,152 @@
#ifndef _YOLO_LAYER_H
#define _YOLO_LAYER_H
#include <vector>
#include <string>
#include "NvInfer.h"
namespace Yolo
{
static constexpr int CHECK_COUNT = 3;
static constexpr float IGNORE_THRESH = 0.1f;
static constexpr int MAX_OUTPUT_BBOX_COUNT = 1000;
static constexpr int CLASS_NUM = 80;
static constexpr int INPUT_H = 608;
static constexpr int INPUT_W = 608;
struct YoloKernel
{
int width;
int height;
float anchors[CHECK_COUNT*2];
};
static constexpr YoloKernel yolo1 = {
INPUT_W / 32,
INPUT_H / 32,
{116,90, 156,198, 373,326}
};
static constexpr YoloKernel yolo2 = {
INPUT_W / 16,
INPUT_H / 16,
{30,61, 62,45, 59,119}
};
static constexpr YoloKernel yolo3 = {
INPUT_W / 8,
INPUT_H / 8,
{10,13, 16,30, 33,23}
};
static constexpr int LOCATIONS = 4;
struct alignas(float) Detection{
//center_x center_y w h
float bbox[LOCATIONS];
float conf; // bbox_conf * cls_conf
float class_id;
};
}
namespace nvinfer1
{
class YoloLayerPlugin: public IPluginV2IOExt
{
public:
explicit YoloLayerPlugin();
YoloLayerPlugin(const void* data, size_t length);
~YoloLayerPlugin();
int getNbOutputs() const override
{
return 1;
}
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
int initialize() override;
virtual void terminate() override {};
virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;}
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
virtual size_t getSerializationSize() const override;
virtual void serialize(void* buffer) const override;
bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
}
const char* getPluginType() const override;
const char* getPluginVersion() const override;
void destroy() override;
IPluginV2IOExt* clone() const override;
void setPluginNamespace(const char* pluginNamespace) override;
const char* getPluginNamespace() const override;
DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
bool canBroadcastInputAcrossBatch(int inputIndex) const override;
void attachToContext(
cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
void detachFromContext() override;
private:
void forwardGpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1);
int mClassCount;
int mKernelCount;
std::vector<Yolo::YoloKernel> mYoloKernel;
int mThreadCount = 256;
void** mAnchor;
const char* mPluginNamespace;
};
class YoloPluginCreator : public IPluginCreator
{
public:
YoloPluginCreator();
~YoloPluginCreator() override = default;
const char* getPluginName() const override;
const char* getPluginVersion() const override;
const PluginFieldCollection* getFieldNames() override;
IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
void setPluginNamespace(const char* libNamespace) override
{
mNamespace = libNamespace;
}
const char* getPluginNamespace() const override
{
return mNamespace.c_str();
}
private:
std::string mNamespace;
static PluginFieldCollection mFC;
static std::vector<PluginField> mPluginAttributes;
};
REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
};
#endif