From b62b20d5172566f65b428390fb8381b163a3d76c Mon Sep 17 00:00:00 2001 From: DennisJ <106725464+DennisJcy@users.noreply.github.com> Date: Sun, 10 Dec 2023 23:41:24 +0800 Subject: [PATCH] Add C++ Classify inference example (#6868) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher --- examples/YOLOv8-ONNXRuntime-CPP/README.md | 38 +- examples/YOLOv8-ONNXRuntime-CPP/inference.cpp | 355 ++++++++++-------- examples/YOLOv8-ONNXRuntime-CPP/inference.h | 64 ++-- examples/YOLOv8-ONNXRuntime-CPP/main.cpp | 146 +++++-- 4 files changed, 380 insertions(+), 223 deletions(-) diff --git a/examples/YOLOv8-ONNXRuntime-CPP/README.md b/examples/YOLOv8-ONNXRuntime-CPP/README.md index f70127ff..032cf4a7 100644 --- a/examples/YOLOv8-ONNXRuntime-CPP/README.md +++ b/examples/YOLOv8-ONNXRuntime-CPP/README.md @@ -13,6 +13,10 @@ This example demonstrates how to perform inference using YOLOv8 in C++ with ONNX - Faster than OpenCV's DNN inference on both CPU and GPU. - Supports FP32 and FP16 CUDA acceleration. +## Note :coffee: + +1. ~~This repository should also work for YOLOv5, which needs a permute operator for the output of the YOLOv5 model, but this has not been implemented yet.~~ Thanks to Ultralytics' latest release, a `Transpose` op has been added to the YOLOv8 model, which gives YOLOv8 and YOLOv5 the same output shape. Therefore, you can run inference on your YOLOv5/v7/v8 models with this project. 
+ ## Exporting YOLOv8 Models 📦 To export YOLOv8 models, use the following Python script: @@ -33,6 +37,17 @@ Alternatively, you can use the following command for exporting the model in the yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640 ``` +## Exporting YOLOv8 FP16 Models 📦 + +```python +import onnx +from onnxconverter_common import float16 + +model = onnx.load(R'YOUR_ONNX_PATH') +model_fp16 = float16.convert_float_to_float16(model) +onnx.save(model_fp16, R'YOUR_FP16_ONNX_PATH') +``` + ## Download COCO.yaml file 📂 In order to run example, you also need to download coco.yaml. You can download the file manually from [here](https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml) @@ -79,16 +94,15 @@ make ## Usage 🚀 ```c++ -// CPU inference -DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {imgsz_w, imgsz_h}, 0.1, 0.5, false}; -// GPU inference -DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {imgsz_w, imgsz_h}, 0.1, 0.5, true}; -// Load your image -cv::Mat img = cv::imread(img_path); -// Init Inference Session -char* ret = yoloDetector->CreateSession(params); - -ret = yoloDetector->RunSession(img, res); +//change your param as you like +//Pay attention to your device and the onnx model type(fp32 or fp16) +DL_INIT_PARAM params; +params.rectConfidenceThreshold = 0.1; +params.iouThreshold = 0.5; +params.modelPath = "yolov8n.onnx"; +params.imgSize = { 640, 640 }; +params.cudaEnable = true; +params.modelType = YOLO_DETECT_V8; +yoloDetector->CreateSession(params); +Detector(yoloDetector); ``` - -This repository should also work for YOLOv5, which needs a permute operator for the output of the YOLOv5 model, but this has not been implemented yet. 
diff --git a/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp index a2de7727..3d0427f3 100644 --- a/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp +++ b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp @@ -2,13 +2,13 @@ #include #define benchmark - -DCSP_CORE::DCSP_CORE() { +#define min(a,b) (((a) < (b)) ? (a) : (b)) +YOLO_V8::YOLO_V8() { } -DCSP_CORE::~DCSP_CORE() { +YOLO_V8::~YOLO_V8() { delete session; } @@ -22,16 +22,19 @@ namespace Ort template -char *BlobFromImage(cv::Mat &iImg, T &iBlob) { +char* BlobFromImage(cv::Mat& iImg, T& iBlob) { int channels = iImg.channels(); int imgHeight = iImg.rows; int imgWidth = iImg.cols; - for (int c = 0; c < channels; c++) { - for (int h = 0; h < imgHeight; h++) { - for (int w = 0; w < imgWidth; w++) { + for (int c = 0; c < channels; c++) + { + for (int h = 0; h < imgHeight; h++) + { + for (int w = 0; w < imgWidth; w++) + { iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer::type( - (iImg.at(h, w)[c]) / 255.0f); + (iImg.at(h, w)[c]) / 255.0f); } } } @@ -39,119 +42,148 @@ char *BlobFromImage(cv::Mat &iImg, T &iBlob) { } -char* DL_CORE::PreProcess(cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg) +char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg) { - if (iImg.channels() == 3) - { - oImg = iImg.clone(); - cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB); - } - else - { - cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB); - } + if (iImg.channels() == 3) + { + oImg = iImg.clone(); + cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB); + } + else + { + cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB); + } - if (iImg.cols >= iImg.rows) - { - resizeScales = iImg.cols / (float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); - } - else - { - resizeScales = iImg.rows / (float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); - } - cv::Mat tempImg = 
cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); - oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); - oImg = tempImg; - return RET_OK; + switch (modelType) + { + case YOLO_DETECT_V8: + case YOLO_POSE: + case YOLO_DETECT_V8_HALF: + case YOLO_POSE_V8_HALF://LetterBox + { + if (iImg.cols >= iImg.rows) + { + resizeScales = iImg.cols / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); + } + else + { + resizeScales = iImg.rows / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); + } + cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); + oImg = tempImg; + break; + } + case YOLO_CLS://CenterCrop + { + int h = iImg.rows; + int w = iImg.cols; + int m = min(h, w); + int top = (h - m) / 2; + int left = (w - m) / 2; + cv::resize(oImg(cv::Rect(left, top, m, m)), oImg, cv::Size(iImgSize.at(0), iImgSize.at(1))); + break; + } + } + return RET_OK; } -char *DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams) { - char *Ret = RET_OK; +char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) { + char* Ret = RET_OK; std::regex pattern("[\u4e00-\u9fa5]"); - bool result = std::regex_search(iParams.ModelPath, pattern); - if (result) { - Ret = "[DCSP_ONNX]:Model path error.Change your model path without chinese characters."; + bool result = std::regex_search(iParams.modelPath, pattern); + if (result) + { + Ret = "[YOLO_V8]:Your model path is error.Change your model path without chinese characters."; std::cout << Ret << std::endl; return Ret; } - try { - rectConfidenceThreshold = iParams.RectConfidenceThreshold; + try + { + rectConfidenceThreshold = iParams.rectConfidenceThreshold; iouThreshold = iParams.iouThreshold; imgSize = iParams.imgSize; - modelType = iParams.ModelType; + modelType = iParams.modelType; env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo"); Ort::SessionOptions 
sessionOption; - if (iParams.CudaEnable) { - cudaEnable = iParams.CudaEnable; + if (iParams.cudaEnable) + { + cudaEnable = iParams.cudaEnable; OrtCUDAProviderOptions cudaOption; cudaOption.device_id = 0; sessionOption.AppendExecutionProvider_CUDA(cudaOption); } sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); - sessionOption.SetIntraOpNumThreads(iParams.IntraOpNumThreads); - sessionOption.SetLogSeverityLevel(iParams.LogSeverityLevel); + sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); + sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); #ifdef _WIN32 - int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast(iParams.ModelPath.length()), nullptr, 0); + int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast(iParams.modelPath.length()), nullptr, 0); wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1]; - MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast(iParams.ModelPath.length()), wide_cstr, ModelPathSize); + MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast(iParams.modelPath.length()), wide_cstr, ModelPathSize); wide_cstr[ModelPathSize] = L'\0'; const wchar_t* modelPath = wide_cstr; #else - const char *modelPath = iParams.ModelPath.c_str(); + const char* modelPath = iParams.modelPath.c_str(); #endif // _WIN32 session = new Ort::Session(env, modelPath, sessionOption); Ort::AllocatorWithDefaultOptions allocator; size_t inputNodesNum = session->GetInputCount(); - for (size_t i = 0; i < inputNodesNum; i++) { + for (size_t i = 0; i < inputNodesNum; i++) + { Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator); - char *temp_buf = new char[50]; + char* temp_buf = new char[50]; strcpy(temp_buf, input_node_name.get()); inputNodeNames.push_back(temp_buf); } size_t OutputNodesNum = session->GetOutputCount(); - for (size_t i = 0; i < 
OutputNodesNum; i++) + { Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator); - char *temp_buf = new char[10]; + char* temp_buf = new char[10]; strcpy(temp_buf, output_node_name.get()); outputNodeNames.push_back(temp_buf); } - options = Ort::RunOptions{nullptr}; + options = Ort::RunOptions{ nullptr }; WarmUpSession(); return RET_OK; } - catch (const std::exception &e) { - const char *str1 = "[DCSP_ONNX]:"; - const char *str2 = e.what(); + catch (const std::exception& e) + { + const char* str1 = "[YOLO_V8]:"; + const char* str2 = e.what(); std::string result = std::string(str1) + std::string(str2); - char *merged = new char[result.length() + 1]; + char* merged = new char[result.length() + 1]; std::strcpy(merged, result.c_str()); std::cout << merged << std::endl; delete[] merged; - return "[DCSP_ONNX]:Create session failed."; + return "[YOLO_V8]:Create session failed."; } } -char *DCSP_CORE::RunSession(cv::Mat &iImg, std::vector &oResult) { +char* YOLO_V8::RunSession(cv::Mat& iImg, std::vector& oResult) { #ifdef benchmark clock_t starttime_1 = clock(); #endif // benchmark - char *Ret = RET_OK; + char* Ret = RET_OK; cv::Mat processedImg; PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) { - float *blob = new float[processedImg.total() * 3]; + if (modelType < 4) + { + float* blob = new float[processedImg.total() * 3]; BlobFromImage(processedImg, blob); - std::vector inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; + std::vector inputNodeDims = { 1, 3, imgSize.at(0), imgSize.at(1) }; TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult); - } else { + } + else + { #ifdef USE_CUDA half* blob = new half[processedImg.total() * 3]; BlobFromImage(processedImg, blob); @@ -165,16 +197,16 @@ char *DCSP_CORE::RunSession(cv::Mat &iImg, std::vector &oResult) { template -char *DCSP_CORE::TensorProcess(clock_t &starttime_1, cv::Mat &iImg, N &blob, std::vector &inputNodeDims, - std::vector &oResult) { +char* 
YOLO_V8::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector& inputNodeDims, + std::vector& oResult) { Ort::Value inputTensor = Ort::Value::CreateTensor::type>( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), - inputNodeDims.data(), inputNodeDims.size()); + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), + inputNodeDims.data(), inputNodeDims.size()); #ifdef benchmark clock_t starttime_2 = clock(); #endif // benchmark auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), - outputNodeNames.size()); + outputNodeNames.size()); #ifdef benchmark clock_t starttime_3 = clock(); #endif // benchmark @@ -184,109 +216,134 @@ char *DCSP_CORE::TensorProcess(clock_t &starttime_1, cv::Mat &iImg, N &blob, std std::vector outputNodeDims = tensor_info.GetShape(); auto output = outputTensor.front().GetTensorMutableData::type>(); delete blob; - switch (modelType) { - case 1://V8_ORIGIN_FP32 - case 4://V8_ORIGIN_FP16 + switch (modelType) + { + case YOLO_DETECT_V8: + case YOLO_DETECT_V8_HALF: + { + int strideNum = outputNodeDims[1];//8400 + int signalResultNum = outputNodeDims[2];//84 + std::vector class_ids; + std::vector confidences; + std::vector boxes; + cv::Mat rawData; + if (modelType == YOLO_DETECT_V8) { - int strideNum = outputNodeDims[2]; - int signalResultNum = outputNodeDims[1]; - std::vector class_ids; - std::vector confidences; - std::vector boxes; + // FP32 + rawData = cv::Mat(strideNum, signalResultNum, CV_32F, output); + } + else + { + // FP16 + rawData = cv::Mat(strideNum, signalResultNum, CV_16F, output); + rawData.convertTo(rawData, CV_32F); + } + //Note: + //ultralytics add transpose operator to the output of yolov8 model.which make yolov8/v5/v7 has same shape + //https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt + //rowData = rowData.t(); - cv::Mat rawData; - 
if (modelType == 1) { - // FP32 - rawData = cv::Mat(signalResultNum, strideNum, CV_32F, output); - } else { - // FP16 - rawData = cv::Mat(signalResultNum, strideNum, CV_16F, output); - rawData.convertTo(rawData, CV_32F); + float* data = (float*)rawData.data; + + for (int i = 0; i < strideNum; ++i) + { + float* classesScores = data + 4; + cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores); + cv::Point class_id; + double maxClassScore; + cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + if (maxClassScore > rectConfidenceThreshold) + { + confidences.push_back(maxClassScore); + class_ids.push_back(class_id.x); + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = int((x - 0.5 * w) * resizeScales); + int top = int((y - 0.5 * h) * resizeScales); + + int width = int(w * resizeScales); + int height = int(h * resizeScales); + + boxes.push_back(cv::Rect(left, top, width, height)); } - rawData = rawData.t(); - float *data = (float *) rawData.data; - - for (int i = 0; i < strideNum; ++i) { - float *classesScores = data + 4; - cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores); - cv::Point class_id; - double maxClassScore; - cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); - if (maxClassScore > rectConfidenceThreshold) { - confidences.push_back(maxClassScore); - class_ids.push_back(class_id.x); - - float x = data[0]; - float y = data[1]; - float w = data[2]; - float h = data[3]; - - int left = int((x - 0.5 * w) * resizeScales); - int top = int((y - 0.5 * h) * resizeScales); - - int width = int(w * resizeScales); - int height = int(h * resizeScales); - - boxes.emplace_back(left, top, width, height); - } - data += signalResultNum; - } - - std::vector nmsResult; - cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult); - - for (int i = 0; i < nmsResult.size(); ++i) { - int idx = nmsResult[i]; - DCSP_RESULT result; - result.classId = class_ids[idx]; - 
result.confidence = confidences[idx]; - result.box = boxes[idx]; - oResult.push_back(result); - } - + data += signalResultNum; + } + std::vector nmsResult; + cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult); + for (int i = 0; i < nmsResult.size(); ++i) + { + int idx = nmsResult[i]; + DL_RESULT result; + result.classId = class_ids[idx]; + result.confidence = confidences[idx]; + result.box = boxes[idx]; + oResult.push_back(result); + } #ifdef benchmark - clock_t starttime_4 = clock(); - double pre_process_time = (double) (starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; - double process_time = (double) (starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; - double post_process_time = (double) (starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { - std::cout << "[DCSP_ONNX(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time - << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } else { - std::cout << "[DCSP_ONNX(CPU)]: " << pre_process_time << "ms pre-process, " << process_time - << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } + clock_t starttime_4 = clock(); + double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; + double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; + double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) + { + std::cout << "[YOLO_V8(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; + } + else + { + std::cout << "[YOLO_V8(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; + } #endif // benchmark - break; + break; + } + case YOLO_CLS: + { + DL_RESULT result; + for (int i = 0; i < this->classes.size(); i++) + { + result.classId = i; + result.confidence = output[i]; + oResult.push_back(result); } + break; + } + default: + std::cout << "[YOLO_V8]: " << "Not support model type." << std::endl; } return RET_OK; + } -char *DCSP_CORE::WarmUpSession() { +char* YOLO_V8::WarmUpSession() { clock_t starttime_1 = clock(); cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); cv::Mat processedImg; PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) { - float *blob = new float[iImg.total() * 3]; + if (modelType < 4) + { + float* blob = new float[iImg.total() * 3]; BlobFromImage(processedImg, blob); - std::vector YOLO_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; + std::vector YOLO_input_node_dims = { 1, 3, imgSize.at(0), imgSize.at(1) }; Ort::Value input_tensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), - YOLO_input_node_dims.data(), YOLO_input_node_dims.size()); + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), + YOLO_input_node_dims.data(), YOLO_input_node_dims.size()); auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), - outputNodeNames.size()); + outputNodeNames.size()); delete[] blob; clock_t starttime_4 = clock(); - double post_process_time = (double) (starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { - std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; + double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) + { + std::cout << "[YOLO_V8(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. 
" << std::endl; } - } else { + } + else + { #ifdef USE_CUDA half* blob = new half[iImg.total() * 3]; BlobFromImage(processedImg, blob); @@ -298,7 +355,7 @@ char *DCSP_CORE::WarmUpSession() { double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; if (cudaEnable) { - std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; + std::cout << "[YOLO_V8(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; } #endif } diff --git a/examples/YOLOv8-ONNXRuntime-CPP/inference.h b/examples/YOLOv8-ONNXRuntime-CPP/inference.h index bd85e783..3174ae93 100644 --- a/examples/YOLOv8-ONNXRuntime-CPP/inference.h +++ b/examples/YOLOv8-ONNXRuntime-CPP/inference.h @@ -19,53 +19,59 @@ #endif -enum MODEL_TYPE { +enum MODEL_TYPE +{ //FLOAT32 MODEL - YOLO_ORIGIN_V5 = 0, - YOLO_ORIGIN_V8 = 1,//only support v8 detector currently - YOLO_POSE_V8 = 2, - YOLO_CLS_V8 = 3, - YOLO_ORIGIN_V8_HALF = 4, + YOLO_DETECT_V8 = 1, + YOLO_POSE = 2, + YOLO_CLS = 3, + + //FLOAT16 MODEL + YOLO_DETECT_V8_HALF = 4, YOLO_POSE_V8_HALF = 5, - YOLO_CLS_V8_HALF = 6 }; -typedef struct _DCSP_INIT_PARAM { - std::string ModelPath; - MODEL_TYPE ModelType = YOLO_ORIGIN_V8; - std::vector imgSize = {640, 640}; - float RectConfidenceThreshold = 0.6; +typedef struct _DL_INIT_PARAM +{ + std::string modelPath; + MODEL_TYPE modelType = YOLO_DETECT_V8; + std::vector imgSize = { 640, 640 }; + float rectConfidenceThreshold = 0.6; float iouThreshold = 0.5; - bool CudaEnable = false; - int LogSeverityLevel = 3; - int IntraOpNumThreads = 1; -} DCSP_INIT_PARAM; + int keyPointsNum = 2;//Note:kpt number for pose + bool cudaEnable = false; + int logSeverityLevel = 3; + int intraOpNumThreads = 1; +} DL_INIT_PARAM; -typedef struct _DCSP_RESULT { +typedef struct _DL_RESULT +{ int classId; float confidence; cv::Rect box; -} DCSP_RESULT; + std::vector keyPoints; +} DL_RESULT; -class DCSP_CORE { +class YOLO_V8 +{ public: - DCSP_CORE(); + 
YOLO_V8(); - ~DCSP_CORE(); + ~YOLO_V8(); public: - char *CreateSession(DCSP_INIT_PARAM &iParams); + char* CreateSession(DL_INIT_PARAM& iParams); - char *RunSession(cv::Mat &iImg, std::vector &oResult); + char* RunSession(cv::Mat& iImg, std::vector& oResult); - char *WarmUpSession(); + char* WarmUpSession(); template - char *TensorProcess(clock_t &starttime_1, cv::Mat &iImg, N &blob, std::vector &inputNodeDims, - std::vector &oResult); + char* TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector& inputNodeDims, + std::vector& oResult); char* PreProcess(cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg); @@ -73,11 +79,11 @@ public: private: Ort::Env env; - Ort::Session *session; + Ort::Session* session; bool cudaEnable; Ort::RunOptions options; - std::vector inputNodeNames; - std::vector outputNodeNames; + std::vector inputNodeNames; + std::vector outputNodeNames; MODEL_TYPE modelType; std::vector imgSize; diff --git a/examples/YOLOv8-ONNXRuntime-CPP/main.cpp b/examples/YOLOv8-ONNXRuntime-CPP/main.cpp index 00abec8a..6e4ef1dd 100644 --- a/examples/YOLOv8-ONNXRuntime-CPP/main.cpp +++ b/examples/YOLOv8-ONNXRuntime-CPP/main.cpp @@ -3,18 +3,22 @@ #include "inference.h" #include #include +#include -void file_iterator(DCSP_CORE *&p) { +void Detector(YOLO_V8*& p) { std::filesystem::path current_path = std::filesystem::current_path(); std::filesystem::path imgs_path = current_path / "images"; - for (auto &i: std::filesystem::directory_iterator(imgs_path)) { - if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") { + for (auto& i : std::filesystem::directory_iterator(imgs_path)) + { + if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") + { std::string img_path = i.path().string(); cv::Mat img = cv::imread(img_path); - std::vector res; + std::vector res; p->RunSession(img, res); - for (auto &re: res) { + for (auto& re : res) + { cv::RNG 
rng(cv::getTickCount()); cv::Scalar color(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256)); @@ -23,24 +27,24 @@ void file_iterator(DCSP_CORE *&p) { float confidence = floor(100 * re.confidence) / 100; std::cout << std::fixed << std::setprecision(2); std::string label = p->classes[re.classId] + " " + - std::to_string(confidence).substr(0, std::to_string(confidence).size() - 4); + std::to_string(confidence).substr(0, std::to_string(confidence).size() - 4); cv::rectangle( - img, - cv::Point(re.box.x, re.box.y - 25), - cv::Point(re.box.x + label.length() * 15, re.box.y), - color, - cv::FILLED + img, + cv::Point(re.box.x, re.box.y - 25), + cv::Point(re.box.x + label.length() * 15, re.box.y), + color, + cv::FILLED ); cv::putText( - img, - label, - cv::Point(re.box.x, re.box.y - 5), - cv::FONT_HERSHEY_SIMPLEX, - 0.75, - cv::Scalar(0, 0, 0), - 2 + img, + label, + cv::Point(re.box.x, re.box.y - 5), + cv::FONT_HERSHEY_SIMPLEX, + 0.75, + cv::Scalar(0, 0, 0), + 2 ); @@ -53,10 +57,51 @@ void file_iterator(DCSP_CORE *&p) { } } -int read_coco_yaml(DCSP_CORE *&p) { + +void Classifier(YOLO_V8*& p) +{ + std::filesystem::path current_path = std::filesystem::current_path(); + std::filesystem::path imgs_path = current_path;// / "images" + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, 255); + for (auto& i : std::filesystem::directory_iterator(imgs_path)) + { + if (i.path().extension() == ".jpg" || i.path().extension() == ".png") + { + std::string img_path = i.path().string(); + //std::cout << img_path << std::endl; + cv::Mat img = cv::imread(img_path); + std::vector res; + char* ret = p->RunSession(img, res); + + float positionY = 50; + for (int i = 0; i < res.size(); i++) + { + int r = dis(gen); + int g = dis(gen); + int b = dis(gen); + cv::putText(img, std::to_string(i) + ":", cv::Point(10, positionY), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(b, g, r), 2); + cv::putText(img, std::to_string(res.at(i).confidence), cv::Point(70, 
positionY), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(b, g, r), 2); + positionY += 50; + } + + cv::imshow("TEST_CLS", img); + cv::waitKey(0); + cv::destroyAllWindows(); + //cv::imwrite("E:\\output\\" + std::to_string(k) + ".png", img); + } + + } +} + + + +int ReadCocoYaml(YOLO_V8*& p) { // Open the YAML file std::ifstream file("coco.yaml"); - if (!file.is_open()) { + if (!file.is_open()) + { std::cerr << "Failed to open file" << std::endl; return 1; } @@ -64,17 +109,22 @@ int read_coco_yaml(DCSP_CORE *&p) { // Read the file line by line std::string line; std::vector lines; - while (std::getline(file, line)) { + while (std::getline(file, line)) + { lines.push_back(line); } // Find the start and end of the names section std::size_t start = 0; std::size_t end = 0; - for (std::size_t i = 0; i < lines.size(); i++) { - if (lines[i].find("names:") != std::string::npos) { + for (std::size_t i = 0; i < lines.size(); i++) + { + if (lines[i].find("names:") != std::string::npos) + { start = i + 1; - } else if (start > 0 && lines[i].find(':') == std::string::npos) { + } + else if (start > 0 && lines[i].find(':') == std::string::npos) + { end = i; break; } @@ -82,7 +132,8 @@ int read_coco_yaml(DCSP_CORE *&p) { // Extract the names std::vector names; - for (std::size_t i = start; i < end; i++) { + for (std::size_t i = start; i < end; i++) + { std::stringstream ss(lines[i]); std::string name; std::getline(ss, name, ':'); // Extract the number before the delimiter @@ -95,19 +146,48 @@ int read_coco_yaml(DCSP_CORE *&p) { } -int main() { - DCSP_CORE *yoloDetector = new DCSP_CORE; - std::string model_path = "yolov8n.onnx"; - read_coco_yaml(yoloDetector); +void DetectTest() +{ + YOLO_V8* yoloDetector = new YOLO_V8; + ReadCocoYaml(yoloDetector); + DL_INIT_PARAM params; + params.rectConfidenceThreshold = 0.1; + params.iouThreshold = 0.5; + params.modelPath = "yolov8n.onnx"; + params.imgSize = { 640, 640 }; #ifdef USE_CUDA + params.cudaEnable = true; + // GPU FP32 inference - 
DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {640, 640}, 0.1, 0.5, true }; + params.modelType = YOLO_DETECT_V8; // GPU FP16 inference - // DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8_HALF, {640, 640}, 0.1, 0.5, true }; + //Note: change fp16 onnx model + //params.modelType = YOLO_DETECT_V8_HALF; + #else // CPU inference - DCSP_INIT_PARAM params{model_path, YOLO_ORIGIN_V8, {640, 640}, 0.1, 0.5, false}; + params.modelType = YOLO_DETECT_V8; + params.cudaEnable = false; + #endif yoloDetector->CreateSession(params); - file_iterator(yoloDetector); + Detector(yoloDetector); +} + + +void ClsTest() +{ + YOLO_V8* yoloDetector = new YOLO_V8; + std::string model_path = "cls.onnx"; + ReadCocoYaml(yoloDetector); + DL_INIT_PARAM params{ model_path, YOLO_CLS, {224, 224} }; + yoloDetector->CreateSession(params); + Classifier(yoloDetector); +} + + +int main() +{ + //DetectTest(); + ClsTest(); }