Following the YOLOv8 GitHub example code, this post runs object detection on a video file.
Install the OpenCV library before compiling.
The video file is decoded and ONNX model inference runs on every frame.
Instead of a separate onnxruntime library, OpenCV's cv::dnn::readNetFromONNX function is used.
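Before the full listing, here is a minimal sketch of the core cv::dnn flow the code below relies on. The file and image names are placeholders, and it assumes yolov8n.onnx was exported with the command shown in the build comments:

// sketch only: load an ONNX model with cv::dnn and run a single forward pass
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromONNX("yolov8n.onnx");
    cv::Mat img = cv::imread("sample.jpg"); // any test image (placeholder name)
    cv::Mat blob;
    // scale to [0,1], resize to 640x640, swap BGR->RGB, no crop
    cv::dnn::blobFromImage(img, blob, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());
    // for YOLOv8n at 640x640 this prints 84 x 8400: 4 box coords + 80 class scores per candidate
    std::cout << outputs[0].size[1] << " x " << outputs[0].size[2] << std::endl;
    return 0;
}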
// Build command
// g++ main.cpp -o test `pkg-config opencv4 --cflags --libs`
// Export the .pt model to ONNX with opset 12, otherwise loading fails.
// yolo export model=yolov8n.pt imgsz=480,640 format=onnx opset=12
#include <iostream>
#include <fstream> // std::ifstream (loadClassesFromFile)
#include <random>  // std::mt19937 (random box colors)
#include <vector>
#include <getopt.h>
#include <ctime>
#include <opencv2/opencv.hpp>
#include "inference.h"
using namespace std;
using namespace cv;
clock_t input_start, input_end, run_start, run_end, output_start, output_end, draw_start, draw_end, total_start, total_end;
double input_duration, run_duration, output_duration, draw_duration, total_duration;
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
modelPath = onnxModelPath;
modelShape = modelInputShape;
classesPath = classesTxtFile;
cudaEnabled = runWithCuda;
loadOnnxNetwork();
// loadClassesFromFile(); The classes are hard-coded for this example
}
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
cv::Mat modelInput = input;
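// When the model input is square, pad the frame to a square first so the resize inside blobFromImage keeps the aspect ratio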
if (letterBoxForSquare && modelShape.width == modelShape.height)
modelInput = formatToSquare(modelInput);
// time the input-data preparation
cv::Mat blob;
input_start = clock();
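// scale pixels to [0,1], resize to the model input size, swap BGR->RGB (swapRB=true), no crop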
cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);
input_end = clock();
input_duration = (double)(input_end - input_start) / CLOCKS_PER_SEC;
cout << "input time:" << input_duration << "초" << endl;
// time the model forward pass
run_start = clock();
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
int rows = outputs[0].size[1];
int dimensions = outputs[0].size[2];
run_end = clock();
run_duration = (double)(run_end - run_start) / CLOCKS_PER_SEC;
cout << "run time:" << run_duration << "초" << endl;
// time the output post-processing
output_start = clock();
bool yolov8 = false;
// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
// yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
{
yolov8 = true;
rows = outputs[0].size[2];
dimensions = outputs[0].size[1];
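// Reshape (1, 84, 8400) to (84, 8400), then transpose so each row is one candidate: [x, y, w, h, 80 class scores]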
outputs[0] = outputs[0].reshape(1, dimensions);
cv::transpose(outputs[0], outputs[0]);
}
float *data = (float *)outputs[0].data;
float x_factor = modelInput.cols / static_cast<float>(modelShape.width); // cast avoids truncating integer division
float y_factor = modelInput.rows / static_cast<float>(modelShape.height);
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
for (int i = 0; i < rows; ++i)
{
if (yolov8)
{
float *classes_scores = data + 4;
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double maxClassScore;
minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
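// NOTE: keeps only class id 0 ('person' in COCO order); remove this check to detect every class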
if (class_id.x == 0)
{
if (maxClassScore > modelScoreThreshold)
{
confidences.push_back(maxClassScore);
class_ids.push_back(class_id.x);
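// Decode the center-format box (cx, cy, w, h) into a top-left cv::Rect scaled back to frame pixels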
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
}
else // yolov5
{
float confidence = data[4];
if (confidence >= modelConfidenceThreshold)
{
float *classes_scores = data + 5;
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > modelScoreThreshold)
{
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
}
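// Advance to the next candidate row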
data += dimensions;
}
std::vector<int> nms_result;
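// Non-maximum suppression: among overlapping boxes, keep only the highest-scoring one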
cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
std::vector<Detection> detections{};
for (unsigned long i = 0; i < nms_result.size(); ++i)
{
int idx = nms_result[i];
Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
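// Assign each detection a random bright color (100-255 per channel) for drawing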
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> dis(100, 255);
result.color = cv::Scalar(dis(gen),
dis(gen),
dis(gen));
result.className = classes[result.class_id];
result.box = boxes[idx];
detections.push_back(result);
}
output_end = clock();
output_duration = (double)(output_end - output_start) / CLOCKS_PER_SEC;
cout << "output time:" << output_duration << "초" << endl;
return detections;
}
void Inference::loadClassesFromFile()
{
std::ifstream inputFile(classesPath);
if (inputFile.is_open())
{
std::string classLine;
while (std::getline(inputFile, classLine))
classes.push_back(classLine);
inputFile.close();
}
}
void Inference::loadOnnxNetwork()
{
net = cv::dnn::readNetFromONNX(modelPath);
if (cudaEnabled)
{
std::cout << "\nRunning on CUDA" << std::endl;
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
}
else
{
std::cout << "\nRunning on CPU" << std::endl;
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
}
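// Letterbox helper: copy the frame into the top-left of a black square canvas (pads, does not resize)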
cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
int col = source.cols;
int row = source.rows;
int _max = MAX(col, row);
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
source.copyTo(result(cv::Rect(0, 0, col, row)));
return result;
}
int main(int argc, char **argv)
{
int count = 0;
bool runOnGPU = true;
std::string projectBasePath = "/home/yimstar9/github/ObjectTracking"; // Set your ultralytics base path
std::string videoPath = "../../data/crowd.mp4";
Inference inf("yolov8n.onnx", cv::Size(640, 640), "classes.txt", runOnGPU);
cv::VideoCapture cap(videoPath);
if (!cap.isOpened())
{
std::cout << "동영상 파일을 열 수 없습니다." << std::endl;
return -1;
}
total_start = clock();
// Grab the first frame from the video file
cv::Mat frame;
cap.read(frame);
// cv::namedWindow("Video", cv::WINDOW_NORMAL);
// cv::resizeWindow("Video", frame.cols, frame.rows);
// Loop over the video frames
while (true)
{
count += 1;
std::cout << "frame: " << count << std::endl;
// Inference starts here...
std::vector<Detection> output = inf.runInference(frame);
int detections = output.size();
// time the drawing
draw_start = clock();
for (int i = 0; i < detections; ++i)
{
Detection detection = output[i];
cv::Rect box = detection.box;
cv::Scalar color = detection.color;
// Detection box
cv::rectangle(frame, box, color, 2);
// Detection box text
std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
cv::rectangle(frame, textBox, color, cv::FILLED);
cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
}
// Show the current frame
cv::imshow("Video", frame);
draw_end = clock();
draw_duration = (double)(draw_end - draw_start) / CLOCKS_PER_SEC;
cout << "draw time:" << draw_duration << "초" << endl;
// Grab the next frame
cap.read(frame);
// Exit the loop when the video ends
if (frame.empty())
{
break;
}
// Wait up to 30 ms for a key press
if (cv::waitKey(30) == 27)
{ // exit on ESC
break;
}
}
total_end = clock();
total_duration = (double)(total_end - total_start) / CLOCKS_PER_SEC;
cout << "total time:" << total_duration << "초" << endl;
// Close the windows
// cv::destroyAllWindows();
return 0;
}