
https://github.com/ultralytics/ultralytics/blob/8a11eda4a9a8cf94b17f7cae4c58099d538926a4/examples/YOLOv8-CPP-Inference/main.cpp

Following the YOLOv8 GitHub example linked above, this post runs object detection on a video file.

Install the OpenCV library before compiling.

The video file is decoded, and ONNX model inference runs on every frame.

Instead of linking the onnxruntime library separately, the model is loaded with OpenCV's cv::dnn::readNetFromONNX function.
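
For orientation before the full program, here is a minimal sketch of the cv::dnn calls everything below is built around. The model and image paths are placeholders, and it assumes a YOLOv8n model exported at 640x640:

#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>

int main()
{
    // load an exported YOLOv8 ONNX model (placeholder path)
    cv::dnn::Net net = cv::dnn::readNetFromONNX("yolov8n.onnx");

    cv::Mat frame = cv::imread("frame.jpg"); // any BGR image (placeholder path)
    cv::Mat blob;
    // scale pixels to [0,1], resize to the model input, swap BGR -> RGB
    cv::dnn::blobFromImage(frame, blob, 1.0 / 255.0, cv::Size(640, 640),
                           cv::Scalar(), true, false);

    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // for YOLOv8 the output is (1, 84, 8400): 4 box coords + 80 class scores
    std::cout << outputs[0].size[1] << " x " << outputs[0].size[2] << std::endl;
    return 0;
}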

// Build command:
// g++ main.cpp -o test `pkg-config opencv4 --cflags --libs`

// Download the .pt model and export it to ONNX; export with opset 12 or loading will fail.
// (make sure imgsz matches the input size passed to Inference in main)
// yolo export model=yolov8n.pt imgsz=480,640 format=onnx opset=12

#include <iostream>
#include <fstream>   // std::ifstream in loadClassesFromFile()
#include <random>    // std::mt19937 for the per-detection box colors
#include <vector>
#include <getopt.h>
#include <ctime>
#include <opencv2/opencv.hpp>
#include "inference.h"

using namespace std;
using namespace cv;
clock_t input_start, input_end, run_start, run_end, output_start, output_end, draw_start, draw_end, total_start, total_end;
double input_duration, run_duration, output_duration, draw_duration, total_duration;
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
    modelPath = onnxModelPath;
    modelShape = modelInputShape;
    classesPath = classesTxtFile;
    cudaEnabled = runWithCuda;

    loadOnnxNetwork();
    // loadClassesFromFile(); The classes are hard-coded for this example
}

std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    // time the input-blob preparation
    cv::Mat blob;
    input_start = clock();
    cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);
    input_end = clock();
    input_duration = (double)(input_end - input_start) / CLOCKS_PER_SEC;
    cout << "input time: " << input_duration << " s" << endl;

    // time the forward pass
    run_start = clock();
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];
    run_end = clock();
    run_duration = (double)(run_end - run_start) / CLOCKS_PER_SEC;
    cout << "run time:" << run_duration << "초" << endl;

    // output 시간체크
    output_start = clock();
    bool yolov8 = false;
    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
    // yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
    {
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        // reshape (1, 84, 8400) to (84, 8400), then transpose so each row
        // holds one candidate box, matching the YOLOv5 row layout
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    float *data = (float *)outputs[0].data;

    // cast before dividing: integer division would truncate the scale factors
    float x_factor = modelInput.cols / (float)modelShape.width;
    float y_factor = modelInput.rows / (float)modelShape.height;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i)
    {
        if (yolov8)
        {
            float *classes_scores = data + 4;

            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;

            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

            if (class_id.x == 0) // keep only class 0 ("person" in COCO)
            {
                if (maxClassScore > modelScoreThreshold)
                {
                    confidences.push_back(maxClassScore);
                    class_ids.push_back(class_id.x);

                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);

                    int width = int(w * x_factor);
                    int height = int(h * y_factor);

                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }
        else // yolov5
        {
            float confidence = data[4];

            if (confidence >= modelConfidenceThreshold)
            {
                float *classes_scores = data + 5;

                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
                cv::Point class_id;
                double max_class_score;

                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

                if (max_class_score > modelScoreThreshold)
                {
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);

                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);

                    int width = int(w * x_factor);
                    int height = int(h * y_factor);

                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }

        data += dimensions;
    }

    // non-maximum suppression: drop overlapping boxes, keep the highest-scoring ones
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    std::vector<Detection> detections{};
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i];

        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];

        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int> dis(100, 255);
        result.color = cv::Scalar(dis(gen),
                                  dis(gen),
                                  dis(gen));

        result.className = classes[result.class_id];
        result.box = boxes[idx];

        detections.push_back(result);
    }
    output_end = clock();
    output_duration = (double)(output_end - output_start) / CLOCKS_PER_SEC;
    cout << "output time: " << output_duration << " s" << endl;
    return detections;
}

void Inference::loadClassesFromFile()
{
    std::ifstream inputFile(classesPath);
    if (inputFile.is_open())
    {
        std::string classLine;
        while (std::getline(inputFile, classLine))
            classes.push_back(classLine);
        inputFile.close();
    }
}

void Inference::loadOnnxNetwork()
{
    net = cv::dnn::readNetFromONNX(modelPath);
    if (cudaEnabled)
    {
        std::cout << "\nRunning on CUDA" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else
    {
        std::cout << "\nRunning on CPU" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}

cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
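    // pad the frame to a square with black pixels (top-left aligned) so a
    // square model input does not distort the aspect ratio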
    int col = source.cols;
    int row = source.rows;
    int _max = MAX(col, row);
    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}

int main(int argc, char **argv)
{
    int count = 0;
    bool runOnGPU = true;
    std::string projectBasePath = "/home/yimstar9/github/ObjectTracking"; // set your ultralytics base path (unused below; the following paths are relative)
    std::string videoPath = "../../data/crowd.mp4";
    Inference inf("yolov8n.onnx", cv::Size(640, 640), "classes.txt", runOnGPU);

    cv::VideoCapture cap(videoPath);
    if (!cap.isOpened())
    {
        std::cout << "Failed to open the video file." << std::endl;
        return -1;
    }
    total_start = clock();

    // grab the first frame from the video file
    cv::Mat frame;
    cap.read(frame);
    // cv::namedWindow("Video", cv::WINDOW_NORMAL);
    // cv::resizeWindow("Video", frame.cols, frame.rows);

    // loop: run inference on each frame and display it
    while (true)
    {
        count += 1;
        std::cout << "frame: " << count << std::endl;
        // Inference starts here...
        std::vector<Detection> output = inf.runInference(frame);

        int detections = output.size();

        // time the drawing
        draw_start = clock();
        for (int i = 0; i < detections; ++i)
        {
            Detection detection = output[i];
            cv::Rect box = detection.box;
            cv::Scalar color = detection.color;

            // Detection box
            cv::rectangle(frame, box, color, 2);

            // Detection box text
            std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
            cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
            cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

            cv::rectangle(frame, textBox, color, cv::FILLED);
            cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
        }
        // show the current frame
        cv::imshow("Video", frame);
        draw_end = clock();
        draw_duration = (double)(draw_end - draw_start) / CLOCKS_PER_SEC;
        cout << "draw time: " << draw_duration << " s" << endl;

        // grab the next frame
        cap.read(frame);

        // stop when the video has finished playing
        if (frame.empty())
        {
            break;
        }

        // wait up to 30 ms for a key press
        if (cv::waitKey(30) == 27)
        { // exit on ESC
            break;
        }
    }

    total_end = clock();
    total_duration = (double)(total_end - total_start) / CLOCKS_PER_SEC;
    cout << "total time: " << total_duration << " s" << endl;
    // close any open windows
    // cv::destroyAllWindows();

    return 0;
}
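
One caveat on the timings printed above: clock() measures CPU time, so with a multi-threaded DNN backend the per-stage numbers can sum across threads and exceed wall-clock time. Here is a minimal sketch of measuring wall-clock time with std::chrono instead (an alternative, not what the code above uses):

#include <chrono>
#include <iostream>

int main()
{
    auto start = std::chrono::steady_clock::now();

    // ... the work to time, e.g. net.forward(...) on one frame ...

    auto end = std::chrono::steady_clock::now();
    double seconds = std::chrono::duration<double>(end - start).count();
    std::cout << "elapsed: " << seconds << " s" << std::endl;
    return 0;
}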
