OpenCV GPU对象检测速度慢，与CPU版本相比，检测更少 [英] OpenCV GPU object detection is slow and gives less detections as compared to CPU version

查看：3251 发布时间：2016/11/2 2:43:22 c++ opencv visual-c++ cuda

本文介绍了OpenCV GPU对象检测速度慢，与CPU版本相比，检测更少的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

下面是来自OpenCV的对象检测代码的CPU和GPU实现。

Below are CPU and GPU implementations of the object detection code from OpenCV.

1）GPU的执行速度比CPU版本慢

1) The GPU implementation is slower than the CPU version

2）与使用同一分类器的CPU版本代码相比，检测率较低

2) The detection rate is lower than that of the CPU version of the code for the same classifier

任何想法为什么会是这样？

Any idea why that is?

CPU版本代码 (CPU version of the code)

#include <windows.h>
#include <mmsystem.h>
#pragma comment(lib, "winmm.lib")

#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <iostream>
#include <stdio.h>

using namespace std;
using namespace cv;

int main(int argc, const char** argv)
{
    //create the cascade classifier object used for the face detection
    CascadeClassifier face_cascade;
    //use the haarcascade_frontalface_alt.xml library
    face_cascade.load("C:/cascades/haarcascade_frontalface_alt_tree.xml");

    //setup video capture device and link it to the first capture device
    VideoCapture captureDevice;
    captureDevice.open(3);

    //setup image files used in the capture process
    Mat captureFrame;
    Mat grayscaleFrame;

    //create a window to present the results
    namedWindow("outputCapture", 1);

    //create a loop to capture and find faces
    while(true)
    {
        //capture a new image frame
        captureDevice>>captureFrame;

        //convert captured image to gray scale and equalize
        cvtColor(captureFrame, grayscaleFrame, CV_BGR2GRAY);
        equalizeHist(grayscaleFrame, grayscaleFrame);

    //create a vector array to store the face found
    std::vector<Rect> faces;

    //find faces and store them in the vector array
    face_cascade.detectMultiScale(grayscaleFrame, faces);

    //draw a rectangle for all found faces in the vector array on the original image
    for(int i = 0; i < (int)faces.size(); i++)
    {
        Scalar color(0, 0, 255);

        Point pt1(faces[i].x + faces[i].width, faces[i].y + faces[i].height);
        Point pt2(faces[i].x, faces[i].y);

        rectangle(captureFrame, pt1, pt2, color, 1, 8, 0);

        string text = "Adam yuzi";
        int fontFace = FONT_HERSHEY_TRIPLEX;
        double fontScale = 1;
        int thickness = 2;  

        putText(captureFrame, text, pt2, fontFace, fontScale, color, thickness);
        //PlaySound(TEXT("C:/cascades/adam.wav"), NULL, SND_FILENAME | SND_SYNC);
        // the correct code
        //Sleep(1000);
        //break;
        //cout<<char(7);
        }
       //print the output
        imshow("outputCapture", captureFrame);

       //pause for 33ms
        waitKey(33);
    }
    return 0;
}

GPU版本的实现见此示例链接
GPU版本CODE

and the GPU version implementation is provided in this sample link: GPU version of the code

// WARNING: this sample is under construction! Use it on your own risk.
#if defined _MSC_VER && _MSC_VER >= 1400
#pragma warning(disable : 4100)
#endif


#include <iostream>
#include <iomanip>
#include "opencv2/contrib/contrib.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/cuda.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"

using namespace std;
using namespace cv;
using namespace cv::cuda;

// Prints the command-line usage and the OpenCV version to stdout.
// The option names listed here must match the ones parsed in main().
static void help()
{
    cout << "Usage: ./cascadeclassifier_gpu \n\t--cascade <cascade_file>\n\t(<image>|--video <video>|--camera <camera_id>)\n"
            "Using OpenCV version " << CV_VERSION << endl << endl;
}


static void convertAndResize(const Mat& src, Mat& gray, Mat& resized, double scale)
{
    if (src.channels() == 3)
    {
        cv::cvtColor( src, gray, COLOR_BGR2GRAY );
    }
    else
    {
        gray = src;
    }

    Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));

    if (scale != 1)
    {
        cv::resize(gray, resized, sz);
    }
    else
    {
        resized = gray;
    }
}

static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, double     scale)
{
    if (src.channels() == 3)
    {
        cv::cuda::cvtColor( src, gray, COLOR_BGR2GRAY );
    }
    else
    {
        gray = src;
    }

    Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));

    if (scale != 1)
    {
        cv::cuda::resize(gray, resized, sz);
    }
    else
    {
        resized = gray;
    }
}
// Draws one line of text onto `img`.
//   img       - image that is drawn on in place
//   lineOffsY - zero-based text row; the baseline is placed at
//               1.5 * text height * (row + 1), in integer arithmetic
//   fontColor - colour of the foreground text
//   ss        - the text to draw
// The text is drawn twice: first in black with a thicker stroke, then in
// `fontColor` on top, so the black pass forms an outline.
static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
{
    const int face = FONT_HERSHEY_DUPLEX;
    const double scale = 0.8;
    const int thickness = 2;
    const int lineType = 16; // passed as putText's lineType argument

    // Height of a reference string determines the row spacing.
    const Size refSize = cv::getTextSize("T[]", face, scale, thickness, 0);

    const Point origin(1, 3 * refSize.height * (lineOffsY + 1) / 2);

    // Outline pass, then foreground pass.
    putText(img, ss, origin, face, scale, Scalar(0,0,0), 5 * thickness / 2, lineType);
    putText(img, ss, origin, face, scale, fontColor, thickness, lineType);
}


// Overlays the current status on `canvas` using matPrint:
//   row 0  - "FPS = <fps>" with one decimal
//   row 1  - canvas size, GPU/CPU mode, OneFace/MultiFace mode, filter state
//   row 2+ - the full hotkey list when bHelp is set, otherwise only the hint
//            for toggling the help
static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps)
{
    const Scalar statusColor(255,0,0);
    const Scalar hintColor(118,185,0);

    ostringstream fpsLine;
    fpsLine << "FPS = " << setprecision(1) << fixed << fps;
    matPrint(canvas, 0, statusColor, fpsLine.str());

    const char *deviceTag = bGpu ? "GPU, " : "CPU, ";
    const char *faceTag = bLargestFace ? "OneFace, " : "MultiFace, ";
    const char *filterTag = bFilter ? "Filter:ON" : "Filter:OFF";

    ostringstream modeLine;
    modeLine << "[" << canvas.cols << "x" << canvas.rows << "], "
             << deviceTag << faceTag << filterTag;
    matPrint(canvas, 1, statusColor, modeLine.str());

    // Plain string literals are passed here rather than temporary
    // ostringstream objects (kept from the original "MacOS fix" note).
    if (!bHelp)
    {
        matPrint(canvas, 2, hintColor, "H - toggle hotkeys help");
        return;
    }

    static const char *const hotkeyLines[] = {
        "Space - switch GPU / CPU",
        "M - switch OneFace / MultiFace",
        "F - toggle rectangles Filter",
        "H - toggle hotkeys help",
        "1/Q - increase/decrease scale"
    };
    const int hotkeyCount = static_cast<int>(sizeof(hotkeyLines) / sizeof(hotkeyLines[0]));
    for (int row = 0; row < hotkeyCount; ++row)
    {
        matPrint(canvas, row + 2, hintColor, hotkeyLines[row]);
    }
}


int main(int argc, const char *argv[])
{
    if (argc == 1)
    {
        help();
        return -1;
    }

    if (getCudaEnabledDeviceCount() == 0)
    {
        return cerr << "No GPU found or the library is compiled without CUDA support"     << endl, -1;
    }

    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());

    string cascadeName;
    string inputName;
    bool isInputImage = false;
    bool isInputVideo = false;
    bool isInputCamera = false;

    for (int i = 1; i < argc; ++i)
    {
        if (string(argv[i]) == "--cascade")
            cascadeName = argv[++i];
        else if (string(argv[i]) == "--video")
        {
            inputName = argv[++i];
            isInputVideo = true;
        }
        else if (string(argv[i]) == "--camera")
        {
            inputName = argv[++i];
            isInputCamera = true;
        }
        else if (string(argv[i]) == "--help")
        {
            help();
            return -1;
        }    
        else if (!isInputImage)
        {
            inputName = argv[i];
            isInputImage = true;
        }
        else
        {
            cout << "Unknown key: " << argv[i] << endl;
            return -1;
        }
    }

    CascadeClassifier_CUDA cascade_gpu;
    if (!cascade_gpu.load(cascadeName)){
        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName <<     "\"" << endl, help(), -1;
    }

    CascadeClassifier cascade_cpu;
    if (!cascade_cpu.load(cascadeName)) {
        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName <<     "\"" << endl, help(), -1;
    }

    VideoCapture capture;
    Mat image;

    if (isInputImage) {
        image = imread(inputName);
        CV_Assert(!image.empty());
        }
    else if (isInputVideo) {
        capture.open(inputName);
        CV_Assert(capture.isOpened());
    }
else   {
        capture.open(atoi(inputName.c_str()));
        CV_Assert(capture.isOpened());
    }

    namedWindow("result", 1);

    Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
    vector<Rect> facesBuf_cpu;

    GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;

/* parameters */
    bool useGPU = true;
    double scaleFactor = 1.0;
    bool findLargestObject = false;
    bool filterRects = true;
    bool helpScreen = false;

    int detections_num;
    for (;;)    {
        if (isInputCamera || isInputVideo)        {
            capture >> frame;
            if (frame.empty())            {
                break;
            }
        }

        (image.empty() ? frame : image).copyTo(frame_cpu);
        frame_gpu.upload(image.empty() ? frame : image);

        convertAndResize(frame_gpu, gray_gpu, resized_gpu, scaleFactor);
        convertAndResize(frame_cpu, gray_cpu, resized_cpu, scaleFactor);

        TickMeter tm;
        tm.start();

    if (useGPU)        {
            //cascade_gpu.visualizeInPlace = true;
            cascade_gpu.findLargestObject = findLargestObject;

            detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu,     1.2,
                                                          (filterRects ||     findLargestObject) ? 4 : 0);
            facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
        }
        else        {
            Size minSize = cascade_gpu.getClassifierSize();
            cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2,
                                         (filterRects || findLargestObject) ? 4 : 0,
                                         (findLargestObject ?     CASCADE_FIND_BIGGEST_OBJECT : 0)
                                            | CASCADE_SCALE_IMAGE,
                                         minSize);
            detections_num = (int)facesBuf_cpu.size();
        }

        if (!useGPU && detections_num)      {
            for (int i = 0; i < detections_num; ++i)            {
                rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255));
            }
        }

        if (useGPU)        {
            resized_gpu.download(resized_cpu);
             for (int i = 0; i < detections_num; ++i)     {
                rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i],     Scalar(255));
             }
        }

           tm.stop();
        double detectionTime = tm.getTimeMilli();
        double fps = 1000 / detectionTime;
        //print detections to console
        cout << setfill(' ') << setprecision(2);
        cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det";
    if ((filterRects || findLargestObject) && detections_num > 0)        {
            Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
            for (int i = 0; i < min(detections_num, 2); ++i)            {
                cout << ", [" << setw(4) << faceRects[i].x
                     << ", " << setw(4) << faceRects[i].y
                         << ", " << setw(4) << faceRects[i].width
                         << ", " << setw(4) << faceRects[i].height << "]";
                    }
            }
            cout << endl;

            cv::cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR);
            displayState(frameDisp, helpScreen, useGPU, findLargestObject, filterRects,     fps);
            imshow("result", frameDisp);

            char key = (char)waitKey(5);
            if (key == 27)        {
                break;
            }    
            switch (key)            {
            case ' ':
                useGPU = !useGPU;
                break;
            case 'm':
            case 'M':
                findLargestObject = !findLargestObject;
                break;
            case 'f':
                case 'F':
                filterRects = !filterRects;
                break;
            case '1':
                scaleFactor *= 1.05;
                break;
                case 'q':
            case 'Q':
                scaleFactor /= 1.05;
                break;
            case 'h':
            case 'H':
                helpScreen = !helpScreen;
                break;
            }
        }
        return 0;
    }

注意：这段代码不是我写的，CPU版本和GPU版本均取自他处（此处的来源链接已丢失）。我也在其他地方发布了我的观察结果。

NOTE: I did not write this code; I took the CPU version from one source and the GPU version from another (the links are missing in this copy). I also posted my observations elsewhere.

OpenCV GPU对象检测速度慢，与CPU版本相比，检测更少 [英] OpenCV GPU object detection is slow and gives less detections as compared to CPU version

问题描述

推荐答案

相关文章

C/C++开发最新文章

热门教程

热门工具

登录关闭

OpenCV GPU对象检测速度慢，与CPU版本相比，检测更少 [英] OpenCV GPU object detection is slow and gives less detections as compared to CPU version

问题描述

推荐答案

相关文章

C/C++开发最新文章

热门教程

热门工具

登录 关闭

登录关闭