OpenCV - 加速SSD的3x3补丁的计算 [英] OpenCV - Speeding up the computation of SSD of 3x3 patches

查看:152
本文介绍了OpenCV - 加速SSD的3x3补丁的计算的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

作为更大应用程序的一部分,我需要计算以下代码:

  // Shift every patch centre by 25 pixels; (int)(25 + 0.5) truncates to 25.
  ax2 += (int)(25 + 0.5);
  ay2 += (int)(25 + 0.5);

  bx2 += (int)(25 + 0.5);
  by2 += (int)(25 + 0.5);

  cx2 += (int)(25 + 0.5);
  cy2 += (int)(25 + 0.5);

  // Accumulate into suma the sum of squared differences between the 3x3
  // windows of grayImage addressed by (ay2, ax2) and (by2, bx2).
  for (int ix = -1; ix <= 1; ix++) {
      for (int iy = -1; iy <= 1; iy++) {
          // Both factors are the same pixel difference, so the product is its square.
          suma += (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix))
                * (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix));
      }
  }

它基本上是计算两个 3x3 图像块之间的平方差之和(SSD)。



运行速度非常慢。有什么办法加速吗?



编辑:



我更改为以下版本:

  // Compute each pixel difference once, then add its square to suma.
  for (int ix = -1; ix <= 1; ix++) {
      for (int iy = -1; iy <= 1; iy++) {
          double difa = grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix);
          suma += (difa) * (difa);
      }
  }

这样运行得更快了,但还有什么办法可以进一步改进吗?

感谢,



Gil。



编辑:根据评论和回答,我现在使用以下代码:

  // Hand-unrolled version: for each of the three rows (iy = -1, 0, +1) fetch
  // the row pointers of patches a, b and c once, then accumulate the squared
  // differences a-b into suma and c-b into sumc for column offsets -1, 0, +1.

  // iy = -1
  Mi_a = grayImage.ptr<uchar>(ay2 - 1);
  Mi_b = grayImage.ptr<uchar>(by2 - 1);
  Mi_c = grayImage.ptr<uchar>(cy2 - 1);

  difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
  sumc += (difc) * (difc);
  difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
  sumc += (difc) * (difc);
  difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
  sumc += (difc) * (difc);

  // iy = 0
  Mi_a = grayImage.ptr<uchar>(ay2 + 0);
  Mi_b = grayImage.ptr<uchar>(by2 + 0);
  Mi_c = grayImage.ptr<uchar>(cy2 + 0);

  difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
  sumc += (difc) * (difc);
  difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
  sumc += (difc) * (difc);
  difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
  sumc += (difc) * (difc);

  // iy = +1
  Mi_a = grayImage.ptr<uchar>(ay2 + 1);
  Mi_b = grayImage.ptr<uchar>(by2 + 1);
  Mi_c = grayImage.ptr<uchar>(cy2 + 1);

  difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
  sumc += (difc) * (difc);
  difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
  sumc += (difc) * (difc);
  difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
  suma += (difa) * (difa);
  difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
  sumc += (difc) * (difc);

或者它的循环版本具有相同的运行时间:

  // Looped form of the row-pointer version: one pointer fetch per row and
  // patch, then plain [] indexing for the three column offsets.
  for (int iy = -1; iy <= 1; iy++)
  {
      const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
      const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
      const uchar * Mi_c = grayImage.ptr<uchar>(cy2 + iy);
      for (int ix = -1; ix <= 1; ix++)
      {
          // Squared difference between patches a and b.
          double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
          suma += (difa) * (difa);

          // Squared difference between patches c and b.
          double difc = Mi_c[cx2 + ix] - Mi_b[bx2 + ix];
          sumc += (difc) * (difc);
      }
  }

还有什么办法可以进一步加快速度吗?

感谢



Gil

解决方案

摘自 OpenCV Mat 文档:


如果你需要处理一个二维数组的整行,最有效的方法是首先获取该行的指针,然后使用普通C运算符[]


所以:

 code> for(int iy = -1; iy <= 1; iy ++)
{
const uchar * Mi_a = grayImage.ptr< uchar>(ay2 + iy);
const uchar * Mi_b = grayImage.ptr< uchar>(by2 + iy);

for(int ix = -1; ix <= 1; ix ++)
{
double difa = Mi_a [ax2 + ix] - Mi_b [bx2 + ix]
suma + =(difa)*(difa);
}
}


As part of a bigger application, I need to compute the following code:

// Shift every patch centre by 25 pixels; (int)(25 + 0.5) truncates to 25.
ax2 += (int)(25 + 0.5);
ay2 += (int)(25 + 0.5);

bx2 += (int)(25 + 0.5);
by2 += (int)(25 + 0.5);

cx2 += (int)(25 + 0.5);
cy2 += (int)(25 + 0.5);

// Accumulate into suma the sum of squared differences between the 3x3
// windows of grayImage addressed by (ay2, ax2) and (by2, bx2).
for (int ix = -1; ix <= 1; ix++) {
    for (int iy = -1; iy <= 1; iy++) {
        // The second factor needs its own parentheses: without them the
        // statement is unbalanced and would not square the difference.
        suma += (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix))
              * (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix));
    }
}

It basically computes the sum of squared difference of two 3X3 patches.

It runs extremely slow. Is there any way of speeding it up?

EDIT:

I changed to the following version:

// Accumulate into suma the squared pixel differences over a 3x3 window:
// ix and iy run over the offsets -1..1 applied to (ay2, ax2) and (by2, bx2).
for (int ix = -1; ix <= 1; ix++){
        for (int iy = -1; iy <= 1; iy++){
            // The difference is evaluated once and then squared, so each
            // .at<uchar>() lookup happens only once per pixel pair.
            double difa = grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix);
            suma += (difa)*(difa);
        }
}

And it runs faster, but is there any way to improve it further?

Thanks,

Gil.

EDIT: following the comment and the answer, I'm now using the following code:

// Hand-unrolled version: for each of the three rows (iy = -1, 0, +1) the row
// pointers of patches a, b and c are fetched once via ptr<uchar>(), then the
// squared differences a-b are added to suma and c-b to sumc for the column
// offsets -1, 0 and +1. Mi_a/Mi_b/Mi_c, difa/difc and suma/sumc are declared
// outside this snippet.
//int iy = -1;
     Mi_a = grayImage.ptr<uchar>(ay2 - 1);
     Mi_b = grayImage.ptr<uchar>(by2 - 1);
     Mi_c = grayImage.ptr<uchar>(cy2 - 1);

    difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
    sumc += (difc)*(difc);
    difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
    sumc += (difc)*(difc);
    difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
    sumc += (difc)*(difc);



    // Middle row of the window (row offset 0).
    //int iy=0;
    Mi_a = grayImage.ptr<uchar>(ay2 + 0);
    Mi_b = grayImage.ptr<uchar>(by2 + 0);
    Mi_c = grayImage.ptr<uchar>(cy2 + 0);

    difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
    sumc += (difc)*(difc);
    difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
    sumc += (difc)*(difc);
    difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
    sumc += (difc)*(difc);


    // Last row of the window (row offset +1).
    //int iy=1
    Mi_a = grayImage.ptr<uchar>(ay2 + 1);
    Mi_b = grayImage.ptr<uchar>(by2 + 1);
    Mi_c = grayImage.ptr<uchar>(cy2 + 1);

    difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
    sumc += (difc)*(difc);
    difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
    sumc += (difc)*(difc);
    difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
    suma += (difa)*(difa);
    difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
    sumc += (difc)*(difc);

Or its looped version, which has the same running time:

// Looped form of the row-pointer version: for each row offset iy the three
// row pointers are fetched once, then indexed with [] for each column offset ix.
for (int iy = -1; iy <= 1; iy++)
    {
        const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
        const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
        const uchar * Mi_c = grayImage.ptr<uchar>(cy2 + iy);
        for (int ix = -1; ix <= 1; ix++)
        {
            // Squared difference between patches a and b, accumulated in suma.
            double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
            suma += (difa)*(difa);

            // Squared difference between patches c and b, accumulated in sumc.
            double difc = Mi_c[cx2 + ix] - Mi_b[bx2 + ix];
            sumc += (difc)*(difc);
        }
    }

Is there any way to speed it even further?

Thanks

Gil

解决方案

From OpenCv Mat documentation

If you need to process a whole row of a 2D array, the most efficient way is to get the pointer to the row first, and then just use the plain C operator []

So:

// Fetch each row pointer once, then index it with plain operator[]
// instead of calling Mat::at for every pixel.
for (int iy = -1; iy <= 1; iy++)
{
    const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
    const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);

    for (int ix = -1; ix <= 1; ix++)
    {
        // uchar operands are promoted to int before the subtraction, so the
        // difference can be negative; it is squared before being accumulated.
        double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
        suma += (difa)*(difa);
    }
}

这篇关于OpenCV - 加速SSD的3x3补丁的计算的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆