OpenCV - 加速SSD的3x3补丁的计算 [英] OpenCV - Speeding up the computation of SSD of 3x3 patches
问题描述
作为更大应用程序的一部分,我需要计算以下代码:
ax2 += (int)(25 + 0.5);
ay2 += (int)(25 + 0.5);
bx2 += (int)(25 + 0.5);
by2 += (int)(25 + 0.5);
cx2 += (int)(25 + 0.5);
cy2 += (int)(25 + 0.5);
for (int ix = -1; ix <= 1; ix++){
for (int iy = -1; iy <= 1; iy++){
suma += (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix)) * (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix));
}
}
它基本上计算两个3X3补丁的平方差之和(SSD)。
运行速度非常慢。有什么办法加速吗?
编辑:
我更改为以下版本:
for (int ix = -1; ix <= 1; ix++){
for (int iy = -1; iy <= 1; iy++){
double difa = grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix);
suma += (difa)*(difa);
}
}
运行速度更快,但有什么办法进一步改进吗?
感谢,
Gil。
编辑:根据评论和答案,我现在使用以下代码:
// int iy = -1;
Mi_a = grayImage.ptr<uchar>(ay2 - 1);
Mi_b = grayImage.ptr<uchar>(by2 - 1);
Mi_c = grayImage.ptr<uchar>(cy2 - 1);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
// int iy = 0;
Mi_a = grayImage.ptr<uchar>(ay2 + 0);
Mi_b = grayImage.ptr<uchar>(by2 + 0);
Mi_c = grayImage.ptr<uchar>(cy2 + 0);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
// int iy = 1
Mi_a = grayImage.ptr<uchar>(ay2 + 1);
Mi_b = grayImage.ptr<uchar>(by2 + 1);
Mi_c = grayImage.ptr<uchar>(cy2 + 1);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
或者它的循环版本具有相同的运行时间:
for (int iy = -1; iy <= 1; iy++)
{
const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
const uchar * Mi_c = grayImage.ptr<uchar>(cy2 + iy);
for (int ix = -1; ix <= 1; ix++)
{
double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
suma += (difa)*(difa);
double difc = Mi_c[cx2 + ix] - Mi_b[bx2 + ix];
sumc += (difc)*(difc);
}
}
有什么办法进一步加快速度吗?
感谢
Gil
如果你需要处理一个二维数组的整行,最有效的方法是首先获取该行的指针,然后使用普通C运算符[]
所以:
for (int iy = -1; iy <= 1; iy++)
{
const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
for (int ix = -1; ix <= 1; ix++)
{
double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
suma += (difa)*(difa);
}
}
As part of a bigger application, I need to compute the following code:
ax2 += (int)(25 + 0.5);
ay2 += (int)(25 + 0.5);
bx2 += (int)(25 + 0.5);
by2 += (int)(25 + 0.5);
cx2 += (int)(25 + 0.5);
cy2 += (int)(25 + 0.5);
for (int ix = -1; ix <= 1; ix++){
for (int iy = -1; iy <= 1; iy++){
suma += (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix)) * (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix));
}
}
It basically computes the sum of squared difference of two 3X3 patches.
It runs extremely slow. Is there any way of speeding it up?
EDIT:
I changed to the following version:
for (int ix = -1; ix <= 1; ix++){
for (int iy = -1; iy <= 1; iy++){
double difa = grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix);
suma += (difa)*(difa);
}
}
And it runs faster, but is there any way to improve it further?
Thanks,
Gil.
EDIT: following the comment and the answer, I'm now using the following code:
//int iy = -1;
Mi_a = grayImage.ptr<uchar>(ay2 - 1);
Mi_b = grayImage.ptr<uchar>(by2 - 1);
Mi_c = grayImage.ptr<uchar>(cy2 - 1);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
//int iy=0;
Mi_a = grayImage.ptr<uchar>(ay2 + 0);
Mi_b = grayImage.ptr<uchar>(by2 + 0);
Mi_c = grayImage.ptr<uchar>(cy2 + 0);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
//int iy=1
Mi_a = grayImage.ptr<uchar>(ay2 + 1);
Mi_b = grayImage.ptr<uchar>(by2 + 1);
Mi_c = grayImage.ptr<uchar>(cy2 + 1);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
Or its looped version, which has the same running time:
for (int iy = -1; iy <= 1; iy++)
{
const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
const uchar * Mi_c = grayImage.ptr<uchar>(cy2 + iy);
for (int ix = -1; ix <= 1; ix++)
{
double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
suma += (difa)*(difa);
double difc = Mi_c[cx2 + ix] - Mi_b[bx2 + ix];
sumc += (difc)*(difc);
}
}
Is there any way to speed it even further?
Thanks
Gil
If you need to process a whole row of a 2D array, the most efficient way is to get the pointer to the row first, and then just use the plain C operator []
So:
for (int iy = -1; iy <= 1; iy++)
{
const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
for (int ix = -1; ix <= 1; ix++)
{
double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
suma += (difa)*(difa);
}
}
这篇关于OpenCV - 加速SSD的3x3补丁的计算的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!