图像处理中的边界检查 [英] Border check in image processing

查看：308 发布时间：2016/5/29 15:03:49 c arm simd neon

本文介绍了图像处理中边界检查的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

在图像处理中应用各种滤波器时，我想处理好边界条件。我通过外推（复制边缘像素）来扩展边界并生成新的边界。例如，我有一个 4x3（宽 x 高）的输入：

// 输入（3 行 x 4 列）
int image[3][4] =
1 2 3 4
2 4 6 8
3 6 9 12

// 输出（5 行 x 6 列）
int extensionimage[5][6] =
1 1 2 3 4 4
1 1 2 3 4 4
2 2 4 6 8 8
3 3 6 9 12 12
3 3 6 9 12 12

我的code：

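#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void padd_border(int *img,int *extension,int width,int height);

int main(){
    int width = 4,height = 3;
    int *img =  new int[(width) * (height)];
    for(int j = 0;j < height; j++){
        for(int i = 0;i < width; i++){
            img[j*width + i] = (i+1)*(j+1);
            printf("%d\t",img[j*width + i]);
        }
    }
    //Allocate memory for signal extension
    int *extension =  new int[(width + 2) * (height + 2)];

    //Check memory allocation
    if (!extension)
        return 0;

    // init to zero
    memset(extension, 0, sizeof(int)*(width + 2) * (height + 2));

    //Padd the input for border conditions
    padd_border(img,extension,width,height);
    //HERE using "extension" input for dummy functionality

    delete[] extension;
    delete[] img;

    return 0;
}

void padd_border(int *image,int *extension,int width,int height){

    //   Create image extension
    for (int i = 0; i < height; ++i)
    {
        memcpy(extension + (width + 2) * (i + 1) + 1, image + width * i, width * sizeof(int));
        extension[(width + 2) * (i + 1)] = image[width * i];
        extension[(width + 2) * (i + 2) - 1] = image[width * (i + 1) - 1];
    }

    //   Fill first line of image extension
    memcpy(extension, extension + width + 2, (width + 2) * sizeof(int));
    //   Fill last line of image extension
    memcpy(extension + (width + 2) * (height + 1), extension + (width + 2) * height, (width + 2) * sizeof(int));
}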

我的问题：

1）我不想创建“extension”缓冲区，而是想直接复用原图像来做外推。这可行吗？

2）针对我上面的代码，如何使用 NEON 来实现？

按照 PaulR 的伪代码修改代码后，我得到了一些奇怪的结果：

固定边界结果在编辑过程中我对运行时间的问题的问题
我输入：

  221 220 221 223 230 233 234 235 ..
71 73 70 70 92 130 141 143 ..

我想这个操作获得目标：

  -1 * v_m1_m1 + 0 * v_m1_0 + 1 * v_m1_p1
 -1 * v_0_m1 + 0 * v_0_0 + 1 * v_0_p1  - ＆GT; V_OUT
 -1 * v_p1_m1 + 0 * v_p1_0 + 1 * v_p1_p1

改变code边境我得到以下valuse后：

  221 221 221 221 221 220 221 223 230 233 234 235
    221 221 221 221 221 220 221 223 230 233 234 235
    71 71 71 71 71 73 70 70 92 130 141 143

在标量code，如果我需要计算221（@ I，J = 0,0），边境它正在寻找这样的：

  221 221 220
 221 221 220
 71 71 73

但使用 NEON 向量化后，我得到的是下面这个错误的结果：

  v_m1_m1.0 v_m1_0.1 v_m1_p1.2
v_0_m1.0 v_0_0.1 v_0_p1.2
v_p1_m1.0 v_p1_0.1 v_p1_p1.2
221 221 230
221 221 230
71 71 92

我的伪code：

 对于i = 0至NROWS  -  1
        //初始化行指针
        p_row_m1 = SRC + src_width * MAX（I-1，0）; //指向minus1行
        p_row_0 = SRC + src_width *我; //指向当前行
        p_row_p1 = SRC + src_width * MIN第（i + 1，src_width-1）; //指向PLUS1行        v_m1_m1 = vdupq_n_u32（p_row_m1 [0]）; //填补SRC左向量[I-1] [0]
        v_0_m1 = vdupq_n_u32（p_row_0 [0]）; //从SRC填补左向量[I] [0]
        v_p1_m1 = vdupq_n_u32（p_row_p1 [0]）; //从SRC填充左矢量第[i + 1] [0]        v_m1_0 = vld1q_u32（安培; p_row_m1 [0]）; //从SRC负荷中心矢量[I-1] [0..7]
        v_0_0 = vld1q_u32（安培; p_row_0 [0]）; //从SRC负荷中心向量[I] [0..7]
        v_p1_0 = vld1q_u32（安培; p_row_p1 [0]）; //从SRC负荷中心向量[I + 1] [0..7]        对于j = 0到（NCOLS  -  4）步骤4 //假设每SIMD向量4元件            v_m1_p1 = vld1q_u32（安培; p_row_m1 [J + 4]）; //将SRC权向量[I-1] [0..7]
            v_0_p1 = vld1q_u32（安培; p_row_0 [J + 4]）; //从SRC加载正确的向量[I] [0..7]
            v_p1_p1 = vld1q_u32（安培; p_row_p1 [J + 4]）; //从SRC加载正确的向量[I + 1] [0..7]
    //
    //你现在有载体的一个3x3的排列上
    //您可以执行操作居委会和生成
    当前迭代// 16输出像素：
    //
    // v_m1_m1 v_m1_0 v_m1_p1
    // v_0_m1 v_0_0 v_0_p1
    // v_p1_m1 v_p1_0 v_p1_p1
    //
    // |
    // V
    //
    // V_OUT
    vst1q_s32（V_OUT，＆安培; image_out [I] [J]）//在image_out店输出向量[I] [j..j + 15]
    //洗牌载体，这样我们可以在下一次迭代中使用它们
    v_m1_m1 = v_m1_0
    v_m1_0 = v_m1_p1    v_0_m1 = v_0_0
    v_0_0 = v_0_p1    v_p1_m1 = v_p1_0
    v_p1_0 = v_p1_p1  END_FOR
  //为最后一次迭代，我们需要处理的右边缘的像素...
  v_m1_p1 = vdupq_n_u32（p_row_m1 [NCOLS-1]）//填补肖像权向量[I-1] [NCOLS-1]
  v_0_p1 = vdupq_n_u32（p_row_0 [NCOLS-1]）//从图像填满权向量[I] [NCOLS-1]
  v_p1_p1 = vdupq_n_u32（p_row_p1 [NCOLS-1]）//填补肖像权向量[I + 1] [NCOLS-1]
  //计算V_OUT如上
  vst1q_s32（V_OUT，＆安培; image_out [I] [J]）//在image_out店输出向量[I] [ncols_16..ncols-1]
END_FOR

解决方案

下面是一段伪代码，使用 SIMD 并通过复制边缘像素来执行 3x3 邻域操作。输入图像为 image[nrows][ncols]，输出图像为 image_out[nrows][ncols]。

 对于i = 0至NROWS  -  1
  //初始化行指针
  p_row_m1 =安培;图像[最大值（ⅰ-1，0）] [0] //指针开始行第i-1
  p_row_0 =安培;图片[I] [0] //指向启动行我
  p_row_p1 =安培;图像[分钟第（i + 1，NCOLS-1）] [0] //指针开始行i + 1的
  v_m1_m1 = init_vec（p_row_m1 [0]）//从图像填充左向量[I-1] [0]
  v_0_m1 = init_vec（p_row_0 [0]）//从图像填充左向量[I] [0]
  v_p1_m1 = init_vec（p_row_p1 [0]）//从图像填充左向量[I + 1] [0]
  v_m1_0 = load_vec（安培; p_row_m1 [0]）从图像//负荷中心矢量[I-1] [0..15]
  v_0_0 = load_vec（安培; p_row_0 [0]）从图像//负荷中心向量[I] [0..15]
  v_p1_0 = load_vec（安培; p_row_p1 [0]）从图像//负荷中心矢量第[i + 1] [0..15]
  对于j = 0到（NCOLS  -  16）步骤16 //假设每SIMD向量16的元素
    v_m1_p1 = load_vec（安培; p_row_m1 [J + 16]）//从图像加载正确的向量[I-1] [0..15]
    v_0_p1 = load_vec（安培; p_row_0 [J + 16]）//从图像加载正确的向量[I] [0..15]
    v_p1_p1 = load_vec（安培; p_row_p1 [J + 16]）//从图像加载正确的向量[I + 1] [0..15]
    //
    //你现在有载体的一个3x3的排列上
    //您可以执行操作居委会和生成
    当前迭代// 16输出像素：
    //
    // v_m1_m1 v_m1_0 v_m1_p1
    // v_0_m1 v_0_0 v_0_p1
    // v_p1_m1 v_p1_0 v_p1_p1
    //
    // |
    // V
    //
    // V_OUT
    //
    store_vec（V_OUT，＆安培; image_out [I] [J]）//在image_out店输出向量[I] [j..j + 15]
    //洗牌载体，这样我们可以在下一次迭代中使用它们
    v_m1_m1 = v_m1_0
    v_m1_0 = v_m1_p1
    v_0_m1 = v_0_0
    v_0_0 = v_0_p1
    v_p1_m1 = v_p1_0
    v_p1_0 = v_p1_p1
  END_FOR
  //为最后一次迭代，我们需要处理的右边缘的像素...
  v_m1_p1 = init_vec（p_row_m1 [NCOLS-1]）//填补肖像权向量[I-1] [NCOLS-1]
  v_0_p1 = init_vec（p_row_0 [NCOLS-1]）//从图像填满权向量[I] [NCOLS-1]
  v_p1_p1 = init_vec（p_row_p1 [NCOLS-1]）//填补肖像权向量[I + 1] [NCOLS-1]
  //计算V_OUT如上
  store_vec（V_OUT，＆安培; image_out [I] [J]）//在image_out店输出向量[I] [ncols_16..ncols-1]
END_FOR

请注意，这里假设每个向量包含 16 个像素，并且 ncols 是 16 的倍数。

I want to take care of the border conditions when applying filters in image processing. I am extrapolating the border (replicating the edge pixels) to create a new boundary. For example, given a 4x3 (width x height) input:

//Input
int image[3][4] =   // 3 rows x 4 columns
1 2 3 4 
2 4 6 8 
3 6 9 12

//Output
int extensionimage[5][6] =   // 5 rows x 6 columns
1 1 2 3 4 4
1 1 2 3 4 4 
2 2 4 6 8 8
3 3 6 9 12 12
3 3 6 9 12 12

My code :

#include <stdio.h> 
#include <string.h> 
#include <stdlib.h> 

void padd_border(int *img,int *extension,int width,int height);

/*
 * Demo driver for padd_border().
 *
 * Builds a width x height test image whose pixel at column i, row j is
 * (i + 1) * (j + 1), prints the pixels tab-separated in row-major order,
 * then pads the image with a one-pixel replicated border into `extension`.
 *
 * Returns 0 on success, EXIT_FAILURE if either buffer cannot be allocated.
 */
int main(void)
{
    const int width = 4;
    const int height = 3;
    const size_t img_count = (size_t)width * (size_t)height;
    const size_t ext_count = (size_t)(width + 2) * (size_t)(height + 2);

    /*
     * malloc/calloc report failure by returning a null pointer, so the check
     * below is meaningful (a plain new[] throws instead of returning null).
     * The casts are only there so the file keeps building as C++ as well as C.
     */
    int *img = (int *)malloc(img_count * sizeof *img);
    /* calloc zero-initialises the padded buffer, replacing a separate memset. */
    int *extension = (int *)calloc(ext_count, sizeof *extension);
    if (!img || !extension) {
        /* free() accepts a null pointer, so no per-pointer guard is needed. */
        free(extension);
        free(img);
        return EXIT_FAILURE;
    }

    for (int j = 0; j < height; j++) {
        for (int i = 0; i < width; i++) {
            img[j * width + i] = (i + 1) * (j + 1);
            printf("%d\t", img[j * width + i]);
        }
    }

    /* Pad the input for border conditions. */
    padd_border(img, extension, width, height);
    /* HERE: use "extension" as the input of the actual filter. */

    free(extension);
    free(img);

    return 0;
}

/*
 * Copy `image` into the centre of `extension` and fill the one-pixel frame
 * around it by replicating the nearest edge pixel.
 *
 * image     - source pixels, row-major, width * height ints; only read.
 * extension - destination, row-major, (width + 2) * (height + 2) ints;
 *             every element is written. Must not overlap `image`.
 * width     - number of columns in `image`.
 * height    - number of rows in `image`.
 *
 * The call is a no-op when either pointer is null or either dimension is not
 * positive: with width <= 0 the edge-pixel reads would fall outside `image`.
 */
void padd_border(int *image,int *extension,int width,int height){
    if (!image || !extension || width <= 0 || height <= 0) {
        return;
    }

    /* Index arithmetic in size_t so large images cannot overflow int. */
    const size_t src_stride = (size_t)width;
    const size_t dst_stride = src_stride + 2;
    const size_t rows = (size_t)height;

    /* Interior rows: left edge pixel, the image row itself, right edge pixel. */
    for (size_t r = 0; r < rows; ++r) {
        const int *src = image + src_stride * r;
        int *dst = extension + dst_stride * (r + 1);

        dst[0] = src[0];
        memcpy(dst + 1, src, src_stride * sizeof *src);
        dst[dst_stride - 1] = src[src_stride - 1];
    }

    /* Top border row is a copy of the first padded image row (corners included). */
    memcpy(extension, extension + dst_stride, dst_stride * sizeof *extension);
    /* Bottom border row is a copy of the last padded image row. */
    memcpy(extension + dst_stride * (rows + 1), extension + dst_stride * rows,
           dst_stride * sizeof *extension);
}

My questions:

1) I don't want to create the "extension" buffer; I would rather reuse the image itself for the extrapolation. Is that possible?

2) How can I use NEON to do this for my code above?

After changing the code according to PaulR's pseudo code, I am getting some strange results:

Edit: updating my question with the run-time issues I hit while fixing the border.
My Input :

221 220 221 223 230 233 234 235 ..
71  73  70  70  92  130 141 143 ..

I want to apply this operation to get the destination:

 -1*v_m1_m1 + 0*v_m1_0 + 1*v_m1_p1
 -1*v_0_m1  + 0*v_0_0  + 1*v_0_p1       ->V_OUT
 -1*v_p1_m1 + 0*v_p1_0 + 1*v_p1_p1

After changing the code for the border, I am getting the values below:

    221 221 221 221    221 220 221 223   230 233 234 235
    221 221 221 221    221 220 221 223   230 233 234 235
    71  71  71  71     71  73  70  70    92  130 141 143

In scalar code, if I want to calculate the output for 221 (at i,j = 0,0), the neighbourhood with the border looks like this:

 221 221 220
 221 221 220
 71  71  73

But with NEON vectorization I am getting the following, which is wrong:

v_m1_m1.0  v_m1_0.1  v_m1_p1.2
v_0_m1.0   v_0_0.1   v_0_p1.2
v_p1_m1.0  v_p1_0.1  v_p1_p1.2


221 221 230 
221 221 230
71  71  92

my pseudo code:

// Row-by-row 3x3 neighbourhood pass over src using 4-lane vectors.
// For each row it keeps a 3x3 arrangement of vectors (rows i-1, i, i+1 by
// left / centre / right) and slides it along the row 4 columns at a time.
// NOTE(review): each v_* holds 4 consecutive pixels, so lane k of a "left"
// vector is 4 columns (not 1) away from lane k of the "centre" vector on every
// iteration after the first; the per-pixel left/right neighbours presumably
// have to be formed by combining adjacent vectors (e.g. vextq_u32) - confirm.
for i = 0 to nrows - 1
        // init row pointers (row i-1 is clamped to row 0 at the top edge)
        p_row_m1 = src + src_width * MAX(i-1, 0);           // pointing to minus1 row
        p_row_0  = src + src_width * i;                     // pointing to current row
        p_row_p1 = src + src_width * MIN(i+1, src_width-1); // pointing to plus1 row; NOTE(review): a row index is clamped with src_width (a column count) - looks like it should be nrows-1, confirm

        v_m1_m1 = vdupq_n_u32(p_row_m1[0]);   // fill left vector with src[i-1][0] (replicated left edge)
        v_0_m1  = vdupq_n_u32(p_row_0[0]);    // fill left vector with src[i][0]
        v_p1_m1 = vdupq_n_u32(p_row_p1[0]);   // fill left vector with src[i+1][0]

        v_m1_0 = vld1q_u32(&p_row_m1[0]);   // load center vector from src[i-1][0..3]
        v_0_0  = vld1q_u32(&p_row_0[0]);    // load center vector from src[i][0..3]
        v_p1_0 = vld1q_u32(&p_row_p1[0]);   // load center vector from src[i+1][0..3]

        for j = 0 to (ncols - 4) step 4         // assuming 4 elements per SIMD vector; NOTE(review): if the bound is inclusive, the j+4 loads below run past the row end on the last iteration - confirm

            v_m1_p1  = vld1q_u32(&p_row_m1[j+4]);   // load right vector from src[i-1][j+4..j+7]
            v_0_p1   = vld1q_u32(&p_row_0[j+4]);    // load right vector from src[i][j+4..j+7]
            v_p1_p1  = vld1q_u32(&p_row_p1[j+4]);   // load right vector from src[i+1][j+4..j+7]
    //
    // you now have a 3x3 arrangement of vectors on which
    // you can perform a neighbourhood operation and generate
    // 4 output pixels for the current iteration:
    //
    //    v_m1_m1  v_m1_0  v_m1_p1
    //    v_0_m1   v_0_0   v_0_p1
    //    v_p1_m1  v_p1_0  v_p1_p1
    //
    //               |
    //               V
    //
    //              v_out
    vst1q_s32(v_out, &image_out[i][j])      // store output vector at image_out[i][j..j+3]; NOTE(review): the NEON store intrinsics take (pointer, vector) in that order, and the loads above are u32 while this store is s32 - confirm
    // shuffle vectors so that we can use them on next iteration
    v_m1_m1 = v_m1_0
    v_m1_0  = v_m1_p1

    v_0_m1  = v_0_0 
    v_0_0   = v_0_p1

    v_p1_m1 = v_p1_0
    v_p1_0  = v_p1_p1

  end_for
  // for final iteration we need to handle right edge pixels...
  v_m1_p1 = vdupq_n_u32(p_row_m1[ncols-1])     // fill right vector with src[i-1][ncols-1] (replicated right edge)
  v_0_p1  = vdupq_n_u32(p_row_0[ncols-1])       // fill right vector with src[i][ncols-1]
  v_p1_p1 = vdupq_n_u32(p_row_p1[ncols-1])     // fill right vector with src[i+1][ncols-1]
  // calculate v_out as above
  vst1q_s32(v_out, &image_out[i][j])        // store output vector at image_out[i][ncols-4..ncols-1]
end_for

解决方案

Here is some pseudo code for performing a 3x3 neighbourhood operation using SIMD with replicated edge pixels. Input image is image[nrows][ncols], output image is image_out[nrows][ncols].

// 3x3 neighbourhood operation over image[nrows][ncols] using SIMD, with the
// edge pixels replicated on all four sides. For every row a 3x3 arrangement of
// vectors (rows i-1, i, i+1 by left / centre / right) is slid along the row
// one vector (16 pixels) at a time. Loop bounds are inclusive.
for i = 0 to nrows - 1
  // init row pointers; the row index is clamped to [0, nrows-1] so that the
  // first and last rows are replicated at the top and bottom edges
  p_row_m1 = &image[max(i-1, 0)][0]         // pointer to start of row i-1 (clamped to row 0)
  p_row_0 = &image[i][0]                    // pointer to start of row i
  p_row_p1 = &image[min(i+1, nrows-1)][0]   // pointer to start of row i+1 (clamped to the last row)
  v_m1_m1 = init_vec(p_row_m1[0])           // fill left vector from image[i-1][0]
  v_0_m1 = init_vec(p_row_0[0])             // fill left vector from image[i][0]
  v_p1_m1 = init_vec(p_row_p1[0])           // fill left vector from image[i+1][0]
  v_m1_0 = load_vec(&p_row_m1[0])           // load centre vector from image[i-1][0..15]
  v_0_0 = load_vec(&p_row_0[0])             // load centre vector from image[i][0..15]
  v_p1_0 = load_vec(&p_row_p1[0])           // load centre vector from image[i+1][0..15]
  // stop one vector short of the row end so that the j+16 loads stay inside
  // the row; the last vector of the row is handled after the loop
  for j = 0 to (ncols - 32) step 16         // assuming 16 elements per SIMD vector
    v_m1_p1 = load_vec(&p_row_m1[j+16])     // load right vector from image[i-1][j+16..j+31]
    v_0_p1 = load_vec(&p_row_0[j+16])       // load right vector from image[i][j+16..j+31]
    v_p1_p1 = load_vec(&p_row_p1[j+16])     // load right vector from image[i+1][j+16..j+31]
    //
    // you now have a 3x3 arrangement of vectors on which
    // you can perform a neighbourhood operation and generate
    // 16 output pixels for the current iteration:
    //
    //    v_m1_m1  v_m1_0  v_m1_p1
    //    v_0_m1   v_0_0   v_0_p1
    //    v_p1_m1  v_p1_0  v_p1_p1
    //
    //               |
    //               V
    //
    //              v_out
    //
    store_vec(v_out, &image_out[i][j])      // store output vector at image_out[i][j..j+15]
    // shuffle vectors so that we can use them on next iteration
    v_m1_m1 = v_m1_0
    v_m1_0  = v_m1_p1
    v_0_m1  = v_0_0
    v_0_0   = v_0_p1
    v_p1_m1 = v_p1_0
    v_p1_0  = v_p1_p1
  end_for
  // for final iteration we need to handle right edge pixels...
  j = ncols - 16                            // start of the last vector in the row
  v_m1_p1 = init_vec(p_row_m1[ncols-1])     // fill right vector from image[i-1][ncols-1]
  v_0_p1 = init_vec(p_row_0[ncols-1])       // fill right vector from image[i][ncols-1]
  v_p1_p1 = init_vec(p_row_p1[ncols-1])     // fill right vector from image[i+1][ncols-1]
  // calculate v_out as above
  store_vec(v_out, &image_out[i][j])        // store output vector at image_out[i][ncols-16..ncols-1]
end_for

Note that this assumes 16 pixels per vector and also that ncols is a multiple of 16.

这篇关于在图像处理边境检查的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

在图像处理边境检查 [英] Border check in image processing

问题描述

相关文章

其它硬件开发最新文章

热门教程

热门工具

登录关闭

在图像处理边境检查 [英] Border check in image processing

问题描述

相关文章

其它硬件开发最新文章

热门教程

热门工具

登录 关闭

登录关闭