错误的结果cufft 3D就地 [英] Wrong results cufft 3D in-place

查看：597 发布时间：2017/3/5 19:26:49 cuda cufft

本文介绍了错误的结果cufft 3D就地的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我写，因为我面对的问题与袖口3D转换就地，而我没有问题的外部版本。我试图关注Robert Crovella的回答这里，但是我没有获得正确的结果，当我做FFT + IFT。
这是我的代码：

  #include< stdio.h> 
 #include< stdlib.h> 
 #include< cuda_runtime.h> 
 #include< complex.h> 
 #include< cuComplex.h> 
 #include< cufft.h> 
 
 //主函数
 int main（int argc，char ** argv）{
 int N = 4; 
 double * in = NULL，* d_in = NULL; 
 cuDoubleComplex * out = NULL，* d_out = NULL; 
 cufftHandle plan_r2c，plan_c2r; 
 
 unsigned int out_mem_size = sizeof（cuDoubleComplex）* N * N *（N / 2 + 1）; 
 unsigned int in_mem_size = out_mem_size; 
 
 in =（double *）malloc（in_mem_size）; 
 out =（cuDoubleComplex *）in; 
 
 cudaMalloc（（void **）& d_in，in_mem_size）; 
 d_out =（cuDoubleComplex *）d_in; 
 
 cufftPlan3d（& plan_r2c，N，N，N，CUFFT_D2Z）; 
 cufftPlan3d（& plan_c2r，N，N，N，CUFFT_Z2D）; 
 
 memset（in，0，in_mem_size）; 
 unsigned int idx; 
 for（int z = 0; z  for（int y = 0; y  for（int x = 0; x < N; x ++）{
 idx = z + N *（y + x * N）; 
 in [idx] = idx; 
} 
} 
} 
 printf（\\\
Start：\\\
）; 
 for（int z = 0; z< N; z ++）{
 printf（plane =％d -------------------- -------- \\\
，z）; 
 for（int x = 0; x  for（int y = 0; y  idx = z + N * + x * N）; 
 printf（％。3f \t，in [idx]）; 
} 
 printf（\\\
）; 
} 
} 
 cudaMemcpy（d_in，in，in_mem_size，cudaMemcpyHostToDevice）; 
 
 cufftExecD2Z（plan_r2c，（cufftDoubleReal *）d_in，（cufftDoubleComplex *）d_out）; 
 cufftExecZ2D（plan_c2r，（cufftDoubleComplex *）d_out，（cufftDoubleReal *）d_in）; 
 
 memset（in，0，in_mem_size）; 
 CU_ERR_CHECK（cudaMemcpy（in，d_in，in_mem_size，cudaMemcpyDeviceToHost））; 
 
 printf（\\\
After FFT + IFT：\\\
）; 
 for（int z = 0; z< N; z ++）{
 printf（plane =％d -------------------- -------- \\\
，z）; 
 for（int x = 0; x  for（int y = 0; y  idx = z + N * + x * N）; 
 //规范化
 in [idx] / =（N * N * N）; 
 printf（％。3f \t，in [idx]）; 
} 
 printf（\\\
）; 
} 
} 
 
 return 0; 
}

程序输出以下数据：

启动文件

plane = 0 ------------------------ ----

0.000 4.000 8.000 12.000

16.000 20.000 24.000 28.000

32.000 36.000 40.000 44.000

48.000 52.000 56.000 60.000

plane = 1 -------------------------- -

1.000 5.000 9.000 13.000

17.000 21.000 25.000 29.000

33.000 37.000 41.000 45.000

49.000 53.000 57.000 61.000

plane = 2 ----------------------------

2.000 6.000 10.000 14.000

18.000 22.000 26.000 30.000

34.000 38.000 42.000 46.000

50.000 54.000 58.000 62.000

plane = 3 ---------------------------- p>

3.000 7.000 11.000 15.000

19.000 23.000 27.000 31.000

35.000 39.000 43.000 47.000

51.000 55.000 59.000 63.000

FFT + IFT后

plane = 0 ------------- ---------------

-0.000 -0.344 8.000 12.000

-0.031 20.000 24.000 -0.031 < br>
32.000 36.000 0.031 44.000

48.000 -0.094 56.000 60.000

plane = 1 ---------- ------------------

1.000 -0.000 9.000 13.000

-0.000 21.000 25.000 0.125

33.000 37.000 0.000 45.000

49.000 0.000 57.000 61.000

plane = 2 ---------- ------------------

2.000 6.000 -0.000 14.000

18.000 0.000 26.000 30.000 < br>
0.000 38.000 42.000 -0.000

50.000 54.000 -0.000 62.000

plane = 3 --------- -------------------

3.000 7.000 0.031 15.000

19.000 -0.031 27.000 31.000

-0.031 39.000 43.000 0.031

51.000 55.000 0.031 63.000

我甚至尝试以这种方式填充数据：

  //带有填充
 unsigned int idx; 
 for（int x = 0; x  for（int y = 0; y  for（int z = 0; z < 2 *（N / 2 + 1）; z ++）{
 idx = z + N *（y + x * N） 
 if（z <4）in [idx] = idx; 
 else in [idx] = 0; 
} 
} 
}

我做错了什么？

解决方案

正如您已经发现的，如果使用 CUFFT_COMPATIBILITY_FFTW_PADDING 默认的兼容模式。要使代码生效，您可以使用 cufftSetCompatibilityMode（）设置 CUFFT_COMPATIBILITY_NATIVE 。但是，此模式在当前版本的CUDA中标记为已弃用。

因此，我建议使用默认兼容模式并使用填充。您尝试实现填充是错误的。计算3维x，y，z的线性指数的公式其中z是最快运行指数， idx = z + Nz *（y + Ny * x）。包括填充的 z 维的大小 Nz 是 Nz =（N / 2 + 1 ）* 2 。然后，数组的正确初始化是：

  unsigned int idx; 
 for（int z = 0; z  for（int y = 0; y  for（int x = 0; x < N; x ++）{
 idx = z +（N / 2 + 1）* 2 *（y + x * N） 
 in [idx] = idx; 
} 
} 
}

I write because I'm facing problems with the cufft 3D transform in-place, while I have no problems for the out-of-place version. I tried to follow Robert Crovella's answer here but I'm not obtaining the correct results when I make a FFT+IFT. This is my code:

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <complex.h>
#include <cuComplex.h>
#include <cufft.h>

// Main function
int main(int argc, char **argv){
    int N = 4; 
    double *in = NULL, *d_in = NULL;
    cuDoubleComplex *out = NULL, *d_out = NULL;
    cufftHandle plan_r2c, plan_c2r;

    unsigned int out_mem_size = sizeof(cuDoubleComplex) * N*N*(N/2 + 1);
    unsigned int in_mem_size = out_mem_size;

    in  = (double *) malloc (in_mem_size);
    out  = (cuDoubleComplex *)in;

    cudaMalloc((void **)&d_in, in_mem_size);
    d_out = (cuDoubleComplex *)d_in;

    cufftPlan3d(&plan_r2c, N, N, N, CUFFT_D2Z);
    cufftPlan3d(&plan_c2r, N, N, N, CUFFT_Z2D);

    memset(in, 0, in_mem_size);
    unsigned int idx;
    for (int z = 0; z < N; z++){
        for (int y = 0; y < N; y++){
            for (int x = 0; x < N; x++){
                idx = z + N * ( y + x * N);
                in[idx] = idx;
            }
        }
    }
    printf("\nStart: \n");
    for (int z = 0; z < N; z++){
        printf("plane = %d ----------------------------\n", z);
        for (int x = 0; x < N; x++){
            for (int y = 0; y < N; y++){
                idx = z + N * ( y + x * N);
                printf("%.3f \t", in[idx]);
            }
            printf("\n");
        }
    }
    cudaMemcpy(d_in, in, in_mem_size, cudaMemcpyHostToDevice);

    cufftExecD2Z(plan_r2c, (cufftDoubleReal *)d_in, (cufftDoubleComplex *)d_out);
    cufftExecZ2D(plan_c2r, (cufftDoubleComplex *)d_out, (cufftDoubleReal *)d_in);

    memset(in, 0, in_mem_size);
    CU_ERR_CHECK( cudaMemcpy(in, d_in, in_mem_size, cudaMemcpyDeviceToHost) );

    printf("\nAfter FFT+IFT: \n");
    for (int z = 0; z < N; z++){
        printf("plane = %d ----------------------------\n", z);
        for (int x = 0; x < N; x++){
            for (int y = 0; y < N; y++){
                idx = z + N * ( y + x * N);
                // Normalisation
                in[idx] /= (N*N*N);
                printf("%.3f \t", in[idx]);
            }
            printf("\n");
        }
    }

    return 0;
}

The program outputs the following data:

Starting file

plane = 0 ----------------------------

0.000 4.000 8.000 12.000
16.000 20.000 24.000 28.000
32.000 36.000 40.000 44.000
48.000 52.000 56.000 60.000

plane = 1 ----------------------------

1.000 5.000 9.000 13.000
17.000 21.000 25.000 29.000
33.000 37.000 41.000 45.000
49.000 53.000 57.000 61.000

plane = 2 ----------------------------

2.000 6.000 10.000 14.000
18.000 22.000 26.000 30.000
34.000 38.000 42.000 46.000
50.000 54.000 58.000 62.000

plane = 3 ----------------------------

3.000 7.000 11.000 15.000
19.000 23.000 27.000 31.000
35.000 39.000 43.000 47.000
51.000 55.000 59.000 63.000

After FFT+IFT

plane = 0 ----------------------------

-0.000 -0.344 8.000 12.000
-0.031 20.000 24.000 -0.031
32.000 36.000 0.031 44.000
48.000 -0.094 56.000 60.000

plane = 1 ----------------------------

1.000 -0.000 9.000 13.000
-0.000 21.000 25.000 0.125
33.000 37.000 0.000 45.000
49.000 0.000 57.000 61.000

plane = 2 ----------------------------

2.000 6.000 -0.000 14.000
18.000 0.000 26.000 30.000
0.000 38.000 42.000 -0.000
50.000 54.000 -0.000 62.000

plane = 3 ----------------------------

3.000 7.000 0.031 15.000
19.000 -0.031 27.000 31.000
-0.031 39.000 43.000 0.031
51.000 55.000 0.031 63.000

I even tried to pad the data this way:

// With padding
    unsigned int idx;
    for (int x = 0; x < N; x++){
        for (int y = 0; y < N; y++){
            for (int z = 0; z < 2*(N/2+1); z++){
                idx = z + N * ( y + x * N);
                if (z < 4) in[idx] = idx;
                else in[idx] = 0;
            }
        }
    }

What am I doing wrong?

解决方案

As you already found out, you need padding if you use the CUFFT_COMPATIBILITY_FFTW_PADDINGcompatibility mode which is default. For your code to work you could use cufftSetCompatibilityMode() to set CUFFT_COMPATIBILITY_NATIVE. However, this mode is marked as deprecated in the current version of CUDA.

Therefore, I recommend to use the default compatibility mode and use padding. Your try to implement padding is wrong. The formula to calculate a linear index for 3 dimension x, y, z where z is the fastest running index is idx = z + Nz*(y + Ny*x). The size Nz of the z dimension including padding is Nz = (N/2+1)*2. Then, the correct initialization of the array is:

unsigned int idx;
for (int z = 0; z < N; z++){
    for (int y = 0; y < N; y++){
        for (int x = 0; x < N; x++){
            idx = z + (N/2+1)*2 * ( y + x * N);
            in[idx] = idx;
        }
    }
}

Accordingly for the print loops.

这篇关于错误的结果cufft 3D就地的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

错误的结果cufft 3D就地 [英] Wrong results cufft 3D in-place

问题描述

相关文章

其它硬件开发最新文章

热门教程

热门工具

登录关闭

错误的结果cufft 3D就地 [英] Wrong results cufft 3D in-place

问题描述

相关文章

其它硬件开发最新文章

热门教程

热门工具

登录 关闭

登录关闭