使用cudaMemcpy将结构数据从主机复制到CUDA上的设备 [英] Copying struct data from host to device on CUDA using cudaMemcpy
问题描述
在CUDA架构中,我在将结构数据从主机复制到设备时遇到问题。
以下是代码段。
// Plain aggregate holding two double-precision fields, x and y.
struct point
{
    double x;
    double y;
};
// NOTE: this is the question's original, non-working program, kept as posted
// (only transcription damage has been repaired). The bug the question asks
// about is marked below and is intentionally left in place.
int main()
{
    point * a = (point*)malloc(sizeof(point));
    a->x=10.0;
    a->y=10.0;
    point * d_a;
    // BUG (the subject of the question): cudaMalloc must be given the address
    // of the pointer, i.e. (void**)&d_a. Here the uninitialized value of d_a is
    // cast and passed instead, so d_a never receives a valid device address.
    cudaMalloc((void**)d_a,sizeof(point));
    // The (void**) casts on the cudaMemcpy arguments are unnecessary; the
    // pointers should be passed directly.
    cudaMemcpy((void**)d_a,a,sizeof(point),cudaMemcpyHostToDevice);
    dim3 dimblock(16,16);
    dim3 dimgrid(1,1);
    // MyFunc is defined after main in this snippet, with no prior declaration.
    MyFunc<<<dimgrid,dimblock>>>(d_a);
    cudaMemcpy((void**)a,d_a,sizeof(point),cudaMemcpyDeviceToHost);
    // No return code above is checked, so any failure is silent and the values
    // printed here are whatever *a still holds.
    printf("%lf %lf\n",a->x,a->y);
}
// Kernel: every thread whose threadIdx.x and threadIdx.y are both 0 stores
// 100.0 into both fields of *d_a; all other threads return without writing.
// d_a is dereferenced in device code.
__global__ void MyFunc(point * d_a)
{
    const bool isOriginThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isOriginThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}
点a的x和y字段本应被更改为100,但它们仍然是初始化时的10。这里发生了什么?请帮忙。
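首先,cudaMalloc()必须接收设备指针的地址,即cudaMalloc((void **)&d_a,sizeof(point)),如下面编辑后的完整程序所示;否则d_a永远得不到有效的设备地址。
此外,两个cudaMemcpy()调用的语法都不正确(指针应直接传入,不需要(void **)强制转换),应为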
cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice);
和
cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost);
编辑:
这段代码:
#include <cstdio>
#include <cstdlib>
// Plain aggregate holding two double-precision fields, x and y.
struct point
{
    double x;
    double y;
};
// Kernel: every thread whose threadIdx.x and threadIdx.y are both 0 stores
// 100.0 into both fields of *d_a; all other threads return without writing.
// d_a is dereferenced in device code.
__global__ void MyFunc(point * d_a)
{
    const bool isOriginThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isOriginThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Round-trips one point through device memory: initializes it to (10, 10) on
// the host, copies it to the device, launches MyFunc on a single 16x16 block,
// copies the result back and prints it. Returns the status of cudaThreadExit()
// (0 on success); exits with EXIT_FAILURE if an allocation or CUDA call fails.
int main(void)
{
    point* a = (point*)malloc(sizeof(point));
    if (a == NULL)
    {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }
    a->x = 10.0;
    a->y = 10.0;

    point* d_a = NULL;
    // cudaMalloc stores the device address through its first argument, so it
    // must be given the address of d_a, not d_a itself.
    checkCuda(cudaMalloc((void**)&d_a, sizeof(point)), "cudaMalloc");
    checkCuda(cudaMemcpy(d_a, a, sizeof(point), cudaMemcpyHostToDevice),
              "cudaMemcpy (host to device)");

    dim3 dimblock(16, 16);
    dim3 dimgrid(1, 1);
    MyFunc<<<dimgrid, dimblock>>>(d_a);
    // A kernel launch returns nothing; launch failures are reported here.
    checkCuda(cudaGetLastError(), "MyFunc launch");

    checkCuda(cudaMemcpy(a, d_a, sizeof(point), cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");
    printf("%lf %lf\n", a->x, a->y);

    checkCuda(cudaFree(d_a), "cudaFree");
    free(a);

    // cudaThreadExit() is kept because the accompanying text targets CUDA 3.2;
    // newer toolkits deprecate it in favour of cudaDeviceReset().
    return cudaThreadExit();
}
在64位Linux上运行的CUDA 3.2中完全按预期工作:
cuda:~$ nvcc -arch=sm_20 -o bungle bungle.cu
cuda:~$ ./bungle
100.000000 100.000000
所以如果你不能复现这个结果,那么你的CUDA安装可能有问题。
I am facing a problem in copying struct data from host to device in the CUDA architecture.
Following is the code snippet.
// Plain aggregate holding two double-precision fields, x and y.
struct point
{
    double x;
    double y;
};
// NOTE: this is the question's original, non-working program, kept as posted.
// The bug the question asks about is marked below and is intentionally left in
// place.
int main()
{
point * a = (point*)malloc(sizeof(point));
a->x=10.0;
a->y=10.0;
point * d_a;
// BUG (the subject of the question): cudaMalloc must be given the address of
// the pointer, i.e. (void**)&d_a. Here the uninitialized value of d_a is cast
// and passed instead, so d_a never receives a valid device address.
cudaMalloc((void**)d_a,sizeof(point));
// The (void**) casts on the cudaMemcpy arguments are unnecessary; the pointers
// should be passed directly.
cudaMemcpy((void**)d_a,a,sizeof(point),cudaMemcpyHostToDevice);
dim3 dimblock(16,16);
dim3 dimgrid(1,1);
// MyFunc is defined after main in this snippet, with no prior declaration.
MyFunc<<<dimgrid,dimblock>>>(d_a);
cudaMemcpy((void**)a,d_a,sizeof(point),cudaMemcpyDeviceToHost);
// No return code above is checked, so any failure is silent and the values
// printed here are whatever *a still holds.
printf("%lf %lf\n",a->x,a->y);
}
// Kernel: every thread whose threadIdx.x and threadIdx.y are both 0 stores
// 100.0 into both fields of *d_a; all other threads return without writing.
// d_a is dereferenced in device code.
__global__ void MyFunc(point* d_a)
{
    const bool isOriginThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isOriginThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}
The x and y fields of point a should have been changed to 100. Instead, it is still 10 as initialized. What is happening here? Please help.
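First, cudaMalloc() must be passed the address of the device pointer, i.e. cudaMalloc((void**)&d_a,sizeof(point)), as in the complete program below; otherwise d_a never receives a valid device address.
In addition, the syntax of both cudaMemcpy() calls is incorrect (the pointers should be passed directly, without the (void**) casts); they should be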
cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice);
and
cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost);
EDIT:
This:
#include <cstdio>
#include <cstdlib>
// Plain aggregate holding two double-precision fields, x and y.
struct point
{
    double x;
    double y;
};
// Kernel: every thread whose threadIdx.x and threadIdx.y are both 0 stores
// 100.0 into both fields of *d_a; all other threads return without writing.
// d_a is dereferenced in device code.
__global__ void MyFunc(point* d_a)
{
    const bool isOriginThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isOriginThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Round-trips one point through device memory: initializes it to (10, 10) on
// the host, copies it to the device, launches MyFunc on a single 16x16 block,
// copies the result back and prints it. Returns the status of cudaThreadExit()
// (0 on success); exits with EXIT_FAILURE if an allocation or CUDA call fails.
int main(void)
{
    point* a = (point*)malloc(sizeof(point));
    if (a == NULL)
    {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }
    a->x = 10.0;
    a->y = 10.0;

    point* d_a = NULL;
    // cudaMalloc stores the device address through its first argument, so it
    // must be given the address of d_a, not d_a itself.
    checkCuda(cudaMalloc((void**)&d_a, sizeof(point)), "cudaMalloc");
    checkCuda(cudaMemcpy(d_a, a, sizeof(point), cudaMemcpyHostToDevice),
              "cudaMemcpy (host to device)");

    dim3 dimblock(16, 16);
    dim3 dimgrid(1, 1);
    MyFunc<<<dimgrid, dimblock>>>(d_a);
    // A kernel launch returns nothing; launch failures are reported here.
    checkCuda(cudaGetLastError(), "MyFunc launch");

    checkCuda(cudaMemcpy(a, d_a, sizeof(point), cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");
    printf("%lf %lf\n", a->x, a->y);

    checkCuda(cudaFree(d_a), "cudaFree");
    free(a);

    // cudaThreadExit() is kept because the accompanying text targets CUDA 3.2;
    // newer toolkits deprecate it in favour of cudaDeviceReset().
    return cudaThreadExit();
}
works precisely as expected with CUDA 3.2 running on 64 bit linux:
cuda:~$ nvcc -arch=sm_20 -o bungle bungle.cu
cuda:~$ ./bungle
100.000000 100.000000
So if you cannot replicate this, then something is probably wrong with your CUDA installation.
这篇关于使用cudaMemcpy将结构数据从主机复制到CUDA上的设备的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!