使用cudaMemcpy将结构数据从主机复制到CUDA上的设备 [英] Copying struct data from host to device on CUDA using cudaMemcpy

查看:628
本文介绍了使用cudaMemcpy将结构数据从主机复制到CUDA上的设备的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

在CUDA架构中,我在将结构数据从主机复制到设备时遇到问题。

以下是代码段。

  struct point
{
    // Two double-precision fields, declared one per line.
    double x;
    double y;
};

// Question listing: meant to send one `point` to the GPU, have MyFunc set both
// fields to 100.0, copy it back and print it. The API calls are kept exactly as
// the asker wrote them, because their misuse is what the question is about;
// only the garbled launch and printf lines have been restored.
// NOTE(review): MyFunc is only defined after main in this listing; no earlier
// declaration is visible here.
int main()
{
    // Host copy, initialised to (10.0, 10.0).
    point * a = (point *)malloc(sizeof(point));
    a->x = 10.0;
    a->y = 10.0;
    point * d_a;
    // NOTE(review): d_a is still uninitialised here and its value (not its
    // address) is cast to void**, so cudaMalloc is not handed a pointer to d_a
    // to fill in. The corrected listing further down passes &d_a instead.
    cudaMalloc((void **)d_a, sizeof(point));
    // None of the CUDA return values in this listing are inspected.
    cudaMemcpy((void **)d_a, a, sizeof(point), cudaMemcpyHostToDevice);
    dim3 dimblock(16,16);
    dim3 dimgrid(1,1);

    MyFunc<<<dimgrid,dimblock>>>(d_a);
    cudaMemcpy((void **)a, d_a, sizeof(point), cudaMemcpyDeviceToHost);
    printf("%lf %lf\n", a->x, a->y);
}

// Kernel: in each block, the thread with threadIdx.x == 0 and threadIdx.y == 0
// stores 100.0 into both fields of *d_a; every other thread returns at once.
__global__ void MyFunc(point * d_a)
{
    const bool isFirstThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isFirstThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}

点a的x和y字段本应被更改为100。然而,它们仍然是初始化时的10。这里发生了什么?请帮助。

解决方案

两个cudaMemcpy()调用的语法都不正确,应为

  cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice); 

和

  cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost); 

编辑:

以下代码(其中cudaMalloc也改为传入 &d_a,即设备指针的地址,而问题中的代码传入的是 d_a 本身):

  #include <cstdio>
#include <cstdlib>

// Pair of double-precision values passed between host and device in this example.
struct point
{
    double x;
    double y;
};

// Kernel: in each block, the thread with threadIdx.x == 0 and threadIdx.y == 0
// stores 100.0 into both fields of *d_a; every other thread returns at once.
__global__ void MyFunc(point * d_a)
{
    const bool isFirstThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isFirstThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}

// Round-trips one `point` through the GPU: allocates it on the host as
// (10.0, 10.0), copies it to device memory, launches MyFunc on a single
// 16x16 block, copies the result back and prints both fields.
//
// Returns EXIT_SUCCESS (0) when every step succeeds, EXIT_FAILURE otherwise.
// Each CUDA call's status is checked; after the first failure the remaining
// GPU steps are skipped but the cleanup below still runs.
int main(void)
{
    point* a = (point*)malloc(sizeof(point));
    if (a == NULL)
    {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }
    a->x = 10.0;
    a->y = 10.0;

    // cudaMalloc writes the allocation through its first argument, so it
    // must receive the address of d_a, not the (unset) value of d_a.
    point* d_a = NULL;
    cudaError_t err = cudaMalloc((void**)&d_a, sizeof(point));

    if (err == cudaSuccess)
    {
        err = cudaMemcpy(d_a, a, sizeof(point), cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess)
    {
        dim3 dimblock(16,16);
        dim3 dimgrid(1,1);

        MyFunc<<<dimgrid,dimblock>>>(d_a);
        // A launch does not return a status; a bad configuration is only
        // visible through cudaGetLastError().
        err = cudaGetLastError();
    }

    if (err == cudaSuccess)
    {
        // Blocking copy: it also waits for the kernel and reports any error
        // raised while the kernel was running.
        err = cudaMemcpy(a, d_a, sizeof(point), cudaMemcpyDeviceToHost);
    }

    if (err == cudaSuccess)
    {
        printf("%lf %lf\n", a->x, a->y);
    }

    // Cleanup runs on every path; cudaFree(NULL) is a harmless no-op.
    const cudaError_t freeErr = cudaFree(d_a);
    if (err == cudaSuccess)
    {
        err = freeErr;
    }
    free(a);

    // cudaThreadExit() is kept because the surrounding text targets CUDA 3.2;
    // newer toolkits deprecate it in favour of cudaDeviceReset().
    const cudaError_t exitErr = cudaThreadExit();
    if (err == cudaSuccess)
    {
        err = exitErr;
    }

    if (err != cudaSuccess)
    {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

在64位Linux上运行的CUDA 3.2中,这段代码完全按预期工作:

  cuda:~$ nvcc -arch=sm_20 -o bungle bungle.cu 
cuda:~$ ./bungle
100.000000 100.000000


所以如果你不能复现这个结果,那么你的CUDA安装可能有问题。

I am facing a problem in copying struct data from host to device in the CUDA architecture.
Following is the code snippet.

// Pair of double-precision values passed between host and device in this example.
struct point
{
    double x;
    double y;
};

// Question listing, kept exactly as the asker posted it: it is meant to send
// one `point` to the GPU, have MyFunc set both fields to 100.0, copy it back
// and print it.
// NOTE(review): MyFunc is only defined after main in this listing; no earlier
// declaration is visible here.
int main()  
{  
   // Host copy, initialised to (10.0, 10.0).
   point * a = (point*)malloc(sizeof(point));  
   a->x=10.0;   
   a->y=10.0;    
   point * d_a;  
   // NOTE(review): d_a is still uninitialised here and its value (not its
   // address) is cast to void**, so cudaMalloc is not handed a pointer to d_a
   // to fill in. The corrected listing further down passes &d_a instead.
   cudaMalloc((void**)d_a,sizeof(point));  
   // None of the CUDA return values in this listing are inspected.
   cudaMemcpy((void**)d_a,a,sizeof(point),cudaMemcpyHostToDevice);  
   dim3 dimblock(16,16);  
   dim3 dimgrid(1,1);  

   // One block of 16x16 threads.
   MyFunc<<<dimgrid,dimblock>>>(d_a);  
   cudaMemcpy((void**)a,d_a,sizeof(point),cudaMemcpyDeviceToHost);    
   printf("%lf %lf\n",a->x,a->y);
}  

// Kernel: in each block, the thread with threadIdx.x == 0 and threadIdx.y == 0
// stores 100.0 into both fields of *d_a; every other thread returns at once.
__global__ void MyFunc(point* d_a)
{
    const bool isFirstThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isFirstThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}

The x and y fields of point a should have been changed to 100. Instead, it is still 10 as initialized. What is happening here? Please help.

解决方案

The syntax of both cudaMemcpy() calls is incorrect; they should be

cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice);

and

cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost);    

EDIT:

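This (which also passes &d_a, the address of the device pointer, to cudaMalloc, where the question's code passed d_a itself):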

#include <cstdio>
#include <cstdlib>

// Pair of double-precision values passed between host and device in this example.
struct point
{
    double x;
    double y;
};

// Kernel: in each block, the thread with threadIdx.x == 0 and threadIdx.y == 0
// stores 100.0 into both fields of *d_a; every other thread returns at once.
__global__ void MyFunc(point* d_a)
{
    const bool isFirstThread = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isFirstThread)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}

// Round-trips one `point` through the GPU: allocates it on the host as
// (10.0, 10.0), copies it to device memory, launches MyFunc on a single
// 16x16 block, copies the result back and prints both fields.
//
// Returns EXIT_SUCCESS (0) when every step succeeds, EXIT_FAILURE otherwise.
// Each CUDA call's status is checked; after the first failure the remaining
// GPU steps are skipped but the cleanup below still runs.
int main(void)
{
    point* a = (point*)malloc(sizeof(point));
    if (a == NULL)
    {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }
    a->x = 10.0;
    a->y = 10.0;

    // cudaMalloc writes the allocation through its first argument, so it
    // must receive the address of d_a, not the (unset) value of d_a.
    point* d_a = NULL;
    cudaError_t err = cudaMalloc((void**)&d_a, sizeof(point));

    if (err == cudaSuccess)
    {
        err = cudaMemcpy(d_a, a, sizeof(point), cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess)
    {
        dim3 dimblock(16,16);
        dim3 dimgrid(1,1);

        MyFunc<<<dimgrid,dimblock>>>(d_a);
        // A launch does not return a status; a bad configuration is only
        // visible through cudaGetLastError().
        err = cudaGetLastError();
    }

    if (err == cudaSuccess)
    {
        // Blocking copy: it also waits for the kernel and reports any error
        // raised while the kernel was running.
        err = cudaMemcpy(a, d_a, sizeof(point), cudaMemcpyDeviceToHost);
    }

    if (err == cudaSuccess)
    {
        printf("%lf %lf\n", a->x, a->y);
    }

    // Cleanup runs on every path; cudaFree(NULL) is a harmless no-op.
    const cudaError_t freeErr = cudaFree(d_a);
    if (err == cudaSuccess)
    {
        err = freeErr;
    }
    free(a);

    // cudaThreadExit() is kept because the surrounding text targets CUDA 3.2;
    // newer toolkits deprecate it in favour of cudaDeviceReset().
    const cudaError_t exitErr = cudaThreadExit();
    if (err == cudaSuccess)
    {
        err = exitErr;
    }

    if (err != cudaSuccess)
    {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

works precisely as expected with CUDA 3.2 running on 64 bit linux:

cuda:~$ nvcc -arch=sm_20 -o bungle bungle.cu 
cuda:~$ ./bungle 
100.000000 100.000000

So if you cannot replicate this, then something is probably wrong with your CUDA installation.

这篇关于使用cudaMemcpy将结构数据从主机复制到CUDA上的设备的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆