设备函数指针 [英] device function pointers
本文介绍了设备功能指针的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我需要以下主机代码的设备版本:
I need a device version of the following host code:
// Global table of pointers to functions taking and returning a double.
// main() allocates its storage and test() calls through entries 0..2.
double (**func)(double x);
// Returns x increased by one.
double func1(double x)
{
    const double shifted = x + 1.0;
    return shifted;
}
// Returns x increased by two.
double func2(double x)
{
    const double shifted = x + 2.0;
    return shifted;
}
// Returns x increased by three.
double func3(double x)
{
    const double shifted = x + 3.0;
    return shifted;
}
// Calls the first three entries of the global `func` table with the argument
// 2.0 and prints each result on its own line.
// Assumes the caller has already stored valid function pointers in func[0..2].
void test(void)
{
    for (int i = 0; i < 3; ++i) {
        const double x = func[i](2.0);
        // The line break is written as the escape sequence "\n"; a raw line
        // break inside a string literal does not compile.
        printf("%g\n", x);
    }
}
int main(void)
{
func=(double (**)(double))malloc(10*sizeof(double (*)(double)));
test();
return 0;
}
其中 func1、func2、func3 必须是 __device__ 函数,而"test"必须是(适当修改的)__global__ 内核。
where func1, func2, func3 have to be __device__ functions and "test" has to be a (suitably modified) __global__ kernel.
我有一块 NVIDIA GeForce GTS 450(计算能力 2.1)。先谢谢了。Michele
I have a NVIDIA GeForce GTS 450 (compute capability 2.1) Thank you in advance Michele
=========================================================
========================================================
一个可行的解决方案
// Scalar type used by the functions and tables in this example.
#define REAL double
// Pointer to a function that takes one REAL and returns a REAL.
typedef REAL (*func)(REAL x);
// Returns x increased by one. Compiled for both host and device, so it can be
// stored in the host table and in the device table.
__host__ __device__ REAL func1(REAL x)
{
    const REAL offset = 1.0f;
    return x + offset;
}
// Returns x increased by two. Compiled for both host and device, so it can be
// stored in the host table and in the device table.
__host__ __device__ REAL func2(REAL x)
{
    const REAL offset = 2.0f;
    return x + offset;
}
// Returns x increased by three. Compiled for both host and device, so it can
// be stored in the host table and in the device table.
__host__ __device__ REAL func3(REAL x)
{
    const REAL offset = 3.0f;
    return x + offset;
}
// Device-resident table of function pointers; written by assign_kernel and
// read by test_kernel.
__device__ func func_list_d[3];
// Host-side table of function pointers; written by assign() and read by test().
func func_list_h[3];
// Kernel that stores the addresses of func1, func2 and func3 into the device
// table func_list_d. The addresses are taken in device code.
// Every launched thread writes the same three values; main() launches it <<<1,1>>>.
__global__ void assign_kernel(void)
{
    const func entries[3] = { func1, func2, func3 };
    for (int slot = 0; slot < 3; ++slot) {
        func_list_d[slot] = entries[slot];
    }
}
// Host counterpart of assign_kernel: stores the addresses of func1, func2 and
// func3 into the host table func_list_h.
void assign(void)
{
    const func entries[3] = { func1, func2, func3 };
    for (int slot = 0; slot < 3; ++slot) {
        func_list_h[slot] = entries[slot];
    }
}
// Kernel that calls each of the three entries of the device table func_list_d
// with the argument 2.0 and prints the result with device-side printf.
// Every launched thread runs the whole loop; main() launches it <<<1,1>>>.
// Assumes func_list_d has already been filled (assign_kernel does that).
__global__ void test_kernel(void)
{
    for (int i = 0; i < 3; ++i) {
        const REAL x = func_list_d[i](2.0);
        // The line break is written as the escape sequence "\n"; a raw line
        // break inside a string literal does not compile.
        printf("%g\n", x);
    }
}
// Host counterpart of test_kernel: prints a separator line, then calls each of
// the three entries of the host table func_list_h with the argument 2.0 and
// prints every result on its own line.
// Assumes func_list_h has already been filled (assign() does that).
void test(void)
{
    // Line breaks are written as the escape sequence "\n"; a raw line break
    // inside a string literal does not compile.
    printf("=============\n");
    for (int i = 0; i < 3; ++i) {
        const REAL x = func_list_h[i](2.0);
        printf("%g\n", x);
    }
}
// Runs the device variant (assign_kernel + test_kernel) and then the host
// variant (assign + test) of the function-pointer demo.
// Returns 0 on success and 1 if a kernel launch or its execution failed.
int main(void)
{
    cudaError_t status;

    // A kernel launch returns nothing; a bad launch is only visible through
    // cudaGetLastError().
    assign_kernel<<<1, 1>>>();
    status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "assign_kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    test_kernel<<<1, 1>>>();
    status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "test_kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
    // It waits for both kernels, reports execution errors, and flushes the
    // device-side printf buffer before the host output starts.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    assign();
    test();
    return 0;
}
推荐答案
Fermi 上允许使用函数指针.你可以这样做:
Function pointers are allowed on Fermi. This is how you could do it:
// Pointer to a function that takes one double and returns a double.
typedef double (*func)(double x);
// Device function: returns x increased by one.
__device__ double func1(double x)
{
    const double shifted = x + 1.0;
    return shifted;
}
// Device function: returns x increased by two.
__device__ double func2(double x)
{
    const double shifted = x + 2.0;
    return shifted;
}
// Device function: returns x increased by three.
__device__ double func3(double x)
{
    const double shifted = x + 3.0;
    return shifted;
}
// Device-resident variables initialised with the addresses of func1, func2
// and func3. main() copies their values to the host with cudaMemcpyFromSymbol
// in order to build the table it passes to test_kernel.
__device__ func pfunc1 = func1;
__device__ func pfunc2 = func2;
__device__ func pfunc3 = func3;
// Kernel that calls each of the first n entries of f with the argument 2.0
// and prints the result with device-side printf.
//   f : array of function pointers; main() passes a cudaMalloc'ed array
//       filled with values read from pfunc1..pfunc3.
//   n : number of entries of f to call.
// Every launched thread runs the whole loop; main() launches it <<<1,1>>>.
__global__ void test_kernel(func* f, int n)
{
    for (int i = 0; i < n; ++i) {
        const double x = f[i](2.0);
        // The line break is written as the escape sequence "\n"; a raw line
        // break inside a string literal does not compile.
        printf("%g\n", x);
    }
}
// Prints a diagnostic and terminates the program when a CUDA runtime call
// did not return cudaSuccess.
static void check_cuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Builds a table of N device function pointers on the host by reading the
// device variables pfunc1..pfunc3, uploads it to the device, and runs
// test_kernel on it.
// Returns 0 on success; exits with a failure status if an allocation or a
// CUDA call fails.
int main(void)
{
    const int N = 5;

    func* h_f = (func*)malloc(N * sizeof(func));
    if (h_f == NULL) {
        fprintf(stderr, "failed to allocate the host function table\n");
        return EXIT_FAILURE;
    }

    func* d_f = NULL;
    check_cuda(cudaMalloc((void**)&d_f, N * sizeof(func)), "cudaMalloc");

    // Read the device addresses stored in pfunc1..pfunc3. Entries 1 and 4
    // repeat pfunc1 and pfunc3, as in the original example.
    check_cuda(cudaMemcpyFromSymbol(&h_f[0], pfunc1, sizeof(func)), "cudaMemcpyFromSymbol(pfunc1)");
    check_cuda(cudaMemcpyFromSymbol(&h_f[1], pfunc1, sizeof(func)), "cudaMemcpyFromSymbol(pfunc1)");
    check_cuda(cudaMemcpyFromSymbol(&h_f[2], pfunc2, sizeof(func)), "cudaMemcpyFromSymbol(pfunc2)");
    check_cuda(cudaMemcpyFromSymbol(&h_f[3], pfunc3, sizeof(func)), "cudaMemcpyFromSymbol(pfunc3)");
    check_cuda(cudaMemcpyFromSymbol(&h_f[4], pfunc3, sizeof(func)), "cudaMemcpyFromSymbol(pfunc3)");

    check_cuda(cudaMemcpy(d_f, h_f, N * sizeof(func), cudaMemcpyHostToDevice), "cudaMemcpy");

    test_kernel<<<1, 1>>>(d_f, N);
    // A kernel launch returns nothing; a bad launch is only visible through
    // cudaGetLastError().
    check_cuda(cudaGetLastError(), "test_kernel launch");
    // Wait for the kernel before cleaning up. The synchronisation also
    // flushes the device-side printf buffer, which is not flushed
    // automatically when the program exits.
    check_cuda(cudaDeviceSynchronize(), "test_kernel execution");

    check_cuda(cudaFree(d_f), "cudaFree");
    free(h_f);
    return 0;
}
这篇关于设备功能指针的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文