将函数作为 CUDA Thrust 迭代器的参数 [英] function as argument of thrust iterator CUDA
问题描述
我正在尝试使用 CUDA Thrust 迭代器来实现在 GPU 上运行的 ODE 求解例程,以便在 GPU 上求解一组方程。具体细节如下,这是一小段代码:
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>
// Example function of two variables: returns cos(y) * sin(x).
// Marked __host__ __device__, so it can be compiled for both host and device.
__host__ __device__ float f(float x, float y)
{
    const float value = cos(y) * sin(x);
    return value;
}
// Functor from the question: one explicit Euler update, y += h * f(x, y),
// followed by x += h, where h is the value passed to the constructor.
//
// NOTE(review): this is deliberately kept as the asker wrote it, because the
// compiler diagnostics quoted in the question refer to it. It does not compile:
//   - x and y are const references, so both += updates are rejected
//     ("expression must be a modifiable lvalue");
//   - operator() is declared to return float but has no return statement;
//   - the function-pointer parameter is declared as float(*)(double, double).
struct euler_functor
{
    const float h;
    euler_functor(float _h) : h(_h) {};
    __host__ __device__
    float operator()(float (*f)(double, double), const float& x, const float& y) const {
        y += h * (*f)(x, y);
        x += h;
    }
};
int main(void)
{
//分配三个device_vectors和10个元素
thrust :: device_vector< int> X(10);
// initilaize to random vaues
thrust :: generate(X.begin(),X.end(),rand);
//为X
的每个元素应用euler thrust :: for_each(X.begin(),X.end(),euler_functor(f,0.0,X));
//打印值
for(int i = 0; i <10; i ++)std :: cout < X [i] << std :: endl;
}
但是当我编译
nvcc euler.cu -o euler.x -lm
发生以下错误:
lala.cu(29):错误:缺少显式类型(假定为 "int")
lala.cu(29):错误:应输入 ";"
lala.cu(33):错误:表达式必须是可修改的左值
lala.cu(34):错误:表达式必须是可修改的左值
lala.cu(35):警告:非 void 函数 "euler_functor::operator()" 的末尾缺少 return 语句
lala.cu(46):错误:不存在可将 "float (float, float)" 转换为 "euler_functor" 的合适构造函数
lala.cu(46):错误:应输入 ")"
看来无法按照我尝试的方式使用函数指针?
如果有更好的方法来实现欧拉过程并用迭代器运行它,非常欢迎提出建议。前一种方法是否是可移植性与性能之间的良好折衷?
最后,对我来说理想的解决方案是能够像下面这样定义一个函数指针数组:
// Function-pointer type: points to a function taking an int and returning an int.
typedef int(* foo_ptr_t)(int);
// Table with room for two such function pointers.
foo_ptr_t foo_ptr_array [2];
// Declarations of the two functions to be stored in the table.
int f1(int);
int f2(int);
// Store the two functions in the table ...
foo_ptr_array [0] = f1;
foo_ptr_array [1] = f2;
// ... and call through the first entry with the argument 1.
foo_ptr_array [0](1);
将foo_ptr_array作为参数传递给euler函子。是否有可能?
感谢回答。
可能的改进:
是否可以像我在下面的方法中尝试的那样,把一组耦合的微分方程定义为作用于元组(tuple)的函子(functor)?能否从数值方法中获得关于解的误差信息?
这将是
解决方案
归根结底,你是想在主机代码中接受一个 __device__ 函数参数,然后把它作为(函数)指针传递,而它最终(在底层)会成为由 Thrust 生成的内核的参数。
在主机代码中获取 __device__ 函数参数的地址是非法的,因此以这种方式把 __device__ 函数指针作为参数传递是行不通的。
也许可以通过创建额外的 __device__ 变量(指针)在设备上存储函数指针来绕过这个问题,然后使用 cudaGetSymbolAddress 构建一张指向函数指针的指针表。这需要先运行一个前置内核,在设备上设置好这些函数指针。看起来相当繁琐。
让函子带一个参数,并根据该参数选择设备函数,可能更简单。像这样:
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>
// First selectable function: single-precision sine of x.
// Marked __host__ __device__, so it can be compiled for both host and device.
__host__ __device__ float f1(float x)
{
    const float sine = sinf(x);
    return sine;
}
// Second selectable function: single-precision cosine of x.
// Marked __host__ __device__, so it can be compiled for both host and device.
__host__ __device__ float f2(float x)
{
    const float cosine = cosf(x);
    return cosine;
}
// Functor that overwrites an element in place with f1(y) or f2(y).
//
// The member h is a selector fixed at construction:
//   h == 1 -> y = f1(y)
//   h == 2 -> y = f2(y)
//   any other value leaves y unchanged.
struct euler_functor
{
    unsigned h;
    // explicit: an unsigned value must not silently convert into a functor.
    explicit euler_functor(unsigned _h) : h(_h) {}
    __host__ __device__
    void operator()(float& y) const {
        switch (h) {
        case 1: y = f1(y); break;
        case 2: y = f2(y); break;
        default: break;  // unknown selector: leave y untouched
        }
    }
};
int main(void)
{
const unsigned N = 8;
//用10个元素分配三个device_vectors
thrust :: device_vector< float> X(N);
// initilaize to random vaues
thrust :: sequence(X.begin(),X.end(),0.0f,(float)(6.283 /(float)N)
//为X
的每个元素应用euler thrust :: for_each(X.begin(),X.end(),euler_functor(1));
//打印值
for(int i = 0; i< N; i ++)std :: cout< X [i] << std :: endl;
std :: cout<< ******************< std :: endl;
thrust :: sequence(X.begin(),X.end(),0.0f,(float)(6.283 /(float)N)
//为X
的每个元素应用euler thrust :: for_each(X.begin(),X.end(),euler_functor(2));
//打印值
for(int i = 0; i< N; i ++)std :: cout< X [i] << std :: endl;
}
I am trying to implement ODE solver routines that run on GPUs, using CUDA Thrust iterators to solve a set of equations on the GPU. Going into the details, here is a small piece of code:
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>

// Example function of two variables: returns cos(y) * sin(x).
__host__ __device__ float f(float x, float y)
{
    return cos(y)*sin(x);
}

// Functor from the question: one explicit Euler update, y += h * f(x, y),
// followed by x += h, where h is the value passed to the constructor.
//
// NOTE(review): this is deliberately kept as the asker wrote it, because the
// compiler diagnostics quoted in the question refer to it. It does not compile:
//   - x and y are const references, so both += updates are rejected
//     ("expression must be a modifiable lvalue");
//   - operator() is declared to return float but has no return statement;
//   - the function-pointer parameter is declared as float(*)(double, double).
struct euler_functor
{
    const float h;

    euler_functor(float _h) : h(_h) {};

    __host__ __device__
    float operator()( float(*f)(double,double),const float& x, const float& y) const {
        y += h * (*f)( x, y );
        x += h;
    }
};

int main(void)
{
    // allocate one device_vector with 10 elements
    thrust::device_vector<int> X(10);

    // initialize to random values
    thrust::generate(X.begin(), X.end(), rand);

    // apply euler for each element of X
    // NOTE(review): kept as in the question. euler_functor's constructor takes a
    // single float, so this three-argument construction is rejected by nvcc.
    thrust::for_each(X.begin(),X.end(),euler_functor(f,0.0,X));

    // print the values
    for(int i = 0; i < 10; i++) std::cout<< X[i]<< std::endl;
}
But when I compile with
nvcc euler.cu -o euler.x -lm
the following errors occur:
lala.cu(29): error: explicit type is missing ("int" assumed)
lala.cu(29): error: expected a ";"
lala.cu(33): error: expression must be a modifiable lvalue
lala.cu(34): error: expression must be a modifiable lvalue
lala.cu(35): warning: missing return statement at end of non-void function "euler_functor::operator()"
lala.cu(46): error: no suitable constructor exists to convert from "float (float, float)" to "euler_functor"
lala.cu(46): error: expected a ")"
It seems it is not possible to use pointers to functions in the way I am trying?
Suggestions for better ways to implement the Euler procedure and run it using iterators would be much appreciated.
Is the former approach a good compromise between portability and performance?
In the end, the ideal solution for me would be to be able to define an array of pointers to functions, like:
// Function-pointer type: points to a function taking an int and returning an int.
typedef int (*foo_ptr_t)( int );
// Table with room for two such function pointers.
foo_ptr_t foo_ptr_array[2];
// Declarations of the two functions to be stored in the table.
int f1( int );
int f2( int );
// Store the two functions in the table ...
foo_ptr_array[0] = f1;
foo_ptr_array[1] = f2;
// ... and call through the first entry with the argument 1.
foo_ptr_array[0]( 1 );
The goal is to pass foo_ptr_array as an argument to the Euler functor. Is that possible?
Thanks for any answers.
Possible improvement:
Is it possible to define a set of coupled differential equations as functors over tuples, as I try in the following approach? Can I get some error information from the numerical approach to the solution?
It would be
解决方案
Ultimately, you are asking to take a __device__ function argument in host code, and then pass it as a (function) pointer, in what is ultimately (under the hood) a kernel argument, generated by Thrust.
It is illegal to take the address of a __device__ function argument in host code, so passing a __device__ function pointer as an argument this way won't work.
It might be possible to work around this by creating additional __device__ variables (pointers) to store function pointers on the device, and then using cudaGetSymbolAddress to build a table of pointers-to-pointers to functions. This would necessitate running a precursor kernel to set up the function pointers on the device. It seems rather messy.
It might be simpler to parameterize the functor to select a device function based on the parameter. Like this:
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>

// First selectable function: single-precision sine of x.
__host__ __device__ float f1(float x)
{
    return sinf(x);
}

// Second selectable function: single-precision cosine of x.
__host__ __device__ float f2(float x)
{
    return cosf(x);
}

// Functor that overwrites an element in place with f1(y) or f2(y).
//
// The member h is a selector fixed at construction:
//   h == 1 -> y = f1(y)
//   h == 2 -> y = f2(y)
//   any other value leaves y unchanged.
struct euler_functor
{
    unsigned h;

    // explicit: an unsigned value must not silently convert into a functor.
    explicit euler_functor(unsigned _h) : h(_h) {}

    __host__ __device__
    void operator()(float &y) const {
        if (h == 1) y = f1(y);
        else if (h == 2) y = f2(y);
    }
};

int main(void)
{
    const unsigned N = 8;

    // allocate one device_vector with N elements
    thrust::device_vector<float> X(N);

    // fill X with the sequence 0, s, 2s, ... where s = 6.283 / N
    thrust::sequence(X.begin(), X.end(), 0.0f,
                     static_cast<float>(6.283 / static_cast<float>(N)));

    // apply the functor with selector 1 to each element of X
    thrust::for_each(X.begin(), X.end(), euler_functor(1));

    // print the values
    for (unsigned i = 0; i < N; i++) std::cout << X[i] << std::endl;

    std::cout << "******************" << std::endl;

    // reset X to the same sequence before the second pass
    thrust::sequence(X.begin(), X.end(), 0.0f,
                     static_cast<float>(6.283 / static_cast<float>(N)));

    // apply the functor with selector 2 to each element of X
    thrust::for_each(X.begin(), X.end(), euler_functor(2));

    // print the values
    for (unsigned i = 0; i < N; i++) std::cout << X[i] << std::endl;
}
这篇关于将函数作为 CUDA Thrust 迭代器参数的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!