将函数作为 CUDA Thrust 迭代器的参数 [英] Function as argument of Thrust iterator in CUDA
问题描述
我正在尝试使用 CUDA Thrust 迭代器实现在 GPU 上运行的常微分方程(ODE)求解程序,以便在 GPU 上求解一批方程。具体来说,下面是一小段代码:
的#include<推力/ device_vector.h>
#包括LT&;推力/ transform.h>
#包括LT&;推力/ sequence.h>
#包括LT&;推力/ copy.h>
#包括LT&;推力/ fill.h>
#包括LT&;推力/ replace.h>
#包括LT&;推力/ functional.h> #包括LT&;推力/ for_each.h>
#包括LT&;推力/ device_vector.h>
#包括LT&;推力/迭代器/ zip_iterator.h>
#包括LT&;&iostream的GT;
#包括LT&;&math.h中GT;
__host__ __device__浮动F(浮法X,浮法Y)
{
返回COS(Y)* SIN(X);
} 结构euler_functor
{
常量浮动H; euler_functor(浮点_H):H(_H){}; __host__ __device__
浮动运算符()(浮点(* F)(双,双),常量浮动&放大器; X,常量浮动&放大器; Y){常量
Y + = H *(* F)(X,Y);
X + = H;
}
};
INT主要(无效)
{
//分配3 device_vectors有10个元素
推力:: device_vector<&诠释GT; X(10);
// initilaize随机vaues
推力::生成(X.begin(),X.end(),RAND);
//申请欧拉对于x的每个元素
推力:: for_each的(X.begin(),X.end(),euler_functor(F,0.0,X));
//打印值
的for(int i = 0;我小于10;我++)的std ::法院LT&;< X [1] - ;&下;的std :: ENDL; }
但是,当我编译
nvcc euler.cu -o euler.x -lm
出现以下错误:
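lala.cu(29): error: explicit type is missing ("int" assumed)
lala.cu(29): error: expected a ";"
lala.cu(33): error: expression must be a modifiable lvalue
lala.cu(34): error: expression must be a modifiable lvalue
lala.cu(35): warning: missing return statement at end of non-void function "euler_functor::operator()"
lala.cu(46): error: no suitable constructor exists to convert from "float (float, float)" to "euler_functor"
lala.cu(46): error: expected a ")"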
看来无法按照我尝试的这种方式使用函数指针?
如果有更好的方法来实现欧拉法并使用迭代器运行它,非常感谢大家提出建议。
前一种方法是否在可移植性和性能之间取得了良好的折衷?
最终,对我来说理想的解决方案是能够定义一个函数指针数组,例如:
的typedef INT(* foo_ptr_t)(INT);
foo_ptr_t foo_ptr_array [2];INT F1(INT);
INT F2(INT);
foo_ptr_array [0] = F1;
foo_ptr_array [1] = f2的;
foo_ptr_array [0](1);
然后将 foo_ptr_array 作为参数传递给欧拉仿函数。这可能吗?
感谢您的回答。
可能的改进:
是否可以像我在下面的方法中尝试的那样,把一组耦合微分方程定义为作用于元组的仿函数(functor)?我能否从数值方法中获得关于解的误差信息?
这将是
解决方案
归根结底,你是想在主机代码中取得一个 __device__ 函数参数,然后把它作为(函数)指针传递出去,而它最终(在底层)会成为由 Thrust 生成的内核的参数。在主机代码中获取 __device__ 函数的地址是非法的,因此以这种方式把 __device__ 函数指针作为参数传递是行不通的。
一种可能的变通办法是创建额外的 __device__ 变量(指针),用来在设备上保存函数指针,然后使用 cudaGetSymbolAddress 建立一个指向这些函数指针的指针表。这需要先运行一个前置内核,在设备上设置好函数指针,看起来相当繁琐。
更简单的做法可能是对仿函数进行参数化,让它根据参数来选择要调用的设备函数。例如:
的#include<推力/ device_vector.h>
#包括LT&;推力/ transform.h>
#包括LT&;推力/ sequence.h>
#包括LT&;推力/ copy.h>
#包括LT&;推力/ fill.h>
#包括LT&;推力/ replace.h>
#包括LT&;推力/ functional.h>
#包括LT&;推力/ for_each.h>
#包括LT&;推力/迭代器/ zip_iterator.h> #包括LT&;&iostream的GT;
#包括LT&;&math.h中GT;
__host__ __device__浮动F1(浮X)
{
返回SINF(X);
} __host__ __device__浮动F2(浮X)
{
返回cosf(x)的;
}
结构euler_functor
{
无符号H; euler_functor(无符号_H):H(_H){}; __host__ __device__
void运算符()(浮动&安培; Y){常量
如果(H == 1)Y = F1(Y);
否则,如果(H == 2)Y = F2(Y);
}
};
INT主要(无效)
{
const的无符号的N = 8;
//分配3 device_vectors有10个元素
推力:: device_vector<浮动> X(N);
// initilaize随机vaues
推力::序列(X.begin(),X.end(),0.0(浮点)(6.283 /(浮点)N));
//申请欧拉对于x的每个元素
推力::的for_each(X.begin(),X.end(),euler_functor(1));
//打印值
的for(int i = 0; I< N;我++)的std ::法院LT&;< X [1] - ;&下;的std :: ENDL; 性病::法院LT&;< ******************<<的std :: ENDL; 推力::序列(X.begin(),X.end(),0.0(浮点)(6.283 /(浮点)N));
//申请欧拉对于x的每个元素
推力::的for_each(X.begin(),X.end(),euler_functor(2));
//打印值
的for(int i = 0; I< N;我++)的std ::法院LT&;< X [1] - ;&下;的std :: ENDL; }I am trying to implement ODEs solver routines running on GPUs using CUDA::Thurst iterators to solve a bunch of equations in the GPU, going to the details, here is a small piece of code:
#include <thrust/device_vector.h> #include <thrust/transform.h> #include <thrust/sequence.h> #include <thrust/copy.h> #include <thrust/fill.h> #include <thrust/replace.h> #include <thrust/functional.h> #include <thrust/for_each.h> #include <thrust/device_vector.h> #include <thrust/iterator/zip_iterator.h> #include <iostream> #include <math.h> __host__ __device__ float f(float x, float y) { return cos(y)*sin(x); } struct euler_functor { const float h; euler_functor(float _h) : h(_h) {}; __host__ __device__ float operator()( float(*f)(double,double),const float& x, const float& y) const { y += h * (*f)( x, y ); x += h; } }; int main(void) { // allocate three device_vectors with 10 elements thrust::device_vector<int> X(10); // initilaize to random vaues thrust::generate(X.begin(), X.end(), rand); // apply euler for each element of X thrust::for_each(X.begin(),X.end(),euler_functor(f,0.0,X)); // print the values for(int i = 0; i < 10; i++) std::cout<< X[i]<< std::endl; }
But when I compile with
nvcc euler.cu -o euler.x -lm
the following errors occur:
lala.cu(29): error: explicit type is missing ("int" assumed) lala.cu(29): error: expected a ";" lala.cu(33): error: expression must be a modifiable lvalue lala.cu(34): error: expression must be a modifiable lvalue lala.cu(35): warning: missing return statement at end of non-void function "euler_functor::operator()" lala.cu(46): error: no suitable constructor exists to convert from "float (float, float)" to "euler_functor" lala.cu(46): error: expected a ")"
It seems it is not possible to use pointers to functions in the way I am trying?
Suggestions for better ways to implement the Euler procedure and run it using iterators would be very much appreciated.
Is the former approach a good compromise between portability and performance?
In the end, the ideal solution for me would be to be able to define an array of pointers to functions like:
typedef int (*foo_ptr_t)( int ); foo_ptr_t foo_ptr_array[2]; int f1( int ); int f2( int ); foo_ptr_array[0] = f1; foo_ptr_array[1] = f2; foo_ptr_array[0]( 1 );
and then pass foo_ptr_array as an argument to the Euler functor. Is it possible?
Thanks for your answers.
Possible improvement:
Is it possible to define a set of coupled differential equations as functors over tuples, as I try in the following approach? Can I get some error information about the solution from the numerical approach?
It would be
解决方案
Ultimately, you are asking to take a __device__ function argument in host code, and then pass it as a (function) pointer, in what is ultimately (under the hood) a kernel argument, generated by Thrust. It is illegal to take the address of a __device__ function argument in host code, so passing a __device__ function pointer as an argument this way won't work.
It might be possible to work around this by creating additional __device__ variables (pointers) to store function pointers on the device, and then using cudaGetSymbolAddress to build a table of pointers-to-pointers to functions. This would necessitate running a precursor kernel to set up the function pointers on the device. It seems rather messy.
It might be simpler to parameterize the functor to select a device function based on the parameter. Like this:
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>

// First selectable function: single-precision sine. Callable from host and
// device code.
__host__ __device__ float f1(float x)
{
  return sinf(x);
}

// Second selectable function: single-precision cosine. Callable from host and
// device code.
__host__ __device__ float f2(float x)
{
  return cosf(x);
}

// Functor that replaces a value in place with f1(value) or f2(value).
//
// The function is chosen by an integer selector stored in the functor instead
// of by a function pointer, so no address of a device function ever has to be
// taken in host code.
struct euler_functor
{
  // Selector: 1 applies f1, 2 applies f2. Any other value leaves the element
  // unchanged.
  unsigned h;

  euler_functor(unsigned _h) : h(_h) {}

  // Overwrites y with the selected function evaluated at y.
  __host__ __device__
  void operator()(float &y) const
  {
    if (h == 1)
      y = f1(y);
    else if (h == 2)
      y = f2(y);
  }
};

// Prints every element of v on its own line.
// The vector is copied to the host once up front; indexing a device_vector
// element by element would issue one device-to-host transfer per element.
static void print_values(const thrust::device_vector<float> &v)
{
  const thrust::host_vector<float> host_copy = v;
  for (size_t i = 0; i < host_copy.size(); ++i)
    std::cout << host_copy[i] << std::endl;
}

// Fills X with the evenly spaced samples 0, step, 2*step, ..., applies
// euler_functor(selector) to every element in place, and prints the result.
static void fill_apply_print(thrust::device_vector<float> &X, float step,
                             unsigned selector)
{
  thrust::sequence(X.begin(), X.end(), 0.0f, step);
  thrust::for_each(X.begin(), X.end(), euler_functor(selector));
  print_values(X);
}

int main(void)
{
  const unsigned N = 8;
  // Spacing of the samples: 6.283 (approximately 2*pi) divided into N steps.
  const float step = 6.283f / (float)N;

  // allocate one device_vector with N elements
  thrust::device_vector<float> X(N);

  // selector 1: every sample is replaced by f1 (sine) of itself
  fill_apply_print(X, step, 1);

  std::cout << "******************" << std::endl;

  // selector 2: every sample is replaced by f2 (cosine) of itself
  fill_apply_print(X, step, 2);

  return 0;
}
这篇关于功能推力迭代CUDA的说法的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!