功能推力迭代CUDA的说法 [英] function as argument of thrust iterator CUDA

查看：292 发布时间：2016/8/22 15:46:47 c++ c cuda thrust

本文介绍了功能推力迭代CUDA的说法的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我想实现使用CUDA :: Thurst迭代器，解决了一堆在GPU方程的GPU上运行微分方程求解程序，要到细节，这里是一小块code的：

 的#include＆LT;推力/ device_vector.h＆GT;
    ＃包括LT＆;推力/ transform.h＆GT;
    ＃包括LT＆;推力/ sequence.h＆GT;
    ＃包括LT＆;推力/ copy.h＆GT;
    ＃包括LT＆;推力/ fill.h＆GT;
    ＃包括LT＆;推力/ replace.h＆GT;
    ＃包括LT＆;推力/ functional.h＆GT;    ＃包括LT＆;推力/ for_each.h＆GT;
    ＃包括LT＆;推力/ device_vector.h＆GT;
    ＃包括LT＆;推力/迭代器/ zip_iterator.h＆GT;
   ＃包括LT＆;＆iostream的GT;
   ＃包括LT＆;＆math.h中GT;
   __host__ __device__浮动F（浮法X，浮法Y）
   {
     返回COS（Y）* SIN（X）;
   }   结构euler_functor
   {
   常量浮动H;   euler_functor（浮点_H）：H（_H）{};   __host__ __device__
   浮动运算符（）（浮点（* F）（双，双），常量浮动＆放大器; X，常量浮动＆放大器; Y）{常量
   Y + = H *（* F）（X，Y）;
   X + = H;
   }
   };
   INT主要（无效）
   {
   //分配3 device_vectors有10个元素
   推力:: device_vector＆LT;＆诠释GT; X（10）;
   // initilaize随机vaues
   推力::生成（X.begin（），X.end（），RAND）;
   //申请欧拉对于x的每个元素
   推力:: for_each的（X.begin（），X.end（），euler_functor（F，0.0，X））;
   //打印值
   的for（int i = 0;我小于10;我++）的std ::法院LT＆;＆LT; X [1]  - ;＆下;的std :: ENDL;   }

但是，当我编译

NVCC euler.cu -o euler.x -lm
出现以下错误：
  lala.cu（29）：错误：显式类型缺失（INT假设）lala.cu（29）：错误：预期;lala.cu（33）：错误：EX pression必须修改的左值lala.cu（34）：错误：EX pression必须修改的左值lala.cu（35）：警告：在非void函数的末尾缺少return语句euler_functor ::运算符（）lala.cu（46）：错误：没有合适的构造存在，从浮动（浮动，浮动）到euler_functor转换lala.cu（46）：错误：预期）
 
现在看来似乎是在路上无法使用的函数指针我想？
有关实施欧拉程序，使用迭代器将是非常美联社preciated运行它更好的方式sugestions。
是前一种方法partability和性能之间的良好折衷？
在年底有望对我来说，理想的解决方案是能够定义指针数组功能，如：
 的typedef INT（* foo_ptr_t）（INT）;
foo_ptr_t foo_ptr_array [2];INT F1（INT）;
INT F2（INT）;
foo_ptr_array [0] = F1;
foo_ptr_array [1] = f2的;
foo_ptr_array [0]（1）;
 
要通过foo_ptr_array作为参数传递给欧拉仿函数。这可能吗？
感谢您的回答。
更多钞票的改进：
更多钞票是在定义的元组一组耦合微分方程的fucntors我尝试以下的方法吗？我可以从数值的方法来soution得到一些错误信息？
这将是

解决方案

最后，你所要求采取 __设备__ 函数参数在主机code，然后将它作为（功能）的指针，在什么是最终一个内核参数，以产生推力（引擎盖下）。
有非法以取的地址的 __设备__ 函数参数在主机code，所以传递一个 __设备__ 函数指针作为参数这种方法是行不通的。
这可能是通过创建额外的 __ __设备变量（指针）存储设备上的函数指针来解决这个问题。然后使用 cudaGetSymbolAddress 来建立指针到指针的表功能。这将需要运行precursor内核建立函数指针设备。这似乎相当混乱。
有可能是更简单的参数化函数子来选择基于所述参数的装置的功能。艾克这样的：
 的#include＆LT;推力/ device_vector.h＆GT;
   ＃包括LT＆;推力/ transform.h＆GT;
   ＃包括LT＆;推力/ sequence.h＆GT;
   ＃包括LT＆;推力/ copy.h＆GT;
   ＃包括LT＆;推力/ fill.h＆GT;
   ＃包括LT＆;推力/ replace.h＆GT;
   ＃包括LT＆;推力/ functional.h＆GT;
   ＃包括LT＆;推力/ for_each.h＆GT;
   ＃包括LT＆;推力/迭代器/ zip_iterator.h＆GT;   ＃包括LT＆;＆iostream的GT;
   ＃包括LT＆;＆math.h中GT;
   __host__ __device__浮动F1（浮X）
   {
     返回SINF（X）;
   }   __host__ __device__浮动F2（浮X）
   {
     返回cosf（x）的;
   }
   结构euler_functor
   {
     无符号H;     euler_functor（无符号_H）：H（_H）{};     __host__ __device__
     void运算符（）（浮动＆安培; Y）{常量
       如果（H == 1）Y = F1（Y）;
       否则，如果（H == 2）Y = F2（Y）;
     }
   };
   INT主要（无效）
   {
     const的无符号的N = 8;
     //分配3 device_vectors有10个元素
     推力:: device_vector＆LT;浮动＆GT; X（N）;
     // initilaize随机vaues
     推力::序列（X.begin（），X.end（），0.0（浮点）（6.283 /（浮点）N））;
     //申请欧拉对于x的每个元素
     推力::的for_each（X.begin（），X.end（），euler_functor（1））;
     //打印值
     的for（int i = 0; I＆LT; N;我++）的std ::法院LT＆;＆LT; X [1]  - ;＆下;的std :: ENDL;     性病::法院LT＆;＆LT; ******************＆LT;＆LT;的std :: ENDL;     推力::序列（X.begin（），X.end（），0.0（浮点）（6.283 /（浮点）N））;
     //申请欧拉对于x的每个元素
     推力::的for_each（X.begin（），X.end（），euler_functor（2））;
     //打印值
     的for（int i = 0; I＆LT; N;我++）的std ::法院LT＆;＆LT; X [1]  - ;＆下;的std :: ENDL;   }
 
I am trying to implement ODEs solver routines running on GPUs using CUDA::Thurst iterators to solve a bunch of equations in the GPU, going to the details, here is a small piece of code:
    #include <thrust/device_vector.h>
    #include <thrust/transform.h> 
    #include <thrust/sequence.h>
    #include <thrust/copy.h> 
    #include <thrust/fill.h>
    #include <thrust/replace.h>
    #include <thrust/functional.h>

    #include <thrust/for_each.h>
    #include <thrust/device_vector.h>
    #include <thrust/iterator/zip_iterator.h>


   #include <iostream>
   #include <math.h>


   __host__ __device__ float f(float x, float y)
   {
     return cos(y)*sin(x);
   }



   struct euler_functor
   {
   const float h;

   euler_functor(float _h) : h(_h) {};

   __host__ __device__
   float operator()( float(*f)(double,double),const float& x, const float& y) const {
   y +=  h * (*f)( x, y );
   x += h;
   }
   };


   int main(void)
   {
   // allocate three device_vectors with 10 elements
   thrust::device_vector<int> X(10);
   // initilaize to random vaues
   thrust::generate(X.begin(), X.end(), rand);
   // apply euler for each element of X
   thrust::for_each(X.begin(),X.end(),euler_functor(f,0.0,X));
   // print the values
   for(int i = 0; i < 10; i++) std::cout<< X[i]<< std::endl;

   }
But when I compile

nvcc euler.cu -o euler.x -lm the following errors occurs:
    lala.cu(29): error: explicit type is missing ("int" assumed)

lala.cu(29): error: expected a ";"

lala.cu(33): error: expression must be a modifiable lvalue

lala.cu(34): error: expression must be a modifiable lvalue

lala.cu(35): warning: missing return statement at end of non-void function "euler_functor::operator()"

lala.cu(46): error: no suitable constructor exists to convert from "float (float, float)" to "euler_functor"

lala.cu(46): error: expected a ")"
it seems like it is not possible use pointers to functions in the way I am trying?

sugestions for better ways to implement the Euler procedure and run it using iterators will be very appreciated.

is the former approach a good compromise between partability and performance?

At the end hopefully the ideal solution for me is be able to define an array of pointer to functions like:
typedef int (*foo_ptr_t)( int );
foo_ptr_t foo_ptr_array[2];

int f1( int );
int f2( int );
foo_ptr_array[0] = f1;
foo_ptr_array[1] = f2;
foo_ptr_array[0]( 1 );
To pass foo_ptr_array as argument to the euler functor. Is it possible?

Thanks for Answer.

Posible improvement:

Is posible define the a set coupled differential equations as fucntors over tuples as I try in following approach? Can I get some error information from the numerical approach to the soution?

It would be
解决方案
Ultimately, you are asking to take a __device__ function argument in host code, and then pass it as a (function) pointer, in what is ultimately (under the hood) a kernel argument, generated by thrust.

It is illegal to take the address of a __device__ function argument in host code, so passing a __device__ function pointer as an argument this way won't work.

It might be possible to work around this by creating additional __device__ variables (pointers) to store function pointers on the device. Then use cudaGetSymbolAddress to build a table of pointers-to-pointers to functions. This would necessitate running a precursor kernel to set up the function pointers on the device. It seems rather messy.

It might be simpler to parameterize the functor to select a device function based on the parameter. Lke this:
   #include <thrust/device_vector.h>
   #include <thrust/transform.h>
   #include <thrust/sequence.h>
   #include <thrust/copy.h>
   #include <thrust/fill.h>
   #include <thrust/replace.h>
   #include <thrust/functional.h>
   #include <thrust/for_each.h>
   #include <thrust/iterator/zip_iterator.h>

   #include <iostream>
   #include <math.h>


   __host__ __device__ float f1(float x)
   {
     return sinf(x);
   }

   __host__ __device__ float f2(float x)
   {
     return cosf(x);
   }




   struct euler_functor
   {
     unsigned h;

     euler_functor(unsigned _h) : h(_h) {};

     __host__ __device__
     void operator()(float &y) const  {
       if (h == 1) y = f1(y);
       else if (h == 2) y = f2(y);
     }
   };


   int main(void)
   {
     const unsigned N = 8;
     // allocate three device_vectors with 10 elements
     thrust::device_vector<float> X(N);
     // initilaize to random vaues
     thrust::sequence(X.begin(), X.end(),  0.0f, (float)(6.283/(float)N));
     // apply euler for each element of X
     thrust::for_each(X.begin(),X.end(),euler_functor(1));
     // print the values
     for(int i = 0; i < N; i++) std::cout<< X[i]<< std::endl;

     std::cout << "******************" << std::endl;

     thrust::sequence(X.begin(), X.end(),  0.0f, (float)(6.283/(float)N));
     // apply euler for each element of X
     thrust::for_each(X.begin(),X.end(),euler_functor(2));
     // print the values
     for(int i = 0; i < N; i++) std::cout<< X[i]<< std::endl;

   }
这篇关于功能推力迭代CUDA的说法的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

功能推力迭代CUDA的说法 [英] function as argument of thrust iterator CUDA

问题描述

相关文章

C/C++开发最新文章

热门教程

热门工具

登录关闭

功能推力迭代CUDA的说法 [英] function as argument of thrust iterator CUDA

问题描述

相关文章

C/C++开发最新文章

热门教程

热门工具

登录 关闭

登录关闭