Thrust 操作后主机数组为空 [英] thrust operations empty host array

查看:222
本文介绍了推力操作空主机数组的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想做一些 Thrust 操作,但不确定具体应该怎么做。



现在,我得到的是一个全为零的数组(h_a 数组)。

我有:

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime_api.h>

#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>


// Unary function object that returns its argument multiplied by itself.
// The call operator is marked __host__ __device__.
template <class T>
struct square
{
    __host__ __device__
    T operator()(const T& value) const { return value * value; }
};


// Demo program from the question: fills two host vectors with rand() values,
// copies them to the device, runs a chain of thrust::transform calls and
// prints the first five entries of h_a. argc and argv are not used.
//
// NOTE: three of the transform calls below are given an empty input range
// (begin, begin). The accompanying answer identifies this as the cause of
// the all-zero output; the calls are kept as posted so the question and the
// answer still line up.
int
main(
    int argc,
    const char * argv[] )
{
    const size_t NbOfPoints = 256;

    int BlocksPerGridX = 16;
    int BlocksPerGridY = 16;

    int ThreadsPerBlockX = 16;
    int ThreadsPerBlockY = 16;

    // generate random data on the host
    thrust::host_vector<float> h_Kx( NbOfPoints );
    thrust::generate( h_Kx.begin(), h_Kx.end(), rand );

    thrust::host_vector<float> h_Ky( NbOfPoints );
    thrust::generate( h_Ky.begin(), h_Ky.end(), rand );

    // transfer to device
    thrust::device_vector<float> dev_Kx = h_Kx;
    thrust::device_vector<float> dev_Ky = h_Ky;

    // create arrays for holding the number of threads per block in each dimension
    // NOTE: the cudaMalloc return status is not checked, and X / Y are never
    // released with cudaFree in this function.
    int * X, * Y;
    cudaMalloc( (void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X) );
    cudaMalloc( (void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y) );

    // wrap raw pointer with a device_ptr
    thrust::device_ptr<int> dev_X( X );
    thrust::device_ptr<int> dev_Y( Y );

    // use device_ptr in Thrust algorithms: zero-fill both raw arrays
    thrust::fill( dev_X, dev_X + ( ThreadsPerBlockX * BlocksPerGridX ), (int) 0 );
    thrust::fill( dev_Y, dev_Y + ( ThreadsPerBlockY * BlocksPerGridY ), (int) 0 );

    // setup arguments
    // NOTE: square<float> is applied below to int inputs (distX / distY) with
    // unsigned int outputs; the question's UPDATE switches this to square<int>.
    square<float> square_op;

    // create various vectors
    thrust::device_vector<int> distX( NbOfPoints );
    thrust::device_vector<int> distY( NbOfPoints );
    thrust::device_vector<unsigned int> Tmp( NbOfPoints );
    thrust::host_vector<unsigned int> h_a( NbOfPoints );
    thrust::device_vector<unsigned int> distXSquared( NbOfPoints );
    thrust::device_vector<unsigned int> distYSquared( NbOfPoints );

    // compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
    // NOTE: first and last are both .begin(), so the input range is empty and
    // nothing is written to distX / distY.
    thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X, distX.begin(), thrust::minus<float>() );
    thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y, distY.begin(), thrust::minus<float>() );

    // square distances
    thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
    thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );

    // compute Tmp = distXSquared + distYSquared
    // NOTE: again an empty (begin, begin) range, so Tmp is not written.
    thrust::transform( distXSquared.begin(), distXSquared.begin(), distYSquared.begin(), Tmp.begin(), thrust::plus<unsigned int>() );
    thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );

    // print the first five elements of h_a
    for ( int i = 0; i < 5; i ++ )
        printf("\n temp = %u",h_a[ i ] );

    return 0;
}

UPDATE:



除了Robert Crovella的修改外,您还必须修改为整数:

square<int> square_op;
thrust :: transform(dev_Kx.begin(),dev_Kx.end(),dev_X,distX.begin(),thrust :: minus< int>());
thrust :: transform(dev_Ky.begin(),dev_Ky.end(),dev_Y,distY.begin(),thrust :: minus< int>());


解决方案

您的代码中有多处长度为零的 transform 调用:

thrust :: transform(dev_Kx.begin(),dev_Kx.begin(),dev_X,distX.begin(),thrust :: minus< float>());
thrust :: transform(dev_Ky.begin(),dev_Ky.begin(),dev_Y,distY.begin(),thrust :: minus< float>());

和:

thrust :: transform(distXSquared.begin(),distXSquared.begin(),distYSquared.begin(),Tmp.begin(),thrust :: plus< unsigned int>());

由于上述每个 transform 的前两个参数相同,输入范围为空,实际没有处理任何元素。您大概是想在第二个参数位置使用相应的 .end() 迭代器,而不是 .begin()。

做了这些修改之后,我打印出来的就是非零值了。这些值相当大,不过您似乎是在对很大的数求平方,所以我不确定您的意图是什么。


I want to do some thrust operations but I am not sure how exactly.

Right now, I am receiving an array full of zeros (the h_a array).

I have :

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime_api.h>

#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>


// Unary function object that returns its argument multiplied by itself.
// The call operator is marked __host__ __device__.
template <class T>
struct square
{
    __host__ __device__
    T operator()( const T& value ) const { return value * value; }
};


// Demo program from the question: fills two host vectors with rand() values,
// copies them to the device, runs a chain of thrust::transform calls and
// prints the first five entries of h_a. argc and argv are not used.
//
// NOTE: three of the transform calls below are given an empty input range
// (begin, begin). The accompanying answer identifies this as the cause of
// the all-zero output.
int
main(
             int argc,
    const char * argv[] )
{
    const size_t NbOfPoints  = 256;

    int BlocksPerGridX    = 16;
    int BlocksPerGridY    = 16;

    int ThreadsPerBlockX  = 16;
    int ThreadsPerBlockY  = 16;

    // generate random data on the host
    thrust::host_vector<float> h_Kx ( NbOfPoints );
    thrust::generate( h_Kx.begin(), h_Kx.end(), rand );

    thrust::host_vector<float> h_Ky ( NbOfPoints );
    thrust::generate( h_Ky.begin(), h_Ky.end(), rand );

    // transfer to device
    thrust::device_vector<float> dev_Kx = h_Kx;
    thrust::device_vector<float> dev_Ky = h_Ky;

    // create arrays for holding the number of threads per block in each dimension
    // NOTE: the cudaMalloc return status is not checked, and X / Y are never
    // released with cudaFree in this function.
    int * X , * Y;
    cudaMalloc((void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X) );
    cudaMalloc((void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y) );

    // wrap raw pointer with a device_ptr
    thrust::device_ptr<int> dev_X ( X );
    thrust::device_ptr<int> dev_Y ( Y );

    // use device_ptr in Thrust algorithms: zero-fill both raw arrays
    thrust::fill( dev_X, dev_X + ( ThreadsPerBlockX * BlocksPerGridX ) , (int) 0 );
    thrust::fill( dev_Y, dev_Y + ( ThreadsPerBlockY * BlocksPerGridY ) , (int) 0 );

    // setup arguments
    // NOTE: square<float> is applied below to int inputs (distX / distY) with
    // unsigned int outputs; the question's UPDATE switches this to square<int>.
    square<float> square_op;

    // create various vectors
    thrust::device_vector<int> distX ( NbOfPoints );
    thrust::device_vector<int> distY ( NbOfPoints );
    thrust::device_vector<unsigned int> Tmp ( NbOfPoints );
    thrust::host_vector<unsigned int> h_a ( NbOfPoints );
    thrust::device_vector<unsigned int> distXSquared ( NbOfPoints );
    thrust::device_vector<unsigned int> distYSquared ( NbOfPoints );


    // compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
    // NOTE: first and last are both .begin(), so the input range is empty and
    // nothing is written to distX / distY.
    thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
    thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

    // square distances (these two calls do cover the full begin..end range)
    thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
    thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );

    // compute Tmp = distXSquared + distYSquared
    // NOTE: again an empty (begin, begin) range, so Tmp is not written.
    thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
    thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );


    // print the first five elements of h_a
    for ( int i = 0; i < 5; i ++ )
        printf("\n temp = %u",h_a[ i ] );


return 0;
}

UPDATE:

Apart from the edits suggested by Robert Crovella, you must also switch to integers:

square<int> square_op;
thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );

解决方案

You've got several instances of doing zero-length transforms:

thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

and:

thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );

Since the first two parameters to each of the above transforms are the same, the input range is empty and the work being done is zero. Presumably you want the corresponding .end() iterators in the second position rather than .begin().

When I make those changes, I got non-zero values printed out. They are quite large, but you appear to be squaring large values, so I'm not sure what your intent is.

这篇关于推力操作空主机数组的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆