使用CUDA Thrust对double2数组进行归约 [英] CUDA Thrust reduction with double2 arrays

查看:223
本文介绍了使用CUDA Thrust对double2数组进行归约时所遇问题的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有以下(可编译且可执行的)代码,它使用CUDA Thrust对 float2 数组进行归约,并且运行正确。

I have the following (compilable and executable) code using CUDA Thrust to perform reductions of float2 arrays. It works correctly.

using namespace std;

// includes, system 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <conio.h>

#include <typeinfo>  
#include <iostream>

// includes CUDA
#include <cuda.h>
#include <cuda_runtime.h>

// includes Thrust
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>

// float2 + struct
// Binary functor for thrust::reduce: returns the component-wise sum of two
// float2 values (x with x, y with y).
struct add_float2 {
    // __host__ __device__ makes the operator callable from host code as well as
    // device code, instead of device code only.
    __host__ __device__ float2 operator()(const float2& a, const float2& b) const {
        float2 r;
        r.x = a.x + b.x;
        r.y = a.y + b.y;
        return r;
    }
};

// double2 + struct
// Binary functor for thrust::reduce: returns the component-wise sum of two
// double2 values (x with x, y with y).
struct add_double2 {
    // __host__ __device__ makes the operator callable from host code as well as
    // device code, instead of device code only.
    __host__ __device__ double2 operator()(const double2& a, const double2& b) const {
        double2 r;
        r.x = a.x + b.x;
        r.y = a.y + b.y;
        return r;
    }
};

// Sums N float2 values on the GPU with thrust::reduce and prints both
// components of the result.
// Returns 0 on success, 1 if the host allocation or a CUDA runtime call fails.
int main(int argc, char** argv)
{
    const int N = 20;
    const size_t bytes = N * sizeof(float2);

    // --- Host: every element is (1, 2), so the expected sum is (20, 40).
    float2* ha = (float2*) malloc(bytes);
    if (ha == NULL) {
        fprintf(stderr, "Host allocation failed\n");
        return 1;
    }
    for (int i = 0; i < N; ++i) {
        ha[i].x = 1;
        ha[i].y = 2;
    }

    // --- Device: allocate and upload, checking each runtime call.
    float2* da = NULL;
    cudaError_t err = cudaMalloc((void**)&da, bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(da, ha, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        cudaFree(da);   // da is still NULL if cudaMalloc failed
        free(ha);
        return 1;
    }

    // Wrap the raw device pointer in thrust::device_ptr; [dev_ptr_1, dev_ptr_2)
    // is the range of N elements passed to thrust::reduce.
    thrust::device_ptr<float2> dev_ptr_1(da);
    thrust::device_ptr<float2> dev_ptr_2(da + N);

    float2 init; init.x = init.y = 0.0f;

    float2 sum = thrust::reduce(dev_ptr_1, dev_ptr_2, init, add_float2());

    std::cout << " Real part = " << sum.x << "; Imaginary part = " << sum.y << std::endl;

    // Release both buffers before waiting for the key press.
    cudaFree(da);
    free(ha);

    getch();   // keep the console window open until a key is pressed

    return 0;
}

但是,当我把主程序中的 float2 改为 double2 时,即

However, when I change float2 to double2 in the main program, namely

// double2 variant of the reduction: sums N double2 values on the GPU with
// thrust::reduce and prints both components of the result.
// NOTE(review): the surrounding text reports that this variant raises an
// exception at the thrust::reduce call when built with MSVC + nvcc.
// Returns 0 on success, 1 if the host allocation or a CUDA runtime call fails.
int main(int argc, char** argv)
{
    const int N = 20;
    const size_t bytes = N * sizeof(double2);

    // --- Host: every element is (1, 2), so the expected sum is (20, 40).
    double2* ha = (double2*) malloc(bytes);
    if (ha == NULL) {
        fprintf(stderr, "Host allocation failed\n");
        return 1;
    }
    for (int i = 0; i < N; ++i) {
        ha[i].x = 1;
        ha[i].y = 2;
    }

    // --- Device: allocate and upload, checking each runtime call.
    double2* da = NULL;
    cudaError_t err = cudaMalloc((void**)&da, bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(da, ha, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        cudaFree(da);   // da is still NULL if cudaMalloc failed
        free(ha);
        return 1;
    }

    // Wrap the raw device pointer in thrust::device_ptr; [dev_ptr_1, dev_ptr_2)
    // is the range of N elements passed to thrust::reduce.
    thrust::device_ptr<double2> dev_ptr_1(da);
    thrust::device_ptr<double2> dev_ptr_2(da + N);

    double2 init; init.x = init.y = 0.0;

    double2 sum = thrust::reduce(dev_ptr_1, dev_ptr_2, init, add_double2());

    std::cout << " Real part = " << sum.x << "; Imaginary part = " << sum.y << std::endl;

    // Release both buffers before waiting for the key press.
    cudaFree(da);
    free(ha);

    getch();   // keep the console window open until a key is pressed

    return 0;
}

我在 reduce 这一行收到一个异常。如何使用CUDA Thrust对 double2 数组进行归约?我做错了什么吗?提前感谢。

I receive an exception at the reduce line. How can I use CUDA Thrust reduction with double2 arrays? Am I doing anything wrong? Thanks in advance.

按照 talonmies 的回答得到的解决方案

使用命名空间std ;

using namespace std;

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <conio.h>

#include <typeinfo>
#include <iostream>

// includes CUDA
#include <cuda.h>
#include <cuda_runtime.h>

// includes Thrust
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>

// User-defined pair of doubles used in this listing in place of the built-in
// double2 vector type.
struct my_double2 {
    double x;   // first component (printed as the real part)
    double y;   // second component (printed as the imaginary part)
};

// double2 + struct
// Binary functor for thrust::reduce: returns the component-wise sum of two
// my_double2 values (x with x, y with y).
struct add_my_double2 {
    // __host__ __device__ makes the operator callable from host code as well as
    // device code, instead of device code only.
    __host__ __device__ my_double2 operator()(const my_double2& a, const my_double2& b) const {
        my_double2 r;
        r.x = a.x + b.x;
        r.y = a.y + b.y;
        return r;
    }
};

// Sums N my_double2 values on the GPU with thrust::reduce and prints both
// components of the result.
// Returns 0 on success, 1 if the host allocation or a CUDA runtime call fails.
int main(int argc, char** argv)
{
    const int N = 20;
    const size_t bytes = N * sizeof(my_double2);

    // --- Host: every element is (1, 2), so the expected sum is (20, 40).
    my_double2* ha = (my_double2*) malloc(bytes);
    if (ha == NULL) {
        fprintf(stderr, "Host allocation failed\n");
        return 1;
    }
    for (int i = 0; i < N; ++i) {
        ha[i].x = 1;
        ha[i].y = 2;
    }

    // --- Device: allocate and upload, checking each runtime call.
    my_double2* da = NULL;
    cudaError_t err = cudaMalloc((void**)&da, bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(da, ha, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        cudaFree(da);   // da is still NULL if cudaMalloc failed
        free(ha);
        return 1;
    }

    // Wrap the raw device pointer in thrust::device_ptr; [dev_ptr_1, dev_ptr_2)
    // is the range of N elements passed to thrust::reduce.
    thrust::device_ptr<my_double2> dev_ptr_1(da);
    thrust::device_ptr<my_double2> dev_ptr_2(da + N);

    my_double2 init; init.x = init.y = 0.0;

    // Progress marker printed just before the reduction runs.
    std::cout << "here3\n";
    my_double2 sum = thrust::reduce(dev_ptr_1, dev_ptr_2, init, add_my_double2());

    std::cout << " Real part = " << sum.x << "; Imaginary part = " << sum.y << std::endl;

    // Release both buffers before waiting for the key press.
    cudaFree(da);
    free(ha);

    getch();   // keep the console window open until a key is pressed

    return 0;
}


推荐答案

这是MSVC与nvcc之间的一个已知不兼容问题,例如请参见此处。解决方案是定义您自己的 double2 版本,并改用它。

This is a known incompatibility with MSVC and nvcc. See here for example. The solution is to define your own version of double2 and use that instead.

仅供参考,我可以在装有CUDA 5.5的64位Linux机器上正确编译并运行您的代码。

Just for reference, I can compile and run your code correctly on a Linux 64 bit box with CUDA 5.5.

这篇关于使用CUDA Thrust对double2数组进行归约的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆