在 CUDA 内核中生成不同范围内的随机数 [英] Generating random number within Cuda kernel in a varying range

查看:824
本文介绍了在 CUDA 内核中生成不同范围内的随机数的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想在 CUDA 内核中生成随机数。我希望这些随机数服从均匀分布,并且是从 1 到 8 的整数。每个线程的随机数各不相同。可生成随机数的范围也会因线程而异:某个线程中范围的最大值可能低至 2,而在另一个线程中可能高达 8,但不会超过 8。下面举例说明我希望生成的数字:

在线程#1 --> 最大范围是2,所以随机数应在1和2之间
在线程#2 --> 最大范围是6,所以随机数应在1和6之间
在线程#3 --> 最大范围是5,所以随机数应在1和5之间

等。 ..



任何帮助将非常感谢。谢谢。

解决方案

编辑:我已修改我的答案,以修正其他回答(@tudorturcu)和评论中指出的一些不足。

  • 使用 CURAND 生成 0.0 到 1.0 之间的均匀分布(参见 docs.nvidia.com/cuda/curand/device-api-overview.html#distributions)。

  • 然后将其乘以所需范围(最大值 - 最小值 + 0.999999)。

  • 然后添加偏移量(+最小值)。

  • 然后截断为整数。

  • 在您的设备代码中有类似的东西:

      int idx = threadIdx.x + blockDim.x * blockIdx.x;
    // Fragment of device code: draws one integer in
    // [min_rand_int[idx], max_rand_int[idx]] for the calling thread.
    // assume curand has already been set up and a state generated for each thread...
    // assume ranges vary by thread index
    float myrandf = curand_uniform(&(my_curandstate[idx]));
    // Scale by (range + 0.999999) so that truncation can still reach the maximum.
    myrandf *= (max_rand_int[idx] - min_rand_int[idx] + 0.999999);
    // Shift so the smallest possible result is the minimum.
    myrandf += min_rand_int[idx];
    int myrand = (int)truncf(myrandf);

    您应该:

      #include <math.h>

    以便使用 truncf



    这是一个完整的例子:

      $ cat t527.cu 
    #include <stdio.h>
    #include <curand.h>
    #include <curand_kernel.h>
    #include <math.h>
    #include <assert.h>
    #define MIN 2
    #define MAX 7
    #define ITER 10000000

    // Initialises one cuRAND generator state per launched thread.
    // Every thread calls curand_init with the fixed seed 1234, its own global
    // thread index as the second argument and 0 as the third, storing the
    // result in state[global thread index].
    __global__ void setup_kernel(curandState *state){
      const int tid = blockIdx.x*blockDim.x + threadIdx.x;
      curandState *slot = state + tid;
      curand_init(1234, tid, 0, slot);
    }

    /*
     * Draws n random integers in [min_rand_int[idx], max_rand_int[idx]] for the
     * calling thread (idx = global thread index) and tallies them in a histogram.
     *
     * my_curandstate : cuRAND states indexed by global thread index; expected to
     *                  have been initialised beforehand (e.g. by setup_kernel).
     * n              : number of samples drawn by each thread.
     * max_rand_int   : inclusive upper bound, indexed by global thread index.
     * min_rand_int   : inclusive lower bound, indexed by global thread index.
     * result         : histogram; bin k counts samples equal to min_rand_int[idx] + k.
     *
     * The state and bound arrays are indexed by every launched thread, so they
     * must hold at least as many elements as there are threads in the launch.
     */
    __global__ void generate_kernel(curandState *my_curandstate, const unsigned int n, const unsigned *max_rand_int, const unsigned *min_rand_int,  unsigned int *result){

      const int idx = threadIdx.x + blockDim.x*blockIdx.x;

      // The bounds do not change during the loop; read them once.
      const unsigned lo = min_rand_int[idx];
      const unsigned hi = max_rand_int[idx];

      // Advance a local copy of the generator state and store it back at the end.
      curandState local_state = my_curandstate[idx];

      // Unsigned counter: n is unsigned, so this avoids a signed/unsigned comparison.
      for (unsigned int count = 0; count < n; count++){
        float myrandf = curand_uniform(&local_state);
        // Scale by (range + 0.999999) so that truncation can still reach hi.
        myrandf *= (hi - lo + 0.999999);
        myrandf += lo;
        int myrand = (int)truncf(myrandf);

        assert(myrand <= hi);
        assert(myrand >= lo);
        // Atomic increment: several threads may update the same bin concurrently.
        atomicAdd(&result[myrand - lo], 1u);
      }

      my_curandstate[idx] = local_state;
    }

    // Evaluates a CUDA runtime call; on failure prints a diagnostic to stderr
    // and makes the enclosing function (main) return 1.
    #define CUDA_CHECK(call)                                              \
      do {                                                                \
        cudaError_t err_ = (call);                                        \
        if (err_ != cudaSuccess) {                                        \
          fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                  cudaGetErrorString(err_));                              \
          return 1;                                                       \
        }                                                                 \
      } while (0)

    /*
     * Test driver: initialises a single generator state, draws ITER integers in
     * [MIN, MAX] with one thread, and prints how often each value occurred.
     * Returns 0 on success, 1 if an allocation or CUDA call fails.
     */
    int main(){

      const size_t result_bytes = (MAX-MIN+1) * sizeof(unsigned);

      curandState *d_state;
      CUDA_CHECK(cudaMalloc(&d_state, sizeof(curandState)));
      unsigned *d_result, *h_result;
      unsigned *d_max_rand_int, *h_max_rand_int, *d_min_rand_int, *h_min_rand_int;
      CUDA_CHECK(cudaMalloc(&d_result, result_bytes));
      h_result = (unsigned *)malloc(result_bytes);
      CUDA_CHECK(cudaMalloc(&d_max_rand_int, sizeof(unsigned)));
      h_max_rand_int = (unsigned *)malloc(sizeof(unsigned));
      CUDA_CHECK(cudaMalloc(&d_min_rand_int, sizeof(unsigned)));
      h_min_rand_int = (unsigned *)malloc(sizeof(unsigned));
      if (!h_result || !h_max_rand_int || !h_min_rand_int){
        fprintf(stderr, "host memory allocation failed\n");
        return 1;
      }
      CUDA_CHECK(cudaMemset(d_result, 0, result_bytes));
      setup_kernel<<<1,1>>>(d_state);
      CUDA_CHECK(cudaGetLastError());   // reports launch failures

      *h_max_rand_int = MAX;
      *h_min_rand_int = MIN;
      CUDA_CHECK(cudaMemcpy(d_max_rand_int, h_max_rand_int, sizeof(unsigned), cudaMemcpyHostToDevice));
      CUDA_CHECK(cudaMemcpy(d_min_rand_int, h_min_rand_int, sizeof(unsigned), cudaMemcpyHostToDevice));
      generate_kernel<<<1,1>>>(d_state, ITER, d_max_rand_int, d_min_rand_int, d_result);
      CUDA_CHECK(cudaGetLastError());   // reports launch failures
      // Blocking copy: waits for the kernel and reports errors raised while it ran.
      CUDA_CHECK(cudaMemcpy(h_result, d_result, result_bytes, cudaMemcpyDeviceToHost));
      printf("Bin:    Count: \n");
      for (int i = MIN; i <= MAX; i++)
        printf("%d    %u\n", i, h_result[i-MIN]);   // counts are unsigned

      // Release device and host buffers.
      CUDA_CHECK(cudaFree(d_state));
      CUDA_CHECK(cudaFree(d_result));
      CUDA_CHECK(cudaFree(d_max_rand_int));
      CUDA_CHECK(cudaFree(d_min_rand_int));
      free(h_result);
      free(h_max_rand_int);
      free(h_min_rand_int);

      return 0;
    }


    $ nvcc -arch=sm_20 -o t527 t527.cu -lcurand
    $ cuda-memcheck ./t527
    ========= CUDA-MEMCHECK
    Bin:Count:
    2 1665496
    3 1668130
    4 1667644
    5 1667435
    6 1665026
    7 1666269
    ========= ERROR SUMMARY: 0 errors
    $


    I am trying to generate random numbers within a CUDA kernel. I wish to generate the random numbers from a uniform distribution and in integer form, starting from 1 up to 8. The random numbers would be different for each of the threads. The range within which random numbers can be generated would also vary from one thread to another. The maximum of the range in one thread might be as low as 2, or in another thread it can be as high as 8, but not higher than that. So, I am providing an example below of how I want the numbers to be generated:

    In thread#1 --> maximum of the range is 2 and so the random number should be between 1 and 2
    In thread#2 --> maximum of the range is 6  and so the random number should be between 1 and 6
    In thread#3 --> maximum of the range is 5 and so the random number should be between 1 and 5
    

    and so on...

    Any help would be very much appreciated. Thank you.

    解决方案

    EDIT: I've edited my answer to fix some of the deficiencies pointed out in the other answers (@tudorturcu) and comments.

    1. Use CURAND to generate a uniform distribution between 0.0 and 1.0
    2. Then multiply this by the desired range (largest value - smallest value + 0.999999).
    3. Then add the offset (+ smallest value).
    4. Then truncate to an integer.

    Something like this in your device code:

    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    // Preconditions: a curand state has already been set up for every thread,
    // and the [min, max] range is looked up per thread index.
    curandState *my_state = &(my_curandstate[idx]);
    float myrandf = curand_uniform(my_state);
    // Stretch the sample by (range + 0.999999) so truncation can still reach the maximum ...
    myrandf = myrandf * (max_rand_int[idx] - min_rand_int[idx] + 0.999999);
    // ... then shift it so the smallest possible result is the minimum.
    myrandf = myrandf + min_rand_int[idx];
    int myrand = (int)truncf(myrandf);
    

    You should:

    #include <math.h>
    

    for truncf

    Here's a fully worked example:

    $ cat t527.cu
    #include <stdio.h>
    #include <curand.h>
    #include <curand_kernel.h>
    #include <math.h>
    #include <assert.h>
    #define MIN 2
    #define MAX 7
    #define ITER 10000000
    
    // Initialises one cuRAND generator state per launched thread.
    // Every thread calls curand_init with the fixed seed 1234, its own global
    // thread index as the second argument and 0 as the third, storing the
    // result in state[global thread index].
    __global__ void setup_kernel(curandState *state){
      const int tid = blockIdx.x*blockDim.x + threadIdx.x;
      curandState *slot = state + tid;
      curand_init(1234, tid, 0, slot);
    }
    
    /*
     * Draws n random integers in [min_rand_int[idx], max_rand_int[idx]] for the
     * calling thread (idx = global thread index) and tallies them in a histogram.
     *
     * my_curandstate : cuRAND states indexed by global thread index; expected to
     *                  have been initialised beforehand (e.g. by setup_kernel).
     * n              : number of samples drawn by each thread.
     * max_rand_int   : inclusive upper bound, indexed by global thread index.
     * min_rand_int   : inclusive lower bound, indexed by global thread index.
     * result         : histogram; bin k counts samples equal to min_rand_int[idx] + k.
     *
     * The state and bound arrays are indexed by every launched thread, so they
     * must hold at least as many elements as there are threads in the launch.
     */
    __global__ void generate_kernel(curandState *my_curandstate, const unsigned int n, const unsigned *max_rand_int, const unsigned *min_rand_int,  unsigned int *result){

      const int idx = threadIdx.x + blockDim.x*blockIdx.x;

      // The bounds do not change during the loop; read them once.
      const unsigned lo = min_rand_int[idx];
      const unsigned hi = max_rand_int[idx];

      // Advance a local copy of the generator state and store it back at the end.
      curandState local_state = my_curandstate[idx];

      // Unsigned counter: n is unsigned, so this avoids a signed/unsigned comparison.
      for (unsigned int count = 0; count < n; count++){
        float myrandf = curand_uniform(&local_state);
        // Scale by (range + 0.999999) so that truncation can still reach hi.
        myrandf *= (hi - lo + 0.999999);
        myrandf += lo;
        int myrand = (int)truncf(myrandf);

        assert(myrand <= hi);
        assert(myrand >= lo);
        // Atomic increment: several threads may update the same bin concurrently.
        atomicAdd(&result[myrand - lo], 1u);
      }

      my_curandstate[idx] = local_state;
    }
    
    // Evaluates a CUDA runtime call; on failure prints a diagnostic to stderr
    // and makes the enclosing function (main) return 1.
    #define CUDA_CHECK(call)                                              \
      do {                                                                \
        cudaError_t err_ = (call);                                        \
        if (err_ != cudaSuccess) {                                        \
          fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                  cudaGetErrorString(err_));                              \
          return 1;                                                       \
        }                                                                 \
      } while (0)

    /*
     * Test driver: initialises a single generator state, draws ITER integers in
     * [MIN, MAX] with one thread, and prints how often each value occurred.
     * Returns 0 on success, 1 if an allocation or CUDA call fails.
     */
    int main(){

      const size_t result_bytes = (MAX-MIN+1) * sizeof(unsigned);

      curandState *d_state;
      CUDA_CHECK(cudaMalloc(&d_state, sizeof(curandState)));
      unsigned *d_result, *h_result;
      unsigned *d_max_rand_int, *h_max_rand_int, *d_min_rand_int, *h_min_rand_int;
      CUDA_CHECK(cudaMalloc(&d_result, result_bytes));
      h_result = (unsigned *)malloc(result_bytes);
      CUDA_CHECK(cudaMalloc(&d_max_rand_int, sizeof(unsigned)));
      h_max_rand_int = (unsigned *)malloc(sizeof(unsigned));
      CUDA_CHECK(cudaMalloc(&d_min_rand_int, sizeof(unsigned)));
      h_min_rand_int = (unsigned *)malloc(sizeof(unsigned));
      if (!h_result || !h_max_rand_int || !h_min_rand_int){
        fprintf(stderr, "host memory allocation failed\n");
        return 1;
      }
      CUDA_CHECK(cudaMemset(d_result, 0, result_bytes));
      setup_kernel<<<1,1>>>(d_state);
      CUDA_CHECK(cudaGetLastError());   // reports launch failures

      *h_max_rand_int = MAX;
      *h_min_rand_int = MIN;
      CUDA_CHECK(cudaMemcpy(d_max_rand_int, h_max_rand_int, sizeof(unsigned), cudaMemcpyHostToDevice));
      CUDA_CHECK(cudaMemcpy(d_min_rand_int, h_min_rand_int, sizeof(unsigned), cudaMemcpyHostToDevice));
      generate_kernel<<<1,1>>>(d_state, ITER, d_max_rand_int, d_min_rand_int, d_result);
      CUDA_CHECK(cudaGetLastError());   // reports launch failures
      // Blocking copy: waits for the kernel and reports errors raised while it ran.
      CUDA_CHECK(cudaMemcpy(h_result, d_result, result_bytes, cudaMemcpyDeviceToHost));
      printf("Bin:    Count: \n");
      for (int i = MIN; i <= MAX; i++)
        printf("%d    %u\n", i, h_result[i-MIN]);   // counts are unsigned

      // Release device and host buffers.
      CUDA_CHECK(cudaFree(d_state));
      CUDA_CHECK(cudaFree(d_result));
      CUDA_CHECK(cudaFree(d_max_rand_int));
      CUDA_CHECK(cudaFree(d_min_rand_int));
      free(h_result);
      free(h_max_rand_int);
      free(h_min_rand_int);

      return 0;
    }
    
    
    $ nvcc -arch=sm_20 -o t527 t527.cu -lcurand
    $ cuda-memcheck ./t527
    ========= CUDA-MEMCHECK
    Bin:    Count:
    2    1665496
    3    1668130
    4    1667644
    5    1667435
    6    1665026
    7    1666269
    ========= ERROR SUMMARY: 0 errors
    $
    

    这篇关于在 CUDA 内核中生成不同范围内的随机数的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

    查看全文
    登录 关闭
    扫码关注1秒登录
    发送“验证码”获取 | 15天全站免登陆