如何将float *数组绑定到cuda中的1D纹理？ [英] How to bind a float* array to a 1D texture in cuda?

查看：82 发布时间：2020/10/13 0:51:54 cuda

本文介绍了如何将float *数组绑定到cuda中的1D纹理？的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我试图通过将纹理内存绑定到线性设备数组（而不是cudaArray）来了解如何使用纹理内存。我的代码很简单（如下）。我有一个由8个数字组成的float *数组，我试图将其绑定到一维纹理，然后在内核函数中尝试读取纹理并将值放入输出数组。但是当我运行此测试时，我的输出数组中的所有值均为零：

I am trying to understand how to use the texture memory by binding it to a linear device array (not a cudaArray). My code is simple (below). I have a float* array of 8 numbers which I am trying to bind to a 1D texture and then in my kernel function I try to read out of the texture and put the values into an output array. But when I run this test, all values in my output array are zero:

输入= 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000

输出= 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

Input = 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000
Output = 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

我在这里错过了什么？

// File-scope texture reference through which the kernel reads float
// elements (element-type read mode, one dimension).
texture<float, 1, cudaReadModeElementType> texInput;

// Reads n values from texInput and stores them in output[0..n-1].
// The loop is fully serial: the kernel never looks at threadIdx/blockIdx,
// and the host launches it with a single thread.
__global__ void copyKernel(float*output, int n) {
for (int i = 0; i < n; i++) {
    // NOTE: this tex1D() call is the subject of the question. Per the
    // accepted answer below, tex1D() is meant for textures backed by a CUDA
    // array; with the linear-memory binding done in main() every value
    // written here comes out as zero.
    output[i] = tex1D(texInput, (float)i);
}
}

// Test driver: fills a host array with 0..WIDTH-1, uploads it, binds the
// device buffer to texInput, runs copyKernel, and prints input and output.
int main(int argc, char*argv[]) {

const int WIDTH = 8;

// Host-side input and output buffers (never freed before returning).
float* hInput = (float*)malloc(sizeof(float) * WIDTH);
float*hOutput = (float*)malloc(sizeof(float) * WIDTH);

// hInput[i] = i, so a correct copy is easy to recognise in the printout.
for (int i = 0; i < WIDTH; i++) {
    hInput[i] = (float)i;
}

float* dInput = NULL, *dOutput = NULL;

// Receives the byte offset reported by cudaBindTexture(); not read afterwards.
size_t offset = 0;

// Texture reference settings: border addressing, point filtering,
// unnormalized coordinates.
texInput.addressMode[0] = cudaAddressModeBorder;
texInput.addressMode[1] = cudaAddressModeBorder;
texInput.filterMode = cudaFilterModePoint;
texInput.normalized = false;

// Device buffers; these two allocations are the only calls whose result is checked.
checkCudaErrors(cudaMalloc((void**)&dInput, sizeof(float)*WIDTH));
checkCudaErrors(cudaMalloc((void**)&dOutput, sizeof(float)*WIDTH));

// Upload the input (return value not checked).
cudaMemcpy(dInput, hInput, sizeof(float)*WIDTH, cudaMemcpyHostToDevice);

// Bind the linear device buffer (not a cudaArray) to the texture reference
// (return value not checked).
cudaBindTexture(&offset, texInput, dInput, sizeof(float)*WIDTH);


// One block of one thread; the kernel loops over all WIDTH elements itself.
copyKernel<<<1,1>>>(dOutput, WIDTH);

// Download the result (return value not checked).
cudaMemcpy(hOutput, dOutput, sizeof(float)*WIDTH, cudaMemcpyDeviceToHost);
printf("\nInput = ");

for (int i = 0; i < WIDTH; i++) {
        printf("%f\t",hInput[i]);
    }
printf("\nOutput = ");
for (int i = 0; i < WIDTH; i++) {
    printf("%f\t",hOutput[i]);
}

return 0;
}

推荐答案

根据文档， tex1D（）在基础分配是CUDA数组时使用。对于线性内存绑定纹理，正确的纹理函数是 tex1Dfetch（）。

According to the documentation, tex1D() is used when the underlying allocation is a CUDA Array. For linear-memory bound textures, the correct texturing function is tex1Dfetch().

仅对您的代码做这一处修改，它在我这里就可以正常运行：

That modification (only) to your code makes it work for me:

$ cat t1139.cu
#include <stdio.h>
#include <helper_cuda.h>

// File-scope texture reference through which the kernel reads float
// elements. main() binds it to plain linear device memory.
// NOTE(review): texture references are a legacy API; newer CUDA toolkits
// favour texture objects -- confirm against the toolkit version in use.
texture<float, 1, cudaReadModeElementType> texInput;

// Copies the first n elements of the buffer bound to texInput into output.
//
// Launch layout: any 1D grid/block configuration. The grid-stride loop gives
// each thread a disjoint set of indices, so the existing <<<1,1>>> launch
// still copies every element in order, and wider launches no longer make
// every thread redo the whole copy.
//
// output: device pointer with room for at least n floats.
// n:      number of elements to copy; nothing is written when n <= 0.
__global__ void copyKernel(float*output, int n) {
    const int stride = gridDim.x * blockDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        // The fix for the question: tex1Dfetch() with an integer index is the
        // fetch function for textures bound to linear memory; tex1D() is for
        // textures backed by a CUDA array.
        output[i] = tex1Dfetch(texInput, i);
    }
}

// Test driver: fills a host array with 0..WIDTH-1, uploads it, binds the
// device buffer to texInput as linear memory, runs copyKernel, and prints
// both the input and the values read back through the texture.
//
// Every CUDA runtime call is routed through checkCudaErrors (helper_cuda.h)
// so a failure is reported instead of silently producing wrong output, and
// all resources are released before returning.
//
// Returns 0 on success, 1 if a host allocation fails.
int main(int argc, char*argv[]) {
    (void)argc;
    (void)argv;

    const int WIDTH = 8;
    const size_t bytes = sizeof(float) * WIDTH;

    float* hInput = (float*)malloc(bytes);
    float* hOutput = (float*)malloc(bytes);
    if (hInput == NULL || hOutput == NULL) {
        fprintf(stderr, "host allocation failed\n");
        free(hInput);   // free(NULL) is a no-op
        free(hOutput);
        return 1;
    }

    // hInput[i] = i, so a correct copy is easy to recognise in the printout.
    for (int i = 0; i < WIDTH; i++) {
        hInput[i] = (float)i;
    }

    float* dInput = NULL;
    float* dOutput = NULL;

    // Byte offset reported by cudaBindTexture(). The runtime documentation
    // states it is 0 for pointers returned by cudaMalloc(), which is why the
    // kernel can index the texture from 0 without adjustment.
    size_t offset = 0;

    // Texture reference settings: border addressing, point filtering,
    // unnormalized coordinates.
    texInput.addressMode[0] = cudaAddressModeBorder;
    texInput.addressMode[1] = cudaAddressModeBorder;
    texInput.filterMode = cudaFilterModePoint;
    texInput.normalized = false;

    checkCudaErrors(cudaMalloc((void**)&dInput, bytes));
    checkCudaErrors(cudaMalloc((void**)&dOutput, bytes));

    checkCudaErrors(cudaMemcpy(dInput, hInput, bytes, cudaMemcpyHostToDevice));

    // Bind the linear device buffer (not a cudaArray) to the texture reference.
    checkCudaErrors(cudaBindTexture(&offset, texInput, dInput, bytes));

    copyKernel<<<1,1>>>(dOutput, WIDTH);
    // A kernel launch returns no status; pick up launch errors explicitly.
    checkCudaErrors(cudaGetLastError());

    // Blocking copy: also waits for the kernel and surfaces execution errors.
    checkCudaErrors(cudaMemcpy(hOutput, dOutput, bytes, cudaMemcpyDeviceToHost));

    printf("\nInput = ");
    for (int i = 0; i < WIDTH; i++) {
        printf("%f\t",hInput[i]);
    }
    printf("\nOutput = ");
    for (int i = 0; i < WIDTH; i++) {
        printf("%f\t",hOutput[i]);
    }

    // Release the texture binding, device buffers and host buffers.
    checkCudaErrors(cudaUnbindTexture(texInput));
    checkCudaErrors(cudaFree(dInput));
    checkCudaErrors(cudaFree(dOutput));
    free(hInput);
    free(hOutput);

    return 0;
}
$ nvcc -I/usr/local/cuda/samples/common/inc t1139.cu -o t1139
$ cuda-memcheck ./t1139
========= CUDA-MEMCHECK

Input = 0.000000        1.000000        2.000000        3.000000        4.0000005.000000        6.000000        7.000000
Output = 0.000000       1.000000        2.000000        3.000000        4.0000005.000000        6.000000        7.000000        ========= ERROR SUMMARY: 0 errors
$

这篇关于如何将float *数组绑定到cuda中的1D纹理？的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

如何将float *数组绑定到cuda中的1D纹理？ [英] How to bind a float* array to a 1D texture in cuda?

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录关闭

如何将float *数组绑定到cuda中的1D纹理？ [英] How to bind a float* array to a 1D texture in cuda?

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录 关闭

如何将float *数组绑定到cuda中的1D纹理？ [英] How to bind a float* array to a 1D texture in cuda?

登录关闭