使用CUDA Thrust permutation iterator复制数组的特定元素 [英] Copy specific elements of an array with CUDA Thrust permutation iterator
问题描述
我有一个 glm::vec3
数组,共有 count * 3
个元素。我还有另一个数组,其中包含要复制的元素的 int
索引。示例:
I have an array of glm::vec3
with count * 3
elements. I have another array which contains int
indices of the elements to copy. An example:
thrust::device_vector<glm::vec3> vals(9);
// vals contains 9 vec3, which represent 3 "items"
// vals[0], vals[1], vals[2] are the first "item",
// vals[3], vals[4], vals[5] are the second "item"...
int idcs[] = {0, 2};
// index 0 and 2 should be copied, i.e.
// vals[0..2] and vals[6..8]
我试图使用排列迭代器,但我不能让它工作。我的方法是:
I tried to use permutation iterators, but I cannot get it to work. My approach is:
thrust::copy(
thrust::make_permutation_iterator(vals, idcs),
thrust::make_permutation_iterator(vals, idcs + 2),
target.begin()
);
但当然这只会复制 vals [0]
和 vals [2]
而不是 vals [0] vals [1] vals [2] 和 vals [6] vals [7] vals [8]
。
But of course this will only copy vals[0]
and vals[2]
instead of vals[0] vals[1] vals[2]
and vals[6] vals[7] vals[8]
.
是否可以使用 Thrust 将所需的值从一个缓冲区复制到另一个缓冲区?
Is it possible to copy the desired values from one buffer to another with Thrust?
推荐答案
我认为,我们可以把 strided range(跨步范围)的思路与您的 permutation iterator 方法结合起来,实现您想要的效果。
We can combine the idea of strided ranges with your permutation iterator approach to achieve what you want, I think.
基本思想是使用您的排列迭代器方法来选择要复制的项目"组",并使用由 3 个 strided range 迭代器组合而成的 zip 迭代器来选取每组中的 3 个元素。输入需要一个 zip 迭代器,输出也需要一个 zip 迭代器。下面是一个完整的可运行示例,使用 uint3
作为 glm :: vec3
的代理:
The basic idea is to use your permutation iterator method to select the "groups" of items to copy, and we will select the 3 items in each group using a set of 3 strided range iterators combined into a zip iterator. We need a zip iterator for the input, and a zip iterator for the output. Here is a fully worked example, using uint3
as a proxy for glm::vec3
:
$ cat t484.cu
#include <vector_types.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/functional.h>
#define DSIZE 18
// View over the range [first, last) that visits only the elements at offsets
// 0, stride, 2*stride, ... from `first`.
//
// The view is assembled from three iterator adaptors: a counting iterator
// produces positions 0, 1, 2, ...; a transform iterator multiplies each
// position by the stride; a permutation iterator uses those products as
// indices into the underlying range.
template <typename Iterator>
class strided_range
{
public:
  typedef typename thrust::iterator_difference<Iterator>::type difference_type;

  // Converts a position in the strided view into an offset in the underlying range.
  struct stride_functor : public thrust::unary_function<difference_type,difference_type>
  {
    difference_type stride;

    stride_functor(difference_type step)
      : stride(step) {}

    __host__ __device__
    difference_type operator()(const difference_type& position) const
    {
      return stride * position;
    }
  };

  typedef typename thrust::counting_iterator<difference_type> CountingIterator;
  typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
  typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;

  // iterator type handed out by begin() / end()
  typedef PermutationIterator iterator;

  // Builds the strided view of [range_first, range_last) with the given step.
  strided_range(Iterator range_first, Iterator range_last, difference_type step)
    : first(range_first), last(range_last), stride(step) {}

  // Iterator to the first visible element (offset 0 from `first`).
  iterator begin() const
  {
    const stride_functor scale(stride);
    const CountingIterator positions(0);
    const TransformIterator offsets(positions, scale);
    return PermutationIterator(first, offsets);
  }

  // Iterator one past the last visible element.
  iterator end() const
  {
    // Number of visible elements is (last - first) / stride, rounded up.
    const difference_type length = last - first;
    const difference_type visible = (length + (stride - 1)) / stride;
    return begin() + visible;
  }

protected:
  Iterator first;
  Iterator last;
  difference_type stride;
};
typedef thrust::device_vector<uint3>::iterator Iter;
int main(){
// set up test data
int idcs[] = {0, 2, 5};
unsigned num_idcs = sizeof(idcs)/sizeof(int);
thrust::host_vector<uint3> h_vals(DSIZE);
for (int i = 0; i < DSIZE; i ++) {
h_vals[i].x = i;
h_vals[i].y = 100+i;
h_vals[i].z = 1000+i;}
thrust::device_vector<uint3> d_target(num_idcs*3);
thrust::host_vector<int> h_idcs(idcs, idcs + num_idcs);
thrust::device_vector<int> d_idcs = h_idcs;
thrust::device_vector<uint3> d_vals = h_vals;
// set up strided ranges for input, output
strided_range<Iter> item_1(d_vals.begin() , d_vals.end(), 3);
strided_range<Iter> item_2(d_vals.begin()+1, d_vals.end(), 3);
strided_range<Iter> item_3(d_vals.begin()+2, d_vals.end(), 3);
// set up strided ranges for output
strided_range<Iter> out_1(d_target.begin() , d_target.end(), 3);
strided_range<Iter> out_2(d_target.begin()+1, d_target.end(), 3);
strided_range<Iter> out_3(d_target.begin()+2, d_target.end(), 3);
// copy from input to output
thrust::copy(thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.begin()), thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.end()), thrust::make_zip_iterator(thrust::make_tuple(out_1.begin(), out_2.begin(), out_3.begin())));
// print out results
thrust::host_vector<uint3> h_target = d_target;
for (int i = 0; i < h_target.size(); i++)
std::cout << "index: " << i << " x: " << h_target[i].x << " y: " << h_target[i].y << " z: " << h_target[i].z << std::endl;
return 0;
}
$ nvcc -arch=sm_20 -o t484 t484.cu
$ ./t484
index: 0 x: 0 y: 100 z: 1000
index: 1 x: 1 y: 101 z: 1001
index: 2 x: 2 y: 102 z: 1002
index: 3 x: 6 y: 106 z: 1006
index: 4 x: 7 y: 107 z: 1007
index: 5 x: 8 y: 108 z: 1008
index: 6 x: 15 y: 115 z: 1015
index: 7 x: 16 y: 116 z: 1016
index: 8 x: 17 y: 117 z: 1017
$
这篇关于使用CUDA Thrust permutation iterator复制数组的特定元素的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!