使用Thrust通过键组合两个列表 [英] Combining two lists by key using Thrust

查看:247
本文介绍了使用Thrust通过键组合两个列表的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

给定两个键值列表,我试图通过匹配键并在键匹配时对两个值应用函数来组合两侧。在我的情况下,我想乘以值。一个小例子,使其更清楚:

 左键:{1,2,4,5,6} 
左值:{3,4,1,2,1}

右键:{1,3,4,5,6,7};
右值:{2,1,1,4,1,2};

预期的输出键:{1,4,5,6}
预期输出值:{6,1,8,1}

我已经能够在CPU上使用下面的代码实现这一点:

  int main(){
int leftKeys [5] = {1,2,4,5,6}
int leftValues [5] = {3,4,1,2,1};
int rightKeys [6] = {1,3,4,5,6,7};
int rightValues [6] = {2,1,1,4,1,2};

int leftIndex = 0,rightIndex = 0;
std :: vector< std :: tuple< int,int>>结果;
while(leftIndex< 5&& rightIndex< 6){
if(leftKeys [leftIndex]< rightKeys [rightIndex]){
leftIndex ++;
}
if(leftKeys [leftIndex]> rightKeys [rightIndex]){
rightIndex ++;
}
result.push_back(std :: make_tuple(leftKeys [leftIndex],leftValues [leftIndex] * rightValues [rightIndex]));
leftIndex ++;
rightIndex ++;
}

//打印结果
for(int i = 0; i std :: cout< < Key:<< std :: get< 0>(result [i])< ; Value:< std :: get< 1>(result [i])< \\\
;
}
}

但是,我有输入键和值Thrust的 device_vector ,我也需要GPU上的结果。因此,如果我不需要将所有输入复制到主机并将所有输出复制到设备,这将更有效。



问题是,我找不到一个Thrust函数,可以用来使用一组键组合两个列表(并对两个值应用一个函数)。



更新:

/ p>

可以对输入做以下假设:





  • 在单个列表中不存在重复键(在列表之间,重复键当然存在,否则结果将为空)。



更新2:



卡在转型。我的代码到目前为止:

  struct multiply_transformation:public thrust :: binary_function< std :: tuple< int,int& std :: tuple< int,int>,std :: tuple< int,int> 
{
__host__ __device__
thrust :: tuple< int,int> operator()(thrust :: tuple< int,int> d_left,thrust :: tuple< int,int> d_right)
{
if(thrust :: get& :: get< 1>(d_right)){
return thrust :: make_tuple(thrust :: get< 0>(d_left),thrust :: get(d_left)* thrust :: get< (d_right));
}
return thrust :: make_tuple(-1,-1);
}
};


thrust :: device_vector< int> d_mergedKeys(h_leftCount + h_rightCount);
thrust :: device_vector< int> d_mergedValues(h_leftCount + h_rightCount);
thrust :: merge_by_key(d_leftCountKeys.begin(),d_leftCountKeys.begin()+ h_leftCount,
d_rightCountKeys.begin(),d_rightCountKeys.begin()+ h_rightCount,
d_leftCounts.begin ,d_rightCounts.begin(),d_mergedKeys.begin(),d_mergedValues.begin());

typedef thrust :: tuple< int,int> IntTuple;
thrust :: zip_iterator< IntTuple> d_zippedCounts(thrust :: make_tuple(d_mergedKeys.begin(),d_mergedValues.begin()));
thrust :: zip_iterator< IntTuple> d_zippedCountsOffset(d_zippedCounts + 1);

multiply_transformation transformOperator;
thrust :: device_vector< IntTuple> d_transformedResult(h_leftCount + h_rightCount);
thrust :: transform(d_zippedCounts,d_zippedCounts + h_leftCount + h_rightCount - 1,d_zippedCountsOffset,d_transformedResult.begin(),transformOperator);然而,我得到的错误,没有重载的函数 thrust :: transform

$ 匹配参数列表。在上面的代码中, h_leftCount h_rightCount 是左右输入的大小。 d_leftCountKeys d_rightCountKeys d_leftCounts d_rightCounts thrust :: device_vector< int>

解决方案

好吧,我不确定这是最好的方法(@ms通常提出比我更好的方法)但一种可能的方法是(方法1):


  1. set_intersection_by_key (左,右)

  2. set_intersection_by_key(右,左)

  3. 从步骤1和步骤2输出值,然后对其执行变换这些值一起生成(或者您希望对步骤1和步骤2的相应值执行其他任何数学运算)。

我不知道您的技能水平与推力有关,但



另一种可能的方法(方法2):


  1. merge_by_key 这两个列表一起

  2. 使用来自步骤1的两个版本的结果列表执行转换:第一个由[first,last-1)组成,第二个由[first + 1,last)组成。这将需要一个特殊的函数,它需要一个压缩版本的键和值,并比较显示的两个键。如果是匹配,则在两个表示值上输出期望的数学运算;如果不匹配,输出某种标记或已知的非法值。 (如果这种非法值不可能构建,我们可以根据需要压缩第三个标记数组。)

  3. 执行 remove_if 在步骤2的输出上,将结果压缩到所需的结果,删除所有非法的值条目,或者删除所有值条目由标记阵列指示。

我的感觉是第二种方法可能更快,但我没有仔细考虑。在任何情况下,最好基准测试案例,而不是工作(我的)直觉。



基于下面的注释,下面是使用您的示例数据集,从方法2的第二步开始的内容的描述:



步骤1的输出(merge_by_key操作)看起来像这样:

  :{1,1,2,3,4,4,5,5,6,6,7}; 
值:{3,2,4,1,1,1,2,4,1,1,2};

让我们构造两个版本,第一个是项目,第二个是下一个项目右侧:

  keys1:{1,1,2,3,4,4,5,5,6,6 }; 
values1:{3,2,4,1,1,1,2,4,1,1};

keys2:{1,2,3,4,4,5,5,6,6,7};
values2:{2,4,1,1,1,1,2,4,1,1,2};

实际的构造是微不足道的。 keys1只是[keys.begin(),keys.end() - 1),keys2只是[keys.begin()+ 1,keys.end())。同样对于values1和values2。



我们将zip1和values1压缩在一起,我们将zip2和values2一起压缩。然后我们将这两个压缩的实体传递给一个具有特殊函数的转换操作,它将执行以下操作:



如果keys1 == keys2,执行所需的数学运算对values1和values2数量,并将一个在标记数组中。如果不是,在标记数组中放置一个0。此操作的输出将是:

 键:{1,2,3,4,4,5,5,6 ,6,7}; 
values:{6,4,1,1,1,8,4,1,1,2};
标记:{1,0,0,1,0,1,0,1,0,0};

现在将上面的3个向量压缩在一起,然后传递给remove_if。 remove_if函数将指示删除其中标记== 0的任何项目,留下:

  keys:{1,4,5 ,6}; 
values:{6,1,8,1};
marker:{1,1,1,1};

下面是一个完整的示例演示两种方法:

  $ cat t1007.cu 
#include< iostream>
#include< thrust / device_vector.h>
#include< thrust / iterator / zip_iterator.h>
#include< thrust / set_operations.h>
#include< thrust / transform.h>
#include< thrust / functional.h>
#include< thrust / copy.h>
#include< thrust / merge.h>
#include< thrust / remove.h>
#include< assert.h>

struct mark_mpy_func
{
template< typename T1,typename T2>
__host__ __device__
int operator()(T1& z1,T2& z2){
int res = 0;
if(thrust :: get< 0>(z1)== thrust :: get< 0>(z2)){
res = thrust :: get(z1)* thrust :: get(1)(z2)。
thrust :: get< 2(z1)= 1;}
return res;
}
};

struct mtest_func
{
__host__ __device__
bool operator()(int t){
if(t == 0)return true;
return false;
}
};

int main(){

int Lkeys [] = {1,2,4,5,6};
int Lvals [] = {3,4,1,2,1};
int Rkeys [] = {1,3,4,5,6,7};
int Rvals [] = {2,1,1,4,1,2};

size_t Lsize = sizeof(Lkeys)/ sizeof(int);
size_t Rsize = sizeof(Rkeys)/ sizeof(int);

thrust :: device_vector< int> Lkeysv(Lkeys,Lkeys + Lsize);
thrust :: device_vector< int> Lvalsv(Lvals,Lvals + Lsize);
thrust :: device_vector< int> Rkeysv(Rkeys,Rkeys + Rsize);
thrust :: device_vector< int> Rvalsv(Rvals,Rvals + Rsize);

//方法1

thrust :: device_vector< int> Lkeysvo(Lsize);
thrust :: device_vector< int> Lvalsize(Lsize);
thrust :: device_vector< int> Rkeysvo(Rsize);
thrust :: device_vector< int> Rvalsvo(Rsize);

size_t Lsizeo = thrust :: set_intersection_by_key(Lkeysv.begin(),Lkeysv.end(),Rkeysv.begin(),Rkeysv.end(),Lvalsv.begin(),Lkeysvo.begin ),Lvalsvo.begin())。first - Lkeysvo.begin();
size_t Rsizeo = thrust :: set_intersection_by_key(Rkeysv.begin(),Rkeysv.end(),Lkeysv.begin(),Lkeysv.end(),Rvalsv.begin(),Rkeysvo.begin(),Rvalsvo。 begin())。first - Rkeysvo.begin();

assert(Lsizeo == Rsizeo);
thrust :: device_vector< int> res1(Lsizeo);
thrust :: transform(Lvalsvo.begin(),Lvalsvo.begin()+ Lsizeo,Rvalsvo.begin(),res1.begin(),thrust :: multiplies< int>());

std :: cout<< 方法1结果:< std :: endl< keys:;
thrust :: copy_n(Lkeysvo.begin(),Lsizeo,std :: ostream_iterator< int>(std :: cout,,));
std :: cout<< std :: endl< vals:;
thrust :: copy_n(res1.begin(),Lsizeo,std :: ostream_iterator< int>(std :: cout,,));
std :: cout<< std :: endl;

//方法2

thrust :: device_vector< int> Mkeysv(Lsize + Rsize);
thrust :: device_vector< int> Mvalsv(Lsize + Rsize);

thrust :: merge_by_key(Lkeysv.begin(),Lkeysv.end(),Rkeysv.begin(),Rkeysv.end(),Lvalsv.begin(),Rvalsv.begin .begin(),Mvalsv.begin());
thrust :: device_vector< int>标记(Lsize + Rsize-1);
thrust :: device_vector< int> res2(Lsize + Rsize-1);
thrust :: transform(thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin(),Mvalsv.begin(),marker.begin())),thrust :: make_zip_iterator(thrust :: make_tuple .end() - 1,Mvalsv.end() - 1,marker.end())),thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin()+ 1,Mvalsv.begin()+ 1)) ,res2.begin(),mark_mpy_func());
size_t rsize2 = thrust :: remove_if(thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin(),res2.begin())),thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.end )-1,res2.end())),marker.begin(),mtest_func()) - thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin(),res2.begin()));
std :: cout<< 方法2结果:< std :: endl< keys:;
thrust :: copy_n(Mkeysv.begin(),rsize2,std :: ostream_iterator< int>(std :: cout,,));
std :: cout<< std :: endl< vals:;
thrust :: copy_n(res2.begin(),rsize2,std :: ostream_iterator< int>(std :: cout,,));
std :: cout<< std :: endl;


return 0;
}

$ nvcc -o t1007 t1007.cu
$ ./t1007
方法1结果:
键:1,4,5,6 ,
vals:6,1,8,1,
方法2结果:
键:1,4,5,6,
vals:6,1,8,1 ,
$

如果可以使用标记值在输出数据中通知remove_if操作,则可以省去单独的标记阵列。这是方法2的修改版本,工作方式如下:

  $ cat t1007.cu 
#include< iostream> ;
#include< thrust / device_vector.h>
#include< thrust / iterator / zip_iterator.h>
#include< thrust / transform.h>
#include< thrust / copy.h>
#include< thrust / merge.h>
#include< thrust / remove.h>

#define MARK_VAL -1

struct mark_mpy_func
{
template< typename T1,typename T2>
__host__ __device__
int operator()(T1& z1,T2& z2){
int res = MARK_VAL;
if(thrust :: get< 0>(z1)== thrust :: get< 0>(z2)){
res = thrust :: get(z1)* thrust :: get< 1>(z2);}
return res;
}
};

struct mtest_func
{
template< typename T>
__host__ __device__
bool operator()(T t){
if(thrust :: get'(t)== MARK_VAL)return true;
return false;
}
};

int main(){

int Lkeys [] = {1,2,4,5,6};
int Lvals [] = {3,4,1,2,1};
int Rkeys [] = {1,3,4,5,6,7};
int Rvals [] = {2,1,1,4,1,2};

size_t Lsize = sizeof(Lkeys)/ sizeof(int);
size_t Rsize = sizeof(Rkeys)/ sizeof(int);

thrust :: device_vector< int> Lkeysv(Lkeys,Lkeys + Lsize);
thrust :: device_vector< int> Lvalsv(Lvals,Lvals + Lsize);
thrust :: device_vector< int> Rkeysv(Rkeys,Rkeys + Rsize);
thrust :: device_vector< int> Rvalsv(Rvals,Rvals + Rsize);

//方法2

thrust :: device_vector< int> Mkeysv(Lsize + Rsize);
thrust :: device_vector< int> Mvalsv(Lsize + Rsize);

thrust :: merge_by_key(Lkeysv.begin(),Lkeysv.end(),Rkeysv.begin(),Rkeysv.end(),Lvalsv.begin(),Rvalsv.begin(),Mkeysv .begin(),Mvalsv.begin());
thrust :: device_vector< int> res2(Lsize + Rsize-1);
thrust :: transform(thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin(),Mvalsv.begin())),thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.end ,Mvalsv.end() - 1)),thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin()+ 1,Mvalsv.begin()+ 1)),res2.begin(),mark_mpy_func
size_t rsize2 = thrust :: remove_if(thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin(),res2.begin())),thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.end )-1,res2.end())),mtest_func()) - thrust :: make_zip_iterator(thrust :: make_tuple(Mkeysv.begin(),res2.begin()));
std :: cout<< 方法2结果:< std :: endl< keys:;
thrust :: copy_n(Mkeysv.begin(),rsize2,std :: ostream_iterator< int>(std :: cout,,));
std :: cout<< std :: endl< vals:;
thrust :: copy_n(res2.begin(),rsize2,std :: ostream_iterator< int>(std :: cout,,));
std :: cout<< std :: endl;


return 0;
}

$ nvcc -o t1007 t1007.cu
$ ./t1007
方法2结果:
键:1,4,5,6 ,
vals:6,1,8,1,
$


Given two key-value lists, I am trying to combine the two sides by matching the keys and applying a function to the two values when the keys match. In my case I want to multiply the values. A small example to make it more clear:

Left keys:   { 1, 2, 4, 5, 6 }
Left values: { 3, 4, 1, 2, 1 }

Right keys:   { 1, 3, 4, 5, 6, 7 };
Right values: { 2, 1, 1, 4, 1, 2 };

Expected output keys:   { 1, 4, 5, 6 }
Expected output values: { 6, 1, 8, 1 }

I have been able to implement this on the CPU using C++ using the next code:

int main() {
    int leftKeys[5] =   { 1, 2, 4, 5, 6 };
    int leftValues[5] = { 3, 4, 1, 2, 1 };
    int rightKeys[6] =   { 1, 3, 4, 5, 6, 7 };
    int rightValues[6] = { 2, 1, 1, 4, 1, 2 };

    int leftIndex = 0, rightIndex = 0;
    std::vector<std::tuple<int, int>> result;
    while (leftIndex < 5 && rightIndex < 6) {
        if (leftKeys[leftIndex] < rightKeys[rightIndex]) {
            leftIndex++;
        }
        if (leftKeys[leftIndex] > rightKeys[rightIndex]) {
            rightIndex++;
        }
        result.push_back(std::make_tuple(leftKeys[leftIndex], leftValues[leftIndex] * rightValues[rightIndex]));
        leftIndex++;
        rightIndex++;
    }

    // Print results
    for (int i = 0; i < result.size(); i++) {
        std::cout << "Key: " << std::get<0>(result[i]) << "; Value: " << std::get<1>(result[i]) << "\n";
    }
}

However, I have got the input keys and values in Thrust's device_vectors and I need the results on the GPU as well. Therefore it would be more efficient if I did not need to copy all inputs to the host and all outputs back to the device.

The problem is that I cannot find a Thrust function that can be used to combine two lists using a set of keys (and apply a function to both values). Does such a function exist or is there an easy way to implement it myself of should I just do this on the host?

Update:

The following assumtions can be made about the input:

  • The keys are always sorted.
  • No duplicate keys exist within a single list (between the lists, duplicate keys of course do exist, otherwise the result would be empty).

Update 2:

While implementing the second approach in @Robert's answer I get stuck at the transformation. My code so far is below:

struct multiply_transformation : public thrust::binary_function<std::tuple<int, int>, std::tuple<int, int>, std::tuple<int, int>>
{
    __host__ __device__
        thrust::tuple<int, int> operator()(thrust::tuple<int, int> d_left, thrust::tuple<int, int> d_right)
    {
        if (thrust::get<0>(d_left) == thrust::get<1>(d_right)) {
            return thrust::make_tuple(thrust::get<0>(d_left), thrust::get<1>(d_left) * thrust::get<1>(d_right));
        }
        return thrust::make_tuple(-1, -1);
    }
};


thrust::device_vector<int> d_mergedKeys(h_leftCount + h_rightCount);
thrust::device_vector<int> d_mergedValues(h_leftCount + h_rightCount);
thrust::merge_by_key(d_leftCountKeys.begin(), d_leftCountKeys.begin() + h_leftCount,
    d_rightCountKeys.begin(), d_rightCountKeys.begin() + h_rightCount,
    d_leftCounts.begin(), d_rightCounts.begin(), d_mergedKeys.begin(), d_mergedValues.begin());

typedef thrust::tuple<int, int> IntTuple;
thrust::zip_iterator<IntTuple> d_zippedCounts(thrust::make_tuple(d_mergedKeys.begin(), d_mergedValues.begin()));
thrust::zip_iterator<IntTuple> d_zippedCountsOffset(d_zippedCounts + 1);

multiply_transformation transformOperator;
thrust::device_vector<IntTuple> d_transformedResult(h_leftCount + h_rightCount);
thrust::transform(d_zippedCounts, d_zippedCounts + h_leftCount + h_rightCount - 1, d_zippedCountsOffset, d_transformedResult.begin(), transformOperator);

However, I get the error that no overloaded function thrust::transform matches the argument list. In the above code h_leftCount and h_rightCount are the sizes of the left and right inputs. d_leftCountKeys, d_rightCountKeys, d_leftCounts, and d_rightCounts are thrust::device_vector<int>.

解决方案

Well, I'm not sure this is the best method (@m.s. usually comes up with better approaches than I), but one possible approach would be (method 1):

  1. set_intersection_by_key(Left,Right)
  2. set_intersection_by_key(Right,Left)
  3. Take the values outputs from step 1 and step 2, and perform a transform on them to multiply the values results together (or whatever other mathematical operation you'd like to perform on the corresponding values results from step 1 and step 2).

I don't know what your skill level is with thrust but I can provide a trivial worked example of the above if desired.

Another possible approach (method 2):

  1. merge_by_key the two lists together
  2. Perform a transform using two versions of the resultant list from step 1: The first consisting of [first, last-1) and the second consisting of [first+1, last). This will require a special functor that takes a zipped version of the keys and values, and compares the two keys presented. If it is a match, output the desired mathematical operation on the two presented values; if it is not a match, output some kind of marker or known illegal value. (If such an illegal value is impossible to construct, we can zip a 3rd marker array in if needed.)
  3. Do a remove_if on the output of step 2, to compact the result down to the desired result, removing all value entries that are illegal, or else removing all value entries that are indicated by the marker array.

My sense is the second method might be faster, but I haven't carefully thought through it. In any event, it's better to benchmark test cases than to work off of (my) intuition.

Based on a comment below, here is a description of what is happening starting with the 2nd step of method 2, using your example dataset:

The output of step 1 (the merge_by_key operation) would look like something like this:

keys:   { 1, 1, 2, 3, 4, 4, 5, 5, 6, 6, 7 };
values: { 3, 2, 4, 1, 1, 1, 2, 4, 1, 1, 2 };

Let's construct two versions, the first being "the item" and the second being "the next item to the right":

keys1:   { 1, 1, 2, 3, 4, 4, 5, 5, 6, 6 };
values1: { 3, 2, 4, 1, 1, 1, 2, 4, 1, 1 };

keys2:   { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7 };
values2: { 2, 4, 1, 1, 1, 2, 4, 1, 1, 2 };

The actual "construction" is trivial. keys1 is just [keys.begin(), keys.end()-1), and keys2 is just [keys.begin()+1, keys.end()). And likewise for values1 and values2.

We'll zip keys1 and values1 together and we'll zip keys2 and values2 together. Then we'll pass these two zipped entities to a transform operation that has a special functor that will do the following:

If keys1 == keys2, do the desired math operation on the values1 and values2 quantities, and place a one in the marker array. If not, place a 0 in a marker array. The output of this operation would be:

 keys:   { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7 };
 values: { 6, 4, 1, 1, 1, 8, 4, 1, 1, 2 };
 marker: { 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 };

Now zip the 3 vectors above together, and pass that to remove_if. The remove_if functor would indicate removal of any items for which marker == 0, leaving:

 keys:   { 1, 4, 5, 6 };
 values: { 6, 1, 8, 1 };
 marker: { 1, 1, 1, 1 };

Here is a fully worked example demonstrating both methods:

$ cat t1007.cu
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/set_operations.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/copy.h>
#include <thrust/merge.h>
#include <thrust/remove.h>
#include <assert.h>

struct mark_mpy_func
{
  template <typename T1, typename T2>
  __host__ __device__
  int operator()(T1 &z1, T2 &z2){
    int res = 0;
    if (thrust::get<0>(z1) == thrust::get<0>(z2)){
      res = thrust::get<1>(z1) * thrust::get<1>(z2);
      thrust::get<2>(z1) = 1;}
    return res;
    }
};

struct mtest_func
{
  __host__ __device__
  bool operator()(int t){
    if (t == 0) return true;
    return false;
    }
};

int main(){

  int Lkeys[] =   { 1, 2, 4, 5, 6 };
  int Lvals[] =   { 3, 4, 1, 2, 1 };
  int Rkeys[] =   { 1, 3, 4, 5, 6, 7 };
  int Rvals[] =   { 2, 1, 1, 4, 1, 2 };

  size_t Lsize = sizeof(Lkeys)/sizeof(int);
  size_t Rsize = sizeof(Rkeys)/sizeof(int);

  thrust::device_vector<int> Lkeysv(Lkeys, Lkeys+Lsize);
  thrust::device_vector<int> Lvalsv(Lvals, Lvals+Lsize);
  thrust::device_vector<int> Rkeysv(Rkeys, Rkeys+Rsize);
  thrust::device_vector<int> Rvalsv(Rvals, Rvals+Rsize);

  // method 1

  thrust::device_vector<int> Lkeysvo(Lsize);
  thrust::device_vector<int> Lvalsvo(Lsize);
  thrust::device_vector<int> Rkeysvo(Rsize);
  thrust::device_vector<int> Rvalsvo(Rsize);

  size_t Lsizeo = thrust::set_intersection_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), Lvalsv.begin(), Lkeysvo.begin(), Lvalsvo.begin()).first - Lkeysvo.begin();
  size_t Rsizeo = thrust::set_intersection_by_key(Rkeysv.begin(), Rkeysv.end(), Lkeysv.begin(), Lkeysv.end(), Rvalsv.begin(), Rkeysvo.begin(), Rvalsvo.begin()).first - Rkeysvo.begin();

  assert(Lsizeo == Rsizeo);
  thrust::device_vector<int> res1(Lsizeo);
  thrust::transform(Lvalsvo.begin(), Lvalsvo.begin()+Lsizeo, Rvalsvo.begin(), res1.begin(), thrust::multiplies<int>());

  std::cout << "Method 1 result:" << std::endl << "keys: ";
  thrust::copy_n(Lkeysvo.begin(), Lsizeo, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl << "vals: ";
  thrust::copy_n(res1.begin(), Lsizeo, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl;

  // method 2

  thrust::device_vector<int> Mkeysv(Lsize + Rsize);
  thrust::device_vector<int> Mvalsv(Lsize + Rsize);

  thrust::merge_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), Lvalsv.begin(), Rvalsv.begin(), Mkeysv.begin(), Mvalsv.begin());
  thrust::device_vector<int> marker(Lsize + Rsize - 1);
  thrust::device_vector<int> res2(Lsize + Rsize - 1);
  thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), Mvalsv.begin(), marker.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, Mvalsv.end()-1, marker.end())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin()+1, Mvalsv.begin()+1)), res2.begin(), mark_mpy_func());
  size_t rsize2 = thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple( Mkeysv.begin(), res2.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, res2.end())), marker.begin(), mtest_func()) - thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), res2.begin()));
  std::cout << "Method 2 result:" << std::endl << "keys: ";
  thrust::copy_n(Mkeysv.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl << "vals: ";
  thrust::copy_n(res2.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl;


  return 0;
}

$ nvcc -o t1007 t1007.cu
$ ./t1007
Method 1 result:
keys: 1,4,5,6,
vals: 6,1,8,1,
Method 2 result:
keys: 1,4,5,6,
vals: 6,1,8,1,
$

If it is acceptable to use a marker value (say, -1) in the output data to inform the remove_if operation, then the separate marker array can be dispensed with. Here's a modified version of method 2 that works this way:

$ cat t1007.cu
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform.h>
#include <thrust/copy.h>
#include <thrust/merge.h>
#include <thrust/remove.h>

#define MARK_VAL -1

struct mark_mpy_func
{
  template <typename T1, typename T2>
  __host__ __device__
  int operator()(T1 &z1, T2 &z2){
    int res = MARK_VAL;
    if (thrust::get<0>(z1) == thrust::get<0>(z2)){
      res = thrust::get<1>(z1) * thrust::get<1>(z2);}
    return res;
    }
};

struct mtest_func
{
  template <typename T>
  __host__ __device__
  bool operator()(T t){
    if (thrust::get<1>(t) == MARK_VAL) return true;
    return false;
    }
};

int main(){

  int Lkeys[] =   { 1, 2, 4, 5, 6 };
  int Lvals[] =   { 3, 4, 1, 2, 1 };
  int Rkeys[] =   { 1, 3, 4, 5, 6, 7 };
  int Rvals[] =   { 2, 1, 1, 4, 1, 2 };

  size_t Lsize = sizeof(Lkeys)/sizeof(int);
  size_t Rsize = sizeof(Rkeys)/sizeof(int);

  thrust::device_vector<int> Lkeysv(Lkeys, Lkeys+Lsize);
  thrust::device_vector<int> Lvalsv(Lvals, Lvals+Lsize);
  thrust::device_vector<int> Rkeysv(Rkeys, Rkeys+Rsize);
  thrust::device_vector<int> Rvalsv(Rvals, Rvals+Rsize);

  // method 2

  thrust::device_vector<int> Mkeysv(Lsize + Rsize);
  thrust::device_vector<int> Mvalsv(Lsize + Rsize);

  thrust::merge_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), Lvalsv.begin(), Rvalsv.begin(), Mkeysv.begin(), Mvalsv.begin());
  thrust::device_vector<int> res2(Lsize + Rsize - 1);
  thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), Mvalsv.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, Mvalsv.end()-1)), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin()+1, Mvalsv.begin()+1)), res2.begin(), mark_mpy_func());
  size_t rsize2 = thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple( Mkeysv.begin(), res2.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, res2.end())), mtest_func()) - thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), res2.begin()));
  std::cout << "Method 2 result:" << std::endl << "keys: ";
  thrust::copy_n(Mkeysv.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl << "vals: ";
  thrust::copy_n(res2.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl;


  return 0;
}

$ nvcc -o t1007 t1007.cu
$ ./t1007
Method 2 result:
keys: 1,4,5,6,
vals: 6,1,8,1,
$

这篇关于使用Thrust通过键组合两个列表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆