OpenMP中的并行累积（前缀）和：在线程之间传递值 [英] Parallel cumulative (prefix) sums in OpenMP: communicating values between threads

查看：269 发布时间：2017/5/2 20:47:07 dependencies sum openmp

本文介绍了OpenMP中的并行累积（前缀）和：在线程之间传递值的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

假设我有一个函数 f(i)，它依赖于索引 i（以及其他无法预先计算的值）。
我想填充一个数组 a，使得 a[n] = sum(f(i))，其中 i 从 0 到 n-1。

编辑：在 Hristo Iliev 发表评论后，我意识到我正在做的是累积/前缀和（http://en.wikipedia.org/wiki/Prefix_sum）。

这可以用代码写成

float sum = 0;
for(int i=0; i<N; i++) {
    sum += f(i);
    a[i] = sum;
}

现在我想使用 OpenMP 并行执行此操作。一种做法是用 OpenMP 并行地计算并写出 f(i) 的值，然后串行地处理依赖关系。如果 f(i) 是一个很慢的函数，这种做法效果可能不错，因为非并行循环很简单。

#pragma omp parallel for
for(int i=0; i<N; i++) {
    a[i] = f(i);
}
for(int i=1; i<N; i++) {
    a[i] += a[i-1];
}

但是使用 OpenMP 也可以不借助非并行循环来完成此操作。然而，我想出的解决方案比较复杂，或许还有些取巧。所以我的问题是：用 OpenMP 是否有更简单、不那么繁琐的方法来做到这一点？

下面的代码基本上是让每个线程各自运行我列出的第一段代码。结果是每个线程所负责的 a 的值只差一个常数就是正确的。我将每个线程的总和保存到含有 nthreads+1 个元素的数组 suma 中。这样我就可以在线程之间传递数据，并确定每个线程的常数偏移量。然后我用该偏移量修正 a[i] 的值。

float *suma;
#pragma omp parallel
{
    const int ithread = omp_get_thread_num();
    const int nthreads = omp_get_num_threads();
    const int start = ithread*N/nthreads;
    const int finish = (ithread+1)*N/nthreads;
    #pragma omp single
    {
        suma = new float[nthreads+1];
        suma[0] = 0;
    }
    float sum = 0;
    for (int i=start; i<finish; i++) {
        sum += f(i);
        a[i] = sum;
    }
    suma[ithread+1] = sum;
    #pragma omp barrier
    float offset = 0;
    for(int i=0; i<(ithread+1); i++) {
        offset += suma[i];
    }
    for(int i=start; i<finish; i++) {
        a[i] += offset;
    }
}
delete[] suma;

一个简单的测试是令 f(i) = i，此时结果为 a[i] = i*(i+1)/2（而在无穷远处它是 -1/12）。

解决方案

你可以将你的策略扩展到任意数量的子区域，并使用任务（task）对它们进行递归归约：

#include<vector>
#include<iostream>

using namespace std;

const int n          = 10000;
const int baseLength = 100;

int f(int ii) {
  return ii;
}
 
int recursiveSumBody(int * begin, int * end){

  size_t length  = end - begin;
  size_t mid     = length/2;
  int    sum     = 0;


  if ( length < baseLength ) {
    for(size_t ii = 1; ii < length; ii++ ){
        begin[ii] += begin[ii-1];
    }
  } else {
#pragma omp task shared(sum)
    {
      sum = recursiveSumBody(begin    ,begin+mid);
    }
#pragma omp task
    {
      recursiveSumBody(begin+mid,end      );
    }
#pragma omp taskwait

#pragma omp parallel for
    for(size_t ii = mid; ii < length; ii++) {
      begin[ii] += sum;
    }

  }
  return begin[length-1];
}
 
void recursiveSum(int * begin, int * end){

#pragma omp single
  {
    recursiveSumBody(begin,end);
  }
}
 
 
int main() {

  vector<int> a(n,0);

#pragma omp parallel
  {
    #pragma omp for
    for(int ii=0; ii < n; ii++) {
      a[ii] = f(ii);
    }

    recursiveSum(&a[0],&a[n]);

  }
  cout << n*(n-1)/2 << endl;
  cout << a[n-1] << endl;

  return 0;
}

Assume I have a function f(i) which depends on an index i (among other values which cannot be precomputed). I want to fill an array a so that a[n] = sum(f(i)) from i=0 to n-1.



Edit: After a comment by Hristo Iliev I realized what I am doing is a cumulative/prefix sum.

This can be written in code as
// Serial reference version: after the loop, a[i] holds f(0) + f(1) + ... + f(i).
// `a`, `N` and `f` are declared outside this snippet.
float sum = 0;
for(int i=0; i<N; i++) {
    // Each iteration depends on the total accumulated by the previous one.
    sum += f(i);
    a[i] = sum;
}
Now I want to use OpenMP to do this in parallel.  One way I could do this with OpenMP is to write out the values for f(i) in parallel and then take care of the dependency in serial.  If f(i) is a slow function then this could work well since the non-paralleled loop is simple.
// Step 1: store f(i) into a[i] for every index. No iteration reads another
// iteration's result, so the loop is shared out among the threads.
// `a`, `N` and `f` are declared outside this snippet.
#pragma omp parallel for
for(int i=0; i<N; i++) {
    a[i] = f(i);
}
// Step 2: turn the stored values into running totals. a[i] needs the finished
// a[i-1], so this loop is left serial.
for(int i=1; i<N; i++) {
    a[i] += a[i-1];
}
But it's possible to do this without the non-parallel loop with OpenMP.  The solution, however, that I have come up with is complicated and perhaps hackish.  So my question is if there is a simpler less convoluted way to do this with OpenMP?  

The code below basically runs the first code I listed for each thread.  The result is that values of a in a given thread are correct up to a constant.  I save the sum for each thread to an array suma with nthreads+1 elements.  This allows me to communicate between threads and determine the constant offset for each thread.  Then I correct the values of a[i] with the offset.
// Two-pass parallel prefix sum: every thread scans its own contiguous chunk,
// then shifts that chunk by the combined totals of all lower-numbered threads.
// `a`, `N` and `f` are declared outside this snippet.
//
// suma[t+1] receives the chunk total of thread t; suma[0] is 0.
float *suma;
#pragma omp parallel
{
    const int ithread = omp_get_thread_num();
    const int nthreads = omp_get_num_threads();
    // Half-open index range [start, finish) handled by this thread.
    const int start = ithread*N/nthreads;
    const int finish = (ithread+1)*N/nthreads;
    // One thread allocates the shared array. `single` ends with an implicit
    // barrier, so no thread reaches the write to suma below before it exists.
    #pragma omp single
    {
        suma = new float[nthreads+1];
        suma[0] = 0;
    }
    // Pass 1: local running total over this thread's chunk only.
    float sum = 0;
    for (int i=start; i<finish; i++) {
        sum += f(i);
        a[i] = sum;
    }
    suma[ithread+1] = sum;
    // Every chunk total must be stored before any thread reads them.
    #pragma omp barrier
    // Offset for this chunk = suma[0] + totals of threads 0 .. ithread-1.
    float offset = 0;
    for(int i=0; i<(ithread+1); i++) {
        offset += suma[i];
    }
    // Pass 2: shift the local running totals by the offset.
    for(int i=start; i<finish; i++) {
        a[i] += offset;
    }
}
delete[] suma;
A simple test is just to set f(i) = i.  Then the solution is a[i] = i*(i+1)/2 (and at infinity it's -1/12).
 解决方案 
You can extend your strategy to an arbitrary number of sub-regions, and reduce them recursively, using tasks:
#include<vector>
#include<iostream>

using namespace std;

// Number of elements the demo fills and prefix-sums.
static const int n = 10000;
// Ranges shorter than this are summed with a plain serial loop instead of
// being split into further tasks.
static const int baseLength = 100;

/// Term generator used by the demo: the term at index `ii` is `ii` itself.
///
/// @param ii index of the term
/// @return the value to be accumulated for that index
int f(int ii) {
  const int term = ii;
  return term;
}

/// In-place inclusive prefix sum of the range [begin, end) by recursive halving.
///
/// Ranges shorter than `baseLength` are summed with a serial loop. Longer
/// ranges are split in the middle: each half is scanned by its own OpenMP
/// task, and once both tasks have finished the total of the left half is
/// added to every element of the right half.
///
/// @param begin pointer to the first element of the range
/// @param end   pointer one past the last element of the range
/// @return the last element after the scan, i.e. the sum of all original
///         elements; 0 for an empty range
int recursiveSumBody(int * begin, int * end){

  const size_t length = end - begin;

  // An empty range has nothing to scan. Without this guard the final
  // begin[length-1] would wrap around (length is unsigned) and read far
  // outside the range.
  if ( length == 0 ) {
    return 0;
  }

  if ( length < static_cast<size_t>(baseLength) ) {
    // Base case: plain serial running total.
    for(size_t ii = 1; ii < length; ii++ ){
        begin[ii] += begin[ii-1];
    }
  } else {
    const size_t mid = length/2;
    // Total of the left half; shared so the child task can write it back.
    int sum = 0;

#pragma omp task shared(sum)
    {
      sum = recursiveSumBody(begin, begin+mid);
    }
#pragma omp task
    {
      recursiveSumBody(begin+mid, end);
    }
    // Both halves must be complete (and `sum` written) before the fix-up.
#pragma omp taskwait

    // Shift the right half by the total of the left half.
    // NOTE(review): when this function runs inside a parallel region, this
    // opens a nested parallel region; how many threads it gets depends on the
    // OpenMP nesting settings -- confirm this is the intended behaviour.
#pragma omp parallel for
    for(size_t ii = mid; ii < length; ii++) {
      begin[ii] += sum;
    }

  }
  return begin[length-1];
}

/// Starts the task-based prefix sum of [begin, end).
///
/// The recursion is launched from an OpenMP `single` construct, so when every
/// thread of a team calls this function only one of them starts it. The other
/// threads wait at the construct's implicit barrier, where they are available
/// to run the tasks spawned by the recursion.
///
/// @param begin pointer to the first element of the range
/// @param end   pointer one past the last element of the range
void recursiveSum(int * begin, int * end){
#pragma omp single
  recursiveSumBody(begin, end);
}


/// Demo driver: fills a vector with f(0) .. f(n-1), prefix-sums it in place,
/// then prints n*(n-1)/2 followed by the last element of the scanned vector.
/// The two numbers agree when f(ii) == ii.
int main() {

  std::vector<int> a(n,0);

  // Pointer to the first element. The end of the range is formed with pointer
  // arithmetic: a[n] is outside the vector, and indexing it (even just to take
  // its address) is undefined behaviour.
  int * const first = &a[0];

#pragma omp parallel
  {
    #pragma omp for
    for(int ii=0; ii < n; ii++) {
      a[ii] = f(ii);
    }
    // The implicit barrier ending the `omp for` ensures every a[ii] has been
    // written before the scan starts.

    recursiveSum(first, first + n);

  }
  std::cout << n*(n-1)/2 << std::endl;
  std::cout << a[n-1] << std::endl;

  return 0;
}


                        
这篇关于OpenMP中的并行累积（前缀）和：在线程之间传递值的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！


                    
                        查看全文

OpenMP中的并行累积（前缀）和：在线程之间传递值 [英] Parallel cumulative (prefix) sums in OpenMP: communicating values between threads

问题描述

相关文章

其他开发最新文章

热门教程

热门工具

登录关闭

OpenMP中的并行累积（前缀）和：在线程之间传递值 [英] Parallel cumulative (prefix) sums in OpenMP: communicating values between threads

问题描述

相关文章

其他开发最新文章

热门教程

热门工具

登录 关闭

登录关闭