使用OpenMP并行化 [英] Parallelizing with OpenMP

查看：138 发布时间：2016/8/24 12:52:09 c parallel-processing openmp

本文介绍了使用OpenMP并行化的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我在并行化这段代码时遇到了问题，我想我必须使用 critical 子句，但不知道该怎么用……

#include <stdio.h>
#include <sys/time.h>

#define N4 5000
#define N5 5000
#define PIXMAX 10
#define NUM_THREADS 4

int i, j, k;
int histo[PIXMAX], image[N4][N5];

void calculate_histo(int *array, int matrix[N4][N5]) {

for(i=0; i<PIXMAX; i++) array[i] = 0;

    #pragma omp parallel
    {
        int array_private[PIXMAX];
        for(i=0; i<PIXMAX; i++) array_private[i] = 0;

        #pragma omp for
        for(i=0; i<N4; i++)
            for(j=0; j<N5; j++) {
                array_private[matrix[i][j]]++;
                                }
        #pragma omp critical
        {
            for(i=0; i<PIXMAX; i++) {
                array[i] += array_private[i];
            }
        }
    }
}
main ()
{
omp_set_num_threads(NUM_THREADS);

for(i=0; i<N4; i++)
   for(j=0; j<N5; j++)
   {
     if(i%3) image[i][j] = (i+j) % PIXMAX;
     else    image[i][j] = (i+i*j) % PIXMAX;
   }
calculate_histo(histo,image);

for (k=0; k<PIXMAX; k++) printf("%9d", histo[k]);
}

每次运行得到的结果都不一样，以下是 5 次执行的输出：

1.- 3424378 1765911 2356499 1767451 2354765 2123619 2355686 1767270 2355937 1762464
2.- 3359050 1728213 2310171 1727858 2309947 2094584 2309402 1727705 2310021 1726228
3.- 3479377 1782549 2373773 1783920 2372319 2153420 2374614 1785481 2375290 1781468
4.- 3459613 1781119 2362956 1783067 2362662 2154083 2360726 1781994 2362982 1779394
5.- 3434711 1751408 2349619 1750327 2348681 2104916 2348510 1750427 2350599 1747760

问题解决了，一切运行正常，感谢大家的帮助！
我最终使用的代码如下：

更多信息请参阅评论，例如不要使用全局变量，或者使用 matrix[i*5000 + j] 而不是 matrix[i][j]。

#include<stdio.h>
#include<sys/time.h>
#include<omp.h>

#define N4 5000
#define N5 5000
#define PIXMAX 10
#define NUM_THREADS 4

int histo[PIXMAX], image[N4][N5];
int i,j,k;
void calculate_histo(int *array, int matrix[N4][N5]) {

for(i=0; i<PIXMAX; i++) array[i] = 0;

#pragma omp parallel private(i,j)
  {
    int array_private[PIXMAX];

    for(i=0; i<PIXMAX; i++)
      array_private[i] = 0;

#pragma omp for
    for(i=0; i<N4; i++)
      for( j=0; j<N5; j++) {
    array_private[matrix[i][j]]++;
      }

#pragma omp critical
    {
      for( i=0; i<PIXMAX; i++) {
    array[i] += array_private[i];
      }
    }
  }
}

int main () {

  omp_set_num_threads(NUM_THREADS);
  for( i=0; i<N4; i++)
    for( j=0; j<N5; j++) {
      if(i%3)
        image[i][j] = (i+j) % PIXMAX;
      else
        image[i][j] = (i+i*j) % PIXMAX;
    }

  for ( k=0; k<PIXMAX; k++)
    printf("%9d", histo[k]);
  printf("\n");

  calculate_histo(histo,image);

  for ( k=0; k<PIXMAX; k++)
    printf("%9d", histo[k]);
  printf("\n");
  return 0;
}

解决方案

你可以使用 atomic 来实现，但效率不高。更好的方法是为每个线程使用一个私有数组，并行填充这些私有数组，然后在临界区（critical section）中把它们累加到共享数组。请参阅下面的代码。也可以不使用临界区来实现，但会稍微复杂一些，参见《Fill histograms (array reduction) in parallel with OpenMP without using a critical section》。

下面是我推荐的函数（我使用 matrix[i*5000 + j] 而不是 matrix[i][j]，因为 Fortran 和 C 的索引顺序正好相反，而我总是记不住哪个是哪个）。

void foo_omp_v2(int *array, int *matrix) {
    for(int i=0; i<10; i++) array[i] = 0;

    #pragma omp parallel
    {
        int array_private[10];
        for(int i=0; i<10; i++) array_private[i] = 0;

        #pragma omp for
        for(int i=0; i<5000; i++) {
            for(int j=0; j<5000; j++) {
                array_private[matrix[i*5000 + j]]++;
            }
        }
        #pragma omp critical
        {
            for(int i=0; i<10; i++) {
                array[i] += array_private[i];
            }
        }
    }
}

下面是我使用的完整代码，它表明 atomic 的性能更差：

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

void foo(int *array, int *matrix) {
    for(int i=0; i<10; i++) array[i] = 0;

    for(int i=0; i<5000; i++) {
        for(int j=0; j<5000; j++) {
          array[matrix[i*5000 + j]]++;
        }
    }

    for(int i=0; i<10; i++) {
        printf("%d ", array[i]);
    } printf("\n");
}

void foo_omp_v1(int *array, int *matrix) {
    for(int i=0; i<10; i++) array[i] = 0;

    #pragma omp parallel for
    for(int i=0; i<5000; i++) {
        for(int j=0; j<5000; j++) {
            #pragma omp atomic
            array[matrix[i*5000 + j]]++;
        }
    }

    for(int i=0; i<10; i++) {
        printf("%d ", array[i]);
    } printf("\n");
}

void foo_omp_v2(int *array, int *matrix) {
    for(int i=0; i<10; i++) array[i] = 0;

    #pragma omp parallel
    {
        int array_private[10];
        for(int i=0; i<10; i++) array_private[i] = 0;

        #pragma omp for
        for(int i=0; i<5000; i++) {
            for(int j=0; j<5000; j++) {
                array_private[matrix[i*5000 + j]]++;
            }
        }
        #pragma omp critical
        {
            for(int i=0; i<10; i++) {
                array[i] += array_private[i];
            }
        }
    }

    for(int i=0; i<10; i++) {
        printf("%d ", array[i]);
    } printf("\n");
}

int main() {
    int array[10];
    int *matrix = new int[5000*5000];
    for(int i=0; i<(5000*5000); i++) {
        matrix[i]=rand()%10;
    }

    double dtime;

    dtime = omp_get_wtime();
    foo(array, matrix);
    dtime = omp_get_wtime() - dtime;
    printf("time %f\n", dtime);

    dtime = omp_get_wtime();
    foo_omp_v1(array, matrix);
    dtime = omp_get_wtime() - dtime;
    printf("time %f\n", dtime);

    dtime = omp_get_wtime();
    foo_omp_v2(array, matrix);
    dtime = omp_get_wtime() - dtime;
    printf("time %f\n", dtime);

}

下面是你的代码经过修改后、在 GCC 和 Visual Studio 上都能正常运行的版本：

#include <stdio.h>
#include <omp.h>
//#include <sys/time.h>

#define N4 5000
#define N5 5000
#define PIXMAX 10
#define NUM_THREADS 4

int histo[PIXMAX], image[N4][N5];
void calculate_histo(int *array, int matrix[N4][N5]) {

    int i;
    for(i=0; i<PIXMAX; i++) array[i] = 0;

    #pragma omp parallel
    {
        int i,j;
        int array_private[PIXMAX];
        for(i=0; i<PIXMAX; i++) array_private[i] = 0;

        #pragma omp for
        for(i=0; i<N4; i++)
            for(j=0; j<N5; j++) {
                array_private[matrix[i][j]]++;
                                }
        #pragma omp critical
        {
            for(i=0; i<PIXMAX; i++) {
                array[i] += array_private[i];
            }
        }
    }
}
int main () {
    omp_set_num_threads(NUM_THREADS);

    int i,j;
    for(i=0; i<N4; i++)
       for(j=0; j<N5; j++)
       {
         if(i%3) image[i][j] = (i+j) % PIXMAX;
         else    image[i][j] = (i+i*j) % PIXMAX;
       }
    calculate_histo(histo,image);

    for (i=0; i<PIXMAX; i++)
        printf("%9d", histo[i]);
        printf("\n");
}

I have problems parallelizing this code, I think I have to use the critical clause but I don't know how...

#include <stdio.h>
#include <sys/time.h>

#define N4 5000
#define N5 5000
#define PIXMAX 10
#define NUM_THREADS 4

int i, j, k;
int histo[PIXMAX], image[N4][N5];

/*
 * Count how often each value occurs in matrix and store the counts in
 * array[0..PIXMAX-1]. Each thread counts into its own array_private and the
 * per-thread counts are then summed into array inside a critical section.
 *
 * NOTE(review): the counters i and j used below are the file-scope variables,
 * and the parallel directive carries no private clause, so both are shared by
 * all threads in the region. That data race is the likely cause of the
 * run-to-run differences reported for this version.
 */
void calculate_histo(int *array, int matrix[N4][N5]) {

/* Serial: clear the result bins before any thread is started. */
for(i=0; i<PIXMAX; i++) array[i] = 0;

    #pragma omp parallel
    {
        /* Declared inside the region, so every thread has its own copy. */
        int array_private[PIXMAX];
        /* Shared i: all threads step the same counter while clearing their
           own bins, so a thread may skip some of its bins. */
        for(i=0; i<PIXMAX; i++) array_private[i] = 0;

        /* OpenMP makes the iteration variable i of the work-shared loop
           private, but the inner counter j is still the shared global, so
           threads can disturb each other's column position. */
        #pragma omp for
        for(i=0; i<N4; i++)
            for(j=0; j<N5; j++) {
                array_private[matrix[i][j]]++;
                                }
        /* One thread at a time adds its private counts to the shared result. */
        #pragma omp critical
        {
            for(i=0; i<PIXMAX; i++) {
                array[i] += array_private[i];
            }
        }
    }
}
/*
 * Fill image with a deterministic test pattern, compute its histogram with
 * calculate_histo and print the PIXMAX bin counts, each right-aligned in a
 * 9-character field (no trailing newline is written).
 *
 * The return type is spelled out because implicit int is not valid since C99.
 * Returns 0.
 */
int main(void)
{
    omp_set_num_threads(NUM_THREADS);

    /* The loop counters i, j and k are the file-scope variables. */
    for (i = 0; i < N4; i++) {
        for (j = 0; j < N5; j++) {
            /* Rows whose index is a multiple of 3 use a different pattern. */
            if (i % 3) {
                image[i][j] = (i + j) % PIXMAX;
            } else {
                image[i][j] = (i + i * j) % PIXMAX;
            }
        }
    }
    calculate_histo(histo, image);

    for (k = 0; k < PIXMAX; k++) {
        printf("%9d", histo[k]);
    }
    return 0;
}

I get different results each time I run it, the outputs in 5 executions:

1.- 3424378  1765911  2356499  1767451  2354765  2123619  2355686  1767270  2355937  1762464
2.- 3359050  1728213  2310171  1727858  2309947  2094584  2309402  1727705  2310021  1726228
3.- 3479377  1782549  2373773  1783920  2372319  2153420  2374614  1785481  2375290  1781468
4.- 3459613  1781119  2362956  1783067  2362662  2154083  2360726  1781994  2362982  1779394
5.- 3434711  1751408  2349619  1750327  2348681  2104916  2348510  1750427  2350599  1747760

Problem solved, everything works fine now. Thanks for the help! The final code I use is this:

See the comments for more information, like not using global variables or using matrix[i* 5000 + j] instead of matrix[i][j]

#include<stdio.h>
#include<sys/time.h>
#include<omp.h>

#define N4 5000
#define N5 5000
#define PIXMAX 10
#define NUM_THREADS 4

int histo[PIXMAX], image[N4][N5];
int i,j,k;
/*
 * Build the histogram of matrix in array[0..PIXMAX-1].
 *
 * Every thread counts into its own array_private and the per-thread counts
 * are summed into array inside a critical section. The file-scope counters
 * i and j are listed in private(i,j), so each thread works on its own copies
 * inside the parallel region.
 */
void calculate_histo(int *array, int matrix[N4][N5]) {

  /* Serial part: reset the shared result bins. */
  i = 0;
  while (i < PIXMAX) {
    array[i] = 0;
    i++;
  }

#pragma omp parallel private(i,j)
  {
    int array_private[PIXMAX];

    /* Each thread clears its own bins. */
    i = 0;
    while (i < PIXMAX) {
      array_private[i] = 0;
      i++;
    }

    /* Rows are divided among the threads; counting touches private data only. */
#pragma omp for
    for (i = 0; i < N4; i++) {
      int *row = matrix[i];
      for (j = 0; j < N5; j++) {
        array_private[row[j]] += 1;
      }
    }

    /* Merge: one thread at a time updates the shared bins. */
#pragma omp critical
    {
      for (i = 0; i < PIXMAX; i++) {
        array[i] = array[i] + array_private[i];
      }
    }
  }
}

/*
 * Fill image with a deterministic pattern, print histo before and after
 * calculate_histo runs (one line of PIXMAX counts each time), and return 0.
 * The loop counters i, j and k are the file-scope variables.
 */
int main () {

  omp_set_num_threads(NUM_THREADS);

  for (i = 0; i < N4; i++) {
    for (j = 0; j < N5; j++) {
      /* i % 3 != 0 selects (i+j); rows divisible by 3 use (i+i*j). */
      image[i][j] = ((i % 3) ? (i + j) : (i + i * j)) % PIXMAX;
    }
  }

  /* Histogram before the computation. */
  k = 0;
  while (k < PIXMAX) {
    printf("%9d", histo[k]);
    k++;
  }
  printf("\n");

  calculate_histo(histo,image);

  /* Histogram after the computation. */
  k = 0;
  while (k < PIXMAX) {
    printf("%9d", histo[k]);
    k++;
  }
  printf("\n");

  return 0;
}

解决方案

You could use atomic to do this, but it won't be efficient. A better way is to use a private array for each thread, fill them in parallel, and then add them into the shared array in a critical section. See the code below. It's also possible to do this without a critical section, but it's a bit more complicated; see "Fill histograms (array reduction) in parallel with OpenMP without using a critical section".

Here is the function I recommend (I use matrix[i*5000 + j] instead of matrix[i][j] because Fortran and C do the indexing opposite of each other and I can never remember which is which).

/*
 * Histogram of a 5000 x 5000 row-major matrix into array[0..9].
 *
 * array  - receives the 10 bin counts; it is cleared first.
 * matrix - 5000*5000 ints; every element is used directly as a bin index.
 *
 * Each thread counts into a private set of bins and adds them to array in a
 * critical section once it has finished its share of the rows.
 */
void foo_omp_v2(int *array, int *matrix) {
    for (int bin = 0; bin < 10; ++bin) {
        array[bin] = 0;
    }

    #pragma omp parallel
    {
        /* Per-thread bins, zero-initialized. */
        int array_private[10] = {0};

        #pragma omp for
        for (int row = 0; row < 5000; ++row) {
            const int *cells = matrix + row * 5000;
            for (int col = 0; col < 5000; ++col) {
                array_private[cells[col]] += 1;
            }
        }

        /* Only one thread at a time may update the shared bins. */
        #pragma omp critical
        {
            for (int bin = 0; bin < 10; ++bin) {
                array[bin] += array_private[bin];
            }
        }
    }
}

Here is the full code I used showing atomic being worse

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Serial reference: histogram of a 5000 x 5000 row-major matrix into
 * array[0..9], followed by printing the 10 counts on one line.
 *
 * array  - receives the bin counts; it is cleared first.
 * matrix - 5000*5000 ints; every element is used directly as a bin index.
 */
void foo(int *array, int *matrix) {
    for (int bin = 0; bin < 10; ++bin) {
        array[bin] = 0;
    }

    for (int row = 0; row < 5000; ++row) {
        const int *cells = matrix + row * 5000;
        for (int col = 0; col < 5000; ++col) {
            array[cells[col]] += 1;
        }
    }

    for (int bin = 0; bin < 10; ++bin) {
        printf("%d ", array[bin]);
    }
    printf("\n");
}

/*
 * Parallel histogram of a 5000 x 5000 row-major matrix into array[0..9],
 * followed by printing the 10 counts on one line.
 *
 * All threads increment the shared bins directly; every increment is an
 * atomic update.
 *
 * array  - receives the bin counts; it is cleared first.
 * matrix - 5000*5000 ints; every element is used directly as a bin index.
 */
void foo_omp_v1(int *array, int *matrix) {
    for (int bin = 0; bin < 10; ++bin) {
        array[bin] = 0;
    }

    #pragma omp parallel for
    for (int row = 0; row < 5000; ++row) {
        const int *cells = matrix + row * 5000;
        for (int col = 0; col < 5000; ++col) {
            #pragma omp atomic
            array[cells[col]] += 1;
        }
    }

    for (int bin = 0; bin < 10; ++bin) {
        printf("%d ", array[bin]);
    }
    printf("\n");
}

/*
 * Parallel histogram of a 5000 x 5000 row-major matrix into array[0..9],
 * followed by printing the 10 counts on one line.
 *
 * Each thread counts into a private set of bins and adds them to array in a
 * critical section once it has finished its share of the rows.
 *
 * array  - receives the bin counts; it is cleared first.
 * matrix - 5000*5000 ints; every element is used directly as a bin index.
 */
void foo_omp_v2(int *array, int *matrix) {
    for (int bin = 0; bin < 10; ++bin) {
        array[bin] = 0;
    }

    #pragma omp parallel
    {
        /* Per-thread bins, zero-initialized. */
        int array_private[10] = {0};

        #pragma omp for
        for (int row = 0; row < 5000; ++row) {
            const int *cells = matrix + row * 5000;
            for (int col = 0; col < 5000; ++col) {
                array_private[cells[col]] += 1;
            }
        }

        /* Only one thread at a time may update the shared bins. */
        #pragma omp critical
        {
            for (int bin = 0; bin < 10; ++bin) {
                array[bin] += array_private[bin];
            }
        }
    }

    for (int bin = 0; bin < 10; ++bin) {
        printf("%d ", array[bin]);
    }
    printf("\n");
}

/*
 * Benchmark driver: fill a 5000 x 5000 matrix with rand() % 10, then run
 * foo, foo_omp_v1 and foo_omp_v2 on it in that order, printing the elapsed
 * wall-clock time (omp_get_wtime) after each call.
 *
 * The matrix is allocated with malloc instead of the C++-only `new`, the
 * allocation is checked, and the buffer is released before returning.
 *
 * Returns 0 on success, EXIT_FAILURE if the matrix cannot be allocated.
 */
int main(void) {
    int array[10];
    const size_t cell_count = (size_t)5000 * 5000;

    /* The cast is not needed in C; it keeps the snippet valid when it is
       built as C++, which the original `new` expression required. */
    int *matrix = (int *)malloc(cell_count * sizeof *matrix);
    if (matrix == NULL) {
        fprintf(stderr, "cannot allocate %zu ints\n", cell_count);
        return EXIT_FAILURE;
    }
    for (size_t i = 0; i < cell_count; i++) {
        matrix[i] = rand() % 10;
    }

    double dtime;

    dtime = omp_get_wtime();
    foo(array, matrix);
    dtime = omp_get_wtime() - dtime;
    printf("time %f\n", dtime);

    dtime = omp_get_wtime();
    foo_omp_v1(array, matrix);
    dtime = omp_get_wtime() - dtime;
    printf("time %f\n", dtime);

    dtime = omp_get_wtime();
    foo_omp_v2(array, matrix);
    dtime = omp_get_wtime() - dtime;
    printf("time %f\n", dtime);

    free(matrix);
    return 0;
}

Here is the version of your code that works for me in GCC and Visual Studio

#include <stdio.h>
#include <omp.h>
//#include <sys/time.h>

#define N4 5000
#define N5 5000
#define PIXMAX 10
#define NUM_THREADS 4

int histo[PIXMAX], image[N4][N5];
/*
 * Build the histogram of matrix in array[0..PIXMAX-1].
 *
 * All loop counters are locals; those declared inside the parallel region
 * are private to each thread. Every thread counts into its own
 * array_private and the per-thread counts are summed into array inside a
 * critical section.
 */
void calculate_histo(int *array, int matrix[N4][N5]) {

    int bin;
    for (bin = 0; bin < PIXMAX; ++bin) {
        array[bin] = 0;
    }

    #pragma omp parallel
    {
        int row, col, k;
        int array_private[PIXMAX];

        /* Each thread clears its own bins. */
        for (k = 0; k < PIXMAX; ++k) {
            array_private[k] = 0;
        }

        /* Rows are divided among the threads. */
        #pragma omp for
        for (row = 0; row < N4; row++) {
            for (col = 0; col < N5; col++) {
                array_private[matrix[row][col]] += 1;
            }
        }

        /* Merge: one thread at a time updates the shared bins. */
        #pragma omp critical
        {
            for (k = 0; k < PIXMAX; ++k) {
                array[k] = array[k] + array_private[k];
            }
        }
    }
}
/*
 * Fill image with a deterministic test pattern, compute its histogram with
 * calculate_histo and print the PIXMAX bin counts on one line, each
 * right-aligned in a 9-character field. Returns 0.
 */
int main () {
    omp_set_num_threads(NUM_THREADS);

    int i,j;
    for (i = 0; i < N4; i++) {
        for (j = 0; j < N5; j++) {
            /* Rows whose index is a multiple of 3 use a different pattern. */
            if (i % 3) {
                image[i][j] = (i + j) % PIXMAX;
            } else {
                image[i][j] = (i + i * j) % PIXMAX;
            }
        }
    }
    calculate_histo(histo,image);

    for (i = 0; i < PIXMAX; i++) {
        printf("%9d", histo[i]);
    }
    /* The newline is written once, after the loop; the braces make this
       explicit rather than leaving it to indentation. */
    printf("\n");
    return 0;
}

这篇关于使用OpenMP并行化的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

使用OpenMP并行化 [英] Parallelizing with OpenMP

问题描述

相关文章

C/C++最新文章

热门教程

热门工具

登录关闭

使用OpenMP并行化 [英] Parallelizing with OpenMP

问题描述

相关文章

C/C++最新文章

热门教程

热门工具

登录 关闭

登录关闭