使用openmp优化N皇后 [英] Optimizing N-queen with openmp

查看:166
本文介绍了使用openmp优化N皇后的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在学习OpenMP,并编写了以下代码来解决N皇后问题.

I am learning OpenMP and wrote the following code to solve the N-queens problem.

//Full Code: https://github.com/Shafaet/Codes/blob/master/OPENMP/Parallel%20N-Queen%20problem.cpp
// Board edge length (and number of queens). Set by the caller before call() is used.
int n;

/**
 * Counts the ways to complete a partial N-queens placement, one queen per column.
 *
 * Occupancy is tracked in three bit masks held in plain `int`s. The highest
 * bit index used is 2*n - 2 (for the diagonals), so `n` must be small enough
 * for that bit to fit in the value bits of an `int`.
 *
 * @param col     Next column to fill.
 * @param rowmask Bit `row` is set when that row already holds a queen.
 * @param dia1    Bit `row + col` is set when that anti-diagonal is taken.
 * @param dia2    Bit `(row + n - 1) - col` is set when that diagonal is taken.
 * @return Number of complete placements reachable from this state; 1 when
 *         `col == n`.
 */
int call(int col,int rowmask,int dia1,int dia2)
{
    if (col == n)
    {
        return 1;   // every column holds a queen: one finished placement
    }

    int ans = 0;
    for (int row = 0; row < n; row++)
    {
        // Compute each mask bit once; it is needed both for the test and the update.
        const int rowBit  = 1 << row;
        const int dia1Bit = 1 << (row + col);
        const int dia2Bit = 1 << ((row + n - 1) - col);

        // Logical && (not bitwise &) so the test short-circuits on the first conflict.
        const bool isFree = !(rowmask & rowBit) && !(dia1 & dia1Bit) && !(dia2 & dia2Bit);
        if (isFree)
        {
            ans += call(col + 1, rowmask | rowBit, dia1 | dia1Bit, dia2 | dia2Bit);
        }
    }
    return ans;
}

/**
 * Counts every N-queens solution for the global board size `n` using an
 * OpenMP parallel loop over the row of the queen in column 0.
 *
 * Prints the solution count and the elapsed wall-clock time.
 *
 * @return Elapsed time as measured by omp_get_wtime() (seconds).
 */
double parallel()
{
    double st=omp_get_wtime();
    int ans=0;

    // Everything the loop body writes is declared inside the loop, so each
    // thread works on its own copies; only `ans` is combined, through the
    // reduction. The loop index is private by definition and must not appear
    // in a shared() clause.
    // schedule(dynamic): the subtrees below different first rows differ in
    // size, so idle threads pick up the next first row instead of waiting.
    #pragma omp parallel for reduction(+:ans) schedule(dynamic)
    for(int i=0;i<n;i++)
    {
        const int col=0,row=i;
        // Queen fixed at (row, column 0); the search continues from column 1.
        ans+=call(1,1<<row,1<<(row+col),1<<((row+n-1)-col));
    }
    printf("Found %d configuration for n=%d\n",ans,n);
    double en=omp_get_wtime();
    printf("Time taken using openmp %lf\n",en-st);
    return en-st;
}
/**
 * Single-threaded baseline: counts every N-queens solution for the global
 * board size `n`, prints the count and the elapsed time, and returns the
 * elapsed time measured with omp_get_wtime().
 */
double serial()
{
    const double started = omp_get_wtime();

    int total = 0;
    for (int firstRow = 0; firstRow < n; ++firstRow)
    {
        // Queen fixed at (firstRow, column 0): with column 0 the row bit and
        // the first diagonal bit coincide.
        const int rowBit = 1 << firstRow;
        const int diagBit = 1 << (firstRow + n - 1);
        total += call(1, rowBit, rowBit, diagBit);
    }
    printf("Found %d configuration for n=%d\n", total, n);

    const double finished = omp_get_wtime();
    printf("Time taken without openmp %lf\n", finished - started);
    return finished - started;
}
int main()
{
    double average=0;
    int count=0;
    for(int i=2;i<=13;i++)
    {
        count++;
        n=i;

        double stime=serial();
        double ptime=parallel();
        printf("OpenMP is %lf times faster for n=%d\n",stime/ptime,n);
        average+=stime/ptime;
        puts("===============");
    }
    printf("On average OpenMP is %lf times faster\n",average/count);
    return 0;

}

并行代码已经比普通代码快,但是我想知道如何使用OpenMP pragma对其进行进一步优化.我想知道为了获得更好的性能,我应该做什么、不应该做什么.

The parallel code is already faster than the serial one, but I wonder how I can optimize it further using OpenMP pragmas. I want to know what I should do for better performance and what I should not do.

谢谢.

(请不要建议与并行编程无关的任何优化)

(Please don't suggest any optimizations that are unrelated to parallel programming.)

推荐答案

我知道我来得有点晚,但是您可以使用任务队列进行进一步优化(结果快约7-10%).我不知道为什么.这是我正在使用的代码:

I know I am a little late to the party, but you can use task queueing for further optimization (about 7-10% faster results). I have no idea why. Here's the code that I am using:

#include <cstdio>    // printf
#include <cstdlib>   // abs, atoi
#include <iomanip>   // modify std::out
#include <iostream>  // std::cout, cin, cerr ...
#include <vector>    // std::vector
#include <omp.h>

using namespace std;

// Number of complete placements found so far.
int nrOfSolutions=0;
// Board edge length. Because of the using-directive above, the unqualified
// name `size` also finds std::size (C++17) and is ambiguous; always refer to
// this variable as `::size`.
int size=0;

/**
 * Prints one board to std::cout, preceded by a "Solution <n>" line on
 * std::cerr that shows the current value of nrOfSolutions.
 *
 * @param queens queens[row] is the column of the queen in that row; the first
 *               `::size` entries are read.
 */
void print(int queens[]) {
  cerr << "Solution " << nrOfSolutions << endl;
  for(int row=0; row < ::size; row++) {
    for(int col=0; col < ::size; col++) {
      // 'Q' marks the queen's square, '-' an empty one.
      cout << (queens[row]==col ? 'Q' : '-');
    }
    cout << endl;
  }
}

void setQueen(int queens[], int row, int col, int id) {

  for(int i=0; i<row; i++) {
    // vertical attacks
    if (queens[i]==col) {
      return;
    }
    // diagonal attacks
    if (abs(queens[i]-col) == (row-i) ) {
      return;
    }
  }

  // column is ok, set the queen
  queens[row]=col;

  if(row==size-1) {


    // only one thread should print allowed to print at a time
    {
      // increasing the solution counter is not atomic
#pragma omp critical
      nrOfSolutions++;
#ifdef _DEBUG
#pragma omp critical
      print(queens);
#endif
    }

  }
  else {
    // try to fill next row
    for(int i=0; i<size; i++) {
      setQueen(queens, row+1, i, id);
    }
  }
}

void solve() {
  int myid=0 ;

#pragma omp parallel
#pragma omp single
  {
      for(int i=0; i<size; i++) {
/*
#ifdef _OMP //(???)
  myid = omp_get_thread_num();  
#endif
#ifdef _DEBUG
  cout << "ThreadNum: " << myid << endl ;
#endif
  */
  // try all positions in first row
  // create separate array for each recursion
  // started here
#pragma omp task
    setQueen(new int[size], 0, i, myid);
      }
    }
}

int main(int argc, char*argv[]) {

  if(argc !=2) {
    cerr << "Usage: nq-openmp-taskq boardSize.\n";
    return 0;
  }

  size = atoi(argv[1]);
  cout << "Starting OpenMP Task Queue solver for size " << size << "...\n";

    double st=omp_get_wtime();
    solve();

    double en=omp_get_wtime();
    printf("Time taken using openmp %lf\n",en-st);

  cout << "Number of solutions: " << nrOfSolutions << endl;

return 0;
}

这篇关于使用openmp优化N皇后的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆