cholesky分解ScaLapack错误 [英] cholesky decomposition ScaLapack error

查看:285
本文介绍了cholesky分解ScaLapack错误的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我收到以下错误,我不知道为什么。

  {1,1}到PDPOTRF参数号2的值为非法值
{1,0}:在进入PDPOTRF时,参数号2具有非法值
{0,1}:在进入PDPOTRF时,参数号2有一个非法值
{0,0}:在进入PDPOTRF时,参数2具有非法值


info< 0:如果第i个参数是数组,并且j条目具有非法值,则INFO = - (i * 100 + j),如果第i个参数是标量并且具有非法值,则INFO = -i。

我知道错误消息的含义,但我尽可能遵循网上提供的日期文档并试图从工作示例代码在Web上拼凑一个平行cholesky因式分解。我不知道我在哪里错了。



可能有人解释下我的错误在下面的代码?下面是代码的概述,我使用4个处理器进行测试,并将8x8矩阵划分为2 x 2处理器块网格
从文件加载矩阵,再看一个示例8 x 8 matrixfile,

  182 147 140 125 132 76 126 157 
147 213 185 150 209 114 166 188
140 185 232 129 194 142 199 205
125 150 129 143 148 81 104 150
132 209 194 148 214 122 172 189
76 114 142 81 122 102 129 117
126 166 199 104 172 129 187 181
157 188 205 150 189 117 181 259

我按照例子将矩阵分布到4在4个节点中的每一个上分离4x4个局部阵列。然后,我运行 descinit _ 并调用相关的 pdpotrf _ 例程,产生上述错误。我不知道我在哪里错了,并尽量跟踪文档,尽我所能。在fortran中并行cholesky分解的工作示例也将极大地帮助



函数调用的引用



pdpotrf_



descinit_



运行参数

  - 含义=值
N - 全局行= 8
M - 全局集合= 8
Nb - 本地块行= 2
Mb - 本地块集合= 2
nrows - Local Rows = 4
ncols Local Cols = 4
lda - 本地数组的前导维数= 4(我试过2,4,8)
ord - 矩阵的秩为4 (我也尝试过许多不同的事情)

我在每个节点上打印上述参数它们是相同的

  #include< mpi.h> 
#include< iostream>
#include< iomanip>
#include< string>
#include< fstream>
#include< iostream>
#include< stdlib.h>
#include< sstream>
using namespace std;


/ *
编译:
mpic ++ test.cpp -o test -L / home / admin / libs -lscalapack -lrefblas -ltmg -lreflapack -lgfortran -Wall -O2
要运行:
mpirun -np 4 ./test matrixfile 8 8 2 2

* /

externC
/ * Cblacs声明* /
void Cblacs_pinfo(int *,int *);
void Cblacs_get(int,int,int *);
void Cblacs_gridinit(int *,const char *,int,int);
void Cblacs_gridinfo(int,int *,int *,int *,int *);
void Cblacs_pcoord(int,int,int *,int *);
void Cblacs_gridexit(int);
void Cblacs_barrier(int,const char *);
void Cdgerv2d(int,int,int,double *,int,int,int);
void Cdgesd2d(int,int,int,double *,int,int,int);

int numroc_(int *,int *,int *,int *,int *);

void pdpotrf_(char *,int *,double *,
int *,int *,int *,int *);

void descinit_(int *,int *,int *,int *,int *,int *,int *,
int *,int *,int *);

}

int main(int argc,char ** argv){
/ * MPI * /
int mpirank,nprocs;
MPI_Init(& argc,& argv);
MPI_Comm_rank(MPI_COMM_WORLD,& mpirank);
MPI_Comm_size(MPI_COMM_WORLD,& nprocs);
double MPIelapsed;
double MPIt2;
double MPIt1;

/ *帮助vars * /
int iZERO = 0;
int verbose = 1;
bool mpiroot =(mpirank == 0);

if(argc <6){
if(mpiroot)
cerr<< 用法:matrixTest矩阵文件N M Nb Mb
< endl
<< N = Rows,M = Cols,Nb = Row Blocks,Mb = Col Blocks
< endl;

MPI_Finalize();
return 1;
}
/ * Scalapack / Blacs Vars * /
int N,M,Nb,Mb;
int descA [9];
int info = 0;
// int mla = 4;
int ord = 8;



double * A_glob = NULL,* A_glob2 = NULL,* A_loc = NULL;

/ *解析命令行参数* /
if(mpiroot){
/ *读取命令行参数* /
stringstream stream;
stream<< argv [2]< < argv [3]< < argv [4]< < argv [5];
stream>> N> M>> Nb> Mb;


/ *预留空间和读矩阵(带转置!)* /
A_glob = new double [N * M];
A_glob2 = new double [N * M];
String fname(argv [1]);
ifstream file(fname.c_str());
for(int r = 0; r< N; ++ r){
for(int c = 0; c file> ; *(A_glob + N * c + r);
}
}

/ *打印矩阵* /

if(verbose == 1){
cout< Matrix A:\\\
;
for(int r = 0; r for(int c = 0; c cout ; setw(3)<< *(A_glob + N * c + r) ;
}
cout<< \\\
;
}
cout<< endl;
}


}

/ *开始Cblas上下文* /
int ctxt,myid,myrow,mycol,numproc;
//< TODO> make dynamic
int procrows = 2,proccols = 2;
Cblacs_pinfo(& myid,& numproc);
Cblacs_get(0,0,& ctxt);
Cblacs_gridinit(& ctxt,Row-major,procrows,proccols);
Cblacs_gridinfo(ctxt,& procrows,& proccols,& myrow,& mycol);
/ *进程网格的进程坐标* /
// Cblacs_pcoord(ctxt,myid,& myrow,& mycol);


/ *广播矩阵维* /
int dimensions [4];
if(mpiroot){
dimensions [0] = N; //全局行
dimensions [1] = M; //全局cols
dimension [2] = Nb; // Local Rows
dimensions [3] = Mb; // Local Cols
}
MPI_Bcast(dimensions,4,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(& ord,1,MPI_INT,0,MPI_COMM_WORLD);
N = dimensions [0];
M = dimensions [1];
Nb = dimensions [2];
Mb = dimensions [3];

int nrows = numroc _(& N,& Nb,& myrow,& iZERO,& procrows);
int ncols = numroc _(& M,& Mb,& mycol,& iZERO,& proccols);

int lda = max(1,nrows);

MPI_Bcast(& lda,1,MPI_INT,0,MPI_COMM_WORLD);

/ *打印网格图案* /
if(myid == 0)
cout< 处理网格图案:< endl;
for(int r = 0; r for(int c = 0; c Cblacs_barrier(ctxt, 所有);
if(myrow == r&& mycol == c){
cout<< myid<< <冲洗;
}
}
Cblacs_barrier(ctxt,All);
if(myid == 0)
cout<< endl;
}

if(myid == 0){
cout<<Run Parameters<< endl;
cout<<Global Rows =<< M << endl;
cout<<Global Cols =<< N<< endl;
cout<<Local Block Rows =<< Mb << endl;
cout<<Local Block Cols =<< Nb cout<< nrows =<< nrows<< endl;
cout<< ncols =<< ncols<< endl;
cout<< lda =<<< lda<< endl;
cout<<Order =<< ord<< endl;
}


for(int id = 0; id< numproc; ++ id){
Cblacs_barrier(ctxt,All);
}
A_loc = new double [nrows * ncols];
for(int i = 0; i
/ *散列矩阵* /
int sendr = 0,sendc = 0,recvr = 0,recvc = 0;
for(int r = 0; r sendc = 0;
int nr = Nb;
if(N-r nr = N-r;

for(int c = 0; c int nc = Mb;
if(M-c nc = M-c;

if(mpiroot){
Cdgesd2d(ctxt,nr,nc,A_glob + N * c + r,N,sendr,sendc);
}

if(myrow == sendr& mycol == sendc){
Cdgerv2d(ctxt,nr,nc,A_loc + nrows * recvc + recvr,nrows ,0,0);
recvc =(recvc + nc)%ncols;
}

}

if(myrow == sendr)
recvr =(recvr + nr)%nrows;
}

/ *打印局部矩阵* /
if(verbose == 1){
for(int id = 0; id< numproc; ++ id){
if(id == myid){
cout< A_loc on node< myid<< endl;
for(int r = 0; r for(int c = 0; c< ncols; ++ c)
cout< setw(3)<< *(A_loc + nrows * c + r) ;
cout<< endl;
}
cout<< endl;
}
Cblacs_barrier(ctxt,All);
}
}

for(int id = 0; id< numproc; ++ id){
Cblacs_barrier(ctxt,All);
}

/ * DescInit * /
info = 0;
descinit_(descA,& N,& M,& Nb,& Mb,& iZERO,& iZERO,& ctxt,& lda,&

if(mpiroot){
if(verbose == 1){
if(info == 0){
cout< << endl;
}
if(info< 0){
cout<<Error Info< 0:if INFO = -i,the i-th argument has a illegal value ;& endl
<<Info =<< info<< endl;
}
}
// Cblacs_barrier(ctxt,All);
}

// psgesv_(n,1,al,1,1,idescal,ipiv,b,1,1,idescb,info)* /
// psgesv_ (& n,& one,al,& one,& one,idescal,ipiv,b,& one,& one,idescb,&
// pXelset http://www.netlib.org/scalapack/tools/pdelset.f


/ * CHOLESKY HERE * /
info = 0;
MPIt1 = MPI_Wtime();

pdpotrf _(L,& ord,A_loc,& Nb,& Mb,descA,& info);

for(int id = 0; id< numproc; ++ id){
Cblacs_barrier(ctxt,All);
}

MPIt2 = MPI_Wtime();
MPIelapsed = MPIt2-MPIt1;
if(mpiroot){
std :: cout<<Cholesky MPI Run Time< MPIelapsed<< std :: end1;

if(info == 0){
std :: cout<<SUCCESS<< std :: endl;
}
if(info< 0){

cout< info< 0:如果第i个参数是数组,并且j条目具有非法值,则INFO = - (i * 100 + j),如果第i个参数是标量并且具有非法值,则INFO = -i。<< endl;
cout<<info =<<信息< endl;
}
if(info> 0){
std :: cout<<matrix is not positive definte<< std :: endl;
}
}

//在节点收到节点之前,sanity check将全局矩阵设置为零
if(mpiroot){
for(int r = 0; r for(int c = 0; c A_glob2 [c * N + r]
}
}
}

/ *集合矩阵* /
sendr = 0;
for(int r = 0; r sendc = 0;
//发送的行数
//这是最后一行块吗?
int nr = Nb;
if(N-r nr = N-r;

for(int c = 0; c //要发送的cols数量
//这是最后一个col块吗?
int nc = Mb;
if(M-c nc = M-c;

if(myrow == sendr& mycol == sendc){
//发送一个nr-by-nc子矩阵到进程(sendr,sendc)
Cdgesd2d (ctxt,nr,nc,A_loc + nrows * recvc + recvr,nrows,0,0);
recvc =(recvc + nc)%ncols;
}

if(mpiroot){
//接收相同的数据
//局部矩阵的前导维数为nrows!
Cdgerv2d(ctxt,nr,nc,A_glob2 + N * c + r,N,sendr,sendc);
}
}
if(myrow == sendr)
recvr =(recvr + nr)%nrows;
}
/ *打印测试矩阵* /
if(mpiroot){
if(verbose == 1){
cout< 矩阵A测试:\\\
;
for(int r = 0; r for(int c = 0; c cout ; setw(3)<< *(A_glob2 + N * c + r) ;
}
cout<< endl;
}
}
}

/ *释放资源* /
delete [] A_glob;
delete [] A_glob2;
delete [] A_loc;
Cblacs_gridexit(ctxt);
MPI_Finalize();
}


解决方案

这里是你必须做的(我检查了修改的MPI程序的结果对Cholesky分解矩阵在八度 - 它的工作原理)。



我发现IBM的以下LAPACK引用比链接中的引用更有用:
http://publib.boulder.ibm.com/infocenter /clresctr/vxrx/index.jsp?topic=%2Fcom.ibm.cluster.pessl.v4r2.pssl100.doc%2Fam6gr_lpotrf.htm



PDPOTRF(UPLO,N,A,IA,JA,DESCA,INFO)



您正通过 Mb Nb IA JA 。然而,这些参数意味着在更大的矩阵内提供全局矩阵的起始行和列。它们只有在你有一个大矩阵并且只想要一个子矩阵的Cholesky分解时才是相关的。在你的情况下, IA JA 都必须 1



所以你需要做的是:

  int IA = 1; 
int JA = 1;
pdpotrf _(L,& ord,A_loc,& IA,& JA,descA,& info);

您也可以更改硬编码 int ord = 8; code>,因此它取决于从命令行读取的值,否则以后会遇到问题。



如上所述修改的程序的输出:

  Cholesky MPI运行时间0.000659943 
成功
矩阵A测试:
13.4907 147 140 125 132 76 126 157
10.8964 9.70923 185 150 209 114 166 188
10.3775 7.4077 8.33269 129 194 142 199 205
9.26562 5.0507 -0.548194 5.59806 148 81 104 150
9.78449 10.5451 1.72175 0.897537 1.81524 122 172 189
5.63349 5.41911 5.20784 0.765767 0.0442447 3.63139 129 117
9.33974 6.61543 6.36911 -2.22569 1.03941 2.48498 1.79738 181
11.6376 6.30249 4.50561 2.28799 -0.627688 -2.17633 7.27182 0.547228
<

  octave:1> A = dlmread(matrixfile4)
A =

182 147 140 125 132 76 126 157
147 213 185 150 209 114 166 188
140 185 232 129 194 142 199 205
125 150 129 143 148 81 104 150
132 209 194 148 214 122 172 189
76 114 142 81 122 102 129 117
126 166 199 104 172 129 187 181
157 188 205 150 189 117 181 259

octave:2> C = chol(A)
C =

13.49074 10.89636 10.37749 9.26562 9.78449 5.63349 9.33974 11.63761
0.00000 9.70923 7.40770 5.05070 10.54508 5.41911 6.61543 6.30249
0.00000 0.00000 8.33269 -0.54819 1.72175 5.20784 6.36911 4.50561
0.00000 0.00000 0.00000 5.59806 0.89754 0.76577 -2.22569 2.28799
0.00000 0.00000 0.00000 0.00000 1.81524 0.04424 1.03941 -0.62769
0.00000 0.00000 0.00000 0.00000 0.00000 3.63139 2.48498 -2.17633
0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 1.79738 7.27182
0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.54723

现在被删除 - 关于矩阵被错误地复制到节点不适用,你的程序的其余部分对我来说似乎很好。)


I'm getting the following error and i'm not sure why.

{    1,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    1,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value


 info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. 

I Know what the error messages means but I followed the dated documentation available on the web as best as possible and tried to piece together a parallel cholesky factorization from working example codes on the web. I'm not sure where I went wrong.

Could some one explain where I went wrong in the code below? Here's an overview of what the code does, I'm testing with 4 processors and divide the 8x8 matrix into 2 x 2 processor block grid loads a matrix from file, heres an example 8 x 8 matrixfile ,

182   147   140   125   132    76   126   157
147   213   185   150   209   114   166   188
140   185   232   129   194   142   199   205
125   150   129   143   148    81   104   150
132   209   194   148   214   122   172   189
76   114   142    81   122   102   129   117
126   166   199   104   172   129   187   181
157   188   205   150   189   117   181   259

I followed examples to distribute the Matrix to 4 separate 4x4 local arrays one on each of the 4 nodes. I then run descinit_ and call the associated pdpotrf_ routine which yields the above error. I have no idea where i went wrong and tried to follow documentation as best as I could. A working example of a parallel cholesky decomposition in fortran would also greatly help

References for function calls

pdpotrf_

descinit_

Run Parameters

code name - Meaning = Value
N - Global Rows = 8
M - Global Cols = 8
Nb - Local Block Rows = 2
Mb - Local Block Cols = 2
nrows - Local Rows = 4
ncols Local Cols= 4
lda - Leading dimension of local array = 4 (i've tried 2,4,8)
ord - Order of Matrix = 4   (i've also tried many different things here as well)

I Printed the above parameters on every node and they are the same

#include <mpi.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <sstream>
using namespace std;


/*
  To compile:
  mpic++ test.cpp -o test -L/home/admin/libs -lscalapack -lrefblas -ltmg -lreflapack -lgfortran -Wall -O2
  To run:
  mpirun -np 4 ./test matrixfile 8 8 2 2

*/

extern "C" {
  /* Cblacs declarations */
  void Cblacs_pinfo(int*, int*);
  void Cblacs_get(int, int, int*);
  void Cblacs_gridinit(int*, const char*, int, int);
  void Cblacs_gridinfo(int, int*, int*, int*,int*);
  void Cblacs_pcoord(int, int, int*, int*);
  void Cblacs_gridexit(int);
  void Cblacs_barrier(int, const char*);
  void Cdgerv2d(int, int, int, double*, int, int, int);
  void Cdgesd2d(int, int, int, double*, int, int, int);

  int numroc_(int*, int*, int*, int*, int*);

  void pdpotrf_(char*, int*, double*,
        int*, int*, int*, int*);

  void descinit_( int *, int *, int *, int *, int *, int *, int *,
          int *, int *, int *);

}

int main(int argc, char **argv){
  /* MPI */
  int mpirank,nprocs;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  double MPIelapsed;
  double MPIt2;
  double MPIt1;

  /* Helping vars */
  int iZERO = 0;
  int verbose = 1;
  bool mpiroot = (mpirank == 0);

  if (argc < 6) {
    if (mpiroot)
      cerr << "Usage: matrixTest matrixfile N M Nb Mb"
       << endl
       << " N = Rows , M = Cols , Nb = Row Blocks , Mb = Col Blocks "
       << endl;

    MPI_Finalize();
    return 1;
  }
  /* Scalapack / Blacs Vars */
  int N, M, Nb, Mb;
  int descA[9];
  int info = 0;
  //  int mla = 4;
  int ord = 8;



  double *A_glob = NULL, *A_glob2 = NULL, *A_loc = NULL;

  /* Parse command line arguments */
  if (mpiroot) {
    /* Read command line arguments */
    stringstream stream;
    stream << argv[2] << " " << argv[3] << " " << argv[4] << " " << argv[5];
    stream >> N >> M >> Nb >> Mb;


    /* Reserve space and read matrix (with transposition!) */
    A_glob  = new double[N*M];
    A_glob2 = new double[N*M];
    string fname(argv[1]);
    ifstream file(fname.c_str());
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    file >> *(A_glob + N*c + r);
      }
    }

    /* Print matrix */

      if(verbose == 1) {
    cout << "Matrix A:\n";
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
        cout << setw(3) << *(A_glob + N*c + r) << " ";
      }
      cout << "\n";
    }
    cout << endl;
      }


  }

  /* Begin Cblas context */
  int ctxt, myid, myrow, mycol, numproc;
  //<TODO> make dynamic
  int procrows = 2, proccols = 2;
  Cblacs_pinfo(&myid, &numproc);
  Cblacs_get(0, 0, &ctxt);
  Cblacs_gridinit(&ctxt, "Row-major", procrows, proccols);
  Cblacs_gridinfo( ctxt, &procrows, &proccols, &myrow, &mycol );
  /* process coordinates for the process grid */
  // Cblacs_pcoord(ctxt, myid, &myrow, &mycol);


  /* Broadcast of the matrix dimensions */
  int dimensions[4];
  if (mpiroot) {
    dimensions[0] = N;//Global Rows
    dimensions[1] = M;//Global Cols
    dimensions[2] = Nb;//Local Rows
    dimensions[3] = Mb;//Local Cols
  }
  MPI_Bcast(dimensions, 4, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&ord, 1, MPI_INT, 0, MPI_COMM_WORLD);
  N = dimensions[0];
  M = dimensions[1];
  Nb = dimensions[2];
  Mb = dimensions[3];

  int nrows = numroc_(&N, &Nb, &myrow, &iZERO, &procrows);
  int ncols = numroc_(&M, &Mb, &mycol, &iZERO, &proccols);

  int lda = max(1,nrows);

  MPI_Bcast(&lda, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* Print grid pattern */
  if (myid == 0)
    cout << "Processes grid pattern:" << endl;
  for (int r = 0; r < procrows; ++r) {
    for (int c = 0; c < proccols; ++c) {
      Cblacs_barrier(ctxt, "All");
      if (myrow == r && mycol == c) {
    cout << myid << " " << flush;
      }
    }
    Cblacs_barrier(ctxt, "All");
    if (myid == 0)
      cout << endl;
  }

  if(myid == 0){
    cout <<"Run Parameters"<<endl;
    cout <<"Global Rows = " << M <<endl;
    cout <<"Global Cols = " << N <<endl;
    cout <<"Local Block Rows = " << Mb <<endl;
    cout <<"Local Block Cols = " << Nb <<endl;
    cout << "nrows = "<<nrows<<endl;
    cout << "ncols = "<<ncols<<endl;
    cout << "lda = "<<lda<<endl;
    cout <<"Order = "<<ord<<endl;
  }


  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }
  A_loc = new double[nrows*ncols];
  for (int i = 0; i < nrows*ncols; ++i) *(A_loc+i)=0.;

  /* Scatter matrix */
  int sendr = 0, sendc = 0, recvr = 0, recvc = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (mpiroot) {
    Cdgesd2d(ctxt, nr, nc, A_glob+N*c+r, N, sendr, sendc);
      }

      if (myrow == sendr && mycol == sendc) {
    Cdgerv2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

    }

    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }

  /* Print local matrices */
  if(verbose == 1) {
  for (int id = 0; id < numproc; ++id) {
    if (id == myid) {
    cout << "A_loc on node " << myid << endl;
    for (int r = 0; r < nrows; ++r) {
      for (int c = 0; c < ncols; ++c)
        cout << setw(3) << *(A_loc+nrows*c+r) << " ";
      cout << endl;
    }
    cout << endl;
      }
      Cblacs_barrier(ctxt, "All");
    }
  }

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  /* DescInit */
  info=0;
  descinit_(descA, &N, &M, &Nb, &Mb,&iZERO,&iZERO,&ctxt, &lda, &info);

  if(mpiroot){
    if(verbose == 1){
      if (info == 0){
    cout<<"Description init sucesss!"<<endl;
      }
      if(info < 0){
    cout <<"Error Info < 0: if INFO = -i, the i-th argument had an illegal value"<< endl
         <<"Info = " << info<<endl;
      }
    }
    // Cblacs_barrier(ctxt, "All");
  }

  //psgesv_(n, 1, al, 1,1,idescal, ipiv, b, 1,1,idescb,  info) */
  //    psgesv_(&n, &one, al, &one,&one,idescal, ipiv, b, &one,&one,idescb,  &info);
  //pXelset http://www.netlib.org/scalapack/tools/pdelset.f


  /* CHOLESKY HERE */
  info = 0;
  MPIt1=MPI_Wtime();

  pdpotrf_("L",&ord,A_loc,&Nb,&Mb,descA,&info);

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  MPIt2 = MPI_Wtime();
  MPIelapsed=MPIt2-MPIt1;
  if(mpiroot){
    std::cout<<"Cholesky MPI Run Time" << MPIelapsed<<std::endl;

    if(info == 0){
      std::cout<<"SUCCESS"<<std::endl;
    }
    if(info < 0){

      cout << "info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. " << endl;
      cout<<"info = " << info << endl;
    }
    if(info > 0){
      std::cout<<"matrix is not positve definte"<<std::endl;
    }
  }

  //sanity check set global matrix to zero before it's recieved by nodes
  if(mpiroot){
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    A_glob2[c *N + r]  = 0; 
      }
    }
  }

  /* Gather matrix */
  sendr = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    // Number of rows to be sent
    // Is this the last row block?
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      // Number of cols to be sent
      // Is this the last col block?
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (myrow == sendr && mycol == sendc) {
    // Send a nr-by-nc submatrix to process (sendr, sendc)
    Cdgesd2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

      if (mpiroot) {
    // Receive the same data
    // The leading dimension of the local matrix is nrows!
    Cdgerv2d(ctxt, nr, nc, A_glob2+N*c+r, N, sendr, sendc);
      }
    }
    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }
  /* Print test matrix */
  if (mpiroot) {
    if(verbose == 1){
      cout << "Matrix A test:\n";
      for (int r = 0; r < N; ++r) {
    for (int c = 0; c < M; ++c) {
      cout << setw(3) << *(A_glob2+N*c+r) << " ";
    }
    cout << endl;
      }
    }
  }

  /* Release resources */
  delete[] A_glob;
  delete[] A_glob2;
  delete[] A_loc;
  Cblacs_gridexit(ctxt);
  MPI_Finalize();
}

解决方案

Right - I solved this. Here's what you have to do (I checked the result of the modified MPI program against a Cholesky decomp of your matrix in Octave -- it works.).

I found the following LAPACK reference by IBM to be more helpful than the one in your link: http://publib.boulder.ibm.com/infocenter/clresctr/vxrx/index.jsp?topic=%2Fcom.ibm.cluster.pessl.v4r2.pssl100.doc%2Fam6gr_lpotrf.htm

PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO )

You are passing Mb and Nb as IA and JA. However, those parameters are meant to provide the starting row and column of your global matrix inside a larger matrix. They are only relevant if you have a big matrix and only want the Cholesky decomp of a submatrix. In your case, IA and JA both have to be 1!

So all you need to do is:

int IA = 1;
int JA = 1;
pdpotrf_("L",&ord,A_loc,&IA,&JA,descA,&info);

You may also want to change your hardcoded int ord = 8; so that it depends on the value read from the command line, otherwise you'll run into problems later.

Output of your program modified as described above:

Cholesky MPI Run Time0.000659943
SUCCESS
Matrix A test:
13.4907 147 140 125 132  76 126 157 
10.8964 9.70923 185 150 209 114 166 188 
10.3775 7.4077 8.33269 129 194 142 199 205 
9.26562 5.0507 -0.548194 5.59806 148  81 104 150 
9.78449 10.5451 1.72175 0.897537 1.81524 122 172 189 
5.63349 5.41911 5.20784 0.765767 0.0442447 3.63139 129 117 
9.33974 6.61543 6.36911 -2.22569 1.03941 2.48498 1.79738 181 
11.6376 6.30249 4.50561 2.28799 -0.627688 -2.17633 7.27182 0.547228 

Octave output for comparison:

octave:1> A=dlmread("matrixfile4")
A =

   182   147   140   125   132    76   126   157
   147   213   185   150   209   114   166   188
   140   185   232   129   194   142   199   205
   125   150   129   143   148    81   104   150
   132   209   194   148   214   122   172   189
    76   114   142    81   122   102   129   117
   126   166   199   104   172   129   187   181
   157   188   205   150   189   117   181   259

octave:2> C=chol(A)
C =

   13.49074   10.89636   10.37749    9.26562    9.78449    5.63349    9.33974   11.63761
    0.00000    9.70923    7.40770    5.05070   10.54508    5.41911    6.61543    6.30249
    0.00000    0.00000    8.33269   -0.54819    1.72175    5.20784    6.36911    4.50561
    0.00000    0.00000    0.00000    5.59806    0.89754    0.76577   -2.22569    2.28799
    0.00000    0.00000    0.00000    0.00000    1.81524    0.04424    1.03941   -0.62769
    0.00000    0.00000    0.00000    0.00000    0.00000    3.63139    2.48498   -2.17633
    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    1.79738    7.27182
    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    0.54723

(My earlier comment -now deleted- about matrices being wrongly copied to the nodes does not apply, the rest of your program seems fine to me.)

这篇关于cholesky分解ScaLapack错误的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆