Updating matrix in MPI Fortran


Problem description


I am running the following MPI Fortran code, in which each processor generates its part of a matrix. Each matrix value is then incremented by one and the updated block is sent to the root processor. Finally, the full matrix is assembled and printed. The problem is on the root processor: the matrix does not get updated there. Why is that?

    PROGRAM MAIN
    include "mpif.h"
    parameter (nx = 4)
    parameter (ny = 4)
    parameter (tsteps = 5)
    real*8    a(nx,ny),b(nx,ny)
    integer   rows,cols
    integer   myid, myid1,Root,source,numprocs
    integer   it,comm2d,ierr,req
    integer   sx, ex, sy, ey
    integer   dims(2),coord(2)
    logical   periods(2)
    integer status(MPI_STATUS_SIZE)
    data periods/2*.false./

    Root = 0
    CALL MPI_INIT( ierr )
    CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid1,ierr)
    CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
c       Get a new communicator for a decomposition of the domain.  
c       Let MPI find a "good" decomposition
    dims(1) = 0
    dims(2) = 0
    CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
    if (myid1.EQ.Root) then
        print *,'dimensions:',dims(1),dims(2)
    endif
    CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,
     &                    comm2d,ierr)
c       Get my position in this communicator
c       CALL MPI_COMM_RANK(comm2d,myid,ierr)
c       Compute the decomposition
    CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
    rows = ex-sx+1 
    cols = ey-sy+1  
c       Initialize the a matrix
    do  i= sx,ex
        do j=sy,ey
          a(i,j) = (i-1)+(j-1)
        enddo
    enddo    
    do it = 1,tsteps 
       do  i= sx,ex
           do j=sy,ey
              a(i,j) = a(i,j)+1
           enddo
       enddo
C     Send the local block and its coordinates to the root processor
    call MPI_ISEND(sx,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ex,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(sy,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ey,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
     &                   Root,1,comm2d,req,ierr )
c     Root receives the blocks from all processors
    if (myid1.EQ.Root) then
       do source = 0,numprocs-1
          call MPI_RECV(sx,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ex,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(sy,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ey,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION, 
     &                   source,1,comm2d,status,ierr)
          a(sx:ex,sy:ey) = a(sx:ex,sy:ey) 
          call MPI_Wait(req, status, ierr) 
       enddo
       endif
       if (myid1.EQ.Root) then
c      print the results
       print *, 'time step=',it
        do 90 i=1,nx
          do 80 j = 1,ny
             write(*,70)a(i,j)
  70        format(2x,f8.2,$)
  80      continue
          print *, ' '
  90    continue      
       endif
     enddo
C      Cleanup goes here.
      CALL MPI_Comm_free( comm2d, ierr )
30    CALL MPI_FINALIZE(ierr)

      STOP
      END
C******************************************************* 
      subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      integer   comm2d
      integer   nx,ny,sx,ex,sy,ey
      integer   dims(2),coords(2),ierr
      logical   periods(2)
c Get (i,j) position of a processor from Cartesian topology.
      CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
C Decomposition in first (ie. X) direction
      CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
C Decomposition in second (ie. Y) direction
      CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
      return
      end
c********************************************************************* 
      SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
      integer n,numprocs,myid,s,e,nlocal,deficit
      nlocal  = n / numprocs
      s       = myid * nlocal + 1
      deficit = mod(n,numprocs)
      s       = s + min(myid,deficit)
C Give one more slice to processors
      if (myid .lt. deficit) then
          nlocal = nlocal + 1
      endif
      e = s + nlocal - 1
      if (e .gt. n .or. myid .eq. numprocs-1) e = n
      return
      end

I am generating the following matrix:

   A = [ 0  1  2  3
         1  2  3  4
         2  3  4  5
         3  4  5  6 ];

I am updating matrix A by adding 1 to every element inside the time-step loop, sending and receiving A in blocks, and printing it. When the matrix is printed, A(1:2,1:2) shows no update.
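For reference, with the initialization and update above, every element of the assembled matrix should equal (i-1)+(j-1)+t after t time steps. A minimal serial sketch of the expected result (no MPI involved; purely illustrative, program and variable names are not part of the original code):

c     Expected assembled matrix after "tsteps" updates: every
c     element starts at (i-1)+(j-1) and gains 1 per time step.
      program expected
      parameter (nx = 4)
      parameter (ny = 4)
      parameter (tsteps = 5)
      real*8 a(nx,ny)
      do i = 1, nx
         do j = 1, ny
            a(i,j) = (i-1) + (j-1) + tsteps
         enddo
      enddo
c     Print one matrix row per line
      do i = 1, nx
         write(*,'(4(2x,f8.2))') (a(i,j), j = 1, ny)
      enddo
      end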

Run the code with four processors to better understand my problem.

Solution

I found the mistake in the code. The indices sx, ex, sy, ey were being overwritten by the receives on the root processor, so from the next time step onwards the root updated and sent the wrong block, and its own part of the matrix stopped being updated. I have corrected the code by receiving the block coordinates into separate variables sx0, ex0, sy0, ey0; the full listing is posted below.
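The core of the change, shown in isolation as a sketch (it also derives the receive count from the incoming coordinates rather than from cols*rows, which is the safer choice if the blocks were ever of unequal size; with four ranks on this 4x4 grid every block is 2x2, so the two counts agree):

c     Root's receive loop: read each sender's block coordinates into
c     sx0..ey0 so the root's own sx, ex, sy, ey stay intact.
      if (myid1.EQ.Root) then
         do source = 0,numprocs-1
            call MPI_RECV(sx0,1,MPI_INTEGER,source,
     &                    1,comm2d,status,ierr)
            call MPI_RECV(ex0,1,MPI_INTEGER,source,
     &                    1,comm2d,status,ierr)
            call MPI_RECV(sy0,1,MPI_INTEGER,source,
     &                    1,comm2d,status,ierr)
            call MPI_RECV(ey0,1,MPI_INTEGER,source,
     &                    1,comm2d,status,ierr)
            call MPI_RECV(a(sx0:ex0,sy0:ey0),
     &                    (ex0-sx0+1)*(ey0-sy0+1),
     &                    MPI_DOUBLE_PRECISION,source,1,
     &                    comm2d,status,ierr)
         enddo
      endif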

    PROGRAM MAIN
    include "mpif.h"
    parameter (nx = 4)
    parameter (ny = 4)
    parameter (tsteps = 5)
    real*8    a(nx,ny),b(nx,ny)
    integer   rows,cols
    integer   myid, myid1,Root,source,numprocs
    integer   it,comm2d,ierr,req
    integer   sx, ex, sy, ey
    integer   sx0, ex0, sy0, ey0
    integer   dims(2),coord(2)
    logical   periods(2)
    integer status(MPI_STATUS_SIZE)
    data periods/2*.false./

    Root = 0
    CALL MPI_INIT( ierr )
    CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid1,ierr)
    CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
c       Get a new communicator for a decomposition of the domain.  
c       Let MPI find a "good" decomposition
    dims(1) = 0
    dims(2) = 0
    CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
    if (myid1.EQ.Root) then
        print *,'dimensions:',dims(1),dims(2)
    endif
    CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,
     &                    comm2d,ierr)
c       Get my position in this communicator
c       CALL MPI_COMM_RANK(comm2d,myid,ierr)
c       Compute the decomposition
    CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
    rows = ex-sx+1 
    cols = ey-sy+1  
c       Initialize the a matrix
    do  i= sx,ex
        do j=sy,ey
          a(i,j) = (i-1)+(j-1)
        enddo
    enddo    
    do it = 1,tsteps 
       do  i= sx,ex
           do j=sy,ey
              a(i,j) = a(i,j)+1
           enddo
       enddo
C     Send the local block and its coordinates to the root processor
    call MPI_ISEND(sx,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ex,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(sy,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(ey,1,MPI_INTEGER,Root,1, 
     &                   comm2d,req,ierr)
    call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
     &                   Root,1,comm2d,req,ierr )
c     Root receives the blocks from all processors
    if (myid1.EQ.Root) then
       do source = 0,numprocs-1
          call MPI_RECV(sx0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ex0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(sy0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(ey0,1,MPI_INTEGER,source,
     &                   1,comm2d,status,ierr )
          call MPI_RECV(a(sx0:ex0,sy0:ey0),cols*rows,MPI_DOUBLE_PRECISION, 
     &                   source,1,comm2d,status,ierr)
          a(sx0:ex0,sy0:ey0) = a(sx0:ex0,sy0:ey0) 
          call MPI_Wait(req, status, ierr) 
       enddo
       endif
       if (myid1.EQ.Root) then
c      print the results
       print *, 'time step=',it
        do 90 i=1,nx
          do 80 j = 1,ny
             write(*,70)a(i,j)
  70        format(2x,f8.2,$)
  80      continue
          print *, ' '
  90    continue      
       endif
     enddo
C      Cleanup goes here.
      CALL MPI_Comm_free( comm2d, ierr )
30    CALL MPI_FINALIZE(ierr)

      STOP
      END
C******************************************************* 
      subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      integer   comm2d
      integer   nx,ny,sx,ex,sy,ey
      integer   dims(2),coords(2),ierr
      logical   periods(2)
c Get (i,j) position of a processor from Cartesian topology.
      CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
C Decomposition in first (ie. X) direction
      CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
C Decomposition in second (ie. Y) direction
      CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
      return
      end
c********************************************************************* 
      SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
      integer n,numprocs,myid,s,e,nlocal,deficit
      nlocal  = n / numprocs
      s       = myid * nlocal + 1
      deficit = mod(n,numprocs)
      s       = s + min(myid,deficit)
C Give one more slice to processors
      if (myid .lt. deficit) then
          nlocal = nlocal + 1
      endif
      e = s + nlocal - 1
      if (e .gt. n .or. myid .eq. numprocs-1) e = n
      return
      end
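One further point worth noting, separate from the fix above: each rank issues five MPI_ISENDs per time step but stores every request handle in the single variable req, so only the last send is ever waited on, and only on the root. A hedged sketch of a more careful pattern (the names reqs and statuses are illustrative additions, not part of the original code), which completes all five sends on every rank before the buffers are reused:

c     Extra declarations (illustrative names):
      integer reqs(5)
      integer statuses(MPI_STATUS_SIZE,5)
c     Inside the time-step loop, in place of the five ISENDs above:
      call MPI_ISEND(sx,1,MPI_INTEGER,Root,1,comm2d,reqs(1),ierr)
      call MPI_ISEND(ex,1,MPI_INTEGER,Root,1,comm2d,reqs(2),ierr)
      call MPI_ISEND(sy,1,MPI_INTEGER,Root,1,comm2d,reqs(3),ierr)
      call MPI_ISEND(ey,1,MPI_INTEGER,Root,1,comm2d,reqs(4),ierr)
      call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
     &               Root,1,comm2d,reqs(5),ierr)
c     ... root's receive loop, unchanged ...
c     Every rank then completes its own sends:
      call MPI_WAITALL(5,reqs,statuses,ierr)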
