MPI partition matrix into blocks

Problem description

I want to partition a matrix into blocks (not stripes) and then distribute these blocks using MPI_Scatter.

I came up with a solution which works, but I think it is far from "best practice". I have an 8x8 matrix, filled with numbers from 0 to 63. I divide it into four 4x4 blocks using MPI_Type_vector and distribute them via MPI_Send, but this requires some extra computation since I have to compute the offset of each block in the big matrix.

If I use scatter, the first (top-left) block is transferred OK, but the other blocks are not (wrong offset for the start of each block).

So is it possible to transfer blocks of a matrix using MPI_Scatter, or what is the best way to do the desired decomposition?

This is my code:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define SIZE 8


int main(void) {

        MPI_Init(NULL, NULL);
        int p, rank;
        MPI_Comm_size(MPI_COMM_WORLD, &p);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        char i;

        char a[SIZE*SIZE];
        char b[(SIZE/2)*(SIZE/2)];

        MPI_Datatype columntype;
        MPI_Datatype columntype2;

        MPI_Type_vector(4, 4, SIZE, MPI_CHAR, &columntype2);
        MPI_Type_create_resized( columntype2, 0, sizeof(MPI_CHAR), &columntype );
        MPI_Type_commit(&columntype);

        if(rank == 0) {
                for( i = 0; i < SIZE*SIZE; i++) {
                        a[i] = i;
                }

                for(int rec=0; rec < p; rec++) {
                        int offset = (rec%2)*4 + (rec/2)*32;
                      MPI_Send (a+offset, 1, columntype, rec, 0, MPI_COMM_WORLD);
                }
        }
        MPI_Recv (b, 16, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        //MPI_Scatter(&a, 1, boki, &b, 16, MPI_CHAR , 0, MPI_COMM_WORLD);

        printf("rank= %d  b= \n%d %d %d %d\n%d %d %d %d\n%d %d %d %d\n%d %d %d %d\n", rank, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);

        MPI_Finalize();

        return 0;
}

Solution

What you've got is pretty much "best practice"; it's just a bit confusing until you get used to it.

Two things, though:

First, be careful with this: sizeof(MPI_CHAR) is, I assume, 4 bytes, not 1. MPI_CHAR is an (integer) constant that describes (to the MPI library) a character. You probably want sizeof(char), or SIZE/2*sizeof(char), or anything else convenient. But the basic idea of doing a resize is right.
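
As a minimal sketch (reusing SIZE and the type names from the question's code), the corrected resize would look like this, with the extent set to a single char rather than sizeof(MPI_CHAR):

MPI_Datatype columntype;
MPI_Datatype columntype2;

/* One 4x4 block of the 8x8 matrix: 4 rows of 4 chars, with a row stride of SIZE */
MPI_Type_vector(4, 4, SIZE, MPI_CHAR, &columntype2);

/* Shrink the extent to one char (sizeof(char) == 1), so that displacements
 * can later be expressed in units of individual array elements */
MPI_Type_create_resized(columntype2, 0, sizeof(char), &columntype);
MPI_Type_commit(&columntype);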

Second, I think you're stuck using MPI_Scatterv, because there's no easy way to make the offset between consecutive blocks the same. That is, the first block starts at a[0], the second at a[SIZE/2] (a jump of SIZE/2), and the third at a[SIZE*(SIZE/2)] (a jump of (SIZE-1)*(SIZE/2)). So you need to be able to generate the offsets manually.
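
For example, for the question's 8x8 case (a 2x2 grid of 4x4 blocks), a sketch of those offsets might look like the following; with the extent resized to one char, each displacement is just the index of a block's top-left element in a. This mirrors what the full program below does, specialized to the original sizes:

int counts[4], disps[4];
for (int pr = 0; pr < 2; pr++) {          /* block row in the 2x2 grid    */
    for (int pc = 0; pc < 2; pc++) {      /* block column in the grid     */
        disps[pr*2 + pc]  = pr*4*SIZE + pc*4;  /* 0, 4, 32, 36 */
        counts[pr*2 + pc] = 1;            /* one block-typed element each */
    }
}

MPI_Scatterv(a, counts, disps, columntype,
             b, 16, MPI_CHAR, 0, MPI_COMM_WORLD);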

The following seems to work for me (I generalized it a little bit to make it clearer when "size" means "number of rows" vs "number of columns", etc):

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define COLS  12
#define ROWS  8

int main(int argc, char **argv) {

    MPI_Init(&argc, &argv);
    int p, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    char a[ROWS*COLS];
    const int NPROWS=2;  /* number of rows in _decomposition_ */
    const int NPCOLS=3;  /* number of cols in _decomposition_ */
    const int BLOCKROWS = ROWS/NPROWS;  /* number of rows in _block_ */
    const int BLOCKCOLS = COLS/NPCOLS; /* number of cols in _block_ */

    if (rank == 0) {
        for (int ii=0; ii<ROWS*COLS; ii++) {
            a[ii] = (char)ii;
        }
    }

    if (p != NPROWS*NPCOLS) {
        fprintf(stderr,"Error: number of PEs %d != %d x %d\n", p, NPROWS, NPCOLS);
        MPI_Finalize();
        exit(-1);
    }
    char b[BLOCKROWS*BLOCKCOLS];
    for (int ii=0; ii<BLOCKROWS*BLOCKCOLS; ii++) b[ii] = 0;

    MPI_Datatype blocktype;
    MPI_Datatype blocktype2;

    MPI_Type_vector(BLOCKROWS, BLOCKCOLS, COLS, MPI_CHAR, &blocktype2);
    MPI_Type_create_resized( blocktype2, 0, sizeof(char), &blocktype);
    MPI_Type_commit(&blocktype);

    int disps[NPROWS*NPCOLS];
    int counts[NPROWS*NPCOLS];
    for (int ii=0; ii<NPROWS; ii++) {
        for (int jj=0; jj<NPCOLS; jj++) {
            disps[ii*NPCOLS+jj] = ii*COLS*BLOCKROWS+jj*BLOCKCOLS;
            counts [ii*NPCOLS+jj] = 1;
        }
    }

    MPI_Scatterv(a, counts, disps, blocktype, b, BLOCKROWS*BLOCKCOLS, MPI_CHAR, 0, MPI_COMM_WORLD);
    /* each proc prints its "b" out, in order */
    for (int proc=0; proc<p; proc++) {
        if (proc == rank) {
            printf("Rank = %d\n", rank);
            if (rank == 0) {
                printf("Global matrix: \n");
                for (int ii=0; ii<ROWS; ii++) {
                    for (int jj=0; jj<COLS; jj++) {
                        printf("%3d ",(int)a[ii*COLS+jj]);
                    }
                    printf("\n");
                }
            }
            printf("Local Matrix:\n");
            for (int ii=0; ii<BLOCKROWS; ii++) {
                for (int jj=0; jj<BLOCKCOLS; jj++) {
                    printf("%3d ",(int)b[ii*BLOCKCOLS+jj]);
                }
                printf("\n");
            }
            printf("\n");
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Finalize();

    return 0;
}

Running:

$ mpirun -np 6 ./matrix

Rank = 0
Global matrix: 
  0   1   2   3   4   5   6   7   8   9  10  11 
 12  13  14  15  16  17  18  19  20  21  22  23 
 24  25  26  27  28  29  30  31  32  33  34  35 
 36  37  38  39  40  41  42  43  44  45  46  47 
 48  49  50  51  52  53  54  55  56  57  58  59 
 60  61  62  63  64  65  66  67  68  69  70  71 
 72  73  74  75  76  77  78  79  80  81  82  83 
 84  85  86  87  88  89  90  91  92  93  94  95 
Local Matrix:
  0   1   2   3 
 12  13  14  15 
 24  25  26  27 
 36  37  38  39 

Rank = 1
Local Matrix:
  4   5   6   7 
 16  17  18  19 
 28  29  30  31 
 40  41  42  43 

Rank = 2
Local Matrix:
  8   9  10  11 
 20  21  22  23 
 32  33  34  35 
 44  45  46  47 

Rank = 3
Local Matrix:
 48  49  50  51 
 60  61  62  63 
 72  73  74  75 
 84  85  86  87 

Rank = 4
Local Matrix:
 52  53  54  55 
 64  65  66  67 
 76  77  78  79 
 88  89  90  91 

Rank = 5
Local Matrix:
 56  57  58  59 
 68  69  70  71 
 80  81  82  83 
 92  93  94  95 
