MPI hangs on MPI_Send for large messages

Problem Description

There is a simple C++/MPI (MPICH2) program that sends an array of type double. If the size of the array exceeds 9000, the program hangs during the call to MPI_Send. If the array is smaller than 9000 (8000, for example), the program works fine. The source code is below:

main.cpp

#include <mpi.h>
#include <iostream>
#include <cstdlib>
// (the Cube class shown further below must be visible here, e.g. via a header)

using namespace std;

Cube** cubes;
int cubesLen;

double* InitVector(int N) {
   double* x = new double[N];
   for (int i = 0; i < N; i++) {
       x[i] = i + 1;
   }
   return x;
}

void CreateCubes() {
    cubes = new Cube*[12];
    cubesLen = 12;
    for (int i = 0; i < 12; i++) {
       cubes[i] = new Cube(9000);
    }
}

void SendSimpleData(int size, int rank) {
    Cube* cube = cubes[0];
    int nodeDest = rank + 1;
    if (nodeDest > size - 1) {
        nodeDest = 1;
    }

    double* coefImOut = (double *) malloc(sizeof (double)*cube->coefficentsImLength);
    cout << "Before send" << endl;
    int count = cube->coefficentsImLength;
    MPI_Send(coefImOut, count, MPI_DOUBLE, nodeDest, 0, MPI_COMM_WORLD);
    cout << "After send" << endl;
    free(coefImOut);

    MPI_Status status;
    double *coefIm = (double *) malloc(sizeof(double)*count);

    int nodeFrom = rank - 1;
    if (nodeFrom < 1) {
        nodeFrom = size - 1;
    }

    MPI_Recv(coefIm, count, MPI_DOUBLE, nodeFrom, 0, MPI_COMM_WORLD, &status);
    free(coefIm);
}

int main(int argc, char *argv[]) {
    int size, rank;
    const int root = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    CreateCubes();

    if (rank != root) {
         SendSimpleData(size, rank);
    }

    MPI_Finalize();
    return 0;
}

class Cube

 class Cube {
 public:
    Cube(int size);
    Cube(const Cube& orig);
    virtual ~Cube();

    int Id() { return id; } 
    void Id(int id) { this->id = id; }

    int coefficentsImLength;
    double* coefficentsIm;

private:
    int id;
};

Cube::Cube(int size) {
    this->coefficentsImLength = size;

    coefficentsIm = new double[size];
    for (int i = 0; i < size; i++) {
        coefficentsIm[i] = 1;
    }
}

Cube::Cube(const Cube& orig) {
}

Cube::~Cube() {
    delete[] coefficentsIm;
}

The program runs on 4 processes:

mpiexec -n 4 ./myApp1

Any ideas?

Solution

The details of the Cube class aren't relevant here: consider a simpler version

#include <mpi.h>
#include <cstdlib>
#include <iostream>

using namespace std;

int main(int argc, char *argv[]) {
    int size, rank;
    const int root = 0;

    int datasize = atoi(argv[1]);

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank != root) {
        int nodeDest = rank + 1;
        if (nodeDest > size - 1) {
            nodeDest = 1;
        }
        int nodeFrom = rank - 1;
        if (nodeFrom < 1) {
            nodeFrom = size - 1;
        }

        MPI_Status status;
        int *data = new int[datasize];
        for (int i=0; i<datasize; i++)
            data[i] = rank;

        cout << "Before send" << endl;
        MPI_Send(data, datasize, MPI_INT, nodeDest, 0, MPI_COMM_WORLD);
        cout << "After send" << endl;
        MPI_Recv(data, datasize, MPI_INT, nodeFrom, 0, MPI_COMM_WORLD, &status);

        delete [] data;

    }

    MPI_Finalize();
    return 0;
}

where running gives

$ mpirun -np 4 ./send 1
Before send
After send
Before send
After send
Before send
After send
$ mpirun -np 4 ./send 65000
Before send
Before send
Before send

If you looked at the message queue window in DDT, you would see that everyone is sending and no one is receiving: a classic deadlock.

MPI_Send's semantics are, weirdly, not well defined, but it is allowed to block until "the receive has been posted". MPI_Ssend is clearer in this regard; it will always block until the receive has been posted. Details about the different send modes can be seen here.

The reason it worked for smaller messages is an accident of the implementation: for "small enough" messages (in your case, it looks to be <64kB), your MPI_Send implementation uses an "eager send" protocol and doesn't block on the receive; for larger messages, where it isn't necessarily safe just to keep buffered copies of the message around in memory, the send waits for the matching receive (which it is always allowed to do anyway).
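
A quick way to convince yourself that the hang is about send semantics rather than message size is to swap MPI_Send for MPI_Ssend in the simplified program. This is a sketch of my own, not part of the original answer; it deadlocks even with a single integer, because the synchronous send never completes until the matching receive is posted:

#include <mpi.h>
#include <iostream>

using namespace std;

int main(int argc, char *argv[]) {
    int size, rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank != 0) {
        // Same ring pattern as above
        int nodeDest = (rank == size - 1) ? 1 : rank + 1;
        int nodeFrom = (rank == 1) ? size - 1 : rank - 1;

        int out = rank, in = -1;
        MPI_Status status;

        cout << "Before ssend" << endl;
        // MPI_Ssend blocks until the matching receive is posted; since every
        // rank is stuck here, no one ever reaches the MPI_Recv below.
        MPI_Ssend(&out, 1, MPI_INT, nodeDest, 0, MPI_COMM_WORLD);
        cout << "After ssend" << endl;
        MPI_Recv(&in, 1, MPI_INT, nodeFrom, 0, MPI_COMM_WORLD, &status);
    }

    MPI_Finalize();
    return 0;
}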

There are a few things you could do to avoid this; all you have to do is make sure that not everyone is calling a blocking MPI_Send at the same time. You could (say) have even processors send first and then receive, and odd processors receive first and then send. You could use nonblocking communications (Isend/Irecv/Waitall; a sketch of that approach follows the Sendrecv example below). But the simplest solution in this case is to use MPI_Sendrecv, which is a blocking (send + receive), rather than a blocking send plus a blocking receive. The send and receive execute concurrently, and the function blocks until both are complete. So this works

#include <mpi.h>
#include <cstdlib>
#include <iostream>

using namespace std;

int main(int argc, char *argv[]) {
    int size, rank;
    const int root = 0;

    int datasize = atoi(argv[1]);

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank != root) {
        int nodeDest = rank + 1;
        if (nodeDest > size - 1) {
            nodeDest = 1;
        }
        int nodeFrom = rank - 1;
        if (nodeFrom < 1) {
            nodeFrom = size - 1;
        }

        MPI_Status status;
        int *outdata = new int[datasize];
        int *indata  = new int[datasize];
        for (int i=0; i<datasize; i++)
            outdata[i] = rank;

        cout << "Before sendrecv" << endl;
        MPI_Sendrecv(outdata, datasize, MPI_INT, nodeDest, 0,
                     indata, datasize, MPI_INT, nodeFrom, 0, MPI_COMM_WORLD, &status);
        cout << "After sendrecv" << endl;

        delete [] outdata;
        delete [] indata;
    }

    MPI_Finalize();
    return 0;
}

Running gives

$ mpirun -np 4 ./send 65000
Before sendrecv
Before sendrecv
Before sendrecv
After sendrecv
After sendrecv
After sendrecv
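
For completeness, here is a sketch of the nonblocking alternative mentioned above (again my own sketch, not part of the original answer): each rank posts MPI_Isend and MPI_Irecv up front and then waits on both with MPI_Waitall, so no rank sits in a blocking send while its neighbour does the same.

#include <mpi.h>
#include <cstdlib>
#include <iostream>

using namespace std;

int main(int argc, char *argv[]) {
    int size, rank;

    int datasize = atoi(argv[1]);

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank != 0) {
        int nodeDest = (rank == size - 1) ? 1 : rank + 1;
        int nodeFrom = (rank == 1) ? size - 1 : rank - 1;

        int *outdata = new int[datasize];
        int *indata  = new int[datasize];
        for (int i = 0; i < datasize; i++)
            outdata[i] = rank;

        MPI_Request reqs[2];
        MPI_Status  stats[2];

        cout << "Before isend/irecv" << endl;
        // Both calls return immediately; the transfers complete inside Waitall,
        // so the send and the receive can make progress at the same time.
        MPI_Isend(outdata, datasize, MPI_INT, nodeDest, 0, MPI_COMM_WORLD, &reqs[0]);
        MPI_Irecv(indata,  datasize, MPI_INT, nodeFrom, 0, MPI_COMM_WORLD, &reqs[1]);
        MPI_Waitall(2, reqs, stats);
        cout << "After waitall" << endl;

        delete [] outdata;
        delete [] indata;
    }

    MPI_Finalize();
    return 0;
}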

