mpirun无法找到指定的可执行文件 [英] mpirun was unable to find the specified executable file

查看:2051
本文介绍了mpirun无法找到指定的可执行文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我在使用OpenMPI编译此代码时遇到问题.由于我对使用OpenMPI的概念有些陌生,如果你们中的任何人可以给我提示此处的错误,那将是很棒的. 编译工作正常,但是如果我运行代码,则会收到以下消息:

I have problems compiling this code using OpenMPI. Since I am a bit new to the concepts of OpenMPI, it would be great if one of you could give me a hint about the mistake here. Compiling works just fine, but when I run the code I get this message:

mpirun was unable to find the specified executable file, and therefore
did not launch the job.  This error was first reported for process
rank 0; it may have occurred for other processes as well.

NOTE: A common cause for this error is misspelling a mpirun command
      line parameter option (remember that mpirun interprets the first
      unrecognized command line token as the executable).  

我正在使用

mpic++ matmult.cpp -o matmult

并运行:

mpirun -n 2 matmult

...这是使用的代码:

... and here is the used code:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define MASTER 0
#define FROM_MASTER 1
#define FROM_WORKER 2

// ---------------------------------------------------------------------------
// allocate space for empty matrix A[row][col]
// access to matrix elements possible with:
// - A[row][col]
// - A[0][row*col]

/*
 * Allocate a zero-initialised row x col matrix of floats.
 *
 * All elements live in one contiguous block and a separate table of row
 * pointers is set up on top of it.
 *
 * Returns the row-pointer table, or NULL when row or col is not positive,
 * when the element count does not fit in size_t, or when an allocation fails.
 * The caller releases the matrix with free(A[0]) followed by free(A).
 */
float **alloc_mat(int row, int col)
{
    if (row <= 0 || col <= 0)
        return NULL;

    size_t nrows = (size_t)row;
    size_t ncols = (size_t)col;
    size_t total = nrows * ncols;
    if (total / nrows != ncols)     // the multiplication wrapped around
        return NULL;

    // casts kept so the file still builds as C++ (mpic++)
    float **rows = (float **)calloc(nrows, sizeof *rows);   // pointer on rows
    float *cells = (float *)calloc(total, sizeof *cells);   // all matrix elements
    if (rows == NULL || cells == NULL) {
        free(rows);     // free(NULL) is a no-op, so no guard is needed
        free(cells);
        return NULL;
    }

    for (size_t i = 0; i < nrows; i++)
        rows[i] = cells + i * ncols;

    return rows;
}

// ---------------------------------------------------------------------------
// random initialisation of matrix with values [0..9]

   // Store a pseudo-random whole number in the range 0..9 (taken from rand())
   // in each of the row*col elements, addressed linearly through A[0].
   // Assumes the elements are stored contiguously starting at A[0], which is
   // how alloc_mat lays them out.
   void init_mat(float **A, int row, int col)
   {
       const int count = row * col;
       int idx = 0;

       while (idx < count) {
           A[0][idx] = (float)(rand() % 10);
           ++idx;
       }
   }

    // ---------------------------------------------------------------------------
    // DEBUG FUNCTION: printout of all matrix elements

   // DEBUG helper: writes a "Matrix <tag>:" heading to stdout, followed by
   // one text line per matrix row with every element formatted as %6.1f.
   //
   // A    row-pointer table of the matrix (A[i][j] is read for i < row, j < col)
   // row  number of rows to print
   // col  number of columns to print
   // tag  label shown in the heading; it is only read, so it is
   //      const-qualified and callers may pass string literals
   void print_mat(float **A, int row, int col, const char *tag)
   {
       printf("Matrix %s:\n", tag);
       for (int i = 0; i < row; i++)
       {
           for (int j = 0; j < col; j++)
               printf("%6.1f   ", A[i][j]);
           printf("\n");
       }
   }

// ---------------------------------------------------------------------------

int main(int argc, char *argv[]) {   
    int numtasks;
    int taskid;
    int numworkers;
    int source;
    int dest;
    int mtype;
    int rows;
    int averow, extra, offset;
    double starttime, endtime;
    float **A, **B, **C;    // matrices
    int d1, d2, d3;         // dimensions of matrices
    int i, j, k, rc;            // loop variables


    MPI_Status status;
    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
    MPI_Comm_size(MPI_COMM_WORLD,&numtasks);

    if (argc != 4) {
        printf ("Matrix multiplication: C = A x B\n");
        printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); 
        return 0;
    }

    if (numtasks < 2 ) {
    printf("Need at least two MPI tasks. Quitting...\n");
    MPI_Abort(MPI_COMM_WORLD,rc);
    exit(1);
    }

     /* read user input */
    d1 = atoi(argv[1]);     // rows of A and C  d1
    d2 = atoi(argv[2]);     // cols of A and rows of B  d2
    d3 = atoi(argv[3]);     // cols of B and C d3

    printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

    /* prepare matrices */
    A = alloc_mat(d1, d2);
    init_mat(A, d1, d2); 
    B = alloc_mat(d2, d3);
    init_mat(B, d2, d3);
    C = alloc_mat(d1, d3);


     /* Code für den Manager */
    if (taskid == MASTER) {
        /*printf("matrix multiplikation withMPI\n");
        printf("initializing arrays ...\n");
            for (i=0; i<d1; i++) 
                for (j=0; j<d2; j++) 
                A[i][j]=i+j;


            for (i=0; i<d2; i++) 
                for (j=0; j<d3; j++) 
                B[i][j]=i*j;*/



             /* Matrizen versenden */
            averow = d1/numworkers;
            extra = d1%numworkers;
            offset = 0;
            mtype = FROM_MASTER;

            starttime=MPI_Wtime();

            for (dest=1;dest<=numworkers;dest++) {
                rows = (dest <= extra) ? averow+1 :averow;
                printf("Sending %drows to task %doffset=%d\n",rows,dest,offset);
                MPI_Send(&offset, 1, MPI_INT,dest,mtype, MPI_COMM_WORLD);
                MPI_Send(&rows, 1, MPI_INT,dest,mtype, MPI_COMM_WORLD);
                MPI_Send(&A[offset][0],rows*d2, MPI_DOUBLE,dest,mtype, MPI_COMM_WORLD);
                MPI_Send(&B, d2*d3, MPI_DOUBLE,dest,mtype, MPI_COMM_WORLD);
                offset =offset+rows;
            }

             /* Ergebnisse empfangen */
             mtype = FROM_WORKER;

            for (i=1; i<=numworkers; i++) {
                source = i;
                MPI_Recv(&offset, 1, MPI_INT,source,mtype, MPI_COMM_WORLD, &status);
                MPI_Recv(&rows, 1, MPI_INT,source,mtype, MPI_COMM_WORLD, &status);
                MPI_Recv(&C[offset][0],rows*d3, 
                MPI_DOUBLE,source,mtype,MPI_COMM_WORLD,&status);
                printf("Received results from task %d\n",source);
            }

            endtime=MPI_Wtime();
            printf("\nIt took %fseconds.\n",endtime-starttime);
     }       

    /* Code für die Arbeiter */

    if (taskid > MASTER) {
        mtype = FROM_MASTER;

        MPI_Recv(&offset, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&d1, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&A,rows*d2, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&B, d2*d3, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD, &status);

    /* print user instruction */


    // no initialisation of C, because it gets filled by matmult

    /* serial version of matmult */
        printf("Perform matrix multiplication...\n");
        for (i = 0; i < d1; i++)
            for (j = 0; j < d3; j++)
                for (k = 0; k < d2; k++)
                C[i][j] += A[i][k] * B[k][j];

        mtype = FROM_WORKER;
        MPI_Send(&offset, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD);
        MPI_Send(&d1, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD);
        MPI_Send(&C,rows*d3, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD);

    }

    MPI_Finalize();


    /* test output 
    print_mat(A, d1, d2, "A"); 
    print_mat(B, d2, d3, "B"); 
    print_mat(C, d1, d3, "C"); */

    printf ("\nDone.\n");


    //return 0;
}


运行结果 mpirun matmult (默认设置,单个进程):

mpirun已退出,原因是节点juliuss-mbp-3上PID为77202的进程(rank 0)未正常退出.出现这种情况可能有三个原因:

mpirun has exited due to process rank 0 with PID 77202 on node juliuss-mbp-3 exiting improperly. There are three reasons this could occur:

  1. 该进程在退出之前没有调用"init",而作业中的其他进程调用了.这可能导致作业在等待所有进程调用"init"时无限期挂起.按照规则,如果有一个进程调用了"init",那么所有进程都必须在终止之前调用"init".

  1. this process did not call "init" before exiting, but others in the job did. This can cause a job to hang indefinitely while it waits for all processes to call "init". By rule, if one process calls "init", then ALL processes must call "init" prior to termination.

  2. 该进程调用了"init",但退出时没有调用"finalize".按照规则,所有调用过"init"的进程都必须在退出之前调用"finalize",否则将被视为"异常终止".

  2. this process called "init", but exited without calling "finalize". By rule, all processes that call "init" MUST call "finalize" prior to exiting or it will be considered an "abnormal termination"

  3. 该进程调用了"MPI_Abort"或"orte_abort",并且mca参数orte_create_session_dirs被设置为false.在这种情况下,运行时无法检测到该abort调用属于异常终止.因此,您只会收到这一条错误消息.这可能导致应用程序中的其他进程被mpirun发送的信号终止(如此处所报告).您可以在mpirun命令行上指定-quiet来避免出现此消息.

  3. this process called "MPI_Abort" or "orte_abort" and the mca parameter orte_create_session_dirs is set to false. In this case, the run-time cannot detect that the abort call was an abnormal termination. Hence, the only error message you will receive is this one. This may have caused other processes in the application to be terminated by signals sent by mpirun (as reported here). You can avoid this message by specifying -quiet on the mpirun command line.

推荐答案

次要问题(仍然很重要):

您的程序期望argc为4,即程序名称加上3个传入的参数,如下面的代码所示:

// Excerpt from main() in the question: when the argument count is wrong the
// usage text is printed and main() returns 0. No MPI_Finalize() or
// MPI_Abort() call precedes the return.
if (argc != 4) {
    printf ("Matrix multiplication: C = A x B\n");
    printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); 
    return 0;
}

由于此条件分支返回0,而没有先调用MPI_Abort(...)或MPI_Finalize(),因此您会收到如下mpi错误:

Since this conditional returns 0 without calling the proper MPI_Abort(...) or MPI_Finalize() then you will receive the mpi error:

mpirun已退出,原因是节点juliuss-mbp-3上PID为77202的进程(rank 0)未正常退出.

mpirun has exited due to process rank 0 with PID 77202 on node juliuss-mbp-3 exiting improperly.

在return 0之前添加MPI_Abort(MPI_COMM_WORLD,rc);之后,我相信您的程序就不会再有这个问题了.

By adding MPI_Abort(MPI_COMM_WORLD,rc); before return 0 I believe your program will be in the clear.

// Suggested fix: abort the MPI job before leaving main() on a usage error.
// NOTE(review): rc is declared in main() of the question's code but never
// assigned there, so its value at this point is indeterminate - confirm and
// pass an explicit error code instead.
if (argc != 4) {
    printf ("Matrix multiplication: C = A x B\n");
    printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]);
    MPI_Abort(MPI_COMM_WORLD,rc);
    return 0;
}


但是,我们应该解决此问题的主要原因,即:运行mpirun -np 2 matmult或mpirun matmult时,需要向程序传递3个参数.命令应当采用以下格式:

However, we should address the main cause of the issue: you need to pass 3 arguments to your program when you run mpirun -np 2 matmult or mpirun matmult. The command should have this format:

mpirun -np 2 matmult parameter1 parameter2 parameter3

mpirun matmult parameter1 parameter2 parameter3

mpirun -np 2 matmult parameter1 parameter2 parameter3
or
mpirun matmult parameter1 parameter2 parameter3

在您的代码中,参数(参数)应为:

From your code the parameters (arguments) should be:

parameter1 = rows of A and C
parameter2 = cols of A and rows of B
parameter3 = cols of B and C

parameter1 = rows of A and C
parameter2 = cols of A and rows of B
parameter3 = cols of B and C

您的运行命令可能类似于:

and your run command could look like:

mpirun -np 2 matmult 2 2 2

这篇关于mpirun无法找到指定的可执行文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆