Limit to the number of broadcasts on a cluster? (C)

I have a problem with a loop containing an MPI broadcast call. After roughly 1000 iterations, the broadcast line stalls or hangs the execution (presumably waiting for the transmission).
See the following code:
#include <stdio.h>
#include <string.h>
#include "mpi.h"

int main(int argc, char* argv[]) {
    int aa = 0, size = 10000;
    int my_rank;                    /* rank of process */
    int smessage = 9, rmessage = 9; /* storage for message */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    if (my_rank != 0) {
        for (aa = 1; aa < size; aa++) {
            MPI_Bcast(&rmessage, 1, MPI_INT, 0, MPI_COMM_WORLD);
            printf("rec message(i=%d)=%d\n", aa, rmessage);
        }
    }
    else {
        for (aa = 1; aa < size; aa++) {
            smessage = aa;
            printf("send message(i=%d)=%d\n", aa, smessage);
            MPI_Bcast(&smessage, 1, MPI_INT, 0, MPI_COMM_WORLD);
        }
    }

    MPI_Finalize();
    return 0;
}
When I run it with
mpicc -openmp -fopenmp example_code.c -o example_prog
mpirun -n 3 example_prog
I get the expected output (up to some reordering of the lines):
sent message(i=1)=1
rec message(i=1)=1
rec message(i=1)=1
...
sent message(i=9999)=9999
rec message(i=9999)=9999
rec message(i=9999)=9999
However, when I submit it to my university cluster using SLURM's sbatch and a script containing
mpirun -n 3 --loadbalance --cpus-per-proc 24 ./example_prog
(I'm using Open MPI 1.6.5), the program either hangs in the middle of the loop or is automatically terminated by the job handler. The output is
sent message(i=1)=1
rec message(i=1)=1
rec message(i=1)=1
...
rec message(i=999)=999
rec message(i=999)=999
sent message(i=1000)=1000
It clearly stops in the "middle" of the loop. Does anyone have an idea what causes this behaviour or how to avoid it?
Thank you so much!

Related

MPI_Scatter produces write error, bad address (3)

I get a write error when trying to scatter a dynamically allocated matrix (it is contiguous). It happens when more than 5 cores are involved in the computation. I have placed printfs, and it occurs in the scatter. The code is the following:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
#include <sys/time.h>

int main(int argc, char* argv[])
{
    int err = MPI_Init(&argc, &argv);
    MPI_Comm world;
    world = MPI_COMM_WORLD;

    int size = 0;
    err = MPI_Comm_size(world, &size);
    int rank = 0;
    err = MPI_Comm_rank(world, &rank);

    int n_rows = 2400, n_cols = 2400, n_rpc = n_rows / size;

    float *A, *Asc, *B, *C;   // Dynamically allocate A, Asc, B and C
    Asc = malloc(n_rpc * n_cols * sizeof(float));
    B = malloc(n_rows * n_cols * sizeof(float));
    C = malloc(n_rows * n_cols * sizeof(float));
    A = malloc(n_rows * n_cols * sizeof(float));

    if (rank == 0)
    {
        for (int i = 0; i < n_rows; i++)
        {
            for (int j = 0; j < n_cols; j++)
            {
                A[i*n_cols+j] = i + 1.0;
                B[i*n_cols+j] = A[i*n_cols+j];
            }
        }
    }

    struct timeval start, end;
    if (rank == 0) gettimeofday(&start, NULL);

    MPI_Bcast(B, n_rows*n_cols, MPI_FLOAT, 0, MPI_COMM_WORLD);

    if (rank == 0) printf("Before Scatter\n");   // It is breaking here
    MPI_Scatter(A, n_rpc*n_cols, MPI_FLOAT, Asc, n_rpc*n_cols, MPI_FLOAT, 0, MPI_COMM_WORLD);
    if (rank == 0) printf("After Scatter\n");

    /* Some computation */

    err = MPI_Finalize();
    if (err) DIE("MPI_Finalize");
    return err;
}
Up to 4 cores it works correctly and performs the scatter, but with 5 or more it does not, and I cannot find a clear reason.
The error message is as follows:
[raspberrypi][[26238,1],0][btl_tcp_frag.c:130:mca_btl_tcp_frag_send] mca_btl_tcp_frag_send: writev error (0xac51e0, 8)
Bad address(3)
[raspberrypi][[26238,1],0][btl_tcp_frag.c:130:mca_btl_tcp_frag_send] mca_btl_tcp_frag_send: writev error (0xaf197048, 29053982)
Bad address(1)
[raspberrypi:05345] pml_ob1_sendreq.c:308 FATAL
Thanks in advance!
There are multiple errors. First of all, take care to always use the same type when defining your variables. Then, when you use scatter, the send count and the receive count are the same, and you will be sending Elements/Cores items to each rank. Likewise, when receiving with gather, you have to receive the same amount you sent, so again Elements/Cores.
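To make the counts concrete, here is a minimal, self-contained sketch of that pattern (not the asker's program): every rank passes the same per-rank count, (n_rows/size)*n_cols, on both the send and the receive side of MPI_Scatter, and the matching MPI_Gather collects back exactly the same amount. The variable names are illustrative, and it assumes size divides n_rows evenly.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Assumes size divides n_rows evenly; handle the remainder otherwise. */
    int n_rows = 2400, n_cols = 2400;
    int chunk = (n_rows / size) * n_cols;   /* Elements/Cores: same send and receive count */

    float *A = NULL;
    if (rank == 0) {
        A = malloc((size_t)n_rows * n_cols * sizeof(float));
        for (int i = 0; i < n_rows * n_cols; i++)
            A[i] = (float)(i / n_cols + 1);  /* row index + 1, as in the question */
    }
    float *Asc = malloc((size_t)chunk * sizeof(float));

    /* Every rank passes the same per-rank count on both the send and the receive side. */
    MPI_Scatter(A, chunk, MPI_FLOAT, Asc, chunk, MPI_FLOAT, 0, MPI_COMM_WORLD);

    /* ... per-rank computation on Asc ... */

    /* Gather back exactly the amount that was scattered to each rank. */
    float *C = NULL;
    if (rank == 0) C = malloc((size_t)n_rows * n_cols * sizeof(float));
    MPI_Gather(Asc, chunk, MPI_FLOAT, C, chunk, MPI_FLOAT, 0, MPI_COMM_WORLD);

    if (rank == 0) printf("C[0] = %f\n", C[0]);

    free(Asc);
    if (rank == 0) { free(A); free(C); }
    MPI_Finalize();
    return 0;
}

Run with, e.g., mpirun -n 6 on this sketch and each rank works on 400 of the 2400 rows.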

How to run C code with MPI: cannot connect to local mpd (/tmp/mpd2.console_karim); possible causes?

I can't run MPI code written in C from the terminal on Ubuntu:
karim@karim:~/Desktop/greetings$ mpicc main.c -o test
karim@karim:~/Desktop/greetings$ mpirun -np 3 test
mpiexec_karim: cannot connect to local mpd (/tmp/mpd2.console_karim); possible causes:
1. no mpd is running on this host
2. an mpd is running but was started without a "console" (-n option)
In case 1, you can start an mpd on this host with:
mpd &
and you will be able to run jobs just on this host.
For more details on starting mpds on a set of hosts, see
the MPICH2 Installation Guide.
#include <stdio.h>
#include <string.h>
#include "mpi.h"

int main(int argc, char *argv[])
{
    int my_rank;           /* rank of process */
    int p;                 /* number of processes */
    int source;            /* rank of sender */
    int dest;              /* rank of receiver */
    int tag = 0;           /* tag for messages */
    char message[100];     /* storage for message */
    MPI_Status status;     /* return status for receive */

    /* Start up MPI */
    MPI_Init(&argc, &argv);

    /* Find out process rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Find out number of processes */
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    if (my_rank != 0)
    {
        /* create message */
        sprintf(message, "Greetings from process %d !", my_rank);
        dest = 0;
        /* use strlen+1 to transmit the terminating \0 */
        MPI_Send(message, strlen(message)+1, MPI_CHAR, dest, tag,
                 MPI_COMM_WORLD);
    }
    else
    {
        for (source = 1; source < p; source++)
        {
            MPI_Recv(message, 100, MPI_CHAR, source, tag, MPI_COMM_WORLD,
                     &status);
            printf("%s\n", message);
        }
    }

    /* shut down MPI */
    MPI_Finalize();
    return 0;
}
Output:
Greetings from process 1 !
Greetings from process 2 !
Greetings from process 3 !
After searching, I solved the problem with the following commands:
mpd --ncpus=2 &
mpirun -np 2 --host localhost ./try.exe

Two MPI_Allreduce() functions not working, gives error of NULL Communicator

I am using an example code from an MPI book [will give the name shortly].
What it does is the following:
a) It creates two communicators: world = MPI_COMM_WORLD, containing all the processes, and workers, which excludes the random-number server (the process with the last rank).
b) The server generates random numbers and serves them to the workers on request.
c) The workers separately count the number of samples falling inside and outside a unit circle inscribed in a unit square.
d) Once a sufficient level of accuracy is reached, the inside and outside counts are Allreduced to compute the value of PI from their ratio.
The code compiles fine. However, when running it with the following command (actually with any value of n):
>mpiexec -n 2 apple.exe 0.0001
I get the following errors:
Fatal error in MPI_Allreduce: Invalid communicator, error stack:
MPI_Allreduce(855): MPI_Allreduce(sbuf=000000000022EDCC, rbuf=000000000022EDDC,
count=1, MPI_INT, MPI_SUM, MPI_COMM_NULL) failed
MPI_Allreduce(780): Null communicator
pi = 0.00000000000000000000
job aborted:
rank: node: exit code[: error message]
0: PC: 1: process 0 exited without calling finalize
1: PC: 123
Edit: (Removed: But when I remove either one of the two MPI_Allreduce() calls, it runs without any runtime error, albeit with the wrong answer.)
Code:
#include <mpi.h>
#include <mpe.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include <time.h>

#define CHUNKSIZE 1000
/* message tags */
#define REQUEST 1
#define REPLY   2

int main(int argc, char *argv[])
{
    int iter;
    int in, out, i, iters, max, ix, iy, ranks[1], done, temp;
    double x, y, Pi, error, epsilon;
    int numprocs, myid, server, totalin, totalout, workerid;
    int rands[CHUNKSIZE], request;

    MPI_Comm world, workers;
    MPI_Group world_group, worker_group;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    world = MPI_COMM_WORLD;
    MPI_Comm_size(world, &numprocs);
    MPI_Comm_rank(world, &myid);
    server = numprocs - 1;                /* last proc is server */

    if (myid == 0) sscanf(argv[1], "%lf", &epsilon);
    MPI_Bcast(&epsilon, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    MPI_Comm_group(world, &world_group);
    ranks[0] = server;
    MPI_Group_excl(world_group, 1, ranks, &worker_group);
    MPI_Comm_create(world, worker_group, &workers);
    MPI_Group_free(&worker_group);

    if (myid == server)                   /* I am the rand server */
    {
        srand(time(NULL));
        do
        {
            MPI_Recv(&request, 1, MPI_INT, MPI_ANY_SOURCE, REQUEST, world, &status);
            if (request)
            {
                for (i = 0; i < CHUNKSIZE; )
                {
                    rands[i] = rand();
                    if (rands[i] <= INT_MAX) ++i;
                }
                MPI_Send(rands, CHUNKSIZE, MPI_INT, status.MPI_SOURCE, REPLY, world);
            }
        }
        while (request > 0);
    }
    else                                  /* I am a worker process */
    {
        request = 1;
        done = in = out = 0;
        max = INT_MAX;                    /* max int, for normalization */
        MPI_Send(&request, 1, MPI_INT, server, REQUEST, world);
        MPI_Comm_rank(workers, &workerid);
        iter = 0;
        while (!done)
        {
            ++iter;
            request = 1;
            MPI_Recv(rands, CHUNKSIZE, MPI_INT, server, REPLY, world, &status);
            for (i = 0; i < CHUNKSIZE; )
            {
                x = (((double) rands[i++]) / max) * 2 - 1;
                y = (((double) rands[i++]) / max) * 2 - 1;
                if (x*x + y*y < 1.0) ++in;
                else ++out;
            }
            /* ** see error here ** */
            MPI_Allreduce(&in, &totalin, 1, MPI_INT, MPI_SUM, workers);
            MPI_Allreduce(&out, &totalout, 1, MPI_INT, MPI_SUM, workers);
            /* only one of the above two MPI_Allreduce() functions working */
            Pi = (4.0 * totalin) / (totalin + totalout);
            error = fabs(Pi - 3.141592653589793238462643);
            done = (error < epsilon || (totalin + totalout) > 1000000);
            request = (done) ? 0 : 1;
            if (myid == 0)
            {
                printf("\rpi = %23.20f", Pi);
                MPI_Send(&request, 1, MPI_INT, server, REQUEST, world);
            }
            else
            {
                if (request)
                    MPI_Send(&request, 1, MPI_INT, server, REQUEST, world);
            }
            MPI_Comm_free(&workers);
        }
    }

    if (myid == 0)
    {
        printf("\npoints: %d\nin: %d, out: %d, <ret> to exit\n",
               totalin + totalout, totalin, totalout);
        getchar();
    }
    MPI_Finalize();
}
What is the error here? Am I missing something? Any help or pointer will be highly appreciated.
You are freeing the workers communicator before you are done using it. Move the MPI_Comm_free(&workers) call after the while(!done) { ... } loop.
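For clarity, here is a minimal, self-contained sketch (not the book's program) of the intended lifetime of the workers communicator: it is created once, used by every MPI_Allreduce inside the loop, and freed only after the loop has finished with it. The loop body is a placeholder reduction rather than the Monte Carlo computation.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int numprocs, myid;
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    /* Build a "workers" communicator that excludes the last rank (the server). */
    int server = numprocs - 1;
    MPI_Group world_group, worker_group;
    MPI_Comm workers;
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_excl(world_group, 1, &server, &worker_group);
    MPI_Comm_create(MPI_COMM_WORLD, worker_group, &workers);
    MPI_Group_free(&worker_group);
    MPI_Group_free(&world_group);

    if (myid != server)   /* workers == MPI_COMM_NULL on the server rank */
    {
        int in = myid, total = 0, iter;
        for (iter = 0; iter < 5; iter++)
        {
            /* The communicator is reused on every iteration... */
            MPI_Allreduce(&in, &total, 1, MPI_INT, MPI_SUM, workers);
        }
        if (myid == 0) printf("total = %d\n", total);

        /* ...so it is freed only after the loop is done with it. */
        MPI_Comm_free(&workers);
    }

    MPI_Finalize();
    return 0;
}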

MPI_Waitany causes segmentation fault

I am using MPI to distribute images to different processes, so that:
Process 0 distributes images to the other processes.
Processes other than 0 process the image and then send the result back to process 0.
Process 0 tries to keep every process busy: whenever a process finishes its job with an image, it is immediately assigned another image to process. The code follows:
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include "mpi.h"

#define MAXPROC 16      /* Max number of processes */
#define TOTAL_FILES 7

int main(int argc, char* argv[]) {
    int i, nprocs, tprocs, me, index;
    const int tag = 42;                 /* Tag value for communication */

    MPI_Request recv_req[MAXPROC];      /* Request objects for non-blocking receive */
    MPI_Request send_req[MAXPROC];      /* Request objects for non-blocking send */
    MPI_Status status;                  /* Status object for non-blocking receive */

    char myname[MPI_MAX_PROCESSOR_NAME];             /* Local host name string */
    char hostname[MAXPROC][MPI_MAX_PROCESSOR_NAME];  /* Received host names */
    int namelen;

    MPI_Init(&argc, &argv);                    /* Initialize MPI */
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);    /* Get nr of processes */
    MPI_Comm_rank(MPI_COMM_WORLD, &me);        /* Get own identifier */

    MPI_Get_processor_name(myname, &namelen);  /* Get host name */
    myname[namelen++] = (char)0;               /* Terminating null byte */

    /* First check that we have at least 2 and at most MAXPROC processes */
    if (nprocs < 2 || nprocs > MAXPROC) {
        if (me == 0) {
            printf("You have to use at least 2 and at most %d processes\n", MAXPROC);
        }
        MPI_Finalize();
        exit(0);
    }

    /* if TOTAL_FILES < nprocs then use only TOTAL_FILES + 1 procs */
    tprocs = (TOTAL_FILES < nprocs) ? TOTAL_FILES + 1 : nprocs;
    int done = -1;

    if (me == 0) {    /* Process 0 does this */
        int send_counter = 0, received_counter;

        for (i = 1; i < tprocs; i++) {
            MPI_Isend(&send_counter, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &send_req[i]);
            ++send_counter;
            /* Receive a message from all other processes */
            MPI_Irecv(hostname[i], namelen, MPI_CHAR, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &recv_req[i]);
        }

        for (received_counter = 0; received_counter < TOTAL_FILES; received_counter++) {
            /* Wait until at least one message has been received from any process other than 0 */
            MPI_Waitany(tprocs-1, &recv_req[1], &index, &status);
            if (index == MPI_UNDEFINED) perror("Errorrrrrrr");
            printf("Received a message from process %d on %s\n", status.MPI_SOURCE, hostname[index+1]);

            if (send_counter < TOTAL_FILES) {    /* if there are still images left to process */
                MPI_Isend(&send_counter, 1, MPI_INT, status.MPI_SOURCE, tag, MPI_COMM_WORLD, &send_req[status.MPI_SOURCE]);
                ++send_counter;
                MPI_Irecv(hostname[status.MPI_SOURCE], namelen, MPI_CHAR, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &recv_req[status.MPI_SOURCE]);
            }
        }

        for (i = 1; i < tprocs; i++) {
            MPI_Isend(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &send_req[i]);
        }
    } else if (me < tprocs) {    /* all other processes do this */
        int y;
        MPI_Recv(&y, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
        while (y != -1) {
            printf("Process %d: Received image %d\n", me, y);
            sleep(me % 3 + 1);    /* Let the processes sleep for 1-3 seconds */
            /* Send own identifier back to process 0 */
            MPI_Send(myname, namelen, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
            MPI_Recv(&y, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
        }
    }

    MPI_Finalize();
    exit(0);
}
which is based on this example.
Right now I'm getting a segmentation fault and I'm not sure why. I'm fairly new to MPI, but I can't see a mistake in the code above. It only happens with certain numbers of processes, for example when TOTAL_FILES = 7 and the program is run with 5, 6 or 7 processes; it works fine with 9 processes or more.
The entire code can be found here. Trying it with 6 processes causes the mentioned error.
To compile and execute :
mpicc -Wall sscce.c -o sscce -lm
mpirun -np 6 sscce
It's not MPI_Waitany that causes the segmentation fault; it is the way you handle the case when all requests in recv_req[] are completed (i.e. index == MPI_UNDEFINED). perror() does not stop the code, so execution continues and then segfaults in the printf statement while trying to access hostname[index+1].
The reason all requests in the array end up completed is that, because of the use of MPI_ANY_SOURCE in the receive call, the rank of the sender is not guaranteed to be equal to the index of the request in recv_req[] - simply compare index and status.MPI_SOURCE after MPI_Waitany returns to see it for yourself. Therefore the subsequent calls to MPI_Irecv very probably overwrite still-incomplete requests, and thus the number of requests that can be completed by MPI_Waitany is less than the actual number of results expected.
Also note that you never wait for the send requests to complete. You are lucky that the Open MPI implementation uses an eager protocol to send small messages, so those get sent even though MPI_Wait(any|all) or MPI_Test(any|all) is never called on the started send requests.
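Putting both points together, one possible restructuring of the master loop looks like the sketch below. This is an illustration rather than a fix of the asker's exact program: the replacement MPI_Irecv is posted into the slot whose request just completed (index) instead of recv_req[status.MPI_SOURCE], stable buffers (work_ids) are used for the non-blocking sends, and MPI_Waitall is called on the send requests before the stop messages go out. NWORK, results and the dummy computation are illustrative stand-ins.

#include <mpi.h>
#include <stdio.h>

#define NWORK 7    /* number of work items, like TOTAL_FILES */

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int nprocs, me;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    const int tag = 42;

    if (nprocs < 2) {
        if (me == 0) printf("run with at least 2 processes\n");
        MPI_Finalize();
        return 0;
    }

    if (me == 0) {
        int nworkers = (nprocs - 1 < NWORK) ? nprocs - 1 : NWORK;
        MPI_Request recv_req[NWORK], send_req[NWORK];
        int results[NWORK], work_ids[NWORK];
        int sent = 0, nsend = 0;

        for (int k = 0; k < NWORK; k++) work_ids[k] = k;   /* stable send buffers */

        /* Prime one work item per worker and post one receive per worker. */
        for (int w = 0; w < nworkers; w++) {
            MPI_Isend(&work_ids[sent], 1, MPI_INT, w + 1, tag, MPI_COMM_WORLD, &send_req[nsend++]);
            MPI_Irecv(&results[w], 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &recv_req[w]);
            sent++;
        }

        for (int received = 0; received < NWORK; received++) {
            int index;
            MPI_Status status;
            MPI_Waitany(nworkers, recv_req, &index, &status);
            printf("result %d from rank %d (slot %d)\n", results[index], status.MPI_SOURCE, index);

            if (sent < NWORK) {
                MPI_Isend(&work_ids[sent], 1, MPI_INT, status.MPI_SOURCE, tag, MPI_COMM_WORLD, &send_req[nsend++]);
                /* Re-arm the slot that just completed, not recv_req[status.MPI_SOURCE]. */
                MPI_Irecv(&results[index], 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &recv_req[index]);
                sent++;
            }
        }

        /* Make sure all non-blocking sends have really completed... */
        MPI_Waitall(nsend, send_req, MPI_STATUSES_IGNORE);

        /* ...then tell every worker to stop. */
        int done = -1;
        for (int w = 0; w < nworkers; w++)
            MPI_Send(&done, 1, MPI_INT, w + 1, tag, MPI_COMM_WORLD);
    } else if (me <= NWORK) {
        int work;
        MPI_Recv(&work, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        while (work != -1) {
            int result = work * 10;   /* stand-in for the real image processing */
            MPI_Send(&result, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
            MPI_Recv(&work, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
    }

    MPI_Finalize();
    return 0;
}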

mpirun hangs when MPI send and receive are put in a loop

I was trying to run the program below on a 4-node cluster using mpirun.
Node 0 distributes the data to nodes 1, 2 and 3.
In the program, the computation has to be done for different values of the variable 'dir',
ranging from -90 to 90.
So node 0 distributes the data and collects the results in a loop (for the different values of 'dir').
When the do { ... } while(dir<=90); loop is present, mpirun hangs and produces no output.
But when I comment out the do-while loop, output is obtained for the initialized value of dir (dir = -90), and that output is correct. The problem only occurs with the loop.
Could anyone please help me solve this issue?
#include "mpi.h"
int main(int argc,char *argv[])
float dir=-90;
int rank,numprocs;
MPI_Status status;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
if(rank==0)
{
do{
/*initializing data*/
for(dest=1;dest<numprocs;dest++)
{
MPI_Send(&offset,1,MPI_INT,dest,FROM_MASTER,MPI_COMM_WORLD);
MPI_Send(&s_psi[offset],count,MPI_FLOAT,dest,FROM_MASTER,MPI_COMM_WORLD);
}
gettimeofday(&start,NULL);
for (dest=1; dest<numprocs; dest++)
{
MPI_Recv(&offset,1,MPI_INT,dest,FROM_WORKER,MPI_COMM_WORLD,&status);
MPI_Recv(&P[offset],count,MPI_FLOAT,dest,FROM_WORKER,MPI_COMM_WORLD,&status);
}
gettimeofday(&end,NULL);
timersub(&end,&start,&total);
printf("time consumed=%ds %dus\n",total.tv_sec,total.tv_usec);
dir++;
}while(dir<=90);
}
if(rank>0)
{
MPI_Recv(&offset,1,MPI_INT,0,FROM_MASTER,MPI_COMM_WORLD,&status);
MPI_Recv(&s_psi[offset],count,MPI_FLOAT,0,FROM_MASTER,MPI_COMM_WORLD,&status);
//Does the computation
}
MPI_Send(&offset,1,MPI_INT,0,FROM_WORKER,MPI_COMM_WORLD);
MPI_Send(&P[offset],count,MPI_FLOAT,0,FROM_WORKER,MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
The part where rank > 0 should be enclosed in a loop.
Each MPI_Send should have its corresponding MPI_Recv.
if (rank > 0) {
    do {
        MPI_Recv(&offset, 1, MPI_INT, 0, FROM_MASTER, MPI_COMM_WORLD, &status);
        MPI_Recv(&s_psi[offset], count, MPI_FLOAT, 0, FROM_MASTER, MPI_COMM_WORLD, &status);
        /* Computation */
        MPI_Send(&offset, 1, MPI_INT, 0, FROM_WORKER, MPI_COMM_WORLD);
        MPI_Send(&P[offset], count, MPI_FLOAT, 0, FROM_WORKER, MPI_COMM_WORLD);
        dir++;
    } while (dir <= 90);
}
But you probably don't know dir in your worker nodes. Usually, node 0 sends a "magic" termination message to end the workers.
At the end of node 0:
for (r = 1; r < numprocs; r++)
    MPI_Send(&dummy, 1, MPI_INT, r, STOP, COMM);
For the worker nodes:
if (rank > 0) {
    while (true) {
        MPI_Recv(&offset, 1, MPI_INT, 0, FROM_MASTER, MPI_COMM_WORLD, &status);
        MPI_Recv(&s_psi[offset], count, MPI_FLOAT, 0, FROM_MASTER, MPI_COMM_WORLD, &status);
        /* Computation */
        MPI_Send(&offset, 1, MPI_INT, 0, FROM_WORKER, MPI_COMM_WORLD);
        MPI_Send(&P[offset], count, MPI_FLOAT, 0, FROM_WORKER, MPI_COMM_WORLD);

        MPI_Iprobe(MPI_ANY_SOURCE, STOP, COMM, &flag, &status);
        if (flag) {
            MPI_Recv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, STOP, COMM, MPI_STATUS_IGNORE);
            break;
        }
    }
}
And you can finally call MPI_Finalize().
By the way, you might want to look into blocking and non-blocking Send/Recv.
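On that note, here is a minimal, self-contained sketch of the non-blocking pattern (MPI_Irecv plus MPI_Waitall) applied to a master/worker exchange like the one above. It is only an illustration; the names value and results, and the dummy computation, are not from the original program.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int rank, numprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    if (numprocs < 2) {
        if (rank == 0) printf("run with at least 2 processes\n");
        MPI_Finalize();
        return 0;
    }

    if (rank == 0) {
        int nworkers = numprocs - 1;
        int *results = malloc(nworkers * sizeof(int));
        MPI_Request *reqs = malloc(nworkers * sizeof(MPI_Request));

        /* Post all receives up front; they complete in whatever order replies arrive. */
        for (int w = 0; w < nworkers; w++)
            MPI_Irecv(&results[w], 1, MPI_INT, w + 1, 0, MPI_COMM_WORLD, &reqs[w]);

        /* Blocking sends are fine here because the matching receives are already posted. */
        for (int w = 0; w < nworkers; w++) {
            int value = w + 1;
            MPI_Send(&value, 1, MPI_INT, w + 1, 0, MPI_COMM_WORLD);
        }

        MPI_Waitall(nworkers, reqs, MPI_STATUSES_IGNORE);
        for (int w = 0; w < nworkers; w++)
            printf("worker %d returned %d\n", w + 1, results[w]);

        free(results);
        free(reqs);
    } else {
        int value, result;
        MPI_Recv(&value, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        result = value * 2;   /* stand-in for the real computation */
        MPI_Send(&result, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}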
