EDIT: My question is similar to C, Open MPI: segmentation fault from call to MPI_Finalize(). The segfault does not always happen, especially with low numbers of processes, so if you answer that one instead, that would be great either way . . .
I was hoping to get some help debugging the following code:
int main(){
int comm_sz, my_rank;
long* my_local;
long n, s, f;
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
if(my_rank == 0){
/* Get size n from user */
printf("Total processes: %d\n", comm_sz);
printf("Number of keys to be sorted? ");
fflush(stdout);
scanf("%ld", &n);
/* Broadcast size n to other processes */
MPI_Bcast(&n, 1, MPI_LONG, 0, MPI_COMM_WORLD);
/* Create n/comm_sz keys
NOTE! some processes will have 1 extra key if
n%comm_sz != 0 */
create_Keys(&my_local, my_rank, comm_sz, n, &s, &f);
}
if(my_rank != 0){
/* Receive n from process 0 */
MPI_Bcast(&n, 1, MPI_LONG, 0, MPI_COMM_WORLD);
/* Create n/comm_sz keys */
create_Keys(&my_local, my_rank, comm_sz, n, &s, &f);
}
/* The offending function; f is a long holding the number of elements in my_local */
Odd_Even_Tsort(&my_local, my_rank, f, comm_sz);
printf("Process %d completed the function\n", my_rank);
MPI_Finalize();
return 0;
}
void Odd_Even_Tsort(long** my_local, int my_rank, long my_size, int comm_sz)
{
long nochange = 1;
long phase = 0;
long complete = 1;
MPI_Status Stat;
long your_size = 1;
long* recv_buf = malloc(sizeof(long)*(my_size+1));
printf("rank %d has size %ld\n", my_rank, my_size);
while (complete!=0){
if((phase%2)==0){
if( ((my_rank%2)==0) && my_rank < comm_sz-1){
/* Send right */
MPI_Send(&my_size, 1, MPI_LONG, my_rank+1, 0, MPI_COMM_WORLD);
MPI_Send(*my_local, my_size, MPI_LONG, my_rank+1, 0, MPI_COMM_WORLD);
MPI_Recv(&your_size, 1, MPI_LONG, my_rank+1, 0, MPI_COMM_WORLD, &Stat);
MPI_Recv(&recv_buf, your_size, MPI_LONG, my_rank+1, 0, MPI_COMM_WORLD, &Stat);
}
if( ((my_rank%2)==1) && my_rank < comm_sz){
/* Send left */
MPI_Recv(&your_size, 1, MPI_LONG, my_rank-1, 0, MPI_COMM_WORLD, &Stat);
MPI_Recv(&recv_buf, your_size, MPI_LONG, my_rank-1, 0, MPI_COMM_WORLD, &Stat);
MPI_Send(&my_size, 1, MPI_LONG, my_rank-1, 0, MPI_COMM_WORLD);
MPI_Send(*my_local, my_size, MPI_LONG, my_rank-1, 0, MPI_COMM_WORLD);
}
}
phase ++;
complete = 0;
}
printf("Done!\n");
fflush(stdout);
}
And the error I'm getting is:
[ubuntu:04968] *** Process received signal ***
[ubuntu:04968] Signal: Segmentation fault (11)
[ubuntu:04968] Signal code: Address not mapped (1)
[ubuntu:04968] Failing at address: 0xb
--------------------------------------------------------------------------
mpiexec noticed that process rank 1 with PID 4968 on node ubuntu exited on signal 11 (Segmentation fault).
The reason I'm baffled is that the print statements after the function are still displayed, but if I comment out the function, there are no errors. So where the heap am I getting a segmentation fault?? I'm getting the error with mpiexec -n 2 ./a.out and an 'n' size bigger than 9.
If you actually want the entire runnable code, let me know. Really, I was hoping not so much for the precise answer but more for how to use the gdb/valgrind tools to debug this problem and others like it (and how to read their output).
(And yes, I realize the 'sort' function isn't sorting yet).
The problem here is simple, yet difficult to see unless you use a debugger or print exhaustive debugging information:
Look at the code where MPI_Recv is called. recv_buf is already a pointer to the receive buffer, so it should be supplied as the argument instead of &recv_buf. Passing &recv_buf makes MPI_Recv write the incoming longs over the pointer variable itself and the adjacent stack memory; that kind of stack corruption typically only crashes when the function returns, which is why the print statements after the function still appear.
MPI_Recv( recv_buf , your_size, MPI_LONG, my_rank-1, 0, MPI_COMM_WORLD, &Stat);
The rest seems OK.
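As for the tooling question: a minimal sketch of how one might run this under the tools you mention (the commands assume Open MPI and, for the xterm variant, an X display; adjust the file name and flags to your setup):
mpicc -g odd_even.c -o a.out
mpiexec -n 2 valgrind --track-origins=yes ./a.out
mpiexec -n 2 xterm -e gdb ./a.out
With valgrind, read the first "Invalid write" report; it names the function and line performing the bad write. In each gdb window, type run, wait for the SIGSEGV, then bt for a backtrace. Note that valgrind's memcheck is better at heap errors than stack overruns, so for this particular bug the gdb backtrace may be the more direct route.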
Related
I'm trying to send a packed structure with MPI_Bsend(). I'm doing something wrong and I cannot find the solution.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "mpi.h"
#define SIZE 10
struct car {
int id;
int vmax;
char marka[SIZE];
char model[SIZE];
};
int main(int argc, char **argv) {
int i;
int rank, size;
double t1, t2;
struct car BMW, BMW2;
BMW.id = 1;
strcpy(BMW.marka, "BMW");
strcpy(BMW.model, "szybki");
BMW.vmax = 199;
MPI_Status status;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&size);
int rozmiar, packet_size, msg_size, position = 0,tag;
void *bufor;
MPI_Pack_size(2, MPI_INT, MPI_COMM_WORLD, &rozmiar);
packet_size = rozmiar;
MPI_Pack_size(2 * SIZE, MPI_CHAR, MPI_COMM_WORLD, &rozmiar);
packet_size += rozmiar;
msg_size = 2 * packet_size + MPI_BSEND_OVERHEAD;
bufor = (void *)malloc(msg_size);
MPI_Buffer_attach(bufor, msg_size);
t1 = MPI_Wtime();
if (rank == 0) {
tag = 0;
for(i=1;i<size;i++){
MPI_Pack(&BMW.id,1, MPI_INT, bufor, msg_size, &position, MPI_COMM_WORLD);
MPI_Pack(&BMW.vmax,1, MPI_INT, bufor, msg_size, &position, MPI_COMM_WORLD);
MPI_Pack(&BMW.model,SIZE, MPI_CHAR, bufor, msg_size, &position, MPI_COMM_WORLD);
MPI_Pack(&BMW.marka,SIZE, MPI_CHAR, bufor, msg_size, &position, MPI_COMM_WORLD);
MPI_Bsend(bufor,position,MPI_PACKED,i,tag,MPI_COMM_WORLD);
}
} else {
MPI_Recv(bufor,msg_size,MPI_PACKED,0,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
position = 0;
MPI_Unpack(bufor, msg_size, &position, &BMW2.id, 1, MPI_INT, MPI_COMM_WORLD);
MPI_Unpack(bufor, msg_size, &position, &BMW2.vmax, 1, MPI_INT, MPI_COMM_WORLD);
MPI_Unpack(bufor, msg_size, &position, &BMW2.model, SIZE, MPI_CHAR, MPI_COMM_WORLD);
MPI_Unpack(bufor, msg_size, &position, &BMW2.marka, SIZE, MPI_CHAR, MPI_COMM_WORLD);
printf("rank = %d | BMW id: %d, marka: %s, model: %s, vmax: %d \n",rank, BMW2.id, BMW2.marka, BMW2.model, BMW2.vmax);
}
t2 = MPI_Wtime();
MPI_Buffer_detach(&bufor, &msg_size);
MPI_Finalize();
if (i == size)
printf("Elapsed time is %.15f\n", t2 - t1 );
return(0);
}
Error:
====================================================================
BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
PID 25637 RUNNING AT debian
EXIT CODE: 11
====================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
You are using the buffered mode of MPI incorrectly. The buffer you give to MPI via MPI_Buffer_attach is reserved for MPI's internal use; you must not pack into it or receive into it yourself. Do not use the buffered MPI interface at all: it is very rarely useful and very difficult to get right.
Just remove the MPI_Buffer_attach/MPI_Buffer_detach calls and use MPI_Send instead of MPI_Bsend, and you are on the right track. MPI_Pack can be a bit clumsy, so you may want to look into custom datatypes (MPI_Type_create_struct) instead. If you have a homogeneous system, you can also send the raw bytes of the struct car.
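For illustration, a hedged sketch of the custom-datatype route (the new names car_type, probe, blocklens, types, base, and disps are ours; error handling omitted): describe struct car to MPI once, then send whole structs with a plain MPI_Send, with no packing and no attached buffer.
MPI_Datatype car_type;
int blocklens[4] = {1, 1, SIZE, SIZE};
MPI_Datatype types[4] = {MPI_INT, MPI_INT, MPI_CHAR, MPI_CHAR};
MPI_Aint base, disps[4];
struct car probe;
MPI_Get_address(&probe, &base);
MPI_Get_address(&probe.id, &disps[0]);
MPI_Get_address(&probe.vmax, &disps[1]);
MPI_Get_address(&probe.marka, &disps[2]);
MPI_Get_address(&probe.model, &disps[3]);
for (i = 0; i < 4; i++)
    disps[i] -= base; /* displacements relative to the start of the struct */
MPI_Type_create_struct(4, blocklens, disps, types, &car_type);
MPI_Type_commit(&car_type);
if (rank == 0) {
    for (i = 1; i < size; i++)
        MPI_Send(&BMW, 1, car_type, i, 0, MPI_COMM_WORLD);
} else {
    MPI_Recv(&BMW2, 1, car_type, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
}
MPI_Type_free(&car_type);
Because the fields are described by their actual displacements, this handles any struct padding correctly, which sending raw bytes would only do on a homogeneous system.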
I have a piece of code that, unfortunately, I couldn't run, but I was trying to find out whether it has a logical error or whether something is missing. Here is the code:
int main(int argc, char *argv[]) {
int numtasks, rank, dest, source, rc, count, tag=1;
char inmsg, outmsg='x';
MPI_Status Stat;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0) {
dest = 1;
source = 1;
rc = MPI_Send(&outmsg, 1, MPI_CHAR, dest, tag, MPI_COMM_WORLD);
rc = MPI_Recv(&inmsg, 1, MPI_CHAR, source, tag, MPI_COMM_WORLD, &Stat);
}
else if (rank == 1) {
dest = 0;
source = 0;
rc = MPI_Send(&outmsg, 1, MPI_CHAR, dest, tag, MPI_COMM_WORLD);
rc = MPI_Recv(&inmsg, 1, MPI_CHAR, source, tag, MPI_COMM_WORLD, &Stat);
}
rc = MPI_Get_count(&Stat, MPI_CHAR, &count);
printf("Task %d: Received %d char(s) from task %d with tag %d \n",
rank, count, Stat.MPI_SOURCE, Stat.MPI_TAG);
MPI_Finalize();
}
Also, is it allowed to save the return value of an MPI send or receive in a variable, the way rc is used here?
Your code is wrong. It contains a deadlock, which means it can hang forever or otherwise misbehave. MPI_Send is a blocking operation - it may block until the respective MPI_Recv is called. So both processes can be stuck at their respective MPI_Send operations before either MPI_Recv is reached. Use MPI_Sendrecv instead.
Note that, due to optimizations, MPI may instead choose to send the data immediately for small messages, so the code may complete even though it is wrong. Do not rely on that!
Normally, you don't have to check MPI return codes, as errors are fatal in MPI by default. In particular, don't assign the return code without checking it for MPI_SUCCESS.
Note that you can easily install MPI on any system, e.g. OpenMPI is available for most Linux distributions. There is no reason not to play around with MPI on a normal desktop system.
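Concretely, a minimal hedged sketch of the fix with MPI_Sendrecv, reusing the variable names from the question (both ranks execute the same combined call, so neither can block the other):
if (rank == 0 || rank == 1) {
    int partner = 1 - rank;  /* rank 0 exchanges with rank 1 and vice versa */
    MPI_Sendrecv(&outmsg, 1, MPI_CHAR, partner, tag,
                 &inmsg, 1, MPI_CHAR, partner, tag,
                 MPI_COMM_WORLD, &Stat);
}
Since the send and the receive are handed to the library together, MPI can pair them internally and the deadlock disappears.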
I've got many slave nodes which might or might not send messages to the master node, so currently there is no way for the master node to know how many MPI_Recv calls to expect. The slave nodes have to send a minimal number of messages to the master node for efficiency reasons.
I managed to find a cool trick, which sends an additional "done" message once no more messages are expected. Unfortunately, it doesn't seem to work in my case, where there is a variable number of senders. Any idea on how to go about this? Thanks!
if(rank == 0){ //MASTER NODE
while (1) {
MPI_Recv(&buffer, 10, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
if (status.MPI_TAG == DONE) break;
/* Do stuff */
}
}else{ //MANY SLAVE NODES
if(some conditions){
MPI_Send(&buffer, 64, MPI_INT, root, 1, MPI_COMM_WORLD);
}
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Send(NULL, 1, MPI_INT, root, DONE, MPI_COMM_WORLD);
This is not working; the program still seems to be waiting on an MPI_Recv.
A simpler and more elegant option would be to use MPI_IBARRIER. Have each worker post all of the sends it needs to, then call MPI_IBARRIER when it's done. On the master, you can loop over both an MPI_IRECV on MPI_ANY_SOURCE and an MPI_IBARRIER. When the MPI_IBARRIER completes, you know that everyone has finished, so you can cancel the MPI_IRECV and move on. The pseudocode would look something like this:
if (master) {
/* Start the barrier. Each process will join when it's done. */
MPI_Ibarrier(MPI_COMM_WORLD, &requests[0]);
do {
/* Do the work */
MPI_Irecv(..., MPI_ANY_SOURCE, &requests[1]);
/* If the index that finished is 1, we received a message.
* Otherwise, we finished the barrier and we're done. */
MPI_Waitany(2, requests, &index, MPI_STATUS_IGNORE);
} while (index == 1);
/* If we're done, we should cancel the receive request and move on. */
MPI_Cancel(&requests[1]);
} else {
/* Keep sending work back to the master until we're done. */
while( ...work is to be done... ) {
MPI_Send(...);
}
/* When we finish, join the Ibarrier. Note that
* you can't use an MPI_Barrier here because it
* has to match with the MPI_Ibarrier above. */
MPI_Ibarrier(MPI_COMM_WORLD, &request);
MPI_Wait(&request, MPI_STATUS_IGNORE);
}
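For reference, a compilable sketch of the same pattern; it assumes each worker's "work" is a single int send, and all names here are illustrative:
#include <mpi.h>
#include <stdio.h>
int main(int argc, char **argv) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (rank == 0) {
        int buf, index;
        MPI_Request requests[2];
        MPI_Ibarrier(MPI_COMM_WORLD, &requests[0]);
        do {
            MPI_Irecv(&buf, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                      MPI_COMM_WORLD, &requests[1]);
            MPI_Waitany(2, requests, &index, MPI_STATUS_IGNORE);
            if (index == 1)
                printf("master received %d\n", buf); /* "do stuff" here */
        } while (index == 1);
        MPI_Cancel(&requests[1]);  /* barrier done: no more messages coming */
        MPI_Wait(&requests[1], MPI_STATUS_IGNORE);
    } else {
        MPI_Request request;
        int msg = rank;  /* stand-in for the real work */
        MPI_Send(&msg, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
        MPI_Ibarrier(MPI_COMM_WORLD, &request);
        MPI_Wait(&request, MPI_STATUS_IGNORE);
    }
    MPI_Finalize();
    return 0;
}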
1- You called MPI_Barrier in the wrong place; it should be called after the MPI_Send.
2- The root exits the loop once it has received DONE from all other ranks (size - 1).
The code after some modifications:
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char** argv)
{
MPI_Init(NULL, NULL);
int size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Status status;
int DONE = 888;
int buffer = 77;
int root = 0 ;
printf("here is rank %d with size=%d\n" , rank , size);fflush(stdout);
int num_of_DONE = 0 ;
if(rank == 0){ //MASTER NODE
while (1) {
MPI_Recv(&buffer, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
printf("root recev %d from %d with tag = %d\n" , buffer , status.MPI_SOURCE , status.MPI_TAG );fflush(stdout);
if (status.MPI_TAG == DONE)
num_of_DONE++;
printf("num_of_DONE=%d\n" , num_of_DONE);fflush(stdout);
if(num_of_DONE == size -1)
break;
/* Do stuff */
}
}else{ //MANY SLAVE NODES
if(1){
buffer = 66;
MPI_Send(&buffer, 1, MPI_INT, root, 1, MPI_COMM_WORLD);
printf("rank %d sent data.\n" , rank);fflush(stdout);
}
}
if(rank != 0)
{
buffer = 55;
MPI_Send(&buffer, 1, MPI_INT, root, DONE, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
printf("rank %d done.\n" , rank);fflush(stdout);
MPI_Finalize();
return 0;
}
output:
hosam#hosamPPc:~/Desktop$ mpicc -o aa aa.c
hosam#hosamPPc:~/Desktop$ mpirun -n 3 ./aa
here is rank 2 with size=3
here is rank 0 with size=3
rank 2 sent data.
here is rank 1 with size=3
rank 1 sent data.
root recev 66 from 1 with tag = 1
num_of_DONE=0
root recev 66 from 2 with tag = 1
num_of_DONE=0
root recev 55 from 2 with tag = 888
num_of_DONE=1
root recev 55 from 1 with tag = 888
num_of_DONE=2
rank 0 done.
rank 1 done.
rank 2 done.
OK, so the aim of the game here is for each one of 64 processes (representing an 8x8 grid) to generate a random number (0 or 1) and give process zero a string representing the complete situation. For example, the grid:
[0,1,0,1]
[1,1,1,1]
[0,0,0,0]
would ultimately produce the string '010111110000' for a 4x3 grid.
Each process can only communicate with the ones above and to their left.
To do this, I have each process (if it's not on the far right) receive a string of all the numbers to its right, add its own number to the front of that string, and send the result to the left.
If a process is on the far left, it also receives a string from the process below it (except for the bottom-left process, rank 56) describing the state of all nodes below that rank. It joins its own value, the row string from its right, and the string from below, and sends the combination up.
Each far-left node's value therefore begins its row's string.
My attempted code is below:
#include <stdio.h>
#include "mpi.h"
#include <string.h>
#include <stdlib.h>
int farLeft(int rank){// edit
if (rank%8==0){
return 1;
}
return 0;
}
int farRight(int rank){// edit
if (rank%8==7){
return 1;
}
return 0;
}
int main(int argc, char **argv)
{
char inputList[100],myWhisp[100],snum[256];
int rank,value;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
srand(rank);
value = rand() % 2;
sprintf(snum, "%d", value);
strcpy(myWhisp,snum);
if (farLeft(rank)){
MPI_Recv(inputList, strlen(inputList)+1, MPI_CHAR, rank+1, 0, MPI_COMM_WORLD, &status);
strcat(snum,inputList);
strcpy(myWhisp,snum);
if (rank !=56){
MPI_Recv(inputList, strlen(inputList)+1, MPI_CHAR, rank+8, 0, MPI_COMM_WORLD, &status);//rank48 crashes here
strcat(myWhisp,inputList);
}
strcpy(inputList,myWhisp);
if(rank==0){
printf("%s\n",inputList);
}
else{
MPI_Send(inputList, strlen(inputList)+1, MPI_CHAR, rank-8, 0, MPI_COMM_WORLD);
}
}
else if (farRight(rank)){
strcpy(inputList,myWhisp);
MPI_Send(inputList, strlen(inputList)+1, MPI_CHAR, rank-1, 0, MPI_COMM_WORLD);
}
else{
MPI_Recv(inputList, strlen(inputList)+1, MPI_CHAR, rank+1, 0, MPI_COMM_WORLD, &status);
strcat(snum,inputList);
strcpy(inputList,snum);
MPI_Send(inputList, strlen(inputList)+1, MPI_CHAR, rank-1, 0, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
I'm getting a truncation error on rank 48, the second-to-last rank on the far left. It happens on the receive inside the if (rank != 56) block. So there's something wrong with the way I send/receive inputList, I guess...
Thanks very much.
You're passing the count parameter of MPI_Recv as strlen(inputList)+1, but inputList was never initialised, so calling strlen on it is undefined behaviour; in practice it yields whatever length the leftover bytes happen to have, which can be smaller than the incoming message - exactly what a truncation error means. You probably want sizeof(inputList) here.
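A hedged sketch of the corrected call; the same change applies to every receive in the program:
MPI_Recv(inputList, sizeof(inputList), MPI_CHAR, rank+1, 0, MPI_COMM_WORLD, &status);
Since inputList is declared as char inputList[100], sizeof(inputList) is the buffer's full capacity, which is exactly the upper bound MPI_Recv's count parameter is meant to express.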
I have developed a given simple MPI program in which process 0 sends a message to process 1 and receives a message from process p-1. The full code follows below.
In the skeleton given to me,
char *message;
message = (char*)malloc(msg_size);
is confusing me. To check the correctness of the program, I am trying to look at the value of the message being sent or received. Should it be a hexadecimal value?
int main(int argc, char **argv)
{
double startwtime, endwtime;
float elapsed_time, bandwidth;
int my_id, next_id; /* process id-s */
int p; /* number of processes */
char* message; /* storage for the message */
int i, k, max_msgs, msg_size, v;
MPI_Status status; /* return status for receive */
MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &my_id );
MPI_Comm_size( MPI_COMM_WORLD, &p );
if (argc < 3)
{
fprintf (stderr, "need msg count and msg size as params\n");
goto EXIT;
}
if ((sscanf (argv[1], "%d", &max_msgs) < 1) ||
(sscanf (argv[2], "%d", &msg_size) < 1))
{
fprintf (stderr, "need msg count and msg size as params\n");
goto EXIT;
}
message = (char*)malloc (msg_size);
if (argc > 3) v=1; else v=0; /*are we in verbose mode*/
/* don't start timer until everybody is ok */
MPI_Barrier(MPI_COMM_WORLD);
int t=0;
if( my_id == 0 ) {
startwtime = MPI_Wtime();
// do max_msgs times:
// send message of size msg_size chars to process 1
// receive message of size msg_size chars from process p-1
while(t<max_msgs) {
MPI_Send((char *) message, msg_size, MPI_CHAR, 1 , 0, MPI_COMM_WORLD);
MPI_Recv((char *) message, msg_size, MPI_CHAR, p-1, 0, MPI_COMM_WORLD, &status);
t++;
}
MPI_Barrier(MPI_COMM_WORLD);
endwtime = MPI_Wtime();
elapsed_time = endwtime-startwtime;
bandwidth = 2.0 * max_msgs * msg_size / (elapsed_time);
printf("Number, size of messages: %3d , %3d \n", max_msgs, msg_size);
fflush(stdout);
printf("Wallclock time = %f seconds\n", elapsed_time );
fflush(stdout);
printf("Bandwidth = %f bytes per second\n", bandwidth);
fflush(stdout);
} else if( my_id == p-1 ) {
// do max_msgs times:
// receive message of size msg_size from process to the left
// send message of size msg_size to process to the right (p-1 sends to 0)
while(t<max_msgs) {
MPI_Send((char *) message, msg_size, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
MPI_Recv((char *) message, msg_size, MPI_CHAR, my_id-1, 0, MPI_COMM_WORLD, &status);
t++;
}
} else {
while(t<max_msgs) {
MPI_Send((char *) message, msg_size, MPI_CHAR, my_id+1, 0, MPI_COMM_WORLD);
MPI_Recv((char *) message, msg_size, MPI_CHAR, my_id-1, 0, MPI_COMM_WORLD, &status);
t++;
}
}
MPI_Barrier(MPI_COMM_WORLD);
EXIT:
MPI_Finalize();
return 0;
}
I am not completely sure if this is what you mean, but I will try.
From what I understand, you want to know what the message being sent actually is. In the code you provide, memory is assigned for the message, but no real "readable" message is ever specified. That happens in this line:
message = (char*)malloc (msg_size);
malloc reserves memory for the message so that anyone can write to it, but it doesn't give that memory any initial value. Sometimes it contains information that was previously stored and freed, so the message being sent is whatever "garbage" was left behind. This is probably what you are seeing as hexadecimal (I hope I understand this right).
The type of the values in this case is char (specified as MPI_CHAR in the MPI_Send and MPI_Recv calls). Here you can find more data types for MPI.
I would suggest assigning the message a value built from my_id and next_id, so you know who is sending to whom.
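For example, a minimal hedged sketch (reusing the variable names from the posted code; memset needs <string.h>, and msg_size is assumed to be at least 1) that stamps the buffer with the sender's rank right after the malloc:
message = (char*)malloc(msg_size);
if (message == NULL) {
    fprintf(stderr, "malloc failed\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
}
/* Fill the buffer with this rank's digit and NUL-terminate it
   so it can be printed as ordinary text. */
memset(message, '0' + (my_id % 10), msg_size - 1);
message[msg_size - 1] = '\0';
Then, after each MPI_Recv, a line such as printf("rank %d now holds \"%.10s\"\n", my_id, message); shows at a glance whose data arrived.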