C: Signal code: Address not mapped (1) in MPI_Recv

I have the following code written in C with MPI:
#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int size, rank;
    MPI_Status status;
    int buf[1000];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        int i = 0;
        while (i != 1000) {
            buf[i] = i;
            i++;
        }
        MPI_Send(buf, 999, MPI_INT, 1, 1, MPI_COMM_WORLD);
        printf("msg has been sent\n");
    }
    if (rank == 1) {
        int sz = sizeof(buf);
        int lst = buf[sz-1];
        MPI_Recv(buf, 999, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
        printf("la taille du buf %d et dernier %d", sz, lst);
    }
    MPI_Finalize();
}
And after running it, it gives this message:
msg has been sent
[blitzkrieg-TravelMate-P253:03395] *** Process received signal ***
[blitzkrieg-TravelMate-P253:03395] Signal: Segmentation fault (11)
[blitzkrieg-TravelMate-P253:03395] Signal code: Address not mapped (1)
[blitzkrieg-TravelMate-P253:03395] Failing at address: 0xbfee8574
[blitzkrieg-TravelMate-P253:03395] [0] [0xb772d40c]
[blitzkrieg-TravelMate-P253:03395] [1] mpii(main+0x12f) [0x8048883]
[blitzkrieg-TravelMate-P253:03395] [2] /lib/i386-linux-gnu/libc.so.6(__libc_start_main+0xf3) [0xb74c84d3]
[blitzkrieg-TravelMate-P253:03395] [3] mpii() [0x80486c1]
[blitzkrieg-TravelMate-P253:03395] *** End of error message ***
mpirun noticed that process rank 1 with PID 3395 on node blitzkrieg-TravelMate-P253 exited on signal 11 (Segmentation fault).
Any suggestion will help, thanks.

The stack trace shows that the error is not in the MPI_Recv as the question title suggests. The error is actually here:
int sz = sizeof(buf);
int lst = buf[sz-1]; // <---- here
Since buf is an array of int and sizeof(buf) returns its size in bytes, sz is set to 4 times the number of elements in the array. Accessing buf[sz-1] goes way beyond the bounds of buf and into an unmapped memory region above the stack of the process.
You should divide the total size of the array by the size of one of its elements, e.g. the first one:
int sz = sizeof(buf) / sizeof(buf[0]);
int lst = buf[sz-1];
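Putting it together, a minimal sketch of the corrected rank-1 branch, assuming the goal is to print the element count and the last value actually received (note that the Send/Recv as written only transfer 999 of the 1000 elements, so the last received element is buf[998]):
if (rank == 1) {
    /* number of elements: total bytes divided by the size of one element */
    int sz = sizeof(buf) / sizeof(buf[0]);   /* 1000 */
    MPI_Recv(buf, 999, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
    /* read the buffer only after the receive has filled it */
    int lst = buf[998];
    printf("buffer holds %d elements, last received value %d\n", sz, lst);
}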

Related

Send dynamic array inside a struct over MPI

In my C program I have a structure like the one below
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>

typedef struct Message
{
    int elemNr;
    char *elem;
} Msg;
I think I followed all the steps to create a custom data type in MPI:
int main(int argc, char **argv) {
    int size, rank;
    int i;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (size < 2) {
        fprintf(stderr, "Requires at least two processes.\n");
        exit(-1);
    }

    // just for simplicity
    const int n = 5;

    // create a new type for struct message
    MPI_Datatype myType;
    Msg msgSnd;
    int block_length[2] = {1, n};
    MPI_Aint elemNrAddr, elemAddr;
    MPI_Aint displacement[2] = {0, 0};
    MPI_Get_address(&msgSnd.elemNr, &elemNrAddr);
    MPI_Get_address(&msgSnd.elem, &elemAddr);
    // just displacement[1] because displacement[0] starts from 0
    displacement[1] = elemAddr - elemNrAddr;
    MPI_Datatype types[2] = {MPI_INT, MPI_CHAR};
    MPI_Type_create_struct(2, block_length, displacement, types, &myType);
    MPI_Type_commit(&myType);

    // populate the message
    msgSnd.elemNr = n;
    msgSnd.elem = malloc(sizeof(char) * msgSnd.elemNr);
    srand(time(NULL));
    for (i = 0; i < msgSnd.elemNr; i++)
        msgSnd.elem[i] = rand() % 26 + 'a';

    if (rank != 0) {
        printf("I'm sending\n");
        MPI_Send(&msgSnd, 1, myType, 0, 0, MPI_COMM_WORLD);
        printf("I sent\n");
    } else {
        MPI_Status status;
        Msg msgRcv;
        printf("I'm receiving\n");
        MPI_Recv(&msgRcv, 1, myType, 1, 0, MPI_COMM_WORLD, &status);
        printf("I received\n");
        for (i = 0; i < msgRcv.elemNr; i++)
            printf("element %d: %c\n", i, msgRcv.elem[i]);
        if (msgRcv.elem != NULL)
            free(msgRcv.elem);
    }

    if (msgSnd.elem != NULL)
        free(msgSnd.elem);

    MPI_Type_free(&myType);
    MPI_Finalize();
    return 0;
}
I ran the above code, but unfortunately when process 0 receives the message, the elem pointer is null and the program ends with a segmentation fault.
Can you help me find the problem? Or, how can I send a dynamic array inside a struct over MPI?
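No answer is quoted here, but one point worth noting: a struct datatype built from {MPI_INT, MPI_CHAR} over these displacements describes the pointer member itself, not the heap buffer it points to, so the receiver ends up with a pointer value that is meaningless in its own address space. As a minimal sketch of one common alternative, assuming two messages per transfer are acceptable, send the length first and the dynamically allocated data separately:
if (rank != 0) {
    /* sketch only: length first, then the heap buffer itself */
    MPI_Send(&msgSnd.elemNr, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    MPI_Send(msgSnd.elem, msgSnd.elemNr, MPI_CHAR, 0, 1, MPI_COMM_WORLD);
} else {
    Msg msgRcv;
    MPI_Recv(&msgRcv.elemNr, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    msgRcv.elem = malloc(msgRcv.elemNr);     /* allocate before receiving */
    MPI_Recv(msgRcv.elem, msgRcv.elemNr, MPI_CHAR, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    /* ... use and free msgRcv.elem ... */
}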

Receiving an array with MPI

I am attempting to write a parallel program that merge sorts two arrays sent between separate processes. In this simplified version, where I am trying to get the communication to work, I simply want to send one array (four unsigned integers long) from process 0 to process 1, then print both the local and received arrays in process 1. Here is the code. (load_and_distribute simply fills the arrays, and I have checked that both processes do indeed hold four unsigned integers.)
int
main(int argc, char ** argv)
{
    int ret;
    unsigned int ln, tn;
    unsigned int * lvals;
    int rank, size;

    ret = MPI_Init(&argc, &argv);
    assert(MPI_SUCCESS == ret);

    /* get information about MPI environment */
    ret = MPI_Comm_size(MPI_COMM_WORLD, &size);
    assert(MPI_SUCCESS == ret);
    ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    assert(MPI_SUCCESS == ret);

    load_and_distribute(argv[1], &ln, &lvals);

    unsigned int rn;
    unsigned int * rvals;
    rvals = malloc(4*sizeof(*rvals));

    if(rank == 0){
        MPI_Send(&lvals, 4, MPI_UNSIGNED, 1, 0, MPI_COMM_WORLD);
    }
    else if (rank == 1){
        rvals[0] = 4;
        MPI_Recv(&rvals, 4, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("lvals = %d %d %d %d\n",lvals[0],lvals[1],lvals[2],lvals[3]);
        printf("rvals = %d %d %d %d\n",rvals[0],rvals[1],rvals[2],rvals[3]);
    }

    ret = MPI_Finalize();
    assert(MPI_SUCCESS == ret);

    return EXIT_SUCCESS;
}
The send and receive seem to go through without a hitch, but when process 1 attempts to print the rvals values, I get this output, and I am unsure why.
[hpc5:04355] *** Process received signal ***
[hpc5:04355] Signal: Segmentation fault (11)
[hpc5:04355] Signal code: Address not mapped (1)
[hpc5:04355] Failing at address: 0xe0c4ac
[hpc5:04355] [ 0] /lib64/libpthread.so.0(+0xf370)[0x7f2a8d23c370]
[hpc5:04355] [ 1] ./hms_mpi[0x40165d]
[hpc5:04355] [ 2] /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2a8ce8db35]
[hpc5:04355] [ 3] ./hms_mpi[0x400c29]
[hpc5:04355] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 4355 on node hpc5 exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
The correct buffers for MPI_Send() and MPI_Recv() are lvals and rvals (i.e. do not take their address with the & operator).
Remove the & in your MPI_Send and MPI_Recv calls:
MPI_Send(lvals, 4, MPI_UNSIGNED, 1, 0, MPI_COMM_WORLD);
MPI_Recv(rvals, 4, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
The signature is:
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)
buf: initial address of send buffer (choice)
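The underlying reason: lvals and rvals already have type unsigned int *, so they point at the allocated buffers, while &lvals and &rvals have type unsigned int ** and point at the pointer variables themselves. A short sketch of the distinction:
unsigned int *rvals = malloc(4 * sizeof(*rvals));
/* rvals  -> the 4-element heap buffer (what MPI should write into)      */
/* &rvals -> the pointer variable on the stack (writing 16 bytes here    */
/*           clobbers the pointer and whatever sits next to it)          */
MPI_Recv(rvals, 4, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);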

Using a thread created with pthread_create to call MPI_Finalize in an MPI application written in C

First, I should mention that I am French and my English is not very good.
I am working on an MPI application and I have some problems, and I hope that somebody can help me.
As stated in the title of my post, I try to use a thread to listen for when I have to kill my application and then call the MPI_Finalize function.
However, my application does not finish correctly.
More precisely, I obtain the following message:
[XPS-2720:27441] *** Process received signal ***
[XPS-2720:27441] Signal: Segmentation fault (11)
[XPS-2720:27441] Signal code: Address not mapped (1)
[XPS-2720:27441] Failing at address: 0x7f14077a3b6d
[XPS-2720:27440] *** Process received signal ***
[XPS-2720:27440] Signal: Segmentation fault (11)
[XPS-2720:27440] Signal code: Address not mapped (1)
[XPS-2720:27440] Failing at address: 0x7fb11d07bb6d
mpirun noticed that process rank 1 with PID 27440 on node lagniez-XPS-2720 exited on signal 11 (Segmentation fault).
My slave code is:
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <pthread.h>
#include <cassert>
#define send_data_tag 1664
#define send_kill_tag 666
void *finilizeMPICom(void *intercomm)
{
printf("the finilizeMPICom was called\n");
MPI_Comm parentcomm = * ((MPI_Comm *) intercomm);
MPI_Status status;
int res;
// sleep(10);
MPI_Recv(&res, 1, MPI_INT, 0, send_kill_tag, parentcomm, &status);
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("we receive something %d -- %d\n", rank, res);
MPI_Finalize();
exit(0);
}// finilizeMPICom
int main( int argc, char *argv[])
{
int numtasks, rank, len, rc;
char hostname[MPI_MAX_PROCESSOR_NAME];
int provided, claimed;
rc = MPI_Init_thread(0, 0, MPI_THREAD_MULTIPLE, &provided);
MPI_Query_thread( &claimed );
if (rc != MPI_SUCCESS || provided != 3)
{
printf ("Error starting MPI program. Terminating.\n");
MPI_Abort(MPI_COMM_WORLD, rc);
}
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm parentcomm;
MPI_Comm_get_parent(&parentcomm);
/* create a second thread to listen when we have to kill the program */
pthread_t properlyKill;
if(pthread_create(&properlyKill, NULL, finilizeMPICom, (void *) &parentcomm))
{
fprintf(stderr, "Error creating thread\n");
return 0;
}
assert(parentcomm != MPI_COMM_NULL);
MPI_Status status;
int root_process, ierr, num_rows_to_receive;
int mode;
MPI_Recv( &mode, 1, MPI_INT, 0, send_data_tag, parentcomm, &status);
printf("c The solver works in the mode %d\n", mode);
printf("I sent a message %d\n", rank);
// if(rank != 1) sleep(100);
int res = 1;
MPI_Send(&res, 1, MPI_INT, 0, send_data_tag, parentcomm);
printf("we want to listen for somethiing %d\n", rank);
int rescc = 1;
MPI_Recv(&rescc, 1, MPI_INT, 0, send_data_tag, parentcomm, &status);
printf("I received the message %d %d\n", rescc, rank);
if(rescc == 1000)
{
printf("~~~~~~~~>>> I print the solution %d\n", rank);
int res3 = 1001;
MPI_Send(&res3, 1, MPI_INT, 0, send_data_tag, parentcomm);
}
else printf("I do not understand %d\n", rank);
printf("I wait the thread to kill the programm %d\n", rank);
pthread_join(properlyKill, (void**)&(res));
return 0;
}
For the master I have:
int main(int argc, char **argv)
{
    Parser *p = new Parser("slave.xml");

    MPI_Init(&argc, &argv);

    if(p->method == "concurrent")
    {
        ConcurrentManager cc(p->instance, p->solvers);
        cc.run();
    }
    else
    {
        cerr << "c The only available methods are: concurrent, eps (Embarrassingly Parallel Search) or tree" << endl;
        exit(1);
    }

    delete(p);
    MPI_Finalize();
    exit(0);
}// main

/**
   Create a concurrent manager (means init the data structures to run
   the solvers).

   @param[in] _instance, the benchmark path
   @param[in] _solvers, the set of solvers that will be ran
 */
ConcurrentManager::ConcurrentManager(string _instance, vector<Solver> &_solvers) :
    instance(_instance), solvers(_solvers)
{
    cout << "c\nc Concurrent manager called" << endl;

    nbSolvers = _solvers.size();
    np = new int[nbSolvers];
    cmds = new char*[nbSolvers];
    arrayOfArgs = new char **[nbSolvers];
    infos = new MPI_Info[nbSolvers];

    for(int i = 0 ; i<nbSolvers ; i++)
    {
        np[i] = solvers[i].npernode;

        cmds[i] = new char[(solvers[i].executablePath).size() + 1];
        strcpy(cmds[i], (solvers[i].executablePath).c_str());

        arrayOfArgs[i] = new char *[(solvers[i].options).size() + 1];
        for(unsigned int j = 0 ; j<(solvers[i].options).size() ; j++)
        {
            arrayOfArgs[i][j] = new char[(solvers[i].options[j]).size() + 1];
            strcpy(arrayOfArgs[i][j], (solvers[i].options[j]).c_str());
        }
        arrayOfArgs[i][(solvers[i].options).size()] = NULL;

        MPI_Info_create(&infos[i]);

        char hostname[solvers[i].hostname.size()];
        strcpy(hostname, solvers[i].hostname.c_str());
        MPI_Info_set(infos[i], "host", hostname);
    }

    sizeComm = 0;
}// constructor

/**
   Wait that at least one process finish and return the code
   SOLUTION_FOUND.

   @param[in] intercomm, the communicator
 */
void ConcurrentManager::waitForSolution(MPI_Comm &intercomm)
{
    MPI_Status arrayStatus[sizeComm], status;
    MPI_Request request[sizeComm];
    int val[sizeComm], flag;

    for(int i = 0 ; i<sizeComm ; i++) MPI_Irecv(&val[i], 1, MPI_INT, i, TAG_MSG, intercomm, &request[i]);

    bool solutionFound = false;
    while(!solutionFound)
    {
        for(int i = 0 ; i<sizeComm ; i++)
        {
            MPI_Test(&request[i], &flag, &arrayStatus[i]);
            if(flag)
            {
                printf("---------------------> %d reveived %d\n", i , val[i]);
                if(val[i] == SOLUTION_FOUND)
                {
                    int msg = PRINT_SOLUTION;
                    MPI_Send(&msg, 1, MPI_INT, i, TAG_MSG, intercomm); // ask to print the solution

                    int msgJobFinished;
                    MPI_Recv(&msgJobFinished, 1, MPI_INT, i, TAG_MSG, intercomm, &status); // wait the answer
                    assert(msgJobFinished == JOB_FINISHED);
                    cout << "I am going to kill everybody" << endl;

                    int msgKill[sizeComm];
                    for(int j = 0 ; j<sizeComm ; j++)
                    {
                        msgKill[i] = STOP_AT_ONCE;
                        MPI_Send(&msgKill[i], 1, MPI_INT, j, TAG_KILL, intercomm);
                    }

                    solutionFound = true;
                    break;
                } else
                {
                    printf("restart the communication for %d\n", i);
                    MPI_Irecv(&val[i], 1, MPI_INT, i, TAG_MSG, intercomm, &request[i]);
                }
            }
        }
    }
}// waitForSolution

/**
   Run the solver.
 */
void ConcurrentManager::run()
{
    MPI_Comm intercomm;
    int errcodes[solvers.size()];

    MPI_Comm_spawn_multiple(nbSolvers, cmds, arrayOfArgs, np, infos, 0, MPI_COMM_WORLD, &intercomm, errcodes);
    MPI_Comm_remote_size(intercomm, &sizeComm);
    cout << "c Solvers are now running: " << sizeComm << endl;

    int msg = CONCU_MODE;
    for(int i = 0 ; i<sizeComm ; i++) MPI_Send(&msg, 1, MPI_INT, i, TAG_MSG, intercomm); // init the working mode

    waitForSolution(intercomm);
}// run
I know that I put a lot of code :(
But I do not know where the problem is.
Please, help me :)
Best regards.
The MPI specification for how MPI interacts with threads demands that the call to MPI_Finalize() be performed by the main thread, that is, the same one that initialized MPI. In your case, that happens also to be your process's initial thread.
In order to satisfy MPI's requirements, you could reorganize your application so that the initial thread is the one that waits for a kill signal and then shuts down MPI. The other work it currently does would then need to be moved to a different thread.
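As a minimal sketch of that reorganization for the slave (a name like runSolverProtocol is illustrative, not from the posted code): the initial thread, which called MPI_Init_thread, hands the solver protocol to a worker thread, blocks on the kill message itself, joins the worker, and only then calls MPI_Finalize:
/* sketch: the thread that called MPI_Init_thread keeps ownership of MPI_Finalize */
pthread_t worker;
pthread_create(&worker, NULL, runSolverProtocol, (void *) &parentcomm); /* hypothetical worker function */

int res;
MPI_Status status;
/* the initial thread waits for the kill order from the master */
MPI_Recv(&res, 1, MPI_INT, 0, send_kill_tag, parentcomm, &status);

pthread_join(worker, NULL);  /* ensure no other thread is still inside MPI */
MPI_Finalize();
return 0;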

MPI_Gather of indexed type to raw data

I have encountered a problem using MPI_Gather to gather indexed integers into a vector of integers. When I try to gather the integers without creating a new receive type, I get an MPI_ERR_TRUNCATE error.
*** An error occurred in MPI_Gather
*** on communicator MPI_COMM_WORLD
*** MPI_ERR_TRUNCATE: message truncated
*** MPI_ERRORS_ARE_FATAL: your MPI job will now abort
The minimal example replicating the issue is
#include <stdlib.h>
#include "mpi.h"

int i, comm_rank, comm_size, err;
int *send_data, *recv_data;
int *blocklengths, *displacements;
MPI_Datatype send_type;

int main ( int argc, char *argv[] ){
    MPI_Init ( &argc, &argv );
    MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

    unsigned int block = 1000;
    unsigned int count = 1000;

    send_data = malloc(sizeof(int)*block*count);
    for (i=0; i<block*count; ++i) send_data[i] = i;

    recv_data = 0;
    if(comm_rank==0) recv_data = malloc(sizeof(int)*block*count*comm_size);

    blocklengths = malloc(sizeof(int)*count);
    displacements = malloc(sizeof(int)*count);
    for (i=0; i<count; ++i) {
        blocklengths[i] = block;
        displacements[i] = i*block;
    }

    MPI_Type_indexed(count, blocklengths, displacements, MPI_INT, &send_type);
    MPI_Type_commit(&send_type);

    err = MPI_Gather((void*)send_data, 1, send_type, (void*)recv_data, block*count, MPI_INT, 0, MPI_COMM_WORLD);
    if (err) MPI_Abort(MPI_COMM_WORLD, err);

    free(send_data);
    free(recv_data);
    free(blocklengths);
    free(displacements);

    MPI_Finalize ( );
    return 0;
}
I noticed that this error does not occur when the data transfer size is less than 6 KB.
I found a workaround using MPI_Type_contiguous, although it seems to add extra overhead to my code.
MPI_Type_contiguous(block*count, MPI_INT, &recv_type);
MPI_Type_commit(&recv_type);
err = MPI_Gather((void*)send_data, 1, send_type, (void*)recv_data, 1, recv_type, 0, MPI_COMM_WORLD);
I have verified the error occurs in open-mpi v1.6 and v1.8.
Could anyone explain the source of this issue?

Receiving an array allocated with malloc in MPI

EDIT: There is no problem with this code in particular. I created a reduced version of my code and this part works perfectly. I still don't understand why it is not working in my whole code, because I have everything commented out except this, but that may be too specific. Sorry, wrong question.
(I have edited the post and added the error I get at the bottom.)
I'm trying to parallelize a C program.
I'm encountering errors when I try to pass an array allocated with malloc from the master process to the rest of the processes. Or rather, when I try to receive it.
This is the piece of code I'm having trouble with:
if (rank == 0)
{
    int *data=(int *) malloc(size*sizeof(int));
    int error_code = MPI_Send(data, size, MPI_INT, 1, 1, MPI_COMM_WORLD);
    if (error_code != MPI_SUCCESS) {
        char error_string[BUFSIZ];
        int length_of_error_string;
        MPI_Error_string(error_code, error_string, &length_of_error_string);
        printf("%3d: %s\n", rank, error_string);
    }
    printf("Data sent.");
}
else if (rank == 1)
{
    int *data=(int *) malloc(size*sizeof(int));
    int error_code = MPI_Recv(data, size, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
    if (error_code != MPI_SUCCESS) {
        char error_string[BUFSIZ];
        int length_of_error_string;
        MPI_Error_string(error_code, error_string, &length_of_error_string);
        printf("%3d: %s\n", rank, error_string);
    }
    printf("Received.");
}
"Data sent." is printed, followed by a segmentation fault (with memory dump) caused by the second process and "Received" is never printed.
I think I'm not receiving well the data. But I tried several possibilities, I think I have to pass the address of the variable and not just the pointer to the first position, so I thought this was the correct way, but it is not working.
From the error codes nothing gets printed.
Does anyone know what's causing the error and what was my mistake?
Thanks!
EDIT:
This is the exact error:
*** Process received signal ***
*** End of error message ***
Signal: Segmentation fault (11)
Signal code: Address not mapped (1)
EDIT 2:
This code works:
int main(int argc, char* argv[])
{
    int size_x = 12;
    int size_y = 12;
    int rank, size, length;
    char nodename[BUFSIZ];
    MPI_Status status;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Get_processor_name(nodename, &length);
    MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    if (rank == 0)
    {
        int *data=malloc(size*sizeof(int));
        int error_code = MPI_Send(data, size, MPI_INT, 1, 1, MPI_COMM_WORLD);
        if (error_code != MPI_SUCCESS)
        {
            char error_string[BUFSIZ];
            int length_of_error_string;
            MPI_Error_string(error_code, error_string, &length_of_error_string);
            printf("%3d: %s\n", rank, error_string);
        }
        printf("Data sent.");
    }
    else if (rank > 0)
    {
        int *data=malloc(size*sizeof(int));
        int error_code = MPI_Recv(data, size, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
        if (error_code != MPI_SUCCESS)
        {
            char error_string[BUFSIZ];
            int length_of_error_string;
            MPI_Error_string(error_code, error_string, &length_of_error_string);
            printf("%3d: %s\n", rank, error_string);
        }
        printf("Received.");
    }

    MPI_Finalize();
    return 0;
}
I found the problem: it was not the MPI calls. There was a problem in a previous function (a forgotten variable in a "printf") which I didn't notice, and that broke the whole code. Tricky MPI...
