MPI - Segmentation fault EXIT CODE: 139 - c

I have a simple MPI code which runs successfully, but just before terminating it shows the following error.
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= EXIT CODE: 139
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Below is my source code.
/*
AUTHOR ::: KHAYAM ANJAM
*/
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main (int argc, char *argv[])
{
    int rank, size, ball_value, ball_present;
    MPI_Init (&argc, &argv);
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
    MPI_Comm_size (MPI_COMM_WORLD, &size);
    srandom(rank);
    int delta = rand() % 13;
    int random = rand() % 5;
    if (random == 0) delta = -1*delta;
    if (rank == 0) {
        ball_present = 1;
        ball_value = 0;
    }
    else ball_present = 0;
    while (1) {
        if(ball_present == 0)
            MPI_Recv(&ball_value, 30, MPI_INT, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        ball_present = 1;
        printf("Task %d has Ball with value %d\n", rank, ball_value);
        if (ball_value == 1000) break;
        if (abs(ball_value) > 100) {
            int send_1000 = 1000;
            int i;
            for (i = 0; i < size; i++)
                if (i != rank) MPI_Send(&send_1000, 30, MPI_INT, i, 10, MPI_COMM_WORLD); //Broadcast to all others
            break;
        }
        ball_value += delta;
        int next_to_send = rand() % size;
        if (next_to_send != rank) {
            printf("Sending ball to %d\n", next_to_send);
            MPI_Send(&ball_value, 30, MPI_INT, next_to_send, 10, MPI_COMM_WORLD);
            ball_present = 0;
        }
    }
    MPI_Finalize();
    return 0;
}

I'm not too sure about the rest of the code (it seems OK, but I didn't look too closely), but what is certain is that you've got the MPI_Recv() / MPI_Send() pairs wrong: you send and receive arrays of 30 integers, while you only allocated memory for a single int on each side.
Try replacing the count of 30 with 1 in the three MPI_Send() / MPI_Recv() calls, and your code might just work.
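For illustration, here are those same three calls with the count corrected to match the single int actually passed (a minimal sketch; everything else in the program stays as it was):

    /* receive a single int instead of 30 */
    MPI_Recv(&ball_value, 1, MPI_INT, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    /* broadcast the termination value as a single int */
    MPI_Send(&send_1000, 1, MPI_INT, i, 10, MPI_COMM_WORLD);
    /* pass the ball on as a single int */
    MPI_Send(&ball_value, 1, MPI_INT, next_to_send, 10, MPI_COMM_WORLD);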

Related

Problems sending data via MPI_Bcast

I defined the Point structure with MPI_Type_contiguous. After doing this, I initialize my type, and then I want to send the centroids and have them printed by the processes other than the root process. I wrote this code, sending the centroids via MPI_Bcast, but when I try to print the centroids inside the for loop it gives me a series of errors:
[rob: 10463] *** Process received signal ***
[rob: 10463] Signal: Segmentation fault (11)
[rob: 10463] Signal code: Address not mapped (1)
[rob: 10463] Failing at address: 0x1000
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
mpirun noticed that process rank 2 with PID 0 on node rob exited on signal 11 (Segmentation fault).
Where am I going wrong?
typedef struct {
double x;
double y;
} Point;
void initialize(Point *centroids, int num_clusters) {
int dex;
srand(time(NULL));
for (dex = 0; dex < num_clusters; dex++) {
centroids[dex].x = ((double) (rand() % 1000)) / 1000;
centroids[dex].y = ((double) (2 * rand() % 1000)) / 1000;
}
}
int main(int argc, char *argv[]) {
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Datatype POINT;
MPI_Type_contiguous(2, MPI_DOUBLE, &POINT);
MPI_Type_commit(&POINT);
int num_clusters = 0;
Point* centroids;
if (rank == 0) {
num_clusters = 2;
centroids = calloc(sizeof(Point), num_clusters);
initialize(centroids, num_clusters);
}
MPI_Bcast(centroids, 2, POINT, 0, MPI_COMM_WORLD);
if (rank != 0) {
for (int i = 0; i < num_clusters; i++) {
printf("Centroid X: %f\n", centroids[i].x);
printf("Centroid Y: %f\n", centroids[i].y);
}
}
MPI_Finalize();
return 0;
}
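For reference, a minimal sketch of one way the broadcast can be made valid on every rank, assuming the goal is for all processes to receive the two centroids: in the code above, the non-root ranks have num_clusters == 0 and an unallocated centroids pointer when MPI_Bcast runs, so the count needs to be broadcast first and the buffer allocated on every rank before the POINT data is broadcast.

    int num_clusters = 0;
    if (rank == 0) num_clusters = 2;
    /* let every rank know how many centroids to expect */
    MPI_Bcast(&num_clusters, 1, MPI_INT, 0, MPI_COMM_WORLD);
    /* every rank allocates its own buffer before the data broadcast */
    Point *centroids = calloc(num_clusters, sizeof(Point));
    if (rank == 0) initialize(centroids, num_clusters);
    MPI_Bcast(centroids, num_clusters, POINT, 0, MPI_COMM_WORLD);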

Sum of the numbers 1 to 10000 using MPI_Scatter

I'm currently developing a piece of code that runs with 10 processes. Process 0 reads a total of 10000 values (from lab7.csv). Afterwards, it distributes the array to all of the processes. To do this, I created an array named intArray[10000] that exists in every process. The correct sum is 49893236.
The following code makes use of 10 processors to compute the sum of the numbers from 1 to 10000. Each processor calculates its part of this aggregate, and the result is shown on the screen.
As a result, the following error is shown.
I can't figure out what the problem is. Please assist me in this matter.
#include <mpi.h>
#include <stdio.h>
#include <string.h>
int main()
{
int rank, nodes;
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nodes);
MPI_Status status;
int intArray[10000];
int subIntArray[1000];
if(rank == 0) {
// Substitute the full file path for the string file_path
FILE *fp = fopen("./lab7.csv", "r");
int i = 0;
int num;
if (!fp) {
printf("Can't open file\n");
} else {
while (fscanf(fp, "%d", &num) > 0)
{
intArray[i] = num;
i++;
}
// Close the file
fclose(fp);
}
}
MPI_Scatter(intArray, 10000, MPI_INT, subIntArray, 1000, MPI_INT, 0, MPI_COMM_WORLD);
int ans = 0;
int total = 0;
int start = rank * 1000;
int end = start + 999;
for(int i = start; i <= end; i++) {
ans = ans + subIntArray[i];
}
if(rank != 0) {
MPI_Ssend(&ans, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
} else {
total = ans;
for(int j = 1; j < 10; j++) {
MPI_Recv(&ans, 1, MPI_INT, j, 0, MPI_COMM_WORLD, &status);
total += ans;
}
printf("Total is %d\n", total);
}
MPI_Finalize();
return 0;
}
The PBS job file is as follows:
#PBS -l nodes=2
#PBS -l walltime=00:02:00
#PBS -l select=5
cat $PBS_NODEFILE
NPROC=10
cd $PBS_O_WORKDIR
MPISIZE=$NPROC
MPIPROG=`basename $PBS_JOBNAME .pbs`
echo 'Running MPI program' $MPIPROG 'on' $MPISIZE 'processes'
echo 'Started at' `date`
echo '--------------------------------------------------------------------------------'
(time mpirun -n $MPISIZE ./$MPIPROG) 2>&1
echo '--------------------------------------------------------------------------------'
echo 'Finished at' `date`
This is the error message that is shown on the terminal.
The orientation of MPI_Scatter is a bit different from the one you have in mind.
You say: I have NTOT data elements and I want to send them to NODECOUNT nodes, so I want each node to process NTOT / NODECOUNT elements. (This doesn't work if NTOT is not an exact multiple of NODECOUNT; see the MPI_Scatterv sketch after the corrected code below.)
But MPI_Scatter is oriented the other way: I have NPER elements that each node should process, and NODECOUNT nodes, so the total number of elements is NTOT = NPER * NODECOUNT. This is how the manpage example shows it.
You want to give a count of NPER to MPI_Scatter and not NTOT. And, you want the send and receive counts to match.
Also, because MPI_Scatter does the split for you, the slave nodes should not use start/end as you calculated, but always do:
start = 0;
end = NPER - 1;
Also, in your code ...
You were indexing into subIntArray as if you could access elements 0-9999 instead of 0-999, so you were going beyond the end of the array and had UB (undefined behavior).
It's a bit shaky to hardwire 10, 1000, 10000 everywhere. Better to use some #defines and the actual node count from the nodes variable.
And you assume you have 10000 valid input data elements, rather than calculating this from your i index variable in the fscanf loop.
Here is the corrected code, with some extra debug code I used.
I used preprocessor conditionals to denote old vs new code (e.g.):
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
Also, I didn't have your data files, so I had to synthesize the input data.
Anyway, here it is:
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
int rank;
int nodes;
FILE *xfdbg;
#define dbgprtattr(_lvl) \
__attribute__((__format__(__printf__,_lvl,_lvl + 1)))
#if DEBUG || _USE_ZPRT_
#define dbgprt(_fmt...) \
_dbgprt(_fmt)
#else
#define dbgprt(_fmt...) \
do { } while (0)
#endif
void dbgprtattr(1)
_dbgprt(const char *fmt,...)
{
va_list ap;
char buf[10000];
char *bp = buf;
bp += sprintf(bp,"[%d] ",rank);
va_start(ap,fmt);
bp += vsprintf(bp,fmt,ap);
va_end(ap);
fputs(buf,xfdbg);
fflush(xfdbg);
}
//#define NTOT 10000
//#define NPER (NTOT / nodes)
int
main()
{
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nodes);
MPI_Status status;
char logf[100];
sprintf(logf,"log_%2.2d",rank);
xfdbg = fopen(logf,"w");
#if 0
int BIGSIZE = nodes * 1000;
int NPER = NTOT / nodes;
#else
//int NPER = 1000;
int NPER = 50;
//int NTOT = NPER * nodes;
#endif
int val;
int truetotal = 0;
#if 0
int intArray[NTOT];
#else
int intArray[nodes][NPER];
#endif
#if 1
int subIntArray[NPER];
#else
int subIntArray[NTOT];
#endif
if (rank == 0) {
#if 0
// Substitute the full file path for the string file_path
FILE *fp = fopen("./lab7.csv", "r");
int i = 0;
int num;
if (!fp) {
printf("Can't open file\n");
}
else {
while (fscanf(fp, "%d", &num) > 0) {
intArray[i] = num;
i++;
}
// Close the file
fclose(fp);
}
#endif
for (int nd = 0; nd < nodes; ++nd) {
for (int i = 0; i < NPER; ++i) {
val = (nd << 16) | i;
intArray[nd][i] = val;
truetotal += val;
}
}
}
dbgprt("main: hello\n");
#if 0
MPI_Scatter(intArray, NTOT, MPI_INT,
subIntArray, NPER, MPI_INT,
0, MPI_COMM_WORLD);
dbgprt("main: post\n");
#endif
#if 1
MPI_Scatter(intArray, NPER, MPI_INT,
subIntArray, NPER, MPI_INT,
0, MPI_COMM_WORLD);
dbgprt("main: post\n");
#endif
#if 0
MPI_Scatter(intArray, NTOT, MPI_INT,
subIntArray, NTOT, MPI_INT,
0, MPI_COMM_WORLD);
dbgprt("main: post\n");
#endif
//sleep(10);
int ans = 0;
int total = 0;
#if 0
int start = rank * NPER;
int end = start + NPER - 1;
#else
int start = 0;
int end = NPER - 1;
#endif
dbgprt("main: START start=%d end=%d\n",start,end);
for (int i = start; i <= end; i++) {
dbgprt("main: DATA i=%d sub=%8.8X\n",i,subIntArray[i]);
ans = ans + subIntArray[i];
}
dbgprt("main: loopdone ans=%d\n",ans);
if (rank != 0) {
MPI_Ssend(&ans, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
else {
total = ans;
for (int j = 1; j < nodes; j++) {
MPI_Recv(&ans, 1, MPI_INT, j, 0, MPI_COMM_WORLD, &status);
total += ans;
}
printf("Total is %d\n", total);
printf("Total is %d (TRUE)\n", truetotal);
}
fclose(xfdbg);
MPI_Finalize();
return 0;
}
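On the caveat above about NTOT not being an exact multiple of NODECOUNT: MPI_Scatterv lets the root send a different count to each rank. A minimal sketch, assuming a flat intArray of NTOT ints on rank 0 and the rank/nodes variables as above (sendcounts, displs and mychunk are illustrative names):

    /* split NTOT elements as evenly as possible across 'nodes' ranks */
    int *sendcounts = malloc(nodes * sizeof(int));
    int *displs = malloc(nodes * sizeof(int));
    int off = 0;
    for (int r = 0; r < nodes; r++) {
        sendcounts[r] = NTOT / nodes + (r < NTOT % nodes ? 1 : 0);
        displs[r] = off;
        off += sendcounts[r];
    }
    int mycount = sendcounts[rank];
    int *mychunk = malloc(mycount * sizeof(int));
    MPI_Scatterv(intArray, sendcounts, displs, MPI_INT,
                 mychunk, mycount, MPI_INT,
                 0, MPI_COMM_WORLD);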

MPI Search In Array

I'm trying to find a specific value inside an array, using parallel search with MPI. When my code finds the value, it shows an error.
ERROR
Assertion failed in file src/mpid/ch3/src/ch3u_buffer.c at line 77: FALSE
memcpy argument memory ranges overlap, dst_=0x7ffece7eb590 src_=0x7ffece7eb590 len_=4
PROGRAM
const char *FILENAME = "input.txt";
const size_t ARRAY_SIZE = 640;
int main(int argc, char **argv)
{
int *array = malloc(sizeof(int) * ARRAY_SIZE);
int rank,size;
MPI_Status status;
MPI_Request request;
int done,myfound,inrange,nvalues;
int i,j,dummy;
/* Let the system do what it needs to start up MPI */
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&size);
myfound=0;
if (rank == 0)
{
createFile();
array = readFile(FILENAME);
}
MPI_Bcast(array, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Irecv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
MPI_Test(&request, &done, &status);
nvalues = ARRAY_SIZE / size; //EACH PROCESS RUNS THAT MUCH NUMBER IN ARRAY
i = rank * nvalues; //OFFSET FOR EACH PROCESS INSIDE THE ARRAY
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues); //LIMIT OF THE OFFSET
while (!done && inrange)
{
if (array[i] == 17)
{
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
}
printf("P:%d - %d - %d\n", rank, i, array[i]);
MPI_Test(&request, &done, &status);
++i;
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
}
if (!myfound)
{
printf("P:%d stopped at global index %d\n", rank, i - 1);
}
MPI_Finalize();
}
The error is somewhere in here, because when I put a value that is never found (for example -5) into the if condition, the program runs smoothly.
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
Thanks
Your program is invalid with respect to the MPI standard because you use the same buffer (&dummy) for both MPI_Irecv() and MPI_Send().
You can either use two distinct buffers (e.g. dummy_send and dummy_recv), or, since you do not seem to care about the value of dummy, use NULL as the buffer and send/receive zero-size messages.
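For illustration, a minimal sketch of both variants (only the lines that change; the rest of the program stays as in the question):

    /* Variant 1: distinct buffers for the posted receive and the later sends */
    int dummy_send = 1, dummy_recv;
    MPI_Irecv(&dummy_recv, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
    /* ... later, when the value is found ... */
    MPI_Send(&dummy_send, 1, MPI_INT, j, 1, MPI_COMM_WORLD);

    /* Variant 2: zero-size messages, since only the notification matters */
    MPI_Irecv(NULL, 0, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
    /* ... later, when the value is found ... */
    MPI_Send(NULL, 0, MPI_INT, j, 1, MPI_COMM_WORLD);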

distributed algorithm in C

I am a beginner in C. I have to create a distributed architecture with the MPI library. The code is the following:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
int main(int argc, char **argv)
{
int N, w = 1, L = 2, M = 50; // with N number of threads
int T= 2;
int myid;
int buff;
float mit[N][T]; // I initialize a 2d array
for(int i = 0; i < N; ++i){
mit[i][0]= M / (float) N;
for (int j = 1; j < T; ++j){
mit[i][j] = 0;
}
}
float tab[T]; // 1d array
MPI_Status stat;
/*********************************************
start
*********************************************/
MPI_Init(&argc,&argv); // Initialisation
MPI_Comm_size(MPI_COMM_WORLD, &N);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
for(int j = 0; j < T; j++) {
for(int i = 0; i < N; i++) { // I iterate for each slave
if (myid !=0) {
float y = ((float) rand()) / (float) RAND_MAX;
mit[i][j + 1] = mit[i][j]*(1 + w * L * y);
buff=mit[i][j+1];
MPI_Send(&buff, 128, MPI_INT, 0, 0, MPI_COMM_WORLD); // I send the variable buff to the master
buff=0;
}
if( myid == 0 ) { // Master
for(int i = 1; i < N; i++){
MPI_Recv(&buff, 128, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
tab[j] += buff; // I need to receive all the variables buff sent by the salves, sum them and stock into the tab at the index j
}
printf("\n%.20f\n",tab[j]); // I print the result of the sum at index j
}
}
}
MPI_Finalize();
return 0;
}
I use the command in the terminal: mpicc .c -o my_file to compile the program
Then mpirun -np 101 my_file_c to start the program with 101 threads
But the problem is that I get the following error in the terminal:
It seems that [at least] one of the processes that was started with
> mpirun did not invoke MPI_INIT before quitting (it is possible that
> more than one process did not invoke MPI_INIT -- mpirun was only
> notified of the first one, which was on node n0).
>
> mpirun can *only* be used with MPI programs (i.e., programs that
> invoke MPI_INIT and MPI_FINALIZE). You can use the "lamexec" program
> to run non-MPI programs over the lambooted nodes.
It seems that I have a problem with the master, but I don't know why...
Any idea ???
Thank you :)
This behavior is very likely the result of memory corruption.
You cannot do this:
int buff=mit[i][j+1];
MPI_Send(&buff, 128, MPI_INT, ...);
Depending on what you want to achieve, you can instead try:
int buff=mit[i][j+1];
MPI_Send(&buff, 1, MPI_INT, ...);
// ...
MPI_Recv(&buff, 1, MPI_INT, ...);
or
int *buff=&mit[i][j+1];
MPI_Send(buff, 128, MPI_INT, ...);
// fix MPI_Recv()

Initialize an array using openmpi once

I am trying to run some tests using Open MPI, processing data in an array by splitting up the work across nodes (the second part is with matrices). I am running into some problems now because the data array is being initialized every time, and I don't know how to prevent this from happening.
How, using ANSI C, can I create a variable-length array that is initialized only once with Open MPI? I tried making it static and global, but nothing worked.
#define NUM_THREADS 4
#define NUM_DATA 1000
static int *list = NULL;
int main(int argc, char *argv[]) {
int numprocs, rank, namelen;
char processor_name[MPI_MAX_PROCESSOR_NAME];
int n = NUM_DATA*NUM_DATA;
printf("hi\n");
int i;
if(list == NULL)
{
printf("ho\n");
list = malloc(n*sizeof(int));
for(i = 0 ; i < n; i++)
{
list[i] = rand() % 1000;
}
}
int position;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Get_processor_name(processor_name, &namelen);
printf("Process %d on %s out of %d\n", rank,processor_name, numprocs);
clock_t start = clock();
position = n / NUM_THREADS * rank;
search(list,position, n / NUM_THREADS * (rank + 1));
printf("Time elapsed: %f seconds\n", ((double)clock() - (double)start) /(double) CLOCKS_PER_SEC);
free(list);
MPI_Finalize();
return 0;
}
Probably the easiest way is to have the rank 0 process do the initialization while the other processes block. Then once the initialization is done, have them all start their work.
A basic example trying to call your search function (NB: it's dry-coded):
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define NUM_THREADS 4
#define NUM_DATA 1000

/* search() comes from the question's code; its prototype/definition must be provided */

int main(int argc, char *argv[]) {
    int *list;
    int numprocs, rank, namelen, i, n;
    int chunksize, offset;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    n = NUM_DATA * NUM_DATA;
    MPI_Status stat;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);
    //note you'll need to handle n%NUM_THREADS != 0, but I'm ignoring that for now
    chunksize = n / NUM_THREADS;
    if (rank == 0) {
        //Think of this as a master process
        //Do your initialization in this process
        list = malloc(n*sizeof(int));
        for (i = 0; i < n; i++)
        {
            list[i] = rand() % 1000;
        }
        // Once you're ready, send each slave process a chunk to work on
        offset = chunksize;
        for (i = 1; i < numprocs; i++) {
            MPI_Send(&list[offset], chunksize, MPI_INT, i, 0, MPI_COMM_WORLD);
            offset += chunksize;
        }
        search(list, 0, chunksize);
        //If you need some sort of response back from the slaves, do a recv loop here
    } else {
        // If you're not the master, you're a slave process, so wait to receive data
        list = malloc(chunksize*sizeof(int));
        MPI_Recv(list, chunksize, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        // Now you can do work on your portion
        search(list, 0, chunksize);
        //If you need to send something back to the master, do it here.
    }
    MPI_Finalize();
    return 0;
}
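As a design note, the manual chunked MPI_Send/MPI_Recv distribution above is essentially what the MPI_Scatter collective does in one call. A minimal sketch of that variant, assuming the same list and chunksize as above and that n is an exact multiple of the process count:

    /* one collective instead of the send/recv loop; 'list' only needs to be
       valid on rank 0, so the other ranks can pass NULL as the send buffer */
    int *chunk = malloc(chunksize * sizeof(int));
    MPI_Scatter(rank == 0 ? list : NULL, chunksize, MPI_INT,
                chunk, chunksize, MPI_INT,
                0, MPI_COMM_WORLD);
    search(chunk, 0, chunksize);   /* every rank, including 0, works on its chunk */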
