MPI debugging Segmentation fault - C

I'm trying to sort an array of random numbers using odd-even transposition sort, but I keep getting a segmentation fault when running my code:
[islb:48966] *** Process received signal ***
[islb:48966] Signal: Segmentation fault (11)
[islb:48966] Signal code: Address not mapped (1)
[islb:48966] Failing at address: 0x28
[islb:48966] [ 0] /lib64/libpthread.so.0(+0xf810)[0x7fc3da4cb810]
[islb:48966] [ 1] /lib64/libc.so.6(memcpy+0xa3)[0x7fc3da1c7cf3]
[islb:48966] [ 2] /usr/local/lib/libopen-pal.so.6(opal_convertor_unpack+0x10b)[0x7fc3d9c372db]
[islb:48966] [ 3] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_request_progress_match+0x138)[0x7fc3d58507a8]
[islb:48966] [ 4] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_req_start+0x1b1)[0x7fc3d5850d11]
[islb:48966] [ 5] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv+0x139)[0x7fc3d5849489]
[islb:48966] [ 6] /usr/local/lib/libmpi.so.1(MPI_Recv+0xc0)[0x7fc3da742f40]
[islb:48966] [ 7] oddEven[0x40115a]
[islb:48966] [ 8] /lib64/libc.so.6(__libc_start_main+0xe6)[0x7fc3da161c36]
[islb:48966] [ 9] oddEven[0x400c19]
[islb:48966] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 48966 on node islb exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
The program allocates the array; the error seems to occur when it comes to scattering it amongst the processes, as the print statement directly after the scatter call only prints for process 0 before the error message appears.
Here's my code:
#include <stdio.h>
#include <math.h>
#include <malloc.h>
#include <time.h>
#include <string.h>
#include "mpi.h"
const int MAX = 10000;
int myid, numprocs;
int i, n, j, k, arrayChunk, minindex;
int A, B;
int temp;
int swap(int *x, int *y) {
temp = *x;
*x = *y;
*y = temp;
return 0;
}
int main(int argc, char **argv) {
int* arr = NULL;
int* value = NULL;
MPI_Status status;
//int arr[] = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
srand(time(0));
time_t t1, t2;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
if (myid == 0) {
printf("Enter the number of elements you would like in the array \n");
scanf("%d", &n);
arrayChunk = n/numprocs;
//printf("cpus: %d, #s per cpu: %d\n", numprocs, arrayChunk);
//Allocate memory for the array
arr = malloc(n * sizeof(int));
value = malloc(n * sizeof(int));
// Generate an array of size n random numbers and prints them
printf("Elements in the array: ");
for (i = 0; i < n; i++) {
arr[i] = (rand() % 100) + 1;
printf("%d ", arr[i]);
}
printf("\n");
time(&t1);
}
if ((n % numprocs) != 0) {
if (myid == 0)
printf("Number of Elements are not divisible by numprocs \n");
MPI_Finalize();
return(0);
}
// Broadcast the size of each chunk
MPI_Bcast(&arrayChunk, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(&arr, arrayChunk, MPI_INT, &value, arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);
printf("Processor %d receives %d\n", myid, value[0]);
for (i = 0; i < numprocs; i++) {
if (i % 2 == 0) {
if (myid%2 == 0) {
MPI_Send(&value[0], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD);
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD, &status);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
} else {
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD, &status);
MPI_Send(&value[0], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
for (j = 0; j < arrayChunk; j++) {
swap(&value[j], &value[j + arrayChunk]);
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
}
} else {
if ((myid%2 == 1) && (myid != (numprocs-1))) {
MPI_Send(&value[0], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD);
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD, &status);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
} else if (myid != 0 && myid != (numprocs-1)) {
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD, &status);
MPI_Send(&value[0], 1, MPI_INT, myid - 1, 0, MPI_COMM_WORLD);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
for (j = 0; j < arrayChunk; j++) {
swap(&value[j], &value[j + arrayChunk]);
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
}
}
}
MPI_Gather(&value[0], arrayChunk, MPI_INT, &arr[0], arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);
if (myid == 0) {
time(&t2);
printf("Sorted array: ");
for (i = 0; i < n; i++) {
printf("%d ", arr[i]);
}
printf("\n");
printf("Time in sec. %f\n", difftime(t2, t1));
}
// Free allocated memory
if (arr != NULL) {
free(arr);
arr = NULL;
free(value);
value = NULL;
}
MPI_Finalize();
return 0;
}
I'm not very familiar with C, and it could well be that I've used malloc and/or addresses and pointers incorrectly, so it's probably something simple.
Sorry for the amount of code, but I thought it would be better to supply all of it to allow for proper debugging.

The problem is in your MPI_Scatter command. You try to scatter the information and store it in value, but if you look above that code, only rank 0 has allocated any memory for value. When any and all other ranks try to store data into value, you will get a segmentation fault (and indeed you do). Instead, remove the value = malloc(...); line from inside the if block and put it after the MPI_Bcast, as value = malloc(arrayChunk * sizeof(int));. I've not looked through the rest of the code to see if there are any issues elsewhere as well, but that is likely the cause of the initial seg-fault.
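A minimal sketch of that change (buffer names follow the question's code; the doubled allocation size and the dropped & are extra observations from the question's code, not part of the fix described above):
/* every rank allocates its own receive buffer once the chunk size is known */
MPI_Bcast(&arrayChunk, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* the later exchange receives into value[arrayChunk..2*arrayChunk-1],
   so the buffer seems to need room for two chunks */
value = malloc(2 * arrayChunk * sizeof(int));
/* pass the buffers themselves (arr, value), not the addresses of the pointer variables */
MPI_Scatter(arr, arrayChunk, MPI_INT, value, arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);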

I would build the program with debugging info (most likely the -g compile flag), try getting a core dump, and use the gdb debugger to locate the bug. A core file is created when a process crashes, and it holds the process memory image at the moment of the crash.
If no core dump file is created after the program crashes, you'll need to figure out how to enable it on your system. You may create a simple buggy program (for example with a = x/0; or a similar error) and experiment a bit. The core dump may be called core, PID.core (PID being the number of the crashed process), or something similar. Sometimes it is enough to set the core file size to unlimited using ulimit. Also check the kernel.core_* sysctls on Linux.
Once you have the core dump, you can use it with gdb or a similar debugger (e.g. ddd):
gdb executable_file core
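For example, a typical end-to-end sequence might look like this (assuming an Open MPI mpicc wrapper, a bash-like shell, and a source file named oddEven.c; adjust names to your system):
mpicc -g -O0 oddEven.c -o oddEven    # build with debugging info and no optimisation
ulimit -c unlimited                  # allow core files to be written in this shell
mpirun -np 4 ./oddEven               # reproduce the crash; a core file should appear
gdb ./oddEven core                   # then 'bt' prints the backtrace at the crash site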

Related

MPI_Gather doesn't receive data

The program should take two command-line arguments: N, the number of items each worker should generate, and H, the highest value in the range of random numbers generated by each worker. Each worker makes a list of those random values, and BigList is where I'm trying to gather them all back to, but nothing shows up in BigList. So for example:
Running mpirun -np 3 a.out 4 20 gets:
RANK: 1 --- NUM: 18
RANK: 1 --- NUM: 6
RANK: 1 --- NUM: 12
RANK: 1 --- NUM: 10
RANK: 2 --- NUM: 9
RANK: 2 --- NUM: 3
RANK: 2 --- NUM: 6
RANK: 2 --- NUM: 5
and BigList is empty, when I'd expect it to contain every number listed above.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main(int argc, char* argv[]){
double t1, t2;
MPI_Init(&argc, &argv);
int rank;
int wsize;
int N = 10, H = 5;
int num, k, i;
int locarr[25];
int bigList[300];
srand(time(NULL));
if(argc > 1){
N = atoi(argv[1]);
H = atoi(argv[2]);
}
t1 = MPI_Wtime();
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
if( rank == 0){
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else{
for(i = 0; i < N; i++){
locarr[i] = (((rand() % H) + 1) / rank);
printf("RANK: %d --- NUM: %d\n", rank, locarr[i]);
}
}
MPI_Gather(&locarr, N, MPI_INT, bigList, N, MPI_INT, 0, MPI_COMM_WORLD);
if( rank == 0){
printf("BigList: ");
for(k = 0; k < (rank * N); k++){
printf(" %d", bigList[k]);
}
printf("\n");
}
t2 = MPI_Wtime();
// printf("\nMPI_Wtime(): %f\n", t2 - t1);
MPI_Finalize();
return 0;
}
Let me expand on the comment by Gilles Gouaillardet.
The MPI_Gather call is written correctly. To get the expected results, two changes need to be made.
MPI_Bcast is a collective operation: all processes must call it. So the code should be:
if( rank == 0){
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else{
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
for(i = 0; i < N; i++){
locarr[i] = (((rand() % H) + 1) / rank);
printf("RANK: %d --- NUM: %d\n", rank, locarr[i]);
}
}
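Equivalently, since every rank has to issue the broadcasts anyway, they can be moved out of the branch entirely (just a sketch of the same thing):
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (rank != 0) {
    for (i = 0; i < N; i++) {
        locarr[i] = (((rand() % H) + 1) / rank);
        printf("RANK: %d --- NUM: %d\n", rank, locarr[i]);
    }
}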
Also, rank 0 prints the contents of bigList, but the loop condition is k < rank*N, which for rank 0 is always false (k < 0*N); as a result the loop body never executes and no values are printed. It should use the world size (wsize) instead of rank:
if( rank == 0){
printf("BigList: ");
for(k = 0; k < wsize*N; k++){
printf(" %d", bigList[k]);
}
printf("\n");
}
The print loop above still shows some garbage values at the start of the bigList array, because rank 0 contributes its own (never filled) locarr chunk to the gather. Starting the loop at N skips that chunk and solves the problem:
if (rank == 0)
{
printf("BigList: ");
for (k = N; k < wsize * N; k++)
{
printf(" %d", bigList[k]);
}
printf("\n");
}
The full corrected code for the problem is:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main(int argc, char *argv[])
{
double t1, t2;
MPI_Init(&argc, &argv);
int rank;
int wsize;
int N = 10, H = 5;
int num, k, i;
int locarr[25];
int bigList[300];
srand(time(NULL));
if (argc > 1)
{
N = atoi(argv[1]);
H = atoi(argv[2]);
}
t1 = MPI_Wtime();
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
if (rank == 0)
{
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else
{
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
for (i = 0; i < N; i++)
{
locarr[i] = (((rand() % H) + 1) / rank);
printf("RANK: %d --- NUM: %d\n", rank, locarr[i]);
}
}
MPI_Gather(&locarr, N, MPI_INT, bigList, N, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
printf("BigList: ");
for (k = N; k < wsize * N; k++)
{
printf(" %d", bigList[k]);
}
printf("\n");
}
MPI_Finalize();
return 0;
}

Use MPI_Sendrecv for Conway Game of Life but the program can't exchange data in the borders

I'm trying to write an implementation of Conway's Game of Life, using the Rabbits pattern. I used a 2D Cartesian topology to build the process group, and the processes communicate with MPI_Sendrecv. But this code doesn't work: it just hangs there without any response when I run it. I've spent a long time trying to find the problem but have made no progress. Could you please help me figure it out? I'd be very glad for that!
#include <stdio.h>
#include "mpi.h"
#include <math.h>
#include <stdlib.h>
#define array 20
#define arrayhalf (array/2)
main(int argc, char *argv[])
{
int ndims = 2, ierr;
int p, my_rank, my_cart_rank;
MPI_Comm comm2d;
MPI_Datatype newtype;
int dims[ndims], coord[ndims];
int wrap_around[ndims];
int reorder, nrows, ncols;
int x[arrayhalf+2][arrayhalf+2], x2[arrayhalf+2][arrayhalf+2], x_rev[array+4][array+4];
int left, right, down, top;
MPI_Status status;
int tag_up = 20, tag_down =21, tag_left = 22, tag_right = 23;
long start, stop;
/*** start up initial MPI environment ***/
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
/* hardcore 2 processes in each dimension */
nrows = ncols = (int) sqrt(p);
dims[0] = dims[1] = 2;
/* create cartesian topology for processes */
MPI_Dims_create(p, ndims, dims);
/*if (my_rank == 0)
printf("PW[%d]/[%d%]: PEdims = [%d x %d] \n", my_rank, p, dims[0], dims[1]);*/
/* create cartesian mapping, and check it is created either correct or wrong */
wrap_around[0] = wrap_around[1] = 1; /*set periodicity to be true */
reorder = 0;
ierr = 0;
ierr = MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, wrap_around, reorder, &comm2d);
if (ierr != 0)
printf("ERROR[%d] creating CART\n", ierr);
MPI_Type_vector( arrayhalf, 1, arrayhalf+2, MPI_INT, &newtype);
MPI_Type_commit( &newtype );
/* get the neighbour process, which is useful for halo exchange */
int SHIFT_ROW = 0;
int SHIFT_COL = 1;
int DISP = 1;
/*** load pattern ***/
/* initialize the data array */
int i, j ;
for (i = 0; i < arrayhalf + 2 ; i++)
for (j = 0; j < arrayhalf + 2; j++)
{
x[i][j] = 0;
x2[i][j] = 0;
}
if (my_rank == 0)
{
int r,c;
r = arrayhalf / 2;
c = arrayhalf / 2;
/* rabbits pattern
1 1 1 1
1 1 1 1
1
*/
x[r][c] = 1;
x[r][c+4] = 1;
x[r][c+5] = 1;
x[r][c+6] = 1;
x[r+1][c] = 1;
x[r+1][c+1] = 1;
x[r+1][c+2] = 1;
x[r+1][c+5] = 1;
x[r+2][c+1] = 1;
}
/*** calculate the next generation ***/
int row, col;
int steps;
steps = atoi(argv[1]); /* get the generation number from command line */
start = MPI_Wtime();
int soc;
int destination;
for (i = 1; i <= steps; i++)
{
/*** use halo exchange for boundary elements ***/
int * send_buffer = (int*) malloc((arrayhalf)*sizeof(int));
int * recv_buffer = (int*) malloc((arrayhalf)*sizeof(int));
/*int * send_buffer = (int *) calloc(arrayhalf,sizeof(int));
int * recv_buffer = (int *) calloc(arrayhalf,sizeof(int));
*/
/* to up */
MPI_Cart_shift(comm2d, 1, 1, &soc,&destination);
MPI_Sendrecv( &x[1][1], arrayhalf, MPI_INT, destination, tag_up,& x[arrayhalf + 1][1], arrayhalf, MPI_INT, soc, tag_up, comm2d, &status );
/* to down */
MPI_Cart_shift(comm2d, 1, 1, &destination,&soc);
MPI_Sendrecv( &x[arrayhalf][1], arrayhalf, MPI_INT, destination, tag_down,& x[0][1], arrayhalf, MPI_INT, soc, tag_down, comm2d, &status);
/* to left */
MPI_Cart_shift(comm2d, 0, 1, &destination,&soc);
MPI_Sendrecv( &x[1][1], 1,newtype, destination, tag_left,& x[1][arrayhalf+1], 1, newtype, soc, tag_left, comm2d, &status );
/*for (j=0;j<arrayhalf;j++) {
send_buffer[j]=x[j+1][1];
}
MPI_Sendrecv( send_buffer, arrayhalf,MPI_INT, destination, tag_left,recv_buffer, arrayhalf, MPI_INT, soc, tag_left, comm2d, &status );
for (j=0;j<arrayhalf;j++) {
x[j+1][arrayhalf+1]=recv_buffer[j];
}
*/
/* to right */
MPI_Cart_shift(comm2d, 0, 1, &soc,&destination);
MPI_Sendrecv( &x[1][arrayhalf], 1, newtype, destination, tag_right, &x[1][0], 1, newtype, soc, tag_right, comm2d, &status );
/*for (j=0;j<arrayhalf;j++) {
send_buffer[j]=x[j+1][arrayhalf];
}
MPI_Sendrecv( send_buffer, arrayhalf,MPI_INT, destination, tag_right,recv_buffer, arrayhalf, MPI_INT, soc, tag_right, comm2d, &status );
for (j=0;j<arrayhalf;j++) {
x[j+1][1]=recv_buffer[j];
}
*/
/*** sum the neighbour values and get the next generation ***/
for (row = 1; row < arrayhalf; row++)
{
for (col = 1; col < arrayhalf; col++)
{
int neighbor;
neighbor = x[row - 1][col - 1] + x[row - 1][col] + x[row - 1][col + 1] + x[row][col - 1] +
x[row][col + 1] +
x[row + 1][col - 1] + x[row + 1][col] + x[row + 1][col + 1];
if (neighbor == 3)
{
x2[row][col] = 1;
}
else if (x[row][col] == 1 && neighbor == 2)
{
x2[row][col] = 1;
}
else
{
x2[row][col] = 0;
}
}
}
/* used to be swap */
for (row = 1; row < arrayhalf; row++)
{
for (col = 1; col < arrayhalf; col++)
{
x[row][col] = x2[row][col];
}
}
free(send_buffer);
free(recv_buffer);
}
/*** print the final generation ***/
int population = 0;
int* A;
int process_num = dims[0]*dims[1];
int row_indx;
int col_indx;
int k;
if(my_rank == 0)
{
A = (int*) malloc((arrayhalf+2)*(arrayhalf+2)*sizeof(int));
for (k= 1; k< process_num; k++)
{
MPI_Recv(A,(arrayhalf+2)*(arrayhalf+2), MPI_INT,k, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for (i = 0; i<arrayhalf+2; i++)
{
for (j = 0; j<arrayhalf+2; j++)
{
row_indx = (k%dims[1])*(arrayhalf+2)+i;
col_indx = (k/dims[0]*(arrayhalf+2))+j;
x_rev[row_indx][col_indx] = A[i*(arrayhalf+2)+j];
}
}
}
for (i = 0; i<arrayhalf+2; i++)
{
for (j = 0; j<arrayhalf+2; j++)
{
x_rev[i][j] = x[i][j];
}
}
for (row = 0; row < array+4; row++) {
for (col = 0; col < array+4; col++)
{
printf("%2d",x_rev[row][col]);
if(x_rev[row][col]==1)
{
population = population + 1;
}
}
printf("\n");
}
stop = MPI_Wtime();
printf("Running Time: %f\n ",stop-start);
printf("Population: %d\n",population);
printf("Generation: %d\n",steps);
}
else{
A = (int*) malloc((array+4)*(array+4)*sizeof(int));
for (i=0; i< arrayhalf +2; i++)
{
for(j = 0; j<arrayhalf+2; j++)
{
A[i*(arrayhalf+2)+j] = x[i][j];
}
}
MPI_Send(A,(arrayhalf+2)*(arrayhalf+2),MPI_INT,0,0,MPI_COMM_WORLD);
}
MPI_Comm_free( &comm2d );
MPI_Type_free( &newtype );
free(A);
MPI_Finalize();
}
I think I found the error. It is at line 176: rank 0 is trying to listen for a message from rank 0, but rank 0 is not sending a message to itself. You should start the loop from 1, not 0.
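In other words, the collection loop on rank 0 should look something like this sketch (following the answer's suggestion; variable names are taken from the question's code):
for (k = 1; k < process_num; k++) {   /* start at 1: rank 0 does not send to itself */
    MPI_Recv(A, (arrayhalf+2)*(arrayhalf+2), MPI_INT, k, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    /* ... unpack A into x_rev at the offsets for rank k ... */
}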

MPI_Scatter and MPI_Reduce

I'm trying to find the max of randomly generated numbers. Any thoughts on this...
I am using MPI_Scatter to split the randomly generated numbers evenly among the processes, and MPI_Reduce to get the MAX from each process.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <mpi.h>
#define atmost 1000
int find(int* partial_max, int from, int to){
int i, max;
printf("%d----%d\n", from, to);
max = partial_max[from];
for (i = from + 1; i <= to; i++)
if (partial_max[i] > max)
max = partial_max[i];
return max;
}
int main(){
int i, j,n, comm_sz, biggest, b, my_rank, q,result;
//1. Declare array of size 1000
int a[atmost];
//2. generate random integer of 0 to 999
srand((unsigned)time(NULL));
n = rand() % atmost;
//n = 10;
for (i = 0; i <= n; i++){
a[i] = rand() % atmost;
printf("My Numbers: %d\n", a[i]);
//a[i] = i;
}
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
//j is the size we will split each segment into
j = (n / (comm_sz-1));
int partial_max[j];
int receive_vector[j];
//Send random numbers equally to each process
MPI_Scatter(a, j, MPI_INT, receive_vector,
j, MPI_INT, 0, MPI_COMM_WORLD);
int localmax;
localmax = -1;
for (i = 0; i <= comm_sz-1; i++)
if (receive_vector[i] > localmax)
localmax = receive_vector[i];
// Get Max from each process
//MPI_Reduce(receive_vector, partial_max, j, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
MPI_Reduce(&localmax, &result, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
if (my_rank == 0)
{
/*
biggest = -1;
for (i = 0; i < comm_sz - 1; i++){
if (i == comm_sz - 2)
b = find(partial_max, i * j, n - 1);
else
b = find(partial_max, i * j, (i + 1) * j - 1);
if (b > biggest)
biggest = b;
}*/
printf("-------------------\n");
printf("The biggest is: %d\n", result);
printf("The n is: %d\n", n);
}
MPI_Finalize();
return 0;
}
You have a few bugs there:
You select (a different value of) n in each process. It is better to select it on rank 0 and broadcast it to the rest of the processes.
When calculating j you divide by comm_sz-1 instead of comm_sz.
You assume n is divisible by comm_sz and that each process receives exactly the same amount of numbers to process.
You loop with i going up to comm_sz-1 instead of going up to j.
This is what I could find in a quick look.
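Putting those points together, the middle of the program might look something like this sketch (it still assumes n is divisible by comm_sz, per the third point; variable names follow the question's code):
if (my_rank == 0)
    n = rand() % atmost;              /* pick n once, on rank 0 only (which also fills a[]) */
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
j = n / comm_sz;                      /* divide by comm_sz, not comm_sz - 1 */
MPI_Scatter(a, j, MPI_INT, receive_vector, j, MPI_INT, 0, MPI_COMM_WORLD);
localmax = -1;
for (i = 0; i < j; i++)               /* scan the j values this rank received */
    if (receive_vector[i] > localmax)
        localmax = receive_vector[i];
MPI_Reduce(&localmax, &result, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);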

Program stops at MPI_Send

The program stops working when I execute it with more than 1 process.
It stops at the first MPI_Send.
What am I doing wrong?
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE 200000
#define SIZE2 256
#define VYVOD 1
int main(int argc, char *argv[])
{
int NX, NT;
double TK, UM, DX, DY, DT;
double starttime, endtime;
int numnode, rank, delta=0, ierr, NXnode;
double **U;
double **U1;
double *sosed1;
double *sosed2;
int i, j, k;
MPI_Status stats;
NX = 1*(SIZE2+1);
TK = 20.00;
UM = 10.0;
DX = 0.1;
DY = DX;
DT = 0.1;
NT = (TK/DT);
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numnode);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
if(rank == 0)
printf("\nTotal nodes: %d\n", numnode);
NX = NX - 2;
NXnode = (NX-(NX%numnode))/numnode;
if (rank < (NX%numnode))
{
delta = rank * NXnode + rank + 1;
NXnode++;
}
else
{
delta = rank * NXnode + (NX%numnode) + 1;
}
if(rank == 0){
printf("Order counting complete, NXnode = %d\n", NXnode);
}
U = (double**)malloc(NXnode*sizeof(double*));
U1 = (double**)malloc(NXnode*sizeof(double*));
sosed1 = (double*)malloc(SIZE*sizeof(double));
sosed2 = (double*)malloc(SIZE*sizeof(double));
for (i=0; i < NXnode; i++)
{
U[i] = (double*)malloc(SIZE*sizeof(double));
U[i][0]=0;
U[i][SIZE-1]=0;
U1[i] = (double*)malloc(SIZE*sizeof(double));
U1[i][0]=0;
U1[i][SIZE-1]=0;
if (U[i]==NULL || U1[i]==NULL)
{
printf("Error at memory allocation!");
return 1;
}
}
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0){
starttime = MPI_Wtime();
printf("Array allocation complete\n");
}
for (i = 0; i < NXnode; i++)
{
for (j = 1; j < SIZE-1; j++)
{
if ((delta)<=(NXnode/2))
{
U1[i][j]=2*(UM/NXnode)*(delta+i);
}
else
{
U1[i][j]=-2*(UM/NXnode) + 2*UM;
}
}
}
printf("Array init 1 complete, rank %d\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
if (rank > 0)
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD, &stats);
}
else
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed1[initInd]=0;
}
}
if (rank < (numnode-1))
{
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
}
else
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
}
}
printf("Send complete, rank %d\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
printf("Array init complete, rank %d\n", rank);
for (k = 1; k <= NT; k++)
{
int cycle = 0;
for (cycle=1; cycle < SIZE-1; cycle++)
{
U[0][cycle] = U1[0][cycle] + DT/(DX*DX)*(U1[1][cycle]-2*U1[0][cycle]+sosed1[cycle])+DT/(DY*DY)*(U1[0][cycle+1]+U1[0][cycle-1]-(U1[0][cycle]*2));
}
for (i=1; i<NXnode-1; i++)
{
for(j=1; j<SIZE-1; j++)
{
U[i][j] = U1[i][j] + DT/(DX*DX)*(U1[i+1][j]-2*U1[i][j]+U[i-1][j])+DT/(DY*DY)*(U1[i][j+1]+U1[i][j-1]-(U1[i][j]*2));
}
}
for (cycle=1; cycle < SIZE-1; cycle++)
{
U[NXnode-1][cycle]=U1[NXnode-1][cycle]+DT/(DX*DX)*(sosed2[cycle]-2*U1[NXnode-1][cycle]+U1[NXnode-2][cycle])+DT/(DY*DY)*(U1[NXnode-1][cycle+1]+U1[NXnode-1][cycle-1]-(U1[NXnode-1][cycle]*2));
}
/*U[0] = U1[0]+DT/(DX*DX)*(U1[0+1]-2*U1[0]+sosed1);
for (j = 0; j<NXnode; j++)
{
U[j]=U1[j]+DT/(DX*DX)*(U1[j+1]-2*U1[j]+U1[j-1]);
}
U[NXnode-1]=U1[NXnode-1]+DT/(DX*DX)*(sosed2-2*U1[NXnode-1]+U1[(NXnode-1)-1]);*/
if (rank > 0)
{
MPI_Send(&(U[0][0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD);
}
if (rank < (numnode-1))
{
MPI_Send(&(U[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD);
}
if (rank > 0)
{
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
}
if (rank < (numnode-1))
{
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
}
for (i = 0; i<NXnode; i++)
{
for (j=0; j<SIZE; j++)
{
U1[i][j]=U[i][j];
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
printf("Array count complete, rank %d\n", rank);
if (rank == 0)
{
endtime=MPI_Wtime();
printf("\n## TIME: %f\n", endtime-starttime);
}
MPI_Finalize();
}
UPDATE #1
I tried it like this, so that rank 0 would go first, but it still doesn't work:
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 0 && numnode > 1)
{
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed1[initInd]=0;
}
}
else if (rank == 0)
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
sosed1[initInd]=0;
}
}
else if (rank < (numnode-1))
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
}
else if (rank == (numnode - 1))
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
}
}
UPDATE #2
Solved: I used the same tag for all Send/Recv calls.
MPI_Send blocks execution until the corresponding MPI_Recv is invoked (presumably in another process).
In your program, all processes except rank 0 call MPI_Send immediately after the first barrier, and no one is ready to receive the message, so MPI_Send blocks indefinitely. Essentially, every process is waiting for its message to be accepted by the process with the lower rank (rank 2 is waiting for rank 1, rank 1 is waiting for rank 0), and rank 0 is not accepting any messages at all (it goes to the next block of code and in turn calls MPI_Send too), so everything just hangs.
It looks like you are missing the communication part for the process with rank 0 (it should do something like MPI_Recv(from rank 1); ...; MPI_Send(to rank 1);).
Another thing is that you use MPI_Send with tag 1 but call MPI_Recv with tag 0. These won't match. You need to use the same tag, or specify MPI_ANY_TAG in the receive operation.
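As an aside (not what the answer above suggests, just an alternative sketch): this send/receive ordering problem disappears entirely if each boundary exchange is written as a single MPI_Sendrecv, with MPI_PROC_NULL standing in for the missing neighbour at either end of the chain; buffer names follow the question's code:
int up   = (rank > 0)           ? rank - 1 : MPI_PROC_NULL;
int down = (rank < numnode - 1) ? rank + 1 : MPI_PROC_NULL;
/* send my first row up, receive the upper neighbour's last row into sosed1 */
MPI_Sendrecv(&(U1[0][0]), SIZE, MPI_DOUBLE, up, 0,
             &(sosed1[0]), SIZE, MPI_DOUBLE, up, 0, MPI_COMM_WORLD, &stats);
/* send my last row down, receive the lower neighbour's first row into sosed2 */
MPI_Sendrecv(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE, down, 0,
             &(sosed2[0]), SIZE, MPI_DOUBLE, down, 0, MPI_COMM_WORLD, &stats);
/* the end ranks still need to zero-fill sosed1 / sosed2 themselves,
   because a receive from MPI_PROC_NULL leaves the buffer untouched */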

MPI error: expected expression before ‘,’ token

I have a strange error when using MPI_Send: I get it when trying to send a portion of a two-dimensional array (matrix): "MPI_matrixMultiplication.c:68:99: error: expected expression before ',' token".
The specific line is the one where I try to send a portion of the matrix: MPI_Send(&a[beginPosition][0], ... );
(As you can see, I have commented out the other send and receive calls related to the matrix.)
/////////////////////////////////////////////////////////
// multiplication of 2 matrices, parallelized using MPI //
/////////////////////////////////////////////////////////
#include <stdio.h>
#include <mpi.h>
// must use #define here, and not simply int blahblahblah, because "c" doesnt like ints for array dimension :(
#define matrixARowSize 3 // size of the row for matrix A
#define matrixAColumnSize 3 // size of the column for matrix A
#define matrixBRowSize 3 // size of the row for matrix B
#define matrixBColumnSize 3 // size of the column for matrix B
// tags used for sending/receiving data:
#define LOWER_BOUND 1 // first line to be processed
#define UPPER_BOUND 2 // last line to be processed
#define DATA // data to be processed
int a[matrixARowSize][matrixAColumnSize]; // matrix a
int b[matrixBRowSize][matrixBColumnSize]; // matrix b
int c[matrixARowSize][matrixBColumnSize]; // matrix c
int main()
{
int currentProcess; // current process
int worldSize; // world size
int i, j, k; // iterators
int rowsComputedPerProcess; // how many rows of the first matrix should be computed in each process
int numberOfSlaveProcesses; // the number of slave processes
int processesUsed; //how many processes of the available ones are actually used
MPI_Init(NULL, NULL); // MPI_Init()
MPI_Comm_size(MPI_COMM_WORLD, &worldSize); // get the world size
MPI_Comm_rank(MPI_COMM_WORLD, &currentProcess); // get current process
numberOfSlaveProcesses = worldSize - 1; // 0 is the master, rest are slaves
rowsComputedPerProcess = worldSize > matrixARowSize ? 1 : (matrixARowSize/numberOfSlaveProcesses);
processesUsed = worldSize > matrixARowSize ? matrixARowSize : numberOfSlaveProcesses;
/*
* in the first process (the father);
* initialize the 2 matrices, then start splitting the data to the slave processes
*/
if (!currentProcess) // in father process
{
printf("rows per process: %d\n", rowsComputedPerProcess);
printf("nr of processes used: %d\n", processesUsed);
// init matrix A
for(i = 0; i < matrixARowSize; ++i)
for(j = 0; j < matrixAColumnSize; ++j){
a[i][j] = i + j + 1;
// printf("%d\n", a[i][j]);
// printf("%d\n", *(a[i] + j));
}
// init matrix B
for(i = 0; i < matrixBRowSize; ++i)
for(j = 0; j < matrixBColumnSize; ++j)
b[i][j] = i + j + 1;
// start sending data to the slaves for them to work >:)
int beginPosition; // auxiliary values used for sending the offsets to slaves
int endPosition;
for(i = 1; i < processesUsed; ++i) // the last process is dealt with separately
{
beginPosition = (i - 1)*rowsComputedPerProcess;
endPosition = i*rowsComputedPerProcess;
MPI_Send(&beginPosition, 1, MPI_INT, i, LOWER_BOUND, MPI_COMM_WORLD);
MPI_Send(&endPosition, 1, MPI_INT, i, UPPER_BOUND, MPI_COMM_WORLD);
MPI_Send(&a[beginPosition][0], ((endPosition - beginPosition)*matrixARowSize), MPI_INT, i, DATA, MPI_COMM_WORLD);
// MPI_Send(a[beginPosition], (endPosition - beginPosition)*matrixARowSize, MPI_INT, i, DATA, MPI_COMM_WORLD);
// for(j = beginPosition; j < endPosition; ++j)
// for (k = 0; k < matrixAColumnSize; ++k)
// {
// printf("%d ", *(a[j] + k));
// }
// printf("\n");
// printf("beg: %d, end: %d\n", beginPosition, endPosition);
// printf(" data #%d\n", (endPosition - beginPosition)*matrixARowSize);
}
// deal with last process
beginPosition = (i - 1)*rowsComputedPerProcess;
endPosition = matrixARowSize;
MPI_Send(&beginPosition, 1, MPI_INT, i, LOWER_BOUND, MPI_COMM_WORLD);
MPI_Send(&endPosition, 1, MPI_INT, i, UPPER_BOUND, MPI_COMM_WORLD);
// MPI_Send(a[beginPosition], (endPosition - beginPosition)*matrixARowSize, MPI_INT, i, DATA, MPI_COMM_WORLD);
// printf("beg: %d, end: %d\n", beginPosition, endPosition);
// printf(" data #%d\n", (endPosition - beginPosition)*matrixARowSize);
}
else { // if this is a slave (rank > 0)
int beginPosition; // auxiliary values used for sending the offsets to slaves
int endPosition;
MPI_Recv(&beginPosition, 1, MPI_INT, 0, LOWER_BOUND, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&endPosition, 1, MPI_INT, 0, UPPER_BOUND, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// MPI_Recv(a[beginPosition], (endPosition - beginPosition)*matrixARowSize, 0, DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for(i = beginPosition; i < endPosition; ++i) {
for (j = 0; j < matrixAColumnSize; ++j)
printf("(# %d, i=%d, j=%d: %d ", currentProcess, i, j, a[i][j]);
// printf("\n");
}
}
MPI_Finalize();
return 0; // bye-bye
}
Your DATA constant is empty:
#define DATA // data to be processed
So you're effectively trying to do:
MPI_Send(&a[beginPosition][0], ((endPosition - beginPosition)*matrixARowSize), MPI_INT, i, , MPI_COMM_WORLD);
which logically generates the expected expression before ',' token error.
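The fix is simply to give DATA an actual value, distinct from the other tags, for example (3 here is just an arbitrary unused tag value):
#define LOWER_BOUND 1 // first line to be processed
#define UPPER_BOUND 2 // last line to be processed
#define DATA 3 // data to be processed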