Segmentation fault(signal 11) - c

I'm new to coding and started learning MPI through examples. After executing with mpirun -np 7 ./lab1part2 , I encountered this problem.
I read a few other threads with similar error but non of them seems to be an identical case to me. I'm guessing that I made some simple error syntax wise but i just can't find it. Thanks in advance for any helpful input!
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int Global_sum(int my_int, int my_rank, int comm_sz, MPI_Comm comm);
const int MAX_CONTRIB = 20;
int main(void) {
int i, sum, my_int;
int my_rank, comm_sz;
MPI_Comm comm;
int* all_ints = NULL;
MPI_Init(NULL, NULL);
comm = MPI_COMM_WORLD;
MPI_Comm_size(comm, &comm_sz);
MPI_Comm_rank(comm, &my_rank);
srandom(my_rank + 1);
my_int = random() % MAX_CONTRIB;
sum = Global_sum(my_int, my_rank, comm_sz, comm);
if ( my_rank == 0) {
all_ints = malloc(comm_sz*sizeof(int));
MPI_Reduce(&my_int, &all_ints, 1, MPI_INT, MPI_SUM, 0, comm);
printf("Ints being summed:\n ");
for (i = 0; i < comm_sz; i++)
printf("%d ", all_ints[i]);
printf("\n");
printf("Sum = %d\n",sum);
free(all_ints);
} else {
MPI_Reduce(&my_int, &all_ints, 1, MPI_INT, MPI_SUM, 0, comm);
}
MPI_Finalize();
return 0;
}
int Global_sum(
int my_int,
int my_rank,
int comm_sz,
MPI_Comm comm ) {
int partner, recvtemp;
int my_sum = my_int;
unsigned bitmask = 1;
while (bitmask < comm_sz) {
partner = my_rank ^ bitmask;
if (my_rank < partner) {
if (partner < comm_sz) {
MPI_Recv(&recvtemp, 1, MPI_INT, partner, 0, comm, MPI_STATUS_IGNORE);
my_sum += recvtemp;
}
bitmask <<= 1;
} else {
MPI_Send(&my_sum, 1, MPI_INT, partner, 0, comm);
break;
}
}
return my_sum;
}

Related

gather large sub Matrices in MPI in c

i wanna make a program that calculate matrix multiplication "large matrices 1000*1000 for example"
in master-worker paradigm and select to each worker number of rows it will work on
for example if i have 1000*1000 matrix and i got 4 processes
process 0 is master wont work on it
process 1 will get from 0 to 333
process 2 will get from 333 to 666
process 3 will get from 666 to 1000
simply here is my code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
int **alloc(int rows, int cols)
{
int *data = (int *)malloc(rows*cols*sizeof(int));
int **array= (int **)malloc(rows*sizeof(int*));
int i;
for (i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
void free2D(int **mat)
{
free(mat[0]);
free(mat);
}
int main(int argc, char * argv[])
{
int my_rank; /* rank of process */
int p; /* number of process */
int source; /* rank of sender */
int dest; /* rank of reciever */
int tag = 0; /* tag for messages */
MPI_Status status; /* return status for */
MPI_Init( &argc, &argv );
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
int **mat1;
int **mat2;
int **mat3;
int n,r,m;
int i,j,k,sum;
int start[100];
int end[100];
int istart,iend;
int avg,extra,taken,temp;
if (my_rank==0)
{
n=1000;
r=1000;
m=1000;
mat1=alloc(n,r);
mat2=alloc(r,m);
mat3=alloc(n,m);
//found = (int *)malloc(n*m*sizeof(int));
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&r, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
avg=n/(p-1);
extra=n%(p-1);
temp=0;
for(i=0; i<(p-1); i++)
{
taken=(extra--<=0)? avg: avg+1;
start[i]=temp;
temp+=taken;
end[i]=temp;
}
for (source=1; source<p; source++)
{
MPI_Send(&start[source-1],1,MPI_INT,source,tag,MPI_COMM_WORLD);
MPI_Send(&end[source-1],1,MPI_INT,source,tag,MPI_COMM_WORLD);
}
for (i=0; i<n; i++)
{
for (j=0; j<r; j++)
{
mat1[i][j]=1;
MPI_Bcast(&(mat1[i][j]), 1, MPI_INT, 0, MPI_COMM_WORLD);
}
}
for (i=0; i<r; i++)
{
for (j=0; j<m; j++)
{
mat2[i][j]=5;
MPI_Bcast(&(mat2[i][j]), 1, MPI_INT, 0, MPI_COMM_WORLD);
}
}
///here i supposed to receive my sub matrices from worker
free2D(mat1);
free2D(mat2);
free2D(mat3);
}
else{
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&r, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Recv(&istart,1,MPI_INT,0,tag, MPI_COMM_WORLD,&status);
MPI_Recv(&iend,1,MPI_INT,0,tag, MPI_COMM_WORLD,&status);
mat1=alloc(n,r);
mat2=alloc(r,m);
mat3=alloc(iend-istart,m);
for (i=0; i<n; i++)
{
for (j=0; j<r; j++)
{
MPI_Bcast(&(mat1[i][j]), 1, MPI_INT, 0, MPI_COMM_WORLD);
}
}
for (i=0; i<r; i++)
{
for (j=0; j<m; j++)
{
MPI_Bcast(&(mat2[i][j]), 1, MPI_INT, 0, MPI_COMM_WORLD);
}
}
sum=0;
for (i=istart; i<iend; i++)
{
for (j=0; j<m; j++)
{
for(k=0; k<r; k++)
{
sum += mat1[i][k]*mat2[k][j];
}
// printf("%d %d\n",i,j);
mat3[i-istart][j]=sum;
sum = 0;
}
}
/// here i supposed to send sub matrices
//printMat(mat3,iend-istart,m);
free2D(mat1);
free2D(mat2);
free2D(mat3);
}
/* shutdown MPI */
MPI_Finalize();
return 0;
}
it works fine every worker calculate its sub matrix correctly but i don't know how to send them back to master and it takes alot of time if i used MPI_Send and MPI_Recv "as its point to point messaging"
Any idea how to gather them ? i'm new to MPI btw.

MPI sum of array receive working for only one rank

I am trying to find the sum of an array of length 100 elements using MPI, under the restrictions of only using MPI_Send and MPI_receive , the code that I have written finds the sum of each processor but during the re-send to the main processor(rank=0) my code only receives from one processor
My Code
#include "stdafx.h"
#include <stdio.h>
#include <string.h>
#include "mpi.h"
#include "math.h"
int val = 1;
int main(int argc, char* argv[]) {
int my_rank;
int p;
int ierr;
int i;
int a[100];
int q=0;
for (i = 0; i <100; i++)
{
a[i] = i+1;
}
int send,recv;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
int part = 100 /(p-1);
if (my_rank == 0)
{
for (i = 1; i < p; i++)
{
send = part * (i-1);
MPI_Send(&send, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
}
}
else
{
MPI_Recv(&recv, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
for (i = recv; i < recv + part; i++)
{
val = val+a[i];
}
printf("%d\n", val);
MPI_Send(&val, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
if (my_rank == 0)
{
MPI_Recv(&val, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
printf("%d", val);
q = q + val;
}
MPI_Finalize();
if (my_rank == 0)
{
printf("The output is %d\n", q);
}
return 0;
}
My output
where am i going wrong
Because you only recieve the result from one process. To recieve all results, iterate over process ranks:
if (my_rank == 0)
{
for (rank = 1; rank < proc_cnt; rank++)
{
MPI_Recv(&val, 1, MPI_INT, rank, 0, MPI_COMM_WORLD, &status);
printf("value of rank %d is %d", rank, val);
q = q + val;
}
}
Ordinarily, this a bad practice and may lead to deadlocks. Use mpi_gather() if allowed.

For some reason MPI_Waitall gets stuck (in a deadlock I believe) when I test my program with big numbers

For some reason MPI_Waitall is waiting forever when I enter 10000 as the length for the sequence. Basically I create 4 lists of length n/4 where in this case n is 10000 and I an using non-blocking send so my process 0 does not wait for each list to be sent separately as they do not share any values so they are not overwritten.
Keep in mind that the program works with smaller numbers like 1000 or 100 but I am not sure why it does not work with 10000+.
Here is my code:
#include "ore_header.h"
int main(int argc, char** argv) {
srand(time(NULL));
int my_rank, p;
void generate_sequence(int *arr, int n);
int subsequence_check(int *arr,int n, int m);
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
int total;
int length;
int flag;
int seq_length;
MPI_Status stats[p];
MPI_Request reqs[p];
int p_length=0;
int *buf[p];
if (my_rank == 0) {
printf("Enter length and sequence length\n");
scanf("%d %d",&length, &seq_length);
p_length = length / p;
for (int i = 0; i < p; i++) {
buf[i] = (int*)malloc(p_length*sizeof(int));
generate_sequence(buf[i], p_length);
MPI_Isend(buf[i], p_length, MPI_INT, i, 0, MPI_COMM_WORLD, &reqs[i]);
printf("Data sent to process %d\n", i);
}
MPI_Waitall(p, reqs, stats); //Program wont go past this line
printf("\n\n Data sent to all processes \n\n");
}
MPI_Bcast(&p_length, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&seq_length, 1, MPI_INT, 0, MPI_COMM_WORLD);
buf[my_rank] = (int*)malloc(p_length*sizeof(int));
MPI_Recv(buf[my_rank], p_length, MPI_INT, 0, 0, MPI_COMM_WORLD, &stats[my_rank]);
printf("\nData received on process: %d Length: %d\n",my_rank,p_length);
//for (int i = 0; i < p_length; i++) {
// printf("%d",buf[my_rank][i]);
//}
//printf("\n");
total = subsequence_check(buf[my_rank],p_length,seq_length);
printf("\nI am process: %d\nTotal: %d\n",my_rank,total);
MPI_Finalize();
return (0);
}

unexpected deadlock in MPI

I hope someone can help me. My code:
void process(int myid, int numprocs)
{
int i,j, anzahl, rest;
MPI_Status stat;
meta = (int *)malloc(3 * sizeof(int));
if(myid == 0)
{
meta[0] = ASpalten;
meta[1] = AZeilen;
meta[2] = BSpalten;
for (i = 0; i < numprocs; i++) //masternode distributes matrix A to every single core
{
MPI_Send(&meta[0], 3, MPI_INT, i, TAG, MPI_COMM_WORLD);
printf("%d: debug04\n", myid);
MPI_Send(&MA[0], ASpalten*AZeilen, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
printf("%d: debug05\n", myid);
MPI_Send(&MB[0], ASpalten*BSpalten, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
printf("%d: debug06\n", myid);
}
}
else
{
MPI_Recv(meta, 3, MPI_INT, 0, TAG, MPI_COMM_WORLD, &stat);
printf("%d: debug01\n", myid);
ASpalten = meta[0];
AZeilen = meta[1];
BSpalten=meta[2];
printf("%d: debug02\n", myid);
MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
MPI_Recv(MA, ASpalten*AZeilen, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
MPI_Recv(MB, ASpalten*BSpalten, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
printf("%d: debug03\n", myid);
// printf("%d: %f\n", myid, *(MA + _index(1, 1, ASpalten))); //funktioniert
}
The Datatypes:
int ASpalten;
int AZeilen;
int BSpalten;
int *meta; //used to transfer meta data in 1 send
double *MA; //Matrix A
double *MB; //Matrix B
The program is supposed to multiply 2 matrices using MPI. My sample matrix proves that the code is likely valid and I also get this running for up to 130 * 90 matrices (maybe more maybe less), but anyway, as soon, as the number increases, I am getting a possible deadlock: the console prints out "debug4" and that's it. I would be very grateful, if anyone has a clue what is going wrong in my program. I already tried to use MPI_INTEGER instead of MPI_INT, but there is no difference. Any help would be appreciated. The output of the console when using very tiny matrices (PS, I already tried to execute my testcases in different orders too and modified existing ones):
Testcase1 MPI:
0: debug04
0: debug05
0: debug06
0: debug04
1: debug01
1: debug02
0: debug05
1: debug03
1: debugx1
0: debug06
0: debug04......
It seems that process 0 sends messages to proc 0 and proc 0 does receive them.
I modified to
for(i=1;i<numprocs;i++)
to remove the deadlock.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
int ASpalten;
int AZeilen;
int BSpalten;
int *meta; //used to transfer meta data in 1 send
double *MA; //Matrix A
double *MB; //Matrix B
double *MR; //Matrix B
void process(int myid, int numprocs){
int i,j, anzahl, rest;
int TAG=0;
MPI_Status stat;
meta=(int*)malloc(3*sizeof(int));
if(myid == 0)
{meta[0]=ASpalten;
meta[1]=AZeilen;
meta[2]=BSpalten;
for (i=1; i<numprocs; i++)//masternode distributes matrix A to every single core
{
MPI_Send(&meta[0], 3, MPI_INT, i, TAG, MPI_COMM_WORLD);
printf("%d: debug04\n", myid);
MPI_Send(&MA[0], ASpalten*AZeilen, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
printf("%d: debug05\n", myid);
MPI_Send(&MB[0], ASpalten*BSpalten, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
printf("%d: debug06\n", myid);
}
}
else
{
MPI_Recv(meta, 3, MPI_INT, 0, TAG, MPI_COMM_WORLD, &stat);
printf("%d: debug01\n", myid);
ASpalten=meta[0];
AZeilen=meta[1];
BSpalten=meta[2];
printf("%d: debug02\n", myid);
MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
MPI_Recv(MA, ASpalten*AZeilen, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
MPI_Recv(MB, ASpalten*BSpalten, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
printf("%d: debug03\n", myid);
// printf("%d: %f\n", myid, *(MA + _index(1, 1, ASpalten))); //funktioniert
}
}
int main(int argc,char *argv[])
{
int rank, size;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
ASpalten=130;
AZeilen=90;
BSpalten=200;
if(rank==0){
}
MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
process(rank,size);
MPI_Finalize();
return 0;
}
Bye,
Francis

Send multiple columns of a matrix using MPI_Send

I am trying to send multiple columns of "B" matrix to different processors from processor 0. I am trying to send use MPI_Send but its not working. Can someone pls help me?
For example : Size of square matrix B is 7.
In this way ,it should be distributed.
Processor 0: 3 columns
Processor 1 :2 columns
Processor 2: 2 columns
#include <stdlib.h>
#include <mpi.h>
#include <stdio.h>
#define ERR_BADORDER 255
#define TAG_INIT 31337
#define TAG_RESULT 42
#define DISP_MAXORDER 12
int mm(double *A, double *B, double *C, int n, int n1);
int rc(int rt,int rank, int size);
int main(int argc, char *argv[]) {
double *A, *B, *C,t,tt;
int n = 0, n0, n1, n2, i,ss,sts;
int rank = 0, size = 1,prev,next,k,z,jcol,ix=0,m,j;
MPI_Datatype column;
MPI_Request reqs[4];
MPI_Status stats[2];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (!rank) {
if (argc > 1) {
n = atoi(argv[1]);
}
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (!n) {
MPI_Finalize();
return 0;
}
n1 = rc(n, rank,size);
n0 = n * n1;
n2 = n * n;
A = (double *) malloc(sizeof(double) * (rank ? n0 : n2));
B = (double *) malloc(sizeof(double) * n2 );
C = (double *) malloc(sizeof(double) * (rank ? n0 : n2));
if (!rank) {
for (i=0; i<n2; i++) {
A[i] = 1.0;
B[i] = 1.0;
}
}
t = MPI_Wtime();
if (!rank) {
ss = n0;
for (i=1; i<size; i++) {
sts = n * rc(n, i, size);
MPI_Send(A + ss, sts, MPI_DOUBLE, i, TAG_INIT,
MPI_COMM_WORLD);
ss += sts;
}
}
else {
MPI_Recv(A, n0, MPI_DOUBLE, 0, TAG_INIT, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
}
MPI_Type_vector(n,1,n,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
if (!rank) {
for (i=1; i<size; i++) {
for(m=0;m<=i-1;m++)
ix+=rc(n,m,size);
ss=rc(n,i,size);
for(j=ix;j<ss+ix;j++)
MPI_Send(&B[j], 1, column, i, TAG_INIT, MPI_COMM_WORLD);
/* MPI_Send(&B[i+(n-1)*n], 1, column, i, TAG_INIT,
MPI_COMM_WORLD);*/
}
}
else {
printf("hello");
MPI_Recv(B, n, MPI_DOUBLE, 0, TAG_INIT, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
}
for (i=0; i<n0; i++) {
printf("Processor: %d and matrix %lf \n ",rank, B[i]);
}
for (i=0; i<n0; i++) {
C[i] = 0.0;
}
MPI_Finalize();
return 0;
}
int rc(int rt, int rank, int size) {
return (rt / size) + (rt % size > rank);
}
Please don't call the values with two-three letters, because I can't understand what do you want to do.
You can resolve the problem by different ways. When n = 7 and I have 3 process, I send 2 columns to each process different to the master 0.
#include <stdlib.h>
#include <mpi.h>
#include <stdio.h>
#define ERR_BADORDER 255
#define TAG_INIT 31337
#define TAG_RESULT 42
#define DISP_MAXORDER 12
int main(int argc, char *argv[]) {
double *B;
int n = 0;
int rank , size;
int i;
int columnToSend;
MPI_Datatype column;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (!rank)printf ("rank = %d , size = %d\n", rank, size);
if (!rank) {
if (argc > 1) {
n = atoi(argv[1]);
}
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (!n) {
printf ("n = %d!!\n", n);
MPI_Finalize();
return 0;
}
int offset = n%size;
int gap = n/size;
if (!rank)printf ("n = %d, offset = %d , gap = %d\n", n, offset, gap);
MPI_Type_vector(n,gap,n,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
B = (double *) malloc(sizeof(double) * n*n );
for (i = 0 ; i < n * n ; i++) {
B[i] = -1.0;
}
if (!rank) {
for (i = 0 ; i < n * n ; i++) {
B[i] = i;//<----- I put i instead one
}
for (i=1; i < size; i++) {
columnToSend = gap *i + offset;
printf ("columnToSend = %d to i = %d \n", columnToSend, i);
MPI_Send(&B[columnToSend], 1, column, i, TAG_INIT, MPI_COMM_WORLD);
}
}
if (rank) {
printf ("in rank = %d n*gap = %d \n", rank, n*gap);
MPI_Recv(B, n*gap, MPI_DOUBLE, 0, TAG_INIT, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for (i=0; i < n*gap; i++) {
printf("Processor: %d and matrix %lf \n ",rank, B[i]);
}
}
MPI_Finalize();
return 0;
}

Resources