I hope someone can help me. My code:
void process(int myid, int numprocs)
{
    int i, j, anzahl, rest;
    MPI_Status stat;
    meta = (int *)malloc(3 * sizeof(int));
    if (myid == 0)
    {
        meta[0] = ASpalten;
        meta[1] = AZeilen;
        meta[2] = BSpalten;
        for (i = 0; i < numprocs; i++) //masternode distributes matrix A to every single core
        {
            MPI_Send(&meta[0], 3, MPI_INT, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug04\n", myid);
            MPI_Send(&MA[0], ASpalten*AZeilen, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug05\n", myid);
            MPI_Send(&MB[0], ASpalten*BSpalten, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug06\n", myid);
        }
    }
    else
    {
        MPI_Recv(meta, 3, MPI_INT, 0, TAG, MPI_COMM_WORLD, &stat);
        printf("%d: debug01\n", myid);
        ASpalten = meta[0];
        AZeilen = meta[1];
        BSpalten = meta[2];
        printf("%d: debug02\n", myid);
        MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
        MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
        MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
        MPI_Recv(MA, ASpalten*AZeilen, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
        MPI_Recv(MB, ASpalten*BSpalten, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
        printf("%d: debug03\n", myid);
        // printf("%d: %f\n", myid, *(MA + _index(1, 1, ASpalten))); // works
    }
}
The datatypes:
int ASpalten;
int AZeilen;
int BSpalten;
int *meta;  //used to transfer meta data in 1 send
double *MA; //Matrix A
double *MB; //Matrix B
double *MR; //Matrix R (result)
The program is supposed to multiply two matrices using MPI. My sample matrix suggests that the code is likely valid, and it also runs for matrices up to about 130 * 90 (maybe more, maybe less). But as soon as the dimensions grow past that, I get a probable deadlock: the console prints "0: debug04" and nothing more. I would be very grateful if anyone has a clue what is going wrong in my program. I already tried using MPI_INTEGER instead of MPI_INT, but there is no difference. Any help would be appreciated. Here is the console output when using very tiny matrices (PS: I already tried executing my test cases in different orders and modified existing ones):
Testcase1 MPI:
0: debug04
0: debug05
0: debug06
0: debug04
1: debug01
1: debug02
0: debug05
1: debug03
1: debugx1
0: debug06
0: debug04......
It turned out that process 0 sends messages to itself (the loop starts at i = 0) but never receives them, since the receive branch is only taken by the nonzero ranks. Tiny messages still go through because MPI_Send may buffer them eagerly, but once the matrices are large enough the send blocks waiting for a matching receive that never comes. I modified the loop to
for (i = 1; i < numprocs; i++)
to remove the deadlock. The full modified program:
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"

int ASpalten;
int AZeilen;
int BSpalten;
int *meta;  //used to transfer meta data in 1 send
double *MA; //Matrix A
double *MB; //Matrix B
double *MR; //Matrix R (result)

void process(int myid, int numprocs){
    int i, j, anzahl, rest;
    int TAG = 0;
    MPI_Status stat;
    meta = (int*)malloc(3*sizeof(int));
    if(myid == 0)
    {
        meta[0] = ASpalten;
        meta[1] = AZeilen;
        meta[2] = BSpalten;
        for (i = 1; i < numprocs; i++) //masternode distributes matrix A to every single core
        {
            MPI_Send(&meta[0], 3, MPI_INT, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug04\n", myid);
            MPI_Send(&MA[0], ASpalten*AZeilen, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug05\n", myid);
            MPI_Send(&MB[0], ASpalten*BSpalten, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
            printf("%d: debug06\n", myid);
        }
    }
    else
    {
        MPI_Recv(meta, 3, MPI_INT, 0, TAG, MPI_COMM_WORLD, &stat);
        printf("%d: debug01\n", myid);
        ASpalten = meta[0];
        AZeilen = meta[1];
        BSpalten = meta[2];
        printf("%d: debug02\n", myid);
        MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
        MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
        MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
        MPI_Recv(MA, ASpalten*AZeilen, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
        MPI_Recv(MB, ASpalten*BSpalten, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, &stat);
        printf("%d: debug03\n", myid);
        // printf("%d: %f\n", myid, *(MA + _index(1, 1, ASpalten))); // works
    }
}

int main(int argc, char *argv[])
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    ASpalten = 130;
    AZeilen = 90;
    BSpalten = 200;
    if(rank == 0){
    }
    MA = (double*)malloc(ASpalten*AZeilen*sizeof(double));
    MB = (double*)malloc(ASpalten*BSpalten*sizeof(double));
    MR = (double*)malloc(AZeilen*BSpalten*sizeof(double));
    process(rank, size);
    MPI_Finalize();
    return 0;
}
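Aside: since every rank needs the same metadata and matrices, the distribution can also be written with MPI_Bcast, which every rank (including 0) calls, so there is no self-send to worry about at all. A minimal sketch of what the body of process() could look like, assuming the same globals as above and assuming main allocates and fills MA/MB only on rank 0:
/* Sketch: broadcast-based distribution; every rank calls each MPI_Bcast. */
meta = (int*)malloc(3 * sizeof(int));
if (myid == 0) {
    meta[0] = ASpalten; meta[1] = AZeilen; meta[2] = BSpalten;
}
MPI_Bcast(meta, 3, MPI_INT, 0, MPI_COMM_WORLD);
if (myid != 0) {
    ASpalten = meta[0]; AZeilen = meta[1]; BSpalten = meta[2];
    MA = (double*)malloc(ASpalten * AZeilen * sizeof(double));
    MB = (double*)malloc(ASpalten * BSpalten * sizeof(double));
    MR = (double*)malloc(AZeilen * BSpalten * sizeof(double));
}
MPI_Bcast(MA, ASpalten * AZeilen, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(MB, ASpalten * BSpalten, MPI_DOUBLE, 0, MPI_COMM_WORLD);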
Bye,
Francis
I have been trying to write a Cannon's algorithm implementation of matrix multiplication. Using an MPI Cartesian virtual topology, I am sending a few buffers with MPI_Isend and MPI_Irecv, and the program does not get past this send/recv. I have tried using MPI_Waitall in different places, but some of the processes still stop responding. How do I fix this? Using MPI_Send and MPI_Recv gives the same error. Without the initial shift, the Cannon's algorithm loop works fine; it's just the initial shift that is having issues.
#include "stdio.h"
#include "stdlib.h"
#include "mpi.h"
#include "math.h"
#define sz 9
#define UP 0
#define DOWN 1
#define LEFT 2
#define RIGHT 3
void mat_init(int a[sz][sz],int b[sz][sz]){
int i,j;
for(i=0;i<sz;i++){
for(j=0;j<sz;j++){
a[i][j]=i+j;
b[i][j]=i*j;}}
}
void mat_mult(int n, int a[n][n],int b[n][n], int c[n][n]){
int i,j,k;
for(i=0;i<n;i++){
for(j=0;j<n;j++){
for(k=0;k<n;k++){
c[i][j] +=a[i][k]*b[k][j];}}}
}
void print_mat(int n, int a[n][n]){
int i,j;
for(i=0;i<n;i++){
for(j=0;j<n;j++){
printf("%d\t",a[i][j]);}printf("\n");}
}
int main(int argc, char *argv[])
{
int comm_sz,rank;
int i,j,k,n=3;
int a[sz][sz],b[sz][sz],c[sz][sz];
int a1[n][n],b1[n][n],c1[n][n];
int nbrs[4];
int dim[2];
int n_dims=2;
int periods[2]={1,1};
int reorder = 0;
int coords[2];
int dir;
int dis;
mat_init(a,b);
MPI_Status stats[9];
MPI_Comm cartcomm; // Cartesian Communicator
MPI_Init(NULL,NULL); // Initialize MPI
MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); // Initialize Processors
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Processors Rank
if(rank==0){
printf("A=\n");
print_mat(sz,a);
printf("B=\n");
print_mat(sz,b);
}
dim[0] = sqrt(comm_sz);
dim[1] = sqrt(comm_sz);
dir = 0;
dis = 1;
MPI_Cart_create(MPI_COMM_WORLD, n_dims,dim,periods,reorder,&cartcomm);
MPI_Cart_coords(cartcomm,rank,2,coords);
MPI_Cart_shift(cartcomm, 0, dis, &nbrs[UP], &nbrs[DOWN]);
MPI_Cart_shift(cartcomm, 1, dis, &nbrs[LEFT], &nbrs[RIGHT]);
//Discretization
for(i=0;i<n;i++)
for(j=0;j<n;j++){
a1[i][j] = a[coords[0]*n+i][coords[1]*n+j];
b1[i][j] = b[coords[0]*n+i][coords[1]*n+j];
c1[i][j] = 0;
}
MPI_Request reqs[4];
MPI_Request shiftreq[10];
// Accounting initial shift
for(int t=0;t<coords[1];t++){
MPI_Isend(&a1, n*n, MPI_INT, nbrs[RIGHT], 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Irecv(&a1, n*n, MPI_INT, nbrs[LEFT], 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
}
for(int t=0;t<coords[0];t++){
MPI_Isend(&b1, n*n, MPI_INT, nbrs[DOWN], 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Irecv(&b1, n*n, MPI_INT, nbrs[UP], 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
}
for(i=0;i<n;i++){ // Cannon's Algorithm
mat_mult(n,a1,b1,c1);
//Shiftup
MPI_Isend(&b1, n*n, MPI_INT, nbrs[UP], 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Irecv(&b1, n*n, MPI_INT, nbrs[DOWN], 0, MPI_COMM_WORLD, &reqs[1]);
//Shiftleft
MPI_Isend(&a1, n*n, MPI_INT, nbrs[LEFT], 0, MPI_COMM_WORLD, &reqs[2]);
MPI_Irecv(&a1, n*n, MPI_INT, nbrs[RIGHT], 0, MPI_COMM_WORLD, &reqs[3]);
MPI_Waitall(4, reqs, MPI_STATUSES_IGNORE);
}
MPI_Send(&c1,n*n,MPI_INT,0,0,MPI_COMM_WORLD);
MPI_Send(&coords[0],1,MPI_INT,0,1,MPI_COMM_WORLD);
MPI_Send(&coords[1],1,MPI_INT,0,2,MPI_COMM_WORLD);
if(rank==0){
int drds[2];
for(int t=0;t<comm_sz;t++){
MPI_Recv(&c1,n*n,MPI_INT,t,0,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&drds[0],1,MPI_INT,t,1,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&drds[1],1,MPI_INT,t,2,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for(i=0;i<n;i++){
for(j=0;j<n;j++){
c[n*drds[0]+i][n*drds[1]+j] = c1[i][j];}}}
printf("\nMatrix C:\n");
for(i=0;i<sz;i++){
for(j=0;j<sz;j++){
printf("%d\t",c[i][j]);}printf("\n");}}
MPI_Finalize();
}
I have tried out many things and found that this is mostly an issue with the sends and receives to the neighbours. The algorithm is supposed to take care that for every send there exists a matching recv.
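One likely culprit, though: the skew loops run a different number of iterations on each rank (coords[1] and coords[0] times respectively), so neighbouring ranks post mismatched numbers of sends and receives, and some of them can never complete, which makes MPI_Waitall hang. In addition, having an MPI_Isend and an MPI_Irecv active on the same buffer (a1 or b1) before the MPI_Waitall is erroneous in MPI. The textbook Cannon alignment does each skew in a single step with MPI_Sendrecv_replace over the Cartesian communicator, so every rank performs exactly one paired send/receive. A sketch, assuming the rest of the program stays as above:
/* Sketch: initial alignment for Cannon's algorithm. Row i of A is
   shifted left by i positions; column j of B is shifted up by j.
   One paired send/receive per rank, one buffer operation at a time. */
int src, dst;
MPI_Status status;
MPI_Cart_shift(cartcomm, 1, -coords[0], &src, &dst);
MPI_Sendrecv_replace(a1, n * n, MPI_INT, dst, 0, src, 0, cartcomm, &status);
MPI_Cart_shift(cartcomm, 0, -coords[1], &src, &dst);
MPI_Sendrecv_replace(b1, n * n, MPI_INT, dst, 0, src, 0, cartcomm, &status);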
The exercise I'm trying to do asks us to build a ring of processes where each of them passes the number x to the following process; only the process with rank 0 decreases x. When x reaches zero, the program ends.
The working code is the following:
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include "mpi.h"
int main(int argc, char** argv) {
int x = 0;
int tag = 99;
MPI_Status status;
MPI_Init(&argc, &argv);
int myRank;
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
int size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (myRank == 0) {
printf("Process %d enter a value: \n", myRank);
fflush(stdout);
scanf_s("%d", &x);
while (x>0) {
MPI_Send(&x, 1, MPI_INT, 1, tag, MPI_COMM_WORLD);
MPI_Recv(&x, 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status);
printf("Process %d : lap number %d \n", myRank, x);
x--;
}
}
else {
do {
MPI_Recv(&x, 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status);
printf("Process %d : lap number %d \n", myRank, x);
MPI_Send(&x, 1, MPI_INT, (myRank + 1) % size, tag, MPI_COMM_WORLD);
} while (x > 1);
}
MPI_Finalize();
return 0;
}
but if I change the last do-while loop like this:
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include "mpi.h"
int main(int argc, char** argv) {
int x = 0;
int tag = 99;
MPI_Status status;
MPI_Init(&argc, &argv);
int myRank;
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
int size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (myRank == 0) {
printf("Process %d enter a value: \n", myRank);
fflush(stdout);
scanf_s("%d", &x);
while (x>0) {
MPI_Send(&x, 1, MPI_INT, 1, tag, MPI_COMM_WORLD);
MPI_Recv(&x, 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status);
printf("Processo %d : giro numero %d \n", myRank, x);
x--;
}
}
else {
while (x>0) {
MPI_Recv(&x, 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status);
printf("Processo %d : giro numero %d \n", myRank, x);
MPI_Send(&x, 1, MPI_INT, (myRank + 1) % size, tag, MPI_COMM_WORLD);
}
}
MPI_Finalize();
return 0;
}
I get this error:
job aborted:
[ranks] message
[0] fatal error
Fatal error in MPI_Send: Other MPI error, error stack:
MPI_Send(buf=0x000000F6100FF514, count=1, MPI_INT, dest=1, tag=99, MPI_COMM_WORLD) failed
failed to attach to a bootstrap queue - 10616:296
[1-2] terminated
But I don't understand why... Shouldn't the two codes be equivalent?
You set x to zero, and then all processes except rank 0 evaluate while (x > 0) { ... }. Since x is still 0 at that point, they skip the loop entirely, call MPI_Finalize and exit (that is the "[1-2] terminated" in the job output), which is why rank 0's first MPI_Send then fails. The do-while version works because it performs the first receive before ever testing x.
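If you want to keep the while form on the nonzero ranks, prime x with one receive before the loop condition is tested. A sketch of the else branch, behaviorally equivalent to the do-while in the working listing:
else {
    /* Prime x with the first message, so the while condition is
       evaluated on a value that actually came around the ring. */
    MPI_Recv(&x, 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status);
    printf("Process %d : lap number %d \n", myRank, x);
    MPI_Send(&x, 1, MPI_INT, (myRank + 1) % size, tag, MPI_COMM_WORLD);
    while (x > 1) {
        MPI_Recv(&x, 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status);
        printf("Process %d : lap number %d \n", myRank, x);
        MPI_Send(&x, 1, MPI_INT, (myRank + 1) % size, tag, MPI_COMM_WORLD);
    }
}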
#include <stdio.h>
#include "mpi.h"
int main(int argc, char *argv[])
{
int rank, value, size,count;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
count=2*size-1;
while(count>0){
if (rank==0) {
// fprintf(stderr, "\nPlease give new value=");
printf("please input value= ");
scanf("%d",&value);
// fprintf(stderr, "%d read <-<- (%d)\n",rank,value);
printf("%d read <-<- (%d)\n",rank,value);
count-=1;
if (size>1) {
MPI_Send(&value, 1, MPI_INT, rank+1, 0, MPI_COMM_WORLD);
// fprintf(stderr, "%d send (%d)->-> %d\n", rank,value,rank+1);
printf("%d send (%d)->-> %d\n",rank,value,rank+1);
count-=1;
}
}
else {
MPI_Recv(&value, 1, MPI_INT, rank-1, 0, MPI_COMM_WORLD, &status);
// fprintf(stderr, "%d receive(%d)<-<- %d\n",rank, value, rank-1);
printf("%d receive(%d)<-<- %d\n",rank, value, rank-1);
count-=1;
if (rank<size-1) {
MPI_Send(&value, 1, MPI_INT, rank+1, 0, MPI_COMM_WORLD);
fprintf(stderr, "%d send (%d)->-> %d\n", rank, value, rank+1);
count-=1;
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
MPI_Finalize();
}
The purpose of this program is to pass numbers between processes.
I start two processes and pass the number 4,
but process 0 ran twice, which is not what I expected.
I then used gdb to debug.
This has been bothering me for a long time, and I'm not very good at watching variables from the command line. Please help me.
TL;DR: it runs twice because the while loop body is executed two times on process 0.
But the 0 process ran twice, which is not as expected.
You have the impression that process 0 runs twice because the count variable, right before entering the while loop, has the value 3 from count = 2*size - 1; (size is 2 because you are running with 2 processes).
In your loop:
while(count>0){
if (rank==0) {
...
count-=1;
if (size>1) {
...
count-=1;
}
}
else {
...
count-=1;
if (rank<size-1) {
...
count-=1;
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
The count variable is decremented twice by process 0 in the first iteration, leaving count at 1, and since the loop condition is count > 0 the body executes once more before exiting. Thus, process 0 "runs again".
Process 0 decrements count twice per iteration whereas process 1 decrements it only once, so this is most likely a bug: you can run into situations where process 1 blocks waiting to receive a message from process 0 while process 0 has already left the loop.
To test the sending and receiving of messages from process 0, try the following:
#include <stdio.h>
#include "mpi.h"
int main(int argc, char *argv[])
{
int rank, value, size,count;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (rank==0)
{
printf("please input value= ");
scanf("%d",&value);
for(int i = 1; i < size; i++){
MPI_Send(&value, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
printf("%d send (%d)->-> %d\n",rank, value, i);
}
}
else
{
MPI_Recv(&value, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
printf("%d receive(%d)<-<- %d\n",rank, value, rank-1);
}
MPI_Finalize();
}
Process 0 sends a value to all the remaining processes:
for(int i = 1; i < size; i++){
MPI_Send(&value, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
printf("%d send (%d)->-> %d\n",rank, value, i);
}
and all the remaining processes receive a message from process 0:
MPI_Recv(&value, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
int main(int argc, char **argv)
{
int N;
scanf("%d", &N);
double *a = (double *)malloc(N * sizeof(double));
int i, rank, size, tag = 99, tag1 = 100;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0)
{
for(int j=0;j<N;++j)
{
a[j] = j+0.1;
}
for (i = 1; i < size; i++)
{
MPI_Send(&N, 1, MPI_INT, i, tag1, MPI_COMM_WORLD);
MPI_Send(a, N, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
}
}
else
{
MPI_Recv(&N, 1, MPI_INT, 0, tag1, MPI_COMM_WORLD, &status);
MPI_Recv(a, N, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
// for(int j=0;j<N*2;++j)
// printf("%d %f\n", rank, a[j]);
}
MPI_Barrier(MPI_COMM_WORLD);
printf("Message from process %d : %f\n", rank, a[rank]);
MPI_Finalize();
return 0;
}
I'm creating the array a in the 0th process and sending it to the remaining processes, but I'm getting the following error when doing this:
[nikhil:8599] *** An error occurred in MPI_Recv
[nikhil:8599] *** reported by process [4228579329,1]
[nikhil:8599] *** on communicator MPI_COMM_WORLD
[nikhil:8599] *** MPI_ERR_BUFFER: invalid buffer pointer
[nikhil:8599] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[nikhil:8599] *** and potentially your MPI job)
[nikhil:08593] 2 more processes have sent help message help-mpi-errors.txt / mpi_errors_are_fatal
[nikhil:08593] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages
Can anybody explain why I'm getting this error?
As you can see, the code contains a commented-out for loop with a print statement. The weird thing is that upon uncommenting that loop, it works fine.
Thoughts:
MPI_Init should be the first thing in your program.
Only one rank should scanf.
N is not communicated across ranks, so you are allocating memory of undefined size.
Define variables as close to their point of usage as possible. Putting int i at the top of your function is a disaster waiting to happen.
The barrier at the end is unnecessary.
All the ranks need to allocate their own memory.
That gets us to this code:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
int main(int argc, char **argv){
MPI_Init(&argc, &argv);
const int tag = 99;
const int tag1 = 100;
int rank, size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
double *a; //Pointer to the memory we will allocate
int N;
if (rank == 0){
scanf("%d", &N);
a = (double *)malloc(N * sizeof(double));
for(int j=0;j<N;++j){
a[j] = j+0.1;
}
for (int i = 1; i < size; i++){
MPI_Send(&N, 1, MPI_INT, i, tag1, MPI_COMM_WORLD);
MPI_Send(a, N, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
}
} else {
MPI_Status status;
MPI_Recv(&N, 1, MPI_INT, 0, tag1, MPI_COMM_WORLD, &status);
//Have to allocate memory on all ranks
a = (double *)malloc(N * sizeof(double));
MPI_Recv(a, N, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
// for(int j=0;j<N*2;++j)
// printf("%d %f\n", rank, a[j]);
}
printf("Message from process %d : %f\n", rank, a[rank]);
MPI_Finalize();
return 0;
}
Doing It Better
The broadcast command is your friend here:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define MPI_Error_Check(x) {const int err=x; if(err!=MPI_SUCCESS) { fprintf(stderr, "MPI ERROR %d at %d.", err, __LINE__);}}
int main(int argc, char **argv){
MPI_Init(&argc, &argv);
int rank, size;
MPI_Error_Check(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
MPI_Error_Check(MPI_Comm_size(MPI_COMM_WORLD, &size));
int N;
if (rank==0){
scanf("%d", &N);
}
MPI_Error_Check(MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD));
double *a = (double *)malloc(N * sizeof(double));
if(rank==0){
for(int j=0;j<N;++j){
a[j] = j+0.1;
}
}
printf("Message from process %d : N=%d\n", rank, N);
MPI_Error_Check(MPI_Bcast(a, N, MPI_DOUBLE, 0, MPI_COMM_WORLD));
fprintf(stderr, "Message from process %d : %f\n", rank, a[rank]);
free(a);
MPI_Finalize();
return 0;
}
Doing It Even Better
The fastest form of communication is no communication at all. In your case, once the value N is known each rank can recreate the data on its own:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define MPI_Error_Check(x) {const int err=x; if(err!=MPI_SUCCESS) { fprintf(stderr, "MPI ERROR %d at %d.", err, __LINE__);}}
int main(int argc, char **argv){
MPI_Init(&argc, &argv);
int rank, size;
MPI_Error_Check(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
MPI_Error_Check(MPI_Comm_size(MPI_COMM_WORLD, &size));
int N;
if (rank==0){
scanf("%d", &N);
}
MPI_Error_Check(MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD));
double *a = (double *)malloc(N * sizeof(double));
for(int j=0;j<N;++j){
a[j] = j+0.1;
}
printf("Message from process %d : N=%d\n", rank, N);
fprintf(stderr, "Message from process %d : %f\n", rank, a[rank]);
free(a);
MPI_Finalize();
return 0;
}
I am trying to find the sum of an array of 100 elements using MPI, under the restriction of using only MPI_Send and MPI_Recv. The code I have written computes the partial sum on each processor, but when the results are sent back to the main processor (rank 0), my code only receives from one processor.
My Code
#include "stdafx.h"
#include <stdio.h>
#include <string.h>
#include "mpi.h"
#include "math.h"
int val = 1;
int main(int argc, char* argv[]) {
int my_rank;
int p;
int ierr;
int i;
int a[100];
int q=0;
for (i = 0; i <100; i++)
{
a[i] = i+1;
}
int send,recv;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
int part = 100 /(p-1);
if (my_rank == 0)
{
for (i = 1; i < p; i++)
{
send = part * (i-1);
MPI_Send(&send, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
}
}
else
{
MPI_Recv(&recv, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
for (i = recv; i < recv + part; i++)
{
val = val+a[i];
}
printf("%d\n", val);
MPI_Send(&val, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
if (my_rank == 0)
{
MPI_Recv(&val, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
printf("%d", val);
q = q + val;
}
MPI_Finalize();
if (my_rank == 0)
{
printf("The output is %d\n", q);
}
return 0;
}
My output
Where am I going wrong?
Because you only receive the result from one process. To receive all results, iterate over the process ranks:
if (my_rank == 0)
{
    for (int i = 1; i < p; i++)
    {
        MPI_Recv(&val, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status);
        printf("value of rank %d is %d\n", i, val);
        q = q + val;
    }
}
Ordinarily, this is bad practice and may lead to deadlocks. Use MPI_Gather() if allowed.
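For a sum specifically, MPI_Reduce is even more direct than a gather, since it combines the partial values on the way in. A standalone sketch, assuming every rank (including rank 0) sums its own slice and that the process count divides 100 evenly (note that the original code also starts every partial sum at val = 1, which inflates the total):
#include <stdio.h>
#include "mpi.h"

int main(int argc, char *argv[]) {
    int my_rank, p;
    int a[100];
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    for (int i = 0; i < 100; i++)
        a[i] = i + 1;
    int part = 100 / p;  /* assumes p divides 100 */
    int val = 0;         /* partial sum over this rank's slice */
    for (int i = my_rank * part; i < (my_rank + 1) * part; i++)
        val += a[i];
    int q = 0;
    MPI_Reduce(&val, &q, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (my_rank == 0)
        printf("The output is %d\n", q); /* 5050 when p divides 100 */
    MPI_Finalize();
    return 0;
}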