I have the code below:
#include <stdio.h>
#include "mpi.h"
#define NRA 512 /* number of rows in matrix A */
#define NCA 512 /* number of columns in matrix A */
#define NCB 512 /* number of columns in matrix B */
#define MASTER 0 /* taskid of first task */
#define FROM_MASTER 1 /* setting a message type */
#define FROM_WORKER 2 /* setting a message type */
MPI_Status status;
double a[NRA][NCA], /* matrix A to be multiplied */
b[NCA][NCB], /* matrix B to be multiplied */
c[NRA][NCB]; /* result matrix C */
int main(int argc, char **argv)
{
int numtasks, /* number of tasks in partition */
taskid, /* a task identifier */
numworkers, /* number of worker tasks */
source, /* task id of message source */
dest, /* task id of message destination */
nbytes, /* number of bytes in message */
mtype, /* message type */
intsize, /* size of an integer in bytes */
dbsize, /* size of a double float in bytes */
rows, /* rows of matrix A sent to each worker */
averow, extra, offset, /* used to determine rows sent to each worker */
i, j, k, /* misc */
count;
struct timeval start, stop;
intsize = sizeof(int);
dbsize = sizeof(double);
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
numworkers = numtasks-1;
//printf(" size of matrix A = %d by %d\n",NRA,NCA);
//printf(" size of matrix B = %d by %d\n",NRA,NCB);
/*---------------------------- master ----------------------------*/
if (taskid == MASTER) {
printf("Number of worker tasks = %d\n",numworkers);
for (i=0; i<NRA; i++)
for (j=0; j<NCA; j++)
a[i][j]= i+j;
for (i=0; i<NCA; i++)
for (j=0; j<NCB; j++)
b[i][j]= i*j;
gettimeofday(&start, 0);
/* send matrix data to the worker tasks */
averow = NRA/numworkers;
extra = NRA%numworkers;
offset = 0;
mtype = FROM_MASTER;
for (dest=1; dest<=numworkers; dest++) {
rows = (dest <= extra) ? averow+1 : averow;
//printf(" Sending %d rows to task %d\n",rows,dest);
MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
count = rows*NCA;
MPI_Send(&a[offset][0], count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
count = NCA*NCB;
MPI_Send(&b, count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
offset = offset + rows;
}
/* wait for results from all worker tasks */
mtype = FROM_WORKER;
for (i=1; i<=numworkers; i++) {
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
count = rows*NCB;
MPI_Recv(&c[offset][0], count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD,
&status);
}
#ifdef PRINT
printf("Here is the result matrix\n");
for (i=0; i<NRA; i++) {
printf("\n");
for (j=0; j<NCB; j++)
printf("%6.2f ", c[i][j]);
}
printf ("\n");
#endif
gettimeofday(&stop, 0);
fprintf(stdout,"Time = %.6f\n\n",
(stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6));
} /* end of master section */
/*---------------------------- worker (slave)----------------------------*/
if (taskid > MASTER) {
mtype = FROM_MASTER;
source = MASTER;
#ifdef PRINT
printf ("Master =%d, mtype=%d\n", source, mtype);
#endif
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("offset =%d\n", offset);
#endif
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("row =%d\n", rows);
#endif
count = rows*NCA;
MPI_Recv(&a, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("a[0][0] =%e\n", a[0][0]);
#endif
count = NCA*NCB;
MPI_Recv(&b, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("b=\n");
#endif
for (k=0; k<NCB; k++)
for (i=0; i<rows; i++) {
c[i][k] = 0.0;
for (j=0; j<NCA; j++)
c[i][k] = c[i][k] + a[i][j] * b[j][k];
}
//mtype = FROM_WORKER;
#ifdef PRINT
printf ("after computer\n");
#endif
//MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&offset, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
//MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
//MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, FROM_WORKER, MPI_COMM_WORLD);
#ifdef PRINT
printf ("after send\n");
#endif
} /* end of worker */
MPI_Finalize();
} /* end of main */
When I try to compile it, I get the following errors and warning:
matriks.c(43): error C2079: 'start' uses undefined struct 'timeval'
matriks.c(43): error C2079: 'stop' uses undefined struct 'timeval'
matriks.c(65): warning C4013: 'gettimeofday' undefined; assuming extern returning int
matriks.c(111): error C2224: left of '.tv_sec' must have struct/union type
matriks.c(111): error C2224: left of '.tv_usec' must have struct/union type
matriks.c(111): error C2224: left of '.tv_sec' must have struct/union type
matriks.c(111): error C2224: left of '.tv_usec' must have struct/union type
Please help, I don't know where the error is. Thank you.
I think you'll probably find that struct timeval requires you to include sys/time.h on POSIX systems (it's not part of standard C). See the POSIX SUSv2 page for details.
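For example, a minimal sketch of the fix (assuming a POSIX system) is to add the header at the top of matriks.c:

#include <stdio.h>
#include <sys/time.h> /* declares struct timeval and gettimeofday() */
#include "mpi.h"

Note that C2079/C4013 are Microsoft compiler diagnostics, and MSVC has no sys/time.h; on that platform a portable alternative is to time the region with MPI_Wtime(), which any MPI implementation provides:

double t0 = MPI_Wtime();
/* ... the work being timed ... */
printf("Time = %.6f\n", MPI_Wtime() - t0);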
Related
I have this C code that evaluates a polynomial over arrays, which I'm trying to run on a cluster using MPI.
int main(int argc, char **argv)
{
int id;
int n;
int i, size, arraySize;
double *vet, valor, *vresp, resposta, tempo, a[GRAU + 1];
int hostsize;
char hostname[MPI_MAX_PROCESSOR_NAME];
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Get_processor_name(hostname, &hostsize);
MPI_Comm_rank(MPI_COMM_WORLD, &id);
MPI_Comm_size(MPI_COMM_WORLD, &n);
if (id == 0) // Master
{
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&a, GRAU, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for (size = TAM_INI; size <= TAM_MAX; size += TAM_INC)
{
tempo = -MPI_Wtime();
for (int dest = 1; dest < n; ++dest)
{
int ini = 0;
int fim = dest * size / (n - 1);
int tam = fim - ini;
MPI_Send(&ini, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
MPI_Send(&tam, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
MPI_Send(&x[ini], tam, MPI_DOUBLE, dest, 0, MPI_COMM_WORLD);
ini = fim;
fflush(stdout);
}
int total = 0;
for (int dest = 1; dest < n; ++dest)
{
int ini_escravo;
int tam_escravo;
MPI_Recv(&ini_escravo, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&tam_escravo, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&y[ini_escravo], tam_escravo, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
}
tempo += MPI_Wtime();
}
}
else
{ // Slave
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&a, GRAU, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for (arraySize = TAM_INI; arraySize <= TAM_MAX; arraySize += TAM_INC)
{
int ini, tam;
MPI_Recv(&ini, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&tam, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&x[0], tam, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
for (i = 0; i < tam; ++i)
y[i] = polinomio(a, GRAU, x[i]);
MPI_Send(&ini, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&tam, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&y[0], tam, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
fflush(stdout);
}
}
MPI_Finalize();
return 0;
}
The code works fine when I run it using 16 tasks or fewer. If I try to run it using 32 tasks (16 per node, with 2 nodes), I get the following message:
[06:272259] *** An error occurred in MPI_Recv [06:272259] *** reported
by process [2965045249,0] [06:272259] *** on communicator
MPI_COMM_WORLD [06:272259] *** MPI_ERR_TRUNCATE: message truncated
[06:272259] *** MPI_ERRORS_ARE_FATAL (processes in this communicator
will now abort, [06:272259] *** and potentially your MPI job)
[07][[45243,1],31][btl_tcp.c:559:mca_btl_tcp_recv_blocking] recv(20)
failed: Connection reset by peer (104)
Any idea about what I am missing here?
I want to perform matrix multiplication. I have to write two programs, one with blocking MPI and one with non-blocking MPI. I have finished the blocking version and would like help converting the code below to non-blocking MPI.
This is the matrix multiplication code with blocking communication that I want to convert to non-blocking.
#include <stdlib.h>
#include <stdio.h>
#include "mpi.h"
#include <time.h>
#include <sys/time.h>
// Number of rows and columnns in a matrix
#define N 4
MPI_Status status;
// Matrix holders are created
double matrix_a[N][N],matrix_b[N][N],matrix_c[N][N];
int main(int argc, char **argv)
{
int processCount, processId, slaveTaskCount, source, dest, rows, offset;
struct timeval start, stop;
// MPI environment is initialized
MPI_Init(&argc, &argv);
// Each process gets unique ID (rank)
MPI_Comm_rank(MPI_COMM_WORLD, &processId);
// Number of processes in communicator will be assigned to variable -> processCount
MPI_Comm_size(MPI_COMM_WORLD, &processCount);
// Number of slave tasks will be assigned to variable -> slaveTaskCount
slaveTaskCount = processCount - 1;
// Root (Master) process
if (processId == 0) {
// Matrix A and Matrix B both will be filled with random numbers
srand ( time(NULL) );
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
matrix_a[i][j]= rand()%10;
matrix_b[i][j]= rand()%10;
}
}
printf("\n\t\tMatrix - Matrix Multiplication using MPI\n");
// Print Matrix A
printf("\nMatrix A\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
printf("%.0f\t", matrix_a[i][j]);
}
printf("\n");
}
// Print Matrix B
printf("\nMatrix B\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
printf("%.0f\t", matrix_b[i][j]);
}
printf("\n");
}
rows = N/slaveTaskCount;
offset = 0;
for (dest=1; dest <= slaveTaskCount; dest++)
{
// Acknowledging the offset of the Matrix A
MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
// Acknowledging the number of rows
MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
// Send rows of the Matrix A which will be assigned to slave process to compute
MPI_Send(&matrix_a[offset][0], rows*N, MPI_DOUBLE,dest,1, MPI_COMM_WORLD);
// Matrix B is sent
MPI_Send(&matrix_b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
// Offset is modified according to number of rows sent to each process
offset = offset + rows;
}
for (int i = 1; i <= slaveTaskCount; i++)
{
source = i;
// Receive the offset of particular slave process
MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
// Receive the number of rows that each slave process processed
MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
// Calculated rows of the each process will be stored int Matrix C according to their offset and
// the processed number of rows
MPI_Recv(&matrix_c[offset][0], rows*N, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status);
}
// Print the result matrix
printf("\nResult Matrix C = Matrix A * Matrix B:\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++)
printf("%.0f\t", matrix_c[i][j]);
printf ("\n");
}
printf ("\n");
}
// Slave Processes
if (processId > 0) {
// Source process ID is defined
source = 0;
MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives number of rows sent by root process
MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives the sub portion of the Matrix A which assigned by Root
MPI_Recv(&matrix_a, rows*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives the Matrix B
MPI_Recv(&matrix_b, N*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
// Matrix multiplication
for (int k = 0; k<N; k++) {
for (int i = 0; i<rows; i++) {
// Set initial value of the row summataion
matrix_c[i][k] = 0.0;
// Matrix A's element(i, j) will be multiplied with Matrix B's element(j, k)
for (int j = 0; j<N; j++)
matrix_c[i][k] = matrix_c[i][k] + matrix_a[i][j] * matrix_b[j][k];
}
}
// value in matrix C
MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
// Number of rows the process calculated will be sent to root process
MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
// Resulting matrix with calculated rows will be sent to root process
MPI_Send(&matrix_c, rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
}
MPI_Finalize();
}
Look at non-blocking this way: instead of spelling out "now I send this, now you receive that", at a given stage of the computation you decide "what are all the messages that will be communicated here". Then you post an Isend for every send and an Irecv for every corresponding receive, and finally wait on all the resulting requests.
One problem is that each of these Isend/Irecv operations needs its own buffer, so you may need to allocate some more memory.
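For example, the master's distribution loop in the code above might become something like this (a sketch under my assumptions, not a drop-in rewrite: the per-destination copies offs[] and rws[] exist because a buffer handed to MPI_Isend must not be modified or reused until the matching wait completes):

/* C99 variable-length arrays keep one copy of offset/rows per destination */
MPI_Request reqs[4 * slaveTaskCount];
int offs[slaveTaskCount + 1], rws[slaveTaskCount + 1];
int nreq = 0;
offset = 0;
rows = N/slaveTaskCount;
for (dest = 1; dest <= slaveTaskCount; dest++) {
    offs[dest] = offset;
    rws[dest] = rows;
    MPI_Isend(&offs[dest], 1, MPI_INT, dest, 1, MPI_COMM_WORLD, &reqs[nreq++]);
    MPI_Isend(&rws[dest], 1, MPI_INT, dest, 1, MPI_COMM_WORLD, &reqs[nreq++]);
    MPI_Isend(&matrix_a[offset][0], rows*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD, &reqs[nreq++]);
    MPI_Isend(&matrix_b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD, &reqs[nreq++]);
    offset = offset + rows;
}
/* all sends are now in flight; block once, when everything must have completed */
MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);

On the worker side the four MPI_Recv calls become MPI_Irecv into the same buffers, followed by a single MPI_Waitall before the multiplication loop, since the computation needs all of the data before it can start.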
I am working on a program that uses MPI_Send() and MPI_Recv() to replace MPI_Reduce().
Everything runs except the final part of the code that prints the pi approximation, the error, and the runtime. I also don't get the correct sum value after receiving.
I believe something is going wrong on the MPI_Recv() side, but I could be wrong. I am running this with only 2 processes. The program works fine with MPI_Reduce (and without pi being initialized to a value).
#include "mpi.h"
#include <stdio.h>
#include <math.h>
int main( int argc, char *argv[])
{
int n, i;
double PI25DT = 3.141592653589793238462643;
double pi, h, sum, x;
int size, rank;
double startTime, endTime;
/* Initialize MPI and get number of processes and my number or rank*/
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&size);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
/* Processor zero sets the number of intervals and starts its clock*/
if (rank==0)
{
n=600000000;
startTime=MPI_Wtime();
for (int i = 0; i < size; i++) {
if (i != rank) {
MPI_Send(&n, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
}
}
}
/* Broadcast number of intervals to all processes */
else
{
MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
/* Calculate the width of intervals */
h = 1.0 / (double) n;
/* Initialize sum */
sum = 0.0;
/* Step over each interval I own */
for (i = rank+1; i <= n; i += size)
{
/* Calculate midpoint of interval */
x = h * ((double)i - 0.5);
/* Add rectangle's area = height*width = f(x)*h */
sum += (4.0/(1.0+x*x))*h;
}
/* Get sum total on processor zero */
//MPI_Reduce(&sum,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
MPI_Send(&pi, 1, MPI_SUM, 0, 0, MPI_COMM_WORLD);
if (rank == 0)
{
double total_sum = 0;
for (int i = 0; i < size; i++)
{
MPI_Recv(&sum, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&pi, 1, MPI_SUM, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
total_sum += sum;
printf("Total Sum is %lf\n", total_sum);
}
}
/* Print approximate value of pi and runtime*/
if (rank==0)
{
printf("pi is approximately %.16f, Error is %e\n",
pi, fabs(pi - PI25DT));
endTime=MPI_Wtime();
printf("runtime is=%.16f",endTime-startTime);
}
MPI_Finalize();
return 0;
}
This
MPI_Send(&pi, 1, MPI_SUM, 0, 0, MPI_COMM_WORLD);
^^^^^^^
and
MPI_Recv(&pi, 1, MPI_SUM, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
^^^^^^^
are wrong: MPI_Send and MPI_Recv expect an MPI_Datatype as their third parameter, not an MPI_Op (i.e., MPI_SUM).
But looking at your code, what you actually want is to replace those calls with:
double pi = sum;
if (myid != 0) {
MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
else {
for (int i = 1; i < numprocs; i++) {
MPI_Recv(&value, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
pi += value;
}
}
This replicates the behavior of MPI_Reduce.
A running example:
#include "mpi.h"
#include <stdio.h>
#include <math.h>
int main( int argc, char *argv[])
{
int n, i;
double PI25DT = 3.141592653589793238462643;
double h, sum, x;
int numprocs, myid;
double startTime, endTime;
/* Initialize MPI and get number of processes and my number or rank*/
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
/* Processor zero sets the number of intervals and starts its clock*/
if (myid==0) {
n=600000000;
startTime=MPI_Wtime();
for (int i = 0; i < numprocs; i++) {
if (i != myid) {
MPI_Send(&n, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
}
}
}
else {
MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
/* Calculate the width of intervals */
h = 1.0 / (double) n;
/* Initialize sum */
sum = 0.0;
/* Step over each interval I own */
for (i = myid+1; i <= n; i += numprocs) {
/* Calculate midpoint of interval */
x = h * ((double)i - 0.5);
/* Add rectangle's area = height*width = f(x)*h */
sum += (4.0/(1.0+x*x))*h;
}
/* Get sum total on processor zero */
//MPI_Reduce(&sum,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
double value = 0;
double pi = sum;
if (myid != 0) {
MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
else {
for (int i = 1; i < numprocs; i++) {
MPI_Recv(&value, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
pi += value;
}
}
/* Print approximate value of pi and runtime*/
if (myid==0) {
printf("pi is approximately %.16f, Error is %e\n",
pi, fabs(pi - PI25DT));
endTime=MPI_Wtime();
printf("runtime is=%.16f",endTime-startTime);
}
MPI_Finalize();
return 0;
}
the output (2 processes):
pi is approximately 3.1415926535898993, Error is 1.061373e-13
runtime is=1.3594319820404053
I have some problems understanding how shared memory works. There is one main process and N others. The main process sends data to the others; I did it like this (the data for process i is placed in shared_mem[i]):
int *shared_mem = calloc(numb_of_parts, sizeof(double));
if(world_rank == 0)
{
for(int i = 1; i < numb_of_parts; i++)
{
MPI_Send(shared_mem+i, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
}
}
The other processes then calculate something and write the data back into the same cell:
{
MPI_Recv(shared_mem+world_rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* do smth with shared_mem[i] */
MPI_Send(shared_mem+world_rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
Then I wait for all processes and compute the sum of all cells (with the new data) in the main process:
MPI_Barrier(MPI_COMM_WORLD);
if(world_rank == 0)
{
for(int i = 0; i < numb_of_parts; i++)
{
sum += shared_mem[i];
}
}
But as a result I always get the sum of the previous data, i.e. the array in the main process hasn't changed. What is wrong?
Could you try declaring double *shared_mem = calloc(numb_of_parts, sizeof(double)); ? At the moment it is declared as int*, so shared_mem[i] and shared_mem+i may not be what you expect, since the size of int can differ from the size of double.
Moreover, there are features of MPI which can significantly help you:
The functions MPI_Scatter() and MPI_Reduce() with MPI_SUM can be combined; see the sketch after this list.
You can allocate shared memory between processes in a given communicator using MPI_Win_allocate_shared(), if such a thing is possible.
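For the first of those, a rough sketch (my illustration, assuming one double per process and that numb_of_parts equals the communicator size):

double local = 0.0, sum = 0.0;
/* root hands one element of shared_mem to every process, itself included */
MPI_Scatter(shared_mem, 1, MPI_DOUBLE, &local, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
local = local * 2; /* do smth with the local value */
/* root receives the sum of all transformed values */
MPI_Reduce(&local, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);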
And #Gilles is right: the buffer shared_mem is not shared between processes. Each process allocates its own shared_mem, and that is why message passing is required.
Here is a working code based on your code snippets. I had to add the receive part for the root process; is that what was missing? Compile with mpicc main.c -o main -std=c99 and run with mpirun -np 4 ./main.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc,char *argv[])
{
int numb_of_parts, rank;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&numb_of_parts);
double *mem = calloc(numb_of_parts, sizeof(double));
if(rank == 0)
{
mem[0]=0;
for(int i = 1; i < numb_of_parts; i++)
{
mem[i]=i;
MPI_Send(mem+i, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
}
for(int i = 1; i < numb_of_parts; i++)
{
MPI_Recv(mem+i, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
}else{
MPI_Recv(mem+rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* do smth with shared_mem[i] */
mem[rank]=mem[rank]*2;
MPI_Send(mem+rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
double sum=0;
if(rank == 0)
{
for(int i = 0; i < numb_of_parts; i++)
{
sum += mem[i];
}
printf("sum is %g\n",sum);
}
MPI_Finalize();
return 0;
}
The problem may be in the /* do smth with shared_mem[i] */ part: if it does nothing, or if it does not modify mem[rank], the sum will not change.
I'm writing a matrix multiplication program with Open MPI, and I got this error message:
[Mecha Liberta:12337] *** Process received signal ***
[Mecha Liberta:12337] Signal: Segmentation fault (11)
[Mecha Liberta:12337] Signal code: Address not mapped (1)
[Mecha Liberta:12337] Failing at address: 0xbfe4f000
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 12337 on node Mecha Liberta exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
That's how I define the matrices:
int **a, **b, **r;
a = (int **)calloc(l,sizeof(int));
b = (int **)calloc(l,sizeof(int));
r = (int **)calloc(l,sizeof(int));
for (i = 0; i < l; i++)
a[i] = (int *)calloc(c,sizeof(int));
for (i = 0; i < l; i++)
b[i] = (int *)calloc(c,sizeof(int));
for (i = 0; i < l; i++)
r[i] = (int *)calloc(c,sizeof(int));
And here are my Send/Recv calls (I'm pretty sure the problem is here):
MPI_Send(&sent, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&lines, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&(a[sent][0]), lines*NCA, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&b, NCA*NCB, MPI_INT, dest, tag, MPI_COMM_WORLD);
and:
MPI_Recv(&sent, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&lines, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&a, lines*NCA, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&b, NCA*NCB, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
Can anyone see where is the problem?
This is a common problem with C and multidimensional arrays and MPI.
In this line, say:
MPI_Send(&b, NCA*NCB, MPI_INT, dest, tag, MPI_COMM_WORLD);
you're telling MPI to send NCA*NCB integers starting at b to rank dest on MPI_COMM_WORLD with tag tag. But b isn't a pointer to NCA*NCB contiguous integers; it's a pointer to NCA pointers, each pointing to NCB integers.
So what you want to do is to ensure your arrays are contiguous (probably better for performance anyway), using something like this:
int **alloc_2d_int(int rows, int cols) {
int *data = (int *)malloc(rows*cols*sizeof(int));
int **array= (int **)malloc(rows*sizeof(int*));
for (int i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
/* .... */
int **a, **b, **r;
a = alloc_2d_int(l, c);
b = alloc_2d_int(l, c);
r = alloc_2d_int(l, c);
and then
MPI_Send(&sent, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&lines, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&(a[sent][0]), lines*NCA, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&(b[0][0]), NCA*NCB, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Recv(&sent, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&lines, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&(a[0][0]), lines*NCA, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&(b[0][0]), NCA*NCB, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
should work as expected.
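For completeness, a matching deallocation routine could look like this (my addition, assuming the arrays were created with alloc_2d_int above):

void free_2d_int(int **array) {
    free(array[0]); /* the contiguous block of data */
    free(array);    /* the array of row pointers */
}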