This question already has answers here:
Sending and receiving 2D array over MPI
(3 answers)
Closed 2 years ago.
I am trying to share a dynamically allocated 2D array from a master process to several other processes using MPI in C, from within a function.
A simplified representation of the relevant code is as follows:
//Initialize the program and start up the desired number of processes.
//The master process takes input from the user, then dynamically allocates and constructs the 2D array.
//All processes call analyze_inputs(**array), which takes the array as input (all processes other than the master simply pass NULL as the argument).
//The master process sends the array, along with the work division, to all other processes:
{//Master process
MPI_Send(&x, 1, MPI_INT, recievingThread, 0, MPI_COMM_WORLD);
MPI_Send(&y, 1, MPI_INT, recievingThread, 0, MPI_COMM_WORLD);
MPI_Send(&(array[0][0]), x*y, MPI_INT, recievingThread, 0, MPI_COMM_WORLD);
}
{//Worker processes
MPI_Recv(&x, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&y, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&(array[0][0]), x*y, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
This is a solution I found on this site for sending dynamically allocated 2D arrays, but I get a segmentation fault on the array receive.
How can I do this?
edit: Minimal reproducible example
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
int analyze_inputs (int x, int y, int** array);
int main (int argc, char **argv)
{
int x = 10;
int y = 8;
int rank;
int **array = NULL;
MPI_Init (&argc, &argv);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
if (rank == 0)
{
array = malloc(x * sizeof(int*));
for (int i = 0; i < x; i++)
{
array[i] = malloc(y * sizeof(int));
}
for (int i = 0; i < x; i++)
{
for (int j = 0; j < y; j++)
{
array[i][j] = rand();
}
}
}
analyze_inputs(x,y,array);
MPI_Finalize ();
}
int analyze_inputs(int x,int y, int** array)
{
int rank, x_temp, y_temp, **array_temp;
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
if (rank == 0)
{
MPI_Send(&x, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
MPI_Send(&y, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
MPI_Send(&(array[0][0]), x*y, MPI_INT, 1, 0, MPI_COMM_WORLD);
}
else
{
MPI_Recv(&x_temp, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&y_temp, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Works to here.\n");
MPI_Recv(&(array_temp[0][0]), x_temp*y_temp, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Crashes before here.\n");
}
}
Each row of array is allocated separately in your code, so a simple
MPI_Send(&(array[0][0]), x*y, MPI_INT, 1, 0, MPI_COMM_WORLD);
won't work in this case.
A simple solution is to allocate a single contiguous block of memory like this:
array = malloc(x * sizeof(int*));
array[0] = malloc(y * x * sizeof(int));
for (int i = 1; i < x; i++)
{
array[i] = array[0] + y * i;
}
And freeing this array becomes:
free(array[0]);
free(array);
Do not free array[1], array[2], ... in this case; those row pointers all point into the single block that free(array[0]) already releases.
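For completeness, a hedged sketch of how the receiving branch of analyze_inputs could then look, assuming the receiver allocates array_temp with the same contiguous scheme once it knows the dimensions (the original code received into an uninitialized pointer, which is what segfaulted):
MPI_Recv(&x_temp, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&y_temp, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* Allocate the contiguous layout before receiving into it. */
array_temp = malloc(x_temp * sizeof(int*));
array_temp[0] = malloc(x_temp * y_temp * sizeof(int));
for (int i = 1; i < x_temp; i++)
{
    array_temp[i] = array_temp[0] + y_temp * i;
}
MPI_Recv(&(array_temp[0][0]), x_temp * y_temp, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);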
Related
I have this C code that evaluates a polynomial over arrays of values, and I'm trying to run it on a cluster using MPI.
int main(int argc, char **argv)
{
int id;
int n;
int i, size, arraySize;
double *vet, valor, *vresp, resposta, tempo, a[GRAU + 1];
int hostsize;
char hostname[MPI_MAX_PROCESSOR_NAME];
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Get_processor_name(hostname, &hostsize);
MPI_Comm_rank(MPI_COMM_WORLD, &id);
MPI_Comm_size(MPI_COMM_WORLD, &n);
if (id == 0) // Master
{
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&a, GRAU, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for (size = TAM_INI; size <= TAM_MAX; size += TAM_INC)
{
tempo = -MPI_Wtime();
for (int dest = 1; dest < n; ++dest)
{
int ini = 0;
int fim = dest * size / (n - 1);
int tam = fim - ini;
MPI_Send(&ini, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
MPI_Send(&tam, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
MPI_Send(&x[ini], tam, MPI_DOUBLE, dest, 0, MPI_COMM_WORLD);
ini = fim;
fflush(stdout);
}
int total = 0;
for (int dest = 1; dest < n; ++dest)
{
int ini_escravo;
int tam_escravo;
MPI_Recv(&ini_escravo, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&tam_escravo, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&y[ini_escravo], tam_escravo, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
}
tempo += MPI_Wtime();
}
}
else
{ // Slave
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&a, GRAU, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for (arraySize = TAM_INI; arraySize <= TAM_MAX; arraySize += TAM_INC)
{
int ini, tam;
MPI_Recv(&ini, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&tam, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&x[0], tam, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
for (i = 0; i < tam; ++i)
y[i] = polinomio(a, GRAU, x[i]);
MPI_Send(&ini, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&tam, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&y[0], tam, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
fflush(stdout);
}
}
MPI_Finalize();
return 0;
}
The code works fine when I run it with 16 tasks or fewer per node. If I try to run it with 32 tasks (16 per node, on 2 nodes), I get the following message:
[06:272259] *** An error occurred in MPI_Recv
[06:272259] *** reported by process [2965045249,0]
[06:272259] *** on communicator MPI_COMM_WORLD
[06:272259] *** MPI_ERR_TRUNCATE: message truncated
[06:272259] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[06:272259] *** and potentially your MPI job)
[07][[45243,1],31][btl_tcp.c:559:mca_btl_tcp_recv_blocking] recv(20) failed: Connection reset by peer (104)
Any idea about what I am missing here?
I want to perform matrix multiplication. I have to write two versions of the code, one with blocking and one with non-blocking MPI communication. I have finished the blocking version, and I want some help converting it to non-blocking MPI.
This is the matrix multiplication code with blocking communication that I want to convert to non-blocking MPI:
#include <stdlib.h>
#include <stdio.h>
#include "mpi.h"
#include <time.h>
#include <sys/time.h>
// Number of rows and columnns in a matrix
#define N 4
MPI_Status status;
// Matrix holders are created
double matrix_a[N][N],matrix_b[N][N],matrix_c[N][N];
int main(int argc, char **argv)
{
int processCount, processId, slaveTaskCount, source, dest, rows, offset;
struct timeval start, stop;
// MPI environment is initialized
MPI_Init(&argc, &argv);
// Each process gets unique ID (rank)
MPI_Comm_rank(MPI_COMM_WORLD, &processId);
// Number of processes in communicator will be assigned to variable -> processCount
MPI_Comm_size(MPI_COMM_WORLD, &processCount);
// Number of slave tasks will be assigned to variable -> slaveTaskCount
slaveTaskCount = processCount - 1;
// Root (Master) process
if (processId == 0) {
// Matrix A and Matrix B both will be filled with random numbers
srand ( time(NULL) );
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
matrix_a[i][j]= rand()%10;
matrix_b[i][j]= rand()%10;
}
}
printf("\n\t\tMatrix - Matrix Multiplication using MPI\n");
// Print Matrix A
printf("\nMatrix A\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
printf("%.0f\t", matrix_a[i][j]);
}
printf("\n");
}
// Print Matrix B
printf("\nMatrix B\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
printf("%.0f\t", matrix_b[i][j]);
}
printf("\n");
}
rows = N/slaveTaskCount;
offset = 0;
for (dest=1; dest <= slaveTaskCount; dest++)
{
// Acknowledging the offset of the Matrix A
MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
// Acknowledging the number of rows
MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
// Send rows of the Matrix A which will be assigned to slave process to compute
MPI_Send(&matrix_a[offset][0], rows*N, MPI_DOUBLE,dest,1, MPI_COMM_WORLD);
// Matrix B is sent
MPI_Send(&matrix_b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
// Offset is modified according to number of rows sent to each process
offset = offset + rows;
}
for (int i = 1; i <= slaveTaskCount; i++)
{
source = i;
// Receive the offset of particular slave process
MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
// Receive the number of rows that each slave process processed
MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
// Calculated rows of the each process will be stored int Matrix C according to their offset and
// the processed number of rows
MPI_Recv(&matrix_c[offset][0], rows*N, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status);
}
// Print the result matrix
printf("\nResult Matrix C = Matrix A * Matrix B:\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++)
printf("%.0f\t", matrix_c[i][j]);
printf ("\n");
}
printf ("\n");
}
// Slave Processes
if (processId > 0) {
// Source process ID is defined
source = 0;
MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives number of rows sent by root process
MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives the sub portion of the Matrix A which assigned by Root
MPI_Recv(&matrix_a, rows*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives the Matrix B
MPI_Recv(&matrix_b, N*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
// Matrix multiplication
for (int k = 0; k<N; k++) {
for (int i = 0; i<rows; i++) {
// Set initial value of the row summataion
matrix_c[i][k] = 0.0;
// Matrix A's element(i, j) will be multiplied with Matrix B's element(j, k)
for (int j = 0; j<N; j++)
matrix_c[i][k] = matrix_c[i][k] + matrix_a[i][j] * matrix_b[j][k];
}
}
// value in matrix C
MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
// Number of rows the process calculated will be sent to root process
MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
// Resulting matrix with calculated rows will be sent to root process
MPI_Send(&matrix_c, rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
}
MPI_Finalize();
}
Look at non-blocking communication this way: instead of spelling out "now I send this, now you receive that", at each stage of the computation you decide "what are all the messages that will be communicated here". Then you post an Isend for every send and an Irecv for every corresponding receive, and finally wait on all the resulting requests.
One problem is that each of these Isend/Irecv operations needs its own buffer, so you may need to allocate some more memory.
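As a hedged sketch of that pattern, not a complete solution: the master's row-distribution loop from the blocking code could become something like the following, where rowsPerWorker, offsets and reqs are illustrative names that are not in the original program:
#include <mpi.h>
#include <stdlib.h>
#define N 4
/* Distribute rows of a contiguous N x N matrix with non-blocking sends. */
void distribute_rows_nonblocking(double matrix_a[N][N], int slaveTaskCount)
{
    int rowsPerWorker = N / slaveTaskCount;
    /* Three sends per worker, so one request slot for each of them. */
    MPI_Request *reqs = malloc(3 * slaveTaskCount * sizeof(MPI_Request));
    /* Each Isend buffer must stay untouched until the wait completes,
       hence one offset slot per worker instead of a single variable. */
    int *offsets = malloc(slaveTaskCount * sizeof(int));
    for (int dest = 1; dest <= slaveTaskCount; dest++) {
        int k = dest - 1;
        offsets[k] = k * rowsPerWorker;
        MPI_Isend(&offsets[k], 1, MPI_INT, dest, 1, MPI_COMM_WORLD, &reqs[3*k]);
        MPI_Isend(&rowsPerWorker, 1, MPI_INT, dest, 1, MPI_COMM_WORLD, &reqs[3*k+1]);
        MPI_Isend(&matrix_a[offsets[k]][0], rowsPerWorker * N, MPI_DOUBLE,
                  dest, 1, MPI_COMM_WORLD, &reqs[3*k+2]);
    }
    /* Wait for all sends; only now may the buffers be reused or freed. */
    MPI_Waitall(3 * slaveTaskCount, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    free(offsets);
}
On the worker side, the matching receives would likewise become MPI_Irecv calls followed by MPI_Waitall before the received data is used.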
So let's say I'm using MPI and I want to be able to send a number of rows of a matrix of integers from the main process to other processes. It's relatively easy to do so, like this:
MPI_Send(&matrix[start_row][0], amount_of_cells, MPI_INT, target_process, 1, MPI_COMM_WORLD);
Now let's say that in our matrix, instead of each cell holding an integer, each cell holds a reference to an integer array of size 2. How could we send a number of rows of the new matrix to subprocesses?
I was thinking of doing the same thing as the code above but doubling the amount_of_cells variable, because each cell holds a reference to an integer array of size 2. However, it doesn't seem to work, and I'm at a bit of a loss here.
Any tips or advice on how to approach this would be helpful.
Old matrix:
_________
| 1 | 2 |
---------
| 3 | 4 |
_________
New matrix:
___________________
| [1, 0] | [2, 0] |
-------------------
| [3, 0] | [4, 0] |
___________________
So instead of holding integers, each cell holds a reference to an array of two integers created using malloc(). How could I send rows of this new matrix to subprocesses?
I was considering defining my own MPI datatype, which could be a place to start (a sketch of that idea follows).
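A hedged sketch of that datatype idea, assuming the per-cell pairs are first packed into one contiguous buffer, since MPI cannot follow the per-cell pointers for you; the function and variable names here are illustrative, not an established API:
#include <mpi.h>
#include <stdlib.h>
/* Send `cells` matrix cells, each an int[2] reached through a pointer. */
void send_row_of_pairs(int **row, int cells, int target_process, MPI_Comm comm)
{
    /* A derived type describing one pair of ints. */
    MPI_Datatype pair_t;
    MPI_Type_contiguous(2, MPI_INT, &pair_t);
    MPI_Type_commit(&pair_t);
    /* Pack into one contiguous buffer: MPI cannot chase row[i] pointers. */
    int *flat = malloc(2 * cells * sizeof(int));
    for (int i = 0; i < cells; i++)
    {
        flat[2 * i] = row[i][0];
        flat[2 * i + 1] = row[i][1];
    }
    MPI_Send(flat, cells, pair_t, target_process, 1, comm);
    free(flat);
    MPI_Type_free(&pair_t);
}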
Please try this code, which distributes a flat int array across processes and sums it with MPI in C:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define n 10
int a[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
int a2[1000];
int main(int argc, char* argv[])
{
int pid, np,
elements_per_process,
n_elements_recieved;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
MPI_Comm_size(MPI_COMM_WORLD, &np);
if (pid == 0) {
int index, i;
elements_per_process = n / np;
if (np > 1) {
for (i = 1; i < np - 1; i++) {
index = i * elements_per_process;
MPI_Send(&elements_per_process,
1, MPI_INT, i, 0,
MPI_COMM_WORLD);
MPI_Send(&a[index],
elements_per_process,
MPI_INT, i, 0,
MPI_COMM_WORLD);
}
index = i * elements_per_process;
int elements_left = n - index;
MPI_Send(&elements_left,
1, MPI_INT,
i, 0,
MPI_COMM_WORLD);
MPI_Send(&a[index],
elements_left,
MPI_INT, i, 0,
MPI_COMM_WORLD);
}
int sum = 0;
for (i = 0; i < elements_per_process; i++)
sum += a[i];
int tmp;
for (i = 1; i < np; i++) {
MPI_Recv(&tmp, 1, MPI_INT,
MPI_ANY_SOURCE, 0,
MPI_COMM_WORLD,
&status);
int sender = status.MPI_SOURCE;
sum += tmp;
}
printf("Sum of array is : %d\n", sum);
}
else {
MPI_Recv(&n_elements_recieved,
1, MPI_INT, 0, 0,
MPI_COMM_WORLD,
&status);
MPI_Recv(&a2, n_elements_recieved,
MPI_INT, 0, 0,
MPI_COMM_WORLD,
&status);
int partial_sum = 0;
for (int i = 0; i < n_elements_recieved; i++)
partial_sum += a2[i];
MPI_Send(&partial_sum, 1, MPI_INT,
0, 0, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
I hope this code will be useful.
Thank you.
I have some problems understanding how shared memory works. There is one main process and N others. The main process sends data to the others; I did it like this (the data for process i is placed in shared_mem[i]):
int *shared_mem = calloc(numb_of_parts, sizeof(double));
if(world_rank == 0)
{
for(int i = 1; i < numb_of_parts; i++)
{
MPI_Send(shared_mem+i, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
}
}
Next, the other processes calculate something and write the data back into the same cell:
{
MPI_Recv(shared_mem+world_rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* do smth with shared_mem[i] */
MPI_Send(shared_mem+world_rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
Then I wait for all processes and want to compute the sum of all cells (with the new data) in the main process:
MPI_Barrier(MPI_COMM_WORLD);
if(world_rank == 0)
{
for(int i = 0; i < numb_of_parts; i++)
{
sum += shared_mem[i];
}
}
But as a result I always get the sum of the previous data, i.e. the array in the main process hasn't changed. What is wrong?
Could you try declaring double *shared_mem = calloc(numb_of_parts, sizeof(double));? At the moment it is declared as int *, so shared_mem[i] and shared_mem + i may not be what you expect, since the size of int can differ from the size of double.
Moreover, there are features of MPI which can significantly help you:
The functions MPI_Scatter() and MPI_Reduce() with MPI_SUM can be combined; a sketch is given at the end of this answer.
You can allocate shared memory between processes in a given communicator using MPI_Win_allocate_shared(), if such a thing is possible.
And @Gilles is right: the buffer shared_mem is not shared between processes. Indeed, each process allocates its own shared_mem, and this is the reason why message passing is required.
Here is a working code based on your code snippets. I had to add the receive part for the root process. Is that what was missing? Compile with mpicc main.c -o main -std=c99 and run it with mpirun -np 4 ./main.
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
/* This is an interactive version of cpi */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc,char *argv[])
{
int numb_of_parts, rank;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&numb_of_parts);
double *mem = calloc(numb_of_parts, sizeof(double));
if(rank == 0)
{
mem[0]=0;
for(int i = 1; i < numb_of_parts; i++)
{
mem[i]=i;
MPI_Send(mem+i, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
}
for(int i = 1; i < numb_of_parts; i++)
{
MPI_Recv(mem+i, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
}else{
MPI_Recv(mem+rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* do smth with shared_mem[i] */
mem[rank]=mem[rank]*2;
MPI_Send(mem+rank, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
double sum=0;
if(rank == 0)
{
for(int i = 0; i < numb_of_parts; i++)
{
sum += mem[i];
}
printf("sum is %g\n",sum);
}
MPI_Finalize();
return 0;
}
The problem can also be in the /* do smth with shared_mem[i] */ part: if it does nothing, or if it does not modify mem[rank], the sum will not change.
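As mentioned earlier, here is a hedged sketch of the MPI_Scatter()/MPI_Reduce() combination; the doubling stands in for the /* do smth */ computation:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
    int rank, numb_of_parts;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numb_of_parts);
    double *mem = NULL;
    if (rank == 0)
    {
        mem = malloc(numb_of_parts * sizeof(double));
        for (int i = 0; i < numb_of_parts; i++)
            mem[i] = i;
    }
    /* One element lands on each rank; no hand-written Send/Recv pairs. */
    double local;
    MPI_Scatter(mem, 1, MPI_DOUBLE, &local, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    local = local * 2; /* the per-process work */
    /* The sum of all modified values lands back on the root. */
    double sum = 0;
    MPI_Reduce(&local, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0)
    {
        printf("sum is %g\n", sum);
        free(mem);
    }
    MPI_Finalize();
    return 0;
}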
I'm writing a matrix multiplication program in Open MPI, and I got this error message:
[Mecha Liberta:12337] *** Process received signal ***
[Mecha Liberta:12337] Signal: Segmentation fault (11)
[Mecha Liberta:12337] Signal code: Address not mapped (1)
[Mecha Liberta:12337] Failing at address: 0xbfe4f000
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 12337 on node Mecha Liberta exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
That's how I define the matrices:
int **a, **b, **r;
a = (int **)calloc(l,sizeof(int));
b = (int **)calloc(l,sizeof(int));
r = (int **)calloc(l,sizeof(int));
for (i = 0; i < l; i++)
a[i] = (int *)calloc(c,sizeof(int));
for (i = 0; i < l; i++)
b[i] = (int *)calloc(c,sizeof(int));
for (i = 0; i < l; i++)
r[i] = (int *)calloc(c,sizeof(int));
And here are my Send/Recv calls (I'm pretty sure my problem is here):
MPI_Send(&sent, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&lines, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&(a[sent][0]), lines*NCA, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&b, NCA*NCB, MPI_INT, dest, tag, MPI_COMM_WORLD);
and:
MPI_Recv(&sent, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&lines, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&a, lines*NCA, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&b, NCA*NCB, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
Can anyone see where the problem is?
This is a common problem with C and multidimensional arrays and MPI.
In this line, say:
MPI_Send(&b, NCA*NCB, MPI_INT, dest, tag, MPI_COMM_WORLD);
you're telling MPI to send NCA*NCB integers starting at b to rank dest in MPI_COMM_WORLD with tag tag. But b isn't a pointer to NCA*NCB integers; it's a pointer to NCA pointers to NCB integers.
So what you want to do is to ensure your arrays are contiguous (probably better for performance anyway), using something like this:
int **alloc_2d_int(int rows, int cols) {
int *data = (int *)malloc(rows*cols*sizeof(int));
int **array= (int **)malloc(rows*sizeof(int*));
for (int i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
/* .... */
int **a, **b, **r;
a = alloc_2d_int(l, c);
b = alloc_2d_int(l, c);
r = alloc_2d_int(l, c);
and then
MPI_Send(&sent, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&lines, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&(a[sent][0]), lines*NCA, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Send(&(b[0][0]), NCA*NCB, MPI_INT, dest, tag, MPI_COMM_WORLD);
MPI_Recv(&sent, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&lines, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&(a[0][0]), lines*NCA, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&(b[0][0]), NCA*NCB, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
should work as expected.
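One small follow-up: since this layout uses exactly two allocations per matrix, the matching cleanup is just (a sketch, mirroring alloc_2d_int):
void free_2d_int(int **array) {
    free(array[0]); /* the contiguous block of data */
    free(array);    /* the array of row pointers */
}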