Basic matrix operations with dynamically allocated arrays using MPI

I have already looked at answers about MPI and dynamic allocation, but there is still an error in my code.
I think the send/receive pairs work correctly. The problem probably lies in the part after the communication, where I try to do some basic operations on the received data: as soon as I index into the array, I get this error:
[lyomatnuc09:07574] * Process received signal *
[lyomatnuc09:07575] * Process received signal *
[lyomatnuc09:07575] Signal: Segmentation fault (11)
[lyomatnuc09:07575] Signal code: Address not mapped (1)
[lyomatnuc09:07575] Failing at address: 0x60
The basic code that reproduces the error is below:
#include <stdlib.h>
#include <mpi.h>

int **alloc_array(int rows, int cols) {
    int *data = (int *)malloc(rows * cols * sizeof(int));
    int **array = (int **)malloc(rows * sizeof(int *));
    for (int i = 0; i < rows; i++)
        array[i] = &(data[cols * i]);
    return array;
}

int main(int argc, char *argv[])
{
    int rank, size;
    double start_time;
    MPI_Status status;

    MPI_Init(&argc, &argv);               // initialize MPI operations
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); // get the rank
    MPI_Comm_size(MPI_COMM_WORLD, &size); // get the number of processes

    MPI_Datatype columntype;
    MPI_Type_vector(10, 1, 10, MPI_INT, &columntype);
    MPI_Type_commit(&columntype);

    start_time = MPI_Wtime();
    if (rank == 0)
    {
        int **A;
        A = alloc_array(10, 10);
        for (int i = 1; i < size; i++)
        {
            MPI_Send(&(A[0][0]), 10 * 10, MPI_INT, i, 1, MPI_COMM_WORLD);
        }
    } else if (rank >= 1) {
        int **A2;
        A2 = alloc_array(10, 10);
        MPI_Recv(&(A2[0][0]), 10 * 10, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
        for (int i = 0; i < 10; i++)
        {
            for (int j = 0; j < 10; i++)  // note: this loop increments i, not j,
            {                             // so i eventually runs past the 10 allocated rows
                A2[i][j] = i * j;         // bug here: segfaults once i goes out of bounds
            }
        }
    } // end of the slaves' task

    MPI_Finalize();
    return 0;
}

Related

Can you send an array within an array using MPI_Send and MPI_Recv?

This is a very stripped-down piece of my program and, as such, is not necessarily reproducible on its own. I was wondering whether there is a way to send an array of arrays using MPI, or whether this is simply not possible and I should flatten my array. Any help would be greatly appreciated, as I've been struggling to figure this out.
int *individual_topIds;
int **cell_topIds;
cell_topIds = (int**) malloc(sizeof(int*) * 25 * boxes);
if (rank == 0) {
    for (int i = 0; i < boxes; i++) {
        individual_topIds = (int*) malloc(sizeof(int) * 25);
        for (int j = 0; j < cellMatrix[i].numTop; j++) {
            individual_topIds[j] = cellMatrix[i].aTopIds[j];
        }
        cell_topIds[i] = individual_topIds;
    }
    MPI_Send(cell_topIds, boxes*25, MPI_INT, 1, 10, MPI_COMM_WORLD);
}
Then, in my rank == 1 section (I have also tried the send and receive with a count of just boxes rather than boxes*25):
for 1 -> boxes
    MPI_Recv(cell_topIds, boxes*25, MPI_INT, 0, 10, MPI_COMM_WORLD, &status);
    int *ptop;
    ptop = (int*) malloc(sizeof(int) * 25);
    ptop = cell_topIds[i];
    printf("1\n");
    for (int j = 0; j < sizeof(&ptop)/sizeof(int); j++) {
        printf("%d, ", ptop[j]);
    }
    printf("2\n");
end for i -> boxes
free(ptop);
Edit: I forgot to mention that the output of the print is a segfault:
Caught error: Segmentation fault (signal 11)
This is not a particularly well-worded question.
However, MPI will let you send arrays of arrays if you use a custom type, as below:
#include "mpi.h"
#include <stdio.h>

struct Partstruct
{
    char   c;
    double d[6];
    char   b[7];
};

int main(int argc, char *argv[])
{
    struct Partstruct particle[1000];
    int i, myrank;
    MPI_Status status;
    MPI_Datatype Particletype;
    MPI_Datatype type[3] = { MPI_CHAR, MPI_DOUBLE, MPI_CHAR };
    int blocklen[3] = { 1, 6, 7 };
    MPI_Aint disp[3], base;

    MPI_Init(&argc, &argv);

    /* compute member displacements relative to the start of the struct */
    MPI_Get_address(&particle[0],   &base);
    MPI_Get_address(&particle[0].c, &disp[0]);
    MPI_Get_address(&particle[0].d, &disp[1]);
    MPI_Get_address(&particle[0].b, &disp[2]);
    for (i = 0; i < 3; i++)
        disp[i] -= base;

    MPI_Type_create_struct(3, blocklen, disp, type, &Particletype);
    MPI_Type_commit(&Particletype);

    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    if (myrank == 0)
    {
        MPI_Send(particle, 1000, Particletype, 1, 123, MPI_COMM_WORLD);
    }
    else if (myrank == 1)
    {
        MPI_Recv(particle, 1000, Particletype, 0, 123, MPI_COMM_WORLD, &status);
    }
    MPI_Finalize();
    return 0;
}
Alternatively, use a flat array design (this is a good idea for performance reasons as well as easy compatibility with MPI).
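As a rough sketch of what that flat design could look like for this case (the sizes and fill values here are illustrative, assuming a fixed block of 25 ids per box as in the question; run with at least 2 processes):

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    int rank;
    const int boxes = 4, width = 25;   /* illustrative sizes */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* one contiguous block: entry j of box i lives at flat[i * width + j] */
    int *flat = malloc(boxes * width * sizeof(int));

    if (rank == 0) {
        for (int i = 0; i < boxes; i++)
            for (int j = 0; j < width; j++)
                flat[i * width + j] = i * width + j;   /* fill with dummy ids */
        MPI_Send(flat, boxes * width, MPI_INT, 1, 10, MPI_COMM_WORLD);
    } else if (rank == 1) {
        MPI_Recv(flat, boxes * width, MPI_INT, 0, 10, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
    }

    free(flat);
    MPI_Finalize();
    return 0;
}

Because the data is a single contiguous allocation, one MPI_Send/MPI_Recv pair moves the whole thing, and no per-row pointers ever cross process boundaries.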

Own matrix multiplication implementation with MPI Send/Recv

I tried to write a function called matrixMultiply that simply takes two 4 x 4 matrices called a and b, multiplies them, and stores the result in the 4 x 4 matrix c. After this I wanted to expand the program into a more general one for n x n matrices. Sadly, the program compiles but gets stuck during execution. I would be very thankful if someone could tell me where my error is.
#include <stdio.h>
#include "mpi.h"

void matrixMultiply(int argc, char* argv[], int a[][4], int b[][4], int c[][4])
{
    int n = 4;
    int procs;
    int rank;
    int rootRank = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == rootRank) {
        int current_row[4];
        for (int i = 0; i < n; i++) {
            int current_column[4];
            for (int j = 0; j < n; j++) {
                // getting the i-th row
                for (int k = 0; k < n; k++) {
                    current_row[k] = a[i][k];
                }
                // getting the j-th column
                for (int k = 0; k < n; k++) {
                    current_column[k] = b[k][j];
                }
                // MPI_Send(void* data, int count, MPI_Datatype datatype, int destination, int tag, MPI_Comm communicator)
                MPI_Bsend(current_row, 4, MPI_INT, i, 0, MPI_COMM_WORLD);
                MPI_Bsend(current_column, 4, MPI_INT, i, 1, MPI_COMM_WORLD);

                int result;
                // MPI_Recv(void* data, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm communicator, MPI_Status* status)
                MPI_Recv(&result, 1, MPI_INT, i, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                c[i][j] = result;
            }
        }
        /* this code is only used to check the resulting matrix c */
        printf("c:\n");
        for (int i = 0; i < 4; i++) {
            for (int j = 0; j < 4; j++) {
                printf("%d ", c[i][j]);
            }
            printf("\n");
        }
        printf("\n");
    }
    else {
        int result = 0;
        int local_row[4] = {0,0,0,0};
        int local_column[4] = {2,2,2,2};
        // MPI_Recv(void* data, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm communicator, MPI_Status* status)
        MPI_Recv(local_row, 4, MPI_INT, rootRank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(local_column, 4, MPI_INT, rootRank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for (int i = 0; i < 4; i++) {
            result += local_row[i] * local_column[i];
        }
        // MPI_Send(void* data, int count, MPI_Datatype datatype, int destination, int tag, MPI_Comm communicator)
        MPI_Bsend(&result, 1, MPI_INT, rootRank, 2, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return;
}

int main(int argc, char* argv[]) {
    int d[][4] = {{1,2,3,4}, {5,6,7,8}, {9,10,11,12}, {13,14,15,16}};
    int e[][4] = {{16,15,14,13}, {12,11,10,9}, {8,7,6,5}, {4,3,2,1}};
    int f[][4] = {{0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,0,0,0}};
    matrixMultiply(argc, argv, d, e, f);
}
A couple of issues to address:
MPI_Bsend (as opposed to MPI_Send) requires a preceding call to MPI_Buffer_attach. See the notes here: https://www.mpich.org/static/docs/latest/www3/MPI_Bsend.html or any recent version of the MPI spec.
Try with that change first. It's possible that's enough to get it going. If not, double check any ordering of Sends and Receives, considered globally, to see whether there's a deadlock anywhere in the system.
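For reference, a minimal standalone sketch of attaching and detaching a buffer around an MPI_Bsend (the payload and sizes here are illustrative, not taken from the question; run with at least 2 processes):

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    int rank, payload[4] = {1, 2, 3, 4};

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* attach a buffer large enough for one buffered send of 4 ints */
    int bufsize = 4 * sizeof(int) + MPI_BSEND_OVERHEAD;
    void *buf = malloc(bufsize);
    MPI_Buffer_attach(buf, bufsize);

    if (rank == 0)
        MPI_Bsend(payload, 4, MPI_INT, 1, 0, MPI_COMM_WORLD);
    else if (rank == 1)
        MPI_Recv(payload, 4, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    /* detach blocks until any messages still in the buffer have been transmitted */
    MPI_Buffer_detach(&buf, &bufsize);
    free(buf);

    MPI_Finalize();
    return 0;
}

The attached buffer must be large enough to hold all outstanding buffered sends at once (plus MPI_BSEND_OVERHEAD per message), so in the question's code it would need to cover the two row/column sends per iteration.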
As an aside, you may be able to improve performance, as well as avoid some deadlock scenarios, by switching to nonblocking sends and receives (MPI_Isend, MPI_Irecv and related) and completing them via MPI_Waitall, MPI_Testall or similar once they have all been initiated. This is closer to how MPI implementations typically execute collective operations under the hood, and it lets the implementation, which knows more about the hardware, optimize the ordering of transfers for you. What you are doing looks something like an MPI_Bcast followed by an MPI_Gather; using those collectives directly is one possible alternative pattern that leans more heavily on the implementation to take care of such optimizations.
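To illustrate that direction, here is a minimal sketch of the same 4 x 4 multiplication expressed with collectives. It assumes the job is launched with exactly 4 ranks and is meant as an illustration, not a drop-in replacement for the function in the question:

#include <mpi.h>
#include <stdio.h>

#define N 4

int main(int argc, char *argv[])
{
    int rank;
    int a[N][N] = {{1,2,3,4}, {5,6,7,8}, {9,10,11,12}, {13,14,15,16}};
    int b[N][N] = {{16,15,14,13}, {12,11,10,9}, {8,7,6,5}, {4,3,2,1}};
    int my_row[N], my_result[N], c[N][N];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* every rank gets all of b; each rank gets one row of a (assumes exactly N ranks) */
    MPI_Bcast(b, N * N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Scatter(a, N, MPI_INT, my_row, N, MPI_INT, 0, MPI_COMM_WORLD);

    /* rank i computes row i of c */
    for (int j = 0; j < N; j++) {
        my_result[j] = 0;
        for (int k = 0; k < N; k++)
            my_result[j] += my_row[k] * b[k][j];
    }

    /* collect the rows of c back on rank 0 */
    MPI_Gather(my_result, N, MPI_INT, c, N, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < N; j++)
                printf("%d ", c[i][j]);
            printf("\n");
        }
    }

    MPI_Finalize();
    return 0;
}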

How to send an integer array via MPI_Send?

I'm trying to create a program in regular C that divides an integer array equally between any number of processes. For debugging purposes I'm using an integer array with 12 numbers and only 2 processes, so that the master process will have [1,2,3,4,5,6] and slave 1 will have [7,8,9,10,11,12]. However, I'm getting an error saying: MPI_ERR_BUFFER: invalid buffer pointer.
After some research I found out that there is a function that does exactly that (MPI_Scatter). Unfortunately, since I'm learning MPI, the implementation is restricted to MPI_Send and MPI_Recv only. In any case, both MPI_Send and MPI_Recv take a void*, and I'm passing an int*, so it should work. Can anyone point out what I am doing wrong? Thank you.
int* create_sub_vec(int begin, int end, int* origin);
void print(int my_rank, int comm_sz, int n_over_p, int* sub_vec);

int main(void) {
    int comm_sz;
    int my_rank;
    int vec[12] = {1,2,3,4,5,6,7,8,9,10,11,12};
    int* sub_vec = NULL;
    int n_over_p;

    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    n_over_p = 12 / comm_sz;
    printf("Process %d computes n_over_p = %d\n", my_rank, n_over_p);

    if (my_rank != 0) {
        MPI_Recv(sub_vec, n_over_p, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        print(my_rank, comm_sz, n_over_p, sub_vec);
    } else {
        printf("Distributing data\n");
        for (int i = 1; i < comm_sz; i++) {
            sub_vec = create_sub_vec(i*n_over_p, (i*n_over_p)+n_over_p, vec);
            MPI_Send(sub_vec, n_over_p, MPI_INT, i, 0, MPI_COMM_WORLD);
        }
        printf("Data distribution finished\n");
        sub_vec = create_sub_vec(0, n_over_p, vec);
        print(my_rank, comm_sz, n_over_p, sub_vec);
    }
    MPI_Finalize();
    return 0;
}

int* create_sub_vec(int begin, int end, int* origin) {
    int* sub_vec;
    int size;
    int aux = 0;
    size = end - begin;
    sub_vec = (int*)malloc(size * sizeof(int));
    for (int i = begin; i < end; ++i) {
        *(sub_vec+aux) = *(origin+i);
        aux += 1;
    }
    return sub_vec;
}

void print(int my_rank, int comm_sz, int n_over_p, int* sub_vec) {
    printf("Process %d out of %d received sub_vector: [ ", my_rank, comm_sz);
    for (int i = 0; i < n_over_p; ++i) {
        printf("%d, ", *(sub_vec+i));
    }
    printf("]\n");
}
The issue is that sub_vec is not allocated on the non-zero ranks.
It is up to you to do that (MPI does not allocate the receive buffer for you).
The receive part should look like:
if (my_rank != 0) {
    sub_vec = (int *)malloc(n_over_p * sizeof(int));
    MPI_Recv(sub_vec, n_over_p, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
As you wrote, the natural way to do this is via MPI_Scatter() (and once again, it is up to you to allocate the receive buffer before starting the scatter).
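For comparison, a minimal sketch of the MPI_Scatter version of the same distribution (it assumes, as the question does, that 12 divides evenly by the number of processes):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int comm_sz, my_rank;
    int vec[12] = {1,2,3,4,5,6,7,8,9,10,11,12};

    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    int n_over_p = 12 / comm_sz;
    /* every rank, including rank 0, allocates its own receive buffer */
    int *sub_vec = malloc(n_over_p * sizeof(int));

    /* rank 0 supplies the full vector; each rank receives its n_over_p elements */
    MPI_Scatter(vec, n_over_p, MPI_INT, sub_vec, n_over_p, MPI_INT, 0, MPI_COMM_WORLD);

    printf("Process %d out of %d received first element %d\n",
           my_rank, comm_sz, sub_vec[0]);

    free(sub_vec);
    MPI_Finalize();
    return 0;
}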

MPI_Send works only with statically allocated buffer

If I want to define my own type, and use it as a datatype with MPI_Send to take only even rows from a matrix, does that matrix (send buffer) have to be allocated statically?
I seem to have problems when I allocate it dynamically. Is this because the addresses need to be contiguous for the data to be sent?
No, memory to be sent with MPI_Send does not have to be statically allocated.
To send array subsets, you likely want to use MPI_Type_indexed. Here is a slightly modified version of the example from the mpi.deino.net article on MPI_Type_indexed, where I have replaced the statically allocated buffer
int buffer[27];
with a dynamically allocated buffer
int* buffer = (int*)malloc(27 * sizeof(int));
I hope it helps:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    int rank, size, i;
    MPI_Datatype type, type2;
    int blocklen[3] = { 2, 3, 1 };
    int displacement[3] = { 0, 3, 8 };
    int* buffer = (int*)malloc(27 * sizeof(int)); // was: int buffer[27];
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2)
    {
        printf("Please run with 2 processes.\n");
        MPI_Finalize();
        return 1;
    }
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Type_contiguous(3, MPI_INT, &type2);
    MPI_Type_commit(&type2);
    MPI_Type_indexed(3, blocklen, displacement, type2, &type);
    MPI_Type_commit(&type);

    if (rank == 0)
    {
        for (i = 0; i < 27; i++)
            buffer[i] = i;
        MPI_Send(buffer, 1, type, 1, 123, MPI_COMM_WORLD);
    }
    if (rank == 1)
    {
        for (i = 0; i < 27; i++)
            buffer[i] = -1;
        MPI_Recv(buffer, 1, type, 0, 123, MPI_COMM_WORLD, &status);
        for (i = 0; i < 27; i++)
            printf("buffer[%d] = %d\n", i, buffer[i]);
        fflush(stdout);
    }
    MPI_Finalize();
    free(buffer);
    return 0;
}

Initialize an array using openmpi once

I am trying to run some tests using OpenMPI, processing data in an array by splitting up the work across nodes (the second part is with matrices). I am running into problems now because the data array is being initialized by every process and I don't know how to prevent this from happening.
How, using ANSI C, can I create a variable-length array that is initialized only once when using OpenMPI? I tried making it static and global, but nothing worked.
#define NUM_THREADS 4
#define NUM_DATA 1000

static int *list = NULL;

int main(int argc, char *argv[]) {
    int numprocs, rank, namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int n = NUM_DATA*NUM_DATA;
    printf("hi\n");
    int i;
    if (list == NULL)
    {
        printf("ho\n");
        list = malloc(n*sizeof(int));
        for (i = 0; i < n; i++)
        {
            list[i] = rand() % 1000;
        }
    }
    int position;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);
    printf("Process %d on %s out of %d\n", rank, processor_name, numprocs);
    clock_t start = clock();
    position = n / NUM_THREADS * rank;
    search(list, position, n / NUM_THREADS * (rank + 1));
    printf("Time elapsed: %f seconds\n", ((double)clock() - (double)start) / (double)CLOCKS_PER_SEC);
    free(list);
    MPI_Finalize();
    return 0;
}
Probably the easiest way is to have the rank 0 process do the initialization while the other processes block. Then once the initialization is done, have them all start their work.
A basic example trying to call your search function (NB: it's dry-coded):
#define NUM_THREADS 4
#define NUM_DATA 1000

int main(int argc, char *argv[]) {
    int *list;
    int numprocs, rank, namelen, i, n;
    int chunksize, offset;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    n = NUM_DATA * NUM_DATA;
    MPI_Status stat;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);

    // note you'll need to handle n % NUM_THREADS != 0, but I'm ignoring that for now
    chunksize = n / NUM_THREADS;

    if (rank == 0) {
        // Think of this as a master process
        // Do your initialization in this process
        list = malloc(n*sizeof(int));
        for (i = 0; i < n; i++)
        {
            list[i] = rand() % 1000;
        }
        // Once you're ready, send each slave process a chunk to work on
        offset = chunksize;
        for (i = 1; i < numprocs; i++) {
            MPI_Send(&list[offset], chunksize, MPI_INT, i, 0, MPI_COMM_WORLD);
            offset += chunksize;
        }
        search(list, 0, chunksize);
        // If you need some sort of response back from the slaves, do a recv loop here
    } else {
        // If you're not the master, you're a slave process, so wait to receive data
        list = malloc(chunksize*sizeof(int));
        MPI_Recv(list, chunksize, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        // Now you can do work on your portion
        search(list, 0, chunksize);
        // If you need to send something back to the master, do it here.
    }
    MPI_Finalize();
}
