I'm using MPI and I'm trying to send uneven blocks of a 2D array to different processors.
For instance, I might have a non-square image of size 333x225, and I want to send blocks of different sizes to different processors.
I have seen Jonathan Dursi's method for evenly divided arrays:
sending blocks of 2D array in C using MPI
I have tried to adapt it to my problem. So far I have managed to send even chunks of data to two processes like this:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "mpi.h"
int malloc2dchar(char ***array, int n, int m) {
/* allocate the n*m contiguous items */
char *p = (char *)malloc(n*m*sizeof(char));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (char **)malloc(n*sizeof(char*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (int i=0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2dchar(char ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char **argv) {
char **global, **local;
const int gridsize=10; // size of grid
const int procgridsize=2; // size of process grid
int rank, size; // rank of current process and no. of processes
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0) {
/* fill in the array, and print it */
malloc2dchar(&global, gridsize, gridsize);
for (int i=0; i<gridsize; i++) {
for (int j=0; j<gridsize; j++)
global[i][j] = '0'+(3*i+j)%10;
}
printf("Global array is:\n");
for (int i=0; i<gridsize; i++) {
for (int j=0; j<gridsize; j++)
putchar(global[i][j]);
printf("\n");
}
}
/* create the local array which we'll process */
malloc2dchar(&local, 5, 10);
/* create a datatype to describe the subarrays of the global array */
int sizes[2] = {gridsize, gridsize}; /* global size */
int subsizes[2] = {5, 10}; /* local size */
int starts[2] = {0,0}; /* where this one starts */
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_CHAR, &type);
MPI_Type_create_resized(type, 0, 10*sizeof(char), &subarrtype);
MPI_Type_commit(&subarrtype);
char *globalptr=NULL;
if (rank == 0) globalptr = &(global[0][0]);
/* scatter the array to all processors */
int sendcounts[2];
int displs[2];
if (rank == 0) {
for (int i=0; i<2; i++) sendcounts[i] = 1;
int disp = 0;
displs[0]=0;
displs[1]=5;
//for (int i=0; i<procgridsize; i++) {
// for (int j=0; j<procgridsize; j++) {
// displs[i*procgridsize+j] = disp;
// disp += 1;
// }
// disp += ((gridsize/procgridsize)-1)*procgridsize;
//}
}
MPI_Scatterv(globalptr, sendcounts, displs, subarrtype, &(local[0][0]),
gridsize*gridsize/2, MPI_CHAR,
0, MPI_COMM_WORLD);
/* now all processors print their local data: */
for (int p=0; p<size; p++) {
if (rank == p) {
printf("Local process on rank %d is:\n", rank);
for (int i=0; i<5; i++) {
putchar('|');
for (int j=0; j<10; j++) {
putchar(local[i][j]);
}
printf("|\n");
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* now each processor has its local array, and can process it */
for (int i=0; i<5; i++) {
for (int j=0; j<10; j++) {
local[i][j] = 'A' + rank;
}
}
/* it all goes back to process 0 */
MPI_Gatherv(&(local[0][0]), gridsize*gridsize/2, MPI_CHAR,
globalptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
/* don't need the local data anymore */
free2dchar(&local);
/* or the MPI data type */
MPI_Type_free(&subarrtype);
if (rank == 0) {
printf("Processed grid:\n");
for (int i=0; i<gridsize; i++) {
for (int j=0; j<gridsize; j++) {
putchar(global[i][j]);
}
printf("\n");
}
free2dchar(&global);
}
MPI_Finalize();
return 0;
}
So I get:
Global array is:
0123456789
3456789012
6789012345
9012345678
2345678901
5678901234
8901234567
1234567890
4567890123
7890123456
Local process on rank 0 is:
|0123456789|
|3456789012|
|6789012345|
|9012345678|
|2345678901|
Local process on rank 1 is:
|5678901234|
|8901234567|
|1234567890|
|4567890123|
|7890123456|
Processed grid:
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
But I want data to be like this (not even chunks):
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
AAAAAAAAAA
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
BBBBBBBBBB
UPDATE
I have tried to set tab_size depending on the process rank, but it doesn't work correctly.
Here is the code:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "mpi.h"
int malloc2dchar(char ***array, int n, int m) {
/* allocate the n*m contiguous items */
char *p = (char *)malloc(n*m*sizeof(char));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (char **)malloc(n*sizeof(char*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (int i=0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2dchar(char ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char **argv) {
char **global, **local;
const int gridsize=10; // size of grid
const int procgridsize=2; // size of process grid
int rank, size; // rank of current process and no. of processes
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
//if (size != procgridsize*procgridsize) {
// fprintf(stderr,"%s: Only works with np=%d for now\n", argv[0], procgridsize);
// MPI_Abort(MPI_COMM_WORLD,1);
//}
int tab_size;
if (rank == 0) {
/* fill in the array, and print it */
malloc2dchar(&global, gridsize, gridsize);
for (int i=0; i<gridsize; i++) {
for (int j=0; j<gridsize; j++)
global[i][j] = '0'+(3*i+j)%10;
}
printf("Global array is:\n");
for (int i=0; i<gridsize; i++) {
for (int j=0; j<gridsize; j++)
putchar(global[i][j]);
printf("\n");
}
tab_size = 4;
}
if(rank == 1)
{
tab_size = 6;
}
/* create the local array which we'll process */
malloc2dchar(&local, tab_size, 10);
/* create a datatype to describe the subarrays of the global array */
int sizes[2] = {gridsize, gridsize}; /* global size */
int subsizes[2] = {tab_size, 10}; /* local size */
int starts[2] = {0,0}; /* where this one starts */
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_CHAR, &type);
MPI_Type_create_resized(type, 0, 10*sizeof(char), &subarrtype);
MPI_Type_commit(&subarrtype);
char *globalptr=NULL;
if (rank == 0) globalptr = &(global[0][0]);
/* scatter the array to all processors */
int sendcounts[2];
int displs[2];
int tabsize;
if (rank == 0) {
for (int i=0; i<2; i++) sendcounts[i] = 1;
int disp = 0;
displs[0]=0;
displs[1]=tab_size;
//for (int i=0; i<procgridsize; i++) {
// for (int j=0; j<procgridsize; j++) {
// displs[i*procgridsize+j] = disp;
// disp += 1;
// }
// disp += ((gridsize/procgridsize)-1)*procgridsize;
//}
}
MPI_Scatterv(globalptr, sendcounts, displs, subarrtype, &(local[0][0]),
gridsize*gridsize/2, MPI_CHAR,
0, MPI_COMM_WORLD);
/* now all processors print their local data: */
for (int p=0; p<size; p++) {
if (rank == p) {
printf("Local process on rank %d is:\n", rank);
for (int i=0; i<tab_size; i++) {
putchar('|');
for (int j=0; j<10; j++) {
putchar(local[i][j]);
}
printf("|\n");
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* now each processor has its local array, and can process it */
for (int i=0; i<tab_size; i++) {
for (int j=0; j<10; j++) {
local[i][j] = 'A' + rank;
}
}
/* it all goes back to process 0 */
MPI_Gatherv(&(local[0][0]), gridsize*gridsize/2, MPI_CHAR,
globalptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
/* don't need the local data anymore */
free2dchar(&local);
/* or the MPI data type */
MPI_Type_free(&subarrtype);
if (rank == 0) {
printf("Processed grid:\n");
for (int i=0; i<gridsize; i++) {
for (int j=0; j<gridsize; j++) {
putchar(global[i][j]);
}
printf("\n");
}
free2dchar(&global);
}
MPI_Finalize();
return 0;
}
And the output looks like this:
Global array is:
0123456789
3456789012
6789012345
9012345678
2345678901
5678901234
8901234567
1234567890
4567890123
7890123456
Local process on rank 0 is:
|0123456789|
|3456789012|
|6789012345|
|9012345678|
Local process on rank 1 is:
|2345678901|
|5678901234|
|8901234567|
|1234567890|
||
||
[blade001:3727] *** An error occurred in MPI_Gatherv
[blade001:3727] *** reported by process [2497249281,0]
[blade001:3727] *** on communicator MPI_COMM_WORLD
[blade001:3727] *** MPI_ERR_TRUNCATE: message truncated
[blade001:3727] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[blade001:3727] *** and potentially your MPI job)
Why your code is wrong
You define the datatype differently on different ranks, even though it has to be the same everywhere. That's not how it is done.
How to do what you are attempting correctly
A decomposition of contiguous data by complete rows, as you describe, is much simpler. There is no need for complex derived datatypes; in fact, you don't need them at all. You can use a very simple datatype representing a row. Then the only task is to set up the sizes and displacements for MPI_Scatterv correctly:
int local_rows[2] = {6, 4};
malloc2dchar(&local, local_rows[rank], gridsize);
MPI_Datatype row_type;
MPI_Type_contiguous(gridsize, MPI_CHAR, &row_type);
MPI_Type_commit(&row_type);
int displs[2];
if (rank == 0) {
displs[0] = 0;
for (int r = 1; r < 2; r++) {
displs[r] = displs[r - 1] + local_rows[r - 1];
}
}
MPI_Scatterv(globalptr, local_rows, displs, row_type, &(local[0][0]),
local_rows[rank], row_type, 0, MPI_COMM_WORLD);
...
MPI_Gatherv(&(local[0][0]), local_rows[rank], row_type, globalptr, local_rows,
displs, row_type, 0, MPI_COMM_WORLD);
This assumes that the intended sizes {6, 4} are known by all ranks. You can either have every rank compute them deterministically, or have only the root compute them and scatter the result (each non-root rank only needs to know its own row count), as sketched below.
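A minimal sketch of that root-only variant (my illustration, reusing the variable names from above; not part of the original code): rank 0 keeps the full local_rows array for the MPI_Scatterv counts and displacements, and every other rank receives just its own row count:
int local_rows[2];   /* significant on the root only       */
int my_rows = 0;     /* each rank only needs its own count */
if (rank == 0) {
    local_rows[0] = 6;   /* e.g. derived from the image height */
    local_rows[1] = 4;
}
/* one int per rank: rank r receives local_rows[r] into my_rows */
MPI_Scatter(local_rows, 1, MPI_INT, &my_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
malloc2dchar(&local, my_rows, gridsize);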
True irregular 2D decomposition
If you truly want to split out chunks that do not consist only of whole rows, it becomes much more complicated. There is a very good answer about that already, so I won't repeat it here. Make sure to read it very carefully and follow it closely.
Due to the complexity, I would suggest doing that only if you are absolutely sure you need it.
Overlap
You cannot send overlapping data with a single scatter. If you need overlap, consider exchanging the data directly between the neighbouring processes that own the adjoining ranges, i.e. a halo exchange, as sketched below.
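A rough sketch of such a halo exchange for the two-rank row decomposition above (my illustration, reusing local, local_rows, rank and size from the earlier code, and assuming each rank keeps its neighbours' boundary rows in separate buffers):
char halo_above[gridsize];   /* last row of the rank above me  */
char halo_below[gridsize];   /* first row of the rank below me */
int up   = (rank == 0)        ? MPI_PROC_NULL : rank - 1;
int down = (rank == size - 1) ? MPI_PROC_NULL : rank + 1;
/* send my last row down, receive the upper neighbour's last row */
MPI_Sendrecv(local[local_rows[rank] - 1], gridsize, MPI_CHAR, down, 0,
             halo_above, gridsize, MPI_CHAR, up, 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* send my first row up, receive the lower neighbour's first row */
MPI_Sendrecv(local[0], gridsize, MPI_CHAR, up, 1,
             halo_below, gridsize, MPI_CHAR, down, 1,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* MPI_PROC_NULL at the domain boundaries turns those transfers into no-ops */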
Related
I have an array of Matrix structs which the program gets from user input. I need to distribute the matrices to processes with OpenMPI. I tried using Scatter, but I am quite confused about the arguments needed for the program to work (and also about how to receive the data in each local array). Here is my current code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#define nil NULL
#define NMAX 100
#define DATAMAX 1000
#define DATAMIN -1000
typedef struct Matrix
{
int mat[NMAX][NMAX]; // Matrix cells
int row_eff; // Matrix effective row
int col_eff; // Matrix effective column
} Matrix;
void init_matrix(Matrix *m, int nrow, int ncol)
{
m->row_eff = nrow;
m->col_eff = ncol;
for (int i = 0; i < m->row_eff; i++)
{
for (int j = 0; j < m->col_eff; j++)
{
m->mat[i][j] = 0;
}
}
}
Matrix input_matrix(int nrow, int ncol)
{
Matrix input;
init_matrix(&input, nrow, ncol);
for (int i = 0; i < nrow; i++)
{
for (int j = 0; j < ncol; j++)
{
scanf("%d", &input.mat[i][j]);
}
}
return input;
}
void print_matrix(Matrix *m)
{
for (int i = 0; i < m->row_eff; i++)
{
for (int j = 0; j < m->col_eff; j++)
{
printf("%d ", m->mat[i][j]);
}
printf("\n");
}
}
int main(int argc, char **argv)
{
MPI_Init(&argc, &argv);
// Get number of processes
int size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
// Get process rank
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// Get matrices from user inputs
int kernel_row, kernel_col, num_targets, target_row, target_col;
// reads kernel's row and column and initalize kernel matrix from input
scanf("%d %d", &kernel_row, &kernel_col);
Matrix kernel = input_matrix(kernel_row, kernel_col);
// reads number of target matrices and their dimensions.
// initialize array of matrices and array of data ranges (int)
scanf("%d %d %d", &num_targets, &target_row, &target_col);
Matrix *arr_mat = (Matrix *)malloc(num_targets * sizeof(Matrix));
for (int i = 0; i < num_targets; i++)
{
arr_mat[i] = input_matrix(target_row, target_col);
}
// Get number of matrices per process
int num_mat_per_proc = ceil(num_targets / size);
// Init local matrices and scatter the global matrices
Matrix *local_mats = (Matrix *)malloc(num_mat_per_proc * sizeof(Matrix));
MPI_Scatter(arr_mat, sizeof(local_mats), MPI_BYTE, &local_mats, sizeof(local_mats), MPI_BYTE, 0, MPI_COMM_WORLD);
if (rank == 0)
{
// Range arrays -> array of convolution results
int arr_range[num_targets];
printf("From master \n");
for (int i = 0; i < 3; i++)
{
print_matrix(&arr_mat[i]);
}
}
else
{
printf("From slave %d = \n", rank);
print_matrix(&local_mats[0]);
}
MPI_Finalize();
}
So here are a few doubts I have about the current implementation:
Can I accept the input just like that or should I make it so that it only happens in rank 0?
How do I implement the scatter part, possibly using Scatterv, because the number of matrices might not be divisible by the number of processes?
Can I accept the input just like that or should I make it so that it only happens in rank 0?
No. As best practice, you should use command-line arguments or read from a file.
If you want to use scanf, then use it only inside rank 0. STDIN is typically forwarded to rank 0 (as far as I know this is not guaranteed by the standard, but I guess it should work and will be implementation dependent).
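A minimal sketch of that pattern (my illustration, reusing the variable names from your code): read on rank 0 only and broadcast the values to everyone else:
int dims[2];   /* kernel_row, kernel_col */
if (rank == 0) {
    scanf("%d %d", &dims[0], &dims[1]);   /* only the root touches stdin */
}
/* every rank learns the dimensions from the root */
MPI_Bcast(dims, 2, MPI_INT, 0, MPI_COMM_WORLD);
kernel_row = dims[0];
kernel_col = dims[1];
/* the matrix contents read on rank 0 would have to be broadcast or scattered the same way */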
How do I implement the scatter part, possibly using Scatterv, because the number of matrices might not be divisible by the number of processes?
If you have different sizes to send to different processes, then you should use MPI_Scatterv.
Scatter Syntax:
MPI_Scatter(
void* send_data,
int send_count,
MPI_Datatype send_datatype,
void* recv_data,
int recv_count,
MPI_Datatype recv_datatype,
int root,
MPI_Comm communicator)
Your usage:
MPI_Scatter(arr_mat, sizeof(local_mats), MPI_BYTE, &local_mats, sizeof(local_mats), MPI_BYTE, 0, MPI_COMM_WORLD);
Potential error points:
send_count: the size to send (as Gilles Gouaillardet pointed out in the comments). You pass sizeof(local_mats), which is only the size of a pointer; it should be num_mat_per_proc * sizeof(Matrix).
recv_count: likewise, the size to receive should not be sizeof(local_mats).
Since you use the same type (MPI_BYTE) for send and receive, your send_count must equal recv_count.
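A minimal MPI_Scatterv sketch for this case (my illustration, assuming num_targets and the target dimensions are known on every rank, e.g. after a broadcast, with counts expressed in bytes because MPI_BYTE is used):
int *sendcounts = malloc(size * sizeof(int));
int *displs = malloc(size * sizeof(int));
int base = num_targets / size;   /* matrices every rank gets            */
int rem = num_targets % size;    /* the first 'rem' ranks get one extra */
for (int r = 0, off = 0; r < size; r++) {
    int nmat = base + (r < rem ? 1 : 0);
    sendcounts[r] = nmat * (int)sizeof(Matrix);
    displs[r] = off;
    off += sendcounts[r];
}
int my_count = sendcounts[rank] / (int)sizeof(Matrix);
Matrix *local_mats = malloc((my_count > 0 ? my_count : 1) * sizeof(Matrix));
MPI_Scatterv(arr_mat, sendcounts, displs, MPI_BYTE,
             local_mats, sendcounts[rank], MPI_BYTE,
             0, MPI_COMM_WORLD);
arr_mat, sendcounts and displs only have to be valid on the root, but computing the counts everywhere keeps sendcounts[rank] available as the receive count on each rank.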
I am trying to create a program that will ultimately be transposing a matrix in MPI so that it can be used in further computations. But right now I am trying to do a simple thing: Root process has a 4x4 matrix "A" which contains elements 0..15 in row-major order. This data is scattered to 2 processes so that each receives one half of the matrix. Process 0 has a 2x4 sub_matrix "a" and receives elements 0..7 and Process 1 gets elements 8..15 in its sub_matrix "a".
My goal is for these processes to swap their a matrices with each other using MPI_Get. Since I was encountering problems, I decided to test a simpler version and simply make process 0 get process 1's "a" matrix, that way, both processes will have the same elements in their respective sub_matrices once I print after the MPI_Get-call and the MPI_fence are called.
Yet the output is erratic; I have tried to troubleshoot for several hours but haven't been able to crack the nut. I would appreciate your help with this.
This is the code below, and the run-command: mpirun -n 2 ./get
Compile: mpicc -std=c99 -g -O3 -o get get.c -lm
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define NROWS 4
#define NCOLS 4
int allocate_matrix(int ***M, int ROWS, int COLS) {
int *p;
if (NULL == (p = malloc(ROWS * COLS * sizeof(int)))) {
perror("Couldn't allocate memory for input (p in allocate_matrix)");
return -1;
}
if (NULL == (*M = malloc(ROWS * sizeof(int*)))) {
perror("Couldn't allocate memory for input (M in allocate_matrix)");
return -1;
}
for(int i = 0; i < ROWS; i++) {
(*M)[i] = &(p[i * COLS]);
}
return 0;
}
int main(int argc, char *argv[])
{
int rank, nprocs, **A, **a, n_cols, n_rows, block_len;
MPI_Win win;
int errs = 0;
if(rank==0)
{
allocate_matrix(&A, NROWS, NCOLS);
for (int i=0; i<NROWS; i++)
for (int j=0; j<NCOLS; j++)
A[i][j] = i*NCOLS + j;
}
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
n_cols=NCOLS; //cols in a sub_matrix
n_rows=NROWS/nprocs; //rows in a sub_matrix
block_len = n_cols*n_rows;
allocate_matrix(&a, n_rows, n_cols);
for (int i = 0; i <n_rows; i++)
for (int j = 0; j < n_cols; j++)
a[i][j] = 0;
MPI_Datatype block_type;
MPI_Type_vector(n_rows, n_cols, n_cols, MPI_INTEGER, &block_type);
MPI_Type_commit(&block_type);
MPI_Scatter(*A, 1, block_type, &(a[0][0]), block_len, MPI_INTEGER, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
printf("process %d: \n", rank);
for (int j=0; j<n_rows; j++){
for (int i=0; i<n_cols; i++){
printf("%d ",a[j][i]);
}
printf("\n");
}
if (rank == 0)
{
printf("TESTING, before Get a[0][0] %d\n", a[0][0]);
MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), win);
MPI_Get(*a, 8, MPI_INTEGER, 1, 0, 8, MPI_INTEGER, win);
MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
printf("TESTING, after Get a[0][0] %d\n", a[0][0]);
printf("process %d:\n", rank);
for (int j=0; j<n_rows; j++){
for (int i=0; i<n_cols; i++){
printf("%d ", a[j][i]);
}
printf("\n");
}
}
else
{ /* rank = 1 */
MPI_Win_create(a, n_rows*n_cols*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), win);
MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
}
MPI_Type_free(&block_type);
MPI_Win_free(&win);
MPI_Finalize();
return errs;
}
This is the output that I get:
process 0:
0 1 2 3
4 5 6 7
process 1:
8 9 10 11
12 13 14 15
process 0:
1552976336 22007 1552976352 22007
1552800144 22007 117 0
But what I want is for the second time I print the matrix from process 0, it should have the same elements as in process 1.
First, I doubt this is really the code you are testing. You are freeing some MPI type variables that are not defined, and rank is also uninitialised in
if(rank==0)
{
allocate_matrix(&A, NROWS, NCOLS);
for (int i=0; i<NROWS; i++)
for (int j=0; j<NCOLS; j++)
A[i][j] = i*NCOLS + j;
}
and the code segfaults because A won't get allocated in the root.
Moving this block to after MPI_Comm_rank(), freeing the correct MPI type variable, and fixing the call to MPI_Win_create on rank 1:
MPI_Win_create(&a[0][0], n_rows*n_cols*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
// This -------^^^^^^^^
produces the result you are seeking.
I'd recommend sticking to a single notation for the beginning of the array, such as &a[0][0], instead of a mixture of *a and &a[0][0]. This will prevent (or at least reduce the occurrence of) similar errors in the future.
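For reference, a short sketch of the corrected ordering (my paraphrase of the fixes above; everything else stays as in the original code):
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);   /* rank is only valid from here on */
if (rank == 0)
{
    allocate_matrix(&A, NROWS, NCOLS);
    for (int i = 0; i < NROWS; i++)
        for (int j = 0; j < NCOLS; j++)
            A[i][j] = i*NCOLS + j;
}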
What is the correct way to handle a sendcount = 0 when using MPI_Gatherv (or any other function that requires a sendcount) when setting up the displs argument?
I have data that needs to be received by all processors, but not every processor necessarily has data of its own to send. As an MWE, I tried (on just two processors):
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
int main(void)
{
int ntasks;
int thistask;
int n = 0;
int i;
int totcounts = 0;
int *data;
int *rbuf;
int *rcnts;
int *displs;
int *master_data;
int *master_displs;
// Set up mpi
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
MPI_Comm_rank(MPI_COMM_WORLD, &thistask);
// Allocate memory for arrays needed by allgatherv
rbuf = calloc(ntasks, sizeof(int));
rcnts = calloc(ntasks, sizeof(int));
displs = calloc(ntasks, sizeof(int));
master_displs = calloc(ntasks, sizeof(int));
// Initialize the counts and displacement arrays
for(i = 0; i < ntasks; i++)
{
rcnts[i] = 1;
displs[i] = i;
}
// Allocate data on just one task, but not others
if(thistask == 1)
{
n = 3;
data = calloc(n, sizeof(int));
for(i = 0; i < n; i++)
{
data[i] = i;
}
}
// Get n so each other processor knows about what others are sending
MPI_Allgatherv(&n, 1, MPI_INT, rbuf, rcnts, displs, MPI_INT, MPI_COMM_WORLD);
// Now that we know how much data each processor is sending, we allocate the array
// to hold it all
for(i = 0; i < ntasks; i++)
{
totcounts += rbuf[i];
}
master_data = calloc(totcounts, sizeof(int));
// Get displs for master data
master_displs[0] = 0;
for(i = 1; i < ntasks; i++)
{
master_displs[i] = master_displs[i - 1] + rbuf[i - 1];
}
// Send each processor's data to all others
MPI_Allgatherv(&data, n, MPI_INT, master_data, rbuf, master_displs, MPI_INT, MPI_COMM_WORLD);
// Print it out to see if it worked
if(thistask == 0)
{
for(i = 0; i < totcounts; i++)
{
printf("master_data[%d] = %d\n", i, master_data[i]);
}
}
// Free
if(thistask == 1)
{
free(data);
}
free(rbuf);
free(rcnts);
free(displs);
free(master_displs);
free(master_data);
MPI_Finalize();
return 0;
}
The way that I've set up master_displs works when every processor has a non-zero n (that is, when everyone has data to send). In this case, with rbuf = {0, 3}, both master_displs entries end up as zero. However, the results of this program are garbage. How would I set up the master_displs array to ensure that master_data holds the correct information (in this case, just master_data[i] = i, as received from task 1)?
I have already looked for answers about MPI and dynamic allocation, but there is still an error in my code.
I think the send/receive pairs work well. The problem probably comes from the part where I want to do some basic operations: I cannot index into the array, otherwise I get this error:
[lyomatnuc09:07574] * Process received signal *
[lyomatnuc09:07575] * Process received signal *
[lyomatnuc09:07575] Signal: Segmentation fault (11)
[lyomatnuc09:07575] Signal code: Address not mapped (1)
[lyomatnuc09:07575] Failing at address: 0x60
The basic code that reproduces the error is below:
int **alloc_array(int rows, int cols) {
int *data = (int *)malloc(rows*cols*sizeof(int));
int **array= (int **)malloc(rows*sizeof(int*));
for (int i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv); //initialize MPI operations
MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get the rank
MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
MPI_Datatype columntype;
MPI_Type_vector(10, 1, 10, MPI_INT, &columntype);
MPI_Type_commit(&columntype);
start_time = MPI_Wtime();
if (rank == 0)
{
int **A;
A = alloc_array(10,10);
for ( int i =1 ;i<size;i++)
{
MPI_Send(&(A[0][0]), 10*10, MPI_INT, i, 1, MPI_COMM_WORLD);
}
} else if (rank >= 1) {
int **A2;
A2 = alloc_array(10,10);
MPI_Recv(&(A2[0][0]), 10*10, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
for (int i =0; i<10; i++)
{
for ( int j=0; j<10;i++)
{
A2[i][j]=i*j;//bug here
}
}
}//end slaves task
MPI_Finalize();
return 0;
}
I managed to write working code for sending one structure in MPI, but what I need is to send a two-dimensional array of these structures, and I'm stuck.
Here's my code for sending one struct. Can you guide me on how to modify it?
typedef struct {
unsigned char r;
unsigned char g;
unsigned char b;
} pixel;
MPI_Datatype mpi_pixel; /*datatype variable*/
pixel send_pixel; /*my instance of structure*/
int lengtharray[3]; /* Array of lengths */
MPI_Aint disparray[3]; /* Array of displacements */
MPI_Datatype typearray[3]; /* Array of MPI datatypes */
MPI_Aint startaddress, address;
lengtharray[0] = lengtharray[1] =lengtharray[2] = 1; /* Set array of lengths */
typearray[0] = typearray[1] = typearray[2]= MPI_UNSIGNED_CHAR;/* and data types */
/* First element, a, is at displacement 0 */
disparray[0] = 0;
/* Calculate displacement of b */
MPI_Address(&send_pixel.b, &startaddress);
MPI_Address(&send_pixel.g, &address);
disparray[1] = address-startaddress; /* Displacement of second element, b */
MPI_Address(&send_pixel.r, &address);
disparray[2] = address-startaddress; /* Displacement of third element, n */
/* Build the data structure my_type */
MPI_Type_struct(3, lengtharray, disparray, typearray, &mpi_pixel);
MPI_Type_commit(&mpi_pixel);
MPI_Send(&send_pixel, 1, mpi_pixel, 0, 50, MPI_COMM_WORLD);
There are a few different ways to send this. There are a few answers on SO that explain some of them, here, here.
If you want to continue along the lines of what you are doing, I'd create a contiguous type for a 1D array and then expand that with another contiguous type into a 2D array.
A BIG BIG WARNING: I do not check for errors; you REALLY REALLY SHOULD.
I've changed your code around a bit, as I don't like using typedefs of structs. I also put the creation of the basic MPI pixel datatype into a function and added some test sending routines (of course you could extend them to pass in the pixel(s) you want to send):
/*
* Create a MPI datatype of a pixel.
*/
int
mpi_pixel_init(MPI_Datatype *mpi_pixel)
{
struct pixel_s pixel; /* instance of structure */
int i = 0; /* temporary loop indexer */
int count = 3; /* number of blocks in the struct */
int blocks[3] = {1, 1, 1}; /* set up 3 blocks */
MPI_Datatype types[3] = { /* pixel internal types */
MPI_UNSIGNED_CHAR,
MPI_UNSIGNED_CHAR,
MPI_UNSIGNED_CHAR
};
MPI_Aint dis[3] = { /* internal displacements */
offsetof(struct pixel_s, r),
offsetof(struct pixel_s, g),
offsetof(struct pixel_s, b)
};
MPI_Type_create_struct(count, blocks, dis, types, mpi_pixel);
MPI_Type_commit(mpi_pixel);
return(EXIT_SUCCESS);
}
Test sending a single pixel:
/* Send a single pixel */
int
send_pixel(int src, int dst, MPI_Datatype mpixel)
{
int rank = 0;
struct pixel_s x = {0};
MPI_Status stat;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank == src) {
x.r = 255;
x.g = 128;
x.b = 128;
MPI_Send(&x, 1, mpixel, 1, 1, MPI_COMM_WORLD);
} else if (rank == dst) {
MPI_Recv(&x, 1, mpixel, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
printf("Single pixel\n");
printf("%d:\tr: %d\tg: %d\tb: %d\n", rank, x.r, x.g, x.b);
printf("----\n");
}
return(EXIT_SUCCESS);
}
Test sending a row of pixels:
/* Send a row/1D of pixels */
int
send_1d_pixels(int src, int dst, MPI_Datatype cpixel)
{
int i = 0;
int rank = 0;
struct pixel_s x[ROWS] = {0};
MPI_Status stat;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/* Test sending a row of pixels */
if (rank == src) {
for (i = 0; i < ROWS; ++i) {
x[i].r = i;
x[i].g = i + 128;
x[i].b = 255 - i;
}
MPI_Send(&x, 1, cpixel, 1, TAG, MPI_COMM_WORLD);
} else if (rank == dst) {
MPI_Recv(&x, 1, cpixel, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
printf("Row of pixels\n");
for (i = 0; i < ROWS; ++i) {
printf("%d:\tr: %d\tg: %d\tb: %d\n", i,
x[i].r, x[i].g, x[i].b);
}
printf("----\n");
}
return(EXIT_SUCCESS);
}
Test sending a 2D array of pixels:
/* Send an 2D array of pixels */
int
send_2d_pixels(int src, int dst, MPI_Datatype apixel)
{
int i = 0;
int j = 0;
int rank = 0;
struct pixel_s x[ROWS][COLS] = {0};
MPI_Status stat;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/* Test sending one pixel */
if (rank == src) {
for (i = 0; i < ROWS; ++i) {
for (j = 0; j < COLS; ++j) {
x[i][j].r = i;
x[i][j].g = j;
x[i][j].b = i*COLS + j;
}
}
MPI_Send(&x, 1, apixel, 1, TAG, MPI_COMM_WORLD);
} else if (rank == dst) {
MPI_Recv(&x, 1, apixel, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
printf("Array of pixels\n");
for (i = 0; i < ROWS; ++i) {
for (j = 0; j < COLS; ++j) {
printf("(%d,%d):\tr: %d\tg: %d\tb: %d\n", i, j,
x[i][j].r, x[i][j].g, x[i][j].b);
}
}
printf("----\n");
}
return(EXIT_SUCCESS);
}
Then later on you can use it as:
/*
* Create a 2D array of MPI pixels.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <mpi.h>
#define TAG 1
#define COLS 10
#define ROWS 10
struct pixel_s {
unsigned char r;
unsigned char g;
unsigned char b;
};
int mpi_pixel_init(MPI_Datatype *);
int send_pixel(int, int, MPI_Datatype);
int send_1d_pixels(int, int, MPI_Datatype);
int send_2d_pixels(int, int, MPI_Datatype);
int
main(int argc, char **argv)
{
MPI_Datatype mpixel; /* single pixel */
MPI_Datatype cmpixel; /* row/contiguous pixels */
MPI_Datatype ampixel; /* 2D array of pixels */
MPI_Init(&argc, &argv);
/* Create an MPI pixel datatype */
mpi_pixel_init(&mpixel);
/* Create a 1D array (contiguous) pixels */
MPI_Type_contiguous(ROWS, mpixel, &cmpixel);
MPI_Type_commit(&cmpixel);
/* Create a 2D array from a 1D array of pixels */
MPI_Type_contiguous(COLS, cmpixel, &ampixel);
MPI_Type_commit(&ampixel);
/* Test sending one pixel */
send_pixel(0, 1, mpixel);
/* Test sending a row of pixels */
send_1d_pixels(0, 1, cmpixel);
/* Test sending a 2D array of pixels */
send_2d_pixels(0, 1, ampixel);
/* Free up the types and finalize MPI */
MPI_Type_free(&ampixel);
MPI_Type_free(&cmpixel);
MPI_Type_free(&mpixel);
MPI_Finalize();
return(EXIT_SUCCESS);
}