I recently began studying MPI.
The task is to find the minimum value in a matrix.
The search itself should be parallelized for faster execution.
But I can't figure out how to turn the sequential version of the program into a parallel one.
I'm asking for help.
#include <stdio.h>
#include <stdlib.h>
#include <time.h> /* for time(), used to seed rand() */
#include "mpi.h"
int main(int argc,char *argv[])
{
int rows, cols, min, value, n;
int done = 0, numprocs, rank, i, j;
srand(time(NULL));
double startwtime = 0.0, endwtime;
int namelen;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Get_processor_name(processor_name,&namelen);
while (!done) {
if (rank == 0) {
printf("Enter the height/width of the matrix\n");
scanf("%d",&rows);
scanf("%d",&cols);
startwtime = MPI_Wtime();
}
if (rows == 0 || cols == 0) {
done = 1;
} else {
int **arr = (int **) malloc(rows * sizeof(int*)); //creating 2d array
for (i = 0; i < rows; i++) {
arr[i] = (int *) malloc(cols * sizeof(int));
}
for (i = 0; i < rows; i++) { //Array filling
for (j = 0; j < cols; j++) {
arr[i][j] = rand();
}
}
for (i = 0; i < rows; i++) { // output of the array to the screen for clarity
for (j = 0; j < cols; j++) {
printf("%d\n",arr[i][j]);
}
}
min = arr[0][0];
for (i = 0; i < rows; i++) { //find min value
for (j = 0; j < cols; j++) { //
if (min > arr[i][j]) { min = arr[i][j]; } // need to parallelize
}
}
if (rank == 0) {
endwtime = MPI_Wtime();
printf("min = %d\n", min);
printf("wall clock time = %f\n", endwtime-startwtime);
fflush( stdout );
for(i = 0; i < rows; i++) {
free(arr[i]);
}
free(arr);
}
}
}
MPI_Finalize();
return 0;
}
Sorry for my bad English; I hope you can help.
UPD: I added something along these lines, but the program outputs garbage. It seems to me that this is due to the array being created from separate row pointers (so it is not one contiguous block), and most likely I misused the functions:
MPI_Scatter (&arr[0][0], rows*cols/numprocs, MPI_INT, &arr[rows][cols], rows*cols/numprocs, MPI_INT, 0, MPI_COMM_WORLD);
min = arr[0][0];
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
min = arr[i][j];
}
}
MPI_Reduce(&min, &value, rows*cols/numprocs, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
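For reference, the usual shape of this pattern is a scatter of a contiguous block followed by a reduction. Here is a minimal sketch, assuming the matrix lives in a single contiguous rows*cols block called data on rank 0 (the pointer-per-row layout above is not contiguous, which is one reason MPI_Scatter produces garbage) and that rows*cols is divisible by numprocs; data, local_buf, chunk, and local_min are illustrative names:
MPI_Bcast(&rows, 1, MPI_INT, 0, MPI_COMM_WORLD); /* non-root ranks need the sizes too */
MPI_Bcast(&cols, 1, MPI_INT, 0, MPI_COMM_WORLD);
int chunk = rows * cols / numprocs;
int *local_buf = (int *) malloc(chunk * sizeof(int));
/* the send buffer is only read on the root */
MPI_Scatter(data, chunk, MPI_INT, local_buf, chunk, MPI_INT, 0, MPI_COMM_WORLD);
int local_min = local_buf[0];
for (i = 1; i < chunk; i++) /* the local minimum still needs the comparison */
    if (local_buf[i] < local_min)
        local_min = local_buf[i];
/* count is 1: each rank contributes a single int, not rows*cols/numprocs */
MPI_Reduce(&local_min, &min, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);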
Related
So I got the code working when running on 1 process. But when I try to run it on 2 or more processes (mpirun -n 4, mpirun -n 8, etc.), half my results come back as zero. I'm assuming that is because it doesn't deal with the case where the matrix size is not divisible by the number of processes. Any ideas? I'm trying to initialize both matrices from the command line and perform matrix multiplication using MPI. I'm new to this and would love any help. For example, when I enter a size of 2 and initialize matrix A to the values {1,4,6,7} and matrix B to {8,9,4,5}, my result comes out to be {8,9,0,0}.
void init_Matrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
scanf("%i", &matrix[i][j]);
}
}
}
void printMatrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
printf("%d" , matrix[i][j]);
printf(" ");
}
printf("\n");
}
}
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_size(MPI_COMM_WORLD, &size); //num p
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int n;
if(rank == 0)
{
printf("Enter in size of matrix! \x0A");
scanf("%i",&n);
}
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);
int A[n][n];
int B[n][n];
int C[n][n];
int aa[n/size][n];
int cc[n/size][n];
if(rank == 0)
{
init_Matrix(n,A);
init_Matrix(n,B);
}
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
//scatter rows of first matrix to different processes
MPI_Scatter(A, n*n/size, MPI_INT, aa, n*n/size, MPI_INT,0,MPI_COMM_WORLD);
//broadcast second matrix to all processes
MPI_Bcast(B, n*n, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
//perform vector multiplication by all processes
for(int k = 0; k < n/size; k++)
{
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cc[i][j] += A[i][k] * B[k][j];
}
}
}
MPI_Gather(cc, n*n/size, MPI_INT, C, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0){
printMatrix(n, C);
}
MPI_Finalize();
}
Update: an updated attempt using MPI_Scatterv and MPI_Allgatherv:
void initMatrix(int Size, int matrix[Size][Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
scanf("%i", &matrix[i][j]);
}
}
void multIJK(int Size, int A[Size][Size], int B[Size][Size], int pResult[Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
{
for(int k = 0; k < Size; k++)
pResult += A[i][k] * B[k][j];
}
}
}
int main(int argc, char* argv[]) {
int Size;
int RowNum;
int ProcNum;
int ProcRank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
if (ProcRank == 0) {
printf("Enter in size of matrix! \x0A");
scanf("%i", &Size);
}
int aMatrix[Size][Size];
int bMatrix[Size][Size];
MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
int RestRows = Size;
for (int i=0; i<ProcRank; i++)
RestRows = RestRows-RestRows/(ProcNum-i);
RowNum = RestRows/(ProcNum-ProcRank);
int pResult[Size];
int pProcRows[RowNum*Size];
int pProcResult[RowNum];
if(ProcRank == 0)
{
initMatrix(Size,aMatrix);
initMatrix(Size,bMatrix);
}
RestRows=Size; // Number of rows, that haven’t been distributed yet
MPI_Bcast(bMatrix, Size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Alloc memory for temporary objects
// the number of elements sent to the process
int pSendInd[ProcNum];
// the index of the first data element sent to the process
int pSendNum[ProcNum];
// Define the disposition of the matrix rows for current process
RowNum = (Size/ProcNum);
pSendNum[0] = RowNum*Size;
pSendInd[0] = 0;
for (int i=1; i<ProcNum; i++) {
RestRows -= RowNum;
RowNum = RestRows/(ProcNum-i);
pSendNum[i] = RowNum*Size;
pSendInd[i] = pSendInd[i-1]+pSendNum[i-1];
}
// Scatter the rows
MPI_Scatterv(aMatrix , pSendNum, pSendInd, MPI_INT, pProcRows,
pSendNum[ProcRank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
multIJK(Size,aMatrix,bMatrix,pResult);
RestRows=Size; // Number of rows, that haven’t been distributed yet
//Alloc memory for temporary objects
// Number of elements, that current process sends
int pReceiveNum[ProcNum];
/* Index of the first element from current process in result vector */
int pReceiveInd[ProcNum];
//Define the disposition of the result vector block of current processor
pReceiveInd[0] = 0;
pReceiveNum[0] = Size/ProcNum;
for (int i=1; i<ProcNum; i++) {
RestRows -= pReceiveNum[i-1];
pReceiveNum[i] = RestRows/(ProcNum-i);
pReceiveInd[i] = pReceiveInd[i-1]+pReceiveNum[i-1];
} //Gather the whole result vector on every processor
MPI_Allgatherv(pProcResult, pReceiveNum[ProcRank], MPI_INT, pResult,
pReceiveNum, pReceiveInd, MPI_DOUBLE, MPI_COMM_WORLD);
//ProcessTermination(aMatrix,bMatrix, pResult, pProcRows, pProcResult);
if(ProcRank == 0)
{
for(int i = 0; i < Size; i++)
{
printf("%i\n",pResult[i]);
}
}
MPI_Finalize();
}
You have some logic problems.
for(int i = 0; i < n; i++) <-- this should only go up to n/size; you are going into unallocated memory
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
cc[i][j] += A[i][k] * B[k][j]; <-- again, going outside allocated memory
Replace it with
cc[k][i] += A[k][j] * B[j][i];
Hopefully these are all the problems.
You should also handle the case where the matrix size is not divisible by the number of processors.
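For the non-divisible case, MPI_Scatterv/MPI_Gatherv with explicit per-rank counts and displacements is the usual tool. A minimal sketch under that assumption (sendcounts, displs, and myrows are my names; aa and cc mirror the buffers in your code, resized to myrows rows; the split gives the first n % size ranks one extra row):
int sendcounts[size], displs[size];
int offset = 0;
for (int r = 0; r < size; r++) {
    int rows_r = n / size + (r < n % size ? 1 : 0);
    sendcounts[r] = rows_r * n; /* counts and displacements are in elements */
    displs[r] = offset;
    offset += sendcounts[r];
}
int myrows = sendcounts[rank] / n;
int aa[myrows][n], cc[myrows][n]; /* VLAs, matching the style of the code above */
MPI_Scatterv(A, sendcounts, displs, MPI_INT,
             aa, sendcounts[rank], MPI_INT, 0, MPI_COMM_WORLD);
/* ... compute the myrows local rows of the product into cc ... */
MPI_Gatherv(cc, sendcounts[rank], MPI_INT,
            C, sendcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);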
My code is supposed to take in a matrix M and raise it to the power of an integer A. However, somehow, my output is always M^(2^A). For example, if I want to raise a matrix to the 3rd power, I instead receive its 8th power.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
void multiply(int ** p, int pwr, int dim, int ** prod) {
int m, i, j, k;
/*if (n<pwr){*/
int pos = 0;
for (m = 0; m < pwr; m++) {
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
for (k = 0; k < dim; k++) {
pos += p[i][k] * p[k][j];
}
prod[i][j] = pos;
pos = 0;
}
}
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
p[i][j] = prod[i][j];
prod[i][j] = 0;
}
}
}
/*n=n+1;
multiply(prod, q, pwr, dim, prod);
}*/
}
int main(int argc, char * argv[]) {
FILE * fp = fopen(argv[1], "r");
int dim, pwr, i, j;
fscanf(fp, "%d", & dim);
int ** matrix;
matrix = (int ** ) malloc(dim * sizeof(int * ));
for (i = 0; i < dim; i++) {
matrix[i] = (int * ) malloc(dim * sizeof(int));
}
int ** prod;
prod = (int ** ) malloc(dim * sizeof(int * ));
for (i = 0; i < dim; i++) {
prod[i] = (int * ) malloc(dim * sizeof(int));
}
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
fscanf(fp, "%d", & matrix[i][j]);
}
}
fscanf(fp, "%d", & pwr);
if (pwr == 1) {
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
printf("%d ", matrix[i][j]);
}
printf("\n");
}
} else if (pwr >= 2) {
multiply(matrix, pwr, dim, prod);
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
printf("%d ", matrix[i][j]);
}
printf("\n");
}
}
return 0;
}
You are multiplying your matrix by itself and then storing the result back in the original one. Then you do it again, so every pass squares what you already have: after pwr passes you get M^(2^pwr), which is exactly why asking for the 3rd power returns the 8th. What you need is another temporary matrix to store the result in, while keeping the original matrix to multiply your result with.
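A minimal sketch of that idea, reusing your triple loop (the base parameter and the power name are mine; it needs one extra dim x dim buffer in main, allocated the same way as prod):
void power(int **base, int pwr, int dim, int **p, int **prod) {
    int m, i, j, k, pos;
    for (i = 0; i < dim; i++) /* start from p = base, i.e. M^1 */
        for (j = 0; j < dim; j++)
            p[i][j] = base[i][j];
    for (m = 1; m < pwr; m++) { /* pwr - 1 multiplications in total */
        for (i = 0; i < dim; i++) {
            for (j = 0; j < dim; j++) {
                pos = 0;
                for (k = 0; k < dim; k++)
                    pos += p[i][k] * base[k][j]; /* always multiply by the ORIGINAL matrix */
                prod[i][j] = pos;
            }
        }
        for (i = 0; i < dim; i++) /* copy this round's result back into p */
            for (j = 0; j < dim; j++)
                p[i][j] = prod[i][j];
    }
}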
Trying to pass an array pointer between MPI processes and receive it into dynamically allocated memory. It keeps giving a segmentation fault, which we believe is due to the way we are sending it between processes. Our code is as follows:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "mpi.h"
#include <math.h>
int main(int argc, char* argv[])
{
int my_rank, i, j, p;
MPI_Request request, request2;
MPI_Status mpi_status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Barrier(MPI_COMM_WORLD);
if (my_rank == 0)
{
int size = 5;
int **slice;
slice = (int **)malloc(sizeof(int*)* size);
slice[0] = (int *)malloc(sizeof(int)* size*size);
for(i = 0; i< size;i++)
{
slice[i] = (*slice + size*i);
}
for (i = 0; i < size; i++)
{
for (j = 0; j < size; j++)
{
slice[i][j] = i*j;
}
}
for (i = 0; i < size; i++)
{
for (j = 0; j < size; j++)
{
printf("slice[%d][%d]: %d\n", i, j, slice[i][j]);
}
}
MPI_Send(&slice, size * size, MPI_INT, 1, 1, MPI_COMM_WORLD);
} else {
int local_size=5;
int **local_slice;
local_slice = (int **)malloc(sizeof(int*)* local_size);
local_slice[0] = (int *)malloc(sizeof(int)* local_size*local_size);
for(i = 0; i< local_size;i++)
{
local_slice[i] = (*local_slice + local_size*i);
}
MPI_Recv(&local_slice, local_size * local_size, MPI_INT, 0, 1, MPI_COMM_WORLD, &mpi_status);
for (i = 0; i < local_size; i++)
{
for (j = 0; j < local_size; j++)
{
printf("local_slice[%d][%d]: %d\n", i, j, local_slice[i][j]);
}
}
}
MPI_Finalize();
return 0;
}
Can someone explain how to properly pass this type of array between MPI processes, please?
It looks like you need to change the first argument to MPI_Send from &slice to slice[0] and do the same for MPI_Recv.
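That is, something like the following; slice[0] (equivalently &slice[0][0]) is where the contiguous size*size block of ints actually starts, while &slice is merely the address of the pointer variable on the stack:
/* send the data block itself, not the address of the pointer */
MPI_Send(slice[0], size * size, MPI_INT, 1, 1, MPI_COMM_WORLD);
/* and receive into the data block on the other side */
MPI_Recv(local_slice[0], local_size * local_size, MPI_INT, 0, 1, MPI_COMM_WORLD, &mpi_status);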
I have this parallel Gaussian elimination code. A segmentation fault happens upon calling either of the MPI_Gather calls. I know such an error may arise if memory is not allocated properly for either buffer, but I cannot see anything wrong with the memory management code.
Can someone help?
Thanks.
Notes:
The program reads from a .txt file in the same directory called input.txt.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
/*void print2dAddresses(double** array2d, int rows, int cols)
{
int i;
for(i = 0; i < rows; i++)
{
int j;
for(j = 0; j < cols; j++)
{
printf("%d ", &(array2d[i][j]));
}
printf("\n");
}
printf("------------------------------------");
}*/
double** newMatrix(int rows, int cols)
{
double *data = (double*) malloc(rows * cols * sizeof(double));
double **array= (double **)malloc(rows * sizeof(double*));
int i;
for (i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
void freeMatrix(double** mat)
{
free(mat[0]);
free(mat);
}
double** new2dArray(int nrows, int ncols)
{
int i;
double** array2d;
array2d = (double**) malloc(nrows * sizeof(double*));
for(i = 0; i < nrows; i++)
{
array2d[i] = (double*) malloc(ncols * sizeof(double));
}
return array2d;
}
double* new1dArray(int size)
{
return (double*) malloc(size * sizeof(double));
}
void free2dArray(double** array2d, int nrows)
{
int i;
for(i = 0; i < nrows; i++)
{
free(array2d[i]);
}
free(array2d);
}
void print2dArray(double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
printf("%lf ", array2d[i][j]);
}
printf("\n");
}
printf("----------------------\n");
}
void print1dArray(double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%lf\n", array[i]);
}
printf("----------------------\n");
}
void read2dArray(FILE* fp, double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
fscanf(fp, "%lf", &(array2d[i][j]));
}
}
}
void read1dArray(FILE* fp, double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
fscanf(fp, "%lf", &(array[i]));
}
}
void readSymbols(char* symbols, int size, FILE* fp)
{
int i;
for(i = 0; i < size; i++)
{
char c = '\n';
while(c == '\n' || c == ' ' || c == '\t' || c == '\r')
fscanf(fp, "%c", &c);
symbols[i] = c;
}
}
void printSolution(char* symbols, double* x, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%c = %lf\n", symbols[i], x[i]);
}
}
double* copy_1d_array(double* original, int size)
{
double* copy_version;
int i;
copy_version = (double*) malloc(size * sizeof(double));
for(i = 0; i < size; i++)
{
copy_version[i] = original[i];
}
return copy_version;
}
int main(int argc, char** argv)
{
int p, rank, i, j, k, l, msize, rowsPerProcess, remainder, startingRow, dest, rowCounter, remainingRows, neededProcesses;
double **A, *b, *x, **smallA, *currentRow, *smallB, currentB, **receivedA, *receivedB;
char *symbols;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == 0)
{
FILE* fp;
fp = fopen("input.txt", "r");
fscanf(fp, "%d", &msize);
A = newMatrix(msize, msize);
b = new1dArray(msize);
x = new1dArray(msize);
symbols = (char*) malloc(msize * sizeof(char));
read2dArray(fp, A, msize, msize);
read1dArray(fp, b, msize);
readSymbols(symbols, msize, fp);
fclose(fp);
/*print2dArray(A, msize, msize);
print1dArray(b, msize);*/
}
MPI_Bcast(&msize, 1, MPI_INT, 0, MPI_COMM_WORLD);
for(i = 0; i < (msize - 1); i++)
{
int maxIndex;
double maxCoef, tmp, r;
/*finding max row*/
if(rank == 0)
{
maxIndex = i;
maxCoef = fabs(A[i][i]);
for(j = i + 1; j < msize; j++)
{
if(fabs(A[j][i]) > maxCoef)
{
maxCoef = A[j][i];
maxIndex = j;
}
}
/*swapping the current row with the max row*/
for(j = 0; j < msize; j++)
{
tmp = A[i][j];
A[i][j] = A[maxIndex][j];
A[maxIndex][j] = tmp;
}
tmp = b[i];
b[i] = b[maxIndex];
b[maxIndex] = tmp;
/*elimination*/
/*for(j = i + 1; j < msize; j++)
{
double r = A[j][i] / A[i][i];
subtracting r * row i from row j
for(k = i; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}*/
/*parallel elimination*/
startingRow = i + 1;
neededProcesses = p;
remainingRows = msize - startingRow;
if(remainingRows < neededProcesses)
{
neededProcesses = remainingRows;
}
rowsPerProcess = remainingRows / neededProcesses;
remainder = remainingRows % neededProcesses;
}
MPI_Bcast(&startingRow, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&rowsPerProcess, 1, MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0)
{
currentRow = copy_1d_array(A[startingRow-1], msize);
currentB = b[startingRow-1];
}
else
{
currentRow = new1dArray(msize);
}
MPI_Bcast(currentRow, msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(&currentB, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
receivedB = new1dArray(remainingRows);
}
smallA = newMatrix(rowsPerProcess, msize);
smallB = new1dArray(rowsPerProcess);
MPI_Scatter(&(A[startingRow][0]), rowsPerProcess*msize, MPI_DOUBLE, &(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Scatter(&(b[startingRow]), rowsPerProcess, MPI_DOUBLE, &(smallB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for(j = 0; j < rowsPerProcess; j++)
{
r = smallA[j][startingRow-1] / currentRow[startingRow-1];
for(k = 0; k < msize; k++)
{
smallA[j][k] -= r * currentRow[k];
}
smallB[j] -= r * currentB;
}
MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, &(receivedA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Gather(&(smallB[0]), rowsPerProcess, MPI_DOUBLE, &(receivedB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
freeMatrix(smallA);
free(smallB);
if(rank == 0)
{
for(j = 0; j < remainingRows; j++)
{
for(k = 0; k < msize; k++)
{
A[j+startingRow][k] = receivedA[j][k];
}
b[j+startingRow] = receivedB[j];
}
free(currentRow);
freeMatrix(receivedA);
free(receivedB);
}
if(rank == 0)
{
if(remainder > 0)
{
for(j = (msize - remainder); j < msize; j++)
{
r = A[j][i] / A[i][i];
for(k = 0; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}
}
}
}
if(rank == 0)
{
/*backward substitution*/
for(i = msize - 1; i >= 0; i--)
{
x[i] = b[i];
for(j = msize - 1; j > i; j--)
{
x[i] -= A[i][j] * x[j];
}
x[i] /= A[i][i];
}
printf("solution = \n");
//print1dArray(x, msize);
printSolution(symbols, x, msize);
freeMatrix(A);
free(b);
free(x);
free(symbols);
}
MPI_Finalize();
return 0;
}
Input File:
3
1 1 1
1 1 3
2 1 4
4
9
12
x
y
z
It might be this: &(receivedA[0][0]) on processes where rank != 0. You're indexing an array that hasn't been allocated. You might have to create another pointer, like this:
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
recievedAHead = &(receivedA[0][0]);
receivedB = new1dArray(remainingRows);
}
else {
recievedAHead = NULL;
}
and use recievedAHead in the MPI_Gather call.
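Executed on every rank, the gather would then look something like this; passing NULL on the non-root ranks is fine, because the receive buffer argument is only significant at the root:
MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE,
           recievedAHead, rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);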
I consider this to be an extremely weird issue:
I have this code; it is supposed to receive a 2D matrix with chunk rows and width columns.
The matrix is allocated using:
int **alloc2d(int n, int m) {
int i = 0;
int *data = malloc(n * m * sizeof(int));
int **array = malloc(n * sizeof(int *));
for (i = 0; i < n; i++) {
array[i] = &(data[i * m]);
}
return array;
}
So it is a contiguous memory block.
I have the following code:
MPI_Status st;
int worker;
for(i = 1; i < size; i++) {
MPI_Recv(&(recmat[0][0]), chunk*width, MPI_INT, MPI_ANY_SOURCE, 1,
MPI_COMM_WORLD, &st);
worker = st.MPI_SOURCE;
/* for(k = worker * chunk; k < (worker + 1) * chunk; k++){
for(j = 0; j < width; j++) {
mat[k][j] = recmat[k - worker * chunk][j];
}
}*/
}
With the code like this (the copy loop commented out), everything finishes and works well.
If I uncomment the region:
for(k = worker * chunk; k < (worker + 1) * chunk; k++){
for(j = 0; j < width; j++) {
mat[k][j] = recmat[k - worker * chunk][j];
}
}
the thread running this code does not stop; I cannot find a logical explanation for this. Perhaps someone can see the error or the issue. Thank you!
The recmat allocation and chunk calculation:
int chunk;
chunk = height / size;
int **recmat;
recmat = alloc2d(chunk, width);
Sorry, this is too long to fit in a comment:
The code you've posted is fine; for instance, putting enough code around it to make it run produces the correct results (below). So the issue is not where you think it is.
If you see code locking up in places you don't think it ought to, it frequently points to weird memory errors or something else going on. You're best off just running it through a debugger, or through something like valgrind, to check for memory problems.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int **alloc2d(int n, int m) {
int i = 0;
int *data = malloc(n * m * sizeof(int));
int **array = malloc(n * sizeof(int *));
for (i = 0; i < n; i++) {
array[i] = &(data[i * m]);
}
return array;
}
int main(int argc, char **argv) {
int rank, size;
const int height=10, width=10;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int **recmat;
int chunk;
chunk = height / size;
if (chunk*size != height) {
fprintf(stderr, "%s: number of ranks %d does not divide size %d\n",
argv[0], size, height);
MPI_Finalize();
exit(1);
}
if (rank == 0) {
int **recmat = alloc2d(chunk,width);
int **mat = alloc2d(height,width);
int worker;
int i,j,k;
MPI_Status st;
/* deal with my own submatrix */
for (k=0; k<chunk; k++) {
for (j=0; j<width; j++) {
mat[k][j] = 0;
}
}
for(i = 1; i < size; i++) {
MPI_Recv(&(recmat[0][0]), chunk*width, MPI_INT, MPI_ANY_SOURCE, 1,
MPI_COMM_WORLD, &st);
worker = st.MPI_SOURCE;
for(k = worker * chunk; k < (worker + 1) * chunk; k++){
for(j = 0; j < width; j++) {
mat[k][j] = recmat[k - worker * chunk][j];
}
}
}
free(&(recmat[0][0]));
free(recmat);
printf("Rank 0: mat is \n");
for (int i=0; i<height; i++) {
for (int j=0; j<width; j++) {
printf("%2d ", mat[i][j]);
}
printf("\n");
}
free(&(mat[0][0]));
free(mat);
} else {
int **sendmat = alloc2d(chunk,width);
for (int i=0; i<chunk; i++)
for (int j=0; j<width; j++)
sendmat[i][j] = rank;
MPI_Send(&(sendmat[0][0]), chunk*width, MPI_INT, 0, 1, MPI_COMM_WORLD);
free(&(sendmat[0][0]));
free(sendmat);
}
MPI_Finalize();
return 0;
}
There were so many errors and bugs in my code that they're not even worth listing; I'm sorry for this useless question...
This is also a longer comment. You can avoid the double copy of the received data if you first probe for the message and then use the rank from the status to receive the message contents directly into the big matrix:
for(i = 1; i < size; i++) {
MPI_Probe(MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &st);
worker = st.MPI_SOURCE;
MPI_Recv(&(mat[worker*chunk][0]), chunk*width, MPI_INT,
worker, 1, MPI_COMM_WORLD, &st);
}
Less code and should work faster.