MPI matrix multiplication to run with a different number of processors - C

So I got the code working when running on 1 process. However, when I try to run it on 2 or more processes (mpirun -n 4, mpirun -n 8, etc.), half my results come back as zero. I'm assuming it's because the code doesn't deal with the case where the matrix size isn't divisible by the number of processors. Any ideas? I'm trying to initialize both matrices from the command line and perform matrix multiplication using MPI. I'm new to this and would love any help. For example, when I enter a size of 2 and initialize matrix A to the values {1,4,6,7} and matrix B to {8,9,4,5}, my result comes out to be {8,9,0,0}.
void init_Matrix(int n, int matrix[n][n])
{
    for(int i = 0; i < n; i++)
    {
        for(int j = 0; j < n; j++)
        {
            scanf("%i", &matrix[i][j]);
        }
    }
}
void printMatrix(int n, int matrix[n][n])
{
    for(int i = 0; i < n; i++)
    {
        for(int j = 0; j < n; j++)
        {
            printf("%d", matrix[i][j]);
            printf(" ");
        }
        printf("\n");
    }
}
int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_size(MPI_COMM_WORLD, &size); //num p
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n;
    if(rank == 0)
    {
        printf("Enter in size of matrix! \n");
        scanf("%i", &n);
    }
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    int A[n][n];
    int B[n][n];
    int C[n][n];
    int aa[n/size][n];
    int cc[n/size][n];
    if(rank == 0)
    {
        init_Matrix(n, A);
        init_Matrix(n, B);
    }
    for(int i = 0; i < n; i++)
    {
        for(int j = 0; j < n; j++)
        {
            cc[i][j] = 0;
        }
    }
    //scatter rows of first matrix to different processes
    MPI_Scatter(A, n*n/size, MPI_INT, aa, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
    //broadcast second matrix to all processes
    MPI_Bcast(B, n*n, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    //perform vector multiplication by all processes
    for(int k = 0; k < n/size; k++)
    {
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                cc[i][j] += A[i][k] * B[k][j];
            }
        }
    }
    MPI_Gather(cc, n*n/size, MPI_INT, C, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    if(rank == 0){
        printMatrix(n, C);
    }
    MPI_Finalize();
}
Update: my second attempt, using MPI_Scatterv and MPI_Allgatherv:
void initMatrix(int Size, int matrix[Size][Size])
{
    for(int i = 0; i < Size; i++)
    {
        for(int j = 0; j < Size; j++)
            scanf("%i", &matrix[i][j]);
    }
}
void multIJK(int Size, int A[Size][Size], int B[Size][Size], int pResult[Size])
{
    for(int i = 0; i < Size; i++)
    {
        for(int j = 0; j < Size; j++)
        {
            for(int k = 0; k < Size; k++)
                pResult += A[i][k] * B[k][j];
        }
    }
}
int main(int argc, char* argv[]) {
    int Size;
    int RowNum;
    int ProcNum;
    int ProcRank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
    MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
    if (ProcRank == 0) {
        printf("Enter in size of matrix! \n");
        scanf("%i", &Size);
    }
    int aMatrix[Size][Size];
    int bMatrix[Size][Size];
    MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    int RestRows = Size;
    for (int i=0; i<ProcRank; i++)
        RestRows = RestRows - RestRows/(ProcNum-i);
    RowNum = RestRows/(ProcNum-ProcRank);
    int pResult[Size];
    int pProcRows[RowNum*Size];
    int pProcResult[RowNum];
    if(ProcRank == 0)
    {
        initMatrix(Size, aMatrix);
        initMatrix(Size, bMatrix);
    }
    RestRows = Size; // Number of rows that haven't been distributed yet
    MPI_Bcast(bMatrix, Size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // Alloc memory for temporary objects
    // the number of elements sent to each process
    int pSendNum[ProcNum];
    // the index of the first data element sent to each process
    int pSendInd[ProcNum];
    // Define the disposition of the matrix rows for the current process
    RowNum = (Size/ProcNum);
    pSendNum[0] = RowNum*Size;
    pSendInd[0] = 0;
    for (int i=1; i<ProcNum; i++) {
        RestRows -= RowNum;
        RowNum = RestRows/(ProcNum-i);
        pSendNum[i] = RowNum*Size;
        pSendInd[i] = pSendInd[i-1] + pSendNum[i-1];
    }
    // Scatter the rows
    MPI_Scatterv(aMatrix, pSendNum, pSendInd, MPI_INT, pProcRows,
                 pSendNum[ProcRank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    multIJK(Size, aMatrix, bMatrix, pResult);
    RestRows = Size; // Number of rows that haven't been distributed yet
    // Alloc memory for temporary objects
    // Number of elements that the current process sends
    int pReceiveNum[ProcNum];
    /* Index of the first element from the current process in the result vector */
    int pReceiveInd[ProcNum];
    // Define the disposition of the result vector block of the current processor
    pReceiveInd[0] = 0;
    pReceiveNum[0] = Size/ProcNum;
    for (int i=1; i<ProcNum; i++) {
        RestRows -= pReceiveNum[i-1];
        pReceiveNum[i] = RestRows/(ProcNum-i);
        pReceiveInd[i] = pReceiveInd[i-1] + pReceiveNum[i-1];
    }
    // Gather the whole result vector on every processor
    MPI_Allgatherv(pProcResult, pReceiveNum[ProcRank], MPI_INT, pResult,
                   pReceiveNum, pReceiveInd, MPI_DOUBLE, MPI_COMM_WORLD);
    //ProcessTermination(aMatrix, bMatrix, pResult, pProcRows, pProcResult);
    if(ProcRank == 0)
    {
        for(int i = 0; i < Size; i++)
        {
            printf("%i\n", pResult[i]);
        }
    }
    MPI_Finalize();
}

You have some logic problems.
    for(int i = 0; i < n; i++)  // <-- this should run until n/size; cc only has n/size rows, so you are writing into unallocated memory
    {
        for(int j = 0; j < n; j++)
        {
            cc[i][j] = 0;
        }
    }
    cc[i][j] += A[i][k] * B[k][j];  // <-- again, going outside allocated memory (and each process only holds its rows in aa, not in A)
Replace it with
    cc[k][i] += aa[k][j] * B[j][i];
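Putting the two fixes together, the per-process compute loop would look like this (a sketch, assuming n is divisible by size so every rank holds exactly n/size rows in aa):
    for(int k = 0; k < n/size; k++)        // local row of this rank's block
    {
        for(int i = 0; i < n; i++)         // column of the result
        {
            cc[k][i] = 0;                  // initialize only the rows we own
            for(int j = 0; j < n; j++)     // inner-product dimension
            {
                cc[k][i] += aa[k][j] * B[j][i];
            }
        }
    }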
Hopefully these are all the problems.
You should also treat the cases where the matrix size is not divisible by the number of processors.
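One way to do that (a sketch, not a drop-in patch; the names counts, displs, base, extra, and myrows are mine, and it assumes size <= n) is to switch to MPI_Scatterv/MPI_Gatherv with per-rank counts and displacements, giving the first n % size ranks one extra row:
    int counts[size], displs[size];
    int base = n / size, extra = n % size, offset = 0;
    for(int r = 0; r < size; r++)
    {
        int rows = base + (r < extra ? 1 : 0);  // first 'extra' ranks get one more row
        counts[r] = rows * n;                   // counts are in elements, not rows
        displs[r] = offset;
        offset += counts[r];
    }
    int myrows = base + (rank < extra ? 1 : 0);
    int aa[myrows][n], cc[myrows][n];           // replaces the fixed n/size declarations
    MPI_Scatterv(A, counts, displs, MPI_INT, aa, myrows*n, MPI_INT, 0, MPI_COMM_WORLD);
    // ... multiply into cc as above, with myrows in place of n/size ...
    MPI_Gatherv(cc, myrows*n, MPI_INT, C, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);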

Related

Segmentation fault in matrix multiplication using MPI

I am trying to write an MPI program for multiplication of two matrices. If I give the matrix a size lower than 800 the code works, but when I give it anything higher I get a segmentation fault, and I am not able to figure out why. I am new to MPI and still trying to understand everything. If possible, please help.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define N 1000

int main(int argc, char* argv[]) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    double a[N][N], b[N][N], c[N][N];
    int i, j, k;
    // Initialize the matrices with random values
    if (rank == 0) {
        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                a[i][j] = (double)rand() / RAND_MAX;
                b[i][j] = (double)rand() / RAND_MAX;
            }
        }
    }
    // Broadcast the matrices to all ranks
    MPI_Bcast(a, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // Each rank calculates a portion of the output matrix
    int rows_per_rank = N / size;
    int start_row = rows_per_rank * rank;
    int end_row = start_row + rows_per_rank;
    for (i = start_row; i < end_row; i++) {
        for (j = 0; j < N; j++) {
            c[i][j] = 0;
            for (k = 0; k < N; k++) {
                c[i][j] += a[i][k] * b[k][j];
            }
        }
    }
    // Gather the output matrix from all ranks
    double* c_buffer = (double*) malloc(N*N*sizeof(double));
    MPI_Gather(c, rows_per_rank*N, MPI_DOUBLE, c_buffer, rows_per_rank*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // Print the output matrix
    if (rank == 0) {
        printf("Output matrix C:\n");
        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                printf("%lf ", c_buffer[i*N + j]);
            }
            printf("\n");
        }
    }
    free(c_buffer);
    MPI_Finalize();
    return 0;
}
This line:
    double a[N][N], b[N][N], c[N][N];
with N = 1000 requires 24 MB of stack space (three 1000x1000 arrays of 8-byte doubles). That's almost certainly larger than what's available. Either allocate them statically (place the keyword static before them) or dynamically on the heap.
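For example, a minimal sketch of the heap route, keeping each matrix as one contiguous block so the MPI calls can still treat it as N*N elements (indexing becomes a[i*N + j]):
    // allocate contiguous N*N blocks on the heap instead of the stack
    double *a = malloc((size_t)N * N * sizeof(double));
    double *b = malloc((size_t)N * N * sizeof(double));
    double *c = malloc((size_t)N * N * sizeof(double));
    // the broadcasts work unchanged on the contiguous blocks:
    MPI_Bcast(a, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // ... compute with a[i*N + k] * b[k*N + j], gather as before, then:
    free(a); free(b); free(c);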

Find min value in 2d array with MPI

I recently began to study MPI.
The task is to find the minimum value in a matrix, parallelizing the search for speed.
But I can't work out how to turn the sequential version of the program into a parallel one.
I am asking for help.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>   // for time(), used to seed rand()
#include "mpi.h"

int main(int argc, char *argv[])
{
    int rows, cols, min, value, n;
    int done = 0, numprocs, rank, i, j;
    srand(time(NULL));
    double startwtime = 0.0, endwtime;
    int namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);
    while (!done) {
        if (rank == 0) {
            printf("Enter the height/width of the matrix\n");
            scanf("%d", &rows);
            scanf("%d", &cols);
            startwtime = MPI_Wtime();
        }
        if (rows == 0 || cols == 0) {
            done = 1;
        } else {
            int **arr = (int **) malloc(rows * sizeof(int*)); // creating 2d array
            for (i = 0; i < rows; i++) {
                arr[i] = (int *) malloc(cols * sizeof(int));
            }
            for (i = 0; i < rows; i++) { // Array filling
                for (j = 0; j < cols; j++) {
                    arr[i][j] = rand();
                }
            }
            for (i = 0; i < rows; i++) { // output of the array to the screen for clarity
                for (j = 0; j < cols; j++) {
                    printf("%d\n", arr[i][j]);
                }
            }
            min = arr[0][0];
            for (i = 0; i < rows; i++) { // find min value
                for (j = 0; j < cols; j++) {
                    if (min > arr[i][j]) { min = arr[i][j]; } // need to parallelize
                }
            }
            if (rank == 0) {
                endwtime = MPI_Wtime();
                printf("min = %d\n", min);
                printf("wall clock time = %f\n", endwtime - startwtime);
                fflush(stdout);
                for (i = 0; i < rows; i++) {
                    free(arr[i]);
                }
                free(arr);
            }
        }
    }
    MPI_Finalize();
    return 0;
}
Sorry for my bad English; I'm hoping for your help.
UPD: I added something along these lines, but the program outputs garbage. It seems to me that this is because the array is built from separate row-pointer allocations, and most likely I have misused the functions:
    MPI_Scatter(&arr[0][0], rows*cols/numprocs, MPI_INT, &arr[rows][cols], rows*cols/numprocs, MPI_INT, 0, MPI_COMM_WORLD);
    min = arr[0][0];
    for (i = 0; i < rows; i++) {
        for (j = 0; j < cols; j++) {
            min = arr[i][j];
        }
    }
    MPI_Reduce(&min, &value, rows*cols/numprocs, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
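For reference, a minimal sketch of the scatter/reduce pattern this seems to be aiming for, assuming the matrix is allocated as one contiguous rows*cols block and rows*cols is divisible by numprocs (the names data, local, localmin, and globalmin are mine):
    int chunk = rows * cols / numprocs;             // elements per rank
    int *data = malloc(rows * cols * sizeof(int));  // filled on rank 0 before the scatter
    int *local = malloc(chunk * sizeof(int));
    MPI_Scatter(data, chunk, MPI_INT, local, chunk, MPI_INT, 0, MPI_COMM_WORLD);
    int localmin = local[0];
    for (i = 1; i < chunk; i++) {
        if (local[i] < localmin) localmin = local[i];
    }
    int globalmin;
    // note the count is 1: each rank contributes a single int, not a whole chunk
    MPI_Reduce(&localmin, &globalmin, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
    if (rank == 0) printf("min = %d\n", globalmin);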

C MPI Matrix multiplication error

I'm doing some matrix multiplication in C with MPI.
It works fine until I try to go above 15x15, and I can't figure out why...
From what I've noticed, the error seems to mostly happen after I see a "Process # sending..." print, which happens when the slave processes are sending their data back to the master process.
Error message:
[LEC-B125N4J:12183] *** Process received signal ***
[LEC-B125N4J:12183] Signal: Segmentation fault (11)
[LEC-B125N4J:12183] Signal code: Address not mapped (1)
Code:
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <mpi.h>

//#define DIM 1000
#define DIM 15

/*
 * Statically allocate the matrices to make the rows
 * sequentially placed in memory. (This eases the task
 * of distributing the problem among the slaves.)
 * Make the matrices global to allow for larger
 * dimensions.
 */
int A[DIM][DIM];
int B[DIM][DIM];
int C[DIM][DIM];
int D[DIM][DIM];

int correct_result(int A[DIM][DIM], int B[DIM][DIM])
{
    int i, j;
    for (i = 0; i < DIM; ++i)
        for (j = 0; j < DIM; ++j)
            if (A[i][j] != B[i][j])
                return 0;
    return 1;
}

int main(int argc, char *argv[])
{
    int rank = 0, size;
    int i, j, k;
    int time1;
    volatile int tmp;
    int iOffset = 0;
    int iProblemSize = 0;
    MPI_Init(&argc, &argv);               /* starts MPI */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* get current process id */
    MPI_Comm_size(MPI_COMM_WORLD, &size); /* get number of processes */
    iProblemSize = (DIM / (size - 1));
    if (rank == 0) { //Master
        printf("Number of processes: %d (1 Master and %d slaves) - DIM: %d\n", size, (size - 1), DIM);
        //Fill matrices A and B with random numbers
        srand(timer(NULL));
        for (i = 0; i < DIM; ++i)
        {
            for (j = 0; j < DIM; ++j)
            {
                A[i][j] = random() % 100 - 50;
                B[i][j] = random() % 100 - 50;
                C[i][j] = 0;
            }
        }
    }
    MPI_Bcast(B, (DIM * DIM), MPI_INT, 0, MPI_COMM_WORLD);
    if (rank == 0) { //Master
        /* Calculate the true answer */
        for (i = 0; i < DIM; ++i)
            for (k = 0; k < DIM; ++k)
                for (j = 0; j < DIM; ++j)
                    D[i][j] += A[i][k] * B[k][j];
        time1 = timer();
        //Send pieces of A to the slaves
        iOffset = 0;
        for (i = 1; i < size; i++) {
            MPI_Send(A[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD);
            iOffset += iProblemSize;
            /*for(j = 0; j < iProblemSize; j++) {
                MPI_Send(A[iOffset + j], DIM, MPI_INT, i, 0, MPI_COMM_WORLD);
            }
            iOffset += iProblemSize;*/
        }
        //Take care of leftovers if needed (if uneven number of slaves)
        if ((size - 1) % DIM != 0) {
            for (i = iOffset; i < DIM; i++) {
                for (k = 0; k < DIM; k++) {
                    for (j = 0; j < DIM; j++) {
                        C[i][j] += A[i][k] * B[k][j];
                    }
                }
            }
        }
        //Gather the results from the slaves
        iOffset = 0;
        for (i = 1; i < size; i++) {
            MPI_Recv(C[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD, NULL);
            iOffset += iProblemSize;
            printf("Received from %d!\n", i);
        }
        printf("All received!\n");
        /* Error checking */
        time1 = timer() - time1;
        printf("Your calculation is %scorrect.\n", correct_result(C, D) ? "" : "not ");
        printf("Total runtime: %f seconds\n", time1 / 1000000.0);
    }
    else { //Slaves
        MPI_Recv(A, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
        /*for(j = 0; j < iProblemSize; j++) {
            MPI_Recv(A[j], DIM, MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
        }*/
        //Do the calculations for C
        //printf("Process %d doing calculations...\n", rank);
        for (i = 0; i < (iProblemSize * DIM); ++i) {
            for (k = 0; k < DIM; ++k) {
                for (j = 0; j < DIM; ++j) {
                    C[i][j] += A[i][k] * B[k][j];
                }
                //printf("\n");
            }
        }
        //printf("Process %d finished doing the calculations!\n", rank);
        //Send the result to the master
        printf("Process %d sending...\n", rank);
        MPI_Send(C, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD);
        printf("Process %d finished sending!\n", rank);
    }
    MPI_Finalize();
    return 0;
}
OK, I finally fixed the error.
The problem was in the loop where the slaves do the calculations:
    for (i = 0; i < (iProblemSize * DIM); ++i) {
should be
    for (i = 0; i < iProblemSize; ++i) {
since each slave only holds iProblemSize rows; the original bound runs far past the DIM rows of C once the matrix is big enough. :)

MPI_Gather segmentation fault

I have this parallel Gaussian elimination code. A segmentation fault happens upon calling either of the MPI_Gather calls. I know such an error may arise if memory is not allocated properly for one of the buffers, but I cannot see anything wrong with the memory management code.
Can someone help?
Thanks.
Notes:
The program reads from a .txt file in the same directory called input.txt.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

/*void print2dAddresses(double** array2d, int rows, int cols)
{
    int i;
    for(i = 0; i < rows; i++)
    {
        int j;
        for(j = 0; j < cols; j++)
        {
            printf("%d ", &(array2d[i][j]));
        }
        printf("\n");
    }
    printf("------------------------------------");
}*/

double** newMatrix(int rows, int cols)
{
    double *data = (double*) malloc(rows * cols * sizeof(double));
    double **array = (double **) malloc(rows * sizeof(double*));
    int i;
    for (i = 0; i < rows; i++)
        array[i] = &(data[cols*i]);
    return array;
}

void freeMatrix(double** mat)
{
    free(mat[0]);
    free(mat);
}

double** new2dArray(int nrows, int ncols)
{
    int i;
    double** array2d;
    array2d = (double**) malloc(nrows * sizeof(double*));
    for(i = 0; i < nrows; i++)
    {
        array2d[i] = (double*) malloc(ncols * sizeof(double));
    }
    return array2d;
}

double* new1dArray(int size)
{
    return (double*) malloc(size * sizeof(double));
}

void free2dArray(double** array2d, int nrows)
{
    int i;
    for(i = 0; i < nrows; i++)
    {
        free(array2d[i]);
    }
    free(array2d);
}

void print2dArray(double** array2d, int nrows, int ncols)
{
    int i, j;
    for(i = 0; i < nrows; i++)
    {
        for(j = 0; j < ncols; j++)
        {
            printf("%lf ", array2d[i][j]);
        }
        printf("\n");
    }
    printf("----------------------\n");
}

void print1dArray(double* array, int size)
{
    int i;
    for(i = 0; i < size; i++)
    {
        printf("%lf\n", array[i]);
    }
    printf("----------------------\n");
}

void read2dArray(FILE* fp, double** array2d, int nrows, int ncols)
{
    int i, j;
    for(i = 0; i < nrows; i++)
    {
        for(j = 0; j < ncols; j++)
        {
            fscanf(fp, "%lf", &(array2d[i][j]));
        }
    }
}

void read1dArray(FILE* fp, double* array, int size)
{
    int i;
    for(i = 0; i < size; i++)
    {
        fscanf(fp, "%lf", &(array[i]));
    }
}

void readSymbols(char* symbols, int size, FILE* fp)
{
    int i;
    for(i = 0; i < size; i++)
    {
        char c = '\n';
        while(c == '\n' | c == ' ' | c == '\t' | c == '\r')
            fscanf(fp, "%c", &c);
        symbols[i] = c;
    }
}

void printSolution(char* symbols, double* x, int size)
{
    int i;
    for(i = 0; i < size; i++)
    {
        printf("%c = %lf\n", symbols[i], x[i]);
    }
}

double* copy_1d_array(double* original, int size)
{
    double* copy_version;
    int i;
    copy_version = (double*) malloc(size * sizeof(double));
    for(i = 0; i < size; i++)
    {
        copy_version[i] = original[i];
    }
    return copy_version;
}
int main(int argc, char** argv)
{
    int p, rank, i, j, k, l, msize, rowsPerProcess, remainder, startingRow, dest, rowCounter, remainingRows, neededProcesses;
    double **A, *b, *x, **smallA, *currentRow, *smallB, currentB, **receivedA, *receivedB;
    char *symbols;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if(rank == 0)
    {
        FILE* fp;
        fp = fopen("input.txt", "r");
        fscanf(fp, "%d", &msize);
        A = newMatrix(msize, msize);
        b = new1dArray(msize);
        x = new1dArray(msize);
        symbols = (char*) malloc(msize * sizeof(char));
        read2dArray(fp, A, msize, msize);
        read1dArray(fp, b, msize);
        readSymbols(symbols, msize, fp);
        fclose(fp);
        /*print2dArray(A, msize, msize);
        print1dArray(b, msize);*/
    }
    MPI_Bcast(&msize, 1, MPI_INT, 0, MPI_COMM_WORLD);
    for(i = 0; i < (msize - 1); i++)
    {
        int maxIndex;
        double maxCoef, tmp, r;
        /*finding max row*/
        if(rank == 0)
        {
            maxIndex = i;
            maxCoef = fabs(A[i][i]);
            for(j = i + 1; j < msize; j++)
            {
                if(fabs(A[j][i]) > maxCoef)
                {
                    maxCoef = A[j][i];
                    maxIndex = j;
                }
            }
            /*swapping the current row with the max row*/
            for(j = 0; j < msize; j++)
            {
                tmp = A[i][j];
                A[i][j] = A[maxIndex][j];
                A[maxIndex][j] = tmp;
            }
            tmp = b[i];
            b[i] = b[maxIndex];
            b[maxIndex] = tmp;
            /*elimination*/
            /*for(j = i + 1; j < msize; j++)
            {
                double r = A[j][i] / A[i][i];
                subtracting r * row i from row j
                for(k = i; k < msize; k++)
                {
                    A[j][k] -= r * A[i][k];
                }
                b[j] -= r * b[i];
            }*/
            /*parallel elimination*/
            startingRow = i + 1;
            neededProcesses = p;
            remainingRows = msize - startingRow;
            if(remainingRows < neededProcesses)
            {
                neededProcesses = remainingRows;
            }
            rowsPerProcess = remainingRows / neededProcesses;
            remainder = remainingRows % neededProcesses;
        }
        MPI_Bcast(&startingRow, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&rowsPerProcess, 1, MPI_INT, 0, MPI_COMM_WORLD);
        if(rank == 0)
        {
            currentRow = copy_1d_array(A[startingRow-1], msize);
            currentB = b[startingRow-1];
        }
        else
        {
            currentRow = new1dArray(msize);
        }
        MPI_Bcast(currentRow, msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Bcast(&currentB, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        if(rank == 0)
        {
            receivedA = newMatrix(remainingRows, msize);
            receivedB = new1dArray(remainingRows);
        }
        smallA = newMatrix(rowsPerProcess, msize);
        smallB = new1dArray(rowsPerProcess);
        MPI_Scatter(&(A[startingRow][0]), rowsPerProcess*msize, MPI_DOUBLE, &(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Scatter(&(b[startingRow]), rowsPerProcess, MPI_DOUBLE, &(smallB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        for(j = 0; j < rowsPerProcess; j++)
        {
            r = smallA[j][startingRow-1] / currentRow[startingRow-1];
            for(k = 0; k < msize; k++)
            {
                smallA[j][k] -= r * currentRow[k];
            }
            smallB[j] -= r * currentB;
        }
        MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, &(receivedA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Gather(&(smallB[0]), rowsPerProcess, MPI_DOUBLE, &(receivedB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        freeMatrix(smallA);
        free(smallB);
        if(rank == 0)
        {
            for(j = 0; j < remainingRows; j++)
            {
                for(k = 0; k < msize; k++)
                {
                    A[j+startingRow][k] = receivedA[j][k];
                }
                b[j+startingRow] = receivedB[j];
            }
            free(currentRow);
            freeMatrix(receivedA);
            free(receivedB);
        }
        if(rank == 0)
        {
            if(remainder > 0)
            {
                for(j = (msize - remainder); j < msize; j++)
                {
                    r = A[j][i] / A[i][i];
                    for(k = 0; k < msize; k++)
                    {
                        A[j][k] -= r * A[i][k];
                    }
                    b[j] -= r * b[i];
                }
            }
        }
    }
    if(rank == 0)
    {
        /*backward substitution*/
        for(i = msize - 1; i >= 0; i--)
        {
            x[i] = b[i];
            for(j = msize - 1; j > i; j--)
            {
                x[i] -= A[i][j] * x[j];
            }
            x[i] /= A[i][i];
        }
        printf("solution = \n");
        //print1dArray(x, msize);
        printSolution(symbols, x, msize);
        freeMatrix(A);
        free(b);
        free(x);
        free(symbols);
    }
    MPI_Finalize();
    return 0;
}
Input File:
3
1 1 1
1 1 3
2 1 4
4
9
12
x
y
z
It might be this: &(receivedA[0][0]) on processes where rank != 0. You're indexing an array that hasn't been allocated. You might have to create another pointer, like this:
    if(rank == 0)
    {
        receivedA = newMatrix(remainingRows, msize);
        receivedAHead = &(receivedA[0][0]);
        receivedB = new1dArray(remainingRows);
    }
    else
    {
        receivedAHead = NULL;
    }
and use receivedAHead in the MPI_Gather call.
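A sketch of the resulting call (receivedAHead would be declared as a double* alongside the other pointers; MPI_Gather ignores the receive buffer on non-root ranks, so NULL is fine there):
    double *receivedAHead = NULL;  // set to &(receivedA[0][0]) on rank 0 as above
    /* ... */
    MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE,
               receivedAHead, rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
The same fix applies to &(receivedB[0]) in the second gather.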

OpenMPI array of blocking receives not working properly (in some cases the program doesn't stop)

I consider this to be an extremely weird issue.
I have code that is supposed to receive a 2D matrix of size chunk by width.
The matrix is allocated using:
int **alloc2d(int n, int m) {
    int i = 0;
    int *data = malloc(n * m * sizeof(int));
    int **array = malloc(n * sizeof(int *));
    for (i = 0; i < n; i++) {
        array[i] = &(data[i * m]);
    }
    return array;
}
So it is a contiguous memory block.
I have the following code:
MPI_Status st;
int worker;
for(i = 1; i < size; i++) {
    MPI_Recv(&(recmat[0][0]), chunk*width, MPI_INT, MPI_ANY_SOURCE, 1,
             MPI_COMM_WORLD, &st);
    worker = st.MPI_SOURCE;
    /* for(k = worker * chunk; k < (worker + 1) * chunk; k++){
        for(j = 0; j < width; j++) {
            mat[k][j] = recmat[k - worker * chunk][j];
        }
    }*/
}
If the code is like this (copy loop commented out), everything terminates and works well.
If I uncomment the region:
for(k = worker * chunk; k < (worker + 1) * chunk; k++){
    for(j = 0; j < width; j++) {
        mat[k][j] = recmat[k - worker * chunk][j];
    }
}
the thread running this code does not stop, and I cannot find a logical explanation for this. Perhaps someone can see the error or the issue. Thank you!
recmat allocation and chunk calculation:
    int chunk;
    chunk = height / size;
    int **recmat;
    recmat = alloc2d(chunk, width);
Sorry, this is too long to fit in a comment:
The code you've posted is fine; for instance, putting enough code around it to make it run produces the correct results (below). So the issue is not where you think it is.
If you see code locking up in places you don't think it ought to, this frequently points to weird memory errors or something else going on. You're best off just running it through a debugger, or through something like valgrind to check for memory problems.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int **alloc2d(int n, int m) {
    int i = 0;
    int *data = malloc(n * m * sizeof(int));
    int **array = malloc(n * sizeof(int *));
    for (i = 0; i < n; i++) {
        array[i] = &(data[i * m]);
    }
    return array;
}

int main(int argc, char **argv) {
    int rank, size;
    const int height = 10, width = 10;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int **recmat;
    int chunk;
    chunk = height / size;
    if (chunk*size != height) {
        fprintf(stderr, "%s: number of ranks %d does not divide size %d\n",
                argv[0], size, height);
        MPI_Finalize();
        exit(1);
    }
    if (rank == 0) {
        int **recmat = alloc2d(chunk, width);
        int **mat = alloc2d(height, width);
        int worker;
        int i, j, k;
        MPI_Status st;
        /* deal with my own submatrix */
        for (k=0; k<chunk; k++) {
            for (j=0; j<width; j++) {
                mat[k][j] = 0;
            }
        }
        for (i = 1; i < size; i++) {
            MPI_Recv(&(recmat[0][0]), chunk*width, MPI_INT, MPI_ANY_SOURCE, 1,
                     MPI_COMM_WORLD, &st);
            worker = st.MPI_SOURCE;
            for (k = worker * chunk; k < (worker + 1) * chunk; k++) {
                for (j = 0; j < width; j++) {
                    mat[k][j] = recmat[k - worker * chunk][j];
                }
            }
        }
        free(&(recmat[0][0]));
        free(recmat);
        printf("Rank 0: mat is \n");
        for (int i=0; i<height; i++) {
            for (int j=0; j<width; j++) {
                printf("%2d ", mat[i][j]);
            }
            printf("\n");
        }
        free(&(mat[0][0]));
        free(mat);
    } else {
        int **sendmat = alloc2d(chunk, width);
        for (int i=0; i<chunk; i++)
            for (int j=0; j<width; j++)
                sendmat[i][j] = rank;
        MPI_Send(&(sendmat[0][0]), chunk*width, MPI_INT, 0, 1, MPI_COMM_WORLD);
        free(&(sendmat[0][0]));
        free(sendmat);
    }
    MPI_Finalize();
    return 0;
}
There were so many errors and bugs in my code that it's not even worth listing them; I'm sorry for this useless question ...
This is also a longer comment. You can avoid copying the received data twice if you first probe for the message and then use the rank from the status to receive the content directly into the big matrix:
for(i = 1; i < size; i++) {
    MPI_Probe(MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &st);
    worker = st.MPI_SOURCE;
    MPI_Recv(&(mat[worker*chunk][0]), chunk*width, MPI_INT,
             worker, 1, MPI_COMM_WORLD, &st);
}
Less code, and it should work faster.
