Parallel Branch & Bound Traveling Salesman via MPI in C

I'm trying to understand the code at http://wyattgorman.com/?p=25. So far I've done little more than run it through clang-format:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE 20
int m_row = SIZE, m_column = SIZE, zed = 30, matrix[SIZE][SIZE], visited[SIZE], best_path[SIZE];
int best_cost = 9999999, size = SIZE;
void dfs(int city, int visited_in[], int path_in[], int path_i_in, int cost_in) {
if (cost_in < best_cost) {
int* visited = calloc(sizeof(int), size + 1);
int* path = calloc(sizeof(int), size + 1);
int path_i = path_i_in, cost = cost_in, i;
for (i = 0; i < size; i++) {
visited[i] = visited_in[i];
path[i] = path_in[i];
}
visited[city] = 1;
path[path_i] = city;
path_i++;
int leaf = 0;
for (i = 0; i < size; i++) {
if (visited[i] == 0) {
leaf++;
dfs(i, visited, path, path_i, cost + matrix[city][i]);
}
}
if (leaf == 0) {
cost += matrix[city][0];
path[path_i] = 0;
path_i++;
if (cost < best_cost) {
// printf("Found new best cost: %i\n", cost);
best_cost = cost;
for (i = 0; i < size; i++)
best_path[i] = path[i];
}
}
free(visited);
free(path);
}
}
int main(int argc, char *argv[]) {
int rank, p;
// , source, dest;
// int tag = 0;
MPI_Status status;
MPI_Init(0, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
srand(time(NULL));
if (rank == 0) {
int i, j;
for (i = 0; i < m_row; i++)
for (j = 0; j < m_column; j++)
matrix[i][j] = 0;
for (i = 0; i < m_row; i++) {
for (j = 0; j < i; j++) {
if (i != j) {
int temp = (rand() % zed) + 1;
matrix[i][j] = temp;
matrix[j][i] = temp;
}
}
}
for (i = 1; i < p; i++)
MPI_Send(&matrix[0][0], size * size, MPI_LONG, i, 0, MPI_COMM_WORLD);
printf("Matrix, %ix %i, Max Int : %i\n", m_row, m_column, zed);
for (i = 0; i < m_row; i++) {
for (j = 0; j < m_column; j++)
printf("%i\t", matrix[i][j]);
printf("\n");
fflush(NULL);
}
printf("\n");
int winner;
int node_array[p - 1];
int node_array_i = 0;
for (i = 0; i < p - 1; i++)
node_array[i] = i + 1;
for (i = 1; i < size; i++) {
int temp_best_cost, node;
node = node_array[node_array_i];
if (node_array_i < p - 2)
node_array_i++;
else
node_array_i = 0;
int* temp_best_path = calloc(sizeof(int), size + 1);
MPI_Recv(&temp_best_cost, 1, MPI_INT, node, 0, MPI_COMM_WORLD,&status);
MPI_Recv(&temp_best_path[0], size + 1, MPI_INT, node, 0, MPI_COMM_WORLD, &status);
if (temp_best_cost < best_cost) {
winner = node;
best_cost = temp_best_cost;
for (j = 0; j < size + 1; j++)
best_path[j] = temp_best_path[j];
}
MPI_Send(&best_cost, 1, MPI_INT, node, 0, MPI_COMM_WORLD);
}
printf("Best Path Found by node % i :\n", winner);
printf("% i", best_path[0]);
for (i = 1; i < size + 1; i++)
printf(" –> % i", best_path[i]);
printf("\nBest Cost Found : % i\n", best_cost);
} else {
MPI_Recv(&(matrix[0][0]), m_row * m_column, MPI_LONG, 0, 0, MPI_COMM_WORLD, &status);
int i;
for (i = rank; i < size; i += (p - 1)) {
int* visited = calloc(sizeof(int), size + 1);
int* path = calloc(sizeof(int), size + 1);
int cost = matrix[0][i], path_i = 1;
path[0] = 0;
visited[0] = 1;
dfs(i, visited, path, path_i, cost);
MPI_Send(&best_cost, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&best_path[0], size + 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Recv(&best_cost, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
free(visited);
free(path);
}
}
MPI_Finalize();
return 0;
}
To my eyes it looks like visited[] from
int m_row = SIZE, m_column = SIZE, zed = 30, matrix[SIZE][SIZE], visited[SIZE], best_path[SIZE];
gets overwritten by
int* visited = calloc(sizeof(int), size + 1);
Is that OK?
Also, in the non-zero ranks, at
MPI_Recv(&(matrix[0][0]), m_row * m_column, MPI_LONG, 0, 0, MPI_COMM_WORLD, &status);
the destination matrix looks like it's the same matrix used as the origin in rank 0, at:
MPI_Send(&matrix[0][0], size * size, MPI_LONG, i, 0, MPI_COMM_WORLD);
Does that make sense? Is that because matrix is shared by all ranks?
int m_row = SIZE, m_column = SIZE, zed = 30, matrix[SIZE][SIZE], visited[SIZE], best_path[SIZE];
Also, I'd guess best_path[] (in the declaration at the beginning) should be best_path[SIZE + 1] instead of best_path[SIZE], because this loop goes up to size + 1, right?
for (j = 0; j < size + 1; j++)
best_path[j] = temp_best_path[j];

The first (global) visited variable is "shadowed" by the local one used with calloc(). This isn't necessarily wrong, but it is poor coding style.
Yes, matrix is "shared" by all ranks, for some definition of shared: it is a global, so every rank has its own copy, and the MPI_Send/MPI_Recv pair copies rank 0's values into the other ranks' copies. Receiving into it on the workers is fine.
As for the writing past the end of best_path, you're correct, that code is broken (it has undefined behavior).
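For concreteness, here is a minimal sketch of how those two points could look once fixed, keeping the original structure; record_best is a hypothetical helper standing in for the best-tour update in dfs and the copy loop in main:

#include <string.h>

#define SIZE 20

/* A complete tour stores SIZE cities plus the final return to city 0,
 * so the path needs SIZE + 1 slots. The global visited[SIZE] is dropped
 * entirely: dfs() always works on its own calloc'd copies, so a global
 * with the same name only invites shadowing confusion. */
static int best_path[SIZE + 1];
static int best_cost = 9999999;

/* Record a candidate tour of SIZE + 1 hops if it beats the best so far. */
static void record_best(const int *candidate_path, int candidate_cost)
{
    if (candidate_cost < best_cost) {
        best_cost = candidate_cost;
        memcpy(best_path, candidate_path, (SIZE + 1) * sizeof(int));
    }
}

One further thing worth mentioning, though it isn't what was asked: matrix is an array of int, so the MPI_Send/MPI_Recv calls that describe it as size * size elements of MPI_LONG mismatch the buffer on typical 64-bit platforms; MPI_INT is the type that matches the declaration.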

Related

matrix multiply with mpi

I have a problem with the result of my m1 computation: when I check it, some of the rows between rank 0's block and the last rank's block come back empty, and unfortunately none of the workarounds I have tried solve it.
Can anyone help me with this?
Where is the problem in this code, and how can it be solved?
This is the code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#define N 1000
#define M (1000 / 2)
int A[N][N], B[N][N], C[N][N];
int m1[M][M], m2[M][M], m3[M][M], m4[M][M], m5[M][M], m6[M][M], m7[M][M];
int A11[M][M], A12[M][M], A21[M][M], A22[M][M], B11[M][M], B12[M][M], B21[M][M], B22[M][M];
int C11[M][M], C12[M][M], C21[M][M], C22[M][M];
int rank, size, start_row, end_row;
void multiplym1(int mySize, int AA[M][M], int BB[M][M], int CC[M][M], int DD[M][M], int resfinal[M][M], int mystart_row, int myend_row)
{
int result1[mySize][mySize], result2[mySize][mySize];
for (int i = mystart_row; i < myend_row; i++)
{
for (int j = 0; j < mySize; j++)
{
result1[i][j] = AA[i][j] + BB[i][j];
result2[i][j] = CC[i][j] + DD[i][j];
}
}
for (int i = mystart_row; i < myend_row; i++)
{
for (int j = 0; j < mySize; j++)
{
resfinal[i][j] = 0;
for (int k = 0; k < mySize; k++)
{
resfinal[i][j] += (result1[i][k] * result2[k][j]);
}
}
}
}
int main(int argc, char const *argv[])
{
srand(time(NULL));
printf("\n------------------------* Initializing matrices *----------------------\n");
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
start_row = rank * (N / size);
if (rank + 1 == size)
{
end_row = N;
}
else
{
end_row = (rank + 1) * (N / size);
}
// printf("#%d: start is %d and end is %d\n", rank, start_row, end_row);
for (int i = start_row; i < end_row; i++)
{
for (int j = 0; j < N; j++)
{
A[i][j] = rand() % 50;
B[i][j] = rand() % 20;
C[i][j] = 0;
// printf("#%d: A[%d][%d] = %d\n", rank, i, j, A[i][j]);
}
}
// printf("#%d: Done\n\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
// printf("#%d: start \n\n", rank);
start_row = 0;
end_row = 0;
start_row = rank * (M / size);
if (rank + 1 == size)
{
end_row = M;
}
else
{
end_row = (rank + 1) * (M / size);
}
// printf("#%d: start is %d and end is %d\n", rank, start_row, end_row);
for (int i = start_row; i < end_row; i++)
{
for (int j = 0; j < M; j++)
{
A11[i][j] = A[i][j];
A12[i][j] = A[i][j + M];
A21[i][j] = A[i + M][j];
A22[i][j] = A[i + M][j + M];
B11[i][j] = B[i][j];
B12[i][j] = B[i][j + M];
B21[i][j] = B[i + M][j];
B22[i][j] = B[i + M][j + M];
}
}
// printf("#%d: Done\n\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
// printf("#%d: start For M1\n\n", rank);
start_row = 0;
end_row = 0;
start_row = rank * (M / size);
if (rank + 1 == size)
{
end_row = M;
}
else
{
end_row = (rank + 1) * (M / size);
}
printf("#%d: start is %d and end is %d\n", rank, start_row, end_row);
multiplym1(M, A11, A22, B11, B22, m1, start_row, end_row);
MPI_Barrier(MPI_COMM_WORLD);
int *counts = malloc(size * sizeof(int));
int *displs = malloc(size * sizeof(int));
for (int i = 0; i < size; i++)
{
counts[i] = (M / size) * M;
displs[i] = i * (M / size) * M;
}
counts[size - 1] = ((M / size) + (M % size)) * M;
MPI_Gatherv(&m1[start_row][0], counts[rank], MPI_INT, m1, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);
printf("#%d: M1 DONE!!\n", rank);
if (rank == 0)
{
for (int i = 0; i < M; i += 49)
{
for (int j = 0; j < M; j += 100)
{
printf("#%d: m1[%d][%d] = %d\n", rank, i, j, m1[i][j]);
}
}
}
MPI_Finalize();
return 0;
}
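One thing that stands out in the MPI_Gatherv call, independent of how m1 is computed: on the root, the send buffer (&m1[start_row][0], with start_row == 0 there) overlaps the receive buffer (m1), and MPI forbids aliasing send and receive buffers in collectives. A sketch of the usual workaround, using MPI_IN_PLACE on the root so that its own block is taken straight from the receive buffer:

/* Root's contribution already sits in rows [0, M/size) of m1, so it is
 * gathered "in place"; the other ranks send their computed rows as before. */
if (rank == 0)
    MPI_Gatherv(MPI_IN_PLACE, 0, MPI_INT,
                m1, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);
else
    MPI_Gatherv(&m1[start_row][0], counts[rank], MPI_INT,
                m1, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);

Whether that explains the empty rows depends on the rest of the program, but it is an error worth removing first.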

MPI Parallel summation of two vectors

I need to sum two vectors in parts on separate processes. I work out which portion of the vectors needs to be sent out and send it using MPI_Send, and summing the parts of the vectors on the worker processes works fine.
What I cannot do is collect the resulting array back on process zero. As far as I understand, I need to use MPI_Isend.
#include <iostream>
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <string.h>
using namespace std;
void Print(int *vector, int n, int start) {
for (int i = start; i < n; i++)
printf("%4d", vector[i]);
}
void RandomFill(int *vector, int n) {
for (int i = 0; i < n; i++)
vector[i] = rand() % 100;
}
//C = A + B
int main(int argc, char* argv[])
{
int ProcNum, ProcRank;
MPI_Status status;
MPI_Request request;
int *A, *B, *C, *buf;
int nResult = 0;
int n = 5 + rand() % (25 - 5 + 1);
bool yes = true;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
if (ProcRank == 0) {
int part_send = 0;
A = new int[n];
B = new int[n];
C = new int[n];
RandomFill(A, n);
RandomFill(B, n);
printf("\nProcess number: %d\t n = %d", ProcRank, n);
printf("\nVector A:\n");
Print(A, n, 0);
printf("\nVector B:\n");
Print(B, n, 0);
int rest = n % (ProcNum - 1);
int *rest_array = new int[rest];
printf("\n\nRest = %d", rest);
if (rest != 0)
{
part_send = (n - rest) / (ProcNum - 1);
nResult = ProcNum;
int j = 0;
for (int i = n - rest; i < n; i++) {
rest_array[j] = A[i] + B[i];
j++;
}
}
else
{
nResult = ProcNum - 1;
part_send = n / (ProcNum - 1);
}
for (int i = 1; i < ProcNum; i++)
{
int index = (i - 1) * part_send;
MPI_Send(&A[index], part_send, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&B[index], part_send, MPI_INT, i, 1, MPI_COMM_WORLD);
}
printf("\n\n");
buf = new int[part_send];
for (int i = 1; i < ProcNum; i++) {
MPI_Irecv(&buf, part_send, MPI_INT, MPI_ANY_SOURCE, 3, MPI_COMM_WORLD, &request);
if (MPI_Wait(&request, &status) == MPI_SUCCESS) {
for (int j = 0; j < part_send; j++)
C[(part_send * i) + j] = buf[j];
printf("Result:");
Print(C, n, 0);
}
}
}
else
{
printf("\n Process number: %d\n", ProcRank);
int nRecv = 0;
MPI_Probe(0, 1, MPI_COMM_WORLD, &status);
MPI_Get_count(&status, MPI_INT, &nRecv);
int *recvArr1 = new int[nRecv];
int *recvArr2 = new int[nRecv];
int *recvArrSum = new int[nRecv];
MPI_Recv(recvArr1, nRecv, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
MPI_Recv(recvArr2, nRecv, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
for (int i = 0; i < nRecv; i++) {
recvArrSum[i] = recvArr1[i] + recvArr2[i];
}
printf("recvArrSum:\n");
Print(recvArrSum, nRecv, 0);
MPI_Isend(&recvArrSum, nRecv, MPI_INT, 0, 3, MPI_COMM_WORLD, &request);
}
MPI_Finalize();
return 0;
}
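No fix is shown here, but for what it's worth, here is a minimal sketch of one way rank 0 could collect the partial sums with plain blocking receives. The two key details are that the buffer argument is the pointer itself (buf, not &buf, and likewise recvArrSum on the workers), and that with MPI_ANY_SOURCE the placement has to come from status.MPI_SOURCE rather than the loop counter:

/* Rank 0: receive one summed block per worker; worker i was originally
 * given the slice starting at (i - 1) * part_send, so its result goes back
 * to the same offset in C. The remainder was already summed into rest_array. */
for (int i = 1; i < ProcNum; i++) {
    MPI_Recv(buf, part_send, MPI_INT, MPI_ANY_SOURCE, 3, MPI_COMM_WORLD, &status);
    int sender = status.MPI_SOURCE;            /* which worker this block came from */
    for (int j = 0; j < part_send; j++)
        C[(sender - 1) * part_send + j] = buf[j];
}

On the worker side a plain MPI_Send of recvArrSum is enough; MPI_Isend is not required just to return the data, and if it is used, the request must be completed with MPI_Wait before the buffer is freed or the process finalizes.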

mpi matrix multiplication to run with a different number of processors

So I got the code working when running on 1 process, but when I try to run it on 2 or more processes (mpirun -n 4, mpirun -n 8, etc.) half my results come back as zero. I'm assuming that's because it doesn't handle the case where the matrix size isn't evenly divisible by the number of processes. Any ideas? I'm trying to initialize both matrices from the command line and perform matrix multiplication using MPI. I'm new to this and would love any help. For example, when I enter a size of 2 and initialize matrix A to the values {1,4,6,7} and matrix B to {8,9,4,5}, my result comes out to be {8,9,0,0}.
#include <stdio.h>
#include <mpi.h>
void init_Matrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
scanf("%i", &matrix[i][j]);
}
}
}
void printMatrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
printf("%d" , matrix[i][j]);
printf(" ");
}
printf("\n");
}
}
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_size(MPI_COMM_WORLD, &size); //num p
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int n;
if(rank == 0)
{
printf("Enter in size of matrix! \x0A");
scanf("%i",&n);
}
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);
int A[n][n];
int B[n][n];
int C[n][n];
int aa[n/size][n];
int cc[n/size][n];
if(rank == 0)
{
init_Matrix(n,A);
init_Matrix(n,B);
}
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
//scatter rows of first matrix to different processes
MPI_Scatter(A, n*n/size, MPI_INT, aa, n*n/size, MPI_INT,0,MPI_COMM_WORLD);
//broadcast second matrix to all processes
MPI_Bcast(B, n*n, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
//perform vector multiplication by all processes
for(int k = 0; k < n/size; k++)
{
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cc[i][j] += A[i][k] * B[k][j];
}
}
}
MPI_Gather(cc, n*n/size, MPI_INT, C, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0){
printMatrix(n, C);
}
MPI_Finalize();
}
Updated attempt using MPI scatterv and MPI gather:
void initMatrix(int Size, int matrix[Size][Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
scanf("%i", &matrix[i][j]);
}
}
void multIJK(int Size, int A[Size][Size], int B[Size][Size], int pResult[Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
{
for(int k = 0; k < Size; k++)
pResult += A[i][k] * B[k][j];
}
}
}
int main(int argc, char* argv[]) {
int Size;
int RowNum;
int ProcNum;
int ProcRank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
if (ProcRank == 0) {
printf("Enter in size of matrix! \x0A");
scanf("%i", &Size);
}
int aMatrix[Size][Size];
int bMatrix[Size][Size];
MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
int RestRows = Size;
for (int i=0; i<ProcRank; i++)
RestRows = RestRows-RestRows/(ProcNum-i);
RowNum = RestRows/(ProcNum-ProcRank);
int pResult[Size];
int pProcRows[RowNum*Size];
int pProcResult[RowNum];
if(ProcRank == 0)
{
initMatrix(Size,aMatrix);
initMatrix(Size,bMatrix);
}
RestRows=Size; // Number of rows, that haven’t been distributed yet
MPI_Bcast(bMatrix, Size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Alloc memory for temporary objects
// the number of elements sent to the process
int pSendInd[ProcNum];
// the index of the first data element sent to the process
int pSendNum[ProcNum];
// Define the disposition of the matrix rows for current process
RowNum = (Size/ProcNum);
pSendNum[0] = RowNum*Size;
pSendInd[0] = 0;
for (int i=1; i<ProcNum; i++) {
RestRows -= RowNum;
RowNum = RestRows/(ProcNum-i);
pSendNum[i] = RowNum*Size;
pSendInd[i] = pSendInd[i-1]+pSendNum[i-1];
}
// Scatter the rows
MPI_Scatterv(aMatrix , pSendNum, pSendInd, MPI_INT, pProcRows,
pSendNum[ProcRank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
multIJK(Size,aMatrix,bMatrix,pResult);
RestRows=Size; // Number of rows, that haven’t been distributed yet
//Alloc memory for temporary objects
// Number of elements, that current process sends
int pReceiveNum[ProcNum];
/* Index of the first element from current process in result vector */
int pReceiveInd[ProcNum];
//Define the disposition of the result vector block of current processor
pReceiveInd[0] = 0;
pReceiveNum[0] = Size/ProcNum;
for (int i=1; i<ProcNum; i++) {
RestRows -= pReceiveNum[i-1];
pReceiveNum[i] = RestRows/(ProcNum-i);
pReceiveInd[i] = pReceiveInd[i-1]+pReceiveNum[i-1];
} //Gather the whole result vector on every processor
MPI_Allgatherv(pProcResult, pReceiveNum[ProcRank], MPI_INT, pResult,
pReceiveNum, pReceiveInd, MPI_DOUBLE, MPI_COMM_WORLD);
//ProcessTermination(aMatrix,bMatrix, pResult, pProcRows, pProcResult);
if(ProcRank == 0)
{
for(int i = 0; i < Size; i++)
{
printf("%i\n",pResult[i]);
}
}
MPI_Finalize();
}
You have some logic problems.
for(int i = 0; i < n; i++) <-- this should only go up to n/size; you are going into unallocated memory
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
cc[i][j] += A[i][k] * B[k][j]; <-- again, going outside allocated memory
Replace it with
cc[k][i] += A[k][j] * B[j][i];
Hopefully these are all the problems.
You should also handle the case where the matrix size is not divisible by the number of processes.
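Putting that together, a sketch of what the per-process compute step might look like. Note that on ranks other than 0 the scattered rows live in aa (filled by MPI_Scatter), since only rank 0 ever initializes the full A, and this still assumes n is divisible by size:

/* Zero and fill only the n/size rows of the result that this rank owns. */
for (int k = 0; k < n / size; k++)
    for (int i = 0; i < n; i++)
        cc[k][i] = 0;

for (int k = 0; k < n / size; k++)          /* local row of aa and cc     */
    for (int j = 0; j < n; j++)             /* position along that row    */
        for (int i = 0; i < n; i++)         /* column of the result       */
            cc[k][i] += aa[k][j] * B[j][i];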

C MPI Matrix multiplication error

I'm doing some matrix multiplication in C with MPI.
It works fine until I try to go above 15x15 and I can't figure out why...
From what I've noticed, the error seems to mostly happen after I see a "Process # sending..." print, which happens when the slave processes are sending their data back to the master process.
Error message:
[LEC-B125N4J:12183] *** Process received signal ***
[LEC-B125N4J:12183] Signal: Segmentation fault (11)
[LEC-B125N4J:12183] Signal code: Address not mapped (1)
Code:
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <mpi.h>
//#define DIM 1000
#define DIM 15
/*
* Statically allocate the matrices to make the rows
* sequentially placed in memory. (This eases the task
* of distributing the problem among the slaves.)
* Make the matrices global to allow for larger
* dimensions.
*/
int A[DIM][DIM];
int B[DIM][DIM];
int C[DIM][DIM];
int D[DIM][DIM];
int correct_result(int A[DIM][DIM], int B[DIM][DIM])
{
int i,j;
for (i=0; i<DIM; ++i)
for (j=0; j<DIM; ++j)
if (A[i][j] != B[i][j])
return 0;
return 1;
}
int main(int argc, char *argv[])
{
{
int rank=0, size;
int i, j, k;
int time1;
volatile int tmp;
int iOffset = 0;
int iProblemSize = 0;
MPI_Init(&argc, &argv); /* starts MPI */
MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* get current process id */
MPI_Comm_size(MPI_COMM_WORLD, &size); /* get number of processes */
iProblemSize = (DIM / (size - 1));
if(rank == 0) { //Master
printf("Number of processes: %d (1 Master and %d slaves) - DIM: %d\n", size, (size - 1), DIM);
//Fill matrices A and B with random numbers
srand(timer(NULL));
for(i=0; i<DIM; ++i)
{
for (j=0; j<DIM; ++j)
{
A[i][j] = random() % 100 - 50;
B[i][j] = random() % 100 - 50;
C[i][j] = 0;
}
}
}
MPI_Bcast(B, (DIM * DIM), MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0) { //Master
/* Calculate the true answer */
for (i=0; i<DIM; ++i)
for (k=0; k<DIM; ++k)
for (j=0; j<DIM; ++j)
D[i][j] += A[i][k] * B[k][j];
time1 = timer();
//Send pieces of A to the slaves
iOffset = 0;
for(i = 1; i < size; i++) {
MPI_Send(A[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD);
iOffset += iProblemSize;
/*for(j = 0; j < iProblemSize; j++) {
MPI_Send(A[iOffset + j], DIM, MPI_INT, i, 0, MPI_COMM_WORLD);
}
iOffset += iProblemSize;*/
}
//Take care of leftovers if needed (if uneven number of slaves)
if((size - 1) % DIM != 0) {
for(i = iOffset; i < DIM; i++) {
for(k = 0; k < DIM; k++) {
for(j = 0; j < DIM; j++) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
}
//Gather the results from the slaves
iOffset = 0;
for(i = 1; i < size; i++) {
MPI_Recv(C[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD, NULL);
iOffset += iProblemSize;
printf("Received from %d!\n", i);
}
printf("All received!\n");
/* Error checking */
time1 = timer() - time1;
printf ("Your calculation is %scorrect.\n", correct_result(C,D) ? "" : "not ");
printf ("Total runtime: %f seconds\n", time1/1000000.0);
}
else { //Slaves
MPI_Recv(A, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
/*for(j = 0; j < iProblemSize; j++) {
MPI_Recv(A[j], DIM, MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
}*/
//Do the calculations for C
//printf("Process %d doing calculations...\n", rank);
for (i = 0; i < (iProblemSize * DIM); ++i) {
for (k = 0; k < DIM; ++k) {
for (j = 0; j < DIM; ++j) {
C[i][j] += A[i][k] * B[k][j];
}
//printf("\n");
}
}
//printf("Process %d finished doing the calculations!\n", rank);
//Send the result to the master
printf("Process %d sending...\n", rank);
MPI_Send(C, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD);
printf("Process %d finished sending!\n", rank);
}
MPI_Finalize();
return 0;
}
OK, I finally fixed the error.
The problem was in the loop where the slaves do their calculations...
for (i = 0; i < (iProblemSize * DIM); ++i) {
should be
for (i = 0; i < iProblemSize; ++i) {
:)
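With that one change the slave block looks roughly like this; MPI_STATUS_IGNORE is used here where the original passed NULL for the status argument, since that is the constant MPI actually provides for "I don't need the status":

/* Each slave receives iProblemSize rows of A, multiplies them by the
 * broadcast B, and sends back the corresponding iProblemSize rows of C. */
MPI_Recv(A, iProblemSize * DIM, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for (i = 0; i < iProblemSize; ++i)          /* only the rows this slave owns */
    for (k = 0; k < DIM; ++k)
        for (j = 0; j < DIM; ++j)
            C[i][j] += A[i][k] * B[k][j];
MPI_Send(C, iProblemSize * DIM, MPI_INT, 0, 0, MPI_COMM_WORLD);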

MPI_Gather segmentation fault

I have this parallel Gaussian elimination code. A segmentation fault happens upon calling either of the MPI_Gather calls. I know such an error may arise if memory is not allocated properly for one of the buffers, but I cannot see anything wrong with the memory management code.
Can someone help?
Thanks.
Notes:
The program reads from a .txt file in the same directory called input.txt.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
/*void print2dAddresses(double** array2d, int rows, int cols)
{
int i;
for(i = 0; i < rows; i++)
{
int j;
for(j = 0; j < cols; j++)
{
printf("%d ", &(array2d[i][j]));
}
printf("\n");
}
printf("------------------------------------");
}*/
double** newMatrix(int rows, int cols)
{
double *data = (double*) malloc(rows * cols * sizeof(double));
double **array= (double **)malloc(rows * sizeof(double*));
int i;
for (i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
void freeMatrix(double** mat)
{
free(mat[0]);
free(mat);
}
double** new2dArray(int nrows, int ncols)
{
int i;
double** array2d;
array2d = (double**) malloc(nrows * sizeof(double*));
for(i = 0; i < nrows; i++)
{
array2d[i] = (double*) malloc(ncols * sizeof(double));
}
return array2d;
}
double* new1dArray(int size)
{
return (double*) malloc(size * sizeof(double));
}
void free2dArray(double** array2d, int nrows)
{
int i;
for(i = 0; i < nrows; i++)
{
free(array2d[i]);
}
free(array2d);
}
void print2dArray(double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
printf("%lf ", array2d[i][j]);
}
printf("\n");
}
printf("----------------------\n");
}
void print1dArray(double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%lf\n", array[i]);
}
printf("----------------------\n");
}
void read2dArray(FILE* fp, double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
fscanf(fp, "%lf", &(array2d[i][j]));
}
}
}
void read1dArray(FILE* fp, double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
fscanf(fp, "%lf", &(array[i]));
}
}
void readSymbols(char* symbols, int size, FILE* fp)
{
int i;
for(i = 0; i < size; i++)
{
char c = '\n';
while(c == '\n' | c == ' ' | c == '\t' | c == '\r')
fscanf(fp, "%c", &c);
symbols[i] = c;
}
}
void printSolution(char* symbols, double* x, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%c = %lf\n", symbols[i], x[i]);
}
}
double* copy_1d_array(double* original, int size)
{
double* copy_version;
int i;
copy_version = (double*) malloc(size * sizeof(double));
for(i = 0; i < size; i++)
{
copy_version[i] = original[i];
}
return copy_version;
}
int main(int argc, char** argv)
{
int p, rank, i, j, k, l, msize, rowsPerProcess, remainder, startingRow, dest, rowCounter, remainingRows, neededProcesses;
double **A, *b, *x, **smallA, *currentRow, *smallB, currentB, **receivedA, *receivedB;
char *symbols;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == 0)
{
FILE* fp;
fp = fopen("input.txt", "r");
fscanf(fp, "%d", &msize);
A = newMatrix(msize, msize);
b = new1dArray(msize);
x = new1dArray(msize);
symbols = (char*) malloc(msize * sizeof(char));
read2dArray(fp, A, msize, msize);
read1dArray(fp, b, msize);
readSymbols(symbols, msize, fp);
fclose(fp);
/*print2dArray(A, msize, msize);
print1dArray(b, msize);*/
}
MPI_Bcast(&msize, 1, MPI_INT, 0, MPI_COMM_WORLD);
for(i = 0; i < (msize - 1); i++)
{
int maxIndex;
double maxCoef, tmp, r;
/*finding max row*/
if(rank == 0)
{
maxIndex = i;
maxCoef = fabs(A[i][i]);
for(j = i + 1; j < msize; j++)
{
if(fabs(A[j][i]) > maxCoef)
{
maxCoef = A[j][i];
maxIndex = j;
}
}
/*swapping the current row with the max row*/
for(j = 0; j < msize; j++)
{
tmp = A[i][j];
A[i][j] = A[maxIndex][j];
A[maxIndex][j] = tmp;
}
tmp = b[i];
b[i] = b[maxIndex];
b[maxIndex] = tmp;
/*elimination*/
/*for(j = i + 1; j < msize; j++)
{
double r = A[j][i] / A[i][i];
subtracting r * row i from row j
for(k = i; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}*/
/*parallel elimination*/
startingRow = i + 1;
neededProcesses = p;
remainingRows = msize - startingRow;
if(remainingRows < neededProcesses)
{
neededProcesses = remainingRows;
}
rowsPerProcess = remainingRows / neededProcesses;
remainder = remainingRows % neededProcesses;
}
MPI_Bcast(&startingRow, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&rowsPerProcess, 1, MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0)
{
currentRow = copy_1d_array(A[startingRow-1], msize);
currentB = b[startingRow-1];
}
else
{
currentRow = new1dArray(msize);
}
MPI_Bcast(currentRow, msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(&currentB, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
receivedB = new1dArray(remainingRows);
}
smallA = newMatrix(rowsPerProcess, msize);
smallB = new1dArray(rowsPerProcess);
MPI_Scatter(&(A[startingRow][0]), rowsPerProcess*msize, MPI_DOUBLE, &(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Scatter(&(b[startingRow]), rowsPerProcess, MPI_DOUBLE, &(smallB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for(j = 0; j < rowsPerProcess; j++)
{
r = smallA[j][startingRow-1] / currentRow[startingRow-1];
for(k = 0; k < msize; k++)
{
smallA[j][k] -= r * currentRow[k];
}
smallB[j] -= r * currentB;
}
MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, &(receivedA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Gather(&(smallB[0]), rowsPerProcess, MPI_DOUBLE, &(receivedB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
freeMatrix(smallA);
free(smallB);
if(rank == 0)
{
for(j = 0; j < remainingRows; j++)
{
for(k = 0; k < msize; k++)
{
A[j+startingRow][k] = receivedA[j][k];
}
b[j+startingRow] = receivedB[j];
}
free(currentRow);
freeMatrix(receivedA);
free(receivedB);
}
if(rank == 0)
{
if(remainder > 0)
{
for(j = (msize - remainder); j < msize; j++)
{
r = A[j][i] / A[i][i];
for(k = 0; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}
}
}
}
if(rank == 0)
{
/*backward substitution*/
for(i = msize - 1; i >= 0; i--)
{
x[i] = b[i];
for(j = msize - 1; j > i; j--)
{
x[i] -= A[i][j] * x[j];
}
x[i] /= A[i][i];
}
printf("solution = \n");
//print1dArray(x, msize);
printSolution(symbols, x, msize);
freeMatrix(A);
free(b);
free(x);
free(symbols);
}
MPI_Finalize();
return 0;
}
Input File:
3
1 1 1
1 1 3
2 1 4
4
9
12
x
y
z
It might be this: &(receivedA[0][0]) on processes where rank != 0. You're indexing an array that hasn't been allocated. You might have to create another pointer, like this:
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
recievedAHead = &(receivedA[0][0]);
receivedB = new1dArray(remainingRows);
}
else {
recievedAHead = NULL;
}
and use recievedAHead in the MPI_Gather call.
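So the gather section might end up looking something like this sketch; receivedBHead is added here by analogy for the second gather, and on non-root ranks the receive-buffer argument of MPI_Gather is ignored, so NULL is fine there:

double *receivedAHead = NULL;
double *receivedBHead = NULL;
if (rank == 0)
{
    receivedA = newMatrix(remainingRows, msize);
    receivedB = new1dArray(remainingRows);
    receivedAHead = &(receivedA[0][0]);   /* only dereference receivedA on the root */
    receivedBHead = receivedB;
}
MPI_Gather(&(smallA[0][0]), rowsPerProcess * msize, MPI_DOUBLE,
           receivedAHead, rowsPerProcess * msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Gather(smallB, rowsPerProcess, MPI_DOUBLE,
           receivedBHead, rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);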
