MPI Backwards Substitution error while slaves receives x from previous processes - c

I want to convert backwards substitution sequential C code to parallel and I have an error while rank 1-size receiving data MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);. The logic is a pipeline between processes.
MY code:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>
#include <math.h>
int main(int argc, char* argv[]){
int i,j,N;
float **a, *b;
float *local_x, *prev_x, *total_proc_x;
int tag = 100;
//MPI variables
int rank, size;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(argc != 2){
if(rank == 0) printf("Using : %s <matrix_size>\n", argv[0]);
return 0;
}
N = strtol(argv[1], NULL, 10);
/* Allocate space for matrices */
a = (float **) malloc ( N * sizeof ( float *) );
for ( i = 0; i < N; i++)
a[i] = ( float * ) malloc ( (i+1) * sizeof ( float ) );
b = ( float * ) malloc ( N * sizeof ( float ) );
if(rank == 0){
srand ( time ( NULL));
for (i = 0; i < N; i++) {
b[i] = (float)rand()/(RAND_MAX*2.0-1.0);
a[i][i] = 2.0+(float)rand()/(RAND_MAX*2.0-1.0);
for (j = 0; j < i; j++)
a[i][j] = (float)rand()/(RAND_MAX*2.0-1.0);
for (j=i; j<N; j++)
a[i][j] = 0.0;
}
}
//broadcast data (a,b)
MPI_Bcast(a, N*N, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(b, N, MPI_FLOAT, 0, MPI_COMM_WORLD);
int block_size = N/size;
int *counts = (int *) malloc(size*sizeof(int));
int *displacements = (int *) malloc(size*sizeof(int));
int start, end;
for(i=0; i<size; i++){
start = 0;
for(j=0; j<i; j++){
start += block_size;
if(size-(j+1) < N%size) start++;
}
end = start + block_size;
if(size-(i+1) < N%size) end++;
counts[i] = end - start;
displacements[i] = start;
}
local_x = (float *) malloc(counts[rank]*sizeof(float));
for(i=0; i<counts[rank]; i++){
local_x[i] = 0.0;
}
prev_x = (float *) malloc(displacements[rank]*sizeof(float));
if(rank == 0) printf("Size: %d\n", size);
printf("Rank %d, Displacement: %d, Count: %d\n", rank, displacements[rank], counts[rank]);
//calculation
float sum;
if(rank == 0){
printf("Rank %d, OK\n", rank);
for(i=0; i<counts[0]; i++){
sum = 0.0;
for(j=0; j<i; j++){
sum = sum + (local_x[j] * a[i][j]);
}
local_x[i] = (b[i] - sum) / a[i][i];
}
MPI_Send(local_x, displacements[rank+1], MPI_FLOAT, rank+1, tag, MPI_COMM_WORLD);
printf("Process %d sent data to process %d\n", rank, rank+1);
}
if(rank != 0 && rank != (size-1)){
printf("Rank %d, OK\n", rank);
MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);
printf("Process %d received data from process %d", rank, rank-1);
for(i=displacements[rank]; i<(displacements[rank] + counts[rank]); i++){
sum = 0.0;
//unowned rows
for(j=0; j<displacements[rank]; j++){
sum = sum + (prev_x[j] * a[i][j]);
}
//owned rows
for(j=displacements[rank]; j<i; j++){
sum = sum + (local_x[j-displacements[rank]] * a[i][j]);
}
local_x[i] = (b[i] - sum) / a[i][i];
}
//concatenate prev and local x
total_proc_x = (float *) malloc((displacements[rank] + counts[rank])*sizeof(float));
for(i=0; i<displacements[rank]; i++){
total_proc_x[i] = prev_x[i];
}
for(i=0; i<counts[rank]; i++){
total_proc_x[i+displacements[rank]] = local_x[i];
}
//send to next process
MPI_Send(total_proc_x, displacements[rank+1], MPI_FLOAT, rank+1, tag, MPI_COMM_WORLD);
}
if(rank == (size-1)){
printf("Rank %d, OK\n", rank);
MPI_Recv(prev_x, displacements[rank], MPI_FLOAT, rank-1, tag, MPI_COMM_WORLD, &status);
printf("Process %d received data from process %d", rank, rank-1);
for(i=displacements[rank]; i<(displacements[rank] + counts[rank]); i++){
sum = 0.0;
//unowned rows
for(j=0; j<displacements[rank]; j++){
sum = sum + (prev_x[j] * a[i][j]);
}
//owned rows
for(j=displacements[rank]; j<i; j++){
sum = sum + (local_x[j-displacements[rank]] * a[i][j]);
}
local_x[i] = (b[i] - sum) / a[i][i];
}
//concatenate prev and local x
float *total_proc_x = (float *) malloc((displacements[rank] + counts[rank])*sizeof(float));
for(i=0; i<displacements[rank]; i++){
total_proc_x[i] = prev_x[i];
}
for(i=0; i<counts[rank]; i++){
total_proc_x[i+displacements[rank]] = local_x[i];
}
/* Print result */
for (i = 0; i < N; i++) {
for (j = 0; j <= i; j++)
printf ("%f \t", a[i][j]);
printf ("%f \t%f\n", total_proc_x[i], b[i]);
}
/* Check result */
for (i = 0; i < N; i++) {
sum = 0.0;
for (j = 0; j <= i; j++)
sum = sum + (total_proc_x[j]*a[i][j]);
if (fabsf(sum - b[i]) > 0.00001) {
printf("%f != %f\n", sum, b[i]);
printf("Validation Failed...\n");
}
}
}
MPI_Finalize();
return 0;
}
Output:
$ mpicc -o backsub_mpi backsub_mpi.c
$ mpiexec -n 4 ./backsub_mpi 20
Rank 1, Displacement: 5, Count: 5
Rank 1, OK
Rank 3, Displacement: 15, Count: 5
Rank 3, OK
Size: 4
Rank 0, Displacement: 0, Count: 5
Rank 0, OK
Process 0 sent data to process 1
Rank 2, Displacement: 10, Count: 5
Rank 2, OK
-----------------------------------------------------------------------------
One of the processes started by mpirun has exited with a nonzero exit
code. This typically indicates that the process finished in error.
If your process did not finish in error, be sure to include a "return
0" or "exit(0)" in your C code before exiting the application.
PID 4105 failed on node n0 (127.0.0.1) due to signal 11.
-----------------------------------------------------------------------------
mpirun failed with exit status 11

Your problem is in how you allocate the matrix
a = (float **) malloc ( N * sizeof ( float *) );
for ( i = 0; i < N; i++)
a[i] = ( float * ) malloc ( (i+1) * sizeof ( float ) );
You're trying to allocate a triangular array, but you send it
MPI_Bcast(a, N*N, MPI_FLOAT, 0, MPI_COMM_WORLD);
as a square array.
Since a is an array or pointers, the actual matrix elements are probably not in contiguous memory.
You need to allocate double *a as a long enough single array, and then do some index translation to figure out where element i,j goes into this array. Something like i*(i+1)/2+j.

Related

Segmentation fault in matrix multiplication using mpi

I am trying to write an mpi program for multiplication of 2 matrix . If I give the size of the matrix lower that 800 the code works but when I give it higher I am getting segmentation fault and I am not able to figure out why . I am new to MPI so still trying to understand everything. If possible please help.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define N 1000
int main(int argc, char* argv[]) {
int rank, size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
double a[N][N], b[N][N], c[N][N];
int i, j, k;
// Initialize the matrices with random values
if (rank == 0) {
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
a[i][j] = (double)rand() / RAND_MAX;
b[i][j] = (double)rand() / RAND_MAX;
}
}
}
// Broadcast the matrices to all ranks
MPI_Bcast(a, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(b, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Each rank calculates a portion of the output matrix
int rows_per_rank = N / size;
int start_row = rows_per_rank * rank;
int end_row = start_row + rows_per_rank;
for (i = start_row; i < end_row; i++) {
for (j = 0; j < N; j++) {
c[i][j] = 0;
for (k = 0; k < N; k++) {
c[i][j] += a[i][k] * b[k][j];
}
}
}
// Gather the output matrix from all ranks
double* c_buffer = (double*) malloc(N*N*sizeof(double));
MPI_Gather(c, rows_per_rank*N, MPI_DOUBLE, c_buffer, rows_per_rank*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Print the output matrix
if (rank == 0) {
printf("Output matrix C:\n");
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf("%lf ", c_buffer[i*N + j]);
}
printf("\n");
}
}
free(c_buffer);
MPI_Finalize();
return 0;
}
this line
double a[N][N], b[N][N], c[N][N];
with N = 1000 requires 24mb of stack space. Thats almost certainly larger than whats available. Either allocate them statically (place the kw static before them) or dynamically on the heap

Caught signal 11 (Segmentation fault: address not mapped to object at address (nil)

I am trying to calculate the sum of an array A using the reduce command; note here that A is only visible to the master node/root (0).
I am getting the following error and I can't seem to figure out why. Also, broadcasting the part (N) still produces the same error.
Error:
[kali:74924:0:74924] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
==== backtrace (tid: 74924) ====
0 /lib/x86_64-linux-gnu/libucs.so.0(ucs_handle_error+0x2dc) [0x7f14b5486a9c]
1 /lib/x86_64-linux-gnu/libucs.so.0(+0x28c8f) [0x7f14b5486c8f]
2 /lib/x86_64-linux-gnu/libucs.so.0(+0x28e4a) [0x7f14b5486e4a]
3 /lib/x86_64-linux-gnu/libc.so.6(+0x3c910) [0x7f14b564e910]
4 ./parts(+0x14f1) [0x557de43984f1]
5 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xcd) [0x7f14b56397ed]
6 ./parts(+0x113a) [0x557de439813a]
=================================
[kali:74925:0:74925] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
==== backtrace (tid: 74925) ====
0 /lib/x86_64-linux-gnu/libucs.so.0(ucs_handle_error+0x2dc) [0x7fb3324b0a9c]
1 /lib/x86_64-linux-gnu/libucs.so.0(+0x28c8f) [0x7fb3324b0c8f]
2 /lib/x86_64-linux-gnu/libucs.so.0(+0x28e4a) [0x7fb3324b0e4a]
3 /lib/x86_64-linux-gnu/libc.so.6(+0x3c910) [0x7fb332678910]
4 ./parts(+0x14f1) [0x5581e42d44f1]
5 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xcd) [0x7fb3326637ed]
6 ./parts(+0x113a) [0x5581e42d413a]
=================================
[kali:74926:0:74926] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
==== backtrace (tid: 74926) ====
0 /lib/x86_64-linux-gnu/libucs.so.0(ucs_handle_error+0x2dc) [0x7f7e8e8f9a9c]
1 /lib/x86_64-linux-gnu/libucs.so.0(+0x28c8f) [0x7f7e8e8f9c8f]
2 /lib/x86_64-linux-gnu/libucs.so.0(+0x28e4a) [0x7f7e8e8f9e4a]
3 /lib/x86_64-linux-gnu/libc.so.6(+0x3c910) [0x7f7e8eac1910]
4 ./parts(+0x14f1) [0x558b09e094f1]
5 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xcd) [0x7f7e8eaac7ed]
6 ./parts(+0x113a) [0x558b09e0913a]
=================================
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 74924 RUNNING AT kali
= EXIT CODE: 11
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions
Codes:
w/out broadcast
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <time.h>
int main (int argc, char** argv) {
int rank;
int size;
int sum = 0;
int grand_sum = 0;
int i;
int *A;
int N;
int part;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (rank == 0) {
do {
printf("Enter the array size: ");
scanf("%d", &N);
} while (N <= 0);
A = malloc(N * sizeof(int));
if (!A) {
printf("Array too big!\nExiting the program...\n");
return -1;
}
part = N / size;
srand(10);
for (i = 0; i < N; i++) {
A[i] = rand() % 10 + 1;
grand_sum += A[i];
printf("A[%d] = %d\n", i, A[i]);
}
for (i = 1; i < size; i++) {
MPI_Send(&part, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
MPI_Send(&A[i * part], part, MPI_INT, i, 0, MPI_COMM_WORLD);
}
} else {
int part;
MPI_Recv(&part, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
int Aw[part];
MPI_Recv(&Aw, part, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for (i = 0; i < part; i++) {
sum += A[i];
}
}
MPI_Reduce(&sum, &grand_sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0) {
for (i = size * part; i < N; i++) {
grand_sum += A[i];
}
printf("\nThe grand sum is: %d", grand_sum);
}
MPI_Finalize();
}
w/broadcast
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <time.h>
int main (int argc, char** argv) {
int rank;
int size;
int sum = 0;
int grand_sum = 0;
int i;
int *A;
int N;
int part;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (rank == 0) {
do {
printf("Enter the array size: ");
scanf("%d", &N);
} while (N <= 0);
A = malloc(N * sizeof(int));
if (!A) {
printf("Array too big!\nExiting the program...\n");
return -1;
}
part = N / size;
srand(10);
for (i = 0; i < N; i++) {
A[i] = rand() % 10 + 1;
printf("A[%d] = %d\n", i, A[i]);
}
for (i = 1; i < size; i++) {
//MPI_Send(&part, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
MPI_Send(&A[i * part], part, MPI_INT, i, 0, MPI_COMM_WORLD);
}
}
MPI_Bcast(&part, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (rank != 0) {
//MPI_Recv(&part, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
int Aw[part];
MPI_Recv(&Aw, part, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for (i = 0; i < part; i++) {
sum += A[i];
}
}
MPI_Reduce(&sum, &grand_sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0) {
for (i = size * part; i < N; i++) {
grand_sum += A[i];
}
printf("\nThe grand sum is: %d", grand_sum);
}
MPI_Finalize();
}
After rereading my code, I've spotted a mistake at the for loop, of the else statement:
for (i = 0; i < part; i++) {
sum += A[i];
}
A is not visible here; thus the error. Aw should have been used here instead.
Correct code:
for (i = 0; i < part; i++) {
sum += Aw[i];
}

MPI Bad Termination of one of processes Exit code: 139

The task is a 2D matrix multiplication. N is the data size and P is number of processors. dn029 is my remote host.
I tested this code for multiple number of Ps and I either got a code 139 or 11 error.
The error message I get :
BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 147347 RUNNING AT dn029
= EXIT CODE: 139
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<mpi.h>
int P;
int N = 1024;
/*Single Row, Single Column Matrix Multiplication Function*/
float row_col_multi(float* row, float* col){
int i0;
float c0;
for(i0 = 0; i0 < N ; i0++)
c0 += row[i0]*col[i0];
return c0;
}
int main(int argc, char *argv[]){
MPI_Init(&argc, &argv);
int i, j, k, rank, size;
double start, end, total;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Request request[2];
P = size;
float A_row [N];
float B_col [N];
float matrix_C[N][N];
float matrix_A[N][N];
float matrix_BT[N][N];
if(rank == 0){
double wall_time;
for(i = 0; i < N; i++)
for (j = 0; j < N; j++)
matrix_A[i][j] = -1+2*((float)rand())/RAND_MAX;
for(i = 0; i < N; i++)
for (j = 0; j < N; j++)
matrix_BT[i][j] = -1+2*((float)rand())/RAND_MAX;
}
start = MPI_Wtime();
if(rank == 0)
printf("Root processor %d: Scatterring is started for diagonal elements...\n", rank);
for(i = 0; i < N/P ; i++){
MPI_Iscatter(matrix_A[rank + P*i], N, MPI_FLOAT, A_row, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[0]);
MPI_Iscatter(matrix_BT[rank + P*i], N, MPI_FLOAT, B_col, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[1]);
MPI_Waitall(2,request, MPI_STATUSES_IGNORE);
matrix_C[rank + P*i][rank + P*i] = row_col_multi(A_row, B_col);
}
for(i = 1 ; i < N ; i++){
if(rank < i){
for(k = 0; k < N/P ; k++){
MPI_Iscatter(matrix_A[rank+i + P*k], N, MPI_FLOAT, A_row, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[0]);
MPI_Iscatter(matrix_BT[rank + P*k], N, MPI_FLOAT, B_col, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[1]);
MPI_Waitall(2,request, MPI_STATUSES_IGNORE);
matrix_C[rank+i + P*k][rank + P*k] = row_col_multi(A_row, B_col);
}
}
}
end = MPI_Wtime();
printf("Total Time: %f\n", end - start);
MPI_Finalize();
}

mpi matrix multiplication to run with a different number of processors

So I got the code working for when running on 1 process. Although when I try to run it on more then 2 processers or more(mpirun -n 4)(mpirun -n 8)etc; half my results are coming back as zero.Im assuming because it doesn't deal with the case where the number of processors is divisible by the matrix size. Any ideas? I'm trying to initialize both matrixes from command line and perform matrix multiplication using MPI. I'm knew to this and would love any help. For example when I enter in a size of 2 and initialize matrix A to the values {1,4,6,7} and matrix B to {8,9,4,5} my result comes out to be {8,9,0,0}..
void init_Matrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
scanf("%i", &matrix[i][j]);
}
}
}
void printMatrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
printf("%d" , matrix[i][j]);
printf(" ");
}
printf("\n");
}
}
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_size(MPI_COMM_WORLD, &size); //num p
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int n;
if(rank == 0)
{
printf("Enter in size of matrix! \x0A");
scanf("%i",&n);
}
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);
int A[n][n];
int B[n][n];
int C[n][n];
int aa[n/size][n];
int cc[n/size][n];
if(rank == 0)
{
init_Matrix(n,A);
init_Matrix(n,B);
}
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
//scatter rows of first matrix to different processes
MPI_Scatter(A, n*n/size, MPI_INT, aa, n*n/size, MPI_INT,0,MPI_COMM_WORLD);
//broadcast second matrix to all processes
MPI_Bcast(B, n*n, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
//perform vector multiplication by all processes
for(int k = 0; k < n/size; k++)
{
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cc[i][j] += A[i][k] * B[k][j];
}
}
}
MPI_Gather(cc, n*n/size, MPI_INT, C, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0){
printMatrix(n, C);
}
MPI_Finalize();
}
updated:
updated attempt using mpi scatterv and mpi gather
:
void initMatrix(int Size, int matrix[Size][Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
scanf("%i", &matrix[i][j]);
}
}
void multIJK(int Size, int A[Size][Size], int B[Size][Size], int pResult[Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
{
for(int k = 0; k < Size; k++)
pResult += A[i][k] * B[k][j];
}
}
}
int main(int argc, char* argv[]) {
int Size;
int RowNum;
int ProcNum;
int ProcRank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
if (ProcRank == 0) {
printf("Enter in size of matrix! \x0A");
scanf("%i", &Size);
}
int aMatrix[Size][Size];
int bMatrix[Size][Size];
MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
int RestRows = Size;
for (int i=0; i<ProcRank; i++)
RestRows = RestRows-RestRows/(ProcNum-i);
RowNum = RestRows/(ProcNum-ProcRank);
int pResult[Size];
int pProcRows[RowNum*Size];
int pProcResult[RowNum];
if(ProcRank == 0)
{
initMatrix(Size,aMatrix);
initMatrix(Size,bMatrix);
}
RestRows=Size; // Number of rows, that haven’t been distributed yet
MPI_Bcast(bMatrix, Size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Alloc memory for temporary objects
// the number of elements sent to the process
int pSendInd[ProcNum];
// the index of the first data element sent to the process
int pSendNum[ProcNum];
// Define the disposition of the matrix rows for current process
RowNum = (Size/ProcNum);
pSendNum[0] = RowNum*Size;
pSendInd[0] = 0;
for (int i=1; i<ProcNum; i++) {
RestRows -= RowNum;
RowNum = RestRows/(ProcNum-i);
pSendNum[i] = RowNum*Size;
pSendInd[i] = pSendInd[i-1]+pSendNum[i-1];
}
// Scatter the rows
MPI_Scatterv(aMatrix , pSendNum, pSendInd, MPI_INT, pProcRows,
pSendNum[ProcRank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
multIJK(Size,aMatrix,bMatrix,pResult);
RestRows=Size; // Number of rows, that haven’t been distributed yet
//Alloc memory for temporary objects
// Number of elements, that current process sends
int pReceiveNum[ProcNum];
/* Index of the first element from current process in result vector */
int pReceiveInd[ProcNum];
//Define the disposition of the result vector block of current processor
pReceiveInd[0] = 0;
pReceiveNum[0] = Size/ProcNum;
for (int i=1; i<ProcNum; i++) {
RestRows -= pReceiveNum[i-1];
pReceiveNum[i] = RestRows/(ProcNum-i);
pReceiveInd[i] = pReceiveInd[i-1]+pReceiveNum[i-1];
} //Gather the whole result vector on every processor
MPI_Allgatherv(pProcResult, pReceiveNum[ProcRank], MPI_INT, pResult,
pReceiveNum, pReceiveInd, MPI_DOUBLE, MPI_COMM_WORLD);
//ProcessTermination(aMatrix,bMatrix, pResult, pProcRows, pProcResult);
if(ProcRank == 0)
{
for(int i = 0; i < Size; i++)
{
printf("%i\n",pResult[i]);
}
}
MPI_Finalize();
}
You have some logic problems.
for(int i = 0; i < n; i++) <-- this should be until n/size, you are going into unallocated memory
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
cc[i][j] += A[i][k] * B[k][j]; <-- again, going outsize allocated memory
Replace it with
cc[k][i] += A[k][j] * B[j][i];
Hopefully these are all the problems.
You should also treat the cases where the matrix size is not divisible by the number of processors.

C MPI Matrix multiplication error

I'm doing some matrix multiplication in C with MPI.
It works fine until I try to go above 15x15 and I cant figure out why...
From what I've noticed the error seems to mostly happen after I see a "Process # sending..." print, which happens when the slave processes are sending their data back to the master process.
Error message:
[LEC-B125N4J:12183] *** Process received signal ***
[LEC-B125N4J:12183] Signal: Segmentation fault (11)
[LEC-B125N4J:12183] Signal code: Address not mapped (1)
Code:
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <mpi.h>
//#define DIM 1000
#define DIM 15
/*
* Statically allocate the matrices to make the rows
* sequentially placed in memory. (This eases the task
* of distributing the problem among the slaves.)
* Make the matrices global to allow for larger
* dimensions.
*/
int A[DIM][DIM];
int B[DIM][DIM];
int C[DIM][DIM];
int D[DIM][DIM];
int correct_result(int A[DIM][DIM], int B[DIM][DIM])
{
int i,j;
for (i=0; i<DIM; ++i)
for (j=0; j<DIM; ++j)
if (A[i][j] != B[i][j])
return 0;
return 1;
}
int main (argc, argv)
int argc;
char *argv[];
{
int rank=0, size;
int i, j, k;
int time1;
volatile int tmp;
int iOffset = 0;
int iProblemSize = 0;
MPI_Init(&argc, &argv); /* starts MPI */
MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* get current process id */
MPI_Comm_size(MPI_COMM_WORLD, &size); /* get number of processes */
iProblemSize = (DIM / (size - 1));
if(rank == 0) { //Master
printf("Number of processes: %d (1 Master and %d slaves) - DIM: %d\n", size, (size - 1), DIM);
//Fill matrices A and B with random numbers
srand(timer(NULL));
for(i=0; i<DIM; ++i)
{
for (j=0; j<DIM; ++j)
{
A[i][j] = random() % 100 - 50;
B[i][j] = random() % 100 - 50;
C[i][j] = 0;
}
}
}
MPI_Bcast(B, (DIM * DIM), MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0) { //Master
/* Calculate the true answer */
for (i=0; i<DIM; ++i)
for (k=0; k<DIM; ++k)
for (j=0; j<DIM; ++j)
D[i][j] += A[i][k] * B[k][j];
time1 = timer();
//Send pieces of A to the slaves
iOffset = 0;
for(i = 1; i < size; i++) {
MPI_Send(A[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD);
iOffset += iProblemSize;
/*for(j = 0; j < iProblemSize; j++) {
MPI_Send(A[iOffset + j], DIM, MPI_INT, i, 0, MPI_COMM_WORLD);
}
iOffset += iProblemSize;*/
}
//Take care of leftovers if needed (if uneven number of slaves)
if((size - 1) % DIM != 0) {
for(i = iOffset; i < DIM; i++) {
for(k = 0; k < DIM; k++) {
for(j = 0; j < DIM; j++) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
}
//Gather the results from the slaves
iOffset = 0;
for(i = 1; i < size; i++) {
MPI_Recv(C[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD, NULL);
iOffset += iProblemSize;
printf("Received from %d!\n", i);
}
printf("All received!\n");
/* Error checking */
time1 = timer() - time1;
printf ("Your calculation is %scorrect.\n", correct_result(C,D) ? "" : "not ");
printf ("Total runtime: %f seconds\n", time1/1000000.0);
}
else { //Slaves
MPI_Recv(A, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
/*for(j = 0; j < iProblemSize; j++) {
MPI_Recv(A[j], DIM, MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
}*/
//Do the calculations for C
//printf("Process %d doing calculations...\n", rank);
for (i = 0; i < (iProblemSize * DIM); ++i) {
for (k = 0; k < DIM; ++k) {
for (j = 0; j < DIM; ++j) {
C[i][j] += A[i][k] * B[k][j];
}
//printf("\n");
}
}
//printf("Process %d finished doing the calculations!\n", rank);
//Send the result to the master
printf("Process %d sending...\n", rank);
MPI_Send(C, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD);
printf("Process %d finished sending!\n", rank);
}
MPI_Finalize();
return 0;
}
OK I finally fixed the error.
The problem was in the loop when the slaves are doing the calculations...
for (i = 0; i < (iProblemSize * DIM); ++i) {
should be
for (i = 0; i < iProblemSize; ++i) {
:)

Resources