I have written the code for matrix multiplication that takes input randomly from 1 to 9 and creates the matrix , the code is actually in MPI, the problem is that it works only for 4x4 matrix if i try anything like 5x5 or 10x10 it starts giving random values,I always change the value of N before creating any other order matrix, i don't know if its a MPI problem or just basic C programming error that I am doing, the code i have written is
#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"
void print_results(char *prompt, int a[N][N]);
int main(int argc, char *argv[])
{
int i, j, k, rank, size, tag = 99, sum = 0;
int a[N][N];
int b[N][N];
int c[N][N];
int aa[N],cc[N];
int row,col;
int dest = 0;
int source;
double time1, time2, duration, global;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == 0){
printf("enter the number of row =");
scanf("%d",&row);
printf("enter the number of column =");
scanf("%d",&col);
srand(time(NULL));
for(i=0;i<row;i++) {
for(j=0;j<col;j++){
a[i][j] = rand() % 10;
}
}
for(i=0;i<row;i++){
for(j=0;j<col;j++){
b[i][j] = rand() % 10;
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
time1 = MPI_Wtime();
MPI_Scatter(a, N*N/size, MPI_INT, aa, N*N/size, MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
sum = sum + aa[j] * b[j][i];
}
cc[i] = sum;
sum = 0;
}
MPI_Gather(cc, N*N/size, MPI_INT, c, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
time2 = MPI_Wtime();
duration = time2 - time1;
MPI_Reduce(&duration,&global,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
if(rank == 0) {
printf("Global runtime is %f\n",global);
}
printf("Runtime at %d is %f \n", rank,duration);
MPI_Finalize();
if (rank == 0)
print_results("C = ", c);
}
void print_results(char *prompt, int a[N][N])
{
int i, j;
printf ("\n\n%s\n", prompt);
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf(" %d", a[i][j]);
}
printf ("\n");
}
printf ("\n\n");
}
Any solutions, please help me out with it!!
Related
I am trying to write an mpi program for multiplication of 2 matrix . If I give the size of the matrix lower that 800 the code works but when I give it higher I am getting segmentation fault and I am not able to figure out why . I am new to MPI so still trying to understand everything. If possible please help.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define N 1000
int main(int argc, char* argv[]) {
int rank, size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
double a[N][N], b[N][N], c[N][N];
int i, j, k;
// Initialize the matrices with random values
if (rank == 0) {
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
a[i][j] = (double)rand() / RAND_MAX;
b[i][j] = (double)rand() / RAND_MAX;
}
}
}
// Broadcast the matrices to all ranks
MPI_Bcast(a, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(b, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Each rank calculates a portion of the output matrix
int rows_per_rank = N / size;
int start_row = rows_per_rank * rank;
int end_row = start_row + rows_per_rank;
for (i = start_row; i < end_row; i++) {
for (j = 0; j < N; j++) {
c[i][j] = 0;
for (k = 0; k < N; k++) {
c[i][j] += a[i][k] * b[k][j];
}
}
}
// Gather the output matrix from all ranks
double* c_buffer = (double*) malloc(N*N*sizeof(double));
MPI_Gather(c, rows_per_rank*N, MPI_DOUBLE, c_buffer, rows_per_rank*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Print the output matrix
if (rank == 0) {
printf("Output matrix C:\n");
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf("%lf ", c_buffer[i*N + j]);
}
printf("\n");
}
}
free(c_buffer);
MPI_Finalize();
return 0;
}
this line
double a[N][N], b[N][N], c[N][N];
with N = 1000 requires 24mb of stack space. Thats almost certainly larger than whats available. Either allocate them statically (place the kw static before them) or dynamically on the heap
The code i wrote in C for matrix multiplication in MPI shows that my code is taking 5 seconds approx in global time but when i run the same thing in python mpi4py it takes very less time like few milliseconds, what is the problem with mpi in C, because it doesnt feel like 5 seconds when I run it in Linux shell,the output comes really fast but still shows the globaltime as 5 seconds.the code below is in C
#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"
void print_results(char *prompt, int a[N][N]);
int main(int argc, char *argv[])
{
int i, j, k, rank, size, tag = 99, sum = 0;
int a[N][N];
int b[N][N];
int c[N][N];
int aa[N],cc[N];
int row,col;
int dest = 0;
int source;
double time1, time2, duration, global;
MPI_Status status;
MPI_Init(&argc, &argv);
time1 = MPI_Wtime();
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == 0){
printf("enter the number of row =");
scanf("%d",&row);
printf("enter the number of column =");
scanf("%d",&col);
srand(time(NULL));
for(i=0;i<row;i++) {
for(j=0;j<col;j++){
a[i][j] = rand() % 10;
}
}
srand(time(NULL));
for(i=0;i<row;i++){
for(j=0;j<col;j++){
b[i][j] = rand() % 10;
}
}
}
MPI_Scatter(a, N*N/size, MPI_INT, aa, N*N/size, MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
sum = sum + aa[j] * b[j][i];
}
cc[i] = sum;
sum = 0;
}
MPI_Gather(cc, N*N/size, MPI_INT, c, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
time2 = MPI_Wtime();
duration = time2 - time1;
MPI_Reduce(&duration,&global,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
if(rank == 0) {
printf("Global runtime is %f\n",global);
}
printf("Runtime at %d is %f \n", rank,duration);
MPI_Finalize();
if (rank == 0)
print_results("C = ", c);
}
void print_results(char *prompt, int a[N][N])
{
int i, j;
printf ("\n\n%s\n", prompt);
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf(" %d", a[i][j]);
}
printf ("\n");
}
printf ("\n\n");
}
The output it gives is
4
4
enter the number of row =enter the number of column =Global runtime is 5.975327
Runtime at 0 is 1.493793
Runtime at 1 is 1.493793
Runtime at 2 is 1.493877
Runtime at 3 is 1.493865
C =
78 83 142 116
128 138 236 194
39 49 112 71
96 109 204 156
Please let me know if there is some problem with the code!!
As discussed in the comment I have changed the position of time1 = MPI_Wtime(); and included a Barrier.
Take a look at the modified code :
#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"
void print_results(char *prompt, int a[N][N]);
int main(int argc, char *argv[])
{
int i, j, k, rank, size, tag = 99, sum = 0;
int a[N][N];
int b[N][N];
int c[N][N];
int aa[N],cc[N];
int row,col;
int dest = 0;
int source;
double time1, time2, duration, global;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == 0){
printf("enter the number of row =");
scanf("%d",&row);
printf("enter the number of column =");
scanf("%d",&col);
srand(time(NULL));
for(i=0;i<row;i++) {
for(j=0;j<col;j++){
a[i][j] = rand() % 10;
}
}
srand(time(NULL));
for(i=0;i<row;i++){
for(j=0;j<col;j++){
b[i][j] = rand() % 10;
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
time1 = MPI_Wtime();
MPI_Scatter(a, N*N/size, MPI_INT, aa, N*N/size, MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
sum = sum + aa[j] * b[j][i];
}
cc[i] = sum;
sum = 0;
}
MPI_Gather(cc, N*N/size, MPI_INT, c, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
time2 = MPI_Wtime();
duration = time2 - time1;
MPI_Reduce(&duration,&global,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
if(rank == 0) {
printf("Global runtime is %f\n",global);
}
printf("Runtime at %d is %f \n", rank,duration);
MPI_Finalize();
if (rank == 0)
print_results("C = ", c);
}
void print_results(char *prompt, int a[N][N])
{
int i, j;
printf ("\n\n%s\n", prompt);
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf(" %d", a[i][j]);
}
printf ("\n");
}
printf ("\n\n");
}
By doing so you will not take into account the user input time and also plcing the Barrier before the first the first timing will ensure that all process have nearly identical starting.
Also beware that you code only works with 4x4 matrix !
Apart from that you sould have something like :
mpirun -n 4 a.out
enter the number of row =4
enter the number of column =4
Global runtime is 0.005867
Runtime at 0 is 0.001474
Runtime at 1 is 0.001464
Runtime at 2 is 0.001464
Runtime at 3 is 0.001466
#include <stdlib.h>
#include <stdio.h>
#include "mpi.h"
#include <time.h>
#include <sys/time.h>
#define N 8
void print(int n, int m, double (*matrix)[m]){
for(int i = 0; i<n; i++){
for(int j = 0; j < m; j++)
printf("%.0f\t", matrix[i][j]);
printf("\n");
}
printf("\n");
}
int main(int argc, char *argv[]){
int nproc, rank, source, dest, rows, offset;
double (*matrix_A)[N] = NULL;
double (*matrix_C)[N] = NULL;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
rows = N / nproc;
double subA[rows][N];
double subC[rows][N];
double matrix_B[N][N];
//initializing matrix B
for(int i = 0; i < N; i++){
for(int j = 0; j < N; j++){
matrix_B[i][j] = 15+j+i;
}
}
if(rank == 0){
//Allocating memory space for both matrix A and C that need to be only in the root process
matrix_A = malloc(N * sizeof(*matrix_A));
matrix_C = malloc(N * sizeof(*matrix_C));
//initializing matrix A only in the root process
for(int i = 0; i < N; i++){
for(int j = 0; j < N; j++)
matrix_A[i][j] = 10+j+i;
}
printf("\nProcess %d\n", rank);
printf("----------\n");
printf("Matrix A\n\n");
print(N, N, matrix_A);
printf("Matrix B\n\n");
print(N, N, matrix_B);
}
//Scattering matrix A into sub matrices in other processes
MPI_Scatter(matrix_A, N*N/nproc, MPI_DOUBLE, subA, N*N/nproc, MPI_DOUBLE, 0, MPI_COMM_WORLD);
//multyplying each sub matrix with matrix B and saving the results in sub matrices C
for(int k = 0; k < rows; k++){
for(int i = 0; i < N; i++){
for(int j = 0; j < N; j++)
subC[k][i] += subA[k][j] * matrix_B[j][i];
}
}
printf("\nProcess %d\n", rank);
printf("----------\n");
printf("Matrix subA\n\n");
print(rows, N, subA);
printf("Matrix subC\n\n");
print(rows, N, subC);
//Gathering all sub matrices C of all processes into one big matrix C in the root process - DOESN'T WORK
MPI_Gather(subC, N*N/nproc, MPI_INT, matrix_C, N*N/nproc, MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0){
printf("\nProcess %d\n", rank);
printf("----------\n");
printf("Matrix C\n\n");
print(N, N, matrix_C);
}
MPI_Finalize();
}
The code above is meant to use Scatter and Gather functions to multiply two NxN matrices. Everything is fine up until the Gather part.
I need to multiply matrix A and matrix B.
Matrix A and the result matrix C need to be in the root process only, while B is a global matrix.
I've managed to scatter matrix A into sub-matrices in all processes, do the multiplication with the global matrix B and save the results in sub-matrices C.
But then i try to MPI_Gather the sub-matrices with the final results into one matrix C in the root process but for some reason, it doesn't work. The final matrix C prints out a wrong answer.
The task is a 2D matrix multiplication. N is the data size and P is number of processors. dn029 is my remote host.
I tested this code for multiple number of Ps and I either got a code 139 or 11 error.
The error message I get :
BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 147347 RUNNING AT dn029
= EXIT CODE: 139
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<mpi.h>
int P;
int N = 1024;
/*Single Row, Single Column Matrix Multiplication Function*/
float row_col_multi(float* row, float* col){
int i0;
float c0;
for(i0 = 0; i0 < N ; i0++)
c0 += row[i0]*col[i0];
return c0;
}
int main(int argc, char *argv[]){
MPI_Init(&argc, &argv);
int i, j, k, rank, size;
double start, end, total;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Request request[2];
P = size;
float A_row [N];
float B_col [N];
float matrix_C[N][N];
float matrix_A[N][N];
float matrix_BT[N][N];
if(rank == 0){
double wall_time;
for(i = 0; i < N; i++)
for (j = 0; j < N; j++)
matrix_A[i][j] = -1+2*((float)rand())/RAND_MAX;
for(i = 0; i < N; i++)
for (j = 0; j < N; j++)
matrix_BT[i][j] = -1+2*((float)rand())/RAND_MAX;
}
start = MPI_Wtime();
if(rank == 0)
printf("Root processor %d: Scatterring is started for diagonal elements...\n", rank);
for(i = 0; i < N/P ; i++){
MPI_Iscatter(matrix_A[rank + P*i], N, MPI_FLOAT, A_row, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[0]);
MPI_Iscatter(matrix_BT[rank + P*i], N, MPI_FLOAT, B_col, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[1]);
MPI_Waitall(2,request, MPI_STATUSES_IGNORE);
matrix_C[rank + P*i][rank + P*i] = row_col_multi(A_row, B_col);
}
for(i = 1 ; i < N ; i++){
if(rank < i){
for(k = 0; k < N/P ; k++){
MPI_Iscatter(matrix_A[rank+i + P*k], N, MPI_FLOAT, A_row, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[0]);
MPI_Iscatter(matrix_BT[rank + P*k], N, MPI_FLOAT, B_col, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[1]);
MPI_Waitall(2,request, MPI_STATUSES_IGNORE);
matrix_C[rank+i + P*k][rank + P*k] = row_col_multi(A_row, B_col);
}
}
}
end = MPI_Wtime();
printf("Total Time: %f\n", end - start);
MPI_Finalize();
}
A program was written to find the minimum value in the matrix. At small values of the matrix dimension, it works correctly, however, if the dimension exceeds 350x350, then when calculating on a different number of nodes, a similar error occurs.
mpirun noticed that process rank 4 with PID 15014 on node cluster exited on signal 11 (Segmentation fault).
Everything works on two cores, but not always on others.
I use the following commands:
mpicc Lab777.c -o 1 -lm -std=c99
mpirun -np 16 ./1
The code:
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define ROWS 350
#define COLS 350
#define max 999999999
void rand_matrix(int array[ROWS][COLS])
{
srand((unsigned int)time(NULL));
for (int i = 0; i < ROWS; i++)
for (int j = 0; j < COLS; j++)
array[i][j] = rand();
}
void show(int array[ROWS][COLS])
{
for (int i = 0; i < ROWS; i++)
{
printf("\n");
for (int j = 0; j < COLS; j++)
{
printf("\t|%d|", array[i][j]);
}
printf("\n");
}
}
void convert(int array[ROWS][COLS], int *conv_arr)
{
int k = 0;
for (int i = 0; i < ROWS; i++)
for (int j = 0; j < COLS; j++)
{
conv_arr[k] = array[i][j];
k++;
}
}
int find_min(int *array, int piece, int num_of_pieces)
{
int result = max;
for (int i = 0; i < (ROWS * COLS / (num_of_pieces)); i++)
if (array[i] < result)
result = array[i];
return result;
}
int main(int argc, char *argv[])
{
int matrix[ROWS][COLS], lin_arr[ROWS * COLS], min;
double startwtime, endwtime;
int ProcNum;
int ProcRank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
int recv_matrix[ProcNum][ROWS * COLS], func[ProcNum];
if (ProcRank == 0)
{
printf("Matrix is filling\n");
rand_matrix(matrix);
convert(matrix, lin_arr);
printf("Matrix:\n");
//show(matrix);
}
startwtime = MPI_Wtime();
MPI_Scatter(&lin_arr[(ProcRank) * (ROWS * COLS / (ProcNum))], (ROWS * COLS / (ProcNum)), MPI_INT, &recv_matrix[ProcRank],(ROWS * COLS / (ProcNum)), MPI_INT, 0, MPI_COMM_WORLD);
func[ProcRank] = find_min(recv_matrix[ProcRank], ProcRank, ProcNum);
MPI_Reduce(&func[ProcRank], &min, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
endwtime = MPI_Wtime();
if (ProcRank == 0)
{
printf("Min value: %d\n", min);
printf("Time: %.4f\n", endwtime-startwtime);
}
MPI_Finalize();
return 0;
}
UPD:
Trying to reduce memory for recv_matrix and func.
I added the following line :
#define MAXPROC 20
And changed the following:
int recv_matrix[MAXPROC][ROWS * COLS], func[MAXPROC];
But now it works with a smaller matrix size