I wrote a program to find the minimum value in a matrix. It works correctly for small matrix dimensions, but once the dimension exceeds 350x350, runs on various numbers of nodes fail with the same error:
mpirun noticed that process rank 4 with PID 15014 on node cluster exited on signal 11 (Segmentation fault).
Everything works on two cores, but not always on more.
I use the following commands:
mpicc Lab777.c -o 1 -lm -std=c99
mpirun -np 16 ./1
The code:
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define ROWS 350
#define COLS 350
#define max 999999999
void rand_matrix(int array[ROWS][COLS])
{
    srand((unsigned int)time(NULL));
    for (int i = 0; i < ROWS; i++)
        for (int j = 0; j < COLS; j++)
            array[i][j] = rand();
}

void show(int array[ROWS][COLS])
{
    for (int i = 0; i < ROWS; i++)
    {
        printf("\n");
        for (int j = 0; j < COLS; j++)
        {
            printf("\t|%d|", array[i][j]);
        }
        printf("\n");
    }
}

void convert(int array[ROWS][COLS], int *conv_arr)
{
    int k = 0;
    for (int i = 0; i < ROWS; i++)
        for (int j = 0; j < COLS; j++)
        {
            conv_arr[k] = array[i][j];
            k++;
        }
}

int find_min(int *array, int piece, int num_of_pieces)
{
    int result = max;
    for (int i = 0; i < (ROWS * COLS / (num_of_pieces)); i++)
        if (array[i] < result)
            result = array[i];
    return result;
}

int main(int argc, char *argv[])
{
    int matrix[ROWS][COLS], lin_arr[ROWS * COLS], min;
    double startwtime, endwtime;
    int ProcNum;
    int ProcRank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
    MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
    int recv_matrix[ProcNum][ROWS * COLS], func[ProcNum];
    if (ProcRank == 0)
    {
        printf("Matrix is filling\n");
        rand_matrix(matrix);
        convert(matrix, lin_arr);
        printf("Matrix:\n");
        //show(matrix);
    }
    startwtime = MPI_Wtime();
    MPI_Scatter(&lin_arr[(ProcRank) * (ROWS * COLS / (ProcNum))], (ROWS * COLS / (ProcNum)), MPI_INT,
                &recv_matrix[ProcRank], (ROWS * COLS / (ProcNum)), MPI_INT, 0, MPI_COMM_WORLD);
    func[ProcRank] = find_min(recv_matrix[ProcRank], ProcRank, ProcNum);
    MPI_Reduce(&func[ProcRank], &min, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
    endwtime = MPI_Wtime();
    if (ProcRank == 0)
    {
        printf("Min value: %d\n", min);
        printf("Time: %.4f\n", endwtime - startwtime);
    }
    MPI_Finalize();
    return 0;
}
UPD: I tried to reduce the memory used by recv_matrix and func. I added the following line:
#define MAXPROC 20
and changed the declaration to:
int recv_matrix[MAXPROC][ROWS * COLS], func[MAXPROC];
But now it only works with an even smaller matrix size.
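A minimal sketch of a likely fix, assuming the segfault is a stack overflow: recv_matrix alone needs ProcNum * ROWS * COLS * sizeof(int) bytes of automatic storage, about 7.8 MB with 16 processes at 350x350, right at the usual 8 MB stack limit, while with 2 processes it is under 1 MB, which would explain why two processes always work. Each rank only needs its own piece, and that piece can live on the heap:

int piece = ROWS * COLS / ProcNum;             /* elements per rank */
int *recv_piece = malloc(piece * sizeof(int)); /* heap, not stack */
int local_min;
/* The send buffer is only significant on root, so plain lin_arr works. */
MPI_Scatter(lin_arr, piece, MPI_INT, recv_piece, piece, MPI_INT, 0, MPI_COMM_WORLD);
local_min = find_min(recv_piece, ProcRank, ProcNum);
MPI_Reduce(&local_min, &min, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
free(recv_piece);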
I am trying to write an MPI program for multiplying two matrices. If I set the matrix size below 800 the code works, but for anything higher I get a segmentation fault and I am not able to figure out why. I am new to MPI and still trying to understand everything. Please help if possible.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define N 1000
int main(int argc, char* argv[]) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    double a[N][N], b[N][N], c[N][N];
    int i, j, k;
    // Initialize the matrices with random values
    if (rank == 0) {
        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                a[i][j] = (double)rand() / RAND_MAX;
                b[i][j] = (double)rand() / RAND_MAX;
            }
        }
    }
    // Broadcast the matrices to all ranks
    MPI_Bcast(a, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // Each rank calculates a portion of the output matrix
    int rows_per_rank = N / size;
    int start_row = rows_per_rank * rank;
    int end_row = start_row + rows_per_rank;
    for (i = start_row; i < end_row; i++) {
        for (j = 0; j < N; j++) {
            c[i][j] = 0;
            for (k = 0; k < N; k++) {
                c[i][j] += a[i][k] * b[k][j];
            }
        }
    }
    // Gather the output matrix from all ranks
    double* c_buffer = (double*) malloc(N*N*sizeof(double));
    MPI_Gather(c, rows_per_rank*N, MPI_DOUBLE, c_buffer, rows_per_rank*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // Print the output matrix
    if (rank == 0) {
        printf("Output matrix C:\n");
        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                printf("%lf ", c_buffer[i*N + j]);
            }
            printf("\n");
        }
    }
    free(c_buffer);
    MPI_Finalize();
    return 0;
}
This line:
double a[N][N], b[N][N], c[N][N];
with N = 1000 requires 24 MB of stack space (three arrays of 1000 x 1000 doubles at 8 bytes each). That's almost certainly more than what's available. Either allocate them statically (place the keyword static before them) or dynamically on the heap.
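A minimal sketch of the heap variant, assuming a single contiguous allocation per matrix so the existing MPI_Bcast and MPI_Gather calls (which count N*N contiguous elements) keep working; indexing changes from a[i][j] to a[i*N + j]:

/* One contiguous block per matrix, on the heap instead of the stack. */
double *a = malloc((size_t)N * N * sizeof(double));
double *b = malloc((size_t)N * N * sizeof(double));
double *c = malloc((size_t)N * N * sizeof(double));
if (!a || !b || !c)
    MPI_Abort(MPI_COMM_WORLD, 1);   /* give up cleanly if allocation fails */

/* Element access becomes a[i*N + j]; the MPI calls are unchanged: */
MPI_Bcast(a, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(b, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
/* ... */
free(a); free(b); free(c);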
The C code I wrote for matrix multiplication with MPI reports roughly 5 seconds of global time, but when I run the same thing in Python with mpi4py it takes far less, just a few milliseconds. What is the problem with MPI in C? It does not feel like 5 seconds when I run it in the Linux shell; the output appears almost instantly, yet the global time still shows about 5 seconds. The code below is in C.
#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"
void print_results(char *prompt, int a[N][N]);
int main(int argc, char *argv[])
{
    int i, j, k, rank, size, tag = 99, sum = 0;
    int a[N][N];
    int b[N][N];
    int c[N][N];
    int aa[N], cc[N];
    int row, col;
    int dest = 0;
    int source;
    double time1, time2, duration, global;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    time1 = MPI_Wtime();
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0) {
        printf("enter the number of row =");
        scanf("%d", &row);
        printf("enter the number of column =");
        scanf("%d", &col);
        srand(time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                a[i][j] = rand() % 10;
            }
        }
        srand(time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                b[i][j] = rand() % 10;
            }
        }
    }
    MPI_Scatter(a, N*N/size, MPI_INT, aa, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            sum = sum + aa[j] * b[j][i];
        }
        cc[i] = sum;
        sum = 0;
    }
    MPI_Gather(cc, N*N/size, MPI_INT, c, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    time2 = MPI_Wtime();
    duration = time2 - time1;
    MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Global runtime is %f\n", global);
    }
    printf("Runtime at %d is %f \n", rank, duration);
    MPI_Finalize();
    if (rank == 0)
        print_results("C = ", c);
}

void print_results(char *prompt, int a[N][N])
{
    int i, j;
    printf("\n\n%s\n", prompt);
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            printf(" %d", a[i][j]);
        }
        printf("\n");
    }
    printf("\n\n");
}
The output it gives is:
4
4
enter the number of row =enter the number of column =Global runtime is 5.975327
Runtime at 0 is 1.493793
Runtime at 1 is 1.493793
Runtime at 2 is 1.493877
Runtime at 3 is 1.493865
C =
78 83 142 116
128 138 236 194
39 49 112 71
96 109 204 156
Please let me know if there is some problem with the code!!
As discussed in the comments, I have moved time1 = MPI_Wtime(); and added a Barrier.
Take a look at the modified code:
#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"
void print_results(char *prompt, int a[N][N]);
int main(int argc, char *argv[])
{
    int i, j, k, rank, size, tag = 99, sum = 0;
    int a[N][N];
    int b[N][N];
    int c[N][N];
    int aa[N], cc[N];
    int row, col;
    int dest = 0;
    int source;
    double time1, time2, duration, global;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0) {
        printf("enter the number of row =");
        scanf("%d", &row);
        printf("enter the number of column =");
        scanf("%d", &col);
        srand(time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                a[i][j] = rand() % 10;
            }
        }
        srand(time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                b[i][j] = rand() % 10;
            }
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);
    time1 = MPI_Wtime();
    MPI_Scatter(a, N*N/size, MPI_INT, aa, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            sum = sum + aa[j] * b[j][i];
        }
        cc[i] = sum;
        sum = 0;
    }
    MPI_Gather(cc, N*N/size, MPI_INT, c, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    time2 = MPI_Wtime();
    duration = time2 - time1;
    MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Global runtime is %f\n", global);
    }
    printf("Runtime at %d is %f \n", rank, duration);
    MPI_Finalize();
    if (rank == 0)
        print_results("C = ", c);
}

void print_results(char *prompt, int a[N][N])
{
    int i, j;
    printf("\n\n%s\n", prompt);
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            printf(" %d", a[i][j]);
        }
        printf("\n");
    }
    printf("\n\n");
}
By doing so you do not measure the user input time, and placing the Barrier before the first timing call ensures that all processes start from nearly the same point.
Also beware that your code only works with a 4x4 matrix!
Apart from that, you should get something like:
mpirun -n 4 a.out
enter the number of row =4
enter the number of column =4
Global runtime is 0.005867
Runtime at 0 is 0.001474
Runtime at 1 is 0.001464
Runtime at 2 is 0.001464
Runtime at 3 is 0.001466
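Also note that the reduction uses MPI_SUM, so the reported "Global runtime" is the sum of every rank's duration (0.005867 is roughly 4 x 0.00147 above, just as 5.975 was roughly 4 x 1.494 in your original output), which is part of why it felt much faster than the printed number. If what you want instead is the elapsed wall time of the slowest rank, a minimal sketch is to reduce with MPI_MAX:

/* Report the slowest rank's elapsed time rather than the sum over ranks. */
MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (rank == 0) {
    printf("Wall-clock runtime (slowest rank) is %f\n", global);
}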
So I got the code working when running on 1 process, but when I try to run it on 2 or more processes (mpirun -n 4, mpirun -n 8, etc.), half my results come back as zero. I am assuming this is because the code does not handle how the matrix rows divide among the processes. Any ideas? I am trying to initialize both matrices from the command line and perform matrix multiplication using MPI. I am new to this and would love any help. For example, when I enter a size of 2 and initialize matrix A to the values {1,4,6,7} and matrix B to {8,9,4,5}, my result comes out to be {8,9,0,0}.
void init_Matrix(int n, int matrix[n][n])
{
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            scanf("%i", &matrix[i][j]);
        }
    }
}

void printMatrix(int n, int matrix[n][n])
{
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            printf("%d", matrix[i][j]);
            printf(" ");
        }
        printf("\n");
    }
}

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_size(MPI_COMM_WORLD, &size); // number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n;
    if (rank == 0)
    {
        printf("Enter in size of matrix! \x0A");
        scanf("%i", &n);
    }
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    int A[n][n];
    int B[n][n];
    int C[n][n];
    int aa[n/size][n];
    int cc[n/size][n];
    if (rank == 0)
    {
        init_Matrix(n, A);
        init_Matrix(n, B);
    }
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            cc[i][j] = 0;
        }
    }
    // scatter rows of first matrix to different processes
    MPI_Scatter(A, n*n/size, MPI_INT, aa, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
    // broadcast second matrix to all processes
    MPI_Bcast(B, n*n, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    // perform vector multiplication by all processes
    for (int k = 0; k < n/size; k++)
    {
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                cc[i][j] += A[i][k] * B[k][j];
            }
        }
    }
    MPI_Gather(cc, n*n/size, MPI_INT, C, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) {
        printMatrix(n, C);
    }
    MPI_Finalize();
}
Updated: a new attempt using MPI_Scatterv and MPI_Allgatherv:
void initMatrix(int Size, int matrix[Size][Size])
{
    for (int i = 0; i < Size; i++)
    {
        for (int j = 0; j < Size; j++)
            scanf("%i", &matrix[i][j]);
    }
}

void multIJK(int Size, int A[Size][Size], int B[Size][Size], int pResult[Size])
{
    for (int i = 0; i < Size; i++)
    {
        for (int j = 0; j < Size; j++)
        {
            for (int k = 0; k < Size; k++)
                pResult += A[i][k] * B[k][j];
        }
    }
}

int main(int argc, char* argv[]) {
    int Size;
    int RowNum;
    int ProcNum;
    int ProcRank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
    MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
    if (ProcRank == 0) {
        printf("Enter in size of matrix! \x0A");
        scanf("%i", &Size);
    }
    int aMatrix[Size][Size];
    int bMatrix[Size][Size];
    MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    int RestRows = Size;
    for (int i = 0; i < ProcRank; i++)
        RestRows = RestRows - RestRows/(ProcNum-i);
    RowNum = RestRows/(ProcNum-ProcRank);
    int pResult[Size];
    int pProcRows[RowNum*Size];
    int pProcResult[RowNum];
    if (ProcRank == 0)
    {
        initMatrix(Size, aMatrix);
        initMatrix(Size, bMatrix);
    }
    RestRows = Size; // number of rows that haven't been distributed yet
    MPI_Bcast(bMatrix, Size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // Alloc memory for temporary objects
    // the index of the first data element sent to each process
    int pSendInd[ProcNum];
    // the number of elements sent to each process
    int pSendNum[ProcNum];
    // Define the disposition of the matrix rows for the current process
    RowNum = (Size/ProcNum);
    pSendNum[0] = RowNum*Size;
    pSendInd[0] = 0;
    for (int i = 1; i < ProcNum; i++) {
        RestRows -= RowNum;
        RowNum = RestRows/(ProcNum-i);
        pSendNum[i] = RowNum*Size;
        pSendInd[i] = pSendInd[i-1] + pSendNum[i-1];
    }
    // Scatter the rows
    MPI_Scatterv(aMatrix, pSendNum, pSendInd, MPI_INT, pProcRows,
                 pSendNum[ProcRank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    multIJK(Size, aMatrix, bMatrix, pResult);
    RestRows = Size; // number of rows that haven't been distributed yet
    // Alloc memory for temporary objects
    // number of elements that the current process sends
    int pReceiveNum[ProcNum];
    // index of the first element from the current process in the result vector
    int pReceiveInd[ProcNum];
    // Define the disposition of the result vector block of the current processor
    pReceiveInd[0] = 0;
    pReceiveNum[0] = Size/ProcNum;
    for (int i = 1; i < ProcNum; i++) {
        RestRows -= pReceiveNum[i-1];
        pReceiveNum[i] = RestRows/(ProcNum-i);
        pReceiveInd[i] = pReceiveInd[i-1] + pReceiveNum[i-1];
    }
    // Gather the whole result vector on every processor
    MPI_Allgatherv(pProcResult, pReceiveNum[ProcRank], MPI_INT, pResult,
                   pReceiveNum, pReceiveInd, MPI_DOUBLE, MPI_COMM_WORLD);
    // ProcessTermination(aMatrix, bMatrix, pResult, pProcRows, pProcResult);
    if (ProcRank == 0)
    {
        for (int i = 0; i < Size; i++)
        {
            printf("%i\n", pResult[i]);
        }
    }
    MPI_Finalize();
}
You have some logic problems.

for(int i = 0; i < n; i++) <-- this should run until n/size; you are going into unallocated memory
{
    for(int j = 0; j < n; j++)
    {
        cc[i][j] = 0;
    }
}

cc[i][j] += A[i][k] * B[k][j]; <-- again, going outside allocated memory

Replace it with

cc[k][i] += aa[k][j] * B[j][i];

(it has to read from aa, the scattered block of rows, since A is only initialized on rank 0).

Hopefully these are all the problems.
You should also treat the cases where the matrix size is not divisible by the number of processors.
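Putting both fixes together, a minimal sketch of the compute section, assuming n is divisible by size as noted above (aa holds this rank's n/size rows of A):

/* Zero only the rows this rank owns. */
for (int k = 0; k < n/size; k++)
    for (int i = 0; i < n; i++)
        cc[k][i] = 0;

/* C row (rank*n/size + k) = local row k of aa times each column i of B. */
for (int k = 0; k < n/size; k++)
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            cc[k][i] += aa[k][j] * B[j][i];

/* Ranks gather their row blocks back into C on rank 0, in rank order. */
MPI_Gather(cc, n*n/size, MPI_INT, C, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);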
I recently began studying MPI. The task is to find the minimum value in a matrix, and I want to parallelize the search itself for faster execution. But I cannot work out how to turn the sequential version of the program into a parallel one. I am asking for help.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"

int main(int argc, char *argv[])
{
    int rows, cols, min, value, n;
    int done = 0, numprocs, rank, i, j;
    srand(time(NULL));
    double startwtime = 0.0, endwtime;
    int namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);
    while (!done) {
        if (rank == 0) {
            printf("Enter the height/width of the matrix\n");
            scanf("%d", &rows);
            scanf("%d", &cols);
            startwtime = MPI_Wtime();
        }
        if (rows == 0 || cols == 0) {
            done = 1;
        } else {
            int **arr = (int **) malloc(rows * sizeof(int*)); // creating 2d array
            for (i = 0; i < rows; i++) {
                arr[i] = (int *) malloc(cols * sizeof(int));
            }
            for (i = 0; i < rows; i++) { // array filling
                for (j = 0; j < cols; j++) {
                    arr[i][j] = rand();
                }
            }
            for (i = 0; i < rows; i++) { // output of the array to the screen for clarity
                for (j = 0; j < cols; j++) {
                    printf("%d\n", arr[i][j]);
                }
            }
            min = arr[0][0];
            for (i = 0; i < rows; i++) { // find min value
                for (j = 0; j < cols; j++) { // need to parallelize
                    if (min > arr[i][j]) { min = arr[i][j]; }
                }
            }
            if (rank == 0) {
                endwtime = MPI_Wtime();
                printf("min = %d\n", min);
                printf("wall clock time = %f\n", endwtime - startwtime);
                fflush(stdout);
                for (i = 0; i < rows; i++) {
                    free(arr[i]);
                }
                free(arr);
            }
        }
    }
    MPI_Finalize();
    return 0;
}
Sorry for my bad English; I am hoping for your help.
UPD: I added something similar, but the program outputs garbage. It seems to me that this is because the array is built from pointers, and most likely I have misused the functions:
MPI_Scatter(&arr[0][0], rows*cols/numprocs, MPI_INT, &arr[rows][cols], rows*cols/numprocs, MPI_INT, 0, MPI_COMM_WORLD);
min = arr[0][0];
for (i = 0; i < rows; i++) {
    for (j = 0; j < cols; j++) {
        min = arr[i][j];
    }
}
MPI_Reduce(&min, &value, rows*cols/numprocs, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
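A minimal sketch of how the parallel part could look, assuming the matrix is stored in one contiguous block (MPI_Scatter cannot scatter an array of row pointers, because the rows live at unrelated addresses) and that rows*cols divides evenly by numprocs:

MPI_Bcast(&rows, 1, MPI_INT, 0, MPI_COMM_WORLD);  /* all ranks need the size, */
MPI_Bcast(&cols, 1, MPI_INT, 0, MPI_COMM_WORLD);  /* not just rank 0          */
int piece = rows * cols / numprocs;               /* elements per process */
int *data = NULL;                                 /* full matrix, rank 0 only */
int *part = malloc(piece * sizeof(int));          /* this rank's slice */
if (rank == 0) {
    data = malloc(rows * cols * sizeof(int));
    for (i = 0; i < rows * cols; i++)
        data[i] = rand();
}
MPI_Scatter(data, piece, MPI_INT, part, piece, MPI_INT, 0, MPI_COMM_WORLD);
int local_min = part[0];
for (i = 1; i < piece; i++)                       /* sequential scan of the slice */
    if (part[i] < local_min)
        local_min = part[i];
/* Count must be 1: each rank contributes a single int to the reduction. */
MPI_Reduce(&local_min, &value, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
/* After this, value on rank 0 holds the global minimum. */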
We are trying to pass an array between MPI processes and receive it into dynamically allocated memory. It keeps giving a segmentation fault, which we believe is due to the way we are sending it between processes. Our code is as follows:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "mpi.h"
#include <math.h>
int main(int argc, char* argv[])
{
    int my_rank, i, j, p;
    MPI_Request request, request2;
    MPI_Status mpi_status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Barrier(MPI_COMM_WORLD);
    if (my_rank == 0)
    {
        int size = 5;
        int **slice;
        slice = (int **)malloc(sizeof(int*) * size);
        slice[0] = (int *)malloc(sizeof(int) * size * size);
        for (i = 0; i < size; i++)
        {
            slice[i] = (*slice + size * i);
        }
        for (i = 0; i < size; i++)
        {
            for (j = 0; j < size; j++)
            {
                slice[i][j] = i * j;
            }
        }
        for (i = 0; i < size; i++)
        {
            for (j = 0; j < size; j++)
            {
                printf("slice[%d][%d]: %d\n", i, j, slice[i][j]);
            }
        }
        MPI_Send(&slice, size * size, MPI_INT, 1, 1, MPI_COMM_WORLD);
    } else {
        int local_size = 5;
        int **local_slice;
        local_slice = (int **)malloc(sizeof(int*) * local_size);
        local_slice[0] = (int *)malloc(sizeof(int) * local_size * local_size);
        for (i = 0; i < local_size; i++)
        {
            local_slice[i] = (*local_slice + local_size * i);
        }
        MPI_Recv(&local_slice, local_size * local_size, MPI_INT, 0, 1, MPI_COMM_WORLD, &mpi_status);
        for (i = 0; i < local_size; i++)
        {
            for (j = 0; j < local_size; j++)
            {
                printf("local_slice[%d][%d]: %d\n", i, j, local_slice[i][j]);
            }
        }
    }
    MPI_Finalize();
    return 0;
}
Can someone explain how to properly pass this type of array between MPI processes, please?
It looks like you need to change the first argument to MPI_Send from &slice to slice[0] and do the same for MPI_Recv.
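In other words, the count/datatype arguments describe size*size ints of actual data, so the buffer argument must point at that data. Since both blocks were allocated contiguously behind slice[0] and local_slice[0], a sketch of the corrected calls (same program, only the buffer arguments changed) looks like:

/* Send the contiguous data block itself, not the address of the pointer. */
MPI_Send(slice[0], size * size, MPI_INT, 1, 1, MPI_COMM_WORLD);

/* ... and on the receiving rank: */
MPI_Recv(local_slice[0], local_size * local_size, MPI_INT, 0, 1,
         MPI_COMM_WORLD, &mpi_status);

Passing &slice told MPI to read size*size ints starting at the address of the pointer variable itself, which runs straight off the stack into invalid memory, hence the segmentation fault.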