Trying to pass an array pointer between MPI Processes and receiving it to dynamically allocated memory. it keeps giving a segmentation fault, which we believe is due to the way we are sending it between processes. Our code is as follows:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "mpi.h"
#include <math.h>
int main(int argc, char* argv[])
{
int my_rank, i, j, p;
MPI_Request request, request2;
MPI_Status mpi_status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Barrier(MPI_COMM_WORLD);
if (my_rank == 0)
{
int size = 5;
int **slice;
slice = (int **)malloc(sizeof(int*)* size);
slice[0] = (int *)malloc(sizeof(int)* size*size);
for(i = 0; i< size;i++)
{
slice[i] = (*slice + size*i);
}
for (i = 0; i < size; i++)
{
for (j = 0; j < size; j++)
{
slice[i][j] = i*j;
}
}
for (i = 0; i < size; i++)
{
for (j = 0; j < size; j++)
{
printf("slice[%d][%d]: %d\n", i, j, slice[i][j]);
}
}
MPI_Send(&slice, size * size, MPI_INT, 1, 1, MPI_COMM_WORLD);
} else {
int local_size=5;
int **local_slice;
local_slice = (int **)malloc(sizeof(int*)* local_size);
local_slice[0] = (int *)malloc(sizeof(int)* local_size*local_size);
for(i = 0; i< local_size;i++)
{
local_slice[i] = (*local_slice + local_size*i);
}
MPI_Recv(&local_slice, local_size * local_size, MPI_INT, 0, 1, MPI_COMM_WORLD, &mpi_status);
for (i = 0; i < local_size; i++)
{
for (j = 0; j < local_size; j++)
{
printf("local_slice[%d][%d]: %d\n", i, j, local_slice[i][j]);
}
}
}
MPI_Finalize();
return 0;
}
Can someone explain how to properly pass this type of array between MPI Processes please?
It looks like you need to change the first argument to MPI_Send from &slice to slice[0] and do the same for MPI_Recv.
Related
So I got the code working for when running on 1 process. Although when I try to run it on more then 2 processers or more(mpirun -n 4)(mpirun -n 8)etc; half my results are coming back as zero.Im assuming because it doesn't deal with the case where the number of processors is divisible by the matrix size. Any ideas? I'm trying to initialize both matrixes from command line and perform matrix multiplication using MPI. I'm knew to this and would love any help. For example when I enter in a size of 2 and initialize matrix A to the values {1,4,6,7} and matrix B to {8,9,4,5} my result comes out to be {8,9,0,0}..
void init_Matrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
scanf("%i", &matrix[i][j]);
}
}
}
void printMatrix(int n, int matrix[n][n])
{
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
printf("%d" , matrix[i][j]);
printf(" ");
}
printf("\n");
}
}
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_size(MPI_COMM_WORLD, &size); //num p
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int n;
if(rank == 0)
{
printf("Enter in size of matrix! \x0A");
scanf("%i",&n);
}
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);
int A[n][n];
int B[n][n];
int C[n][n];
int aa[n/size][n];
int cc[n/size][n];
if(rank == 0)
{
init_Matrix(n,A);
init_Matrix(n,B);
}
for(int i = 0; i < n; i++)
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
//scatter rows of first matrix to different processes
MPI_Scatter(A, n*n/size, MPI_INT, aa, n*n/size, MPI_INT,0,MPI_COMM_WORLD);
//broadcast second matrix to all processes
MPI_Bcast(B, n*n, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
//perform vector multiplication by all processes
for(int k = 0; k < n/size; k++)
{
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cc[i][j] += A[i][k] * B[k][j];
}
}
}
MPI_Gather(cc, n*n/size, MPI_INT, C, n*n/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0){
printMatrix(n, C);
}
MPI_Finalize();
}
updated:
updated attempt using mpi scatterv and mpi gather
:
void initMatrix(int Size, int matrix[Size][Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
scanf("%i", &matrix[i][j]);
}
}
void multIJK(int Size, int A[Size][Size], int B[Size][Size], int pResult[Size])
{
for(int i = 0; i < Size; i++)
{
for(int j = 0; j < Size; j++)
{
for(int k = 0; k < Size; k++)
pResult += A[i][k] * B[k][j];
}
}
}
int main(int argc, char* argv[]) {
int Size;
int RowNum;
int ProcNum;
int ProcRank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
if (ProcRank == 0) {
printf("Enter in size of matrix! \x0A");
scanf("%i", &Size);
}
int aMatrix[Size][Size];
int bMatrix[Size][Size];
MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
int RestRows = Size;
for (int i=0; i<ProcRank; i++)
RestRows = RestRows-RestRows/(ProcNum-i);
RowNum = RestRows/(ProcNum-ProcRank);
int pResult[Size];
int pProcRows[RowNum*Size];
int pProcResult[RowNum];
if(ProcRank == 0)
{
initMatrix(Size,aMatrix);
initMatrix(Size,bMatrix);
}
RestRows=Size; // Number of rows, that haven’t been distributed yet
MPI_Bcast(bMatrix, Size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Alloc memory for temporary objects
// the number of elements sent to the process
int pSendInd[ProcNum];
// the index of the first data element sent to the process
int pSendNum[ProcNum];
// Define the disposition of the matrix rows for current process
RowNum = (Size/ProcNum);
pSendNum[0] = RowNum*Size;
pSendInd[0] = 0;
for (int i=1; i<ProcNum; i++) {
RestRows -= RowNum;
RowNum = RestRows/(ProcNum-i);
pSendNum[i] = RowNum*Size;
pSendInd[i] = pSendInd[i-1]+pSendNum[i-1];
}
// Scatter the rows
MPI_Scatterv(aMatrix , pSendNum, pSendInd, MPI_INT, pProcRows,
pSendNum[ProcRank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
multIJK(Size,aMatrix,bMatrix,pResult);
RestRows=Size; // Number of rows, that haven’t been distributed yet
//Alloc memory for temporary objects
// Number of elements, that current process sends
int pReceiveNum[ProcNum];
/* Index of the first element from current process in result vector */
int pReceiveInd[ProcNum];
//Define the disposition of the result vector block of current processor
pReceiveInd[0] = 0;
pReceiveNum[0] = Size/ProcNum;
for (int i=1; i<ProcNum; i++) {
RestRows -= pReceiveNum[i-1];
pReceiveNum[i] = RestRows/(ProcNum-i);
pReceiveInd[i] = pReceiveInd[i-1]+pReceiveNum[i-1];
} //Gather the whole result vector on every processor
MPI_Allgatherv(pProcResult, pReceiveNum[ProcRank], MPI_INT, pResult,
pReceiveNum, pReceiveInd, MPI_DOUBLE, MPI_COMM_WORLD);
//ProcessTermination(aMatrix,bMatrix, pResult, pProcRows, pProcResult);
if(ProcRank == 0)
{
for(int i = 0; i < Size; i++)
{
printf("%i\n",pResult[i]);
}
}
MPI_Finalize();
}
You have some logic problems.
for(int i = 0; i < n; i++) <-- this should be until n/size, you are going into unallocated memory
{
for(int j = 0; j < n; j++)
{
cc[i][j] = 0;
}
}
cc[i][j] += A[i][k] * B[k][j]; <-- again, going outsize allocated memory
Replace it with
cc[k][i] += A[k][j] * B[j][i];
Hopefully these are all the problems.
You should also treat the cases where the matrix size is not divisible by the number of processors.
Recently began to study MPI technology
The task is to find the minimum value in the matrix.
Search itself parallelize for faster work.
But I can’t put my mind to how to make a parallel version from the sequential version of the program
I ask for help
#include <stdio.h>
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
int main(int argc,char *argv[])
{
int rows, cols, min, value, n;
int done = 0, numprocs, rank, i, j;
srand(time(NULL));
double startwtime = 0.0, endwtime;
int namelen;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Get_processor_name(processor_name,&namelen);
while (!done) {
if (rank == 0) {
printf("Enter the height/width of the matrix\n");
scanf("%d",&rows);
scanf("%d",&cols);
startwtime = MPI_Wtime();
}
if (rows == 0 || cols == 0) {
done = 1;
} else {
int **arr = (int **) malloc(rows * sizeof(int*)); //creating 2d array
for (i = 0; i < rows; i++) {
arr[i] = (int *) malloc(cols * sizeof(int));
}
for (i = 0; i < rows; i++) { //Array filling
for (j = 0; j < cols; j++) {
arr[i][j] = rand();
}
}
for (i = 0; i < rows; i++) { // output of the array to the screen for clarity
for (j = 0; j < cols; j++) {
printf("%d\n",arr[i][j]);
}
}
min = arr[0][0];
for (i = 0; i < rows; i++) { //find min value
for (j = 0; j < cols; j++) { //
if(min > arr[i][j]){min = arr[i][j];} //need to parallize
}
}
if (rank == 0) {
endwtime = MPI_Wtime();
printf("min = %d\n", min);
printf("wall clock time = %f\n", endwtime-startwtime);
fflush( stdout );
for(i = 0; i < rows; i++) {
free(arr[i]);
}
free(arr);
}
}
}
MPI_Finalize();
return 0;
}
Sorry for my bad English
I am hope for your help
UPD: Added a similar function, but the program gives out garbage. It seems to me that this is due to the creation of an array based on pointers. And most likely I misused the functions
MPI_Scatter (&arr[0][0], rows*cols/numprocs, MPI_INT, &arr[rows][cols], rows*cols/numprocs, MPI_INT, 0, MPI_COMM_WORLD);
min = arr[0][0];
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
min = arr[i][j];
}
}
MPI_Reduce(&min, &value, rows*cols/numprocs, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
A program was written to find the minimum value in the matrix. At small values of the matrix dimension, it works correctly, however, if the dimension exceeds 350x350, then when calculating on a different number of nodes, a similar error occurs.
mpirun noticed that process rank 4 with PID 15014 on node cluster exited on signal 11 (Segmentation fault).
Everything works on two cores, but not always on others.
I use the following commands:
mpicc Lab777.c -o 1 -lm -std=c99
mpirun -np 16 ./1
The code:
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define ROWS 350
#define COLS 350
#define max 999999999
void rand_matrix(int array[ROWS][COLS])
{
srand((unsigned int)time(NULL));
for (int i = 0; i < ROWS; i++)
for (int j = 0; j < COLS; j++)
array[i][j] = rand();
}
void show(int array[ROWS][COLS])
{
for (int i = 0; i < ROWS; i++)
{
printf("\n");
for (int j = 0; j < COLS; j++)
{
printf("\t|%d|", array[i][j]);
}
printf("\n");
}
}
void convert(int array[ROWS][COLS], int *conv_arr)
{
int k = 0;
for (int i = 0; i < ROWS; i++)
for (int j = 0; j < COLS; j++)
{
conv_arr[k] = array[i][j];
k++;
}
}
int find_min(int *array, int piece, int num_of_pieces)
{
int result = max;
for (int i = 0; i < (ROWS * COLS / (num_of_pieces)); i++)
if (array[i] < result)
result = array[i];
return result;
}
int main(int argc, char *argv[])
{
int matrix[ROWS][COLS], lin_arr[ROWS * COLS], min;
double startwtime, endwtime;
int ProcNum;
int ProcRank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
int recv_matrix[ProcNum][ROWS * COLS], func[ProcNum];
if (ProcRank == 0)
{
printf("Matrix is filling\n");
rand_matrix(matrix);
convert(matrix, lin_arr);
printf("Matrix:\n");
//show(matrix);
}
startwtime = MPI_Wtime();
MPI_Scatter(&lin_arr[(ProcRank) * (ROWS * COLS / (ProcNum))], (ROWS * COLS / (ProcNum)), MPI_INT, &recv_matrix[ProcRank],(ROWS * COLS / (ProcNum)), MPI_INT, 0, MPI_COMM_WORLD);
func[ProcRank] = find_min(recv_matrix[ProcRank], ProcRank, ProcNum);
MPI_Reduce(&func[ProcRank], &min, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
endwtime = MPI_Wtime();
if (ProcRank == 0)
{
printf("Min value: %d\n", min);
printf("Time: %.4f\n", endwtime-startwtime);
}
MPI_Finalize();
return 0;
}
UPD:
Trying to reduce memory for recv_matrix and func.
I added the following line :
#define MAXPROC 20
And changed the following:
int recv_matrix[MAXPROC][ROWS * COLS], func[MAXPROC];
But now it works with a smaller matrix size
I'm making an MPI program with Visual studio and a problem occurred.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <mpi.h>
int main(void) {
int a[1000], b[1000];
int my_rank, p;
int source = 0, tag = 5;
int count[100], result = 0;
MPI_Status status;
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
srand(time(NULL));
if (my_rank == 0) {
for (int j = 0; j < p; j++) {
for (int i = j / p; i < 1000 * ((j+1) / p); i++) {
a[i] = rand() % 100;
MPI_Send(a[i], 1, MPI_INT, j, tag, MPI_COMM_WORLD);
}
}
printf("&d", a[30]);
for (int i = 0; i < 1000 / p; i++) {
MPI_Recv(&b[i], 1, MPI_INT, source, tag, MPI_COMM_WORLD, &status);
if (b[i] % 2 != 0) {
count[my_rank] += b[i];
}
}
}
else if (my_rank != 0) {
count[my_rank] = 0;
for (int j = 0; j < 1000 / p; j++) {
MPI_Recv(&b[j], 1, MPI_INT, source, tag, MPI_COMM_WORLD, &status);
if (b[j] % 2 != 0) {
count[my_rank] += b[j];
}
}
}
for (int i = 0; i < p; i++) {
result += count[i];
}
printf("The sum of the odd numbers is %d.", result);
MPI_Finalize();
return 0;
}
If I run the code above (I inserted mpiexec -n 4 HW1.exe at the command prompt), I get an error message like this :
0: samsungpc: 0xc0000005: process excited without calling finalize
1: samsungpc: terminated
2: samsungpc: terminated
3: samsungpc: terminated
---error analysis---
0: HW1.exe ended prematurely and may have crashed on samsungpc
---error analysis---
I consider this to be an extremely weird issue:
I have this code:
it is supposed to receive a 2d matrix of size chunk and width.
Matrix is allocated using :
int **alloc2d(int n, int m) {
int i = 0;
int *data = malloc(n * m * sizeof(int));
int **array = malloc(n * sizeof(int *));
for (i = 0; i < n; i++) {
array[i] = &(data[i * m]);
}
return array;
}
So it is a continuos memory block.
I have the following code :
MPI_Status st;
int worker;
for(i = 1; i < size; i++) {
MPI_Recv(&(recmat[0][0]), chunk*width, MPI_INT, MPI_ANY_SOURCE, 1,
MPI_COMM_WORLD, &st);
worker = st.MPI_SOURCE;
/* for(k = worker * chunk; k < (worker + 1) * chunk; k++){
for(j = 0; j < width; j++) {
mat[k][j] = recmat[k - worker * chunk][j];
}
}*/
}
If code is like this, everything stops and works well.
If I uncomment the region:
for(k = worker * chunk; k < (worker + 1) * chunk; k++){
for(j = 0; j < width; j++) {
mat[k][j] = recmat[k - worker * chunk][j];
}
}
the thread running this code does not stop, I cannot find an logical explanation for this. Perhaps someone can see the error or the issue. Thank you!
recmat allocation and chunk calculation :
int **recmat;
recmat = alloc2d(chunk,width);
int chunk;
chunk = height / size;
Sorry, this is too long to fit in a comment:
The code you've posted is fine; for instance, putting enough code around it to make it run produces the correct results (below). So the issue is not where you think it is.
If you see code locking up in places you don't think it ought to, this frequently points to weird memory errors or something else going on. You're best of just running it through a debugger, or through something like valgrind to check for memory problems.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int **alloc2d(int n, int m) {
int i = 0;
int *data = malloc(n * m * sizeof(int));
int **array = malloc(n * sizeof(int *));
for (i = 0; i < n; i++) {
array[i] = &(data[i * m]);
}
return array;
}
int main(int argc, char **argv) {
int rank, size;
const int height=10, width=10;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int **recmat;
int chunk;
chunk = height / size;
if (chunk*size != height) {
fprintf(stderr, "%s: number of ranks %d does not divide size %d\n",
argv[0], size, height);
MPI_Finalize();
exit(1);
}
if (rank == 0) {
int **recmat = alloc2d(chunk,width);
int **mat = alloc2d(height,width);
int worker;
int i,j,k;
MPI_Status st;
/* deal with my own submatrix */
for (k=0; k<chunk; k++) {
for (j=0; j<width; j++) {
mat[k][j] = 0;
}
}
for(i = 1; i < size; i++) {
MPI_Recv(&(recmat[0][0]), chunk*width, MPI_INT, MPI_ANY_SOURCE, 1,
MPI_COMM_WORLD, &st);
worker = st.MPI_SOURCE;
for(k = worker * chunk; k < (worker + 1) * chunk; k++){
for(j = 0; j < width; j++) {
mat[k][j] = recmat[k - worker * chunk][j];
}
}
}
free(&(recmat[0][0]));
free(recmat);
printf("Rank 0: mat is \n");
for (int i=0; i<height; i++) {
for (int j=0; j<width; j++) {
printf("%2d ", mat[i][j]);
}
printf("\n");
}
free(&(mat[0][0]));
free(mat);
} else {
int **sendmat = alloc2d(chunk,width);
for (int i=0; i<chunk; i++)
for (int j=0; j<width; j++)
sendmat[i][j] = rank;
MPI_Send(&(sendmat[0][0]), chunk*width, MPI_INT, 0, 1, MPI_COMM_WORLD);
free(&(sendmat[0][0]));
free(sendmat);
}
MPI_Finalize();
return 0;
}
There were so many errors and bugs in my code that it's not even worth mentioning, I'm sorry for this useless question ...
This is also a longer comment. You can prevent double copy of the received data if you first probe for the message and then use the rank from the status to directly receive the content of the message into the big matrix:
for(i = 1; i < size; i++) {
MPI_Probe(MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &st);
worker = st.MPI_SOURCE;
MPI_Recv(&(mat[worker*chunk][0]), chunk*width, MPI_INT,
worker, 1, MPI_COMM_WORLD, &st);
}
Less code and should work faster.