I'm a beginner in C and MPI and I'm trying to write a program that multiplies 2 matrices in MPI.
But I don't know what is wrong in my code.
I'm trying to 'slice' the matrix M1 into n lines and send them to other processes to multiply, and broadcast the matrix M2. After that I do a Gather to build the final matrix M3.
I run it like this:
mpirun -n 2 matrix
But I receive an error in the terminal:
[adiel-VirtualBox:07921] *** Process received signal ***
[adiel-VirtualBox:07921] Signal: Segmentation fault (11)
[adiel-VirtualBox:07921] Signal code: (128)
[adiel-VirtualBox:07921] Failing at address: (nil)
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 7921 on node adiel-VirtualBox exited on signal 0 (Unknown signal 0).
--------------------------------------------------------------------------
2 total processes killed (some possibly by mpirun during cleanup)
mpirun: clean termination accomplished
Can anyone help me?
Here's my code:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
//#include "mpe.h"
#include <math.h>
/* Print an m x n row-major matrix, one row per line, two decimals per cell. */
void printMatrix(double *M, int m, int n) {
int r, c;
for (r = 0; r < m; r++) {
for (c = 0; c < n; c++) {
printf("%.2f \t", M[r * n + c]);
}
printf("\n");
}
}
/* Allocate an m x n matrix as one contiguous row-major block.
 * Returns NULL on allocation failure; caller owns the memory (free()).
 * Fixes: drop the redundant malloc cast and do the size multiplication
 * in size_t so large m*n does not overflow int arithmetic. */
double* allocateMatrix(int m, int n){
double* M;
M = malloc((size_t)m * (size_t)n * sizeof *M);
return M;
}
/* Distribute row-slices of M1 to worker ranks, broadcast M2, multiply,
 * and (attempt to) gather the product into M3 on rank 0. */
int main( int argc, char *argv[] )
{
int rank, size;
int m1,n1,m2,n2;
int row, col,ctrl,i,k,lines,proc;
// NOTE(review): vp is declared but never used.
double *M1, *M2, *M3, **vp, *v;
MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Comm_size( MPI_COMM_WORLD, &size );
// Square 3x3 matrices: M1 (m1 x n1) * M2 (m2 x n2) -> M3 (m1 x n2).
m1 = m2 = n1 = n2 = 3;
// NOTE(review): n1/size is INTEGER division, so ceil() is a no-op.
// For n1=3, size=2 this yields lines=1, and one row of M1 is never
// assigned to any process.
lines = (int)ceil(n1/size);
// Work buffer holding one slice ('lines' rows) of M1.
v = (double *)malloc(lines*n1*sizeof(double));
M2 = allocateMatrix(m2,n2);
M3 = allocateMatrix(m1,n2);
// Only the root owns the full M1; on other ranks M1 stays an
// uninitialized pointer (it is only touched under rank==0 guards).
if(rank==0)
M1 = allocateMatrix(m1,n1);
// zero-fill the matrices (column-by-column over a row-major layout;
// harmless here only because the matrices are square)
for (col = 0; col < n1; col++){
for (row = 0; row < m1; row++) {
if(rank==0)
M1[(row*m1+col)] = 0;
M2[(row*m2+col)] = 0;
M3[(row*m1+col)] = 0;
}
}
// zero-fill the slice buffer
for(i=0;i<lines*n1;i++)
v[i] = 0;
// populate: M1 = 1..9 row by row, M2 = all ones (root only)
if(rank == 0){
for (col = 0; col < n1; col++){
for (row = 0; row < m1; row++) {
M1[row*m1+col] = row*3+(col+1);
M2[(row*m2+col)] = 1;
}
}
}
//---------------------sharing and multiply---------------//
// Root copies M1 into slice buffers and sends one slice per worker,
// counting down from the highest rank.
if(rank == 0){
proc = size-1;
//for each line
for(row = 0;row<m1;row++){
// slice index of this row (integer division again, floor is a no-op)
ctrl = floor(row/lines);
//on each column
for(col=0;col<n1;col++){
// NOTE(review): v only holds lines*n1 doubles, but ctrl keeps
// growing with row, so v[(ctrl*n1)+col] writes past the end of v
// once row >= lines -- heap corruption, a likely cause of the
// reported segfault.
v[(ctrl*n1)+col] = M1[(row*n1)+col];
}
if(row%lines == (lines - 1)){
if(proc!=0){
MPI_Send(v,lines*n1,MPI_DOUBLE,proc,1, MPI_COMM_WORLD);
proc--;
//clearing the slice buffer for the next group of rows
for(i=0;i<lines*n1;i++)
v[i] = 0;
}
}
}
}
// NOTE(review): all point-to-point sends above are posted before this
// collective, while the matching receives happen after it; interleave
// them to avoid resource exhaustion/deadlock on larger inputs.
//MPI_Bcast(m1, m*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(M2, m2*n2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// workers receive their slice of M1
if(rank!=0)
MPI_Recv(v,lines*n1,MPI_DOUBLE,0,1,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// multiply the local slice by M2
for(row=0;row<lines;row++){
// NOTE(review): skipping rows whose first element is 0 also skips
// legitimate rows that merely start with a zero.
if(v[row*n1]!=0){
for (col = 0; col < n1; col++){
double val = 0.0;
for(k=0;k<m1;k++){
val += v[(row*n1)+k] * M2[(k*n1)+col];
}
// NOTE(review): the row offset multiplies by 'size' instead of
// 'lines'; for more ranks this indexes beyond M3 (m1*n2 doubles).
M3[((size-1-rank)*size*n1)+(row*n1)+col] = val;
}
}
}
// NOTE(review): MPI_Gather is a COLLECTIVE -- every rank in the
// communicator must call it. Rank 0 never does here, so the workers
// block forever (or the job aborts). Also send and recv buffers alias.
if(rank!=0){
for(row = 0; row < lines; row++){
MPI_Gather(&M3[((size-1-rank)*size*n1)+(row*n1)], n1, MPI_DOUBLE, &M3[((size-1-rank)*size*n1)+(row*n1)], n1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
}
}
if(rank == 0){
printf("matrix 1------------------------\n");
printMatrix(M1,m1,n1);
printf("matrix 2------------------------\n");
printMatrix(M2,m2,n2);
printf("matrix 3------------------------\n");
printMatrix(M3,m1,n2);
}
MPI_Finalize();
return 0;
}
For one thing, doing all of your sends before the broadcast, and all of the receives after it is asking for trouble. I can easily see that leading to MPI resource exhaustion or deadlock failures. In such a small input that shouldn't arise, but you should fix it regardless. I'll take another look after that.
Related
I am trying to create a program that will ultimately be transposing a matrix in MPI so that it can be used in further computations. But right now I am trying to do a simple thing: Root process has a 4x4 matrix "A" which contains elements 0..15 in row-major order. This data is scattered to 2 processes so that each receives one half of the matrix. Process 0 has a 2x4 sub_matrix "a" and receives elements 0..7 and Process 1 gets elements 8..15 in its sub_matrix "a".
My goal is for these processes to swap their a matrices with each other using MPI_Get. Since I was encountering problems, I decided to test a simpler version and simply make process 0 get process 1's "a" matrix, that way, both processes will have the same elements in their respective sub_matrices once I print after the MPI_Get-call and the MPI_fence are called.
Yet the output is erratic, have tried to trouble-shoot for several hours but haven't been able to crack the nut. Would appreciate your help with this.
This is the code below, and the run-command: mpirun -n 2 ./get
Compile: mpicc -std=c99 -g -O3 -o get get.c -lm
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define NROWS 4
#define NCOLS 4
/* Allocate a ROWS x COLS int matrix with one contiguous payload block
 * and a separate array of row pointers into it, stored via *M.
 * Returns 0 on success, -1 on allocation failure (perror printed).
 * Caller frees with free((*M)[0]); free(*M);
 * Fix: free the payload when the row-pointer allocation fails, instead
 * of leaking it. */
int allocate_matrix(int ***M, int ROWS, int COLS) {
int *p;
if (NULL == (p = malloc(ROWS * COLS * sizeof(int)))) {
perror("Couldn't allocate memory for input (p in allocate_matrix)");
return -1;
}
if (NULL == (*M = malloc(ROWS * sizeof(int*)))) {
perror("Couldn't allocate memory for input (M in allocate_matrix)");
free(p); /* don't leak the contiguous payload on failure */
return -1;
}
/* each row pointer aims at its stretch of the contiguous block */
for(int i = 0; i < ROWS; i++) {
(*M)[i] = &(p[i * COLS]);
}
return 0;
}
/* Scatter a 4x4 matrix from the root into per-rank sub-matrices, then
 * have rank 0 fetch rank 1's sub-matrix with one-sided MPI_Get. */
int main(int argc, char *argv[])
{
int rank, nprocs, **A, **a, n_cols, n_rows, block_len;
MPI_Win win;
int errs = 0;
// NOTE(review): 'rank' is read here BEFORE MPI_Comm_rank() assigns it
// (and before MPI_Init()), so this test is on an indeterminate value:
// A is very likely never allocated on the root and the fill loop
// segfaults. This whole block belongs after MPI_Comm_rank().
if(rank==0)
{
allocate_matrix(&A, NROWS, NCOLS);
for (int i=0; i<NROWS; i++)
for (int j=0; j<NCOLS; j++)
A[i][j] = i*NCOLS + j;
}
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
n_cols=NCOLS; //cols in a sub_matrix
n_rows=NROWS/nprocs; //rows in a sub_matrix
block_len = n_cols*n_rows;
allocate_matrix(&a, n_rows, n_cols);
for (int i = 0; i <n_rows; i++)
for (int j = 0; j < n_cols; j++)
a[i][j] = 0;
// n_rows blocks of n_cols ints with stride n_cols == fully contiguous.
MPI_Datatype block_type;
MPI_Type_vector(n_rows, n_cols, n_cols, MPI_INTEGER, &block_type);
MPI_Type_commit(&block_type);
// *A (== A[0]) points at the contiguous payload from allocate_matrix().
MPI_Scatter(*A, 1, block_type, &(a[0][0]), block_len, MPI_INTEGER, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
printf("process %d: \n", rank);
for (int j=0; j<n_rows; j++){
for (int i=0; i<n_cols; i++){
printf("%d ",a[j][i]);
}
printf("\n");
}
if (rank == 0)
{
printf("TESTING, before Get a[0][0] %d\n", a[0][0]);
// Rank 0 exposes no memory of its own; it only reads rank 1's window.
MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), win);
MPI_Get(*a, 8, MPI_INTEGER, 1, 0, 8, MPI_INTEGER, win);
MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
printf("TESTING, after Get a[0][0] %d\n", a[0][0]);
printf("process %d:\n", rank);
for (int j=0; j<n_rows; j++){
for (int i=0; i<n_cols; i++){
printf("%d ", a[j][i]);
}
printf("\n");
}
}
else
{ /* rank = 1 */
// NOTE(review): 'a' here is the array of ROW POINTERS, not the data,
// so the window exposes pointer values -- MPI_Get on rank 0 then
// reads garbage (the erratic output). The window base should be
// &a[0][0] (or *a), matching the MPI_Get on the other side.
MPI_Win_create(a, n_rows*n_cols*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), win);
MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
}
MPI_Type_free(&block_type);
MPI_Win_free(&win);
MPI_Finalize();
return errs;
}
This is the output that I get:
process 0:
0 1 2 3
4 5 6 7
process 1:
8 9 10 11
12 13 14 15
process 0:
1552976336 22007 1552976352 22007
1552800144 22007 117 0
But what I want is for the second time I print the matrix from process 0, it should have the same elements as in process 1.
First, I doubt this is really the code you are testing. You are freeing some MPI type variables that are not defined and also rank is uninitialised in
if(rank==0)
{
allocate_matrix(&A, NROWS, NCOLS);
for (int i=0; i<NROWS; i++)
for (int j=0; j<NCOLS; j++)
A[i][j] = i*NCOLS + j;
}
and the code segfaults because A won't get allocated in the root.
Moving this post MPI_Comm_rank(), freeing the correct MPI type variable, and fixing the call to MPI_Win_create in rank 1:
MPI_Win_create(&a[0][0], n_rows*n_cols*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
// This -------^^^^^^^^
produces the result you are seeking.
I'd recommend to stick to a single notation for the beginning of the array like &a[0][0] instead of a mixture of *a and &a[0][0]. This will prevent (or at least reduce the occurrence of) similar errors in the future.
I'm trying to make an MPI matrix multiplication program but the scatter function doesn't seem to be working for me. Only one row is getting scattered and the rest of the cores receive garbage value.
Also, calling the display_matrix() function before MPI_Init() seems to run 4 processes instead of 1 (I have a quad-core CPU). Why is this happening even before initialisation?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include<mpi.h>
int **matrix_generator(int row,int col);
int **multiply_matrices(int **matrix_A,int **matrix_B,int rowsA, int colsA,int rowsB,int colsB);
void display_matrix(int **matrixA,int rows,int cols);
/* Generate two random matrices on every rank, then try to scatter
 * matrix_A across 4 processes and print what each rank received. */
// NOTE(review): 'void main' is non-standard C; main should return int.
void main(int argc,char *argv[])
{
srand(time(0));
int **matrix_A,**matrix_B,**matrix_result,*scattered_matrix,*gathered_matrix, rowsA,colsA,rowsB,colsB,world_rank,world_size,i,j;
// NOTE(review): argv[1..4] are read BEFORE the argc check below; with
// missing arguments this is undefined behavior. Validate argc first.
rowsA = atoi(argv[1]);
colsA = atoi(argv[2]);
rowsB = atoi(argv[3]);
colsB = atoi(argv[4]);
// NOTE(review): the /4 hard-codes 4 ranks; the per-rank buffer should
// be sized with world_size, known only after MPI_Init.
scattered_matrix = (int *)malloc(sizeof(int) * rowsA*colsA/4);
if (argc != 5)
{
fprintf(stderr,"Usage: mpirun -np <No. of processors> ./a.out <Rows A> <Columns A> <Rows B> <Columns B>\n");
exit(-1);
}
else if(colsA != rowsB)
{
printf("Check the dimensions of the matrices!\n\n");
}
matrix_A = matrix_generator(rowsA,colsA);
matrix_B = matrix_generator(rowsB,colsB);
// NOTE(review): this runs before MPI_Init(); with the common MPI
// implementations every spawned process executes it, which is why the
// matrix prints once per rank.
display_matrix(matrix_A,rowsA,colsA);
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// NOTE(review): matrix_A is an array of row pointers, NOT one
// contiguous block of ints, so MPI_Scatter sends pointer values and
// out-of-bounds bytes -- the "garbage" the other ranks see. Allocate
// the payload contiguously (as in allocate_matrix above) to fix this.
MPI_Scatter(matrix_A, rowsA*colsA/4, MPI_INT, scattered_matrix, rowsA*colsA/4, MPI_INT, 0, MPI_COMM_WORLD);
for(i=0;i<world_size;i++)
{
printf("Scattering data %d from root to: %d \n",scattered_matrix[i],world_rank);
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
}
/* Allocate a row x col matrix of random digits (0..9) as an array of
 * independently-malloc'd rows. Caller owns all allocations.
 * Fix: the per-row payload was allocated with sizeof(int *) per element
 * instead of sizeof(int) -- harmless over-allocation on most platforms,
 * but wrong and wasteful. */
int **matrix_generator(int row, int col)
{
int i, j, **intMatrix;
intMatrix = (int **)malloc(sizeof(int *) * row);
for (i = 0; i < row; i++)
{
intMatrix[i] = (int *)malloc(sizeof(int) * col);
for (j = 0;j<col;j++)
{
intMatrix[i][j]=rand()%10;
}
}
return intMatrix;
}
/* Print a rows x cols matrix of ints, one row per output line. */
void display_matrix(int **matrix, int rows,int cols)
{
int r, c;
for (r = 0; r < rows; r++)
{
for (c = 0; c < cols; c++)
printf("%d ",matrix[r][c]);
printf("\n");
}
}
The main issue is your matrices are not allocated in contiguous memory (see the comment section for a link)
The MPI standard does not specify what happens before an app invokes MPI_Init().
The two main MPI implementations choose to spawn all the tasks when mpirun is invoked (that means there are 4 independent processes first, and they "join" into a single MPI job when they all call MPI_Init()).
That being said, once upon a time, a vendor chose to have mpirun start a single MPI task, and they use their own remote-fork when MPI_Init() is called.
Bottom line, if you want to write portable code, do as less as possible (and never print anything) before MPI_Init() is called.
I am a beginner in C. I have to create a distributed architecture with the library MPI. The following code is:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
/* Each slave grows its row of mit[] by a random factor per step and
 * sends the value to the master, which sums them into tab[j]. */
int main(int argc, char **argv)
{
int N, w = 1, L = 2, M = 50; // with N number of threads
int T= 2;
int myid;
int buff;
// NOTE(review): N is UNINITIALIZED here -- it is only set by
// MPI_Comm_size() further down -- so this VLA has an indeterminate
// size and the fill loop below runs on garbage bounds (undefined
// behavior). Move MPI_Init/MPI_Comm_size before this declaration.
float mit[N][T]; // I initialize a 2d array
for(int i = 0; i < N; ++i){
mit[i][0]= M / (float) N;
for (int j = 1; j < T; ++j){
mit[i][j] = 0;
}
}
float tab[T]; // 1d array
MPI_Status stat;
/*********************************************
start
*********************************************/
MPI_Init(&argc,&argv); // Initialisation
MPI_Comm_size(MPI_COMM_WORLD, &N);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
for(int j = 0; j < T; j++) {
for(int i = 0; i < N; i++) { // I iterate for each slave
if (myid !=0) {
float y = ((float) rand()) / (float) RAND_MAX;
// NOTE(review): when j == T-1, mit[i][j + 1] indexes column T,
// one past the end of the row (out of bounds).
mit[i][j + 1] = mit[i][j]*(1 + w * L * y);
buff=mit[i][j+1];
// NOTE(review): buff is a SINGLE int, but 128 MPI_INT are sent
// from &buff -- this reads 127 ints past the variable (memory
// corruption; the count must be 1, as the answer below shows).
MPI_Send(&buff, 128, MPI_INT, 0, 0, MPI_COMM_WORLD); // I send the variable buff to the master
buff=0;
}
if( myid == 0 ) { // Master
// NOTE(review): this inner 'i' shadows the outer loop variable;
// tab[] is also never zero-initialized before the += below.
for(int i = 1; i < N; i++){
MPI_Recv(&buff, 128, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
tab[j] += buff; // I need to receive all the variables buff sent by the salves, sum them and stock into the tab at the index j
}
printf("\n%.20f\n",tab[j]); // I print the result of the sum at index j
}
}
}
MPI_Finalize();
return 0;
}
// NOTE(review): stray extra closing brace -- this file cannot compile.
}
I use the command in the terminal: mpicc .c -o my_file to compile the program
Then mpirun -np 101 my_file_c to start the program with 101 threads
But the problem is I have the following error in the terminal:
It seems that [at least] one of the processes that was started with
> mpirun did not invoke MPI_INIT before quitting (it is possible that
> more than one process did not invoke MPI_INIT -- mpirun was only
> notified of the first one, which was on node n0).
>
> mpirun can *only* be used with MPI programs (i.e., programs that
> invoke MPI_INIT and MPI_FINALIZE). You can use the "lamexec" program
> to run non-MPI programs over the lambooted nodes.
It seems that I have a problem with the master, but I don't know why...
Any idea ???
Thank you :)
This behavior is very likely the result of a memory corruption.
You cannot
int buff=mit[i][j+1];
MPI_Send(&buff, 128, MPI_INT, ...);
depending on what you want to achieve, you can try instead
int buff=mit[i][j+1];
MPI_Send(&buff, 1, MPI_INT, ...);
// ...
MPI_Recv(&buff, 1, MPI_INT, ...);
or
int *buff=&mit[i][j+1];
MPI_Send(buff, 128, MPI_INT, ...);
// fix MPI_Recv()
I am trying to implement a master/slave relationship which solves the mandelbrot set and prints it into a ppm file. This is what I have so far:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi/mpi.h>
int calculateMan (double , double ); //calculateMandelbrotSet
MPI_Status status;
// Complex iterate Z and constant C of the Mandelbrot recurrence,
// shared as globals between main() and calculateMan().
struct Number {
double R;
double i;
} Z,C;
// NOTE(review): file-scope 'const int color' is zero-initialized and
// never used anywhere in this file.
const int color;
int colorTemp; //color value
const int max = 1000; //max iteration value
const int ResHeight = 800; //Resolution
const int ResWidth = 800;
// Bounds of the viewed region of the complex plane.
double CRMax = 1.5;
double CIMax = 2.0;
double CRMin = -2.5;
double CIMin = -2.0; //Constant values
// Per-pixel step sizes, computed by the master from the bounds above.
double colorWidth;
double colorHeight;
/* Master/slave Mandelbrot: master streams pixel coordinates to slaves,
 * slaves return a color per pixel, master writes a PPM file. */
int main (int argc, char** argv) {
int rank, size = 0;
int nodos, source, dest;
double startTime, endTime;
//Rank = current process ID
//Size = amount of processes
MPI_Init (&argc, &argv); // starts MPI
startTime = MPI_Wtime();
MPI_Comm_size (MPI_COMM_WORLD, &size); // get number of processes
MPI_Comm_rank (MPI_COMM_WORLD, &rank); // get current process
nodos = size - 1;
if (rank == 0) { // MASTER --------------------------------------
colorHeight = (CIMax - CIMin) / ResHeight;
colorWidth = (CRMax - CRMin) / ResWidth;
FILE *fp;
// NOTE(review): fopen result is not checked before use.
fp = fopen("Mandelbrot.ppm","w");
fprintf(fp,"P3\n %d\n %d\n %d\n",ResWidth,ResHeight,255); //Magic Number & Header
for (int row = 0; row < ResHeight; row++) {
C.i= CIMin + row*colorHeight;
for (int column = 0; column < ResWidth; column++) {
C.R = CRMin + column*colorWidth;
// NOTE(review): every pixel is sent to EVERY slave, so each of
// the 'nodos' slaves computes the whole image, yet the master
// collects only ResHeight*ResWidth results below -- the counts
// do not match, which is why the receive loop never finishes.
// Also sizeof(double) is passed as the COUNT (8 doubles sent),
// where 1 is intended.
//data sends
for (dest = 1; dest <= nodos; dest++) {
MPI_Send(&C.R, sizeof(double), MPI_DOUBLE, dest, column, MPI_COMM_WORLD);
MPI_Send(&C.i, sizeof(double), MPI_DOUBLE, dest, column, MPI_COMM_WORLD);
}
}
}
for (int row = 0; row < ResHeight; row++) {
for (int column = 0; column < ResWidth; column++) {
// NOTE(review): 'source' is never assigned -- an indeterminate
// rank is passed. The datatype is MPI_DOUBLE but colorTemp is
// an int (the slave sends MPI_INT), and sizeof(int) is again
// used where a count of 1 is intended.
//Recv and print
MPI_Recv(&colorTemp, sizeof(int), MPI_DOUBLE, source, 0, MPI_COMM_WORLD, &status);
fprintf(fp, "%d %d %d\n", colorTemp, 1,3);
}
}
fclose(fp);
} //------------------------- END MASTER
if (rank > 0) // START SLAVE --------------------------------------
{
for (int row = 0; row < ResHeight; row++) {
for (int column = 0; column < ResWidth; column++) {
MPI_Recv(&C.R, sizeof(double), MPI_DOUBLE, 0, column, MPI_COMM_WORLD, &status);
MPI_Recv(&C.i, sizeof(double), MPI_DOUBLE, 0, column, MPI_COMM_WORLD, &status);
colorTemp = calculateMan(C.R, C.i);
MPI_Send(&colorTemp, sizeof(int), MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
} // SLAVE END---------------------------------
endTime = MPI_Wtime(); //stop timer
MPI_Finalize(); //end MPI
printf("Time: %.6f\n", endTime-startTime);
exit(0); //end program
}
/* Iterate z = z^2 + c from z = 0 using the global Z; return 0 (black)
 * when the point stays bounded for 'max' steps, 255 (white) otherwise. */
int calculateMan (double CReal, double CImaginary) {
int iter;
Z.R = 0.0;
Z.i = 0.0;
for (iter = 0; iter < max; iter++)
{
if (Z.R * Z.R + Z.i * Z.i >= 4)
break;
double nextR = (Z.R * Z.R) - (Z.i * Z.i) + CReal;
Z.i = 2.0 * Z.R * Z.i + CImaginary;
Z.R = nextR;
}
return (iter == max) ? 0 : 255;
}
I am trying to run my program but I cannot figure out why the RECV and print have an infinite iteration. Also, can anyone have a look at the code and tell me any sort of other issues or things I should look out for, for future reference?
Thanks!
I got a problem with a MPI code in C.
I think I created the good algorithm to process a double loop with a 2D array. But, when i try to use MPI_Gather to collect datas from process, there is a segmentation fault error. Here is the code :
// Fixed global grid: 4096 x 4096 doubles (~128 MiB of static storage),
// broadcast to all ranks and gathered back in runCalculation().
#define NN 4096
#define NM 4096
double global[NN][NM];
void range(int n1, int n2, int nprocs, int irank, int *ista, int *iend){
int iwork1;
int iwork2;
iwork1 = ( n2 - n1 + 1 ) / nprocs;
iwork2 = ( ( n2 - n1 + 1 ) % nprocs );
*ista = irank * iwork1 + n1 + fmin(irank, iwork2);
*iend = *ista + iwork1 - 1;
if ( iwork2 > irank )
iend = iend + 1;
}
/* Jacobi-style relaxation over the global NN x NM grid, distributed
 * across exactly 16 ranks; iterates until the error drops below tol. */
void runCalculation(int n, int m, int argc, char** argv)
{
const int iter_max = 1000;
const double tol = 1.0e-6;
double error = 1.0;
int rank, size;
int start, end;
MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Comm_size( MPI_COMM_WORLD, &size );
// Hard requirement: exactly 16 ranks.
if (size != 16) MPI_Abort( MPI_COMM_WORLD, 1 );
// Zero the first n*m doubles of the flat global array (only fully
// correct when m == NM, since rows are NM wide).
memset(global, 0, n * m * sizeof(double));
if(rank == 0){
// Boundary condition: first column fixed to 1.0 on the root.
for (int j = 0; j < n; j++)
{
global[j][0] = 1.0;
}
}
int iter = 0;
while ( error > tol && iter < iter_max )
{
error = 0.0;
MPI_Bcast(global, NN*NM, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if(iter == 0)
range(1, n, size, rank, &start, &end);
// NOTE(review): this 'size' SHADOWS the communicator size above, and
// end-start is one less than the chunk's row count (range() returns
// inclusive bounds), so each rank drops its last row.
int size = end - start;
// NOTE(review): a VLA of size*NM doubles (up to ~8 MiB per rank at
// NM=4096 columns) risks stack overflow; prefer heap allocation.
double local[size][NM];
memset(local, 0, size * NM * sizeof(double));
for( int j = 1; j < size - 1; j++)
{
for( int i = 1; i < m - 1; i++ )
{
// NOTE(review): the stencil reads global rows 0..size-1 on every
// rank rather than this rank's own start..end window.
local[j][i] = 0.25 * ( global[j][i+1] + global[j][i-1]
+ global[j-1][i] + global[j+1][i]);
error = fmax( error, fabs(local[j][i] - global[j][i]));
}
}
// NOTE(review): recvcount is PER RANK, so NN*NM makes the root try to
// receive NN*NM doubles from EACH of the 16 ranks and write far past
// 'global' -- the reported segfault. It should be size*NM (see the
// accepted fix below). 'error' is also never reduced across ranks.
MPI_Gather(&local[0][0], size*NM, MPI_DOUBLE, &global[0][0], NN*NM, MPI_DOUBLE, 0, MPI_COMM_WORLD);
printf("%d\n", iter);
if(iter % 100 == 0)
printf("%5d, %0.6f\n", iter, error);
iter++;
}
MPI_Finalize();
}
I run this with 4096x4096 arrays. With the process rank 0, it creates a segmentation fault at the MPI_Gather line. I checked if the size are ok for local arrays and I think it works nicely.
Edit : Added the line of local initialization. New segmentation fault :
*** Process received signal ***
Signal: Segmentation fault (11)
Signal code: Address not mapped (1)
Failing at address: 0x10602000
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 19216 on machine_name exited on signal 11 (Segmentation fault).
The recvcount parameter of MPI_Gather indicates the number of items it receives from each process, not the total number of items it receives.
MPI_Gather(&local[0][0], size*NM, MPI_DOUBLE, &global[0][0], NN*NM, MPI_DOUBLE, 0, MPI_COMM_WORLD);
Should be:
MPI_Gather(&local[0][0], size*NM, MPI_DOUBLE, &global[0][0], size*NM, MPI_DOUBLE, 0, MPI_COMM_WORLD);