I wrote code that takes a matrix and distributes it across processes in chunks.
The chunk sizes may not always be equal. The chunking itself works correctly, but when I run the program with the process count set to 3 (for example), I get the error 'mpirun noticed that process rank 2 with PID 8676 on node cluster exited on signal 6 (Aborted).' Please look at the attached code. I think the problem is in the call to free.
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#define COLUMN 4
#define ROW 10
#define dp 100.0f
/*
 * Block-partition helpers: split `nvert` items over `commsize` ranks into
 * contiguous ranges whose sizes differ by at most one item.
 */
/* Index of the first item owned by `commrank`. */
#define chunk_low(commrank, commsize, nvert) \
    (((commrank) * (nvert)) / (commsize))
/* Index of the last item owned by `commrank` (inclusive). */
#define chunk_height(commrank, commsize, nvert) \
    (chunk_low((commrank) + 1, (commsize), (nvert)) - 1)
/* Number of items owned by `commrank`. */
#define chunk_size(commrank, commsize, nvert) \
    (chunk_low((commrank) + 1, (commsize), (nvert)) - \
     chunk_low((commrank), (commsize), (nvert)))
/*
 * Fill a row-major matrix of `row` rows and `column` columns so that element
 * (i, j) holds its own linear index i * column + j, echoing every value to
 * stdout as it is written.
 */
void init_matrix(int column, int row, float *matrix)
{
    printf("\nMatrix\n");
    for (int r = 0; r < row; r++) {
        float *line = matrix + r * column;   /* start of row r */
        for (int c = 0; c < column; c++) {
            line[c] = r * column + c;
            printf(" %f ", line[c]);
        }
        printf("\n");
    }
    printf("\n");
}
/*
 * Scatter a ROW x COLUMN matrix from rank 0 to all ranks in contiguous blocks
 * of whole rows (block sizes may differ by one row) and print each rank's block.
 */
int main(int argc, char **argv)
{
    int rank, size;
    int i;
    int local_count;
    float *vm, *local_matrix;
    double time1 = 0.0, time2;
    int *displs, *rcounts, *scounts;

    vm = calloc(ROW * COLUMN, sizeof *vm);
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    displs = malloc((size_t)size * sizeof *displs);
    scounts = malloc((size_t)size * sizeof *scounts);
    rcounts = malloc((size_t)size * sizeof *rcounts);
    /* The receive buffer must be sized for THIS rank's chunk.  Using the loop
     * index here (which equals `size` once the loop below has finished) sizes
     * it for a nonexistent rank and lets MPI_Scatterv overflow the heap. */
    local_count = chunk_size(rank, size, ROW) * COLUMN;
    /* Never request 0 bytes: calloc(0, ...) may legitimately return NULL. */
    local_matrix = calloc((size_t)(local_count > 0 ? local_count : 1),
                          sizeof *local_matrix);
    if (!vm || !displs || !scounts || !rcounts || !local_matrix) {
        fprintf(stderr, "rank %d: out of memory\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    /* Process 0 - master: create the matrix and start the timer */
    if (rank == 0) {
        printf("\nNumbers of proccesses %d. \nElements in vector %d.\n", size, COLUMN);
        init_matrix(COLUMN, ROW, vm);
        time1 = MPI_Wtime();
    }

    /* Counts and displacements are expressed in floats, not in rows. */
    for (i = 0; i < size; i++) {
        displs[i] = chunk_low(i, size, ROW) * COLUMN;
        rcounts[i] = scounts[i] = chunk_size(i, size, ROW) * COLUMN;
        printf("\ndispls[%d]=%d, scounts[%d]=%d\n", i, displs[i], i, scounts[i]);
    }

    MPI_Scatterv(vm, scounts, displs, MPI_FLOAT, local_matrix,
                 rcounts[rank], MPI_FLOAT, 0, MPI_COMM_WORLD);

    printf("\nProcess=%d Displs=%d rcounts=%d\n", rank, displs[rank], rcounts[rank]);
    printf("Local Matrix\n");
    for (i = 0; i < scounts[rank]; i++) {
        printf(" %f ", local_matrix[i]);
        /* Break the line after each complete matrix row.  (The previous
         * "%n" conversion had no matching argument, which is undefined.) */
        if ((i + 1) % COLUMN == 0) {
            printf("\n");
        }
    }
    printf("\n");

    /* Only master-process: report elapsed time */
    if (rank == 0) {
        time2 = MPI_Wtime();
        printf("\nTime parallel calculation = %f s.\n", time2 - time1);
    }

    /* Release every buffer owned by this process */
    free(displs);
    free(scounts);
    free(rcounts);
    free(local_matrix);
    free(vm);
    MPI_Finalize();
    return 0;
}
It looks like you mis-allocate your local_matrix:
local_matrix = (float *)calloc(chunk_size(i, size, ROW) * COLUMN, sizeof(float));
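After the loop finishes, i equals size, so chunk_size(i, size, ROW) computes the chunk size of a rank that does not exist. I think you meant to use rank: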
local_matrix = (float *)calloc(chunk_size(rank, size, ROW) * COLUMN, sizeof(float));
Related
I am trying to parallelize a Mandelbrot area computation.
The correct output should be around 1.510659; however, I am not getting that result.
** PROGRAM: Mandelbrot area
**
** PURPOSE: Program to compute the area of a Mandelbrot set.
** The correct answer should be around 1.510659.
**
** USAGE: Program runs without input ... just run the executable
**
reduction for numoutside.
This is my parallelized code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#define NPOINTS 1000
#define MAXITER 1000
/* NOTE(review): P is not referenced anywhere in the visible code. */
int P = 1;
/* A complex number stored as separate real (r) and imaginary (i) parts. */
struct d_complex
{
double r;
double i;
};
/* Returns numoutside when the point escapes within MAXITER iterations, 0 otherwise. */
int testpoint(struct d_complex);
/* Grid point currently under test; main fills it in before calling testpoint(). */
struct d_complex c;
/* NOTE(review): cPart is not referenced anywhere in the visible code. */
struct d_complex cPart;
/* Value testpoint() returns for a point found to be outside the set. */
int numoutside = 1;
int main()
{
int i, j, row;
int res;
double area, error, eps = 1.0e-5;
int myrank, mysize;
double stsec, ensec, commtime, maxcommtime;
MPI_Status status;
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &mysize);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
stsec = MPI_Wtime();
// Loop over grid of points in the complex plane which contains the Mandelbrot set,
// testing each point to see whether it is inside or outside the set.
/*for (i = 0; i < NPOINTS; i ++)
{
for (j = 0; j < NPOINTS ; j++)
{
c.r = -2.0 + 2.5 * (double)(i) / (double)(NPOINTS) + eps;
c.i = 1.125 * (double)(j) / (double)(NPOINTS) + eps;
testpoint(c);
}
}*/
if (myrank == 0)
{
/* Begin User Program - the master */
//*
int outsum, nb_pixel = NPOINTS*NPOINTS ;
for (i = 0; i < nb_pixel; i++)
{
MPI_Recv(&res, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
// printf("Slave id %d has send : %d \n", status.MPI_SOURCE, data[2]);
// printf("%d: [%d,%d] -> [%d,%d] = %d\n", status.MPI_SOURCE, data[0], data[1], data[0] + MAXX, data[1] + MAXY, data[2]);
res += numoutside;
}
area = 2.0 * 2.5 * 1.125 * (double)(NPOINTS * NPOINTS - res) / (double)(NPOINTS * NPOINTS);
error = area / (double)NPOINTS;
printf("Area of Mandlebrot set = %12.8f +/- %12.8f\n", area, error);
printf("Finish.\n");
}
else
{
for (i = myrank; i < NPOINTS; i+=mysize)
{
for (j = 0; j < NPOINTS; j++)
{
c.r = -2.0 + 2.5 * (double)(i) / (double)(NPOINTS) + eps;
c.i = 1.125 * (double)(j) / (double)(NPOINTS) + eps;
res=testpoint(c);
MPI_Send(&res, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
}
// Calculate area of set and error estimate and output the results
MPI_Finalize();
ensec = MPI_Wtime();
commtime = ensec - stsec;
// area = 2.0 * 2.5 * 1.125 * (double)(NPOINTS * NPOINTS - numoutside) / (double)(NPOINTS * NPOINTS);
// error = area / (double)NPOINTS;
printf("Area of Mandlebrot set = %12.8f +/- %12.8f\n", area, error);
if (myrank == 0)
{
printf("%.3f\n", commtime);
}
}
int testpoint(struct d_complex c)
{
// Does the iteration z=z*z+c, until |z| > 2 when point is known to be outside set
// If loop count reaches MAXITER, point is considered to be inside the set
struct d_complex z;
int iter;
double temp;
z = c;
for (iter = 0; iter < MAXITER; iter++)
{
temp = (z.r * z.r) - (z.i * z.i) + c.r;
z.i = z.r * z.i * 2 + c.i;
z.r = temp;
if ((z.r * z.r + z.i * z.i) > 4.0)
{
// MPI_Send( &numoutside, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
return numoutside;
break;
}
}
return 0;
}
I expect to get around 1.510659 when running the code with NPOINTS set to 1000 or 2000 on 2 or 4 processes.
The MPI documentation states that the address of the receive buffer (recvbuf) is significant only at root, meaning that the memory need not be allocated in the other processes. This is confirmed by this question.
int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
MPI_Op op, int root, MPI_Comm comm)
At first I thought that recvbuf did not even have to exist: that the memory for recvbuf itself did not have to be allocated (e.g. by dynamic allocation). Unfortunately (it took me a lot of time to understand my mistake!), it seems that even if the memory that it points to is not valid, the pointer itself has to exist.
See below for the code I have in mind, with a version that gives a segfault, and one that does not.
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
/*
 * Sum an n1 x n2 array-of-rows element-wise across all ranks onto rank 0,
 * one MPI_Reduce per row.
 *
 * Build with -DOLD to reproduce the crash discussed in the question: obs_sum
 * is then left NULL on non-root ranks, and evaluating obs_sum[k] to form the
 * MPI_Reduce argument dereferences it before MPI is even called.
 */
int main(int argc, char **argv) {
    // MPI initialization
    int world_rank, world_size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int n1 = 3, n2 = 10; // Sizes of the 2d arrays: n1 rows of n2 elements
    long **observables = (long **) malloc(n1 * sizeof(long *));
    for (int k = 0 ; k < n1 ; ++k) {
        observables[k] = (long *) calloc(n2, sizeof(long));
        for (long i = 0 ; i < n2 ; ++i) {
            observables[k][i] = k * i * world_rank; // Whatever
        }
    }

    long **obs_sum = NULL; // This will hold the sum on process 0
#ifdef OLD // Version that gives a segfault (kept on purpose as the failing case)
    if (world_rank == 0) {
        obs_sum = (long **) malloc(n1 * sizeof(long *));
        for (int k = 0 ; k < n1 ; ++k) {
            obs_sum[k] = (long *) calloc(n2, sizeof(long));
        }
    }
#else // Correct version
    // The table of row pointers exists on every rank (n1 entries, one per
    // row); only rank 0 attaches real storage to it.  Other ranks hold NULL
    // rows, which is fine because recvbuf is significant only at root.
    obs_sum = (long **) malloc(n1 * sizeof(long *));
    for (int k = 0 ; k < n1 ; ++k) {
        obs_sum[k] = (world_rank == 0) ? (long *) calloc(n2, sizeof(long)) : NULL;
    }
#endif

    for (int k = 0 ; k < n1 ; ++k) {
        // This is the line that results in a segfault if OLD is defined
        MPI_Reduce(observables[k], obs_sum[k], n2, MPI_LONG, MPI_SUM, 0,
                   MPI_COMM_WORLD);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // Release everything this rank allocated (free(NULL) is a no-op)
    for (int k = 0 ; k < n1 ; ++k) {
        free(observables[k]);
    }
    free(observables);
    if (obs_sum != NULL) {
        for (int k = 0 ; k < n1 ; ++k) {
            free(obs_sum[k]);
        }
        free(obs_sum);
    }

    MPI_Finalize();
    return 0;
}
Am I interpreting this correctly? What is the rationale behind this behavior?
The problem is not MPI, but the fact that you are passing obs_sum[k], but you haven't defined/allocated it at all.
for (int k = 0 ; k < n1 ; ++k) {
// This is the line that results in a segfault if OLD is defined
MPI_Reduce(observables[k], obs_sum[k], n2, MPI_LONG, MPI_SUM, 0,
MPI_COMM_WORLD);
}
Even if MPI_Reduce() is not getting its value, the generated code will get obs_sum (undefined and not allocated), add k to it and try to read this pointer (segfault) to be passed to MPI_Reduce().
For example the allocation of the rows should be sufficient for it to work:
#else // Correct version
// We define all the pointers in all the processes.
obs_sum = (long **) malloc(n2 * sizeof(long *));
// try commenting out the following lines
// if (world_rank == 0) {
// for (int k = 0 ; k < n2 ; ++k) {
// obs_sum[k] = (long *) calloc(n2, sizeof(long));
// }
// }
#endif
I would allocate a 2D array as a flat array - I really hate this array-of-arrays representation. Wouldn't this be better?
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
/*
 * Sum an n1 x n2 array element-wise across all ranks onto rank 0, storing the
 * array as one flat row-major buffer so that a single MPI_Reduce suffices.
 */
int main(int argc, char **argv) {
    // MPI initialization
    int world_rank, world_size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int n1 = 3, n2 = 10; // Sizes of the 2d arrays
    long *observables = (long *) malloc(n1*n2*sizeof(long));
    // NULL (not nullptr, which is not C before C23) on non-root ranks:
    // recvbuf is significant only at root.
    long *obs_sum = NULL; // This will hold the sum on process 0
    if (world_rank == 0) {
        obs_sum = (long *) malloc(n1*n2*sizeof(long));
    }
    if (observables == NULL || (world_rank == 0 && obs_sum == NULL)) {
        fprintf(stderr, "rank %d: out of memory\n", world_rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    for (int k = 0 ; k < n1 ; ++k) {
        for (long i = 0 ; i < n2 ; ++i) {
            observables[k*n2+i] = k * i * world_rank; // Whatever
        }
    }

    MPI_Reduce(observables, obs_sum, n1*n2, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    free(obs_sum);      // no-op on non-root ranks
    free(observables);
    MPI_Finalize();
    return 0;
}
I wrote code that multiplies a matrix by a vector using MPI. The matrix is distributed in chunks, each consisting of whole rows. The chunk sizes may not always be equal. The chunking works correctly, but when I run the program I get a half-empty result vector, whereas I expected a full one. Please look at the attached code. I think the problem is in the MPI_Gatherv call.
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#define COLUMN 4
#define ROW 7
#define dp 100.0f
/*
 * Block-partition helpers: split `nvert` items over `commsize` ranks into
 * contiguous ranges whose sizes differ by at most one item.
 */
/* Index of the first item owned by `commrank`. */
#define chunk_low(commrank, commsize, nvert) \
    (((commrank) * (nvert)) / (commsize))
/* Index of the last item owned by `commrank` (inclusive). */
#define chunk_height(commrank, commsize, nvert) \
    (chunk_low((commrank) + 1, (commsize), (nvert)) - 1)
/* Number of items owned by `commrank`. */
#define chunk_size(commrank, commsize, nvert) \
    (chunk_low((commrank) + 1, (commsize), (nvert)) - \
     chunk_low((commrank), (commsize), (nvert)))
/*
 * Fill a row-major matrix of `row` rows and `column` columns so that element
 * (i, j) holds its own linear index i * column + j, echoing every value to
 * stdout as it is written.
 */
void init_matrix(int column, int row, float *matrix)
{
    printf("\nMatrix\n");
    for (int r = 0; r < row; r++) {
        float *line = matrix + r * column;   /* start of row r */
        for (int c = 0; c < column; c++) {
            line[c] = r * column + c;
            printf(" %f ", line[c]);
        }
        printf("\n");
    }
    printf("\n");
}
/*
 * Multiply a ROW x COLUMN matrix by a COLUMN-element vector in parallel.
 * Rank 0 creates the data; the matrix is scattered in blocks of whole rows,
 * the vector is broadcast, each rank computes its slice of the product, and
 * the slices are gathered back on rank 0.
 *
 * Two layouts are needed: the scatter moves matrix ELEMENTS (rows * COLUMN
 * floats per rank), while the gather moves one result element per ROW.
 * Reusing the scatter counts/displacements for the gather placed the slices
 * at the wrong offsets in `result`.
 */
int main(int argc, char **argv)
{
    int rank, size;
    int i, j;
    int local_row;
    float *vm, *vector, *local_matrix, *local_result, *result = NULL;
    double time1 = 0.0, time2;
    int *displs, *scounts;    /* scatter layout, in matrix elements */
    int *rdispls, *rcounts;   /* gather layout, in result elements (rows) */

    vm = calloc(ROW * COLUMN, sizeof *vm);
    vector = malloc(COLUMN * sizeof *vector);
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    displs = malloc((size_t)size * sizeof *displs);
    scounts = malloc((size_t)size * sizeof *scounts);
    rdispls = malloc((size_t)size * sizeof *rdispls);
    rcounts = malloc((size_t)size * sizeof *rcounts);

    local_row = chunk_size(rank, size, ROW);
    /* Never request 0 bytes: calloc(0, ...) may legitimately return NULL. */
    local_matrix = calloc((size_t)(local_row > 0 ? local_row * COLUMN : 1),
                          sizeof *local_matrix);
    local_result = calloc((size_t)(local_row > 0 ? local_row : 1),
                          sizeof *local_result);
    if (rank == 0) {
        /* The gather target is significant only at root. */
        result = calloc(ROW, sizeof *result);
    }
    if (!vm || !vector || !displs || !scounts || !rdispls || !rcounts ||
        !local_matrix || !local_result || (rank == 0 && !result)) {
        fprintf(stderr, "rank %d: out of memory\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    /* Process 0 - master: create the inputs and start the timer */
    if (rank == 0) {
        printf("\nNumbers of proccesses %d. \nElements in vector %d.\n", size, COLUMN);
        init_matrix(COLUMN, ROW, vm);
        for (i = 0; i < COLUMN; i++) {
            vector[i] = (11 * 5) + (11 * i);
        }
        time1 = MPI_Wtime();
    }

    for (i = 0; i < size; i++) {
        rdispls[i] = chunk_low(i, size, ROW);
        rcounts[i] = chunk_size(i, size, ROW);
        displs[i] = rdispls[i] * COLUMN;
        scounts[i] = rcounts[i] * COLUMN;
    }

    /* `vector` holds floats; broadcasting it as MPI_DOUBLE would move twice
     * as many bytes as the buffer contains. */
    MPI_Bcast(vector, COLUMN, MPI_FLOAT, 0, MPI_COMM_WORLD);
    MPI_Scatterv(vm, scounts, displs, MPI_FLOAT, local_matrix,
                 scounts[rank], MPI_FLOAT, 0, MPI_COMM_WORLD);

    for (i = 0; i < local_row; i++) {
        for (j = 0; j < COLUMN; j++) {
            local_result[i] += local_matrix[i * COLUMN + j] * vector[j];
        }
    }

    MPI_Gatherv(local_result, local_row, MPI_FLOAT, result, rcounts, rdispls,
                MPI_FLOAT, 0, MPI_COMM_WORLD);

    /* Only master-process: report timing and the product */
    if (rank == 0) {
        time2 = MPI_Wtime();
        printf("\nTime parallel calculation = %f s.\n", time2 - time1);
        for (i = 0; i < ROW; i++) {
            printf(" %f\n", result[i]);
        }
    }

    /* Release every buffer owned by this process (free(NULL) is a no-op) */
    free(displs);
    free(scounts);
    free(rdispls);
    free(rcounts);
    free(local_matrix);
    free(local_result);
    free(result);
    free(vector);
    free(vm);
    MPI_Finalize();
    return 0;
}
After run this code i was expected:
484.000000
1628.000000
2772.000000
3916.000000
5060.000000
6204.000000
7348.000000
But get this result:
484.000000
1628.000000
0.000000
0.000000
0.000000
0.000000
0.000000
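The problem was in the counts and displacements passed to MPI_Gatherv: they were expressed in matrix elements (rows * COLUMN), but the gathered result holds only one element per row, so the gather needs its own per-row counts and displacements.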
I am trying to implement a master/slave relationship which solves the mandelbrot set and prints it into a ppm file. This is what I have so far:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi/mpi.h>
/* Escape-time test for one point; returns 0 (interior) or 255 (exterior). */
int calculateMan (double , double ); //calculateMandelbrotSet
/* File-scope MPI status object, available to receive calls. */
MPI_Status status;
/* A complex number: R is the real part, i the imaginary part. */
struct Number {
double R;
double i;
/* Z is the iteration variable that calculateMan() updates in place.
   NOTE(review): C presumably holds the point being evaluated - confirm against main. */
} Z,C;
/* NOTE(review): declared const without an initializer and not referenced in the visible code. */
const int color;
int colorTemp; //color value
const int max = 1000; //max iteration value
const int ResHeight = 800; //Resolution
const int ResWidth = 800;
/* Bounds of the sampled region of the complex plane (R = real axis, I = imaginary axis). */
double CRMax = 1.5;
double CIMax = 2.0;
double CRMin = -2.5;
double CIMin = -2.0; //Constant values
/* Distance between neighbouring pixels along the real / imaginary axis; computed in main. */
double colorWidth;
double colorHeight;
int main (int argc, char** argv) {
int rank, size = 0;
int nodos, source, dest;
double startTime, endTime;
//Rank = current process ID
//Size = amount of processes
MPI_Init (&argc, &argv); // starts MPI
startTime = MPI_Wtime();
MPI_Comm_size (MPI_COMM_WORLD, &size); // get number of processes
MPI_Comm_rank (MPI_COMM_WORLD, &rank); // get current process
nodos = size - 1;
if (rank == 0) { // MASTER --------------------------------------
colorHeight = (CIMax - CIMin) / ResHeight;
colorWidth = (CRMax - CRMin) / ResWidth;
FILE *fp;
fp = fopen("Mandelbrot.ppm","w");
fprintf(fp,"P3\n %d\n %d\n %d\n",ResWidth,ResHeight,255); //Magic Number & Header
for (int row = 0; row < ResHeight; row++) {
C.i= CIMin + row*colorHeight;
for (int column = 0; column < ResWidth; column++) {
C.R = CRMin + column*colorWidth;
//data sends
for (dest = 1; dest <= nodos; dest++) {
MPI_Send(&C.R, sizeof(double), MPI_DOUBLE, dest, column, MPI_COMM_WORLD);
MPI_Send(&C.i, sizeof(double), MPI_DOUBLE, dest, column, MPI_COMM_WORLD);
}
}
}
for (int row = 0; row < ResHeight; row++) {
for (int column = 0; column < ResWidth; column++) {
//Recv and print
MPI_Recv(&colorTemp, sizeof(int), MPI_DOUBLE, source, 0, MPI_COMM_WORLD, &status);
fprintf(fp, "%d %d %d\n", colorTemp, 1,3);
}
}
fclose(fp);
} //------------------------- END MASTER
if (rank > 0) // START SLAVE --------------------------------------
{
for (int row = 0; row < ResHeight; row++) {
for (int column = 0; column < ResWidth; column++) {
MPI_Recv(&C.R, sizeof(double), MPI_DOUBLE, 0, column, MPI_COMM_WORLD, &status);
MPI_Recv(&C.i, sizeof(double), MPI_DOUBLE, 0, column, MPI_COMM_WORLD, &status);
colorTemp = calculateMan(C.R, C.i);
MPI_Send(&colorTemp, sizeof(int), MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
} // SLAVE END---------------------------------
endTime = MPI_Wtime(); //stop timer
MPI_Finalize(); //end MPI
printf("Time: %.6f\n", endTime-startTime);
exit(0); //end program
}
/*
 * Escape-time test for the point CReal + CImaginary*i.  Starting from Z = 0,
 * applies Z = Z*Z + c to the global Z until |Z|^2 is no longer below 4 or
 * `max` iterations have run.  Returns 0 (black, interior) when the iteration
 * limit was reached and 255 (white, exterior) otherwise.
 */
int calculateMan (double CReal, double CImaginary) {
    int steps;

    Z.R = 0.0;
    Z.i = 0.0;
    for (steps = 0; steps < max; steps++) {
        // Stop as soon as the magnitude test fails (also when it is NaN).
        if (!((Z.R*Z.R) + (Z.i * Z.i) < 4)) {
            break;
        }
        double nextReal = (Z.R * Z.R) - (Z.i * Z.i) + CReal;
        Z.i = 2.0 * Z.R * Z.i + CImaginary;
        Z.R = nextReal;
    }
    return (steps == max) ? 0 : 255;
}
I am trying to run my program, but I cannot figure out why the receive-and-print loop never finishes. Also, could anyone have a look at the code and point out any other issues or things I should look out for in the future?
Thanks!
I firstly initialize a 4x4 matrix and then try to send the first 2x2 block to the slave process by using MPI in C. However the slave process only receives the first row of the block, the second row is filled with random numbers from computer ram. I couldn't find what is missing. The code of the program is below :
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define SIZE 4
int main(int argc, char** argv)
{
int rank, nproc;
const int root = 0;
const int tag = 3;
int** table;
int* datas;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
datas = malloc(SIZE * SIZE * sizeof(int));
table = malloc(SIZE * sizeof(int*));
for (int i = 0; i < SIZE; i++)
table[i] = &(datas[i * SIZE]);
for (int i = 0; i < SIZE; i++)
for (int k = 0; k < SIZE; k++)
table[i][k] = 0;
table[0][1] = 1;
table[0][2] = 2;
table[1][0] = 3;
table[2][3] = 2;
table[3][1] = 3;
table[3][2] = 4;
if (rank == root){
MPI_Datatype newtype;
int sizes[2] = { 4, 4 }; // size of table
int subsizes[2] = { 2, 2 }; // size of sub-region
int starts[2] = { 0, 0 };
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &newtype);
MPI_Type_commit(&newtype);
MPI_Send(&(table[0][0]), 1, newtype, 1, tag, MPI_COMM_WORLD);
}
else{
int* local_datas = malloc(SIZE * SIZE * sizeof(int));
int** local = malloc(SIZE * sizeof(int*));
for (int i = 0; i < SIZE; i++)
local[i] = &(local_datas[i * SIZE]);
MPI_Recv(&(local[0][0]), 4, MPI_INT, root, tag, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
for (int i = 0; i < 2; i++){
for (int k = 0; k < 2; k++)
printf("%3d ", local[i][k]);
printf("\n");
}
}
MPI_Finalize();
return 0;
}
You have instructed the receive operation to put four integer values consecutively in memory and therefore the 2x2 block is converted to a 1x4 row upon receive (since local is 4x4). The second row of local contains random values since the memory is never initialised.
You should either make use of MPI_Type_create_subarray in both the sender and the receiver in order to place the received data in a 2x2 block or redefine local to be a 2x2 matrix instead of 4x4.