Sum of the number 1 to 1000 using MPI

Sum of the number 1 to 1000 using MPI - c

The summation is 49995000 in this case, but it should be 50005000. The number 10000 is missing. The following code makes use of 10 processors to compute the sum of numbers ranging from 1 to 1000. This aggregate is calculated by each of the processors, and the results are shown on the screen.
The final result of all processors' computations is gathered and aggregated by the zero processor, and the final result is shown in the output of the program.
I didn't figure out what the problem is. Please assist me in this matter.
#include <cstdio>
#include <cstdlib>
#include <mpi.h>
static int rank, nodes;
int main()
{
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &nodes);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Status status;
int ans = 0;
int total = 0;
int start = rank * 1000;
int end = start + 999;
for(int i = start; i <= end; i++) {
ans = ans + i;
}
if(rank != 0) {
MPI_Ssend(&ans, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
} else {
total = ans;
for(int j = 1; j < 10; j++) {
MPI_Recv(&ans, 1, MPI_INT, j, 0, MPI_COMM_WORLD, &status);
total += ans;
}
printf("Total is %d\n", total);
printf("Total Nodes is %d\n", nodes);
}
MPI_Finalize();
return 0;
}
The PBS Job file as followers,
#PBS -l nodes=2
#PBS -l walltime=00:02:00
#PBS -l select=10
cat $PBS_NODEFILE
NPROC=10
cd $PBS_O_WORKDIR
MPISIZE=$NPROC
MPIPROG=`basename $PBS_JOBNAME .pbs`
echo 'Running MPI program' $MPIPROG 'on' $MPISIZE 'processes'
echo 'Started at' `date`
echo '--------------------------------------------------------------------------------'
(time mpirun -n $MPISIZE ./$MPIPROG) 2>&1
echo '--------------------------------------------------------------------------------'
echo 'Finished at' `date`

Related

How can I visualize MPI measures in plots with C?

The all task is: Implement the deterministic mass ranking algorithm.
The task should be prepared using the MPI standard in C programming language. Run times should be measured during runs on 1, 2, and 4 threads and plotted on a graph. The input N should always be 2 powers and this should be given on the horizontal x-axis, but only the exponents. For example, in the case of 1024, the scale is 10, because the 10th power of 2 is 1024. This is the so-called logarithmic scale. Thus, the scale of the x-axis remains linear, and the result can be easily plotted even with larger inputs. In all cases, the malloc function must be used, otherwise they will get a segmentation error! Whenever possible, input should be generated randomly.
My problem is, how can I measure the MPI threads, without the following error:
MPI_Init_thread(517): Cannot call MPI_INIT or MPI_INIT_THREAD more than once
My second problem is, how can I visualize the measured numbers?
My code is following:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <dos.h>
#include <conio.h>
#include <graphics.h>
#include <math.h>
#include <mpi.h>
#include <string.h>
#include <omp.h>
#include "mpi.h"
int egyszal(int szomsz[], int rang[], int p, int x);
int ketszal(int szomsz[], int rang[], int p, int x);
int negyszal (int szomsz[], int rang[], int p, int x);
int egyszal(int szomsz[], int rang[], int p, int x)
{
int i, j;
MPI_Init_thread( 0, 0, MPI_THREAD_MULTIPLE, &x );
for(i = 0; i < p; ++i)
{
if(szomsz[i] == 0)
{
rang[i] = 0;
}
else
{
rang[i] = 1;
}
}
for(j = 0; j < log10(p); ++j)
{
//printf("j valtozo erteke: %d\n", j);
for(i = 0; i < p; ++i)
{
//printf("i valtozo erteke: %d\n", i);
if( szomsz[i] != 0)
{
rang[i] = rang[i] + rang[szomsz[i]];
szomsz[i] = szomsz[szomsz[i]];
}
}
}
double ido_1 = MPI_Wtime();
MPI_Finalize();
MPI_Abort;
return ido_1;
}
int ketszal(int szomsz[], int rang[], int p, int x)
{
int i, j;
MPI_Init_thread( 0, 0, MPI_THREAD_MULTIPLE, &x );
for(i = 0; i < p; ++i)
{
if(szomsz[i] == 0)
{
rang[i] = 0;
}
else
{
rang[i] = 1;
}
}
for(j = 0; j < log10(p); ++j)
{
//printf("j valtozo erteke: %d\n", j);
for(i = 0; i < p; ++i)
{
//printf("i valtozo erteke: %d\n", i);
if( szomsz[i] != 0)
{
rang[i] = rang[i] + rang[szomsz[i]];
szomsz[i] = szomsz[szomsz[i]];
}
}
}
double ido_1 = MPI_Wtime();
MPI_Finalize();
MPI_Abort;
return ido_1;
}
int negyszal(int szomsz[], int rang[], int p, int x)
{
int i, j;
MPI_Init_thread( 0, 0, MPI_THREAD_MULTIPLE, &x );
for(i = 0; i < p; ++i)
{
if(szomsz[i] == 0)
{
rang[i] = 0;
}
else
{
rang[i] = 1;
}
}
for(j = 0; j < log10(p); ++j)
{
//printf("j valtozo erteke: %d\n", j);
for(i = 0; i < p; ++i)
{
//printf("i valtozo erteke: %d\n", i);
if( szomsz[i] != 0)
{
rang[i] = rang[i] + rang[szomsz[i]];
szomsz[i] = szomsz[szomsz[i]];
}
}
}
double ido_1 = MPI_Wtime();
MPI_Finalize();
MPI_Abort;
return ido_1;
}
int main(int argc, char** argv)
{
int i, j;
int x = 1;
int szomsz[] = {5, 4, 2, 0, 3, 1};
int rang[] = {1, 1, 1, 0, 1, 1};
int seged_1 = sizeof(szomsz);
int seged_2 = sizeof(int);
int p = seged_1/seged_2;
egyszal(szomsz, rang, p, x);
x = 2;
ketszal(szomsz, rang, p, x);
x = 4;
negyszal(szomsz, rang, p, x);
return 0;
}

You can visualize with Gnuplot, considering the stdout prints, you can grab the stdout in a bash script file, and based on the input data, and the stdout you can visualize nice figures.
here is the example of collecting the "execution time" and then plot it then with Gnuplot:
In the following, based on the experiment, I will write results in a "CSV" file, then plot them.
FILENAME=MEASUREMENT
wdir=`pwd`
toolname=`pwd | awk -F/ '{print $NF}'`
cpath="run_mpi.sh" #command path of your executable
irange="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25" # input range
mstring="EXECUTION TIME" # matching string
mcolumn="5" # matching column
nrep="10" #number of repetition of the test
#-----------------------------------
if [ ! -d "$wdir" ]; then
echo "ERROR: cannot find '$wdir'"; exit 1
fi
if [ ! -s "$wdir/$cpath" ]; then
echo "ERROR: cannot find '$cpath' in '$wdir'"; exit 1
fi
cd $wdir
FILENAME=$FILENAME
re='^[0-9]+$'
echo "pwd=`pwd`"
echo "create log and plot file name: $FILENAME.csv"
echo "logs and plots will store into $wdir/logs/"
echo ""
for i in $irange; do
a="0"; min=""; max="0"
for r in `seq $nrep`; do
output=`./$cpath $i 2>error.log`
val=`echo "$output"|awk "/$mstring/{print \\$5}" c=$mcolumn`
if ! [[ $val =~ $re ]] ; then echo "ERROR: The output is not a number! (input=$i,output='$val')" >&2; exit 1; fi
if [ 1 = `echo "$val > $max"|bc` ]; then max="$val"; fi
if [ "$min" = "" ]; then
min="$val"; a="$val"
else
if [ 1 = `echo "$val < $min"|bc` ]; then min="$val"; fi
fi
i1=`expr $i - 1`
a=`echo "define trunc(x) { auto s; s=scale; scale=0; x=x/1; scale=s; return x } trunc($a * $i1 / $i + $val / $i)"|bc -l`
# echo "$i -- $val -- $a -- $min -- $max"
done
echo "$i,$a,$min,$max"
printf %s'\n' "$i,$a,$min,$max" >> $FILENAME.csv
done
### plot the collected CSV file
for FILE in ${FILENAME}.csv; do
gnuplot <<- EOF
set xlabel "Fib Index"
set ylabel "Execution time (ns)"
set key left
set term png
set style histogram cluster gap 1
set style fill solid 0.5
set boxwidth 0.9
set style histogram errorbars linewidth 1
set errorbars linecolor black
red = "#FF0000"; green = "#00FF00"; blue = "#0000FF"; skyblue = "#87CEEB" ; violet = "#FF00FF"; purple = "#440154" ;
set grid ytics
set format y '10^{%L}'
set logscale y
set autoscale x
set yrange [1:]
set output "${FILENAME}.png"
set datafile separator ","
set style data histogram
plot "${FILENAME}.csv" using 2:3:4:xtic(1) title "Execution time (ns)" linecolor rgb purple linewidth 0
EOF
done
You can also search about how to embed this script in your code, is possible to have Gnuplot in your C code. Look at here for instance:
Making C code plot a graph automatically

Program does not take input from user to loop itself again.

I have this code down here and it working for calculating but it does not take the input of the user to do the loop again and calculate for the user again or cancel. the function is near the end with if(rank == 0) { ... }. I need help figure out what am I missing
#include <math.h> //include files
#include <stdio.h>
#include "mpi.h"
void printit()
{
printf("\n*********************************\n");
printf("Welcome to the pi calculator!\n");
printf("Programmer: K. Spry\n");
printf("You set the number of divisions\n");
printf("for estimating the integral: \n\tf(x)=4/(1+x^2)");
printf("\n");
printf("*********************************\n");
} //end printit
//function prototypes
int main(int argc, char* argv[])
{
double actual_pi = 3.141592653589793238462643;
//for comparison later
int n, rank, num_proc, i;
double temp_pi, calc_pi, int_size, part_sum, x;
char response = 'y';
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &num_proc);
if (rank == 0)
{
printit();
} /* I am root node, print out welcome */
while (response == 'y')
{
if (rank == 0)
{ /*I am root node*/
printf("__________________________________\n");
printf("\nEnter the number of intervals: (0 will exit)\n");
n = fgetc(stdin);
}
else
{
int_size = 1.0 / (double) n; //calcs interval size
part_sum = 0.0;
for (i = rank * n / num_proc; i <= (rank + 1)* n / num_proc; i += 1)
{ //calcs partial sums
x = int_size * ((double)i - 0.5);
part_sum += (4.0 / (1.0 + x*x));
}
temp_pi = int_size * part_sum;
//collects all partial sums computes pi
MPI_Reduce(&temp_pi,&calc_pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
{ /*I am server*/
printf("pi is approximately %f", calc_pi);
printf(". Error is %f", fabs(calc_pi - actual_pi));
printf("\n");
printf("_______________________________________");
printf("\n");
}
} //end else
if (rank == 0)
{ /*I am root node*/
printf("\nCompute with new intervals? (y/n)\n");
response = fgetc(stdin);
}
} //end while
MPI_Finalize(); //terminate MPI
return 0;
}
The problem I have is with the if inside the loop while that ask user to Compute with new intervals and user will input Y or N to response. The problem is when users do input, it stop working and never loop.

It behave as it should from what you coded :-).
The comment from ptb is the answer of your question.... But lets do it :-).
while (response == 'y')
{
if (rank == 0)
{ /*I am root node*/
printf("__________________________________\n");
printf("\nEnter the number of intervals: (0 will exit)\n");
n = fgetc(stdin);
}
/* here we have to: broadcast to all processes the value of response
in fact all processes have to wait that root get the new value and then
get this value from the root. Fortunately MPI_Broadcast is a blocking
opperation, so no MPI_Barrier are needed...*/
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);
/* then your computation is done by every processes (in fact in your interval
boundaries you indeed take that process 0 has the first interval... i am not
sure your math a correct, I did not check them) */
int_size = 1.0 / (double) n; //calcs interval size
part_sum = 0.0;
for (i = rank * n / num_proc; i <= (rank + 1)* n / num_proc; i += 1)
{ //calcs partial sums
x = int_size * ((double)i - 0.5);
part_sum += (4.0 / (1.0 + x*x));
}
temp_pi = int_size * part_sum;
//collects all partial sums computes pi
MPI_Reduce(&temp_pi,&calc_pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
{ /*I am server*/
printf("pi is approximately %f", calc_pi);
printf(". Error is %f", fabs(calc_pi - actual_pi));
printf("\n");
printf("_______________________________________");
printf("\n");
}
if (rank == 0)
{ /*I am root node*/
printf("\nCompute with new intervals? (y/n)\n");
response = fgetc(stdin);
}
/* here we have to: broadcast to all processes the value of response
in fact all processes have to wait that root get the new value and then
get this value from the root. Fortunately MPI_Broadcast is a blocking
opperation, so no MPI_Barrier are needed..., lol same than with n...*/
MPI_Bcast(&response,1,MPI_CHAR,0,MPI_COMM_WORLD);
} //end while
I did not try to compile... so there is maybe some typos...

Segmentation fault (core dumped) in parallel computing

I am trying to code an AMPI to sum a 1d array in parallel. However, I am getting a segmentation fault (core dumped).
I tried to fix my function so many times but I could not find where the error is or how to fix it.
/* File: mpi_sum.c
* Compile as: mpicc -g -Wall -std=c99 -o mpi_sum mpi_sum.c -lm
* Run as: mpirun -n 40 ./mpi_sum
* Description: An MPI solution to sum a 1D array. */
int main(int argc, char *argv[]) {
int myID, numProcs; // myID for the index to know when should the cpu start and stop calculate
//numPro numper of cpu you need to do the calculation
double localSum; // this for one operation on one cpu
double parallelSum; // this for collecting the values of localsum
int length = 10000000; // this for how many num
double Fact = 1 ;
int i; // this for for loop
clock_t clockStart, clockEnd; // timer
srand(5); // Initialize MPI
MPI_Init(NULL, NULL); //Initialize MPI
MPI_Comm_size(MPI_COMM_WORLD, &numProcs); // Get size
MPI_Comm_rank(MPI_COMM_WORLD, &myID); // Get rank
localSum = 0.0; // the value for eash cpu is 0
int A = (length / numProcs)*((long)myID); // this is to make each cpu work on his area
int B = (length / numProcs)*((long)myID + 1); // this is to make each cpu work on his area
A ++; // add 1 to go to next num
B ++;
clockStart = clock(); // start the timer to see how much time it take
for (i = A; i < B; i++)
{
Fact = (1 / myID - 1/numProcs) / (1 - 1/numProcs);
localSum += Fact ;
}
MPI_Reduce(&localSum, &parallelSum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
clockEnd = clock();
if (myID == 0)
{
printf("Time to sum %d floats with MPI in parallel %3.5f seconds\n", length, (clockEnd - clockStart) / (float)CLOCKS_PER_SEC);
printf("The parallel sum: %f\n", parallelSum + 1);
}
MPI_Finalize();
return 0;
}

When I ran your code, my numProcs came up as 1 and the program crashed with
*** Process received signal ***
Signal: Floating point exception (8)
Signal code: Integer divide-by-zero (1)
Failing at address: 0x400af9
[ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x10330)[0x7f8bb13d2330]
[ 1] ./mpi_sum[0x400af9]
[ 2] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5)[0x7f8bb101af45]
[ 3] ./mpi_sum[0x400919]
*** End of error message ***
Floating point exception (core dumped)
on the line
Fact = (1 / myID - 1/numProcs) / (1 - 1/numProcs);
because we had a zero in the denominator.
Since you are getting a different error, may I suggest putting in a bunch of:
printf("%d\n", __LINE__); fflush(stdout);
statements to get an idea where it is crashing?

Proper way to create ghost zones MPI [halos]

Good night
I'm attending to a parallel programming course. The teacher gave us an assignment that involves domain partition for stencil calculations. For this type of calculations (finite difference) the most common way to parallelize a code is to partition the domain and create some ghost zones (halos).
For better understand the creation of ghost zones in MPI I programmed this simple example that initialize some arrays with inner values = 123 and boundary values 88. At the end of all communication, all ghost values should remain 8. In one node I'm getting 123 values.
Serial (no ghosts):
123 - 123 - ... - 123 - 123
Two partitions:
123 - 123 - ... - 88 ||| 88 - ... - 123 - 123
Three partitions:
123 - 123 - ... - 88 ||| 88 - ... - 123 - 123 - 88 ||| 88 - ... - 123 - 123
Aside from this bug, the main question here is about the correct approach to create and maintain ghost zones updated. Is there a cleaner solution for this aside from my messy if(myid == .... else if( myid = ... else type of implementation ? How people usually implement this kind of parallelism ?
#include<mpi.h>
#include<stdio.h>
#include<stdlib.h>
int WhichSize(int mpiId, int numProc, int tam);
int main(int argc, char *argv[]){
int i;
int localSize;
int numProc;
int myid;
int leftProc;
int rightProc;
int * myArray;
int fullDomainSize = 16;
MPI_Request request;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numProc);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
// Lets get each partition size.
localSize = WhichSize(myid, numProc, fullDomainSize);
// Allocate arrays acording to proc number.
if(numProc == 1){
//printf("Allocating Array for serial usage\n");
myArray = (int*)malloc(localSize*sizeof(int));
} else if(numProc == 2) {
//printf("Allocating Array for 2 proc usage\n");
myArray = (int*)malloc((localSize+ 1)*sizeof(int));
} else if(numProc > 2) {
if (myid == 0 || myid == numProc - 1){
//printf("Allocating array for boundary nodes usage\n");
myArray = (int*)malloc((localSize+ 1)*sizeof(int));
} else {
//printf("Allocating array for inner nodes usage\n");
myArray = (int*)malloc((localSize+ 2)*sizeof(int));
}
}
// Now we will fill the arrays with a dummy value 123. For the
// boundaries (ghosts) we will fill than with 80. Just to differe
// ntiate.
if(numProc == 1){
//printf("----------------------------------------\n");
//printf("Filling the serial array with values... \n");
for (i = 0; i<localSize; i++){
myArray[i] = 123;
}
} else if(numProc == 2) {
////printf("------------------------------------------------\n");
//printf("Filling array for two proc usage with values... \n");
for (i = 0; i<localSize; i++){
myArray[i] = 123;
}
// ghost.
myArray[localSize+1] = 8;
} else if(numProc > 2) {
if (myid == 0 || myid == numProc - 1){
//printf("--------------------------------------------------\n");
//printf("Filling boundary node arrays usage with values... \n");
for (i = 0; i<localSize; i++){
myArray[i] = 123;
}
// ghosts.
myArray[localSize+1] = 8;
} else {
//printf("--------------------------------------------------\n");
//printf("Filling inner node arrays usage with values... \n");
for (i = 0; i<localSize; i++){
myArray[i] = 123;
}
// ghosts.
myArray[localSize+1] = 8;
myArray[0] = 8;
}
}
// Now lets comunicate the ghosts with MPI_Sendrecv().
if(numProc == 1){
//printf("Serial usage, no ghost to comunicate \n");
} else if(numProc == 2) {
if (myid == 0){
//printf("Sending ghost value from proc %d to %d\n", myid, myid + 1);
MPI_Isend(&myArray[localSize+1],
1,
MPI_INT,
1,
12345,
MPI_COMM_WORLD,
&request);
} else if (myid == 1) {
//printf("Receiving ghost value from proc %d to %d\n", myid-1, myid);
MPI_Irecv(&myArray[localSize+1],
1,
MPI_INT,
0,
12345,
MPI_COMM_WORLD,
&request);
}
} else if(numProc > 2) {
if (myid == 0){
rightProc = myid + 1;
if (myid == 0){
//printf("-------------------------------\n");
//printf("Communicating Boundary ghosts !\n");
//printf("-------------------------------\n");
//printf("Sending ghost value from proc %d to %d\n", myid, myid + 1);
MPI_Isend(&myArray[localSize+1],
1,
MPI_INT,
rightProc,
12345,
MPI_COMM_WORLD,
&request);
} else if (myid == rightProc) {
//printf("Receiving ghost value from proc %d to %d\n", myid-1, myid);
MPI_Irecv(&myArray[localSize+1],
1,
MPI_INT,
0,
12345,
MPI_COMM_WORLD,
&request);
}
} else if (myid == numProc - 1) {
leftProc = myid - 1;
if (myid == numProc - 1){
//printf("-------------------------------\n");
//printf("Communicating Boundary ghosts !\n");
//printf("-------------------------------\n");
////printf("Sending ghost value from proc %d to %d\n", myid, myid + 1);
MPI_Isend(&myArray[localSize+1],
1,
MPI_INT,
leftProc,
12345,
MPI_COMM_WORLD,
&request);
} else if (myid == leftProc) {
rightProc = myid + 1;
//printf("Receiving ghost value from proc %d to %d\n", myid-1, myid);
MPI_Irecv(&myArray[localSize+1],
1,
MPI_INT,
rightProc,
12345,
MPI_COMM_WORLD,
&request);
}
} else {
//printf("-------------------------------\n");
//printf("Communicating Inner ghosts baby\n");
//printf("-------------------------------\n");
leftProc = myid - 1;
rightProc = myid + 1;
// Communicate tail ghost.
if (myid == leftProc) {
MPI_Isend(&myArray[localSize+1],
1,
MPI_INT,
rightProc,
12345,
MPI_COMM_WORLD,
&request);
} else if (myid == rightProc){
MPI_Irecv(&myArray[localSize+1],
1,
MPI_INT,
leftProc,
12345,
MPI_COMM_WORLD,
&request);
}
// Communicate head ghost.
if (myid == leftProc) {
MPI_Isend(&myArray[0],
1,
MPI_INT,
rightProc,
12345,
MPI_COMM_WORLD,
&request);
} else if (myid == rightProc){
MPI_Irecv(&myArray[0],
1,
MPI_INT,
leftProc,
12345,
MPI_COMM_WORLD,
&request);
}
}
}
// Now I Want to see if the ghosts are in place !.
if (myid == 0){
printf("The ghost value is: %d\n", myArray[localSize + 1]);
} else if (myid == numProc - 1){
printf("The ghost value is: %d\n", myArray[0]);
} else {
printf("The head ghost is: %d\n", myArray[0]);
printf("The tail ghost is: %d\n", myArray[localSize + 1]);
}
MPI_Finalize();
exit(0);
}
int WhichSize(int mpiId, int numProc, int tam){
double resto;
int tamLocal;
tamLocal = tam / numProc;
resto = tam - tamLocal*numProc;
if (mpiId < resto) tamLocal = tamLocal + 1;
return tamLocal;
}
thank you guys !

Halos can be elegantly implemented in MPI using Cartesian virtual topologies and the send-receive operation.
First of all, having lots of rank-dependent logic in conditional operators makes the code hard to read and understand. It is way better when the code is symmetric, i.e. when all ranks execute the same code. Corner cases can be taken care of using the MPI_PROC_NULL null rank - a send to or receive from that rank results in a no-op. It is therefore enough to do:
// Compute the rank of the left neighbour
leftProc = myid - 1;
if (leftProc < 0) leftProc = MPI_PROC_NULL;
// Compute the rank of the right neighbour
rightProc = myid + 1;
if (rightProc >= numProc) rightProc = MPI_PROC_NULL;
// Halo exchange in forward direction
MPI_Sendrecv(&myArray[localSize], 1, MPI_INT, rightProc, 0, // send last element to the right
&myArray[0], 1, MPI_INT, leftProc, 0, // receive into left halo
MPI_COMM_WORLD);
// Halo exchange in reverse direction
MPI_Sendrecv(&myArray[1], 1, MPI_INT, leftProc, 0, // send first element to the left
&myArray[localSize+1], 1, MPI_INT, rightProc, 0, // receive into right halo
MPI_COMM_WORLD);
That code works for any rank, even for those at both ends - there either the source or the destination is the null rank and no actual transfer occurs in the corresponding direction. It also works with any number of MPI processes, from one to many. It requires that all ranks have halos on both sides, including those that don't really need it (the two corner ranks). One can store in those dummy halos useful things like boundary values (e.g. when solving PDEs) or simply live with the memory waste, which is usually negligible.
In your code, you use incorrectly non-blocking operations. Those are tricky and require care to be taken. MPI_Sendrecv could and should be used instead. It performs both send and receive operations at the same time and thus prevents deadlocks (as long as there is a matching receive for each send).
If the domain is periodic, then the rank computation logic becomes simply:
// Compute the rank of the left neighbour
leftProc = (myid - 1 + numProc) % numProc;
// Compute the rank of the right neighbour
rightProc = (myid + 1) % numProc;
Instead of doing the arithmetic, one could create a Cartesian virtual topology and then use MPI_Cart_shift to find the ranks of the two neighbours:
// Create a non-periodic 1-D Cartesian topology
int dims[1] = { numProc };
int periods[1] = { 0 }; // 0 - non-periodic, 1 - periodic
MPI_Comm cart_comm;
MPI_Cart_create(MPI_COMM_WORLD, 1, dims, periods, 1, &cart_comm);
// Find the two neighbours
MPI_Cart_shift(cart_comm, 0, 1, &leftProc, &rightProc);
The code for the halo exchange remains the same with the only difference that cart_comm should replace MPI_COMM_WORLD. MPI_Cart_shift automatically takes care of the corner cases and will return MPI_PROC_NULL when appropriate. The advantage of that method is that you can easily switch between non-periodic and periodic domains by simply flipping the values inside the periods[] array.
The halos have to be updates as often as necessary, which depends on the algorithm. With most iterative schemes, the update must happen at the beginning of each iteration. One could reduce the communication frequency by introducing multi-level halos and using the values in the outer levels to compute the values in the inner ones.
To conclude, your main function could be reduced to (without using a Cartesian topology):
int main(int argc, char *argv[]){
int i;
int localSize;
int numProc;
int myid;
int leftProc;
int rightProc;
int * myArray;
int fullDomainSize = 16;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numProc);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
// Compute neighbouring ranks
rightProc = myid + 1;
if (rightProc >= numProc) rightProc = MPI_PROC_NULL;
leftProc = myid - 1;
if (leftProc < 0) leftProc = MPI_PROC_NULL;
// Lets get each partition size.
localSize = WhichSize(myid, numProc, fullDomainSize);
// Allocate arrays.
myArray = (int*)malloc((localSize+ 2)*sizeof(int));
// Now we will fill the arrays with a dummy value 123. For the
// boundaries (ghosts) we will fill than with 80. Just to differe
// ntiate.
//printf("--------------------------------------------------\n");
//printf("Filling node arrays usage with values... \n");
for (i = 1; i<localSize; i++){
myArray[i] = 123;
}
// ghosts.
myArray[localSize+1] = 8;
myArray[0] = 8;
//printf("-------------------------------\n");
//printf("Communicating Boundary ghosts !\n");
//printf("-------------------------------\n");
//printf("Sending ghost value to the right\n");
MPI_Sendrecv(&myArray[localSize], 1, MPI_INT, rightProc, 12345,
&myArray[0], 1, MPI_INT, leftProc, 12345,
MPI_COMM_WORLD);
//printf("Sending ghost value to the left\n");
MPI_Sendrecv(&myArray[1], 1, MPI_INT, leftProc, 12345,
&myArray[localSize+1], 1, MPI_INT, rightProc, 12345,
MPI_COMM_WORLD);
// Now I Want to see if the ghosts are in place !.
printf("[%d] The head ghost is: %d\n", myid, myArray[0]);
printf("[%d] The tail ghost is: %d\n", myid, myArray[localSize + 1]);
MPI_Finalize();
return 0;
}

MPI: Broadcasting a long long int

This program estimates Pi by throwing random "darts" (sampling points) to a circle or radius=1 inscribed inside a square board of length=2. Using the relationship
Area of circle / Area of Square = Pi/4
we can estimate Pi using the same relationship expressed as
Darts Inside Circle / Darts Outside Circle = Pi/4
The program works fine when I specify NDARTS in a #define, but when trying to broadcast it as a long long int, read from scanf, I get the following execution error:
mpirun -np 4 ./pi_montecarlo.x
-----------------------------------------------------------------------------
One of the processes started by mpirun has exited with a nonzero exit
code. This typically indicates that the process finished in error.
If your process did not finish in error, be sure to include a "return
0" or "exit(0)" in your C code before exiting the application.
PID 10591 failed on node n0 (127.0.0.1) due to signal 11.
Why?
Is there anything wrong with my MPI_Bcast declaration?
long long int *NDARTS=0;
scanf("%Ld",NDARTS);
MPI_Bcast(NDARTS, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD);
Full code:
/*
mpicc -g -Wall -lm pi_montecarlo3.c -o pi_montecarlo.x
mpirun -np 4 ./pi_montecarlo.x
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#define MASTER 0
#define PI 3.1415926535
d ouble pseudo_random (double a, double b) {
double r;
r = ((b-a) * ((double) rand() / (double) RAND_MAX)) +a;
return r;
}
int main(int argc, char*argv[]){
long long int *NDARTS=0;
int proc_id,
n_procs,
llimit,
ulimit,
n_circle,
i;
double pi_current,
pi_sum,
x,
y,
z,
error,
start_time,
end_time;
struct timeval stime;
llimit = -1;
ulimit = 1;
n_circle =0;
MPI_Init(&argc, &argv);
MPI_Comm_rank (MPI_COMM_WORLD, &proc_id);
MPI_Comm_size (MPI_COMM_WORLD, &n_procs);
if (proc_id == MASTER){
printf("\nMonte Carlo Method to estimate Pi \n\n");
printf("Introduce Number of Darts \n");
scanf("%Ld",NDARTS);
printf(" Number of processes: %d \n", n_procs);
printf(" Number of darts: %Ld \n", *NDARTS);
MPI_Bcast(NDARTS, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD);
start_time = MPI_Wtime();
}
gettimeofday(&stime, NULL);
srand(stime.tv_usec * stime.tv_usec * stime.tv_usec * stime.tv_usec);
for (i=1; i<=*NDARTS;i++){
x = pseudo_random(llimit, ulimit);
y = pseudo_random(llimit, ulimit);
z = pow(x,2) + pow(y,2);
if (z<=1.0){
n_circle++;
}
}
pi_current = 4.0 * (double)n_circle / (double) *NDARTS;
MPI_Reduce (&pi_current, &pi_sum, 1, MPI_DOUBLE, MPI_SUM, MASTER, MPI_COMM_WORLD);
if (proc_id == MASTER) {
pi_sum = pi_sum / n_procs;
error = fabs ((pi_sum -PI) / PI) *100;
end_time = MPI_Wtime();
printf("Known value of PI : %11.10f \n", PI);
printf("Estimated Value of PI : %11.10f\n", pi_sum);
printf("Error Percentage : %10.8f\n", error);
printf("Time : %10.8f\n\n", end_time - start_time);
}
MPI_Finalize();
return 0;
}

You're not using scanf() correctly. It should be like this instead:
long long int NDARTS;
scanf("%lld",&NDARTS);
MPI_Bcast(&NDARTS, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD);
In your current code, long long int *NDARTS=0; effectively initializes NDARTS as a NULL pointer. So scanf() will obviously seg-fault when it tries to write to it.

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Sum of the number 1 to 1000 using MPI - c

Related

How can I visualize MPI measures in plots with C?

Program does not take input from user to loop itself again.

Segmentation fault (core dumped) in parallel computing

Proper way to create ghost zones MPI [halos]

MPI: Broadcasting a long long int

Categories

Resources