Bitonic Sorting with MPI in C

I'm working on a project about parallel bitonic sorting, implemented in C with MPI. The program I developed works, but it's not efficient: a plain serial quicksort (sigh) beats it in execution time. Maybe the problem is the cost of communication, but I don't see how to improve that, so here's the code:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <string.h>
#include "bs-util.h"
#include "quicksort.h"
#define TAG 1
/* Run this program knowing that:
* 1) The number of cores must be a power of 2
* 2) The length of the array to order must be a power of 2
*
* Exec Example: mpirun -n 4 ./bs 1024 1024
* */
void exchange(FILE *log, int i, int partner, int up);
int countTransfer = 0;
int *myArray, *partnerArray;
int currentPartner = -1;
int rank, size;
MPI_Status status;
int verbose = 0; // toggles some useful debugging prints on (1) or off (0)
int amount=0;
int main(int argc, char *argv[])
{
int *array;
int i=0;
int carry=0;
int up=1;
int count=0;
struct timeval tim;
FILE *log;
char logName[15] = "log/";
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* Time meter */
srand((double) time(NULL));
gettimeofday(&tim, NULL);
double t1=tim.tv_sec+(tim.tv_usec/1000000.0);
snprintf(logName+4, 10, "%d",rank);
log = fopen(logName,"w");
printf("Hello world from process %d of %d.\n", rank, size);
MPI_Barrier(MPI_COMM_WORLD);
/* INPUT */
if (rank==0)
{
if (argc==2) /* by file */
{
FILE *input = fopen(argv[1],"r");
char line[20];
count = 0;
while(fgets(line,20,input) != NULL)
{
count++;
}
fclose(input);
array = (int *)malloc(count*sizeof(int));
input = fopen(argv[1],"r");
i = 0;
while(fgets(line,20,input) != NULL)
{
array[i] = atoi(line);
i++;
}
fclose(input);
}
else
if (argc==3) /* by command line */
{
count = atoi(argv[1]);
int max = atoi(argv[2]);
array = (int *)malloc(count*sizeof(int));
srand(time(NULL));
for (i=0; i<count; i++)
{
array[i] = rand()%max;
}
}
else
{
printf("\n\n ----------- ERRORE NEI PARAMETRI DI INPUT ----------- \n\n");
return 1;
}
/* END OF THE INPUT */
if (verbose){
printf("Initial array:\n");
for (i=0; i<count; i++)
{
printf("%d\t", array[i]);
}
printf("\n");
}
/* Everyone waits for each other */
MPI_Barrier(MPI_COMM_WORLD);
carry = count%size;
amount = count/size + carry;
printf("\nParametri: amount=%d carry=%d\n\n", amount, carry);
up=1;
int startIndex = amount;
myArray = (int *)malloc(amount*sizeof(int));
/* Buffer (partner) */
partnerArray = (int *)malloc(amount*sizeof(int));
for (i=0; i<amount; i++)
myArray[i] = array[i];
printf("Processo %d riceve amount=%d e up=%d\n", rank, amount, up);
if (verbose){
printf("Mia porzione ---> ");
for (i=0; i<amount; i++)
{
printf("%d\t", myArray[i]);
}
printf("\n");
}
/* Sending the big array's chunks */
for (i=1; i<size; i++)
{
up = (i+1) % 2;
MPI_Send(&up, 1, MPI_INT, i, TAG, MPI_COMM_WORLD);
MPI_Send(&amount, 1, MPI_INT, i, TAG, MPI_COMM_WORLD);
MPI_Send(&carry, 1, MPI_INT, i, TAG, MPI_COMM_WORLD);
MPI_Send(array+startIndex, amount-carry, MPI_INT, i, TAG, MPI_COMM_WORLD);
startIndex += amount-carry;
}
MPI_Barrier(MPI_COMM_WORLD);
}
else
{
MPI_Barrier(MPI_COMM_WORLD);
MPI_Recv(&up, 1, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
MPI_Recv(&amount, 1, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
MPI_Recv(&carry, 1, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
myArray = (int *)malloc(amount*sizeof(int));
partnerArray = (int *)malloc(amount*sizeof(int)); /* Buffer (partner) */
MPI_Recv(myArray, amount, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
/* Experimental padding: every chunk has the same number of items. */
for (i=amount-carry; i<amount; i++)
{
myArray[i] = 0;
}
printf("\n");
printf("Processo %d riceve amount=%d e up=%d\n", rank, amount-carry, up);
if (verbose){
printf("Mia porzione ---> ");
for (i=0; i<amount; i++)
{
printf("%d\t", myArray[i]);
}
printf("\n");
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* CORE */
/* Local Quicksort */
int result = quickSort(&myArray[0], amount); //this function is written within src/quicksort.c
if (verbose){
if (result == 1)
printf("Quick Sort: FAIL \n");
else
{
printf("\nLa mia porzione ordinata (processo %d)\n", rank);
for(i=0; i<amount; i++)
{
printf("%d ",myArray[i]);
}
printf ("\n");
}
}
int j;
for (up=8;up<=amount*size;up=2*up)
{
for (j=up>>1;j>0;j=j>>1)
{
for (i=0;i<amount*size;i++)
{
int partner=i^j;
if ((partner)>i)
{
exchange(log,i,partner,i&up);
}
}
}
}
/* END OF THE CORE */
if (rank!=0)
{
MPI_Send(myArray, amount, MPI_INT, 0, TAG, MPI_COMM_WORLD);
}
gettimeofday(&tim, NULL);
double t2=tim.tv_sec+(tim.tv_usec/1000000.0);
if (rank==0)
{
myArray = (int *)realloc(myArray,sizeof(int)*amount*size);
for (i=1; i<size; i++)
MPI_Recv(myArray+i*amount, amount, MPI_INT, i, TAG, MPI_COMM_WORLD, &status);
printf("\nTempo trascorso %6f\n", t2-t1);
fprintf(log,"\n\n----------> Array Iniziale <----------\n");
printArray(log,array,count);
fprintf(log,"\n\n----------> Array Finale <----------\n");
printArray(log,myArray+(carry*(size-1)),count);
/*printArray(log,myArray,newAmount*size);*/
}
fprintf(log,"Numero di chunk scambiati: %d\n",countTransfer);
fclose(log);
MPI_Finalize();
return 0;
}
void exchange(FILE *log, int i, int partner, int up)
{
int rank_i = i/amount;
int rank_partner = partner/amount;
int offset_i = i%amount;
int offset_partner = partner%amount;
/*if (verbose)
fprintf(log,"\nnewAmount = %d - Rank_i = %d - Rank_partner = %d - Offset_i = %d - Offset_partner = %d \n",amount,rank_i,rank_partner,offset_i,offset_partner);
*/
if ((rank_i != rank) && (rank_partner != rank))
return;
if ((rank_i == rank) && (rank_partner == rank))
{
if (((up==0) && (myArray[offset_i] > myArray[offset_partner])) || ((up!=0) && (myArray[offset_i] < myArray[offset_partner])))
{
int temp = myArray[offset_i];
myArray[offset_i] = myArray[offset_partner];
myArray[offset_partner] = temp;
}
return;
}
if (rank_i == rank && rank_partner != rank)
{
if (currentPartner != rank_partner)
{
MPI_Send(myArray, amount, MPI_INT, rank_partner, TAG, MPI_COMM_WORLD);
MPI_Recv(partnerArray, amount, MPI_INT, rank_partner, TAG, MPI_COMM_WORLD, &status);
currentPartner = rank_partner;
countTransfer++;
}
if (((up==0) && (myArray[offset_i] > partnerArray[offset_partner])) || ((up!=0) && (myArray[offset_i] < partnerArray[offset_partner])))
myArray[offset_i] = partnerArray[offset_partner];
return;
}
if (rank_i != rank && rank_partner == rank)
{
if (currentPartner != rank_i)
{
MPI_Recv(partnerArray, amount, MPI_INT, rank_i, TAG, MPI_COMM_WORLD, &status);
MPI_Send(myArray, amount, MPI_INT, rank_i, TAG, MPI_COMM_WORLD);
currentPartner = rank_i;
countTransfer++;
}
if (((up==0) && (partnerArray[offset_i] > myArray[offset_partner])) || ((up!=0) && (partnerArray[offset_i] < myArray[offset_partner])))
myArray[offset_partner] = partnerArray[offset_i];
return;
}
}
And here's the Makefile:
CC = mpicc
OPTIMIZE =
CFLAGS = $(DEFINES) $(OPTIMIZE)
LFLAGS = -lm
PROGS = ./bs
PROGS_SRC = src/bs-util.c src/bs.c src/quicksort.c
all:
$(CC) $(CFLAGS) $(LFLAGS) -o $(PROGS) $(PROGS_SRC)
Help would be very much appreciated :)
References: http://goo.gl/nXt4p

Remember that parallel bitonic sort has a time complexity of roughly (N/P)(log N)^2, compared to N log N for a serial quicksort (P being the number of processors). This means that whenever log N > P, even the serial quicksort should beat the bitonic sort, before we even account for implementation-dependent constant factors or communication. Bitonic sort is meant for genuinely parallel hardware (it is pretty good on GPUs), not for the small cluster of PCs you probably have.
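To put rough numbers on that: with N = 2^20 and P = 4, the bitonic term is (N/P)(log N)^2 = 262144 * 400, about 1.0e8 element operations, while serial quicksort does N log N = 1048576 * 20, about 2.1e7. Since log N = 20 > P = 4, the serial sort wins on paper even before any communication cost is counted.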

Many sends/receives of small data chunks (as in the exchange function) hurt performance badly. It is more efficient to combine the small pieces into one buffer and send that in a single message.
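For example, a whole-block compare-split step does one message exchange per partner instead of one per element. This is only a sketch, not your exact API: compare_split, scratch and merged are made-up names, and it assumes each process's block is already sorted ascending.
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
/* Sketch: exchange whole sorted blocks with the partner once and keep
 * either the lower or the upper half of the merged result.
 * mine: my sorted block of n ints; scratch: buffer of n ints;
 * merged: buffer of 2*n ints (all caller-provided). */
void compare_split(int *mine, int *scratch, int *merged,
                   int n, int partner, int keep_low, MPI_Comm comm)
{
    /* One send/receive pair per partner instead of one per element. */
    MPI_Sendrecv(mine, n, MPI_INT, partner, 0,
                 scratch, n, MPI_INT, partner, 0,
                 comm, MPI_STATUS_IGNORE);
    /* Two-way merge of the two sorted blocks. */
    int i = 0, j = 0, k = 0;
    while (i < n && j < n)
        merged[k++] = (mine[i] <= scratch[j]) ? mine[i++] : scratch[j++];
    while (i < n) merged[k++] = mine[i++];
    while (j < n) merged[k++] = scratch[j++];
    /* Keep the half that belongs to this process. */
    memcpy(mine, keep_low ? merged : merged + n, n * sizeof(int));
}
Each process would call this once per (stage, step) pair of the bitonic network, with the partner computed as rank ^ j and keep_low chosen from the sorting direction of its block, so the per-element exchange() calls go away.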

Um. I don't see you doing any collective communication other than barriers...
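For the initial distribution and the final collection you could let the library do the work. A rough sketch (it assumes count is a multiple of size, which your power-of-two requirement already guarantees, and array only needs to be valid on rank 0):
int chunk = count / size;
int *local = (int *)malloc(chunk * sizeof(int));
/* Rank 0's array is split into equal chunks, one per process. */
MPI_Scatter(array, chunk, MPI_INT, local, chunk, MPI_INT, 0, MPI_COMM_WORLD);
/* ... local quicksort and bitonic merge phases on `local` ... */
/* Collect the sorted chunks back on rank 0 in rank order. */
MPI_Gather(local, chunk, MPI_INT, array, chunk, MPI_INT, 0, MPI_COMM_WORLD);
free(local);
That replaces both the loop of MPI_Send calls for up/amount/carry and the receive loop at the end with two collectives.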

Related

Why does this program not enter the if branches on the other ranks?

The code I am trying to write has to implement a skribbl.io-style game. I am working with MPI, and the work is divided between the ranks (rank 0 is the main process and assigns the drawer; the drawer rank draws and collects the info; the other ranks are the players). I have two problems with this code (the second one follows from the first). The first problem is that although the code has branches telling each process what to do, the players never enter their respective ifs (if (rank != drawer)). I put printfs before and after the if statement; the one before is reached, the one after is not. The second problem is that the MPI_Gather calls in all the cases don't work as expected. I want to send a string array (char[][]), but the drawer just waits for data and never gets any (probably because the other ranks can't enter their ifs).
Can anyone help me with this?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>
typedef struct dataa{
char fUname[18], sUname[18], tUname[18];
} Data;
short ran(int lower_limit, int upper_limit, unsigned int *seed) // random generator
{
return (short) ((double) rand_r(seed) / (RAND_MAX + 1.0) * (upper_limit - lower_limit + 1) + lower_limit);
}
void generate(char fUname[18], char sUname[18], char tUname[18], MPI_Datatype* strct) {
int arrayOfBlocklengths[3] = {18, 18, 18};
MPI_Datatype arrayOfTypes[3] = {MPI_CHAR, MPI_CHAR, MPI_CHAR};
MPI_Aint fAddr, sAddr, tAddr;
MPI_Aint arrayOfDisplacements[3] = {0};
MPI_Get_address(fUname, &fAddr);
MPI_Get_address(sUname, &sAddr);
MPI_Get_address(tUname, &tAddr);
arrayOfDisplacements[1] = sAddr - fAddr;
arrayOfDisplacements[2] = tAddr - fAddr;
MPI_Type_create_struct(3, arrayOfBlocklengths, arrayOfDisplacements, arrayOfTypes, strct);
MPI_Type_commit(strct);
}
int main(int argc, const char* argv[]) {
if (argc != 1) {
printf("man no good i no need parameter bro\n");
exit(1);
}
int n, rank, i = 0;
//printf("%d\n", n);
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &n);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int nrOfGames, wordChooser;
unsigned int seed = getpid();
if (rank == 0) {
nrOfGames = ran(5, 15, &seed);
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
} else {
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process #%d: nrOfGames: %d\n", rank, nrOfGames);
}
for (i = 0; i < nrOfGames; i++) {
printf("%d. iteration: ranks are: %d\n", i, rank);
/*if (i % n != rank) {
continue;
}*/
if (rank == 0) {
int drawerRank = ran(1, n - 1, &seed);
int j;
MPI_Bcast(&drawerRank, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Main process: drawer generated, their rank is %d.\n", drawerRank);
char fileName[15] = "./threewords.sh";
FILE *f = popen(fileName, "r");
Data data;
fscanf(f, "%s %s %s", data.fUname, data.sUname, data.tUname);
printf("Main process: generated usernames are: %s %s %s\n", data.fUname, data.sUname, data.tUname);
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Main process: generated the structure\n");
MPI_Send(&data, 1, strct, drawerRank, 0, MPI_COMM_WORLD);
printf("Main process: new struct sent\n");
char badMsg[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
int as = 0;
for (as = 0; as < 5; as++) {
printf("szo: %s ", badMsg[as]);
}
char guesses[n * 6][18];
MPI_Gather(badMsg, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawerRank, MPI_COMM_WORLD);
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
MPI_Recv(&pointsPerPlayer, n - 1, MPI_INT, drawerRank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Main process: Receive command sent.\n");
continue;
}
printf("\t\t\trank: %d\n", rank);
if (rank != 0) {
int drawer;
MPI_Bcast(&drawer, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process with rank %d got the drawer, %d.\n", rank, drawer);
if (rank == drawer) {
printf("I am the drawer, rank %d.\n", drawer);
// drawer case
char wordToDraw[18];
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
Data data;
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Drawer process generated the structure.\n");
Data recData;
MPI_Recv(&recData, 1, strct, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("\nDrawer process received the structure from the main process, usernames are %s %s %s\n", recData.fUname, recData.sUname, recData.tUname);
MPI_Type_free(&strct);
wordChooser = ran(1, 3, &seed);
if (wordChooser == 1) {
strcpy(wordToDraw, data.fUname);
} else if (wordChooser == 2) {
strcpy(wordToDraw, data.sUname);
} else {
strcpy(wordToDraw, data.tUname);
}
// draws it, then waits for the guesses
int j, k, guessed = 0;
char guessesPerThr[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
char guesses[n * 6][18];
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
printf("sus\n");
j = 1;
k = 0;
while (j < n) {
if (j != 0 && j != rank) {
k = 0;
while (k < 5) {
if (!strcmp(wordToDraw, guessesPerThr[j * 5 + k])) {
guessed++;
pointsPerPlayer[j] += 5 - k;
break;
}
k++;
}
} else {
if (j == 0) {
pointsPerPlayer[j] = 0;
}
}
j++;
}
if (guessed) {
pointsPerPlayer[rank] = guessed - (n - guessed);
if (pointsPerPlayer[i] < 0) {
pointsPerPlayer[i] *= -1;
}
}
MPI_Send(&pointsPerPlayer, n - 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
continue;
}
printf("\t\t\t\t\t\t\trank:%d \t drawer: %d\n", rank, drawer);
if (rank != drawer) {
int drawer;
printf("u ok m8?\n");
MPI_Recv(&drawer, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Process #%d: The drawer is %d.\n", rank, drawer);
FILE *g = popen("./fivewords.sh", "r");
char guessesPerThr[5][18], guesses[n * 6][18];
int j;
for (j = 0; j < 5; j++) {
fscanf(g, "%s", guessesPerThr[j]);
}
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
}
}
}
MPI_Finalize();
return 0;
}

Sum of the numbers 1 to 10000 using MPI_Scatter

I'm currently developing a piece of code that runs with 10 processes. Process 0 reads a total of 10000 numbers (from lab7.csv). Afterwards, it distributes the array to all of the processes. In order to do this, I created an array named "intArray[10000]" that is shared by all processes. 49893236 is the correct sum.
The following code makes use of 10 processors to compute the sum of the numbers from 1 to 10000. Each processor computes its share of this aggregate, and the results are shown on the screen.
When it runs, the following error appears.
I couldn't figure out what the problem is. Please assist me in this matter.
#include <mpi.h>
#include <stdio.h>
#include <string.h>
int main()
{
int rank, nodes;
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nodes);
MPI_Status status;
int intArray[10000];
int subIntArray[1000];
if(rank == 0) {
// Substitute the full file path for the string file_path
FILE *fp = fopen("./lab7.csv", "r");
int i = 0;
int num;
if (!fp) {
printf("Can't open file\n");
} else {
while (fscanf(fp, "%d", &num) > 0)
{
intArray[i] = num;
i++;
}
// Close the file
fclose(fp);
}
}
MPI_Scatter(intArray, 10000, MPI_INT, subIntArray, 1000, MPI_INT, 0, MPI_COMM_WORLD);
int ans = 0;
int total = 0;
int start = rank * 1000;
int end = start + 999;
for(int i = start; i <= end; i++) {
ans = ans + subIntArray[i];
}
if(rank != 0) {
MPI_Ssend(&ans, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
} else {
total = ans;
for(int j = 1; j < 10; j++) {
MPI_Recv(&ans, 1, MPI_INT, j, 0, MPI_COMM_WORLD, &status);
total += ans;
}
printf("Total is %d\n", total);
}
MPI_Finalize();
return 0;
}
The PBS job file is as follows:
#PBS -l nodes=2
#PBS -l walltime=00:02:00
#PBS -l select=5
cat $PBS_NODEFILE
NPROC=10
cd $PBS_O_WORKDIR
MPISIZE=$NPROC
MPIPROG=`basename $PBS_JOBNAME .pbs`
echo 'Running MPI program' $MPIPROG 'on' $MPISIZE 'processes'
echo 'Started at' `date`
echo '--------------------------------------------------------------------------------'
(time mpirun -n $MPISIZE ./$MPIPROG) 2>&1
echo '--------------------------------------------------------------------------------'
echo 'Finished at' `date`
This is the error message that is shown on the terminal.
The orientation of MPI_Scatter is a bit different from the one you have in mind.
You say: I have NTOT data elements and I want to send them to NODECOUNT nodes, so each node should process NTOT / NODECOUNT elements. (This doesn't work if NTOT is not an exact multiple of NODECOUNT.)
But MPI_Scatter is oriented the other way: I have NPER elements that each node should process, and NODECOUNT nodes, so the total number of elements is NTOT = NPER * NODECOUNT. This is how the manpage example shows it.
So you want to give a count of NPER to MPI_Scatter, not NTOT. And you want the send and receive counts to match.
Also, because MPI_Scatter does the split for you, the slave nodes should not use start/end as you calculated, but always do:
start = 0;
end = NPER - 1;
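Concretely, with NPER defined as the per-rank count (1000 in your setup), the call and the local loop become:
/* Send count and receive count are both per-rank. */
MPI_Scatter(intArray, NPER, MPI_INT,
            subIntArray, NPER, MPI_INT,
            0, MPI_COMM_WORLD);
/* Every rank sums its own chunk, indexed 0 .. NPER-1. */
int ans = 0;
for (int i = 0; i < NPER; i++)
    ans += subIntArray[i];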
Also, in your code ...
You were indexing into subIntArray as if you could access 0-9999 instead of 0-999, so you were going beyond the end of the array and had UB (undefined behavior).
It's a bit shaky to hardwire numbers like 10 and 1000 everywhere. Better to use a #define and the actual node count nodes.
And you assume you have 10000 valid input data elements rather than computing the count from the i index variable in your fscanf loop.
Here is the corrected code, with some extra debug code I used.
I used preprocessor conditionals to denote old vs new code (e.g.):
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
Also, I didn't have your data files, so I had to synthesize the input data.
Anyway, here it is:
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
int rank;
int nodes;
FILE *xfdbg;
#define dbgprtattr(_lvl) \
__attribute__((__format__(__printf__,_lvl,_lvl + 1)))
#if DEBUG || _USE_ZPRT_
#define dbgprt(_fmt...) \
_dbgprt(_fmt)
#else
#define dbgprt(_fmt...) \
do { } while (0)
#endif
void dbgprtattr(1)
_dbgprt(const char *fmt,...)
{
va_list ap;
char buf[10000];
char *bp = buf;
bp += sprintf(bp,"[%d] ",rank);
va_start(ap,fmt);
bp += vsprintf(bp,fmt,ap);
va_end(ap);
fputs(buf,xfdbg);
fflush(xfdbg);
}
//#define NTOT 10000
//#define NPER (NTOT / nodes)
int
main()
{
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nodes);
MPI_Status status;
char logf[100];
sprintf(logf,"log_%2.2d",rank);
xfdbg = fopen(logf,"w");
#if 0
int BIGSIZE = nodes * 1000;
int NPER = NTOT / nodes;
#else
//int NPER = 1000;
int NPER = 50;
//int NTOT = NPER * nodes;
#endif
int val;
int truetotal = 0;
#if 0
int intArray[NTOT];
#else
int intArray[nodes][NPER];
#endif
#if 1
int subIntArray[NPER];
#else
int subIntArray[NTOT];
#endif
if (rank == 0) {
#if 0
// Substitute the full file path for the string file_path
FILE *fp = fopen("./lab7.csv", "r");
int i = 0;
int num;
if (!fp) {
printf("Can't open file\n");
}
else {
while (fscanf(fp, "%d", &num) > 0) {
intArray[i] = num;
i++;
}
// Close the file
fclose(fp);
}
#endif
for (int nd = 0; nd < nodes; ++nd) {
for (int i = 0; i < NPER; ++i) {
val = (nd << 16) | i;
intArray[nd][i] = val;
truetotal += val;
}
}
}
dbgprt("main: hello\n");
#if 0
MPI_Scatter(intArray, NTOT, MPI_INT,
subIntArray, NPER, MPI_INT,
0, MPI_COMM_WORLD);
dbgprt("main: post\n");
#endif
#if 1
MPI_Scatter(intArray, NPER, MPI_INT,
subIntArray, NPER, MPI_INT,
0, MPI_COMM_WORLD);
dbgprt("main: post\n");
#endif
#if 0
MPI_Scatter(intArray, NTOT, MPI_INT,
subIntArray, NTOT, MPI_INT,
0, MPI_COMM_WORLD);
dbgprt("main: post\n");
#endif
//sleep(10);
int ans = 0;
int total = 0;
#if 0
int start = rank * NPER;
int end = start + NPER - 1;
#else
int start = 0;
int end = NPER - 1;
#endif
dbgprt("main: START start=%d end=%d\n",start,end);
for (int i = start; i <= end; i++) {
dbgprt("main: DATA i=%d sub=%8.8X\n",i,subIntArray[i]);
ans = ans + subIntArray[i];
}
dbgprt("main: loopdone ans=%d\n",ans);
if (rank != 0) {
MPI_Ssend(&ans, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
else {
total = ans;
for (int j = 1; j < nodes; j++) {
MPI_Recv(&ans, 1, MPI_INT, j, 0, MPI_COMM_WORLD, &status);
total += ans;
}
printf("Total is %d\n", total);
printf("Total is %d (TRUE)\n", truetotal);
}
fclose(xfdbg);
MPI_Finalize();
return 0;
}

MPI Search In Array

I'm trying to find a specific value inside an array, searching in parallel with MPI. When my code finds the value, it shows an error.
ERROR
Assertion failed in file src/mpid/ch3/src/ch3u_buffer.c at line 77: FALSE
memcpy argument memory ranges overlap, dst_=0x7ffece7eb590 src_=0x7ffece7eb590 len_=4
PROGRAM
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
const char *FILENAME = "input.txt";
const size_t ARRAY_SIZE = 640;
int main(int argc, char **argv)
{
int *array = malloc(sizeof(int) * ARRAY_SIZE);
int rank,size;
MPI_Status status;
MPI_Request request;
int done,myfound,inrange,nvalues;
int i,j,dummy;
/* Let the system do what it needs to start up MPI */
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&size);
myfound=0;
if (rank == 0)
{
createFile();
array = readFile(FILENAME);
}
MPI_Bcast(array, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Irecv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
MPI_Test(&request, &done, &status);
nvalues = ARRAY_SIZE / size; //EACH PROCESS RUNS THAT MUCH NUMBER IN ARRAY
i = rank * nvalues; //OFFSET FOR EACH PROCESS INSIDE THE ARRAY
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues); //LIMIT OF THE OFFSET
while (!done && inrange)
{
if (array[i] == 17)
{
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
}
printf("P:%d - %d - %d\n", rank, i, array[i]);
MPI_Test(&request, &done, &status);
++i;
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
}
if (!myfound)
{
printf("P:%d stopped at global index %d\n", rank, i - 1);
}
MPI_Finalize();
}
The error is somewhere in here, because when I put a value that isn't in the array (for example -5) into the if condition, the program runs smoothly.
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
Thanks
Your program is invalid with respect to the MPI standard because you use the same buffer (&dummy) for both MPI_Irecv() and MPI_Send().
You can either use two distinct buffers (e.g. dummy_send and dummy_recv), or, since you do not seem to care about the value of dummy, use NULL as the buffer and send/receive zero-size messages.
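A minimal sketch of both options (dummy_send and dummy_recv are just illustrative names):
/* Option 1: separate buffers for the outstanding receive and the sends. */
int dummy_send = 1, dummy_recv;
MPI_Request request;
MPI_Irecv(&dummy_recv, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
for (int j = 0; j < size; j++)
    MPI_Send(&dummy_send, 1, MPI_INT, j, 1, MPI_COMM_WORLD);

/* Option 2: zero-size messages, since only the notification matters. */
MPI_Irecv(NULL, 0, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
for (int j = 0; j < size; j++)
    MPI_Send(NULL, 0, MPI_INT, j, 1, MPI_COMM_WORLD);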

Mandelbrot set using MPICH. Trying to learn but can't figure this bug out

I am trying to implement a master/slave relationship which computes the Mandelbrot set and prints it into a PPM file. This is what I have so far:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi/mpi.h>
int calculateMan (double , double ); //calculateMandelbrotSet
MPI_Status status;
struct Number {
double R;
double i;
} Z,C;
const int color;
int colorTemp; //color value
const int max = 1000; //max iteration value
const int ResHeight = 800; //Resolution
const int ResWidth = 800;
double CRMax = 1.5;
double CIMax = 2.0;
double CRMin = -2.5;
double CIMin = -2.0; //Constant values
double colorWidth;
double colorHeight;
int main (int argc, char** argv) {
int rank, size = 0;
int nodos, source, dest;
double startTime, endTime;
//Rank = current process ID
//Size = amount of processes
MPI_Init (&argc, &argv); // starts MPI
startTime = MPI_Wtime();
MPI_Comm_size (MPI_COMM_WORLD, &size); // get number of processes
MPI_Comm_rank (MPI_COMM_WORLD, &rank); // get current process
nodos = size - 1;
if (rank == 0) { // MASTER --------------------------------------
colorHeight = (CIMax - CIMin) / ResHeight;
colorWidth = (CRMax - CRMin) / ResWidth;
FILE *fp;
fp = fopen("Mandelbrot.ppm","w");
fprintf(fp,"P3\n %d\n %d\n %d\n",ResWidth,ResHeight,255); //Magic Number & Header
for (int row = 0; row < ResHeight; row++) {
C.i= CIMin + row*colorHeight;
for (int column = 0; column < ResWidth; column++) {
C.R = CRMin + column*colorWidth;
//data sends
for (dest = 1; dest <= nodos; dest++) {
MPI_Send(&C.R, sizeof(double), MPI_DOUBLE, dest, column, MPI_COMM_WORLD);
MPI_Send(&C.i, sizeof(double), MPI_DOUBLE, dest, column, MPI_COMM_WORLD);
}
}
}
for (int row = 0; row < ResHeight; row++) {
for (int column = 0; column < ResWidth; column++) {
//Recv and print
MPI_Recv(&colorTemp, sizeof(int), MPI_DOUBLE, source, 0, MPI_COMM_WORLD, &status);
fprintf(fp, "%d %d %d\n", colorTemp, 1,3);
}
}
fclose(fp);
} //------------------------- END MASTER
if (rank > 0) // START SLAVE --------------------------------------
{
for (int row = 0; row < ResHeight; row++) {
for (int column = 0; column < ResWidth; column++) {
MPI_Recv(&C.R, sizeof(double), MPI_DOUBLE, 0, column, MPI_COMM_WORLD, &status);
MPI_Recv(&C.i, sizeof(double), MPI_DOUBLE, 0, column, MPI_COMM_WORLD, &status);
colorTemp = calculateMan(C.R, C.i);
MPI_Send(&colorTemp, sizeof(int), MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
} // SLAVE END---------------------------------
endTime = MPI_Wtime(); //stop timer
MPI_Finalize(); //end MPI
printf("Time: %.6f\n", endTime-startTime);
exit(0); //end program
}
int calculateMan (double CReal, double CImaginary) {
int i = 0;
Z.R = 0.0;
Z.i = 0.0;
while (((i < max) && (Z.R*Z.R) + (Z.i * Z.i) < 4))
{
double temp = (Z.R * Z.R) - (Z.i * Z.i) + CReal;
Z.i = 2.0 * Z.R * Z.i + CImaginary;
Z.R = temp;
i++;
}
if (i == max)
return 0; //interior is black
else
return 255; //exterior white
}
I am trying to run my program, but I cannot figure out why the receive-and-print loop iterates forever. Also, can anyone have a look at the code and point out any other issues or things I should watch out for, for future reference?
Thanks!

Parallel sort using MPI

I'm trying to sort several arrays with MPI. Every array is allocated locally.
For example, we have {1-7-4-12} {3-7-5-9} {12-15-2-16} {10-8-11-13}
and we want {1-2-3-4} {5-6-7-8} {9-10-11-12} {13-14-15-16}.
So I use an odd-even strategy. With 2 processes it works in every case, but when I try with more processes I get values that were never in the input. For my example I can end up with {23-2-3-4}. I think my problem comes from memory allocation, but I can't find where or what I'm doing wrong...
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MASTER 0
#define MIN(a,b) ((a)<(b)?(a):(b))
#define BLOCK_LOW(id,p,n) ((id)*(n)/(p))
#define BLOCK_HIGH(id,p,n) \
(BLOCK_LOW((id)+1,p,n)-1)
#define BLOCK_SIZE(id,p,n) \
(BLOCK_LOW((id)+1, p, n)-BLOCK_LOW(id, p , n))
#define BLOCK_OWNER(index,p,n) \
(((p)*(index+1)-1)/(n))
int nbProcess, id, n; // n = number of values
void printTabByProcess(int *T){
int i = 0;
int size = BLOCK_SIZE(id, nbProcess, n);
printf("Tab n°%d [ ", id, size);
for(i; i < size; i++){
printf(" %d ", T[i]);
}
printf(" ]\n");
}
void fusion(int *t,int deb1,int fin1,int fin2){
int *table1;
int deb2=fin1+1;
int compt1=deb1;
int compt2=deb2;
int i;
table1=(int*)malloc((fin1-deb1+1)*sizeof(int));
for(i=deb1;i<=fin1;i++) {
table1[i-deb1]=t[i];
}
for(i=deb1;i<=fin2;i++){
if(compt1==deb2)
break;
else if(compt2==(fin2+1)){
t[i]=table1[compt1-deb1];
compt1++;
}
else if(table1[compt1-deb1]<t[compt2]){
t[i]=table1[compt1-deb1];
compt1++;
}
else{
t[i]=t[compt2];
compt2++;
}
}
free(table1);
}
void tri_fusion(int*t,int deb,int fin){
if(deb!=fin){
int milieu=(fin+deb)/2;
tri_fusion(t,deb,milieu);
tri_fusion(t,milieu+1,fin);
fusion(t,deb,milieu,fin);
}
}
int* fusion2(int* t1, int* t2, int size1, int size2){
int* buffer = malloc(sizeof(int)*(size1 + size2));
int index1 = 0;
int index2 = 0;
int i = 0;
for(i; i < (size1 + size2) - 1; i++){
if(t1[index1] < t2[index2]){
buffer[i] = t1[index1];
index1++;
}else{
buffer[i] = t2[index2];
index2++;
}
}
if(index1 == size1 - 1 ){
buffer[size1 + size2 - 1] = t1[index1];
}else{
buffer[size1 + size2 - 1] = t2[index2];
}
return buffer;
}
/*
*
* OUR FUNCTION TO PARALLEL SORT
*
*/
void TD_trier(int* T){
MPI_Status status;
int size = BLOCK_SIZE(id, nbProcess, n);
int receive_size = 0;
int* receive;
int* array_tmp;
int i = 0;
tri_fusion(T, 0, size - 1);
MPI_Barrier(MPI_COMM_WORLD);
for(i; i < nbProcess; i++){
if(i%2==0){
if(id % 2 == 1){//send to left
MPI_Send(&size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Recv(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
}else {
MPI_Recv(&receive_size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(&array_tmp[size], receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
T = realloc(array_tmp, sizeof(int) * size);
}
if(id == 1){
//~ printTabByProcess(T);
}
}else if(i%2 == 1 && id < nbProcess-1){ //send to right
if(id % 2 == 1){
MPI_Send(&size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
//printTabByProcess(T);
MPI_Recv(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
}else if(id != 0 && id%2 ==0) {
MPI_Recv(&receive_size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//printTabByProcess(receive);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(array_tmp, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
printTabByProcess(&array_tmp[2]);
T = array_tmp + size;
printTabByProcess(T);
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
//printTabByProcess(T);
}
int generateRandomValue(){
return rand() % 100;
}
//init array with "random" value
int* TD_init(int n){
int i = 0;
int indiceDerniere = (id+1)*n/nbProcess -1;
int indicePremiere = id*n/nbProcess;
int* arrayLocal;
int localSize = indiceDerniere - indicePremiere +1;
arrayLocal = malloc(sizeof(int)*localSize);
//~ printf("id : %d - nbCase : %d (debut : %d, fin : %d)\n",
//~ id, localSize, indicePremiere, indiceDerniere);
for(i; i < localSize; i++){
arrayLocal[i] = generateRandomValue() - id;
}
printTabByProcess(arrayLocal);
return arrayLocal;
}
int main (int argc, char *argv[]){
//int n = 0;
int *dataLocal;
int dest;
int x;
int success;
MPI_Status status;
srand(time(NULL));
/***** Initializations *****/
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nbProcess); // number of processes
MPI_Comm_rank(MPI_COMM_WORLD, &id); // rank of this process
//~ printf ("MPI task %d has started...\n", id);
//~ tag2 = 1;
//~ tag1 = 2;
MPI_Barrier (MPI_COMM_WORLD);
/***** Master task only ******/
if (id == MASTER){
printf("Chose a number of value :");
scanf("%d",&n);
/* Send the number of cases */
for (dest=1; dest<nbProcess; dest++) {
MPI_Send(&n, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); //send number of value
}
} /* end of master section */
/***** Non-master tasks only *****/
if (id > MASTER) {
/* Receive the number of cases */
MPI_Recv(&n, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD, &status);
}
MPI_Barrier (MPI_COMM_WORLD);
dataLocal = TD_init(n);
MPI_Barrier (MPI_COMM_WORLD);
if(id == 0){
printf("__________________________________________\n");
}
TD_trier(dataLocal);
MPI_Finalize();
}
The trouble may come from the fusion2 function: index1 can become higher than size1. In fact, the MPI part works correctly; the code works once the bounds tests are added. Here is a version that is not optimal, but...
int* fusion2(int* t1, int* t2, int size1, int size2){
int* buffer = malloc(sizeof(int)*(size1 + size2));
int index1 = 0;
int index2 = 0;
int i = 0;
for(i; i < (size1 + size2) ; i++){
if(index1==size1){
buffer[i] = t2[index2];
index2++;
}else{
if(index2==size2){
buffer[i] = t1[index1];
index1++;
}else{
if(t1[index1] < t2[index2]){
buffer[i] = t1[index1];
index1++;
}else{
buffer[i] = t2[index2];
index2++;
}
}
}
}
return buffer;
}
Also watch your memory management.
For example: did you free T before doing the following?
T = realloc(array_tmp, sizeof(int) * size);
Did you free receive? Did you free array_tmp in the second branch?
I fear there are memory leaks... It might be better to avoid allocating inside fusion2, and even inside the loops: allocating array_tmp and receive once at the start, with enough space, might be safer (and faster?).
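Something along these lines, for instance (just a sketch; max_block is a made-up name and is only an upper bound on any block size):
/* Allocate the work buffers once, before the odd-even loop. */
int max_block = n / nbProcess + 1;                    /* upper bound on any block size */
int *receive = malloc(sizeof(int) * max_block);       /* partner's block */
int *array_tmp = malloc(sizeof(int) * 2 * max_block); /* merged result   */
for (i = 0; i < nbProcess; i++) {
    /* ... exchange blocks, merge into array_tmp, copy the kept half back into T ... */
}
free(receive);
free(array_tmp);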
Bye,
Francis
One more thing: qsort (from stdlib) may be faster for the local sorting.
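For example, the local tri_fusion call could be replaced with this (a sketch; cmp_int is a name I made up):
#include <stdlib.h>
/* Comparator for qsort: ascending ints, written to avoid overflow. */
static int cmp_int(const void *a, const void *b)
{
    int x = *(const int *)a, y = *(const int *)b;
    return (x > y) - (x < y);
}
/* ... inside TD_trier, instead of tri_fusion(T, 0, size - 1): */
qsort(T, size, sizeof(int), cmp_int);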
