Parallel sort using MPI - C

I'm trying to sort distributed arrays with MPI; each array is allocated locally.
For example we have {1-7-4-12} {3-7-5-9} {12-15-2-16} {10-8-11-13}
and we want {1-2-3-4} {5-6-7-8} {9-10-11-12} {13-14-15-16}.
So I use an odd-even strategy. With 2 processes it works in every case, but when I try with more processes I get new values. For my example I can end up with {23-2-3-4}. I think my problem comes from memory allocation, but I can't find where or what I'm doing wrong...
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MASTER 0
#define MIN(a,b) ((a)<(b)?(a):(b))
#define BLOCK_LOW(id,p,n) ((id)*(n)/(p))
#define BLOCK_HIGH(id,p,n) \
(BLOCK_LOW((id)+1,p,n)-1)
#define BLOCK_SIZE(id,p,n) \
(BLOCK_LOW((id)+1, p, n)-BLOCK_LOW(id, p , n))
#define BLOCK_OWNER(index,p,n) \
(((p)*(index+1)-1)/(n))
int nbProcess, id, n; //n = number of value
void printTabByProcess(int *T){
int i = 0;
int size = BLOCK_SIZE(id, nbProcess, n);
printf("Tab n°%d [ ", id, size);
for(i; i < size; i++){
printf(" %d ", T[i]);
}
printf(" ]\n");
}
void fusion(int *t,int deb1,int fin1,int fin2){
int *table1;
int deb2=fin1+1;
int compt1=deb1;
int compt2=deb2;
int i;
table1=(int*)malloc((fin1-deb1+1)*sizeof(int));
for(i=deb1;i<=fin1;i++) {
table1[i-deb1]=t[i];
}
for(i=deb1;i<=fin2;i++){
if(compt1==deb2)
break;
else if(compt2==(fin2+1)){
t[i]=table1[compt1-deb1];
compt1++;
}
else if(table1[compt1-deb1]<t[compt2]){
t[i]=table1[compt1-deb1];
compt1++;
}
else{
t[i]=t[compt2];
compt2++;
}
}
free(table1);
}
void tri_fusion(int*t,int deb,int fin){
if(deb!=fin){
int milieu=(fin+deb)/2;
tri_fusion(t,deb,milieu);
tri_fusion(t,milieu+1,fin);
fusion(t,deb,milieu,fin);
}
}
int* fusion2(int* t1, int* t2, int size1, int size2){
int* buffer = malloc(sizeof(int)*(size1 + size2));
int index1 = 0;
int index2 = 0;
int i = 0;
for(i; i < (size1 + size2) - 1; i++){
if(t1[index1] < t2[index2]){
buffer[i] = t1[index1];
index1++;
}else{
buffer[i] = t2[index2];
index2++;
}
}
if(index1 == size1 - 1 ){
buffer[size1 + size2 - 1] = t1[index1];
}else{
buffer[size1 + size2 - 1] = t2[index2];
}
return buffer;
}
/*
*
* OUR FUNCTION TO PARALLEL SORT
*
*/
void TD_trier(int* T){
MPI_Status status;
int size = BLOCK_SIZE(id, nbProcess, n);
int receive_size = 0;
int* receive;
int* array_tmp;
int i = 0;
tri_fusion(T, 0, size - 1);
MPI_Barrier(MPI_COMM_WORLD);
for(i; i < nbProcess; i++){
if(i%2==0){
if(id % 2 == 1){//send to left
MPI_Send(&size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Recv(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
}else {
MPI_Recv(&receive_size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(&array_tmp[size], receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
T = realloc(array_tmp, sizeof(int) * size);
}
if(id == 1){
//~ printTabByProcess(T);
}
}else if(i%2 == 1 && id < nbProcess-1){ //send to right
if(id % 2 == 1){
MPI_Send(&size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
//printTabByProcess(T);
MPI_Recv(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
}else if(id != 0 && id%2 ==0) {
MPI_Recv(&receive_size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//printTabByProcess(receive);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(array_tmp, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
printTabByProcess(&array_tmp[2]);
T = array_tmp + size;
printTabByProcess(T);
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
//printTabByProcess(T);
}
int generateRandomValue(){
return rand() % 100;
}
//init array with "random" value
int* TD_init(int n){
int i = 0;
int indiceDerniere = (id+1)*n/nbProcess -1;
int indicePremiere = id*n/nbProcess;
int* arrayLocal;
int localSize = indiceDerniere - indicePremiere +1;
arrayLocal = malloc(sizeof(int)*localSize);
//~ printf("id : %d - nbCase : %d (debut : %d, fin : %d)\n",
//~ id, localSize, indicePremiere, indiceDerniere);
for(i; i < localSize; i++){
arrayLocal[i] = generateRandomValue() - id;
}
printTabByProcess(arrayLocal);
return arrayLocal;
}
int main (int argc, char *argv[]){
//int n = 0;
int *dataLocal;
int dest;
int x;
int success;
MPI_Status status;
srand(time(NULL));
/***** Initializations *****/
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nbProcess); //numtask contient le nombre de processeur
MPI_Comm_rank(MPI_COMM_WORLD, &id); //taskid, determine le numero du processus
//~ printf ("MPI task %d has started...\n", id);
//~ tag2 = 1;
//~ tag1 = 2;
MPI_Barrier (MPI_COMM_WORLD);
/***** Master task only ******/
if (id == MASTER){
printf("Chose a number of value :");
scanf("%d",&n);
/* Send the number of cases */
for (dest=1; dest<nbProcess; dest++) {
MPI_Send(&n, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); //send number of value
}
} /* end of master section */
/***** Non-master tasks only *****/
if (id > MASTER) {
/* Receive the number of cases */
MPI_Recv(&n, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD, &status);
}
MPI_Barrier (MPI_COMM_WORLD);
dataLocal = TD_init(n);
MPI_Barrier (MPI_COMM_WORLD);
if(id == 0){
printf("__________________________________________\n");
}
TD_trier(dataLocal);
MPI_Finalize();
}

The trouble probably comes from the fusion2 function: index1 can become larger than size1. The MPI part actually works correctly; once the bounds checks are in place, the code works. Here is a version that is not optimal, but correct:
int* fusion2(int* t1, int* t2, int size1, int size2){
int* buffer = malloc(sizeof(int)*(size1 + size2));
int index1 = 0;
int index2 = 0;
int i = 0;
for(i; i < (size1 + size2) ; i++){
if(index1==size1){
buffer[i] = t2[index2];
index2++;
}else{
if(index2==size2){
buffer[i] = t1[index1];
index1++;
}else{
if(t1[index1] < t2[index2]){
buffer[i] = t1[index1];
index1++;
}else{
buffer[i] = t2[index2];
index2++;
}
}
}
}
return buffer;
}
Watch the memory management.
For example: did you free T before doing
T = realloc(array_tmp, sizeof(int) * size);
Did you free "receive"? Did you free "array_tmp" in the second branch?
I fear there are memory leaks... It might be better to avoid allocating inside fusion2, and even inside the loops. Allocating array_tmp and receive once at the start, with "enough" space, might be safer (and faster?).
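For example, the branch that keeps the lower half could free its temporaries like this (an untested sketch using the names from your code; memcpy needs <string.h>):
MPI_Recv(&receive_size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
receive = malloc(sizeof(int) * receive_size);        /* size of the partner's block, not ours */
MPI_Recv(receive, receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(&array_tmp[size], receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
memcpy(T, array_tmp, sizeof(int) * size);            /* keep the lower half; T keeps its own storage */
free(array_tmp);
free(receive);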
Bye,
Francis
One more thing: qsort (from stdlib) may be faster for the local sorting.
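For example (a minimal sketch of an int comparator):
int compare_int(const void *a, const void *b)
{
    int x = *(const int *)a;
    int y = *(const int *)b;
    return (x > y) - (x < y);   /* avoids overflow of x - y */
}
/* then, instead of tri_fusion(T, 0, size - 1): */
qsort(T, size, sizeof(int), compare_int);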

Related

Send over MPI_Bcast an MPI_PACKED message

I have a message that I want to send as a broadcast using MPI_Bcast.
I have two structs that contain, among other things, dynamic arrays, and because of that I decided to use MPI_Pack and MPI_Unpack.
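For context, the general pack-then-broadcast pattern this relies on looks roughly like this (a minimal sketch with illustrative names; MPI_Pack_size is the portable way to size the staging buffer):
int n = 0;                                   /* e.g. a count that travels first */
char text[] = "example";                     /* e.g. one dynamic string */
int intsz, charsz, pos = 0;
MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &intsz);
MPI_Pack_size((int)sizeof text, MPI_CHAR, MPI_COMM_WORLD, &charsz);
unsigned char *buf = malloc(intsz + charsz);
MPI_Pack(&n, 1, MPI_INT, buf, intsz + charsz, &pos, MPI_COMM_WORLD);
MPI_Pack(text, (int)sizeof text, MPI_CHAR, buf, intsz + charsz, &pos, MPI_COMM_WORLD);
/* pos now holds the packed size; broadcast buf as pos bytes of MPI_PACKED
   and MPI_Unpack on the receivers in exactly the same order */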
My full solution is below.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include <stddef.h>
#define DEBUG 0
typedef struct Code {
char character;
int length;
char *code;
} Code;
typedef struct CodeDictionary {
int codesNr;
Code *codes;
} CodeDictionary;
typedef struct Header {
int size; // size of the message in bytes
MPI_Datatype *type; // message type
int position; // position in the buffer
} Header;
typedef unsigned char BYTE;
int getRand(const int from, const int to)
{
int num = (rand() % (to - from + 1)) + from;
return num;
}
void buildCodeDictionaryType(MPI_Datatype *CodeDictType) {
int blockLengths[] = {1, 1};
MPI_Datatype types[] = {MPI_CHAR, MPI_INT};
MPI_Aint offsets[2];
offsets[0] = offsetof(Code, character);
offsets[1] = offsetof(Code, length);
MPI_Type_create_struct(2, blockLengths, offsets, types, CodeDictType);
MPI_Type_commit(CodeDictType);
}
BYTE* buildCodeDictionaryMsg(Header *header, CodeDictionary *dict) {
header->size = sizeof(int);
BYTE *buffer = malloc(sizeof(BYTE) * (header->size));
MPI_Pack(&dict->codesNr, 1, MPI_INT, buffer, header->size, &header->position, MPI_COMM_WORLD);
if (DEBUG == 1) {
printf("\ndict->codesNr = %d\n", dict->codesNr);
printf("header->size = %d\n", header->size);
printf("header->position = %d\n\n", header->position);
}
for (int i = 0; i < dict->codesNr; i++) {
header->size += sizeof(char) + sizeof(int) + (sizeof(char) * (dict->codes[i].length+1));
buffer = realloc(buffer, header->size);
MPI_Pack(&dict->codes[i], 1, *header->type, buffer, header->size, &header->position, MPI_COMM_WORLD);
if (DEBUG == 1) {
printf("before pack array - header->size = %d\n", header->size);
printf("before pack array - header->position = %d\n", header->position);
}
MPI_Pack(dict->codes[i].code, (dict->codes[i].length+1), MPI_CHAR, buffer, header->size, &header->position, MPI_COMM_WORLD);
if (DEBUG == 1) {
printf("after pack array - header->size = %d\n", header->size);
printf("after pack array - header->position = %d\n", header->position);
printf("\n");
}
}
return buffer;
}
void buildCodeDictionary(Header *header, CodeDictionary *dict, BYTE* buffer) {
MPI_Unpack(buffer, header->size, &header->position, &dict->codesNr, 1, MPI_INT, MPI_COMM_WORLD);
if (DEBUG == 1) {
printf("dict->codesNr = %d\n", dict->codesNr);
printf("header->size = %d\n", header->size);
printf("header->position = %d\n\n", header->position);
}
dict->codes = malloc(sizeof(Code) * dict->codesNr);
// I do it just for the first element because of test
MPI_Unpack(buffer, header->size, &header->position, &dict->codes[0], 1, *header->type, MPI_COMM_WORLD);
if (DEBUG == 1) {
printf("before unpack - header->size = %d\n", header->size);
printf("before unpack - header->position = %d\n", header->position);
}
dict->codes[0].code = malloc(sizeof(char) * (dict->codes[0].length+1));
MPI_Unpack(buffer, header->size, &header->position, &dict->codes[0].code, (dict->codes[0].length+1), MPI_CHAR, MPI_COMM_WORLD);
if (DEBUG == 1) {
printf("after unpack - header->size = %d\n", header->size);
printf("after unpack - header->position = %d\n", header->position);
}
// just for test
printf("character: %c\tlength: %d\tcode: ",
dict->codes[0].character,
dict->codes[0].length);
printf("%s\n", dict->codes[0].code); // it crashes here
// if it works do a for in order to unpack all the data
// ...
}
int main(int argc, char** argv) {
MPI_Init(NULL, NULL);
// Get the number of processes
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Get the rank of the process
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
// Create a CodeDictionary variable
CodeDictionary dict;
if (world_rank == 0)
printf("sending\n\n");
if (world_rank == 0) {
dict.codesNr = getRand(5, 9);
dict.codes = malloc(sizeof(Code) * dict.codesNr);
// create some fake values
for (int i = 0; i < dict.codesNr; i++) {
dict.codes[i].character = 'a' + i;
dict.codes[i].length = getRand(1, 9);
dict.codes[i].code = malloc(sizeof(char) * (dict.codes[i].length+1));
for (int j = 0; j < dict.codes[i].length; j++) {
int randChar = getRand('a', 'z');
dict.codes[i].code[j] = randChar + j;
}
dict.codes[i].code[dict.codes[i].length] = '\0';
if (DEBUG == 1)
printf("strlen(dict.charEncoding[%d].encoding): %d\n", i, strlen(dict.codes[i].code));
}
printf("source data\n");
for (int i = 0; i < dict.codesNr; i++) {
printf("codes[%d]:\n\t", i);
printf("character: %c\tlength: %d\tcode: ", dict.codes[i].character, dict.codes[i].length);
for (int j = 0; j < dict.codes[i].length; j++)
printf("%c", dict.codes[i].code[j]);
printf("\n");
}
}
MPI_Datatype codeDictType;
buildCodeDictionaryType(&codeDictType);
Header header = {.size = 0, .position = 0, .type = NULL};
header.type = &codeDictType;
BYTE *buffer = NULL;
if (world_rank == 0)
buffer = buildCodeDictionaryMsg(&header, &dict);
MPI_Bcast(&header.size, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (DEBUG == 1)
printf("rank %d: header.size = %d\n", world_rank, header.size);
if (world_rank != 0)
buffer = calloc(header.size, sizeof(BYTE));
MPI_Bcast(buffer, header.size, MPI_PACKED, 0, MPI_COMM_WORLD);
if (world_rank != 0) {
printf("\nreceiving\n\n");
buildCodeDictionary(&header, &dict, buffer);
// printf("received data\n");
// for (int i = 0; i < dict.codesNr; i++) {
// printf("codes[%d]:\n\t", i);
// printf("character: %c\tlength: %d\tcode: ", dict.codes[i].character, dict.codes[i].length);
// for (int j = 0; j < dict.codes[i].length; j++)
// printf("%c", dict.codes[i].code[j]);
// printf("\n");
// }
}
free(buffer);
MPI_Type_free(&codeDictType);
for (int i = 0; i < dict.codesNr; i++)
free(dict.codes[i].code);
free(dict.codes);
MPI_Finalize();
return 0;
}
For some reason this line of code doesn't work: MPI_Unpack(buffer, header->size, &header->position, &dict->codes[0].code, (dict->codes[0].length+1), MPI_CHAR, MPI_COMM_WORLD); if I try to print the received array, the run crashes with a segmentation fault.
I don't understand why this happens; I take care of the memory by allocating the right size, '\0' character included.
Do you know what the problem is?
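For comparison, when unpacking into a heap-allocated character array, the output argument is normally the allocated buffer itself (the pointer value), not the address of the pointer variable; a minimal sketch:
dict->codes[0].code = malloc(dict->codes[0].length + 1);
MPI_Unpack(buffer, header->size, &header->position,
           dict->codes[0].code,                 /* destination: the allocated char array */
           dict->codes[0].length + 1, MPI_CHAR, MPI_COMM_WORLD);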

Why does this program not enter on the other ranks?

The code I am trying to write implements a Skribbl.io-style game. I am working with MPI, and the work is divided between the ranks (rank 0 is the main process and assigns the drawer; the drawer rank draws and collects the info; the other ranks are the players). I have two problems with this code (the second one follows from the first). The first problem is that although the code has cases telling each process what to do, the players never enter their respective ifs (if (rank != drawer)). I put printfs before and after the if statement; the one before is reached, the one after is not. The second problem is that the MPI_Gather calls in the various cases don't work as expected: I want to send a string array (char[][]), but the drawer's call just waits for data and never gets any (probably because the other ranks never reach their ifs).
Can anyone help me with this?
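For reference, a gather of fixed-width strings, where every rank contributes the same number of 18-character rows, looks roughly like this (a minimal sketch with illustrative names; MPI_Gather is collective, so every rank of the communicator must call it with the same root):
char guessesPerRank[5][18];                               /* filled in by each rank */
int nranks;
MPI_Comm_size(MPI_COMM_WORLD, &nranks);
char *allGuesses = malloc((size_t)nranks * 5 * 18);       /* root receives nranks * 5 rows */
MPI_Gather(guessesPerRank, 5 * 18, MPI_CHAR,
           allGuesses, 5 * 18, MPI_CHAR,                  /* recvcount is the per-rank count */
           0 /* root */, MPI_COMM_WORLD);
The full program is below.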
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct dataa{
char fUname[18], sUname[18], tUname[18];
} Data;
short ran(int lower_limit, int upper_limit, unsigned int *seed) // random number generator
{
return (short) ((double) rand_r(seed) / (RAND_MAX + 1.0) * (upper_limit - lower_limit + 1) + lower_limit);
}
void generate(char fUname[18], char sUname[18], char tUname[18], MPI_Datatype* strct) {
int arrayOfBlocklengths[3] = {18, 18, 18};
MPI_Datatype arrayOfTypes[3] = {MPI_CHAR, MPI_CHAR, MPI_CHAR};
MPI_Aint fAddr, sAddr, tAddr;
MPI_Aint arrayOfDisplacements[3] = {0};
MPI_Get_address(fUname, &fAddr);
MPI_Get_address(sUname, &sAddr);
MPI_Get_address(tUname, &tAddr);
arrayOfDisplacements[1] = sAddr - fAddr;
arrayOfDisplacements[2] = tAddr - fAddr;
MPI_Type_create_struct(3, arrayOfBlocklengths, arrayOfDisplacements, arrayOfTypes, strct);
MPI_Type_commit(strct);
}
int main(int argc, const char* argv[]) {
if (argc != 1) {
printf("man no good i no need parameter bro\n");
exit(1);
}
int n, rank, i = 0;
//printf("%d\n", n);
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &n);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int nrOfGames, wordChooser;
unsigned int seed = getpid();
if (rank == 0) {
nrOfGames = ran(5, 15, &seed);
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
} else {
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process #%d: nrOfGames: %d\n", rank, nrOfGames);
}
for (i = 0; i < nrOfGames; i++) {
printf("%d. iteration: ranks are: %d\n", i, rank);
/*if (i % n != rank) {
continue;
}*/
if (rank == 0) {
int drawerRank = ran(1, n - 1, &seed);
int j;
MPI_Bcast(&drawerRank, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Main process: drawer generated, their rank is %d.\n", drawerRank);
char fileName[] = "./threewords.sh";
FILE *f = popen(fileName, "r");
Data data;
fscanf(f, "%s %s %s", data.fUname, data.sUname, data.tUname);
printf("Main process: generated usernames are: %s %s %s\n", data.fUname, data.sUname, data.tUname);
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Main process: generated the structure\n");
MPI_Send(&data, 1, strct, drawerRank, 0, MPI_COMM_WORLD);
printf("Main process: new struct sent\n");
char badMsg[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
int as = 0;
for (as = 0; as < 5; as++) {
printf("szo: %s ", badMsg[as]);
}
char guesses[n * 6][18];
MPI_Gather(badMsg, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawerRank, MPI_COMM_WORLD);
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
MPI_Recv(&pointsPerPlayer, n - 1, MPI_INT, drawerRank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Main process: Receive command sent.\n");
continue;
}
printf("\t\t\trank: %d\n", rank);
if (rank != 0) {
int drawer;
MPI_Bcast(&drawer, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process with rank %d got the drawer, %d.\n", rank, drawer);
if (rank == drawer) {
printf("I am the drawer, rank %d.\n", drawer);
// drawer case
char wordToDraw[18];
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
Data data;
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Drawer process generated the structure.\n");
Data recData;
MPI_Recv(&recData, 1, strct, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("\nDrawer process received the structure from the main process, usernames are %s %s %s\n", recData.fUname, recData.sUname, recData.tUname);
MPI_Type_free(&strct);
wordChooser = ran(1, 3, &seed);
if (wordChooser == 1) {
strcpy(wordToDraw, data.fUname);
} else if (wordChooser == 2) {
strcpy(wordToDraw, data.sUname);
} else {
strcpy(wordToDraw, data.tUname);
}
// the word is drawn; now wait for the guesses
int j, k, guessed = 0;
char guessesPerThr[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
char guesses[n * 6][18];
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
printf("sus\n");
j = 1;
k = 0;
while (j < n) {
if (j != 0 && j != rank) {
k = 0;
while (k < 5) {
if (!strcmp(wordToDraw, guessesPerThr[j * 5 + k])) {
guessed++;
pointsPerPlayer[j] += 5 - k;
break;
}
k++;
}
} else {
if (j == 0) {
pointsPerPlayer[j] = 0;
}
}
j++;
}
if (guessed) {
pointsPerPlayer[rank] = guessed - (n - guessed);
if (pointsPerPlayer[i] < 0) {
pointsPerPlayer[i] *= -1;
}
}
MPI_Send(&pointsPerPlayer, n - 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
continue;
}
printf("\t\t\t\t\t\t\trank:%d \t drawer: %d\n", rank, drawer);
if (rank != drawer) {
int drawer;
printf("u ok m8?\n");
MPI_Recv(&drawer, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Process #%d: The drawer is %d.\n", rank, drawer);
FILE *g = popen("./fivewords.sh", "r");
char guessesPerThr[5][18], guesses[n * 6][18];
int j;
for (j = 0; j < 5; j++) {
fscanf(g, "%s", guessesPerThr[j]);
}
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
}
}
}
MPI_Finalize();
return 0;
}

Cannon algorithm using MPI

I want to implement Cannon's algorithm with MPI in C, using Cartesian communicators that are shifted with the standard functions, and sending 2-dimensional blocks of the two matrices.
I have tried to follow a couple of tutorials found online, but I realized none of them was implemented the way I wanted, using both 2-dimensional blocks and Cartesian communicators.
EDIT: I have managed to get over the error after realizing that I was using the proc_grid_size variable in a wrong way, confusing the size of the process matrix with the block size and entering into some unallocated memory area.
I am running with an input of 25 processes and 2 10*10 matrices stored in 2 different files.
I am currently trying to implement the shift operations using the MPI_Cart_Shift function. But I don't know how to send the block over to the neighbors.
This is my current implementation of this specific part, which is not working (the application just hangs):
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
int nlocal;
int npes, dims[2], periods[2];
int myrank, my2drank, mycoords[2];
int uprank, downrank, leftrank, rightrank, coords[2];
int shiftsource, shiftdest;
MPI_Status status;
MPI_Comm comm_2d;
// Get the communicator related information
MPI_Comm_size(MPI_COMM_WORLD, &npes);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
// Set up the Cartesian topology
dims[0] = dims[1] = proc_matrix_size;//sqrt(npes);
// Set the periods for wraparound connections
periods[0] = periods[1] = 1;
// Create the Cartesian topology, with rank reordering
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);
// Get the rank and coordinates with respect to the new topology
MPI_Comm_rank(comm_2d, &my2drank);
MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);
// Compute ranks of the up and left shifts
// Get line neighbors (direction = 1, displacement = 1)
MPI_Cart_shift(comm_2d, 1, 1, &leftrank, &rightrank);
// Get column neighbors (direction = 0, displacement = 1)
MPI_Cart_shift(comm_2d, 0, 1, &uprank, &downrank);
// Determine the dimension of the local matrix block
nlocal = block_size;// n / dims[0];
MPI_Cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(a[0][0]), 1, subarrtype,
shiftdest, 1, shiftsource, 1, comm_2d, &status);
MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(b[0][0]), 1, subarrtype,
shiftdest, 1, shiftsource, 1, comm_2d, &status);
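For comparison, the initial alignment in the textbook formulation of Cannon's algorithm sends each whole local block with a plain element count; a sketch, assuming a[0][0] and b[0][0] each point at block_size*block_size contiguous ints (as the malloc2D helper provides):
/* skew A: shift row i of blocks left by i (i = my row coordinate) */
MPI_Cart_shift(comm_2d, 1, -mycoords[0], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(a[0][0]), block_size * block_size, MPI_INT,
                     shiftdest, 1, shiftsource, 1, comm_2d, &status);
/* skew B: shift column j of blocks up by j (j = my column coordinate) */
MPI_Cart_shift(comm_2d, 0, -mycoords[1], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(b[0][0]), block_size * block_size, MPI_INT,
                     shiftdest, 1, shiftsource, 1, comm_2d, &status);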
After closing the application, I discover that the root process is the only one that hangs:
F:\Facultate\AN_4\PDC\Labs\MPI\Cannon\x64\Release>mpiexec -np 25 Cannon.exe a.txt b.txt
mpiexec aborting job...
job aborted:
[ranks] message
[0] job terminated by the user
[1-24] terminated
---- error analysis -----
[0] on DESKTOP-JB1815M
ctrl-c was hit. job aborted by the user.
---- error analysis -----
INITIAL SOLVED CODE:
int malloc2D(int ***array, int n, int m) {
int i;
/* allocate the n*m contiguous items */
int *p = (int*) calloc(n*m, sizeof(int));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (int**) calloc(n, sizeof(int*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i = 0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2D(int ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
if (argc != 3) {
fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Declare file pointers
FILE* fa = NULL;
FILE* fb = NULL;
// Declare matrix pointers
int **A = NULL;
int **B = NULL;
int **C = NULL;
// Declare matrix dimensions
int ma = 0, na = 0;
int mb = 0, nb = 0;
// Nr of processes on each line/column in process mesh
int proc_matrix_size = (int)sqrt(world_size);
// Single value for quadratic matrix size
int n = 0;
// Nr of elements on each line/column in local matrix
// of each process
int block_size = 0;
// Open files and read matrices
if (world_rank == 0)
{
fa = fopen(argv[1], "r");
fb = fopen(argv[2], "r");
// Read matrix dimensions
fscanf(fa, "%d %d\n", &ma, &na);
fscanf(fb, "%d %d\n", &mb, &nb);
// Check if matrices are quadratic
if ((ma != na) && (na != mb) && (mb != nb))
{
printf("Invalid matrices dimensions\n");
return 0;
}
n = na;
// Check if sqrt(nr_processes) divides matrix dimension
if ((n % proc_matrix_size != 0) || (world_size % proc_matrix_size != 0))
{
printf("Number of processes does not fit matrix size\n");
return 0;
}
block_size = n / proc_matrix_size;
malloc2D(&A, n, n);
malloc2D(&B, n, n);
malloc2D(&C, n, n);
// Read matrices A & B from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fa, "%d ", &A[i][j]);
fscanf(fb, "%d ", &B[i][j]);
}
fscanf(fa, "\n");
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/*
Divide matrices in blocks and send each block to the corresponding process
*/
// Declare global pointers to matrices
int *globalAptr = NULL;
int *globalBptr = NULL;
int *globalCptr = NULL;
// Declare global return pointers
int *globalA2ptr = NULL;
int *globalB2ptr = NULL;
int **A2 = NULL;
int **B2 = NULL;
// Declare local matrix pointers
int **a = NULL;
int **b = NULL;
int **c = NULL;
malloc2D(&A2, n, n);
malloc2D(&B2, n, n);
if (world_rank == 0)
{
globalAptr = &(A[0][0]);
globalBptr = &(B[0][0]);
globalA2ptr = &(A2[0][0]);
globalB2ptr = &(B2[0][0]);
globalCptr = &(C[0][0]);
}
malloc2D(&a, block_size, block_size);
malloc2D(&b, block_size, block_size);
malloc2D(&c, block_size, block_size);
// Sizes of input global matrix
int sizes[2] = { n, n };
// Sizes of each block
int subsizes[2] = { block_size, block_size };
// Beginning of current block
int starts[2] = { 0,0 };
// Declare subarray type
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
MPI_Type_commit(&subarrtype);
// Scatter the A and B to all processes
int* sendcounts = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));
int* displs = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));
if (world_rank == 0)
{
for (int i = 0; i < proc_matrix_size * proc_matrix_size; i++)
sendcounts[i] = 1;
int disp = 0;
for (int i = 0; i < proc_matrix_size; i++) {
for (int j = 0; j < proc_matrix_size; j++) {
displs[i * proc_matrix_size + j] = disp;
disp += 1;
}
disp += ((n / proc_matrix_size)-1) * proc_matrix_size;
}
}
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
for (int i = 0; i < block_size; i++) {
for (int j = 0; j < block_size; j++) {
a[i][j] = 10 + a[i][j];
b[i][j] = 10 + b[i][j];
}
}
// It all goes back to process 0
MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
globalA2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
globalB2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Finalize();
return 0;
}
OLD:
I would like to mention that at the moment, I am trying to send blocks over the default communicator and planning to implement the shifting operations and the cartesian communicator after managing to send the matrix blocks.
The help I need is with regard to the Scatterv function which throws the following error:
job aborted: [ranks] message
[0] fatal error Fatal error in MPI_Scatterv: Invalid count, error
stack: MPI_Scatterv(sbuf=0x0000029262048D40, scnts=0x00000292620482B0,
displs=0x0000029262048250, dtype=USER,
rbuf=0x000002926203ED30, rcount=25, MPI_INT, root=0, MPI_COMM_WORLD)
failed Negative count, value is -1912594387
[1-7] terminated
This is the code I have written until now:
#include "stdafx.h"
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include <assert.h>
#include <cstdlib>
#include <math.h>
int malloc2D(int ***array, int n, int m) {
int i;
/* allocate the n*m contiguous items */
int *p = (int*) malloc(n*m * sizeof(int));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (int**) malloc(n * sizeof(int*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i = 0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2D(int ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
if (argc != 3) {
fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Declare file pointers
FILE* fa = NULL;
FILE* fb = NULL;
// Declare matrix pointers
int **A = NULL;
int **B = NULL;
int **C = NULL;
// Declare matrix dimensions
int ma = 0, na = 0;
int mb = 0, nb = 0;
// Nr of processes on each line/column in process mesh
int proc_grid_size = (int)sqrt(world_size);
// Single value for quadratic matrix size
int n = 0;
// Nr of elements on each line/column in local matrix
// of each process
int block_size = 0;
// Open files and read matrices
if (world_rank == 0)
{
fa = fopen(argv[1], "r");
fb = fopen(argv[2], "r");
// Read matrix dimensions
fscanf(fa, "%d %d\n", &ma, &na);
fscanf(fb, "%d %d\n", &mb, &nb);
// Check if matrices are quadratic
if ((ma != na) && (na != mb) && (mb != nb))
{
printf("Invalid matrices dimensions\n");
return 0;
}
n = na;
// Check if sqrt(nr_processes) divides matrix dimension
if ((n % proc_grid_size != 0) || (world_size % proc_grid_size != 0))
{
printf("Number of processes does not fit matrix size\n");
return 0;
}
block_size = n / proc_grid_size;
// Initialize matrices
A = (int**)calloc(n, sizeof(int*));
B = (int**)calloc(n, sizeof(int*));
//C = (int**)calloc(n, sizeof(int*));
for (int i = 0; i < n; i++)
{
A[i] = (int*)calloc(n, sizeof(int));
B[i] = (int*)calloc(n, sizeof(int));
//C[i] = (int*)calloc(n, sizeof(int));
}
// Read matrix A from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fa, "%d ", &A[i][j]);
printf("%d ", A[i][j]);
}
fscanf(fa, "\n");
printf("\n");
}
// Read matrix B from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fb, "%d ", &B[i][j]);
printf("%d ", B[i][j]);
}
fscanf(fb, "\n");
printf("\n");
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/*
Divide matrices in blocks and send each block to the corresponding process
*/
// Sizes of input global matrix
int sizes[2] = { n, n };
// Sizes of each block
int subsizes[2] = { block_size, block_size };
// Beginning of current block
int starts[2] = { 0,0 };
// Declare subarray type
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
MPI_Type_commit(&subarrtype);
// Declare global pointers to matrices
int *globalAptr = NULL;
int *globalBptr = NULL;
int **A2 = NULL;
int **B2 = NULL;
malloc2D(&A2, n, n);
malloc2D(&B2, n, n);
// Declare global return pointers
int *globalA2ptr = NULL;
int *globalB2ptr = NULL;
if (world_rank == 0)
{
globalAptr = &(A[0][0]);
globalBptr = &(B[0][0]);
globalA2ptr = &(A2[0][0]);
globalB2ptr = &(B2[0][0]);
}
// Declare local matrix pointers
int **a = NULL;
int **b = NULL;
malloc2D(&a, block_size, block_size);
malloc2D(&b, block_size, block_size);
// Scatter the A and B to all processes
int* sendcounts = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));
int* displs = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));
if (world_rank == 0)
{
for (int i = 0; i < proc_grid_size * proc_grid_size; i++)
sendcounts[i] = 1;
int disp = 0;
for (int i = 0; i < proc_grid_size; i++) {
for (int j = 0; j < proc_grid_size; j++) {
displs[i * proc_grid_size + j] = disp;
disp += 1;
}
disp += ((block_size) - 1) * proc_grid_size;
}
for (int i = 0; i < proc_grid_size * proc_grid_size; i++)
{
printf("Send cound: %d\n", sendcounts[i]);
}
}
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
// Now each processor has its local array, and can process it
for (int i = 0; i < block_size; i++) {
for (int j = 0; j < block_size; j++) {
a[i][j] = 10 + a[i][j];
b[i][j] = 10 + b[i][j];
}
}
// It all goes back to process 0
MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
globalA2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
globalB2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Thank you very much!

MPI Search In Array

I'm trying to find a specific value inside an array, using parallel search with MPI. When my code finds the value, it shows an error.
ERROR
Assertion failed in file src/mpid/ch3/src/ch3u_buffer.c at line 77: FALSE
memcpy argument memory ranges overlap, dst_=0x7ffece7eb590 src_=0x7ffece7eb590 len_=4
PROGRAM
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
const char *FILENAME = "input.txt";
const size_t ARRAY_SIZE = 640;
int main(int argc, char **argv)
{
int *array = malloc(sizeof(int) * ARRAY_SIZE);
int rank,size;
MPI_Status status;
MPI_Request request;
int done,myfound,inrange,nvalues;
int i,j,dummy;
/* Let the system do what it needs to start up MPI */
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&size);
myfound=0;
if (rank == 0)
{
createFile();
array = readFile(FILENAME);
}
MPI_Bcast(array, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Irecv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
MPI_Test(&request, &done, &status);
nvalues = ARRAY_SIZE / size; //EACH PROCESS RUNS THAT MUCH NUMBER IN ARRAY
i = rank * nvalues; //OFFSET FOR EACH PROCESS INSIDE THE ARRAY
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues); //LIMIT OF THE OFFSET
while (!done && inrange)
{
if (array[i] == 17)
{
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
}
printf("P:%d - %d - %d\n", rank, i, array[i]);
MPI_Test(&request, &done, &status);
++i;
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
}
if (!myfound)
{
printf("P:%d stopped at global index %d\n", rank, i - 1);
}
MPI_Finalize();
}
The error is somewhere in here, because when I put a value that isn't in the array (for example -5) into the if condition, the program runs smoothly.
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
Thanks
Your program is invalid with respect to the MPI standard because you use the same buffer (&dummy) for both MPI_Irecv() and MPI_Send().
You can either use two distinct buffers (e.g. dummy_send and dummy_recv), or since you do not seem to care about the value of dummy, then use NULL as buffer and send/receive zero size messages.
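For example, with two distinct buffers the notification part could look like this (sketch):
int dummy_send = 1, dummy_recv;
MPI_Request request;
MPI_Irecv(&dummy_recv, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
/* ... later, when the value is found: */
for (j = 0; j < size; j++)
    MPI_Send(&dummy_send, 1, MPI_INT, j, 1, MPI_COMM_WORLD);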

Bitonic Sorting with MPI

I'm working on a project about parallel bitonic sort, using MPI and C to implement it. The program I developed works, but it's not efficient, since a simple quicksort (sigh) beats it in terms of execution time. Maybe the problem is the cost of communication, but I don't see how to improve that, so here's the code:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <string.h>
#include "bs-util.h"
#include "quicksort.h"
#define TAG 1
/* Run this program knowing that:
* 1) The number of cores must be a power of 2
* 2) The length of the array to order must be a power of 2
*
* Exec Example: mpirun -n 4 ./bs 1024 1024
* */
void exchange(FILE *log, int i, int partner, int up);
int countTransfer = 0;
int *myArray, *partnerArray;
int currentPartner = -1;
int rank, size;
MPI_Status status;
int verbose = 0; //this var toggles on(1) or off(0) some useful prints for debugging purpose
int amount=0;
int main(int argc, char *argv[])
{
int *array;
int i=0;
int carry=0;
int up=1;
int count=0;
struct timeval tim;
FILE *log;
char logName[15] = "log/";
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* Time meter */
srand((double) time(NULL));
gettimeofday(&tim, NULL);
double t1=tim.tv_sec+(tim.tv_usec/1000000.0);
snprintf(logName+4, 10, "%d",rank);
log = fopen(logName,"w");
printf("Hello world from process %d of %d.\n", rank, size);
MPI_Barrier(MPI_COMM_WORLD);
/* INPUT */
if (rank==0)
{
if (argc==2) /* by file */
{
FILE *input = fopen(argv[1],"r");
char line[20];
count = 0;
while(fgets(line,20,input) != NULL)
{
count++;
}
fclose(input);
array = (int *)malloc(count*sizeof(int));
input = fopen(argv[1],"r");
i = 0;
while(fgets(line,20,input) != NULL)
{
array[i] = atoi(line);
i++;
}
fclose(input);
}
else
if (argc==3) /* by command line */
{
count = atoi(argv[1]);
int max = atoi(argv[2]);
array = (int *)malloc(count*sizeof(int));
srand(time(NULL));
for (i=0; i<count; i++)
{
array[i] = rand()%max;
}
}
else
{
printf("\n\n ----------- ERRORE NEI PARAMETRI DI INPUT ----------- \n\n");
return 1;
}
/* END OF THE INPUT */
if (verbose){
printf("Initial array:\n");
for (i=0; i<count; i++)
{
printf("%d\t", array[i]);
}
printf("\n");
}
/* Everyone waits for each other */
MPI_Barrier(MPI_COMM_WORLD);
carry = count%size;
amount = count/size + carry;
printf("\nParameters: amount=%d carry=%d\n\n", amount, carry);
up=1;
int startIndex = amount;
myArray = (int *)malloc(amount*sizeof(int));
/* Buffer (partner) */
partnerArray = (int *)malloc(amount*sizeof(int));
for (i=0; i<amount; i++)
myArray[i] = array[i];
printf("Processo %d riceve amount=%d e up=%d\n", rank, amount, up);
if (verbose){
printf("Mia porzione ---> ");
for (i=0; i<amount; i++)
{
printf("%d\t", myArray[i]);
}
printf("\n");
}
/* Sending the big array's chunks */
for (i=1; i<size; i++)
{
up = (i+1) % 2;
MPI_Send(&up, 1, MPI_INT, i, TAG, MPI_COMM_WORLD);
MPI_Send(&amount, 1, MPI_INT, i, TAG, MPI_COMM_WORLD);
MPI_Send(&carry, 1, MPI_INT, i, TAG, MPI_COMM_WORLD);
MPI_Send(array+startIndex, amount-carry, MPI_INT, i, TAG, MPI_COMM_WORLD);
startIndex += amount-carry;
}
MPI_Barrier(MPI_COMM_WORLD);
}
else
{
MPI_Barrier(MPI_COMM_WORLD);
MPI_Recv(&up, 1, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
MPI_Recv(&amount, 1, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
MPI_Recv(&carry, 1, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
myArray = (int *)malloc(amount*sizeof(int));
partnerArray = (int *)malloc(amount*sizeof(int)); /* Buffer (partner) */
MPI_Recv(myArray, amount, MPI_INT, 0, TAG, MPI_COMM_WORLD, &status);
/* Experimental padding: every chunk has the same number of items. */
for (i=amount-carry; i<amount; i++)
{
myArray[i] = 0;
}
printf("\n");
printf("Processo %d riceve amount=%d e up=%d\n", rank, amount-carry, up);
if (verbose){
printf("Mia porzione ---> ");
for (i=0; i<amount; i++)
{
printf("%d\t", myArray[i]);
}
printf("\n");
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* CORE */
/* Local Quicksort */
int result = quickSort(&myArray[0], amount); //this function is written within src/quicksort.c
if (verbose){
if (result == 1)
printf("Quick Sort: FAIL \n");
else
{
printf("\nLa mia porzione ordinata (processo %d)\n", rank);
for(i=0; i<amount; i++)
{
printf("%d ",myArray[i]);
}
printf ("\n");
}
}
int j;
for (up=8;up<=amount*size;up=2*up)
{
for (j=up>>1;j>0;j=j>>1)
{
for (i=0;i<amount*size;i++)
{
int partner=i^j;
if ((partner)>i)
{
exchange(log,i,partner,i&up);
}
}
}
}
/* END OF THE CORE */
if (rank!=0)
{
MPI_Send(myArray, amount, MPI_INT, 0, TAG, MPI_COMM_WORLD);
}
gettimeofday(&tim, NULL);
double t2=tim.tv_sec+(tim.tv_usec/1000000.0);
if (rank==0)
{
myArray = (int *)realloc(myArray,sizeof(int)*amount*size);
for (i=1; i<size; i++)
MPI_Recv(myArray+i*amount, amount, MPI_INT, i, TAG, MPI_COMM_WORLD, &status);
printf("\nTempo trascorso %6f\n", t2-t1);
fprintf(log,"\n\n----------> Array Iniziale <----------\n");
printArray(log,array,count);
fprintf(log,"\n\n----------> Array Finale <----------\n");
printArray(log,myArray+(carry*(size-1)),count);
/*printArray(log,myArray,newAmount*size);*/
}
fprintf(log,"Numero di chunk scambiati: %d\n",countTransfer);
fclose(log);
MPI_Finalize();
return 0;
}
void exchange(FILE *log, int i, int partner, int up)
{
int rank_i = i/amount;
int rank_partner = partner/amount;
int offset_i = i%amount;
int offset_partner = partner%amount;
/*if (verbose)
fprintf(log,"\nnewAmount = %d - Rank_i = %d - Rank_partner = %d - Offset_i = %d - Offset_partner = %d \n",amount,rank_i,rank_partner,offset_i,offset_partner);
*/
if ((rank_i != rank) && (rank_partner != rank))
return;
if ((rank_i == rank) && (rank_partner == rank))
{
if (((up==0) && (myArray[offset_i] > myArray[offset_partner])) || ((up!=0) && (myArray[offset_i] < myArray[offset_partner])))
{
int temp = myArray[offset_i];
myArray[offset_i] = myArray[offset_partner];
myArray[offset_partner] = temp;
}
return;
}
if (rank_i == rank && rank_partner != rank)
{
if (currentPartner != rank_partner)
{
MPI_Send(myArray, amount, MPI_INT, rank_partner, TAG, MPI_COMM_WORLD);
MPI_Recv(partnerArray, amount, MPI_INT, rank_partner, TAG, MPI_COMM_WORLD, &status);
currentPartner = rank_partner;
countTransfer++;
}
if (((up==0) && (myArray[offset_i] > partnerArray[offset_partner])) || ((up!=0) && (myArray[offset_i] < partnerArray[offset_partner])))
myArray[offset_i] = partnerArray[offset_partner];
return;
}
if (rank_i != rank && rank_partner == rank)
{
if (currentPartner != rank_i)
{
MPI_Recv(partnerArray, amount, MPI_INT, rank_i, TAG, MPI_COMM_WORLD, &status);
MPI_Send(myArray, amount, MPI_INT, rank_i, TAG, MPI_COMM_WORLD);
currentPartner = rank_i;
countTransfer++;
}
if (((up==0) && (partnerArray[offset_i] > myArray[offset_partner])) || ((up!=0) && (partnerArray[offset_i] < myArray[offset_partner])))
myArray[offset_partner] = partnerArray[offset_i];
return;
}
}
And here's the Makefile:
CC = mpicc
OPTIMIZE =
CFLAGS = $(DEFINES) $(OPTIMIZE)
LFLAGS = -lm
PROGS = ./bs
PROGS_SRC = src/bs-util.c src/bs.c src/quicksort.c
all:
$(CC) $(CFLAGS) $(LFLAGS) -o $(PROGS) $(PROGS_SRC)
Help would be very appreciated :)
References: http://goo.gl/nXt4p
Remember that bitonic sort has a time complexity of roughly (N/P)*(log N)^2, compared to quicksort's N*log N (in the serial version). This means that with log N > P (P ~ number of processors), even the serial quicksort should beat bitonic sort (never mind the constant factors that depend on the implementation, or the communication). Bitonic sort is for really parallel computers (it's pretty good on GPUs), not a cluster of a few PCs, which is probably what you have.
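For example, with N = 2^20 elements and P = 4 processes, the per-process bitonic work is about (N/P)*(log2 N)^2 = 262144 * 400, roughly 10^8 comparisons, while a serial quicksort does about N*log2 N, roughly 2*10^7; since log2 N = 20 > P = 4, the serial sort wins on operation count alone.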
Many sends/receives of small data chunks (as in the exchange function) badly affect performance. It is more efficient to combine the small chunks into one buffer and send that.
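For example, each pair of partners can swap their whole blocks in a single call per step and then do the compare-split on the local copies (a sketch using the variables from the exchange function):
/* one combined exchange with the partner instead of separate Send/Recv pairs */
MPI_Sendrecv(myArray, amount, MPI_INT, rank_partner, TAG,
             partnerArray, amount, MPI_INT, rank_partner, TAG,
             MPI_COMM_WORLD, &status);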
Um. I don't see you doing any collective communication other than barriers...
