MPI skips communications on larger data (C)

I want to parallelize a backtracking algorithm to solve a Sudoku game. My program sometimes skips some grids, and this is more likely to happen on larger Sudoku files: computation on 9x9 Sudokus almost always yields a solution, but on 16x16 it almost never does. It may also return very quickly, meaning it does not compute all cached grids, or they are never cached at all and are thrown away (commenting out the continue statement in solveSudoku, or not caching Sudokus at all, always returns a solution).
My idea is to have one process collect a queue of Sudoku grids at runtime from the other processes, which are each solving a current Sudoku grid. Every such grid originates from the root Sudoku grid.
A computation process asks the manager process whether it needs more grids, i.e. whether the manager's grid queue is not full (this happens at every node that has more than one following node, which is probably not very performance efficient).
When a computation process finishes its branch, it asks the manager for a cached grid to solve.
solveSudoku is the method that implements the backtracking algorithm and caches branches/nodes of the Sudoku graph.
I thought that maybe some MPI_Sends were being dropped because too many happen at once, but a synchronous MPI_Ssend does not change anything.
int solveSudoku(struct grid* grid, int row, int col, grids_queue** gridQueue, int* gridCount, int cacheGrids)
{
    int index = row * grid->size + col;
    // Solution found!
    if (row == grid->size - 1 && col == grid->size)
        return 1;
    if (col == grid->size)
    {
        row++;
        col = 0;
    }
    if (grid->sudoku[index] > 0)
        return solveSudoku(grid, row, col + 1, gridQueue, gridCount, cacheGrids);
    int multiplePaths = 0;
    // First for loop checks for multiple next possibilities, to attempt to cache one for the manager process
    if (cacheGrids == 1)
    {
        int count = 0;
        for (int num = 1; num <= grid->size; num++)
        {
            if (isSafe(*grid, row, col, num) == 1)
            {
                count++;
                if (count > 1)
                {
                    multiplePaths = 1;
                    break;
                }
            }
        }
    }
    int cachedGrid = 0;
    for (int num = 1; num <= grid->size; num++)
    {
        if (isSafe(*grid, row, col, num) == 1)
        {
            grid->sudoku[index] = num;
            // cache grid
            if (multiplePaths == 1 && cachedGrid == 0)
            {
                if (attemptGridCache(*grid) == 1)
                {
                    // Skip this node
                    cachedGrid = 1;
                    continue;
                }
            }
            if (solveSudoku(grid, row, col + 1, NULL, gridCount, cacheGrids) == 1)
                return 1;
        }
        grid->sudoku[index] = 0;
    }
    // No solution
    return 0;
}
#define MANAGER_RECV_GRIDS 11
#define COMPUT_RECV_GRIDS 12
#define COMPUT_STATUS 13
#define MANAGER_STATUS 14
#define SOLUTION_TIME 15
double solutionFindTime = 0;
double start_time;
int pid;
int gridWidth;
int gridSize;
int attemptGridCache(struct grid sudokuGrid) {
    // Send Compute Status
    int computeStatus = 1;
    MPI_Ssend(&computeStatus, 1, MPI_INT, 0, COMPUT_STATUS, MPI_COMM_WORLD);
    // Get Manager Status
    int managerStatus;
    MPI_Recv(&managerStatus, 1, MPI_INT, 0, MANAGER_STATUS, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    //printBoard(grid);
    // Send grid to cache in manager process
    if (managerStatus != GRIDS_FULL)
    {
        MPI_Send(sudokuGrid.sudoku, gridSize, MPI_INT, 0, MANAGER_RECV_GRIDS, MPI_COMM_WORLD);
        return 1;
    }
    return 0;
}
struct grid recvGrid(int srcpid, int tag)
{
    size_t size = gridSize * sizeof(int);
    int* recvGrid = malloc(size);
    MPI_Recv(recvGrid, gridSize, MPI_INT, srcpid, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    struct grid g;
    g.sudoku = recvGrid;
    g.size = gridWidth;
    return g;
}
void computeGrid(struct grid grid, int cacheGrids)
{
    //printBoard(&grid);
    if (solveSudoku(&grid, 0, 0, NULL, NULL, cacheGrids) == 1)
    {
        printf("<<<<<<<<<<<<<<<<<Solution found on pid %d >>>>>>>>>>>>>>>>>>>>>\n", pid);
        double end_time = MPI_Wtime();
        solutionFindTime = end_time - start_time;
        //if (pid != 0)
        //MPI_Send(&solutionFindTime, 1, MPI_DOUBLE, 0, SOLUTION_TIME, MPI_COMM_WORLD);
    }
}
int main(int argc, char** argv) {
    int process_count;
    double end_time;
    double duration;
    double initDuration;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &process_count);
    MPI_Comm_rank(MPI_COMM_WORLD, &pid);
    char* file = argv[1];
    start_time = MPI_Wtime();
    if (process_count == 0 || process_count == 2)
        return 0;
    // For process 0
    grids_queue* manager_gridQueue = NULL;
    int sudokuListSize = 0;
    // Manager Process Initialization
    if (pid == 0)
    {
        double startInit_time = MPI_Wtime();
        manager_gridQueue = initParallel(process_count - 1, &sudokuListSize, file);
        initDuration = MPI_Wtime() - startInit_time;
        gridWidth = manager_gridQueue->item.size;
    }
    // Broadcast grid size
    MPI_Bcast(&gridWidth, 1, MPI_INT, 0, MPI_COMM_WORLD);
    gridSize = gridWidth * gridWidth;
    if (pid == 0 && process_count != 1)
    {
        for (int i = 1; i < process_count; i++)
        {
            // Sending grids to other processes...
            MPI_Send(dequeue(&manager_gridQueue).sudoku, gridSize, MPI_INT, i, COMPUT_RECV_GRIDS, MPI_COMM_WORLD);
            sudokuListSize--;
        }
        int msg;
        int idlingProcessesCount = 0;
        int computationFinished = 0;
        do {
            // receive status from other processes
            MPI_Status status;
            MPI_Recv(&msg, 1, MPI_INT, MPI_ANY_SOURCE, COMPUT_STATUS, MPI_COMM_WORLD, &status);
            // send manager status
            int reply;
            if (sudokuListSize <= 0)
                reply = GRIDS_EMPTY;
            else if (sudokuListSize < process_count - 1)
                reply = GRIDS_AVAILABLE;
            else
                reply = GRIDS_FULL;
            MPI_Send(&reply, 1, MPI_INT, status.MPI_SOURCE, MANAGER_STATUS, MPI_COMM_WORLD);
            // cache grid from other process
            if (msg == 1 && reply != GRIDS_FULL) {
                enqueue(&manager_gridQueue, recvGrid(status.MPI_SOURCE, MANAGER_RECV_GRIDS));
                sudokuListSize++;
            }
            // process finished. Idling.. get work from queue
            else if (msg == 0) {
                if (reply != GRIDS_EMPTY) {
                    MPI_Send(dequeue(&manager_gridQueue).sudoku, gridSize, MPI_INT, status.MPI_SOURCE, COMPUT_RECV_GRIDS, MPI_COMM_WORLD);
                    sudokuListSize--;
                }
                // No grids available, wait for more processes to check for completion
                else {
                    idlingProcessesCount++;
                    // All processes are idling and no more work left.
                    // End MPI program
                    if (idlingProcessesCount == process_count - 1) {
                        computationFinished = 1;
                    }
                }
            }
        } while (computationFinished == 0);
        //if (solutionFindTime == 0)
        //MPI_Recv(&solutionFindTime, 1, MPI_DOUBLE, MPI_ANY_SOURCE, SOLUTION_TIME, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    else if (pid != 0)
    {
        // compute init sudokus
        computeGrid(recvGrid(0, COMPUT_RECV_GRIDS), 1);
        // sudokus from dynamic queue
        if (pid == 0)
            return 0;
        int managerStatus;
        do {
            // ask for new work
            int computeStatus = 0;
            MPI_Ssend(&computeStatus, 1, MPI_INT, 0, COMPUT_STATUS, MPI_COMM_WORLD);
            // Get Manager Status
            MPI_Recv(&managerStatus, 1, MPI_INT, 0, MANAGER_STATUS, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            if (managerStatus != GRIDS_EMPTY)
                computeGrid(recvGrid(0, COMPUT_RECV_GRIDS), 1);
        } while (managerStatus != GRIDS_EMPTY);
    }
    else if (pid == 0 && process_count == 1) {
        computeGrid(dequeue(&manager_gridQueue), 0);
    }
    // CODE END
    if (pid == 0) {
        end_time = MPI_Wtime();
        duration = end_time - start_time;
        D(printf("\\\\ //\n"));
        D(printf(" \\\\ //\n"));
        D(printf(" \\\\_// Duration: %f\n", duration));
        printf("%f\n", duration);
        printf("%f\n", solutionFindTime);
        printf("%f\n", initDuration);
    }
    MPI_Finalize();
    return 0;
}

It was the continue statement in the for loop of the backtracking function solveSudoku that made it skip some computations recursively.
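A minimal sketch of one possible fix, assuming the rest of the code stays as posted: keep offering the branch to the manager, but do not skip it locally, so a branch can never be lost even if its cached copy is never dequeued (at the cost of some duplicated work):
for (int num = 1; num <= grid->size; num++)
{
    if (isSafe(*grid, row, col, num) == 1)
    {
        grid->sudoku[index] = num;
        // sketch of one possible fix: still offer the branch to the manager,
        // but without the 'continue' the branch is also explored here, so it
        // cannot be silently dropped
        if (multiplePaths == 1 && cachedGrid == 0 && attemptGridCache(*grid) == 1)
            cachedGrid = 1;
        if (solveSudoku(grid, row, col + 1, NULL, gridCount, cacheGrids) == 1)
            return 1;
    }
    grid->sudoku[index] = 0;
}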

Related

Why does this program not enter on the other ranks?

The code I am working on has to implement a Skribbl.io game. I am working with MPI, and the work is divided between the ranks (rank 0 is the main process and assigns the drawer; the drawer rank draws and collects the info; the other ranks are the players). I have two problems with this code (the second one originates from the first). The first problem is that although the code has branches telling each process what it needs to do, the players never enter their respective ifs (if (rank != drawer)). I put printfs before and after the if statement; the one before is reached, the one after is not. The second problem is that the MPI_Gather calls don't work as expected: I want to send a string array (char[][]), but the drawer's call just waits for data and never gets any (probably because the other ranks cannot enter their ifs).
Can anyone help me with this?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>
typedef struct dataa{
char fUname[18], sUname[18], tUname[18];
} Data;
short ran(int lower_limit, int upper_limit, unsigned int *seed) // random generator
{
return (short) ((double) rand_r(seed) / (RAND_MAX + 1.0) * (upper_limit - lower_limit + 1) + lower_limit);
}
void generate(char fUname[18], char sUname[18], char tUname[18], MPI_Datatype* strct) {
int arrayOfBlocklengths[3] = {18, 18, 18};
MPI_Datatype arrayOfTypes[3] = {MPI_CHAR, MPI_CHAR, MPI_CHAR};
MPI_Aint fAddr, sAddr, tAddr;
MPI_Aint arrayOfDisplacements[3] = {0};
MPI_Get_address(fUname, &fAddr);
MPI_Get_address(sUname, &sAddr);
MPI_Get_address(tUname, &tAddr);
arrayOfDisplacements[1] = sAddr - fAddr;
arrayOfDisplacements[2] = tAddr - fAddr;
MPI_Type_create_struct(3, arrayOfBlocklengths, arrayOfDisplacements, arrayOfTypes, strct);
MPI_Type_commit(strct);
}
int main(int argc, const char* argv[]) {
if (argc != 1) {
printf("man no good i no need parameter bro\n");
exit(1);
}
int n, rank, i = 0;
//printf("%d\n", n);
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &n);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int nrOfGames, wordChooser;
unsigned int seed = getpid();
if (rank == 0) {
nrOfGames = ran(5, 15, &seed);
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
} else {
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process #%d: nrOfGames: %d\n", rank, nrOfGames);
}
for (i = 0; i < nrOfGames; i++) {
printf("%d. iteration: ranks are: %d\n", i, rank);
/*if (i % n != rank) {
continue;
}*/
if (rank == 0) {
int drawerRank = ran(1, n - 1, &seed);
int j;
MPI_Bcast(&drawerRank, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Main process: drawer generated, their rank is %d.\n", drawerRank);
char fileName[15] = "./threewords.sh";
FILE *f = popen(fileName, "r");
Data data;
fscanf(f, "%s %s %s", data.fUname, data.sUname, data.tUname);
printf("Main process: generated usernames are: %s %s %s\n", data.fUname, data.sUname, data.tUname);
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Main process: generated the structure\n");
MPI_Send(&data, 1, strct, drawerRank, 0, MPI_COMM_WORLD);
printf("Main process: new struct sent\n");
char badMsg[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
int as = 0;
for (as = 0; as < 5; as++) {
printf("szo: %s ", badMsg[as]);
}
char guesses[n * 6][18];
MPI_Gather(badMsg, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawerRank, MPI_COMM_WORLD);
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
MPI_Recv(&pointsPerPlayer, n - 1, MPI_INT, drawerRank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Main process: Receive command sent.\n");
continue;
}
printf("\t\t\trank: %d\n", rank);
if (rank != 0) {
int drawer;
MPI_Bcast(&drawer, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process with rank %d got the drawer, %d.\n", rank, drawer);
if (rank == drawer) {
printf("I am the drawer, rank %d.\n", drawer);
// drawer case
char wordToDraw[18];
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
Data data;
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Drawer process generated the structure.\n");
Data recData;
MPI_Recv(&recData, 1, strct, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("\nDrawer process received the structure from the main process, usernames are %s %s %s\n", recData.fUname, recData.sUname, recData.tUname);
MPI_Type_free(&strct);
wordChooser = ran(1, 3, &seed);
if (wordChooser == 1) {
strcpy(wordToDraw, data.fUname);
} else if (wordChooser == 2) {
strcpy(wordToDraw, data.sUname);
} else {
strcpy(wordToDraw, data.tUname);
}
// draws it, then waits for the answers
int j, k, guessed = 0;
char guessesPerThr[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
char guesses[n * 6][18];
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
printf("sus\n");
j = 1;
k = 0;
while (j < n) {
if (j != 0 && j != rank) {
k = 0;
while (k < 5) {
if (!strcmp(wordToDraw, guessesPerThr[j * 5 + k])) {
guessed++;
pointsPerPlayer[j] += 5 - k;
break;
}
k++;
}
} else {
if (j == 0) {
pointsPerPlayer[j] = 0;
}
}
j++;
}
if (guessed) {
pointsPerPlayer[rank] = guessed - (n - guessed);
if (pointsPerPlayer[i] < 0) {
pointsPerPlayer[i] *= -1;
}
}
MPI_Send(&pointsPerPlayer, n - 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
continue;
}
printf("\t\t\t\t\t\t\trank:%d \t drawer: %d\n", rank, drawer);
if (rank != drawer) {
int drawer;
printf("u ok m8?\n");
MPI_Recv(&drawer, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Process #%d: The drawer is %d.\n", rank, drawer);
FILE *g = popen("./fivewords.sh", "r");
char guessesPerThr[5][18], guesses[n * 6][18];
int j;
for (j = 0; j < 5; j++) {
fscanf(g, "%s", guessesPerThr[j]);
}
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
}
}
}
MPI_Finalize();
return 0;
}

MPI program error - why do I always get exit code 0xc0000005?

I have problems with my MPI code in C. My program sometimes crashes and I get this exit code:
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-2] terminated
---- error analysis -----
[0] on LOLANODE1011
project.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
Sometimes it works, I don't know why :(
The code is for a job on a cluster, but it only works from the command line.
I think it has something to do with the arrays or with Send and Receive, but I'm not sure...
Main:
int main(int argc, char **argv) {
double *array_distances;
int array_points[2];
int process_count;
int rank;
int city2;
int start_point;
int end_point;
double start_time;
double end_time;
double duration;
//MPI Initiate
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &process_count);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
start_time = MPI_Wtime();
struct local_stack local_work;
struct route best_route;
int num_cities;
double *distance;
char city_names[MAX_CITIES][MAX_CITY_NAME_LENGTH];
int city;
double coord[MAX_CITIES][2];
if (argc == 2) {
num_cities = atoi(argv[1]);
}
else {
num_cities = 0;
}
// Initiate best route in process 0
if (rank == 0) {
init_best_route(&best_route);
}
// Calculate start and endpoint for the current process
if (num_cities==0) {
num_cities = 15;
}
int rest = num_cities % process_count;
if (rest == 0) {
start_point = (num_cities / process_count)* rank;
end_point = (num_cities / process_count)* (rank + 1);
}
else
{
start_point = ((num_cities - rest) / process_count)* rank;
end_point = ((num_cities - rest) / process_count)* (rank + 1);
if (rank == (process_count - 1)) {
end_point += rest;
}
}
// Calculate distances between the cities
populate_distance_matrix(process_count, rank, &distance, &num_cities, city_names, start_point,
end_point);
// current process (not process 0!) sends the calculated distances and the start- and endpoint to process 0
if (rank > 0) {
int array_size = num_cities * (end_point - start_point);
array_distances = malloc(sizeof(double) * array_size);
//send start- and endpoint
array_points[0] = start_point;
array_points[1] = end_point;
MPI_Request req2;
MPI_Isend(&array_points, 2, MPI_INT, 0, rank, MPI_COMM_WORLD, &req2);
//put distances in array
int i = 0;
for (start_point; start_point < end_point; start_point++) {
for (city2 = 0; city2 < num_cities; city2++) {
array_distances[i] = *(distance + start_point * num_cities + city2);
i++;
}
}
//send distances to process 0
MPI_Request req;
MPI_Isend(array_distances, array_size, MPI_DOUBLE, 0, rank+1, MPI_COMM_WORLD, &req);
MPI_Wait(&req, MPI_STATUS_IGNORE);
}
//process 0 receives all distances and start- and endpoint
if (rank == 0) {
for (int i = 1; i < process_count; i++) {
//receive start- and endpoint
MPI_Recv(&array_points, 2, MPI_INT, i, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
start_point = array_points[0];
end_point = array_points[1];
//receive distances
int count = 0;
int size;
MPI_Status status;
// Wait for a message
MPI_Probe(i, i + 1, MPI_COMM_WORLD, &status);
// Find out the number of elements in the message -> size goes to "size"
MPI_Get_count(&status, MPI_DOUBLE, &size);
//receive
MPI_Recv(array_distances, size, MPI_DOUBLE, i, i + 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// save distances in memory
for (start_point; start_point < end_point; start_point++) {
for (city2 = 0; city2 < num_cities; city2++) {
*(distance + start_point * num_cities + city2) = array_distances[count];
count++;
}
}
}
}
free(array_distances);
// process 0: search for the best route
if (rank == 0) {
init_stack(&local_work);
push_first_work_local(&local_work, num_cities);
expand_top_route(&local_work, &num_cities, &best_route, distance);
//printf("[%d] Let's start the fun!\n", );fflush(stdout);
while (!empty(&local_work)) {
//printf("[%d][w %d] Has work\n", , );fflush(stdout);
expand_top_route(&local_work, &num_cities, &best_route, distance);
/*
if ( % 100 == 0) {
printf("[%d] Finished %dth loop iteration\n", , );
}
*/
}
//printf("[%d][w %d] I am out\n", , - 1);fflush(stdout);
free(distance);
/*printf("==========================\n");
printf("# cities: %d\n", num_cities);
printf("==========================\n");
print_route(best_route, "Best route:\n");
for (city = 0; city < num_cities; city++) {
printf("%2d\tCity %2d/%c\t%s\n", city, best_route.route[city],
best_route.route[city] + 48, city_names[best_route.route[city]]);
}*/
}
// MPI show duration
end_time = MPI_Wtime();
if (rank == 0) {
duration = end_time - start_time;
/*printf("==========================\n");
printf("\\\\ //\n");
printf(" \\\\ //\n");
printf(" \\\\_// Duration: %f\n", duration);*/
fprintf(stderr, "%f,%f", duration, best_route.length);
for (int index = 0; index < num_cities; index++) {
fprintf(stderr, "%2d,", best_route.route[index]);
}
printf("%d, %d, %f\n", process_count, num_cities, duration);
}
MPI_Finalize();
return 0;
}
If someone could help me it would be nice :)

MPI Search In Array

I'm trying to find a specific value inside an array, searching in parallel with MPI. When my code finds the value, it shows an error.
ERROR
Assertion failed in file src/mpid/ch3/src/ch3u_buffer.c at line 77: FALSE
memcpy argument memory ranges overlap, dst_=0x7ffece7eb590 src_=0x7ffece7eb590 len_=4
PROGRAM
const char *FILENAME = "input.txt";
const size_t ARRAY_SIZE = 640;
int main(int argc, char **argv)
{
int *array = malloc(sizeof(int) * ARRAY_SIZE);
int rank,size;
MPI_Status status;
MPI_Request request;
int done,myfound,inrange,nvalues;
int i,j,dummy;
/* Let the system do what it needs to start up MPI */
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&size);
myfound=0;
if (rank == 0)
{
createFile();
array = readFile(FILENAME);
}
MPI_Bcast(array, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Irecv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
MPI_Test(&request, &done, &status);
nvalues = ARRAY_SIZE / size; //EACH PROCESS RUNS THAT MUCH NUMBER IN ARRAY
i = rank * nvalues; //OFFSET FOR EACH PROCESS INSIDE THE ARRAY
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues); //LIMIT OF THE OFFSET
while (!done && inrange)
{
if (array[i] == 17)
{
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
}
printf("P:%d - %d - %d\n", rank, i, array[i]);
MPI_Test(&request, &done, &status);
++i;
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
}
if (!myfound)
{
printf("P:%d stopped at global index %d\n", rank, i - 1);
}
MPI_Finalize();
}
The error is somewhere in here, because when I put a value that is not in the array (for example -5) into the if condition, the program runs smoothly.
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
Thanks
Your program is invalid with respect to the MPI standard because you use the same buffer (&dummy) for both MPI_Irecv() and MPI_Send().
You can either use two distinct buffers (e.g. dummy_send and dummy_recv), or, since you do not seem to care about the value of dummy, use NULL as the buffer and send/receive zero-size messages.
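A minimal sketch of the first option, replacing the corresponding part of the posted main() (dummy_send and dummy_recv are names introduced here for illustration; the other variables are the ones already declared in the question):
int dummy_send = 1;   /* used only by MPI_Send (name introduced for this sketch) */
int dummy_recv;       /* used only by MPI_Irecv (name introduced for this sketch) */

MPI_Irecv(&dummy_recv, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
MPI_Test(&request, &done, &status);
nvalues = ARRAY_SIZE / size;
i = rank * nvalues;
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
while (!done && inrange)
{
    if (array[i] == 17)
    {
        /* notify every rank from a buffer distinct from the receive buffer */
        for (j = 0; j < size; j++)
        {
            MPI_Send(&dummy_send, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
        }
        printf("P:%d found it at global index %d\n", rank, i);
        myfound = 1;
    }
    MPI_Test(&request, &done, &status);
    ++i;
    inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
}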

How to implement an MPI filter in C code?

I am trying to implement an MPI version of the filter code below, but I'm facing difficulties doing it. How should it be done?
Filter code:
int A[100000][100000];
int B[100000][100000];
for (int i = 1; i < (100000 - 1); i++)
    for (int j = 1; j < (100000 - 1); j++)
        B[i][j] = A[i-1][j] + A[i+1][j] + A[i][j-1] + A[i][j+1] - 4*A[i][j];
This is what I have tried while following the six functions of MPI:
int myrank; /* Rank of process */
int numprocs; /* Number of processes */
int source; /* Rank of sender */
int dest; /* Rank of receiver */
char message[100]; /* Storage for the message */
MPI_Status status; /* Return status for receive */
MPI_Init( & argc, & argv);
MPI_Comm_size(MPI_COMM_WORLD, & numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, & myrank);
if (myrank != 0)
{
dest = 0;
MPI_Send(message, strlen(message) + 1,
MPI_CHAR, dest, 15, MPI_COMM_WORLD);
} else {
for (source = 1; source < numprocs; source++) {
MPI_Recv(message, 100, MPI_CHAR, source,
15, MPI_COMM_WORLD, & status);
}
}
MPI_Finalize();
I'd go like this. First of all, I'd have this code
int A[100000][100000];
int B[100000][100000];
replaced with dynamic allocations. You don't need all that memory for each and every process.
Then, I'd send array A to different processes. By rows.
What is the "height" of data frame (number of rows):
delta = (100000 - 2) / (numprocs-1);    // we don't count first and last row
reminder = (100000 - 2) % (numprocs-1); // it might be that we need to give
                                        // a little bit more to calculate
                                        // to one of the processes
// we are starting from the row with idx=1 (second row) and we want to finish when
// we hit the last row
if(myrank == 0) {
    for( int i=1; i < numprocs; i++ ) {
        // +200000 - we need two more rows to calculate data
        int how_many_bytes = delta * 100000 + 200000;
        if(reminder != 0 && i == (numprocs-1)) {
            how_many_bytes += reminder * 100000;
        }
        MPI_Send(&(A[(i-1)*delta][0]), how_many_bytes, MPI_INT, i, 0,
                 MPI_COMM_WORLD);
    }
} else {
    // allocate memory for the received rows
    int *local_array = NULL;
    int how_many_bytes = delta * 100000 + 200000;
    if(reminder != 0 && myrank == (numprocs-1)) {   // the last process takes the remainder
        how_many_bytes += reminder * 100000;
    }
    local_array = malloc(how_many_bytes * sizeof(int));
    MPI_Status status;
    MPI_Recv(
        local_array,
        how_many_bytes,
        MPI_INT,
        0,
        0,
        MPI_COMM_WORLD,
        &status);
}
// perform calculations for each and every slice
// remembering that we always have one extra row on
// top and one at the bottom
// send data back to master (as above, but vice versa).
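To round this off, here is a rough sketch of those remaining steps on the worker side, under the same assumptions as above (delta rows per worker plus the two extra rows, the last worker taking the remainder). The names local_rows, local_B and W are introduced here for illustration, and, as in the code above, possible int overflow with array sizes this large is ignored:
// continuing on the worker side, after the MPI_Recv above
// (sketch: local_rows, local_B and W are names introduced here)
int local_rows = delta;
if (reminder != 0 && myrank == (numprocs - 1))
    local_rows += reminder;

const size_t W = 100000;   // row length
// local_array holds local_rows + 2 rows: row 0 and row local_rows + 1
// are the extra rows received from the neighbouring slices
int *local_B = calloc((size_t)local_rows * W, sizeof(int));

for (size_t r = 1; r <= (size_t)local_rows; r++) {   // skip the two extra rows
    for (size_t c = 1; c < W - 1; c++) {             // skip first and last column
        local_B[(r - 1) * W + c] =
              local_array[(r - 1) * W + c]   // A[i-1][j]
            + local_array[(r + 1) * W + c]   // A[i+1][j]
            + local_array[r * W + (c - 1)]   // A[i][j-1]
            + local_array[r * W + (c + 1)]   // A[i][j+1]
            - 4 * local_array[r * W + c];    // - 4*A[i][j]
    }
}

// send the computed rows back; rank 0 then copies them into the right part of B
MPI_Send(local_B, local_rows * 100000, MPI_INT, 0, 1, MPI_COMM_WORLD);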

parallel sort using mpi

I am trying to sort distributed arrays with MPI. Each array is allocated locally.
For example we have {1-7-4-12} {3-7-5-9} {12-15-2-16} {10-8-11-13}
and we want {1-2-3-4} {5-6-7-8} {9-10-11-12} {13-14-15-16}.
So I use the odd-even strategy. With 2 processes it works in every case, but when I try with more processes I get new values. For my example I can get {23-2-3-4}. I think my problem comes from memory allocation, but I can't find where or what I am doing wrong...
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MASTER 0
#define MIN(a,b) ((a)<(b)?(a):(b))
#define BLOCK_LOW(id,p,n) ((id)*(n)/(p))
#define BLOCK_HIGH(id,p,n) \
(BLOCK_LOW((id)+1,p,n)-1)
#define BLOCK_SIZE(id,p,n) \
(BLOCK_LOW((id)+1, p, n)-BLOCK_LOW(id, p , n))
#define BLOCK_OWNER(index,p,n) \
(((p)*(index+1)-1)/(n))
int nbProcess, id, n; //n = number of value
void printTabByProcess(int *T){
int i = 0;
int size = BLOCK_SIZE(id, nbProcess, n);
printf("Tab n°%d [ ", id, size);
for(i; i < size; i++){
printf(" %d ", T[i]);
}
printf(" ]\n");
}
void fusion(int *t,int deb1,int fin1,int fin2){
int *table1;
int deb2=fin1+1;
int compt1=deb1;
int compt2=deb2;
int i;
table1=(int*)malloc((fin1-deb1+1)*sizeof(int));
for(i=deb1;i<=fin1;i++) {
table1[i-deb1]=t[i];
}
for(i=deb1;i<=fin2;i++){
if(compt1==deb2)
break;
else if(compt2==(fin2+1)){
t[i]=table1[compt1-deb1];
compt1++;
}
else if(table1[compt1-deb1]<t[compt2]){
t[i]=table1[compt1-deb1];
compt1++;
}
else{
t[i]=t[compt2];
compt2++;
}
}
free(table1);
}
void tri_fusion(int*t,int deb,int fin){
if(deb!=fin){
int milieu=(fin+deb)/2;
tri_fusion(t,deb,milieu);
tri_fusion(t,milieu+1,fin);
fusion(t,deb,milieu,fin);
}
}
int* fusion2(int* t1, int* t2, int size1, int size2){
int* buffer = malloc(sizeof(int)*(size1 + size2));
int index1 = 0;
int index2 = 0;
int i = 0;
for(i; i < (size1 + size2) - 1; i++){
if(t1[index1] < t2[index2]){
buffer[i] = t1[index1];
index1++;
}else{
buffer[i] = t2[index2];
index2++;
}
}
if(index1 == size1 - 1 ){
buffer[size1 + size2 - 1] = t1[index1];
}else{
buffer[size1 + size2 - 1] = t2[index2];
}
return buffer;
}
/*
*
* OUR FUNCTION TO PARALLEL SORT
*
*/
void TD_trier(int* T){
MPI_Status status;
int size = BLOCK_SIZE(id, nbProcess, n);
int receive_size = 0;
int* receive;
int* array_tmp;
int i = 0;
tri_fusion(T, 0, size - 1);
MPI_Barrier(MPI_COMM_WORLD);
for(i; i < nbProcess; i++){
if(i%2==0){
if(id % 2 == 1){//send to left
MPI_Send(&size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Recv(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
}else {
MPI_Recv(&receive_size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(&array_tmp[size], receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
T = realloc(array_tmp, sizeof(int) * size);
}
if(id == 1){
//~ printTabByProcess(T);
}
}else if(i%2 == 1 && id < nbProcess-1){ //send to right
if(id % 2 == 1){
MPI_Send(&size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
//printTabByProcess(T);
MPI_Recv(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
}else if(id != 0 && id%2 ==0) {
MPI_Recv(&receive_size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//printTabByProcess(receive);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(array_tmp, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
printTabByProcess(&array_tmp[2]);
T = array_tmp + size;
printTabByProcess(T);
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
//printTabByProcess(T);
}
int generateRandomValue(){
return rand() % 100;
}
//init array with "random" value
int* TD_init(int n){
int i = 0;
int indiceDerniere = (id+1)*n/nbProcess -1;
int indicePremiere = id*n/nbProcess;
int* arrayLocal;
int localSize = indiceDerniere - indicePremiere +1;
arrayLocal = malloc(sizeof(int)*localSize);
//~ printf("id : %d - nbCase : %d (debut : %d, fin : %d)\n",
//~ id, localSize, indicePremiere, indiceDerniere);
for(i; i < localSize; i++){
arrayLocal[i] = generateRandomValue() - id;
}
printTabByProcess(arrayLocal);
return arrayLocal;
}
int main (int argc, char *argv[]){
//int n = 0;
int *dataLocal;
int dest;
int x;
int success;
MPI_Status status;
srand(time(NULL));
/***** Initializations *****/
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nbProcess); // numtask contains the number of processors
MPI_Comm_rank(MPI_COMM_WORLD, &id); // taskid, determines the process number
//~ printf ("MPI task %d has started...\n", id);
//~ tag2 = 1;
//~ tag1 = 2;
MPI_Barrier (MPI_COMM_WORLD);
/***** Master task only ******/
if (id == MASTER){
printf("Chose a number of value :");
scanf("%d",&n);
/* Send the number of cases */
for (dest=1; dest<nbProcess; dest++) {
MPI_Send(&n, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); //send number of value
}
} /* end of master section */
/***** Non-master tasks only *****/
if (id > MASTER) {
/* Receive the number of cases */
MPI_Recv(&n, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD, &status);
}
MPI_Barrier (MPI_COMM_WORLD);
dataLocal = TD_init(n);
MPI_Barrier (MPI_COMM_WORLD);
if(id == 0){
printf("__________________________________________\n");
}
TD_trier(dataLocal);
MPI_Finalize();
}
The trouble may come from the fusion2 function: index1 can become higher than size1. In fact, the MPI part works correctly; the code works once these tests are added. Here is a version that is not optimal, but...
int* fusion2(int* t1, int* t2, int size1, int size2){
    int* buffer = malloc(sizeof(int)*(size1 + size2));
    int index1 = 0;
    int index2 = 0;
    int i = 0;
    for(i; i < (size1 + size2); i++){
        if(index1 == size1){
            buffer[i] = t2[index2];
            index2++;
        }else{
            if(index2 == size2){
                buffer[i] = t1[index1];
                index1++;
            }else{
                if(t1[index1] < t2[index2]){
                    buffer[i] = t1[index1];
                    index1++;
                }else{
                    buffer[i] = t2[index2];
                    index2++;
                }
            }
        }
    }
    return buffer;
}
Watch out for memory management.
For example: did you free T before doing
T = realloc(array_tmp, sizeof(int) * size);
Did you free "receive"? Did you free "array_tmp" in the second part?
I fear memory leaks exist... It might be better to avoid allocation in fusion2, and even in the loops. Allocating array_tmp and receive once at the start, with "enough" space, might be safer (and faster?).
Bye,
Francis
More: qsort (in stdlib) may be faster for the local sort.
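As a sketch of that last suggestion, the local tri_fusion(T, 0, size - 1) call could be replaced by qsort with an integer comparator (assuming T holds size ints, as in the posted code; stdlib.h is already included there, and cmp_int is a helper introduced here):
/* sketch: cmp_int is a helper introduced for this example,
   ascending order of ints */
static int cmp_int(const void *a, const void *b)
{
    int x = *(const int *)a;
    int y = *(const int *)b;
    return (x > y) - (x < y);
}

/* inside TD_trier, instead of tri_fusion(T, 0, size - 1); */
qsort(T, size, sizeof(int), cmp_int);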
