Related
I have the following code
MPI_Init(NULL, NULL);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int n_chunks = 16;
assert(n % n_chunks == 0);
int chunk_size = n / n_chunks;
int psizes[2] = {0, 0};
MPI_Dims_create(world_size, 2, psizes);
MPI_Datatype *dist_types = (MPI_Datatype *) malloc(world_size * sizeof(MPI_Datatype));
for (int i = 0; i < world_size; i++) {
int sizes[2] = {n, n};
int distribs[2] = {MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC};
int dargs[2] = {chunk_size, chunk_size};
MPI_Type_create_darray(world_size, i, 2,
sizes, distribs, dargs, psizes,
MPI_ORDER_C, MPI_DOUBLE, &dist_types[i]);
MPI_Type_commit(&dist_types[i]);
}
MPI_Request *send_requests;
if (rank == 0) {
send_requests = (MPI_Request *) malloc(world_size * sizeof(MPI_Request));
for (int i = 0; i < world_size; i++) {
MPI_Isend(&A[0][0], 1, dist_types[i],
i, 0, MPI_COMM_WORLD, &send_requests[i]);
}
}
int dist_size;
MPI_Type_size(dist_types[rank], &dist_size);
dist_size /= sizeof(double);
double *D = (double *) malloc(dist_size * sizeof(double));
MPI_Request recv_request;
MPI_Irecv(D, dist_size, MPI_DOUBLE,
0, 0, MPI_COMM_WORLD, &recv_request);
MPI_Wait(&recv_request, MPI_STATUS_IGNORE);
if (rank == 0) {
MPI_Waitall(1, send_requests, MPI_STATUSES_IGNORE);
}
int m = n / psizes[0];
if (rank == 0) {
for (int i = 0; i < m; i++) {
for (int j = 0; j < m; j++) {
printf("%.2lf ", D[i * m + j]);
}
printf("\n");
}
When I print out the matrix D, I don't get a block cyclic view of A as I'd expect. Rather, the entries are all jumbled up and apart from the top row they look quite random.
Hence, my question is, can I generally expect this to work or are you not really supposed to use MPI_Type_create_darray in this situation. I'm wondering because from what I could find online, people only mention the function in the context of MPI-IO and I couldn't locate a single example of it being used in a way similar to what I have.
I'm an MPI novice, so maybe I'm just doing something wrong that's unrelated to the type I'm using. Also, I did read that it's not really ideal to distribute your matrix this way and rather use MPI-IO, but I can't really change that.
The code I am trying to do has to implement a skribbl io game. I am working with MPI, and the processes are divided between the ranks (rank 0 is the main, it assigns the drawer, rank (drawer) draws, collects the info and the other ones are the players). I have two problems with this code (the second one originates from the first one). The first problem is that although there are cases in the code for the processes to know what they need to do, the players never enter their respective if-s (if (rank != drawer)). I put printf-s before and after the if statement; the one before is called, the one after is not. The second problem is that the MPI_Gather functions from all the cases don't work as expected. I want to send a string array (char[][]), but the drawer's function just waits for data, and does not get any (probably because of the other ranked processes not being able to enter their if's).
Can anyone help me with this?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct dataa{
char fUname[18], sUname[18], tUname[18];
} Data;
short ran(int lower_limit, int upper_limit, unsigned int *seed) //random generáló
{
return (short) ((double) rand_r(seed) / (RAND_MAX + 1.0) * (upper_limit - lower_limit + 1) + lower_limit);
}
void generate(char fUname[18], char sUname[18], char tUname[18], MPI_Datatype* strct) {
int arrayOfBlocklengths[3] = {18, 18, 18};
MPI_Datatype arrayOfTypes[3] = {MPI_CHAR, MPI_CHAR, MPI_CHAR};
MPI_Aint fAddr, sAddr, tAddr;
MPI_Aint arrayOfDisplacements[3] = {0};
MPI_Get_address(fUname, &fAddr);
MPI_Get_address(sUname, &sAddr);
MPI_Get_address(tUname, &tAddr);
arrayOfDisplacements[1] = sAddr - fAddr;
arrayOfDisplacements[2] = tAddr - fAddr;
MPI_Type_create_struct(3, arrayOfBlocklengths, arrayOfDisplacements, arrayOfTypes, strct);
MPI_Type_commit(strct);
}
int main(int argc, const char* argv[]) {
if (argc != 1) {
printf("man no good i no need parameter bro\n");
exit(1);
}
int n, rank, i = 0;
//printf("%d\n", n);
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &n);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int nrOfGames, wordChooser;
unsigned int seed = getpid();
if (rank == 0) {
nrOfGames = ran(5, 15, &seed);
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
} else {
MPI_Bcast(&nrOfGames, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process #%d: nrOfGames: %d\n", rank, nrOfGames);
}
for (i = 0; i < nrOfGames; i++) {
printf("%d. iteration: ranks are: %d\n", i, rank);
/*if (i % n != rank) {
continue;
}*/
if (rank == 0) {
int drawerRank = ran(1, n - 1, &seed);
int j;
MPI_Bcast(&drawerRank, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Main process: drawer generated, their rank is %d.\n", drawerRank);
char fileName[15] = "./threewords.sh";
FILE *f = popen(fileName, "r");
Data data;
fscanf(f, "%s %s %s", data.fUname, data.sUname, data.tUname);
printf("Main process: generated usernames are: %s %s %s\n", data.fUname, data.sUname, data.tUname);
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Main process: generated the structure\n");
MPI_Send(&data, 1, strct, drawerRank, 0, MPI_COMM_WORLD);
printf("Main process: new struct sent\n");
char badMsg[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
int as = 0;
for (as = 0; as < 5; as++) {
printf("szo: %s ", badMsg[as]);
}
char guesses[n * 6][18];
MPI_Gather(badMsg, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawerRank, MPI_COMM_WORLD);
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
MPI_Recv(&pointsPerPlayer, n - 1, MPI_INT, drawerRank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Main process: Receive command sent.\n");
continue;
}
printf("\t\t\trank: %d\n", rank);
if (rank != 0) {
int drawer;
MPI_Bcast(&drawer, 1, MPI_INT, 0, MPI_COMM_WORLD);
printf("Process with rank %d got the drawer, %d.\n", rank, drawer);
if (rank == drawer) {
printf("I am the drawer, rank %d.\n", drawer);
//rajzolo eset
char wordToDraw[18];
int* pointsPerPlayer = (int*) calloc (n - 1, sizeof(int));
Data data;
MPI_Datatype strct;
generate(data.fUname, data.sUname, data.tUname, &strct);
printf("Drawer process generated the structure.\n");
Data recData;
MPI_Recv(&recData, 1, strct, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("\nDrawer process received the structure from the main process, usernames are %s %s %s\n", recData.fUname, recData.sUname, recData.tUname);
MPI_Type_free(&strct);
wordChooser = ran(1, 3, &seed);
if (wordChooser == 1) {
strcpy(wordToDraw, data.fUname);
} else if (wordChooser == 2) {
strcpy(wordToDraw, data.sUname);
} else {
strcpy(wordToDraw, data.tUname);
}
//lerajzolja, most meg varja a valaszokat
int j, k, guessed = 0;
char guessesPerThr[5][18] = {"rossz", "rossz", "rossz", "rossz", "rossz"};
char guesses[n * 6][18];
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
printf("sus\n");
j = 1;
k = 0;
while (j < n) {
if (j != 0 && j != rank) {
k = 0;
while (k < 5) {
if (!strcmp(wordToDraw, guessesPerThr[j * 5 + k])) {
guessed++;
pointsPerPlayer[j] += 5 - k;
break;
}
k++;
}
} else {
if (j == 0) {
pointsPerPlayer[j] = 0;
}
}
j++;
}
if (guessed) {
pointsPerPlayer[rank] = guessed - (n - guessed);
if (pointsPerPlayer[i] < 0) {
pointsPerPlayer[i] *= -1;
}
}
MPI_Send(&pointsPerPlayer, n - 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
continue;
}
printf("\t\t\t\t\t\t\trank:%d \t drawer: %d\n", rank, drawer);
if (rank != drawer) {
int drawer;
printf("u ok m8?\n");
MPI_Recv(&drawer, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("Process #%d: The drawer is %d.\n", rank, drawer);
FILE *g = popen("./fivewords.sh", "r");
char guessesPerThr[5][18], guesses[n * 6][18];
int j;
for (j = 0; j < 5; j++) {
fscanf(g, "%s", guessesPerThr[j]);
}
MPI_Gather(guessesPerThr, 5 * 18, MPI_CHAR, guesses, 5 * 18, MPI_CHAR, drawer, MPI_COMM_WORLD);
}
}
}
MPI_Finalize();
return 0;
}
I want to implement the Cannon Algorithm using MPI in C using cartesian communicators which are shifted using the default functions and by sending 2-dimensional blocks from the 2 matrices.
I have tried to follow a couple of tutorials found online, but I realized none were implemented the way I wanted them to, using both 2-dimensional blocks and cartesian communicators.
EDIT: I have managed to get over the error after realizing that I was using the proc_grid_size variable in a wrong way, confusing the size of the process matrix with the block size and entering into some unallocated memory area.
I am running with an input of 25 processes and 2 10*10 matrices stored in 2 different files.
I am currently trying to implement the shift operations using the MPI_Cart_Shift function. But I don't know how to send the block over to the neighbors.
This is my current implementation of this specific part, which is not working (the application just hangs):
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
int nlocal;
int npes, dims[2], periods[2];
int myrank, my2drank, mycoords[2];
int uprank, downrank, leftrank, rightrank, coords[2];
int shiftsource, shiftdest;
MPI_Status status;
MPI_Comm comm_2d;
// Get the communicator related information
MPI_Comm_size(MPI_COMM_WORLD, &npes);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
// Set up the Cartesian topology
dims[0] = dims[1] = proc_matrix_size;//sqrt(npes);
// Set the periods for wraparound connections
periods[0] = periods[1] = 1;
// Create the Cartesian topology, with rank reordering
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);
// Get the rank and coordinates with respect to the new topology
MPI_Comm_rank(comm_2d, &my2drank);
MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);
// Compute ranks of the up and left shifts
// Get line neighbors (direction = 1, displacement = 1)
MPI_Cart_shift(comm_2d, 1, 1, &leftrank, &rightrank);
// Get column neighbors (direction = 0, displacement = 1)
MPI_Cart_shift(comm_2d, 0, 1, &uprank, &downrank);
// Determine the dimension of the local matrix block
nlocal = block_size;// n / dims[0];
MPI_Cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(a[0][0]), 1, subarrtype,
shiftdest, 1, shiftsource, 1, comm_2d, &status);
MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);
MPI_Sendrecv_replace(&(b[0][0]), 1, subarrtype,
shiftdest, 1, shiftsource, 1, comm_2d, &status);
After closing the application, I discover that the root process is the only one that hangs:
F:\Facultate\AN_4\PDC\Labs\MPI\Cannon\x64\Release>mpiexec -np 25 Cannon.exe
a.txt b.txt> mpiexec aborting job...
job aborted:
[ranks] message
[0] job terminated by the user
[1-24] terminated
---- error analysis -----
[0] on DESKTOP-JB1815M
ctrl-c was hit. job aborted by the user.
---- error analysis -----
INITIAL SOLVED CODE:
int malloc2D(int ***array, int n, int m) {
int i;
/* allocate the n*m contiguous items */
int *p = (int*) calloc(n*m, sizeof(int));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (int**) calloc(n, sizeof(int*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i = 0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2D(int ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
if (argc != 3) {
fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Declare file pointers
FILE* fa = NULL;
FILE* fb = NULL;
// Declare matrix pointers
int **A = NULL;
int **B = NULL;
int **C = NULL;
// Declare matrix dimensions
int ma = 0, na = 0;
int mb = 0, nb = 0;
// Nr of processes on each line/column in process mesh
int proc_matrix_size = (int)sqrt(world_size);
// Single value for quadratic matrix size
int n = 0;
// Nr of elements on each line/column in local matrix
// of each process
int block_size = 0;
// Open files and read matrices
if (world_rank == 0)
{
fa = fopen(argv[1], "r");
fb = fopen(argv[2], "r");
// Read matrix dymensions
fscanf(fa, "%d %d\n", &ma, &na);
fscanf(fb, "%d %d\n", &mb, &nb);
// Check if matrices are quadratic
if ((ma != na) && (na != mb) && (mb != nb))
{
printf("Invalid matrices dimensions\n");
return 0;
}
n = na;
// Check if sqrt(nr_processes) divides matrix dimension
if ((n % proc_matrix_size != 0) || (world_size % proc_matrix_size != 0))
{
printf("Number of processes does not fit matrix size\n");
return 0;
}
block_size = n / proc_matrix_size;
malloc2D(&A, n, n);
malloc2D(&B, n, n);
malloc2D(&C, n, n);
// Read matrices A & B from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fa, "%d ", &A[i][j]);
fscanf(fb, "%d ", &B[i][j]);
}
fscanf(fa, "\n");
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/*
Divide matrices in blocks and send each block to the corresponding process
*/
// Declare global pointers to matrices
int *globalAptr = NULL;
int *globalBptr = NULL;
int *globalCptr = NULL;
// Declare global return pointers
int *globalA2ptr = NULL;
int *globalB2ptr = NULL;
int **A2 = NULL;
int **B2 = NULL;
// Declare local matrix pointers
int **a = NULL;
int **b = NULL;
int **c = NULL;
malloc2D(&A2, n, n);
malloc2D(&B2, n, n);
if (world_rank == 0)
{
globalAptr = &(A[0][0]);
globalBptr = &(B[0][0]);
globalA2ptr = &(A2[0][0]);
globalB2ptr = &(B2[0][0]);
globalCptr = &(C[0][0]);
}
malloc2D(&a, block_size, block_size);
malloc2D(&b, block_size, block_size);
malloc2D(&c, block_size, block_size);
// Sizes of input global matrix
int sizes[2] = { n, n };
// Sizes of each block
int subsizes[2] = { block_size, block_size };
// Begining of current block
int starts[2] = { 0,0 };
// Declare subarray type
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
MPI_Type_commit(&subarrtype);
// Scatter the A and B to all processes
int* sendcounts = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));
int* displs = (int*)malloc(proc_matrix_size * proc_matrix_size * sizeof(int));
if (world_rank == 0)
{
for (int i = 0; i < proc_matrix_size * proc_matrix_size; i++)
sendcounts[i] = 1;
int disp = 0;
for (int i = 0; i < proc_matrix_size; i++) {
for (int j = 0; j < proc_matrix_size; j++) {
displs[i * proc_matrix_size + j] = disp;
disp += 1;
}
disp += ((n / proc_matrix_size)-1) * proc_matrix_size;
}
}
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
for (int i = 0; i < block_size; i++) {
for (int j = 0; j < block_size; j++) {
a[i][j] = 10 + a[i][j];
b[i][j] = 10 + b[i][j];
}
}
// It all goes back to process 0
MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
globalA2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
globalB2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Finalize();
return 0;
}
OLD:
I would like to mention that at the moment, I am trying to send blocks over the default communicator and planning to implement the shifting operations and the cartesian communicator after managing to send the matrix blocks.
The help I need is with regard to the Scatterv function which throws the following error:
job aborted: [ranks] message
[0] fatal error Fatal error in MPI_Scatterv: Invalid count, error
stack: MPI_Scatterv(sbuf=0x0000029262048D40, scnts=0x00000292620482B0,
displs=0x0000029262048250, dtype=USER,
rbuf=0x000002926203ED30, rcount=25, MPI_INT, root=0, MPI_COMM_WORLD)
failed Negative count, value is -1912594387
[1-7] terminated
This is the code I have written until now:
#include "stdafx.h"
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include <assert.h>
#include <cstdlib>
#include <math.h>
int malloc2D(int ***array, int n, int m) {
int i;
/* allocate the n*m contiguous items */
int *p = (int*) malloc(n*m * sizeof(int));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (int**) malloc(n * sizeof(int*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i = 0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int free2D(int ***array) {
/* free the memory - the first element of the array is at the start */
free(&((*array)[0][0]));
/* free the pointers into the memory */
free(*array);
return 0;
}
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
if (argc != 3) {
fprintf(stderr, "Not enough arguments passed! Make sure you pass 2 filenames.\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Declare file pointers
FILE* fa = NULL;
FILE* fb = NULL;
// Declare matrix pointers
int **A = NULL;
int **B = NULL;
int **C = NULL;
// Declare matrix dymensions
int ma = 0, na = 0;
int mb = 0, nb = 0;
// Nr of processes on each line/column in process mesh
int proc_grid_size = (int)sqrt(world_size);
// Single value for quadratic matrix size
int n = 0;
// Nr of elements on each line/column in local matrix
// of each process
int block_size = 0;
// Open files and read matrices
if (world_rank == 0)
{
fa = fopen(argv[1], "r");
fb = fopen(argv[2], "r");
// Read matrix dymensions
fscanf(fa, "%d %d\n", &ma, &na);
fscanf(fb, "%d %d\n", &mb, &nb);
// Check if matrices are quadratic
if ((ma != na) && (na != mb) && (mb != nb))
{
printf("Invalid matrices dimensions\n");
return 0;
}
n = na;
// Check if sqrt(nr_processes) divides matrix dimension
if ((n % proc_grid_size != 0) || (world_size % proc_grid_size != 0))
{
printf("Number of processes does not fit matrix size\n");
return 0;
}
block_size = n / proc_grid_size;
// Initialize matrices
A = (int**)calloc(n, sizeof(int*));
B = (int**)calloc(n, sizeof(int*));
//C = (int**)calloc(n, sizeof(int*));
for (int i = 0; i < n; i++)
{
A[i] = (int*)calloc(n, sizeof(int));
B[i] = (int*)calloc(n, sizeof(int));
//C[i] = (int*)calloc(n, sizeof(int));
}
// Read matrix A from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fa, "%d ", &A[i][j]);
printf("%d ", A[i][j]);
}
fscanf(fa, "\n");
printf("\n");
}
// Read matrix B from file
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
fscanf(fb, "%d ", &B[i][j]);
printf("%d ", B[i][j]);
}
fscanf(fb, "\n");
printf("\n");
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else {
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&block_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
/*
Divide matrices in blocks and send each block to the corresponding process
*/
// Sizes of input global matrix
int sizes[2] = { n, n };
// Sizes of each block
int subsizes[2] = { block_size, block_size };
// Begining of current block
int starts[2] = { 0,0 };
// Declare subarray type
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &type);
MPI_Type_create_resized(type, 0, block_size * sizeof(int), &subarrtype);
MPI_Type_commit(&subarrtype);
// Declare global pointers to matrices
int *globalAptr = NULL;
int *globalBptr = NULL;
int **A2 = NULL;
int **B2 = NULL;
malloc2D(&A2, n, n);
malloc2D(&B2, n, n);
// Declare global return pointers
int *globalA2ptr = NULL;
int *globalB2ptr = NULL;
if (world_rank == 0)
{
globalAptr = &(A[0][0]);
globalBptr = &(B[0][0]);
globalA2ptr = &(A2[0][0]);
globalB2ptr = &(B2[0][0]);
}
// Declare local matrix pointers
int **a = NULL;
int **b = NULL;
malloc2D(&a, block_size, block_size);
malloc2D(&b, block_size, block_size);
// Scatter the A and B to all processes
int* sendcounts = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));
int* displs = (int*)malloc(proc_grid_size * proc_grid_size * sizeof(int));
if (world_rank == 0)
{
for (int i = 0; i < proc_grid_size * proc_grid_size; i++)
sendcounts[i] = 1;
int disp = 0;
for (int i = 0; i < proc_grid_size; i++) {
for (int j = 0; j < proc_grid_size; j++) {
displs[i * proc_grid_size + j] = disp;
disp += 1;
}
disp += ((block_size) - 1) * proc_grid_size;
}
for (int i = 0; i < proc_grid_size * proc_grid_size; i++)
{
printf("Send cound: %d\n", sendcounts[i]);
}
}
MPI_Scatterv(globalAptr, sendcounts, displs, subarrtype, &(a[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
MPI_Scatterv(globalBptr, sendcounts, displs, subarrtype, &(b[0][0]),
block_size * block_size, MPI_INT,
0, MPI_COMM_WORLD);
// Now each processor has its local array, and can process it
for (int i = 0; i < block_size; i++) {
for (int j = 0; j < block_size; j++) {
a[i][j] = 10 + a[i][j];
b[i][j] = 10 + b[i][j];
}
}
// It all goes back to process 0
MPI_Gatherv(&(a[0][0]), block_size * block_size, MPI_INT,
globalA2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
MPI_Gatherv(&(b[0][0]), block_size * block_size, MPI_INT,
globalB2ptr, sendcounts, displs, subarrtype,
0, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Thank you very much!
I am trying to implement a MPI of the filter code below, but I'm facing difficulties doing it. How should it be done?:
Filter code:
int A[100000][100000];
int B[100000][100000];
for (int i=1; i<(100000 - 1); i++)
for (int i=1; j<(100000 - 1); j++)
B[i][j] = A[i-1][j] + A[i+1][j] + A[i][j-1] + A[i][j+1] - 4*A[i][j];
This is what I have tried while following the six functions of MPI:
int myrank; /* Rank of process */
int numprocs; /* Number of processes */
int source; /* Rank of sender */
int dest; /* Rank of receiver */
char message[100]; /* Storage for the message */
MPI_Status status; /* Return status for receive */
MPI_Init( & argc, & argv);
MPI_Comm_size(MPI_COMM_WORLD, & numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, & myrank);
if (myrank != 0)
{
dest = 0;
MPI_Send(message, strlen(message) + 1,
MPI_CHAR, dest, 15, MPI_COMM_WORLD);
} else {
for (source = 1; source < numprocs; source++) {
MPI_Recv(message, 100, MPI_CHAR, source,
15, MPI_COMM_WORLD, & status);
}
}
MPI_Finalize();
I'd go like this. First of all, I'd have this code
int A[100000][100000];
int B[100000][100000];
replaced with dynamic allocations. You don't need all that memory for each and every process.
Then, I'd send array A to different processes. By rows.
What is the "height" of data frame (number of rows):
delta = (100000 - 2) / (numprocs-1); // we don't count first and last row
reminder = (100000 - 2) % (numprocs-1); // it might be that we need to give
// little bit more to calculate
// to one of the processes
// we are starting from row with idx=1 (second row) and we want to finish when
// we hit last row
if(myrank == 0) {
for( int i=1; i < numprocs; i++ ) {
// +100000 - we need two more rows to calculate data
int how_many_bytes = delta * 100000 + 200000;
if(reminder != 0 && i == (numprocs-1)) {
how_many_bytes += reminder * 100000;
}
MPI_Send(&(A[(i-1)*delta][0]), how_many_bytes, MPI_INT, i, 0,
MPI_COMM_WORLD);
}
} else {
// allocate memory for bytes
int *local_array = NULL;
int how_many_bytes = delta * 100000 + 200000;
if(reminder != 0 && i == (numprocs-1)) {
how_many_bytes += reminder * 100000;
}
local_array = malloc(how_many_bytes * sizeof(int));
MPI_Status status;
MPI_Recv(
local_array,
how_many_bytes,
MPI_INT,
0,
0,
MPI_COMM_WORLD,
&status);
}
// perform calculations for each and every slice
// remembering that we always have on extra row on
// top and one at the bottom
// send data back to master (as above, but vice versa).
I try to sort different array with mpi. Every array are allocate locally.
for example we have {1-7-4-12} {3-7-5-9} {12-15-2-16} {10-8-11-13}
and we want {1-2-3-4}{5-6-7-8}{9-10-11-12}{13-14-15-16}
So I use odd-even strategy. For 2proccess it's works in every case but when i try with more process i have new value. For my example i can have {23-2-3-4}. I think my problem is from allocate memory but i don't find where and what i do wrong...
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MASTER 0
#define MIN(a,b) ((a)<(b)?(a):(b))
#define BLOCK_LOW(id,p,n) ((id)*(n)/(p))
#define BLOCK_HIGH(id,p,n) \
(BLOCK_LOW((id)+1,p,n)-1)
#define BLOCK_SIZE(id,p,n) \
(BLOCK_LOW((id)+1, p, n)-BLOCK_LOW(id, p , n))
#define BLOCK_OWNER(index,p,n) \
(((p)*(index+1)-1)/(n))
int nbProcess, id, n; //n = number of value
void printTabByProcess(int *T){
int i = 0;
int size = BLOCK_SIZE(id, nbProcess, n);
printf("Tab n°%d [ ", id, size);
for(i; i < size; i++){
printf(" %d ", T[i]);
}
printf(" ]\n");
}
void fusion(int *t,int deb1,int fin1,int fin2){
int *table1;
int deb2=fin1+1;
int compt1=deb1;
int compt2=deb2;
int i;
table1=(int*)malloc((fin1-deb1+1)*sizeof(int));
for(i=deb1;i<=fin1;i++) {
table1[i-deb1]=t[i];
}
for(i=deb1;i<=fin2;i++){
if(compt1==deb2)
break;
else if(compt2==(fin2+1)){
t[i]=table1[compt1-deb1];
compt1++;
}
else if(table1[compt1-deb1]<t[compt2]){
t[i]=table1[compt1-deb1];
compt1++;
}
else{
t[i]=t[compt2];
compt2++;
}
}
free(table1);
}
void tri_fusion(int*t,int deb,int fin){
if(deb!=fin){
int milieu=(fin+deb)/2;
tri_fusion(t,deb,milieu);
tri_fusion(t,milieu+1,fin);
fusion(t,deb,milieu,fin);
}
}
int* fusion2(int* t1, int* t2, int size1, int size2){
int* buffer = malloc(sizeof(int)*(size1 + size2));
int index1 = 0;
int index2 = 0;
int i = 0;
for(i; i < (size1 + size2) - 1; i++){
if(t1[index1] < t2[index2]){
buffer[i] = t1[index1];
index1++;
}else{
buffer[i] = t2[index2];
index2++;
}
}
if(index1 == size1 - 1 ){
buffer[size1 + size2 - 1] = t1[index1];
}else{
buffer[size1 + size2 - 1] = t2[index2];
}
return buffer;
}
/*
*
* OUR FUNCTION TO PARALLEL SORT
*
*/
void TD_trier(int* T){
MPI_Status status;
int size = BLOCK_SIZE(id, nbProcess, n);
int receive_size = 0;
int* receive;
int* array_tmp;
int i = 0;
tri_fusion(T, 0, size - 1);
MPI_Barrier(MPI_COMM_WORLD);
for(i; i < nbProcess; i++){
if(i%2==0){
if(id % 2 == 1){//send to left
MPI_Send(&size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
MPI_Recv(T, size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
}else {
MPI_Recv(&receive_size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(&array_tmp[size], receive_size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
T = realloc(array_tmp, sizeof(int) * size);
}
if(id == 1){
//~ printTabByProcess(T);
}
}else if(i%2 == 1 && id < nbProcess-1){ //send to right
if(id % 2 == 1){
MPI_Send(&size, 1, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
MPI_Send(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD);
//printTabByProcess(T);
MPI_Recv(T, size, MPI_INT, id + 1, 1, MPI_COMM_WORLD, &status);
}else if(id != 0 && id%2 ==0) {
MPI_Recv(&receive_size, 1, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//receive = malloc(sizeof(int) * size);
MPI_Recv(receive, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD, &status);
//printTabByProcess(receive);
array_tmp = fusion2(T, receive, size, receive_size);
MPI_Send(array_tmp, receive_size, MPI_INT, id - 1, 1, MPI_COMM_WORLD);
printTabByProcess(&array_tmp[2]);
T = array_tmp + size;
printTabByProcess(T);
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
//printTabByProcess(T);
}
int generateRandomValue(){
return rand() % 100;
}
//init array with "random" value
int* TD_init(int n){
int i = 0;
int indiceDerniere = (id+1)*n/nbProcess -1;
int indicePremiere = id*n/nbProcess;
int* arrayLocal;
int localSize = indiceDerniere - indicePremiere +1;
arrayLocal = malloc(sizeof(int)*localSize);
//~ printf("id : %d - nbCase : %d (debut : %d, fin : %d)\n",
//~ id, localSize, indicePremiere, indiceDerniere);
for(i; i < localSize; i++){
arrayLocal[i] = generateRandomValue() - id;
}
printTabByProcess(arrayLocal);
return arrayLocal;
}
int main (int argc, char *argv[]){
//int n = 0;
int *dataLocal;
int dest;
int x;
int success;
MPI_Status status;
srand(time(NULL));
/***** Initializations *****/
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nbProcess); //numtask contient le nombre de processeur
MPI_Comm_rank(MPI_COMM_WORLD, &id); //taskid, determine le numero du processus
//~ printf ("MPI task %d has started...\n", id);
//~ tag2 = 1;
//~ tag1 = 2;
MPI_Barrier (MPI_COMM_WORLD);
/***** Master task only ******/
if (id == MASTER){
printf("Chose a number of value :");
scanf("%d",&n);
/* Send the number of cases */
for (dest=1; dest<nbProcess; dest++) {
MPI_Send(&n, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); //send number of value
}
} /* end of master section */
/***** Non-master tasks only *****/
if (id > MASTER) {
/* Receive the number of cases */
MPI_Recv(&n, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD, &status);
}
MPI_Barrier (MPI_COMM_WORLD);
dataLocal = TD_init(n);
MPI_Barrier (MPI_COMM_WORLD);
if(id == 0){
printf("__________________________________________\n");
}
TD_trier(dataLocal);
MPI_Finalize();
}
Troubles may come from fusion2 function. index1 can become higher than size1. In fact, the MPI part works correctly. The code works once tests are performed. Here is a version that is not optimal but...
int* fusion2(int* t1, int* t2, int size1, int size2){
int* buffer = malloc(sizeof(int)*(size1 + size2));
int index1 = 0;
int index2 = 0;
int i = 0;
for(i; i < (size1 + size2) ; i++){
if(index1==size1){
buffer[i] = t2[index2];
index2++;
}else{
if(index2==size2){
buffer[i] = t1[index1];
index1++;
}else{
if(t1[index1] < t2[index2]){
buffer[i] = t1[index1];
index1++;
}else{
buffer[i] = t2[index2];
index2++;
}
}
}
}
return buffer;
}
Watch for memory management.
Ex : did you free T before doing ?
T = realloc(array_tmp, sizeof(int) * size);
Did you free "receive" ? did you free "array_tmp" in the second part ?
I fear memory leakages exist... It might be better to avoid allocation in fusion2, and even in the loops. Allocate array_tmp and receive at start, with "enougth" space, might be safer (faster ?).
Bye,
Francis
More : qsort (in stdlib) may go faster for local sorting.