I've written a program in C/MPI that simply splits an NxN matrix into submatrices (by rows) and then hands them out to all processes with the routine MPI_Scatterv. The dimension N is not necessarily a multiple of the number of processes, so I decided to give one extra row to the first DIM % size processes. The code is the following; it doesn't work, and I don't understand why. The error message is something like this:
job aborted:
rank: node: exit code[: error message]
0: PACI: -1073741819: process 0 exited without calling finalize
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define DIM 4
#define ROOT 0
float **alloc (int, int);
void init (float **, int, int);
void print (float **, int, int);
int main(int argc, char *argv[])
{
int rank,
size,
dimrecv,
i;
int *sendcount = NULL, *displs = NULL;
float **matrix, **recvbuf;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
dimrecv = (int)(DIM / size);
if(rank < (DIM % size))
dimrecv += 1 ;
recvbuf = alloc(dimrecv, DIM);
if (rank == ROOT)
{
matrix = alloc(DIM, DIM);
init(matrix, DIM, DIM);
sendcount = (int*)calloc(size, sizeof(int));
displs = (int*)calloc(size, sizeof(int));
int total = 0;
printf("MATRIX %d x %d", DIM, DIM);
print(matrix, DIM, DIM);
displs[0] = 0;
for (i = 0; i < size; i++)
{
if (i < DIM % size)
sendcount[i] = (ceil((float)DIM/size))*DIM;
else
sendcount[i] = (floor((float)DIM/size))*DIM;
total += sendcount[i];
if (i + 1 < size)
displs[i + 1] = total;
}
}
MPI_Scatterv(&(matrix[0][0]), sendcount, displs, MPI_FLOAT,
recvbuf, dimrecv*DIM, MPI_FLOAT, ROOT, MPI_COMM_WORLD);
printf("\n\n");
for(i = 0; i< size; i++)
{
MPI_Barrier(MPI_COMM_WORLD);
if (i == rank)
{
printf("SUBMATRIX P%d", i);
print(recvbuf, dimrecv, DIM);
}
}
free(matrix[0]);
free(matrix);
free(recvbuf[0]);
free(recvbuf);
/* quit */
MPI_Finalize();
return 0;
}
float **alloc(int rows, int cols)
{
int i;
float *num_elem = (float *)calloc(rows*cols, sizeof(float));
float **matrix= (float **)calloc(rows, sizeof(float*));
for (i=0; i<rows; i++)
matrix[i] = &(num_elem[cols*i]);
return matrix;
}
void init (float **matrix, int rows, int cols)
{
int i, j;
srand(time(NULL));
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++)
matrix[i][j] = 1 + (rand() % 5);
}
}
void print (float **matrix, int rows, int cols)
{
int i, j;
for (i = 0; i < rows; i++) {
printf("\n");
for (j = 0; j < cols; j++)
printf("%.1f ", matrix[i][j]);
}
}
How could I solve the problem using dynamic allocation with a double pointer? I've written the same program with static allocation and it works! Thanks a lot.
Pax.
You need to be more careful about which process/rank is allocating memory, and which process/rank is therefore freeing memory.
In your current implementation, you'll want rank == ROOT to allocate and initialize matrix, sendcount, and displs. You'll want every rank to allocate and initialize sendcount and displs (otherwise, when they each enter MPI_Scatterv how do they know what exactly they'll be receiving?). Finally, they'll also need to allocate but not initialize recvbuf. The initialization of this buffer happens internally to the MPI_Scatterv routine.
[Side note: You don't technically need to have each rank initialize sendcount and displs, although this will certainly be fastest. If only the rank == ROOT process has the knowledge to calculate these values, then you'll have to MPI_Bcast both of these arrays to every process before entering the MPI_Scatterv routine.]
And of course you'll then have to ensure that only the correct ranks free the correct memory they previously allocated.
The reason this worked in your static initialization is that each rank "allocated" the memory when you initially statically defined your arrays. Assuming you did this naively, you probably previously used excess memory in that implementation (because, as seen above, not every rank needs to allocate memory for every matrix/array you are using).
Hope this helps.
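For concreteness, here is a minimal sketch of that idea, reusing the alloc(), DIM and ROOT definitions from the question and computing the counts on every rank so no extra broadcast is needed (this is only one possible layout, not the only fix):
/* Every rank computes sendcount/displs; only ROOT owns the full matrix. */
int *sendcount = (int*)calloc(size, sizeof(int));
int *displs = (int*)calloc(size, sizeof(int));
int total = 0;
for (i = 0; i < size; i++) {
    sendcount[i] = (DIM / size + (i < DIM % size ? 1 : 0)) * DIM;
    displs[i] = total;
    total += sendcount[i];
}
float **matrix = NULL;                 /* stays NULL on non-root ranks */
float **recvbuf = alloc(dimrecv, DIM); /* every rank needs its receive buffer */
if (rank == ROOT) {
    matrix = alloc(DIM, DIM);
    init(matrix, DIM, DIM);
}
/* The send buffer is read only on ROOT, so the other ranks pass NULL
   instead of dereferencing an unallocated matrix. Note that the receive
   buffer is the contiguous data block, not the array of row pointers. */
MPI_Scatterv(rank == ROOT ? &(matrix[0][0]) : NULL, sendcount, displs, MPI_FLOAT,
             &(recvbuf[0][0]), dimrecv*DIM, MPI_FLOAT, ROOT, MPI_COMM_WORLD);
/* Free only what this rank actually allocated. */
if (rank == ROOT) { free(matrix[0]); free(matrix); }
free(recvbuf[0]); free(recvbuf);
free(sendcount); free(displs);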
Thanks Nose for your suggestion. Nevertheless, the program still doesn't work correctly. The modified code is the following:
...
MPI_Bcast(sendcount, 4, MPI_INT, ROOT, MPI_COMM_WORLD);
MPI_Bcast(displs, 4, MPI_INT, ROOT, MPI_COMM_WORLD);
MPI_Scatterv(&(matrix[0][0]), sendcount, displs, MPI_FLOAT,
recvbuf, dimrecv*DIM, MPI_FLOAT, ROOT, MPI_COMM_WORLD);
printf("\n\n");
for(i = 0; i< size; i++)
{
MPI_Barrier(MPI_COMM_WORLD);
if (i == rank)
{
printf("SUBMATRIX P%d", i);
print(recvbuf, dimrecv, DIM);
}
}
if (rank == ROOT) {
for (i=0; i<DIM; i++)
free(matrix[i]);
free(matrix);
}
for(i=0; i<dimrecv; i++)
free(recvbuf[i]);
free(recvbuf);
free(sendcount);
free(recvbuf);
sendcount and displs have been allocated outside the rank == ROOT block, so they are visible to every rank. There must still be something wrong in the code that I don't catch.
I'm a newbie in MPI and I'm trying to learn how to use MPI_Type_create_subarray so that I can apply it in my projects.
I've spent a lot of time searching for a tutorial that fits my needs, but without success.
So I've tried to generalize the concept from How to use MPI_Type_create_subarray to 3D arrays, but something is still missing.
In particular, my code returns a Segmentation Fault error or shows wrong data when I try to look at the results.
I can't understand where I made the mistake.
This is my code:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
void printarr(int ***data, int nx, int ny, int nz, char *str);
int ***allocarray(int nx, int ny, int nz);
int main(int argc, char **argv) {
/* array sizes */
const int bigsize =10;
const int subsize_x =2; const int subsize_y =2; const int subsize_z =2;
/* communications parameters */
const int sender =0;
const int receiver=1;
const int ourtag =2;
int rank, size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (size < receiver+1) {
if (rank == 0)
fprintf(stderr,"%s: Needs at least %d processors.\n", argv[0], receiver+1);
MPI_Finalize();
return 1;
}
MPI_Datatype mysubarray;
int starts[3] = {0,0,0};
int subsizes[3] = {subsize_x,subsize_y,subsize_z};
int bigsizes[3] = {bigsize, bigsize, 3};
MPI_Type_create_subarray(3, bigsizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &mysubarray);
MPI_Type_commit(&mysubarray);
if (rank == sender) {
int ***bigarray = allocarray(bigsize,bigsize,3);
for (int k=0; k<3; k++)
for (int j=0; j<bigsize; j++)
for(int i=0; i< bigsize; i++) {
bigarray[k][j][i] = k*(bigsize*bigsize)+j*bigsize+i;
}
printarr(bigarray, bigsize, bigsize, 3, " Sender: Big array ");
MPI_Send(&(bigarray[0][0][0]), 1, mysubarray, receiver, ourtag, MPI_COMM_WORLD);
MPI_Type_free(&mysubarray);
free(bigarray);
} else if (rank == receiver) {
int ***subarray = allocarray(subsize_x,subsize_y,subsize_z);
MPI_Recv(&(subarray[0][0][0]), subsizes[0]*subsizes[1]*subsizes[2], MPI_INT, sender, ourtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printarr(subarray, subsize_x, subsize_y, subsize_z, " Receiver: Subarray -- after receive");
free(subarray);
}
MPI_Finalize();
return 0;
}
void printarr(int ***data, int nx, int ny, int nz, char *str) {
printf("-- %s --\n", str);
for(int k=0; k<nz; k++) {
printf("\n\n-----%d------\n",k);
for (int j=0; j<ny;j++) {
for (int i=0; i<nx; i++) {
printf("%3d ", data[k][j][i]);
}
printf("\n");
}
}
}
int ***allocarray(int nx, int ny, int nz) {
int*** arr = (int***)malloc(sizeof(int**)*nz);
for(int k = 0; k < nz; k++) {
arr[k]= (int**)malloc(sizeof(int*)*ny);
for(int j = 0; j< ny; j++){
arr[k][j] = (int*)malloc(sizeof(int)*nx);
for(int i = 0; i < nx; i++){
arr[k][j][i] = 0;
}
}
}
return arr;
}
I'm trying to learn MPI and I've run into the following problem in one of my courses:
Consider a matrix A of dimensions n * n in which each element is an integer. Given two pairs of indices (i1,j1) and (i2,j2), find the submatrix of those dimensions in matrix A for which the sum of its elements is maximum.
I'd like some help on how to pass the submatrices to the processes. Should I first calculate how many submatrices (s) there are in the matrix and send N/s of them to each process? How would I send the submatrices?
Some skeleton code I wrote:
#include<mpi.h>
#include<stdio.h>
#include<math.h>
#include<assert.h>
#include<iostream>
using namespace std;
#pragma comment (lib, "msmpi.lib")
enum CommunicationTag
{
COMM_TAG_MASTER_SEND_TASK,
COMM_TAG_MASTER_SEND_TERMINATE,
COMM_TAG_SLAVE_SEND_RESULT,
};
void print_matrix(int mat[10][10], int n) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
printf("%d ", mat[i][j]);
}
printf("\n");
}
}
int main(int argc, char *argv[]) {
//0. Init part, finding rank and number of processes
int numprocs, rank, rc;
rc = MPI_Init(&argc, &argv);
if (rc != MPI_SUCCESS) {
printf("Error starting MPI program. Terminating \n");
MPI_Abort(MPI_COMM_WORLD, rc);
}
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("I'm rank %d. Num procs %d\n", rank, numprocs); fflush(stdout);
//1. different machine code
if (rank == 0)
{
int n;
scanf("%d", &n);
int i1, i2, j1, j2;
scanf("%d%d%d%d", &i1, &i2, &j1, &j2);
int mat[10][10];
//init data
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++) {
mat[i][j] = (rand() % 100) - 50; //init random between -50 and 49
}
print_matrix(mat, n);
//here; how do I pass the submatrices to the processes?
for (int i = 1; i < numprocs; i++) {
MPI_Send(&i1, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
MPI_Send(&i2, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
MPI_Send(&j1, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
MPI_Send(&j2, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
//here; how do I pass the submatrices to the processes?
}
}
else {
//if slave ...
}
system("Pause");
}
The first step is to stop thinking about how to use MPI_Send(). The basic solution is to use MPI_Bcast() to transmit A to all the MPI processes.
Then divide the work up (no need to communicate for this, the same dividing logic can run in each process). Compute the sums within each MPI process, and collect them in the main process using MPI_Gather(). Choose the largest and you're done.
It really only requires two MPI operations: Bcast to distribute the input data to all processes, and Gather to centralize the results.
Note that all MPI processes need to execute the collective operations together, in lockstep. You only need if (rank == 0) to decide which process loads the matrix and analyzes the gathered results.
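As a rough sketch of that flow (the names are hypothetical; it assumes A is a contiguous n*n int array, that every rank already knows n, i1, j1, i2, j2 from an earlier broadcast, that the pair of indices gives the height and width of the window, and that <limits.h> and <stdlib.h> are included):
/* 1. Distribute the input: rank 0's copy of A reaches every rank. */
MPI_Bcast(A, n * n, MPI_INT, 0, MPI_COMM_WORLD);
/* 2. Same dividing logic on every rank: round-robin over the possible
      top-left corners of the (rows x cols) window. */
int rows = i2 - i1 + 1, cols = j2 - j1 + 1;
int best_local = INT_MIN, idx = 0;
for (int r = 0; r + rows <= n; r++)
    for (int c = 0; c + cols <= n; c++, idx++) {
        if (idx % numprocs != rank) continue;   /* not this rank's corner */
        int sum = 0;
        for (int x = 0; x < rows; x++)
            for (int y = 0; y < cols; y++)
                sum += A[(r + x) * n + (c + y)];
        if (sum > best_local) best_local = sum;
    }
/* 3. Centralize the partial maxima on rank 0 and pick the largest. */
int *partial = NULL;
if (rank == 0) partial = (int*)malloc(numprocs * sizeof(int));
MPI_Gather(&best_local, 1, MPI_INT, partial, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0) {
    int best = partial[0];
    for (int p = 1; p < numprocs; p++)
        if (partial[p] > best) best = partial[p];
    printf("Maximum submatrix sum: %d\n", best);
    free(partial);
}
An MPI_Reduce with MPI_MAX would also work for step 3, but MPI_Gather keeps every rank's partial result visible on rank 0.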
I'm having difficulties with the following functionality. Each process is allowed to perform work on a global array. Every process starts with the same global array, but whenever one process changes it, it has to update the copies held by the other processes.
//Globals
float * globalArray;
int main() {
//Sequential code to init globalArray
int size = //Length of globalArray
int rank;
int x, i;
MPI_Init();
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
while(x == 0) {
for(i = 0; i < size; i++) {
printf("Values of array: %f\n", globalArray[i]);
}
workOnArray();
x = getCondition();
MPI_Bcast(&globalArray, size, MPI_FLOAT, rank, MPI_COMM_WORLD);
MPI_Finalize();
}
}
The problem is that the global arrays in the other processes aren't updated when I Bcast it.
I am trying to send multiple columns of the "B" matrix from processor 0 to the other processors. I am trying to send them with MPI_Send, but it's not working. Can someone please help me?
For example: the size of the square matrix B is 7.
It should be distributed this way:
Processor 0: 3 columns
Processor 1: 2 columns
Processor 2: 2 columns
#include <stdlib.h>
#include <mpi.h>
#include <stdio.h>
#define ERR_BADORDER 255
#define TAG_INIT 31337
#define TAG_RESULT 42
#define DISP_MAXORDER 12
int mm(double *A, double *B, double *C, int n, int n1);
int rc(int rt,int rank, int size);
int main(int argc, char *argv[]) {
double *A, *B, *C,t,tt;
int n = 0, n0, n1, n2, i,ss,sts;
int rank = 0, size = 1,prev,next,k,z,jcol,ix=0,m,j;
MPI_Datatype column;
MPI_Request reqs[4];
MPI_Status stats[2];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (!rank) {
if (argc > 1) {
n = atoi(argv[1]);
}
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (!n) {
MPI_Finalize();
return 0;
}
n1 = rc(n, rank,size);
n0 = n * n1;
n2 = n * n;
A = (double *) malloc(sizeof(double) * (rank ? n0 : n2));
B = (double *) malloc(sizeof(double) * n2 );
C = (double *) malloc(sizeof(double) * (rank ? n0 : n2));
if (!rank) {
for (i=0; i<n2; i++) {
A[i] = 1.0;
B[i] = 1.0;
}
}
t = MPI_Wtime();
if (!rank) {
ss = n0;
for (i=1; i<size; i++) {
sts = n * rc(n, i, size);
MPI_Send(A + ss, sts, MPI_DOUBLE, i, TAG_INIT,
MPI_COMM_WORLD);
ss += sts;
}
}
else {
MPI_Recv(A, n0, MPI_DOUBLE, 0, TAG_INIT, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
}
MPI_Type_vector(n,1,n,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
if (!rank) {
for (i=1; i<size; i++) {
for(m=0;m<=i-1;m++)
ix+=rc(n,m,size);
ss=rc(n,i,size);
for(j=ix;j<ss+ix;j++)
MPI_Send(&B[j], 1, column, i, TAG_INIT, MPI_COMM_WORLD);
/* MPI_Send(&B[i+(n-1)*n], 1, column, i, TAG_INIT,
MPI_COMM_WORLD);*/
}
}
else {
printf("hello");
MPI_Recv(B, n, MPI_DOUBLE, 0, TAG_INIT, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
}
for (i=0; i<n0; i++) {
printf("Processor: %d and matrix %lf \n ",rank, B[i]);
}
for (i=0; i<n0; i++) {
C[i] = 0.0;
}
MPI_Finalize();
return 0;
}
int rc(int rt, int rank, int size) {
return (rt / size) + (rt % size > rank);
}
Please don't name your variables with only two or three letters, because it makes it hard to understand what you want to do.
You can solve the problem in different ways. With n = 7 and 3 processes, I send 2 columns to each process other than the master 0.
#include <stdlib.h>
#include <mpi.h>
#include <stdio.h>
#define ERR_BADORDER 255
#define TAG_INIT 31337
#define TAG_RESULT 42
#define DISP_MAXORDER 12
int main(int argc, char *argv[]) {
double *B;
int n = 0;
int rank , size;
int i;
int columnToSend;
MPI_Datatype column;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (!rank)printf ("rank = %d , size = %d\n", rank, size);
if (!rank) {
if (argc > 1) {
n = atoi(argv[1]);
}
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (!n) {
printf ("n = %d!!\n", n);
MPI_Finalize();
return 0;
}
int offset = n%size;
int gap = n/size;
if (!rank)printf ("n = %d, offset = %d , gap = %d\n", n, offset, gap);
MPI_Type_vector(n,gap,n,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
B = (double *) malloc(sizeof(double) * n*n );
for (i = 0 ; i < n * n ; i++) {
B[i] = -1.0;
}
if (!rank) {
for (i = 0 ; i < n * n ; i++) {
B[i] = i;//<----- I put i instead one
}
for (i=1; i < size; i++) {
columnToSend = gap *i + offset;
printf ("columnToSend = %d to i = %d \n", columnToSend, i);
MPI_Send(&B[columnToSend], 1, column, i, TAG_INIT, MPI_COMM_WORLD);
}
}
if (rank) {
printf ("in rank = %d n*gap = %d \n", rank, n*gap);
MPI_Recv(B, n*gap, MPI_DOUBLE, 0, TAG_INIT, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for (i=0; i < n*gap; i++) {
printf("Processor: %d and matrix %lf \n ",rank, B[i]);
}
}
MPI_Finalize();
return 0;
}
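In case it helps, this is how I run it (the file name is just an example); with n = 7 and 3 processes, each worker rank prints its n*gap = 14 received values:
mpicc column_scatter.c -o column_scatter
mpirun -np 3 ./column_scatter 7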
Hello, I am trying to write a simple parallel program in C using MPI. The program should find the maximum in an array. The root process should send chunks of the array to all processes using MPI_Scatter and then gather the results with MPI_Gather. When I run the program I get a general error like this:
Perhaps this Unix error message will help:
Unix errno: 14
Bad address
I know that there is some problem with MPI_Scatter and MPI_Gather, or with the values I am passing to these functions.
I was trying to find the solution, but I found nothing useful.
Here is my code:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define BUFSIZE 9
int max(int *buf, int N){
int i;
int value = 0;
for(i=0; i<N; i++){
if (buf[i]>value){
value = buf[i];
}
}
return value;
}
int main(int argc, char** argv)
{ int size, rank;
int slave;
int *buf;
int *buf1;
int *buf2;
int i, n, value;
MPI_Status status;
/* Initialize MPI */
MPI_Init(NULL, NULL);
/*
* Determine size in the world group.
*/
MPI_Comm_size(MPI_COMM_WORLD, &size);
if ((BUFSIZE % size) != 0) {
printf("Wrong Bufsize ");
return(0);
}
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank==0) {
buf = (int *)malloc(BUFSIZE*sizeof(int));
buf2 = (int *)malloc(size*sizeof(int));
printf("\n Generated array: \n");
for(i=0; i<BUFSIZE; i++){
buf[i] = rand() % 20;
printf("%d, ", buf[i]);
}
printf("\n");
printf("\n Sending values to processes:");
printf("\n -----------------------------");
}
buf1 = (int *)malloc((BUFSIZE/size)*sizeof(int));
MPI_Scatter(buf, BUFSIZE/size, MPI_INT, buf1, BUFSIZE/size, MPI_INT, 0, MPI_COMM_WORLD);
value = max(&buf1[0], BUFSIZE/size);
printf("\n Max from rocess %d : %d \n", rank, max(&buf1[0], BUFSIZE/size));
MPI_Gather(&value, 1, MPI_INT, buf2, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0){
printf("\n Max value: %d", max(&buf2[0], size));
}
MPI_Finalize();
return(0);
}
Initialize your pointers to NULL, and keep track of them.
Use buf1 instead of &buf1[0]; it is clearer.
Free your buffers before MPI_Finalize() with:
if (bufferPointer != NULL) free(bufferPointer);
If something is wrong with a pointer, it will crash in the free call. Also, in the max function, if all your numbers are less than zero the maximum comes out as zero; I fixed that:
int max(int *buf, int N){
int i;
int value = N? buf[0] : 0;
for(i=0; i<N; i++){
if (buf[i]>value){
value = buf[i];
}
}
return value;
}
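Putting the pointer advice together, a minimal sketch using the variable names from your program (the scatter/gather part stays as it is):
int *buf = NULL, *buf1 = NULL, *buf2 = NULL;   /* start every pointer as NULL */
/* ... the same scatter / max / gather code as above ... */
/* just before MPI_Finalize(): free only what this rank allocated */
if (buf != NULL) free(buf);      /* allocated on rank 0 only */
if (buf1 != NULL) free(buf1);    /* allocated on every rank  */
if (buf2 != NULL) free(buf2);    /* allocated on rank 0 only */
MPI_Finalize();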
Best regards!