I'm having difficulties with the following functionality: each process performs work on a global array. Every process starts with the same global array, and whenever one process changes it, the change has to be propagated to the other processes' copies.
//Globals
float * globalArray;

int main() {
    //Sequential code to init globalArray
    int size = //Length of globalArray
    int rank;
    int x, i;

    MPI_Init();
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    while(x == 0) {
        for(i = 0; i < size; i++) {
            printf("Values of array: %f\n", globalArray[i]);
        }
        workOnArray();
        x = getCondition();
        MPI_Bcast(&globalArray, size, MPI_FLOAT, rank, MPI_COMM_WORLD);
        MPI_Finalize();
    }
}
The problem is that the global arrays in the other processes aren't updated when I Bcast it.
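For what it's worth, a minimal sketch of how that loop is usually structured, under the assumption that rank 0's copy of the array is the one every process should adopt: MPI_Bcast must be given the buffer itself (not the address of the pointer), every rank must pass the same root, and MPI_Finalize belongs after the loop, called exactly once.

/* Sketch only; assumes globalArray already holds `size` floats on every rank,
   and that rank 0 (the agreed-upon root) is the process whose updates count. */
int x = 0;
while (x == 0) {
    workOnArray();
    x = getCondition();
    /* pass the buffer pointer itself, and the same root on every rank */
    MPI_Bcast(globalArray, size, MPI_FLOAT, 0, MPI_COMM_WORLD);
}
MPI_Finalize();   /* once, after the loop */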
I am trying to distribute the rows of two 2D matrices using MPI_Iscatter(), but I am getting this error message:
mpirun noticed that process rank 1 with PID 0 on node ***PC exited on signal 11 (Segmentation fault).
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<mpi.h>

int P;
int N = 1024;

int main(int argc, char *argv[]){
    MPI_Init(&argc, &argv);
    int i, j, k, rank, size;
    double start, end, total;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Request request[2];
    P = size;
    float A_row [N];
    float B_col [N];
    float matrix_A[N][N];
    float matrix_BT[N][N];

    if(rank == 0){
        double wall_time;
        for(i = 0; i < N; i++)
            for (j = 0; j < N; j++)
                matrix_A[i][j] = -1+2*((float)rand())/RAND_MAX;
        for(i = 0; i < N; i++)
            for (j = 0; j < N; j++)
                matrix_BT[i][j] = -1+2*((float)rand())/RAND_MAX;
    }

    start = MPI_Wtime();
    printf("Root processor Scatter is started for diagonal elements...\n");
    for(i = 0; i < N/P; i += P){
        MPI_Iscatter(matrix_A[2+rank + i], N, MPI_FLOAT, A_row, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[0]);
        MPI_Iscatter(matrix_BT[2+rank + i], N, MPI_FLOAT, B_col, N, MPI_FLOAT, 0, MPI_COMM_WORLD, &request[1]);
        MPI_Waitall(2, request, MPI_STATUSES_IGNORE);
        printf("Processor %d has received the Scatter A & B elements...\n", rank);
    }
    end = MPI_Wtime();
    printf("Total Time: %f\n", end - start);

    MPI_Finalize();
}
If I compile and run your code, I get a segmentation fault even if I comment out everything except:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<mpi.h>

int P;
int N = 1024;

int main(int argc, char *argv[]){
    MPI_Init(&argc, &argv);
    int i, j, k, rank, size;
    double start, end, total;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Request request[2];
    P = size;
    float A_row [N];
    float B_col [N];
    float matrix_A[N][N];
    float matrix_BT[N][N];
    MPI_Finalize();
}
Do you get the same? I suspect you are hitting the stack memory limit: two N x N float matrices at N = 1024 are about 8 MB of local variables, which meets or exceeds the default stack size on many systems. You should allocate your matrices on the heap instead.
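A minimal sketch of what that could look like, keeping the question's variable names (the initialization and the Iscatter loop are elided; a flat block replaces the 2D arrays, so element (i, j) becomes matrix_A[(size_t)i * N + j]):

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int N = 1024;
    /* One contiguous heap block per matrix instead of large stack arrays. */
    float *A_row     = malloc(N * sizeof(float));
    float *B_col     = malloc(N * sizeof(float));
    float *matrix_A  = malloc((size_t)N * N * sizeof(float));
    float *matrix_BT = malloc((size_t)N * N * sizeof(float));
    if (!A_row || !B_col || !matrix_A || !matrix_BT) {
        fprintf(stderr, "allocation failed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    /* ... initialize on rank 0 and scatter as in the question, passing
       matrix_A + (size_t)row * N wherever the original used matrix_A[row] ... */
    free(A_row); free(B_col); free(matrix_A); free(matrix_BT);
    MPI_Finalize();
    return 0;
}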
I'm trying to learn MPI and I've run into the following problem in one of my courses:
Consider an n * n matrix A in which each element is an integer. Given two pairs of indices (i1,j1) and (i2,j2), find the submatrix of those dimensions within A whose element sum is maximum.
I'd like some help on how to pass the submatrices to the processes. Should I first calculate how many submatrices (s) there are in the matrix and send N/s of them to each process? How would I send the submatrices?
Some skeleton code I wrote:
#include<mpi.h>
#include<stdio.h>
#include<math.h>
#include<assert.h>
#include<iostream>
using namespace std;

#pragma comment (lib, "msmpi.lib")

enum CommunicationTag
{
    COMM_TAG_MASTER_SEND_TASK,
    COMM_TAG_MASTER_SEND_TERMINATE,
    COMM_TAG_SLAVE_SEND_RESULT,
};

void print_matrix(int mat[10][10], int n) {
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            printf("%d ", mat[i][j]);
        }
        printf("\n");
    }
}

int main(int argc, char *argv[]) {
    //0. Init part, finding rank and number of processes
    int numprocs, rank, rc;
    rc = MPI_Init(&argc, &argv);
    if (rc != MPI_SUCCESS) {
        printf("Error starting MPI program. Terminating \n");
        MPI_Abort(MPI_COMM_WORLD, rc);
    }
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("I'm rank %d. Num procs %d\n", rank, numprocs); fflush(stdout);

    //1. different machine code
    if (rank == 0)
    {
        int n;
        scanf("%d", &n);
        int i1, i2, j1, j2;
        scanf("%d%d%d%d", &i1, &i2, &j1, &j2);

        int mat[10][10];
        //init data
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++) {
                mat[i][j] = (rand() % 100) - 50; //init random between -50 and 49
            }
        print_matrix(mat, n);

        //here; how do I pass the submatrices to the processes?
        for (int i = 1; i < numprocs; i++) {
            MPI_Send(&i1, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
            MPI_Send(&i2, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
            MPI_Send(&j1, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
            MPI_Send(&j2, 1, MPI_INT, i, COMM_TAG_MASTER_SEND_TASK, MPI_COMM_WORLD);
            //here; how do I pass the submatrices to the processes?
        }
    }
    else {
        //if slave ...
    }
    system("Pause");
}
The first step is to stop thinking about how to use MPI_Send(). The basic solution is to use MPI_Bcast() to transmit A to all the MPI processes.
Then divide the work up (no need to communicate for this, the same dividing logic can run in each process). Compute the sums within each MPI process, and collect them in the main process using MPI_Gather(). Choose the largest and you're done.
It really only requires two MPI operations: Bcast to distribute the input data to all processes, and Gather to centralize the results.
Note that all MPI processes need to execute the collective operations together, in lockstep. You only need if (rank == 0) to decide which process loads the matrix and analyzes the gathered results.
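For illustration, a minimal sketch of that Bcast/Gather structure in C (the fixed 10 x 10 size and the random initialization follow the question's skeleton; best_sum_for and the round-robin split of candidate positions are placeholder choices of mine, not part of the original):

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <mpi.h>

#define N 10   /* matrix size, matching the 10x10 buffer in the skeleton */

/* Placeholder helper: scan only the candidate top-left corners assigned to
   this rank (round-robin) and return the best h x w submatrix sum found. */
long best_sum_for(int mat[N][N], int h, int w, int rank, int nprocs) {
    long best = LONG_MIN;
    for (int i = 0; i + h <= N; i++)
        for (int j = 0; j + w <= N; j++) {
            if ((i * (N - w + 1) + j) % nprocs != rank) continue;
            long s = 0;
            for (int a = 0; a < h; a++)
                for (int b = 0; b < w; b++)
                    s += mat[i + a][j + b];
            if (s > best) best = s;
        }
    return best;
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    int mat[N][N];
    int h = 3, w = 3;                     /* requested submatrix dimensions */
    if (rank == 0) {
        for (int i = 0; i < N; i++)       /* same random init as the skeleton */
            for (int j = 0; j < N; j++)
                mat[i][j] = (rand() % 100) - 50;
    }

    /* 1. Bcast the input: every rank gets the whole matrix and the dimensions */
    MPI_Bcast(mat, N * N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&h, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&w, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* 2. each rank searches its own share of candidate positions */
    long local_best = best_sum_for(mat, h, w, rank, nprocs);

    /* 3. Gather one candidate per rank on rank 0 and pick the maximum */
    long *all_best = (rank == 0) ? malloc(nprocs * sizeof(long)) : NULL;
    MPI_Gather(&local_best, 1, MPI_LONG, all_best, 1, MPI_LONG, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        long best = all_best[0];
        for (int i = 1; i < nprocs; i++)
            if (all_best[i] > best) best = all_best[i];
        printf("maximum submatrix sum: %ld\n", best);
        free(all_best);
    }
    MPI_Finalize();
    return 0;
}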
I am trying to implement a map where keys map to unique values. In other words, each process holds a set of numbers in one array that map to another set of numbers in a second array held by the same process, and the mappings need to be unique and consistent across all the processes. Right now I pass a struct containing the mappings from process to process to build the map on each of them, but this is not parallel, because the information travels through the processes sequentially. How can all processes see the same, up-to-date mapping at the same time? The following is the code I am currently working with. Thanks in advance.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct map{ //saves all the mappings
    int keys[25];
    int values[25];
    int grow;
};

struct map rmap; //global map

void mapadd(int key, int value){ //adding values to map
    rmap.keys[rmap.grow] = key;
    rmap.values[rmap.grow] = value;
    rmap.grow++;
}

int mapper(int key){ //get value from key
    for(int h=0; h<sizeof(rmap.keys)/sizeof(int); h++){
        if(rmap.keys[h] == key){
            return rmap.values[h];
        }
    }
    return 0;
}

int finder(int list[], int val, int mem){ //see if a value is in array
    for(int l=0; l<mem; l++){
        if(list[l] == val){
            return 1;
        }
    }
    return 0;
}

int main(int argc, char** argv){
    // Initialize the MPI environment
    MPI_Init(NULL, NULL);
    // Find out rank, size
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    srand(time(0));
    rmap.grow = 0;

    int dim[world_size];
    int maxdim = 0;
    for(int s=0; s<world_size; s++){
        dim[s] = (rand()%10) + 1;
        if(dim[s]>maxdim){
            maxdim = dim[s];
        }
    }

    int nums[world_size][maxdim];
    int labels[world_size][maxdim];
    for(int u=0; u<world_size; u++){
        for(int d=0; d<dim[u]; d++){
            labels[u][d] = 0;
            nums[u][d] = 0;
        }
    }
    for(int t=0; t<world_size; t++){
        for(int i=0; i<dim[t]; i++){
            nums[t][i] = rand()%26 + 1;
            //printf("%d\n", nums[t][i]);
        }
    }

    if(world_rank!=0){
        MPI_Recv(&rmap.keys, 25, MPI_INT, world_rank-1, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&rmap.values, 25, MPI_INT, world_rank-1, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    for(int j=0; j<dim[world_rank]; j++){
        if(labels[world_rank][j] == 0){
            if(finder(rmap.keys, nums[world_rank][j], 25)==1){
                //printf("%s", "exist");
                labels[world_rank][j] = mapper(nums[world_rank][j]);
            }
            else{
                //printf("%s", "not");
                labels[world_rank][j] = (rand()%50) + 1;
                mapadd(nums[world_rank][j], labels[world_rank][j]);
                /*for(int o=0; o<25; o++){
                    printf("%d - %d", rmap.keys[o], rmap.values[o]);
                }*/
            }
        }
    }
    if(world_rank<world_size-1){
        MPI_Send(&rmap.keys, 25, MPI_INT, world_rank+1, 0, MPI_COMM_WORLD);
        MPI_Send(&rmap.values, 25, MPI_INT, world_rank+1, 0, MPI_COMM_WORLD);
    }
    for(int rank=0; rank<world_size; rank++){
        if(rank==world_rank){
            for(int k=0; k<dim[rank]; k++){
                printf("Process #%d: %d --> %d\n", rank, nums[rank][k], labels[rank][k]);
            }
        }
    }
    MPI_Finalize();
    return 0;
}
I've written a program in C/MPI that simply splits an NxN matrix into submatrices (by rows) and then distributes them to all processes with the routine MPI_Scatterv. The dimension N is not necessarily a multiple of the number of processes, so I decided to give one extra row to the first DIM % size processes. The code follows; it doesn't work, and I don't understand why. The error message is something like this:
job aborted:
rank: node: exit code[: error message]
0: PACI: -1073741819: process 0 exited without calling finalize
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#define DIM 4
#define ROOT 0

float **alloc (int, int);
void init (float **, int, int);
void print (float **, int, int);

int main(int argc, char *argv[])
{
    int rank,
        size,
        dimrecv,
        i;
    int *sendcount = NULL, *displs = NULL;
    float **matrix, **recvbuf;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    dimrecv = (int)(DIM / size);
    if(rank < (DIM % size))
        dimrecv += 1;
    recvbuf = alloc(dimrecv, DIM);

    if (rank == ROOT)
    {
        matrix = alloc(DIM, DIM);
        init(matrix, DIM, DIM);
        sendcount = (int*)calloc(size, sizeof(int));
        displs = (int*)calloc(size, sizeof(int));
        int total = 0;

        printf("MATRIX %d x %d", DIM, DIM);
        print(matrix, DIM, DIM);

        displs[0] = 0;
        for (i = 0; i < size; i++)
        {
            if (i < DIM % size)
                sendcount[i] = (ceil((float)DIM/size))*DIM;
            else
                sendcount[i] = (floor((float)DIM/size))*DIM;
            total += sendcount[i];
            if (i + 1 < size)
                displs[i + 1] = total;
        }
    }
    MPI_Scatterv(&(matrix[0][0]), sendcount, displs, MPI_FLOAT,
                 recvbuf, dimrecv*DIM, MPI_FLOAT, ROOT, MPI_COMM_WORLD);
    printf("\n\n");
    for (i = 0; i < size; i++)
    {
        MPI_Barrier(MPI_COMM_WORLD);
        if (i == rank)
        {
            printf("SUBMATRIX P%d", i);
            print(recvbuf, dimrecv, DIM);
        }
    }
    free(matrix[0]);
    free(matrix);
    free(recvbuf[0]);
    free(recvbuf);

    /* quit */
    MPI_Finalize();
    return 0;
}

float **alloc(int rows, int cols)
{
    int i;
    float *num_elem = (float *)calloc(rows*cols, sizeof(float));
    float **matrix = (float **)calloc(rows, sizeof(float*));
    for (i=0; i<rows; i++)
        matrix[i] = &(num_elem[cols*i]);
    return matrix;
}

void init (float **matrix, int rows, int cols)
{
    int i, j;
    srand(time(NULL));
    for (i = 0; i < rows; i++) {
        for (j = 0; j < cols; j++)
            matrix[i][j] = 1 + (rand() % 5);
    }
}

void print (float **matrix, int rows, int cols)
{
    int i, j;
    for (i = 0; i < rows; i++) {
        printf("\n");
        for (j = 0; j < cols; j++)
            printf("%.1f ", matrix[i][j]);
    }
}
How could I solve the problem using dynamic allocation with a double pointer? I wrote the same program with static allocation and it works! Thanks a lot.
Pax.
You need to be more careful about which process/rank is allocating memory, and which process/rank is therefore freeing memory.
In your current implementation, you'll want only rank == ROOT to allocate and initialize matrix. You'll want every rank to allocate and initialize sendcount and displs (otherwise, when they each enter MPI_Scatterv, how do they know what exactly they'll be receiving?). Finally, every rank also needs to allocate, but not initialize, recvbuf; that buffer gets filled internally by the MPI_Scatterv routine.
[Side note: You don't technically need to have each rank initialize sendcount and displs, although this will certainly be fastest. If only the rank == ROOT process has the knowledge to calculate these values, then you'll have to MPI_Bcast both of these arrays to every process before entering the MPI_Scatterv routine.]
And of course you'll then have to ensure that only the correct ranks free the correct memory they previously allocated.
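Concretely, a sketch of that division of responsibilities, reusing the names and the alloc() helper from your code (this assumes every rank computes sendcount and displs itself, so no extra communication is needed):

/* Sketch of who allocates what. alloc(), init(), DIM, ROOT, rank, size and
   dimrecv are the ones already defined in the question's program. */
int *sendcount = malloc(size * sizeof(int));   /* every rank */
int *displs    = malloc(size * sizeof(int));   /* every rank */
float **recvbuf = alloc(dimrecv, DIM);         /* every rank */
float **matrix  = NULL;                        /* only ROOT fills this in */

/* every rank runs the same counting logic, so all of them know the layout */
int total = 0;
for (int i = 0; i < size; i++) {
    sendcount[i] = (DIM / size + (i < DIM % size ? 1 : 0)) * DIM;
    displs[i] = total;
    total += sendcount[i];
}

if (rank == ROOT) {
    matrix = alloc(DIM, DIM);
    init(matrix, DIM, DIM);
}

MPI_Scatterv(rank == ROOT ? &(matrix[0][0]) : NULL, sendcount, displs, MPI_FLOAT,
             &(recvbuf[0][0]), dimrecv * DIM, MPI_FLOAT, ROOT, MPI_COMM_WORLD);

/* free only what this rank allocated; alloc() keeps all elements in one block */
if (rank == ROOT) { free(matrix[0]); free(matrix); }
free(recvbuf[0]); free(recvbuf);
free(sendcount);  free(displs);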
The reason this worked in your static initialization is that each rank "allocated" the memory when you initially statically defined your arrays. Assuming you did this naively, you probably previously used excess memory in that implementation (because, as seen above, not every rank needs to allocate memory for every matrix/array you are using).
Hope this helps.
Thanks Nose for your suggestion. Nevertheless, the program still doesn't work properly. The modified code is the following:
...
MPI_Bcast(sendcount, 4, MPI_INT, ROOT, MPI_COMM_WORLD);
MPI_Bcast(displs, 4, MPI_INT, ROOT, MPI_COMM_WORLD);

MPI_Scatterv(&(matrix[0][0]), sendcount, displs, MPI_FLOAT,
             recvbuf, dimrecv*DIM, MPI_FLOAT, ROOT, MPI_COMM_WORLD);
printf("\n\n");
for (i = 0; i < size; i++)
{
    MPI_Barrier(MPI_COMM_WORLD);
    if (i == rank)
    {
        printf("SUBMATRIX P%d", i);
        print(recvbuf, dimrecv, DIM);
    }
}
if (rank == ROOT) {
    for (i=0; i<DIM; i++)
        free(matrix[i]);
    free(matrix);
}
for (i=0; i<dimrecv; i++)
    free(recvbuf[i]);
free(recvbuf);
free(sendcount);
free(recvbuf);
sendcount and displs are now allocated outside the if (rank == ROOT) block, so they are visible to every rank. There must be something wrong in the code that I can't spot.
Hello, I am trying to write a simple parallel program in C using MPI. The program should find the maximum in an array. The root process should send chunks of the array to all processes using MPI_Scatter and then gather the results with MPI_Gather. When I run the program I get a general error like this:
Perhaps this Unix error message will help:
Unix errno: 14
Bad address
I know that there is some problem with MPI_Scatter and MPI_Gather, or with the values I am passing to these functions.
I tried to find the solution, but I couldn't find anything useful.
Here is my code:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define BUFSIZE 9

int max(int *buf, int N){
    int i;
    int value = 0;
    for(i=0; i<N; i++){
        if (buf[i]>value){
            value = buf[i];
        }
    }
    return value;
}

int main(int argc, char** argv)
{
    int size, rank;
    int slave;
    int *buf;
    int *buf1;
    int *buf2;
    int i, n, value;
    MPI_Status status;

    /* Initialize MPI */
    MPI_Init(NULL, NULL);

    /*
     * Determine size in the world group.
     */
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if ((BUFSIZE % size) != 0) {
        printf("Wrong Bufsize ");
        return(0);
    }
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank==0) {
        buf = (int *)malloc(BUFSIZE*sizeof(int));
        buf2 = (int *)malloc(size*sizeof(int));
        printf("\n Generated array: \n");
        for(i=0; i<BUFSIZE; i++){
            buf[i] = rand() % 20;
            printf("%d, ", buf[i]);
        }
        printf("\n");
        printf("\n Sending values to processes:");
        printf("\n -----------------------------");
    }
    buf1 = (int *)malloc((BUFSIZE/size)*sizeof(int));

    MPI_Scatter(buf, BUFSIZE/size, MPI_INT, buf1, BUFSIZE/size, MPI_INT, 0, MPI_COMM_WORLD);
    value = max(&buf1[0], BUFSIZE/size);
    printf("\n Max from process %d : %d \n", rank, max(&buf1[0], BUFSIZE/size));

    MPI_Gather(&value, 1, MPI_INT, buf2, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if (rank == 0){
        printf("\n Max value: %d", max(&buf2[0], size));
    }

    MPI_Finalize();
    return(0);
}
Initialize your pointers to NULL, and keep track of them.
Use buf1 instead of &buf1[0]; it is clearer.
Free your buffers before MPI_Finalize() with:
if(bufferPointer != NULL) free(bufferPointer);
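Applied to the three buffers in your program, that pattern could look like this (a sketch; with the NULL initialization, buf and buf2 stay NULL on every rank except 0, so only rank 0 frees them):

int *buf = NULL, *buf1 = NULL, *buf2 = NULL;   /* initialize to NULL and track them */
/* ... allocate and use them exactly as before ... */
if (buf1 != NULL) free(buf1);   /* allocated on every rank */
if (buf  != NULL) free(buf);    /* non-NULL only on rank 0 */
if (buf2 != NULL) free(buf2);   /* non-NULL only on rank 0 */
MPI_Finalize();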
If something is wrong with a pointer, it will crash inside the free call. Also, in the max function, if all your numbers are less than zero the maximum comes out as zero; I fixed that:
int max(int *buf, int N){
    int i;
    int value = N ? buf[0] : 0;
    for(i=0; i<N; i++){
        if (buf[i]>value){
            value = buf[i];
        }
    }
    return value;
}
Best regards!