TeraSort Algorithm using MPICH2 in C - c

I am trying to implement TeraSort algorithm according to this paper in C using MPICH2. Currently I am using small dataset with values given in the paper and using 4 processors.
int main(int argc, char** argv) {
/********** Create and populate the array *********/
FILE *file = fopen("data.txt","r");
int n; //number of data values
fscanf(file,"%d",&n);
printf("%d",n);
int *original_array = malloc(n * sizeof(int));
int num,c;
int i=0;
while (fscanf(file, "%d", &num) == 1){
original_array[i] = num;
i++;
}
fclose(file);
/********* Finding max and min values ************/
int max = -32768; //change if "int" is changed
int min = 32767;
for(i=0; i<n; i++){
if(max < original_array[i]){
max = original_array[i];
}
if(min > original_array[i]){
min = original_array[i];
}
}
printf("%d", min);
printf("\n");
printf("%d", max);
/********** Initialize MPI **********/
int world_rank;
int world_size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
/********** FILE PLACEMENT **********/
int nodeDataSize = n/world_size;
printf("%d",nodeDataSize);
printf("\n");
MPI_Finalize();
All the values printed (n, min, max, nodeDataSize) are incorrect. The file data.txt contains space separated integers where the first integer gives the number of values. Can anyone tell me what's wrong?
This is what the data.txt file looks like:

Related

collect result MPI C

hi guys i wanted to ask you for help with the array data collection, below there is my code and here what it should do
implementation of a parallel algorithm in which each
processor read a different size vector
M, carry out the sum of its elements in the memory
local and concatenate results in a contained vector
in the memory of each processor.
CODE
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
int main (int argc, char *argv[])
{
int * a;
int i, n;
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &nproc);
MPI_Comm_rank (MPI_COMM_WORLD,&menum);
printf("Insert the size of the array\n");
scanf ("%d", &n);
a = (int *) malloc(sizeof(int)*n);
printf("Size degli array inserito correttamente\n");
printf("Ora, inserire i %d elementi di a\n", n);
for(i = 0; i < n; i++)
scanf ("%d", &a[i]);
printf("Elementi di a inseriti correttamente\n");
float local_sum = 0;
int i;
for (i = 0; i < n; i++) {
local_sum += a[i]; \\
}
MPI_Allgather(&localsum,1,MPI_INT,localsum,1,MPI_INT,MPI_COMMON_WORLD);
MPI_Finalize();
}

How to scatter multiple variables in an array for MPI_Scatter

I am currently struggling to equally distribute an array with 8 integers to 2 integers per 4 processors. I used MPI_Bcast to let every processors to know there are total array of 8 and each of those will have 2 integers array called "my_input".
MPI_Bcast(&totalarray,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&my_input,2,MPI_INT,0,MPI_COMM_WORLD);
MPI_Scatter (input, 2 , MPI_INT, &my_input, 2 , MPI_INT, 0, MPI_COMM_WORLD );
//MPI_Barrier (MPI_COMM_WORLD);
printf("\n my input is %d & %d and rank is %d \n" , my_input[0], my_input[1] , rank);
However after scattering, I see the print function cannot print the 'rank' but all the integers from the 8 integers array. How should I program in order to equally distribute the number of arrays to other processors from root?
Here is my full code (it is just for testing a total of 8 integers, therefore scanf I will enter '8'):
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
int main(int argc, char *argv[])
{
//initailise MPI
MPI_Init(&argc, &argv);
//Variable to identify processor and total number of processors
int rank, size;
int my_input[0];
//initailse total array variable
int totalarray =0;
//initialise memory array
int* input;
//range of random number
int upper = 100, lower = 0;
//declare processor rank
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
//declare total size of processor
MPI_Comm_size(MPI_COMM_WORLD, &size);
//let root gather N elements from user
if (rank == 0)
{
printf("Enter a number from 1 to 1000: ");
fflush(stdout);
int number;
//ask user to input number of elements
scanf("%d",&number);
printf("Your number is %d\n",number);
//Fill the array to power of 2
int totalarray = pow(2, ceil(log(number)/log(2)));
input[totalarray];
my_input[totalarray/size];
//allocate memory for the array
input = malloc(totalarray * sizeof(int) );
//Add randomise number until N elements
for(int i =0; i<=totalarray ; i++)
{
if( i<number)
{
input[i] = (rand() % (upper - lower + 1)) + lower; ;
}
//padding zero to the extra elements
else if(number <= i < totalarray)
{
input[i] = 0;
}
}
//confirm the input array
printf("the input is: ");
for(int i =0; i < totalarray ; i++)
{
printf( "%d ", input[i]);
}
}
MPI_Bcast(&totalarray,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&my_input,2,MPI_INT,0,MPI_COMM_WORLD);
MPI_Scatter (input, 2 , MPI_INT, &my_input, 2 , MPI_INT, 0, MPI_COMM_WORLD );
//MPI_Barrier (MPI_COMM_WORLD);
printf("\n my input is %d & %d and rank is %d \n" , my_input[0], my_input[1] , rank);
MPI_Finalize();
return 0;
}
I used MPI_Bcast to let every processors to know there are total array
of 8 and each of those will have 2 integers array called "my_input".
Yes, that makes sense.
However after scattering, I see the print function cannot print the
'rank' but all the integers from the 8 integers array. How should I
program in order to equally distribute the number of arrays to other
processors from root?
You have some issues with your code. For instance, you declare the variables my_input, totalarray, and input as:
int my_input[0];
...
int totalarray =0;
...
int* input;
and then within if (rank == 0) you redefine them again:
int totalarray = pow(2, ceil(log(number)/log(2)));
input[totalarray];
my_input[totalarray/size];
input = malloc(totalarray * sizeof(int) );
This is not correct, alternatively what you can do is to declare both arrays as int*, namely:
int *my_input;
int *input;
then allocate their space as soon as you know how many elements there will be in each of those arrays.
The input array can be allocated right after the user has inserted the size of that array:
//ask user to input number of elements
scanf("%d",&number);
printf("Your number is %d\n",number);
input = malloc(totalarray * sizeof(int));
and the my_input array after the master process has broadcast the input size to the other processes:
MPI_Bcast(&totalarray, 1, MPI_INT, 0, MPI_COMM_WORLD);
int *my_input = malloc((totalarray/size) * sizeof(int));
For the variable totalarray just do not declare again within if (rank == 0). Because if you do so, then int totalarray = pow(2, ceil(log(number)/log(2))); will be a different variable that will only exist in the scope of the if (rank == 0).
The second MPI_Bcast call
MPI_Bcast(&my_input,2,MPI_INT,0,MPI_COMM_WORLD);
is unless, since you want to
to equally distribute total 8 integers in an array to 2 integers for
4 processors.
and not that every process has the entire contend of the my_input array of the master process.
For that you need the MPI_Scatter which you do. However, instead of
MPI_Scatter (input, 2 , MPI_INT, &my_input, 2 , MPI_INT, 0, MPI_COMM_WORLD );
do not hardcode the size of the inputs, because if you want to test with different input sizes and/or with a different number of processes the code will not work, do the following instead:
int size_per_process = totalarray/size;
MPI_Scatter (input, size_per_process , MPI_INT, my_input, size_per_process , MPI_INT, 0, MPI_COMM_WORLD );
The loop for(int i =0; i<=totalarray ; i++) should actually be for(int i =0; i< totalarray ; i++), otherwise you are getting out of boundaries of the array input. Personal opinion, but I think that the adding of the random values logic reads better this way:
for(int i =0; i < number ; i++)
input[i] = (rand() % (upper - lower + 1)) + lower;
for(int i = number; i < totalarray; i++)
input[i] = 0;
The final code would look like the following:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv);
int rank, size;
int *input;
int totalarray;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (rank == 0){
printf("Enter a number from 1 to 1000: ");
fflush(stdout);
int number;
scanf("%d",&number);
printf("Your number is %d\n",number);
totalarray = pow(2, ceil(log(number)/log(2)));
input = malloc(totalarray * sizeof(int));
int upper = 100, lower = 0;
for(int i = 0; i < number ; i++)
input[i] = (rand() % (upper - lower + 1)) + lower;
for(int i = number; i < totalarray; i++)
input[i] = 0;
printf("the input is: ");
for(int i =0; i < totalarray ; i++)
printf( "%d ", input[i]);
}
MPI_Bcast(&totalarray, 1, MPI_INT, 0, MPI_COMM_WORLD);
int size_per_process = totalarray / size;
int *my_input = malloc(size_per_process * sizeof(int));
printf("SIZE PER %d\n", size_per_process);
MPI_Scatter (input, size_per_process, MPI_INT, my_input, size_per_process, MPI_INT, 0, MPI_COMM_WORLD );
printf("\n my input is %d & %d and rank is %d \n" , my_input[0], my_input[1] , rank);
MPI_Finalize();
return 0;
}
The last print can also be made to be more generic by printing the entire my_input rather than just the first two positions.

Control some code to be executed once in MPI

I can't control how to make to execute some code once using mpi
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#define N 10
#define ITERS 1
// N and ITERS might be input arguments
double **A;
int times = 0;
void initialize (double **A)
{
int i,j;
for(i =0; i < N+2 ; i++){
for(j =0; j < N+2 ; j++){
if(i== 0 || j == 0 || i == (N+1) || j == (N +1) )
A[i][j] = 0.0;
else
A[i][j] = rand() % 10 + 1;
}
}
}
int main(int argc, char * argv[]){
int MyProc, size,tag=1;
char msg='A', msg_recpt;
MPI_Status status;
double **received_array;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &MyProc);
MPI_Comm_size(MPI_COMM_WORLD, &size);
printf("Process # %d started \n", MyProc);
MPI_Barrier(MPI_COMM_WORLD);
int i;
/******Make it once*******/
if(times == 0){
printf("One time\n");
A = malloc((N+2) * sizeof(double *));
for (i=0; i<N+2; i++) {
A[i] = malloc((N+2) * sizeof(double));
}
initialize(A);
times ++;
}
/*************/
MPI_Barrier(MPI_COMM_WORLD);
}
MPI_Finalize();
}
I've tried with times variable as a flag but the make it once section it's executed once per process...everethig I've tried it's executed once per process ,I don't know what to try, I can't make it using the rank of teh process because I need the first procees to arrive executes the once section.

Troubles in MPI : Use mpi to rewrite the program as a parallel program

Request:each process needs to calculate the distance between its own group and all points.
My code is as follows:
#include stdio.h
#include stdlib.h
#include math.h
#include string.h
#include "mpi.h"
double **malloc_Array2D(int row, int col)
{
int size = sizeof(double);
int point_size = sizeof(double*);
double **arr = (double **) malloc(point_size * row + size * row * col);
if (arr != NULL)
{
memset(arr, 0, point_size * row + size * row * col);
double *head = (double*)(arr + row);
while (row--)
arr[row] = head + row * col;
}
return (double**)arr;
}
void free_Aarray2D(void **arr)
{
if (arr != NULL)
free(arr);
}
double distance(double *pos1, double *pos2, int dim)
{
int i;
double dist = 0.0;
for(i=0;i<dim;i++)
dist += pow((pos2[i]-pos1[i]), 2.0);
return sqrt(dist);
}
int main(int argc,char *argv[])
{
int np, myid;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &np);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
//open file
FILE *fp;
if((fp=fopen("points.dat","r"))==NULL)
{
printf("open file failure! exit!\n");
return -1;
}
//read the number of points
int npoints;
fscanf(fp, "There are %d points\n", &npoints);
if(0==myid)
printf("There are %d points\n", npoints);
int nptsl_max = npoints/np;
if((npoints%np)!=0)
nptsl_max++;
double (*xi)[3];
double **dist;
int *ind = (int *)malloc(sizeof(int)*nptsl_max);
xi = (double (*)[3])malloc(sizeof(double)*nptsl_max*3);
dist= malloc_Array2D(nptsl_max, npoints);
int nptsl = 0; //local number of points
char str[200];
for(int i=0; i<npoints; i++)
{
if(myid == (i%np))
{
fscanf(fp, "%d %lf %lf %lf", ind+nptsl, &xi[nptsl][0], &xi[nptsl][1], &xi[nptsl][2]);
nptsl++;
}
else
fgets(str, 200, fp);
}
fclose(fp);
for(int i=0; i<nptsl; i++)
printf("point %4d on process %d\n", *(ind+i), myid);
dist = (myid == (np-1))?0 :(myid+1);
source = (myid == 0) ?(np-1):(myid-1);
double (*yi)[3];
yi = (double (*)[3])malloc(sizeof(double)*nptsl_max*3);
for(int i=0; i<nptsl; i++)
for(int j=0; j<3; j++)
yi[i][j] = xi[i][j];
for(int loop=0; loop < np)
{
for(int i=0; i<nptsl; i++)
{
for(int j=0; j<npoints; j++)
{
dist[i][j] = distance(xi[i], xi[j], 3);
}
}
}
sprintf(filename, "dist_%d.dat",myid);
fp = fopen(filename, "w");
for(i=0; i<npoints; i++)
{
fprintf(fp, "%4d", ind[i]);
for(j=0; j<npoints; j++)
{
fprintf(fp, " %f", dist[i][j]);
}
fprintf(fp, "\n");
}
fclose(fp);
free(ind);
free(xi);
free_Aarray2D((void **)dist);
}
I don't know how to communicate messages.
I think it should be used MPI_Bcast or MPI_Gather for communication.
But I can't solve this problem all the time.Can anyone help me?Thank you!
So each process is reading a subset from a file, and you want to calculate the distance from each point in the sub-set to ALL the points?
Why is there a loop with a "loop" variable?
If you need to do one iteration for calculating the distances, it would probably be faster to have each process read the whole file entirely, calculate the distances for a subset and the use MPI_Gather to send all the distances to the root node that will write the result.
If you plan on doing several iterations, where in each iteration you also update the location of the points, then you need to also use MPI_Allgather to send/recv the points from the other processes.

MPI_Scatter and MPI_Gather dont work

Hallo Iam trying to make a simlpe parralel program in C language uing MPI. Program should find maximum in array. Root process should send chunks of array to all processes using MPI_Scatter and then gather results by MPI_Gather. When I run the program i get general error like this:
Perhaps this Unix error message will help:
Unix errno: 14
Bad address
I know that there is some problem with MPI_Scatter and MPI_Gather or with the values I am sending to this functions.
I was trying to find the solution, but I found nothing what could be useful.
Here is my code:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define BUFSIZE 9
int max(int *buf, int N){
int i;
int value = 0;
for(i=0; i<N; i++){
if (buf[i]>value){
value = buf[i];
}
}
return value;
}
int main(int argc, char** argv)
{ int size, rank;
int slave;
int *buf;
int *buf1;
int *buf2;
int i, n, value;
MPI_Status status;
/* Initialize MPI */
MPI_Init(NULL, NULL);
/*
* Determine size in the world group.
*/
MPI_Comm_size(MPI_COMM_WORLD, &size);
if ((BUFSIZE % size) != 0) {
printf("Wrong Bufsize ");
return(0);
}
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank==0) {
buf = (int *)malloc(BUFSIZE*sizeof(int));
buf2 = (int *)malloc(size*sizeof(int));
printf("\n Generated array: \n");
for(i=0; i<BUFSIZE; i++){
buf[i] = rand() % 20;
printf("%d, ", buf[i]);
}
printf("\n");
printf("\n Sending values to processes:");
printf("\n -----------------------------");
}
buf1 = (int *)malloc((BUFSIZE/size)*sizeof(int));
MPI_Scatter(buf, BUFSIZE/size, MPI_INT, buf1, BUFSIZE/size, MPI_INT, 0, MPI_COMM_WORLD);
value = max(&buf1[0], BUFSIZE/size);
printf("\n Max from rocess %d : %d \n", rank, max(&buf1[0], BUFSIZE/size));
MPI_Gather(&value, 1, MPI_INT, buf2, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0){
printf("\n Max value: %d", max(&buf2[0], size));
}
MPI_Finalize();
return(0);
}
Initialize your pointers to NULL, and track them.
use buf1 instead of &buf1[0], is more clear.
free your buffers before MPI_Finalize() with:
if(bufferPionter != NULL) free(bufferPionter);
If something is wrong with a pointer will crash in the free call. In the max function, If all your numbers are less than zero the maximun is zero. i fix that.
int max(int *buf, int N){
int i;
int value = N? buf[0] : 0;
for(i=0; i<N; i++){
if (buf[i]>value){
value = buf[i];
}
}
return value;
}
Best regards!

Resources