I am trying to run some tests using OpenMPI, processing data in an array by splitting the work up across nodes (the second part is with matrices). I am running into some problems now because the data array is being initialized every time, and I don't know how to prevent this from happening.
How, using ANSI C with OpenMPI, can I create a variable-length array just once? I tried making it static and global, but nothing.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>

#define NUM_THREADS 4
#define NUM_DATA 1000

/* search() is defined elsewhere in the program */

static int *list = NULL;

int main(int argc, char *argv[]) {
    int numprocs, rank, namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int n = NUM_DATA * NUM_DATA;
    printf("hi\n");
    int i;
    if (list == NULL)
    {
        printf("ho\n");
        list = malloc(n * sizeof(int));
        for (i = 0; i < n; i++)
        {
            list[i] = rand() % 1000;
        }
    }
    int position;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);
    printf("Process %d on %s out of %d\n", rank, processor_name, numprocs);

    clock_t start = clock();
    position = n / NUM_THREADS * rank;
    search(list, position, n / NUM_THREADS * (rank + 1));
    printf("Time elapsed: %f seconds\n", ((double)clock() - (double)start) / (double)CLOCKS_PER_SEC);

    free(list);
    MPI_Finalize();
    return 0;
}
Probably the easiest way is to have the rank 0 process do the initialization while the other processes block. Then once the initialization is done, have them all start their work.
A basic example trying to call your search function (NB: it's dry-coded):
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define NUM_THREADS 4
#define NUM_DATA 1000

int main(int argc, char *argv[]) {
    int *list;
    int numprocs, rank, namelen, i, n;
    int chunksize, offset;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Status stat;

    n = NUM_DATA * NUM_DATA;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);

    // Note: you'll need to handle n % NUM_THREADS != 0, but I'm ignoring that for now
    chunksize = n / NUM_THREADS;

    if (rank == 0) {
        // Think of this as a master process
        // Do your initialization in this process
        list = malloc(n * sizeof(int));
        for (i = 0; i < n; i++)
        {
            list[i] = rand() % 1000;
        }
        // Once you're ready, send each slave process a chunk to work on
        offset = chunksize;
        for (i = 1; i < numprocs; i++) {
            MPI_Send(&list[offset], chunksize, MPI_INT, i, 0, MPI_COMM_WORLD);
            offset += chunksize;
        }
        // The master works on the first chunk itself
        search(list, 0, chunksize);
        // If you need some sort of response back from the slaves, do a recv loop here
    } else {
        // If you're not the master, you're a slave process, so wait to receive data
        list = malloc(chunksize * sizeof(int));
        MPI_Recv(list, chunksize, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        // Now you can do work on your portion
        search(list, 0, chunksize);
        // If you need to send something back to the master, do it here.
    }

    free(list);
    MPI_Finalize();
    return 0;
}
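As a side note: if every rank only ever needs its own chunk, the explicit send/receive loop can be replaced with a single collective call. This is just a sketch of that alternative, under the same assumption that chunksize evenly divides n:

// Collective alternative: rank 0 holds the full list, every rank receives one chunk.
// On ranks other than 0, the send buffer argument is ignored, so list may stay NULL there.
int *chunk = malloc(chunksize * sizeof(int));
MPI_Scatter(list, chunksize, MPI_INT, chunk, chunksize, MPI_INT, 0, MPI_COMM_WORLD);
search(chunk, 0, chunksize);
free(chunk);

With MPI_Scatter, rank 0 also receives its own first chunk into chunk, so the separate search(list, 0, chunksize) call on the master is no longer needed.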
I'm trying to find a specific value inside an array, searching it in parallel with MPI. When my code finds the value, it shows an error.
ERROR
Assertion failed in file src/mpid/ch3/src/ch3u_buffer.c at line 77: FALSE
memcpy argument memory ranges overlap, dst_=0x7ffece7eb590 src_=0x7ffece7eb590 len_=4
PROGRAM
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

/* createFile() and readFile() are defined elsewhere in the program */

const char *FILENAME = "input.txt";
const size_t ARRAY_SIZE = 640;

int main(int argc, char **argv)
{
    int *array = malloc(sizeof(int) * ARRAY_SIZE);
    int rank, size;
    MPI_Status status;
    MPI_Request request;
    int done, myfound, inrange, nvalues;
    int i, j, dummy;

    /* Let the system do what it needs to start up MPI */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    myfound = 0;
    if (rank == 0)
    {
        createFile();
        array = readFile(FILENAME);
    }
    MPI_Bcast(array, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Irecv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
    MPI_Test(&request, &done, &status);
    nvalues = ARRAY_SIZE / size;  // each process scans this many elements
    i = rank * nvalues;           // offset of this process inside the array
    inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);  // limit of the offset
    while (!done && inrange)
    {
        if (array[i] == 17)
        {
            dummy = 1;
            for (j = 0; j < size; j++)
            {
                MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
            }
            printf("P:%d found it at global index %d\n", rank, i);
            myfound = 1;
        }
        printf("P:%d - %d - %d\n", rank, i, array[i]);
        MPI_Test(&request, &done, &status);
        ++i;
        inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
    }
    if (!myfound)
    {
        printf("P:%d stopped at global index %d\n", rank, i - 1);
    }
    MPI_Finalize();
}
The error is somewhere in here, because when I put an invalid number (for example -5) into the if condition, the program runs smoothly.
dummy = 1;
for (j = 0; j < size; j++)
{
    MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
Thanks
Your program is invalid with respect to the MPI standard because you use the same buffer (&dummy) for both MPI_Irecv() and MPI_Send().
You can either use two distinct buffers (e.g. dummy_send and dummy_recv), or, since you do not seem to care about the value of dummy, use NULL as the buffer and send/receive zero-size messages.
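A minimal sketch of the first option, keeping the rest of your loop as posted (the names dummy_send and dummy_recv are only suggestions):

int dummy_send = 0, dummy_recv = 0;

/* post the non-blocking receive on its own buffer */
MPI_Irecv(&dummy_recv, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
/* ... */
if (array[i] == 17)
{
    /* notify every rank through the separate send buffer */
    dummy_send = 1;
    for (j = 0; j < size; j++)
    {
        MPI_Send(&dummy_send, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
    }
    printf("P:%d found it at global index %d\n", rank, i);
    myfound = 1;
}

This way the buffer being written by MPI_Irecv() is never the same memory that MPI_Send() reads from.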
I have a simple MPI code which runs successfully, but just before terminating it shows the following error.
===================================================================================
=   BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
=   EXIT CODE: 139
=   CLEANING UP REMAINING PROCESSES
=   YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Below is my source code.
/*
AUTHOR ::: KHAYAM ANJAM
*/
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main (int argc, char *argv[])
{
    int rank, size, ball_value, ball_present;
    MPI_Init (&argc, &argv);
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
    MPI_Comm_size (MPI_COMM_WORLD, &size);
    srandom(rank);
    int delta = rand() % 13;
    int random = rand() % 5;
    if (random == 0) delta = -1*delta;
    if (rank == 0) {
        ball_present = 1;
        ball_value = 0;
    }
    else ball_present = 0;
    while (1) {
        if (ball_present == 0)
            MPI_Recv(&ball_value, 30, MPI_INT, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        ball_present = 1;
        printf("Task %d has Ball with value %d\n", rank, ball_value);
        if (ball_value == 1000) break;
        if (abs(ball_value) > 100) {
            int send_1000 = 1000;
            int i;
            for (i = 0; i < size; i++)
                if (i != rank) MPI_Send(&send_1000, 30, MPI_INT, i, 10, MPI_COMM_WORLD); // Broadcast to all others
            break;
        }
        ball_value += delta;
        int next_to_send = rand() % size;
        if (next_to_send != rank) {
            printf("Sending ball to %d\n", next_to_send);
            MPI_Send(&ball_value, 30, MPI_INT, next_to_send, 10, MPI_COMM_WORLD);
            ball_present = 0;
        }
    }
    MPI_Finalize();
    return 0;
}
I'm not too sure about the rest of the code (it seems OK, but I didn't look too closely), but what is sure is that you've got the MPI_Recv() / MPI_Send() pairs wrong. The problem is that you send and receive arrays of 30 integers, while you only allocated memory for a single integer in each case.
Try replacing the 30 parameter with 1 in the three MPI_Send() / MPI_Recv() calls, and your code might just work.
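For reference, this is what the three calls would look like with the count fixed (everything else unchanged):

/* receive the ball: one int */
MPI_Recv(&ball_value, 1, MPI_INT, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

/* broadcast the terminating value to all others: one int */
if (i != rank) MPI_Send(&send_1000, 1, MPI_INT, i, 10, MPI_COMM_WORLD);

/* pass the ball on: one int */
MPI_Send(&ball_value, 1, MPI_INT, next_to_send, 10, MPI_COMM_WORLD);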
I am using MPI to implement Dijkstra's algorithm for a class. My teacher also has no idea why this is broken and has given me permission to post here.
My problem is happening in the chooseVertex function. The program works fine with 1 processor, but when I run it with 2 processors, processor 0 fails to return leastPosition, even though I am able to print the contents of leastPosition on the line before the return.
My code:
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>

#define min(x,y) ((x) > (y) ? (y) : (x))
#define MASTER 0
#define INFINTY 100000

void dijkstra(int, int, int **, int *, int, int);
int chooseVertex(int *, int, int *, int, int);

int main(int argc, char* argv[])
{
    int rank, size, i, j;

    //Initialize MPI
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    //Initialize graph
    int src = 0;
    int n = 12;
    int **edge = (int**) malloc(n * sizeof(int *));
    for (i = 0; i < n; i++)
        edge[i] = (int *)malloc(n * sizeof(int));
    int dist[12];

    //Set all graph lengths to infinity
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            if (i == j) { edge[i][j] = 0; }
            else { edge[i][j] = INFINTY; }
        }
    }

    //set graph edge lengths
    edge[0][3] = 5;
    edge[0][6] = 13;
    edge[1][5] = 12;
    edge[2][1] = 7;
    edge[3][2] = 9;
    edge[3][4] = 2;
    edge[4][7] = 3;
    edge[5][10] = 1;
    edge[5][11] = 4;
    edge[6][9] = 9;
    edge[7][8] = 4;
    edge[8][9] = 10;
    edge[8][10] = 7;
    edge[9][10] = 6;
    edge[10][11] = 1;

    dijkstra(src, n, edge, dist, rank, size);
    if (rank == MASTER) { printf("The distance is %d", dist[n - 1]); }
    MPI_Finalize();
    return 0;
}
//called by the dijkstra function below
int chooseVertex(int *dist, int n, int *found, int rank, int size) {
    int i, tmp, partition, lower, upper, leastPosition;
    int least = INFINTY;

    //set the number of nodes each processor will work with
    partition = n / size;
    lower = rank * partition;
    upper = lower + partition;

    //used for MPI_Reduce
    struct {
        int pos;
        int val;
    } sendBuffr, recvBuffr;

    //calculate least position
    for (i = lower; i < upper; i++) {
        tmp = dist[i];
        if ((!found[i]) && (tmp < least)) {
            least = tmp;
            leastPosition = i;
        }
    }
    //if all nodes checked are INFINITY, go with last node checked
    if (least == INFINTY) leastPosition = i;

    //set the send buffer for MPI_Reduce
    sendBuffr.val = least;
    sendBuffr.pos = leastPosition;

    //Rank 0 processor has correct least position and value
    MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_DOUBLE_INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
    if (rank == MASTER) leastPosition = recvBuffr.pos;

    //Update all processors to have correct position
    MPI_Bcast(&leastPosition, 1, MPI_INT, MASTER, MPI_COMM_WORLD);

    //Print the contents of leastPosition on rank 0 for debugging
    if (rank == MASTER) printf("LeastPosition for rank %d is: %d\n", rank, leastPosition);
    fflush(stdout);
    return leastPosition;
}
void dijkstra(int SOURCE, int n, int **edge, int *dist, int rank, int size)
{
    int i, j, count, partition, lower, upper, *found, *sendBuffer;
    j = INFINTY;
    sendBuffer = (int *)malloc(n * sizeof(int));
    found = (int *)calloc(n, sizeof(int));
    partition = n / size;
    lower = rank * partition;
    upper = lower + partition;

    //set the distance array
    for (i = 0; i < n; i++) {
        found[i] = 0;
        dist[i] = edge[SOURCE][i];
        sendBuffer[i] = dist[i];
    }
    found[SOURCE] = 1;
    count = 1;

    //Dijkstra loop
    while (count < n) {
        printf("before ChooseVertex: rank %d reporting\n", rank);
        fflush(stdout);
        j = chooseVertex(dist, n, found, rank, size);
        printf("after ChooseVertex: rank %d reporting\n", rank);
        fflush(stdout);
        count++;
        found[j] = 1;
        for (i = lower; i < upper; i++) {
            if (!found[i])
            {
                dist[i] = min(dist[i], dist[j] + edge[j][i]);
                sendBuffer[i] = dist[i];
            }
        }
        MPI_Reduce(sendBuffer, dist, n, MPI_INT, MPI_MIN, MASTER, MPI_COMM_WORLD);
        MPI_Bcast(dist, n, MPI_INT, MASTER, MPI_COMM_WORLD);
    }
}
Sample error messages:
before ChooseVertex: rank 1 reporting
before ChooseVertex: rank 0 reporting
LeastPosition for rank 0 is: 3
after ChooseVertex: rank 1 reporting
after ChooseVertex: rank 0 reporting
before ChooseVertex: rank 1 reporting
before ChooseVertex: rank 0 reporting
after ChooseVertex: rank 1 reporting
LeastPosition for rank 0 is: 4
after ChooseVertex: rank 0 reporting
before ChooseVertex: rank 0 reporting
before ChooseVertex: rank 1 reporting
LeastPosition for rank 0 is: 7
after ChooseVertex: rank 1 reporting
job aborted:
[ranks] message
[0] process exited without calling finalize
[1] terminated
---- error analysis -----
[0] on My-ComputerName
Assignmet3PP ended prematurely and may have crashed. exit code 3
---- error analysis -----
Your reduce command is:
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_DOUBLE_INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
By using MPI_DOUBLE_INT, you are saying that you are sending a struct with two variables: a double followed by an int. That is not your struct, however: you only have two ints. Therefore you should use MPI_2INT, one of the pair types MPI predefines for MPI_MINLOC/MPI_MAXLOC reductions. Alternatively, you could build your own derived datatype.
An example fix is:
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_2INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
Also, a reduction followed by a broadcast can easily be combined into one step with MPI_Allreduce().
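A sketch of that combined call, assuming the struct is reordered so the value comes first (MPI_MINLOC operates on (value, index) pairs):

struct {
    int val;  // value to minimize: must come first for MPI_2INT with MPI_MINLOC
    int pos;  // index of that value
} sendBuffr, recvBuffr;

sendBuffr.val = least;
sendBuffr.pos = leastPosition;

// every rank gets the global minimum and its position in a single collective
MPI_Allreduce(&sendBuffr, &recvBuffr, 1, MPI_2INT, MPI_MINLOC, MPI_COMM_WORLD);
leastPosition = recvBuffr.pos;

This replaces both the MPI_Reduce and the following MPI_Bcast of leastPosition in chooseVertex.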
I have already looked for answers about MPI and dynamic allocation, but there is still an error in my code.
I think the send/receive pairs work well. The problem probably lies in the part where I want to do some basic operations on the received data: I can't index into the array without getting this error:
[lyomatnuc09:07574] *** Process received signal ***
[lyomatnuc09:07575] *** Process received signal ***
[lyomatnuc09:07575] Signal: Segmentation fault (11)
[lyomatnuc09:07575] Signal code: Address not mapped (1)
[lyomatnuc09:07575] Failing at address: 0x60
The basic code that reproduces the error is below:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int **alloc_array(int rows, int cols) {
    int *data = (int *)malloc(rows*cols*sizeof(int));
    int **array = (int **)malloc(rows*sizeof(int*));
    for (int i = 0; i < rows; i++)
        array[i] = &(data[cols*i]);
    return array;
}

int main(int argc, char *argv[])
{
    int rank, size;
    double start_time;
    MPI_Status status;

    MPI_Init(&argc, &argv);                //initialize MPI operations
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);  //get the rank
    MPI_Comm_size(MPI_COMM_WORLD, &size);  //get number of processes

    MPI_Datatype columntype;
    MPI_Type_vector(10, 1, 10, MPI_INT, &columntype);
    MPI_Type_commit(&columntype);

    start_time = MPI_Wtime();
    if (rank == 0)
    {
        int **A;
        A = alloc_array(10,10);
        for (int i = 1; i < size; i++)
        {
            MPI_Send(&(A[0][0]), 10*10, MPI_INT, i, 1, MPI_COMM_WORLD);
        }
    } else if (rank >= 1) {
        int **A2;
        A2 = alloc_array(10,10);
        MPI_Recv(&(A2[0][0]), 10*10, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
        for (int i = 0; i < 10; i++)
        {
            for (int j = 0; j < 10; i++)
            {
                A2[i][j] = i*j;  //bug here
            }
        }
    }//end slaves task
    MPI_Finalize();
    return 0;
}
I'm trying to write a parallel program that implements a pipelined version of Gaussian elimination, using MPI and the C language...
However, I'm encountering some difficulties early in the implementation of the code...
I use a root process to read a data matrix from a text file... this process gives me the size of the matrix, and I broadcast that size to all the other processes so they can allocate it in memory... However, the slave processes try to allocate it before the broadcast from the root...
How can I make them wait?
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

int CalcInd(int i, int j, int dimL)
{
    return i*dimL + j;
}

int main (int argc, char **argv)
{
    FILE *fin, *fout;
    char fA[] = "Matrix.txt";
    int rank, size, i, ii, j, k, m, n, picked, tmp, total;
    int counter = 0, elements = 0;
    int *RightNeigbhor, *LeftNeigbhor, *loc;
    float f, magnitude, t;
    float *A, *x;
    MPI_Status status;
    MPI_Request request;

    // MPI initialization
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Barrier(MPI_COMM_WORLD);

    if(rank == 0)
    {
        // Definition of the neighbouring processes by the master
        RightNeigbhor = (int *)calloc(size, sizeof(int));
        if(RightNeigbhor == NULL)
            { printf("!!! Could not allocate memory !!!\n"); exit(-1); }
        LeftNeigbhor = (int *)calloc(size, sizeof(int));
        if(RightNeigbhor == NULL)
            { printf("!!! Could not allocate memory !!!\n"); exit(-1); }
        for(i = 0; i < size; i++)
        {
            RightNeigbhor[i] = (rank + 1) % size;
            LeftNeigbhor[i] = (rank - 1) % size;
        }
        // Broadcast the neighbouring processes to all processes
        MPI_Bcast(RightNeigbhor, size, MPI_INTEGER, rank, MPI_COMM_WORLD);
        MPI_Bcast(LeftNeigbhor, size, MPI_INTEGER, rank, MPI_COMM_WORLD);
        // Reading of matrix A by the master
        fin = fopen(fA, "r");
        if (fin == NULL) { printf("!!! FILE NOT FOUND !!!"); exit(-1); }
        while(!feof(fin))
        {
            fscanf(fin, "%f", &f);
            elements++;
        }
        rewind(fin);
        f = 0;
        while(!feof(fin))
        {
            if(fgetc(fin) == '\n')
            {
                counter++;
            }
        }
        rewind(fin);
        n = counter;
        m = (elements - 1) / counter;
        total = n*m;
        MPI_Bcast(&total, 1, MPI_INT, rank, MPI_COMM_WORLD);
        MPI_Bcast(&n, 1, MPI_INT, rank, MPI_COMM_WORLD);
    }
    // Allocation of variables
    A = (float *)calloc(total, sizeof(float));
    if(A == NULL) { printf("!!! Could not allocate memory !!!\n"); exit(-1); }
    loc = (int *)calloc(n, sizeof(int*));
    if(loc == NULL) { printf("!!! Could not allocate memory !!!\n"); exit(-1); }
    // AND IT GOES ON AND ON
Everything in your rank == 0 block runs only in process 0, while processes 1 ... n-1 simply skip it. You therefore have to move the MPI_Bcast calls out of that block so they are executed by every process in the communicator (here MPI_COMM_WORLD). MPI_Bcast is a collective operation: when processes 1 ... n-1 skip the initialization and reach the broadcast before process 0 does, they will wait there until the broadcast has occurred, which gives you exactly the synchronization you are asking for.
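A minimal sketch of that restructuring, using the variables from your code (only the size broadcast is shown):

if (rank == 0)
{
    /* ... master reads the file and computes n, m and total ... */
    n = counter;
    m = (elements - 1) / counter;
    total = n * m;
}
/* executed by ALL ranks: rank 0 sends, everyone else blocks here until the data arrives */
MPI_Bcast(&total, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

/* now every rank knows total and n, so every rank can allocate */
A = (float *)calloc(total, sizeof(float));
if (A == NULL) { printf("!!! Could not allocate memory !!!\n"); exit(-1); }

Note that once the call sits outside the if block, the root argument must be the literal rank of the sender (0 here), not the calling process's own rank.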