I am having trouble using MPI to multiply matrices.
The program reads two n x n matrices from two files and is supposed to multiply them using MPI, but I am getting a segmentation fault in one of the processes. This is the output I get when I run my code:
read matrix A from matrixA
read matrix B from matrixB
mpirun noticed that process rank 1 with PID 15599 on node VirtualBox exited on signal 11 (Segmentation fault).
Here is my code:
int main (int argc, char * argv[])
{
/* Check the number of arguments */
int n; /* Dimension of the matrix */
float *sa, *sb, *sc; /* Storage for matrix A, B, and C */
float **a, **b, **c; /* 2D array to access matrix A, B, and C */
int i, j, k;
MPI_Init(&argc, &argv); //Initialize MPI operations
MPI_Comm_rank(MPI_COMM_WORLD, &rank); //Get the rank
MPI_Comm_size(MPI_COMM_WORLD, &size); //Get number of processes
if(argc != 4) {
printf("Usage: %s fileA fileB fileC\n", argv[0]);
return 1;
}
if(rank == 0)
{
/* Read matrix A */
printf("read matrix A from %s\n", argv[1]);
read_matrix(argv[1], &a, &sa, &i, &j);
if(i != j) {
printf("ERROR: matrix A not square\n"); return 2;
}
n = i;
//printf("%d", n);
/* Read matrix B */
printf("Read matrix B from %s\n", argv[2]);
read_matrix(argv[2], &b, &sb, &i, &j);
if(i != j) {
printf("ERROR: matrix B not square\n");
return 2;
}
if(n != i) {
printf("ERROR: matrix A and B incompatible\n");
return 2;
}
}
printf("test");
if(rank == 0)
{
/* Initialize matrix C */
sc = (float*)malloc(n*n*sizeof(float));
memset(sc, 0, n*n*sizeof(float));
c = (float**)malloc(n*sizeof(float*));
for(i=0; i<n; i++) c[i] = &sc[i*n];
}
////////////////////////////////////////////////////////////////////////////////////////////
float matrA[n][n];
float matrB[n][n];
float matrC[n][n];
for(i = 0; i < n; i++)
{
for(j = 0; j < n; j++)
{
matrA[i][j] = sa[(i*n) + j];
matrB[i][j] = sb[(i*n) + j];
}
}
/* Master initializes work*/
if (rank == 0)
{
start_time = MPI_Wtime();
for (i = 1; i < size; i++)
{
//For each slave other than the master
portion = (n / (size - 1)); // Calculate portion without master
low_bound = (i - 1) * portion;
if (((i + 1) == size) && ((n % (size - 1)) != 0))
{
//If rows of [A] cannot be equally divided among slaves,
upper_bound = n; //the last slave gets all the remaining rows.
}
else
{
upper_bound = low_bound + portion; //Rows of [A] are equally divisable among slaves
}
//Send the low bound first without blocking, to the intended slave.
MPI_Isend(&low_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);
//Next send the upper bound without blocking, to the intended slave
MPI_Isend(&upper_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);
//Finally send the allocated row portion of [A] without blocking, to the intended slave
MPI_Isend(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
}
}
//broadcast [B] to all the slaves
MPI_Bcast(&matrB, n*n, MPI_FLOAT, 0, MPI_COMM_WORLD);
/* work done by slaves*/
if (rank > 0)
{
//receive low bound from the master
MPI_Recv(&low_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
//next receive upper bound from the master
MPI_Recv(&upper_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
//finally receive row portion of [A] to be processed from the master
MPI_Recv(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
for (i = low_bound; i < upper_bound; i++)
{
//iterate through a given set of rows of [A]
for (j = 0; j < n; j++)
{
//iterate through columns of [B]
for (k = 0; k < n; k++)
{
//iterate through rows of [B]
matrC[i][j] += (matrA[i][k] * matrB[k][j]);
}
}
}
//send back the low bound first without blocking, to the master
MPI_Isend(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
//send the upper bound next without blocking, to the master
MPI_Isend(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
//finally send the processed portion of data without blocking, to the master
MPI_Isend(&matrC[low_bound][0],
(upper_bound - low_bound) * n,
MPI_FLOAT,
0,
SLAVE_TO_MASTER_TAG + 2,
MPI_COMM_WORLD,
&request);
}
/* Master gathers processed work*/
if (rank == 0)
{
for (i = 1; i < size; i++)
{
// Until all slaves have handed back the processed data,
// receive low bound from a slave.
MPI_Recv(&low_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);
//Receive upper bound from a slave
MPI_Recv(&upper_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);
//Receive processed data from a slave
MPI_Recv(&matrC[low_bound][0],
(upper_bound - low_bound) * n,
MPI_FLOAT,
i,
SLAVE_TO_MASTER_TAG + 2,
MPI_COMM_WORLD,
&status);
}
end_time = MPI_Wtime();
printf("\nRunning Time = %f\n\n", end_time - start_time);
}
MPI_Finalize(); //Finalize MPI operations
/* Do the multiplication */
//////////////////////////////////////////////////// matmul(a, b, c, n);
for(i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
{
sc[(i*n) + j] = matrC[i][j];
}
}
}
Every process declares the pointers to the matrices, namely:
float *sa, *sb, *sc; /* storage for matrix A, B, and C */
but only process 0 (allocates and) fills up the arrays sa and sb:
if(rank == 0)
{
...
read_matrix(argv[1], &a, &sa, &i, &j);
...
read_matrix(argv[2], &b, &sb, &i, &j);
...
}
However, afterwards every process tries to access the elements of the sa and sb arrays:
for(i = 0; i < n; i++)
{
for(j = 0; j < n; j++)
{
matrA[i][j] = sa[(i*n) + j];
matrB[i][j] = sb[(i*n) + j];
}
}
Since only process 0 has (allocated and) filled the arrays sa and sb, the remaining processes try to access memory (sa[(i*n) + j] and sb[(i*n) + j]) that they never allocated. Hence the segmentation fault.
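One way to restructure that part, roughly in line with how the rest of the program already distributes the data (just a sketch, not the only possible fix; note that n also has to be shared before the local arrays can be declared on every rank):
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); /* every rank needs n for the arrays below */
float matrA[n][n];
float matrB[n][n];
float matrC[n][n];
if (rank == 0) /* only rank 0 owns sa and sb */
{
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            matrA[i][j] = sa[(i*n) + j];
            matrB[i][j] = sb[(i*n) + j];
        }
    }
}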
On a side note, there is another problem in your program: you initiate non-blocking sends with MPI_Isend but never wait on the completion of the returned request handles. MPI implementations are not even required to start a send operation until it is properly progressed to completion, mostly by a call to one of the wait or test operations (MPI_Wait, MPI_Waitsome, MPI_Waitall, and so on). Even worse, you reuse the same handle variable request, effectively losing the handles to all previously initiated requests, which makes them impossible to wait on or test. Use an array of requests instead and wait for all of them to finish with MPI_Waitall after the send loop.
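For example, a sketch of the master's send loop with separate per-worker buffers and a request array (lows, highs, and reqs are illustrative names; separate buffers matter because a buffer handed to MPI_Isend must not be modified until that send completes):
int lows[size], highs[size];      /* one pair of bounds per worker */
MPI_Request reqs[3 * (size - 1)];
int r = 0;
for (i = 1; i < size; i++)
{
    portion  = n / (size - 1);
    lows[i]  = (i - 1) * portion;
    highs[i] = ((i + 1) == size) ? n : lows[i] + portion;
    MPI_Isend(&lows[i],  1, MPI_INT, i, MASTER_TO_SLAVE_TAG,     MPI_COMM_WORLD, &reqs[r++]);
    MPI_Isend(&highs[i], 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &reqs[r++]);
    MPI_Isend(&matrA[lows[i]][0], (highs[i] - lows[i]) * n, MPI_FLOAT,
              i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &reqs[r++]);
}
MPI_Waitall(r, reqs, MPI_STATUSES_IGNORE); /* complete all sends before the buffers are reused or go out of scope */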
Also think about this: do you really need non-blocking operations to send the data back from the workers?
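For instance, the workers could simply use blocking sends, which pair naturally with the blocking MPI_Recv calls on the master side and need no request bookkeeping (a sketch):
MPI_Send(&low_bound,   1, MPI_INT,   0, SLAVE_TO_MASTER_TAG,     MPI_COMM_WORLD);
MPI_Send(&upper_bound, 1, MPI_INT,   0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD);
MPI_Send(&matrC[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT,
         0, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD);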
Related
I have a problem with sending a dynamically allocated sub-matrix to the workers. I can't understand how to do that correctly (and what exactly I should send).
Here is the sending part:
MPI_Send(&(a[offset][0]), rows * NCA, MPI_DOUBLE, dest, FROM_MASTER + 2, MPI_COMM_WORLD);
MPI_Send(&b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3, MPI_COMM_WORLD);
Here is the receiving part:
MPI_Recv(&(a[0][0]), rows * NCA, MPI_DOUBLE, MASTER, FROM_MASTER + 2, MPI_COMM_WORLD, &status);
MPI_Recv(&(b[0][0]), NCA * NCB, MPI_DOUBLE, MASTER, FROM_MASTER + 3, MPI_COMM_WORLD, &status);
Here is the error message:
[pop-os:29368] Read -1, expected 80000, errno = 14
[pop-os:29367] *** Process received signal ***
[pop-os:29367] Signal: Segmentation fault (11)
[pop-os:29367] Signal code: Address not mapped (1)
[pop-os:29367] Failing at address: 0x7fffc2ae8000
Here is the full code:
#include <cstdio>
#include <cstdlib>
#include "mpi.h"
#define NRA 100
/* number of rows in matrix A */
#define NCA 100
/* number of columns in matrix A */
#define NCB 100
/* number of columns in matrix B */
#define MASTER 0
/* taskid of first task */
#define FROM_MASTER 1 /* setting a message type */
#define FROM_WORKER 10 /* setting a message type */
double **alloc_2d_int(int rows, int cols) {
double **array= (double **)malloc(rows*sizeof(double*));
for (int i=0; i<rows; i++)
array[i] = (double *)malloc(rows*cols*sizeof(double));
return array;
}
int main(int argc, char *argv[]) {
int numtasks, taskid, numworkers, source, dest, rows,
/* rows of matrix A sent to each worker */
averow, extra, offset, i, j, k;
double **a = alloc_2d_int(NRA, NCA);
double **b = alloc_2d_int(NCA, NCB);
double **c = alloc_2d_int(NRA, NCB);
MPI_Init(&argc, &argv);
MPI_Status status;
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
if (numtasks < 2) {
printf("Need at least two MPI tasks. Quitting...\n");
MPI_Abort(MPI_COMM_WORLD, -1);
exit(1);
}
numworkers = numtasks - 1;
if (taskid == MASTER) {
printf("mpi_mm has started with %d tasks (task1).\n", numtasks);
for (i = 0; i < NRA; i++)
for (j = 0; j < NCA; j++) a[i][j] = 10;
for (i = 0; i < NCA; i++)
for (j = 0; j < NCB; j++) b[i][j] = 10;
double t1 = MPI_Wtime();
averow = NRA / numworkers;
extra = NRA % numworkers;
offset = 0;
for (dest = 1; dest <= numworkers; dest++) {
rows = (dest <= extra) ? averow + 1 : averow;
printf("Sending %d rows to task %d offset=%d\n", rows, dest, offset);
MPI_Send(&offset, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, FROM_MASTER + 1, MPI_COMM_WORLD);
MPI_Send(&(a[offset][0]), rows * NCA, MPI_DOUBLE, dest, FROM_MASTER + 2,
MPI_COMM_WORLD);
MPI_Send(&b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3,
MPI_COMM_WORLD);
offset = offset + rows;
}
/* Receive results from worker tasks */
for (source = 1; source <= numworkers; source++) {
MPI_Recv(&offset, 1, MPI_INT, source, FROM_WORKER, MPI_COMM_WORLD,
&status);
MPI_Recv(&rows, 1, MPI_INT, source, FROM_WORKER + 1, MPI_COMM_WORLD,
&status);
MPI_Recv(&(c[offset][0]), rows * NCB, MPI_DOUBLE, source, FROM_WORKER + 2,
MPI_COMM_WORLD, &status);
printf("Received results from task %d\n", source);
}
/* Print results */
/*
printf("****\n");
printf("Result Matrix:\n");
for (i = 0; i < NRA; i++)
{
printf("\n");
for (j = 0; j < NCB; j++) printf("%6.2f ", c[i][j]);
}*/
printf("\n********\n");
printf("Done.\n");
t1 = MPI_Wtime() - t1;
printf("\nExecution time: %.2f\n", t1);
}
/******** worker task *****************/
else { /* if (taskid > MASTER) */
MPI_Recv(&offset, 1, MPI_INT, MASTER, FROM_MASTER, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, MASTER, FROM_MASTER + 1, MPI_COMM_WORLD,
&status);
MPI_Recv(&(a[0][0]), rows * NCA, MPI_DOUBLE, MASTER, FROM_MASTER + 2,
MPI_COMM_WORLD, &status);
MPI_Recv(&(b[0][0]), NCA * NCB, MPI_DOUBLE, MASTER, FROM_MASTER + 3, MPI_COMM_WORLD,
&status);
for (k = 0; k < NCB; k++)
for (i = 0; i < rows; i++) {
c[i][k] = 0.0;
for (j = 0; j < NCA; j++) c[i][k] = c[i][k] + a[i][j] * b[j][k];
}
MPI_Send(&offset, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, MASTER, FROM_WORKER + 1, MPI_COMM_WORLD);
MPI_Send(&c, rows * NCB, MPI_DOUBLE, MASTER, FROM_WORKER + 2,
MPI_COMM_WORLD);
}
for (i=0; i<NRA; i++)
free(a[i]);
free(a);
for (i=0; i<NCA; i++)
free(b[i]);
free(b);
for (i=0; i<NRA; i++)
free(c[i]);
free(c);
MPI_Finalize();
}
Solution: link to GitHub with correct code
The first parameter of MPI_Send and MPI_Recv is a pointer to the data buffer (const void * / void *). Since b and c are already pointers, passing &b or &c sends the address of the pointer variable itself rather than the matrix data, so you need to change:
MPI_Send(&b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3, MPI_COMM_WORLD);
to
MPI_Send(b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3, MPI_COMM_WORLD);
and
MPI_Send(&c, rows * NCB, MPI_DOUBLE, MASTER, FROM_WORKER + 2, MPI_COMM_WORLD);
to
MPI_Send(c, rows * NCB, MPI_DOUBLE, MASTER, FROM_WORKER + 2, MPI_COMM_WORLD);
Another issue is that you are allocating an array of pointers:
double **alloc_2d_int(int rows, int cols) {
double **array= (double **)malloc(rows*sizeof(double*));
for (int i=0; i<rows; i++)
array[i] = (double *)malloc(rows*cols*sizeof(double));
return array;
}
But the data passed to MPI_Send and MPI_Recv is assumed to be contiguous in memory. To solve this, you can allocate a contiguous 2D array, represent the matrix as a flat 1D array, or create a custom MPI datatype, among other options.
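For example, a sketch of a contiguous allocator (the name alloc_2d_double is just illustrative; it is freed with free(array[0]); free(array);):
double **alloc_2d_double(int rows, int cols) {
    /* one block for all values, plus a table of row pointers into it */
    double *data   = (double *)malloc((size_t)rows * cols * sizeof(double));
    double **array = (double **)malloc(rows * sizeof(double *));
    for (int i = 0; i < rows; i++)
        array[i] = &data[(size_t)i * cols];
    return array; /* &(array[0][0]) now really points at rows*cols contiguous doubles */
}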
I am trying to run a matrix multiplication program using MPI. The arrays 'a' and 'b' are of type double and size 512*512. The array 'a' is to be scattered and array 'b' is to be broadcast. The final result after the matrix multiplication is to be gathered in the master process in the array c[512][512].
I am trying to implement MPI_Scatter() using MPI_Send() and MPI_Recv() functions but I am kind of stuck in an infinite loop (probably). P is the number of processes.
double a[512][512], b[512][512], c[512][512];
blksz = 512/P;
if(rank == 0) {
// Scatter input matrix a, implementation of MPI_Scatter()
for(j = 1 ; j < P ; j++ ) {
MPI_Send(&a + j*blksz*N*sizeof(double), blksz*N, MPI_DOUBLE, j, 0, MPI_COMM_WORLD);
}
// Broadcast the input matrix b, implementation of MPI_Bcast()
for(j = 1 ; j < P ; j++ ) {
MPI_Send(&b, N*N, MPI_DOUBLE, j, 1, MPI_COMM_WORLD);
}
}
else {
MPI_Recv(&a, blksz*N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&b, N*N, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &status);
}
for(i = 0 ; i < blksz; i++) {
for(j = 0 ; j < N ; j++) {
c[i][j] = 0;
for(k = 0 ; k < N ; k++) {
c[i][j] += a[i][k] * b[k][j];
}
}
}
// Gather result, implementation of MPI_Gather()
if(rank != 0) {
MPI_Send(&c, blksz*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
}
else {
for(i = 1 ; i < P ; i++) {
MPI_Recv(&c+i*blksz*N*sizeof(double), blksz*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, &status);
}
}
I am kind of a beginner at programming, but I was up all night trying to figure this out, to no avail. I would really appreciate it if someone could help me out here.
When you send &a + j*blksz*N*sizeof(double), you are not doing what you want to do. First off, &a is the address of a, which is an array of arrays; that is not what you want to send. You want to send a pointer, i.e. *a (technically this is an array, but it will be implicitly converted to a pointer to the first element of said array). Next, when doing pointer arithmetic you do not need to (and in fact should not) multiply by sizeof(type); the compiler takes care of that for you. So your first MPI_Send command should be
MPI_Send(*a + j*blksz*N, blksz*N, MPI_DOUBLE, j, 0, MPI_COMM_WORLD);
Make similar changes (for all sends and receives) and your code should work.
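For instance, the receive in the gather loop would become something like this (a sketch; note that the master should also receive from rank i there, not from rank 0):
MPI_Recv(*c + i*blksz*N, blksz*N, MPI_DOUBLE, i, 2, MPI_COMM_WORLD, &status);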
I'm trying to sort an array of random numbers using odd-even transposition, but I keep getting a segmentation fault when running my code:
[islb:48966] *** Process received signal ***
[islb:48966] Signal: Segmentation fault (11)
[islb:48966] Signal code: Address not mapped (1)
[islb:48966] Failing at address: 0x28
[islb:48966] [ 0] /lib64/libpthread.so.0(+0xf810)[0x7fc3da4cb810]
[islb:48966] [ 1] /lib64/libc.so.6(memcpy+0xa3)[0x7fc3da1c7cf3]
[islb:48966] [ 2] /usr/local/lib/libopen-pal.so.6(opal_convertor_unpack+0x10b)[0x7fc3d9c372db]
[islb:48966] [ 3] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_request_progress_match+0x138)[0x7fc3d58507a8]
[islb:48966] [ 4] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_req_start+0x1b1)[0x7fc3d5850d11]
[islb:48966] [ 5] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv+0x139)[0x7fc3d5849489]
[islb:48966] [ 6] /usr/local/lib/libmpi.so.1(MPI_Recv+0xc0)[0x7fc3da742f40]
[islb:48966] [ 7] oddEven[0x40115a]
[islb:48966] [ 8] /lib64/libc.so.6(__libc_start_main+0xe6)[0x7fc3da161c36]
[islb:48966] [ 9] oddEven[0x400c19]
[islb:48966] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 48966 on node islb exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
The program allocates the array; the error seems to occur when it comes to scattering it amongst the processes, as the print statement directly after the scatter call only prints for process 0 before the error message appears.
Here's my code:
#include <stdio.h>
#include <math.h>
#include <malloc.h>
#include <time.h>
#include <string.h>
#include "mpi.h"
const int MAX = 10000;
int myid, numprocs;
int i, n, j, k, arrayChunk, minindex;
int A, B;
int temp;
int swap(int *x, int *y) {
temp = *x;
*x = *y;
*y = temp;
return 0;
}
int main(int argc, char **argv) {
int* arr = NULL;
int* value = NULL;
MPI_Status status;
//int arr[] = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
srand(time(0));
time_t t1, t2;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
if (myid == 0) {
printf("Enter the number of elements you would like in the array \n");
scanf("%d", &n);
arrayChunk = n/numprocs;
//printf("cpus: %d, #s per cpu: %d\n", numprocs, arrayChunk);
//Allocate memory for the array
arr = malloc(n * sizeof(int));
value = malloc(n * sizeof(int));
// Generate an array of size n random numbers and prints them
printf("Elements in the array: ");
for (i = 0; i < n; i++) {
arr[i] = (rand() % 100) + 1;
printf("%d ", arr[i]);
}
printf("\n");
time(&t1);
}
if ((n % numprocs) != 0) {
if (myid == 0)
printf("Number of Elements are not divisible by numprocs \n");
MPI_Finalize();
return(0);
}
// Broadcast the size of each chunk
MPI_Bcast(&arrayChunk, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(&arr, arrayChunk, MPI_INT, &value, arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);
printf("Processor %d receives %d\n", myid, value[0]);
for (i = 0; i < numprocs; i++) {
if (i % 2 == 0) {
if (myid%2 == 0) {
MPI_Send(&value[0], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD);
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD, &status);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
} else {
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD, &status);
MPI_Send(&value[0], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
for (j = 0; j < arrayChunk; j++) {
swap(&value[j], &value[j + arrayChunk]);
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
}
} else {
if ((myid%2 == 1) && (myid != (numprocs-1))) {
MPI_Send(&value[0], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD);
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid + 1, 0, MPI_COMM_WORLD, &status);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
} else if (myid != 0 && myid != (numprocs-1)) {
MPI_Recv(&value[arrayChunk], arrayChunk, MPI_INT, myid - 1, 0, MPI_COMM_WORLD, &status);
MPI_Send(&value[0], 1, MPI_INT, myid - 1, 0, MPI_COMM_WORLD);
for (j = 0; j < (arrayChunk * 2 - 1); j++) {
minindex = j;
for (k = j + 1; k < arrayChunk * 2; k++) {
if (value[k] < value[minindex]) {
minindex = k;
}
}
if (minindex > j) {
swap(&value[j], &value[minindex]);
}
}
for (j = 0; j < arrayChunk; j++) {
swap(&value[j], &value[j + arrayChunk]);
}
//printf("myid %d i: %d, %d\n", myid, i, value[0]);
}
}
}
MPI_Gather(&value[0], arrayChunk, MPI_INT, &arr[0], arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);
if (myid == 0) {
time(&t2);
printf("Sorted array: ");
for (i = 0; i < n; i++) {
printf("%d ", arr[i]);
}
printf("\n");
printf("Time in sec. %f\n", difftime(t2, t1));
}
// Free allocated memory
if (arr != NULL) {
free(arr);
arr = NULL;
free(value);
value = NULL;
}
MPI_Finalize();
return 0;
}
I'm not very familiar with C, and it could well be that I've used malloc and/or addresses and pointers incorrectly, so it's probably something simple.
Sorry for the amount of code but I thought it would be better to supply all of it to allow for proper debugging.
The problem is in your MPI_Scatter command. You try to scatter the information and store in value, but if you look above that code, only rank 0 has allocated any memory for value. When any and all other ranks try to store data into value, you will get a segmentation fault (and indeed you do). Instead, remove the value = malloc(...); line from inside the if block, and put it after the MPI_Bcast as value = malloc(arrayChunk * sizeof(int));. I've not looked through the rest of the code to see if there are any issues elsewhere as well, but that is likely the cause of the initial seg-fault.
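A sketch of that part after the change (going slightly beyond the minimal fix: the buffer is sized 2 * arrayChunk because the exchange steps later receive a neighbour's chunk into value[arrayChunk] onwards, and arr/value are passed directly rather than their addresses):
MPI_Bcast(&arrayChunk, 1, MPI_INT, 0, MPI_COMM_WORLD);
value = malloc(2 * arrayChunk * sizeof(int)); /* every rank allocates its own working buffer */
MPI_Scatter(arr, arrayChunk, MPI_INT, value, arrayChunk, MPI_INT, 0, MPI_COMM_WORLD);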
I would build the program with debugging info (most likely the -g compile flag), try getting a core dump, and use the gdb debugger to locate the bug. A core file is created when a process crashes, and it holds the process memory image at the moment of the crash.
If no core dump file is created after the program crashes, you'll need to figure out how to enable it on your system. You may create a simple buggy program (for example with a=x/0; or a similar error) and play with it a bit. The core dump may be called core, PID.core (PID being the number of the crashed process), or something similar. Sometimes it is enough to set the core file size to unlimited using ulimit (ulimit -c unlimited). Also check the kernel.core_* sysctls on Linux.
Once you have a core dump, you can use it with gdb or a similar debugger (e.g. ddd):
gdb executable_file core
My master process has a string array, which it sends to the slave processes. The slave processes quicksort the array and send it back. The problem is in receiving it back.
After MPI_Recv, only the first 3 indices of the array are filled and the rest of the elements are empty. When I print the array before sending, the elements are there. But after receiving, except for those 3, all the others are empty.
Here is the slave code that sends the message. It receives an array of strings, quicksorts it, and sends it back ordered:
// receiving number of elements
MPI_Recv(&size, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
// allocating the array
array = (char**) malloc(sizeof(*array) * size);
array[0] = (char*) malloc(sizeof(*array[0]) * size * buf);
for(i=1; i<size; i++)
array[i] = &(array[0][i*buf]);
// receiving the array
MPI_Recv(&array[0][0], size*buf, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
// sorting the array
quicksort(array, 0, size-1);
// sending the ordered array back
//DEBUG printf("%d: Sending for %d -> %d...\n", rank, 0, size);
MPI_Send(&size, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
//DEBUG printf("%d: Sending for %d -> %d...\n", rank, 0, size*buf);
//printArray(array, size);
printf("%d: before sending\n", rank);
for(i=0; i<size; i++)
printf("%d: %s", rank, array[i]);
MPI_Send(&array[0][0], size*buf, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
Here is the master, which sends the array and is supposed to receive it back ordered:
// sending the messages
int j = 0;
for(i=1; i<procs; i++){
MPI_Send(&division[i], 1, MPI_INT, i, tag, MPI_COMM_WORLD);
MPI_Send(&array[j][0], division[i]*buf, MPI_CHAR, i, tag, MPI_COMM_WORLD);
j += division[i];
}
printf("%d: All Sent\n", rank);
// receiving the ordered arrays
printf("%d: Waiting for answers...\n", rank);
// receiving number of elements
MPI_Recv(&sizeA, 1, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status);
// alocating an array
arrayA = (char**) malloc(sizeof(*arrayA) * sizeA);
arrayA[0] = (char*) malloc(sizeof(*arrayA[0]) * sizeA * buf);
for(i=1; i<sizeA; i++)
arrayA[i] = &(arrayA[0][i*buf]);
// receiving the array
MPI_Recv(&arrayA[0][0], sizeA*buf, MPI_CHAR, status.MPI_SOURCE, tag, MPI_COMM_WORLD, &status);
for(i=0; i<sizeA; i++)
printf("%i: %s", i, arrayA[i]);
EDIT: The problem seems to be in the quicksort. If I comment out the quicksort line, I receive the entire array, although it works fine in the sequential version. Here's the code:
void quicksort(char **array, int left, int right){
// array empty
if(right - left < 1)
return;
// end of quicksort, just two elements left
if(right - left == 1){
if(strcomp(array[left], array[right]) > 0)
swap(array, left, right);
return;
}
int i = left + 1; // left + 1: used to avoid the pivot, on the first position
int j = right; // right
int pivot = (left + right) / 2; // pivot position
char *key = array[pivot]; // string with the array[pivot] element
// moving the pivot to the beginning
swap(array, left, pivot);
// quicksorting
while(i < j){
// if an element is on the left of the pivot and, is <= than the pivot
// keep the element there and increase i
while(i <= right && strcomp(array[i], key) < 0){
i++;
}
// if an element is on the right of the pivot, and is > than the pivot
// keep the element there and decrease j
while(j >= left && strcomp(array[j], key) > 0){
j--;
}
// array[i] > key, and should be on the right
// array[j] <= key, and should be on the left
if(i < j){
swap(array, i, j);
}
}
// moving the pivot back to the middle
swap(array, left, j);
// left recursion
if(left < j-1)
quicksort(array, left, j-1);
// right recursion
if(j+1 < right)
quicksort(array, j+1, right);
}
I have a strange error when using MPI_Send: I get this error when trying to send a portion of a two-dimensional array (matrix): "MPI_matrixMultiplication.c:68:99: error: expected expression before ',' token".
The specific line is the one where I try to send a portion of the matrix: MPI_Send(&a[beginPosition][0], ... );
(As you can see, I have commented out the other sends and receives related to the matrix.)
/////////////////////////////////////////////////////////
// multiplication of 2 matrices, parallelized using MPI //
/////////////////////////////////////////////////////////
#include <stdio.h>
#include <mpi.h>
// must use #define here, and not simply int blahblahblah, because "c" doesnt like ints for array dimension :(
#define matrixARowSize 3 // size of the row for matrix A
#define matrixAColumnSize 3 // size of the column for matrix A
#define matrixBRowSize 3 // size of the row for matrix B
#define matrixBColumnSize 3 // size of the column for matrix B
// tags used for sending/receiving data:
#define LOWER_BOUND 1 // first line to be processed
#define UPPER_BOUND 2 // last line to be processed
#define DATA // data to be processed
int a[matrixARowSize][matrixAColumnSize]; // matrix a
int b[matrixBRowSize][matrixBColumnSize]; // matrix b
int c[matrixARowSize][matrixBColumnSize]; // matrix c
int main()
{
int currentProcess; // current process
int worldSize; // world size
int i, j, k; // iterators
int rowsComputedPerProcess; // how many rows of the first matrix should be computed in each process
int numberOfSlaveProcesses; // the number of slave processes
int processesUsed; //how many processes of the available ones are actually used
MPI_Init(NULL, NULL); // MPI_Init()
MPI_Comm_size(MPI_COMM_WORLD, &worldSize); // get the world size
MPI_Comm_rank(MPI_COMM_WORLD, &currentProcess); // get current process
numberOfSlaveProcesses = worldSize - 1; // 0 is the master, rest are slaves
rowsComputedPerProcess = worldSize > matrixARowSize ? 1 : (matrixARowSize/numberOfSlaveProcesses);
processesUsed = worldSize > matrixARowSize ? matrixARowSize : numberOfSlaveProcesses;
/*
* in the first process (the father);
* initialize the 2 matrices, then start splitting the data to the slave processes
*/
if (!currentProcess) // in father process
{
printf("rows per process: %d\n", rowsComputedPerProcess);
printf("nr of processes used: %d\n", processesUsed);
// init matrix A
for(i = 0; i < matrixARowSize; ++i)
for(j = 0; j < matrixAColumnSize; ++j){
a[i][j] = i + j + 1;
// printf("%d\n", a[i][j]);
// printf("%d\n", *(a[i] + j));
}
// init matrix B
for(i = 0; i < matrixBRowSize; ++i)
for(j = 0; j < matrixBColumnSize; ++j)
b[i][j] = i + j + 1;
// start sending data to the slaves for them to work >:)
int beginPosition; // auxiliary values used for sending the offsets to slaves
int endPosition;
for(i = 1; i < processesUsed; ++i) // the last process is dealt with separately
{
beginPosition = (i - 1)*rowsComputedPerProcess;
endPosition = i*rowsComputedPerProcess;
MPI_Send(&beginPosition, 1, MPI_INT, i, LOWER_BOUND, MPI_COMM_WORLD);
MPI_Send(&endPosition, 1, MPI_INT, i, UPPER_BOUND, MPI_COMM_WORLD);
MPI_Send(&a[beginPosition][0], ((endPosition - beginPosition)*matrixARowSize), MPI_INT, i, DATA, MPI_COMM_WORLD);
// MPI_Send(a[beginPosition], (endPosition - beginPosition)*matrixARowSize, MPI_INT, i, DATA, MPI_COMM_WORLD);
// for(j = beginPosition; j < endPosition; ++j)
// for (k = 0; k < matrixAColumnSize; ++k)
// {
// printf("%d ", *(a[j] + k));
// }
// printf("\n");
// printf("beg: %d, end: %d\n", beginPosition, endPosition);
// printf(" data #%d\n", (endPosition - beginPosition)*matrixARowSize);
}
// deal with last process
beginPosition = (i - 1)*rowsComputedPerProcess;
endPosition = matrixARowSize;
MPI_Send(&beginPosition, 1, MPI_INT, i, LOWER_BOUND, MPI_COMM_WORLD);
MPI_Send(&endPosition, 1, MPI_INT, i, UPPER_BOUND, MPI_COMM_WORLD);
// MPI_Send(a[beginPosition], (endPosition - beginPosition)*matrixARowSize, MPI_INT, i, DATA, MPI_COMM_WORLD);
// printf("beg: %d, end: %d\n", beginPosition, endPosition);
// printf(" data #%d\n", (endPosition - beginPosition)*matrixARowSize);
}
else { // if this is a slave (rank > 0)
int beginPosition; // auxiliary values used for sending the offsets to slaves
int endPosition;
MPI_Recv(&beginPosition, 1, MPI_INT, 0, LOWER_BOUND, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&endPosition, 1, MPI_INT, 0, UPPER_BOUND, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// MPI_Recv(a[beginPosition], (endPosition - beginPosition)*matrixARowSize, 0, DATA, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for(i = beginPosition; i < endPosition; ++i) {
for (j = 0; j < matrixAColumnSize; ++j)
printf("(# %d, i=%d, j=%d: %d ", currentProcess, i, j, a[i][j]);
// printf("\n");
}
}
MPI_Finalize();
return 0; // bye-bye
}
Your DATA constant is empty.
#define DATA // data to be processed
So you're effectively trying to do:
MPI_Send(&a[beginPosition][0], ((endPosition - beginPosition)*matrixARowSize), MPI_INT, i, , MPI_COMM_WORLD);
This logically generates the "expected expression before ',' token" error.
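A minimal fix is to give DATA an actual tag value, for example (3 is just an arbitrary tag that is not used elsewhere in the program):
#define DATA 3 // data to be processed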