I am having trouble with MPI_Recv when receiving into memory created with malloc. Are there any suggestions for receiving a two-dimensional array allocated with malloc?
Thanks.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#define SIZE 2000
/* Tags define the message direction (from_to) */
#define TO_SLAVE_TAG 1
#define TO_MASTER_TAG 5
void createMatrices(int mSize);
/* Matrices */
int** first;
/* MPI_WORLD rank and size */
int rank, size;
MPI_Status status;
/*
* matrixSize: current matrix size
* lower_bound: index of the first row of [first matrix] assigned to a slave
* upper_bound: index one past the last row of [first matrix] assigned to a slave
* portion: number of rows of [first matrix] assigned to a slave, according to the number of processors
* count: number of data elements passed in MPI calls
*/
int matrixSize, lower_bound, upper_bound, portion, count;
int sum = 0;
clock_t t, start_time, end_time;
int main( int argc, char **argv ) {
/* Initialize the MPI execution environment */
MPI_Init( &argc, &argv );
/* Determines the size of the group */
MPI_Comm_size( MPI_COMM_WORLD, &size );
/* Determines the rank of the calling process */
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
if (rank == 0)
{
for (matrixSize = 500; matrixSize <= SIZE; matrixSize += 500) {
createMatrices(matrixSize);
/*
* Master processor divides [first matrix] elements
* and sends them to the proper slave processors.
* We can start time at this point.
*/
start_time = clock();
/* Define bounds for each processor except master */
for (int i = 1; i < size; ++i)
{
/* Calculate portion for each slave */
portion = (matrixSize / (size - 1));
lower_bound = (i-1) * portion;
if (((i+1)==size) && (matrixSize % (size-1) != 0)) {
upper_bound = matrixSize;
} else {
upper_bound = lower_bound + portion;
}
/* send matrix size to ith slave */
MPI_Send(&matrixSize, 1, MPI_INT, i, TO_SLAVE_TAG, MPI_COMM_WORLD);
/* send lower bound to ith slave */
MPI_Send(&lower_bound, 1, MPI_INT, i, TO_SLAVE_TAG + 1, MPI_COMM_WORLD);
/* send upper bound to ith slave */
MPI_Send(&upper_bound, 1, MPI_INT, i, TO_SLAVE_TAG + 2, MPI_COMM_WORLD);
/* send the allocated rows of [first matrix] to the ith slave */
count = (upper_bound - lower_bound) * matrixSize;
printf("Count: %d\n", count);
MPI_Send(&(first[lower_bound][0]), count, MPI_INT, i, TO_SLAVE_TAG + 3, MPI_COMM_WORLD);
}
}
}
if (rank > 0)
{
//receive matrix size from the master
MPI_Recv(&matrixSize, 1, MPI_INT, 0, TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
printf("Matrix size: %d\n", matrixSize);
//receive lower bound from the master
MPI_Recv(&lower_bound, 1, MPI_INT, 0, TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
printf("Lower bound: %d\n", lower_bound);
//next receive upper bound from the master
MPI_Recv(&upper_bound, 1, MPI_INT, 0, TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
printf("Upper bound: %d\n", upper_bound);
//finally receive the row portion of [first matrix] to be processed from the master
count = (upper_bound - lower_bound) * matrixSize;
printf("Count: %d\n", count);
MPI_Recv(&first[lower_bound][0], count, MPI_INT, 0, TO_SLAVE_TAG + 3, MPI_COMM_WORLD, &status);
printf("first[0][0]: %d\n", first[0][0]);
}
MPI_Finalize();
return 0;
}
void createMatrices(int mSize) {
/* array of row pointers */
first = malloc(mSize * sizeof(int*));
/* the rows themselves */
for (int i = 0; i < mSize; ++i)
first[i] = malloc(mSize * sizeof(int));
srand(time(NULL));
for (int i = 0; i < mSize; ++i)
for (int j = 0; j < mSize; ++j)
first[i][j] = rand()%2;
}
And the problem is:
*** Process received signal ***
Signal: Segmentation fault: 11 (11)
Signal code: Address not mapped (1)
Failing at address: 0x0
[ 0] 0 libsystem_platform.dylib 0x00007fff89cc8f1a _sigtramp + 26
[ 1] 0 libsystem_c.dylib 0x00007fff73857070 __stack_chk_guard + 0
[ 2] 0 libdyld.dylib 0x00007fff90f535c9 start + 1
[ 3] 0 ??? 0x0000000000000001 0x0 + 1
*** End of error message ***
To avoid the (possibly high) latency cost of sending each row individually, you need to create the matrix in linear memory. This is done by allocating enough memory for the entire matrix and setting up pointers to each row. Here is your function modified to do so.
void createMatrices(int mSize) {
/* allocate enough linear memory to store the whole matrix */
int *raw_data = malloc(mSize * mSize * sizeof(int));
/* matrix row pointers i.e. they point to each consecutive row */
first = malloc(mSize * sizeof(int*));
/* set the pointers to the appropriate address */
for (int i = 0; i < mSize; ++i)
first[i] = raw_data + mSize*i;
/* initialize with random values */
srand(time(NULL));
for (int i = 0; i < mSize; ++i)
for (int j = 0; j < mSize; ++j)
first[i][j] = rand()%2;
}
The other major problem you are facing is proper memory handling. You should free your matrices before allocating new ones on the root rank.
You also need to allocate memory for a matrix on the slave ranks before trying to copy over the data. That also needs to be in linear memory as done in the above function.
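For completeness, a minimal sketch of the slave side with both fixes applied (allocate in linear memory first, then receive with a datatype that matches the int data):
/* sketch of the slave side, assuming the modified createMatrices() above */
MPI_Recv(&matrixSize, 1, MPI_INT, 0, TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
MPI_Recv(&lower_bound, 1, MPI_INT, 0, TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
MPI_Recv(&upper_bound, 1, MPI_INT, 0, TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
/* allocate the matrix in linear memory before receiving into it */
createMatrices(matrixSize);
count = (upper_bound - lower_bound) * matrixSize;
/* the rows hold ints, so use MPI_INT on both the send and the receive side */
MPI_Recv(&first[lower_bound][0], count, MPI_INT, 0, TO_SLAVE_TAG + 3, MPI_COMM_WORLD, &status);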
I want to perform matrix multiplication. I have to write two versions: one with blocking MPI and one with non-blocking MPI. I have finished the blocking version, shown below, and would like help converting it to non-blocking MPI.
#include <stdlib.h>
#include <stdio.h>
#include "mpi.h"
#include <time.h>
#include <sys/time.h>
// Number of rows and columns in a matrix
#define N 4
MPI_Status status;
// Matrix holders are created
double matrix_a[N][N],matrix_b[N][N],matrix_c[N][N];
int main(int argc, char **argv)
{
int processCount, processId, slaveTaskCount, source, dest, rows, offset;
struct timeval start, stop;
// MPI environment is initialized
MPI_Init(&argc, &argv);
// Each process gets unique ID (rank)
MPI_Comm_rank(MPI_COMM_WORLD, &processId);
// Number of processes in communicator will be assigned to variable -> processCount
MPI_Comm_size(MPI_COMM_WORLD, &processCount);
// Number of slave tasks will be assigned to variable -> slaveTaskCount
slaveTaskCount = processCount - 1;
// Root (Master) process
if (processId == 0) {
// Matrix A and Matrix B both will be filled with random numbers
srand ( time(NULL) );
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
matrix_a[i][j]= rand()%10;
matrix_b[i][j]= rand()%10;
}
}
printf("\n\t\tMatrix - Matrix Multiplication using MPI\n");
// Print Matrix A
printf("\nMatrix A\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
printf("%.0f\t", matrix_a[i][j]);
}
printf("\n");
}
// Print Matrix B
printf("\nMatrix B\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++) {
printf("%.0f\t", matrix_b[i][j]);
}
printf("\n");
}
rows = N/slaveTaskCount;
offset = 0;
for (dest=1; dest <= slaveTaskCount; dest++)
{
// Acknowledging the offset of the Matrix A
MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
// Acknowledging the number of rows
MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
// Send rows of the Matrix A which will be assigned to slave process to compute
MPI_Send(&matrix_a[offset][0], rows*N, MPI_DOUBLE,dest,1, MPI_COMM_WORLD);
// Matrix B is sent
MPI_Send(&matrix_b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
// Offset is modified according to number of rows sent to each process
offset = offset + rows;
}
for (int i = 1; i <= slaveTaskCount; i++)
{
source = i;
// Receive the offset of particular slave process
MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
// Receive the number of rows that each slave process processed
MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
// Calculated rows of each process will be stored in Matrix C according to their offset and
// the processed number of rows
MPI_Recv(&matrix_c[offset][0], rows*N, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status);
}
// Print the result matrix
printf("\nResult Matrix C = Matrix A * Matrix B:\n\n");
for (int i = 0; i<N; i++) {
for (int j = 0; j<N; j++)
printf("%.0f\t", matrix_c[i][j]);
printf ("\n");
}
printf ("\n");
}
// Slave Processes
if (processId > 0) {
// Source process ID is defined
source = 0;
MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives number of rows sent by root process
MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives the sub-portion of Matrix A assigned by the root
MPI_Recv(&matrix_a, rows*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
// The slave process receives the Matrix B
MPI_Recv(&matrix_b, N*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
// Matrix multiplication
for (int k = 0; k<N; k++) {
for (int i = 0; i<rows; i++) {
// Set initial value of the row summation
matrix_c[i][k] = 0.0;
// Matrix A's element(i, j) will be multiplied with Matrix B's element(j, k)
for (int j = 0; j<N; j++)
matrix_c[i][k] = matrix_c[i][k] + matrix_a[i][j] * matrix_b[j][k];
}
}
// Offset will be sent back so the root knows where the calculated rows belong in Matrix C
MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
// Number of rows the process calculated will be sent to root process
MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
// Resulting matrix with calculated rows will be sent to root process
MPI_Send(&matrix_c, rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
}
MPI_Finalize();
}
Look at non-blocking this way: instead of spelling out "now I send this, now you receive that", you decide at each stage of the computation: "what are all the messages that will be communicated here?" Then you post an MPI_Isend for every send and an MPI_Irecv for every corresponding receive, and finally wait on all the resulting requests.
One problem is that each of these Isend/Irecv operations needs its own buffer, so you may need to allocate some more memory.
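For instance, here is a sketch (not a full conversion) of how the master's distribution loop from the question could look with non-blocking sends; the offsets and row_counts arrays are introduced here so that each pending send keeps its own buffer:
// Sketch: non-blocking distribution on the master. Each MPI_Isend gets
// a buffer that stays untouched until MPI_Waitall completes the sends.
MPI_Request reqs[4 * slaveTaskCount];
int offsets[processCount], row_counts[processCount];
int r = 0;
offset = 0;
for (dest = 1; dest <= slaveTaskCount; dest++) {
    offsets[dest] = offset;
    row_counts[dest] = rows;
    MPI_Isend(&offsets[dest], 1, MPI_INT, dest, 1, MPI_COMM_WORLD, &reqs[r++]);
    MPI_Isend(&row_counts[dest], 1, MPI_INT, dest, 1, MPI_COMM_WORLD, &reqs[r++]);
    // the slices of matrix_a do not overlap, so they can be sent in place
    MPI_Isend(&matrix_a[offset][0], rows*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD, &reqs[r++]);
    // matrix_b is only read, so one buffer is fine for all destinations
    MPI_Isend(&matrix_b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD, &reqs[r++]);
    offset = offset + rows;
}
// wait for all posted sends to complete
MPI_Waitall(r, reqs, MPI_STATUSES_IGNORE);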
I am new to programming with MPI and I have an exercise where I have to multiply 2 matrices using MPI_Send and MPI_Recv, sending both matrices to my processes and sending the result back to the root process (both matrices are square, and N is equal to the number of processes).
This is the code I have written:
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main(int argc, char *argv[]){
srand(time(NULL));
int rank, nproc;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
int **matrice = (int **)malloc(nproc * sizeof(int *));
for ( int i=0; i<nproc; i++)
matrice[i] = (int *)malloc(nproc * sizeof(int));
int **matrice1 = (int **)malloc(nproc * sizeof(int *));
for (int i=0; i<nproc; i++)
matrice1[i] = (int *)malloc(nproc * sizeof(int));
int **result = (int **)malloc(nproc * sizeof(int *));
for (int i=0; i<nproc; i++)
result[i] = (int *)malloc(nproc * sizeof(int));
if(rank == 0){
for(int i = 0; i < nproc; i++){
for(int j = 0; j < nproc; j++){
matrice[i][j] = (rand() % 20) + 1;
matrice1[i][j] = (rand() % 20) + 1;
}
}
for(int i = 1; i < nproc; i++){
MPI_Send(&(matrice[0][0]), nproc*nproc, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&(matrice1[0][0]), nproc*nproc, MPI_INT, i, 2, MPI_COMM_WORLD);
}
}else{
MPI_Recv(&(matrice[0][0]), nproc*nproc, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&(matrice1[0][0]), nproc*nproc, MPI_INT, 0, 2, MPI_COMM_WORLD, &status);
}
for(int i = 0; i < nproc; i++){
result[rank][i] = 0;
for(int j = 0; j < nproc; j++){
result[rank][i] += matrice[rank][j] * matrice1[j][i];
}
}
if(rank != 0){
MPI_Send(&result[rank][0], nproc, MPI_INT, 0, 'p', MPI_COMM_WORLD);
}
if(rank == 0){
for(int i = 1; i < nproc; i++){
MPI_Recv(&result[i][0], nproc, MPI_INT, i, 'p', MPI_COMM_WORLD, &status);
}
}
MPI_Finalize();
}
I am having problems with MPI_Send or MPI_Recv because only the first row of the matrice I receive is correct, the second row is filled with 0 and the others are random.
I don't understand what is causing this problem.
I am having problems with MPI_Send or MPI_Recv because only the first row of the matrice I receive is correct, the second row is filled with 0 and the others are random.
You are calling the MPI_Send as follows:
MPI_Send(&(matrice[0][0]), nproc*nproc, MPI_INT, i, 1, MPI_COMM_WORLD);
so you are telling MPI that you will be sending nproc*nproc elements starting from the position &(matrice[0][0]). MPI_Send expects those nproc*nproc elements to be allocated contiguously in memory. Therefore, your matrices should be allocated contiguously. You can think of the memory layout of such a matrix as:
| ------------ data used in the MPI_Send -----------|
| row1 row2 ... rowN |
|[0, 1, 2, 3, N][0, 1, 2, 3, N] ... [0, 1, 2, 3, N]|
\---------------------------------------------------/
From the last element of one row to the first element of the next row there is no gap.
Unfortunately, you have allocated your matrix as:
int **matrice = (int **)malloc(nproc * sizeof(int *));
for ( int i=0; i<nproc; i++)
matrice[i] = (int *)malloc(nproc * sizeof(int));
which does not allocate the matrix contiguously in memory, but rather allocates an array of row pointers that are not forced to be contiguous. You can think of that matrix as having the following memory layout:
| ------------ data used in the MPI_Send ----------|
| row1 [0, 1, 2, 3, N] ... (some "random" stuff) |
\--------------------------------------------------/
row2 [0, 1, 2, 3, N] ... (some "random" stuff)
...
rowN [0, 1, 2, 3, N] ... (some "random" stuff)
From the last element of one row to the first element of the next row there might be a memory gap, which makes it impossible for MPI_Send to know where the next row starts. That is why you can receive the first row, but not the remaining rows.
Among others, you can use the following approaches to solve that issue:
allocate the matrix contiguously in memory;
send the matrix row by row.
The simplest (and better-performing) solution is the first approach; check this SO thread to see how to dynamically allocate a contiguous block of memory for a 2D array.
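For reference, a minimal sketch of the first approach using the question's names (data is an illustrative name for the backing block):
/* One contiguous block of nproc*nproc ints, plus an array of row
   pointers into it, so the existing matrice[i][j] indexing keeps working. */
int *data = malloc(nproc * nproc * sizeof(int));
int **matrice = malloc(nproc * sizeof(int *));
for (int i = 0; i < nproc; i++)
    matrice[i] = data + i * nproc;
/* &(matrice[0][0]) now points at nproc*nproc contiguous ints,
   so the MPI_Send/MPI_Recv calls from the question become valid */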
I'm trying to find a specific value inside an array, searching in parallel with MPI. When my code finds the value, it shows an error.
ERROR
Assertion failed in file src/mpid/ch3/src/ch3u_buffer.c at line 77: FALSE
memcpy argument memory ranges overlap, dst_=0x7ffece7eb590 src_=0x7ffece7eb590 len_=4
PROGRAM
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
const char *FILENAME = "input.txt";
const size_t ARRAY_SIZE = 640;
int main(int argc, char **argv)
{
int *array = malloc(sizeof(int) * ARRAY_SIZE);
int rank,size;
MPI_Status status;
MPI_Request request;
int done,myfound,inrange,nvalues;
int i,j,dummy;
/* Let the system do what it needs to start up MPI */
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&size);
myfound=0;
if (rank == 0)
{
createFile();
array = readFile(FILENAME);
}
MPI_Bcast(array, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Irecv(&dummy, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
MPI_Test(&request, &done, &status);
nvalues = ARRAY_SIZE / size; //NUMBER OF ARRAY ELEMENTS EACH PROCESS SCANS
i = rank * nvalues; //OFFSET FOR EACH PROCESS INSIDE THE ARRAY
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues); //LIMIT OF THE OFFSET
while (!done && inrange)
{
if (array[i] == 17)
{
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
}
printf("P:%d - %d - %d\n", rank, i, array[i]);
MPI_Test(&request, &done, &status);
++i;
inrange = (i <= ((rank + 1) * nvalues - 1) && i >= rank * nvalues);
}
if (!myfound)
{
printf("P:%d stopped at global index %d\n", rank, i - 1);
}
MPI_Finalize();
}
The error is somewhere in here, because when I put a value that is not in the array (for example -5) into the if condition, the program runs smoothly.
dummy = 1;
for (j = 0; j < size; j++)
{
MPI_Send(&dummy, 1, MPI_INT, j, 1, MPI_COMM_WORLD);
}
printf("P:%d found it at global index %d\n", rank, i);
myfound = 1;
Thanks
Your program is invalid with respect to the MPI standard because you use the same buffer (&dummy) for both MPI_Irecv() and MPI_Send().
You can either use two distinct buffers (e.g. dummy_send and dummy_recv), or, since you do not seem to care about the value of dummy, use NULL as the buffer and send/receive zero-size messages.
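For illustration, the first option could look like this sketch, where dummy_send and dummy_recv are new names replacing the shared dummy:
/* two distinct buffers, so the buffer of the pending MPI_Irecv is never
   also handed to MPI_Send (dummy_send/dummy_recv are illustrative names) */
int dummy_send, dummy_recv;
MPI_Irecv(&dummy_recv, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &request);
MPI_Test(&request, &done, &status);
/* ... later, inside the search loop, on a match: */
dummy_send = 1;
for (j = 0; j < size; j++)
    MPI_Send(&dummy_send, 1, MPI_INT, j, 1, MPI_COMM_WORLD);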
I wrote code that multiplies a vector by a matrix, using MPI. The matrix is distributed in chunks consisting of rows, and the chunk sizes are not necessarily equal. The chunks are computed correctly, but when I run this I get a half-empty vector, although I expected a full one. Please look at the attached code. I think the problem is in the MPI_Gatherv function.
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#define COLUMN 4
#define ROW 7
#define dp 100.0f
// Local start
#define chunk_low(commrank, commsize, nvert) \
((commrank) * (nvert) / (commsize))
// Local end
#define chunk_height(commrank, commsize, nvert) \
(chunk_low((commrank) + 1, commsize, nvert) - 1)
// Local size
#define chunk_size(commrank, commsize, nvert) \
(chunk_height(commrank, commsize, nvert) - \
chunk_low(commrank, commsize, nvert) + 1)
// Matrix initialization function
void init_matrix(int column, int row, float *matrix)
{
int j, i;
printf("\nMatrix\n");
for(i=0; i < row; i++){
for(j=0; j < column; j++){
matrix[i*column+j] = i * column + j; // (float)rand()/RAND_MAX * dp *2.0f - dp;
printf(" %f ", matrix[i * column + j]);
}
printf("\n");
}
printf("\n");
}
int main(int argc, char **argv)
{
int rank, size;
int i, j;
float *vm, *local_matrix, *result, *vector;
double time1, time2;
int *displs, *rcounts, *scounts;
vm = (float *)calloc(ROW * COLUMN, sizeof(float));
vector = malloc(COLUMN * sizeof(float));
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* Process 0 - master */
if (rank==0)
{
printf("\nNumbers of proccesses %d. \nElements in vector %d.\n", size, COLUMN);
/* Init vector vA */
init_matrix(COLUMN, ROW, vm);
for (i = 0; i < COLUMN; i++) {
vector[i] = (11 * 5) + (11 * i);
}
result = (float *)calloc(ROW, sizeof(float));
//Time at the beginning of the calculation
time1=MPI_Wtime();
}
/* End of work process 0 */
displs = (int *)malloc(sizeof(int) * size);
scounts = (int *)malloc(sizeof(int) * size);
rcounts = (int *)malloc(sizeof(int) * size);
for (i = 0; i < size; i++) {
displs[i] = chunk_low(i, size, ROW) * COLUMN; // Position initialization
rcounts[i] = scounts[i] = chunk_size(i, size, ROW) * COLUMN;
}
local_matrix = (float *)calloc(chunk_size(rank, size, ROW) * COLUMN, sizeof(float));
MPI_Bcast(vector, COLUMN, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Scatterv(vm, scounts, displs, MPI_FLOAT, local_matrix,
rcounts[rank], MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
int local_row = scounts[rank] / COLUMN;
float *local_result = (float *)calloc(local_row, sizeof(float));
for(i = 0; i < local_row; i++) {
for (j = 0; j < COLUMN; j++) {
local_result[i] += local_matrix[i * COLUMN + j] * vector[j];
}
}
MPI_Gatherv(local_result, local_row, MPI_FLOAT, result, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
/* Only master-process */
if (rank==0)
{
//Time at the end of the calculation
time2=MPI_Wtime();
printf("\nTime parallel calculation = %f s.\n",time2-time1);
for (i = 0; i < ROW; i++)
{
printf(" %f\n", result[i]);
}
}
// End work of master-process
/* Delete storage arrays of process */
free(displs);
free(scounts);
free(rcounts);
free(local_matrix);
MPI_Finalize();
return 0;
}
After running this code I expected:
484.000000
1628.000000
2772.000000
3916.000000
5060.000000
6204.000000
7348.000000
But got this result:
484.000000
1628.000000
0.000000
0.000000
0.000000
0.000000
0.000000
The problem was in the displs (and counts) arrays passed to MPI_Gatherv: they were built for scattering matrix rows (in units of rows * COLUMN elements), while the gather collects only one result element per row.
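A sketch of the fix under that diagnosis, using hypothetical gdispls/gcounts arrays for the gather side:
/* gather-side counts/displacements measured in result elements (one
   float per matrix row), not in matrix elements (rows * COLUMN);
   gdispls/gcounts are illustrative names, not from the original code */
int *gdispls = (int *)malloc(sizeof(int) * size);
int *gcounts = (int *)malloc(sizeof(int) * size);
for (i = 0; i < size; i++) {
    gdispls[i] = chunk_low(i, size, ROW);  /* first row handled by rank i */
    gcounts[i] = chunk_size(i, size, ROW); /* number of rows on rank i */
}
MPI_Gatherv(local_result, local_row, MPI_FLOAT, result, gcounts, gdispls,
            MPI_FLOAT, 0, MPI_COMM_WORLD);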
I am trying to implement an MPI version of the filter code below, but I'm facing difficulties doing it. How should it be done?
Filter code:
int A[100000][100000];
int B[100000][100000];
for (int i=1; i<(100000 - 1); i++)
for (int i=1; j<(100000 - 1); j++)
B[i][j] = A[i-1][j] + A[i+1][j] + A[i][j-1] + A[i][j+1] - 4*A[i][j];
This is what I have tried while following the six functions of MPI:
int myrank; /* Rank of process */
int numprocs; /* Number of processes */
int source; /* Rank of sender */
int dest; /* Rank of receiver */
char message[100]; /* Storage for the message */
MPI_Status status; /* Return status for receive */
MPI_Init( & argc, & argv);
MPI_Comm_size(MPI_COMM_WORLD, & numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, & myrank);
if (myrank != 0)
{
dest = 0;
MPI_Send(message, strlen(message) + 1,
MPI_CHAR, dest, 15, MPI_COMM_WORLD);
} else {
for (source = 1; source < numprocs; source++) {
MPI_Recv(message, 100, MPI_CHAR, source,
15, MPI_COMM_WORLD, & status);
}
}
MPI_Finalize();
I'd go like this. First of all, I'd have this code
int A[100000][100000];
int B[100000][100000];
replaced with dynamic allocations. You don't need all that memory for each and every process.
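For instance, a sketch of a contiguous dynamic allocation; with it, a row slice later starts at A + (size_t)row * 100000 instead of &(A[row][0]):
// Only the master needs the full matrices; each worker later allocates
// just its own slice. Element (i,j) of A is A[(size_t)i * 100000 + j].
int *A = NULL, *B = NULL;
if (myrank == 0) {
    A = malloc((size_t)100000 * 100000 * sizeof(int));
    B = malloc((size_t)100000 * 100000 * sizeof(int));
}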
Then, I'd send array A to different processes. By rows.
What is the "height" of data frame (number of rows):
delta = (100000 - 2) / (numprocs-1); // we don't count first and last row
remainder = (100000 - 2) % (numprocs-1); // one of the processes may need
// to calculate a little bit more
// we are starting from row with idx=1 (second row) and we want to finish when
// we hit last row
if(myrank == 0) {
    for( int i=1; i < numprocs; i++ ) {
        // delta rows of data plus two halo rows (one above, one below),
        // each row being 100000 ints; counts are in elements, not bytes
        int how_many_ints = (delta + 2) * 100000;
        if(remainder != 0 && i == (numprocs-1)) {
            how_many_ints += remainder * 100000;
        }
        MPI_Send(&(A[(i-1)*delta][0]), how_many_ints, MPI_INT, i, 0,
                 MPI_COMM_WORLD);
    }
} else {
    // allocate memory for this process's slice
    int *local_array = NULL;
    int how_many_ints = (delta + 2) * 100000;
    if(remainder != 0 && myrank == (numprocs-1)) {
        how_many_ints += remainder * 100000;
    }
    local_array = malloc(how_many_ints * sizeof(int));
    MPI_Status status;
    MPI_Recv(
        local_array,
        how_many_ints,
        MPI_INT,
        0,
        0,
        MPI_COMM_WORLD,
        &status);
}
// perform calculations for each and every slice,
// remembering that we always have one extra row on
// top and one at the bottom
// send data back to master (as above, but vice versa) - see the sketch below
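To make the last step concrete, here is a sketch of the worker's compute-and-return phase; results is a hypothetical buffer holding only the computed rows (no halo rows):
// compute the stencil for the local slice; row 0 and the last row of
// local_array are halo rows that are read but never written
int rows_here = delta;
if (remainder != 0 && myrank == (numprocs - 1))
    rows_here += remainder;
int *results = calloc((size_t)rows_here * 100000, sizeof(int));
for (int r = 1; r <= rows_here; r++) {
    for (int c = 1; c < 100000 - 1; c++) {
        results[(size_t)(r - 1) * 100000 + c] =
            local_array[(size_t)(r - 1) * 100000 + c] +
            local_array[(size_t)(r + 1) * 100000 + c] +
            local_array[(size_t)r * 100000 + c - 1] +
            local_array[(size_t)r * 100000 + c + 1] -
            4 * local_array[(size_t)r * 100000 + c];
    }
}
// the master would post a matching MPI_Recv from each worker i and copy
// the received rows into B starting at row (i-1)*delta + 1
MPI_Send(results, rows_here * 100000, MPI_INT, 0, 1, MPI_COMM_WORLD);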