MPI_Gather segmentation fault

MPI_Gather segmentation fault - c

I have this parallel Gaussian elimination code. A segmentation error happens upon calling either MPI_Gather function calls. I know such error may rise if memory is not allocated properly for either buffers. But I cannot see any wrong with the memory management code.
Can someone help?
Thanks.
Notes:
The program reads from a .txt file in the same directory called input.txt.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
/*void print2dAddresses(double** array2d, int rows, int cols)
{
int i;
for(i = 0; i < rows; i++)
{
int j;
for(j = 0; j < cols; j++)
{
printf("%d ", &(array2d[i][j]));
}
printf("\n");
}
printf("------------------------------------");
}*/
double** newMatrix(int rows, int cols)
{
double *data = (double*) malloc(rows * cols * sizeof(double));
double **array= (double **)malloc(rows * sizeof(double*));
int i;
for (i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
void freeMatrix(double** mat)
{
free(mat[0]);
free(mat);
}
double** new2dArray(int nrows, int ncols)
{
int i;
double** array2d;
array2d = (double**) malloc(nrows * sizeof(double*));
for(i = 0; i < nrows; i++)
{
array2d[i] = (double*) malloc(ncols * sizeof(double));
}
return array2d;
}
double* new1dArray(int size)
{
return (double*) malloc(size * sizeof(double));
}
void free2dArray(double** array2d, int nrows)
{
int i;
for(i = 0; i < nrows; i++)
{
free(array2d[i]);
}
free(array2d);
}
void print2dArray(double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
printf("%lf ", array2d[i][j]);
}
printf("\n");
}
printf("----------------------\n");
}
void print1dArray(double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%lf\n", array[i]);
}
printf("----------------------\n");
}
void read2dArray(FILE* fp, double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
fscanf(fp, "%lf", &(array2d[i][j]));
}
}
}
void read1dArray(FILE* fp, double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
fscanf(fp, "%lf", &(array[i]));
}
}
void readSymbols(char* symbols, int size, FILE* fp)
{
int i;
for(i = 0; i < size; i++)
{
char c = '\n';
while(c == '\n' | c == ' ' | c == '\t' | c == '\r')
fscanf(fp, "%c", &c);
symbols[i] = c;
}
}
void printSolution(char* symbols, double* x, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%c = %lf\n", symbols[i], x[i]);
}
}
double* copy_1d_array(double* original, int size)
{
double* copy_version;
int i;
copy_version = (double*) malloc(size * sizeof(double));
for(i = 0; i < size; i++)
{
copy_version[i] = original[i];
}
return copy_version;
}
int main(int argc, char** argv)
{
int p, rank, i, j, k, l, msize, rowsPerProcess, remainder, startingRow, dest, rowCounter, remainingRows, neededProcesses;
double **A, *b, *x, **smallA, *currentRow, *smallB, currentB, **receivedA, *receivedB;
char *symbols;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == 0)
{
FILE* fp;
fp = fopen("input.txt", "r");
fscanf(fp, "%d", &msize);
A = newMatrix(msize, msize);
b = new1dArray(msize);
x = new1dArray(msize);
symbols = (char*) malloc(msize * sizeof(char));
read2dArray(fp, A, msize, msize);
read1dArray(fp, b, msize);
readSymbols(symbols, msize, fp);
fclose(fp);
/*print2dArray(A, msize, msize);
print1dArray(b, msize);*/
}
MPI_Bcast(&msize, 1, MPI_INT, 0, MPI_COMM_WORLD);
for(i = 0; i < (msize - 1); i++)
{
int maxIndex;
double maxCoef, tmp, r;
/*finding max row*/
if(rank == 0)
{
maxIndex = i;
maxCoef = fabs(A[i][i]);
for(j = i + 1; j < msize; j++)
{
if(fabs(A[j][i]) > maxCoef)
{
maxCoef = A[j][i];
maxIndex = j;
}
}
/*swapping the current row with the max row*/
for(j = 0; j < msize; j++)
{
tmp = A[i][j];
A[i][j] = A[maxIndex][j];
A[maxIndex][j] = tmp;
}
tmp = b[i];
b[i] = b[maxIndex];
b[maxIndex] = tmp;
/*elimination*/
/*for(j = i + 1; j < msize; j++)
{
double r = A[j][i] / A[i][i];
subtracting r * row i from row j
for(k = i; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}*/
/*parallel elimination*/
startingRow = i + 1;
neededProcesses = p;
remainingRows = msize - startingRow;
if(remainingRows < neededProcesses)
{
neededProcesses = remainingRows;
}
rowsPerProcess = remainingRows / neededProcesses;
remainder = remainingRows % neededProcesses;
}
MPI_Bcast(&startingRow, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&rowsPerProcess, 1, MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0)
{
currentRow = copy_1d_array(A[startingRow-1], msize);
currentB = b[startingRow-1];
}
else
{
currentRow = new1dArray(msize);
}
MPI_Bcast(currentRow, msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(&currentB, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
receivedB = new1dArray(remainingRows);
}
smallA = newMatrix(rowsPerProcess, msize);
smallB = new1dArray(rowsPerProcess);
MPI_Scatter(&(A[startingRow][0]), rowsPerProcess*msize, MPI_DOUBLE, &(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Scatter(&(b[startingRow]), rowsPerProcess, MPI_DOUBLE, &(smallB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for(j = 0; j < rowsPerProcess; j++)
{
r = smallA[j][startingRow-1] / currentRow[startingRow-1];
for(k = 0; k < msize; k++)
{
smallA[j][k] -= r * currentRow[k];
}
smallB[j] -= r * currentB;
}
MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, &(receivedA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Gather(&(smallB[0]), rowsPerProcess, MPI_DOUBLE, &(receivedB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
freeMatrix(smallA);
free(smallB);
if(rank == 0)
{
for(j = 0; j < remainingRows; j++)
{
for(k = 0; k < msize; k++)
{
A[j+startingRow][k] = receivedA[j][k];
}
b[j+startingRow] = receivedB[j];
}
free(currentRow);
freeMatrix(receivedA);
free(receivedB);
}
if(rank == 0)
{
if(remainder > 0)
{
for(j = (msize - remainder); j < msize; j++)
{
r = A[j][i] / A[i][i];
for(k = 0; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}
}
}
}
if(rank == 0)
{
/*backward substitution*/
for(i = msize - 1; i >= 0; i--)
{
x[i] = b[i];
for(j = msize - 1; j > i; j--)
{
x[i] -= A[i][j] * x[j];
}
x[i] /= A[i][i];
}
printf("solution = \n");
//print1dArray(x, msize);
printSolution(symbols, x, msize);
freeMatrix(A);
free(b);
free(x);
free(symbols);
}
MPI_Finalize();
return 0;
}
Input File:
3
1 1 1
1 1 3
2 1 4
4
9
12
x
y
z

It might be this: &(receivedA[0][0]) on processes where rank != 0. You're indexing an array that hasn't been allocated. You might have to create another pointer, like this:
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
recievedAHead = &(receivedA[0][0]);
receivedB = new1dArray(remainingRows);
}
else {
recievedAHead = NULL;
}
and use recievedAHead in the MPI_Gather call.

Related

matrix multiply with mpi

I have a problem with the result of my m1 function when I check that some of the array between rank 0 and the last rank is empty and unfortunately none of the workarounds help to solve this problem.
Can anyone help me with this?
Where is the problem in this code?
And how can it be solved?
This is the code:
this is the code:
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#define N 1000
#define M 1000 / 2
int A[N][N], B[N][N], C[N][N];
int m1[M][M], m2[M][M], m3[M][M], m4[M][M], m5[M][M], m6[M][M], m7[M][M];
int A11[M][M], A12[M][M], A21[M][M], A22[M][M], B11[M][M], B12[M][M], B21[M][M], B22[M][M];
int C11[M][M], C12[M][M], C21[M][M], C22[M][M];
int rank, size, start_row, end_row;
void multiplym1(int mySize, int AA[M][M], int BB[M][M], int CC[M][M], int DD[M][M], int resfinal[M][M], int mystart_row, int myend_row)
{
int result1[mySize][mySize], result2[mySize][mySize];
for (int i = mystart_row; i < myend_row; i++)
{
for (int j = 0; j < mySize; j++)
{
result1[i][j] = AA[i][j] + BB[i][j];
result2[i][j] = CC[i][j] + DD[i][j];
}
}
for (int i = mystart_row; i < myend_row; i++)
{
for (int j = 0; j < mySize; j++)
{
resfinal[i][j] = 0;
for (int k = 0; k < mySize; k++)
{
resfinal[i][j] += (result1[i][k] * result2[k][j]);
}
}
}
}
int main(int argc, char const *argv[])
{
srand(time(NULL));
printf("\n------------------------* Initializing matrices *----------------------\n");
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
start_row = rank * (N / size);
if (rank + 1 == size)
{
end_row = N;
}
else
{
end_row = (rank + 1) * (N / size);
}
// printf("#%d: start is %d and end is %d\n", rank, start_row, end_row);
for (int i = start_row; i < end_row; i++)
{
for (int j = 0; j < N; j++)
{
A[i][j] = rand() % 50;
B[i][j] = rand() % 20;
C[i][j] = 0;
// printf("#%d: A[%d][%d] = %d\n", rank, i, j, A[i][j]);
}
}
// printf("#%d: Done\n\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
// printf("#%d: start \n\n", rank);
start_row = 0;
end_row = 0;
start_row = rank * (M / size);
if (rank + 1 == size)
{
end_row = M;
}
else
{
end_row = (rank + 1) * (M / size);
}
// printf("#%d: start is %d and end is %d\n", rank, start_row, end_row);
for (int i = start_row; i < end_row; i++)
{
for (int j = 0; j < M; j++)
{
A11[i][j] = A[i][j];
A12[i][j] = A[i][j + M];
A21[i][j] = A[i + M][j];
A22[i][j] = A[i + M][j + M];
B11[i][j] = B[i][j];
B12[i][j] = B[i][j + M];
B21[i][j] = B[i + M][j];
B22[i][j] = B[i + M][j + M];
}
}
// printf("#%d: Done\n\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
// printf("#%d: start For M1\n\n", rank);
start_row = 0;
end_row = 0;
start_row = rank * (M / size);
if (rank + 1 == size)
{
end_row = M;
}
else
{
end_row = (rank + 1) * (M / size);
}
printf("#%d: start is %d and end is %d\n", rank, start_row, end_row);
multiplym1(M, A11, A22, B11, B22, m1, start_row, end_row);
MPI_Barrier(MPI_COMM_WORLD);
int *counts = malloc(size * sizeof(int));
int *displs = malloc(size * sizeof(int));
for (int i = 0; i < size; i++)
{
counts[i] = (M / size) * M;
displs[i] = i * (M / size) * M;
}
counts[size - 1] = ((M / size) + (M % size)) * M;
MPI_Gatherv(&m1[start_row][0], counts[rank], MPI_INT, m1, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);
printf("#%d: M1 DONE!!\n", rank);
if (rank == 0)
{
for (int i = 0; i < M; i += 49)
{
for (int j = 0; j < M; j += 100)
{
printf("#%d: m1[%d][%d] = %d\n", rank, i, j, m1[i][j]);
}
}
}
MPI_Finalize();
return 0;
}```
`

Why does my C program that is supposed to output a matrix to the power of n output my matrix to the power of 2^n?

My code is supposed to take in a matrix M and raise it to the power of an integer A. However, somehow, my output is always M^(2^A). For example, if I want to find a matrix in its 3rd power, I will instead receive its 8th power.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
void multiply(int ** p, int pwr, int dim, int ** prod) {
int m, i, j, k;
/*if (n<pwr){*/
int pos = 0;
for (m = 0; m < pwr; m++) {
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
for (k = 0; k < dim; k++) {
pos += p[i][k] * p[k][j];
}
prod[i][j] = pos;
pos = 0;
}
}
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
p[i][j] = prod[i][j];
prod[i][j] = 0;
}
}
}
/*n=n+1;
multiply(prod, q, pwr, dim, prod);
}*/
}
int main(int argc, char * argv[]) {
FILE * fp = fopen(argv[1], "r");
int dim, pwr, i, j;
fscanf(fp, "%d", & dim);
int ** matrix;
matrix = (int ** ) malloc(dim * sizeof(int * ));
for (i = 0; i < dim; i++) {
matrix[i] = (int * ) malloc(dim * sizeof(int));
}
int ** prod;
prod = (int ** ) malloc(dim * sizeof(int * ));
for (i = 0; i < dim; i++) {
prod[i] = (int * ) malloc(dim * sizeof(int));
}
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
fscanf(fp, "%d", & matrix[i][j]);
}
}
fscanf(fp, "%d", & pwr);
if (pwr == 1) {
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
printf("%d ", matrix[i][j]);
}
printf("\n");
}
} else if (pwr >= 2) {
multiply(matrix, pwr, dim, prod);
for (i = 0; i < dim; i++) {
for (j = 0; j < dim; j++) {
printf("%d ", matrix[i][j]);
}
printf("\n");
}
}
return 0;
}

You are multiplying your matrix by itself and then store the result in the original one. Then you do it again.
So perfectly normal that it gets powered 8 times. What you need is another temporary matrix on which you store the result and keep the original matrix to multiply your result with.

pointer was nullptr error in C programming

I have a serious problame.
at the line **c = (int*)malloc(size1 * sizeof(int*));
the compiler gives me this error which I don't really know what it says.
Unhandled exception thrown: read access violation.
c was nullptr. occurred
I don't know what I'm doing wrong..
I initialize every pointer like this.
void BuildMatrix(int ***, int, int);
void FreeMatrix(int ***, int);
void PrintMatrix(int **, int, int);
int **MultiplyMatrixes(int **, int**, int, int, int);
int main() {
int **matrix1 = NULL, **matrix2 = NULL, **matrix3 = NULL;
int * newCol = NULL;
int size1, size2, size3, newRow;
printf("-How many rows in the first matrix?: ");
scanf("%d", &size1);
printf("-How many columns in the first matrix and second?[size2, size3]: ");
scanf("%d %d", &size2, &size3); /*size2 = rows of matrix2.*/
/*Build both matrices*/
printf("-First matrix input.\n");
BuildMatrix(&matrix1, size1, size2);
PrintMatrix(matrix1, size1, size2);
printf("-Second matrix input.\n");
BuildMatrix(&matrix2, size2, size3);
PrintMatrix(matrix2, size2, size3);
/*Combine the 2 matrices to a new matrix*/
matrix3 = MultiplyMatrixes(matrix1, matrix2, size1, size2, size3);
FreeMatrix(&matrix2, size2); //Free the second matrix
printf("\n-Multiplied matrix: \n");
PrintMatrix(matrix3, size1, size3);
FreeMatrix(&matrix3, size1);
FreeMatrix(&matrix1, size1);
}
void BuildMatrix(int *** pMat, int row, int col) {
int i, j;
(*pMat) = (int **)malloc(row * sizeof(int*));
if (*pMat == NULL) {
free(pMat);
printf("*Not enough RAM.\nTerminating.\n");
exit(1);
}
for (i = 0; i < row; i++) {
(*pMat)[i] = (int *)malloc(col * sizeof(int*));
if ((*pMat)[i] == NULL) {
printf("*Not enough RAM.\nTerminating.\n");
FreeMatrix(pMat, row);
exit(1);
}
for (j = 0; j < col; j++) {
printf("-Enter %d element in %d row: ", j + 1, i + 1);
scanf("%d", &(*pMat)[i][j]);
}
printf("\n");
}
//FreeMatrix(pMat, row);
}
void PrintMatrix(int ** pMat, int row, int col) {
for (int i = 0; i < row; ++i) {
for (int j = 0; j < col; ++j) {
printf("%d ", pMat[i][j]);
}
printf("\n");
}
}
int** MultiplyMatrixes(int ** a, int ** b, int size1, int size2, int size3) {
int i, j, k, **c = NULL;
**c = (int*)malloc(size1 * sizeof(int*));
if (c == NULL) {
free(*c);
printf("*Not enough RAM.\nTerminating.\n");
exit(1);
}
for (i = 0; i < size1; i++) {
for (j = 0; j < size3; j++) {
c[i] = (int *)malloc(size3 * sizeof(int));
if (c[i] == NULL) {
printf("*Not enough RAM.\nTerminating.\n");
FreeMatrix(&c, size1);
exit(1);
}
for (k = 0; k < size2; k++) {
c[i][j] += (a[i][k] * b[k][j]);
}
}
}
return c;
}

(*pMat)[i] = (int *)malloc(col * sizeof(int*));
will be
(*pMat)[i] = malloc(col * sizeof(int));
You have allocated space for col number of int* where you are reading int-s.
Also
**c = (int*)malloc(size1 * sizeof(int*));
will be
c = malloc(size1 * sizeof(int*));
Otherwise you were trying to dereference NULL value which triggered the error you got.
Also the loop will be
for (i = 0; i < size1; i++) {
c[i] = malloc(size3 * sizeof(int));
if (c[i] == NULL) {
printf("*Not enough RAM.\nTerminating.\n");
FreeMatrix(&c, size1);
exit(1);
}
for (j = 0; j < size3; j++) {
c[i][j]=0;
for (k = 0; k < size2; k++) {
c[i][j] += (a[i][k] * b[k][j]);
}
}
}
Don't cast the return value of malloc.

C - I Think i'm Doing a realloc wrong

I'm doing some exercise in preparation for my test and in one of those I have to remove duplicated int values from an array:
int *eliminaDup(int *vect, int dim, int *dim_nodup){
int i = 0, newDim = 0, found = 0, j = 0;
int *tmpArr = malloc(dim * sizeof(int));
for(i = 0; i < dim; i++){
j = 0; found = 0;
while(j < newDim && !found){
if(vect[i] == tmpArr[j])
found = 1;
j++;
}
if(!found){
tmpArr[newDim] = vect[i];
newDim++;
}
}
*dim_nodup = newDim;
return (realloc(tmpArr, newDim * sizeof(int)));
}
And in the main method is called this way:
nodup=eliminaDup(input,dim,&dim_nodup);
printf("Print of the new Array: (%d values)\n", dim_nodup);
for (i=0; i<dim_nodup; i++){
printf("%d\n",nodup[i]);
}
But when I try to execute the code, This happens:
ARRAY GIVEN IN INPUT:
[1;2]
OUTPUT:
1
2
OUTPUT EXPECTED:
1
2
...other output from the code...
and as you can see from the screen, the code should go on and print other stuff.
I made some tries and i saw that the code "lock" exactly after the print, but it never came out from the for.
...Why? I'm banging my head on the keyboard.
EDIT: Complete program
#include <stdio.h>
#include <stdlib.h>
int *leggiInput(int *dim);
int *eliminaDup(int *vect, int dim, int *dim_nodup);
int ugualeASomma(int *vect,int dim);
int *maggioreDeiSuccessivi(int *vect, int dim);
int main()
{
int *input, *nodup, *results;
int dim, dim_nodup, i;
//Legge l'input
input=leggiInput(&dim);
printf("Stampa dei valori in input: (%d valori)\n", dim);
for (i=0; i<dim; i++)
printf("%d\n",input[i]);
//Elimina i duplicati
nodup=eliminaDup(input,dim,&dim_nodup);
printf("Stampa dei valori senza duplicati: (%d valori)\n", dim_nodup);
for (i=0; i<dim_nodup; i++){
printf("%d\n",nodup[i]);
}
//Esegue ugualeASomma
printf("Risultato di ugualeASomma: %d\n", ugualeASomma(nodup,dim_nodup));
//Esegue maggioreDeiSuccessivi
results=maggioreDeiSuccessivi(nodup,dim_nodup);
printf("Risultato maggioreDeiSuccessivi:\n");
for(i=0; i<dim_nodup; i++)
printf("%d\n",results[i]);
return 0;
}
int *leggiInput(int *dim){
int n, i;
scanf("%d", &n);
int *arr = malloc(n * sizeof(int));
for(i = 0; i < n; i++){
scanf("%d", &arr[i]);
}
*dim = n;
return arr;
}
int *eliminaDup(int *vect, int dim, int *dim_nodup){
int i = 0, newDim = 0, trovato = 0, j = 0;
int *tmpArr = malloc(dim * sizeof(int));
while(i < dim){
j = 0; trovato = 0;
while(j < newDim && !trovato){
if(vect[i] == tmpArr[j])
trovato = 1;
j++;
}
if(!trovato){
tmpArr[newDim] = vect[i];
newDim++;
}
i++;
}
*dim_nodup = newDim;
return (realloc(tmpArr, newDim * sizeof(int)));
}
int ugualeASomma(int *vect, int dim){
int somma = 0, i = 0, j, trovato = 0;
while(i < dim)
somma += vect[i];
while(i < dim){
if(vect[i] == somma - vect[i])
trovato = 1;
}
return trovato;
}
int *maggioreDeiSuccessivi(int *vect, int dim){
int i = 0, j, trovato;
while(i < dim){
j = i+1; trovato = 0;
while(j < dim && !trovato){
if(vect[i] <= vect[j])
trovato = 1;
else
j++;
}
if(trovato) vect[i] = 0;
else vect[i] = 1;
i++;
}
return vect;
}
EDIT: Solved in comments changing malloc to calloc.

When realloc fails it returns NULL so you should check this and return tmpArr instead:
int* p = realloc(tmpArr, newDim * sizeof(int));
return p != NULL ? p : tmpArr;
it is good practice to initialize all declared variables, even if they will be initialized later. You may later forget about it and assume it is initialized as the function grows.
You have an infinite loop here
int ugualeASomma(int *vect, int dim){
int somma = 0, i = 0, j, trovato = 0;
while(i < dim)
somma += vect[i];
while(i < dim){
if(vect[i] == somma - vect[i])
trovato = 1;
}
return trovato;
}
i is never incremented

I would say Anders Karlsson has a good point. This works fine for me:
int ugualeASomma(int *vect, int dim){
int somma = 0, i = 0, j, trovato = 0;
while(i < dim)
{
somma += vect[i];
if(vect[i] == somma - vect[i])
trovato = 1;
i++;
}
return trovato;
}

Memory management for Strassen's matrix multiplication

As a part of an assignment, I am trying to find out the crossover point for Strassen's matrix multiplication and naive multiplication algorithms. But for the same, I am unable to proceed when matrix becomes 256x256. Can someone please suggest me the appropriate memory management technique to be able to handle larger inputs.
The code is in C as follows:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);
int idx = 0;
int main()
{
int N;
int count = 0;
int i, j;
clock_t start, end;
double elapsed;
int total = 15;
double tnaive[total];
double tstrassen[total];
printf("-------------------------------------------------------------------------\n\n");
for (count = 0; count < total; count++)
{
N = pow(2, count);
printf("Matrix size = %2d\t",N);
double X[N][N], Y[N][N], Z[N][N], W[N][N];
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
X[i][j] = rand()/(RAND_MAX + 1.);
Y[i][j] = rand()/(RAND_MAX + 1.);
}
}
start = clock();
matMul((double *)X, (double *)Y, (double *)W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);
start = clock();
strassenMul((double *)X, (double *)Y, (double *)Z, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
printf("strassen = %5.4f\n",tstrassen[count]);
}
printf("-------------------------------------------------------------------\n\n\n");
while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;
printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);
printf("Please enter the size of array as a power of 2\n");
scanf("%d",&N);
double A[N][N], B[N][N], C[N][N];
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
A[i][j] = rand()/(RAND_MAX + 1.);
B[i][j] = rand()/(RAND_MAX + 1.);
}
}
printf("------------------- Input Matrices A and B ---------------------------\n\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",A[i][j]);
printf("\n");
}
printf("\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",B[i][j]);
printf("\n");
}
printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");
strassenMul((double *)A, (double *)B, (double *)C, N);
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",C[i][j]);
printf("\n");
}
return(0);
}
void strassenMul(double *X, double *Y, double *Z, int m)
{
if (m <= idx)
{
matMul((double *)X, (double *)Y, (double *)Z, m);
return;
}
if (m == 1)
{
*Z = *X * *Y;
return;
}
int row = 0, col = 0;
int n = m/2;
int i = 0, j = 0;
double x11[n][n], x12[n][n], x21[n][n], x22[n][n];
double y11[n][n], y12[n][n], y21[n][n], y22[n][n];
double P1[n][n], P2[n][n], P3[n][n], P4[n][n], P5[n][n], P6[n][n], P7[n][n];
double C11[n][n], C12[n][n], C21[n][n], C22[n][n];
double S1[n][n], S2[n][n], S3[n][n], S4[n][n], S5[n][n], S6[n][n], S7[n][n];
double S8[n][n], S9[n][n], S10[n][n], S11[n][n], S12[n][n], S13[n][n], S14[n][n];
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
x11[i][j] = *((X+row*m)+col);
y11[i][j] = *((Y+row*m)+col);
}
for (col = n, j = 0; col < m; col++, j++)
{
x12[i][j] = *((X+row*m)+col);
y12[i][j] = *((Y+row*m)+col);
}
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
x21[i][j] = *((X+row*m)+col);
y21[i][j] = *((Y+row*m)+col);
}
for (col = n, j = 0; col < m; col++, j++)
{
x22[i][j] = *((X+row*m)+col);
y22[i][j] = *((Y+row*m)+col);
}
}
// Calculating P1
matAdd((double *)x11, (double *)x22, (double *)S1, n);
matAdd((double *)y11, (double *)y22, (double *)S2, n);
strassenMul((double *)S1, (double *)S2, (double *)P1, n);
// Calculating P2
matAdd((double *)x21, (double *)x22, (double *)S3, n);
strassenMul((double *)S3, (double *)y11, (double *)P2, n);
// Calculating P3
matSub((double *)y12, (double *)y22, (double *)S4, n);
strassenMul((double *)x11, (double *)S4, (double *)P3, n);
// Calculating P4
matSub((double *)y21, (double *)y11, (double *)S5, n);
strassenMul((double *)x22, (double *)S5, (double *)P4, n);
// Calculating P5
matAdd((double *)x11, (double *)x12, (double *)S6, n);
strassenMul((double *)S6, (double *)y22, (double *)P5, n);
// Calculating P6
matSub((double *)x21, (double *)x11, (double *)S7, n);
matAdd((double *)y11, (double *)y12, (double *)S8, n);
strassenMul((double *)S7, (double *)S8, (double *)P6, n);
// Calculating P7
matSub((double *)x12, (double *)x22, (double *)S9, n);
matAdd((double *)y21, (double *)y22, (double *)S10, n);
strassenMul((double *)S9, (double *)S10, (double *)P7, n);
// Calculating C11
matAdd((double *)P1, (double *)P4, (double *)S11, n);
matSub((double *)S11, (double *)P5, (double *)S12, n);
matAdd((double *)S12, (double *)P7, (double *)C11, n);
// Calculating C12
matAdd((double *)P3, (double *)P5, (double *)C12, n);
// Calculating C21
matAdd((double *)P2, (double *)P4, (double *)C21, n);
// Calculating C22
matAdd((double *)P1, (double *)P3, (double *)S13, n);
matSub((double *)S13, (double *)P2, (double *)S14, n);
matAdd((double *)S14, (double *)P6, (double *)C22, n);
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
*((Z+row*m)+col) = C11[i][j];
for (col = n, j = 0; col < m; col++, j++)
*((Z+row*m)+col) = C12[i][j];
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
*((Z+row*m)+col) = C21[i][j];
for (col = n, j = 0; col < m; col++, j++)
*((Z+row*m)+col) = C22[i][j];
}
}
void matMul(double *A, double *B, double *C, int n)
{
int i = 0, j = 0, k = 0, row = 0, col = 0;
double sum;
for (i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
{
sum = 0.0;
for (k = 0; k < n; k++)
{
sum += *((A+i*n)+k) * *((B+k*n)+j);
}
*((C+i*n)+j) = sum;
}
}
}
void matAdd(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
*((C+row*m)+col) = *((A+row*m)+col) + *((B+row*m)+col);
}
void matSub(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
*((C+row*m)+col) = *((A+row*m)+col) - *((B+row*m)+col);
}
Added later If I try using malloc statements for memory assignment, the code is as follows. But the problem is that it stops after the naive matrix multiplication method and does not even proceed to the Strassen's method for N=1. It shows a prompt to close the program.
for (count = 0; count < total; count++)
{
N = pow(2, count);
printf("Matrix size = %2d\t",N);
//double X[N][N], Y[N][N], Z[N][N], W[N][N];
double **X, **Y, **Z, **W;
X = malloc(N * sizeof(double*));
if (X == NULL){
perror("Failed malloc() in X");
return 1;
}
Y = malloc(N * sizeof(double*));
if (Y == NULL){
perror("Failed malloc() in Y");
return 1;
}
Z = malloc(N * sizeof(double*));
if (Z == NULL){
perror("Failed malloc() in Z");
return 1;
}
W = malloc(N * sizeof(double*));
if (W == NULL){
perror("Failed malloc() in W");
return 1;
}
for (j = 0; j < N; j++)
{
X[j] = malloc(N * sizeof(double*));
if (X[j] == NULL){
perror("Failed malloc() in X[j]");
return 1;
}
Y[j] = malloc(N * sizeof(double*));
if (Y[j] == NULL){
perror("Failed malloc() in Y[j]");
return 1;
}
Z[j] = malloc(N * sizeof(double*));
if (Z[j] == NULL){
perror("Failed malloc() in Z[j]");
return 1;
}
W[j] = malloc(N * sizeof(double*));
if (W[j] == NULL){
perror("Failed malloc() in W[j]");
return 1;
}
}
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
X[i][j] = rand()/(RAND_MAX + 1.);
Y[i][j] = rand()/(RAND_MAX + 1.);
}
}
start = clock();
matMul((double *)X, (double *)Y, (double *)W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);
start = clock();
strassenMul((double *)X, (double *)Y, (double *)Z, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
for (j = 0; j < N; j++)
{
free(X[j]);
free(Y[j]);
free(Z[j]);
free(W[j]);
}
free(X); free(Y); free(Z); free(W);
printf("strassen = %5.4f\n",tstrassen[count]);
}

I have re-written the answer. My previous answer which allocated memory row by row won't work, because OP has cast the 2-D arrays to 1-D arrays when passed to the functions. Here is my re-write of the code with some simplifications, such as keeping all the matrix arrays 1-dimensional.
I am unsure exactly what Strassen's method does, although the recursion halves the matrix dimensions. So I do wonder if the intention was to use row*2 and col*2 when accessing the arrays passed.
I hope the techniques are useful to you - even that it works! All the matrix arrays are now on the heap.
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#define total 4 //15
void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);
enum array { x11, x12, x21, x22, y11, y12, y21, y22,
P1, P2, P3, P4, P5, P6, P7, C11, C12, C21, C22,
S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, arrs };
int idx = 0;
int main()
{
int N;
int count = 0;
int i, j;
clock_t start, end;
double elapsed;
double tnaive[total];
double tstrassen[total];
double *X, *Y, *Z, *W, *A, *B, *C;
printf("-------------------------------------------------------------------------\n\n");
for (count = 0; count < total; count++)
{
N = (int)pow(2, count);
printf("Matrix size = %2d\t",N);
X = malloc(N*N*sizeof(double));
Y = malloc(N*N*sizeof(double));
Z = malloc(N*N*sizeof(double));
W = malloc(N*N*sizeof(double));
if (X==NULL || Y==NULL || Z==NULL || W==NULL) {
printf("Out of memory (1)\n");
return 1;
}
srand((unsigned)time(NULL));
for (i=0; i<N*N; i++)
{
X[i] = rand()/(RAND_MAX + 1.);
Y[i] = rand()/(RAND_MAX + 1.);
}
start = clock();
matMul(X, Y, W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);
start = clock();
strassenMul(X, Y, Z, N);
free(W);
free(Z);
free(Y);
free(X);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
printf("strassen = %5.4f\n",tstrassen[count]);
}
printf("-------------------------------------------------------------------\n\n\n");
while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;
printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);
printf("Please enter the size of array as a power of 2\n");
scanf("%d",&N);
A = malloc(N*N*sizeof(double));
B = malloc(N*N*sizeof(double));
C = malloc(N*N*sizeof(double));
if (A==NULL || B==NULL || C==NULL) {
printf("Out of memory (2)\n");
return 1;
}
srand((unsigned)time(NULL));
for (i=0; i<N*N; i++)
{
A[i] = rand()/(RAND_MAX + 1.);
B[i] = rand()/(RAND_MAX + 1.);
}
printf("------------------- Input Matrices A and B ---------------------------\n\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",A[i*N+j]);
printf("\n");
}
printf("\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",B[i*N+j]);
printf("\n");
}
printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");
strassenMul(A, B, C, N);
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",C[i*N+j]);
printf("\n");
}
free(C);
free(B);
free(A);
return(0);
}
void strassenMul(double *X, double *Y, double *Z, int m)
{
int row = 0, col = 0;
int n = m/2;
int i = 0, j = 0;
double *arr[arrs]; // each matrix mem ptr
if (m <= idx)
{
matMul(X, Y, Z, m);
return;
}
if (m == 1)
{
*Z = *X * *Y;
return;
}
for (i=0; i<arrs; i++) { // memory for arrays
arr[i] = malloc(n*n*sizeof(double));
if (arr[i] == NULL) {
printf("Out of memory (1)\n");
exit (1); // brutal
}
}
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
arr[x11][i*n+j] = X[row*m+col];
arr[y11][i*n+j] = Y[row*m+col];
}
for (col = n, j = 0; col < m; col++, j++)
{
arr[x12][i*n+j] = X[row*m+col];
arr[y12][i*n+j] = Y[row*m+col];
}
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
arr[x21][i*n+j] = X[row*m+col];
arr[y21][i*n+j] = Y[row*m+col];
}
for (col = n, j = 0; col < m; col++, j++)
{
arr[x22][i*n+j] = X[row*m+col];
arr[y22][i*n+j] = Y[row*m+col];
}
}
// Calculating P1
matAdd(arr[x11], arr[x22], arr[S1], n);
matAdd(arr[y11], arr[y22], arr[S2], n);
strassenMul(arr[S1], arr[S2], arr[P1], n);
// Calculating P2
matAdd(arr[x21], arr[x22], arr[S3], n);
strassenMul(arr[S3], arr[y11], arr[P2], n);
// Calculating P3
matSub(arr[y12], arr[y22], arr[S4], n);
strassenMul(arr[x11], arr[S4], arr[P3], n);
// Calculating P4
matSub(arr[y21], arr[y11], arr[S5], n);
strassenMul(arr[x22], arr[S5], arr[P4], n);
// Calculating P5
matAdd(arr[x11], arr[x12], arr[S6], n);
strassenMul(arr[S6], arr[y22], arr[P5], n);
// Calculating P6
matSub(arr[x21], arr[x11], arr[S7], n);
matAdd(arr[y11], arr[y12], arr[S8], n);
strassenMul(arr[S7], arr[S8], arr[P6], n);
// Calculating P7
matSub(arr[x12], arr[x22], arr[S9], n);
matAdd(arr[y21], arr[y22], arr[S10], n);
strassenMul(arr[S9], arr[S10], arr[P7], n);
// Calculating C11
matAdd(arr[P1], arr[P4], arr[S11], n);
matSub(arr[S11], arr[P5], arr[S12], n);
matAdd(arr[S12], arr[P7], arr[C11], n);
// Calculating C12
matAdd(arr[P3], arr[P5], arr[C12], n);
// Calculating C21
matAdd(arr[P2], arr[P4], arr[C21], n);
// Calculating C22
matAdd(arr[P1], arr[P3], arr[S13], n);
matSub(arr[S13], arr[P2], arr[S14], n);
matAdd(arr[S14], arr[P6], arr[C22], n);
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
Z[row*m+col] = arr[C11][i*n+j];
for (col = n, j = 0; col < m; col++, j++)
Z[row*m+col] = arr[C12][i*n+j];
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
Z[row*m+col] = arr[C21][i*n+j];
for (col = n, j = 0; col < m; col++, j++)
Z[row*m+col] = arr[C22][i*n+j];
}
for (i=0; i<arrs; i++)
free (arr[i]);
}
void matMul(double *A, double *B, double *C, int n)
{
int i = 0, j = 0, k = 0, row = 0, col = 0;
double sum;
for (i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
{
sum = 0.0;
for (k = 0; k < n; k++)
{
sum += A[i*n+k] * B[k*n+j];
}
C[i*n+j] = sum;
}
}
}
void matAdd(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
C[row*m+col] = A[row*m+col] + B[row*m+col];
}
void matSub(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
C[row*m+col] = A[row*m+col] - B[row*m+col];
}