As a part of an assignment, I am trying to find out the crossover point for Strassen's matrix multiplication and naive multiplication algorithms. But for the same, I am unable to proceed when matrix becomes 256x256. Can someone please suggest me the appropriate memory management technique to be able to handle larger inputs.
The code is in C as follows:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);
int idx = 0;
int main()
{
int N;
int count = 0;
int i, j;
clock_t start, end;
double elapsed;
int total = 15;
double tnaive[total];
double tstrassen[total];
printf("-------------------------------------------------------------------------\n\n");
for (count = 0; count < total; count++)
{
N = pow(2, count);
printf("Matrix size = %2d\t",N);
double X[N][N], Y[N][N], Z[N][N], W[N][N];
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
X[i][j] = rand()/(RAND_MAX + 1.);
Y[i][j] = rand()/(RAND_MAX + 1.);
}
}
start = clock();
matMul((double *)X, (double *)Y, (double *)W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);
start = clock();
strassenMul((double *)X, (double *)Y, (double *)Z, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
printf("strassen = %5.4f\n",tstrassen[count]);
}
printf("-------------------------------------------------------------------\n\n\n");
while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;
printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);
printf("Please enter the size of array as a power of 2\n");
scanf("%d",&N);
double A[N][N], B[N][N], C[N][N];
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
A[i][j] = rand()/(RAND_MAX + 1.);
B[i][j] = rand()/(RAND_MAX + 1.);
}
}
printf("------------------- Input Matrices A and B ---------------------------\n\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",A[i][j]);
printf("\n");
}
printf("\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",B[i][j]);
printf("\n");
}
printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");
strassenMul((double *)A, (double *)B, (double *)C, N);
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",C[i][j]);
printf("\n");
}
return(0);
}
void strassenMul(double *X, double *Y, double *Z, int m)
{
if (m <= idx)
{
matMul((double *)X, (double *)Y, (double *)Z, m);
return;
}
if (m == 1)
{
*Z = *X * *Y;
return;
}
int row = 0, col = 0;
int n = m/2;
int i = 0, j = 0;
double x11[n][n], x12[n][n], x21[n][n], x22[n][n];
double y11[n][n], y12[n][n], y21[n][n], y22[n][n];
double P1[n][n], P2[n][n], P3[n][n], P4[n][n], P5[n][n], P6[n][n], P7[n][n];
double C11[n][n], C12[n][n], C21[n][n], C22[n][n];
double S1[n][n], S2[n][n], S3[n][n], S4[n][n], S5[n][n], S6[n][n], S7[n][n];
double S8[n][n], S9[n][n], S10[n][n], S11[n][n], S12[n][n], S13[n][n], S14[n][n];
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
x11[i][j] = *((X+row*m)+col);
y11[i][j] = *((Y+row*m)+col);
}
for (col = n, j = 0; col < m; col++, j++)
{
x12[i][j] = *((X+row*m)+col);
y12[i][j] = *((Y+row*m)+col);
}
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
x21[i][j] = *((X+row*m)+col);
y21[i][j] = *((Y+row*m)+col);
}
for (col = n, j = 0; col < m; col++, j++)
{
x22[i][j] = *((X+row*m)+col);
y22[i][j] = *((Y+row*m)+col);
}
}
// Calculating P1
matAdd((double *)x11, (double *)x22, (double *)S1, n);
matAdd((double *)y11, (double *)y22, (double *)S2, n);
strassenMul((double *)S1, (double *)S2, (double *)P1, n);
// Calculating P2
matAdd((double *)x21, (double *)x22, (double *)S3, n);
strassenMul((double *)S3, (double *)y11, (double *)P2, n);
// Calculating P3
matSub((double *)y12, (double *)y22, (double *)S4, n);
strassenMul((double *)x11, (double *)S4, (double *)P3, n);
// Calculating P4
matSub((double *)y21, (double *)y11, (double *)S5, n);
strassenMul((double *)x22, (double *)S5, (double *)P4, n);
// Calculating P5
matAdd((double *)x11, (double *)x12, (double *)S6, n);
strassenMul((double *)S6, (double *)y22, (double *)P5, n);
// Calculating P6
matSub((double *)x21, (double *)x11, (double *)S7, n);
matAdd((double *)y11, (double *)y12, (double *)S8, n);
strassenMul((double *)S7, (double *)S8, (double *)P6, n);
// Calculating P7
matSub((double *)x12, (double *)x22, (double *)S9, n);
matAdd((double *)y21, (double *)y22, (double *)S10, n);
strassenMul((double *)S9, (double *)S10, (double *)P7, n);
// Calculating C11
matAdd((double *)P1, (double *)P4, (double *)S11, n);
matSub((double *)S11, (double *)P5, (double *)S12, n);
matAdd((double *)S12, (double *)P7, (double *)C11, n);
// Calculating C12
matAdd((double *)P3, (double *)P5, (double *)C12, n);
// Calculating C21
matAdd((double *)P2, (double *)P4, (double *)C21, n);
// Calculating C22
matAdd((double *)P1, (double *)P3, (double *)S13, n);
matSub((double *)S13, (double *)P2, (double *)S14, n);
matAdd((double *)S14, (double *)P6, (double *)C22, n);
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
*((Z+row*m)+col) = C11[i][j];
for (col = n, j = 0; col < m; col++, j++)
*((Z+row*m)+col) = C12[i][j];
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
*((Z+row*m)+col) = C21[i][j];
for (col = n, j = 0; col < m; col++, j++)
*((Z+row*m)+col) = C22[i][j];
}
}
void matMul(double *A, double *B, double *C, int n)
{
int i = 0, j = 0, k = 0, row = 0, col = 0;
double sum;
for (i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
{
sum = 0.0;
for (k = 0; k < n; k++)
{
sum += *((A+i*n)+k) * *((B+k*n)+j);
}
*((C+i*n)+j) = sum;
}
}
}
void matAdd(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
*((C+row*m)+col) = *((A+row*m)+col) + *((B+row*m)+col);
}
void matSub(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
*((C+row*m)+col) = *((A+row*m)+col) - *((B+row*m)+col);
}
Added later If I try using malloc statements for memory assignment, the code is as follows. But the problem is that it stops after the naive matrix multiplication method and does not even proceed to the Strassen's method for N=1. It shows a prompt to close the program.
for (count = 0; count < total; count++)
{
N = pow(2, count);
printf("Matrix size = %2d\t",N);
//double X[N][N], Y[N][N], Z[N][N], W[N][N];
double **X, **Y, **Z, **W;
X = malloc(N * sizeof(double*));
if (X == NULL){
perror("Failed malloc() in X");
return 1;
}
Y = malloc(N * sizeof(double*));
if (Y == NULL){
perror("Failed malloc() in Y");
return 1;
}
Z = malloc(N * sizeof(double*));
if (Z == NULL){
perror("Failed malloc() in Z");
return 1;
}
W = malloc(N * sizeof(double*));
if (W == NULL){
perror("Failed malloc() in W");
return 1;
}
for (j = 0; j < N; j++)
{
X[j] = malloc(N * sizeof(double*));
if (X[j] == NULL){
perror("Failed malloc() in X[j]");
return 1;
}
Y[j] = malloc(N * sizeof(double*));
if (Y[j] == NULL){
perror("Failed malloc() in Y[j]");
return 1;
}
Z[j] = malloc(N * sizeof(double*));
if (Z[j] == NULL){
perror("Failed malloc() in Z[j]");
return 1;
}
W[j] = malloc(N * sizeof(double*));
if (W[j] == NULL){
perror("Failed malloc() in W[j]");
return 1;
}
}
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
X[i][j] = rand()/(RAND_MAX + 1.);
Y[i][j] = rand()/(RAND_MAX + 1.);
}
}
start = clock();
matMul((double *)X, (double *)Y, (double *)W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);
start = clock();
strassenMul((double *)X, (double *)Y, (double *)Z, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
for (j = 0; j < N; j++)
{
free(X[j]);
free(Y[j]);
free(Z[j]);
free(W[j]);
}
free(X); free(Y); free(Z); free(W);
printf("strassen = %5.4f\n",tstrassen[count]);
}
I have re-written the answer. My previous answer which allocated memory row by row won't work, because OP has cast the 2-D arrays to 1-D arrays when passed to the functions. Here is my re-write of the code with some simplifications, such as keeping all the matrix arrays 1-dimensional.
I am unsure exactly what Strassen's method does, although the recursion halves the matrix dimensions. So I do wonder if the intention was to use row*2 and col*2 when accessing the arrays passed.
I hope the techniques are useful to you - even that it works! All the matrix arrays are now on the heap.
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#define total 4 //15
void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);
enum array { x11, x12, x21, x22, y11, y12, y21, y22,
P1, P2, P3, P4, P5, P6, P7, C11, C12, C21, C22,
S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, arrs };
int idx = 0;
int main()
{
int N;
int count = 0;
int i, j;
clock_t start, end;
double elapsed;
double tnaive[total];
double tstrassen[total];
double *X, *Y, *Z, *W, *A, *B, *C;
printf("-------------------------------------------------------------------------\n\n");
for (count = 0; count < total; count++)
{
N = (int)pow(2, count);
printf("Matrix size = %2d\t",N);
X = malloc(N*N*sizeof(double));
Y = malloc(N*N*sizeof(double));
Z = malloc(N*N*sizeof(double));
W = malloc(N*N*sizeof(double));
if (X==NULL || Y==NULL || Z==NULL || W==NULL) {
printf("Out of memory (1)\n");
return 1;
}
srand((unsigned)time(NULL));
for (i=0; i<N*N; i++)
{
X[i] = rand()/(RAND_MAX + 1.);
Y[i] = rand()/(RAND_MAX + 1.);
}
start = clock();
matMul(X, Y, W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);
start = clock();
strassenMul(X, Y, Z, N);
free(W);
free(Z);
free(Y);
free(X);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
printf("strassen = %5.4f\n",tstrassen[count]);
}
printf("-------------------------------------------------------------------\n\n\n");
while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;
printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);
printf("Please enter the size of array as a power of 2\n");
scanf("%d",&N);
A = malloc(N*N*sizeof(double));
B = malloc(N*N*sizeof(double));
C = malloc(N*N*sizeof(double));
if (A==NULL || B==NULL || C==NULL) {
printf("Out of memory (2)\n");
return 1;
}
srand((unsigned)time(NULL));
for (i=0; i<N*N; i++)
{
A[i] = rand()/(RAND_MAX + 1.);
B[i] = rand()/(RAND_MAX + 1.);
}
printf("------------------- Input Matrices A and B ---------------------------\n\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",A[i*N+j]);
printf("\n");
}
printf("\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",B[i*N+j]);
printf("\n");
}
printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");
strassenMul(A, B, C, N);
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",C[i*N+j]);
printf("\n");
}
free(C);
free(B);
free(A);
return(0);
}
void strassenMul(double *X, double *Y, double *Z, int m)
{
int row = 0, col = 0;
int n = m/2;
int i = 0, j = 0;
double *arr[arrs]; // each matrix mem ptr
if (m <= idx)
{
matMul(X, Y, Z, m);
return;
}
if (m == 1)
{
*Z = *X * *Y;
return;
}
for (i=0; i<arrs; i++) { // memory for arrays
arr[i] = malloc(n*n*sizeof(double));
if (arr[i] == NULL) {
printf("Out of memory (1)\n");
exit (1); // brutal
}
}
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
arr[x11][i*n+j] = X[row*m+col];
arr[y11][i*n+j] = Y[row*m+col];
}
for (col = n, j = 0; col < m; col++, j++)
{
arr[x12][i*n+j] = X[row*m+col];
arr[y12][i*n+j] = Y[row*m+col];
}
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
arr[x21][i*n+j] = X[row*m+col];
arr[y21][i*n+j] = Y[row*m+col];
}
for (col = n, j = 0; col < m; col++, j++)
{
arr[x22][i*n+j] = X[row*m+col];
arr[y22][i*n+j] = Y[row*m+col];
}
}
// Calculating P1
matAdd(arr[x11], arr[x22], arr[S1], n);
matAdd(arr[y11], arr[y22], arr[S2], n);
strassenMul(arr[S1], arr[S2], arr[P1], n);
// Calculating P2
matAdd(arr[x21], arr[x22], arr[S3], n);
strassenMul(arr[S3], arr[y11], arr[P2], n);
// Calculating P3
matSub(arr[y12], arr[y22], arr[S4], n);
strassenMul(arr[x11], arr[S4], arr[P3], n);
// Calculating P4
matSub(arr[y21], arr[y11], arr[S5], n);
strassenMul(arr[x22], arr[S5], arr[P4], n);
// Calculating P5
matAdd(arr[x11], arr[x12], arr[S6], n);
strassenMul(arr[S6], arr[y22], arr[P5], n);
// Calculating P6
matSub(arr[x21], arr[x11], arr[S7], n);
matAdd(arr[y11], arr[y12], arr[S8], n);
strassenMul(arr[S7], arr[S8], arr[P6], n);
// Calculating P7
matSub(arr[x12], arr[x22], arr[S9], n);
matAdd(arr[y21], arr[y22], arr[S10], n);
strassenMul(arr[S9], arr[S10], arr[P7], n);
// Calculating C11
matAdd(arr[P1], arr[P4], arr[S11], n);
matSub(arr[S11], arr[P5], arr[S12], n);
matAdd(arr[S12], arr[P7], arr[C11], n);
// Calculating C12
matAdd(arr[P3], arr[P5], arr[C12], n);
// Calculating C21
matAdd(arr[P2], arr[P4], arr[C21], n);
// Calculating C22
matAdd(arr[P1], arr[P3], arr[S13], n);
matSub(arr[S13], arr[P2], arr[S14], n);
matAdd(arr[S14], arr[P6], arr[C22], n);
for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
Z[row*m+col] = arr[C11][i*n+j];
for (col = n, j = 0; col < m; col++, j++)
Z[row*m+col] = arr[C12][i*n+j];
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
Z[row*m+col] = arr[C21][i*n+j];
for (col = n, j = 0; col < m; col++, j++)
Z[row*m+col] = arr[C22][i*n+j];
}
for (i=0; i<arrs; i++)
free (arr[i]);
}
void matMul(double *A, double *B, double *C, int n)
{
int i = 0, j = 0, k = 0, row = 0, col = 0;
double sum;
for (i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
{
sum = 0.0;
for (k = 0; k < n; k++)
{
sum += A[i*n+k] * B[k*n+j];
}
C[i*n+j] = sum;
}
}
}
void matAdd(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
C[row*m+col] = A[row*m+col] + B[row*m+col];
}
void matSub(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
C[row*m+col] = A[row*m+col] - B[row*m+col];
}
Related
I'm trying to write a function that does naive matrix multiplication of two contiguous, row-major arrays. But when I attempt to print each value at the end I get garbage. I'm guessing it's because I've mixed up the proper iterations and scaling needed to jump rows/columns. Does anyone have any advice?
Full code necessary is below:
#include <stdio.h>
#include <stdlib.h>
void dmatmul(double *a, double *b, double *c, int astride, int bstride, int cdim_0, int cdim_1) {
int i, j, p;
for (i = 0; i < cdim_0; i++) {
for (j = 0; j < cdim_1; j++) {
c[i * cdim_1 + j] = 0.0;
for (p = 0; p < (astride); p++) {
c[i * cdim_1 + j] += a[i * (astride) + p] * b[p * (bstride) + j];
}
}
}
}
int main(void) {
double *x, *y, *z;
int xdim_0, xdim_1, ydim_0, ydim_1, zdim_0, zdim_1, i, j;
xdim_0 = 2;
xdim_1 = 4;
ydim_0 = 4;
ydim_1 = 2;
zdim_0 = 2;
zdim_1 = 2;
x = (double *) malloc (xdim_0 * xdim_1 * sizeof(double));
y = (double *) malloc (ydim_0 * ydim_1 * sizeof(double));
z = (double *) malloc (zdim_0 * zdim_1 * sizeof(double));
for (i = 0; i < xdim_0 * xdim_1; i++) {
x[i] = i + 1;
y[i] = 2 * (i + 1);
}
dmatmul(x, y, z, xdim_1, ydim_1, zdim_0, zdim_1);
printf("\nMatrix product of X and Y dimensions: (%d, %d)\n", zdim_0, zdim_1);
printf("Matrix product of X and Y values:");
for (i = 0; i < zdim_0; i++) {
printf("\n");
for (j = 0; j < zdim_1; i++) {
printf("\t%f", z[i * zdim_1 + j]);
}
}
return 0;
}
The primary problem is a typo in the inner for loop doing the printing. You have:
for (j = 0; j < zdim_1; i++)
but you ned to increment j, not i:
for (j = 0; j < zdim_1; j++)
Here's my code, which has an independent matrix printing function appropriate for the arrays you're using:
/* SO 7516-7451 */
#include <stdio.h>
#include <stdlib.h>
static void dmatmul(double *a, double *b, double *c, int astride, int bstride, int cdim_0, int cdim_1)
{
int i, j, p;
for (i = 0; i < cdim_0; i++)
{
for (j = 0; j < cdim_1; j++)
{
c[i * cdim_1 + j] = 0.0;
for (p = 0; p < (astride); p++)
{
c[i * cdim_1 + j] += a[i * (astride) + p] * b[p * (bstride) + j];
}
}
}
}
static void mat_print(const char *tag, int rows, int cols, double *matrix)
{
printf("%s (%dx%d):\n", tag, rows, cols);
for (int i = 0; i < rows; i++)
{
for (int j = 0; j < cols; j++)
printf("%4.0f", matrix[i * cols + j]);
putchar('\n');
}
}
int main(void)
{
int xdim_0 = 2;
int xdim_1 = 4;
int ydim_0 = 4;
int ydim_1 = 2;
int zdim_0 = 2;
int zdim_1 = 2;
double *x = (double *)malloc(xdim_0 * xdim_1 * sizeof(double));
double *y = (double *)malloc(ydim_0 * ydim_1 * sizeof(double));
double *z = (double *)malloc(zdim_0 * zdim_1 * sizeof(double));
for (int i = 0; i < xdim_0 * xdim_1; i++)
{
x[i] = i + 1;
y[i] = 2 * (i + 1);
}
mat_print("X", xdim_0, xdim_1, x);
mat_print("Y", ydim_0, ydim_1, y);
dmatmul(x, y, z, xdim_1, ydim_1, zdim_0, zdim_1);
mat_print("Z", zdim_0, zdim_1, z);
printf("\nMatrix product of X and Y dimensions: (%d, %d)\n", zdim_0, zdim_1);
printf("Matrix product of X and Y values:\n");
for (int i = 0; i < zdim_0; i++)
{
for (int j = 0; j < zdim_1; j++)
printf("\t%f", z[i * zdim_1 + j]);
printf("\n");
}
return 0;
}
I've also initialized the variables as I declared them. The code should, but does not, check that the memory was allocated.
When I ran this code without your printing, I got the correct result, so then I took a good look at that and saw the problem.
X (2x4):
1 2 3 4
5 6 7 8
Y (4x2):
2 4
6 8
10 12
14 16
Z (2x2):
100 120
228 280
Matrix product of X and Y dimensions: (2, 2)
Matrix product of X and Y values:
100.000000 120.000000
228.000000 280.000000
int main() {
double a, b, hx, hy, tol, max1;
double h = 0.25;
tol = 0.000000001;
max1 = 100000000;
a = 1; b = 1;
hx = h;
hy = h;
int n = (a / hy) + 1;
int m = (b / hx) + 1;
double **U = (double **) malloc(n * sizeof(double*));
for (int i = 0; i < n; i++)
U[i] = (double *) malloc(m * sizeof(double));
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
U[i][j] = 1;
}
}
LAPLACEWCG(a, b, h, hx, hy, tol, max1,U);
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
printf("U[%d][%d]: %lf \n", i, j, U[i][j]);
}
}
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
printf("U[%d][%d]: %lf \n", i, j, U[i][j]);
}
}
return 0; }
Why is the matrix not getting printed. If I try to print it in laplacewcg() it prints inside the while loop but it doesn't print outside it either. What needs to be changed in the code?
You are passing to the LAPLACEWCG() function the U matrix value but you pass by reference. try:
LAPLACEWCG(..., &U);
I'm trying to solve Gaussian Elimination and Back Substitution in C.
But I've got Segmentation fault(Core dumped) error in shell.
this is the part of main code.
float **a = (float **) malloc(sizeof(float*) *n);
for (int i = 0; i < n; i++)
a[i] = (float*) malloc(sizeof(float) *n);
float *b = (float*) malloc(sizeof(float) *n);
float *x = (float*) malloc(sizeof(float) *n);
Gaussian(n, &a, &b);
BackSubstitution(n, &a, &b, &x);
and below is gaussian.c . I think there is some problem with gaussian.c
#include <math.h>
void Gaussian(int n, float ***arr, float **arr2)
{
for (int l = 0; l < n - 1; l++)
{
for (int i = l + 1, j = 1; i < n && j < n; i++, j++)
{ (*arr)[i][j] = (*arr)[i][j] - ((*arr)[i][l] / (*arr)[l][l]) * (*arr)[l][j];
(*arr2)[i] = (*arr2)[i] - ((*arr)[i][l] / (*arr)[l][l]) * (*arr2)[l];
}
}
}
void BackSubstitution(int n, float ***arr, float **arr2, float **result)
{
for (int i = n - 1; i > 0; i--)
{
(*result)[i] = (*arr2)[i] / (*arr)[i][i];
for (int j = 0; j < i; j++)
{ (*arr2)[j] = (*arr2)[j] - (*result)[i] * (*arr)[j][i];
(*arr)[j][i] = 0;
}
}
}
Is there something wrong that generate segmentation fault?
A few things:
You have no reason to pass your arrays by pointer reference. So your functions gets much easier by eliminating one extra reference:
void Gaussian(int n, float** arr, float* arr2) {
for (int l = 0; l < n - 1; l++) {
for (int i = l + 1, j = 1; i < n && j < n; i++, j++) {
arr[i][j] = arr[i][j] - arr[i][l] / arr[l][l] * arr[l][j];
arr2[i] = arr2[i] - arr[i][l] / arr[l][l] * arr2[l];
}
}
}
void BackSubstitution(int n, float** arr, float* arr2, float* result) {
for (int i = n - 1; i > 0; i--) {
result[i] = arr2[i] / arr[i][i];
for (int j = 0; j < i; j++) {
arr2[j] = arr2[j] - result[i] * arr[j][i];
arr[j][i] = 0;
}
}
}
Second, you aren't actually initializing the contents of your arrays with valid data. Some of your array initializations are missing initializations to actual floating point data. Without this, your arrays have garbage data - which won't play well with floating point.
So aside from initializing your arrays correctly, you don't have to pass them in by pointer (because arrays degrade to pointers in function calls)
int n = 10;
float** a = (float**)malloc(n * sizeof(float*));
for (int i = 0; i < n; i++)
{
a[i] = (float*)malloc(n * sizeof(float));
for (int j = 0; j < n; j++)
{
a[i][j] = 0.0f; // you initialize a[i][j] with your data
}
}
float* b = (float*)malloc(n * sizeof(float));
float* x = (float*)malloc(n * sizeof(float));
for (int i = 0; i < n; i++)
{
b[i] = 0.0f;
x[i] = 0.0f;
}
Gaussian(n, a, b);
BackSubstitution(n, a, b, x);
I'm using MPI to perform some calculations on a specific function. I only need MPI for that specific function, so my question is, when I initialise MPI in my main function how do I make sure that whatever code I'm writing is only performed by the process with rank 0.
I've tried putting everything under an if (rank == 0) but the code doesn't seem to work.
int main(int argc, char* argv[])
{
int num_procs, my_id;
int *count, *disp;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
int i, j, N, k;
double u, l, h;
double *A, *b1, *b2, *x, *xo, x1bound, x2bound, y1bound, y2bound;
double norm, sum, dot, tmp;
l = -10;
u = 10;
h = 0.5;
tmp = (u-l)/h;
N = floor(tmp);
A = (double*)calloc(N*N,sizeof(double));
b1 = (double *)calloc(N,sizeof(double));
x = (double *)malloc(sizeof(double)*N);
xo = (double *)malloc(N*sizeof(double));
for (i = 0; i < N; i++)
xo[i] = 0.0;
count = (int*) calloc(num_procs, sizeof(int));
disp = (int*) calloc(num_procs, sizeof(int));
A[0] = -2.0;
A[1] = 1.0;
for (i = 1; i < N; i++)
{
A[i*N+i] = -2.0;
A[i*N+i+1] = 1.0;
A[i*N+i-1] = 1.0;
}
for (i = 0; i < N; i++)
b1[i] = (l + i*h);
func(&b1, N, h);
x1bound = 0;
y1bound = 0;
x2bound = 1;
y2bound = 0;
i = index_of(x1bound, l, h);
A[i*N+i] = 1.0;
A[i*N+i+1] = 0.0;
A[i*N+i-1] = 0.0;
b1[i] = y1bound;
i = index_of(x2bound, l, h);
A[i*N+i] = 1.0;
A[i*N+i+1] = 0.0;
A[i*N+i-1] = 0.0;
b1[i] = y2bound;
k = 0;
norm = INF;
while (norm > EPS)
{
dot = 0.0;
MATVEC(N,N,A,xo,&b2,num_procs,my_id,count,disp,MPI_COMM_WORLD);
for (i = 0; i < N; i++)
{
x[i] = (b1[i] - b2[i])/(A[i*N+i]);
dot += (x[i] - xo[i])*(x[i] - xo[i]);
}
for (i = 0; i < N; i++)
xo[i] = x[i];
printf("from %d dot = %lf\n",my_id,dot);
norm = sqrt(dot);
k++;
MPI_Bcast(xo,N,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
}
if(my_id == 0);
{
printf("After %d operations we obtained\n", k);
for(i = 0; i < N; i++)
printf("%lf\n", x[i]);
free(x); free(xo);
free(b1); free(b2);
free(A);
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
}
I have this parallel Gaussian elimination code. A segmentation error happens upon calling either MPI_Gather function calls. I know such error may rise if memory is not allocated properly for either buffers. But I cannot see any wrong with the memory management code.
Can someone help?
Thanks.
Notes:
The program reads from a .txt file in the same directory called input.txt.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
/*void print2dAddresses(double** array2d, int rows, int cols)
{
int i;
for(i = 0; i < rows; i++)
{
int j;
for(j = 0; j < cols; j++)
{
printf("%d ", &(array2d[i][j]));
}
printf("\n");
}
printf("------------------------------------");
}*/
double** newMatrix(int rows, int cols)
{
double *data = (double*) malloc(rows * cols * sizeof(double));
double **array= (double **)malloc(rows * sizeof(double*));
int i;
for (i=0; i<rows; i++)
array[i] = &(data[cols*i]);
return array;
}
void freeMatrix(double** mat)
{
free(mat[0]);
free(mat);
}
double** new2dArray(int nrows, int ncols)
{
int i;
double** array2d;
array2d = (double**) malloc(nrows * sizeof(double*));
for(i = 0; i < nrows; i++)
{
array2d[i] = (double*) malloc(ncols * sizeof(double));
}
return array2d;
}
double* new1dArray(int size)
{
return (double*) malloc(size * sizeof(double));
}
void free2dArray(double** array2d, int nrows)
{
int i;
for(i = 0; i < nrows; i++)
{
free(array2d[i]);
}
free(array2d);
}
void print2dArray(double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
printf("%lf ", array2d[i][j]);
}
printf("\n");
}
printf("----------------------\n");
}
void print1dArray(double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%lf\n", array[i]);
}
printf("----------------------\n");
}
void read2dArray(FILE* fp, double** array2d, int nrows, int ncols)
{
int i, j;
for(i = 0; i < nrows; i++)
{
for(j = 0; j < ncols; j++)
{
fscanf(fp, "%lf", &(array2d[i][j]));
}
}
}
void read1dArray(FILE* fp, double* array, int size)
{
int i;
for(i = 0; i < size; i++)
{
fscanf(fp, "%lf", &(array[i]));
}
}
void readSymbols(char* symbols, int size, FILE* fp)
{
int i;
for(i = 0; i < size; i++)
{
char c = '\n';
while(c == '\n' | c == ' ' | c == '\t' | c == '\r')
fscanf(fp, "%c", &c);
symbols[i] = c;
}
}
void printSolution(char* symbols, double* x, int size)
{
int i;
for(i = 0; i < size; i++)
{
printf("%c = %lf\n", symbols[i], x[i]);
}
}
double* copy_1d_array(double* original, int size)
{
double* copy_version;
int i;
copy_version = (double*) malloc(size * sizeof(double));
for(i = 0; i < size; i++)
{
copy_version[i] = original[i];
}
return copy_version;
}
int main(int argc, char** argv)
{
int p, rank, i, j, k, l, msize, rowsPerProcess, remainder, startingRow, dest, rowCounter, remainingRows, neededProcesses;
double **A, *b, *x, **smallA, *currentRow, *smallB, currentB, **receivedA, *receivedB;
char *symbols;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == 0)
{
FILE* fp;
fp = fopen("input.txt", "r");
fscanf(fp, "%d", &msize);
A = newMatrix(msize, msize);
b = new1dArray(msize);
x = new1dArray(msize);
symbols = (char*) malloc(msize * sizeof(char));
read2dArray(fp, A, msize, msize);
read1dArray(fp, b, msize);
readSymbols(symbols, msize, fp);
fclose(fp);
/*print2dArray(A, msize, msize);
print1dArray(b, msize);*/
}
MPI_Bcast(&msize, 1, MPI_INT, 0, MPI_COMM_WORLD);
for(i = 0; i < (msize - 1); i++)
{
int maxIndex;
double maxCoef, tmp, r;
/*finding max row*/
if(rank == 0)
{
maxIndex = i;
maxCoef = fabs(A[i][i]);
for(j = i + 1; j < msize; j++)
{
if(fabs(A[j][i]) > maxCoef)
{
maxCoef = A[j][i];
maxIndex = j;
}
}
/*swapping the current row with the max row*/
for(j = 0; j < msize; j++)
{
tmp = A[i][j];
A[i][j] = A[maxIndex][j];
A[maxIndex][j] = tmp;
}
tmp = b[i];
b[i] = b[maxIndex];
b[maxIndex] = tmp;
/*elimination*/
/*for(j = i + 1; j < msize; j++)
{
double r = A[j][i] / A[i][i];
subtracting r * row i from row j
for(k = i; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}*/
/*parallel elimination*/
startingRow = i + 1;
neededProcesses = p;
remainingRows = msize - startingRow;
if(remainingRows < neededProcesses)
{
neededProcesses = remainingRows;
}
rowsPerProcess = remainingRows / neededProcesses;
remainder = remainingRows % neededProcesses;
}
MPI_Bcast(&startingRow, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&rowsPerProcess, 1, MPI_INT, 0, MPI_COMM_WORLD);
if(rank == 0)
{
currentRow = copy_1d_array(A[startingRow-1], msize);
currentB = b[startingRow-1];
}
else
{
currentRow = new1dArray(msize);
}
MPI_Bcast(currentRow, msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(¤tB, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
receivedB = new1dArray(remainingRows);
}
smallA = newMatrix(rowsPerProcess, msize);
smallB = new1dArray(rowsPerProcess);
MPI_Scatter(&(A[startingRow][0]), rowsPerProcess*msize, MPI_DOUBLE, &(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Scatter(&(b[startingRow]), rowsPerProcess, MPI_DOUBLE, &(smallB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for(j = 0; j < rowsPerProcess; j++)
{
r = smallA[j][startingRow-1] / currentRow[startingRow-1];
for(k = 0; k < msize; k++)
{
smallA[j][k] -= r * currentRow[k];
}
smallB[j] -= r * currentB;
}
MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, &(receivedA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Gather(&(smallB[0]), rowsPerProcess, MPI_DOUBLE, &(receivedB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);
freeMatrix(smallA);
free(smallB);
if(rank == 0)
{
for(j = 0; j < remainingRows; j++)
{
for(k = 0; k < msize; k++)
{
A[j+startingRow][k] = receivedA[j][k];
}
b[j+startingRow] = receivedB[j];
}
free(currentRow);
freeMatrix(receivedA);
free(receivedB);
}
if(rank == 0)
{
if(remainder > 0)
{
for(j = (msize - remainder); j < msize; j++)
{
r = A[j][i] / A[i][i];
for(k = 0; k < msize; k++)
{
A[j][k] -= r * A[i][k];
}
b[j] -= r * b[i];
}
}
}
}
if(rank == 0)
{
/*backward substitution*/
for(i = msize - 1; i >= 0; i--)
{
x[i] = b[i];
for(j = msize - 1; j > i; j--)
{
x[i] -= A[i][j] * x[j];
}
x[i] /= A[i][i];
}
printf("solution = \n");
//print1dArray(x, msize);
printSolution(symbols, x, msize);
freeMatrix(A);
free(b);
free(x);
free(symbols);
}
MPI_Finalize();
return 0;
}
Input File:
3
1 1 1
1 1 3
2 1 4
4
9
12
x
y
z
It might be this: &(receivedA[0][0]) on processes where rank != 0. You're indexing an array that hasn't been allocated. You might have to create another pointer, like this:
if(rank == 0)
{
receivedA = newMatrix(remainingRows, msize);
recievedAHead = &(receivedA[0][0]);
receivedB = new1dArray(remainingRows);
}
else {
recievedAHead = NULL;
}
and use recievedAHead in the MPI_Gather call.