I need to use MPI and OpenMP (two separate problems) to parallelize a code from the SBAC-PAD marathon (reference: http://lspd.mackenzie.br/marathon/18/problems.html). I am working on the Himeno benchmark. I believe the only part of this code that is worth parallelizing is the jacobi function:
/*
 * MR(mt,n,r,c,d): element (n,r,c,d) of the 4-D array stored flat in mt->m.
 * The flat index is ((n*mrows + r)*mcols + c)*mdeps + d, so d varies fastest.
 * Expands to an lvalue, so it can be read or assigned.
 * Every argument is parenthesized so that expressions such as &mat or p+1 can
 * be passed; mt is evaluated several times and must be free of side effects.
 */
#define MR(mt,n,r,c,d) ((mt)->m[(n) * (mt)->mrows * (mt)->mcols * (mt)->mdeps + (r) * (mt)->mcols * (mt)->mdeps + (c) * (mt)->mdeps + (d)])

/* Flat storage plus extents for the 4-D array addressed through MR. */
struct Matrix {
  float* m;    /* element storage, indexed via MR */
  int mnums;   /* not read by MR; presumably the extent of the leading (n) index -- confirm */
  int mrows;   /* extent of the r index (used in MR strides) */
  int mcols;   /* extent of the c index (used in MR strides) */
  int mdeps;   /* extent of the d index (innermost stride) */
};
/*
 * jacobi: runs nn sweeps over the interior points (1 .. extent-2 in each of
 * the three spatial indices) of p.
 *
 * Each sweep evaluates a stencil of p weighted by the coefficient arrays a, b,
 * c plus wrk1, forms the masked residual ss (bnd is the mask factor), writes
 * the relaxed value p + omega*ss into wrk2, and finally copies wrk2 back into
 * p. Because the stencil reads only p and writes only wrk2, the points of one
 * sweep do not depend on each other.
 *
 * omega is not declared in this block; it is taken from the enclosing scope.
 *
 * Returns the sum of ss*ss accumulated during the LAST sweep (gosa is reset at
 * the start of every sweep), or 0.0 when nn <= 0.
 */
float
jacobi(int nn, Matrix* a,Matrix* b,Matrix* c,
       Matrix* p,Matrix* bnd,Matrix* wrk1,Matrix* wrk2)
{
  int i,j,k,n,imax,jmax,kmax;
  /* Initialised so that nn <= 0 returns 0.0 rather than an indeterminate value. */
  float gosa = 0.0f, s0, ss;

  imax= p->mrows-1;
  jmax= p->mcols-1;
  kmax= p->mdeps-1;

  for(n=0 ; n<nn ; n++){
    gosa = 0.0;

    /* Stencil sweep: reads p, writes wrk2. */
    for(i=1 ; i<imax; i++){
      for(j=1 ; j<jmax ; j++){
        for(k=1 ; k<kmax ; k++){
          s0= MR(a,0,i,j,k)*MR(p,0,i+1,j,  k)
            + MR(a,1,i,j,k)*MR(p,0,i,  j+1,k)
            + MR(a,2,i,j,k)*MR(p,0,i,  j,  k+1)
            + MR(b,0,i,j,k)
             *( MR(p,0,i+1,j+1,k) - MR(p,0,i+1,j-1,k)
              - MR(p,0,i-1,j+1,k) + MR(p,0,i-1,j-1,k) )
            + MR(b,1,i,j,k)
             *( MR(p,0,i,j+1,k+1) - MR(p,0,i,j-1,k+1)
              - MR(p,0,i,j+1,k-1) + MR(p,0,i,j-1,k-1) )
            + MR(b,2,i,j,k)
             *( MR(p,0,i+1,j,k+1) - MR(p,0,i-1,j,k+1)
              - MR(p,0,i+1,j,k-1) + MR(p,0,i-1,j,k-1) )
            + MR(c,0,i,j,k) * MR(p,0,i-1,j,  k)
            + MR(c,1,i,j,k) * MR(p,0,i,  j-1,k)
            + MR(c,2,i,j,k) * MR(p,0,i,  j,  k-1)
            + MR(wrk1,0,i,j,k);

          ss= (s0*MR(a,3,i,j,k) - MR(p,0,i,j,k))*MR(bnd,0,i,j,k);

          gosa+= ss*ss;
          MR(wrk2,0,i,j,k)= MR(p,0,i,j,k) + omega*ss;
        }
      }
    }

    /* Publish the new values: copy the interior of wrk2 back into p. */
    for(i=1 ; i<imax ; i++){
      for(j=1 ; j<jmax ; j++){
        for(k=1 ; k<kmax ; k++){
          MR(p,0,i,j,k)= MR(wrk2,0,i,j,k);
        }
      }
    }
  } /* end n loop */

  return(gosa);
}
The problem is that this function seems to be sequential in nature, since every iteration of the n loop depends on the previous one. What I tried, using MPI, was to introduce an auxiliary variable for gosa (auxgosa) and call MPI_Reduce after the i, j, k loops, as follows (the root process is rank 0):
// MPI attempt at the sweep: each rank processes a contiguous band of i rows and
// the per-rank sums of ss*ss are combined with MPI_Reduce.
// aa, bb, cc, pp, abnd, awrk1, awrk2, omega, gosa, auxgosa, imax/jmax/kmax and
// the loop counters are declared outside this fragment.
//rank is the current process
//size is the total amount of processes
// Integer division: (imax+1)/size rows per rank; when size does not divide
// imax+1, the remaining rows are not assigned to any rank.
int start = ((imax+1)/size)*rank;
// NOTE(review): stop is the last index of this rank's band, yet the loop below
// runs while i<stop, so row `stop` itself is never visited. For every rank
// except the last that is an interior row which no rank computes.
int stop = ((imax+1)/size)*(rank+1)-1;
// Rank 0 skips row 0, matching the serial loop, which starts at i=1.
if(rank == 0){start++;}
for(n=0 ; n<nn ; n++){
gosa = 0.0;
auxgosa = 0.0;
// The stencil reads pp rows i-1 and i+1, i.e. one row beyond each end of the band.
for(i=start ; i<stop; i++)
for(j=1 ; j<jmax ; j++)
for(k=1 ; k<kmax ; k++){
s0= MR(aa,0,i,j,k)*MR(pp,0,i+1,j,k)
+ MR(aa,1,i,j,k)*MR(pp,0,i, j+1,k)
+ MR(aa,2,i,j,k)*MR(pp,0,i, j, k+1)
+ MR(bb,0,i,j,k)
*( MR(pp,0,i+1,j+1,k) - MR(pp,0,i+1,j-1,k)
- MR(pp,0,i-1,j+1,k) + MR(pp,0,i-1,j-1,k) )
+ MR(bb,1,i,j,k)
*( MR(pp,0,i,j+1,k+1) - MR(pp,0,i,j-1,k+1)
- MR(pp,0,i,j+1,k-1) + MR(pp,0,i,j-1,k-1) )
+ MR(bb,2,i,j,k)
*( MR(pp,0,i+1,j,k+1) - MR(pp,0,i-1,j,k+1)
- MR(pp,0,i+1,j,k-1) + MR(pp,0,i-1,j,k-1) )
+ MR(cc,0,i,j,k) * MR(pp,0,i-1,j, k)
+ MR(cc,1,i,j,k) * MR(pp,0,i, j-1,k)
+ MR(cc,2,i,j,k) * MR(pp,0,i, j, k-1)
+ MR(awrk1,0,i,j,k);
ss= (s0*MR(aa,3,i,j,k) - MR(pp,0,i,j,k))*MR(abnd,0,i,j,k);
auxgosa+= ss*ss;
MR(awrk2,0,i,j,k)= MR(pp,0,i,j,k) + omega*ss;
}
// Sums auxgosa over all ranks into gosa on the root (rank 0) only; on the other
// ranks gosa keeps the 0.0 assigned at the top of the iteration.
MPI_Reduce(&auxgosa,&gosa,1,MPI_FLOAT,MPI_SUM,0,MPI_COMM_WORLD);
// NOTE(review): this copies EVERY interior row of awrk2 into pp, but within this
// fragment a rank has only written awrk2 for its own band, and no exchange of
// the other ranks' rows is visible here. Rows outside the band would therefore
// come from whatever awrk2 held before -- likely why the result is wrong; confirm.
for(i=1 ; i<imax ; i++)
for(j=1 ; j<jmax ; j++)
for(k=1 ; k<kmax ; k++)
MR(pp,0,i,j,k)= MR(awrk2,0,i,j,k);
} /* end n loop */
Unfortunately, this didn't work. Could anyone give me some insight into this? I plan to use a similar strategy with OpenMP.
If awrk2 is a different array from a, p, b, c and wrk1, then there is no loop-carried dependence in the i, j, k loops (apart from the sum accumulated in gosa, which is a reduction).
A simple google search will point you to parallelized versions of the Himeno benchmark (MPI, OpenMP and hybrid MPI+OpenMP versions are available).
Following up on my previous question, "Malloc Memory Corruption in C", I now have another problem.
I have the same code. Now I am trying to multiply the values contained in the arrays A and vc (A * vc)
and store the result in res. Then A is set to zero, and I do a second multiplication with res and vc and store the values in A. (A and Q are square matrices; mc and vc are matrices, or arrays, with N rows and two columns.)
Here is my code :
/*
 * jacobi_gpu: the outer k loop is written to run for k = 0 .. dim-2. In each
 * round it forms index pairs (p,q), calls symschur2 for every pair whose entry
 * A[p + q*dim] exceeds tol in magnitude, records p/q in mc and the c/s values
 * in vc, then builds res from A and rewrites A from res.
 *
 * Returns nrot, the number of pairs for which symschur2 was called.
 * Q is not referenced anywhere in the body.
 * eye, symschur2, zero and affiche are defined outside this block.
 */
int jacobi_gpu(double A[], double Q[],
double tol, long int dim){
int nrot, p, q, k, tid;
double c, s;
double *mc, *vc, *res;
int i,kc;
double vc1, vc2;
mc = (double *)malloc(2 * dim * sizeof(double));
/* NOTE(review): vc is assigned twice in a row, so the first (2*dim) buffer is
 * leaked, and res is never assigned although it is written below and freed at
 * the end. The dim*dim allocation was presumably meant for res -- confirm. */
vc = (double *)malloc(2 * dim * sizeof(double));
vc = (double *)malloc(dim * dim * sizeof(double));
/* The allocation check covers mc and vc only; res is not checked. */
if( mc == NULL || vc == NULL){
fprintf(stderr, "pb allocation matricre\n");
exit(1);
}
nrot = 0;
for(k = 0; k < dim - 1; k++){
/* NOTE(review): mc was allocated with 2*dim elements; whether eye(mc, dim)
 * stays within that size depends on eye, which is not visible here. */
eye(mc, dim);
eye(vc, dim);
/* dim is a long int, so dim /2 is already an integer division; floor() only
 * sees the converted result. */
for(tid = 0; tid < floor(dim /2); tid++){
/* Pair selection for this round: p from tid and k; q is fixed at dim-1 for tid 0. */
p = (tid + k)%(dim - 1);
if(tid != 0)
q = (dim - tid + k - 1)%(dim - 1);
else
q = dim - 1;
printf("p = %d | q = %d\n", p, q);
if(fabs(A[p + q*dim]) > tol){
nrot++;
symschur2(A, dim, p, q, &c, &s);
/* The integer indices p and q are stored in mc as doubles.
 * 2*tid + 2*(dim - 2*tid) - 2 simplifies to 2*dim - 2*tid - 2, i.e. the
 * second pair of slots is counted back from the end of the buffer. */
mc[2*tid] = p; vc[2 * tid] = c;
mc[2*tid + 1] = q; vc[2*tid + 1] = -s;
mc[2*tid + 2*(dim - 2*tid) - 2] = p; vc[2*tid + 2*(dim - 2*tid) - 2 ] = s;
mc[2*tid + 2*(dim - 2*tid) - 1] = q; vc[2 * tid + 2*(dim - 2*tid) - 1 ] = c;
}
}
/* First product: res is built from A, using mc entries as column indices. */
for( i = 0; i< dim; i++){
for(kc=0; kc < dim; kc++){
if( kc < floor(dim/2)) {
vc1 = vc[2*kc + i*dim];
vc2 = vc[2*kc + 2*(dim - 2*kc) - 2];
}else {
vc1 = vc[2*kc+1 + i*dim];
/* NOTE(review): this branch subtracts 2*(dim - 2*kc) where the branch above
 * adds it -- confirm which sign is intended. */
vc2 = vc[2*kc - 2*(dim - 2*kc) - 1];
}
/* mc[...] is a double, so mc[2*kc] + i*dim has type double; C requires an
 * array subscript of integer type, hence the compiler rejects this line. */
res[kc + i*dim] = A[mc[2*kc] + i*dim]*vc1 + A[mc[2*kc + 1] + i*dim]*vc2;
}
}
zero(A, dim);
/* Second product: A is rebuilt from res. */
for( i = 0; i< dim; i++){
/* NOTE(review): the increment is k++ (the outer loop's counter), not kc++;
 * kc never advances, so kc < dim cannot become false once the loop is
 * entered. The test below also uses k where the first product uses kc. */
for(kc=0; kc < dim; k++){
if( k < floor(dim/2)){
vc1 = vc[2*kc + i*dim];
vc2 = vc[2*kc + 2*(dim - 2*kc) - 2];
}else {
vc1 = vc[2*kc+1 + i*dim];
vc2 = vc[2*kc - 2*(dim - 2*kc) - 1];
}
/* Same double-typed subscript problem as in the first product. */
A[kc + i*dim] = res[mc[2*kc] + i*dim]*vc1 + res[mc[2*kc + 1] + i*dim]*vc2;
}
}
affiche(mc,dim,2,"Matrice creuse");
affiche(vc,dim,2,"Valeur creuse");
}
free(mc);
free(vc);
free(res);
return nrot;
}
When I try to compile, I get this error:
jacobi_gpu.c: In function ‘jacobi_gpu’:
jacobi_gpu.c:103: error: array subscript is not an integer
jacobi_gpu.c:103: error: array subscript is not an integer
jacobi_gpu.c:118: error: array subscript is not an integer
jacobi_gpu.c:118: error: array subscript is not an integer
make: *** [jacobi_gpu.o] Erreur 1
The corresponding lines are where I store the results in res and A :
res[kc + i*dim] = A[mc[2*kc] + i*dim]*vc1 + A[mc[2*kc + 1] + i*dim]*vc2;
and
A[kc + i*dim] = res[mc[2*kc] + i*dim]*vc1 + res[mc[2*kc + 1] + i*dim]*vc2;
Can someone explain what this error means and how I can correct it?
Thanks for your help. ;)
The elements of mc are of type double. An array subscript has to have an integral type.
mc is a pointer to double.
A[mc[2*kc + 1]
In the expression above, you are indexing A with a value taken from mc (an array of double), and there are other similar cases. If you are sure the values are integral, cast them to int.
Your declaration of mc:
mc = (double *)malloc(2 * dim * sizeof(double));
And then you use mc multiple times in your array access. For example:
A[mc[2*kc + 1] ...]
Can you change mc to be an int array instead of a double?
Looks like you're using entries in mc, which are doubles, as a part of array subscripts, thus making the entire subscript a double.
If you meant to do this, try casting back to an integer. I don't know what the context of this problem is, but I'd take a real good look at what you're doing to ensure you really want to use the contents of mc as a subscript.
The compiler is complaining because the expression you use as an array index evaluates to type double.
In other words, the expression:
mc[2*kc] + i*dim
...will give you a result which is of type double. You may want to look into the rules for usual arithmetic type conversions in C if you don't understand why this expression evaluates to a double.
The problem is that array indices must be integral types, like int or long. This is because the array subscript operator in C is basically shorthand for pointer arithmetic. In other words, saying array[N] is the same as saying *(array + N). But you can't do pointer arithmetic with non-integral types like float or double, so of course the array subscript operator won't work that way either.
To fix this, you'll need to cast the result of your array-indexing expression to an integral type.
mc is an array of doubles, and floating point values cannot be used to index arrays. I notice that nowhere in your code do you assign anything other than integers to mc. You should consider changing mc's type to an array of integers.