Related
I'm trying to write a function that does naive matrix multiplication of two contiguous, row-major arrays. But when I attempt to print each value at the end I get garbage. I'm guessing it's because I've mixed up the proper iterations and scaling needed to jump rows/columns. Does anyone have any advice?
Full code necessary is below:
#include <stdio.h>
#include <stdlib.h>
/*
 * Naive matrix product C = A * B for contiguous row-major arrays.
 *
 * a        left operand, cdim_0 rows of astride values each
 * b        right operand, astride rows of bstride values each
 * c        output, cdim_0 rows of cdim_1 values each (overwritten)
 * astride  row length of a; also the length of each dot product
 * bstride  row length of b
 * cdim_0   number of rows of c
 * cdim_1   number of columns of c
 */
void dmatmul(double *a, double *b, double *c, int astride, int bstride, int cdim_0, int cdim_1) {
    for (int row = 0; row < cdim_0; row++) {
        for (int col = 0; col < cdim_1; col++) {
            double *lhs_row = a + row * astride;
            double *cell = c + row * cdim_1 + col;

            /* Accumulate directly into the output cell, starting from zero. */
            *cell = 0.0;
            for (int k = 0; k < astride; k++) {
                *cell += lhs_row[k] * b[k * bstride + col];
            }
        }
    }
}
/*
 * Driver: fills a 2x4 matrix X and a 4x2 matrix Y, multiplies them with
 * dmatmul() into the 2x2 matrix Z and prints Z.
 * The malloc() results are used without a NULL check and are never freed.
 */
int main(void) {
double *x, *y, *z;
int xdim_0, xdim_1, ydim_0, ydim_1, zdim_0, zdim_1, i, j;
xdim_0 = 2;
xdim_1 = 4;
ydim_0 = 4;
ydim_1 = 2;
zdim_0 = 2;
zdim_1 = 2;
x = (double *) malloc (xdim_0 * xdim_1 * sizeof(double));
y = (double *) malloc (ydim_0 * ydim_1 * sizeof(double));
z = (double *) malloc (zdim_0 * zdim_1 * sizeof(double));
/* X and Y both hold 8 values (2*4 and 4*2), so one loop fills both. */
for (i = 0; i < xdim_0 * xdim_1; i++) {
x[i] = i + 1;
y[i] = 2 * (i + 1);
}
/* The strides passed are the row lengths of X and Y. */
dmatmul(x, y, z, xdim_1, ydim_1, zdim_0, zdim_1);
printf("\nMatrix product of X and Y dimensions: (%d, %d)\n", zdim_0, zdim_1);
printf("Matrix product of X and Y values:");
for (i = 0; i < zdim_0; i++) {
printf("\n");
/* NOTE(review): the increment expression advances i, not j. j stays 0, so
 * the condition j < zdim_1 never becomes false and i keeps growing, which
 * indexes z beyond its 4 elements. */
for (j = 0; j < zdim_1; i++) {
printf("\t%f", z[i * zdim_1 + j]);
}
}
return 0;
}
The primary problem is a typo in the inner for loop doing the printing. You have:
for (j = 0; j < zdim_1; i++)
but you need to increment j, not i:
for (j = 0; j < zdim_1; j++)
Here's my code, which has an independent matrix printing function appropriate for the arrays you're using:
/* SO 7516-7451 */
#include <stdio.h>
#include <stdlib.h>
/*
 * Multiply two contiguous row-major matrices: c = a * b.
 *
 * a is cdim_0 x astride, b is astride x bstride (only its first cdim_1
 * columns are read), and c is cdim_0 x cdim_1. Every element of c is
 * overwritten.
 */
static void dmatmul(double *a, double *b, double *c, int astride, int bstride, int cdim_0, int cdim_1)
{
    for (int row = 0; row < cdim_0; ++row) {
        for (int col = 0; col < cdim_1; ++col) {
            const int out = row * cdim_1 + col;

            c[out] = 0.0;
            for (int k = 0; k < astride; ++k) {
                const double lhs = a[row * astride + k];
                const double rhs = b[k * bstride + col];
                c[out] += lhs * rhs;
            }
        }
    }
}
/*
 * Print a row-major matrix to stdout.
 *
 * Writes a heading "<tag> (<rows>x<cols>):" followed by one text line per
 * matrix row; each value is printed with "%4.0f".
 */
static void mat_print(const char *tag, int rows, int cols, double *matrix)
{
    printf("%s (%dx%d):\n", tag, rows, cols);

    for (int r = 0; r < rows; r++) {
        const double *line = matrix + r * cols;
        int c = 0;

        while (c < cols) {
            printf("%4.0f", line[c]);
            c++;
        }
        putchar('\n');
    }
}
/*
 * Demo driver: builds X (2x4) and Y (4x2), prints them, multiplies them with
 * dmatmul() and prints the 2x2 product twice (via mat_print() and with the
 * tab-separated "%f" layout). Allocation results are not checked.
 */
int main(void)
{
    const int x_rows = 2, x_cols = 4;
    const int y_rows = 4, y_cols = 2;
    const int z_rows = 2, z_cols = 2;

    double *x = malloc(x_rows * x_cols * sizeof(double));
    double *y = malloc(y_rows * y_cols * sizeof(double));
    double *z = malloc(z_rows * z_cols * sizeof(double));

    /* X and Y hold the same number of values, so one pass fills both. */
    const int count = x_rows * x_cols;
    for (int k = 1; k <= count; k++) {
        x[k - 1] = k;
        y[k - 1] = 2 * k;
    }

    mat_print("X", x_rows, x_cols, x);
    mat_print("Y", y_rows, y_cols, y);
    dmatmul(x, y, z, x_cols, y_cols, z_rows, z_cols);
    mat_print("Z", z_rows, z_cols, z);

    printf("\nMatrix product of X and Y dimensions: (%d, %d)\n", z_rows, z_cols);
    printf("Matrix product of X and Y values:\n");

    /* Z is stored row by row, so it can be walked with a single cursor. */
    const double *cell = z;
    for (int r = 0; r < z_rows; r++) {
        for (int c = 0; c < z_cols; c++) {
            printf("\t%f", *cell);
            cell++;
        }
        printf("\n");
    }
    return 0;
}
I've also initialized the variables as I declared them. The code should, but does not, check that the memory was allocated.
When I ran this code without your printing, I got the correct result, so then I took a good look at that and saw the problem.
X (2x4):
1 2 3 4
5 6 7 8
Y (4x2):
2 4
6 8
10 12
14 16
Z (2x2):
100 120
228 280
Matrix product of X and Y dimensions: (2, 2)
Matrix product of X and Y values:
100.000000 120.000000
228.000000 280.000000
I'm trying to solve Gaussian Elimination and Back Substitution in C.
But I've got Segmentation fault(Core dumped) error in shell.
this is the part of main code.
/* Build an n x n matrix as an array of n separately allocated rows. */
float **a = (float **) malloc(sizeof(float*) *n);
for (int i = 0; i < n; i++)
a[i] = (float*) malloc(sizeof(float) *n);
/* Right-hand side and solution vectors, n entries each. */
float *b = (float*) malloc(sizeof(float) *n);
float *x = (float*) malloc(sizeof(float) *n);
/* NOTE(review): nothing in this excerpt stores values into a, b or x, or
 * checks the malloc() results, before the two calls below. */
/* The addresses of the pointers are passed, matching the float*** / float**
 * parameters of the functions below. */
Gaussian(n, &a, &b);
BackSubstitution(n, &a, &b, &x);
and below is gaussian.c . I think there is some problem with gaussian.c
#include <math.h>
/*
 * Forward elimination: reduce the n x n system (*arr) * x = (*arr2) to
 * upper-triangular form in place.
 *
 * n     order of the system
 * arr   address of the row-pointer array of the coefficient matrix
 * arr2  address of the right-hand-side vector
 *
 * For every pivot column l, each row i below the pivot has the multiple
 * arr[i][l] / arr[l][l] of the pivot row subtracted from it, and the same
 * operation is applied to the right-hand side. No pivoting is done and a
 * zero pivot is not checked for.
 */
void Gaussian(int n, float ***arr, float **arr2)
{
    float **a = *arr;
    float *b = *arr2;

    for (int l = 0; l < n - 1; l++)
    {
        for (int i = l + 1; i < n; i++)
        {
            /* Take the multiplier before row i is modified. */
            float factor = a[i][l] / a[l][l];

            a[i][l] = 0.0f; /* eliminated entry is zero by construction */
            for (int j = l + 1; j < n; j++)
            {
                a[i][j] -= factor * a[l][j];
            }
            b[i] -= factor * b[l];
        }
    }
}
/*
 * Back substitution for an upper-triangular system (*arr) * x = (*arr2).
 *
 * n       order of the system
 * arr     address of the row-pointer array of the upper-triangular matrix
 * arr2    address of the right-hand-side vector
 * result  address of the solution vector; all n entries are written
 *
 * Works from the last unknown to the first. After result[i] is known, its
 * contribution is removed from the right-hand side of every row above and
 * the corresponding matrix entry is cleared, so both the matrix and the
 * right-hand side are modified. A zero diagonal entry is not checked for.
 */
void BackSubstitution(int n, float ***arr, float **arr2, float **result)
{
    float **a = *arr;
    float *b = *arr2;
    float *x = *result;

    /* The loop must include i == 0, otherwise x[0] is never computed. */
    for (int i = n - 1; i >= 0; i--)
    {
        x[i] = b[i] / a[i][i];
        for (int j = 0; j < i; j++)
        {
            b[j] = b[j] - x[i] * a[j][i];
            a[j][i] = 0;
        }
    }
}
Is there something wrong that generate segmentation fault?
A few things:
You have no reason to pass your arrays by pointer reference. So your functions gets much easier by eliminating one extra reference:
/*
 * Forward elimination: reduce the n x n system arr * x = arr2 to
 * upper-triangular form in place.
 *
 * n     order of the system
 * arr   coefficient matrix as an array of n row pointers
 * arr2  right-hand-side vector of n entries
 *
 * For every pivot column l, each row i below the pivot has the multiple
 * arr[i][l] / arr[l][l] of the pivot row subtracted from it, and the same
 * operation is applied to arr2. No pivoting is done and a zero pivot is not
 * checked for.
 */
void Gaussian(int n, float** arr, float* arr2) {
    for (int l = 0; l < n - 1; l++) {
        for (int i = l + 1; i < n; i++) {
            /* Take the multiplier before row i is modified. */
            float factor = arr[i][l] / arr[l][l];

            arr[i][l] = 0.0f; /* eliminated entry is zero by construction */
            for (int j = l + 1; j < n; j++) {
                arr[i][j] -= factor * arr[l][j];
            }
            arr2[i] -= factor * arr2[l];
        }
    }
}
/*
 * Back substitution for an upper-triangular system arr * x = arr2.
 *
 * n       order of the system
 * arr     upper-triangular matrix as an array of n row pointers
 * arr2    right-hand-side vector of n entries
 * result  solution vector; all n entries are written
 *
 * Works from the last unknown to the first. After result[i] is known, its
 * contribution is removed from arr2 for every row above and the matching
 * matrix entry is cleared, so arr and arr2 are both modified. A zero
 * diagonal entry is not checked for.
 */
void BackSubstitution(int n, float** arr, float* arr2, float* result) {
    /* The loop must include i == 0, otherwise result[0] is never computed. */
    for (int i = n - 1; i >= 0; i--) {
        result[i] = arr2[i] / arr[i][i];
        for (int j = 0; j < i; j++) {
            arr2[j] = arr2[j] - result[i] * arr[j][i];
            arr[j][i] = 0;
        }
    }
}
Second, you aren't actually initializing the contents of your arrays with valid data. Some of your array initializations are missing initializations to actual floating point data. Without this, your arrays have garbage data - which won't play well with floating point.
So aside from initializing your arrays correctly, you don't have to pass them in by pointer (because arrays degrade to pointers in function calls)
/* Example set-up: a 10 x 10 system with every entry given a defined value. */
int n = 10;
/* Array of n row pointers; each row is allocated separately below. */
float** a = (float**)malloc(n * sizeof(float*));
for (int i = 0; i < n; i++)
{
a[i] = (float*)malloc(n * sizeof(float));
for (int j = 0; j < n; j++)
{
a[i][j] = 0.0f; // placeholder: store the real coefficient for row i, column j here
}
}
float* b = (float*)malloc(n * sizeof(float));
float* x = (float*)malloc(n * sizeof(float));
for (int i = 0; i < n; i++)
{
b[i] = 0.0f; // placeholder for the right-hand side
x[i] = 0.0f;
}
/* NOTE(review): the zeros above only make the memory defined. Gaussian()
 * divides by arr[l][l], so real data must replace them before these calls.
 * The malloc() results are not checked here. */
/* The pointers are passed directly; no extra level of indirection is needed. */
Gaussian(n, a, b);
BackSubstitution(n, a, b, x);
I was preparing a code for simulating Distance Vector Routing using C, however, I faced Segmentation Fault while running.
The code:-
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/* Date : 03/06/2018
*
* Algorithm
*
* 1. get number of nodes from user
* 2. dynamic alloc new matrix nxn
* 3. create distance vector matrix, if dist > 1000 consider inf
* _| A B C D E F
* A| 0 5 2 3 i i
* B| 5 0 4 5
* C|
* D|
* E|
* F|
* ---------------
*
* 4. create new routing matrix of nxn
* 5. create new minimizing array for the node
* 6. find minimum of the array, allocate new value
*
* copyleft
*/
#define inf 1000
int min_r(int*, int*, int);
void dvr(int**, int**, char**, int);
void dvtDisp(int**, int);
void dvtDispNew(int **, char**, int);
/*
 * Reads the node count and an n x n table of distances from stdin, then
 * calls dvr() with the distance table, an output table and a hop table.
 */
int main(){
int n; // number of nodes
int i,j; // loop counters
printf("> enter the number of nodes in the network... ");
scanf("%d",&n);
// NOTE(review): this is one flat block of n*n ints, but it is used below as
// int** (DisMat[i][j]). DisMat[i] is therefore read as a pointer out of
// uninitialised memory and then dereferenced; no row pointers are ever set.
int **DisMat = (int **)malloc(n * n * sizeof(int)); // distance matrix
for(i=0; i<n; i++){ // one table per node; nodes are labelled 'A' + i (65 is 'A')
printf("> distance vector table for node %c\n",i+65);
for(j=0; j<n; j++){ // one entry per destination
printf("> distance from %c... ",j+65);
if(j==i) { DisMat[i][j] = 0; printf("0");} // distance to itself is fixed at 0, not read
else scanf("%d",&DisMat[i][j]);
}// destination loop
}// node loop
// NOTE(review): same flat-block-used-as-pointer-table pattern as DisMat.
int **NewDisMat = (int **)malloc(n * n * sizeof(int)); // output distance matrix
char **Hop = (char **)malloc(n * n * sizeof(char)); // hop matrix
for(i=0; i<n; i++){
for(j=0; j<n; j++){
Hop[i][j] = '-'; // '-' marks "no hop recorded"
}
}
dvr(DisMat, NewDisMat, Hop, n); // distance vector routing
return 0;
}//main
/*
 * One distance-vector update step for every node.
 *
 * dvt     l x l table of current distances; dvt[x][y] is the distance from
 *         node x to node y, and values >= inf mean "not directly connected"
 * newdvt  l x l output table; newdvt[x][y] receives the smaller of the
 *         direct distance dvt[x][y] and, over every neighbour k of x,
 *         dvt[x][k] + dvt[k][y]
 * hopper  l x l output table of first-hop labels ('A' + node index); the
 *         destination itself is stored when the direct entry is best.
 *         Entries whose best distance is still >= inf are left untouched.
 * l       number of nodes
 *
 * On allocation failure a message is written to stderr and the output
 * tables are left unmodified.
 */
void dvr(int *dvt[], int *newdvt[], char *hopper[], int l){ //DVR function
    if (l <= 0)
        return;

    /* A node has at most l-1 neighbours; the candidate list additionally
     * holds the direct entry, so l slots are enough for both arrays. */
    int *mini = malloc((size_t)l * sizeof *mini);   /* neighbour node ids   */
    int *mzer = malloc((size_t)l * sizeof *mzer);   /* candidate distances  */
    if (mini == NULL || mzer == NULL) {
        fprintf(stderr, "dvr: out of memory\n");
        free(mini);
        free(mzer);
        return;
    }

    for (int x = 0; x < l; x++) {
        /* Collect the neighbours of x: every other node closer than inf. */
        int conCount = 0;
        for (int y = 0; y < l; y++) {
            if (y != x && dvt[x][y] < inf)
                mini[conCount++] = y;
        }

        for (int y = 0; y < l; y++) {
            /* Candidate 0 is the direct entry, candidate z+1 goes via mini[z].
             * The list is rebuilt for every destination. */
            mzer[0] = dvt[x][y];
            for (int z = 0; z < conCount; z++)
                mzer[z + 1] = dvt[x][mini[z]] + dvt[mini[z]][y];

            /* Preset the index so it is defined even if min_r() does not
             * write it when the first candidate is the minimum. */
            int hopPoint = 0;
            int best = min_r(mzer, &hopPoint, conCount + 1);

            newdvt[x][y] = best;
            if (best < inf) {
                int via = (hopPoint == 0) ? y : mini[hopPoint - 1];
                hopper[x][y] = (char)('A' + via);
            }
        }
    }

    free(mini);
    free(mzer);
}//dvr
/*
 * Sequential minimum search.
 *
 * arr    values to search; must contain at least one element
 * index  receives the position of the minimum (the first one on ties);
 *        it is always written, including when the minimum is arr[0]
 * len    number of elements in arr
 *
 * Returns the smallest value.
 */
int min_r(int arr[], int *index, int len){
    int best = 0;

    for (int ind = 1; ind < len; ind++) {
        if (arr[ind] < arr[best])
            best = ind;
    }
    *index = best;
    return arr[best];
}//min_r
/*
 * Print a size x size distance table to stdout.
 *
 * The header line lists the column labels ('A' + column index). Each
 * following line starts with the row label and shows dvt[x][y] for column x
 * and row y. Every table row is terminated with a newline so that rows do
 * not run together on one line.
 */
void dvtDisp(int *dvt[], int size){
    int x, y;

    printf("_ |");
    for(x = 0; x<size; x++){
        printf("\t%c",'A' + x);
    }
    printf("\n");

    for(y = 0; y<size; y++){
        printf("%c |",y + 'A');
        for(x = 0; x < size; x++)
            printf("\t%d",dvt[x][y]);
        printf("\n");
    }
}
/*
 * Print a size x size distance table together with its hop table.
 *
 * The header line lists each column label ('A' + column index) followed by
 * a "hop" column. Each following line starts with the row label and shows
 * dvt[x][y] and hopto[x][y] for column x and row y. Every table row is
 * terminated with a newline so that rows do not run together on one line.
 */
void dvtDispNew(int *dvt[], char *hopto[], int size){
    int x, y;

    printf("_ |");
    for(x = 0; x<size; x++){
        printf("\t%c\thop",'A' + x);
    }
    printf("\n");

    for(y = 0; y<size; y++){
        printf("%c |",y + 'A');
        for(x = 0; x < size; x++)
            printf("\t%d\t%c",dvt[x][y],hopto[x][y]);
        printf("\n");
    }
}
I got the following output on the terminal during execution.
anwesh#bionic-Inspiron:~/Documents/NS2/LAB/prog5$ gcc main.c
anwesh#bionic-Inspiron:~/Documents/NS2/LAB/prog5$ ./a.out
> enter the number of nodes in the network... 5
> distance vector table for node A
Segmentation fault (core dumped)
I tried to run it on gdb but could not figure out what the results meant. Here the gdb output:-
Starting program: /home/anwesh/Documents/NS2/LAB/prog5/a.out
> enter the number of nodes in the network... 5
> distance vector table for node A
Program received signal SIGSEGV, Segmentation fault.
0x000055555555487e in main ()
(gdb)
Initially I thought it'd be a problem related to dynamic memory allocation, but I do not know the exact cause. I've checked the code multiple times to see if there are any naive mistakes, but I couldn't.
Please help me out here! Thanks in advance.
The line
int **DisMat = (int **)malloc(n * n * sizeof(int)); //Dynamic allocation of Distance Matrix
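is not valid. DisMat is an int ** that is used as an array of n row pointers, each of which must point to its own array of n ints; a single flat block of n * n ints contains no such row pointers. So we need to allocate n pointers to int first: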
int **DisMat = malloc(n * sizeof(int*));
Then we need to n-times allocate array of n ints:
for(size_t i = 0; i < n; ++i) {
DisMat[i] = malloc(n * sizeof(int));
}
The same goes for Hop and NewDisMat.
Remember, that malloc does not check for multiplication overflow.
The following code runs fine:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
/* Date : 03/06/2018
*
* Algorithm
*
* 1. get number of nodes from user
* 2. dynamic alloc new matrix nxn
* 3. create distance vector matrix, if dist > 1000 consider inf
* _| A B C D E F
* A| 0 5 2 3 i i
* B| 5 0 4 5
* C|
* D|
* E|
* F|
* ---------------
*
* 4. create new routing matrix of nxn
* 5. create new minimizing array for the node
* 6. find minimum of the array, allocate new value
*
* copyleft
*/
#define inf 1000
int min_r(int*, int*, int);
void dvr(int**, int**, char**, int);
void dvtDisp(int**, int);
void dvtDispNew(int **, char**, int);
/*
 * Reads the node count and an n x n distance table from stdin, runs dvr()
 * on it and releases all tables again.
 *
 * Each table is an array of n row pointers with n entries per row. Input
 * that cannot be parsed, or a node count that is not positive, ends the
 * program with EXIT_FAILURE, because n sizes every allocation below.
 */
int main(){
    int n;      //No of nodes
    int i,j;    //Counters

    printf("> enter the number of nodes in the network... ");
    if (scanf("%d",&n) != 1 || n <= 0) {
        fprintf(stderr, "invalid number of nodes\n");
        return EXIT_FAILURE;
    }

    int **DisMat = malloc(n * sizeof(*DisMat)); //Dynamic allocation of Distance Matrix
    assert(DisMat != NULL);
    for(i = 0; i < n; ++i) {
        DisMat[i] = malloc(n * sizeof(*DisMat[i]));
        assert(DisMat[i] != NULL);
    }

    for(i=0; i<n; i++){ // x directional loop
        printf("> distance vector table for node %c\n",i+65);
        for(j=0; j<n; j++){ // y directional loop
            printf("> distance from %c... ",j+65);
            if(j==i) { DisMat[i][j] = 0; printf("0");}
            else if (scanf("%d",&DisMat[i][j]) != 1) {
                /* Without this check the entry would stay uninitialised. */
                fprintf(stderr, "invalid distance\n");
                return EXIT_FAILURE;
            }
            printf("\n");
        }// y directional loop
    }// x directional loop

    int **NewDisMat = malloc(n * sizeof(*NewDisMat)); //New Distance Matrix
    assert(NewDisMat != NULL);
    for(i = 0; i < n; ++i) {
        NewDisMat[i] = malloc(n * sizeof(*NewDisMat[i]));
        assert(NewDisMat[i] != NULL);
    }

    char **Hop = malloc(n * sizeof(*Hop)); //New Hop Matrix
    assert(Hop);
    for(i = 0; i < n; ++i) {
        Hop[i] = malloc(n * sizeof(*Hop[i]));
        assert(Hop[i] != NULL);
    }
    for(i=0; i<n; i++){
        for(j=0; j<n; j++){
            Hop[i][j] = '-'; //All Hops Nullified
        }
    }

    dvr(DisMat, NewDisMat, Hop, n); //Distance Vector Routing

    /* Release the rows first, then the row-pointer arrays. */
    for(i = 0; i < n; ++i) {
        free(DisMat[i]);
        free(NewDisMat[i]);
        free(Hop[i]);
    }
    free(DisMat);
    free(NewDisMat);
    free(Hop);
    return 0;
}//main
/*
 * One distance-vector update step for every node.
 *
 * dvt     l x l table of current distances; dvt[x][y] is the distance from
 *         node x to node y, and values >= inf mean "not directly connected"
 * newdvt  l x l output table; newdvt[x][y] receives the smaller of the
 *         direct distance dvt[x][y] and, over every neighbour k of x,
 *         dvt[x][k] + dvt[k][y]
 * hopper  l x l output table of first-hop labels ('A' + node index); the
 *         destination itself is stored when the direct entry is best.
 *         Entries whose best distance is still >= inf are left untouched.
 * l       number of nodes
 *
 * On allocation failure a message is written to stderr and the output
 * tables are left unmodified.
 */
void dvr(int *dvt[], int *newdvt[], char *hopper[], int l){ //DVR function
    if (l <= 0)
        return;

    /* A node has at most l-1 neighbours; the candidate list additionally
     * holds the direct entry, so l slots are enough for both arrays. */
    int *mini = malloc((size_t)l * sizeof *mini);   /* neighbour node ids   */
    int *mzer = malloc((size_t)l * sizeof *mzer);   /* candidate distances  */
    if (mini == NULL || mzer == NULL) {
        fprintf(stderr, "dvr: out of memory\n");
        free(mini);
        free(mzer);
        return;
    }

    for (int x = 0; x < l; x++) {
        /* Collect the neighbours of x: every other node closer than inf. */
        int conCount = 0;
        for (int y = 0; y < l; y++) {
            if (y != x && dvt[x][y] < inf)
                mini[conCount++] = y;
        }

        for (int y = 0; y < l; y++) {
            /* Candidate 0 is the direct entry, candidate z+1 goes via mini[z].
             * The list is rebuilt for every destination. */
            mzer[0] = dvt[x][y];
            for (int z = 0; z < conCount; z++)
                mzer[z + 1] = dvt[x][mini[z]] + dvt[mini[z]][y];

            /* Preset the index so it is defined even if min_r() does not
             * write it when the first candidate is the minimum. */
            int hopPoint = 0;
            int best = min_r(mzer, &hopPoint, conCount + 1);

            newdvt[x][y] = best;
            if (best < inf) {
                int via = (hopPoint == 0) ? y : mini[hopPoint - 1];
                hopper[x][y] = (char)('A' + via);
            }
        }
    }

    free(mini);
    free(mzer);
}//dvr
/*
 * Sequential minimum search.
 *
 * arr    values to search; must contain at least one element
 * index  receives the position of the minimum (the first one on ties);
 *        it is always written, including when the minimum is arr[0]
 * len    number of elements in arr
 *
 * Returns the smallest value.
 */
int min_r(int arr[], int *index, int len){
    int best = 0;

    for (int ind = 1; ind < len; ind++) {
        if (arr[ind] < arr[best])
            best = ind;
    }
    *index = best;
    return arr[best];
}//min_r
/*
 * Print a size x size distance table to stdout.
 *
 * The header line lists the column labels ('A' + column index). Each
 * following line starts with the row label and shows dvt[x][y] for column x
 * and row y. Every table row is terminated with a newline so that rows do
 * not run together on one line.
 */
void dvtDisp(int *dvt[], int size){
    int x, y;

    printf("_ |");
    for(x = 0; x<size; x++){
        printf("\t%c",'A' + x);
    }
    printf("\n");

    for(y = 0; y<size; y++){
        printf("%c |",y + 'A');
        for(x = 0; x < size; x++)
            printf("\t%d",dvt[x][y]);
        printf("\n");
    }
}
/*
 * Print a size x size distance table together with its hop table.
 *
 * The header line lists each column label ('A' + column index) followed by
 * a "hop" column. Each following line starts with the row label and shows
 * dvt[x][y] and hopto[x][y] for column x and row y. Every table row is
 * terminated with a newline so that rows do not run together on one line.
 */
void dvtDispNew(int *dvt[], char *hopto[], int size){
    int x, y;

    printf("_ |");
    for(x = 0; x<size; x++){
        printf("\t%c\thop",'A' + x);
    }
    printf("\n");

    for(y = 0; y<size; y++){
        printf("%c |",y + 'A');
        for(x = 0; x < size; x++)
            printf("\t%d\t%c",dvt[x][y],hopto[x][y]);
        printf("\n");
    }
}
Side note: Remember that sizeof(int*) == sizeof(*DisMat), so I prefer:
int **DisMat = malloc(n * sizeof(*DisMat));
By using the pattern type *variable = malloc(n * sizeof(*variable)) I can be sure that I am allocating the correct element type (an array of pointers to int in the case of DisMat, because typeof(*DisMat) == int*), and I make fewer errors.
I have this code and I want to partition a table inp[2560] into 4 parts and for each part I want to calculate this:
MI = calcul__min(inp,640);
MA = calcul__max(inp,640);
MOY = calcul__moy(inp,640);
ectt = calcul__ect(inp,640);
I don't know how to use the for-loop for this.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#define ORDER 65
#define NP 2560
float inp[2560];
float ED1, ED2, ED3, ED4, Ap4;
float d1, d2, d3, d4, a4, total;
int i;
/*
 * Return the largest of the first N values of tab.
 *
 * The running maximum is seeded with tab[0]; starting from an uninitialised
 * variable would make the result indeterminate. For N <= 0 there is nothing
 * to compare and 0.0 is returned.
 */
double calcul__max(float tab[], int N)
{
    if (N <= 0)
        return 0.0;

    double max = tab[0];
    for (int i = 1; i < N; i++)
    {
        if (tab[i] > max)
            max = tab[i];
    }
    return max;
}
/*
 * Return the smallest of the first N values of tab.
 *
 * The running minimum is seeded with tab[0]; starting from an uninitialised
 * variable would make the result indeterminate. For N <= 0 there is nothing
 * to compare and 0.0 is returned.
 */
double calcul__min(float tab[], int N)
{
    if (N <= 0)
        return 0.0;

    double min = tab[0];
    for (int i = 1; i < N; i++)
    {
        if (tab[i] < min)
            min = tab[i];
    }
    return min;
}
/*
 * Return the arithmetic mean of the first N values of tab.
 *
 * The sum starts at zero and is divided by N once, after the loop, so the
 * function works for any segment length (the 640-sample case gives the same
 * divisor as before). For N <= 0 the mean is undefined and 0.0 is returned.
 */
double calcul__moy(float tab[],int N)
{
    if (N <= 0)
        return 0.0;

    double somme = 0.0;
    for (int i = 0; i < N; i++)
    {
        somme = somme + tab[i];
    }
    return somme / N;
}
float calcul__ect(float tab[], int N)
{
double moyenne, TM, som, ec, ect;
moyenne = calcul__moy(inp,640);
int i;
for (i = 0; i < N; i++)
{
TM = tab[i] - moyenne;
TM *= TM;
som += TM;
}
ec = som / 639;
ect = sqrt(ec);
return ect;
}
struct Calculstat
{
float Ea;
float amplitudemin;
float ecarttype;
float Ed2;
float amplitudemax;
};
/*
 * Apply a recursive linear filter to np input samples:
 *
 *   y[i] = sum_{j=0..k} b[j] * x[i-j]  -  sum_{j=1..k} a[j] * y[i-j],
 *   with k = min(i, ord)
 *
 * ord  filter order; a and b each hold ord + 1 coefficients (a[0] is unused)
 * a    feedback coefficients
 * b    feed-forward coefficients
 * np   number of samples in x and y
 * x    input samples, np entries
 * y    output samples, np entries; every entry 0 .. np-1 is written
 *
 * Exactly np outputs are produced: index np is never touched, and inputs
 * shorter than the filter order are handled by the same loop. Always
 * returns 0; the return type is spelled out because implicit int is not
 * valid in C99 and later.
 */
int filter(int ord, float *a, float *b, int np, float *x, float *y) {
    for (int i = 0; i < np; i++) {
        /* Only samples that already exist can contribute. */
        int taps = (i < ord) ? i : ord;

        y[i] = 0.0;
        for (int j = 0; j <= taps; j++)
            y[i] = y[i] + b[j] * x[i-j];
        for (int j = 0; j < taps; j++)
            y[i] = y[i] - a[j+1] * y[i-j-1];
    }
    return 0;
}
/*
 * Filters the NP-sample signal x twice (reversing it after each pass),
 * copies the result into the global inp[] and prints the minimum, maximum,
 * mean and standard deviation of its first 640 samples.
 * The initialiser of x and most coefficient assignments are elided in this
 * listing.
 */
main()
{
float x[NP]= { -84.786,...};
float y[NP], a[ORDER+1], b[ORDER+1];
int i, j;
b[0] = -0.005574892;
// [...] remaining b coefficients elided
b[65] = -0.005574892;
a[0] = 0;
// [...] remaining a coefficients elided
a[65] = 0;
// First pass: y = filter(x).
filter(ORDER, a, b, NP, x, y);
// Reverse the filtered signal back into x.
for (i = 0; i < NP; i++)
{
x[i]=y[NP-i-1];
}
// Second pass over the reversed signal.
filter(ORDER,a,b,NP,x,y);
// Reverse again so the samples are back in their original order.
for (i=0;i<NP;i++)
{
x[i] = y[NP-i-1];
}
for (i = 0; i < NP; i++)
{
y[i] = x[i];
}
// Publish the result in the global buffer read by the statistics below.
for (i = 0; i < NP; i++)
{
//printf("%f\n",y[i]);
inp[i]=y[i];
}
double MA,MI,MOY;
float ectt;
// Only the first 640 of the NP samples are analysed here.
MI = calcul__min(inp,640);
MA = calcul__max(inp,640);
MOY = calcul__moy(inp,640);
ectt = calcul__ect(inp,640);
printf("Le min de tableau est ""%f\n",MI);
printf("Le max de tableau est ""%f\n",MA);
printf ("la moyenne est de ""%g\n", MOY);
printf ("ecart type est ""%g\n", ectt);
}
As we know, arrays in C are passed to functions as a pointer to their first element.
And, as we also know, we can apply pointer arithmetic to pointers in C (except void pointers).
Here is the example
#include <stdio.h>
/* Placeholder for a routine that works on the floats starting at f.
 * The body is intentionally empty in this example. */
void foo(float *f)
{
// Some stuff
}
/* Calls foo() once for each of the four 640-element quarters of inp. */
int main()
{
float inp[2560];
foo(inp); /* elements 0 .. 639 */
foo(inp+(640)); /* elements 640 .. 1279 */
foo(inp+(2*640)); /* elements 1280 .. 1919 */
foo(inp+(3*640)); /* elements 1920 .. 2559 */
}
inp+X skips X floats in the array. Don't explicitly write X * sizeof(float); that would be wrong, because pointer arithmetic is already scaled by the size of the element type.
i have this code and i want to partion a table inp[2560] into 4 part
and for each part i want to calculate this :
Here is an inspiration:
/* Visit the four 640-sample parts of inp one after another. */
int i = 0;
while (i < 4)
{
whatever = calcul__min(inp+(i * 640),640);
i++; /* advance to the next part; without this the loop never ends */
}
First of all, my English level is poor, so sorry if something isn't well written...
I'm learning how to parallelize C code using OpenMP. The algorithm I'm trying to parallelize is the shallow water equations algorithm, and although with a simple #pragma omp parallel for in the most critical loop I've gained nearly 40% more performance, I know that my implementation is very poor and I'm not using the cores as well as I should. The structure of the code is simple: a 'main' that allocates memory, initializes some matrices and arrays, and calls a function called solver that does all the work, which is where I put the #pragma omp parallel for.
I was thinking that I could boost the performance by using a parallel section where the memory is allocated and initialized, so that every thread has all the data, but when I run the program I don't get any boost, and since I'm a rookie with this I don't know whether my thinking was wrong or my implementation was. I'd appreciate some help or a hint that could boost the performance of the algorithm. This is my homework and I don't want someone to do it for me, just a little help that lets me move forward...
I'll paste the code for better understanding:
MAIN FUNCTION (Allocations and initializations)
/*
 * Sets up the grid, allocates the state array Q and the flux scratch arrays,
 * fills in the initial condition and calls solver().
 * Parts of this listing are elided with dotted lines, and names such as
 * xstart, xend, ystart, yend and cell_size are defined outside this excerpt.
 */
int main(int argc, char **argv) {
long int i, j, m, n, M, N;
char *ptr;
long int s;
int flag, verbose;
double *Q;
double *x, *y;
double **ffx, **nFx, **ffy, **nFy;
double dx, dt, epsi, delta, dy, tend, tmp, stime;
// NOTE(review): the comment opened on the next line is not closed until the
// end of the "create file and verbose flags" line, so as listed the two
// assignments to M and N are part of the comment.
/* Default values to use: m volumes in the x-direction and n volumes in the y-direction
M = 1000;
N = 1000;
/* create file and verbose flags */
.......
.......
/* Parse command line options */
.......
.......
epsi = 2.0;
delta = 0.5;
dx = (xend - xstart) / (double) M;
dy = (yend - ystart) / (double) N;
dt = dx / sqrt( 9.81 * 5.0);
tend = 0.1;
/* Add two ghost volumes at each side of the domain */
m = M + 2;
n = N + 2;
/* Allocate memory for the domain */
/* The parallel region below wraps all allocation and initialisation. */
/* NOTE(review): the region has no worksharing construct and no private
 * clause, so every thread runs all of it and assigns the pointers and loop
 * counters declared above. */
#pragma omp parallel
{
Q = (double *) malloc(m * n * cell_size * sizeof(double));
x = (double *) malloc(m * sizeof(double));
y = (double *) malloc(n * sizeof(double));
/* Allocate memory for fluxes */
ffx = (double **) malloc(cell_size * sizeof(double *));
ffy = (double **) malloc(cell_size * sizeof(double *));
nFx = (double **) malloc(cell_size * sizeof(double *));
nFy = (double **) malloc(cell_size * sizeof(double *));
/* One contiguous buffer per flux array; the row pointers are set below. */
ffx[0] = (double *) malloc(cell_size * m * sizeof(double));
nFx[0] = (double *) malloc(cell_size * m * sizeof(double));
ffy[0] = (double *) malloc(cell_size * n * sizeof(double));
nFy[0] = (double *) malloc(cell_size * n * sizeof(double));
/* Point row i of each flux array at its slice of the contiguous buffer. */
for (i = 0; i < cell_size; i++) {
ffx[i] = ffx[0] + i * m;
nFx[i] = nFx[0] + i * m;
ffy[i] = ffy[0] + i * n;
nFy[i] = nFy[0] + i * n;
}
/* Coordinates start half a step before xstart / ystart and advance by dx / dy. */
for (i = 0,tmp= -dx/2 + xstart; i < m; i++, tmp += dx)
x[i] = tmp;
for (i = 0,tmp= -dy/2 + ystart; i < n; i++, tmp += dy)
y[i] = tmp;
/* Set initial Gauss hump */
/* First give every volume the flat state: component 0 = 4.0, others 0.0. */
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) {
Q(0, i, j) = 4.0;
Q(1, i, j) = 0.0;
Q(2, i, j) = 0.0;
}
}
/* Then add the hump to component 0 of the interior volumes only. */
for (i = 1; i < m-1; i++) {
for (j = 1; j < n-1; j++) {
Q(0, i, j) = 4.0 + epsi * exp(-(pow(x[i] - xend / 4.0, 2) + pow(y[j] - yend / 4.0, 2)) /
(pow(delta, 2)));
}
}
}
// Record start time
stime = gettime();
/* solver() performs the time stepping. */
solver(Q, ffx, ffy, nFx, nFy, m, n, tend, dx, dy, dt);`
}
SOLVER FUNCTION (Critical Section)
/*
This is the main solver routine.

It advances the state Q from time 0 in ceil(tend / dt) steps. Each step first
refreshes the boundary volumes and then calls laxf_scheme_2d().
Q(...) is a macro defined elsewhere in the file; as quoted in this file it
expands to Q[k + n * (j + m * i)] and therefore uses the local names m and n.
cell_size and NTHR are also defined outside this excerpt.
*/
void solver(double *Q, double **ffx, double **ffy, double **nFx, double **nFy,
int m, int n, double tend, double dx, double dy, double dt) {
/* Per-component factor applied when copying a neighbour into a boundary volume. */
double bc_mask[3] = {1.0, -1.0, -1.0};
double time;
int i, j, k, steps;
steps = ceil(tend / dt);
for (i = 0, time = 0.0; i < steps; i++, time += dt) {
/* Apply boundary condition */
/* Second index 0 and m-1 are filled from their neighbours 1 and m-2.
 * The parallel loop runs over the cell_size components k. */
#pragma omp parallel for private(j) num_threads (NTHR)
for (k = 0; k < cell_size; k++)
{
for (j = 1; j < n - 1 ; j++)
{
Q(k, 0, j) = bc_mask[k] * Q(k, 1, j);
Q(k, m-1, j) = bc_mask[k] * Q(k, m-2, j);
}
}
/* Third index 0 and n-1 are filled from their neighbours 1 and n-2. */
#pragma omp parallel for private(j) num_threads (NTHR)
for (k = 0; k < cell_size; k++)
{
for (j = 0; j < m; j++)
{
Q(k, j, 0) = bc_mask[k] * Q(k, j, 1);
Q(k, j, n-1) = bc_mask[k] * Q(k, j, n-2);
}
}
/* Update all volumes with the Lax-Friedrich's scheme */
laxf_scheme_2d(Q, ffx, ffy, nFx, nFy, m, n, dx, dy, dt);
}
}
/*
This is the Lax-Friedrich's scheme for updating volumes

Two sweeps are made. In each sweep, for every line i, fx()/fy() is called,
the numerical fluxes nFx/nFy are computed from ffx/ffy and neighbouring Q
values, and Q is then updated from the flux differences.
Q(...), cell_size, NTHR, fx() and fy() are defined outside this excerpt.
*/
void laxf_scheme_2d(double *Q, double **ffx, double **ffy, double **nFx, double **nFy,
int m, int n, double dx, double dy, double dt) {
int i, j, k;
/* Calculate and update fluxes in the x-direction */
/* NOTE(review): only k and j are private. nFx is a single scratch array
 * indexed by [k][j] only, so threads working on different i write the same
 * elements. ffx is passed to fx() for every i and is presumably written
 * there in the same way - confirm against fx(). */
#pragma omp parallel for private(k,j) num_threads (NTHR)
for (i = 1; i < n; i++) {
fx(Q, ffx, m, n, i);
for (k = 0; k < cell_size; k++)
for (j = 1; j < m; j++)
nFx[k][j] = 0.5 * ((ffx[k][j-1] + ffx[k][j]) - dx/dt * (Q(k, j, i) - Q(k, j-1, i)));
/* The update reads nFx[k][j+1], so it stops one element earlier (j < m-1). */
for (k = 0; k < cell_size; k++)
for (j = 1; j < m-1; j++)
Q(k, j, i) = Q(k, j, i) - dt/dx * ((nFx[k][j+1] - nFx[k][j]));
}
/* Calculate and update fluxes in the y-direction */
/* NOTE(review): same sharing pattern as above, here for nFy and ffy. */
#pragma omp parallel for private(k,j) num_threads (NTHR)
for (i = 1; i < m; i++) {
fy(Q, ffy, m, n, i);
for (k = 0; k < cell_size; k++)
for (j = 1; j < n; j++)
nFy[k][j] = 0.5 * ((ffy[k][j-1] + ffy[k][j]) - dy/dt * (Q(k, i, j) - Q(k, i, j -1)));
/* The update reads nFy[k][j+1], so it stops one element earlier (j < n-1). */
for (k = 0; k < cell_size; k++)
for (j = 1; j < n-1; j++)
Q(k,i,j) = Q(k,i,j) - dt/dy * ((nFy[k][j+1] - nFy[k][j]));
}
}
As I understand there is no data dependency in the loops of the solver function and it's sub-functions, and since putting a parallel region in the allocation and data initialization did nothing, I don't know how to continue.
Thanks in advance!
There are multiple problems with your code. First of all, you have data races there, since you write to shared variables, such as Q, x, and y, by all threads. Either do the allocations outside of a parallel region or perform them by a single thread only (#pragma omp master or #pragma omp single).
Then, you don't parallelize the for loops in the initialization section. In fact, all these loops are executed by all threads over their whole ranges (again with data races and likely a lot of cache contention). You should add #pragma omp for to these loops so that their iterations are divided among the threads of the enclosing parallel region. For nested loops, the collapse clause might be useful.
Also, be sure that there are no data races in the solver() and laxf_scheme_2d() functions. Seemingly, most of the calculation time is spent within laxf_scheme_2d(); however, this function does not appear to be run in parallel at all. Does it use OpenMP internally?
Thank you for the answers. I've seen many problems in my implementation, first of all the heaviest function where all the job is done is laxf_scheme_2d.
About the Q variable i have this #define Q(i, j, k) Q[((k) + n * ((j) + m * (i)))]
This is laxf_scheme_2d
/*
 * Lax-Friedrichs update of all volumes, one sweep per direction.
 * In this version the j loops are outermost and the k loops innermost.
 * Q(...), cell_size, fx() and fy() are defined outside this excerpt.
 * NOTE(review): the "omp for" directives below are not inside a parallel
 * region in this function; whether the caller provides one is not visible
 * here.
 */
void laxf_scheme_2d(double *Q, double **ffx, double **ffy, double **nFx, double **nFy,
int m, int n, double dx, double dy, double dt) {
int i, j, k;
/* Calculate and update fluxes in the x-direction */
/* NOTE(review): j and k are declared outside the loop and are not listed as
 * private; nFx is one scratch array indexed by [k][j] only. */
#pragma omp for
for (i = 1; i < n; i++) {
fx(Q, ffx, m, n, i);
for (j = 1; j < m; j++)
for (k = 0; k < cell_size; k++)
nFx[k][j] = 0.5 * ((ffx[k][j-1] + ffx[k][j]) -
dx/dt * (Q(k, j, i) - Q(k, j-1, i)));
/* The update reads nFx[k][j+1], so it stops one element earlier (j < m-1). */
for (j = 1; j < m-1; j++)
for (k = 0; k < cell_size; k++)
Q(k, j, i) = Q(k, j, i) - dt/dx * ((nFx[k][j+1] - nFx[k][j]));
}
/* Calculate and update fluxes in the y-direction */
#pragma omp for
for (i = 1; i < m; i++) {
fy(Q, ffy, m, n, i);
for (j = 1; j < n; j++)
for (k = 0; k < cell_size; k++)
nFy[k][j] = 0.5 * ((ffy[k][j-1] + ffy[k][j]) -
dy/dt * (Q(k, i, j) - Q(k, i, j -1)));
/* The update reads nFy[k][j+1], so it stops one element earlier (j < n-1). */
for (j = 1; j < n-1; j++)
for (k = 0; k < cell_size; k++)
Q(k,i,j) = Q(k,i,j) - dt/dy * ((nFy[k][j+1] - nFy[k][j]));
}
}
Functions fx and fy are very simple and have no data dependencies. I can't put the #pragma omp parallel for above the first for loop because there are data races, but for now I can't see how to change this code to overcome them.
long int i, j, m, n, M, N;
char *ptr;
long int s;
int flag, verbose;
double *Q;
double *x, *y;
double **ffx, **nFx, **ffy, **nFy;
double dx, dt, epsi, delta, dy, tend, tmp, stime;
M = 1000;
N = 1000;
/* Add two ghost volumes at each side of the domain */
m = M + 2;
n = N + 2;
/* Allocate memory for the domain */
Q = (double *) malloc(m * n * cell_size * sizeof(double));
x = (double *) malloc(m * sizeof(double));
y = (double *) malloc(n * sizeof(double));
/* Allocate memory for fluxes */
ffx = (double **) malloc(cell_size * sizeof(double *));
ffy = (double **) malloc(cell_size * sizeof(double *));
nFx = (double **) malloc(cell_size * sizeof(double *));
nFy = (double **) malloc(cell_size * sizeof(double *));
ffx[0] = (double *) malloc(cell_size * m * sizeof(double));
nFx[0] = (double *) malloc(cell_size * m * sizeof(double));
ffy[0] = (double *) malloc(cell_size * n * sizeof(double));
nFy[0] = (double *) malloc(cell_size * n * sizeof(double));