Implementation of OpenMP

Implementation of OpenMP - c

void collision_f()
{
int x;
long double feq[Q], feq_R[Q], feq_B[Q], feq_force[Q];
long double mR[Q], mB[Q], meq_R[Q], meq_B[Q];
long double col_R[Q], col_B[Q];
forces_f();
#pragma omp parallel
{
#pragma omp for
for (x =0; x <NX; x++)
{
for (int y =0; y <NY; y++)
{
if ( (bnode[x][y] ==0) || (bnode[x][y] ==1) || (bnode[x][y] ==2) )
{
long double uxeq = ux_f[x][y] + Force_x_f[x][y]/rho_f[x][y];
long double uyeq = uy_f[x][y] + Force_y_f[x][y]/rho_f[x][y];
for (int i =0; i <Q; i++)
{
feq[i] = feq_R[i] = feq_B[i] = feq_force[i] = 0.0;
//equilibrium distribution
long double udotc_f = ux_f[x][y]*cx[i] + uy_f[x][y]*cy[i];
long double u2_f = pow(ux_f[x][y], 2) + pow(uy_f[x][y], 2);
feq[i] = wt[i]*rho_f[x][y]*(1.0 + 3.0*udotc_f + 4.5*pow(udotc_f, 2) - 1.5*u2_f);
feq_R[i] = wt[i]*rho_R_f[x][y]*(1.0 + 3.0*udotc_f + 4.5*pow(udotc_f, 2) - 1.5*u2_f);
feq_B[i] = wt[i]*rho_B_f[x][y]*(1.0 + 3.0*udotc_f + 4.5*pow(udotc_f, 2) - 1.5*u2_f);
long double udotc_force = uxeq*cx[i] + uyeq*cy[i];
long double u2_force = pow(uxeq, 2) + pow(uyeq, 2);
feq_force[i] = wt[i]*rho_f[x][y]*(1.0 + 3.0*udotc_force + 4.5*pow(udotc_force, 2) - 1.5*u2_force);
//printf("%d\t%d\t%d\t%Lf\t%Lf\t%Lf\t%Lf\n", x, y, i, feq[i], feq_R[i], feq_B[i], feq_force[i]);
}
//Calculating moments and meq
for (int i =0; i <Q; i++)
{
meq_R[i] = meq_B[i] = mR[i] = mB[i] = 0.0;
for (int j =0; j <Q; j++)
{
mR[i] += M[i][j]*r1[x][y][j];
meq_R[i] += M[i][j]*feq_R[j];
mB[i] += M[i][j]*b1[x][y][j];
meq_B[i] += M[i][j]*feq_B[j];
//printf("%d,%d\t%d,%d\t%Lf\t%Lf\t%Lf\t%Lf\n", x, y, i, j, mR[i], meq_R[i], mB[i], meq_B[i]);
}
//printf("%d\t%d\t%d\t%Lf\t%Lf\t%Lf\t%Lf\n", x, y, i, mR[i], meq_R[i], mB[i], meq_B[i]);
}
//Collision equation
for (int i =0; i <Q; i++)
{
col_R[i] = col_B[i] = 0.0;
for (int j =0; j <Q; j++)
{
col_R[i] += stmiv_f[x][y][i][j]*(mR[j] - meq_R[j]);
col_B[i] += stmiv_f[x][y][i][j]*(mB[j] - meq_B[j]);
}
long double force = feq_force[i] - feq[i];
r2[x][y][i] = r1[x][y][i] - col_R[i];
b2[x][y][i] = b1[x][y][i] - col_B[i];
f1[x][y][i] = r2[x][y][i] + b2[x][y][i] + force;
//Recoloring using d'Ortona's segregation method
r2[x][y][i] = (rho_R_f[x][y]/rho_f[x][y]) * (f1[x][y][i] + BETA_LKR*wt[i]*(rho_f[x][y] - rho_R_f[x][y])*(n_x[x][y]*cx[i] + n_y[x][y]*cy[i]));
b2[x][y][i] = f1[x][y][i] - r2[x][y][i];
}
if (rho_R_f[x][y] <=EVAP_LIM*rho_r_f)
{
for (int i =0; i <Q; i++)
{
b2[x][y][i] = f1[x][y][i];
r2[x][y][i] = 0.0;
}
}
if (rho_B_f[x][y] <=EVAP_LIM*rho_r_f)
{
for (int i =0; i <Q; i++)
{
r2[x][y][i] = f1[x][y][i];
b2[x][y][i] = 0.0;
}
}
}
}
}
}
return;
}
I am trying to implement OpenMP in this function. But I am getting nan for feq_R[Q] and -nan for meq_R[Q], meq_B[Q] only for a few combination of x, y. Also for each run, the values of x, y are different where I get -nan kind of solution. I have also checked that rho_f[x][y] is not zero for those x, y. I have also tried omp for reduction for meq_R[Q], meq_B[Q] only to be unsuccessful. FYI, the serial code runs without any problem. Any help is greatly appreciated.

Related

Simple C example of add/sub/mul/div operations in double-precision floating-points using a single-precision Floating-point system

I am working on an algorithm which requires calculations in large numbers, upto e+30. I am using a 32 bit system with compiler support of 32 bits for long/float/double. So far, by searching online, I've learned that single-precision floating points (FPs) can be used for Double-precision FPs.
From this question asked by someone earlier (Emulate “double” using 2 “float”s) I found this paper which has the algorithm to work with Double-precision FPs in GPUs. It is too confusing for me to implement in C. I just need four basic mathematical operations. Is there any way I could find an example for this which will help me understand it better?
Thanks in advance.
Here is the Code I am working on. It might have errors i can not see, any suggestions would be appreciated to rectify error but that is preety much what I am trying to implement. In the algorithm, POLYNOMIAL_ORDER should be able to go up to forth order (can settle at Third order if the standard deviation is smaller). Few things I am not sure about are 1) Procedures make_float() and make_float() are correct or not, 2) Use of make_float() in the program.
#define POLYNOMIAL_ORDER (3)
#define TC_TABLE_SIZE (14)
typedef struct vector_float2{
float x;
float y;
}float2;
typedef struct
{
float tc0;
float tc1;
float tc2;
float tc3;
}POLYNOMIALS;
typedef struct {
int16_t Temp;
int16_t Comp;
} TempCompPair;
volatile TempCompPair TCtable[TC_TABLE_SIZE] = {{22452,1651},
{25318,1444},
{28268,1133},
{31120,822},
{34027,511},
{36932,185},
{39770,-81},
{42685,-288},
{45531,-407},
{48425,-632},
{51401,-703},
{54460,-1143},
{57202,-1420},
{60027,-1652}};
POLYNOMIALS polynomials;
float matrix[TC_TABLE_SIZE][TC_TABLE_SIZE] = {0};
float average[TC_TABLE_SIZE] = {0};
float make_float(float x, float y)
{
return x+y;
}
float2 make_float2(float a, float b)
{
float2 f2 = {a,b};
return f2;
}
float2 quickTwoSum(float a, float b)
{
float s = a+b;
float e = b - (s - a);
float2 result = {s, e};
return result;
}
float2 twoSum(float a, float b)
{
volatile float s = a + b;
float v = s - a;
float e = (a - (s - v)) + (b - v);
float2 result = {s , e};
return result;
}
float2 df64_add(float2 a, float2 b)
{
float2 s,t;
s = twoSum(a.x, b.x);
t = twoSum(a.y, b.y);
s.y += t.x;
s = quickTwoSum(s.x, s.y);
s.y += t.y;
s = quickTwoSum(s.x, s.y);
return s;
}
float2 split(float a)
{
const float split = 4097; //(1<<12) + 1
float t = a *split;
float a_hi = t - (t - a);
float a_lo = a - a_hi;
float2 result = {a_hi, a_lo};
return result;
}
float2 twoProd(float a, float b)
{
float p = a*b;
float2 aS = split(a);
float2 bS = split(b);
float err = ((aS.x * bS.x - p)
+ aS.x * bS.y + aS.y * bS.x)
+ aS.y * bS.y;
float2 result = {p, err};
return result;
}
float2 df64_mult(float2 a, float2 b)
{
float2 p;
p = twoProd(a.x,b.x);
p.y += a.x * b.y;
p.y += a.y * b.x;
p = quickTwoSum(p.x,p.y);
return p;
}
float2 calculate_power(float base, int pow)
{
int i = 0;
float2 base_f2 = make_float2(base,0);
float2 result_f2 = {1,0};
if(pow == 0)
{
return result_f2;
}
if(pow > 0)
{
if(pow == 1)
{
return base_f2;
}
else
{
for(i = 0; i < pow; i++)
{
result_f2 = df64_mult(result_f2,base_f2);
}
return result_f2;
}
}
else
{
return result_f2;
//Mechanism for negative powers
}
}
void TComp_Polynomial()
{
int i;
int j;
int k;
int size;
float temp;
float2 sum = {0,0};
float2 result0 = {0,0};
float2 result1 = {0,0};
float x[TC_TABLE_SIZE];
float y[TC_TABLE_SIZE];
for(i = 0; i < TC_TABLE_SIZE; i++)
{
x[i] = (float) TCtable[i].Temp;
y[i] = (float) TCtable[i].Comp;
}
size = i;
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
sum.x = 0;
sum.y = 0;
for(k = 0; k < size; k++)
{
// Expression simplified below: **sum += pow(x[k],i+j)**
result0 = calculate_power(x[k], i+j);
sum = df64_add(result0,sum);
}
matrix[i][j] = make_float(sum.x,sum.y);
}
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
sum.x = 0;
sum.y = 0;
for(j = 0; j < size; j++)
{
// Expression simplified below: **sum += y[j] * pow(x[j],i)**
result0 = calculate_power(x[j], i);
result1 = df64_mult( result0 , make_float2(y[j],0) );
sum = df64_add(result1,sum);
}
average[i] = make_float(sum.x,sum.y);
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
if(j != i)
{
if(matrix[i][i]!= 0)
{
temp = matrix[j][i]/matrix[i][i];
}
for(k = i; k < POLYNOMIAL_ORDER; k++)
{
matrix[j][k] -= temp*matrix[i][k];
}
average[j] -= temp*average[i];
}
}
}
if(matrix[0][0] != 0)
{
polynomials.tc0 = average[0]/matrix[0][0];
}
if(matrix[1][1] != 0)
{
polynomials.tc1 = average[1]/matrix[1][1];
}
if(matrix[2][2] != 0)
{
polynomials.tc2 = average[2]/matrix[2][2];
}
if(matrix[3][3] != 0)
{
polynomials.tc3 = average[3]/matrix[3][3];
}
}
and then use the struct polynomials.tc0/1/2/3 in below expression
// Y = T^3 * X3 + T^2 * X2 + T^1 * X1 + X0 ;
double calculate_equation(uint16_t TEMP)
{
double Y;
if(POLYNOMIAL_ORDER == 1)
{
Y = polynomials.tc1*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 2)
{
Y = (polynomials.tc2 * (double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 3)
{
Y = ((polynomials.tc3 * (double)TEMP + polynomials.tc2)*(double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 4)
{
Y = (((polynomials.tc4 * (double)TEMP + polynomials.tc3)*(double)TEMP + polynomials.tc2)*(double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
return Y;
}
And standard Deviation is calculated is as follows:
//sqrt(sigma(error^2))
for(i = 0; i < TC_TABLE_SIZE; i++)
{
actual_comp[i] =(int) calculate_equation(TCtable[i].Temp);
error[i] = TCtable[i].Comp - actual_comp[i] ;
error_sqr += error[i]*error[i];
printf("%u\t%d\t\t%e\n", TCtable[i].Temp, TCtable[i].Comp, actual_comp[i] );
}
error_sqrt = sqrt(error_sqr);
Reference:
http://hal.archives-ouvertes.fr/docs/00/06/33/56/PDF/float-float.pdf Guillaume Da Graça, David Defour Implementation of float-float operators on graphics hardware, 7th conference on Real Numbers and Computers, RNC7.

I was able to implement this code without using double precision as the calculations were in the range of Float.
Here's my implementation, let me know if I can optimize it better.
typedef struct
{ int64_t tc0;
int64_t tc1;
int64_t tc2;
int64_t tc3;
int64_t tc4;
}POLYNOMIALS;
POLYNOMIALS polynomials = {0,0,0,0,0};
int16_t TempCompIndex;
int64_t x[TC_TABLE_SIZE];
int64_t y[TC_TABLE_SIZE];
float matrix[POLYNOMIAL_ORDER+1][POLYNOMIAL_ORDER+1] = {0};
float average[POLYNOMIAL_ORDER+1] = {0};
void TComp_Polynomial()
{
int i;
int j;
int k;
int size;
float temp;
float sum = 0;
float powr = 0;
float prod;
int64_t x[TC_TABLE_SIZE];
int64_t y[TC_TABLE_SIZE];
for(i = 0; i < TC_TABLE_SIZE; i++)
{
x[i] = (int64_t) TCtable[i].Temp;
y[i] = (int64_t) TCtable[i].Comp<<PRECISION;
printf("x: %lld, y:%lld\n",x[i],y[i]);
}
size = i;
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
sum = 0;
powr = 0;
for(k = 0; k < size; k++)
{
//printf("x[%d]: %ld, i: %d ,j: %d ", k, x[k],i,j);
powr = pow(x[k],i+j);
//printf("Power: %f, sum: %f\n ",powr,sum);
sum += powr;
//printf("%f\r\n",powr);
//printf("sum: %lf\n",sum );
}
matrix[i][j] = sum;
printf("sum: %g\n",sum);
}
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
sum = 0;
powr = 0;
for(j = 0; j < size; j++)
{
//sum += y[j] * pow(x[j],i)
//printf("sum: %lf, y[%d]: %lf, x[%d]: %lf^%d ",sum,j,y[j], i, x[j],i);
//printf("x[%d]:%lld ^ %d\t",j,x[j],i);
powr = (float) pow(x[j],i);
printf("powr: %f\t",powr);
prod = (float) y[j] * powr;
printf("prod:%f \t %lld \t", prod,y[j]);
sum += (float) prod;
printf("sum: %f \n",sum);
}
average[i] = sum;
//printf("#Avg: %f\n",average[i]);
}
printf("\n\n");
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
if(j != i)
{
if(matrix[i][i]!= 0)
{
//printf("matrix%d%d: %g / matrix%d%d: %g =\t ",j,i,matrix[j][i],i,i,matrix[i][i]);
temp = matrix[j][i]/matrix[i][i];
//printf("Temp: %g\n",temp);
}
for(k = i; k < POLYNOMIAL_ORDER; k++)
{
matrix[j][k] -= temp*matrix[i][k];
//printf("matrix[%d][%d]:%g, %g, matrix[%d][%d]:%g\n",j,k,matrix[j][k], temp,i,k,matrix[i][k]);
}
//printf("\n\n");
//print_matrix();
printf("\n\n");
//printf("avg%d: %g\ttemp: %g\tavg%d: %g\n\n",j,average[j],temp,i,average[i]);
average[j] -= temp*average[i];
printf("#Avg%d:%g\n",j,average[j]);
//print_average();
}
}
}
print_matrix();
print_average();
/* Calculate polynomial Coefficients (n+1) based on the POLYNOMIAL_ORDER (n) */
#ifndef POLYNOMIAL_ORDER
#elif POLYNOMIAL_ORDER == 0
if(matrix[0][0] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
}
#elif POLYNOMIAL_ORDER == 1
if(matrix[1][1] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
}
#elif POLYNOMIAL_ORDER == 2
if(matrix[2][2] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
}
#elif POLYNOMIAL_ORDER == 3
if(matrix[3][3] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);
}
#elif POLYNOMIAL_ORDER == 4
if(matrix[4][4] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);
polynomials.tc4 = (int64_t) (average[4]/matrix[4][4]);
}
#endif
}
int16_t calculate_equation(uint16_t TEMP)
{
int64_t Y = 0;
int16_t TempComp = 0;
#ifndef POLYNOMIAL_ORDER
#elif POLYNOMIAL_ORDER == 0
Y = polynomials.tc0;
#elif POLYNOMIAL_ORDER == 1
Y = polynomials.tc1* ((int64_t)TEMP) + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 2
Y = (polynomials.tc2 * ((int64_t)TEMP) + polynomials.tc1)*(int64_t)TEMP + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 3
Y = ((polynomials.tc3 * ((int64_t)TEMP) + polynomials.tc2)*((int64_t)TEMP) + polynomials.tc1)*((int64_t)TEMP) + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 4
Y = (((polynomials.tc4 * (int64_t)TEMP + polynomials.tc3)*(int64_t)TEMP + polynomials.tc2)*(int64_t)TEMP + polynomials.tc1)*(int64_t)TEMP + polynomials.tc0;
#endif
TempComp = (int16_t) (Y>>PRECISION_BITS);
return TempComp;
}
void main(){
int16_t TempComp = 0;
TempCompValue = (int16_t) calculate_equation(Mon_Temp);
}
Note: Calculate_Equation() is being called once a second and it is required to not use float in order to avoid floating point arithmetic, hence I am using non-float variables in that function.
It is working right for me and haven't discovered any bug after initial testing.
Thanks every one for taking interest in my post, if not the answer, got to learn some new techniques. And thanks #chux.

how to produce -400,-200,-400 in sequential order

I am trying to write a for loop in the second version which produce the same result in the original code but i am not sure how to get -400,-200,-400 in sequential order.
original code:
p->m_p[0] = randFloat(-400.0f, 400.0);
p->m_p[1] = randFloat(-200.0f, 200.0);
p->m_p[2] = randFloat(-400.0f, 400.0);
second version:
float x = -800;
float y = 800;
for(int i = 0; i < 4; i++)
{
plNew->m_fPosition[i] = randFloat(x / 2,y / 2);
}

If you need it to work in C (or in C++ before C++11), this would work:
#define NUMBER_OF_VALUES 3
float bounds[NUMBER_OF_VALUES] = { 400.0f, 200.0f, 400.0f };
for (int i = 0; i < NUMBER_OF_VALUES; i++)
{
plNew->m_fPosition[i] = randFloat(-bounds[i], bounds[i]);
}
You can extend this to make NUMBER_OF_VALUES be 4 or a larger number as long as you initialize all the members of bounds[NUMBER_OF_VALUES] with the desired constants.
A nice feature of this is that the sequence of constants can be anything you like,
not limited to alternating 400, 200, 400 or any other regular sequence.

Something like this?
for (int i = 0; i < 4; i++) {
float x, y;
if (i % 2) {
x = -400.0f;
y = 400.0f;
} else {
x = -200.0f;
y = 200.0f;
}
p->m_p[i] = randFloat(x, y);
}

What about?
float x = -800;
float y = 800;
for(int i = 0; i < 4; i++)
{
float z = 2.0 * ((float)((i + 1) % 2 + 1));
plNew->m_fPosition[i] = randFloat(x / z, y / z);
}

I would suggest keeping it simple, using something like the following :
const float arr3[] = {-400.0f, -200.0f, -400.0f};
for(int i = 0; i < 3; i++)
{
plNew->m_fPosition[i] = arr3[i];
}

float x = -400;
float y = 400;
for(int i = 0; i < 3; i++)
{
plNew->m_fPosition[i] = randFloat(x / (1 + (i & 1)), y / (1 + (i & 1)));
}

High Pass Filter using FFTW in C

I have a question regarding FFT. I already manage to do FFT forward and backward using FFTW in C. Now, I want to apply high pass filter for edge detection, some of my source said that just zeroing the centre of the magnitude.
This is my input image
http://i62.tinypic.com/2wnxvfl.jpg
Basically what I do are :
Forward FFT
Convert the output to 2D array
Do forward FFT shifting
Make the real and imag value to 0 when the distance from the centre is 25% of the height
Generate the magnitude
Do backward FFT shifting
Convert into 1D array
Do Backward FFT.
This is the original magnitude, the processed magnitude, and the result
http://i58.tinypic.com/aysx9s.png
can someone help me, to tell me which part is wrong and how to do the high pass filtering using FFTW in C.
Thank You.
The Source Code:
unsigned char **FFT2(int width,int height, unsigned char **pixel, char line1[100],char line2[100], char line3[100],char filename[100])
{
fftw_complex* in, * dft, * idft, * dft2;
//fftw_complex tmp1,tmp2;
fftw_plan plan_f,plan_i;
int i,j,k,w,h,N,w2,h2;
w = width;
h = height;
N = w*h;
unsigned char **pixel_out;
pixel_out = malloc(h*sizeof(unsigned char*));
for(i = 0 ; i<h;i++)
pixel_out[i]=malloc(w*sizeof(unsigned char));
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) *N);
dft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) *N);
dft2 = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) *N);
idft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) *N);
/*run forward FFT*/
plan_f = fftw_plan_dft_2d(w,h,in,dft,FFTW_FORWARD,FFTW_ESTIMATE);
for(i = 0,k = 0 ; i < h ; i++)
{
for(j = 0 ; j < w ; j++,k++)
{
in[k][0] = pixel[i][j];
in[k][1] = 0.0;
}
}
fftw_execute(plan_f);
double maxReal = 0.0;
for(i = 0 ; i < N ; i++)
maxReal = dft[i][0] > maxReal ? dft[i][0] : maxReal;
printf("MAX REAL : %f\n",maxReal);
/*fftshift*/
//convert to 2d
double ***temp1;
temp1 = malloc(h * sizeof (double**));
for (i = 0;i < h; i++){
temp1[i] = malloc(w*sizeof (double*));
for (j = 0; j < w; j++){
temp1[i][j] = malloc(2*sizeof(double));
}
}
double ***temp2;
temp2 = malloc(h * sizeof (double**));
for (i = 0;i < h; i++){
temp2[i] = malloc(w*sizeof (double*));
for (j = 0; j < w; j++){
temp2[i][j] = malloc(2*sizeof(double));
}
}
for (i = 0;i < h; i++){
for (j = 0; j < w; j++){
temp1[i][j][0] = dft[i*w+j][0];
temp1[i][j][1] = dft[i*w+j][1];
}
}
int m2 = h/2;
int n2 = w/2;
//forward shifting
for (i = 0; i < m2; i++)
{
for (k = 0; k < n2; k++)
{
double tmp13[2] = {temp1[i][k][0],temp1[i][k][1]};
temp1[i][k][0] = temp1[i+m2][k+n2][0];
temp1[i][k][1] = temp1[i+m2][k+n2][1];
temp1[i+m2][k+n2][0] = tmp13[0];
temp1[i+m2][k+n2][1] = tmp13[1];
double tmp24[2] = {temp1[i+m2][k][0],temp1[i+m2][k][1]};
temp1[i+m2][k][0] = temp1[i][k+n2][0];
temp1[i+m2][k][1] = temp1[i][k+n2][1];
temp1[i][k+n2][0] = tmp24[0];
temp1[i][k+n2][1] = tmp24[1];
}
}
//process
for (i = 0;i < h; i++){
for (j = 0; j < w; j++){
if(distance_to_center(i,j,m2,n2) < 0.25*h)
{
temp1[i][j][0] = (double)0.0;
temp1[i][j][1] = (double)0.0;
}
}
}
/* copy for magnitude */
for (i = 0;i < h; i++){
for (j = 0; j < w; j++){
temp2[i][j][0] = temp1[i][j][0];
temp2[i][j][1] = temp1[i][j][1];
}
}
//backward shifting
for (i = 0; i < m2; i++)
{
for (k = 0; k < n2; k++)
{
double tmp13[2] = {temp1[i][k][0],temp1[i][k][1]};
temp1[i][k][0] = temp1[i+m2][k+n2][0];
temp1[i][k][1] = temp1[i+m2][k+n2][1];
temp1[i+m2][k+n2][0] = tmp13[0];
temp1[i+m2][k+n2][1] = tmp13[1];
double tmp24[2] = {temp1[i+m2][k][0],temp1[i+m2][k][1]};
temp1[i+m2][k][0] = temp1[i][k+n2][0];
temp1[i+m2][k][1] = temp1[i][k+n2][1];
temp1[i][k+n2][0] = tmp24[0];
temp1[i][k+n2][1] = tmp24[1];
}
}
//convert back to 1d
for (i = 0;i < h; i++){
for (j = 0; j < w; j++){
dft[i*w+j][0] = temp1[i][j][0];
dft[i*w+j][1] = temp1[i][j][1];
dft2[i*w+j][0] = temp2[i][j][0];
dft2[i*w+j][1] = temp2[i][j][1];
}
}
/* magnitude */
double max = 0;
double min = 0;
double mag=0;
for (i = 0, k = 1; i < h; i++){
for (j = 0; j < w; j++, k++){
mag = sqrt(pow(dft2[i*w+j][0],2) + pow(dft2[i*w+j][1],2));
if (max < mag)
max = mag;
}
}
double **magTemp;
magTemp = malloc(h * sizeof (double*));
for (i = 0;i < h; i++){
magTemp[i] = malloc(w*sizeof (double));
}
for(i = 0,k = 0 ; i < h ; i++)
{
for(j = 0 ; j < w ; j++,k++)
{
double mag = sqrt(pow(dft2[i*w+j][0],2) + pow(dft2[i*w+j][1],2));
mag = 255*(mag/max);
//magTemp[i][j] = 255-mag; //Putih
magTemp[i][j] = mag; //Item
}
}
/* brightening magnitude*/
for(i = 0,k = 0 ; i < h ; i++)
{
for(j = 0 ; j < w ; j++,k++)
{
//double temp = magTemp[i][j];
double temp = (double)(255/(log(1+255)))*log(1+magTemp[i][j]);
pixel_out[i][j] = (unsigned char)temp;
}
}
generateImage(width,height,pixel_out,line1,line2,line3,filename,"magnitude");
/* backward fft */
plan_i = fftw_plan_dft_2d(w,h,dft,idft,FFTW_BACKWARD,FFTW_ESTIMATE);
fftw_execute(plan_i);
for(i = 0,k = 0 ; i < h ; i++)
{
for(j = 0 ; j < w ; j++,k++)
{
double temp = idft[i*w+j][0]/N;
pixel_out[i][j] = (unsigned char)temp; //+ pixel[i][j];
}
}
generateImage(width,height,pixel_out,line1,line2,line3,filename,"backward");
return pixel_out;
}
EDIT new source code
I add this part before the forward shifting, the result is as expected also.
//proses
//create filter
unsigned char **pixel_filter;
pixel_filter = malloc(h*sizeof(unsigned char*));
for(i = 0 ; i<h;i++)
pixel_filter[i]=malloc(w*sizeof(unsigned char));
for (i = 0;i < h; i++){
for (j = 0; j < w; j++){
if(distance_to_center(i,j,m2,n2) < 20)
{
pixel_filter[i][j] = 0;
}
else
{
pixel_filter[i][j] = 255;
}
}
}
generateImage(width,height,pixel_filter,line1,line2,line3,filename,"filter1");
for (i = 0; i < m2; i++)
{
for (k = 0; k < n2; k++)
{
unsigned char tmp13 = pixel_filter[i][k];
pixel_filter[i][k] = pixel_filter[i+m2][k+n2];
pixel_filter[i+m2][k+n2] = tmp13;
unsigned char tmp24 = pixel_filter[i+m2][k];
pixel_filter[i+m2][k] = pixel_filter[i][k+n2];
pixel_filter[i][k+n2] = tmp24;
}
}
generateImage(width,height,pixel_filter,line1,line2,line3,filename,"filter2");
for (i = 0;i < h; i++){
for (j = 0; j < w; j++){
temp1[i][j][0] *= pixel_filter[i][j];
temp1[i][j][1] *= pixel_filter[i][j];
}
}

Your general idea is OK. From the output, it's hard to tell whether there's simply an accounting problem in your program, or whether this is perhaps the expected result. Try padding the source image with much more empty space, and filter out a smaller area in the frequency domain.
As a side note, doing this in C appears incredibly painful. Here is an equivalent implementation in Matlab. Not including plotting, it's around 10 lines of code. You might also try Numerical Python (NumPy).
% Demonstrate frequency-domain image filtering in Matlab
% Define the grid
x = linspace(-1, 1, 1001);
y = x;
[X, Y] = meshgrid(x, y);
% Make a square (source image)
rect = (abs(X) < 0.1) & (abs(Y) < 0.1);
% Compute the transform
rect_hat = fft2(rect);
% Make the high-pass filter
R = sqrt(X.^2 + Y.^2);
filt = (R > 0.05);
% Apply the filter
rect_hat_filtered = rect_hat .* ifftshift(filt);
% Compute the inverse transform
rect_filtered = ifft2(rect_hat_filtered);
%% Plot everything
figure(1)
imagesc(rect);
title('source');
axis square
saveas(gcf, 'fig1.png');
figure(2)
imagesc(abs(fftshift(rect_hat)));
title('fft(source)');
axis square
saveas(gcf, 'fig2.png');
figure(3)
imagesc(filt);
title('filter (frequency domain)');
axis square
saveas(gcf, 'fig3.png');
figure(4)
imagesc(fftshift(abs(rect_hat_filtered)));
title('fft(source) .* filter');
axis square
saveas(gcf, 'fig4.png');
figure(5)
imagesc(abs(rect_filtered))
title('result');
axis square
saveas(gcf, 'fig5.png');
The source image:
Fourier transform of the source image:
The filter:
Result of applying (multiplying) the filter with the fourier transform of the source image:
Taking the inverse transform gives the final result:

Algorithm in C to calculate coefficients of polynomial using Lagrange interpolation

I've been stuck on this for a while now. I'm writing an algorithm in C to pull out the coefficients of a polynomial using Lagrange's interpolation method.
My code partially works, for instance if we do the first example here http://en.wikipedia.org/wiki/Lagrange_polynomial#Example_1 then the code can print out the first 2 coefficients (0 and 4.834848)
Similarly with example 3 on that article, it will print the 2 coefficients 6 and -11.
I need to be able to accurately get all the coefficients from any set of points. Please advise on the alterations required to the code.
Thanks in advance!
Updated with latest code, 7:57PM, GMT on August 5th. 9 coefficients now working, getting ugly looking. Will investigate iterative process for n degrees tomorrow!
#include<ncursesw/ncurses.h>
#include<math.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#define MAX 200
float coeff[MAX], coefftwo[MAX], coeffthree[MAX], coefffour[MAX];
int count;
void main()
{
int n,i,j ;
char ch;
float x[MAX],y[MAX],fp2, coeff1, coeff2;
printf("\n\nn = ");
scanf("%i", &count);
for(i=0; i < count; i++)
{
printf("\n\n The value of x%i= ", i);
scanf("%f",&x[i]);
printf("\n The value of f(x%i)= ", i);
scanf("%f",&y[i]);
}
for(i=0;i<count;i++)
{
coeff1 = 1.0;
coeff2 = 0.0;
coeff3 = 0.0;
coeff4 = 0.0;
coeff5 = 0.0;
coeff6 = 0.0;
coeff7 = 0.0;
coeff8 = 0.0;
coeff9 = 0.0;
for(j=0; j<count; j++)
{
if(i!=j) {
coeff1 = coeff1 * (array[i]-array[j]);
coeff2 -= array[j];
for (int k=j; k < count; k++) {
if ((j!=k) && (k!=i)) {
coeff3 += array[j] * array[k];
for(int l=k; l < count; l++) {
if ((l!=j) && (l!=k) && (l!=i)) {
coeff4 -= array[j] * array[k] * array[l];
for (int m = l; m < count; m++) {
if ((m!=l) && (m!=k) && (m!=j) && (m!=i)) { coeff5 += array[j] * array[k] * array[l] * array[m];
for (int n = m; n < count; n++) {
if ((n!=m) && (n!=l) && (n!=k) && (n!=j) && (n!=i)) {
coeff6 -= array[j] * array[k] * array[l] * array[m] * array[n];
for (int o = n; o < count; o++) {
if ((o!=n) && (o!=m) && (o!=l) && (o!=k) && (o!=j) && (o!=i)) {
coeff7 += array[j] * array[k] * array[l] * array[m] * array[n] * array[o];
for (int p = o; p < count; p++) {
if ((p!=o) && (p!=n) && (p!=m) && (p!=l) && (p!=k) && (p!=j) && (p!=i)) {
coeff8 -= array[j] * array[k] * array[l] *array[m] *array[n] * array[o] * array[p];
for (int q = p; q < count; q++) {
if ((q!=p) && (q!=o) && (q!=n) && (q!=m) && (q!=l) && (q!=k) && (q!=j) && (q!=i)) {
coeff9 += array[j] * array[k] * array[l] * array[m] * array[n] * array[o] * array[p] * array[q];
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
coeff[i] = y[i] / coeff1;
coefftwo[i] = y[i] * coeff2 / coeff1;
coeffthree[i] = y[i] * coeff3 / coeff1;
coefffour[i] = y[i] * coeff4 / coeff1;
coefffive[i] = y[i] * coeff5 / coeff1;
coeffsix[i] = y[i] * coeff6 / coeff1;
coeffseven[i] = y[i] * coeff7 / coeff1;
coeffeight[i] = y[i] * coeff8 / coeff1;
coeffnine[i] = y[i] * coeff9 / coeff1;
}
float coefficientone = 0.0;
float coefficienttwo = 0.0;
float coefficientthree = 0.0;
float coefficientfour = 0.0;
float coefficientfive = 0.0;
float coefficientsix = 0.0;
float coefficientseven = 0.0;
float coefficienteight = 0.0;
float coefficientnine = 0.0;
for (int i = 0; i< count; i++){
coefficientone = coefficientone + coeff[i];
coefficienttwo = coefficienttwo + coefftwo[i];
coefficientthree = coefficientthree + coeffthree[i];
coefficientfour = coefficientfour + coefffour[i];
coefficientfive = coefficientfive + coefffive[i];
coefficientsix = coefficientsix + coeffsix[i];
coefficientseven = coefficientseven + coeffseven[i];
coefficienteight = coefficienteight + coeffeight[i];
coefficientnine = coefficientnine + coeffnine[i];
}
printf("coefficient 1 = %f\n", coefficientone);
printf("coefficient 2 = %f\n", coefficienttwo);
printf("coefficient 3 = %f\n", coefficientthree);
printf("coefficient 4 = %f\n", coefficientfour);
printf("coefficient 5 = %f\n", coefficientfive);
printf("coefficient 6 = %f\n", coefficientsix);
printf("coefficient 7 = %f\n", coefficientseven);
printf("coefficient 8 = %f\n", coefficienteight);
printf("coefficient 9 = %f\n", coefficientnine);
}

Your algebra is simply wrong, and that fact is hidden by poorly chosen variable names.
When you calculate the contribution of the ith basis polynomial (never mind y for now) what variable represents the coefficient of the x2 term? It's coeff3. And you don't calculate it correctly.
Take a simpler case. Suppose you want to work out (x+a)(x+b)(x+c)(x+d). The first term is x4, easy. The next is (a+b+c+d)x3, not too bad. The next is (ab + ac + ad + bc + bd + cd)x2, and now it's clear that a single loop won't do the job. It's worth taking the time to make sure you can write code that handles the simple problem correctly, before you try the more complex one. You need something like this:
for(unsigned int j=0 ; j<count ; ++j)
{
...
coeff2 -= x[j];
for(unsigned int k=j ; k<count ; ++k)
{
if(j!=k && k!=i)
coeff3 += x[j] * x[k];
...
}
}
That should be enough to get you started.

Newton method implementation for finding initial values, with Dormand Prince to solve differential equations in C

The following code works like a charm to solve a system of differential equations in it(fcn function in the code), with correct initial values. However, the point of the task is to replace initial values y_1(0) and y_2(0) with some random values, and implement some iterative method to find the correct initial values to solve the equation. I already know how to check if the value is correct value, since by definition output of ddopri 5 should give y_2(1) and y_3(1) as 0. How do I implement Newton Raphson for this problem?
#include<stdio.h>
#include<math.h>
#include<stdbool.h>
double ddopri5(void fcn(double, double *, double *), double *y);
double alpha;
void fcn(double t, double *y, double *f);
double eps;
int main(void){
double y[4];
//eps = 1.e-9;
printf("Enter alpha:\n");
scanf("%lg", &alpha);
printf("Enter epsilon:\n");
scanf("%lg", &eps);
y[0]=1.0;//x1(0)
y[1]=-1.22565282791;//x2(0)
y[2]=-0.274772807644;//p1(0)
y[3]=0.0;//p2(0)
ddopri5(fcn, y);
}
void fcn(double t, double *y, double *f){
/* double h = 0.25;*/
f[0] = y[1];
f[1] = y[3] - sqrt(2)*y[0]*exp(-alpha*t);
f[2] = sqrt(2)*y[3]*exp(-alpha*t) + y[0];
f[3] = -y[2];
}
double ddopri5(void fcn(double, double *, double *), double *y){
double t, h, a, b, tw, chi;
double w[4], k1[4], k2[4], k3[4], k4[4], k5[4], k6[4], k7[4], err[4], dy[4];
int i;
double errabs;
int iteration;
iteration = 0;
//eps = 1.e-9;
h = 0.1;
a = 0.0;
b = 1;//3.1415926535;
t = a;
while(t < b -eps){
printf("%lg\n", eps);
fcn(t, y, k1);
tw = t+ (1.0/5.0)*h;
for(i = 0; i < 4; i++){
/*printf("k1[%i] = %.15lf \n", i, k1[i]);*/
w[i] = y[i] + h*(1.0/5.0)*k1[i];
}
fcn(tw, w, k2);
tw = t+ (3.0/10.0)*h;
for(i = 0; i < 4; i++){
/*printf("k2[%i] = %.15lf \n", i, k2[i]);*/
w[i] = y[i] + h*((3.0/40.0)*k1[i] + (9.0/40.0)*k2[i]);
}
fcn(tw, w, k3);
tw = t+ (4.0/5.0)*h;
for(i = 0; i < 4; i++){
/*printf("k3[%i] = %.15lf \n", i, k3[i]);*/
w[i] = y[i] + h*((44.0/45.0)*k1[i] - (56.0/15.0)*k2[i] + (32.0/9.0)*k3[i]);
}
fcn(tw, w, k4);
tw = t+ (8.0/9.0)*h;
for(i = 0; i < 4; i++){
/*printf("k4[%i] = %.15lf \n", i, k4[i]);*/
w[i] = y[i] + h*((19372.0/6561.0)*k1[i] - (25360.0/2187.0)*k2[i] + (64448.0/6561.0)*k3[i] - (212.0/729.0)*k4[i]);
}
fcn(tw, w, k5);
tw = t + h;
for(i = 0; i < 4; i++){
/*printf("k5[%i] = %.15lf \n", i, k5[i]);*/
w[i] = y[i] + h*((9017.0/3168.0)*k1[i] - (355.0/33.0)*k2[i] + (46732.0/5247.0)*k3[i] + (49.0/176.0)*k4[i] - (5103.0/18656.0)*k5[i]) ;
}
fcn(tw, w, k6);
tw = t + h;
for(i = 0; i < 4; i++){
/*printf("k6[%i] = %.15lf \n", i, k6[i]);*/
w[i] = y[i] + h*((35.0/384.0)*k1[i] + (500.0/1113.0)*k3[i] + (125.0/192.0)*k4[i] - (2187.0/6784.0)*k5[i] + (11.0/84.0)*k6[i]);
}
fcn(tw, w, k7);
errabs = 0;
for(i = 0; i < 4; i++){
/* printf("k7[%i] = %.15lf \n", i, k7[i]);*/
/* dy[i] = h*((71.0/57600.0)*k1[i] - (71.0/16695.0)*k3[i] + (71.0/1920.0)*k4[i] - (17253.0/339200.0)*k5[i] + (22.0/525.0)*k6[i]);*/
dy[i] = h*((35.0/384.0)*k1[i] + (500.0/1113.0)*k3[i] + (125.0/192.0)*k4[i] - (2187.0/6784.0)*k5[i] + (11.0/84.0)*k6[i]);
/*err[i] = h*((71.0/57600.0)*k1[i] + (71.0/16695.0)*k3[i] + (71.0/1920.0)*k4[i] - (17253.0/339200.0)*k5[i] + (22.0/525.0)*k6[i] - (1.0/40.0)*k7[i])*/;
err[i] = h*((71.0/57600.0)*k1[i] - (71.0/16695.0)*k3[i] + (71.0/1920.0)*k4[i] - (17253.0/339200.0)*k5[i] + (22.0/525.0)*k6[i] - (1.0/40.0)*k7[i]);
/*printf("err[%i] = %.15lf \n", i, err[i]);*/
errabs+=err[i]*err[i];
}
errabs = sqrt(errabs);
printf("errabs = %.15lf\n", errabs);
if( errabs < eps){
t+= h;
printf(" FROM IF \t t = %.25lf, \n h = %.25lf, \n errabs = %.25lf, \n iteration = %i . \n", t, h, errabs, iteration);
for(i = 0; i < 4; i++){
y[i]+=dy[i];
}
}
/*Avtomaticheskiy vibor shaga*/
chi=errabs/eps;
chi = pow(chi, (1.0/6.0));
if(chi > 10) chi = 10;
if(chi < 0.1) chi = 0.1;
h*= 0.95/chi;
if( t + h > b ) h = b - t;
/* for(i = 0; i < 4; i++){
printf("y[%i] = %.15lf \n", i, y[i]);
}*/
iteration++;
printf("t = %.25lf \t h = %.25lf\n", t, h);
/*if(iteration > 5) break;*/
printf("end \n");
for(i = 0; i < 4; i++){
printf("y[%i] = %.15lf \n", i, y[i]);
}
if(iteration > 30000) break;
}
/* for(i = 0; i < 4; i++){
printf("y[%i] = %.15lf\n", i, y[i]);
}*/
return 0;
}

Try this:
Y0=initial_guess
while (true) {
F=ddopri(Y0);
Error=F-F_correct
if (Error small enough)
break;
J=jacobian(ddopri, Y0) // this is the matrix dF/dY0
Y0=Y0-J^(-1)*Error // here you have to solve a linear system
The Jacobian can be obtained using finite differences, i.e. bump up and down the elements of Y one at a time, compute F, take finite differences.
To be clear, element (i,j) of matrix J is dF_i/dY0_j

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Implementation of OpenMP - c

Related

Simple C example of add/sub/mul/div operations in double-precision floating-points using a single-precision Floating-point system

how to produce -400,-200,-400 in sequential order

High Pass Filter using FFTW in C

Algorithm in C to calculate coefficients of polynomial using Lagrange interpolation

Newton method implementation for finding initial values, with Dormand Prince to solve differential equations in C

Categories

Resources