FFTW3 backward doesn't work for me - c

first I apologize for my bad english...
So here is my problem. I'm testing out the FFTW3 library, with a simple input signal, a continious one. Then I compute the FFT and get the good result : just a signal on frequency 0, everything else is at 0.
Then I would like to get my input back with the backward FFT, but it doesn't work. Here is my code :
fftw_complex* imgIn;
fftw_complex* imgIn2;
fftw_complex* imgOut;
fftw_plan plan;
int taille = 100;
int i;
//Allocation des entrées et sorties
imgIn = fftw_malloc(sizeof(fftw_complex)*taille);
imgIn2 = fftw_malloc(sizeof(fftw_complex)*taille);
imgOut = fftw_malloc(sizeof(fftw_complex)*taille);
//Remplissage des données d'entrées pour le calcul de la FFT
for(i = 0 ; i < taille ; i++){
imgIn[i][0] = 1.0;
imgIn[i][1] = 0.0;
}
//Plan d'execution
plan = fftw_plan_dft_2d(taille/10, taille/10, imgIn, imgOut, FFTW_FORWARD, FFTW_ESTIMATE);
//Execute la FFT
fftw_execute(plan);
//Inverse
plan = fftw_plan_dft_2d(taille/10, taille/10, imgOut, imgIn2, FFTW_BACKWARD, FFTW_ESTIMATE);
for(i = 0 ; i < taille ; i++){
printf("%d : %g\n%d : %g\n", i, imgIn2[i][0], i, imgIn2[i][1]);
}
As you can see, I just try to perform a normal FFT, then to reverse it. The problem is that my output imgIn2 is just full of 0, instead of 1 and 0...
So what's wrong with my code ?
Thank you :)

Your code does not execute the second plan.

Related

How to integrate my calculation C code with OpenMP

I am using a dual channels DAQ card with data stream mode. I wrote some code for analysis/calculation and put them to the main code for operation. However, the FIFO overflow warning sign always occur once its total data reach around 6000 MSamples (the DAQ on board memory is 8GB). I am well-noticed that a complicated calculation might retard the system and cause the overflow but all of the works I wrote are necessary to my experiment which means cannot be replaced (or there is more effective code can let me get the same result). I have heard that the OpenMP might be a solution to boost up the speed, but I am just a beginner in C, how could I implement to my calculation code?
My computer has 64GB RAM and Intel Core i7 processor. I always turn off other unnecessary software when running the data stream code. The code has been optimize as possible as I can, like simplify the hilbert() and use memcpy to pick out a specific range of data points.
This is how I process the data:
1.Install the FFTW source code for the Hilbert transform.
2.For loop to de-interleave pi16Buffer data to ch2Buffer
3.memcpy to get a certain range of data that I am interested put them to another array called ch2newBuffer
4.Do the hilbert() on ch2newBuffer and calculate its absolute number.
5.Find the max value of ch1 and abs(hilbert(ch2newBuffer)).
6.Calculate max(abs(hilbert(ch2))) / max(ch1).
Here is a part of the my DAQ code which in charge to calculation:
void hilbert(const int16* in, fftw_complex* out, fftw_plan plan_forward, fftw_plan plan_backward)
{
// copy the data to the complex array
for (int i = 0; i < N; ++i) {
out[i][REAL] = in[i];
out[i][IMAG] = 0;
}
// creat a DFT plan and execute it
//fftw_plan plan = fftw_plan_dft_1d(N, out, out, FFTW_FORWARD, FFTW_ESTIMATE);
fftw_execute(plan_forward);
// destroy a plan to prevent memory leak
//fftw_destroy_plan(plan_forward);
int hN = N>>1; // half of the length (N/2)
int numRem = hN; // the number of remaining elements
// multiply the appropriate value by 2
//(those should multiplied by 1 are left intact because they wouldn't change)
for (int i = 1; i < hN; ++i) {
out[i][REAL] *= 2;
out[i][IMAG] *= 2;
}
// if the length is even, the number of the remaining elements decrease by 1
if (N % 2 == 0)
numRem--;
else if (N > 1) {
out[hN][REAL] *= 2;
out[hN][IMAG] *= 2;
}
// set the remaining value to 0
// (multiplying by 0 gives 0, so we don't care about the multiplicands)
memset(&out[hN + 1][REAL], 0, numRem * sizeof(fftw_complex));
// creat a IDFT plan and execute it
//plan = fftw_plan_dft_1d(N, out, out, FFTW_BACKWARD, FFTW_ESTIMATE);
fftw_execute(plan_backward);
// do some cleaning
//fftw_destroy_plan(plan_backward);
//fftw_cleanup();
// scale the IDFT output
//for (int i = 0; i < N; ++i) {
//out[i][REAL] /= N;
//out[i][IMAG] /= N;
//}
}
float SumBufferData(void* pBuffer, uInt32 u32Size, uInt32 u32SampleBits)
{
// In this routine we sum up all the samples in the buffer. This function
// should be replaced with the user's analysys function
if ( 8 == u32SampleBits )
{
pu8Buffer = (uInt8 *)pBuffer;
for (i = 0; i < u32Size; i++)
{
i64Sum += pu8Buffer[i];
}
}
else
{
pi16Buffer = (int16 *)pBuffer;
fftw_complex(hilbertedch2[N]);
fftw_plan plan_forward = fftw_plan_dft_1d(N, hilbertedch2, hilbertedch2, FFTW_FORWARD, FFTW_ESTIMATE);
fftw_plan plan_backward = fftw_plan_dft_1d(N, hilbertedch2, hilbertedch2, FFTW_BACKWARD, FFTW_ESTIMATE);
ch2Buffer = (int16*)calloc(u32Size / 2, sizeof * ch2Buffer);
ch2newBuffer= (int16*)calloc(u32Size/2, sizeof* ch2newBuffer);
// De-interleave the data from pi16Buffer
for (i = 0; i < u32Size/2 ; i++)
{
ch2Buffer[i] = pi16Buffer[i*2+1];
}
// Pick out the data points range that we are interested
memcpy(ch2newBuffer, &ch2Buffer[6944], 1024 * sizeof(ch2Buffer[0]));
// Do the hilbert transform to these data points
hilbert(ch2newBuffer, hilbertedch2, plan_forward, plan_backward);
fftw_destroy_plan(plan_forward);
fftw_destroy_plan(plan_backward);
//Find max value in each segs of ch1 and ch2
for (i = 128; i < 200 ; i++)
{
if (pi16Buffer[i*2] > max1)
max1 = pi16Buffer[i*2];
}
for (i = 0; i < 1024; i++)
{
if (fabs(hilbertedch2[i][IMAG]) > max2)
max2 = fabs(hilbertedch2[i][IMAG]);
}
Corrected = max2 / max1 / N; // Calculate the signal correction
}
free(ch2Buffer);
free(ch2newBuffer);
return Corrected;
}
Loop are typically a good start for parallelism, for instance:
#pragma omp parallel for
for (int i = 0; i < N; ++i) {
out[i][REAL] = in[i];
out[i][IMAG] = 0;
}
or
#pragma omp parallel for reduction(max:max2)
for (i = 0; i < 1024; i++)
{
float tmp = fabs(hilbertedch2[i][IMAG]);
max2 = (max2 > tmp) ? max2 : tmp.
}
That being said, you need to profile your code find out where the execution takes the most time and try to parallelized if possible. However, looking at what you have posted, I do not see a lot of parallelism opportunity there.

Parallelizing Simpson's Method in C using pthreads and OpenMp

I am fairly new to the whole multiprocessing and parallelizing world. I am currently working on an algorithm to implement Simpson's Method and Simpson 3/8. I have already implemented the algorithm in C in its serial form.
I need some help to parallelize both of this algorithms using, OpenMp and Pthreads. Any recommendation is welcome and appreciated.
//Simpson.c
#include <stdio.h>
#include <stdlib.h>
double function(double x) //función a la cual se le aplicará el método
{
return 0.5*x;
}
int main(int argc, char*argv[])
{
double resultado = 0.0; //resultado de la integral tras aplicar el método de simpson
double a = 0.0; //límite inferior de la integral
double b = 0.0; //límite superior de la integral
int n = 0; //cantidad de particiones entre los intervalos, tiene que ser un número par
int i = 0; //variable para poder iterar sobre las diferentes particiones
double h = 0.0; //variable para guardar el incremento que habrá entre cada valor de "x"
double calc = 0.0; //variable intermedia para ir acumulando el resultado de evaluar las diferentes "x" en la función
//variables para almacenar los diferentes valores de "x", es decir los límites inferiores y superiores y el valor intermedio de cada partición
double x0 = 0.0;
double x1 = 0.0;
double x2 = 0.0;
printf("Introduce el límite inferior: ");
scanf("%lf", &a);
printf("Introduce el límite superior: ");
scanf("%lf", &b);
printf("Introduce la cantidad de particiones (número par): ");
scanf("%d", &n);
h = (b-a)/n; //se calcula el incremento para poder iterar sobre todas las particiones
//Para el cálculo de la integral se utilizan xj, x(j+1) y x(j+2) en el subintervalo [xj, x(j+2)] para cada i = 1, 3, 5, ..., n
for (i=0; i<=n; i+=2)
{
x0 = a + h*i; //límite inferior
calc += function(x0); //se evalua la x en la función
x1 = a + h*(i+1); //valor intermedio
calc += 4*function(x1);
x2 = a + h*(i+2); //límite superior
calc += function(x2);
calc *= ((x2-x0)/6) ; //se vuelve a calcular el incremento entre nuestros límites, y se divide entre 6
resultado += calc; //variable para ir acumulando los cálculos intermedios de cada "x"
}
printf("La aproximación a la integral es: %f \n", resultado);
}
#include<stdio.h>
#include<math.h>
// función a integrar
#define f(x) (0.5*x)
int main()
{
float b; //Limite superior
float a; //Limite inferior
float resultado=0.0;
float dx;
float k;
int i;
int N; //numero de intervalos o iteraciones
// Entradas
printf("Dame el limite inferior: ");
scanf("%f", &a);
printf("Dame el limite superior: ");
scanf("%f", &b);
printf("Número de iteraciones/intervalos (número par): ");
scanf("%d", &N);
//Calculando delta de x
dx = (b - a)/N;
//Sumas de la integracion
resultado = f(a) + f(b); //primera suma
for(i=1; i<= N-1; i++)
{
k = a + i*dx; //Suma del intervalo
if(i%3 == 0)
{
resultado = resultado + 2 * f(k);
}
else
{
resultado = resultado + 3 * f(k);
}
}
resultado = resultado * dx*3/8;
printf("\nEl valor de la integral es: %f \n", resultado);
printf("\n");
return 0;
}
My first recommendation is to start by reading a lot about the shared-memory paradigm and its problems. Then read about Pthreads and OpenMP, and only then look at concrete OpenMP and Pthread parallelizations, and how they deal with some of the issues of the shared-memory paradigm. If you have to learn both Pthreads and OpenMP, in my opinion, if I were you I would start by looking into Pthread first and only then looking into OpenMP. The latter helps with some of the cumbersome details, and pitfalls, of coding with the former.
I will try to give you a very general (not super detailed) formula on how to approach a parallelization of a code, and I will be using as an example your second code and OpenMP.
If you opt to parallelize an algorithm, first you need to look at the part of the code that is worth parallelization (i.e., the overhead of the parallelization is overshadowed by the speedups that it brings). For this profiling is fundamental, however, for smaller codes like yours it is pretty straightforward to figure it out, it will be typically loops, for instance:
for(i=1; i<= N-1; i++)
{
k = a + i*dx; //Suma del intervalo
if(i%3 == 0)
{
resultado = resultado + 2 * f(k);
}
else
{
resultado = resultado + 3 * f(k);
}
}
To parallelize this code, which constructor should I use tasks ? parallel for ? you can look at the great answers to this topic on this SO thread. In your code, it is pretty clear that we should use parallel for (i.e., #pragma omp parallel for). Consequently, we are telling to OpenMP to divide the iterations of that loop among the threads.
Now you should think about if the variables used inside the parallel region should be private or shared among threads. For that openMP offers constructors like private, firstprivate, lastprivate and shared. Another great SO thread about the subject can be found here. To evaluate this you really need to understand the shared memory paradigm and how OpenMP handles it.
Then look at potential race conditions, interdependencies between variables and so on. OpenMP offers constructors like critical, atomic operations, reductions among others to solve those issues (other great SO threads atomic vs critical and reduction vs atomic). TL;DR : You should opt for the solution that gives you the best performance without compromising correctness.
In your case reduction of the variable "resultado" is clearly the best option:
#pragma omp parallel reduction ( +: resultado)
for(i=1; i<= N-1; i++)
{
k = a + i*dx; //Suma del intervalo
...
}
After you guarantee the correctness of your code you can look at load unbalancing problems (i.e., the difference among the work performed by each thread). To deal with this issue openMP offers parallel for scheduling strategies like dynamic and guided (another SO thread) and the possibility of tuning the chunk size of those distributions.
In your code, threads will execute the same amount of parallel work, so no need to use a dynamic or guided schedule, you can opt for the static one instead. The benefit of the static over the others is that it does not introduce the overhead of having to coordinate the distributions of tasks among threads at-runtime.
There is much more stuff to look into to extract the maximum performance out of the architecture that your code is running on. But for now, I think it is a good enough start.

Why is FFT of (A+B) different from FFT(A) + FFT(B)?

I have been fighting with a very weird bug for almost a month. Asking you guys is my last hope. I wrote a program in C that integrates the 2d Cahn–Hilliard equation using the Implicit Euler (IE) scheme in Fourier (or reciprocal) space:
Where the "hats" mean that we are in Fourier space: h_q(t_n+1) and h_q(t_n) are the FTs of h(x,y) at times t_n and t_(n+1), N[h_q] is the nonlinear operator applied to h_q, in Fourier space, and L_q is the linear one, again in Fourier space. I don't want to go too much into the details of the numerical method I am using, since I am sure that the problem is not coming from there (I tried using other schemes).
My code is actually quite simple. Here is the beginning, where basically I declare variables, allocate memory and create the plans for the FFTW routines.
# include <stdlib.h>
# include <stdio.h>
# include <time.h>
# include <math.h>
# include <fftw3.h>
# define pi M_PI
int main(){
// define lattice size and spacing
int Nx = 150; // n of points on x
int Ny = 150; // n of points on y
double dx = 0.5; // bin size on x and y
// define simulation time and time step
long int Nt = 1000; // n of time steps
double dt = 0.5; // time step size
// number of frames to plot (at denominator)
long int nframes = Nt/100;
// define the noise
double rn, drift = 0.05; // punctual drift of h(x)
srand(666); // seed the RNG
// other variables
int i, j, nt; // variables for space and time loops
// declare FFTW3 routine
fftw_plan FT_h_hft; // routine to perform fourier transform
fftw_plan FT_Nonl_Nonlft;
fftw_plan IFT_hft_h; // routine to perform inverse fourier transform
// declare and allocate memory for real variables
double *Linft = fftw_alloc_real(Nx*Ny);
double *Q2 = fftw_alloc_real(Nx*Ny);
double *qx = fftw_alloc_real(Nx);
double *qy = fftw_alloc_real(Ny);
// declare and allocate memory for complex variables
fftw_complex *dh = fftw_alloc_complex(Nx*Ny);
fftw_complex *dhft = fftw_alloc_complex(Nx*Ny);
fftw_complex *Nonl = fftw_alloc_complex(Nx*Ny);
fftw_complex *Nonlft = fftw_alloc_complex(Nx*Ny);
// create the FFTW plans
FT_h_hft = fftw_plan_dft_2d ( Nx, Ny, dh, dhft, FFTW_FORWARD, FFTW_ESTIMATE );
FT_Nonl_Nonlft = fftw_plan_dft_2d ( Nx, Ny, Nonl, Nonlft, FFTW_FORWARD, FFTW_ESTIMATE );
IFT_hft_h = fftw_plan_dft_2d ( Nx, Ny, dhft, dh, FFTW_BACKWARD, FFTW_ESTIMATE );
// open file to store the data
char acstr[160];
FILE *fp;
sprintf(acstr, "CH2d_IE_dt%.2f_dx%.3f_Nt%ld_Nx%d_Ny%d_#f%.ld.dat",dt,dx,Nt,Nx,Ny,Nt/nframes);
After this preamble, I initialise my function h(x,y) with a uniform random noise, and I also take the FT of it. I set the imaginary part of h(x,y), which is dh[i*Ny+j][1] in the code, to 0, since it is a real function. Then I calculate the wavevectors qx and qy, and with them, I compute the linear operator of my equation in Fourier space, which is Linft in the code. I consider only the - fourth derivative of h as the linear term, so that the FT of the linear term is simply -q^4... but again, I don't want to go into the details of my integration method. The question is not about it.
// generate h(x,y) at initial time
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
rn = (double) rand()/RAND_MAX; // extract a random number between 0 and 1
dh[i*Ny+j][0] = drift-2.0*drift*rn; // shift of +-drift
dh[i*Ny+j][1] = 0.0;
}
}
// execute plan for the first time
fftw_execute (FT_h_hft);
// calculate wavenumbers
for (i = 0; i < Nx; i++) { qx[i] = 2.0*i*pi/(Nx*dx); }
for (i = 0; i < Ny; i++) { qy[i] = 2.0*i*pi/(Ny*dx); }
for (i = 1; i < Nx/2; i++) { qx[Nx-i] = -qx[i]; }
for (i = 1; i < Ny/2; i++) { qy[Ny-i] = -qy[i]; }
// calculate the FT of the linear operator
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
Q2[i*Ny+j] = qx[i]*qx[i] + qy[j]*qy[j];
Linft[i*Ny+j] = -Q2[i*Ny+j]*Q2[i*Ny+j];
}
}
Then, finally, it comes the time loop. Essentially, what I do is the following:
Every once in a while, I save the data to a file and print some information on the terminal. In particular, I print the highest value of the FT of the Nonlinear term. I also check if h(x,y) is diverging to infinity (it shouldn't happen!),
Calculate h^3 in direct space (that is simply dh[i*Ny+j][0]*dh[i*Ny+j][0]*dh[i*Ny+j][0]). Again, the imaginary part is set to 0,
Take the FT of h^3,
Obtain the complete Nonlinear term in reciprocal space (that is N[h_q] in the IE algorithm written above) by computing -q^2*(FT[h^3] - FT[h]). In the code, I am referring to the lines Nonlft[i*Ny+j][0] = -Q2[i*Ny+j]*(Nonlft[i*Ny+j][0] -dhft[i*Ny+j][0]) and the one below, for the imaginary part. I do this because:
Advance in time using the IE method, transform back in direct space, and then normalise.
Here is the code:
for(nt = 0; nt < Nt; nt++) {
if((nt % nframes)== 0) {
printf("%.0f %%\n",((double)nt/(double)Nt)*100);
printf("Nonlft %.15f \n",Nonlft[(Nx/2)*(Ny/2)][0]);
// write data to file
fp = fopen(acstr,"a");
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
fprintf(fp, "%4d %4d %.6f\n", i, j, dh[i*Ny+j][0]);
}
}
fclose(fp);
}
// check if h is going to infinity
if (isnan(dh[1][0])!=0) {
printf("crashed!\n");
return 0;
}
// calculate nonlinear term h^3 in direct space
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
Nonl[i*Ny+j][0] = dh[i*Ny+j][0]*dh[i*Ny+j][0]*dh[i*Ny+j][0];
Nonl[i*Ny+j][1] = 0.0;
}
}
// Fourier transform of nonlinear term
fftw_execute (FT_Nonl_Nonlft);
// second derivative in Fourier space is just multiplication by -q^2
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
Nonlft[i*Ny+j][0] = -Q2[i*Ny+j]*(Nonlft[i*Ny+j][0] -dhft[i*Ny+j][0]);
Nonlft[i*Ny+j][1] = -Q2[i*Ny+j]*(Nonlft[i*Ny+j][1] -dhft[i*Ny+j][1]);
}
}
// Implicit Euler scheme in Fourier space
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
dhft[i*Ny+j][0] = (dhft[i*Ny+j][0] + dt*Nonlft[i*Ny+j][0])/(1.0 - dt*Linft[i*Ny+j]);
dhft[i*Ny+j][1] = (dhft[i*Ny+j][1] + dt*Nonlft[i*Ny+j][1])/(1.0 - dt*Linft[i*Ny+j]);
}
}
// transform h back in direct space
fftw_execute (IFT_hft_h);
// normalize
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
dh[i*Ny+j][0] = dh[i*Ny+j][0] / (double) (Nx*Ny);
dh[i*Ny+j][1] = dh[i*Ny+j][1] / (double) (Nx*Ny);
}
}
}
Last part of the code: empty the memory and destroy FFTW plans.
// terminate the FFTW3 plan and free memory
fftw_destroy_plan (FT_h_hft);
fftw_destroy_plan (FT_Nonl_Nonlft);
fftw_destroy_plan (IFT_hft_h);
fftw_cleanup();
fftw_free(dh);
fftw_free(Nonl);
fftw_free(qx);
fftw_free(qy);
fftw_free(Q2);
fftw_free(Linft);
fftw_free(dhft);
fftw_free(Nonlft);
return 0;
}
If I run this code, I obtain the following output:
0 %
Nonlft 0.0000000000000000000
1 %
Nonlft -0.0000000000001353512
2 %
Nonlft -0.0000000000000115539
3 %
Nonlft 0.0000000001376379599
...
69 %
Nonlft -12.1987455309071730625
70 %
Nonlft -70.1631962517720353389
71 %
Nonlft -252.4941743351609204637
72 %
Nonlft 347.5067875825179726235
73 %
Nonlft 109.3351142318568633982
74 %
Nonlft 39933.1054502610786585137
crashed!
The code crashes before reaching the end and we can see that the Nonlinear term is diverging.
Now, the thing that doesn't make sense to me is that if I change the lines in which I calculate the FT of the Nonlinear term in the following way:
// calculate nonlinear term h^3 -h in direct space
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
Nonl[i*Ny+j][0] = dh[i*Ny+j][0]*dh[i*Ny+j][0]*dh[i*Ny+j][0] -dh[i*Ny+j][0];
Nonl[i*Ny+j][1] = 0.0;
}
}
// Fourier transform of nonlinear term
fftw_execute (FT_Nonl_Nonlft);
// second derivative in Fourier space is just multiplication by -q^2
for ( i = 0; i < Nx; i++ ) {
for ( j = 0; j < Ny; j++ ) {
Nonlft[i*Ny+j][0] = -Q2[i*Ny+j]* Nonlft[i*Ny+j][0];
Nonlft[i*Ny+j][1] = -Q2[i*Ny+j]* Nonlft[i*Ny+j][1];
}
}
Which means that I am using this definition:
instead of this one:
Then the code is perfectly stable and no divergence happens! Even for billions of time steps! Why does this happen, since the two ways of calculating Nonlft should be equivalent?
Thank you very much to anyone who will take the time to read all of this and give me some help!
EDIT: To make things even more weird, I should point out that this bug does NOT happen for the same system in 1D. In 1D both methods of calculating Nonlft are stable.
EDIT: I add a short animation of what happens to the function h(x,y) just before crashing. Also: I quickly re-wrote the code in MATLAB, which uses Fast Fourier Transform functions based on the FFTW library, and the bug is NOT happening... the mystery deepens.
I solved it!!
The problem was the calculation of the Nonl term:
Nonl[i*Ny+j][0] = dh[i*Ny+j][0]*dh[i*Ny+j][0]*dh[i*Ny+j][0];
Nonl[i*Ny+j][1] = 0.0;
That needs to be changed to:
Nonl[i*Ny+j][0] = dh[i*Ny+j][0]*dh[i*Ny+j][0]*dh[i*Ny+j][0] -3.0*dh[i*Ny+j][0]*dh[i*Ny+j][1]*dh[i*Ny+j][1];
Nonl[i*Ny+j][1] = -dh[i*Ny+j][1]*dh[i*Ny+j][1]*dh[i*Ny+j][1] +3.0*dh[i*Ny+j][0]*dh[i*Ny+j][0]*dh[i*Ny+j][1];
In other words: I need to consider dh as a complex function (even though it should be real).
Basically, because of stupid rounding errors, the IFT of the FT of a real function (in my case dh), is NOT purely real, but will have a very small imaginary part. By setting Nonl[i*Ny+j][1] = 0.0 I was completely ignoring this imaginary part.
The issue, then, was that I was recursively summing FT(dh), dhft, and an object obtained using the IFT(FT(dh)), this is Nonlft, but ignoring the residual imaginary parts!
Nonlft[i*Ny+j][0] = -Q2[i*Ny+j]*(Nonlft[i*Ny+j][0] -dhft[i*Ny+j][0]);
Nonlft[i*Ny+j][1] = -Q2[i*Ny+j]*(Nonlft[i*Ny+j][1] -dhft[i*Ny+j][1]);
Obviously, calculating Nonlft as dh^3 -dh and then doing
Nonlft[i*Ny+j][0] = -Q2[i*Ny+j]* Nonlft[i*Ny+j][0];
Nonlft[i*Ny+j][1] = -Q2[i*Ny+j]* Nonlft[i*Ny+j][1];
Avoided the problem of doing this "mixed" sum.
Phew... such a relief! I wish I could assign the bounty to myself! :P
EDIT: I'd like to add that, before using the fftw_plan_dft_2d functions, I was using fftw_plan_dft_r2c_2d and fftw_plan_dft_c2r_2d (real-to-complex and complex-to-real), and I was seeing the same bug. However, I suppose that I couldn't have solved it if I didn't switch to fftw_plan_dft_2d, since the c2r function automatically "chops off" the residual imaginary part coming from the IFT. If this is the case and I'm not missing something, I think that this should be written somewhere on the FFTW website, to prevent users from running into problems like this. Something like "r2c and c2r transforms are not good to implement pseudospectral methods".
EDIT: I found another SO question that addresses exactly the same problem.

Issue with C program

I have this code:
#include <stdio.h>
#include<conio.h>
main(){
float promAnual=0.0;
int numMeses, numToneladas,i, suma = 0, mesTon = 0;
float toneladas[12];
for(i = 1; i < 13; i++){
printf("Ingrese la cantidad de toneladas del mes #%d->", i);
scanf("%f", &toneladas[i] );
}
for(i = 1; i < 13; i++){
suma = suma + toneladas [i];
}
promAnual = suma / 12.0;
for(i = 1; i < 13; i++){
if(toneladas[i]>promAnual){
numMeses = numMeses + 1;
}
}
numToneladas = 0;
mesTon = 0;
for(i = 1; i < 13; i++){
if(toneladas[i]>toneladas[i+1]){
mesTon = i;
numToneladas = toneladas[i];
}
}
printf("El promedio anual es: %0.2f, %d mes(es) tuvieron mayor cosecha que el promedio anual, y el mayor numero de toneladas se produjo en el mes #%d con %0.2f", promAnual,numMeses,mesTon, numToneladas);
}
The issue is that the last 2 variables in the last printf are showing wrong values, I know why, but I don't know how to fix it, it is because the last "for" is assigning the last value of "i", but I don't know how to fix it.
you would better check the end condition of "for" loop, the array toneladas have 12 stores, which from 0 to 11,but you set it from 1 to 12,maybe it cause your issue.
please try to set "for(i=0;i<12;i++)",and run the code again.
By the looks of the errors in your code, I'd say you're probably not reading a book to learn. That's unfortunate, because people who read books to learn C usually don't have this kind of trouble.
float toneladas[12];
for(i = 1; i < 13; i++){
printf("Ingrese la cantidad de toneladas del mes #%d->", i);
scanf("%f", &toneladas[i] );
}
Here you've declared a carton of 12 eggs, egg[0] through to egg[11] (write them out and count them one by one, and you'll see there are 12)... and then tried to insert into egg[12], which is out of bounds. Expect one smashed egg!
Throughout that code you're referring to that smashed egg again and again. I wouldn't be surprised if the recipe you're concocting is disastrous!
Speaking of smashed eggs, you have an uninitialised variable: int numMesses which you then use without initialisation: numMeses = numMeses + 1;...
int numMeses, numToneladas,i, suma = 0, mesTon = 0;
/* SNIP */
printf("El promedio anual es: %0.2f, %d mes(es) tuvieron mayor cosecha que el promedio anual, y el mayor numero de toneladas se produjo en el mes #%d con %0.2f", promAnual,numMeses,mesTon, numToneladas);
As you can see, numToneladas is declared as int. However, in your call to printf you're telling printf it's a double. You're lying to printf; no wonder it's lying back at you!
You appear to be including the non-portable header <conio.h>, though I see no point. Unlike most, you've not used a single function from that header! Why include a non-portable header you're not using?
My only guess is that you're copy/pasting from somewhere, and attempting to learn by unguided trial and error. As you've seen, that's dangerous; it'll cause you headaches due to strange, difficult to debug bugs like the one you've encountered today. You got lucky this time, because you noticed. If you write code like this in the real world you might cause someone injury!
Read a book! Do the exercises as you stumble across them. I can recommend K&R2E.

OpenCV Fourier Magnitude - doesn't seem correct

I believe I am having a scaling issue in trying to convert the Fourier magnitude spectrum to an Image.
I am working on my own visual odometry project to determine the translation and rotation between consequtive frames from a camera input. I have been successful with determining translation using phase correlation of the fourier transform, however part of determining the rotation requires the magnitude spectrum to be convolved. Essentially the magnitude I have produced does not seem correct, as below.
Original Image:
Magnitude, with the 'mag = 255*(mag/max)' scaling
Magnitude, without the scaling
Unfortunately I would require help as to the function I am using to determine the magnitude, I believe my error is in the scaling of the magnitude but am unsure exactly. This issue has had me for some time and your input would be appreciated, thankyou.
void iplimage_dft(IplImage* img)
{
IplImage* img1, * img2;
fftw_complex* in, * dft, * idft;
fftw_plan plan_f, plan_b;
int i, j, k, w, h, N;
/* Copy input image */
img1 = cvCloneImage(img);
w = img1->width;
h = img1->height;
N = w * h;
/* Allocate input data for FFTW */
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
dft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
/* Create plans */
plan_f = fftw_plan_dft_2d(w, h, in, dft, FFTW_FORWARD, FFTW_ESTIMATE);
/* Populate input data in row-major order */
for (i = 0, k = 0; i < h; i++)
{
for (j = 0; j < w; j++, k++)
{
in[k][0] = ((uchar*)(img1->imageData + i * img1->widthStep))[j];
in[k][1] = 0.0;
}
}
/* Forward & inverse DFT */
fftw_execute(plan_f);
/* Create output image */
img2 = cvCreateImage(cvSize(w, h), 8, 1);
//Find the maximum value among the magnitudes
double max=0;
double mag=0;
for (i = 0, k = 1; i < h; i++){
for (j = 0; j < w; j++, k++){
mag = sqrt(pow(dft[k][0],2) + pow(dft[k][1],2));
if (max < mag)
max = mag;
}
}
// Convert DFT result to output image
for (i = 0, k = 0; i < h; i++)
{
for (j = 0; j < w; j++, k++)
{
double mag = sqrt(pow(dft[k][0],2) + pow(dft[k][1],2));
mag = 255*(mag/max);
((uchar*)(img2->imageData + i * img2->widthStep))[j] = mag;
}
}
cvShowImage("iplimage_dft(): original", img1);
cvShowImage("iplimage_dft(): result", img2);
//cvSaveImage("iplimage_dft.png", img2,0 );
cvWaitKey(0);
/* Free memory */
fftw_destroy_plan(plan_f);
fftw_free(in);
fftw_free(dft);
cvReleaseImage(&img1);
cvReleaseImage(&img2);
}
int main( int argc, char** argv )
{
argv[1] = "image1.jpg";
IplImage *img3 = cvLoadImage( argv[1], CV_LOAD_IMAGE_GRAYSCALE );
iplimage_dft(img3);
return 0;
}
The spectrum of many images have characteristics like this - several relatively high peaks with the rest of the field quite small in magnitude. It looks like you're normalizing right, it's just that the details are lost because the magnitude of much of the spectrum is very small. I've often found it more useful to use log(mag(spectrum)) (or even log(log(mag(spectrum))) in some cases) to generate an image if you're wanting to inspect details.

Resources