Algorithm to find positive real solutions to quartic equations? - c

I am using these algorithms on a microcontroller:
float32_t cubic(float32_t b,float32_t c,float32_t d)
{
float32_t p=c-b*b/3.0f;
float32_t q=2.0f*b*b*b/27.0f-b*c/3.0f+d;
if(fabsf(p)==0.0f) return powf(q,1.0f/3.0f);
if(fabsf(q)==0.0f){
PRINTF(INFO, "q=0 %f", p);
return 0.0f; // TODO
}
float32_t t=sqrtf(fabsf(p)/3.0f);
float32_t g=1.5f*q/(p*t);
if(p>0.0f)
return -2.0f*t*sinhf(asinhf(g)/3.0f)-b/3.0f;
if(4.0f*p*p*p+27.0f*q*q<0.0f)
return 2.0f*t*cosf(acosf(g)/3.0f)-b/3.0f;
if(q>0.0f)
return -2.0f*t*coshf(acoshf(-g)/3.0f)-b/3.0f;
return 2.0f*t*coshf(acoshf(g)/3.0f)-b/3.0f;
}
int quartic(float32_t b,float32_t c,float32_t d,float32_t e,float32_t* ans)
{
float32_t p=c-0.375f*b*b;
float32_t q=0.125f*b*b*b-0.5f*b*c+d;
float32_t m=cubic(p,0.25f*p*p+0.01171875f*b*b*b*b-e+0.25f*b*d-0.0625f*b*b*c,-0.125f*q*q);
if(fabsf(q)==0.0f)
{
if(m<0.0f) return 0;
int nroots=0;
float32_t sqrt_2m=sqrtf(2.0f*m);
if(-m-p>0.0f)
{
float32_t delta=sqrtf(2.0f*(-m-p));
ans[nroots++]=-0.25f*b+0.5f*(sqrt_2m-delta);
ans[nroots++]=-0.25f*b-0.5f*(sqrt_2m-delta);
ans[nroots++]=-0.25f*b+0.5f*(sqrt_2m+delta);
ans[nroots++]=-0.25f*b-0.5f*(sqrt_2m+delta);
}
if(fabsf(-m-p)==0.0f)
{
ans[nroots++]=-0.25f*b-0.5f*sqrt_2m;
ans[nroots++]=-0.25f*b+0.5f*sqrt_2m;
}
return nroots;
}
if(m<0.0f) return 0;
float32_t sqrt_2m=sqrtf(2.0f*m);
int nroots=0;
if(fabsf(-m-p+q/sqrt_2m)>=0.0f)
{
float32_t delta=sqrtf(2.0f*(-m-p+q/sqrt_2m));
ans[nroots++]=0.5f*(-sqrt_2m+delta)-0.25f*b;
ans[nroots++]=0.5f*(-sqrt_2m-delta)-0.25f*b;
}
if(fabsf(-m-p-q/sqrt_2m)>=0.0f)
{
float32_t delta=sqrtf(2.0f*(-m-p-q/sqrt_2m));
ans[nroots++]=0.5f*(sqrt_2m+delta)-0.25f*b;
ans[nroots++]=0.5f*(sqrt_2m-delta)-0.25f*b;
}
return nroots;
}
From: Specialised algorithm to find positive real solutions to quartic equations? (I can't comment on this thread because I haven't enough Stackoverflow privileges)
Is the same but with single precision instead of doubles.
With doubles it seems that works.
With singles, i have some points in which q=0 in the cubic function, so it returns m=0 for the quartic function and the result is not a real root. I get zeros.
I need to implement Matlab root method for real positive roots.
This point gives problems with the above code but has solution with roots in Matlab.
C1=53.3456154
C2=1729.59448
C3=54973.8164
C4=56.3456192
C5=1729.5946
C6=54973.8242
ans=single(roots([C5 (-C1+2*C4-C6) (3*C2-3*C5) (C1-2*C3+C6) -C2]))
r=r(r==conj(r));
r=r(r>0)
In Matlab the result is ok
ans =
31.7814 + 0i
-0.0000 + 1.0001i
-0.0000 - 1.0001i
-0.0315 + 0i
r = 31.7814

Related

Can't implement square root systemcall in linux kernel

I need to implement this function as a systemcall:
asmlinkage ​ long​​ sys_sqrt (​ float​ x);
Where the function gonna print the square root of n to the kernel log.
I'm using kernel version 4.13 on 64bit virtual box.
I'm trying to implement the sqrt by using this technique
#include <linux/kernel.h>
#define SQRT_MAGIC_F 0x5f3759df
asmlinkage ​long​​ sys_sqrt(​float​ x);
{
const float xhalf = 0.5f*x;
union // get bits for floating value
{
float x;
int i;
} u;
u.x = x;
u.i = SQRT_MAGIC_F - (u.i >> 1);
printk ("%f", (x*u.x*(1.5f - xhalf*u.x*u.x));
return 0;
}
This leads the compiler telling me "error: SSE register return with SSE disabled" on "printk ("%f", (x * u.x * (1.5f - xhalf * u.x * u.x));"
Another workaround I tried is separating the integer and the decimals like so
float ans = (x*u.x*(1.5f - xhalf*u.x*u.x);
int head = ans;
float tail_float = ans - head;
int tail = tail_float*10000;
printk ("%d.%03d", head,tail);
This leads the compiler telling me "error:SSE register return with SSE disabled" on "float ans = (x *u.x *(1.5f - xhalf * u.x* u.x);"
another thing i've tried is adding a kernel_fpu_begin & end between the function body but this leads "error: implicit declaration of function "kernel_fpu_begin"; did you mean "kernel_old_dev_t"
Any solution?
Thank you so much.

Using SSE effectively for rasterization

I've been thinking about using the SSE instruction set to make my 3d software rasterizer faster, but I've never used them before and feel like I am going completely wrong.
I'd like to hear from the more experienced on whether it is an effort that is worth it, and if this code is written poorly:
typedef union _declspec(align(16)) {
struct {
float x;
float y;
float z;
float w;
};
__m128 m128;
} Vec4_t;
Vec4_t AddVec(Vec4_t* a, Vec4_t *b) {
__m128 value = _mm_add_ps(a->m128, b->m128);
return *(Vec4_t*)&value;
}
This is how I'm testing it:
Vec4_t a = { 2.0f, 4.0f, 10.0f, 123.1f };
Vec4_t b = { 6.0f, 12.0f, 16.0f, 64.0f };
Vec4_t c = AddVec(&a, &b);
printf("%f, %f, %f, %f\n", c.x, c.y, c.z, c.w);
which outputs:
8.000000, 16.000000, 26.000000, 187.100006
I honestly have no idea what I'm doing. I'm surprised the code I wrote even worked.

DSP libraries - RFFT - strange results

Recently I've been trying to do FFT calculations on my STM32F4-Discovery evaluation board then send it to PC. I have looked into my problem - I think that I'm doing something wrong with FFT functions provided by manufacturer.
I'm using CMSIS-DSP libraries.
For now I've have been generating samples with code (if that works correct I'll do sampling by microphone).
I'm using arm_rfft_fast_f32 as my data are going to be floats in the future, but results I get in my output array are insane (I think) - I'm getting frequencies below 0.
number_of_samples = 512; (l_probek in code)
dt = 1/freq/number_of_samples
Here is my code
float32_t buffer_input[l_probek];
uint16_t i;
uint8_t mode;
float32_t dt;
float32_t freq;
bool DoFlag = false;
bool UBFlag = false;
uint32_t rozmiar = 4*l_probek;
union
{
float32_t f[l_probek];
uint8_t b[4*l_probek];
}data_out;
union
{
float32_t f[l_probek];
uint8_t b[4*l_probek];
}data_mag;
union
{
float32_t f;
uint8_t b[4];
}czest_rozdz;
/* Pointers ------------------------------------------------------------------*/
arm_rfft_fast_instance_f32 S;
arm_cfft_radix4_instance_f32 S_CFFT;
uint16_t output;
/* ---------------------------------------------------------------------------*/
int main(void)
{
freq = 5000;
dt = 0.000000390625;
_GPIO();
_LED();
_NVIC();
_EXTI(0);
arm_rfft_fast_init_f32(&S, l_probek);
GPIO_SetBits(GPIOD, LED_Green);
mode = 2;
//----------------- Infinite loop
while (1)
{
if(true)//(UBFlag == true)
for(i=0; i<l_probek; ++i)
{
buffer_input[i] = (float32_t) 15*sin(2*PI*freq*i*dt);
}
//Obliczanie FFT
arm_rfft_fast_f32(&S, buffer_input, data_out.f, 0);
//Obliczanie modulow
arm_cmplx_mag_f32(data_out.f, data_mag.f, l_probek);
USART_putdata(USART1, data_out.b, data_mag.b, rozmiar);
//USART_putdata(USART1, czest_rozdz.b, data_mag.b, rozmiar);
GPIO_ToggleBits(GPIOD, LED_Orange);
//mode++;
//UBFlag = false;
}
}
}
I'm using arm_rfft_fast_f32 as my data are going to be floats in the future, but results I get in my output array are insane (I think) - I'm getting frequencies below 0.
The arm_rfft_fast_f32 function does not return frequencies, but rather complex-valued coefficients computed using the Fast Fourier Transform (FFT). It is thus perfectly reasonable for those coefficients to be negative. More specifically, the expected coefficients for your single-cycle sin test tone input with an amplitude of 15 would be:
0.0, 0.0; // special case packing real-valued X[0] and X[N/2]
0.0, -3840.0; // X[1]
0.0, 0.0; // X[2]
0.0, 0.0; // X[3]
...
0.0, 0.0; // X[255]
Note that as indicated in the documentation the first two outputs correspond to the purely real coefficients X[0] and X[N/2] (you should be particularly careful about this special case in your subsequent call to arm_cmplx_mag_f32; see last point below).
The frequency of each of those frequency components are given by k*fs/N, where N is the number of samples (in your case l_probek) and fs = 1/dt is the sampling rate (in your case freq*l_probek):
X[0] -> 0*freq*l_probek/l_probek = 0
X[1] -> 1*freq*l_probek/l_probek = freq = 5000
X[2] -> 2*freq*l_probek/l_probek = 2*freq = 10000
X[3] -> 3*freq*l_probek/l_probek = 2*freq = 15000
...
Finally, due to the special packing of the first two values, you need to be careful when computing the N/2+1 magnitudes:
// General case for the magnitudes
arm_cmplx_mag_f32(data_out.f+2, data_mag.f+1, l_probek/2 - 1);
// Handle special cases
data_mag.f[0] = data_out.f[0];
data_mag.f[l_probek/2] = data_out.f[1];
As a follow-up to the above answer, which is awesome, some further clarifications which took me an age to figure out.
The frequency bins are centered on the target frequency, so for instance in the example above X[0] represents -2500Hz to 2500Hz, centered on zero, X[1] is 2500Hz to 7500Hz centered on 5000Hz and so on
It's common to interpolate frequencies within the bin by looking at the energy of the adjacent bins (see https://dspguru.com/dsp/howtos/how-to-interpolate-fft-peak/) if you do this you will need to make sure that your magnitude array is large enough for the bins + Nyquist and that the bin above Nyquist is 0, but note many interpolation techniques require the complex values (e.q. Quinn, Jacobson) so make sure you interpolate before finding the magnitudes.
The special case code above works because there is no complex component of the DC and Nyquist values and thus the magnitude is simply the real part
There is a bug in the code above however - although the imaginary parts of the DC and Nyquist components is always zero, the real part could still be negative, so you need to take the absolute value to get the magnitude:
// Handle special cases
data_mag.f[0] = fabs(data_out.f[0]);
data_mag.f[l_probek/2] = fabs(data_out.f[1]);

gsl Error in inifinite integration interval. bad integrand behavior found. How to fix it?

I'm getting the following error message after trying to do the a numerical integration on a infinte interval [0,inf) using GSL in C.
gsl: qags.c:553: ERROR: bad integrand behavior found in the integration interval
Default GSL error handler invoked.
Command terminated by signal 6
Here is the function I'm integrating
$
double dI2dmu(double x, void * parametros){
double *p,Ep,mu,M,T;
p=(double *) parametros;
M=p[0];
T=p[1];
mu=p[2];
Ep=sqrt(x*x+M*M);
double fplus= -((exp((Ep - mu)/T)/(pow(1 + exp((Ep - mu)/T),2)*T) - exp((Ep + \
mu)/T)/(pow(1 + exp((Ep + mu)/T),2)*T))*pow(x,2))/(2.*Ep*pow(PI,2));
return fplus;
}
And the code for the integration procedure
params[0]=0.007683; //M
params[1]=0.284000;// T
params[2]=0.1; //mu
gsl_function dI2mu_u;
dI2mu_u.function = &dI2dmu;
dI2mu_u.params = &params;
gsl_integration_qagiu (&dI2mu_u, 0, 0, 1e-7, 100000,
w, &resultTest2, &error1Test2);
The fucntion has the following aspect:
Which, to my eyes, has a very well behavior. So, instead of performing an infinite integration, I perform the integration up to an upper limit that I consider rezonable, like in:
gsl_function G;
G.function = &dI2dmu;
G.params = &params;
gsl_integration_qags (&G, 0, 1e2*A, 0, 1e-7, 100000,
w, &result1, &error1);
Getting a result that agrees with the result of Mathematica for infinite integration
result definite up to 10*A = 0.005065263943958745
result up to infinity = nan
Mathematica result up to infinity = 0.005065260000000000
But the GSL infinite integral keps being "nan". Any ideas? I thanks in advance for the help.
As #yonatan zuleta ochoa points out correctly, the problem is in exp(t)/pow(exp(t)+1,2). exp(t) can overflow an ieee754 DBL_MAX for values of t as low as nextafter(log(DBL_MAX), INFINITY), which is ~7.09783e2.
When exp(t) == INFINITY,
exp(t)/pow(exp(t)+1,2) == ∞/pow(∞+1,2) == ∞/∞ == NAN
Yonatan's proposed solution is to use logarithms, which can be done as follows:
exp(t)/pow(exp(t)+1,2) == exp(log(exp(t)) - log(pow(exp(t)+1,2)))
== exp(t - 2*log(exp(t)+1))
== exp(t - 2*log1p(exp(t))) //<math.h> function avoiding loss of precision for log(exp(t)+1)) if exp(t) << 1.0
This is an entirely reasonable approach, avoiding NAN up to very high values of t. However, in your code, t == (Ep ± mu)/T can be INFINITY if abs(T) < 1.0 for values of x close to DBL_MAX, even if x is not infinity. In this case, the subtraction t - 2*log1p(exp(t)) turns into ∞ - ∞, which is NAN again.
A different approach is to replace exp(x)/pow(exp(x)+1,2) with 1.0/(pow(exp(x)+1,2)*pow(exp(x), -1)) by dividing both denominator and numerator by exp(x) (which is not zero for any finite x). This simplifies to 1.0/(exp(x)+exp(-x)+2.0).
Here is an implementation of the function avoiding NAN for values of x up to and including DBL_MAX:
static double auxfun4(double a, double b, double c, double d)
{
return 1.0/(a*b+2.0+c*d);
}
double dI2dmu(double x, void * parametros)
{
double *p = (double *) parametros;
double invT = 1.0/p[1];
double Ep = hypot(x, p[0]);
double muexp = exp(p[2]*invT);
double Epexp = exp(Ep*invT);
double muinv = 1.0/muexp;
double Epinv = 1.0/Epexp;
double subterm = auxfun4(Epexp, muinv, Epinv, muexp);
subterm -= auxfun4(Epexp, muexp, Epinv, muinv);
double fminus = subterm*(x/Ep)*invT*(0.5/(M_PI*M_PI))*x;;
return -fminus;
}
This implementation also uses hypot(x,M), rather than sqrt(x*x, M*M), and avoids calculating x*x by rearranging the order of multiplications/divisions to group x/Ep together. Since hypot(x,M) will be abs(x) for abs(x) >> abs(M), the term x/Ep approaches 1.0 for large x.
I think the problem here is that unlike Mathematica, C does not use arbitrary precision in computing. Then, at some point when Exp [Ep] is calculated numerical computation overflows.
Now, GSL uses the transformation x = (1-t)/t, to map onto interval (0,1].
So, for t<<0 is posible to get nan results since the behavior of your function tends to indeterminations (0/0 or inf/inf,etc) for extreme values.
Maybe if you write out the terms
Exp[ ( Ep(x) - \Mu)/T ] / { 1 + Exp[( Ep(x) - \Mu )/T] }^2
using A/B = Exp[ Ln A - Ln B], you could get a better numerical behavior.
I will try if and I have nice results, then I'll tell you.
The solution
As I said before, you must take care the problems arising with indeterminate forms. So, lets write out the problematic terms using the logarithmic version:
double dIdmu(double x, void * parametros){
double *p,Ep,mu,M,T;
p=(double *) parametros;
M=p[0];
T=p[1];
mu=p[2];
Ep=sqrt(x*x+M*M);
double fplus= - ( exp( (Ep - mu)/T -2.0*log(1.0 + exp((Ep - mu)/T) ) ) - exp( (Ep + mu)/T -2.0*log(1.0 + exp((Ep + mu)/T) ) ) ) * pow(x,2) / (2.* T * Ep*pow(M_PI,2));
return fplus;
}
and with this main function
int main()
{
double params[3];
double resultTest2, error1Test2;
gsl_integration_workspace * w
= gsl_integration_workspace_alloc (10000);
params[0]=0.007683; //M
params[1]=0.284000;// T
params[2]=0.1; //mu
gsl_function dI2mu_u;
dI2mu_u.function = &dIdmu;
dI2mu_u.params = &params;
gsl_integration_qagiu (&dI2mu_u, 0.0, 1e-7, 1e-7, 10000, w, &resultTest2, &error1Test2);
printf("%e\n", resultTest2);
gsl_integration_workspace_free ( w);
return 0;
}
you get the answer:
-5.065288e-03.
I am curious... This is how I define the function in Mathematica
So comparing the answers:
GSL -5.065288e-03
Mathematica -0.005065287633739702

newton raphson in C

I have implemented the newton raphson algorithm for finding roots in C. I want to print out the most accurate approximation of the root as possible without going into nan land. My strategy for this is while (!(isnan(x0)) { dostuff(); } But this continues to print out the result multiple times. Ideally I would like to setup a range so that the difference between each computed x intercept approximation would stop when the previous - current is less than some range .000001 in my case. I have a possible implementation below. When I input 2.999 It takes only one step, but when I input 3.0 it takes 20 steps, this seems incorrect to me.
(When I input 3.0)
λ newton_raphson 3
2.500000
2.250000
2.125000
2.062500
2.031250
2.015625
2.007812
2.003906
2.001953
2.000977
2.000488
2.000244
2.000122
2.000061
2.000031
2.000015
2.000008
2.000004
2.000002
2.000001
Took 20 operation(s) to approximate a proper root of 2.000002
within a range of 0.000001
(When I input 2.999)
λ newton_raphson 2.999
Took 1 operation(s) to approximate a proper root of 2.000000
within a range of 0.000001
My code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define RANGE 0.000001
double absolute(double number)
{
if (number < 0) return -number;
else return number;
}
double newton_raphson(double (*func)(double), double (*derivative)(double), double x0){
int count;
double temp;
count = 0;
while (!isnan(x0)) {
temp = x0;
x0 = (x0 - (func(x0)/derivative(x0)));
if (!isnan(x0))
printf("%f\n", x0);
count++;
if (absolute(temp - x0) < RANGE && count > 1)
break;
}
printf("Took %d operation(s) to approximate a proper root of %6f\nwithin a range of 0.000001\n", count, temp);
return x0;
}
/* (x-2)^2 */
double func(double x){ return pow(x-2.0, 2.0); }
/* 2x-4 */
double derivative(double x){ return 2.0*x - 4.0; }
int main(int argc, char ** argv)
{
double x0 = atof(argv[1]);
double (*funcPtr)(double) = &func; /* this is a user defined function */
double (*derivativePtr)(double) = &derivative; /* this is the derivative of that function */
double result = newton_raphson(funcPtr, derivativePtr, x0);
return 0;
}
You call trunc(x0) which turns 2.999 into 2.0. Naturally, when you start at the right answer, no iteration is needed! In other words, although you intended to use 2.999 as your starting value, you actually used 2.0.
Simply remove the call to trunc().
Worth pointing out: taking 20 steps to converge is also anomalous; because you are converging to a multiple root, the convergence is only linear instead of the typical quadratic convergence that Newton-Raphson gives in the general case. You can see this in the fact that your error is halved with each iteration (with the usual quadratic convergence, you would get twice as many correct digits on each iteration, and converge much, much faster).

Resources