I am implementing BFSK frequency hopping communication system on a DSP processor. It was suggested by some of the forum members to use Goertzel algorithm for the demodulation of frequency hopping at specific frequencies. I have tried implementing the goertzel algorithm in C. the code is follows:
float goertzel(int numSamples,int TARGET_FREQUENCY,int SAMPLING_RATE, float* data)
{
int k,i;
float floatnumSamples;
float omega,sine,cosine,coeff,q0,q1,q2,result,real,imag;
floatnumSamples = (float) numSamples;
k = (int) (0.5 + ((floatnumSamples * TARGET_FREQUENCY) / SAMPLING_RATE));
omega = (2.0 * M_PI * k) / floatnumSamples;
sine = sin(omega);
cosine = cos(omega);
coeff = 2.0 * cosine;
q0=0;
q1=0;
q2=0;
for(i=0; i<numSamples; i++)
{
q0 = coeff * q1 - q2 + data[i];
q2 = q1;
q1 = q0;
}
real = (q1 - q2 * cosine);
imag = (q2 * sine);
result = sqrtf(real*real + imag*imag);
return result;
}
When I use the function to calculate the result at specific frequencies for a given dataset, I am not getting the correct results. However, if I use the same dataset and calculate the goertzel result using MATLAB goertzel() function, then I get the results perfectly. I am implemented the algorithm using C, with the help of some online tutorials that I found over the internet. I just want to get the view of you guys if the function is implementing the goertzel algorithm correctly.
If you are saying that the Matlab implementation is good because its results match the result for that frequency of a DFT or FFT of your data, then it's probably because the Matlab implementation is normalizing the results by a scaling factor as is done with the FFT.
Change your code to take this into account and see if it improves your results. Note that I also changed the function and result names to reflect that your goertzel is calculating the magnitude, not the complete complex result, for clarity:
float goertzel_mag(int numSamples,int TARGET_FREQUENCY,int SAMPLING_RATE, float* data)
{
int k,i;
float floatnumSamples;
float omega,sine,cosine,coeff,q0,q1,q2,magnitude,real,imag;
float scalingFactor = numSamples / 2.0;
floatnumSamples = (float) numSamples;
k = (int) (0.5 + ((floatnumSamples * TARGET_FREQUENCY) / SAMPLING_RATE));
omega = (2.0 * M_PI * k) / floatnumSamples;
sine = sin(omega);
cosine = cos(omega);
coeff = 2.0 * cosine;
q0=0;
q1=0;
q2=0;
for(i=0; i<numSamples; i++)
{
q0 = coeff * q1 - q2 + data[i];
q2 = q1;
q1 = q0;
}
// calculate the real and imaginary results
// scaling appropriately
real = (q1 - q2 * cosine) / scalingFactor;
imag = (q2 * sine) / scalingFactor;
magnitude = sqrtf(real*real + imag*imag);
return magnitude;
}
Often you can just use the square of the magnitude in your computations,
for example for tone detection.
Some excellent examples of Goertzels are in the Asterisk PBX DSP code
Asterisk DSP code (dsp.c)
and in the spandsp library SPANDSP DSP Library
Consider two input sample wave-forms:
1) a sine wave with amplitude A and frequency W
2) a cosine wave with the same amplitude and frequency A and W
Goertzel algorithm should yield the same results for two mentioned input wave-forms but the provided code results in different return values. I think the code should be revised as follows:
float goertzel_mag(int numSamples,int TARGET_FREQUENCY,int SAMPLING_RATE, float* data)
{
int k,i;
float floatnumSamples;
float omega,sine,cosine,coeff,q0,q1,q2,magnitude,real,imag;
float scalingFactor = numSamples / 2.0;
floatnumSamples = (float) numSamples;
k = (int) (0.5 + ((floatnumSamples * TARGET_FREQUENCY) / SAMPLING_RATE));
omega = (2.0 * M_PI * k) / floatnumSamples;
sine = sin(omega);
cosine = cos(omega);
coeff = 2.0 * cosine;
q0=0;
q1=0;
q2=0;
for(i=0; i<numSamples; i++)
{
q2 = q1;
q1 = q0;
q0 = coeff * q1 - q2 + data[i];
}
// calculate the real and imaginary results
// scaling appropriately
real = (q0 - q1 * cosine) / scalingFactor;
imag = (-q1 * sine) / scalingFactor;
magnitude = sqrtf(real*real + imag*imag);
return magnitude;
}
Related
The following full code could compare speed of fast inverse square root with 1/sqrt(). According to this sentence in wikipedia, (i.e. The algorithm was approximately four times faster than computing the square root with another method and calculating the reciprocal via floating point division.)
But here is why I am here: it is slower than 1/sqrt(). something wrong in my code? please.
#include <stdio.h>
#include <time.h>
#include <math.h>
float FastInvSqrt (float number);
int
main ()
{
float x = 1.0e+100;
int N = 100000000;
int i = 0;
clock_t start2 = clock ();
do
{
float z = 1.0 / sqrt (x);
i++;
}
while (i < N);
clock_t end2 = clock ();
double time2 = (end2 - start2) / (double) CLOCKS_PER_SEC;
printf ("1/sqrt() spends %13f sec.\n\n", time2);
i = 0;
clock_t start1 = clock ();
do
{
float y = FastInvSqrt (x);
i++;
}
while (i < N);
clock_t end1 = clock ();
double time1 = (end1 - start1) / (double) CLOCKS_PER_SEC;
printf ("FastInvSqrt() spends %f sec.\n\n", time1);
printf ("fast inverse square root is faster %f times than 1/sqrt().\n", time2/time1);
return 0;
}
float
FastInvSqrt (float x)
{
float xhalf = 0.5F * x;
int i = *(int *) &x; // store floating-point bits in integer
i = 0x5f3759df - (i >> 1); // initial guess for Newton's method
x = *(float *) &i; // convert new bits into float
x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
return x;
}
The result is as follows:
1/sqrt() spends 0.850000 sec.
FastInvSqrt() spends 0.960000 sec.
fast inverse square root is faster 0.885417 times than 1/sqrt().
A function that reduces the domain in which it computes with precision will have less computational complexity (meaning that it can be computed faster). This can be thought of as optimizing the computation of a function's shape for a subset of its definition, or like search algorithms which each are best for a particular kind of input (No Free Lunch theorem).
As such, using this function for inputs outside the interval [0, 1] (which I suppose it was optimized / designed for) means using it in the subset of inputs where its complexity is worse (higher) than other possibly specialized variants of functions that compute square roots.
The sqrt() function you are using from the library was itself (likely) also optimized, as it has pre-computed values in a sort of LUT (which act as initial guesses for further approximations); using such a more "general function" (meaning that it covers more of the domain and tries to efficientize it by precomputation, for example; or eliminating redundant computation, but that is limited; or maximizing data reuse at run-time) has its complexity limitations, because the more choices between which precomputation to use for an interval, the more decision overhead there is; so knowing at compile-time that all your inputs to sqrt are in the interval [0, 1] would help reduce the run-time decision overhead, as you would know ahead of time which specialized approximation function to use (or you could generate specialized functions for each interval of interest, at compile-time -> see meta-programming for this).
I correct my code as follows:
1. compute random number, instead of a fixed number.
2. count time consumption inside while loop and sum of it.
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
float FastInvSqrt (float number);
int
main ()
{
float x=0;
time_t t;
srand((unsigned) time(&t));
int N = 1000000;
int i = 0;
double sum_time2=0.0;
do
{
x=(float)(rand() % 10000)*0.22158;
clock_t start2 = clock ();
float z = 1.0 / sqrt (x);
clock_t end2 = clock ();
sum_time2=sum_time2+(end2-start2);
i++;
}
while (i < N);
printf ("1/sqrt() spends %13f sec.\n\n", sum_time2/(double)CLOCKS_PER_SEC);
double sum_time1=0.0;
i = 0;
do
{
x=(float)(rand() % 10000)*0.22158;
clock_t start1 = clock ();
float y = FastInvSqrt (x);
clock_t end1 = clock ();
sum_time1=sum_time1+(end1-start1);
i++;
}
while (i < N);
printf ("FastInvSqrt() spends %f sec.\n\n", sum_time1/(double)CLOCKS_PER_SEC);
printf ("fast inverse square root is faster %f times than 1/sqrt().\n", sum_time2/sum_time1);
return 0;
}
float
FastInvSqrt (float x)
{
float xhalf = 0.5F * x;
int i = *(int *) &x; // store floating-point bits in integer
i = 0x5f3759df - (i >> 1); // initial guess for Newton's method
x = *(float *) &i; // convert new bits into float
x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
return x;
}
but fast inverse square root still slower that 1/sqrt().
1/sqrt() spends 0.530000 sec.
FastInvSqrt() spends 0.540000 sec.
fast inverse square root is faster 0.981481 times than 1/sqrt().
An analytical solution for cubic bezier length
seems not to exist, but it does not mean that
coding a cheap solution does not exist. By cheap I mean something like in the range of 50-100 ns (or less).
Does someone know anything like that? Maybe in two categories:
1) less error like 1% but more slow code.
2) more error like 20% but faster?
I scanned through google a bit but it doesn't
find anything which looks like a nice solution. Only something like divide on N line segments
and sum the N sqrt - too slow for more precision,
and probably too inaccurate for 2 or 3 segments.
Is there anything better?
Another option is to estimate the arc length as the average between the chord and the control net. In practice:
Bezier bezier = Bezier (p0, p1, p2, p3);
chord = (p3-p0).Length;
cont_net = (p0 - p1).Length + (p2 - p1).Length + (p3 - p2).Length;
app_arc_length = (cont_net + chord) / 2;
You can then recursively split your spline segment into two segments and calculate the arc length up to convergence. I tested myself and it actually converges pretty fast. I got the idea from this forum.
Simplest algorithm: flatten the curve and tally euclidean distance. As long as you want an approximate arc length, this solution is fast and cheap. Given your curve's coordinate LUT—you're talking about speed, so I'm assuming you use those, and don't constantly recompute the coordinates—it's a simple for loop with a tally. In generic code, with a dist function that computes the euclidean distance between two points:
var arclength = 0,
last=LUT.length-1,
i;
for (i=0; i<last; i++) {
arclength += dist(LUT[i], LUT[i+1]);
}
Done. arclength is now the approximate arc length based on the maximum number of segments you can form in the curve based on your LUT. Need things faster with a larger potential error? Control the segment count.
var arclength = 0,
segCount = ...,
last=LUT.length-2,
step = last/segCount,
s, i;
for (s=0; s<=segCount; s++) {
i = (s*step/last)|0;
arclength += dist(LUT[i], LUT[i+1]);
}
This is pretty much the simplest possible algorithm that still generates values that come even close to the true arc length. For anything better, you're going to have to use more expensive numerical approaches (like the Legendre-Gauss quadrature technique).
If you want to know why, hit up the arc length section of "A Primer on Bézier Curves".
in my case a fast and valid approach is this. (Rewritten in c# for Unity3d)
public static float BezierSingleLength(Vector3[] points){
var p0 = points[0] - points[1];
var p1 = points[2] - points[1];
var p2 = new Vector3();
var p3 = points[3]-points[2];
var l0 = p0.magnitude;
var l1 = p1.magnitude;
var l3 = p3.magnitude;
if(l0 > 0) p0 /= l0;
if(l1 > 0) p1 /= l1;
if(l3 > 0) p3 /= l3;
p2 = -p1;
var a = Mathf.Abs(Vector3.Dot(p0,p1)) + Mathf.Abs(Vector3.Dot(p2,p3));
if(a > 1.98f || l0 + l1 + l3 < (4 - a)*8) return l0+l1+l3;
var bl = new Vector3[4];
var br = new Vector3[4];
bl[0] = points[0];
bl[1] = (points[0]+points[1]) * 0.5f;
var mid = (points[1]+points[2]) * 0.5f;
bl[2] = (bl[1]+mid) * 0.5f;
br[3] = points[3];
br[2] = (points[2]+points[3]) * 0.5f;
br[1] = (br[2]+mid) * 0.5f;
br[0] = (br[1]+bl[2]) * 0.5f;
bl[3] = br[0];
return BezierSingleLength(bl) + BezierSingleLength(br);
}
I worked out the closed form expression of length for a 3 point Bezier (below). I've not attempted to work out a closed form for 4+ points. This would most likely be difficult or complicated to represent and handle. However, a numerical approximation technique such as a Runge-Kutta integration algorithm (see my Q&A here for details) would work quite well by integrating using the arc length formula.
Here is some Java code for the arc length of a 3 point Bezier, with points a,b, and c.
v.x = 2*(b.x - a.x);
v.y = 2*(b.y - a.y);
w.x = c.x - 2*b.x + a.x;
w.y = c.y - 2*b.y + a.y;
uu = 4*(w.x*w.x + w.y*w.y);
if(uu < 0.00001)
{
return (float) Math.sqrt((c.x - a.x)*(c.x - a.x) + (c.y - a.y)*(c.y - a.y));
}
vv = 4*(v.x*w.x + v.y*w.y);
ww = v.x*v.x + v.y*v.y;
t1 = (float) (2*Math.sqrt(uu*(uu + vv + ww)));
t2 = 2*uu+vv;
t3 = vv*vv - 4*uu*ww;
t4 = (float) (2*Math.sqrt(uu*ww));
return (float) ((t1*t2 - t3*Math.log(t2+t1) -(vv*t4 - t3*Math.log(vv+t4))) / (8*Math.pow(uu, 1.5)));
public float FastArcLength()
{
float arcLength = 0.0f;
ArcLengthUtil(cp0.position, cp1.position, cp2.position, cp3.position, 5, ref arcLength);
return arcLength;
}
private void ArcLengthUtil(Vector3 A, Vector3 B, Vector3 C, Vector3 D, uint subdiv, ref float L)
{
if (subdiv > 0)
{
Vector3 a = A + (B - A) * 0.5f;
Vector3 b = B + (C - B) * 0.5f;
Vector3 c = C + (D - C) * 0.5f;
Vector3 d = a + (b - a) * 0.5f;
Vector3 e = b + (c - b) * 0.5f;
Vector3 f = d + (e - d) * 0.5f;
// left branch
ArcLengthUtil(A, a, d, f, subdiv - 1, ref L);
// right branch
ArcLengthUtil(f, e, c, D, subdiv - 1, ref L);
}
else
{
float controlNetLength = (B-A).magnitude + (C - B).magnitude + (D - C).magnitude;
float chordLength = (D - A).magnitude;
L += (chordLength + controlNetLength) / 2.0f;
}
}
first of first you should Understand the algorithm use in Bezier,
When i was coding a program by c# Which was full of graphic material I used beziers and many time I had to find a point cordinate in bezier , whic it seem imposisble in the first look. so the thing i do was to write Cubic bezier function in my costume math class which was in my project. so I will share the code with you first.
//--------------- My Costum Power Method ------------------\\
public static float FloatPowerX(float number, int power)
{
float temp = number;
for (int i = 0; i < power - 1; i++)
{
temp *= number;
}
return temp;
}
//--------------- Bezier Drawer Code Bellow ------------------\\
public static void CubicBezierDrawer(Graphics graphics, Pen pen, float[] startPointPixel, float[] firstControlPointPixel
, float[] secondControlPointPixel, float[] endPointPixel)
{
float[] px = new float[1111], py = new float[1111];
float[] x = new float[4] { startPointPixel[0], firstControlPointPixel[0], secondControlPointPixel[0], endPointPixel[0] };
float[] y = new float[4] { startPointPixel[1], firstControlPointPixel[1], secondControlPointPixel[1], endPointPixel[1] };
int i = 0;
for (float t = 0; t <= 1F; t += 0.001F)
{
px[i] = FloatPowerX((1F - t), 3) * x[0] + 3 * t * FloatPowerX((1F - t), 2) * x[1] + 3 * FloatPowerX(t, 2) * (1F - t) * x[2] + FloatPowerX(t, 3) * x[3];
py[i] = FloatPowerX((1F - t), 3) * y[0] + 3 * t * FloatPowerX((1F - t), 2) * y[1] + 3 * FloatPowerX(t, 2) * (1F - t) * y[2] + FloatPowerX(t, 3) * y[3];
graphics.DrawLine(pen, px[i - 1], py[i - 1], px[i], py[i]);
i++;
}
}
as you see above, this is the way a bezier Function work and it draw the same Bezier as Microsoft Bezier Function do( I've test it). you can make it even more accurate by incrementing array size and counter size or draw elipse instead of line& ... . All of them depend on you need and level of accuracy you need and ... .
Returning to main goal ,the Question is how to calc the lenght???
well The answer is we Have tons of point and each of them has an x coorinat and y coordinate which remember us a triangle shape & especially A RightTriabgle Shape. so if we have point p1 & p2 , we can calculate the distance of them as a RightTriangle Chord. as we remeber from our math class in school, in ABC Triangle of type RightTriangle, chord Lenght is -> Sqrt(Angle's FrontCostalLenght ^ 2 + Angle's SideCostalLeghth ^ 2);
and there is this relation betwen all points we calc the lenght betwen current point and the last point before current point(exmp p[i - 1] & p[i]) and store sum of them all in a variable. lets show it in code bellow
//--------------- My Costum Power Method ------------------\\
public static float FloatPower2(float number)
{
return number * number;
}
//--------------- My Bezier Lenght Calculator Method ------------------\\
public static float CubicBezierLenghtCalculator(float[] startPointPixel
, float[] firstControlPointPixel, float[] secondControlPointPixel, float[] endPointPixel)
{
float[] tmp = new float[2];
float lenght = 0;
float[] px = new float[1111], py = new float[1111];
float[] x = new float[4] { startPointPixel[0], firstControlPointPixel[0]
, secondControlPointPixel[0], endPointPixel[0] };
float[] y = new float[4] { startPointPixel[1], firstControlPointPixel[1]
, secondControlPointPixel[1], endPointPixel[1] };
int i = 0;
for (float t = 0; t <= 1.0; t += 0.001F)
{
px[i] = FloatPowerX((1.0F - t), 3) * x[0] + 3 * t * FloatPowerX((1.0F - t), 2) * x[1] + 3F * FloatPowerX(t, 2) * (1.0F - t) * x[2] + FloatPowerX(t, 3) * x[3];
py[i] = FloatPowerX((1.0F - t), 3) * y[0] + 3 * t * FloatPowerX((1.0F - t), 2) * y[1] + 3F * FloatPowerX(t, 2) * (1.0F - t) * y[2] + FloatPowerX(t, 3) * y[3];
if (i > 0)
{
tmp[0] = Math.Abs(px[i - 1] - px[i]);// calculating costal lenght
tmp[1] = Math.Abs(py[i - 1] - py[i]);// calculating costal lenght
lenght += (float)Math.Sqrt(FloatPower2(tmp[0]) + FloatPower2(tmp[1]));// calculating the lenght of current RightTriangle Chord & add it each time to variable
}
i++;
}
return lenght;
}
if you wish to have faster calculation just need to reduce px & py array lenght and loob count.
We also can decrease memory need by reducing px and py to array lenght to 1 or make a simple double variable but becuase of Conditional situation Happend which Increase Our Big O I didn't do that.
Hope it helped you so much. if have another question just ask.
With Best regards, Heydar - Islamic Republic of Iran.
I have a C function that computes the values of 4 sines based on time elapsed. Using gprof, I figured that this function uses 100% (100.7% to be exact lol) of the CPU time.
void
update_sines(void)
{
clock_gettime(CLOCK_MONOTONIC, &spec);
s = spec.tv_sec;
ms = spec.tv_nsec * 0.0000001;
etime = concatenate((long)s, ms);
int k;
for (k = 0; k < 799; ++k)
{
double A1 = 145 * sin((RAND1 * k + etime) * 0.00333) + RAND5; // Amplitude
double A2 = 100 * sin((RAND2 * k + etime) * 0.00333) + RAND4; // Amplitude
double A3 = 168 * sin((RAND3 * k + etime) * 0.00333) + RAND3; // Amplitude
double A4 = 136 * sin((RAND4 * k + etime) * 0.00333) + RAND2; // Amplitude
double B1 = 3 + RAND1 + (sin((RAND5 * k) * etime) * 0.00216); // Period
double B2 = 3 + RAND2 + (sin((RAND4 * k) * etime) * 0.002); // Period
double B3 = 3 + RAND3 + (sin((RAND3 * k) * etime) * 0.00245); // Period
double B4 = 3 + RAND4 + (sin((RAND2 * k) * etime) * 0.002); // Period
double x = k; // Current x
double C1 = 0.6 * etime; // X axis move
double C2 = 0.9 * etime; // X axis move
double C3 = 1.2 * etime; // X axis move
double C4 = 0.8 * etime + 200; // X axis move
double D1 = RAND1 + sin(RAND1 * x * 0.00166) * 4; // Y axis move
double D2 = RAND2 + sin(RAND2 * x * 0.002) * 4; // Y axis move
double D3 = RAND3 + cos(RAND3 * x * 0.0025) * 4; // Y axis move
double D4 = RAND4 + sin(RAND4 * x * 0.002) * 4; // Y axis move
sine1[k] = A1 * sin((B1 * x + C1) * 0.0025) + D1;
sine2[k] = A2 * sin((B2 * x + C2) * 0.00333) + D2 + 100;
sine3[k] = A3 * cos((B3 * x + C3) * 0.002) + D3 + 50;
sine4[k] = A4 * sin((B4 * x + C4) * 0.00333) + D4 + 100;
}
}
And this is the output from gprof:
Flat profile:
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls Ts/call Ts/call name
100.07 0.04 0.04
I'm currently getting a frame rate of roughly 30-31 fps using this. Now I figure there as to be a more efficient way to do this.
As you noticed I already changed all the divisions to multiplications but that had very little effect on performance.
How could I increase the performance of this math heavy function?
Besides all the other advice given in other answers, here is a pure algorithmic optimization.
In most cases, you're computing something of the form sin(k * a + b), where a and b are constants, and k is a loop variable. If you were also to compute cos(k * a + b), then you could use a 2D rotation matrix to form a recurrence relationship (in matrix form):
|cos(k*a + b)| = |cos(a) -sin(a)| * |cos((k-1)*a + b)|
|sin(k*a + b)| |sin(a) cos(a)| |sin((k-1)*a + b)|
In other words, you can calculate the value for the current iteration in terms of the value from the previous iteration. Thus, you only need to to do the full trig calculation for k == 0, but the rest can be calculated via this recurrence (once you have calculated cos(a) and sin(a), which are constants). So you eliminate 75% of the trig function calls (it's not clear the same trick can be pulled for the final set of trig calls).
If you don't need all that precision, create a lookup for the sin() values you need, so if 1 degree is enough, use double sin_lookup[360], etc.. And possibly float sin_lookup[360] if float precision is sufficient.
Also, as noted in comments, at a certain point as per Keith, "You might also consider using linear interpolation between lookup values, which should give you substantially better accuracy (a reasonably continuous function rather than a step function) at a fairly small cost in performance"
EDIT: also consider changing the hardcoded A1,A2,A3,A4 pattern to arrays of size[4], and looping from 0 to 3 - should allow vectorization on many platforms and allow parrellism without needing to manage threads
EDIT2: some code and results
(Coded in C++ just to make comparisons easy between precisions, calcs are the same in C)
class simple_trig
{
public:
simple_trig(size_t prec) : precision(prec)
{
static const double PI=3.141592653589793;
const double dprec=(double)prec;
const double quotient=(2.0*PI)/dprec;
rev_quotient=dprec/(2.0*PI);
values.reserve(prec);
for (int i=0; i < precision; ++i)
{
values[i]=::sin(quotient*(double)i);
}
}
double sin(double x) const
{
double cvt=x*rev_quotient;
int index=(int)cvt;
double delta=cvt-(double)index;
int lookup1=index%precision;
int lookup2=(index+1)%precision;
return values[lookup1]*(1.0-delta)+values[lookup2]*delta;
}
double cos(double x) const
{
double cvt=x*rev_quotient;
int index=(int)cvt;
double delta=cvt-(double)index;
int lookup1=(index+precision/4)%precision;
int lookup2=(index+precision/4+1)%precision;
return values[lookup1]*(1.0-delta)+values[lookup2]*delta;
}
private:
const size_t precision;
double rev_quotient;
std::vector<double> values;
};
Examples Low is 100, Med is 1000 and High is 10,000
X=0 Sin=0 Sin Low=0 Sin Med=0 Sin High=0
X=0 Cos=1 Cos Low=1 Cos Med=1 Cos High=1
X=0.5 Sin=0.479426 Sin Low=0.479389 Sin Med=0.479423 Sin High=0.479426
X=0.5 Cos=0.877583 Cos Low=0.877512 Cos Med=0.877578 Cos High=0.877583
X=1.33333 Sin=0.971938 Sin Low=0.971607 Sin Med=0.971935 Sin High=0.971938
X=1.33333 Cos=0.235238 Cos Low=0.235162 Cos Med=0.235237 Cos High=0.235238
X=2.25 Sin=0.778073 Sin Low=0.777834 Sin Med=0.778072 Sin High=0.778073
X=2.25 Cos=-0.628174 Cos Low=-0.627986 Cos Med=-0.628173 Cos High=-0.628174
X=3.2 Sin=-0.0583741 Sin Low=-0.0583689 Sin Med=-0.0583739 Sin High=-0.0583741
X=3.2 Cos=-0.998295 Cos Low=-0.998166 Cos Med=-0.998291 Cos High=-0.998295
X=4.16667 Sin=-0.854753 Sin Low=-0.854387 Sin Med=-0.854751 Sin High=-0.854753
X=4.16667 Cos=-0.519036 Cos Low=-0.518818 Cos Med=-0.519034 Cos High=-0.519036
X=5.14286 Sin=-0.90877 Sin Low=-0.908542 Sin Med=-0.908766 Sin High=-0.90877
X=5.14286 Cos=0.417296 Cos Low=0.417195 Cos Med=0.417294 Cos High=0.417296
X=6.125 Sin=-0.157526 Sin Low=-0.157449 Sin Med=-0.157526 Sin High=-0.157526
X=6.125 Cos=0.987515 Cos Low=0.987028 Cos Med=0.987512 Cos High=0.987515
X=7.11111 Sin=0.73653 Sin Low=0.736316 Sin Med=0.736527 Sin High=0.73653
X=7.11111 Cos=0.676405 Cos Low=0.676213 Cos Med=0.676403 Cos High=0.676405
X=8.1 Sin=0.96989 Sin Low=0.969741 Sin Med=0.969887 Sin High=0.96989
X=8.1 Cos=-0.243544 Cos Low=-0.24351 Cos Med=-0.243544 Cos High=-0.243544
X=9.09091 Sin=0.327701 Sin Low=0.327558 Sin Med=0.3277 Sin High=0.327701
X=9.09091 Cos=-0.944782 Cos Low=-0.944381 Cos Med=-0.944779 Cos High=-0.944782
X=10.0833 Sin=-0.611975 Sin Low=-0.611673 Sin Med=-0.611973 Sin High=-0.611975
X=10.0833 Cos=-0.790877 Cos Low=-0.790488 Cos Med=-0.790875 Cos High=-0.790877
It seems to me that sine1, sine2, sine3 and sine4 arrays are completely independent from eachother. So you are basically running a single for loop for 4 different arrays which have no dependency.
Spawn 4 threads, 1 for each, so you have 4 for loops running at the same time. On multicore machine this should speed up your function dramatically. As a matter of fact, it should be a perfect 4x speedup (+- ...).
Actually combining the use of threads (consider this with OpenMP) and the use of a table for the sin is a good idea. If possible use float instead of double and, depending on the platform, you could also use simd instructions, but the later would make the use of threads unnecessary.
Cheers
Here is a C++ snippet to use the rotation matrix suggested in the accepted answer.
float a = 0.343;
float b = 2.3232;
float sina{};
float cosa{};
sincosf(a, &sina, &cosa);
float resSin{};
float resCos{};
for (int k = 0; k < 5; k++) {
if (k == 0) {
sincosf(b, &resSin, &resCos);
} else {
float newResCos, newResSin;
newResCos = cosa * resCos - sina * resSin;
newResSin = sina * resCos + cosa * resSin;
resCos = newResCos;
resSin = newResSin;
}
}
I need a function to animate a sine wave infinitely over time. The sine wave moves to the left.
My sine wave is built using the following equation:
A * sin(B * x + C) + D
Now to animate the sine wave as if it is moving to the left, I simply increase C by 1 everytime I refresh the screen. Now this is all fine and dandy for a few minutes but I need to have that animation run for hours. I can't just have an integer build up 60 times a second forever. How does someone deal with this? Do I just try to find a point where the sine wave crosses 0 and then restart the animation from 0?
I just need to have the logic of something like this explained.
EDIT #1
I forgot to mention that there's a randomized component to my sine. The sine is not continuously the same. A and D are sinusoidal functions tied to that integer at the moment. The sine needs to look random with varying periods and amplitudes.
EDIT #2
Edited see Edit 3
EDIT #3
#Potatoswatter I tried implementing your technique but I don't think I'm getting it. Here's what I got:
static double i = 0;
i = i + (MPI / 2);
if ( i >= 800 * (MPI / 2) ) i -= 800 * (MPI / 2);
for (k = 0; k < 800; ++k)
{
double A1 = 145 * sin((rand1 * (k - 400) + i) / 300) + rand3; // Amplitude
double A2 = 100 * sin((rand2 * (k - 400) + i) / 300) + rand2; // Amplitude
double A3 = 168 * sin((rand3 * (k - 400) + i) / 300) + rand1; // Amplitude
double B1 = 3 + rand1 + (sin((rand3 * k) * i) / (500 * rand1)); // Period
double B2 = 3 + rand2 + (sin((rand2 * k) * i) / 500); // Period
double B3 = 3 + rand3 + (sin((rand1 * k) * i) / (500 * rand3)); // Period
double x = k; // Current x
double C1 = 10 * i; // X axis move
double C2 = 11 * i; // X axis move
double C3 = 12 * i; // X axis move
double D1 = rand1 + sin(rand1 * x / 600) * 4; // Y axis move
double D2 = rand2 + sin(rand2 * x / 500) * 4; // Y axis move
double D3 = rand3 + cos(rand3 * x / 400) * 4; // Y axis move
sine1[k] = (double)A1 * sin((B1 * x + C1) / 400) + D1;
sine2[k] = (double)A2 * sin((B2 * x + C2) / 300) + D2 + 100;
sine3[k] = (double)A3 * cos((B3 * x + C3) / 500) + D3 + 50;
}
How do I modify this to make it work?
Halp!
Sine has a period of 2 pi, meaning that sin(x) = sin(x + 2 * M_PI), for any x.
So, you could just increase C by, say, pi/n where n is any integer, as you refresh the screen, and after 2n refreshes, reset C (to 0, or whatever).
Edit for clarity: the integer n is not meant to change over time.
Instead, pick some n, for example, let's say n = 10. Now, every frame, increase x by pi / 10. After 20 frames, you have increased x by a total of 20 * pi / 10 = 2 * pi. Since sin(x + 2 * pi) = sin(x), you may as well just reset your sin(...) input to just x, and start the process over.
sin is periodic, with a period of 2π. Therefore, if the argument is greater than 2π, you can subtract 2 * M_PI from it and get the same answer.
Instead of using a single variable k to compute all waves of various speeds, use three variables double k1, k2, k3, and keep them bound in the range from 0 to 2π.
if ( k2 >= 2 * M_PI ) k2 -= 2 * M_PI;
They may be individually updated by adding some value each frame. If the increment may be more than 2π then subtracting a single 2π won't bring them back into range, but you can use fmod() instead.
I decided to change my course of action. I just drive i with the system's monotonic clock like so:
struct timespec spec;
int ms;
time_t s;
static unsigned long long etime = 0;
clock_gettime(CLOCK_MONOTONIC, &spec);
s = spec.tv_sec;
ms = spec.tv_nsec / 10000000;
etime = concatenate((long)s, ms);
Then I simply changed i to etime in my sine equations. Here's the concatenating function I used for this purpose:
unsigned concatenate(unsigned x, unsigned y) {
x = x * 100;
return x + y;
}
I would like to evaluate Pi approximately by running the following code which fits a regular polygon of n sides inside a circle with unit diameter and calculates its perimeter using the function in the code. However the output after the 34th term is 0 when long double variable type is used or it increases without bounds when double variable type is used. How can I remedy this situation? Any suggestion or help is appreciated and welcome.
Thanks
P.S: Operating system: Ubuntu 12.04 LTS 32-bit, Compiler: GCC 4.6.3
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <stdlib.h>
#define increment 0.25
int main()
{
int i = 0, k = 0, n[6] = {3, 6, 12, 24, 48, 96};
double per[61] = {0}, per2[6] = {0};
// Since the above algorithm is recursive we need to specify the perimeter for n = 3;
per[3] = 0.5 * 3 * sqrtl(3);
for(i = 3; i <= 60; i++)
{
per[i + 1] = powl(2, i) * sqrtl(2 * (1.0 - sqrtl(1.0 - (per[i] / powl(2, i)) * (per[i] / powl(2, i)))));
printf("%d %f \n", i, per[i]);
}
return 0;
for(k = 0; k < 6; k++)
{
//p[k] = k
}
}
Some ideas:
Use y = (1.0 - x)*( 1.0 + x) instead of y = 1.0 - x*x. This helps with 1 stage of "subtraction of nearly equal values", but I am still stuck on the next 1.0 - sqrtl(y) as y approaches 1.0.
// per[i + 1] = powl(2, i) * sqrtl(2 * (1.0 - sqrtl(1.0 - (per[i] / powl(2, i)) * (per[i] / powl(2, i)))));
long double p = powl(2, i);
// per[i + 1] = p * sqrtl(2 * (1.0 - sqrtl(1.0 - (per[i] / p) * (per[i] / p))));
long double x = per[i] / p;
// per[i + 1] = p * sqrtl(2 * (1.0 - sqrtl(1.0 - x * x)));
// per[i + 1] = p * sqrtl(2 * (1.0 - sqrtl((1.0 - x)*(1.0 + x)) ));
long double y = (1.0 - x)*( 1.0 + x);
per[i + 1] = p * sqrtl(2 * (1.0 - sqrtl(y) ));
Change array size or for()
double per[61+1] = { 0 }; // Add 1 here
...
for (i = 3; i <= 60; i++) {
...
per[i + 1] =
Following is a similar method for pi
unsigned n = 6;
double sine = 0.5;
double cosine = sqrt(0.75);
double pi = n*sine;
static const double mpi = 3.1415926535897932384626433832795;
do {
sine = sqrt((1 - cosine)/2);
cosine = sqrt((1 + cosine)/2);
n *= 2;
pi = n*sine;
printf("%6u s:%.17e c:%.17e pi:%.17e %%:%.6e\n", n, sine, cosine, pi, (pi-mpi)/mpi);
} while (n <500000);
Subtracting 1.0 from a nearly-1.0 number is leading to "catastrophic cancellation", where the relative error in a FP calculation skyrockets due to the loss of significant digits. Try evaluating pow(2, i) - (pow(2, i) - 1.0) for each i between 0 and 60 and you'll see what I mean.
The only real solution to this issue is reorganizing your equations to avoid subtracting nearly-equal nonzero quantities. For more details, see Acton, Real Computing Made Real, or Higham, Accuracy and Stability of Numerical Algorithms.