timing floating point calculation with clock_gettime in C, time keeps changing?

timing floating point calculation with clock_gettime in C, time keeps changing? - c

I'm trying to calculate the time of a piece of code (floating point multiplications) using clock_gettime(). I randomly generate 10 arrays of 100000 floats and then multiply them all together, ten at a time. What I want to do is measure the time it takes to perform ten floating point multiplies, then add that time value to the total time, and then print it all out at the end.
I thought it was all working, but I discovered that if I remove my printf() statements from the while loop, the time decreases from 28125692 to 17490394 nanoseconds! This shouldn't be happening. I placed the clock_gettime() calls at the beginning and end of the floating point multiplications, so theoretically, the printf() statement should not effect the elapsed time!
Any ideas? Thank you.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
int main(int argc, char *argv[])
{
int i;
int j;
float alpha[100000];
float bravo[100000];
float charlie[100000];
float delta[100000];
float echo[100000];
float foxtrot[100000];
float golf[100000];
float hotel[100000];
float india[100000];
float juliet[100000];
float max;
long long num_calcs;
struct timespec start_time;
struct timespec end_time;
int diff_seconds;
long diff_nanoseconds;
long long total_calcs;
long long total_seconds;
long long total_nanoseconds;
num_calcs = 100000;
max = 1000.0;
printf("\n%lld floating point calculations requested.", num_calcs);
printf("\nGenerating random floating point values...");
//initialize random seed
srand((unsigned int)time(NULL));
//generate random floating point values
for (i = 0; i < 100000; i++)
{
alpha[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
bravo[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
charlie[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
delta[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
echo[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
foxtrot[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
golf[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
hotel[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
india[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
juliet[i] = ((float) rand()) / ((float) (RAND_MAX)) * max;
}
printf("done!");
printf("\nRunning %lld floating point multiplications...", num_calcs);
//run calculations
i = 0;
total_calcs = 0;
total_seconds = 0;
total_nanoseconds = 0;
while (total_calcs < num_calcs)
{
printf("\n\nRunning 10 calculations...");
//do 10 multiplications
//start the timer
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time) < 0)
{
printf("\nclock_gettime for start_time failed, exiting...");
return -1;
}
alpha[i] * bravo[i] * charlie[i] * delta[i] * echo[i] *
foxtrot[i] * golf[i] * hotel[i] * india[i] * juliet[i];
//stop the timer
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_time) < 0)
{
printf("\nclock_gettime for end_time failed, exiting...");
return -1;
}
printf("done!");
total_calcs = total_calcs + 10; //increase total calculations
i++; //increment array index
//show timing statistics
printf("\nSTART TIME tv_sec: %d", (int) start_time.tv_sec);
printf("\nSTART TIME tv_nsec: %ld", start_time.tv_nsec);
printf("\nEND TIME tv_sec: %d", (int) end_time.tv_sec);
printf("\nEND TIME tv_nsec: %ld", end_time.tv_nsec);
//calculate time it took to run 10 floating point caculcations
if ((end_time.tv_nsec - start_time.tv_nsec) < 0)
{
diff_seconds = end_time.tv_sec - start_time.tv_sec - 1;
diff_nanoseconds = 1000000000 + end_time.tv_nsec - start_time.tv_nsec;
}
else
{
diff_seconds = end_time.tv_sec - start_time.tv_sec;
diff_nanoseconds = end_time.tv_nsec - start_time.tv_nsec;
}
//add elapsed time for the last 10 calculations to total elapsed time
total_seconds = total_seconds + diff_seconds;
total_nanoseconds = total_nanoseconds + diff_nanoseconds;
printf("\nPerformed 10 floating point multiplications in %d seconds and %ld nanoseconds.", diff_seconds, diff_nanoseconds);
printf("\nPerformed %lld floating point multiplications in %lld seconds and %lld nanoseconds.", total_calcs, total_seconds, total_nanoseconds);
}
printf("done!");
printf("\nPerformed %lld floating point multiplications in %lld seconds and %lld nanoseconds.\n", total_calcs, total_seconds, total_nanoseconds);
return 0;
}

The statement
alpha[i] * bravo[i] * charlie[i] * delta[i] * echo[i] *
foxtrot[i] * golf[i] * hotel[i] * india[i] * juliet[i];
is a no-op. An optimizing compiler will omit this from the executable.
To make it work, try adding the result to another variable and print the sum at the end of the program.

Related

find square root with Newton using integers

In this program, I'm trying to calculate the square root with the newton equation 1/2(x+a/x) just using integers. So if I repeat this equation at least 10 times, it should divide the number by 1000 and give an approximate value of the square root of a/1000. This is the code:
int main (){
int32_t a, x; //integers a and x
float root;
do{
scanf ("%d", &a); // get value of a from the user
if (a < 1000 ){ // if chosen number is less than 1000 the programm ends.
break;
}
x = ((float) a / 1000); //starting value of x is a / 1000;
for (int i = 0; i < 50;i++)
{
root = ((float) x * (float) x + a/1000) / ((float)2*x); // convert int to float //through casting
x = (float)root; // refresh the value of x to be the root of the last value.
}
printf ("%f\n", (float)root);
}while (1);
return 0;
}
so if I calculate the square root of 2000, it should give back the square root of 2(1.414..), but it just gives an approximate value: 1.50000
How can I correct this using integers and casting them with float?
thanks

#include <stdlib.h>
#include <stdio.h>
int main (int argc, char * *argv, char * *envp) {
int32_t a, x; //integers a and x
float root;
do {
scanf ("%d", &a); // get value of a from the user
if (a < 1000) { // if chosen number is less than 1000 the program ends.
break;
}
x = (int32_t)((float) a / 1000.0f); //starting value of x is a / 1000;
for (int i = 0; i < 1000; i++) {
// Fixed formula based on (x+a/x)/2
root = ((float)x + (((float)a) / (float)x)) / 2.0f;
//Below, wrong old formula
//root = ((float) x * (float) x + a / 1000) / ((float) 2 * x); // convert int to float //through casting
x = (int32_t) root; // refresh the value of x to be the root of the last value.
}
printf ("%f\n", root);
} while (1);
return (EXIT_SUCCESS);
}

The iterates of x = (x + a / x) / 2 for a = 2000000 and x0 = 1000 (all integer variables) are 1500, 1416 and 1414. Then 200000000 gives 14142 and so on.

Estimating the value of Pi using the Monte Carlo Method

I am writing a C program that will be able to accept an input value that dictates the number of iterations that will be used to estimate Pi.
For example, the number of points to be created as the number of iterations increases and the value of Pi also.
Here is the code I have so far:
#include <stdio.h>
#include <stdlib.h>
main()
{
const double pp = (double)RAND_MAX * RAND_MAX;
int innerPoint = 0, i, count;
printf("Enter the number of points:");
scanf("%d", &innerPoint);
for (i = 0; i < count; ++i){
float x = rand();
float y = rand();
if (x * x + y * y <= 1){
++innerPoint;
}
int ratio = 4 *(innerPoint/ i);
printf("Pi value is:", ratio);
}
}
Help fix my code as I'm facing program errors.

rand() returns an integer [0...RAND_MAX].
So something like:
float x = rand()*scale; // Scale is about 1.0/RAND_MAX
The quality of the Monte Carlo method is dependent on a good random number generator. rand() may not be that good, but let us assume it is a fair random number generator for this purpose.
The range of [0...RAND_MAX] is RAND_MAX+1 different values that should be distributed evenly from [0.0...1.0].
((float) rand())/RAND_MAX biases the end points 0.0 and 1.0 giving them twice the weight of others.
Consider instead [0.5, 1.5, 2.5, ... RAND_MAX + 0.5]/(RAND_MAX + 1).
RAND_MAX may exceed the precision of float so converting rand() or RAND_MAX, both int, to float can incurring rounding and further disturb the Monte Carlo method. Consider double.
#define RAND_MAX_P1 ((double)RAND_MAX + 1.0)
// float x = rand();
double x = ((double) rand() + 0.5)/RAND_MAX_P1;
x * x + y * y can also incur excessive rounding. C has hypot(x,y) for a better precision sqrt(x*x + y*y). Yet here, with small count, it likely makes no observable difference.
// if (x * x + y * y <= 1)
if (hypot(x, y <= 1.0))

I am sure it is not the best solution, but it should do the job and is similar to your code. Use a sample size of at least 10000 to get a value near PI.
As mentioned in the commenter: You should look at the data types of the return values functions give you.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main()
{
// Initialize random number generation
srand(time(NULL));
int samples = 10000;
int points_inside =0;
// Read integer - sample size (use at least 10000 to get near PI)
printf("Enter the number of points:");
scanf("%d", &samples);
for (int i = 0; i < samples; ++i){
// Get two numbers between 0 and 1
float x = (float) rand() / (float)RAND_MAX;
float y = (float) rand() / (float)RAND_MAX;
// Check if point is inside
if (x * x + y * y <= 1){
points_inside++;
}
// Calculate current ratio
float current_ratio = 4 * ((float) points_inside / (float) i);
printf("Current value of pi value is: %f \n", current_ratio);
}
}

Taylor Series in C (problem with sin(240) and sin(300))

#include <stdio.h>
#include <math.h>
const int TERMS = 7;
const float PI = 3.14159265358979;
int fact(int n) {
return n<= 0 ? 1 : n * fact(n-1);
}
double sine(int x) {
double rad = x * (PI / 180);
double sin = 0;
int n;
for(n = 0; n < TERMS; n++) { // That's Taylor series!!
sin += pow(-1, n) * pow(rad, (2 * n) + 1)/ fact((2 * n) + 1);
}
return sin;
}
double cosine(int x) {
double rad = x * (PI / 180);
double cos = 0;
int n;
for(n = 0; n < TERMS; n++) { // That's also Taylor series!
cos += pow(-1, n) * pow(rad, 2 * n) / fact(2 * n);
}
return cos;
}
int main(void){
int y;
scanf("%d",&y);
printf("sine(%d)= %lf\n",y, sine(y));
printf("cosine(%d)= %lf\n",y, cosine(y));
return 0;
}
The code above was implemented to compute sine and cosine using Taylor series.
I tried testing the code and it works fine for sine(120).
I am getting wrong answers for sine(240) and sine(300).
Can anyone help me find out why those errors occur?

You should calculate the functions in the first quadrant only [0, pi/2). Exploit the properties of the functions to get the values for other angles. For instance, for values of x between [pi/2, pi), sin(x) can be calculated by sin(pi - x).
The sine of 120 degrees, which is 40 past 90 degrees, is the same as 50 degrees: 40 degrees before 90. Sine starts at 0, then rises toward 1 at 90 degrees, and then falls again in a mirror image to zero at 180.
The negative sine values from pi to 2pi are just -sin(x - pi). I'd handle everything by this recursive definition:
sin(x):
cases x of:
[0, pi/2) -> calculate (Taylor or whatever)
[pi/2, pi) -> sin(pi - x)
[pi/2, 2pi) -> -sin(x - pi)
< 0 -> sin(-x)
>= 2pi -> sin(fmod(x, 2pi)) // floating-point remainder
A similar approach for cos, using identity cases appropriate for it.

The key point is:
TERMS is too small to have proper precision. And if you increase TERMS, you have to change fact implementation as it will likely overflow when working with int.
I would use a sign to toggle the -1 power instead of pow(-1,n) overkill.
Then use double for the value of PI to avoid losing too many decimals
Then for high values, you should increase the number of terms (this is the main issue). using long long for your factorial method or you get overflow. I set 10 and get proper results:
#include <stdio.h>
#include <math.h>
const int TERMS = 10;
const double PI = 3.14159265358979;
long long fact(int n) {
return n<= 0 ? 1 : n * fact(n-1);
}
double powd(double x,int n) {
return n<= 0 ? 1 : x * powd(x,n-1);
}
double sine(int x) {
double rad = x * (PI / 180);
double sin = 0;
int n;
int sign = 1;
for(n = 0; n < TERMS; n++) { // That's Taylor series!!
sin += sign * powd(rad, (2 * n) + 1)/ fact((2 * n) + 1);
sign = -sign;
}
return sin;
}
double cosine(int x) {
double rad = x * (PI / 180);
double cos = 0;
int n;
int sign = 1;
for(n = 0; n < TERMS; n++) { // That's also Taylor series!
cos += sign * powd(rad, 2 * n) / fact(2 * n);
sign = -sign;
}
return cos;
}
int main(void){
int y;
scanf("%d",&y);
printf("sine(%d)= %lf\n",y, sine(y));
printf("cosine(%d)= %lf\n",y, cosine(y));
return 0;
}
result:
240
sine(240)= -0.866026
cosine(240)= -0.500001
Notes:
my recusive implementation of pow using successive multiplications is probably not needed, since we're dealing with floating point. It introduces accumulation error if n is big.
fact could be using floating point to allow bigger numbers and better precision. Actually I suggested long long but it would be better not to assume that the size will be enough. Better use standard type like int64_t for that.
fact and pow results could be pre-computed/hardcoded as well. This would save computation time.

const double TERMS = 14;
const double PI = 3.14159265358979;
double fact(double n) {return n <= 0.0 ? 1 : n * fact(n - 1);}
double sine(double x)
{
double rad = x * (PI / 180);
rad = fmod(rad, 2 * PI);
double sin = 0;
for (double n = 0; n < TERMS; n++)
sin += pow(-1, n) * pow(rad, (2 * n) + 1) / fact((2 * n) + 1);
return sin;
}
double cosine(double x)
{
double rad = x * (PI / 180);
rad = fmod(rad,2*PI);
double cos = 0;
for (double n = 0; n < TERMS; n++)
cos += pow(-1, n) * pow(rad, 2 * n) / fact(2 * n);
return cos;
}
int main()
{
printf("sine(240)= %lf\n", sine(240));
printf("cosine(300)= %lf\n",cosine(300));
}

how to print multiple max values using loop

Trying to calculate at which frequencies voltage hits max, i am able to print the most recent max but there may be lower values of frequency in which it is able to max voltage.
I am able to get the highest or lowest freq by switching the loop from + to - from or to 1000000 in increments of 10.
Tried nested if statement inside of VO > voMax
#include <stdio.h>
#include <conio.h>
#include <math.h>
#define PI 3.14f
#define Vi 5
#define L 4.3e-4
#define C 5.1e-6
int getFreq();
long getResist();
float getVO(float XL, float XC, int R);
float getXC(int f);
float getXL(int f);
int main()
{
long resist, freq, fMax;
float XL, XC, VO, voMax;
voMax = 0;
fMax = 0;
resist = getResist();
for (freq = 1000000; freq >= 0; freq -= 10)
{
XL = getXL(freq);
XC = getXC(freq);
VO = getVO(XL, XC, resist);
if (1000000 == freq)
{
fMax = freq;
voMax = VO;
}
else if (VO > voMax)
{
fMax = freq;
voMax = VO;
}
}
printf("VO = %f Frequency = %d\n", voMax, fMax);
getch();
return 0;
}
float getXL(long f)
{
float XL;
XL = 2 * PI * f * C;
return XL;
}
float getXC(long f)
{
float XC;
XC = 1 / (2 * PI * f * C);
return XC;
}
float getVO(float XL, float XC, long R)
{
float VO;
VO = (Vi * R) / sqrt((XL - XC) * (XL - XC) + R * R);
return VO;
}
int getFreq()
{
int freq;
freq = 0;
printf("please enter a frequency:");
scanf("%d", &freq);
return freq;
}
long getResist()
{
int resist;
resist = 0;
printf("please enter a resistance:");
scanf("%d", &resist);
return resist;
}
I want the voltage to print max at multiple freq.

Well, what you want is to generate "a lot" of data, and then make some analysis. I would actually implement it in two steps:
Generate the data (and save it in an array or in a file)
Do any analysis you need on that data.
After you get the desired result with this clear approach, you can move to the next step and try to optimize the algorithm, according to any optimization rule you need.
I want the voltage to print max at multiple freq.
I think you need a small code update. You have the following sequence:
voMax = 0;
fMax = 0;
resist = getResist();
for (freq = 1000000; freq >= 0; freq -= 10)
{
you should probably have:
fMax = 0;
resist = getResist();
for (freq = 1000000; freq >= 0; freq -= 10)
{
voMax = 0;
(I moved "voMax = 0;" inside the "for").
In that way, you can calculate max voltages for all frequencies, without interference from the other frequencies.

why is 'fast inverse square root' slower than 1/sqrt() at extremely large float?

The following full code could compare speed of fast inverse square root with 1/sqrt(). According to this sentence in wikipedia, (i.e. The algorithm was approximately four times faster than computing the square root with another method and calculating the reciprocal via floating point division.)
But here is why I am here: it is slower than 1/sqrt(). something wrong in my code? please.
#include <stdio.h>
#include <time.h>
#include <math.h>
float FastInvSqrt (float number);
int
main ()
{
float x = 1.0e+100;
int N = 100000000;
int i = 0;
clock_t start2 = clock ();
do
{
float z = 1.0 / sqrt (x);
i++;
}
while (i < N);
clock_t end2 = clock ();
double time2 = (end2 - start2) / (double) CLOCKS_PER_SEC;
printf ("1/sqrt() spends %13f sec.\n\n", time2);
i = 0;
clock_t start1 = clock ();
do
{
float y = FastInvSqrt (x);
i++;
}
while (i < N);
clock_t end1 = clock ();
double time1 = (end1 - start1) / (double) CLOCKS_PER_SEC;
printf ("FastInvSqrt() spends %f sec.\n\n", time1);
printf ("fast inverse square root is faster %f times than 1/sqrt().\n", time2/time1);
return 0;
}
float
FastInvSqrt (float x)
{
float xhalf = 0.5F * x;
int i = *(int *) &x; // store floating-point bits in integer
i = 0x5f3759df - (i >> 1); // initial guess for Newton's method
x = *(float *) &i; // convert new bits into float
x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
return x;
}
The result is as follows:
1/sqrt() spends 0.850000 sec.
FastInvSqrt() spends 0.960000 sec.
fast inverse square root is faster 0.885417 times than 1/sqrt().

A function that reduces the domain in which it computes with precision will have less computational complexity (meaning that it can be computed faster). This can be thought of as optimizing the computation of a function's shape for a subset of its definition, or like search algorithms which each are best for a particular kind of input (No Free Lunch theorem).
As such, using this function for inputs outside the interval [0, 1] (which I suppose it was optimized / designed for) means using it in the subset of inputs where its complexity is worse (higher) than other possibly specialized variants of functions that compute square roots.
The sqrt() function you are using from the library was itself (likely) also optimized, as it has pre-computed values in a sort of LUT (which act as initial guesses for further approximations); using such a more "general function" (meaning that it covers more of the domain and tries to efficientize it by precomputation, for example; or eliminating redundant computation, but that is limited; or maximizing data reuse at run-time) has its complexity limitations, because the more choices between which precomputation to use for an interval, the more decision overhead there is; so knowing at compile-time that all your inputs to sqrt are in the interval [0, 1] would help reduce the run-time decision overhead, as you would know ahead of time which specialized approximation function to use (or you could generate specialized functions for each interval of interest, at compile-time -> see meta-programming for this).

I correct my code as follows:
1. compute random number, instead of a fixed number.
2. count time consumption inside while loop and sum of it.
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
float FastInvSqrt (float number);
int
main ()
{
float x=0;
time_t t;
srand((unsigned) time(&t));
int N = 1000000;
int i = 0;
double sum_time2=0.0;
do
{
x=(float)(rand() % 10000)*0.22158;
clock_t start2 = clock ();
float z = 1.0 / sqrt (x);
clock_t end2 = clock ();
sum_time2=sum_time2+(end2-start2);
i++;
}
while (i < N);
printf ("1/sqrt() spends %13f sec.\n\n", sum_time2/(double)CLOCKS_PER_SEC);
double sum_time1=0.0;
i = 0;
do
{
x=(float)(rand() % 10000)*0.22158;
clock_t start1 = clock ();
float y = FastInvSqrt (x);
clock_t end1 = clock ();
sum_time1=sum_time1+(end1-start1);
i++;
}
while (i < N);
printf ("FastInvSqrt() spends %f sec.\n\n", sum_time1/(double)CLOCKS_PER_SEC);
printf ("fast inverse square root is faster %f times than 1/sqrt().\n", sum_time2/sum_time1);
return 0;
}
float
FastInvSqrt (float x)
{
float xhalf = 0.5F * x;
int i = *(int *) &x; // store floating-point bits in integer
i = 0x5f3759df - (i >> 1); // initial guess for Newton's method
x = *(float *) &i; // convert new bits into float
x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
//x = x * (1.5 - xhalf * x * x); // One round of Newton's method
return x;
}
but fast inverse square root still slower that 1/sqrt().
1/sqrt() spends 0.530000 sec.
FastInvSqrt() spends 0.540000 sec.
fast inverse square root is faster 0.981481 times than 1/sqrt().

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

timing floating point calculation with clock_gettime in C, time keeps changing? - c

The statement alpha[i] * bravo[i] * charlie[i] * delta[i] * echo[i] * foxtrot[i] * golf[i] * hotel[i] * india[i] * juliet[i]; is a no-op. An optimizing compiler will omit this from the executable. To make it work, try adding the result to another variable and print the sum at the end of the program.

Related

find square root with Newton using integers

Estimating the value of Pi using the Monte Carlo Method

Taylor Series in C (problem with sin(240) and sin(300))

how to print multiple max values using loop

why is 'fast inverse square root' slower than 1/sqrt() at extremely large float?

Categories

Resources