I'm writing a socket program that maintains FIFO queues for two input sockets. When deciding which queue to service, the program pulls the most recent time-stamp from each queue.
I need a reliable method for comparing two timeval structs. I tried using timercmp(), but my version of gcc doesn't support it, and documentation states that the function is not POSIX compliant.
What should I do?
timercmp() is just a macro in libc (sys/time.h):
/* Compare two `struct timeval' pointers A and B with the relational
   operator CMP (<, >, ==, ...).  The seconds decide the outcome unless
   they are equal, in which case the microseconds do.  */
# define timercmp(a, b, CMP) \
  (((a)->tv_sec != (b)->tv_sec) \
   ? ((a)->tv_sec CMP (b)->tv_sec) \
   : ((a)->tv_usec CMP (b)->tv_usec))
If you need timersub():
/* Store A - B in RESULT (all three are `struct timeval' pointers).
   When the microsecond difference comes out negative, one second is
   borrowed so that RESULT's tv_usec ends up non-negative.  */
# define timersub(a, b, result) \
  do { \
    (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
    if (((result)->tv_usec = (a)->tv_usec - (b)->tv_usec) < 0) { \
      (result)->tv_usec += 1000000; \
      (result)->tv_sec -= 1; \
    } \
  } while (0)
Googling "timeval" gives this as the first result. From that page:
It is often necessary to subtract two values of type struct timeval or struct timespec. Here is the best way to do this. It works even on some peculiar operating systems where the tv_sec member has an unsigned type.
/* Subtract the `struct timeval' values X and Y,
   storing the result in RESULT.
   Return 1 if the difference is negative, otherwise 0.

   X and Y are only read: the carry is performed on a local copy of Y,
   so the caller's timestamps are left untouched.  */
int
timeval_subtract (struct timeval *result, struct timeval *x, struct timeval *y)
{
  struct timeval sub = *y;   /* working copy of the subtrahend */
  int negative;

  /* Perform the carry for the later subtraction by updating sub. */
  if (x->tv_usec < sub.tv_usec) {
    int nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
    sub.tv_usec -= 1000000 * nsec;
    sub.tv_sec += nsec;
  }
  if (x->tv_usec - sub.tv_usec > 1000000) {
    int nsec = (x->tv_usec - sub.tv_usec) / 1000000;
    sub.tv_usec += 1000000 * nsec;
    sub.tv_sec -= nsec;
  }

  /* Decide the sign by comparing the seconds rather than testing the
     difference, so it also works where tv_sec has an unsigned type.
     Done before writing RESULT in case RESULT aliases X.  */
  negative = x->tv_sec < sub.tv_sec;

  /* Compute the time remaining to wait.
     tv_usec is certainly positive. */
  result->tv_sec = x->tv_sec - sub.tv_sec;
  result->tv_usec = x->tv_usec - sub.tv_usec;

  /* Return 1 if result is negative. */
  return negative;
}
This is slightly different, but I think clearly illustrates the logic involved.
I'm working on some MSP430 code in C, and have a timestamp struct very similar to timeval, but with nsecs instead of usecs.
This code keeps everything positive, so unsigned ints would work fine, and avoids overflows (I think). It also doesn't modify the timestamps/timevals being passed in, except the result of course.
typedef struct timestamp {
    int32_t secs;
    int32_t nsecs;   // expected range: 0 .. 999999999
} timestamp_t;

// Computes |x - y| into *result and reports the sign of x - y.
//
// x, y    timestamps to compare; they are only read
// result  receives the absolute difference, with nsecs in 0 .. 999999999;
//         may point at the same object as x or y
// returns 1 if x is earlier than y (difference negative), 0 otherwise
//         (equal timestamps give 0 and a zero result)
int timestamp_sub(const timestamp_t *x, const timestamp_t *y, timestamp_t *result){
    // Typed 32-bit constant: 1000*1000*1000 would be evaluated in int,
    // which is only 16 bits wide on targets such as the MSP430.
    const int32_t nsecs_per_sec = INT32_C(1000000000);

    // Order the operands so that the subtraction below never goes negative.
    const timestamp_t *later = x;
    const timestamp_t *earlier = y;
    int negative = 0;
    if( x->secs < y->secs || (x->secs == y->secs && x->nsecs < y->nsecs) ){
        negative = 1;
        later = y;
        earlier = x;
    }

    int32_t secs = later->secs - earlier->secs;
    int32_t nsecs = later->nsecs - earlier->nsecs;
    if( nsecs < 0 ){
        // borrow one second
        secs -= 1;
        nsecs += nsecs_per_sec;
    }

    result->secs = secs;
    result->nsecs = nsecs;
    return negative;
}
For viewing timevals I just whipped this up. It returns a timeval as a string that you can print or send to a text file:
/* Format *intv as "<seconds>.<microseconds>" for printing or logging.
 *
 * The microseconds are zero-padded to six digits so that the text reads
 * as a decimal number (tv_usec == 5 prints as ".000005", not ".5").
 *
 * Returns a pointer to a static buffer: the contents are overwritten by
 * the next call, and the function is not reentrant.
 */
char *tv2str(struct timeval *intv) {
    static char ans[200];
    snprintf(ans, sizeof ans, "%lld.%06ld",
             (long long) intv->tv_sec, (long) intv->tv_usec);
    return ans;
}
Use like:
printf("nowtv: %s\n",tv2str(&nowtv));
nowtv: 1568407554.646623
Timercmp() didn't seem to work right so I wanted a way to check up on it by actually looking at some values.
Related
I am trying to create a modulated waveform out of 2 sine waves.
To do this I need the modulo(fmodf) to know what amplitude a sine with a specific frequency(lo_frequency) has at that time(t). But I get a hardfault when the following line is executed:
j = fmodf(2 * PI * lo_frequency * t, 2 * PI);
Do you have an idea why this gives me a hardfault ?
Edit 1:
I exchanged fmodf with my_fmodf:
/* Stand-in for fmodf(): remainder of x / y with the sign of x.
 *
 * Returns 0 when y is 0. Intended for finite inputs.
 */
float my_fmodf(float x, float y){
    if(y == 0){
        return 0;
    }
    float n = x / y;
    /* Truncate the quotient toward zero; without this x - n * y is always ~0.
     * A float of magnitude >= 2^23 has no fractional part, so only smaller
     * values need the integer round-trip (and those always fit in a long). */
    if(n > -8388608.0f && n < 8388608.0f){
        n = (float)(long) n;
    }
    return x - n * y;
}
But still the hardfault occurs, and when I debug it it doesn't even jump into this function(my_fmodf).
Heres the whole function in which this error occurs:
int* create_wave(int* message){
/* Mixes the message signal at 10kHz and the carrier at 40kHz.
* When a bit of the message is 0 the amplitude is lowered to 10%.
* When a bit of the message is 1 the amplitude is 100%.
* The output of the STM32 can't be negative, that's why the wave swings between
* 0 and 256 (8bit precision for faster DAC)
*
* message is read at indices 0..15 (one entry per bit).
* Returns the sample buffer `output`.
*/
static int rf_frequency = 10000;
static int lo_frequency = 40000;
static int sample_rate = 100000;
// NOTE(review): automatic (stack) array of sample_rate = 100000 ints.
int output[sample_rate];
int index, mix;
float j, t;
// NOTE(review): `<=` lets i reach sample_rate, one past the last valid index of output.
for(int i = 0; i <= sample_rate; i++){
t = i * 0.00000001f; // i * 10^-8
// carrier phase, reduced to one period
j = my_fmodf(2 * PI * lo_frequency * t, 2 * PI);
if (j < 0){
j += (float) 2 * PI;
}
// lo_frequency/rf_frequency is an integer division (40000/10000 == 4),
// so index == floor(40000 * t): the current message bit.
index = floor((16.0f / (lo_frequency/rf_frequency * 0.0001f)) * t);
if (index < 16) {
if (!message[index]) {
// 0 bit: sine scaled to 10% around an offset of 115
mix = 115 + sin1(j) * 0.1f;
} else {
// 1 bit: full-amplitude sine
mix = sin1(j);
}
} else {
// all 16 bits emitted; the rest of output is left unwritten
break;
}
output[i] = mix;
}
// NOTE(review): output is an automatic array, so this pointer dangles once the function returns.
return output;
}
Edit 2:
I fixed the warning: function returns address of local variable [-Wreturn-local-addr] the way "chux - Reinstate Monica" suggested.
/* Second version of create_wave: same mixing loop, but the sample buffer is
 * heap-allocated so that it can be returned. The caller owns the returned
 * pointer and must free() it.
 */
int* create_wave(int* message){
static uint16_t rf_frequency = 10000;
static uint32_t lo_frequency = 40000;
static uint32_t sample_rate = 100000;
// NOTE(review): the malloc result is not checked; if the request for
// sample_rate ints fails, output is NULL and the store output[i] below
// dereferences it.
int *output = malloc(sizeof *output * sample_rate);
uint8_t index, mix;
// NOTE(review): n is never used.
float j, n, t;
for(int i = 0; i < sample_rate; i++){
t = i * 0.00000001f; // i * 10^-8
// carrier phase, reduced to one period
j = fmodf(2 * PI * lo_frequency * t, 2 * PI);
if (j < 0){
j += 2 * PI;
}
// integer division lo_frequency/rf_frequency == 4, so this is floor(40000 * t);
// the value is narrowed to uint8_t on assignment
index = floor((16.0f / (lo_frequency/rf_frequency * 0.0001f)) * t);
if (index < 16) {
if (!message[index]) {
// 0 bit: sine scaled to 10% around an offset of 115
mix = (uint8_t) floor(115 + sin1(j) * 0.1f);
} else {
// 1 bit: full-amplitude sine
mix = sin1(j);
}
} else {
// all 16 bits emitted; the rest of output is left unwritten
break;
}
output[i] = mix;
}
return output;
}
But now I get the hardfault on this line:
output[i] = mix;
EDIT 3:
Because the previous code contained a very large buffer array that did not fit into the 16KB SRAM of the STM32F303K8 I needed to change it.
Now I use a "ping-pong" buffer where I use the DMA callbacks for "first-half-transmitted" and "completely-transmitted":
/* DAC/DMA "half complete" callback: refills the first half of dac_buf
 * (indices 0..127) while, presumably, the DMA plays the second half.
 * Sets GPIOB pin 3 on entry (the matching callback resets it).
 */
void HAL_DAC_ConvHalfCpltCallbackCh1(DAC_HandleTypeDef * hdac){
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_3, GPIO_PIN_SET);
for(uint16_t i = 0; i < 128; i++){
// every 8th table entry, wrapping at 256
// NOTE(review): assumes sin_table holds one sine period in 256 entries - confirm
new_value = sin_table[(i * 8) % 256];
if (message[message_index] == 0x0){
// 0 bit: 10% amplitude around an offset of 115 (float multiply per sample)
dac_buf[i] = new_value * 0.1f + 115;
} else {
// 1 bit: full amplitude
dac_buf[i] = new_value;
}
}
}
/* DAC/DMA "transfer complete" callback: refills the second half of dac_buf
 * (indices 128..255), then advances to the next message bit, wrapping
 * after 16 bits. Resets GPIOB pin 3 on entry.
 */
void HAL_DAC_ConvCpltCallbackCh1 (DAC_HandleTypeDef * hdac){
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_3, GPIO_PIN_RESET);
for(uint16_t i = 128; i < 256; i++){
// every 8th table entry, wrapping at 256
// NOTE(review): assumes sin_table holds one sine period in 256 entries - confirm
new_value = sin_table[(i * 8) % 256];
if (message[message_index] == 0x0){
// 0 bit: 10% amplitude around an offset of 115 (float multiply per sample)
dac_buf[i] = new_value * 0.1f + 115;
} else {
// 1 bit: full amplitude
dac_buf[i] = new_value;
}
}
// one full buffer (both halves) is emitted per message bit
message_index++;
if (message_index >= 16) {
message_index = 0;
// HAL_DAC_Stop_DMA (&hdac1, DAC_CHANNEL_1);
}
}
And it works the way I wanted:
But the frequency of the created sine is too low.
I cap at around 20kHz but I'd need 40kHz.
I already increased the clock by a factor of 8, so that one is maxed out:
.
I can still decrease the counter period (it is 50 at the moment), but when I do so the interrupt callback seems to take longer than the period to the next one.
At least it seems so as the output becomes very distorted when I do that.
I also tried to decrease the precision by taking only every 8th sine value but
I cant do this any more because then the output does not look like a sine wave anymore.
Any ideas how I could optimize the callback so that it takes less time ?
Any other ideas ?
Does fmodf() cause a hardfault in stm32?
It is other code problems causing the hard fault here.
Failing to compile with ample warnings
Best code tip: enable all warnings. #KamilCuk
Faster feedback than Stackoverflow.
I'd expect something like below on a well enabled compiler.
return output;
warning: function returns address of local variable [-Wreturn-local-addr]
Returning a local Object
Cannot return a local array. Allocate instead.
// int output[sample_rate];
int *output = malloc(sizeof *output * sample_rate);
return output;
Calling code will need to free() the pointer.
Out of range array access
static int sample_rate = 100000;
int output[sample_rate];
// for(int i = 0; i <= sample_rate; i++){
for(int i = 0; i < sample_rate; i++){
...
output[i] = mix;
}
Stack overflow?
static int sample_rate = 100000; int output[sample_rate]; is a large local variable. Maybe allocate or try something smaller?
Advanced: loss of precision
A good fmodf() does not lose precision. For a more precise answer consider double math for the intermediate results. An even better approach is more involved.
/* Stand-in for fmodf() that forms the quotient and the product in double
 * to limit rounding error: remainder of x / y with the sign of x.
 *
 * Returns 0 when y is 0. Intended for finite inputs.
 */
float my_fmodf(float x, float y){
    if(y == 0){
        return 0;
    }
    double n = 1.0 * x / y;
    /* Truncate the quotient toward zero; without this x - n * y is always ~0.
     * A double of magnitude >= 2^52 has no fractional part, so only smaller
     * values need the integer round-trip (and those fit in a long long). */
    if(n > -4503599627370496.0 && n < 4503599627370496.0){
        n = (double)(long long) n;
    }
    return (float) (x - n * y);
}
Can I not use any function within another ?
Yes. Code has other issues.
One value every 10 µs is only 100 kSPS, which is not too much for this microcontroller. In my designs I generate signals at more than 5 MSPS without any problems. Usually I have one buffer and DMA in circular mode. First I fill the buffer and start generation. When the half-transfer DMA interrupt is triggered, I fill the first half of the buffer with fresh data. When the transfer-complete interrupt is triggered, I fill the second half, and this process repeats all over again.
I am collecting the total elapsed time by using two inline functions (specified and implemented in my .h file) as follows:
extern double _elapsed_time_mf;
extern double _elapsed_time_b;
//samples the wall clock via gettimeofday(); two samples are passed to
//get_elapsed_time() to time an operation
static inline struct timeval get_current_time() {
    struct timeval now;
    gettimeofday(&now, NULL);
    return now;
}
//returns the time from start to end in seconds; the difference is first
//truncated to whole milliseconds (through an int), so the result has
//millisecond resolution
static inline double get_elapsed_time(struct timeval start, struct timeval end) {
    const double sec_part_ms = 1000.0 * (end.tv_sec - start.tv_sec);
    const double usec_part_ms = (end.tv_usec - start.tv_usec) / 1000.0;
    long int whole_ms = (int) (sec_part_ms + usec_part_ms);
    return whole_ms / 1000.0;
}
Then, when I would like to know the total elapsed time of an operation I do this:
/* Placeholder operation. When COLLECT_STATISTICAL_DATA is defined, the time
 * between entry and exit is stored in the global _elapsed_time_mf
 * (overwritten on every call). */
void my_function() {
#ifdef COLLECT_STATISTICAL_DATA
struct timeval start;
struct timeval end;
start = get_current_time();
#endif
//a processing....
#ifdef COLLECT_STATISTICAL_DATA
end = get_current_time();
_elapsed_time_mf = get_elapsed_time(start, end);
#endif
}
_elapsed_time_mf is defined in only one .c file.
However, I am getting strange results. For instance, consider that I have another function, called function_b, which also collects its elapsed time (which is stored in other global variable). Then, this function makes a call to my_function (that collects its elapsed time according to my previous code). However, the total elapsed time of function_b is sometimes lesser than the total elapsed time of my_function. An example of this situations is:
/* Placeholder operation that calls my_function() between two processing
 * steps. When COLLECT_STATISTICAL_DATA is defined, the time between entry and
 * exit, which spans the nested my_function() call, is stored in the global
 * _elapsed_time_b. */
void function_b() {
#ifdef COLLECT_STATISTICAL_DATA
struct timeval start;
struct timeval end;
start = get_current_time();
#endif
//a processing....
my_function();
//another processing...
#ifdef COLLECT_STATISTICAL_DATA
end = get_current_time();
_elapsed_time_b = get_elapsed_time(start, end);
#endif
}
Sometimes _elapsed_time_b is less than _elapsed_time_mf. Why?
I would like to collect both elapsed times in seconds according to the clock/date/timestamp (not the CPU elapsed time).
You might want to reconsider the implementation of get_elapsed_time. From here: http://www.gnu.org/software/libc/manual/html_node/Elapsed-Time.html
/* Store x - y in *result and return 1 if the difference is negative,
   0 otherwise.  Note that *y is adjusted in place while carrying.  */
int timeval_subtract (struct timeval *result, struct timeval *x, struct timeval *y)
{
  /* y has more microseconds than x: move whole seconds from y's
     microsecond field into its seconds field until it does not. */
  if (y->tv_usec > x->tv_usec) {
    int borrow = (y->tv_usec - x->tv_usec) / 1000000 + 1;
    y->tv_sec += borrow;
    y->tv_usec -= 1000000 * borrow;
  }

  /* The microsecond gap exceeds one second: hand the surplus whole
     seconds back to y's microsecond field. */
  if (x->tv_usec - y->tv_usec > 1000000) {
    int surplus = (x->tv_usec - y->tv_usec) / 1000000;
    y->tv_sec -= surplus;
    y->tv_usec += 1000000 * surplus;
  }

  /* After the carry the microsecond difference is not negative. */
  result->tv_usec = x->tv_usec - y->tv_usec;
  result->tv_sec = x->tv_sec - y->tv_sec;

  /* The sign is decided by the (carried) seconds alone. */
  return x->tv_sec < y->tv_sec;
}
As Art has commented, I am using now clock_gettime. Thus, my code is now working as expected.
My functions are now written as:
/* Returns the time from start to end in seconds.
 *
 * The seconds and nanoseconds are subtracted separately and only the (small)
 * differences are converted to double. Converting each full timestamp first
 * would round away the low-order nanoseconds whenever tv_sec is large.
 */
static inline double get_elapsed_time(struct timespec start, struct timespec end) {
    double whole_sec = (double)(end.tv_sec - start.tv_sec);
    double frac_sec = (double)(end.tv_nsec - start.tv_nsec) / 1000000000.0;
    return whole_sec + frac_sec;
}
/* Samples CLOCK_MONOTONIC; two samples are passed to get_elapsed_time(). */
static inline struct timespec get_current_time() {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    return now;
}
I have a piece of code that traces 4 sines at a time.
My original code was making roughly 12000 sin() function calls per frame and was running at 30 fps.
I tried optimizing it by generating lookup tables. I ended up with 16 different lookup tables. I declared and load them in a separate header file at the top of my program. Each table is declared like so:
static const float d4_lookup[800] {...};
Now, with this new method I actually lost fps?! I'm running at 20 fps now instead of 30. Each frame now only has to do 8 sin / cos calls and 19200 lookup calls vs 12000 sin() calls.
I compile using gcc with -O3 flag on. At the moment, the lookup tables are included at the top and are part of the global scope of the program.
I assume I'm not loading them in the right memory or something to that effect. How can I speed up the lookup time?
** EDIT 1 **
As requested, here's the function that uses the lookup calls, it is called once per frame:
/* Recomputes the four 800-sample traces sine1..sine4 for the current time.
 * Called once per frame. Per trace, sinf/cosf are evaluated once for the
 * time-dependent term; everything that depends on k comes from the
 * precomputed *_lookup tables. */
void
update_sines(void)
{
/* NOTE(review): these are assigned on every call before being read, so
   `static` storage does not appear to be needed. */
static float c1_sin, c1_cos;
static float c2_sin, c2_cos;
static float c3_sin, c3_cos;
static float c4_sin, c4_cos;
clock_gettime(CLOCK_MONOTONIC, &spec);
s = spec.tv_sec;
/* NOTE(review): nanoseconds are scaled by 1e-7; the name suggests
   milliseconds, which would be 1e-6 - confirm the intended unit. */
ms = spec.tv_nsec * 0.0000001;
etime = concatenate((long)s, ms);
/* time-dependent phase term of each trace */
c1_sin = sinf(etime * 0.00525);
c1_cos = cosf(etime * 0.00525);
c2_sin = sinf(etime * 0.007326);
c2_cos = cosf(etime * 0.007326);
c3_sin = sinf(etime * 0.0046);
c3_cos = cosf(etime * 0.0046);
c4_sin = sinf(etime * 0.007992);
c4_cos = cosf(etime * 0.007992);
int k;
for (k = 0; k < 800; ++k)
{
/* a * (sin(bx)*cos(c) + sin(c)*cos(bx)) + d; presumably the angle-sum
   expansion of a * sin(bx + c) + d - verify against the table generator.
   Traces 2 and 4 get an extra offset of 50. */
sine1[k] = a1_lookup[k] * ((bx1_sin_lookup[k] * c1_cos) + (c1_sin * bx1_cos_lookup[k])) + d1_lookup[k];
sine2[k] = a2_lookup[k] * ((bx2_sin_lookup[k] * c2_cos) + (c2_sin * bx2_cos_lookup[k])) + d2_lookup[k] + 50;
sine3[k] = a3_lookup[k] * ((bx3_sin_lookup[k] * c3_cos) + (c3_sin * bx3_cos_lookup[k])) + d3_lookup[k];
sine4[k] = a4_lookup[k] * ((bx4_sin_lookup[k] * c4_cos) + (c4_sin * bx4_cos_lookup[k])) + d4_lookup[k] + 50;
}
}
** UPDATE **
For anyone reading this thread, I gave up on this problem. I tried using OpenCL kernels, structs, SIMD instructions as well as all the solutions shown here. In the end the original code that computed the sinf() 12800 per frame worked faster than the lookup tables since the lookup tables didn't fit into the cache. Yet it was still only doing 30 fps. It just had too much going on to keep up with my 60fps expectations. I've decided to take a different direction. Thanks to everyone who contributed to this thread. Most of these solutions would probably work to get some half decent speed improvements but nothing like the 200% speed up I needed here to have the lookup tables work the way I wanted.
Sometimes it's hard to know what's slowing you down, but potentially you are going to ruin your cache hits, you could try a lookup of a struct
typedef struct
{
float bx1_sin;
float bx2_sin;
float bx3_sin;
float bx4_sin;
float bx1_cos;
etc etc
including sine1,2,3,4 as well
} lookup_table
then
lookup_table lookup[800]
now everything at the kth lookup will be in the same small chunk of memory.
Also, if you use a macro that takes k as a parameter to do the contents of the loop, let's say SINE_CALC(k), or an inline function...
you can do
for (k = 0; k < 800; ++k)
{
SINE_CALC(k); k++;
SINE_CALC(k); k++;
SINE_CALC(k); k++;
SINE_CALC(k); k++;
SINE_CALC(k); k++;
}
if you do a macro, make sure the k++ is outside the macro call like shown
Try splitting your loop into several passes (loop fission) like this:
/* Pass 1: seed each output with its amplitude table. */
for (k = 0; k < 800; ++k)
{
sine1[k] = a1_lookup[k];
sine2[k] = a2_lookup[k];
sine3[k] = a3_lookup[k];
sine4[k] = a4_lookup[k];
}
/* Pass 2: scale in place by the phase-dependent factor. */
for (k = 0; k < 800; ++k)
{
sine1[k] *= ((bx1_sin_lookup[k] * c1_cos) + (c1_sin * bx1_cos_lookup[k]));
sine2[k] *= ((bx2_sin_lookup[k] * c2_cos) + (c2_sin * bx2_cos_lookup[k]));
sine3[k] *= ((bx3_sin_lookup[k] * c3_cos) + (c3_sin * bx3_cos_lookup[k]));
sine4[k] *= ((bx4_sin_lookup[k] * c4_cos) + (c4_sin * bx4_cos_lookup[k]));
}
/* Pass 3: add the offset tables (plus the constant 50 for traces 2 and 4). */
for (k = 0; k < 800; ++k)
{
sine1[k] += d1_lookup[k];
sine2[k] += d2_lookup[k] + 50;
sine3[k] += d3_lookup[k];
sine4[k] += d4_lookup[k] + 50;
}
By accessing fewer lookup tables in each loop, you should be able to stay in the cache. The middle loop could be split up as well, but you'll need to create an intermediate table for one of the sub-expressions.
Intel processors can predict serial access (and perform prefetch) for up to 4 arrays both for forward and backward traverse. At least this was true in Core 2 Duo days. Split your for in:
/* One loop per trace: each loop reads four lookup tables and writes one
   output array. */
for (k = 0; k < 800; ++k)
sine1[k] = a1_lookup[k] * ((bx1_sin_lookup[k] * c1_cos) + (c1_sin * bx1_cos_lookup[k])) + d1_lookup[k];
for (k = 0; k < 800; ++k)
sine2[k] = a2_lookup[k] * ((bx2_sin_lookup[k] * c2_cos) + (c2_sin * bx2_cos_lookup[k])) + d2_lookup[k] + 50;
for (k = 0; k < 800; ++k)
sine3[k] = a3_lookup[k] * ((bx3_sin_lookup[k] * c3_cos) + (c3_sin * bx3_cos_lookup[k])) + d3_lookup[k];
for (k = 0; k < 800; ++k)
sine4[k] = a4_lookup[k] * ((bx4_sin_lookup[k] * c4_cos) + (c4_sin * bx4_cos_lookup[k])) + d4_lookup[k] + 50;
I guess you have more cache load than the benchmarks in the other answers, so this does matter. I recommend that you not unroll loops by hand; compilers do it well.
Using a simple sin lookup table yields a >20% speed increase on my Linux machine (VM, gcc, 64-bit). Interestingly, the size of the lookup table (within reasonable values, i.e. smaller than the L1 cache) does not influence the speed of execution.
Using a fastsin simple implementation from here I got >45% improvement.
Code:
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/time.h>
#include <time.h>
#define LOOKUP_SIZE 628
/* Returns the current wall-clock time in microseconds, shifted by the local
 * time zone's UTC offset (tm_gmtoff). */
uint64_t currentTimestampUs( void )
{
    /* UTC offset of the local time zone, in microseconds */
    time_t nowSec = time(NULL);
    struct tm localNow;
    localtime_r ( &nowSec, &localNow );
    int64_t zoneOffsetUs = localNow.tm_gmtoff * 1000000ll;

    /* seconds + microseconds folded into one microsecond count */
    struct timeval tv;
    gettimeofday ( &tv, NULL );
    uint64_t stampUs = ((uint64_t)((tv.tv_sec) * 1000000ll) ) + ( (uint64_t)(tv.tv_usec) );

    return stampUs + zoneOffsetUs;
}
const double PI = 3.141592653589793238462;
const double PI2 = 3.141592653589793238462 * 2;

/* Quarter-wave sine table: sinarr[a] = sin(a / LOOKUP_SIZE * pi/2).
 * It has LOOKUP_SIZE + 1 entries because sinlookup() can produce the index
 * LOOKUP_SIZE itself (angles pi/2 and 3*pi/2), so the end point must exist. */
static float sinarr[LOOKUP_SIZE + 1];

/* Fills sinarr; must be called once before sinlookup(). */
void initSinArr(void) {
    int a =0;
    for (a=0; a<=LOOKUP_SIZE; a++) {
        double arg = (1.0*a/LOOKUP_SIZE)*((double)PI * 0.5);
        float sinval_f = sin(arg); // double computation earlier to avoid losing precision on value
        sinarr[a] = sinval_f;
    }
}

/* Table-based sine of val (radians).
 * The argument is first wrapped into [0, 2*pi] by repeated addition or
 * subtraction, then mapped onto the quarter-wave table using the symmetry
 * of the sine in each quadrant. */
float sinlookup(float val) {
    float normval = val;
    while (normval < 0) {
        normval += PI2;
    }
    while (normval > PI2) {
        normval -= PI2;
    }
    /* position in quarter-wave steps: 0 .. 4*LOOKUP_SIZE */
    int index = LOOKUP_SIZE*(2*normval/PI);
    if (index > 3*LOOKUP_SIZE) {
        /* fourth quadrant: mirrored and negated */
        index = -index + 4*LOOKUP_SIZE;//LOOKUP_SIZE - (index-3*LOOKUP_SIZE);
        return -sinarr[index];
    } else if (index > 2*LOOKUP_SIZE) {
        /* third quadrant: negated; index may end up equal to LOOKUP_SIZE */
        index = index - 2*LOOKUP_SIZE;
        return -sinarr[index];
    } else if (index > LOOKUP_SIZE) {
        /* second quadrant: mirrored */
        index = 2*LOOKUP_SIZE - index;
        return sinarr[index];
    } else {
        /* first quadrant; index may be equal to LOOKUP_SIZE */
        return sinarr[index];
    }
}
/* Parabolic sine approximation: a*x - b*x*|x| on [-pi, pi], with the
 * argument wrapped into that range first. */
float sin_fast(float x) {
    const double linear = 1.27323954;
    const double quadratic = 0.405284735;

    /* wrap into [-pi, pi] */
    while (x < -PI) {
        x += PI2;
    }
    while (x > PI) {
        x -= PI2;
    }

    /* the quadratic term changes sign with x */
    if (x < 0) {
        return linear * x + quadratic * x * x;
    }
    return linear * x - quadratic * x * x;
}
/* Benchmark: times num_tries sweeps over [0, 2*pi) in steps of 0.01 for the
 * table lookup, the library sin() and sin_fast(), then prints each duration
 * and the saving relative to sin(). */
int main(void) {
    initSinArr();
    int a = 0;
    float val = 0;
    const int num_tries = 100000;
    uint64_t startLookup = currentTimestampUs();
    for (a=0; a<num_tries; a++) {
        for (val=0; val<PI2; val+=0.01) {
            float compval = sinlookup(val);
            (void)compval;
        }
    }
    uint64_t startSin = currentTimestampUs();
    for (a=0; a<num_tries; a++) {
        for (val=0; val<PI2; val+=0.01) {
            float compval = sin(val);
            (void)compval;
        }
    }
    uint64_t startFastSin = currentTimestampUs();
    for (a=0; a<num_tries; a++) {
        for (val=0; val<PI2; val+=0.01) {
            float compval = sin_fast(val);
            (void)compval;
        }
    }
    uint64_t end = currentTimestampUs();
    int64_t lookupMs = (startSin - startLookup)/1000;
    int64_t sinMs = (startFastSin - startSin)/1000;
    int64_t fastSinMs = (end - startFastSin)/1000;
    /* Percentages are relative to sin(); report 0 rather than dividing by
       zero when the sin() phase took less than a millisecond. */
    long long lookupPct = sinMs ? (long long)(100*(sinMs-lookupMs)/sinMs) : 0;
    long long fastSinPct = sinMs ? (long long)(100*(sinMs-fastSinMs)/sinMs) : 0;
    /* int64_t need not be long long, so the arguments are cast to match
       %lld; a literal percent sign has to be written as %%. */
    printf(" lookup: %lld ms\n", (long long)lookupMs );
    printf(" sin: %lld ms\n", (long long)sinMs );
    printf(" diff: %lld ms\n", (long long)(sinMs-lookupMs));
    printf(" diff%%: %lld %%\n", lookupPct);
    printf("fastsin: %lld ms\n", (long long)fastSinMs );
    printf(" sin: %lld ms\n", (long long)sinMs );
    printf(" diff: %lld ms\n", (long long)(sinMs-fastSinMs));
    printf(" diff%%: %lld %%\n", fastSinPct);
    return 0;
}
Sample result:
lookup: 2276 ms
sin: 3004 ms
diff: 728 ms
diff%: 24 %
fastsin: 1500 ms
sin: 3004 ms
diff: 1504 ms
diff%: 50 %
With the "timeval_subtract" function to find the time elapsed between two struct timeval types, can someone please explain the purpose of and step by step maths used to "Perform the carry for the later subtraction by updating y" and other sections? I understand the purpose of the function and how to implement it within a program, but I would like to understand how it works inside and cannot find any explanations of this anywhere, and I can't seem to wrap my head around it.
/* Store x - y in *result; return 1 if the difference is negative, else 0.
   *y is modified in place by the carry steps. */
int timeval_subtract (struct timeval *result, struct timeval *x,struct timeval *y)
{
/* Perform the carry for the later subtraction by updating y. */
if (x->tv_usec < y->tv_usec) {
/* whole seconds needed to bring y's microseconds down to at most x's */
int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
if (x->tv_usec - y->tv_usec > 1000000) {
/* NOTE(review): the guard tests x - y but the quotient is taken from
   y - x, so nsec is negative or zero whenever this branch runs. */
int nsec = (y->tv_usec - x->tv_usec) / 1000000;
y->tv_usec += 1000000 * nsec;
y->tv_sec -= nsec;
}
/* Compute the time remaining to wait.
tv_usec is certainly positive. */
result->tv_sec = x->tv_sec - y->tv_sec;
result->tv_usec = x->tv_usec - y->tv_usec;
/* Return 1 if result is negative. */
return x->tv_sec < y->tv_sec;
}
It is a function described in relation to the GNU C library for determining an elapsed time https://ftp.gnu.org/old-gnu/Manuals/glibc-2.2.5/html_node/Elapsed-Time.html so I am not looking for improvements but simply an explanation of why the dividing and adding and subtracting and multiplying within it. What do these specific arithmetic operations achieve?/Why are they done/not done? I have done the stepping through but still can'y get my head around it. I will continue to do so until I do (and even after someone explains it to me) but I was hoping to get some insight from someone who understands it already. The platform is UNIX, which I am new to using, but I don't think it changes the operations that are taking place inside the function. It is more a question about the arithmetic being performed than the algorithm being used.
At first glance, it looks like struct timeval contains a time split into two parts:
tv_usec - microseconds, ideally should always be under 1000000, but greater values seem to be allowed as suggested by the code
tv_sec - seconds (the number of multiples of 1000000)
and the time in microseconds is tv_usec + tv_sec * 1000000.
Conversely, one would expect this to be true:
tv_sec = time in microseconds / 1000000
tv_usec = time in microseconds % 1000000.
The function appears to calculate the time difference between *x and *y (logically, *x - *y) and store it in another struct timeval, *result.
A simple test program gives us some hints:
#include <stdio.h>
/* Local stand-in for the system's struct timeval so that the test program
   needs only <stdio.h>. */
struct timeval
{
long tv_sec;
long tv_usec;
};
/* Store x - y in *result; return 1 if the difference is negative, else 0.
   Unlike the version under discussion, the caller's *y is left untouched:
   all adjustments are made on a local copy. */
int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y)
{
// preserve *y
struct timeval yy = *y;
y = &yy;
/* Perform the carry for the later subtraction by updating y. */
if (x->tv_usec < y->tv_usec) {
/* whole seconds needed to bring y's microseconds down to at most x's */
int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
if (x->tv_usec - y->tv_usec > 1000000) {
/* NOTE(review): the guard tests x - y but the quotient is taken from
   y - x, so nsec is negative or zero whenever this branch runs. */
int nsec = (y->tv_usec - x->tv_usec) / 1000000;
y->tv_usec += 1000000 * nsec;
y->tv_sec -= nsec;
}
/* Compute the time remaining to wait.
tv_usec is certainly positive. */
result->tv_sec = x->tv_sec - y->tv_sec;
result->tv_usec = x->tv_usec - y->tv_usec;
/* Return 1 if result is negative. */
return x->tv_sec < y->tv_sec;
}
struct timeval testData00 = { 0, 0 };
struct timeval testData01 = { 0, 1 };

/* Runs timeval_subtract() on four (x, y) pairs and prints one line per pair:
   "<return value> <tv_sec>:<tv_usec>". */
int main(void)
{
  struct timeval *pairs[4][2] = {
    { &testData00, &testData00 },
    { &testData01, &testData01 },
    { &testData01, &testData00 },
    { &testData00, &testData01 },
  };
  int i;

  for (i = 0; i < 4; i++)
  {
    struct timeval diff;
    int res = timeval_subtract(&diff, pairs[i][0], pairs[i][1]);
    printf("%d %ld:%ld\n", res, diff.tv_sec, diff.tv_usec);
  }
  return 0;
}
Output (ideone):
0 0:0
0 0:0
0 0:1
1 -1:999999
From the last test result it appears that the function returns (-1):999999 instead of -(0:1). Both values represent the same negative time (or time difference) in microseconds:
-1 * 1000000 + 999999 = -1
-(0 * 1000000 + 1) = -1
So, how does it really work?
If x->tv_usec >= y->tv_usec then only the second if could probably* execute:
if (x->tv_usec - y->tv_usec > 1000000) {
int nsec = (y->tv_usec - x->tv_usec) / 1000000;
y->tv_usec += 1000000 * nsec;
y->tv_sec -= nsec;
}
This if checks if the difference in the microseconds parts alone is greater than 1 second. If it is, it subtracts the whole seconds of this difference from y->tv_usec (as microseconds) and adds it to y->tv_sec (as seconds). This simply redistributes the time in *y without really changing it. You could rewrite this if equivalently like this to see it more clearly:
if (x->tv_usec - y->tv_usec > 1000000) {
int nsec = (x->tv_usec - y->tv_usec) / 1000000;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
One important thing to note here is that when the input *x and *y have their tv_usec in the range from 0 to 999999 inclusive, the body of this if does not execute (hence, probably* is actually never when x->tv_usec >= y->tv_usec and when tv_usecs are in the range from 0 to 999999).
The net effect of this if is not readily clear now.
However, one interesting thing can be seen here. If we call this function with *x = 0:1000001 and *y = 0:0, the result is going to be wrong: difference = (-1):2000001 (instead of 1:1) and the return value of the function = 1 (instead of 0). This suggests that the function isn't really suited for tv_usec > 1000000 and even for tv_usec > 999999. And because of this behavior I'm going to claim that the function isn't suited for negative tv_usec in the inputs either. I'm just going to ignore those cases in the face of this behavior. It looks wrong enough already.
Let's look at the first if.
/* Perform the carry for the later subtraction by updating y. */
if (x->tv_usec < y->tv_usec) {
int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
As the comment and the code suggests, when x->tv_usec < y->tv_usec we need to take care of the "carry" between the "digits" as if we were adding and not subtracting. But it's OK, we'll see it.
Let's go back to school for a moment.
How do you do 37 - 12?
You do it like this:
7 - 2 = 5
3 - 1 = 2
And so 37 - 12 = 25.
Now, how do you do 57 - 38?
You do it like this:
10/*because 7 < 8*/ + 7 - 8 = 9
5 - 3 - 1/*borrow, because of the above*/ = 1
And so 57 - 38 = 19. See?
And the check:
if (x->tv_usec < y->tv_usec) {
checks whether or not we need to take care of this borrowing.
So, what's happening here? Let's look again:
if (x->tv_usec < y->tv_usec) {
int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
If y->tv_usec > x->tv_usec, it calculates the difference between the two in whole seconds and just like the other if it adds these whole seconds to y->tv_sec and subtracts them from y->tv_usec, simply redistributing the time in *y, without changing it.
The extra one (+ 1) that ends up added to y->tv_sec here will be subtracted from x->tv_sec at the end of the function (result->tv_sec = x->tv_sec - y->tv_sec;) and thus this 1 functions as the borrow I just reminded you of in the 57 - 38 = 19 example.
What else is happening here besides the borrow itself and some time redistribution?
Like I said earlier, I'm just going to ignore negative tv_usecs and greater than 999999 as likely handled incorrectly.
With this I take (y->tv_usec - x->tv_usec) / 1000000 to be 0 and I am left only with truly meaningful values of tv_usecs (0 to 999999 inclusive).
So, if the if's condition is true, I basically subtract 1000000 from y->tv_usec and add 1 (the borrow) to y->tv_sec.
This is the same thing we had in 57 - 38 = 19:
10/*because 7 < 8*/ + 7 - 8 = 9
5 - 3 - 1/*borrow, because of the above*/ = 1
Similarly to this 10, 1000000 is going to be added later in here: result->tv_usec = x->tv_usec - y->tv_usec;
And this first if is the meat of the function.
If I had to write a function with similar behavior, I'd require the input times to be non-negative and the microsecond parts to be no greater than 999999 and I'd write just this:
/* Store x - y in *result; return 1 if the difference is negative, else 0.
   Expects non-negative inputs with tv_usec in 0..999999. */
int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y)
{
  result->tv_sec = x->tv_sec - y->tv_sec;
  result->tv_usec = x->tv_usec - y->tv_usec;

  if (result->tv_usec < 0)
  {
    /* borrow one second to make the microseconds non-negative */
    result->tv_sec -= 1;
    result->tv_usec += 1000000;
  }

  return result->tv_sec < 0 ? 1 : 0;
}
If for some odd reason I wanted to support tv_usec > 999999 in the inputs, I'd first move the excess from tv_usec to tv_sec and then do the above, something like this:
/* Store x - y in *result; return 1 if the difference is negative, else 0.
   Inputs whose tv_usec exceeds 999999 are accepted: the excess is moved
   into the seconds on local copies, so *x and *y are not modified. */
int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y)
{
  struct timeval minuend = *x;
  struct timeval subtrahend = *y;

  /* move whole seconds out of the microsecond fields */
  if (minuend.tv_usec > 999999)
  {
    minuend.tv_sec += minuend.tv_usec / 1000000;
    minuend.tv_usec %= 1000000;
  }
  if (subtrahend.tv_usec > 999999)
  {
    subtrahend.tv_sec += subtrahend.tv_usec / 1000000;
    subtrahend.tv_usec %= 1000000;
  }

  result->tv_sec = minuend.tv_sec - subtrahend.tv_sec;
  result->tv_usec = minuend.tv_usec - subtrahend.tv_usec;
  if (result->tv_usec < 0)
  {
    /* borrow one second to make the microseconds non-negative */
    result->tv_sec -= 1;
    result->tv_usec += 1000000;
  }

  return result->tv_sec < 0 ? 1 : 0;
}
Here, the intent is clear and the code is easy to understand.
Here's a timeval_subtract():
commented with why and how to carry usec <--> sec
with const input structs (copies y to the subtrahend sh)
that normalizes output microseconds (0-999999)
Recall that tv_usec are the microseconds elapsed since tv_sec.
So diff{-1,2000001} == diff{1,1} is true,
as is tv{0,-1} == tv{-1,999999}.
/* copied from
https://www.gnu.org/software/libc/manual/html_node/Calculating-Elapsed-Time.html
Changed input timevals to const, added // comments.
Changed condition of 2nd if.
*/
/* Store x - y in *diff, with diff->tv_usec normalized to 0..999999.
   x and y are only read. Returns 1 if the difference is negative, else 0. */
int
timeval_subtract (const struct timeval *x, const struct timeval *y, struct timeval *diff)
{
  // subtraction finds the difference: the minuend minus the subtrahend
  struct timeval sh = *y; // mutable local copy of y, sh (the subtrahend)
  /* Perform the carry for the later subtraction by updating sh. */
  if (x->tv_usec < sh.tv_usec) {
    // reduce sh.usecs so usec diff will be positive.
    // carry or lend sh.usecs to sh.secs, in packages of 1e6 usecs (whole secs).
    // as we are here, we know we must carry at least 1 sec (1 million usec)
    int nsec = (sh.tv_usec - x->tv_usec) / 1000000 + 1;
    sh.tv_usec -= 1000000 * nsec;
    sh.tv_sec += nsec;
  }
  // a guard of "> 1000000" could leave tv{x,1000000}, which is not 'normal'
  if (x->tv_usec - sh.tv_usec > 999999) { // normalize 0-999999
    // normalize diff; increase sh.usecs so usec diff will be < 1000000.
    // carry or lend whole sh.secs to sh.usecs
    int nsec = (x->tv_usec - sh.tv_usec) / 1000000;
    sh.tv_usec += 1000000 * nsec;
    sh.tv_sec -= nsec;
  }
  // should now have the subtrahend sec/usec that will produce normalized difference
  /* Compute the time remaining to wait.
     tv_usec is certainly positive. */
  diff->tv_sec = x->tv_sec - sh.tv_sec;
  diff->tv_usec = x->tv_usec - sh.tv_usec;
  /* Return 1 if diff is negative. */
  return x->tv_sec < sh.tv_sec;
}
If you need to support both 32-bit and 64-bit time_t types, printing the results becomes more complicated; you can call timeval_subtract() along the lines shown below:
// replace MY_SPECIFIC_PREPROC_8B_DEF with your own
// preprocessor time_t distinguishing define
#if defined MY_SPECIFIC_PREPROC_8B_DEF
#define LSPEC "lld"      // length modifier + conversion for a long long tv_sec
#define SEC_WIDTH "020"  // field width for a long long tv_sec
#define MAX_TIME_T 0x7fffffffffffffff
#else
#define LSPEC "ld"       // length modifier + conversion for a long tv_sec
#define SEC_WIDTH "011"  // fewer chars for a long tv_sec
#define MAX_TIME_T 0x7fffffff
#endif
// Derived from MAX_TIME_T so the constant is a signed value; a literal such as
// 0x80000000 is unsigned and would rely on implementation-defined conversion.
#define MIN_TIME_T (-MAX_TIME_T - 1)
// The whole format is assembled at compile time by string-literal
// concatenation. Appending to an array sized by its initializer with strcat()
// would write past the end of that array.
const char fmt[] = "% " SEC_WIDTH LSPEC " % 011ld "
                   "= a{%" LSPEC ",%ld} - b{%" LSPEC ",%ld}\n";
const time_t max_time_t = MAX_TIME_T;
const time_t min_time_t = MIN_TIME_T;
// Test overflow of both timeval members, sec & usec
struct timeval a = {min_time_t, 1}; // 1 usec > negative overflow
struct timeval b = {0, 0}; // our subtrahend, ++1 usec in loop
struct timeval c = {0, 0}; // holds result; difference in this case
for (int i = 0; i < 3; i++) {
    timeval_subtract (&a, &b, &c);
    Serial.printf(fmt,
                  c.tv_sec, c.tv_usec, a.tv_sec, a.tv_usec, b.tv_sec, b.tv_usec);
    b.tv_usec += 1; // normal time flow
}
// Without an appropriate preprocessor define this may compile
for (auto i=0; i<3; i++) {
timeval_subtract (&a,&b,&c);
// explicit casts try to quiet compiler on other sized type_t systems
if (8 == sizeof(time_t)) {
Serial.printf("% 020lld % 011ld = a{%lld,%ld} - b{%lld,%ld}\n",
(long long)c.tv_sec, c.tv_usec,
(long long)a.tv_sec, a.tv_usec,
(long long)b.tv_sec, b.tv_usec);
}
else if (4 == sizeof(time_t)) {
Serial.printf("% 011ld % 011ld = a{%ld,%ld} - b{%ld,%ld}\n",
(long)c.tv_sec, c.tv_usec,
(long)a.tv_sec, a.tv_usec,
(long)b.tv_sec, b.tv_usec);
}
b.tv_usec += 1; // normal time flow
}
I'm using the Maclaurin series for arctan(x) and I am not getting the correct answer. I'm doing the calculation in radians. Here's the function so far:
/*
 * First attempt at arctan(number), in radians, by summing a power series.
 *
 * NOTE(review): this is the version that produces the wrong results listed
 * below it. The term update `d *= -1 * t / c` divides by c on every pass,
 * so the divisors accumulate as a product (1*3, 1*3*5, ...) instead of each
 * term being divided by its own odd number (3, 5, 7, ...). There is also no
 * reduction of the input before the series is applied.
 *
 * b32fpcomp() is defined elsewhere; from its use here it presumably reports
 * whether its two fp32 arguments compare equal -- confirm against its definition.
 */
fp32 t32rArcTangent(fp32 number)
{
fp32 a, b, c, d; /* a: previous partial sum, b: running sum, c: odd divisor, d: current term */
fp32 t; /* number squared */
uint32 i; /* Loop Counter */
/* Time Savers: inputs matching +/-MM_FP8INFINITY return +/-pi/2 directly */
if (b32fpcomp(number, MM_FP8INFINITY)) return((fp32)MM_PI / 2);
if (b32fpcomp(number, -MM_FP8INFINITY)) return(-(fp32)MM_PI / 2);
/* Setup: the first term is the input itself */
a = 0;
b = 0;
c = 1;
d = number;
t = number * number;
/* Calculation Loop: at most MMPRVT_FP32_TRIG_LIMIT terms */
for (i = 0; i < MMPRVT_FP32_TRIG_LIMIT; i++)
{
b += d;
/* stop once adding a term no longer changes the sum */
if (b32fpcomp(a, b)) break;
a = b;
c += 2;
/* flips the sign and raises the power by two, but also divides the
   already-divided term by c again (see NOTE above) */
d *= -1 * t / c;
}
#ifdef DEBUG
printf("Loops: %lu\n", i);
#endif
/* Result: the last accepted partial sum */
return(a);
fp32 = typedef'd float
uint32 = typedef'd unsigned long int
MM_FP8INFINITY is the largest number that the fp32 datatype can contain.
MM_PI is just PI out to about 50 digits.
MMPRVT_FP32_TRIG_LIMIT is the maximum number of loops that can be used to calculate the result. This is to prevent the series expansion from going into an infinite loop if for whatever reason the series fails to converge.
These are the results that I am getting:
Testing arctangent(x) function.
Loops: 0
arctan(0): 0
Loops: 8
arctan(1): 0.724778414
Loops: 13
arctan(R3): 0.709577262
Loops: 6
arctan(1/R3): 0.517280579
R3 is just the square root of 3 which is 1.732050808....
Now I know that the Maclaurin series for arctan only converges for |x| <= 1, so I think I have to reduce the input somehow. The problem is that the domain of arctan is (-INF, +INF), so how do you reduce an arbitrary input into that interval? The result is to be an angle in radians.
Thanks for pointing that out. The problem has been corrected, and I also have the input reduction done as well. Here is the completed and corrected function which now gives the correct answers:
/*
 * Arctangent of `number`, in radians, from the Maclaurin series
 *     atan(x) = x - x^3/3 + x^5/5 - x^7/7 + ...
 *
 * The series is only usable for |x| <= 1, so inputs outside that interval
 * are replaced by their reciprocal and the result is mapped back with
 *     atan(x) =  pi/2 - atan(1/x)   for x > 0
 *     atan(x) = -pi/2 - atan(1/x)   for x < 0
 *
 * Returns `number` unchanged when it is NaN, +/-pi/2 for infinities and for
 * +/-MM_FP8INFINITY, and +/-pi/4 for +/-ONE without running the series.
 *
 * b32isInf(), b32isNaN() and b32fpcomp() are defined elsewhere; from their
 * use here b32isInf() presumably yields -1 / 1 for negative / positive
 * infinity and b32fpcomp() reports equality -- confirm against their
 * definitions.
 */
fp32 t32rArcTangent(fp32 number)
{
    fp32 a, b, c, d;    /* a: previous partial sum, b: running sum,
                           c: odd divisor, d: signed odd power of the input */
    fp32 t;             /* input squared */
    uint32 i;           /* Loop Counter */
    uint8 fr;           /* Reduction Flag: 0 none, 1 input was > 1, 2 input was < -1 */

    /* Time Savers */
    if (b32isInf(number) == -1) return(-(fp32)MM_PI / 2);
    if (b32isInf(number) == 1) return((fp32)MM_PI / 2);
    if (b32isNaN(number)) return(number);
    if (b32fpcomp(number, MM_FP8INFINITY)) return((fp32)MM_PI / 2);
    if (b32fpcomp(number, -MM_FP8INFINITY)) return(-(fp32)MM_PI / 2);
    if (b32fpcomp(number, ONE)) return((fp32)MM_PI / 4);
    if (b32fpcomp(number, -ONE)) return(-(fp32)MM_PI / 4);

    /* Reduce Input: bring |number| below 1 on BOTH sides of zero. Reducing
       only the positive side would feed inputs below -1 to the series
       outside its convergence interval. */
    if (number > ONE)
    {
        number = 1 / number;
        fr = 1;
    }
    else if (number < -ONE)
    {
        number = 1 / number;
        fr = 2;
    }
    else fr = 0;

    /* Setup */
    a = 0;
    b = 0;
    c = 1;
    d = number;
    t = number * number;

    /* Calculation Loop: at most MMPRVT_FP32_TRIG_LIMIT terms */
    for (i = 0; i < MMPRVT_FP32_TRIG_LIMIT; i++)
    {
        /* divide here, per term, so d itself stays a pure power of the input */
        b += d / c;
        /* stop once adding a term no longer changes the sum */
        if (b32fpcomp(a, b)) break;
        a = b;
        c += 2;
        d *= -1 * t;
#ifdef DEBUG
        printf("a=%g b=%g, c=%g d=%g\n", a, b, c, d);
#endif
    }
#ifdef DEBUG
    printf("Loops: %lu\n", i);
#endif

    /* Result: undo the input reduction */
    if (fr == 1) a = ((fp32)MM_PI / 2) - a;
    else if (fr == 2) a = -((fp32)MM_PI / 2) - a;
    return(a);
}
Think about what happens to the terms in each loop as a result of the division by c:
c += 2;
d *= -1 * t / c;
First you're dividing by 1 (implicitly, before this point), then by 3, and then by 5, which sounds good; but because you're multiplying d by this factor on every pass, you're effectively dividing by the product of all the divisors so far. In other words, instead of
x - 1/3*x^3 + 1/5*x^5 - 1/7*x^7 + 1/9*x^9
which you want, you're computing
x - 1/(1*3)*x^3 + 1/(1*3*5)*x^5 - 1/(1*3*5*7)*x^7 + 1/(1*3*5*7*9)*x^9
You can still use your d *= -t trick, but you should move the division out of the update of d and apply it when the term is added to the sum, e.g. b += d / c.