I am not sure if power by squaring takes care of negative exponent. I implemented the following code which works for only positive numbers.
#include <stdio.h>
int powe(int x, int exp)
{
if (x == 0)
return 1;
if (x == 1)
return x;
if (x&1)
return powe(x*x, exp/2);
else
return x*powe(x*x, (exp-1)/2);
}
Looking at https://en.wikipedia.org/wiki/Exponentiation_by_squaring doesn't help as the following code seems wrong.
Function exp-by-squaring(x, n )
if n < 0 then return exp-by-squaring(1 / x, - n );
else if n = 0 then return 1;
else if n = 1 then return x ;
else if n is even then return exp-by-squaring(x * x, n / 2);
else if n is odd then return x * exp-by-squaring(x * x, (n - 1) / 2).
Edit:
Thanks to amit this solution works for both negative and positive numbers:
float powe(float x, int exp)
{
if (exp < 0)
return powe(1/x, -exp);
if (exp == 0)
return 1;
if (exp == 1)
return x;
if (((int)exp)%2==0)
return powe(x*x, exp/2);
else
return x*powe(x*x, (exp-1)/2);
}
For fractional exponent we can do below (Spektre method):
Suppose you have x^0.5 then you easily calculate square root by this method: start from 0 to x/2 and keep checking x^2 is equal to the result or not in binary search method.
So, in case you have x^(1/3) you have to replace if mid*mid <= n to if mid*mid*mid <= n and you will get the cube root of x.Same thing applies for x^(1/4), x^(1/5) and so on. In the case of x^(2/5) we can do (x^(1/5))^2 and again reduce the problem of finding the 5th root of x.
However by this time you would have realized that this method works only in the case when you can convert the root to 1/x format. So are we stuck if we can't convert? No, we can still go ahead as we have the will.
Convert your floating point to fixed point and then calculate pow(a, b). Suppose the number is 0.6, converting this to (24, 8)floating point yields Floor(0.6*1<<8) = 153(10011001). As you know 153 represents the fractional part so in fixed point this (10011001) represent (2^-1, 0, 0, 2^-3, 2^-4, 0, 0, 2^7).So we can again calculate the pow(a, 0.6) by calculating the 2,3, 4 and 7 root of x in fixed point. After calculating we again need to get the result in floating point by dividing with 1<<8.
Code for above method can be found in the accepted answer.
There is also a log based method:
x^y = exp2(y*log2(x))
Integer examples are for 32 bit int arithmetics, DWORD is 32bit unsigned int
floating pow(x,y)=x^y
Is usually evaluated like this:
How Math.Pow (and so on) actually works
so the fractional exponent can be evaluated: pow(x,y) = exp2(y*log2(x)). This can be done also on fixed point:
fixed point bignum pow
integer pow(a,b)=a^b where a>=0 , b>=0
This is easy (you already have that) done by squaring:
DWORD powuu(DWORD a,DWORD b)
{
int i,bits=32;
DWORD d=1;
for (i=0;i<bits;i++)
{
d*=d;
if (DWORD(b&0x80000000)) d*=a;
b<<=1;
}
return d;
}
integer pow(a,b)=a^b where b>=0
Just add few ifs to handle the negative a
int powiu(int a,DWORD b)
{
int sig=0,c;
if ((a<0)&&(DWORD(b&1)) { sig=1; a=-a; } // negative output only if a<0 and b is odd
c=powuu(a,b); if (sig) c=-c;
return c;
}
integer pow(a,b)=a^b
So if b<0 then it means 1/powiu(a,-b) As you can see the result is not integer at all so either ignore this case or return floating value or add a multiplier variable (so you can evaluate PI equations on pure Integer arithmetics). This is float result:
float powfii(int a,int b)
{
if (b<0) return 1.0/float(powiu(a,-b));
else return powiu(a,b);
}
integer pow(a,b)=a^b where b is fractional
You can do something like this a^(1/bb) where bb is integer. In reality this is rooting so you can use binary search to evaluate:
a^(1/2) is square root(a)
a^(1/bb) is bb_root(a)
so do a binary search for c from MSB to LSB and evaluate if pow(c,bb)<=a then leave the bit as is else clear it. This is sqrt example:
int bits(DWORD p) // count how many bits is p
{
DWORD m=0x80000000; int b=32;
for (;m;m>>=1,b--)
if (p>=m) break;
return b;
}
DWORD sqrt(const DWORD &x)
{
DWORD m,a;
m=(bits(x)>>1);
if (m) m=1<<m; else m=1;
for (a=0;m;m>>=1) { a|=m; if (a*a>x) a^=m; }
return a;
}
so now just change the if (a*a>x) with if (pow(a,bb)>x) where bb=1/b ... so b is fractional exponent you looking for and bb is integer. Also m is the number of bits of the result so change m=(bits(x)>>1); to m=(bits(x)/bb);
[edit1] fixed point sqrt example
//---------------------------------------------------------------------------
const int _fx32_fract=16; // fractional bits count
const int _fx32_one =1<<_fx32_fract;
DWORD fx32_mul(const DWORD &x,const DWORD &y) // unsigned fixed point mul
{
DWORD a=x,b=y; // asm has access only to local variables
asm { // compute (a*b)>>_fx32_fract
mov eax,a // eax=a
mov ebx,b // ebx=b
mul eax,ebx // (edx,eax)=eax*ebx
mov ebx,_fx32_one
div ebx // eax=(edx,eax)>>_fx32_fract
mov a,eax;
}
return a;
}
DWORD fx32_sqrt(const DWORD &x) // unsigned fixed point sqrt
{
DWORD m,a;
if (!x) return 0;
m=bits(x); // integer bits
if (m>_fx32_fract) m-=_fx32_fract; else m=0;
m>>=1; // sqrt integer result is half of x integer bits
m=_fx32_one<<m; // MSB of result mask
for (a=0;m;m>>=1) // test bits from MSB to 0
{
a|=m; // bit set
if (fx32_mul(a,a)>x) // if result is too big
a^=m; // bit clear
}
return a;
}
//---------------------------------------------------------------------------
so this is unsigned fixed point. High 16 bits are integer and low 16 bits are fractional part.
this is fp -> fx conversion: DWORD(float(x)*float(_fx32_one))
this is fp <- fx conversion: float(DWORD(x))/float(_fx32_one))
fx32_mul(x,y) is x*y it uses assembler of 80386+ 32bit architecture (you can rewrite it to karatsuba or whatever else to be platform independent)
fx32_sqrt(x) is sqrt(x)
In fixed point you should be aware of the fractional bit shift for multiplication: (a<<16)*(b<<16)=(a*b<<32) you need to shift back by >>16 to get result (a*b<<16). Also the result can overflow 32 bit therefore I use 64 bit result in assembly.
[edit2] 32bit signed fixed point pow C++ example
When you put all the previous steps together you should have something like this:
//---------------------------------------------------------------------------
//--- 32bit signed fixed point format (2os complement)
//---------------------------------------------------------------------------
// |MSB LSB|
// |integer|.|fractional|
//---------------------------------------------------------------------------
const int _fx32_bits=32; // all bits count
const int _fx32_fract_bits=16; // fractional bits count
const int _fx32_integ_bits=_fx32_bits-_fx32_fract_bits; // integer bits count
//---------------------------------------------------------------------------
const int _fx32_one =1<<_fx32_fract_bits; // constant=1.0 (fixed point)
const float _fx32_onef =_fx32_one; // constant=1.0 (floating point)
const int _fx32_fract_mask=_fx32_one-1; // fractional bits mask
const int _fx32_integ_mask=0xFFFFFFFF-_fx32_fract_mask; // integer bits mask
const int _fx32_sMSB_mask =1<<(_fx32_bits-1); // max signed bit mask
const int _fx32_uMSB_mask =1<<(_fx32_bits-2); // max unsigned bit mask
//---------------------------------------------------------------------------
float fx32_get(int x) { return float(x)/_fx32_onef; }
int fx32_set(float x) { return int(float(x*_fx32_onef)); }
//---------------------------------------------------------------------------
int fx32_mul(const int &x,const int &y) // x*y
{
int a=x,b=y; // asm has access only to local variables
asm { // compute (a*b)>>_fx32_fract
mov eax,a
mov ebx,b
mul eax,ebx // (edx,eax)=a*b
mov ebx,_fx32_one
div ebx // eax=(a*b)>>_fx32_fract
mov a,eax;
}
return a;
}
//---------------------------------------------------------------------------
int fx32_div(const int &x,const int &y) // x/y
{
int a=x,b=y; // asm has access only to local variables
asm { // compute (a*b)>>_fx32_fract
mov eax,a
mov ebx,_fx32_one
mul eax,ebx // (edx,eax)=a<<_fx32_fract
mov ebx,b
div ebx // eax=(a<<_fx32_fract)/b
mov a,eax;
}
return a;
}
//---------------------------------------------------------------------------
int fx32_abs_sqrt(int x) // |x|^(0.5)
{
int m,a;
if (!x) return 0;
if (x<0) x=-x;
m=bits(x); // integer bits
for (a=x,m=0;a;a>>=1,m++); // count all bits
m-=_fx32_fract_bits; // compute result integer bits (half of x integer bits)
if (m<0) m=0; m>>=1;
m=_fx32_one<<m; // MSB of result mask
for (a=0;m;m>>=1) // test bits from MSB to 0
{
a|=m; // bit set
if (fx32_mul(a,a)>x) // if result is too big
a^=m; // bit clear
}
return a;
}
//---------------------------------------------------------------------------
int fx32_pow(int x,int y) // x^y
{
// handle special cases
if (!y) return _fx32_one; // x^0 = 1
if (!x) return 0; // 0^y = 0 if y!=0
if (y==-_fx32_one) return fx32_div(_fx32_one,x); // x^-1 = 1/x
if (y==+_fx32_one) return x; // x^+1 = x
int m,a,b,_y; int sx,sy;
// handle the signs
sx=0; if (x<0) { sx=1; x=-x; }
sy=0; if (y<0) { sy=1; y=-y; }
_y=y&_fx32_fract_mask; // _y fractional part of exponent
y=y&_fx32_integ_mask; // y integer part of exponent
a=_fx32_one; // ini result
// powering by squaring x^y
if (y)
{
for (m=_fx32_uMSB_mask;(m>_fx32_one)&&(m>y);m>>=1); // find mask of highest bit of exponent
for (;m>=_fx32_one;m>>=1)
{
a=fx32_mul(a,a);
if (int(y&m)) a=fx32_mul(a,x);
}
}
// powering by rooting x^_y
if (_y)
{
for (b=x,m=_fx32_one>>1;m;m>>=1) // use only fractional part
{
b=fx32_abs_sqrt(b);
if (int(_y&m)) a=fx32_mul(a,b);
}
}
// handle signs
if (sy) { if (a) a=fx32_div(_fx32_one,a); else a=0; /*Error*/ } // underflow
if (sx) { if (_y) a=0; /*Error*/ else if(int(y&_fx32_one)) a=-a; } // negative number ^ non integer exponent, here could add test if 1/_y is integer instead
return a;
}
//---------------------------------------------------------------------------
I have tested it like this:
float a,b,c0,c1,d;
int x,y;
for (a=0.0,x=fx32_set(a);a<=10.0;a+=0.1,x=fx32_set(a))
for (b=-2.5,y=fx32_set(b);b<=2.5;b+=0.1,y=fx32_set(b))
{
if (!x) continue; // math pow has problems with this
if (!y) continue; // math pow has problems with this
c0=pow(a,b);
c1=fx32_get(fx32_pow(x,y));
d=0.0;
if (fabs(c1)<1e-3) d=c1-c0; else d=(c0/c1)-1.0;
if (fabs(d)>0.1)
d=d; // here add breakpoint to check inconsistencies with math pow
}
a,b are floating point
x,y are closest fixed point representations of a,b
c0 is math pow result
c1 is fx32_pow result
d is difference
hope did not forget something trivial but it seems like it works properly. Do not forget that fixed point has very limited precision so the results will differ a bit ...
P.S. Take a look at this:
How to get a square root for 32 bit input in one clock cycle only?
fixed point log2,exp2
integer C++ pow(x,1/y) implementation
integer C++ pow(a,b),log2(a),logb(a) implementation
real domain pow based on complex domain math
Related
I'm doing a Codewars Challenge. I must square every digit of a number and concatenate them.
So, if we run 9119 through the function, 811181 will come out, because 92 is 81 and 12 is 1.
My code is below:
#include <math.h>
unsigned long long square_digits (unsigned n)
{
// Count for digits
int digits = log10(n) + 1, i = 0;
// Array to store the split number
int numDivided[digits];
// Store concatenated numbers
unsigned long long result = 0;
if (n == 0)
{
return result;
}
// Split number and store their square
while (n > 0)
{
numDivided[i] = pow(n % 10, 2);
n /= 10;
i++;
}
// Concatenated square of numbers
for (i = digits - 1;i >= 0;i--)
{
if (numDivided[i] == 0)
{
result *= 10;
}
else
{
// Count digits of the current number
digits = log10(numDivided[i]) + 1;
// Multiply current result for 10^(digits)
result *= pow(10, digits);
// Add the current number to result
result += numDivided[i];
}
}
return result;
}
The test cases are below:
Test(test_suite, sample_tests)
{
do_test( 3212u, 9414ull);
do_test( 2112u, 4114ull);
do_test( 0u, 0ull);
do_test( 999u, 818181ull);
do_test( 10001u, 10001ull);
do_test(3210987654u, 9410816449362516ull);
do_test(3999999999u, 9818181818181818181ull); // :p
do_test( UINT_MAX, 164811681364948125ull);
}
The code works until the two last tests, so:
for n = 3999999999, expected 9818181818181818181, but got 9818181818181818449
I was think that the test case was wrong and check if the number was greater that ULLONG_MAX but it's less, so, all right.
What is my mistake at this point?
What problems could i have with pow function? There is any alternative?
pow() typically returns, at best, a 53 significant bit result yet we have at least a 64-bit problem. powl() does not certianly help either as it may be as limiting as pow(). Do not use floating point functions for an integer problem. Using a floating point funtion for an integer problem is the wrong approach.
digits = log10(n) + 1 are numDivided[i] = pow(n % 10, 2) are not specified to get the desired value (due to imprecision) and certainly fail when n == 0.
Simple extract the least significant decimal digit with %10. Scale by an integer type power-of-10.
unsigned long long square_digits(unsigned n) {
unsigned long long nn = 0;
unsigned long long pow10 = 1;
while (n) {
unsigned digit = n % 10;
unsigned square = digit * digit; // 0 to 81
nn += square * pow10;
if (square < 10) {
pow10 *= 10;
} else {
pow10 *= 100;
}
n /= 10;
}
return nn;
}
The main culprit is this line:
result *= pow(10, digits);
Given that result is an unsigned long long while the return value of pow is a double, result is first converted to a double, then multiplied by the power and finally the result is converted back to an unsigned long long.
The problem is that while a double type has a much greater range than an unsigned long long, its precision is limited and not all the possible integral values can be represented (stored) exactly.
In particular, you can check the result of the following line:
printf("%.0lf\n", 981818181818181ull * 100.0); // It won't print 98181818181818100
See #chux's answer for further details and a proper implementation of the solution.
I found two mistakes in your code.
log() is not defined for 0. So, you cannot run your code against Test case 3.
Use powl() instead of pow().
PS: One change you can do to pass test case 3 is check if N == 0 in the beginning of the function itself. The next script should calculate the number of digits in N.
I was thinking about the floor function available in math.h. It is very easy to use it:
#include <stdio.h>
#include <math.h>
int main(void)
{
for (double a = 12.5; a < 13.4; a += 0.1)
printf("floor of %.1lf is %.1lf\n", a, floor(a));
return 0;
}
What if I would like to write my own implementation of it? Would it look simply like this:
#include <stdio.h>
#include <math.h>
double my_floor(double num)
{
return (int)num;
}
int main(void)
{
double a;
for (a = 12.5; a < 13.4; a += 0.1)
printf("floor of %.1lf is %.1lf\n", a, floor(a));
printf("\n\n");
for (a = 12.5; a < 13.4; a += 0.1)
printf("floor of %.1lf is %.1lf\n", a, my_floor(a));
return 0;
}
?
It seems it does not work with negative numbers (my_floor), but the second one seems to be fine (my_floor_2):
#include <stdio.h>
#include <math.h>
double my_floor(double num)
{
return (int)num;
}
double my_floor_2(double num)
{
if(num < 0)
return (int)num - 1;
else
return (int)num;
}
int main(void)
{
double a1 = -12.5;
printf("%lf\n", floor(a1));
printf("%lf\n", my_floor(a1));
printf("%lf\n", my_floor_2(a1));
return 0;
}
program output:
-13.000000
-12.000000
-13.000000
Is one of them eventually correct or not?
Both of your attempts have limitations:
If the double value is outside the range of the int type, converting to int is implementation defined.
If the double value is negative but integral, returning (int)num - 1 is incorrect.
Here is an (almost) portable version that tries to handle all cases:
double my_floor_2(double num) {
if (num >= LLONG_MAX || num <= LLONG_MIN || num != num) {
/* handle large values, infinities and nan */
return num;
}
long long n = (long long)num;
double d = (double)n;
if (d == num || num >= 0)
return d;
else
return d - 1;
}
It should be correct if type long long has more value bits than type double, which is the case on most modern systems.
No, you can't tackle it this way. The best way of writing your own implementation is to take the one from the C Standard Library on your platform. But note that might contain platform specific nuances so might not be portable.
The C Standard Library floor function is typically clever in that it doesn't work by taking a conversion to an integral type. If it did then you'd run the risk of signed integer overflow, the behaviour of which is undefined. (Note that the smallest possible range for an int is -32767 to +32767).
The precise implementation is also dependent on the floating point scheme used on your platform.
For a platform using IEEE754 floating point, and a long long type you could adopt this scheme:
If the magnitude of the number is greater than 253, return it (as it's already integral).
Else, cast to a 64-bit type (long long), and return it back.
In C++ and 32 bit arithmetics it can be done for example like this:
//---------------------------------------------------------------------------
// IEEE 754 double MSW masks
const DWORD _f64_sig =0x80000000; // sign
const DWORD _f64_exp =0x7FF00000; // exponent
const DWORD _f64_exp_sig=0x40000000; // exponent sign
const DWORD _f64_exp_bia=0x3FF00000; // exponent bias
const DWORD _f64_exp_lsb=0x00100000; // exponent LSB
const DWORD _f64_exp_pos= 20; // exponent LSB bit position
const DWORD _f64_man =0x000FFFFF; // mantisa
const DWORD _f64_man_msb=0x00080000; // mantisa MSB
const DWORD _f64_man_bits= 52; // mantisa bits
// IEEE 754 single masks
const DWORD _f32_sig =0x80000000; // sign
const DWORD _f32_exp =0x7F800000; // exponent
const DWORD _f32_exp_sig=0x40000000; // exponent sign
const DWORD _f32_exp_bia=0x3F800000; // exponent bias
const DWORD _f32_exp_lsb=0x00800000; // exponent LSB
const DWORD _f32_exp_pos= 23; // exponent LSB bit position
const DWORD _f32_man =0x007FFFFF; // mantisa
const DWORD _f32_man_msb=0x00400000; // mantisa MSB
const DWORD _f32_man_bits= 23; // mantisa bits
//---------------------------------------------------------------------------
double f64_floor(double x)
{
const int h=1; // may be platform dependent MSB/LSB order
const int l=0;
union _f64 // semi result
{
double f; // 64bit floating point
DWORD u[2]; // 2x32 bit uint
} y;
DWORD m,a;
int sig,exp,sh;
y.f=x;
// extract sign
sig =y.u[h]&_f64_sig;
// extract exponent
exp =((y.u[h]&_f64_exp)>>_f64_exp_pos)-(_f64_exp_bia>>_f64_exp_pos);
// floor bit shift
sh=_f64_man_bits-exp; a=0;
if (exp<0)
{
a=y.u[l]|(y.u[h]&_f64_man);
if (sig) return -1.0;
return 0.0;
}
// LSW
if (sh>0)
{
if (sh<32) m=(0xFFFFFFFF>>sh)<<sh; else m=0;
a=y.u[l]&(m^0xFFFFFFFF); y.u[l]&=m;
}
// MSW
sh-=32;
if (sh>0)
{
if (sh<_f64_exp_pos) m=(0xFFFFFFFF>>sh)<<sh; else m=_f64_sig|_f64_exp;
a|=y.u[h]&(m^0xFFFFFFFF); y.u[h]&=m;
}
if ((sig)&&(a)) y.f--;
return y.f;
}
//---------------------------------------------------------------------------
float f32_floor(float x)
{
union // semi result
{
float f; // 32bit floating point
DWORD u; // 32 bit uint
} y;
DWORD m,a;
int sig,exp,sh;
y.f=x;
// extract sign
sig =y.u&_f32_sig;
// extract exponent
exp =((y.u&_f32_exp)>>_f32_exp_pos)-(_f32_exp_bia>>_f32_exp_pos);
// floor bit shift
sh=_f32_man_bits-exp; a=0;
if (exp<0)
{
a=y.u&_f32_man;
if (sig) return -1.0;
return 0.0;
}
if (sh>0)
{
if (sh<_f32_exp_pos) m=(0xFFFFFFFF>>sh)<<sh; else m=_f32_sig|_f32_exp;
a|=y.u&(m^0xFFFFFFFF); y.u&=m;
}
if ((sig)&&(a)) y.f--;
return y.f;
}
//---------------------------------------------------------------------------
The point is to make mask that will clear out the decimal bits from mantissa and in case of negative input and non zero cleared bits decrement the result. To access individual bits you can convert your floating point value to integral representation with use of union (like in the example) or use pointers instead.
I tested this in simple VCL app like this:
float f32;
double f64;
AnsiString txt="";
// 64 bit
txt+="[double]\r\n";
for (f64=-10.0;f64<=10.0;f64+=0.1)
if (fabs(floor(f64)-f64_floor(f64))>1e-6)
{
txt+=AnsiString().sprintf("%5.3lf %5.3lf %5.3lf\r\n",f64,floor(f64),f64_floor(f64));
f64_floor(f64);
}
for (f64=1;f64<=1e307;f64*=1.1)
{
if (fabs(floor( f64)-f64_floor( f64))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n", f64,floor( f64),f64_floor( f64));
f64_floor( f64); }
if (fabs(floor(-f64)-f64_floor(-f64))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n",-f64,floor(-f64),f64_floor(-f64));
f64_floor(-f64); }
}
// 32 bit
txt+="[float]\r\n";
for (f32=-10.0;f32<=10.0;f32+=0.1)
if (fabs(floor(f32)-f32_floor(f32))>1e-6)
{
txt+=AnsiString().sprintf("%5.3lf %5.3lf %5.3lf\r\n",f32,floor(f32),f32_floor(f32));
f32_floor(f32);
}
for (f32=1;f32<=1e37;f32*=1.1)
{
if (fabs(floor( f32)-f32_floor( f32))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n", f32,floor( f32),f32_floor( f32));
f32_floor( f32); }
if (fabs(floor(-f32)-f32_floor(-f32))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n",-f32,floor(-f32),f32_floor(-f32));
f32_floor(-f32); }
}
mm_log->Lines->Add(txt);
with no difference result (so in all tested cases it matches math.h floor() values. If you want to give this a shot outside VCL then just change AnsiString to any string type you got at hand and change the output from TMemo::mm_log to anything you got (like console cout or whatever)
The double calling of fxx_floor() in case of difference is for debuging purposes (you can place a breakpoint and step in the error case directly).
[Notes]
Beware the order of words (MSW,LSW) is platform dependent so you should adjust the h,l constants accordingly. This code is not optimized so it is easily understandable so do not expect it will be fast.
When the precision of the floating point type is small enough as compared to a wide integer type, cast to that integer type when the floating point value is in the integer range.
Review the function for values outside the intmax_t range, NAN, infinity and -0.0 and adjust as desired.
#if DBL_MANT_DIG >= 64
#error TBD code
#endif
#include <inttypes.h>
// INTMAX_MAX is not exact as a double, yet INTMAX_MAX + 1 is an exact double
#define INTMAX_MAX_P1 ((INTMAX_MAX/2 + 1)*2.0)
double my_floor(double x) {
if (x >= 0.0) {
if (x < INTMAX_MAX_P1) {
return (double)(intmax_t)x;
}
return x;
} else if (x < 0.0) {
if (x >= INTMAX_MIN) {
intmax_t ix = (intmax_t) x;
return (ix == x) ? x : (double)(ix-1);
}
return x;
}
return x; // NAN
}
Try it online!. Try this as your function:
// we need as much space as possible
typedef long double largestFloat;
largestFloat myFloor(largestFloat x)
{
largestFloat xcopy = (x < 0) ? (x * -1) : x;
unsigned int zeros = 0;
largestFloat n = 1;
// Count digits before the decimal
for (n = 1; xcopy > (n * 10); n *= 10, ++zeros)
;
// Make xcopy follow 0 <= xcopy < 1
for (xcopy -= n; zeros != -1; xcopy -= n) {
if (xcopy < 0) {
xcopy += n;
n /= 10;
--zeros;
}
}
xcopy += n;
// Follow standard floor behavior
if (x < 0)
return (xcopy == 0) ? x : (x + xcopy - 1);
else
return x - xcopy;
}
This is an explanation of the code.
Create xcopy (absolute value of x)
Use the first for loop to figure out the number of digits before the decimal point.
Use that number to continually decrease xcopy until it satisfies 0 <= xcopy < 1
Based on whether x was originally positive or negative, either return x - xcopy or x - (1 - xcopy).
I have two fractions I like to compare. They are stored like this:
struct fraction {
int64_t numerator;
int64_t denominator;
};
Currently, I compare them like this:
bool fraction_le(struct fraction a, struct fraction b)
{
return a.numerator * b.denominator < b.numerator * a.denominator;
}
That works fine, except that (64 bit value) * (64 bit value) = (128 bit value), which means it will overflow for numerators and denominators that are too far away from zero.
How can I make the comparison always works, even for absurd fractions?
Oh, and by the way: fractions are always stored simplified, and only the numerator can be negative. Maybe that input constraint makes some algorithm possible...
Here's how Boost implements it. The code is well-commented.
template <typename IntType>
bool rational<IntType>::operator< (const rational<IntType>& r) const
{
// Avoid repeated construction
int_type const zero( 0 );
// This should really be a class-wide invariant. The reason for these
// checks is that for 2's complement systems, INT_MIN has no corresponding
// positive, so negating it during normalization keeps it INT_MIN, which
// is bad for later calculations that assume a positive denominator.
BOOST_ASSERT( this->den > zero );
BOOST_ASSERT( r.den > zero );
// Determine relative order by expanding each value to its simple continued
// fraction representation using the Euclidian GCD algorithm.
struct { int_type n, d, q, r; } ts = { this->num, this->den, this->num /
this->den, this->num % this->den }, rs = { r.num, r.den, r.num / r.den,
r.num % r.den };
unsigned reverse = 0u;
// Normalize negative moduli by repeatedly adding the (positive) denominator
// and decrementing the quotient. Later cycles should have all positive
// values, so this only has to be done for the first cycle. (The rules of
// C++ require a nonnegative quotient & remainder for a nonnegative dividend
// & positive divisor.)
while ( ts.r < zero ) { ts.r += ts.d; --ts.q; }
while ( rs.r < zero ) { rs.r += rs.d; --rs.q; }
// Loop through and compare each variable's continued-fraction components
while ( true )
{
// The quotients of the current cycle are the continued-fraction
// components. Comparing two c.f. is comparing their sequences,
// stopping at the first difference.
if ( ts.q != rs.q )
{
// Since reciprocation changes the relative order of two variables,
// and c.f. use reciprocals, the less/greater-than test reverses
// after each index. (Start w/ non-reversed # whole-number place.)
return reverse ? ts.q > rs.q : ts.q < rs.q;
}
// Prepare the next cycle
reverse ^= 1u;
if ( (ts.r == zero) || (rs.r == zero) )
{
// At least one variable's c.f. expansion has ended
break;
}
ts.n = ts.d; ts.d = ts.r;
ts.q = ts.n / ts.d; ts.r = ts.n % ts.d;
rs.n = rs.d; rs.d = rs.r;
rs.q = rs.n / rs.d; rs.r = rs.n % rs.d;
}
// Compare infinity-valued components for otherwise equal sequences
if ( ts.r == rs.r )
{
// Both remainders are zero, so the next (and subsequent) c.f.
// components for both sequences are infinity. Therefore, the sequences
// and their corresponding values are equal.
return false;
}
else
{
// Exactly one of the remainders is zero, so all following c.f.
// components of that variable are infinity, while the other variable
// has a finite next c.f. component. So that other variable has the
// lesser value (modulo the reversal flag!).
return ( ts.r != zero ) != static_cast<bool>( reverse );
}
}
If you are using GCC, you can use __int128.
I didn't understand the code in Kos's answer so this might be just duplicating it.
As other people have mentioned there are some easy special cases e.g. b/c > -e/f and -b/c > -e/f if e/f > b/c. So we are left with the case of positive fractions.
Convert these to mixed numbers i.e. a b/c and d e/f. The trivial case has a != d so we assume a == d. We then want to compare b/c with e/f with b < c, e < f. Well b/c > e/f if f/e > c/b. These are both greater than one so you can repeat the mixed number test until the whole number parts differ.
Case intrigued me, so here is an implementation of Neil's answer, possibly with bugs :)
#include <stdint.h>
#include <stdlib.h>
typedef struct {
int64_t num, den;
} frac;
int cmp(frac a, frac b) {
if (a.num < 0) {
if (b.num < 0) {
a.num = -a.num;
b.num = -b.num;
return !cmpUnsigned(a, b);
}
else return 1;
}
else if (0 <= b.num) return cmpUnsigned(a, b);
else return 0;
}
#define swap(a, b) { int64_t c = a; a = b; b = c; }
int cmpUnsigned(frac a, frac b) {
int64_t c = a.num / a.den, d = b.num / b.den;
if (c != d) return c < d;
a.num = a.num % a.den;
swap(a.num, a.den);
b.num = b.num % b.den;
swap(b.num, b.den);
return !cmpUnsigned(a, b);
}
main() {
frac a = { INT64_MAX - 1, INT64_MAX }, b = { INT64_MAX - 3, INT64_MAX };
printf("%i\n", cmp(a, b));
}
Alright, so only your numerators are signed.
Special cases:
If the a.numerator is negative and the b.numerator is positive, then b is greater than a.
If the b.numerator is negative and the a.numerator is positive, then a is greater than b.
Otherwise:
Both your numerators have the same sign (+/-). Add some logic-code or bit manipulation to remove it, and use multiplication with uint64_t to compare them. Remember that if both numerators are negative, then the result of the comparison must be negated.
Why not just compare them directly as floating point numbers?
bool fraction_le(struct fraction a, struct fraction b)
{
return (double)a.numerator / a.denominator < (double)b.numerator / b.denominator;
}
I'm looking to for a reasonably efficient way of determining if a floating point value (double) can be exactly represented by an integer data type (long, 64 bit).
My initial thought was to check the exponent to see if it was 0 (or more precisely 127). But that won't work because 2.0 would be e=1 m=1...
So basically, I am stuck. I have a feeling that I can do this with bit masks, but I'm just not getting my head around how to do that at this point.
So how can I check to see if a double is exactly representable as a long?
Thanks
I think I have found a way to clamp a double into an integer in a standard-conforming fashion (this is not really what the question is about, but it helps a lot). First, we need to see why the obvious code is not correct.
// INCORRECT CODE
uint64_t double_to_uint64 (double x)
{
if (x < 0.0) {
return 0;
}
if (x > UINT64_MAX) {
return UINT64_MAX;
}
return x;
}
The problem here is that in the second comparison, UINT64_MAX is being implicitly converted to double. The C standard does not specify exactly how this conversion works, only that it is to be rounded up or down to a representable value. This means that the second comparison may be false, even if should mathematically be true (which can happen when UINT64_MAX is rounded up, and 'x' is mathematically between UINT64_MAX and (double)UINT64_MAX). As such, the conversion of double to uint64_t can result in undefined behavior in that edge case.
Surprisingly, the solution is very simple. Consider that while UINT64_MAX may not be exactly representable in a double, UINT64_MAX+1, being a power of two (and not too large), certainly is. So, if we first round the input to an integer, the comparison x > UINT64_MAX is equivalent to x >= UINT64_MAX+1, except for possible overflow in the addition. We can fix the overflow by using ldexp instead of adding one to UINT64_MAX. That being said, the following code should be correct.
/* Input: a double 'x', which must not be NaN.
* Output: If 'x' is lesser than zero, then zero;
* otherwise, if 'x' is greater than UINT64_MAX, then UINT64_MAX;
* otherwise, 'x', rounded down to an integer.
*/
uint64_t double_to_uint64 (double x)
{
assert(!isnan(x));
double y = floor(x);
if (y < 0.0) {
return 0;
}
if (y >= ldexp(1.0, 64)) {
return UINT64_MAX;
}
return y;
}
Now, to back to your question: is x is exactly representable in an uint64_t? Only if it was neither rounded nor clamped.
/* Input: a double 'x', which must not be NaN.
* Output: If 'x' is exactly representable in an uint64_t,
* then 1, otherwise 0.
*/
int double_representable_in_uint64 (double x)
{
assert(!isnan(x));
return (floor(x) == x && x >= 0.0 && x < ldexp(1.0, 64));
}
The same algorithm can be used for integers of different size, and also for signed integers with a minor modification. The code that follows does some very basic tests of the uint32_t and uint64_t versions (only false positives can possibly be caught), but is also suitable for manual examination of the edge cases.
#include <inttypes.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include <stdio.h>
uint32_t double_to_uint32 (double x)
{
assert(!isnan(x));
double y = floor(x);
if (y < 0.0) {
return 0;
}
if (y >= ldexp(1.0, 32)) {
return UINT32_MAX;
}
return y;
}
uint64_t double_to_uint64 (double x)
{
assert(!isnan(x));
double y = floor(x);
if (y < 0.0) {
return 0;
}
if (y >= ldexp(1.0, 64)) {
return UINT64_MAX;
}
return y;
}
int double_representable_in_uint32 (double x)
{
assert(!isnan(x));
return (floor(x) == x && x >= 0.0 && x < ldexp(1.0, 32));
}
int double_representable_in_uint64 (double x)
{
assert(!isnan(x));
return (floor(x) == x && x >= 0.0 && x < ldexp(1.0, 64));
}
int main ()
{
{
printf("Testing 32-bit\n");
for (double x = 4294967295.999990; x < 4294967296.000017; x = nextafter(x, INFINITY)) {
uint32_t y = double_to_uint32(x);
int representable = double_representable_in_uint32(x);
printf("%f -> %" PRIu32 " representable=%d\n", x, y, representable);
assert(!representable || (double)(uint32_t)x == x);
}
}
{
printf("Testing 64-bit\n");
double x = ldexp(1.0, 64) - 40000.0;
for (double x = 18446744073709510656.0; x < 18446744073709629440.0; x = nextafter(x, INFINITY)) {
uint64_t y = double_to_uint64(x);
int representable = double_representable_in_uint64(x);
printf("%f -> %" PRIu64 " representable=%d\n", x, y, representable);
assert(!representable || (double)(uint64_t)x == x);
}
}
}
Here's one method that could work in most cases. I'm not sure if/how it will break if you give it NaN, INF, very large (overflow) numbers...
(Though I think they will all return false - not exactly representable.)
You could:
Cast it to an integer.
Cast it back to a floating-point.
Compare with original value.
Something like this:
double val = ... ; // Value
if ((double)(long long)val == val){
// Exactly representable
}
floor() and ceil() are also fair game (though they may fail if the value overflows an integer):
floor(val) == val
ceil(val) == val
And here's a messy bit-mask solution:
This uses union type-punning and assumes IEEE double-precision. Union type-punning is only valid in C99 TR2 and later.
int representable(double x){
// Handle corner cases:
if (x == 0)
return 1;
// -2^63 is representable as a signed 64-bit integer, but +2^63 is not.
if (x == -9223372036854775808.)
return 1;
// Warning: Union type-punning is only valid in C99 TR2 or later.
union{
double f;
uint64_t i;
} val;
val.f = x;
uint64_t exp = val.i & 0x7ff0000000000000ull;
uint64_t man = val.i & 0x000fffffffffffffull;
man |= 0x0010000000000000ull; // Implicit leading 1-bit.
int shift = (exp >> 52) - 1075;
// Out of range
if (shift < -52 || shift > 10)
return 0;
// Test mantissa
if (shift < 0){
shift = -shift;
return ((man >> shift) << shift) == man;
}else{
return ((man << shift) >> shift) == man;
}
}
You can use the modf function to split a float into the integer and fraction parts. modf is in the standard C library.
#include <math.h>
#include <limits.h>
double val = ...
double i;
long l;
/* check if fractional part is 0 */
if (modf(val, &i) == 0.0) {
/* val is an integer. check if it can be stored in a long */
if (val >= LONG_MIN && val <= LONG_MAX) {
/* can be exactly represented by a long */
l = val;
}
}
How to check if float can be exactly represented as an integer ?
I'm looking to for a reasonably efficient way of determining if a floating point value double can be exactly represented by an integer data type long, 64 bit.
Range (LONG_MIN, LONG_MAX) and fraction (frexp()) tests needed. Also need to watch out for not-a-numbers.
The usual idea is to test like (double)(long)x == x, but to avoid its direct usage. (long)x, when x is out of range, is undefined behavior (UB).
The valid range of conversion for (long)x is LONG_MIN - 1 < x < LONG_MAX + 1 as code discards any fractional part of x during the conversion. So code needs to test, using FP math, if x is in range.
#include <limits.h>
#include <stdbool.h>
#define DBL_LONG_MAXP1 (2.0*(LONG_MAX/2+1))
#define DBL_LONG_MINM1 (2.0*(LONG_MIN/2-1))
bool double_to_long_exact_possible(double x) {
if (x < DBL_LONG_MAXP1) {
double whole_number_part;
if (frexp(x, &whole_number_part) != 0.0) {
return false; // Fractional part exist.
}
#if -LONG_MAX == LONG_MIN
// rare non-2's complement machine
return x > DBL_LONG_MINM1;
#else
return x - LONG_MIN > -1.0;
#endif
}
return false; // Too large or NaN
}
Any IEEE floating-point double or float value with a magnitude at or above 2^52 or 2^23 will be whole number. Adding 2^52 or 2^23 to a positive number whose magnitude is less than that will cause it to be rounded to a whole number. Subtracting the value that was added will yield a whole number which will equal the original iff the original was a whole number. Note that this algorithm will fail with some numbers larger than 2^52, but it isn't needed for numbers that big.
Could you use the modulus operator to check if the double is divisible by one... or am I completely misunderstanding the question?
double val = ... ; // Value
if(val % 1 == 0) {
// Val is evenly divisible by 1 and is therefore a whole number
}
How can I subtract two integers in C without the - operator?
int a = 34;
int b = 50;
You can convert b to negative value using negation and adding 1:
int c = a + (~b + 1);
printf("%d\n", c);
-16
This is two's complement sign negation. Processor is doing it when you use '-' operator when you want to negate value or subtrackt it.
Converting float is simpler. Just negate first bit (shoosh gave you example how to do this).
EDIT:
Ok, guys. I give up. Here is my compiler independent version:
#include <stdio.h>
unsigned int adder(unsigned int a, unsigned int b) {
unsigned int loop = 1;
unsigned int sum = 0;
unsigned int ai, bi, ci;
while (loop) {
ai = a & loop;
bi = b & loop;
ci = sum & loop;
sum = sum ^ ai ^ bi; // add i-th bit of a and b, and add carry bit stored in sum i-th bit
loop = loop << 1;
if ((ai&bi)|(ci&ai)|(ci&bi)) sum = sum^loop; // add carry bit
}
return sum;
}
unsigned int sub(unsigned int a, unsigned int b) {
return adder(a, adder(~b, 1)); // add negation + 1 (two's complement here)
}
int main() {
unsigned int a = 35;
unsigned int b = 40;
printf("%u - %u = %d\n", a, b, sub(a, b)); // printf function isn't compiler independent here
return 0;
}
I'm using unsigned int so that any compiler will treat it the same.
If you want to subtract negative values, then do it that way:
unsgined int negative15 = adder(~15, 1);
Now we are completly independent of signed values conventions. In my approach result all ints will be stored as two's complement - so you have to be careful with bigger ints (they have to start with 0 bit).
Pontus is right, 2's complement is not mandated by the C standard (even if it is the de facto hardware standard). +1 for Phil's creative answers; here's another approach to getting -1 without using the standard library or the -- operator.
C mandates three possible representations, so you can sniff which is in operation and get a different -1 for each:
negation= ~1;
if (negation+1==0) /* one's complement arithmetic */
minusone= ~1;
else if (negation+2==0) /* two's complement arithmetic */
minusone= ~0;
else /* sign-and-magnitude arithmetic */
minusone= ~0x7FFFFFFE;
r= a+b*minusone;
The value 0x7FFFFFFFE would depend on the width (number of ‘value bits’) of the type of integer you were interested in; if unspecified, you have more work to find that out!
+ No bit setting
+ Language independent
+ Can be adjusted for different number types (int, float, etc)
- Almost certainly not your C homework answer (which is likely to be about bits)
Expand a-b:
a-b = a + (-b)
= a + (-1).b
Manufacture -1:
float: pi = asin(1.0);
(with minusone_flt = sin(3.0/2.0*pi);
math.h) or = cos(pi)
or = log10(0.1)
complex: minusone_cpx = (0,1)**2; // i squared
integer: minusone_int = 0; minusone_int--; // or convert one of the floats above
+ No bit setting
+ Language independent
+ Independent of number type (int, float, etc)
- Requires a>b (ie positive result)
- Almost certainly not your C homework answer (which is likely to be about bits)
a - b = c
restricting ourselves to the number space 0 <= c < (a+b):
(a - b) mod(a+b) = c mod(a+b)
a mod(a+b) - b mod(a+b) = c mod(a+b)
simplifying the second term:
(-b).mod(a+b) = (a+b-b).mod(a+b)
= a.mod(a+b)
substituting:
a.mod(a+b) + a.mod(a+b) = c.mod(a+b)
2a.mod(a+b) = c.mod(a+b)
if b>a, then b-a>0, so:
c.mod(a+b) = c
c = 2a.mod(a+b)
So, if a is always greater than b, then this would work.
Given that encoding integers to support two's complement is not mandated in C, iterate until done. If they want you to jump through flaming hoops, no need to be efficient about it!
int subtract(int a, int b)
{
if ( b < 0 )
return a+abs(b);
while (b-- > 0)
--a;
return a;
}
Silly question... probably silly interview!
For subtracting in C two integers you only need:
int subtract(int a, int b)
{
return a + (~b) + 1;
}
I don't believe that there is a simple an elegant solution for float or double numbers like for integers. So you can transform your float numbers in arrays and apply an algorithm similar with one simulated here
If you want to do it for floats, start from a positive number and change its sign bit like so:
float f = 3;
*(int*)&f |= 0x80000000;
// now f is -3.
float m = 4 + f;
// m = 1
You can also do this for doubles using the appropriate 64 bit integer. in visual studio this is __int64 for instance.
I suppose this
b - a = ~( a + ~b)
Assembly (accumulator) style:
int result = a;
result -= b;
As the question asked for integers not ints, you could implement a small interpreter than uses Church numerals.
Create a lookup table for every possible case of int-int!
Not tested. Without using 2's complement:
#include <stdlib.h>
#include <stdio.h>
int sillyNegate(int x) {
if (x <= 0)
return abs(x);
else {
// setlocale(LC_ALL, "C"); // if necessary.
char buffer[256];
snprintf(buffer, 255, "%c%d", 0x2d, x);
sscanf(buffer, "%d", &x);
return x;
}
}
Assuming the length of an int is much less than 255, and the snprintf/sscanf round-trip won't produce any unspecified behavior (right? right?).
The subtraction can be computed using a - b == a + (-b).
Alternative:
#include <math.h>
int moreSillyNegate(int x) {
return x * ilogb(0.5); // ilogb(0.5) == -1;
}
This would work using integer overflow:
#include<limits.h>
int subtractWithoutMinusSign(int a, int b){
return a + (b * (INT_MAX + INT_MAX + 1));
}
This also works for floats (assuming you make a float version…)
For the maximum range of any data type , one's complement provide the negative value decreased by 1 to any corresponding value. ex:
~1 --------> -2
~2---------> -3
and so on... I will show you this observation using little code snippet
#include<stdio.h>
int main()
{
int a , b;
a=10;
b=~a; // b-----> -11
printf("%d\n",a+~b+1);// equivalent to a-b
return 0;
}
Output: 0
Note : This is valid only for the range of data type. means for int data type this rule will be applicable only for the value of range[-2,147,483,648 to 2,147,483,647].
Thankyou .....May this help you
Iff:
The Minuend is greater or equal to 0, or
The Subtrahend is greater or equal to 0, or
The Subtrahend and the Minuend are less than 0
multiply the Minuend by -1 and add the result to the Subtrahend:
SUB + (MIN * -1)
Else multiply the Minuend by 1 and add the result to the Subtrahend.
SUB + (MIN * 1)
Example (Try it online):
#include <stdio.h>
int subtract (int a, int b)
{
if ( a >= 0 || b >= 0 || ( a < 0 && b < 0 ) )
{
return a + (b * -1);
}
return a + (b * 1);
}
int main (void)
{
int x = -1;
int y = -5;
printf("%d - %d = %d", x, y, subtract(x, y) );
}
Output:
-1 - -5 = 4
int num1, num2, count = 0;
Console.WriteLine("Enter two numebrs");
num1 = int.Parse(Console.ReadLine());
num2 = int.Parse(Console.ReadLine());
if (num1 < num2)
{
num1 = num1 + num2;
num2 = num1 - num2;
num1 = num1 - num2;
}
for (; num2 < num1; num2++)
{
count++;
}
Console.WriteLine("The diferrence is " + count);
void main()
{
int a=5;
int b=7;
while(b--)a--;
printf("sud=%d",a);
}