Round down float using bit operations in C - c

I am trying to round down a float using bit operations in C.
I start by converting the float to an unsigned int.
I think my strategy should be to get the exponent, and then zero out the bits after that, but I'm not sure how to code that. This is what I have so far:
float roundDown(float f);
unsigned int notRounded = *(unsigned int *)&f;
unsigned int copy = notRounded;
int exponent = (copy >> 23) & 0xff;
int fractional = 127 + 23 - exponent;
if(fractional > 0){
//not sure how to zero out the bits.
//Also don't know how to deal with the signed part.

Since it's just for fun, and I'm not sure what the constraints are, here's a variant that DOES work for negative numbers:
/*
 * Round v toward negative infinity (floor) without using <math.h>.
 *
 * NaN, infinities and values with |v| >= 2^23 are returned unchanged: a
 * binary32 float of that magnitude has no fractional bits left to discard.
 * Every other value fits in a long, so the fraction is dropped with an
 * integer conversion (which truncates toward zero) and the result is lowered
 * by one when truncation moved it above v (negative, non-integral input).
 *
 * The earlier "add 2^23, subtract 2^23" trick rounded to nearest-even instead
 * of down, so it returned 0 for 1.0f and 2 for 3.0f.
 */
float myRoundDown_1 (float v) {
    if (!(v > -8388608.0f && v < 8388608.0f)) {
        return v;
    }
    float truncated = (float)(long)v;
    return (truncated > v) ? truncated - 1.0f : truncated;
}

/*
 * Floor of v computed on the sign bit and the magnitude separately.
 *
 * The sign bit is stripped, the magnitude is rounded, and the sign bit is
 * put back. For negative inputs the magnitude has to be rounded UP (floor of
 * -1.5 is -2), so one is added whenever rounding down discarded a fraction.
 *
 * Assumes float and unsigned int are both 32 bits wide with the sign in
 * bit 31. The union is a plain automatic variable, so the function is
 * reentrant.
 */
float myRoundDown_2 (float v) {
    union {
        unsigned int i;
        float f;
    } bits;

    bits.f = v;
    unsigned int sign = bits.i & 0x80000000u;
    bits.i &= 0x7fffffffu;

    float magnitude = bits.f;
    float whole = myRoundDown_1(magnitude);
    if (sign != 0 && whole < magnitude) {
        whole += 1.0f;
    }

    bits.f = whole;
    bits.i |= sign;
    return bits.f;
}

float roundDown(float f); should be float roundDown(float f) {.
unsigned int notRounded = *(unsigned int *)&f; is incompatible with modern compiler optimizations. Look up “strict aliasing”.
Here is a working function to round down to the power of two:
#include <stdio.h>
#include <assert.h>
#include <string.h>
/*
 * Reduce the magnitude of f to a power of two by clearing the 23 low bits of
 * its representation while keeping the top 9 bits (mask 0xFF800000).
 * The bytes are copied with memcpy rather than read through a cast pointer.
 */
float roundDown(float f) {
    assert(sizeof(int) == sizeof(float));

    unsigned int bits;
    memcpy(&bits, &f, sizeof(int));

    /* keep bits 31..23, drop bits 22..0 */
    const unsigned int keepMask = 0xFF800000;
    bits &= keepMask;

    float result;
    memcpy(&result, &bits, sizeof(int));
    return result;
}
/* Print each sample value next to the result of roundDown() for it. */
int main()
{
    const double samples[] = { 1.33, 3.0 };
    const int count = (int)(sizeof samples / sizeof samples[0]);

    for (int k = 0; k < count; k++) {
        printf("%f %f\n", samples[k], roundDown(samples[k]));
    }
}
This should produce :
1.330000 1.000000
3.000000 2.000000

Related

Converting floating point to binary

#include <stdio.h>
/*
 * Print the i lowest bits of n to stdout as '1'/'0' characters,
 * starting with bit i-1 and ending with bit 0. Prints nothing when i <= 0.
 */
void printBinary(int n, int i) {
    int bit = i;
    while (--bit >= 0) {
        const int isSet = (n >> bit) & 1;
        printf(isSet ? "1" : "0");
    }
}
// View of a float either as a value (f) or as three bit-fields (raw).
// The field widths 23 + 8 + 1 add up to 32 bits.
// NOTE(review): the order in which bit-fields are allocated inside a unit is
// implementation-defined; this declaration order lines up with the float's
// sign/exponent/fraction only if the first field is placed in the lowest
// bits - confirm for the target compiler.
typedef union {
float f;
struct {
unsigned int mantissa : 23; // 23-bit field; changing this width changes what the field overlays
unsigned int exponent : 8; // 8-bit field
unsigned int sign : 1; // 1-bit field
}raw;
}myfloat;
/*
 * Print the three bit-fields of var as "sign | exponent | mantissa" followed
 * by a newline; exponent and mantissa are printed in binary with 8 and 23
 * digits respectively.
 */
void printIEEE(myfloat var){
    const int signBit = var.raw.sign;
    const int exponentBits = var.raw.exponent;
    const int mantissaBits = var.raw.mantissa;

    printf("%d | ", signBit);
    printBinary(exponentBits, 8);
    printf(" | ");
    printBinary(mantissaBits, 23);
    printf("\n");
}
/* Store -4.25 in a myfloat and print its value followed by its bit-fields. */
int main(){
    myfloat sample;

    sample.f = -4.25;
    printf("IEEE 754 represantation of %f is : \n",sample.f);
    printIEEE(sample);

    return 0;
}
I found this code on the Internet, but it stops working when I make changes to it. For example, I want to change the field widths to a 3-bit exponent and a 4-bit mantissa; when I do that, the output becomes 0 000 0000.
Would you please try a simpler solution, which shares the bit pattern between a float and an integer in a union:
#include <stdio.h>
#include <stdint.h>
// Overlay of a float and a 32-bit unsigned integer: store through f, then
// read i to obtain the same bytes as an integer bit pattern.
// Assumes float is 32 bits wide, like uint32_t.
union ieee754 {
uint32_t i; // integer view of the stored bytes
float f; // floating-point view of the stored bytes
};
/*
 * Print the i lowest bits of n to stdout, most significant first.
 *
 * i is clamped to the range 0..32; nothing is printed for i <= 0.
 * The mask is built from an unsigned 32-bit one: shifting the int constant 1
 * left by 31 (the i == 32 case) would shift into the sign bit, which is
 * undefined behaviour.
 */
void printBinary(uint32_t n, int i) {
    if (i <= 0) {
        return;
    }
    if (i > 32) {
        i = 32;
    }
    for (uint32_t mask = (uint32_t)1 << (i - 1); mask != 0; mask >>= 1) {
        putchar((n & mask) ? '1' : '0');
    }
}
int main()
{
union ieee754 var;
var.f = -4.25;
printf("IEEE 754 represantation of %f is:\n", var.f);
printBinary(var.i, 32);
printf("\n");
return 0;
}
Output:
IEEE 754 represantation of -4.250000 is:
11000000100010000000000000000000
Interpretation (or verification) of the bit pattern:
11000000100010000000000000000000
sign bit ... 1
exponent ... 10000001 (= 129)
fraction ... 00010000000000000000000 (= 1/16)
decimal value = (-1)^1 * (1 + 1/16) * 2^(129 - 127)
= -4.250000

Rotation of binary number in C

I have an issue i can't solve. The code below should get a number from the user and a number of rotations. The code should calculate the number after the rotations. For negative number of rotations the code should rotate the number left and for positive number of rotation the code should rotate the number right.
For example: for the input x=1010111011111011
my_rotate(x, -3) will return 0111010111011111
my_rotate(x, 3) will return 0111011111011101
Here is the code i wrote so far:
#include <stdio.h>
unsigned short my_rotate(unsigned short, char);
/*
 * Read a number and a rotation count from stdin and rotate the number.
 *
 * The count is read into an int because the %d conversion stores an int;
 * passing the address of a char there is undefined behaviour. The program
 * exits with status 1 when the two values cannot be parsed.
 */
int main()
{
    unsigned short num, res;
    int rotations = 0;

    printf("\nPlease enter a number and number of rotations\n");
    if (scanf("%hu %d", &num, &rotations) != 2) {
        return 1;
    }

    /* my_rotate takes the count as a char, so narrow it explicitly */
    res = my_rotate(num, (char)rotations);
    (void)res; /* the rotated value is computed but not printed, as before */
    return 0;
}
/*
 * Rotate the bits of a by b positions.
 *
 * A positive b moves bits toward the most significant end (bits leaving the
 * top re-enter at the bottom); a negative b moves them the other way. A count
 * of 0, or any multiple of the bit width, returns a unchanged.
 *
 * b is reinterpreted as signed char so negative counts work even where plain
 * char is unsigned, and the count is reduced modulo the width so that no
 * shift is negative or as large as the width.
 * The width is taken as sizeof(unsigned short) * 8.
 */
unsigned short my_rotate(unsigned short a, char b)
{
    const int width = (int)(sizeof(unsigned short) * 8);
    const int count = (signed char)b;

    /* equivalent rotation toward the high bits, always in 0 .. width-1 */
    const int up = ((count % width) + width) % width;
    if (up == 0) {
        return a;
    }

    const unsigned int value = a;
    return (unsigned short)((value << up) | (value >> (width - up)));
}
I always get 0 as a result and i don't know why. What's wrong with my code?
in main :
unsigned short num, res;
char rotations;
printf("\nPlease enter a number and number of rotations\n");
scanf("%hu %d", &num, &rotations);
the last argument of scanf must be a pointer to an int (format is %d) but you give the address of a char, the behavior is undefined. Use an int for rotations for the format %d
In my_rotate, b is a char and you test if(b < 0); the result depends on whether plain char is signed or unsigned on your platform. Declare b as signed char if you expect it to hold negative values.
If rotations is an int and b a signed char :
44795 (1010111011111011) and -3 produce 30175 being 111010111011111
44795 (1010111011111011) and 3 produce 30685 being 111011111011101
as you expected.
Note that for me an unsigned short is 16 bits wide; of course the result is not the same if short has a different number of bits.
@bruno explained the problem with the input well.
A rotation count may exceed +/- bitsNum, so a good first step is to limit the rotation count.
unsigned short my_rotate(unsigned short a, int b) {
unsigned short bitsNum = sizeof(unsigned short) * 8;
//add
b %= bitsNum;
....
Highly portable code would not use bitsNum as that is derived by the size of unsigned short (and assumes 8 bits/char) and an unsigned short could have padding bits. Certainly this is more of a rare machine concern. Code should derive the bit width based on USHRT_MAX instead.

How the binary multiplication of floats can be represented in C?

The binary multiplication algorithm with ints can be represented as follows:
/*
 * Shift-and-add multiplication of two unsigned ints without the * operator.
 * For every set bit of multiplier (lowest first) the correspondingly shifted
 * multiplicand is added to the running sum; the result wraps like ordinary
 * unsigned arithmetic.
 */
unsigned int multiply(unsigned int multiplier, unsigned int multiplicand) {
    unsigned int sum = 0;

    for (; multiplier; multiplier >>= 1, multiplicand <<= 1) {
        if (multiplier & 1u) {
            sum += multiplicand;
        }
    }
    return sum;
}
This function performs the multiplication of two unsigned ints without the operator "*".
However it does not work with floats because a float is composed of three parts:
IEEE 754 single-precision binary floating-point format
These parts can be isolated as follows:
#include <stdio.h>
// View of a float either as a value (f) or as three bit-fields (parts).
// The field widths 23 + 8 + 1 add up to 32 bits.
// NOTE(review): bit-field allocation order is implementation-defined; the
// fields match the float's fraction/exponent/sign only if the first declared
// field is placed in the lowest bits - confirm for the target compiler.
typedef union {
float f;
struct {
unsigned int mantisa : 23; // 23-bit field
unsigned int exponent : 8; // 8-bit field
unsigned int sign : 1; // 1-bit field
} parts;
} float_cast;
/* Store 0.15625 in a float_cast and print its three bit-fields in hexadecimal. */
int main() {
    float_cast sample;

    sample.f = 0.15625;

    printf("sign = %x\n",sample.parts.sign);
    printf("exponent = %x\n",sample.parts.exponent);
    printf("mantisa = %x\n",sample.parts.mantisa);

    return 0;
}
With the parts separated as ints I can manipulate the bits. But how to make a function that multiplies the parts of a float?
Thanks in advance
Multiply the mantissas as integers.
Add the exponents
Xor the signs.
There are some details.
The mantissas should both be normalized, meaning that either the high-order bit is 1 or the mantissa is 0. For full compliance you need to deal with denorms and other special cases -- infinities, NaNs, zeros -- and you may need to normalize, denorm, or overflow (set to infinity).
The product of the mantissas is in the range [1, 4), assuming both values were in [1, 2). If the product is greater than or equal to 2, a fixup is necessary: increment the exponent of the result by 1 and shift the product mantissa right by one bit.
Exponents are normally stored with an offset. Suppose an exponent whose real value is e is stored as e + m, where m is the constant offset (the bias). Adding two stored exponents counts m twice, so m needs to be subtracted from the sum of the two representations in order to get the stored exponent of the product.
Here is my solution and Answer:
#include <stdio.h>
/*
 * Multiply two floats using only integer shifts and additions on their
 * bit patterns (sign: bit 31, exponent: bits 30..23, fraction: bits 22..0).
 *
 * Steps:
 *   1. the sign of the result is the XOR of the operand signs;
 *   2. NaN, infinity and zero operands are answered directly;
 *   3. both significands get their implicit leading 1 made explicit
 *      (subnormal operands are shifted up until bit 23 is set);
 *   4. the significands are multiplied by shift-and-add into a 64-bit
 *      accumulator, visiting every set bit of the multiplier;
 *   5. the product is shifted back to 24 bits with round-to-nearest-even,
 *      and the exponent is e1 + e2 - 127 (plus one when the product of the
 *      significands reached 2);
 *   6. results too large become infinity, results too small become
 *      subnormal or zero.
 *
 * Assumes float and unsigned int are both 32 bits wide.
 */
float multiplyfloat(float multiplier, float multiplicand) {
    typedef union {
        float f;
        unsigned int i;
    } float_cast;

    float_cast f1, f2, product;
    f1.f = multiplier;
    f2.f = multiplicand;

    const unsigned int sign = (f1.i ^ f2.i) & 0x80000000u;
    int e1 = (int)((f1.i >> 23) & 0xFFu);
    int e2 = (int)((f2.i >> 23) & 0xFFu);
    unsigned int m1 = f1.i & 0x7FFFFFu;
    unsigned int m2 = f2.i & 0x7FFFFFu;

    const int zero1 = (e1 == 0 && m1 == 0);
    const int zero2 = (e2 == 0 && m2 == 0);

    /* an all-ones exponent marks infinity (fraction 0) or NaN (fraction != 0) */
    if (e1 == 0xFF || e2 == 0xFF) {
        const int anyNan = (e1 == 0xFF && m1 != 0) || (e2 == 0xFF && m2 != 0);
        /* NaN operand, or infinity times zero, gives NaN */
        product.i = (anyNan || zero1 || zero2) ? 0x7FC00000u
                                                : (sign | 0x7F800000u);
        return product.f;
    }

    if (zero1 || zero2) {
        product.i = sign; /* correctly signed zero */
        return product.f;
    }

    /* make the leading 1 explicit in bit 23; subnormals have no implicit 1 */
    if (e1 == 0) {
        e1 = 1;
        while ((m1 & 0x800000u) == 0) {
            m1 <<= 1;
            e1--;
        }
    } else {
        m1 |= 0x800000u;
    }
    if (e2 == 0) {
        e2 = 1;
        while ((m2 & 0x800000u) == 0) {
            m2 <<= 1;
            e2--;
        }
    } else {
        m2 |= 0x800000u;
    }

    /* shift-and-add: the exact 48-bit product of two 24-bit significands */
    unsigned long long acc = 0;
    unsigned long long addend = m2;
    while (m1 != 0) {
        if (m1 & 1u) {
            acc += addend;
        }
        m1 >>= 1;
        addend <<= 1;
    }

    int exponent = e1 + e2 - 127;
    int shift = 23;
    if ((acc >> 47) != 0) { /* significand product is in [2, 4) */
        shift = 24;
        exponent++;
    }
    if (exponent <= 0) { /* result is subnormal: give up precision instead */
        shift += 1 - exponent;
        exponent = 1;
    }
    if (shift > 63) { /* far below the smallest subnormal */
        product.i = sign;
        return product.f;
    }

    /* round to nearest, ties to even */
    unsigned long long mant = acc >> shift;
    const unsigned long long half = 1ULL << (shift - 1);
    const unsigned long long rest = acc & ((half << 1) - 1);
    if (rest > half || (rest == half && (mant & 1u) != 0)) {
        mant++;
    }

    /*
     * mant still carries its leading 1 in bit 23 (when normal), so adding it
     * to (exponent - 1) << 23 yields the final exponent field, and a carry
     * out of the significand from rounding bumps the exponent automatically.
     */
    unsigned long long packed = ((unsigned long long)(exponent - 1) << 23) + mant;
    if (packed >= 0x7F800000u) {
        packed = 0x7F800000u; /* overflow: infinity */
    }

    product.i = sign | (unsigned int)packed;
    return product.f;
}
/*
 * Multiply two sample values with multiplyfloat() and print the result next
 * to the built-in product for comparison.
 *
 * The former system("pause") call was dropped: the function was used without
 * a declaration and only works where a "pause" shell command exists. The
 * program now returns 0, since a non-zero status reports failure.
 */
int main() {
    float a = 134.337368;
    float b = 151.23000000001;
    float res = multiplyfloat(a, b);
    printf("result = %f\n", res);
    printf("compare = %f\n", a * b);
    return 0;
}
Any questions, just comment below. Thanks

Getting the exponent from a floating point in C

I'm writing a function that will get the exponent of a floating point number (IEEE 754 standard) but for some reason when I use the right shift bitwise operator on the number it returns 0
Here is the function
/*
 * Return the unbiased exponent encoded in x, where x holds the 32-bit
 * pattern of a single-precision float (not a converted numeric value).
 *
 * The exponent field starts at bit 23, so the pattern is shifted by 23
 * (not 21), masked to 8 bits and the bias of 127 is removed. The shift is
 * done on an unsigned copy so the sign bit cannot leak into the result.
 */
int get_exp (int x)
{
    const unsigned int bits = (unsigned int)x;
    return (int)((bits >> 23) & 255u) - 127;
}
I'm passing it 7.23 so the output should be 2, for some reason the (x >> 21) part returns 0 when it should actually be returning 129. The 255 is the mask I'm using to and (&) with the exponent part of the floating point number.
I'm guessing you're doing some kind of casting hocus-pocus to pass floating point as ints? I would use float frexpf (float x, int* exp); as defined in <math.h>.
#include <math.h>
/*
 * Return the binary exponent e of x such that |x| = m * 2^e with m in [1, 2),
 * i.e. the exponent as defined by the IEEE 754 encoding (7.23 -> 2).
 *
 * frexpf() normalises its fraction to [0.5, 1), so the exponent it stores is
 * one larger than the IEEE one and is corrected here. For zero, and for
 * infinities/NaN, the value stored by frexpf() is returned as is (0 for
 * zero; unspecified by the C standard for non-finite inputs).
 */
int get_exp(float x)
{
    int exp = 0;
    frexpf(x, &exp);
    if (x == 0.0f || !isfinite(x)) {
        return exp;
    }
    return exp - 1;
}
It's guaranteed to work regardless of the sizes of the floating point types.
If you want to roll it yourself, you can adapt this code.
#define EXPONENT_BIAS (-127)
/*
 * Return the unbiased exponent of f: the 8-bit field at bits 30..23 of its
 * representation plus EXPONENT_BIAS.
 *
 * The field is extracted with a shift and mask on an integer view of the
 * float instead of a bit-field struct: bit-field allocation order is
 * implementation-defined, and with low-bits-first allocation the previous
 * sign/exponent/mantissa struct read the wrong bits. The byte dump to stderr
 * that helped to diagnose that layout is no longer needed and was removed.
 *
 * Assumes float and unsigned int are both 32 bits wide and share byte order.
 * Zero, subnormals, infinities and NaN are not treated specially.
 */
int get_exp(float f)
{
    union {
        float f;
        unsigned int bits;
    } u;

    u.f = f;
    return (int)((u.bits >> 23) & 0xFFu) + EXPONENT_BIAS;
}
This will bite you if sizeof(float) != 4, or if you switch endian-ness.
The main issues are passing an int rather than a float and shifting by 21 instead of 23, as @dbush explains.
The IEEE 754 standard (binary32) has a number of corner cases: infinity, NaN, and sub-normal values including zero. So additional code is needed to cope with them.
Assuming proper endian:
/*
 * Unbiased binary exponent of x with the special encodings handled:
 *   - zero (either sign) returns 0;
 *   - an all-ones exponent field (infinity or NaN) returns INT_MAX;
 *   - a zero exponent field (subnormal) is scaled up by 2^EXPOSHIFT,
 *     evaluated recursively, and the scaling is subtracted again;
 *   - anything else returns the field minus EXPOBIAS.
 */
int get_exp(float x) {
    assert(sizeof x == sizeof(uint32_t));
#define EXPOSHIFT 23
#define EXPOMASK 255
#define EXPOBIAS 127
    if (x == 0.0) {
        return 0;
    }

    union {
        float x;
        uint32_t u32;
    } pun;
    pun.x = x;

    const int field = (int) (pun.u32 >> EXPOSHIFT) & EXPOMASK;
    switch (field) {
    case EXPOMASK: /* x is infinity or NaN */
        return INT_MAX;
    case 0: /* subnormal: normalise first */
        return get_exp(x * (1L << EXPOSHIFT)) - EXPOSHIFT;
    default:
        return field - EXPOBIAS;
    }
}
Working under the assumption that a float is 32 bit and is laid out as specified here, you have three issues:
Your function needs to accept a float.
You need to point a uint32_t to the address of the float so it sees the same bytes, then perform actions against the dereferenced pointer.
The exponent starts at the 24th (23 if you start from 0) bit, not the 22nd (21 if you start with 0), so you have to shift by 23.
#include <stdio.h>
#include <stdint.h>
/*
 * Return the unbiased exponent of x: bits 30..23 of its representation
 * minus the bias of 127.
 *
 * The bits are read through a union rather than by dereferencing a
 * uint32_t pointer aimed at the float, which violates the strict-aliasing
 * rule. The field is converted to int before the bias is subtracted so that
 * negative exponents are computed in signed arithmetic.
 * Assumes float is 32 bits wide.
 */
int get_exp (float x)
{
    union {
        float f;
        uint32_t u;
    } pun;

    pun.f = x;
    return (int)((pun.u >> 23) & 255u) - 127;
}
/* Print the exponent that get_exp() reports for 7.23. */
int main()
{
    const int exponent = get_exp(7.23);

    printf("exp=%d\n", exponent);
    return 0;
}
Result:
exp=2
Should performance not be an issue, simply iterate:
/*
 * Return e such that |f| = m * 2^e with m in [0.5, 1), found by repeatedly
 * doubling or halving f; no knowledge of the float encoding is needed.
 *
 * Zero and infinity are unchanged by doubling/halving, so the loops below
 * would never finish for them; 0 is returned instead. NaN fails every
 * comparison and also yields 0.
 */
int expof(float f) {
    int expo = 0;

    if (f < 0.0f) {
        f = -f;
    }
    /* only zero and infinity are equal to half of themselves */
    if (f * 0.5f == f) {
        return 0;
    }

    while (f < 0.5f) {
        f *= 2.0f;
        expo--;
    }
    while (f >= 1.0f) {
        f *= 0.5f;
        expo++;
    }
    return expo;
}
It does not depend on any particular float implementation, other than requiring that the exponent fits in an int. It uses no external functions, as requested in the comments.
Same result as from int expo; frexpf(f, &expo); return expo
The parameter list show
int x
and you pass a floating point number. Try to substitute with
float x

Encoding int value as an IEEE-754 float (binary32)

Given the 32 bits that represent an IEEE 754 floating-point number, how can the number be converted to an integer, using integer or bit operations on the representation (rather than using a machine instruction or compiler operation to convert)?
I have the following function but it fails in some cases:
Input: int x (contains 32 bit single precision number in IEEE 754 format)
/* Builds a 32-bit pattern (sign | exponent << 23 | 23-bit mantissa) from the integer x. */
/* Zero is returned unchanged, before any normalisation is attempted. */
if(x == 0) return x;
unsigned int signBit = 0;
unsigned int absX = (unsigned int)x;
/* For negative x remember the sign in bit 31 and continue with -x. */
if (x < 0)
{
signBit = 0x80000000u;
/* NOTE(review): -x is evaluated as int before the cast, which overflows for the most negative int - confirm whether that input must be supported. */
absX = (unsigned int)-x;
}
/* 158 = 127 + 31: the exponent value used when bit 31 of absX is already set. */
unsigned int exponent = 158;
/* Shift left until bit 31 is set, lowering the exponent once per shift. */
while ((absX & 0x80000000) == 0)
{
exponent--;
absX <<= 1;
}
/* Drop the low 8 bits; they are discarded without rounding. */
unsigned int mantissa = absX >> 8;
/* The 0x7fffff mask removes bit 23 of mantissa (the leading 1 after normalisation). */
unsigned int result = signBit | (exponent << 23) | (mantissa & 0x7fffff);
printf("\nfor x: %x, result: %x",x,result);
return result;
C has the "union" to handle this type of view of data:
typedef union {
int i;
float f;
} u;
u u1;
u1.f = 45.6789;
/* now u1.i refers to the int version of the float */
printf("%d",u1.i);
&x gives the address of x so has float* type.
(int*)&x cast that pointer to a pointer to int ie to a int* thing.
*(int*)&x dereference that pointer into an int value. It won't do what you believe on machines where int and float have different sizes.
And there could be endianness issues.
This solution was used in the fast inverse square root algorithm.
// With the proviso that your compiler implementation uses
// the same number of bytes for an int as for a float:
// example float
float f = 1.234f;
// get address of float, cast as pointer to int, reference
int i = *((int *)&f);
// get address of int, cast as pointer to float, reference
float g = *((float *)&i);
printf("%f %f %08x\n",f,g,i);
float x = 43.133;
int y;
assert (sizeof x == sizeof y);
memcpy (&y, &x, sizeof x);
...
You can cast the float using a reference. A cast like this should never generate any code.
C++
float f = 1.0f;
int i = (int &)f;
printf("Float %f is 0x%08x\n", f, i);
Output:
Float 1.000000 is 0x3f800000
If you want c++ style cast use a reinterpret_cast, like this.
int i = reinterpret_cast<int &>(f);
It does not work with expressions, you have to store it in a variable.
int i_times_two;
float f_times_two = f * 2.0f;
i_times_two = (int &)f_times_two;
i_times_two = (int &)(f * 2.0f);
main.cpp:25:13: error: C-style cast from rvalue to reference type 'int &'
You cannot (meaningfully) convert a floating point number into an 'integer' (signed int or int) in this way.
It may end up having the integer type, but it's actually just an index into the encoding space of IEEE754, not a meaningful value in itself.
You might argue that an unsigned int serves dual purpose as a bit pattern and an integer value, but int does not.
Also there are platform issues with bit manipulation of signed ints.
Multiply the floating-point number by the factor you want. In this case I multiplied by 100,000, because five digits after the decimal point are meaningful in my application.
Convert the result to bytes, then join the bytes back together and divide by 100,000 again.
/*
 * Scale angleX by 100000, split the resulting integer into base-256 digits
 * (least significant first) in TxData, then rebuild the integer from those
 * digits in angleSon.
 */
double angleX, angleY;
angleX = 3.2342;
angleY = 1.34256;
printf("%f, %f", (double)angleX, (double)angleY);
int remain, j;
int TxData[8];
j=0;
remain=0;
/* + 0.5 rounds to the nearest integer; a plain conversion truncates, so a
   product that lands just below a whole number would lose its last digit */
unsigned long data = angleX*100000 + 0.5;
/* data is unsigned long, so it must be printed with %lu rather than %d */
printf("\ndata : %lu\n", data);
while(data>=256)
{
remain= data%256;
data = data/256;
TxData[j]= remain;
printf("\ntxData %d : %d", j, TxData[j]);
j++;
}
/* the last remaining digit is below 256 and becomes the top byte */
TxData[j] = data;
printf("\ntxData %d : %d", j, TxData[j]);
int i=0;
long int angleSon=0;
/* weight digit i by 256^i and accumulate */
for(i=0;i<=j;i++)
{
angleSon += pow(256,i)*TxData[i];
printf("\nangleSon : %li", angleSon);
}

Resources