#include <stdio.h>
/* Print the low `i` bits of `n` in binary, most-significant bit first.
 * Fix: the value is shifted as unsigned — right-shifting a negative
 * signed int is implementation-defined in C, and this helper may be
 * handed a field that fills the sign bit. */
void printBinary(int n, int i) {
    int k;
    for (k = i - 1; k >= 0; k--) {
        if (((unsigned int)n >> k) & 1u)
            printf("1");
        else
            printf("0");
    }
}
/* Type-puns a 32-bit float onto its IEEE-754 fields.
 * NOTE(review): bit-field allocation order is implementation-defined;
 * this LSB-first layout (mantissa, exponent, sign) matches most popular
 * compilers — confirm for your toolchain. */
typedef union {
float f;
struct {
unsigned int mantissa : 23; //4 <- width the asker substituted for a 4-bit mantissa
unsigned int exponent : 8; //3 <- width the asker substituted for a 3-bit exponent
unsigned int sign : 1;
}raw;
}myfloat;
/* Dump one float's fields as: "<sign> | <exponent bits> | <mantissa bits>". */
void printIEEE(myfloat var){
    printf("%d | ", var.raw.sign);      /* sign bit first */
    printBinary(var.raw.exponent, 8);   /* 8 exponent bits (asker tried 3) */
    printf(" | ");
    printBinary(var.raw.mantissa, 23);  /* 23 mantissa bits (asker tried 4) */
    printf("\n");
}
/* Demo: show the IEEE-754 field layout of -4.25. */
int main(){
    myfloat var;
    var.f = -4.25f;  /* exactly representable in binary32 */
    printf("IEEE 754 represantation of %f is : \n", var.f);
    printIEEE(var);
    return 0;
}
I found this code on the Internet. I get an error when I make some changes to it — for example, when I change the field widths. I want to use 3 exponent bits and 4 mantissa bits, but when I change the numbers the output becomes 0 000 0000.
You could try a "cheat" solution that shares the whole bit pattern through the union:
#include <stdio.h>
#include <stdint.h>
/* Overlays a float with a 32-bit unsigned integer so the raw bit
 * pattern can be read back; assumes sizeof(float) == sizeof(uint32_t). */
union ieee754 {
uint32_t i;
float f;
};
/* Print the low `i` bits of `n`, most-significant bit first.
 * Fix: the mask must start life as an unsigned 32-bit value — the
 * original `1 << (i - 1)` left-shifts a signed int into the sign bit
 * when i == 32, which is undefined behavior in C. */
void printBinary(uint32_t n, int i) {
    uint32_t mask = (uint32_t)1 << (i - 1);
    do putchar(n & mask ? '1' : '0');
    while (mask >>= 1);
}
/* Demo: print the full 32-bit pattern of -4.25. */
int main()
{
    union ieee754 var;   /* float and uint32_t share the same storage */
    var.f = -4.25;
    printf("IEEE 754 represantation of %f is:\n", var.f);
    printBinary(var.i, 32);  /* all 32 bits, MSB first */
    printf("\n");
    return 0;
}
Output:
IEEE 754 represantation of -4.250000 is:
11000000100010000000000000000000
Interpretation (or verification) of the bit pattern:
11000000100010000000000000000000
sign bit ... 1
exponent ... 10000001 (= 129)
fraction ... 00010000000000000000000 (= 1/16)
decimal value = (-1)^1 * (1 + 1/16) * 2^(129 - 127)
= -4.250000
Related
The C program below displays the binary representation of an inputted decimal number:
#include <stdio.h>
#include <stdlib.h>
/* Splits a 32-bit int into its 31 low-order value bits (dgts) and the
 * sign bit. NOTE(review): bit-field order is implementation-defined;
 * dgts-then-sign matches compilers that allocate from the LSB, which is
 * exactly what the question below is about. */
typedef union {
int i;
struct {
unsigned int dgts: 31;
unsigned int sign: 1;
} bin;
} myint;
/* Print the low `i` bits of `n`, most-significant bit first. */
void printb(int n, int i) {
    int bit;
    for (bit = i - 1; bit >= 0; bit--) {
        if ((n >> bit) & 1)
            printf("1");
        else
            printf("0");
    }
}
/* Print a myint as "<sign bit> | <31 binary digits>". */
void display_binary(myint x) {
    printf("%d | ", x.bin.sign);
    printb(x.bin.dgts, 31);  /* the 31 value bits */
    printf("\n");
}
/* Read a decimal integer and print its sign bit and 31 value bits.
 * Fix: the original ignored scanf's return value, so non-numeric input
 * left `decimal.i` uninitialized (reading it is UB). */
int main() {
    myint decimal;
    printf("input decimal value : ");
    if (scanf("%d", &decimal.i) != 1) {  /* reject non-numeric input */
        fprintf(stderr, "invalid input\n");
        return 1;
    }
    printf("Binary representation is:\n");
    display_binary(decimal);
    return 0;
}
The program is working correctly. What I can't understand is the order of the dgts and sign members of the bin struct. Intuitively, the sign member should precede dgts, as the bits representing data are ordered from left to right in memory (as far as I know). After swapping the order of these two members, the result became wrong. Why should dgts come before sign?
The order of bits in bit-fields is implementation-defined, but most popular compilers start with the LSB.
Numbers are stored in binary and it does not matter how you enter them. Negative numbers are stored as two's complement on most modern systems. In this system, the sign bit does not exist "per se". No special types are needed.
I would implement it as
/* Print every bit of `n`, MSB first, by walking a one-bit mask downward. */
void printb(int n) {
    unsigned int mask;
    for (mask = 1U << (sizeof(n) * CHAR_BIT - 1); mask != 0; mask >>= 1)
        printf("%c", (n & mask) ? '1' : '0');
}
/* QUESTION CODE — does not compile as posted: the bitwise operators
 * `&` and `>>` are applied to doubles below, and C defines them only
 * for integer operands (see the compiler error quoted further down). */
int main()
{
double hexa_number; /* should be an integer type */
double bitmask = 0x80; /* should be an integer type */
double i; /* loop counter as a double — also questionable */
printf("Enter 8 bit number in hexadecimal form: ");
scanf("%lf",& hexa_number);
for( i = 0; i <= 8; i++) /* note: 0..8 inclusive is 9 iterations for an 8-bit value */
{
if(hexa_number&(bitmask >> i)) /* error: invalid operands to binary >> and & */
printf("1");
else
printf("0");
}
return 0;
}
Additionally, I need to display the binary representation of this number, along with a count of the number of 0's and 1's in the binary number.
I found other ways to convert it, but not with a bitmask.
The problem could be that you are using a double for your bitmask. I don't think >> makes much sense for doubles, especially as they are composed of a sign bit, a mantissa, and a power-of-2 exponent.
For example, 8 >> 1 is 4. That is a nice consistent operation, same as division by 2.
If we imagine an unsigned two byte float for simplicity and imagine that the first byte represents the mantissa and the second the exponent (both unsigned), then 7 * 2^4 (=112) could be represented as 0b00000111 00000100. If you shift that 1 to the right, >> 1, you will get 0b00000011 10000010, which by our convention is now 3 * 2^130. This is definitely not the division by 2 you would get by shifting an integer right by 1.
My compiler gives "error: invalid operands to binary >> (have double and unsigned)" if I try bitwise shifting a floating point number.
So basically you could try the following:
#include <stdio.h>
#include <stdlib.h>
/* Read an integer (hex "0x..", octal, or decimal — %i accepts all three),
 * print its binary representation MSB-first, and count the 1 and 0 bits.
 * Fixes: the mask is built from an unsigned 1 (`1 << 31` on a signed int
 * is undefined behavior); scanf's result is checked; the counters are
 * printed with %u since they are unsigned. */
int main()
{
    int hexa_number = 0;
    unsigned num_bits = sizeof(int) * 8;
    unsigned bitmask = 1U << (num_bits - 1);  /* fix: 1U, not signed 1 */
    printf("Enter a number in hexadecimal form: ");
    if (scanf("%i", &hexa_number) != 1) {     /* reject non-numeric input */
        printf("invalid input\n");
        return EXIT_FAILURE;
    }
    unsigned k = 0;
    unsigned num_ones = 0;
    unsigned num_zeros = 0;
    for (k = 0; k != num_bits; k++)
    {
        if (hexa_number & (bitmask >> k))
        {
            printf("1");
            num_ones++;
        }
        else
        {
            printf("0");
            num_zeros++;
        }
    }
    /* fix: %u matches the unsigned counters (%i was a type mismatch) */
    printf("\nNumber of ones is %u\nNumber of zeros is %u", num_ones, num_zeros);
    return 0;
}
More specifically, I need to make a function float_16(unsigned sign, unsigned exp, unsigned frac) that returns a bit16 representation (bit16 is a typedef for an unsigned integer) of a number, given the sign, exponent, and fraction values as unsigned integers.
I have the following preamble:
int main(int argc, const char * argv[]) {
typedef unsigned int bit16;
bit16 a;
a = 0xABCD; // 1010 1011 1100 1101 in binary = 43981
printf("Sign is: %d",extractSign(a));
printf(" ");
printf("Exponent is: %d",extractExp(a));
printf(" ");
printf("Fraction is: %d",extractFrac(a));
…
}
In my main program, and these values are retrieved by the functions in a separate C file:
/* Return the sign (bit 15) of a 16-bit pattern. */
int extractSign(bit16 x)
{
    return (x & 0x8000) >> 15;  /* isolate bit 15, then shift it down */
}
/* Return the 7-bit exponent field (bits 8..14). */
int extractExp(bit16 x)
{
    return (x & 0x7F00) >> 8;  /* mask bits 8..14, then shift down */
}
/* Return the 8-bit fraction field (bits 0..7). */
int extractFrac(bit16 x)
{
    return x & 0xFF;  /* low byte is the fraction */
}
How would I be able to use these values to fulfill what is being asked here?
You can use a union.
#include <stdio.h>
typedef unsigned short bit16; // On my computer, sizeof (int) == 4, while sizeof (short) == 2
/* Custom 16-bit float layout: 1 sign bit, 7 exponent bits, 8 fraction bits.
 * NOTE(review): bit-fields are declared LSB-first, matching how most
 * compilers allocate them, so `a` and `guts` describe the same 16 bits. */
union floating_point
{
bit16 a;
struct
{
unsigned frac : 8; /* bits 0..7  */
unsigned exp : 7;  /* bits 8..14 */
unsigned sign : 1; /* bit 15     */
} guts;
};
/* Pack the three fields into a bit16 / unpack individual fields from one. */
bit16 float_16 (unsigned sign, unsigned exp, unsigned frac);
unsigned extractSign (bit16 a);
unsigned extractExp (bit16 a);
unsigned extractFrac (bit16 a);
int main(int argc, const char * argv[])
{
    bit16 a = 0xABCD;
    /* Round trip: unpack the three fields, repack them, compare with a. */
    printf("%d\n", a == float_16(extractSign(a), extractExp(a), extractFrac(a)));
    printf("Sign is: %u\n", extractSign(a));
    printf("Exponent is: %u\n", extractExp(a));
    printf("Fraction is: %u\n", extractFrac(a));
    return 0;
}
/* Pack sign/exponent/fraction into a 16-bit pattern via the union. */
bit16 float_16 (unsigned sign, unsigned exp, unsigned frac)
{
    union floating_point value;
    value.guts.frac = frac;  /* low 8 bits    */
    value.guts.exp  = exp;   /* middle 7 bits */
    value.guts.sign = sign;  /* top bit       */
    return value.a;
}
/* Unpack the sign bit from a packed 16-bit pattern. */
unsigned extractSign (bit16 a)
{
    union floating_point value = { .a = a };
    return value.guts.sign;
}
/* Unpack the 7-bit exponent field from a packed 16-bit pattern. */
unsigned extractExp (bit16 a)
{
    union floating_point value = { .a = a };
    return value.guts.exp;
}
/* Unpack the 8-bit fraction field from a packed 16-bit pattern. */
unsigned extractFrac (bit16 a)
{
    union floating_point value = { .a = a };
    return value.guts.frac;
}
The binary multiplication algorithm with ints can be represented as follows:
/* Multiply two unsigned ints without `*`: classic shift-and-add.
 * Each set bit of `multiplier` contributes a correspondingly shifted
 * copy of `multiplicand` to the product. */
unsigned int multiply(unsigned int multiplier, unsigned int multiplicand) {
    unsigned int product = 0;
    for (; multiplier != 0; multiplier >>= 1, multiplicand <<= 1) {
        if (multiplier & 1u)
            product += multiplicand;
    }
    return product;
}
This function performs the multiplication of two unsigned ints without the operator "*".
However it does not work with floats because a float is composed of three parts:
IEEE 754 single-precision binary floating-point format
These parts can be isolated as follows:
#include <stdio.h>
/* Overlays a float with its IEEE-754 single-precision fields
 * (1 sign, 8 exponent, 23 mantissa bits). NOTE(review): bit-field
 * order is implementation-defined; LSB-first is assumed here. */
typedef union {
float f;
struct {
unsigned int mantisa : 23;
unsigned int exponent : 8;
unsigned int sign : 1;
} parts;
} float_cast;
/* Demo: split 0.15625 (= 2^-3, exactly representable) into its fields. */
int main() {
    float_cast d1;
    d1.f = 0.15625f;
    printf("sign = %x\n", d1.parts.sign);
    printf("exponent = %x\n", d1.parts.exponent);
    printf("mantisa = %x\n", d1.parts.mantisa);
    return 0;
}
With the parts separated as ints I can manipulate the bits. But how to make a function that multiplies the parts of a float?
Thanks in advance
Multiply the mantissas as integers.
Add the exponents
Xor the signs.
There are some details.
The mantissas should both be normalized, meaning that either the high-order bit is 1 or the mantissa is 0. For full compliance you need to deal with denorms and other special cases -- infinities, NaNs, zeros -- and you may need to normalize, denorm, or overflow (set to infinity).
The product is in the range [1, 4), assuming the values were in [1, 2). If the product of the mantissas is greater than 2, some fixups are necessary: increment the exponents by 1; shift both mantissas right one.
Exponents are normally stored with an offset. Suppose the real value of the exponent is e + m, where m is the constant offset. Then m needs to be subtracted from the sum of the representations of the two exponents in order to get the exponent of the product.
Here is my solution and Answer:
#include <stdio.h>
/* Multiply two floats without `*` by shift-and-add on the raw IEEE-754
 * bit patterns. NOTE(review): quick-and-dirty — no handling of zero,
 * denormals, infinities, NaN, or rounding; see the caveats listed in
 * the answer above. */
float multiplyfloat(float multiplier, float multiplicand) {
typedef union {
float f;
unsigned int i;
struct {
unsigned int mantissa : 23;
unsigned int exponent : 8;
unsigned int sign : 1;
} parts;
struct {
unsigned int mantissa : 23;
unsigned int b23 : 1;     /* position of the significand's implicit leading 1 */
unsigned int b31_24 : 8;  /* remaining top bits (exponent high bits + sign)   */
} parts2;
} float_cast;
float_cast product, f1, f2, m1, m2;
product.f = 0.f;  /* product.i starts at 0 */
f1.f = multiplier;
f2.f = multiplicand;
m1 = f1;
m2 = f2;
/* Turn m1/m2 into plain 24-bit integer significands: set the implicit
 * leading 1 (b23) and clear the sign/exponent bits (b31_24). */
m1.parts2.b23 = m2.parts2.b23 = 1;
m1.parts2.b31_24 = m2.parts2.b31_24 = 0;
/* Shift-and-add: walk m1's significand MSB-first (left-shifting m1
 * brings each bit up into b23); add the progressively right-shifted
 * m2 significand whenever that bit is set. */
while (m1.parts.mantissa) {
if (m1.parts2.b23) {
product.i += m2.i;
}
m2.i >>= 1;
m1.i <<= 1;
}
/* The significand product can exceed 23 bits; the spill lands in the
 * exponent field, so shift the mantissa back down to renormalize. */
if (product.parts.exponent > 1) {
product.parts.mantissa >>= product.parts.exponent - 1;
}
/* Combine the biased exponents (bias 127 each); -128 presumably also
 * accounts for the spill counted above — verify against edge cases. */
product.parts.exponent += f1.parts.exponent + f2.parts.exponent - 128;
product.parts.sign = f1.parts.sign != f2.parts.sign;  /* XOR of signs */
return product.f;
}
#include <stdlib.h> /* fix: system() was called with no declaration (invalid in C99) */

/* Demo: compare multiplyfloat() against the built-in `*` operator. */
int main() {
    float a = 134.337368;
    float b = 151.23000000001;
    float res = multiplyfloat(a, b);
    printf("result = %f\n", res);
    printf("compare = %f\n", a * b);
    system("pause");  /* Windows-only pause; harmlessly fails elsewhere */
    return 0;         /* fix: returning 1 wrongly signaled failure to the shell */
}
Any questions, just comment below. Thanks
I am trying to round down a float using bit operations in C.
I start by converting the float to an unsigned int.
I think my strategy should be to get the exponent, and then zero out the bits after that, but I'm not sure how to code that. This is what I have so far:
float roundDown(float f);
unsigned int notRounded = *(unsigned int *)&f;
unsigned int copy = notRounded;
int exponent = (copy >> 23) & 0xff;
int fractional = 127 + 23 - exponent;
if(fractional > 0){
//not sure how to zero out the bits.
//Also don't know how to deal with the signed part.
Since its just for fun, and I'm not sure what the constraints are, here's a variant that DOES work for negative numbers:
/* Round v down using the classic 2^23 float trick: adding 1<<23 pushes
 * the fraction bits out of a float's 23-bit significand so the FPU's
 * rounding discards them, and subtracting 1<<23 restores the magnitude;
 * the -0.5f bias turns round-to-nearest into round-down.
 * NOTE(review): relies on the default round-to-nearest-even mode and on
 * |v| being well below 2^23 — confirm before reuse. */
float myRoundDown_1 (float v) { //only works right for positive numbers
return ((v-0.5f)+(1<<23)) - (1<<23);
}
/* Round v down (floor) for either sign: strip the sign bit, round the
 * magnitude, then restore the sign; for negative inputs 1 is added first
 * so that rounding the magnitude down corresponds to flooring v.
 * Fix: the union's integer member must be a 32-bit type. The original
 * used `unsigned long`, which is 64 bits on LP64 platforms (Linux/macOS),
 * breaking the float/integer overlay and the 0x80000000 sign masking.
 * (The needless `static` on the union is also dropped — it made the
 * scratch variable shared across calls for no benefit.) */
float myRoundDown_2 (float v) { //works for all numbers
    union {
        unsigned int i;  /* was unsigned long: wrong size on LP64 */
        float f;
    } myfloat;
    unsigned int n;
    myfloat.f = v;
    n = myfloat.i & 0x80000000u;  /* save the sign bit */
    myfloat.i &= 0x7fffffffu;     /* work on |v|       */
    myfloat.f = myRoundDown_1(myfloat.f + (n >> 31));
    myfloat.i |= n;               /* restore the sign  */
    return myfloat.f;
}
float roundDown(float f); should be float roundDown(float f) {.
unsigned int notRounded = *(unsigned int *)&f; is incompatible with modern compiler optimizations. Look up “strict aliasing”.
Here is a working function to round down to the power of two:
#include <stdio.h>
#include <assert.h>
#include <string.h>
float roundDown(float f) {
unsigned int notRounded;
assert(sizeof(int) == sizeof(float));
memcpy(¬Rounded, &f, sizeof(int));
// zero out the significand (mantissa):
unsigned int rounded = notRounded & 0xFF800000;
float r;
memcpy(&r, &rounded, sizeof(int));
return r;
}
/* Demo: round 1.33 and 3.0 down to a power of two and print both values. */
int main()
{
    printf("%f %f\n", 1.33, roundDown(1.33));
    printf("%f %f\n", 3.0, roundDown(3.0));
    return 0;
}
This should produce :
1.330000 1.000000
3.000000 2.000000