8 byte double as binary string to uint64_t - c

Im looking for way to convert 8 byte double to uint64_t. I cant use any standard library becouse of that there is only 4byte double in my solution.
This conversion should convert 10987789.5 to 10987789 as int.
Conversion that I use right now:
uint64_t binDoubleToUint64_t(char *bit){
uint8_t i, j;
uint64_t fraction;
for(i=0; i<64; i++)
bit[i]-='0';
uint16_t exponent = bit[1] ? 1 : 0;
j = 0;
for(i=9; i>0;i--)
exponent += bit[i+2] * int_pow(2, j++);
bit[11] = bit[1];
fraction = 0;
j=0;
for(i=0; i < exponent; i++){
fraction = fraction << 1;
if(bit[11+i])
fraction |= 1 << 1;
}
return fraction;
}
But this give me wrong answers.
While I try to convert double 10225203.0 (0x416380c660000000) it returns 10225202 (should 10225203)

Can you read the bit values straight in as a uint64_t. Then the code might look something like this:
uint64_t binDoubleToUint64_t (uint64_t in) {
if (!(in & 0x4000000000000000) || in & 0x800000000000000000) {
/* If the exponent isn't big enough to give a value greater than 1
* or our number is negative return 0.
*/
return 0;
}
uint32_t exponent = ((in & 0x7FF0000000000000) >> 52) - 1023;
// get the mantissa including the imagined bit.
uint64_t mantissa = (in & 0xFFFFFFFFFFFFF) | 0x10000000000000;
// Now we just need to work out how much to shift the mantissa by.
/* You may notice that the top bit of the mantissa is actually at 53 once
you put the imagined bit back in, mantissaTopBit is really
floor(log2(mantissa)) which is 52 (i.e. the power of 2 of the position
that the top bit is in). I couldn't think of a good name for this, so just
imagine that you started counting from 0 instead of 1 if you like!
*/
uint32_t mantissaTopBit = 52;
if (mantissaTopBit > exponent)
return mantissa >> mantissaTopBit - exponent;
else {
if (exponent - mantissaTopBit > 12) {
//You're in trouble as your double doesn't fit into an uint64_t
}
return mantissa << exponent - mantissaTopBit;
}
}
This has been written from my memory of the floating point spec (I haven't checked all the values) so you may want to check the values given. It works for your examples, but you may want to check that I've put the right number of '0's in everywhere.

/*
* write a double to a stream in ieee754 format regardless of host
* encoding.
* x - number to write
* fp - the stream
* bigendian - set to write big bytes first, else write little bytes first
* Returns: 0 or EOF on error
* Notes: different NaN types and negative zero not preserved.
* if the number is too big to represent it will become infinity
* if it is too small to represent it will become zero.
*/
int fwriteieee754(double x, FILE *fp, int bigendian)
{
int shift;
unsigned long sign, exp, hibits, hilong, lowlong;
double fnorm, significand;
int expbits = 11;
int significandbits = 52;
/* zero (can't handle signed zero) */
if (x == 0)
{
hilong = 0;
lowlong = 0;
goto writedata;
}
/* infinity */
if (x > DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 0;
goto writedata;
}
/* -infinity */
if (x < -DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (1 << 31);
lowlong = 0;
goto writedata;
}
/* NaN - dodgy because many compilers optimise out this test, but
*there is no portable isnan() */
if (x != x)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 1234;
goto writedata;
}
/* get the sign */
if (x < 0) { sign = 1; fnorm = -x; }
else { sign = 0; fnorm = x; }
/* get the normalized form of f and track the exponent */
shift = 0;
while (fnorm >= 2.0) { fnorm /= 2.0; shift++; }
while (fnorm < 1.0) { fnorm *= 2.0; shift--; }
/* check for denormalized numbers */
if (shift < -1022)
{
while (shift < -1022) { fnorm /= 2.0; shift++; }
shift = -1023;
}
/* out of range. Set to infinity */
else if (shift > 1023)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (sign << 31);
lowlong = 0;
goto writedata;
}
else
fnorm = fnorm - 1.0; /* take the significant bit off mantissa */
/* calculate the integer form of the significand */
/* hold it in a double for now */
significand = fnorm * ((1LL << significandbits) + 0.5f);
/* get the biased exponent */
exp = shift + ((1 << (expbits - 1)) - 1); /* shift + bias */
/* put the data into two longs (for convenience) */
hibits = (long)(significand / 4294967296);
hilong = (sign << 31) | (exp << (31 - expbits)) | hibits;
x = significand - hibits * 4294967296;
lowlong = (unsigned long)(significand - hibits * 4294967296);
writedata:
/* write the bytes out to the stream */
if (bigendian)
{
fputc((hilong >> 24) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc(lowlong & 0xFF, fp);
}
else
{
fputc(lowlong & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 24) & 0xFF, fp);
}
return ferror(fp);
}
You can trivially modify this function to do what you want.
https://github.com/MalcolmMcLean/ieee754

Related

Round Constants in Keccak

Recently, just for the heck of it, I've been playing around with an attempt at implementing Keccak, the cryptographic primitive behind SHA-3. I've run into some issues however, specifically with calculating the round constants used in the "Iota" step of the permutation.
Just to get it out of the way: Yes. I know they are round constants. I know I could hard code them as constants. But where's the fun in that?
I've specifically been referencing the FIPS 202 specification document on SHA-3 as well as the Keccak team's own Keccak reference. However, despite my efforts, I can't seem to end up with the correct constants. I've never dealt with bit manipulation before, so if I'm doing something the complete wrong way, feel free to let me know.
rc is a function defined in the FIPS 202 standard of Keccak that is a linear feedback shift register with a feedback polynomial of x^8 + x^6 + x^5 + x^4 + 1.
The values of t (specific to SHA-3) are defined as the set of integers that includes j + 7 * i_r, where i_r = {0, 1, ..., 22, 23} and j = {0, 1, ..., 4, 5}.
The expected outputs (the round constants) are defined as follows: 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, and 0x8000000080008008.
rc Function Implementation
uint64_t rc(int t)
{
if(t % 255 == 0)
{
return 0x1;
}
uint64_t R = 0x1;
for(int i = 1; i <= t % 255; i++)
{
R = R << 0x1;
R |= (((R >> 0x0) & 0x1) ^ ((R >> 0x8) & 0x1)) << 0x0;
R |= (((R >> 0x4) & 0x1) ^ ((R >> 0x8) & 0x1)) << 0x4;
R |= (((R >> 0x5) & 0x1) ^ ((R >> 0x8) & 0x1)) << 0x5;
R |= (((R >> 0x6) & 0x1) ^ ((R >> 0x8) & 0x1)) << 0x6;
R &= 0xFF;
}
return R & 0x1;
}
rc Function Call
for(int i_r = 0; i_r < 24; i_r++)
{
uint64_t RC = 0x0;
// TODO: Fix so the limit is not constant
for(int j = 0; j < 6; j++)
{
RC ^= (rc(j + 7 * i_r) << ((int) pow(2, j) - 1));
}
printf("%llu\n", RC);
}
Any help on this matter is much appreciated.
I made some random changes to the code and now it works. Here are the highlights:
The j loop needs to count from 0 to 6. That's because 2^6-1 = 63. So if j is never 6, then the output can never have the MSB set, i.e. an output of 0x8... is not possible.
Using the pow function is generally a bad idea for this type of application. double values have a nasty habit of being slightly lower than desired, e.g. 4 is actually 3.99999999999, which gets truncated to 3 when you convert it to an int. Doubtful that was happening in this case, but why risk it, since it's easy to just multiply variable shift by 2 on each pass through the loop.
The maximum value for t is 7*23+6 = 167, so the % 255 does nothing (at least with the value of i and t in this code). Also, there's no need to treat t == 0 as a special case. The loop won't run when t is 0, so the result is 0x1 by default.
Implementing a linear feedback shift register is quite simple in C. Each term in the polynomial corresponds to a single bit. x^8 is just 2^8 which is 0x100 and x^6 + x^5 + x^4 + 1 is 0x71. So whenever bit 0x100 is set, you XOR the result by 0x71.
Here's the updated code:
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
uint64_t rc(int t)
{
uint64_t result = 0x1;
for (int i = 1; i <= t; i++)
{
result <<= 1;
if (result & 0x100)
result ^= 0x71;
}
return result & 0x1;
}
int main(void)
{
for (int i = 0; i < 24; i++)
{
uint64_t result = 0x0;
uint64_t shift = 1;
for (int j = 0; j < 7; j++)
{
uint64_t value = rc(7*i + j);
result |= value << (shift - 1);
shift *= 2;
}
printf("0x%016" PRIx64 "\n", result);
}
}

How to quadruple an unsigned number using bit-wise and logic operator in C

Goal:
4x ( 4.400000095 ) = 17.60000038
Legal ops: Any integer/unsigned operations incl. ||, &&. also if, while
Max ops: 30
Return bit-level equivalent of expression x + x + x + x for
floating point argument f.
My code:
unsigned 4x(unsigned uf) {
unsigned expn = (uf >> 23) & 0xFF;
unsigned sign = uf & 0x80000000;
unsigned frac = uf & 0x007FFFFF;
if (expn == 255 || (expn == 0 && frac == 0))
return uf;
if (expn) {
expn << 2;
} else if (frac == 0x7FFFFF) {
frac >> 2;
expn << 2;
} else {
frac <<= 2;
}
return (sign) | (expn << 23) | (frac);
}
As you can guess, my code does not work. Instead of quadrupling the input, the input is doubled. I don't know why since the fraction and exponent are always being right / left shifted by 2 instead of 1. Im working with single precision floating point values in 32 bit machines.
Note that
expn << 2;
does not modify expn. You probably want
expn <<= 2;
Ditto for
frac >> 2;
expn << 2;
However, as #chux pointed out, you only need to increase add 2 to the exponent, not multiply the exponent by 4.
Some untested code - leave that for OP. (GTG)
The tricky bit is dealing with sub-normal numbers that when *4 become normal. Also watch for large values that overflow to infinity. If you want to ignore sub-normals, just expn += 2 and check for overflow.
Another approach would expn += 2 for normal numbers. For sub-normals, shift the frac <<= 2 and handle cases that become normal.
Code is about 30 ops.
#include <stdint.h>
float x4(float x) {
// Use union to access the bits. Leap-of-faith here (float is 32 bits, endian)
union {
float f;
uint32_t u32;
} u;
u.f = x;
uint32_t expn = (u.u32 >> 23) & 0xFF;
uint32_t sign = u.u32 & 0x80000000;
uint32_t frac = u.u32 & 0x007FFFFF;
// Nan Inf
if (expn == 255) return u.f;
if (expn == 0) {
expn++; // Bring sub-normal into normal expo range
} else {
frac += 0x800000; // restore implied bit
}
// *4
frac <<= 2;
// normalize - this usually iterates twice, less for sub-normals
while (frac > 0xFFFFFF) {
expn++;
frac >>= 1; // 1's will not be shifted out as 2 LSB are 0 so no later rounding
}
// overflow to inf
if (expn >= 255) {
expn = 255;
frac = 0;
} else if (frac & 0x800000) {
frac ^= 0x800000; // clear implied bit
} else {
// still sub-normal
expn--; // should now be 0
}
u.u32 = sign | (expn << 23) | frac;
return u.f;
}

What's the best way to toggle the MSB?

So I want to toggle the most significant bit of my number. Here is an example:
x = 100101 then answer should be 00101
I have a 64 bit machine and hence I am not expecting the answer to be 100000..<51 0's>..100101
One way I thought of was to count the number of bits in my number and then toggle the MSB, but not sure on how to count.
The cheat is to pawn it off to the compiler: There are instructions in most CPUs for doing work like this.
The following should do what you want.
i ^ (1 << (sizeof i * CHAR_BIT - clz(i) - 1))
This will translate into the CLZ instruction, which counts the leading zeros.
For GCC, see: http://gcc.gnu.org/onlinedocs/gcc-4.1.2/gcc/Other-Builtins.html
One thing to be careful of is that this results in undefined behavior if i == 0.
You should replace clz() with the correct intrinsic for your compiler, In GCC this is __builtin_clz; in Visual Studio C++ this is _BitScanForward.
#jleahy has already posted a good option in case of using GCC, I would only leave here a generic implementation of clz which does not use any compiler intrinsics. However, it is not the optimal choice for CPUs which already have native instructions for counting bits (such as x86).
#define __bit_msb_mask(n) (~(~0x0ul >> (n))) /* n leftmost bits. */
/* Count leading zeroes. */
int clz(unsigned long x) {
int nr = 0;
int sh;
assert(x);
/* Hope that compiler optimizes out the sizeof check. */
if (sizeof(x) == 8) {
/* Suppress "shift count >= width of type" error in case
* when sizeof(x) is NOT 8, i.e. when it is a dead code anyway. */
sh = !(x & __bit_msb_mask(sizeof(x)*8/2)) << 5;
nr += sh; x <<= sh;
}
sh = !(x & __bit_msb_mask(1 << 4)) << 4; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 3)) << 3; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 2)) << 2; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 1)) << 1; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 0)) << 0; nr += sh;
return nr;
}
Using this function one can toggle the most significant set bit (assuming there is such one) as follows:
x ^= 1ul << (sizeof(x)*8 - clz(x))
Here's an approach using a lookup table, assuming CHAR_BIT == 8:
uint32_t toggle_msb(uint32_t n)
{
static unsigned char const lookup[] =
{ 1, 0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7 };
for (unsigned int i = 0; i != sizeof n; ++i)
{
// omit the last bit for big-endian machines: ---VVVVVVVVVVVVVVVVVV
unsigned char * p
= reinterpret_cast<unsigned char *>(&n) + sizeof n - i - 1;
if (*p / 16 != 0) { *p = *p % 16 + (lookup[*p / 16] * 16); return n; }
if (*p % 16 != 0) { *p = 16 * (*p / 16) + lookup[*p % 16]; return n; }
}
return 1;
}
And to just put it all together in some sample code for GCC:
#include <stdio.h>
#define clz(x) __builtin_clz(x)
int main()
{
int i = 411; /* 110011011 */
if( i != 0 )
i ^= (1 << (sizeof(i)*8 - clz(i)-1));
/* i is now 10011011 */
printf("i = %d\n", i);
return(0);
}

reverse the bits using bit field in c language?

how to reverse the bits using bit wise operators in c language
Eg:
i/p: 10010101
o/p: 10101001
If it's just 8 bits:
u_char in = 0x95;
u_char out = 0;
for (int i = 0; i < 8; ++i) {
out <<= 1;
out |= (in & 0x01);
in >>= 1;
}
Or for bonus points:
u_char in = 0x95;
u_char out = in;
out = (out & 0xaa) >> 1 | (out & 0x55) << 1;
out = (out & 0xcc) >> 2 | (out & 0x33) << 2;
out = (out & 0xf0) >> 4 | (out & 0x0f) << 4;
figuring out how the last one works is an exercise for the reader ;-)
Knuth has a section on Bit reversal in The Art of Computer Programming Vol 4A, bitwise tricks and techniques.
To reverse the bits of a 32 bit number in a divide and conquer fashion he uses magic constants
u0= 1010101010101010, (from -1/(2+1)
u1= 0011001100110011, (from -1/(4+1)
u2= 0000111100001111, (from -1/(16+1)
u3= 0000000011111111, (from -1/(256+1)
Method credited to Henry Warren Jr., Hackers delight.
unsigned int u0 = 0x55555555;
x = (((x >> 1) & u0) | ((x & u0) << 1));
unsigned int u1 = 0x33333333;
x = (((x >> 2) & u1) | ((x & u1) << 2));
unsigned int u2 = 0x0f0f0f0f;
x = (((x >> 4) & u2) | ((x & u2) << 4));
unsigned int u3 = 0x00ff00ff;
x = (((x >> 8) & u3) | ((x & u3) << 8));
x = ((x >> 16) | (x << 16) mod 0x100000000); // reversed
The 16 and 8 bit cases are left as an exercise to the reader.
Well, this might not be the most elegant solution but it is a solution:
int reverseBits(int x) {
int res = 0;
int len = sizeof(x) * 8; // no of bits to reverse
int i, shift, mask;
for(i = 0; i < len; i++) {
mask = 1 << i; //which bit we are at
shift = len - 2*i - 1;
mask &= x;
mask = (shift > 0) ? mask << shift : mask >> -shift;
res |= mask; // mask the bit we work at at shift it to the left
}
return res;
}
Tested it on a sheet of paper and it seemed to work :D
Edit: Yeah, this is indeed very complicated. I dunno why, but I wanted to find a solution without touching the input, so this came to my haead

C - Serialization of the floating point numbers (floats, doubles)

How to convert a floating point number into a sequence of bytes so that it can be persisted in a file? Such algorithm must be fast and highly portable. It must allow also the opposite operation, deserialization. It would be nice if only very tiny excess of bits per value (persistent space) is required.
Assuming you're using mainstream compilers, floating point values in C and C++ obey the IEEE standard and when written in binary form to a file can be recovered in any other platform, provided that you write and read using the same byte endianess. So my suggestion is: pick an endianess of choice, and before writing or after reading, check if that endianess is the same as in the current platform; if not, just swap the bytes.
This might give you a good start - it packs a floating point value into an int and long long pair, which you can then serialise in the usual way.
#define FRAC_MAX 9223372036854775807LL /* 2**63 - 1 */
struct dbl_packed
{
int exp;
long long frac;
};
void pack(double x, struct dbl_packed *r)
{
double xf = fabs(frexp(x, &r->exp)) - 0.5;
if (xf < 0.0)
{
r->frac = 0;
return;
}
r->frac = 1 + (long long)(xf * 2.0 * (FRAC_MAX - 1));
if (x < 0.0)
r->frac = -r->frac;
}
double unpack(const struct dbl_packed *p)
{
double xf, x;
if (p->frac == 0)
return 0.0;
xf = ((double)(llabs(p->frac) - 1) / (FRAC_MAX - 1)) / 2.0;
x = ldexp(xf + 0.5, p->exp);
if (p->frac < 0)
x = -x;
return x;
}
You could always convert to IEEE-754 format in a fixed byte order (either little endian or big endian). For most machines, that would require either nothing at all or a simple byte swap to serialize and deserialize. A machine that doesn't support IEEE-754 natively will need a converter written, but doing that with ldexp and frexp (standard C library functions)and bit shuffling is not too tough.
What do you mean, "portable"?
For portability, remember to keep the numbers within the limits defined in the Standard: use a single number outside these limits, and there goes all portability down the drain.
double planck_time = 5.39124E-44; /* second */
5.2.4.2.2 Characteristics of floating types <float.h>
[...]
10 The values given in the following list shall be replaced by constant
expressions with implementation-defined values [...]
11 The values given in the following list shall be replaced by constant
expressions with implementation-defined values [...]
12 The values given in the following list shall be replaced by constant
expressions with implementation-defined (positive) values [...]
[...]
Note the implementation-defined in all these clauses.
Converting to an ascii representation would be the simplest, but if you need to deal with a colossal number of floats, then of course you should go binary. But this can be a tricky issue if you care about portability. Floating point numbers are represented differently in different machines.
If you don't want to use a canned library, then your float-binary serializer/deserializer will simply have to have "a contract" on where each bit lands and what it represents.
Here's a fun website to help with that: link.
sprintf, fprintf ? you don't get any more portable than that.
What level of portability do you require? If the file is to be read on a computer with the same OS that it was generated on, than you using a binary file and just saving and restoring the bit pattern should work. Otherwise as boytheo said, ASCII is your friend.
This version has excess of only one byte per one floating point value to indicate the endianness. But I think, it is still not very portable however.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define LITEND 'L'
#define BIGEND 'B'
typedef short INT16;
typedef int INT32;
typedef double vec1_t;
typedef struct {
FILE *fp;
} WFILE, RFILE;
#define w_byte(c, p) putc((c), (p)->fp)
#define r_byte(p) getc((p)->fp)
static void w_vec1(vec1_t v1_Val, WFILE *p)
{
INT32 i;
char *pc_Val;
pc_Val = (char *)&v1_Val;
w_byte(LITEND, p);
for (i = 0; i<sizeof(vec1_t); i++)
{
w_byte(pc_Val[i], p);
}
}
static vec1_t r_vec1(RFILE *p)
{
INT32 i;
vec1_t v1_Val;
char c_Type,
*pc_Val;
pc_Val = (char *)&v1_Val;
c_Type = r_byte(p);
if (c_Type==LITEND)
{
for (i = 0; i<sizeof(vec1_t); i++)
{
pc_Val[i] = r_byte(p);
}
}
return v1_Val;
}
int main(void)
{
WFILE x_FileW,
*px_FileW = &x_FileW;
RFILE x_FileR,
*px_FileR = &x_FileR;
vec1_t v1_Val;
INT32 l_Val;
char *pc_Val = (char *)&v1_Val;
INT32 i;
px_FileW->fp = fopen("test.bin", "w");
v1_Val = 1234567890.0987654321;
printf("v1_Val before write = %.20f \n", v1_Val);
w_vec1(v1_Val, px_FileW);
fclose(px_FileW->fp);
px_FileR->fp = fopen("test.bin", "r");
v1_Val = r_vec1(px_FileR);
printf("v1_Val after read = %.20f \n", v1_Val);
fclose(px_FileR->fp);
return 0;
}
Here we go.
Portable IEEE 754 serialisation / deserialisation that should
work regardless of the machine's internal floating point
representation.
https://github.com/MalcolmMcLean/ieee754
/*
* read a double from a stream in ieee754 format regardless of host
* encoding.
* fp - the stream
* bigendian - set to if big bytes first, clear for little bytes
* first
*
*/
double freadieee754(FILE *fp, int bigendian)
{
unsigned char buff[8];
int i;
double fnorm = 0.0;
unsigned char temp;
int sign;
int exponent;
double bitval;
int maski, mask;
int expbits = 11;
int significandbits = 52;
int shift;
double answer;
/* read the data */
for (i = 0; i < 8; i++)
buff[i] = fgetc(fp);
/* just reverse if not big-endian*/
if (!bigendian)
{
for (i = 0; i < 4; i++)
{
temp = buff[i];
buff[i] = buff[8 - i - 1];
buff[8 - i - 1] = temp;
}
}
sign = buff[0] & 0x80 ? -1 : 1;
/* exponet in raw format*/
exponent = ((buff[0] & 0x7F) << 4) | ((buff[1] & 0xF0) >> 4);
/* read inthe mantissa. Top bit is 0.5, the successive bits half*/
bitval = 0.5;
maski = 1;
mask = 0x08;
for (i = 0; i < significandbits; i++)
{
if (buff[maski] & mask)
fnorm += bitval;
bitval /= 2.0;
mask >>= 1;
if (mask == 0)
{
mask = 0x80;
maski++;
}
}
/* handle zero specially */
if (exponent == 0 && fnorm == 0)
return 0.0;
shift = exponent - ((1 << (expbits - 1)) - 1); /* exponent = shift + bias */
/* nans have exp 1024 and non-zero mantissa */
if (shift == 1024 && fnorm != 0)
return sqrt(-1.0);
/*infinity*/
if (shift == 1024 && fnorm == 0)
{
#ifdef INFINITY
return sign == 1 ? INFINITY : -INFINITY;
#endif
return (sign * 1.0) / 0.0;
}
if (shift > -1023)
{
answer = ldexp(fnorm + 1.0, shift);
return answer * sign;
}
else
{
/* denormalised numbers */
if (fnorm == 0.0)
return 0.0;
shift = -1022;
while (fnorm < 1.0)
{
fnorm *= 2;
shift--;
}
answer = ldexp(fnorm, shift);
return answer * sign;
}
}
/*
* write a double to a stream in ieee754 format regardless of host
* encoding.
* x - number to write
* fp - the stream
* bigendian - set to write big bytes first, elee write litle bytes
* first
* Returns: 0 or EOF on error
* Notes: different NaN types and negative zero not preserved.
* if the number is too big to represent it will become infinity
* if it is too small to represent it will become zero.
*/
int fwriteieee754(double x, FILE *fp, int bigendian)
{
int shift;
unsigned long sign, exp, hibits, hilong, lowlong;
double fnorm, significand;
int expbits = 11;
int significandbits = 52;
/* zero (can't handle signed zero) */
if (x == 0)
{
hilong = 0;
lowlong = 0;
goto writedata;
}
/* infinity */
if (x > DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 0;
goto writedata;
}
/* -infinity */
if (x < -DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (1 << 31);
lowlong = 0;
goto writedata;
}
/* NaN - dodgy because many compilers optimise out this test, but
*there is no portable isnan() */
if (x != x)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 1234;
goto writedata;
}
/* get the sign */
if (x < 0) { sign = 1; fnorm = -x; }
else { sign = 0; fnorm = x; }
/* get the normalized form of f and track the exponent */
shift = 0;
while (fnorm >= 2.0) { fnorm /= 2.0; shift++; }
while (fnorm < 1.0) { fnorm *= 2.0; shift--; }
/* check for denormalized numbers */
if (shift < -1022)
{
while (shift < -1022) { fnorm /= 2.0; shift++; }
shift = -1023;
}
/* out of range. Set to infinity */
else if (shift > 1023)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (sign << 31);
lowlong = 0;
goto writedata;
}
else
fnorm = fnorm - 1.0; /* take the significant bit off mantissa */
/* calculate the integer form of the significand */
/* hold it in a double for now */
significand = fnorm * ((1LL << significandbits) + 0.5f);
/* get the biased exponent */
exp = shift + ((1 << (expbits - 1)) - 1); /* shift + bias */
/* put the data into two longs (for convenience) */
hibits = (long)(significand / 4294967296);
hilong = (sign << 31) | (exp << (31 - expbits)) | hibits;
x = significand - hibits * 4294967296;
lowlong = (unsigned long)(significand - hibits * 4294967296);
writedata:
/* write the bytes out to the stream */
if (bigendian)
{
fputc((hilong >> 24) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc(lowlong & 0xFF, fp);
}
else
{
fputc(lowlong & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 24) & 0xFF, fp);
}
return ferror(fp);
}
fwrite(), fread()? You will likely want binary, and you cannot pack the bytes any tighter unless you want to sacrifice precision which you would do in the program and then fwrite() fread() anyway; float a; double b; a=(float)b; fwrite(&a,1,sizeof(a),fp);
If you are carrying different floating point formats around they may not convert in a straight binary sense, so you may have to pick apart the bits and perform the math, this to the power that plus this, etc. IEEE 754 is a dreadful standard to use but widespread so it would minimize the effort.

Resources