How can a floating point number be converted into a sequence of bytes so that it can be persisted in a file? Such an algorithm must be fast and highly portable, and it must also allow the opposite operation, deserialization. Ideally it should need only a very small overhead of extra bits per value (persistent space).
Assuming you're using mainstream compilers, floating point values in C and C++ obey the IEEE standard, and when written in binary form to a file they can be recovered on any other platform, provided that you write and read using the same byte endianness. So my suggestion is: pick an endianness of choice and, before writing or after reading, check whether that endianness matches the current platform; if not, just swap the bytes.
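A rough sketch of that suggestion, assuming the host uses IEEE-754 doubles (the helper names here are made up for illustration): detect the host byte order at run time and swap into a fixed on-disk order, little-endian in this case.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Detect the host byte order at run time. */
static int is_little_endian(void)
{
    uint16_t probe = 1;
    unsigned char first;
    memcpy(&first, &probe, 1);
    return first == 1;
}

/* Write a double as 8 little-endian bytes, swapping if the host is big-endian. */
static int write_double_le(double x, FILE *fp)
{
    unsigned char buf[sizeof x];
    memcpy(buf, &x, sizeof x);
    if (!is_little_endian()) {
        for (size_t i = 0; i < sizeof x / 2; i++) {
            unsigned char t = buf[i];
            buf[i] = buf[sizeof x - 1 - i];
            buf[sizeof x - 1 - i] = t;
        }
    }
    return fwrite(buf, 1, sizeof buf, fp) == sizeof buf ? 0 : -1;
}

Reading is the mirror image: read 8 bytes, swap them on big-endian hosts, and memcpy into a double.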
This might give you a good start - it packs a floating point value into an int and long long pair, which you can then serialise in the usual way.
#include <math.h>    /* frexp, ldexp, fabs */
#include <stdlib.h>  /* llabs */

#define FRAC_MAX 9223372036854775807LL /* 2**63 - 1 */

struct dbl_packed
{
    int exp;
    long long frac;
};

void pack(double x, struct dbl_packed *r)
{
    double xf = fabs(frexp(x, &r->exp)) - 0.5;

    if (xf < 0.0)   /* x was zero */
    {
        r->frac = 0;
        return;
    }
    r->frac = 1 + (long long)(xf * 2.0 * (FRAC_MAX - 1));

    if (x < 0.0)
        r->frac = -r->frac;
}

double unpack(const struct dbl_packed *p)
{
    double xf, x;

    if (p->frac == 0)
        return 0.0;

    xf = ((double)(llabs(p->frac) - 1) / (FRAC_MAX - 1)) / 2.0;
    x = ldexp(xf + 0.5, p->exp);

    if (p->frac < 0)
        x = -x;

    return x;
}
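For illustration, a round trip with this pair might look like the following minimal sketch (it assumes the pack/unpack definitions above are in scope; actually writing pk.exp and pk.frac to a file in a fixed byte order is left out):

#include <stdio.h>

int main(void)
{
    struct dbl_packed pk;
    double in = -123.456, out;

    pack(in, &pk);        /* split into (exp, frac) */
    /* ...serialise pk.exp and pk.frac here... */
    out = unpack(&pk);    /* reassemble on the other side */

    printf("in = %.17g, out = %.17g\n", in, out);
    return 0;
}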
You could always convert to IEEE-754 format in a fixed byte order (either little endian or big endian). For most machines, that would require either nothing at all or a simple byte swap to serialize and deserialize. A machine that doesn't support IEEE-754 natively would need a converter written, but doing that with ldexp and frexp (standard C library functions) and bit shuffling is not too tough.
What do you mean, "portable"?
For portability, remember to keep the numbers within the limits defined in the Standard: use a single number outside these limits, and there goes all portability down the drain.
double planck_time = 5.39124E-44; /* second */
5.2.4.2.2 Characteristics of floating types <float.h>
[...]
10 The values given in the following list shall be replaced by constant
expressions with implementation-defined values [...]
11 The values given in the following list shall be replaced by constant
expressions with implementation-defined values [...]
12 The values given in the following list shall be replaced by constant
expressions with implementation-defined (positive) values [...]
[...]
Note the implementation-defined in all these clauses.
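As a small illustration of that caveat, you can at least print the implementation-defined limits from <float.h> and check a constant against them (a sketch; nothing here makes the constant portable, it only makes the assumption visible):

#include <float.h>
#include <stdio.h>

int main(void)
{
    /* All of these are implementation-defined. */
    printf("DBL_MIN      = %g\n", DBL_MIN);
    printf("DBL_MAX      = %g\n", DBL_MAX);
    printf("DBL_MANT_DIG = %d\n", DBL_MANT_DIG);

    /* The Standard only guarantees DBL_MIN <= 1E-37, so this constant relies
       on the implementation exceeding the minimum requirements. */
    double planck_time = 5.39124E-44; /* second */
    if (planck_time < DBL_MIN)
        printf("planck_time is below this implementation's normal range\n");
    else
        printf("planck_time is representable as a normal double here\n");
    return 0;
}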
Converting to an ASCII representation would be the simplest, but if you need to deal with a colossal number of floats, then of course you should go binary. This can be a tricky issue if you care about portability, though: floating point numbers are represented differently on different machines.
If you don't want to use a canned library, then your float-binary serializer/deserializer will simply have to have "a contract" on where each bit lands and what it represents.
Here's a fun website to help with that: link.
sprintf, fprintf? You don't get any more portable than that.
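If you go the text route, printing with 17 significant digits lets every IEEE-754 double round-trip exactly (assuming a correctly rounded scanf/strtod, which mainstream C libraries provide). A minimal sketch, with made-up function names:

#include <stdio.h>

/* Write a double as text; %.17g carries enough digits for binary64. */
int save_text(const char *path, double x)
{
    FILE *fp = fopen(path, "w");
    if (!fp)
        return -1;
    fprintf(fp, "%.17g\n", x);
    return fclose(fp);
}

int load_text(const char *path, double *x)
{
    FILE *fp = fopen(path, "r");
    if (!fp)
        return -1;
    int ok = (fscanf(fp, "%lf", x) == 1);
    fclose(fp);
    return ok ? 0 : -1;
}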
What level of portability do you require? If the file is to be read on a computer with the same OS that it was generated on, then using a binary file and just saving and restoring the bit pattern should work. Otherwise, as boytheo said, ASCII is your friend.
This version has an overhead of only one byte per floating point value, to record the endianness. But I think it is still not very portable.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define LITEND 'L'
#define BIGEND 'B'
typedef short INT16;
typedef int INT32;
typedef double vec1_t;
typedef struct {
FILE *fp;
} WFILE, RFILE;
#define w_byte(c, p) putc((c), (p)->fp)
#define r_byte(p) getc((p)->fp)
/* Tag each value with the byte order it was written in. */
static char host_endianness(void)
{
    const INT16 i16_One = 1;

    return (*(const char *)&i16_One == 1) ? LITEND : BIGEND;
}

static void w_vec1(vec1_t v1_Val, WFILE *p)
{
    INT32 i;
    char *pc_Val;

    pc_Val = (char *)&v1_Val;
    w_byte(host_endianness(), p);
    for (i = 0; i < sizeof(vec1_t); i++)
    {
        w_byte(pc_Val[i], p);
    }
}

static vec1_t r_vec1(RFILE *p)
{
    INT32 i;
    vec1_t v1_Val;
    char c_Type,
         c_Tmp,
         *pc_Val;

    pc_Val = (char *)&v1_Val;
    c_Type = r_byte(p);
    for (i = 0; i < sizeof(vec1_t); i++)
    {
        pc_Val[i] = r_byte(p);
    }
    /* Writer and reader disagree on byte order: reverse in place. */
    if (c_Type != host_endianness())
    {
        for (i = 0; i < sizeof(vec1_t) / 2; i++)
        {
            c_Tmp = pc_Val[i];
            pc_Val[i] = pc_Val[sizeof(vec1_t) - 1 - i];
            pc_Val[sizeof(vec1_t) - 1 - i] = c_Tmp;
        }
    }
    return v1_Val;
}
int main(void)
{
    WFILE x_FileW,
          *px_FileW = &x_FileW;
    RFILE x_FileR,
          *px_FileR = &x_FileR;
    vec1_t v1_Val;

    px_FileW->fp = fopen("test.bin", "wb");
    v1_Val = 1234567890.0987654321;
    printf("v1_Val before write = %.20f \n", v1_Val);
    w_vec1(v1_Val, px_FileW);
    fclose(px_FileW->fp);

    px_FileR->fp = fopen("test.bin", "rb");
    v1_Val = r_vec1(px_FileR);
    printf("v1_Val after read = %.20f \n", v1_Val);
    fclose(px_FileR->fp);

    return 0;
}
Here we go.
Portable IEEE 754 serialisation / deserialisation that should
work regardless of the machine's internal floating point
representation.
https://github.com/MalcolmMcLean/ieee754
/*
 * read a double from a stream in ieee754 format regardless of host
 * encoding.
 * fp - the stream
 * bigendian - set if big bytes first, clear for little bytes first
 */
double freadieee754(FILE *fp, int bigendian)
{
unsigned char buff[8];
int i;
double fnorm = 0.0;
unsigned char temp;
int sign;
int exponent;
double bitval;
int maski, mask;
int expbits = 11;
int significandbits = 52;
int shift;
double answer;
/* read the data */
for (i = 0; i < 8; i++)
buff[i] = fgetc(fp);
/* just reverse if not big-endian*/
if (!bigendian)
{
for (i = 0; i < 4; i++)
{
temp = buff[i];
buff[i] = buff[8 - i - 1];
buff[8 - i - 1] = temp;
}
}
sign = buff[0] & 0x80 ? -1 : 1;
/* exponent in raw format */
exponent = ((buff[0] & 0x7F) << 4) | ((buff[1] & 0xF0) >> 4);
/* read in the mantissa. Top bit is 0.5, each successive bit half that */
bitval = 0.5;
maski = 1;
mask = 0x08;
for (i = 0; i < significandbits; i++)
{
if (buff[maski] & mask)
fnorm += bitval;
bitval /= 2.0;
mask >>= 1;
if (mask == 0)
{
mask = 0x80;
maski++;
}
}
/* handle zero specially */
if (exponent == 0 && fnorm == 0)
return 0.0;
shift = exponent - ((1 << (expbits - 1)) - 1); /* exponent = shift + bias */
/* nans have exp 1024 and non-zero mantissa */
if (shift == 1024 && fnorm != 0)
return sqrt(-1.0);
/*infinity*/
if (shift == 1024 && fnorm == 0)
{
#ifdef INFINITY
return sign == 1 ? INFINITY : -INFINITY;
#endif
return (sign * 1.0) / 0.0;
}
if (shift > -1023)
{
answer = ldexp(fnorm + 1.0, shift);
return answer * sign;
}
else
{
/* denormalised numbers */
if (fnorm == 0.0)
return 0.0;
shift = -1022;
while (fnorm < 1.0)
{
fnorm *= 2;
shift--;
}
answer = ldexp(fnorm, shift);
return answer * sign;
}
}
/*
 * write a double to a stream in ieee754 format regardless of host
 * encoding.
 * x - number to write
 * fp - the stream
 * bigendian - set to write big bytes first, else write little bytes first
 * Returns: 0 or EOF on error
 * Notes: different NaN types and negative zero not preserved.
 *        if the number is too big to represent it will become infinity
 *        if it is too small to represent it will become zero.
 */
int fwriteieee754(double x, FILE *fp, int bigendian)
{
int shift;
unsigned long sign, exp, hibits, hilong, lowlong;
double fnorm, significand;
int expbits = 11;
int significandbits = 52;
/* zero (can't handle signed zero) */
if (x == 0)
{
hilong = 0;
lowlong = 0;
goto writedata;
}
/* infinity */
if (x > DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 0;
goto writedata;
}
/* -infinity */
if (x < -DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (1UL << 31);
lowlong = 0;
goto writedata;
}
/* NaN - dodgy because many compilers optimise out this test, but
*there is no portable isnan() */
if (x != x)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 1234;
goto writedata;
}
/* get the sign */
if (x < 0) { sign = 1; fnorm = -x; }
else { sign = 0; fnorm = x; }
/* get the normalized form of f and track the exponent */
shift = 0;
while (fnorm >= 2.0) { fnorm /= 2.0; shift++; }
while (fnorm < 1.0) { fnorm *= 2.0; shift--; }
/* check for denormalized numbers */
if (shift < -1022)
{
while (shift < -1022) { fnorm /= 2.0; shift++; }
shift = -1023;
}
/* out of range. Set to infinity */
else if (shift > 1023)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (sign << 31);
lowlong = 0;
goto writedata;
}
else
fnorm = fnorm - 1.0; /* take the significant bit off mantissa */
/* calculate the integer form of the significand */
/* hold it in a double for now */
significand = fnorm * ((1LL << significandbits) + 0.5f);
/* get the biased exponent */
exp = shift + ((1 << (expbits - 1)) - 1); /* shift + bias */
/* put the data into two longs (for convenience) */
hibits = (long)(significand / 4294967296);
hilong = (sign << 31) | (exp << (31 - expbits)) | hibits;
x = significand - hibits * 4294967296;
lowlong = (unsigned long)(significand - hibits * 4294967296);
writedata:
/* write the bytes out to the stream */
if (bigendian)
{
fputc((hilong >> 24) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc(lowlong & 0xFF, fp);
}
else
{
fputc(lowlong & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 24) & 0xFF, fp);
}
return ferror(fp);
}
fwrite(), fread()? You will likely want binary, and you cannot pack the bytes any tighter unless you want to sacrifice precision, which you would do in the program and then fwrite()/fread() anyway: float a; double b; a = (float)b; fwrite(&a, 1, sizeof(a), fp);
If you are carrying different floating point formats around, they may not convert in a straight binary sense, so you may have to pick apart the bits and perform the math: this to the power of that, plus this, and so on. IEEE 754 is a dreadful standard to use but it is widespread, so it would minimize the effort.
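Spelled out, the fwrite()/fread() approach from the answer above, including the optional precision sacrifice from double to float, might look like this sketch (the function name is made up; it assumes writer and reader share the same float format and byte order):

#include <stdio.h>

int roundtrip(const char *path, double b)
{
    float a = (float)b;   /* deliberately drop precision to save space */
    FILE *fp = fopen(path, "wb");
    if (!fp)
        return -1;
    fwrite(&a, sizeof a, 1, fp);
    fclose(fp);

    float back;
    fp = fopen(path, "rb");
    if (!fp)
        return -1;
    if (fread(&back, sizeof back, 1, fp) != 1) {
        fclose(fp);
        return -1;
    }
    fclose(fp);
    printf("wrote %.9g, read back %.9g\n", a, back);
    return 0;
}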
Related
My program has a function that is given floats, and in this function I want to multiply or add those floats. But not multiply like a * b; I want to break those floats down into their components: the bit for the sign, the 8 bits for the exponent, and the remaining bits as the mantissa.
I want to implement / emulate software floating-point add and multiply (to learn more about what FP hardware has to do).
In the head of the program there are the breakdowns:
#define SIGN(x) (x>>31);
#define MANT(x) (x&0x7FFFFF);
#define EXPO(x) ((x>>23)&0xFF);
#define SPLIT(x, s, m, e) do { \
s = SIGN(x); \
m = MANT(x); \
e = EXPO(x); \
if ( e != 0x00 && e != 0xFF ) { \
m |= 0x800000; \
} \
} while ( 0 )
#define BUILD(x, s, m, e) do { \
x = (s << 31) | (e<<23) | (m&0x7FFFFF); \
} while ( 0 )
The main looks as follows:
float f = 2.3;
float g = 1.8;
float h = foo(&f, &g);
And the method for the calculation looks like:
float foo(float *a, float *b) {
uint32_t ia = *(unsigned int *)a;
uint32_t ib = *(unsigned int *)b;
uint32_t result = 0;
uint32_t signa, signb, signr;
uint32_t manta, mantb, mantr;
uint32_t expoa, expob, expor;
SPLIT(ia, signa, manta, expoa);
SPLIT(ib, signb, mantb, expob);
I already tried the multiplication by adding the exponents and multiplying the mantissas, as follows:
expor = (expoa -127) + (expob -127) + 127;
mantr = (manta) * (mantb);
signr = signa ^ signb;
The return and rebuild of the new float:
BUILD(result, signr, mantr, expor);
return *(float *)&result;
The problem is that the result is wrong; mantr even takes on a very large negative number (if foo gets 1.5 and 2.4, mantr becomes -838860800 and the result is 2.0000000).
You can't just truncate the result of the mantissa multiply; you need to take the top 24 bits (after using the low half for rounding) and renormalize (adjust the exponent).
Floating point operations keep the top significand bits. The most significant part of the integer product is the high bits; the low bits are further places after the decimal. (Terminology: it's a "binary point", not "decimal point", because binary floats use radix 2 (binary), not 10 (decimal).)
For normalized inputs, the implicit leading 1 in the input significands means the 32x32 => 64-bit uint64_t product that you use to implement 24 x 24 => 48-bit mantissa multiplication will have its high bit in one of 2 possible locations, so you don't need a bit-scan to find it. A compare or single-bit-test will do.
For subnormal inputs, that's not guaranteed so you need to check where the MSB is, e.g. with GNU C __builtin_clzll. (There are many special cases to handle for one or both inputs being subnormal, and/or the output being subnormal.)
See https://en.wikipedia.org/wiki/Single-precision_floating-point_format for more about the IEEE-754 binary32 format, including the implied leading 1 of the significand.
And see @njuffa's answer for an actual tested + working implementation that does 64-bit operations as two 32-bit halves for some reason, instead of letting C do that efficiently.
Also, return *(float *)&result; violates strict aliasing. It's only safe on MSVC. Use a union or memcpy for type punning in C99 / C11.
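For the punning itself, here is a sketch of the memcpy and union alternatives mentioned above; both are well-defined in C99/C11, unlike the pointer cast:

#include <stdint.h>
#include <string.h>

/* memcpy version: compilers optimise this to a plain register move. */
static inline float uint_as_float_memcpy(uint32_t u)
{
    float f;
    memcpy(&f, &u, sizeof f);
    return f;
}

/* union version: reading a member other than the one last written is allowed in C. */
static inline float uint_as_float_union(uint32_t u)
{
    union { uint32_t u; float f; } pun;
    pun.u = u;
    return pun.f;
}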
Emulating the multiplication of two IEEE-754 (2008) binary32 operands is a bit more complex than the question suggests. In general, we have to distinguish the following operand classes: zeros, subnormals (0 < |x| < 2^-126), normals (2^-126 ≤ |x| < 2^128), infinities, NaNs. Normals use biased exponents in [1, 254], while any of the special operand classes use biased exponents in {0, 255}. The following assumes we want to implement floating-point multiply with all floating-point exceptions masked, and using the round-to-nearest-or-even rounding mode.
First, we check whether any of the arguments belongs to a special operand class. If so, we check the special cases in sequence. If one of the arguments is a NaN, we turn that NaN into a QNaN and return it. If one of the operands is zero, we return an appropriately signed zero, unless the other argument is an infinity, in which case we return a special QNaN INDEFINITE since this is an invalid operation. After that we check for any argument of infinity, returning an appropriately signed infinity. This leaves subnormals, which we normalize. In case there are two subnormal arguments, we only need to normalize one of them as the result will underflow to zero.
The multiplication of normals proceeds as the asker envisioned in the question. The sign of the result is the exclusive-OR of the signs of the arguments, the exponent of the result is the sum of the exponents of the arguments (adjusted for exponent bias), and the significand of the result is generated from the product of the significands of the arguments. We need the full product for rounding. We can either use a 64-bit type for that, or represent it with a pair of 32-bit numbers. In the code below I have chosen the latter representation. Rounding to nearest-or-even is straightforward: if we have a tie case (the result is exactly in the middle between the closest two binary32 numbers), we need to round up if the least significant bit of the mantissa is 1. Otherwise, we need to round up if the most significant discarded bit (the round bit) is 1.
Three cases need to be considered for the result, based on the result exponent prior to rounding: Exponent is in normal range, result overflows (too large in magnitude), or it underflows (too small in magnitude). In the first case, the result is a normal or infinity if overflow occurs during rounding. In the second case, the result is infinity. In the last case the result is either zero (severe underflow), a subnormal, or the smallest normal (if round-up occurs).
The following code, with a simple framework for light testing via gobs of random test cases and several thousand interesting patterns shows an exemplary ISO-C implementation written in a couple of hours for reasonable clarity and reasonable performance. I let the test framework run for an hour or so on an x64 platform and no errors were reported. If you plan to use the code in production, you would want to construct a more stringent test framework, and may need additional performance tuning.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
#define FLOAT_MANT_BITS (23)
#define FLOAT_EXPO_BITS (8)
#define FLOAT_EXPO_BIAS (127)
#define FLOAT_MANT_MASK (~((~0u) << (FLOAT_MANT_BITS+1))) /* incl. integer bit */
#define EXPO_ADJUST (1) /* adjustment for performance reasons */
#define MIN_NORM_EXPO (1) /* minimum biased exponent of normals */
#define MAX_NORM_EXPO (254) /* maximum biased exponent of normals */
#define INF_EXPO (255) /* biased exponent of infinities */
#define EXPO_MASK (~((~0u) << FLOAT_EXPO_BITS))
#define FLOAT_SIGN_MASK (0x80000000u)
#define FLOAT_IMPLICIT_BIT (1 << FLOAT_MANT_BITS)
#define RND_BIT_SHIFT (31)
#define RND_BIT_MASK (1u << RND_BIT_SHIFT)
#define FLOAT_INFINITY (0x7f800000)
#define FLOAT_INDEFINITE (0xffc00000u)
#define MANT_LSB (0x00000001)
#define FLOAT_QNAN_BIT (0x00400000)
#define MAX_SHIFT (FLOAT_MANT_BITS + 2)
uint32_t fp32_mul_core (uint32_t a, uint32_t b)
{
uint64_t prod;
uint32_t expoa, expob, manta, mantb, shift;
uint32_t r, signr, expor, mantr_hi, mantr_lo;
/* split arguments into sign, exponent, significand */
expoa = ((a >> FLOAT_MANT_BITS) & EXPO_MASK) - EXPO_ADJUST;
expob = ((b >> FLOAT_MANT_BITS) & EXPO_MASK) - EXPO_ADJUST;
manta = (a | FLOAT_IMPLICIT_BIT) & FLOAT_MANT_MASK;
mantb = (b | FLOAT_IMPLICIT_BIT) & FLOAT_MANT_MASK;
/* result sign bit: XOR sign argument signs */
signr = (a ^ b) & FLOAT_SIGN_MASK;
if ((expoa >= (MAX_NORM_EXPO - EXPO_ADJUST)) || /* at least one argument is special */
(expob >= (MAX_NORM_EXPO - EXPO_ADJUST))) {
if ((a & ~FLOAT_SIGN_MASK) > FLOAT_INFINITY) { /* a is NaN */
/* return quietened NaN */
return a | FLOAT_QNAN_BIT;
}
if ((b & ~FLOAT_SIGN_MASK) > FLOAT_INFINITY) { /* b is NaN */
/* return quietened NaN */
return b | FLOAT_QNAN_BIT;
}
if ((a & ~FLOAT_SIGN_MASK) == 0) { /* a is zero */
/* return NaN if b is infinity, else zero */
return (expob != (INF_EXPO - EXPO_ADJUST)) ? signr : FLOAT_INDEFINITE;
}
if ((b & ~FLOAT_SIGN_MASK) == 0) { /* b is zero */
/* return NaN if a is infinity, else zero */
return (expoa != (INF_EXPO - EXPO_ADJUST)) ? signr : FLOAT_INDEFINITE;
}
if (((a & ~FLOAT_SIGN_MASK) == FLOAT_INFINITY) || /* a or b infinity */
((b & ~FLOAT_SIGN_MASK) == FLOAT_INFINITY)) {
return signr | FLOAT_INFINITY;
}
if ((int32_t)expoa < (MIN_NORM_EXPO - EXPO_ADJUST)) { /* a is subnormal */
/* normalize significand of a */
manta = a & FLOAT_MANT_MASK;
expoa++;
do {
manta = 2 * manta;
expoa--;
} while (manta < FLOAT_IMPLICIT_BIT);
} else if ((int32_t)expob < (MIN_NORM_EXPO - EXPO_ADJUST)) { /* b is subnormal */
/* normalize significand of b */
mantb = b & FLOAT_MANT_MASK;
expob++;
do {
mantb = 2 * mantb;
expob--;
} while (mantb < FLOAT_IMPLICIT_BIT);
}
}
/* result exponent: add argument exponents and adjust for biasing */
expor = expoa + expob - FLOAT_EXPO_BIAS + 2 * EXPO_ADJUST;
mantb = mantb << FLOAT_EXPO_BITS; /* preshift to align result signficand */
/* result significand: multiply argument signficands */
prod = (uint64_t)manta * mantb;
mantr_hi = (uint32_t)(prod >> 32);
mantr_lo = (uint32_t)(prod >> 0);
/* normalize significand */
if (mantr_hi < FLOAT_IMPLICIT_BIT) {
mantr_hi = (mantr_hi << 1) | (mantr_lo >> (32 - 1));
mantr_lo = (mantr_lo << 1);
expor--;
}
if (expor <= (MAX_NORM_EXPO - EXPO_ADJUST)) { /* normal, may overflow to infinity during rounding */
/* combine biased exponent, sign and signficand */
r = (expor << FLOAT_MANT_BITS) + signr + mantr_hi;
/* round result to nearest or even; overflow to infinity possible */
r = r + ((mantr_lo == RND_BIT_MASK) ? (mantr_hi & MANT_LSB) : (mantr_lo >> RND_BIT_SHIFT));
} else if ((int32_t)expor > (MAX_NORM_EXPO - EXPO_ADJUST)) { /* overflow */
/* return infinity */
r = signr | FLOAT_INFINITY;
} else { /* underflow */
/* return zero, normal, or smallest subnormal */
shift = 0 - expor;
if (shift > MAX_SHIFT) shift = MAX_SHIFT;
/* denormalize significand */
mantr_lo = mantr_hi << (32 - shift) | (mantr_lo ? 1 : 0);
mantr_hi = mantr_hi >> shift;
/* combine sign and signficand; biased exponent known to be zero */
r = mantr_hi + signr;
/* round result to nearest or even */
r = r + ((mantr_lo == RND_BIT_MASK) ? (mantr_hi & MANT_LSB) : (mantr_lo >> RND_BIT_SHIFT));
}
return r;
}
uint32_t float_as_uint (float a)
{
uint32_t r;
memcpy (&r, &a, sizeof r);
return r;
}
float uint_as_float (uint32_t a)
{
float r;
memcpy (&r, &a, sizeof r);
return r;
}
float fp32_mul (float a, float b)
{
return uint_as_float (fp32_mul_core (float_as_uint (a), float_as_uint (b)));
}
/* Fixes via: Greg Rose, KISS: A Bit Too Simple. http://eprint.iacr.org/2011/007 */
static unsigned int z=362436069,w=521288629,jsr=362436069,jcong=123456789;
#define znew (z=36969*(z&0xffff)+(z>>16))
#define wnew (w=18000*(w&0xffff)+(w>>16))
#define MWC ((znew<<16)+wnew)
#define SHR3 (jsr^=(jsr<<13),jsr^=(jsr>>17),jsr^=(jsr<<5)) /* 2^32-1 */
#define CONG (jcong=69069*jcong+13579) /* 2^32 */
#define KISS ((MWC^CONG)+SHR3)
#define ISNAN(x) ((float_as_uint (x) << 1) > 0xff000000)
#define QNAN(x) (x | FLOAT_QNAN_BIT)
#define PURELY_RANDOM (0)
#define PATTERN_BASED (1)
#define TEST_MODE (PURELY_RANDOM)
uint32_t v[8192];
int main (void)
{
unsigned long long count = 0;
float a, b, res, ref;
uint32_t i, j, patterns, idx = 0, nbrBits = sizeof (uint32_t) * CHAR_BIT;
/* pattern class 1: 2**i */
for (i = 0; i < nbrBits; i++) {
v [idx] = ((uint32_t)1 << i);
idx++;
}
/* pattern class 2: 2**i-1 */
for (i = 0; i < nbrBits; i++) {
v [idx] = (((uint32_t)1 << i) - 1);
idx++;
}
/* pattern class 3: 2**i+1 */
for (i = 0; i < nbrBits; i++) {
v [idx] = (((uint32_t)1 << i) + 1);
idx++;
}
/* pattern class 4: 2**i + 2**j */
for (i = 0; i < nbrBits; i++) {
for (j = 0; j < nbrBits; j++) {
v [idx] = (((uint32_t)1 << i) + ((uint32_t)1 << j));
idx++;
}
}
/* pattern class 5: 2**i - 2**j */
for (i = 0; i < nbrBits; i++) {
for (j = 0; j < nbrBits; j++) {
v [idx] = (((uint32_t)1 << i) - ((uint32_t)1 << j));
idx++;
}
}
/* pattern class 6: MAX_UINT/(2**i+1) rep. blocks of i zeros and i ones */
for (i = 0; i < nbrBits; i++) {
v [idx] = ((~(uint32_t)0) / (((uint32_t)1 << i) + 1));
idx++;
}
patterns = idx;
/* pattern class 7: one's complement of pattern classes 1 through 6 */
for (i = 0; i < patterns; i++) {
v [idx] = ~v [i];
idx++;
}
/* pattern class 8: two's complement of pattern classes 1 through 6 */
for (i = 0; i < patterns; i++) {
v [idx] = ~v [i] + 1;
idx++;
}
patterns = idx;
#if TEST_MODE == PURELY_RANDOM
printf ("using purely random test vectors\n");
#elif TEST_MODE == PATTERN_BASED
printf ("using pattern-based test vectors\n");
printf ("#patterns = %u\n", patterns);
#endif // TEST_MODE
do {
#if TEST_MODE == PURELY_RANDOM
a = uint_as_float (KISS);
b = uint_as_float (KISS);
#elif TEST_MODE == PATTERN_BASED
i = KISS % patterns;
j = KISS % patterns;
a = uint_as_float ((v[i] & 0x7fffff) | (KISS & ~0x7fffff));
b = uint_as_float ((v[j] & 0x7fffff) | (KISS & ~0x7fffff));
#endif // TEST_MODE
res = fp32_mul (a, b);
ref = a * b;
/* check for bit pattern mismatch between result and reference */
if (float_as_uint (res) != float_as_uint (ref)) {
/* if both a and b are NaNs, either could be returned quietened */
if (! (ISNAN (a) && ISNAN (b) &&
((QNAN (float_as_uint (a)) == float_as_uint (res)) ||
(QNAN (float_as_uint (b)) == float_as_uint (res))))) {
printf ("err: a=% 15.8e (%08x) b=% 15.8e (%08x) res=% 15.8e (%08x) ref=%15.8e (%08x)\n",
a, float_as_uint(a), b, float_as_uint (b), res, float_as_uint (res), ref, float_as_uint (ref));
return EXIT_FAILURE;
}
}
count++;
if (!(count & 0xffffff)) printf ("\r%llu", count);
} while (1);
return EXIT_SUCCESS;
}
It is much more complicated. Take a look at the source of the softfloat library (for example https://github.com/riscv/riscv-pk/blob/master/softfloat/f64_mul.c). Clone it and analyze it.
I'm looking for a way to convert an 8-byte double to a uint64_t. I can't use the standard library for this because my target only has a 4-byte double.
The conversion should turn 10987789.5 into 10987789 as an integer.
The conversion I use right now:
uint64_t binDoubleToUint64_t(char *bit){
uint8_t i, j;
uint64_t fraction;
for(i=0; i<64; i++)
bit[i]-='0';
uint16_t exponent = bit[1] ? 1 : 0;
j = 0;
for(i=9; i>0;i--)
exponent += bit[i+2] * int_pow(2, j++);
bit[11] = bit[1];
fraction = 0;
j=0;
for(i=0; i < exponent; i++){
fraction = fraction << 1;
if(bit[11+i])
fraction |= 1 << 1;
}
return fraction;
}
But this gives me wrong answers.
When I try to convert the double 10225203.0 (0x416380c660000000) it returns 10225202 (it should be 10225203).
Can you read the bit values straight in as a uint64_t? Then the code might look something like this:
#include <stdint.h>

uint64_t binDoubleToUint64_t (uint64_t in) {
    if (!(in & 0x4000000000000000) || (in & 0x8000000000000000)) {
        /* If the exponent isn't big enough to give a value greater than 1
         * or our number is negative, return 0.
         */
        return 0;
    }
    uint32_t exponent = ((in & 0x7FF0000000000000) >> 52) - 1023;
    // get the mantissa including the imagined bit.
    uint64_t mantissa = (in & 0xFFFFFFFFFFFFF) | 0x10000000000000;
    // Now we just need to work out how much to shift the mantissa by.
    /* You may notice that the top bit of the mantissa is actually at 53 once
       you put the imagined bit back in; mantissaTopBit is really
       floor(log2(mantissa)), which is 52 (i.e. the power of 2 of the position
       that the top bit is in). I couldn't think of a good name for this, so just
       imagine that you started counting from 0 instead of 1 if you like!
     */
    uint32_t mantissaTopBit = 52;
    if (mantissaTopBit > exponent)
        return mantissa >> (mantissaTopBit - exponent);
    else {
        if (exponent - mantissaTopBit > 11) {
            // You're in trouble as your double doesn't fit into a uint64_t
        }
        return mantissa << (exponent - mantissaTopBit);
    }
}
This has been written from my memory of the floating point spec (I haven't checked all the values) so you may want to check the values given. It works for your examples, but you may want to check that I've put the right number of '0's in everywhere.
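A quick check against the value from the question, using the function above (the bit pattern 0x416380c660000000 is 10225203.0):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
    uint64_t bits = 0x416380c660000000ULL; /* 10225203.0 */
    printf("%" PRIu64 "\n", binDoubleToUint64_t(bits)); /* expect 10225203 */
    return 0;
}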
/*
* write a double to a stream in ieee754 format regardless of host
* encoding.
* x - number to write
* fp - the stream
* bigendian - set to write big bytes first, else write little bytes first
* Returns: 0 or EOF on error
* Notes: different NaN types and negative zero not preserved.
* if the number is too big to represent it will become infinity
* if it is too small to represent it will become zero.
*/
int fwriteieee754(double x, FILE *fp, int bigendian)
{
int shift;
unsigned long sign, exp, hibits, hilong, lowlong;
double fnorm, significand;
int expbits = 11;
int significandbits = 52;
/* zero (can't handle signed zero) */
if (x == 0)
{
hilong = 0;
lowlong = 0;
goto writedata;
}
/* infinity */
if (x > DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 0;
goto writedata;
}
/* -infinity */
if (x < -DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (1UL << 31);
lowlong = 0;
goto writedata;
}
/* NaN - dodgy because many compilers optimise out this test, but
*there is no portable isnan() */
if (x != x)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 1234;
goto writedata;
}
/* get the sign */
if (x < 0) { sign = 1; fnorm = -x; }
else { sign = 0; fnorm = x; }
/* get the normalized form of f and track the exponent */
shift = 0;
while (fnorm >= 2.0) { fnorm /= 2.0; shift++; }
while (fnorm < 1.0) { fnorm *= 2.0; shift--; }
/* check for denormalized numbers */
if (shift < -1022)
{
while (shift < -1022) { fnorm /= 2.0; shift++; }
shift = -1023;
}
/* out of range. Set to infinity */
else if (shift > 1023)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (sign << 31);
lowlong = 0;
goto writedata;
}
else
fnorm = fnorm - 1.0; /* take the significant bit off mantissa */
/* calculate the integer form of the significand */
/* hold it in a double for now */
significand = fnorm * ((1LL << significandbits) + 0.5f);
/* get the biased exponent */
exp = shift + ((1 << (expbits - 1)) - 1); /* shift + bias */
/* put the data into two longs (for convenience) */
hibits = (long)(significand / 4294967296);
hilong = (sign << 31) | (exp << (31 - expbits)) | hibits;
x = significand - hibits * 4294967296;
lowlong = (unsigned long)(significand - hibits * 4294967296);
writedata:
/* write the bytes out to the stream */
if (bigendian)
{
fputc((hilong >> 24) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc(lowlong & 0xFF, fp);
}
else
{
fputc(lowlong & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 24) & 0xFF, fp);
}
return ferror(fp);
}
You can trivially modify this function to do what you want.
https://github.com/MalcolmMcLean/ieee754
I am currently working on a program where I need to produce this kind of output: the binary IEEE 754 representation of 64-bit and 32-bit numbers, in C.
I already have the double and single floating point approximation, but I'm having trouble finding out how to output the binary of these in IEEE 754 notation, and color code them as well. Any thoughts/solutions on how to do this would be much appreciated.
This does not guarantee the correct answer if the underlying machine is something esoteric, however:
float f = 3.14;
uint32_t u;
memcpy(&u, &f, sizeof u);
for (int i = 31; i >= 0; i--)
putchar('0' + ((u >> i) & 1));
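The same idea extends to the 64-bit case the question asks about. A sketch, again assuming IEEE-754 doubles and not guaranteed on esoteric machines; the separators mark the sign / exponent / fraction boundaries, which is also a convenient place to hook in colour codes:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

void print_double_bits(double d)
{
    uint64_t u;
    memcpy(&u, &d, sizeof u);
    for (int i = 63; i >= 0; i--) {
        putchar('0' + ((u >> i) & 1));
        if (i == 63 || i == 52)   /* end of sign field, end of exponent field */
            putchar(' ');
    }
    putchar('\n');
}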
I decided to take the opportunity to refresh my memory of the IEEE-754 floating-point standard. Below is a mashup I made for displaying a string in its single-precision floating point number representation, though it is easily modified for the double-precision format.
The code won't work with +Inf, -Inf, NaN, trailing-zero, fractionless and leftout-zero (.fraction instead of 0.fraction or integer. instead of integer.0) numbers, it's just supposed to give the general idea of how to do what you want to do in a portable and well-defined (and highly entertaining) way.
#include <stdio.h>
#include <string.h>

typedef int BOOL;  /* the snippet uses BOOL/TRUE/FALSE */
#define TRUE 1
#define FALSE 0

#define EXPLEN 8 /* Exponent length for single-precision */
#define SIGNIFLEN 23 /* Significand length for single-precision */
#define EXPBIAS 0x7F /* Exponent bias for single-precision */
#define BITLEN (1 + EXPLEN + SIGNIFLEN)
BOOL strToFloat(char *floatStr, char *outBits, size_t outBitsLen){
unsigned long int floatStrLength = strlen(floatStr), intPart, fracPart, intPartHighestBit = 1, fracPartLength,
fracPartPowTen = 1, temp;
char roundBit, stickyBit, expPart = 0;
int i;
/* Get sign */
if (floatStr[0] == '-'){
floatStr++;
outBits[0] = '1';
} else {
if (floatStr[0] == '+')
floatStr++;
outBits[0] = '0';
}
if (sscanf(floatStr, "%lu.%lu", &intPart, &fracPart) == EOF ||
outBitsLen < BITLEN + 1)
return FALSE; /* Failure */
/* Get integer part */
temp = intPart;
while (temp >>= 1)
intPartHighestBit <<= 1;
for (i = EXPLEN + 1; i < BITLEN && (intPartHighestBit >>= 1); i++, expPart++)
outBits[i] = !!(intPart & intPartHighestBit) + '0';
/* Get fraction part */
fracPartLength = strlen(strchr(floatStr, '.'));
while (--fracPartLength)
fracPartPowTen *= 10;
if (!intPart && i == EXPLEN + 1)
if (fracPart > 0){
i--;
expPart--;
} else
expPart = -EXPBIAS;
for (; i < BITLEN; fracPart = (fracPart << 1) % fracPartPowTen){
outBits[i] = !!((fracPart << 1) - (fracPart << 1) % fracPartPowTen) + '0';
if (outBits[i] == '0' && i == EXPLEN) /* Start writing only after first set bit is reached if number <1 */
expPart--;
else
i++;
}
/* Get exponent part */
for (i = EXPLEN, expPart += EXPBIAS; i > 0; i--, expPart >>= 1)
outBits[i] = (unsigned char)expPart % 2 + '0';
/* Round fraction part (to-nearest mode) */
if ((fracPart << 1) - (fracPart << 1) % fracPartPowTen){ /* Guard bit set, rounding needed */
fracPart = (fracPart << 1) % fracPartPowTen;
roundBit = !!((fracPart << 1) - (fracPart << 1) % fracPartPowTen);
fracPart = (fracPart << 1) % fracPartPowTen;
stickyBit = !!((fracPart << 1) - (fracPart << 1) % fracPartPowTen);
if (roundBit || stickyBit || outBits[BITLEN - 1] == '0'){ /* Round up, add 1 to mantissa (and to exponent
if mantissa overflows)*/
for (i = BITLEN - 1; outBits[i] == '1' && i > 0; i--)
outBits[i] = '0';
outBits[i] = '1';
}
}
outBits[BITLEN] = '\0';
return TRUE; /* Success */
}
Example usage:
char *str = "-3.14",
*outFloat = malloc(BITLEN + 1);
if (outFloat && strToFloat(str, outFloat, BITLEN + 1))
printf("%s", outFloat);
outputs
11000000010010001111010111000011
UPDATE: did my best to
remove magic numbers so it's easier to change this to use the double-precision format;
fix (I think) the rounding overflows;
fix zeroes issues;
refactor the code for setting the sign bit; and I also fiddled with some types, both per #Segmented's request in the comments.
Well, that was lots of fun! If you see any errors or space for improvement in this (rather hasty) code, please post it!
So I want to toggle the most significant bit of my number. Here is an example:
x = 100101 then answer should be 00101
I have a 64 bit machine and hence I am not expecting the answer to be 100000..<51 0's>..100101
One way I thought of was to count the number of bits in my number and then toggle the MSB, but I am not sure how to count them.
The cheat is to pawn it off to the compiler: There are instructions in most CPUs for doing work like this.
The following should do what you want.
i ^ (1 << (sizeof i * CHAR_BIT - clz(i) - 1))
This will translate into the CLZ instruction, which counts the leading zeros.
For GCC, see: http://gcc.gnu.org/onlinedocs/gcc-4.1.2/gcc/Other-Builtins.html
One thing to be careful of is that this results in undefined behavior if i == 0.
You should replace clz() with the correct intrinsic for your compiler: in GCC this is __builtin_clz; in Visual C++ it is _BitScanReverse (which reports the index of the highest set bit rather than the leading-zero count).
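A small wrapper along those lines might look like this sketch (assuming a 32-bit unsigned int and a nonzero argument; the MSVC intrinsic lives in <intrin.h> and gives the index of the highest set bit, so the leading-zero count is 31 minus that index):

#include <limits.h>

#if defined(_MSC_VER)
#include <intrin.h>
static int clz32(unsigned int x)   /* x must be nonzero */
{
    unsigned long idx;
    _BitScanReverse(&idx, x);
    return 31 - (int)idx;
}
#else
static int clz32(unsigned int x)   /* x must be nonzero */
{
    return __builtin_clz(x);
}
#endif

/* Toggle the most significant set bit. */
unsigned int toggle_msb(unsigned int i)
{
    return i ^ (1u << (sizeof i * CHAR_BIT - clz32(i) - 1));
}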
@jleahy has already posted a good option in case of using GCC; I would only leave here a generic implementation of clz which does not use any compiler intrinsics. However, it is not the optimal choice for CPUs which already have native instructions for counting bits (such as x86).
#define __bit_msb_mask(n) (~(~0x0ul >> (n))) /* n leftmost bits. */
/* Count leading zeroes. */
int clz(unsigned long x) {
int nr = 0;
int sh;
assert(x);
/* Hope that compiler optimizes out the sizeof check. */
if (sizeof(x) == 8) {
/* Suppress "shift count >= width of type" error in case
* when sizeof(x) is NOT 8, i.e. when it is a dead code anyway. */
sh = !(x & __bit_msb_mask(sizeof(x)*8/2)) << 5;
nr += sh; x <<= sh;
}
sh = !(x & __bit_msb_mask(1 << 4)) << 4; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 3)) << 3; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 2)) << 2; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 1)) << 1; nr += sh; x <<= sh;
sh = !(x & __bit_msb_mask(1 << 0)) << 0; nr += sh;
return nr;
}
Using this function one can toggle the most significant set bit (assuming there is one) as follows:
x ^= 1ul << (sizeof(x)*8 - clz(x) - 1);
Here's an approach using a lookup table, assuming CHAR_BIT == 8:
uint32_t toggle_msb(uint32_t n)
{
static unsigned char const lookup[] =
{ 1, 0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7 };
for (unsigned int i = 0; i != sizeof n; ++i)
{
// omit the last bit for big-endian machines: ---VVVVVVVVVVVVVVVVVV
unsigned char * p
= reinterpret_cast<unsigned char *>(&n) + sizeof n - i - 1;
if (*p / 16 != 0) { *p = *p % 16 + (lookup[*p / 16] * 16); return n; }
if (*p % 16 != 0) { *p = 16 * (*p / 16) + lookup[*p % 16]; return n; }
}
return 1;
}
And to just put it all together in some sample code for GCC:
#include <stdio.h>
#define clz(x) __builtin_clz(x)
int main()
{
int i = 411; /* 110011011 */
if( i != 0 )
i ^= (1 << (sizeof(i)*8 - clz(i)-1));
/* i is now 10011011 */
printf("i = %d\n", i);
return(0);
}
Given an array,
unsigned char q[32]="1100111...",
how can I generate a 4-byte bit-set, unsigned char p[4], such that the bits of this bit-set equal the values inside the array, e.g., the first byte p[0] = "q[0] ... q[7]", the 2nd byte p[1] = "q[8] ... q[15]", etc.?
And also, how can I do it in the opposite direction, i.e., given the bit-set, generate the array?
My own attempt at the first part:
unsigned char p[4]={0};
for (int j=0; j<N; j++)
{
if (q[j] == '1')
{
p [j / 8] |= 1 << (7-(j % 8));
}
}
Is the above right? any conditions to check? Is there any better way?
EDIT - 1
I wonder if the above is an efficient way, as the array size could be up to 4096 or even more.
First, use strtoul to get a 32-bit value. Then convert the byte order to big-endian with htonl. Finally, store the result in your array:
#include <arpa/inet.h>
#include <stdlib.h>
/* ... */
unsigned char q[32] = "1100111...";
unsigned char result[4] = {0};
*(unsigned long*)result = htonl(strtoul(q, NULL, 2));
There are other ways as well.
But I lack <arpa/inet.h>!
Then you need to know what byte order your platform is. If it's big endian, then htonl does nothing and can be omitted. If it's little-endian, then htonl is just:
unsigned long htonl(unsigned long x)
{
x = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
x = ((x & 0xFFFF0000) >> 16) | ((x & 0x0000FFFF) << 16);
return x;
}
If you're lucky, your optimizer might see what you're doing and make it into efficient code. If not, well, at least it's all implementable in registers and O(log N).
If you don't know what byte order your platform is, then you need to detect it:
typedef union {
char c[sizeof(int) / sizeof(char)];
int i;
} OrderTest;
unsigned long htonl(unsigned long x)
{
OrderTest test;
test.i = 1;
if(!test.c[0])
return x;
x = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
x = ((x & 0xFFFF0000) >> 16) | ((x & 0x0000FFFF) << 16);
return x;
}
Maybe long is 8 bytes!
Well, the OP implied 4-byte inputs with their array size, but 8-byte long is doable:
#define kCharsPerLong (sizeof(long) / sizeof(char))
unsigned char q[8 * kCharsPerLong] = "1100111...";
unsigned char result[kCharsPerLong] = {0};
*(unsigned long*)result = htonl(strtoul(q, NULL, 2));
unsigned long htonl(unsigned long x)
{
#if kCharsPerLong == 4
x = ((x & 0xFF00FF00UL) >> 8) | ((x & 0x00FF00FFUL) << 8);
x = ((x & 0xFFFF0000UL) >> 16) | ((x & 0x0000FFFFUL) << 16);
#elif kCharsPerLong == 8
x = ((x & 0xFF00FF00FF00FF00UL) >> 8) | ((x & 0x00FF00FF00FF00FFUL) << 8);
x = ((x & 0xFFFF0000FFFF0000UL) >> 16) | ((x & 0x0000FFFF0000FFFFUL) << 16);
x = ((x & 0xFFFFFFFF00000000UL) >> 32) | ((x & 0x00000000FFFFFFFFUL) << 32);
#else
#error Unsupported word size.
#endif
return x;
}
For char that isn't 8 bits (DSPs like to do this), you're on your own. (This is why it was a Big Deal when the SHARC series of DSPs had 8-bit bytes; it made it a LOT easier to port existing code because, face it, C does a horrible job of portability support.)
What about arbitrary length buffers? No funny pointer typecasts, please.
The main thing that can be improved with the OP's version is to rethink the loop's internals. Instead of thinking of the output bytes as a fixed data register, think of it as a shift register, where each successive bit is shifted into the right (LSB) end. This will save you from all those divisions and mods (which, hopefully, are optimized away to bit shifts).
For sanity, I'm ditching unsigned char for uint8_t.
#include <stdint.h>
unsigned StringToBits(const char* inChars, uint8_t* outBytes, size_t numBytes,
size_t* bytesRead)
/* Converts the string of '1' and '0' characters in `inChars` to a buffer of
* bytes in `outBytes`. `numBytes` is the number of available bytes in the
* `outBytes` buffer. On exit, if `bytesRead` is not NULL, the value it points
* to is set to the number of bytes read (rounding up to the nearest full
* byte). If a multiple of 8 bits is not read, the last byte written will be
* padded with 0 bits to reach a multiple of 8 bits. This function returns the
* number of padding bits that were added. For example, an input of 11 bits
* will result `bytesRead` being set to 2 and the function will return 5. This
* means that if a nonzero value is returned, then a partial byte was read,
* which may be an error.
*/
{ size_t bytes = 0;
unsigned bits = 0;
uint8_t x = 0;
while(bytes < numBytes)
{ /* Parse a character. */
switch(*inChars++)
{ case '0': x <<= 1; ++bits; break;
case '1': x = (x << 1) | 1; ++bits; break;
default: numBytes = 0;
}
/* See if we filled a byte. */
if(bits == 8)
{ outBytes[bytes++] = x;
x = 0;
bits = 0;
}
}
/* Padding, if needed. */
if(bits)
{ bits = 8 - bits;
outBytes[bytes++] = x << bits;
}
/* Finish up. */
if(bytesRead)
*bytesRead = bytes;
return bits;
}
It's your responsibility to make sure inChars is null-terminated. The function will return on the first non-'0' or '1' character it sees or if it runs out of output buffer. Some example usage:
unsigned char q[32] = "1100111...";
uint8_t buf[4];
size_t bytesRead = 5;
if(StringToBits(q, buf, 4, &bytesRead) || bytesRead != 4)
{
/* Partial read; handle error here. */
}
This just reads 4 bytes, and traps the error if it can't.
unsigned char q[4096] = "1100111...";
uint8_t buf[512];
StringToBits(q, buf, 512, NULL);
This just converts what it can and sets the rest to 0 bits.
This function could be done better if C had the ability to break out of more than one level of loop or switch; as it stands, I'd have to add a flag value to get the same effect, which is clutter, or I'd have to add a goto, which I simply refuse.
I don't think that will quite work. You are comparing each "bit" to 1 when it should really be '1'. You can also make it a bit more efficient by getting rid of the if:
unsigned char p[4]={0};
for (int j=0; j<32; j++)
{
p [j / 8] |= (q[j] == '1') << (7-(j % 8));
}
Going in reverse is pretty simple too. Just mask for each "bit" that you set earlier.
unsigned char q[32]={0};
for (int j=0; j<32; j++) {
q[j] = !!( p[j / 8] & ( 1 << (7-(j % 8)) ) ) + '0';
}
You'll notice the creative use of (boolean) + '0' to convert between 1/0 and '1'/'0'.
According to your example it does not look like you are going for readability, and after a (late) refresh my solution looks very similar to Chriszuma's, except for the lack of parentheses due to order of operations and the addition of the !! to enforce a 0 or 1.
const size_t N = 32; //N must be a multiple of 8
unsigned char q[N+1] = "11011101001001101001111110000111";
unsigned char p[N/8] = {0};
unsigned char r[N+1] = {0}; //reversed
for(size_t i = 0; i < N; ++i)
p[i / 8] |= (q[i] == '1') << 7 - i % 8;
for(size_t i = 0; i < N; ++i)
r[i] = '0' + !!(p[i / 8] & 1 << 7 - i % 8);
printf("%x %x %x %x\n", p[0], p[1], p[2], p[3]);
printf("%s\n%s\n", q,r);
If you are looking for extreme efficiency, try to use the following techniques:
Replace the if by subtraction of '0' (it seems you can assume your input symbols can only be '0' or '1').
Also process the input from lower indices to higher ones.
for (int c = 0; c < N; c += 8)
{
int y = 0;
for (int b = 0; b < 8; ++b)
y = y * 2 + q[c + b] - '0';
p[c / 8] = y;
}
Replace array indices by auto-incrementing pointers:
const char* qptr = q;
unsigned char* pptr = p;
for (int c = 0; c < N; c += 8)
{
int y = 0;
for (int b = 0; b < 8; ++b)
y = y * 2 + *qptr++ - '0';
*pptr++ = y;
}
Unroll the inner loop:
const char* qptr = q;
unsigned char* pptr = p;
for (int c = 0; c < N; c += 8)
{
*pptr++ =
qptr[0] - '0' << 7 |
qptr[1] - '0' << 6 |
qptr[2] - '0' << 5 |
qptr[3] - '0' << 4 |
qptr[4] - '0' << 3 |
qptr[5] - '0' << 2 |
qptr[6] - '0' << 1 |
qptr[7] - '0' << 0;
qptr += 8;
}
Process several input characters simultaneously (using bit twiddling hacks or MMX instructions) - this has great speedup potential!
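As a sketch of that last suggestion, here is a SWAR (SIMD within a register) variant that packs 8 input characters per 64-bit multiply. It assumes a little-endian host and input consisting only of '0'/'1'; the multiplier 0x8040201008040201 gathers each byte's low bit into the top byte without any carries, so q[0] ends up as the most significant bit of the output byte, matching the convention in the question. The function names are made up for illustration.

#include <stdint.h>
#include <string.h>
#include <stddef.h>

/* Pack 8 ASCII '0'/'1' characters into one byte (q[0] becomes the MSB). */
static unsigned char pack8(const unsigned char *q)
{
    uint64_t x;
    memcpy(&x, q, 8);               /* one 64-bit load of 8 characters */
    x &= 0x0101010101010101ULL;     /* keep the low bit of each '0'/'1' */
    return (unsigned char)((x * 0x8040201008040201ULL) >> 56);
}

/* Convert a whole buffer; n must be a multiple of 8. */
static void pack_bits(const unsigned char *q, unsigned char *p, size_t n)
{
    for (size_t i = 0; i < n; i += 8)
        p[i / 8] = pack8(q + i);
}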