How do I handle byte order differences when reading/writing floating-point types in C? - c

I'm devising a file format for my application, and I'd obviously like for it to work on both big-endian and little-endian systems. I've already found working solutions for managing integral types using htonl and ntohl, but I'm a bit stuck when trying to do the same with float and double values.
Given the nature of how floating-point representations work, I would assume that the standard byte-order functions won't work on these values. Likewise, I'm not even entirely sure if endianness in the traditional sense is what governs the byte order of these types.
All I need is consistency. A way to write a double out, and ensure I get that same value when I read it back in. How can I do this in C?

Another option could be to use double frexp(double value, int *exp); from <math.h> (C99) to break down the floating-point value into a normalized fraction (in the range [0.5, 1)) and an integral power of 2. You can then multiply the fraction by FLT_RADIXDBL_MANT_DIG to get an integer in the range [FLT_RADIXDBL_MANT_DIG/2, FLT_RADIXDBL_MANT_DIG). Then you save both integers big- or little-endian, whichever you choose in your format.
When you load a saved number, you do the reverse operation and use double ldexp(double x, int exp); to multiply the reconstructed fraction by the power of 2.
This will work best when FLT_RADIX=2 (virtually all systems, I suppose?) and DBL_MANT_DIG<=64.
Care must be taken to avoid overflows.
Sample code for doubles:
#include <limits.h>
#include <float.h>
#include <math.h>
#include <string.h>
#include <stdio.h>
#if CHAR_BIT != 8
#error currently supported only CHAR_BIT = 8
#endif
#if FLT_RADIX != 2
#error currently supported only FLT_RADIX = 2
#endif
#ifndef M_PI
#define M_PI 3.14159265358979324
#endif
typedef unsigned char uint8;
/*
10-byte little-endian serialized format for double:
- normalized mantissa stored as 64-bit (8-byte) signed integer:
negative range: (-2^53, -2^52]
zero: 0
positive range: [+2^52, +2^53)
- 16-bit (2-byte) signed exponent:
range: [-0x7FFE, +0x7FFE]
Represented value = mantissa * 2^(exponent - 53)
Special cases:
- +infinity: mantissa = 0x7FFFFFFFFFFFFFFF, exp = 0x7FFF
- -infinity: mantissa = 0x8000000000000000, exp = 0x7FFF
- NaN: mantissa = 0x0000000000000000, exp = 0x7FFF
- +/-0: only one zero supported
*/
void Double2Bytes(uint8 buf[10], double x)
{
double m;
long long im; // at least 64 bits
int ie;
int i;
if (isnan(x))
{
// NaN
memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x00" "\xFF\x7F", 10);
return;
}
else if (isinf(x))
{
if (signbit(x))
// -inf
memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x80" "\xFF\x7F", 10);
else
// +inf
memcpy(buf, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F" "\xFF\x7F", 10);
return;
}
// Split double into normalized mantissa (range: (-1, -0.5], 0, [+0.5, +1))
// and base-2 exponent
m = frexp(x, &ie); // x = m * 2^ie exactly for FLT_RADIX=2
// frexp() can't fail
// Extract most significant 53 bits of mantissa as integer
m = ldexp(m, 53); // can't overflow because
// DBL_MAX_10_EXP >= 37 equivalent to DBL_MAX_2_EXP >= 122
im = trunc(m); // exact unless DBL_MANT_DIG > 53
// If the exponent is too small or too big, reduce the number to 0 or
// +/- infinity
if (ie > 0x7FFE)
{
if (im < 0)
// -inf
memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x80" "\xFF\x7F", 10);
else
// +inf
memcpy(buf, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F" "\xFF\x7F", 10);
return;
}
else if (ie < -0x7FFE)
{
// 0
memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00", 10);
return;
}
// Store im as signed 64-bit little-endian integer
for (i = 0; i < 8; i++, im >>= 8)
buf[i] = (uint8)im;
// Store ie as signed 16-bit little-endian integer
for (i = 8; i < 10; i++, ie >>= 8)
buf[i] = (uint8)ie;
}
void Bytes2Double(double* x, const uint8 buf[10])
{
unsigned long long uim; // at least 64 bits
long long im; // ditto
unsigned uie;
int ie;
double m;
int i;
int negative = 0;
int maxe;
if (!memcmp(buf, "\x00\x00\x00\x00\x00\x00\x00\x00" "\xFF\x7F", 10))
{
#ifdef NAN
*x = NAN;
#else
*x = 0; // NaN is not supported, use 0 instead (we could return an error)
#endif
return;
}
if (!memcmp(buf, "\x00\x00\x00\x00\x00\x00\x00\x80" "\xFF\x7F", 10))
{
*x = -INFINITY;
return;
}
else if (!memcmp(buf, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F" "\xFF\x7F", 10))
{
*x = INFINITY;
return;
}
// Load im as signed 64-bit little-endian integer
uim = 0;
for (i = 0; i < 8; i++)
{
uim >>= 8;
uim |= (unsigned long long)buf[i] << (64 - 8);
}
if (uim <= 0x7FFFFFFFFFFFFFFFLL)
im = uim;
else
im = (long long)(uim - 0x7FFFFFFFFFFFFFFFLL - 1) - 0x7FFFFFFFFFFFFFFFLL - 1;
// Obtain the absolute value of the mantissa, make sure it's
// normalized and fits into 53 bits, else the input is invalid
if (im > 0)
{
if (im < (1LL << 52) || im >= (1LL << 53))
{
#ifdef NAN
*x = NAN;
#else
*x = 0; // NaN is not supported, use 0 instead (we could return an error)
#endif
return;
}
}
else if (im < 0)
{
if (im > -(1LL << 52) || im <= -(1LL << 53))
{
#ifdef NAN
*x = NAN;
#else
*x = 0; // NaN is not supported, use 0 instead (we could return an error)
#endif
return;
}
negative = 1;
im = -im;
}
// Load ie as signed 16-bit little-endian integer
uie = 0;
for (i = 8; i < 10; i++)
{
uie >>= 8;
uie |= (unsigned)buf[i] << (16 - 8);
}
if (uie <= 0x7FFF)
ie = uie;
else
ie = (int)(uie - 0x7FFF - 1) - 0x7FFF - 1;
// If DBL_MANT_DIG < 53, truncate the mantissa
im >>= (53 > DBL_MANT_DIG) ? (53 - DBL_MANT_DIG) : 0;
m = im;
m = ldexp(m, (53 > DBL_MANT_DIG) ? -DBL_MANT_DIG : -53); // can't overflow
// because DBL_MAX_10_EXP >= 37 equivalent to DBL_MAX_2_EXP >= 122
// Find out the maximum base-2 exponent and
// if ours is greater, return +/- infinity
frexp(DBL_MAX, &maxe);
if (ie > maxe)
m = INFINITY;
else
m = ldexp(m, ie); // underflow may cause a floating-point exception
*x = negative ? -m : m;
}
int test(double x, const char* name)
{
uint8 buf[10], buf2[10];
double x2;
int error1, error2;
Double2Bytes(buf, x);
Bytes2Double(&x2, buf);
Double2Bytes(buf2, x2);
printf("%+.15E '%s' -> %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
x,
name,
buf[0],buf[1],buf[2],buf[3],buf[4],buf[5],buf[6],buf[7],buf[8],buf[9]);
if ((error1 = memcmp(&x, &x2, sizeof(x))) != 0)
puts("Bytes2Double(Double2Bytes(x)) != x");
if ((error2 = memcmp(buf, buf2, sizeof(buf))) != 0)
puts("Double2Bytes(Bytes2Double(Double2Bytes(x))) != Double2Bytes(x)");
puts("");
return error1 || error2;
}
int testInf(void)
{
uint8 buf[10];
double x, x2;
int error;
x = DBL_MAX;
Double2Bytes(buf, x);
if (!++buf[8])
++buf[9]; // increment the exponent beyond the maximum
Bytes2Double(&x2, buf);
printf("%02X %02X %02X %02X %02X %02X %02X %02X %02X %02X -> %+.15E\n",
buf[0],buf[1],buf[2],buf[3],buf[4],buf[5],buf[6],buf[7],buf[8],buf[9],
x2);
if ((error = !isinf(x2)) != 0)
puts("Bytes2Double(Double2Bytes(DBL_MAX) * 2) != INF");
puts("");
return error;
}
#define VALUE_AND_NAME(V) { V, #V }
const struct
{
double value;
const char* name;
} testData[] =
{
#ifdef NAN
VALUE_AND_NAME(NAN),
#endif
VALUE_AND_NAME(0.0),
VALUE_AND_NAME(+DBL_MIN),
VALUE_AND_NAME(-DBL_MIN),
VALUE_AND_NAME(+1.0),
VALUE_AND_NAME(-1.0),
VALUE_AND_NAME(+M_PI),
VALUE_AND_NAME(-M_PI),
VALUE_AND_NAME(+DBL_MAX),
VALUE_AND_NAME(-DBL_MAX),
VALUE_AND_NAME(+INFINITY),
VALUE_AND_NAME(-INFINITY),
};
int main(void)
{
unsigned i;
int errors = 0;
for (i = 0; i < sizeof(testData) / sizeof(testData[0]); i++)
errors += test(testData[i].value, testData[i].name);
errors += testInf();
// Test subnormal values. A floating-point exception may be raised.
errors += test(+DBL_MIN / 2, "+DBL_MIN / 2");
errors += test(-DBL_MIN / 2, "-DBL_MIN / 2");
printf("%d error(s)\n", errors);
return 0;
}
Output (ideone):
+NAN 'NAN' -> 00 00 00 00 00 00 00 00 FF 7F
+0.000000000000000E+00 '0.0' -> 00 00 00 00 00 00 00 00 00 00
+2.225073858507201E-308 '+DBL_MIN' -> 00 00 00 00 00 00 10 00 03 FC
-2.225073858507201E-308 '-DBL_MIN' -> 00 00 00 00 00 00 F0 FF 03 FC
+1.000000000000000E+00 '+1.0' -> 00 00 00 00 00 00 10 00 01 00
-1.000000000000000E+00 '-1.0' -> 00 00 00 00 00 00 F0 FF 01 00
+3.141592653589793E+00 '+M_PI' -> 18 2D 44 54 FB 21 19 00 02 00
-3.141592653589793E+00 '-M_PI' -> E8 D2 BB AB 04 DE E6 FF 02 00
+1.797693134862316E+308 '+DBL_MAX' -> FF FF FF FF FF FF 1F 00 00 04
-1.797693134862316E+308 '-DBL_MAX' -> 01 00 00 00 00 00 E0 FF 00 04
+INF '+INFINITY' -> FF FF FF FF FF FF FF 7F FF 7F
-INF '-INFINITY' -> 00 00 00 00 00 00 00 80 FF 7F
FF FF FF FF FF FF 1F 00 01 04 -> +INF
+1.112536929253601E-308 '+DBL_MIN / 2' -> 00 00 00 00 00 00 10 00 02 FC
-1.112536929253601E-308 '-DBL_MIN / 2' -> 00 00 00 00 00 00 F0 FF 02 FC
0 error(s)

Depending on the application it could be a good idea to use a plain text data format (a possibility being XML). If you don't want to waste disk space you can compress it.

XML is probably the most portable way to do it.
However, it appears that you already have most of the parser built, but are stuck on the float/double issue. I would suggest writing it out as a string (to whatever precision you desire) and then reading that back in.
Unless all your target platforms use IEEE-754 floats (and doubles), no byte-swapping tricks will work for you.

If you guarantee that your implementations always treat serialized floating point representations in a specified format, then you will be fine (IEEE 754 is common).
Yes, architectures may order floating point numbers differently (e.g. in big or little endian). Therefore, you will want to somehow specify the endianness. This could be in the format's specification or variable and recorded in the file's data.
The last major pitfall is that alignment for builtins may vary. How your hardware/processor handles malaligned data is implementation defined. So you may need to swap the data/bytes, then move it to the destination float/double.

A library like HDF5 or even NetCDF is probably a bit heavyweight for this as High Performance Mark said, unless you also need the other features available in those libraries.
A lighter-weight alternative that only deals with the serialization would be e.g. XDR (see also wikipedia description). Many OS'es supply XDR routines out of the box, if this is not enough free-standing XDR libraries exist as well.

Related

Why final pointer is being aligned to size of int?

Here's the code under consideration:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
char buffer[512];
int pos;
int posf;
int i;
struct timeval *tv;
int main(int argc, char **argv)
{
pos = 0;
for (i = 0; i < 512; i++) buffer[i] = 0;
for (i = 0; i < 4; i++)
{
printf("pos = %d\n", pos);
*(int *)(buffer + pos + 4) = 0x12345678;
pos += 9;
}
for (i = 0; i < 9 * 4; i++)
{
printf(" %02X", (int)(unsigned char)*(buffer + i));
if ((i % 9) == 8) printf("\n");
}
printf("\n");
// ---
pos = 0;
for (i = 0; i < 512; i++) buffer[i] = 0;
*(int *)(buffer + 4) = 0x12345678;
*(int *)(buffer + 9 + 4) = 0x12345678;
*(int *)(buffer + 18 + 4) = 0x12345678;
*(int *)(buffer + 27 + 4) = 0x12345678;
for (i = 0; i < 9 * 4; i++)
{
printf(" %02X", (int)(unsigned char)*(buffer + i));
if ((i % 9) == 8) printf("\n");
}
printf("\n");
return 0;
}
And the output of code is
pos = 0
pos = 9
pos = 18
pos = 27
00 00 00 00 78 56 34 12 00
00 00 00 78 56 34 12 00 00
00 00 78 56 34 12 00 00 00
00 78 56 34 12 00 00 00 00
00 00 00 00 78 56 34 12 00
00 00 00 00 78 56 34 12 00
00 00 00 00 78 56 34 12 00
00 00 00 00 78 56 34 12 00
I can not get why
*(int *)(buffer + pos + 4) = 0x12345678;
is being placed into the address aligned to size of int (4 bytes). I expect the following actions during the execution of this command:
pointer to buffer, which is char*, increased by the value of pos (0, 9, 18, 27) and then increased by 4. The resulting pointer is char* pointing to char array index [pos + 4];
char* pointer in the brackets is being converted to the int*, causing resulting pointer addressing integer of 4 bytes size at base location (buffer + pos + 4) and integer array index [0];
resulting int* location is being stored with bytes 78 56 34 12 in this order (little endian system).
Instead I see pointer in brackets being aligned to size of int (4 bytes), however direct addressing using constants (see second piece of code) works properly as expected.
target CPU is i.MX287 (ARM9);
target operating system is OpenWrt Linux [...] 3.18.29 #431 Fri Feb 11 15:57:31 2022 armv5tejl GNU/Linux;
compiled on Linux [...] 4.15.0-142-generic #146~16.04.1-Ubuntu SMP Tue Apr 13 09:27:15 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux, installed in Virtual machine;
GCC compiler version gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609
I compile as a part of whole system image compilation, flags are CFLAGS = -Os -Wall -Wmissing-declarations -g3.
Update: thanks to Andrew Henle, I now replace
*(int*)(buffer + pos + 4) = 0x12345678;
with
buffer[pos + 4] = value & 0xff;
buffer[pos + 5] = (value >> 8) & 0xff;
buffer[pos + 6] = (value >> 16) & 0xff;
buffer[pos + 7] = (value >> 24) & 0xff;
and can't believe I must do it on 32-bit microprocessor system, whatever architecture it has, and that GCC is not able to properly slice int into bytes or partial int words and perform RMW for those parts.
char* pointer in the brackets is being converted to the int*, causing resulting pointer addressing integer of 4 bytes size at base location (buffer + pos + 4) and integer array index [0]
This incurs undefined behavior (UB) when the alignments requirements of int * are not met.
Instead copy with memcpy(). A good compiler will emit valid optimized code.
// *(int*)(buffer + pos + 4) = 0x12345678;
memcpy(buffer + pos + 4, &(int){0x12345678}, sizeof (int));

How can I read and obtain separated data from a file using 'fread' in C?

I've written in a file (using 'fwrite()') the following:
TUS�ABQ���������������(A����������(A��B������(A��B���A��(A��B���A������B���A������0����A������0�ABQ�������0�ABQ�����LAS����������������A�����������A��&B�������A��&B��B���A��&B��B������&B��
B����153���B����153�LAS�����153�LAS�����LAX���������������:A����������:AUUB������:AUUB��B��:
AUUB��B����UUB��B����������B��������LAX���������LAX�����MDW���������������A����������A��(�������A��(����A��A��(����A������(����A����A�89���A����A�89MDW�����A�89MDW�����OAK���������
����������������������#�����������#�����������#�����������#�������������������������OAK���������OAK�����SAN���������������LA����������LA��P#������LA��P#��#A��LA��P#��#A������P#��#A����������#A��������SAN���������SAN�����TPA�ABQ����������������B�����������B��#�����...(continues)
which is translated to this:
TUSLWD2.103.47.775.1904.06.40.03AMBRFD4.63.228.935.0043.09.113.0ASDGHU5.226.47.78.3.26...(The same structure)
and the hexdump of that would be:
00000000 54 55 53 00 41 42 51 00 00 00 00 00 00 00 00 00 |TUS.ABQ.........|
00000010 00 00 00 00 00 00 28 41 00 00 0e 42 00 00 f8 41 |......(A...B...A|
00000020 00 00 00 00 4c 41 53 00 00 00 00 00 00 00 00 00 |....LAS.........|
00000030 00 00 00 00 00 00 88 41 00 00 26 42 9a 99 11 42 |.......A..&B...B|
(Continues...)
the structure is, always 2 words of 3 characters each one (i.e. TUS and LWD) followed by 7 floats, and then it repeats again on a on until end of file.
The key thing is: I just want to read every field separated like 'TUS', 'LWD', '2.10', '3.4', '7.77'...
And I can only use 'fread()' to achieve that! For now, I'm trying this:
aux2 = 0;
fseek(fp, SEEK_SET, 0);
fileSize = 0;
while (!feof(fp) && aux<=2) {
fread(buffer, sizeof(char)*4, 1, fp);
printf("%s", buffer);
fread(buffer, sizeof(char)*4, 1, fp);
printf("%s", buffer);
for(i=0; i<7; i++){
fread(&delay, sizeof(float), 1, fp);
printf("%f", delay);
}
printf("\n");
aux++;
fseek(fp,sizeof(char)*7+sizeof(float)*7,SEEK_SET);
aux2+=36;
}
And I get this result:
TUSABQ0.0000000.0000000.00000010.5000000.0000000.00000010.500000
AB0.0000000.000000-10384675421112248092159136000638976.0000000.0000000.000000-10384675421112248092159136000638976.0000000.000000
AB0.0000000.000000-10384675421112248092159136000638976.0000000.0000000.000000-10384675421112248092159136000638976.0000000.000000
But it does not works correctly...
*Note: forget the arguments of the last 'fseek()', cos I've been trying too many meaningless things!
To write the words (i.e. TUS) into the file, I use this:
fwrite(x->data->key, 4, sizeof(char), fp);
and to write the floats, this:
for (i = 0; i < 7; i++) {
fwrite(&current->data->retrasos[i], sizeof(float), sizeof(float), fp);
}
I'd recommend using a structure to hold each data unit:
typedef struct {
float value[7];
char word1[5]; /* 4 + '\0' */
char word2[5]; /* 4 + '\0' */
} unit;
To make the file format portable, you need a function that packs and unpacks the above structure to/from a 36-byte array. On Intel and AMD architectures, float corresponds to IEEE-754-2008 binary32 format in little-endian byte order. For example,
#define STORAGE_UNIT (4+4+7*4)
#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
size_t unit_pack(char *target, const size_t target_len, const unit *source)
{
size_t i;
if (!target || target_len < STORAGE_UNIT || !source) {
errno = EINVAL;
return 0;
}
memcpy(target + 0, source->word1, 4);
memcpy(target + 4, source->word2, 4);
for (i = 0; i < 7; i++)
memcpy(target + 8 + 4*i, &(source->value[i]), 4);
return STORAGE_UNIT;
}
size_t unit_unpack(unit *target, const char *source, const size_t source_len)
{
size_t i;
if (!target || !source || source_len < STORAGE_UNIT) {
errno = EINVAL;
return 0;
}
memcpy(target->word1, source, 4);
target->word1[4] = '\0';
memcpy(target->word2, source + 4, 4);
target->word2[4] = '\0';
for (i = 0; i < 7; i++)
memcpy(&(target->value[i]), source + 8 + i*4, 4);
return STORAGE_UNIT;
}
#else
#error Unsupported architecture!
#endif
The above only works on Intel and AMD machines, but it is certainly easy to extend to other architectures if necessary. (Almost all machines currently use IEEE 754-2008 binary32 for float, only the byte order varies. Those that do not, typically have C extensions that do the conversion to/from their internal formats.)
Using the above, you can -- should! must! -- document your file format, for example as follows:
Words are 4 bytes encoded in UTF-8
Floats are IEEE 754-2008 binary32 values in little-endian byte order
A file contains one or more units. Each unit comprises of
Name Description
word1 First word
word2 Second word
value0 First float
value1 Second float
value2 Third float
value3 Fourth float
value4 Fifth float
value5 Sixth float
value6 Second float
There is no padding.
To write an unit, use a char array of size STORAGE_UNIT as a cache, and write that. So, if you have unit *one, you can write it to FILE *out using
char buffer[STORAGE_UNIT];
if (unit_pack(buffer, sizeof buffer, one)) {
/* Error! Abort program! */
}
if (fwrite(buffer, STORAGE_UNIT, 1, out) != 1) {
/* Write error! Abort program! */
}
Correspondingly, reading from FILE *in would be
char buffer[STORAGE_UNIT];
if (fread(buffer, STORAGE_UNIT, 1, in) != 1) {
/* End of file, or read error.
Check feof(in) or/and ferror(in). */
}
if (unit_unpack(one, buffer, STORAGE_UNIT)) {
/* Error! Abort program! */
}
If one is an array of units, and you are writing or reading one[k], use &(one[k]) (or equivalently one + k) instead of one.

Use printf to print character string in hexadecimal format, distorted results

I want to print a character string in hexadecimal format on machine A. Something like:
ori_mesg = gen_rdm_bytestream (1400, seed)
sendto(machine B, ori_mesg, len(mesg))
On machine B
recvfrom(machine A, mesg)
mesg_check = gen_rdm_bytestream (1400, seed)
for(i=0;i<20;i++){
printf("%02x ", *(mesg+i)& 0xFF);
}
printf("\n");
for(i=0; i<20; i++){
printf("%02x ", *(mesg_check+i));
}
printf("\n");
seed varies among 1, 2, 3, ...
The bytes generation funcion is:
u_char *gen_rdm_bytestream (size_t num_bytes, unsigned int seed)
{
u_char *stream = malloc (num_bytes+4);
size_t i;
u_int16_t seq = seed;
seq = htons(seq);
u_int16_t tail = num_bytes;
tail = htons(tail);
memcpy(stream, &seq, sizeof(seq));
srand(seed);
for (i = 3; i < num_bytes+2; i++){
stream[i] = rand ();
}
memcpy(stream+num_bytes+2, &tail, sizeof(tail));
return stream;
}
But I got results from printf like:
00 01 00 67 c6 69 73 51 ff 4a ec 29 cd ba ab f2 fb e3 46 7c
00 01 00 67 ffffffc6 69 73 51 ffffffff 4a ffffffec 29 ffffffcd ffffffba ffffffab fffffff2 fffffffb ffffffe3 46 7c
or
00 02 88 fa 7f 44 4f d5 d2 00 2d 29 4b 96 c3 4d c5 7d 29 7e
00 02 00 fffffffa 7f 44 4f ffffffd5 ffffffd2 00 2d 29 4b ffffff96 ffffffc3 4d ffffffc5 7d 29 7e
Why are there so many fffff for mesg_check?
Are there any potential reasons for this phenomenon?
Here's a small program that illustrates the problem I think you might be having:
#include <stdio.h>
int main(void) {
char arr[] = { 0, 16, 127, 128, 255 };
for (int i = 0; i < sizeof arr; i ++) {
printf(" %2x", arr[i]);
}
putchar('\n');
return 0;
}
On my system (on which plain char is signed), I get this output:
0 10 7f ffffff80 ffffffff
The value 255, when stored in a (signed) char, is stored as -1. In the printf call, it's promoted to (signed) int -- but the "%2x" format tells printf to treat it as an unsigned int, so it displays fffffffff.
Make sure that your mesg and mesg_check arrays are defined as arrays of unsigned char, not plain char.
UPDATE: Rereading this answer more than a year later, I realize it's not quite correct. Here's a program that works correctly on my system, and will almost certainly work on any reasonable system:
#include <stdio.h>
int main(void) {
unsigned char arr[] = { 0, 16, 127, 128, 255 };
for (int i = 0; i < sizeof arr; i ++) {
printf(" %02x", arr[i]);
}
putchar('\n');
return 0;
}
The output is:
00 10 7f 80 ff
An argument of type unsigned char is promoted to (signed) int (assuming that int can hold all values of type unsigned char, i.e., INT_MAX >= UCHAR_MAX, which is the case on practically all systems). So the argument arr[i] is promoted to int, while the " %02x" format requires an argument of type unsigned int.
The C standard strongly implies, but doesn't quite state directly, that arguments of corresponding signed and unsigned types are interchangeable as long as they're within the range of both types -- which is the case here.
To be completely correct, you need to ensure that the argument is actually of type unsigned int:
printf("%02x", (unsigned)arr[i]);
Yes, always print the string in hexadecimal format as:
for(i=0; till string length; i++)
printf("%02X", (unsigned char)str[i]);
You will get an error when you try to print the whole string in one go and when printing the hexadecimal string character by character which is using 'unsigned char' if the string is in format other than 'unsigned char'.

Converting a floating point number in C to IEEE standard

I am trying to write a code in C that generates a random integer , performs simple calculation and then I am trying to print the values of the file in IEEE standard. But I am unable to do so , Please help.
I am unable to print it in Hexadecimal/Binary which is very important.
If I type cast the values in fprintf, I am getting this Error expected expression before double.
int main (int argc, char *argv) {
int limit = 20 ; double a[limit], b[limit]; //Inputs
double result[limit] ; int i , k ; //Outputs
printf("limit = %d", limit ); double q;
for (i= 0 ; i< limit;i++)
{
a[i]= rand();
b[i]= rand();
printf ("A= %x B = %x\n",a[i],b[i]);
}
char op;
printf("Enter the operand used : add,subtract,multiply,divide\n");
scanf ("%c", &op); switch (op) {
case '+': {
for (k= 0 ; k< limit ; k++)
{
result [k]= a[k] + b[k];
printf ("result= %f\n",result[k]);
}
}
break;
case '*': {
for (k= 0 ; k< limit ; k++)
{
result [k]= a[k] * b[k];
}
}
break;
case '/': {
for (k= 0 ; k< limit ; k++)
{
result [k]= a[k] / b[k];
}
}
break;
case '-': {
for (k= 0 ; k< limit ; k++)
{
result [k]= a[k] - b[k];
}
}
break; }
FILE *file; file = fopen("tb.txt","w"); for(k=0;k<limit;k++) {
fprintf (file,"%x\n
%x\n%x\n\n",double(a[k]),double(b[k]),double(result[k]) );
}
fclose(file); /*done!*/
}
If your C compiler supports IEEE-754 floating point format directly (because the CPU supports it) or fully emulates it, you may be able to print doubles simply as bytes. And that is the case for the x86/64 platform.
Here's an example:
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
void PrintDoubleAsCBytes(double d, FILE* f)
{
unsigned char a[sizeof(d)];
unsigned i;
memcpy(a, &d, sizeof(d));
for (i = 0; i < sizeof(a); i++)
fprintf(f, "%0*X ", (CHAR_BIT + 3) / 4, a[i]);
}
int main(void)
{
PrintDoubleAsCBytes(0.0, stdout); puts("");
PrintDoubleAsCBytes(0.5, stdout); puts("");
PrintDoubleAsCBytes(1.0, stdout); puts("");
PrintDoubleAsCBytes(2.0, stdout); puts("");
PrintDoubleAsCBytes(-2.0, stdout); puts("");
PrintDoubleAsCBytes(DBL_MIN, stdout); puts("");
PrintDoubleAsCBytes(DBL_MAX, stdout); puts("");
PrintDoubleAsCBytes(INFINITY, stdout); puts("");
#ifdef NAN
PrintDoubleAsCBytes(NAN, stdout); puts("");
#endif
return 0;
}
Output (ideone):
00 00 00 00 00 00 00 00
00 00 00 00 00 00 E0 3F
00 00 00 00 00 00 F0 3F
00 00 00 00 00 00 00 40
00 00 00 00 00 00 00 C0
00 00 00 00 00 00 10 00
FF FF FF FF FF FF EF 7F
00 00 00 00 00 00 F0 7F
00 00 00 00 00 00 F8 7F
If IEEE-754 isn't supported directly, the problem becomes more complex. However, it can still be solved.
Here are a few related questions and answers that can help:
How do I handle byte order differences when reading/writing floating-point types in C?
Is there a tool to know whether a value has an exact binary representation as a floating point variable?
C dynamically printf double, no loss of precision and no trailing zeroes
And, of course, all the IEEE-754 related info can be found in Wikipedia.
Try this in your fprint part:
fprintf (file,"%x\n%x\n%x\n\n",*((int*)(&a[k])),*((int*)(&b[k])),*((int*)(&result[k])));
That would translate the double as an integer so it's printed in IEEE standard.
But if you're running your program on a 32-bit machine on which int is 32-bit and double is 64-bit, I suppose you should use:
fprintf (file,"%x%x\n%x%x\n%x%x\n\n",*((int*)(&a[k])),*((int*)(&a[k])+1),*((int*)(&b[k])),*((int*)(&b[k])+1),*((int*)(&result[k])),*((int*)(&result[k])+1));
In C, there are two ways to get at the bytes in a float value: a pointer cast, or a union. I recommend a union.
I just tested this code with GCC and it worked:
#include <stdio.h>
typedef unsigned char BYTE;
int
main()
{
float f = 3.14f;
int i = sizeof(float) - 1;
BYTE *p = (BYTE *)(&f);
p[i] = p[i] | 0x80; // set the sign bit
printf("%f\n", f); // prints -3.140000
}
We are taking the address of the variable f, then assigning it to a pointer to BYTE (unsigned char). We use a cast to force the pointer.
If you try to compile code with optimizations enabled and you do the pointer cast shown above, you might run into the compiler complaining about "type-punned pointer" issues. I'm not exactly sure when you can do this and when you can't. But you can always use the other way to get at the bits: put the float into a union with an array of bytes.
#include <stdio.h>
typedef unsigned char BYTE;
typedef union
{
float f;
BYTE b[sizeof(float)];
} UFLOAT;
int
main()
{
UFLOAT u;
int const i = sizeof(float) - 1;
u.f = 3.14f;
u.b[i] = u.b[i] | 0x80; // set the sign bit
printf("%f\n", u.f); // prints -3.140000
}
What definitely will not work is to try to cast the float value directly to an unsigned integer or something like that. C doesn't know you just want to override the type, so C tries to convert the value, causing rounding.
float f = 3.14;
unsigned int i = (unsigned int)f;
if (i == 3)
printf("yes\n"); // will print "yes"
P.S. Discussion of "type-punned" pointers here:
Dereferencing type-punned pointer will break strict-aliasing rules

Why doesn't my bit left shift?

I'm using embedded C on Keil. I'm trying to program such that it stores a bit, bit shifts and then it stores it again and it repeats until all eight bits are stored.
However, when I debug (maybe debug wrongly), the value only shows "01 00 00 00 00 00 00...". When it stores logic'1' and then when it shift left, it shows "02 00 00 00 00 00 00...". When the loop repeats, it shows the same thing over and over again. What I expected was "01 01 01 01 01 01 01..." (Let's say all the input bits was '1'). How do I solve this problem?
#include <reg51.h>
sbit Tsignal = P1^2;
unsigned char xdata x[500];
for(u=0; u<8; u++)
{
x[i] = x[i] << 1;
x[i] = Tsignal; //Store Tsignal in x
}
Ah, I have solved it already.
unsigned int u;
unsigned char p;
unsigned char xdata x[500];
for(u=0; u<8; u++) //Bit Shift Loop
{
x[i] = x[i] <<1; //Left Bit Shift by 1
p = Tsignal; //Store Tsignal to Buffer p
x[i] |= p;
} //End Bitshift loop
I think you want to do something like this:
for(u=0;u<8;u++)
{
// Update Tsignal.
//Tsignal = GetBitValue();
// Store it to x.
x = (x << 1) | (Tsignal & 0x1)
}

Resources