reading big-endian files in little-endian system - c

I have a data file that I need to read in C. It is compirsed of alternating 16-bit integer stored in binary form, and I need only the first column (ie, every other entry starting at 0)
I have a simple python script that reads the files accurately:
import numpy as np
fname = '[filename]'
columntypes = np.dtype([('curr_pA', '>i2'),('volts', '>i2')])
test = np.memmap(fname, dtype=columntypes,mode='r')['curr_pA']
I want to port this to C. Because my machine is natively little-endian I need to manually perform the byte swap. Here's what I have done:
void swapByteOrder_int16(double *current, int16_t *rawsignal, int64_t length)
{
int64_t i;
for (i=0; i<length; i++)
{
current[i] = ((rawsignal[2*i] << 8) | ((rawsignal[2*i] >> 8) & 0xFF));
}
}
int64_t read_current_int16(FILE *input, double *current, int16_t *rawsignal, int64_t position, int64_t length)
{
int64_t test;
int64_t read = 0;
if (fseeko64(input,(off64_t) position*2*sizeof(int16_t),SEEK_SET))
{
return 0;
}
test = fread(rawsignal, sizeof(int16_t), 2*length, input);
read = test/2;
if (test != 2*length)
{
perror("End of file reached");
}
swapByteOrder_int16(current, rawsignal, length);
return read;
}
In the read_current_int16 function I use fread to read a large chunk of data (both columns) into rawsignal array. I then call swapByteOrder_int16 to pick off every other value, and swap its bytes around. I then cast the result to double and store it in current.
It doesn't work. I get garbage as the output in the C code. I think I've been starting at it for too long and can no longer see my own errors. Can anyone spot anything glaringly wrong?

Perform the endian swap as unsigned math and then assign to double.
void swapByteOrder_int16(double *current, const int16_t *rawsignal, size_t length) {
for (size_t i = 0; i < length; i++) {
int16_t x = rawsignal[2*i];
x = (x*1u << 8) | (x*1u >> 8);
current[i] = x;
}
}

I prefer this mask and shift combination:
current[i] = ((rawsignal[2*i] & 0x00ff) << 8) | (rawsignal[2*i] >> 8)

As suggested by several people, doing the shifts as unsigned does the trick. I am answering this with my implementation just for the sake of completeness since I tweaked it a little from the accepted answer:
void swapByteOrder_int16(double *current, uint16_t *rawsignal, int64_t length)
{
union int16bits bitval;
int64_t i;
for (i=0; i<length; i++)
{
bitval.bits = rawsignal[2*i];
bitval.bits = (bitval.bits << 8) | (bitval.bits >> 8);
current[i] = (double) bitval.currentval;
}
}
union int16bits
{
uint16_t bits;
int16_t currentval;
};

Swapping bits with unsigned types will make things much easier:
void swapByteOrder_int16(double *current, void const *rawsignal_, size_t length)
{
uint16_t const *rawsignal = rawsignal_;
size_t i;
for (i=0; i<length; i++)
{
uint16_t tmp = rawsignal[2*i];
tmp = ((tmp >> 8) & 0xffu) | ((tmp << 8) & 0xff00u);
current[i] = (int16_t)(tmp);
}
}
NOTE: when rawsignal is not aligned, you have to memcpy() it.

Related

How to set the values of an array to a single variable

I'm reading the values from a SD card in an ARM micro:
Res = f_read(&fil, (void*)buf, 6, &NumBytesRead);
where fil is a pointer, buf is a buffer where the data is stored.
And that's the problem: it's an array but I'd like to have the contents of that array in a single variable.
To give an actual example: the 6 bytes read from the file are:
buf[0] = 0x1B
buf[1] = 0x26
buf[2] = 0xB3
buf[3] = 0x54
buf[4] = 0xA1
buf[5] = 0xCF
And I'd like to have: uint64_t data be equal to 0x1B26B354A1CF. That is, all the elements of the array "concatenated" in one single 64 bit integer.
Without type punning you can do as below.
uint64_t data = 0;
for (int i=0; i<6; i++)
{
data <<= 8;
data |= (uint64_t) buf[i];
}
Use union but remember about the endianes.
union
{
uint8_t u8[8];
uint64_t u64;
}u64;
typedef union
{
uint8_t u8[8];
uint64_t u64;
}u64;
typedef enum
{
LITTLE_E,
BIG_E,
}ENDIANESS;
ENDIANESS checkEndianess(void)
{
ENDIANESS result = BIG_E;
u64 d64 = {.u64 = 0xff};
if(d64.u8[0]) result = LITTLE_E;
return result;
}
uint64_t arrayToU64(uint8_t *array, ENDIANESS e) // for the array BE
{
u64 d64;
if(e == LITTLE_E)
{
memmove(&d64, array, sizeof(d64.u64));
}
else
{
for(int index = sizeof(d64.u64) - 1; index >= 0; index--)
{
d64.u8[sizeof(d64.u64) - index - 1] = array[index];
}
}
return d64.u64;
}
int main()
{
uint8_t BIG_E_Array[] = {0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x80};
ENDIANESS e;
printf("This system endianess: %s\n", (e = checkEndianess()) == BIG_E ? "BIG":"LITTLE");
printf("Punned uint64_t for our system 0x%lx\n", arrayToU64(BIG_E_Array, e));
printf("Punned uint64_t for the opposite endianess system 0x%lx\n", arrayToU64(BIG_E_Array, e == BIG_E ? LITTLE_E : BIG_E));
return 0;
}
To things to take care of here:
have the bytes be ordered correctly
read the six bytes into one 64bit integer
Issue 1 can be taken care of by storing the byte coming in in network byte order (Big Endian) into the 64 bit integer in host byte order by for example using the two marcos below:
/* below defines of htonll() and ntohll() are taken from this answer:
https://stackoverflow.com/a/28592202/694576
*/
#if __BIG_ENDIAN__
# define htonll(x) (x)
# define ntohll(x) (x)
#else
# define htonll(x) ((uint64_t)htonl((x) & 0xFFFFFFFF) << 32) | htonl((x) >> 32))
# define ntohll(x) ((uint64_t)ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((x) >> 32))
#endif
Issue 2 can be solved in multiple ways:
Extending your approach
#define BUFFER_SIZE (6)
...
assert(BUFFER_SIZE <= sizeof (uint64_t));
uint8_t buffer[BUFFER_SIZE];
FILE * pf = ...; /* open file here */
/* test if file has been opened successfully here */
... result = f_read(pf, buffer, BUFFER_SIZE, ...);
/* test result for success */
uint64_t number = 0;
memset(&number, buffer, BUFFER_SIZE)
number = ntohll(number);
Use "Type Punning" by using a union
union buffer_wrapper
{
uint8_t u8[sizeof (uint64_t)];
uint64_t u64;
}
Instead of
uint8_t buffer[BUFFER_SIZE];
use
union buffer_wrapper buffer;
and instead of
memcpy(&number, buffer, BUFFER_SIZE)
number = ntohll(number)
use
number = ntohll(buffer.u64)

Bitwise XOR in C using 64bit instead of 8bits

I consider how to make efficient XORing of 2 bytes arrays.
I have this bytes arrays defined as unsigned char *
I think that XORing them as uint64_t will be much faster. Is it true?
How efficiently convert unsigned char * to this uint64_t * preferably inside the XORing loop? How to make padding of last bytes if length of the bytes array % 8 isn't 0?
Here is my current code that XORs bytes array, but each byte (unsigned char) separately:
unsigned char *bitwise_xor(const unsigned char *A_Bytes_Array, const unsigned char *B_Bytes_Array, const size_t length) {
unsigned char *XOR_Bytes_Array;
// allocate XORed bytes array
XOR_Bytes_Array = malloc(sizeof(unsigned char) * length);
// perform bitwise XOR operation on bytes arrays A and B
for(int i=0; i < length; i++)
XOR_Bytes_Array[i] = (unsigned char)(A_Bytes_Array[i] ^ B_Bytes_Array[i]);
return XOR_Bytes_Array;
}
Ok, in the meantime I have tried to do it this way. My bytes_array are rather large (rgba bitmaps 4*1440*900?).
static uint64_t next64bitsFromBytesArray(const unsigned char *bytesArray, const int i) {
uint64_t next64bits = (uint64_t) bytesArray[i+7] | ((uint64_t) bytesArray[i+6] << 8) | ((uint64_t) bytesArray[i+5] << 16) | ((uint64_t) bytesArray[i+4] << 24) | ((uint64_t) bytesArray[i+3] << 32) | ((uint64_t) bytesArray[i+2] << 40) | ((uint64_t) bytesArray[i+1] << 48) | ((uint64_t)bytesArray[i] << 56);
return next64bits;
}
unsigned char *bitwise_xor64(const unsigned char *A_Bytes_Array, const unsigned char *B_Bytes_Array, const size_t length) {
unsigned char *XOR_Bytes_Array;
// allocate XORed bytes array
XOR_Bytes_Array = malloc(sizeof(unsigned char) * length);
// perform bitwise XOR operation on bytes arrays A and B using uint64_t
for(int i=0; i<length; i+=8) {
uint64_t A_Bytes = next64bitsFromBytesArray(A_Bytes_Array, i);
uint64_t B_Bytes = next64bitsFromBytesArray(B_Bytes_Array, i);
uint64_t XOR_Bytes = A_Bytes ^ B_Bytes;
memcpy(XOR_Bytes_Array + i, &XOR_Bytes, 8);
}
return XOR_Bytes_Array;
}
UPDATE: (2nd approach to this problem)
unsigned char *bitwise_xor64(const unsigned char *A_Bytes_Array, const unsigned char *B_Bytes_Array, const size_t length) {
const uint64_t *aBytes = (const uint64_t *) A_Bytes_Array;
const uint64_t *bBytes = (const uint64_t *) B_Bytes_Array;
unsigned char *xorBytes = malloc(sizeof(unsigned char)*length);
for(int i = 0, j=0; i < length; i +=8) {
uint64_t aXORbBytes = aBytes[j] ^ bBytes[j];
//printf("a XOR b = 0x%" PRIx64 "\n", aXORbBytes);
memcpy(xorBytes + i, &aXORbBytes, 8);
j++;
}
return xorBytes;
}
So I did an experiment:
#include <stdlib.h>
#include <stdint.h>
#ifndef TYPE
#define TYPE uint64_t
#endif
TYPE *
xor(const void *va, const void *vb, size_t l)
{
const TYPE *a = va;
const TYPE *b = vb;
TYPE *r = malloc(l);
size_t i;
for (i = 0; i < l / sizeof(TYPE); i++) {
*r++ = *a++ ^ *b++;
}
return r;
}
Compiled both for uint64_t and uint8_t with clang with basic optimizations. In both cases the compiler vectorized the hell out of this. The difference was that the uint8_t version had code to handle when l wasn't a multiple of 8. So if we add code to handle the size not being a multiple of 8, you'll probably end up with equivalent generated code. Also, the 64 bit version unrolled the loop a few times and had code to handle that, so for big enough arrays you might gain a few percent here. On the other hand, on big enough arrays you'll be memory-bound and the xor operation won't matter a bit.
Are you sure your compiler won't deal with this? This is a kind of micro-optimization that makes sense only when you're measuring things and then you wouldn't need to ask which one is faster, you'd know.

Write and read an integer in a char *

I'm coding a little server in c (a chat server) and i want to write and read an integer (and other type of variable like short int, unsigned int blablabla) in my char *data.
I have a structure DataOutput :
typedef struct t_dataoutput
{
char *data;
unsigned int pos;
} DataOutput;
And i have a function to write an int :
int writeInt(DataOutput *out, int i)
{
// here i resize my char *data
out->data[out->pos] = (i >> 24);
out->data[out->pos + 1] = (i >> 16) & 0xff;
out->data[out->pos + 2] = (i >> 8) & 0xff;
out->data[out->pos + 3] = i & 0xff;
out->pos += 4;
}
In my main function i want to try my code :
int main()
{
DataOutput out;
out.writeInt(&out, 9000);
printf("%d\n", (out.data[0] << 24) | (out.data[1] << 16) | (out.data[2] << 8) | (out.data[3]));
}
But the result is not good ... Why ? I don't understand :(
Sorry for my english i'm french ^^ !
Thx for your help !
The DataOutput.data should be forced to unsigned char because char are sometimes signed, depending of the compiler, and shifting signed chars or casting them to int (even implicitly) will propagate the sign bit:
typedef struct t_dataoutput
{
unsigned char *data;
unsigned int pos;
} DataOutput;
The memory for out->data has to be allocated before filled.
You can use realloc in the writeInt function like that:
int writeInt(DataOutput *out, int i)
{
// here i resize my char *data
out->data = realloc(out->data, out->pos + 4);
// TODO: test if out->data == NULL --> not enough memory!
out->data[out->pos] = (i >> 24) & 0xff;
out->data[out->pos + 1] = (i >> 16) & 0xff;
out->data[out->pos + 2] = (i >> 8) & 0xff;
out->data[out->pos + 3] = i & 0xff;
out->pos += 4;
}
Of course I initialize my structure with this function :
void initDataOutput(DataOutput *out)
{
out->data = NULL;
out->pos = 0;
}
But i can't do that :
DataOutput out;
char tmp[4];
out.data = tmp;
out.pos = 0;
Because in my code i could write things like that for example :
DataOutput out;
writeInt(&out, 1);
writeString(&out, "Hello");
sendData(&out);
where the int is a kind of packet id and "Hello" is the connexion message, but if it's an other id it's not the same information in out->data
Oh sorry and I don't understand what you mean when you say : avoid shifting signed integers and chars
I must use an unsigned char *data in my DataOutput structure ?

How to encode a numeric value as bytes

I need to be able to be able to send a numeric value to a remote socket server and so I need to encode possible numbers as bytes.
The numbers are up to 64 bit, ie requiring up to 8 bytes. The very first byte is the type, and it is always a number under 255 so fits in 1 byte.
For example, if the number was 8 and the type was a 32 bit unsigned integer then the type would be 7 which would be copied to the first (leftmost) byte and then the next 4 bytes would be encoded with the actual number (8 in this case).
So in terms of bytes:
byte1: 7
byte2: 0
byte3: 0
byte4: 0
byte5: 8
I hope this is making sense.
Does this code to perform this encoding look like a reasonable approach?
int type = 7;
uint32_t number = 8;
unsigned char* msg7 = (unsigned char*)malloc(5);
unsigned char* p = msg7;
*p++ = type;
for (int i = sizeof(uint32_t) - 1; i >= 0; --i)
*p++ = number & 0xFF << (i * 8);
You'll want to explicitly cast type to avoid a warning:
*p++ = (unsigned char) type;
You want to encode the number with most significant byte first, but you're shifting in the wrong direction. The loop should be:
for (int i = sizeof(uint32_t) - 1; i >= 0; --i)
*p++ = (unsigned char) ((number >> (i * 8)) & 0xFF);
It looks good otherwise.
Your code is reasonable (although I'd use uint8_t, since you are not using the bytes as “characters”, and Peter is of course right wrt the typo), and unlike the commonly found alternatives like
uint32_t number = 8;
uint8_t* p = (uint8_t *) &number;
or
union {
uint32_t number;
uint8_t bytes[4];
} val;
val.number = 8;
// access val.bytes[0] .. val.bytes[3]
is even guaranteed to work. The first alternative will probably work in a debug build, but more and more compilers might break it when optimizing, while the second one tends to work in practice just about everywhere, but is explicitly marked as a bad thing™ by the language standard.
I would drop the loop and use a "caller allocates" interface, like
int convert_32 (unsigned char *target, size_t size, uint32_t val)
{
if (size < 5) return -1;
target[0] = 7;
target[1] = (val >> 24) & 0xff;
target[2] = (val >> 16) & 0xff;
target[3] = (val >> 8) & 0xff;
target[4] = (val) & 0xff;
return 5;
}
This makes it easier for the caller to concatenate multiple fragments into one big binary packet and keep track of the used/needed buffer size.
Do you mean?
for (int i = sizeof(uint32_t) - 1; i >= 0; --i)
*p++ = (number >> (i * 8)) & 0xFF;
Another option to might be to do
// this would work on Big endian systems, e.g. sparc
struct unsignedMsg {
unsigned char type;
uint32_t value;
}
unsignedMsg msg;
msg.type = 7;
msg.value = number;
unsigned char *p = (unsigned char *) &msg;
or
unsigned char* p =
p[0] = 7;
*((uint32_t *) &(p[1])) = number;

convert big endian to little endian in C [without using provided func] [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
Questions must demonstrate a minimal understanding of the problem being solved. Tell us what you've tried to do, why it didn't work, and how it should work. See also: Stack Overflow question checklist
Closed 9 years ago.
Improve this question
I need to write a function to convert big endian to little endian in C. I can not use any library function.
Assuming what you need is a simple byte swap, try something like
Unsigned 16 bit conversion:
swapped = (num>>8) | (num<<8);
Unsigned 32-bit conversion:
swapped = ((num>>24)&0xff) | // move byte 3 to byte 0
((num<<8)&0xff0000) | // move byte 1 to byte 2
((num>>8)&0xff00) | // move byte 2 to byte 1
((num<<24)&0xff000000); // byte 0 to byte 3
This swaps the byte orders from positions 1234 to 4321. If your input was 0xdeadbeef, a 32-bit endian swap might have output of 0xefbeadde.
The code above should be cleaned up with macros or at least constants instead of magic numbers, but hopefully it helps as is
EDIT: as another answer pointed out, there are platform, OS, and instruction set specific alternatives which can be MUCH faster than the above. In the Linux kernel there are macros (cpu_to_be32 for example) which handle endianness pretty nicely. But these alternatives are specific to their environments. In practice endianness is best dealt with using a blend of available approaches
By including:
#include <byteswap.h>
you can get an optimized version of machine-dependent byte-swapping functions.
Then, you can easily use the following functions:
__bswap_32 (uint32_t input)
or
__bswap_16 (uint16_t input)
#include <stdint.h>
//! Byte swap unsigned short
uint16_t swap_uint16( uint16_t val )
{
return (val << 8) | (val >> 8 );
}
//! Byte swap short
int16_t swap_int16( int16_t val )
{
return (val << 8) | ((val >> 8) & 0xFF);
}
//! Byte swap unsigned int
uint32_t swap_uint32( uint32_t val )
{
val = ((val << 8) & 0xFF00FF00 ) | ((val >> 8) & 0xFF00FF );
return (val << 16) | (val >> 16);
}
//! Byte swap int
int32_t swap_int32( int32_t val )
{
val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF );
return (val << 16) | ((val >> 16) & 0xFFFF);
}
Update : Added 64bit byte swapping
int64_t swap_int64( int64_t val )
{
val = ((val << 8) & 0xFF00FF00FF00FF00ULL ) | ((val >> 8) & 0x00FF00FF00FF00FFULL );
val = ((val << 16) & 0xFFFF0000FFFF0000ULL ) | ((val >> 16) & 0x0000FFFF0000FFFFULL );
return (val << 32) | ((val >> 32) & 0xFFFFFFFFULL);
}
uint64_t swap_uint64( uint64_t val )
{
val = ((val << 8) & 0xFF00FF00FF00FF00ULL ) | ((val >> 8) & 0x00FF00FF00FF00FFULL );
val = ((val << 16) & 0xFFFF0000FFFF0000ULL ) | ((val >> 16) & 0x0000FFFF0000FFFFULL );
return (val << 32) | (val >> 32);
}
Here's a fairly generic version; I haven't compiled it, so there are probably typos, but you should get the idea,
void SwapBytes(void *pv, size_t n)
{
assert(n > 0);
char *p = pv;
size_t lo, hi;
for(lo=0, hi=n-1; hi>lo; lo++, hi--)
{
char tmp=p[lo];
p[lo] = p[hi];
p[hi] = tmp;
}
}
#define SWAP(x) SwapBytes(&x, sizeof(x));
NB: This is not optimised for speed or space. It is intended to be clear (easy to debug) and portable.
Update 2018-04-04
Added the assert() to trap the invalid case of n == 0, as spotted by commenter #chux.
If you need macros (e.g. embedded system):
#define SWAP_UINT16(x) (((x) >> 8) | ((x) << 8))
#define SWAP_UINT32(x) (((x) >> 24) | (((x) & 0x00FF0000) >> 8) | (((x) & 0x0000FF00) << 8) | ((x) << 24))
Edit: These are library functions. Following them is the manual way to do it.
I am absolutely stunned by the number of people unaware of __byteswap_ushort, __byteswap_ulong, and __byteswap_uint64. Sure they are Visual C++ specific, but they compile down to some delicious code on x86/IA-64 architectures. :)
Here's an explicit usage of the bswap instruction, pulled from this page. Note that the intrinsic form above will always be faster than this, I only added it to give an answer without a library routine.
uint32 cq_ntohl(uint32 a) {
__asm{
mov eax, a;
bswap eax;
}
}
As a joke:
#include <stdio.h>
int main (int argc, char *argv[])
{
size_t sizeofInt = sizeof (int);
int i;
union
{
int x;
char c[sizeof (int)];
} original, swapped;
original.x = 0x12345678;
for (i = 0; i < sizeofInt; i++)
swapped.c[sizeofInt - i - 1] = original.c[i];
fprintf (stderr, "%x\n", swapped.x);
return 0;
}
here's a way using the SSSE3 instruction pshufb using its Intel intrinsic, assuming you have a multiple of 4 ints:
unsigned int *bswap(unsigned int *destination, unsigned int *source, int length) {
int i;
__m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
for (i = 0; i < length; i += 4) {
_mm_storeu_si128((__m128i *)&destination[i],
_mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&source[i]), mask));
}
return destination;
}
Will this work / be faster?
uint32_t swapped, result;
((byte*)&swapped)[0] = ((byte*)&result)[3];
((byte*)&swapped)[1] = ((byte*)&result)[2];
((byte*)&swapped)[2] = ((byte*)&result)[1];
((byte*)&swapped)[3] = ((byte*)&result)[0];
This code snippet can convert 32bit little Endian number to Big Endian number.
#include <stdio.h>
main(){
unsigned int i = 0xfafbfcfd;
unsigned int j;
j= ((i&0xff000000)>>24)| ((i&0xff0000)>>8) | ((i&0xff00)<<8) | ((i&0xff)<<24);
printf("unsigned int j = %x\n ", j);
}
Here's a function I have been using - tested and works on any basic data type:
// SwapBytes.h
//
// Function to perform in-place endian conversion of basic types
//
// Usage:
//
// double d;
// SwapBytes(&d, sizeof(d));
//
inline void SwapBytes(void *source, int size)
{
typedef unsigned char TwoBytes[2];
typedef unsigned char FourBytes[4];
typedef unsigned char EightBytes[8];
unsigned char temp;
if(size == 2)
{
TwoBytes *src = (TwoBytes *)source;
temp = (*src)[0];
(*src)[0] = (*src)[1];
(*src)[1] = temp;
return;
}
if(size == 4)
{
FourBytes *src = (FourBytes *)source;
temp = (*src)[0];
(*src)[0] = (*src)[3];
(*src)[3] = temp;
temp = (*src)[1];
(*src)[1] = (*src)[2];
(*src)[2] = temp;
return;
}
if(size == 8)
{
EightBytes *src = (EightBytes *)source;
temp = (*src)[0];
(*src)[0] = (*src)[7];
(*src)[7] = temp;
temp = (*src)[1];
(*src)[1] = (*src)[6];
(*src)[6] = temp;
temp = (*src)[2];
(*src)[2] = (*src)[5];
(*src)[5] = temp;
temp = (*src)[3];
(*src)[3] = (*src)[4];
(*src)[4] = temp;
return;
}
}
EDIT: This function only swaps the endianness of aligned 16 bit words. A function often necessary for UTF-16/UCS-2 encodings.
EDIT END.
If you want to change the endianess of a memory block you can use my blazingly fast approach.
Your memory array should have a size that is a multiple of 8.
#include <stddef.h>
#include <limits.h>
#include <stdint.h>
void ChangeMemEndianness(uint64_t *mem, size_t size)
{
uint64_t m1 = 0xFF00FF00FF00FF00ULL, m2 = m1 >> CHAR_BIT;
size = (size + (sizeof (uint64_t) - 1)) / sizeof (uint64_t);
for(; size; size--, mem++)
*mem = ((*mem & m1) >> CHAR_BIT) | ((*mem & m2) << CHAR_BIT);
}
This kind of function is useful for changing the endianess of Unicode UCS-2/UTF-16 files.
If you are running on a x86 or x86_64 processor, the big endian is native. so
for 16 bit values
unsigned short wBigE = value;
unsigned short wLittleE = ((wBigE & 0xFF) << 8) | (wBigE >> 8);
for 32 bit values
unsigned int iBigE = value;
unsigned int iLittleE = ((iBigE & 0xFF) << 24)
| ((iBigE & 0xFF00) << 8)
| ((iBigE >> 8) & 0xFF00)
| (iBigE >> 24);
This isn't the most efficient solution unless the compiler recognises that this is byte level manipulation and generates byte swapping code. But it doesn't depend on any memory layout tricks and can be turned into a macro pretty easily.

Resources