C: packing an integer into a buffer (RFC 4506)

I'm trying to implement this RFC:
4.1. Integer
An XDR signed integer is a 32-bit datum that encodes an integer in
the range [-2147483648,2147483647]. The integer is represented in
two's complement notation. The most and least significant bytes are
0 and 3, respectively. Integers are declared as follows:
int identifier;
(MSB) (LSB)
+-------+-------+-------+-------+
|byte 0 |byte 1 |byte 2 |byte 3 | INTEGER
+-------+-------+-------+-------+
<------------32 bits------------>
and here's my code. I need to know: is there a better way to do this?
#include <stdio.h>

void packInteger(char *buf, long int i)
{
    if (i >= 0) {
        *buf++ = i >> 24;
        *buf++ = i >> 16;
        *buf++ = i >> 8;
        *buf++ = i;
    }
    if (i < 0) {
        i = i * -1;
        i = 1 + (unsigned int)(0xffffffffu - i);
        buf[0] = (unsigned int)i >> 24;
        buf[1] = (unsigned int)i >> 16;
        buf[2] = (unsigned int)i >> 8;
        buf[3] = (unsigned int)i;
    }
}

long int unpackInteger(char *buf)
{
    unsigned long int i2 = ((unsigned long int)buf[0] << 24) |
                           ((unsigned long int)buf[1] << 16) |
                           ((unsigned long int)buf[2] << 8) |
                           buf[3];
    long int i;
    // change unsigned numbers to signed
    if (i2 <= 0x7fffffffu) { i = i2; }
    else { i = -1 - (long int)(0xffffffffu - i2); }
    return i;
}

int main(void)
{
    char buf[4];
    packInteger(buf, -31);
    printf("%u %u %u %u\n", buf[0], buf[1], buf[2], buf[3]);
    long int n = unpackInteger(buf);
    printf("%ld", n);
    return 0;
}
And if someone is on a 64-bit system, will it work or not?
Version 2:
void packInteger(unsigned char *buf, long int i)
{
    unsigned long int j = i; // this will convert to 2's complement
    *buf++ = j >> 24;
    *buf++ = j >> 16;
    *buf++ = j >> 8;
    *buf++ = j;
}

You should be using unsigned char for your buffers.
A cast to an unsigned type in C performs the mathematical equivalent of a conversion to two's complement, so your pack function can be simplified:
void packInteger(unsigned char *buf, long int i)
{
    unsigned long u = i;
    buf[0] = (u >> 24) & 0xffUL;
    buf[1] = (u >> 16) & 0xffUL;
    buf[2] = (u >> 8) & 0xffUL;
    buf[3] = u & 0xffUL;
}
Your unpack function seems fine (with the change to unsigned char).
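For reference, the unpack side with unsigned char buffers might look like this (a sketch; the signed/unsigned reconstruction is the same logic you already have):

long int unpackInteger(unsigned char *buf)
{
    /* unsigned char bytes cannot sign-extend during the shifts and ORs */
    unsigned long int i2 = ((unsigned long int)buf[0] << 24) |
                           ((unsigned long int)buf[1] << 16) |
                           ((unsigned long int)buf[2] << 8) |
                           (unsigned long int)buf[3];
    /* map values above 0x7fffffff back to negative numbers */
    if (i2 <= 0x7fffffffu)
        return (long int)i2;
    return -1 - (long int)(0xffffffffu - i2);
}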

Related

Extracting 3 bytes to a number

What is the fastest way, using bit operators, to return the number represented by 3 different unsigned char variables?
unsigned char byte1 = 200;
unsigned char byte2 = 40;
unsigned char byte3 = 33;
unsigned long number = byte1 + byte2 * 256 + byte3 * 256 * 256;
is the slowest way possible.
Just shift each one into place, and OR them together:
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint8_t a = 0xAB, b = 0xCD, c = 0xEF;
    /*
     * 'a' must first be cast to uint32_t because of the implicit conversion
     * to int, which is only guaranteed to be at least 16 bits.
     * (Thanks Matt McNabb and Tim Čas.)
     */
    uint32_t i = ((uint32_t)a << 16) | (b << 8) | c;
    printf("0x%X\n", i);
    return 0;
}
Do note, however, that almost any modern compiler will replace a multiplication by a power of two with a bit-shift of the appropriate amount.
The fastest way would be direct memory writing, assuming you know the endianness of your system (here the assumption is little-endian):
unsigned char byte1 = 200;
unsigned char byte2 = 40;
unsigned char byte3 = 33;
unsigned long number = 0;
((unsigned char*)&number)[0] = byte1;
((unsigned char*)&number)[1] = byte2;
((unsigned char*)&number)[2] = byte3;
Or if you don't mind a little exercise, you can do something like:
union
{
    unsigned long ulongVal;
    unsigned char chars[4]; // in case your long is 32 bits
} a;
and then by assigning:
a.chars[0] = byte1;
a.chars[1] = byte2;
a.chars[2] = byte3;
a.chars[3] = 0;
you will read the final value from a.ulongVal. This will spare extra memory operations.
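Both tricks depend on the machine being little-endian. A quick sanity check of the union version (my own sketch; it zeroes ulongVal first so it also behaves on systems where unsigned long is 64 bits):

#include <stdio.h>

int main(void)
{
    union
    {
        unsigned long ulongVal;
        unsigned char chars[4];
    } a;

    a.ulongVal = 0;   /* clear any bytes beyond chars[4] on 64-bit longs */
    a.chars[0] = 200; /* byte1, least significant */
    a.chars[1] = 40;  /* byte2 */
    a.chars[2] = 33;  /* byte3, most significant */

    /* on little-endian this prints 2173128 (0x2128C8),
       i.e. 33*65536 + 40*256 + 200 */
    printf("%lu (0x%lX)\n", a.ulongVal, a.ulongVal);
    return 0;
}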

Little-endian convention, and saving to a binary file

I have a matrix (a 2-D int pointer, int **mat) that I am trying to write to a file on Linux in little-endian convention.
Here is my function that writes to the file:
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

#define BUFF_SIZE 4

void write_matrix(int **mat, int n, char *dest_file) {
    int i, j;
    char buff[BUFF_SIZE];
    int fd = open(dest_file, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR | S_IXUSR);
    if (fd < 0) {
        printf("Error: Could not open the file \"%s\".\n", dest_file);
        return;
    }
    buff[0] = (n & 0x000000ff);
    buff[1] = (n & 0x0000ff00) >> 8;
    buff[2] = (n & 0x00ff0000) >> 16;
    buff[3] = (n & 0xff000000) >> 24;
    write(fd, buff, BUFF_SIZE);
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            buff[0] = (mat[i][j] & 0x000000ff);
            buff[1] = (mat[i][j] & 0x0000ff00) >> 8;
            buff[2] = (mat[i][j] & 0x00ff0000) >> 16;
            buff[3] = (mat[i][j] & 0xff000000) >> 24;
            if (write(fd, buff, BUFF_SIZE) != BUFF_SIZE) {
                close(fd);
                printf("Error: could not write to file.\n");
                return;
            }
        }
    }
    close(fd);
}
The problem is that when I write out a large enough matrix of the form mat[i][i] = i (say 512 x 512), I think I get an overflow, since I get weird negative numbers back.
To convert back I use:
#include <assert.h>

void read_matrix(int fd, int **mat, int n, char buff[]) {
    int i, j;
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            assert(read(fd, buff, BUFF_SIZE) == BUFF_SIZE);
            mat[i][j] = byteToInt(buff);
        }
    }
}

int byteToInt(char buff[]) {
    return (buff[3] << 24) | (buff[2] << 16) | (buff[1] << 8) | (buff[0]);
}
What am I doing wrong?
EDITED:
Added the read_matrix function.
It seems like I'm getting a short instead of an int, since 384 (binary 110000000) becomes -128 (binary 10000000).
Did a test, and found out that:
char c = 128;
int i = 0;
i |= c;
gives i = -128. Why????
The problem is in your input conversion:
int byteToInt(char buff[]) {
    return (buff[3] << 24) | (buff[2] << 16) | (buff[1] << 8) | (buff[0]);
}
You don't mention which platform you are on, but on most common platforms char is signed. And that will cause problems. Suppose, for example, that buff[1] is 0x80 (0b10000000). Since it is a signed value, that is the code for the value -128. And since the shift operators start by doing integer promotions on their arguments, that will be converted to the integer -128 before the shift operation is performed; in other words, it will have the value 0xFFFFFF80, which will become 0xFFFF8000 after the shift.
The bitwise logical operators (such as |) perform the usual arithmetic conversions before doing the bitwise operations; in the case of (buff[1] << 8) | (buff[0]), the left-hand operator will already be a signed int (because the type of << is the type of its promoted left-hand argument); the right-hand argument, an implicitly signed char, will also be promoted to a signed int, so again if it were 0x80, it would end up being sign-extended to 0xFFFFFF80.
In either case, the bitwise-or operation will end up with unwanted high-order 1 bits.
Explicitly casting buff[x] to an unsigned int won't help, because it will first be sign-extended to an int before being reinterpreted as an unsigned int. Instead, it is necessary to cast it to an unsigned char:
int byteToInt(char buff[]) {
    return ((unsigned char)buff[3] << 24)
         | ((unsigned char)buff[2] << 16)
         | ((unsigned char)buff[1] << 8)
         | (unsigned char)buff[0];
}
Since int may be 16-bit, it would be better to use long, and indeed it would be better to use unsigned long to avoid other conversion issues. That means doing a double cast:
unsigned long byteToInt(char buff[]) {
    return ((unsigned long)(unsigned char)buff[3] << 24)
         | ((unsigned long)(unsigned char)buff[2] << 16)
         | ((unsigned long)(unsigned char)buff[1] << 8)
         | (unsigned long)(unsigned char)buff[0];
}
What you have is an often-overlooked undefined behaviour: left-shifting negative signed values is undefined.
When you do this
int byteToInt(char buff[]) {
    return (buff[3] << 24) | (buff[2] << 16) | (buff[1] << 8) | (buff[0]);
}
even if one element of buff has a negative value (i.e. one of the binary bytes has its MSB set), you hit undefined behaviour. Since your data is binary, reading it as unsigned makes the most sense. You could use a standard type which makes the signedness and length explicit, such as uint8_t from stdint.h, as in the sketch below.
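For example, a fixed-width version of that suggestion might look like this (a sketch, not the poster's code):

#include <stdint.h>

uint32_t byteToInt(const uint8_t buff[])
{
    /* uint8_t promotes to int without sign extension; the uint32_t
       cast keeps the << 24 shift well-defined for high bytes */
    return ((uint32_t)buff[3] << 24)
         | ((uint32_t)buff[2] << 16)
         | ((uint32_t)buff[1] << 8)
         | (uint32_t)buff[0];
}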

Reversing 7-bit integer encoding

When looking through code of a relatively big existing codebase, I found the following function:
int write_actual_size(unsigned int actual_size, int &out_size)
{
    unsigned char second;
    unsigned char third;
    unsigned char fourth;
    int result;
    int usedBytes;
    *(unsigned char *)out_size = actual_size | 0x80;
    if (actual_size < 0x80) {
        *(unsigned char *)out_size = ((unsigned char)actual_size | 0x80) & 0x7F;
        result = 1;
    } else {
        second = (actual_size >> 7) | 0x80;
        *(unsigned char *)(out_size + 1) = second;
        if (actual_size < 0x4000) {
            *(unsigned char *)(out_size + 1) = second & 0x7F;
            usedBytes = 2;
        } else {
            third = (actual_size >> 14) | 0x80;
            *(unsigned char *)(out_size + 2) = third;
            if (actual_size < 0x200000) {
                *(unsigned char *)(out_size + 2) = third & 0x7F;
                usedBytes = 3;
            } else {
                fourth = (actual_size >> 21) | 0x80;
                *(unsigned char *)(out_size + 3) = fourth;
                if (actual_size < 0x10000000) {
                    *(unsigned char *)(out_size + 3) = fourth & 0x7F;
                    usedBytes = 4;
                }
            }
        }
        result = usedBytes;
    }
    return result;
}
This encodes a normal unsigned integer into one or more bytes, depending on the original input size.
As I understand it, the leftmost bit is used to determine whether there is a "follow-up" byte. I assume the reason for this is to save bandwidth (even if it is at most 3 bytes per packet). Are these valid assumptions?
I want to make a read_actual_size version... Can I just linearly "shift right 7" every byte until I encounter a "0"?
Please don't be very harsh, I'm quite new to C.
A generic VLQ decoder would look something like this:
int decode_vlq(unsigned char *input)
{
    int result = 0;
    do
    {
        result = (result << 7) | (*input & 0x7F);
    }
    while (*input++ & 0x80);
    return result;
}
I'm open to suggestions, since my C is pretty rusty, and I wrote this by hand.
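One caveat: decode_vlq assembles the most significant 7-bit group first (MIDI-style VLQ), whereas write_actual_size above puts the least significant 7 bits in the first byte (LEB128-style). A read_actual_size matching that byte order might look like this (a sketch; it takes a plain unsigned char pointer instead of the original's int reinterpretation):

unsigned int read_actual_size(const unsigned char *input)
{
    unsigned int result = 0;
    int shift = 0;
    do
    {
        /* low 7-bit groups come first, so shift each group into place */
        result |= (unsigned int)(*input & 0x7F) << shift;
        shift += 7;
    } while (*input++ & 0x80);
    return result;
}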

Splitting a hex number

Newbie question. Say, for example, I have the hex number 0xABCDEF; how would I split it into 0xAB, 0xCD and 0xEF?
Is it like this?
unsigned int number = 0xABCDEF;
unsigned int ef = number & 0x000011;
unsigned int cd = (number >> 8) & 0x000011;
unsigned int ab = (number >> 16) & 0x000011;
Thanks
Use 0xff as your mask to keep only the low 8 bits of a number:
unsigned int number = 0xABCDEF;
unsigned int ef = number & 0xff;
unsigned int cd = (number >> 8) & 0xff;
unsigned int ab = (number >> 16) & 0xff;
Instead of the bitwise AND (&) operation, you might instead want ef, cd, ab to be unsigned char types, depending on the rest of your code and how you're working with these variables. In that case you cast to unsigned char:
unsigned int number = 0xABCDEF;
unsigned char ef = (unsigned char) number;
unsigned char cd = (unsigned char) (number >> 8);
unsigned char ab = (unsigned char) (number >> 16);
The mask to use would be 0xFF, not 0x11. Apart from that, you are right.
void splitByte(unsigned char *split, unsigned int a, int quantBytes)
{
    unsigned char aux;
    int i;
    /* extract bytes, least significant first */
    for (i = 0; i < quantBytes; i++)
    {
        split[i] = a & 0x00FF;
        a = (a >> 8);
    }
    /* reverse the array so the most significant byte comes first */
    for (i = 0; i < quantBytes / 2; i++)
    {
        aux = split[i];
        split[i] = split[quantBytes - i - 1];
        split[quantBytes - i - 1] = aux;
    }
}
In the main:
unsigned char split[4];
splitByte(split, 0xffffffff, 4);

Converting number to byte array

Hi, I have a base-10 number, for example 3198, whose hex representation is 0x0C7E.
How do I convert that number to hex and put the hex value in a byte array in the format [00][0C][7E], assuming the biggest value I can have is 0xffffff?
Maybe this will work?
uint32_t x = 0x0C7E;
uint8_t bytes[3];
bytes[0] = (x >> 0) & 0xFF;
bytes[1] = (x >> 8) & 0xFF;
bytes[2] = (x >> 16) & 0xFF;
/* Go back. */
x = (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0);
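Note that this puts the least significant byte in bytes[0], i.e. [7E][0C][00] for 0x0C7E. If you want the [00][0C][7E] layout from the question, just reverse the indices (a sketch):

uint32_t x = 3198; /* 0x000C7E */
uint8_t bytes[3];
bytes[0] = (x >> 16) & 0xFF; /* 0x00 */
bytes[1] = (x >> 8) & 0xFF;  /* 0x0C */
bytes[2] = (x >> 0) & 0xFF;  /* 0x7E */
/* Go back. */
x = ((uint32_t)bytes[0] << 16) | (bytes[1] << 8) | bytes[2];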
The number is already a contiguous memory block; there is no need to convert it to yet another array! Just fetch the separate bytes using pointer arithmetic:
EDIT: Edited to be endianness-independent
#define FAST_ABS(x) ((x ^ (x >> 31)) - (x >> 31))

int is_big_endian(void)
{
    union {
        uint32_t i;
        char c[4];
    } bint = {0x01020304};
    return bint.c[0] == 1;
}

uint32_t num = 0xAABBCCDD;
uint32_t N = is_big_endian() * 3;
printf("first byte 0x%02X\n"
       "second byte 0x%02X\n"
       "third byte 0x%02X\n"
       "fourth byte 0x%02X\n",
       ((unsigned char *)&num)[FAST_ABS(3 - N)],
       ((unsigned char *)&num)[FAST_ABS(2 - N)],
       ((unsigned char *)&num)[FAST_ABS(1 - N)],
       ((unsigned char *)&num)[FAST_ABS(0 - N)]);
#include <stdio.h>

union uint32_value {
    unsigned int value;
    struct little_endian {
        unsigned char fou;
        unsigned char thi;
        unsigned char sec;
        unsigned char fir;
    } le;
    struct big_endian {
        unsigned char fir;
        unsigned char sec;
        unsigned char thi;
        unsigned char fou;
    } be;
};

int main(void)
{
    union uint32_value foo;
    foo.value = 3198;
    /* on a little-endian machine this prints: 00 00 0c 7e */
    printf("%02x %02x %02x %02x\n", foo.le.fir, foo.le.sec, foo.le.thi, foo.le.fou);
    return 0;
}
