How to convert and compare byte array to int - arrays

I'm trying to compare a byte array with a hex number, having a surprisingly hard time.
#include <stdio.h>
int main()
{
int bytes[4] = { 0x7c, 0x71, 0xde, 0xbb };
int number = 0x7c71debb;
printf("%u\n", number);
printf("%u\n", (int)*bytes);
return 0;
}
I'm getting:
2087837371
124
I did some reading and I tried using memcpy as suggested in various places:
#include <stdio.h>
#include <string.h>
int main()
{
int bytes[4] = { 0x7c, 0x71, 0xde, 0xbb };
int number = 0x7c71debb;
int frombytes;
memcpy(&frombytes, bytes, 4);
printf("%u\n", number);
printf("%u\n", frombytes);
return 0;
}
Still the same result:
2087837371
124
I mean, it's been like an hour if I got to be honest frustration is starting to get a hold of me.
It all started from me trying to do this:
if ((unsigned int)bytes == 0x7c71debb)
EDIT:
After switching bytes' type to char or uint8_t, here's what I'm getting:
#include <stdio.h>
#include <string.h>
#include <stdint.h>
int main()
{
uint8_t bytes[4] = { 0x7c, 0x71, 0xde, 0xbb };
int number = 0x7c71debb;
int frombytes;
memcpy(&frombytes, bytes, 4);
printf("%u\n", number);
printf("%u\n", (int)*bytes);
printf("%u\n", frombytes);
return 0;
}
Results:
2087837371
124
3151917436

You are making two assumptions:
You're assuming int is exactly 32 bits in size.
This might be correct, but it could be smaller or larger. You should use int32_t or uint32_t instead.
You're assuming a big-endian machine.
If you are using an x86 or x86_64, this is incorrect. These are little-endian architectures. The bytes are ordered from least-significant to most-significant.
The following code avoids those assumptions:
int32_t frombytes =
(uint32_t)bytes[0] << ( 8 * 3 ) |
(uint32_t)bytes[1] << ( 8 * 2 ) |
(uint32_t)bytes[2] << ( 8 * 1 ) |
(uint32_t)bytes[3] << ( 8 * 0 );
(It looks a bit expensive, but your compiler should optimize this.)

You can build the int using bitwise operators on the array values.
The below code, which assumes big-endian architecture and ints that are at least 4 bytes long, outputs:
2087837371
2087837371
#include <stdio.h>
int main()
{
int bytes[4] = { 0x7c, 0x71, 0xde, 0xbb };
int number = 0x7c71debb;
int number2 = bytes[3] | bytes[2] << 8 | bytes[1] << 16 | bytes[0] << 24;
printf("%u\n", number);
printf("%u\n", number2);
return 0;
}

Related

How to concatenate the hexadecimal data in an array in C

I have my data field as follows DATA = 0x02 0x01 0x02 0x03 0x04 0x05 0x06 0x07 Now I want to concatenate this data as follows DATA = 0x01020304050607. How can I do it using C program. I found a program in C for concatenation of data in an array and the program is as follows:
#include<stdio.h>
int main(void)
{
int num[3]={1, 2, 3}, n1, n2, new_num;
n1 = num[0] * 100;
n2 = num[1] * 10;
new_num = n1 + n2 + num[2];
printf("%d \n", new_num);
return 0;
}
For the hexadecimal data in the array how can I manipulate the above program to concatenate the hexadecimal data?
You need a 64 bit variable num as result, instead of 10 as factor you need 16, and instead of 100 as factor, you need 256.
But if your data is provided as an array of bytes, then you can simply insert complete bytes, i.e. repeatedly shifting by 8 bits (meaning a factor of 256):
int main(void)
{
uint8_t data[8] = { 0x02, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
unsigned long long num = 0;
for (int i=0; i<8; i++) {
num <<=8; // shift by a complete byte, equal to num *= 256
num |= data[i]; // write the respective byte
}
printf("num is %016llx\n",num);
return 0;
}
Output:
num is 0201020304050607
Lest say you have input like
int DATA[8] = {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07};
If you want output like 0x0001020304050607, to store this resultant output you need one variable of unsigned long long type. For e.g
int main(void) {
int DATA[8] = {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07};
int ele = sizeof(DATA)/sizeof(DATA[0]);
unsigned long long mask = 0x00;
for(int row = 0; row < ele; row++) {
mask = mask << 8;/* every time left shifted by 8(0x01-> 0000 0001) times */
mask = DATA[row] | mask; /* put at correct location */
}
printf("%016llx\n",mask);
return 0;
}
Here's some kind of hack that writes your data directly into an integer, without any bitwise operators:
#include <stdio.h>
#include <stdint.h>
#include <string.h>
uint64_t numberize(const uint8_t from[8]) {
uint64_t r = 0;
uint8_t *p = &r;
#if '01' == 0x4849 // big endian
memcpy(p, from, 8);
#else // little endian
for (int i=7; i >= 0; --i)
*p++ = from[i];
#endif
return r;
}
int main() {
const uint8_t data[8] = { 0x02, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
printf("result is %016llx\n", numberize(data));
return 0;
}
This does work and outputs this independently of the endianness of your machine:
result is 0201020304050607
The compile-time endianness test was taken from this SO answer.

little endian to uint with undetermined numbers of bytes

I was trying to write a function that took in N bytes of little endian hex and made it into an unsigned int.
unsigned int endian_to_uint(char* buf, int num_bytes)
{
if (num_bytes == 0)
return (unsigned int) buf[0];
return (((unsigned int) buf[num_bytes -1]) << num_bytes * 8) | endian_to_uint(buf, num_bytes - 1);
}
however, the value returned is approx ~256 times larger than the expected value. Why is that?
If I needed to do use it for a 4 byte buffer, normally you'd do:
unsigned int endian_to_uint32(char* buf)
{
return (((unsigned int) buf[3]) << 24)
| (((unsigned int) buf[2]) << 16)
| (((unsigned int) buf[1]) << 8)
| (((unsigned int) buf[0]));
}
which should be reproduced by the recursive function I wrote, or is there some arithmetic error that I haven't caught?
The below code snippet would work.
unsigned int endian_to_uint(unsigned char* buf, int num_bytes)
{
if (num_bytes == 0)
return (unsigned int) buf[0];
return (((unsigned int) buf[num_bytes -1]) << (num_bytes -1) * 8) | endian_to_uint(buf, num_bytes - 1);
}
Change 1:
Modified the function argument data type from char* to unsigned char *
Reason: For a given buf[] = {0x12, 0x34, 0xab, 0xcd};
When you are trying to read buf[3] i.e here buf[num_bytes -1] will give you 0xffffffcd instead of just 0xcd because of sign extension. For more info on sign extension refer Sign Extension
Change 2:
Use num_bytes-1 when calculating the shift position value. This was a logical error in calculation of the number of bits to be shifted.
There is absolutely no reason to use recursion here: bit shifts is among the fastest operations available, recursion is among the slowest. In addition, recursion is dangerous, hard to read and gives nasty peak stack consumption. It should be avoided in general.
In addition, your function is not a general one, since you return unsigned int, making the function inferior to the shift version in every single way.
To actually write a generic-size little endian conversion function, you can do like this:
void little_endian (size_t bytes, uint8_t dest[bytes], const uint8_t src[bytes])
{
for(size_t i=0; i<bytes; i++)
{
dest[i] = src[bytes-i-1];
}
}
Working example:
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
void little_endian (size_t bytes, uint8_t dest[bytes], const uint8_t src[bytes])
{
for(size_t i=0; i<bytes; i++)
{
dest[i] = src[bytes-i-1];
}
}
int main (void)
{
uint8_t data [] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
uint32_t u32;
uint64_t u64;
little_endian(4, (uint8_t*)&u32, data);
little_endian(8, (uint8_t*)&u64, data);
printf("%"PRIx32"\n", u32);
printf("%"PRIx64"\n", u64);
return 0;
}

Cast byte array to int

I have an array of four bytes and want to cast it to an int. The following code works just fine for that:
#include <stdio.h>
#include <stdint.h>
int main(void) {
uint8_t array[4] = {0xDE, 0xAD, 0xC0, 0xDE};
uint32_t myint;
myint = (uint32_t)(array[0]) << 24;
myint |= (uint32_t)(array[1]) << 16;
myint |= (uint32_t)(array[2]) << 8;
myint |= (uint32_t)(array[3]);
printf("0x%x\n",myint);
return 0;
}
The result is as expected:
$./test
0xdeadc0de
Now I want to do this in a one-liner like this:
#include <stdio.h>
#include <stdint.h>
int main(void) {
uint8_t array[4] = {0xDE, 0xAD, 0xC0, 0xDE};
uint32_t myint = (uint32_t)(array[0]) << 24 || (uint32_t)(array[1]) << 16 || (uint32_t)(array[2]) << 8 || (uint32_t)(array[3]);
printf("0x%x\n",myint);
return 0;
}
But this results in:
$./test
0x1
Why does my program behave like this?
Your are mixing up the operators for the logical or (||) and the bit wise or (|).
Do
uint32_t myint = (uint32_t)(array[0]) << 24
| (uint32_t)(array[1]) << 16
| (uint32_t)(array[2]) << 8
| (uint32_t)(array[3]);
Logical OR || is different from bitwise OR |
So in your 2nd snippet you use || use |

C: 4 elements of an array to a DWORD

Now can I copy 4 bytes (known start position, NOT 0) in an array (char*data) to a DWORD?
There are two parts to this: first, getting the 4 bytes from the array at the specified position (4 elements, each element is a char which is 1 byte in C), and then moving them to a DWORD.
I'm not sure how to do either.
Thanks!
You can use memcpy (it requires string.h header):
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
int main(void)
{
uint8_t array[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09
}
uint32_t dword;
size_t offset = 2;
memcpy(&dword, array + offset, sizeof(dword));
printf("Dword value = 0x%08x\n", dword);
return 0;
}
I think below code shoudl work
memcpy(destination, input+offset, howMany);
What you look for is deserialization. In this case, somewhere must be specified if the data in your char * are big endian (start with the highest byte) or little endian (start with the lowest byte). For portable applications, BE might be the favourite.
After you have done so, your function might look like
uintmax_t readarrayvalue_be(const char * data, uint8_t bytes)
{
uintmax_t value = 0;
while (bytes) {
value <<= 8;
value |= *(data++) & 0xFF;
bytes--;
}
return value;
}
while in the concrete case it might be restructured as
uint32_t readarrayvalue_u32_be(const char * data)
{
uint32_t value = ((data[0] & 0xFF) << 24) | ((data[1] & 0xFF) << 16) | ((data[2] & 0xFF) << 8) | (data[3] & 0xFF);
return value;
}

Comparing arbitrary bit sequences in a byte array in c

I have a couple uint8_t arrays in my c code, and I'd like to compare an arbitrary sequence bits from one with another. So for example, I have bitarray_1 and bitarray_2, and I'd like to compare bits 13 - 47 from bitarray_1 with bits 5-39 of bitarray_2. What is the most efficient way to do this?
Currently it's a huge bottleneck in my program, since I just have a naive implementation that copies the bits into the beginning of a new temporary array, and then uses memcmp on them.
three words: shift, mask and xor.
shift to get the same memory alignment for both bitarray. If not you will have to shift one of the arrays before comparing them. Your exemple is probably misleading because bits 13-47 and 5-39 have the same memory alignment on 8 bits addresses. This wouldn't be true if you were comparing say bits 14-48 with bits 5-39.
Once everything is aligned and exceeding bits cleared for table boundaries a xor is enough to perform the comparison of all the bits at once. Basically you can manage to do it with just one memory read for each array, which should be pretty efficient.
If memory alignment is the same for both arrays as in your example memcmp and special case for upper and lower bound is probably yet faster.
Also accessing array by uint32_t (or uint64_t on 64 bits architectures) should also be more efficient than accessing by uint8_t.
The principle is simple but as Andrejs said the implementation is not painless...
Here is how it goes (similarities with #caf proposal is no coincidence):
/* compare_bit_sequence() */
int compare_bit_sequence(uint8_t s1[], unsigned s1_off, uint8_t s2[], unsigned s2_off,
unsigned length)
{
const uint8_t mask_lo_bits[] =
{ 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
const uint8_t clear_lo_bits[] =
{ 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 };
uint8_t v1;
uint8_t * max_s1;
unsigned end;
uint8_t lsl;
uint8_t v1_mask;
int delta;
/* Makes sure the offsets are less than 8 bits */
s1 += s1_off >> 3;
s1_off &= 7;
s2 += s2_off >> 3;
s2_off &= 7;
/* Make sure s2 is the sequence with the shorter offset */
if (s2_off > s1_off){
uint8_t * tmp_s;
unsigned tmp_off;
tmp_s = s2; s2 = s1; s1 = tmp_s;
tmp_off = s2_off; s2_off = s1_off; s1_off = tmp_off;
}
delta = s1_off;
/* handle the beginning, s2 incomplete */
if (s2_off > 0){
delta = s1_off - s2_off;
v1 = delta
? (s1[0] >> delta | s1[1] << (8 - delta)) & clear_lo_bits[delta]
: s1[0];
if (length <= 8 - s2_off){
if ((v1 ^ *s2)
& clear_lo_bits[s2_off]
& mask_lo_bits[s2_off + length]){
return NOT_EQUAL;
}
else {
return EQUAL;
}
}
else{
if ((v1 ^ *s2) & clear_lo_bits[s2_off]){
return NOT_EQUAL;
}
length -= 8 - s2_off;
}
s1++;
s2++;
}
/* main loop, we test one group of 8 bits of v2 at each loop */
max_s1 = s1 + (length >> 3);
lsl = 8 - delta;
v1_mask = clear_lo_bits[delta];
while (s1 < max_s1)
{
if ((*s1 >> delta | (*++s1 << lsl & v1_mask)) ^ *s2++)
{
return NOT_EQUAL;
}
}
/* last group of bits v2 incomplete */
end = length & 7;
if (end && ((*s2 ^ *s1 >> delta) & mask_lo_bits[end]))
{
return NOT_EQUAL;
}
return EQUAL;
}
All possible optimisations are not yet used. One promising one would be to use larger chunks of data (64 bits or 32 bits at once instead of 8), you could also detect cases where offset are synchronised for both arrays and in such cases use a memcmp instead of the main loop, replace modulos % 8 by logical operators & 7, replace '/ 8' by '>> 3', etc., have to branches of code instead of swapping s1 and s2, etc, but the main purpose is achieved : only one memory read and not memory write for each array item hence most of the work can take place inside processor registers.
bits 13 - 47 of bitarray_1 are the same as bits 5 - 39 of bitarray_1 + 1.
Compare the first 3 bits (5 - 7) with a mask and the other bits (8 - 39) with memcmp().
Rather than shift and copy the bits, maybe representing them differently is faster. You have to measure.
/* code skeleton */
static char bitarray_1_bis[BIT_ARRAY_SIZE*8+1];
static char bitarray_2_bis[BIT_ARRAY_SIZE*8+1];
static const char *lookup_table[] = {
"00000000", "00000001", "00000010" /* ... */
/* 256 strings */
/* ... */ "11111111"
};
/* copy every bit of bitarray_1 to an element of bitarray_1_bis */
for (k = 0; k < BIT_ARRAY_SIZE; k++) {
strcpy(bitarray_1_bis + 8*k, lookup_table[bitarray_1[k]]);
strcpy(bitarray_2_bis + 8*k, lookup_table[bitarray_2[k]]);
}
memcmp(bitarray_1_bis + 13, bitarray_2_bis + 5, 47 - 13 + 1);
You can (and should) limit the copy to the minimum possible.
I have no idea if it's faster, but it wouldn't surprise me if it was. Again, you have to measure.
The easiest way to do this is to convert the more complex case into a simpler case, then solve the simpler case.
In the following code, do_compare() solves the simpler case (where the sequences are never offset by more than 7 bits, s1 is always offset as much or more than s2, and the length of the sequence is non-zero). The compare_bit_sequence() function then takes care of converting the harder case to the easier case, and calls do_compare() to do the work.
This just does a single-pass through the bit sequences, so hopefully that's an improvement on your copy-and-memcmp implementation.
#define NOT_EQUAL 0
#define EQUAL 1
/* do_compare()
*
* Does the actual comparison, but has some preconditions on parameters to
* simplify things:
*
* length > 0
* 8 > s1_off >= s2_off
*/
int do_compare(const uint8_t s1[], const unsigned s1_off, const uint8_t s2[],
const unsigned s2_off, const unsigned length)
{
const uint8_t mask_lo_bits[] =
{ 0xff, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
const uint8_t mask_hi_bits[] =
{ 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
const unsigned msb = (length + s1_off - 1) / 8;
const unsigned s2_shl = s1_off - s2_off;
const unsigned s2_shr = 8 - s2_shl;
unsigned n;
uint8_t s1s2_diff, lo_bits = 0;
for (n = 0; n <= msb; n++)
{
/* Shift s2 so it is aligned with s1, pulling in low bits from
* the high bits of the previous byte, and store in s1s2_diff */
s1s2_diff = lo_bits | (s2[n] << s2_shl);
/* Save the bits needed to fill in the low-order bits of the next
* byte. HERE BE DRAGONS - since s2_shr can be 8, this below line
* only works because uint8_t is promoted to int, and we know that
* the width of int is guaranteed to be >= 16. If you change this
* routine to work with a wider type than uint8_t, you will need
* to special-case this line so that if s2_shr is the width of the
* type, you get lo_bits = 0. Don't say you weren't warned. */
lo_bits = s2[n] >> s2_shr;
/* XOR with s1[n] to determine bits that differ between s1 and s2 */
s1s2_diff ^= s1[n];
/* Look only at differences in the high bits in the first byte */
if (n == 0)
s1s2_diff &= mask_hi_bits[8 - s1_off];
/* Look only at differences in the low bits of the last byte */
if (n == msb)
s1s2_diff &= mask_lo_bits[(length + s1_off) % 8];
if (s1s2_diff)
return NOT_EQUAL;
}
return EQUAL;
}
/* compare_bit_sequence()
*
* Adjusts the parameters to match the preconditions for do_compare(), then
* calls it to do the work.
*/
int compare_bit_sequence(const uint8_t s1[], unsigned s1_off,
const uint8_t s2[], unsigned s2_off, unsigned length)
{
/* Handle length zero */
if (length == 0)
return EQUAL;
/* Makes sure the offsets are less than 8 bits */
s1 += s1_off / 8;
s1_off %= 8;
s2 += s2_off / 8;
s2_off %= 8;
/* Make sure s2 is the sequence with the shorter offset */
if (s1_off >= s2_off)
return do_compare(s1, s1_off, s2, s2_off, length);
else
return do_compare(s2, s2_off, s1, s1_off, length);
}
To do the comparison in your example, you'd call:
compare_bit_sequence(bitarray_1, 13, bitarray_2, 5, 35)
(Note that I am numbering the bits from zero, and assuming that the bitarrays are laid out little-endian, so this will start the comparison from the sixth-least-significant bit in bitarray2[0], and the sixth-least-signifcant bit in bitarray1[1]).
What about writing the function that will calculate the offsets from both arrays, apply the mask, shift the bits and store the result to the int so you may compare them. If the bits count (34 in your example) exceeds the length of the int - recurse or loop.
Sorry, the example will be pain in the ass.
Here is my unoptimized bit sequence comparison function:
#include <stdio.h>
#include <stdint.h>
// 01234567 01234567
uint8_t bitsA[] = { 0b01000000, 0b00010000 };
uint8_t bitsB[] = { 0b10000000, 0b00100000 };
int bit( uint8_t *bits, size_t bitpoz, size_t len ){
return (bitpoz<len)? !!(bits[bitpoz/8]&(1<<(7-bitpoz%8))): 0;
}
int bitcmp( uint8_t *bitsA, size_t firstA, size_t lenA,
uint8_t *bitsB, size_t firstB, size_t lenB ){
int cmp;
for( size_t i=0; i<lenA || i<lenB; i++ ){
if( (cmp = bit(bitsA,firstA+i,firstA+lenA) -
bit(bitsB,firstB+i,firstB+lenB)) ) return cmp;
}
return 0;
}
int main(){
printf( "cmp: %i\n", bitcmp( bitsA,1,11, bitsB,0,11 ) );
}
EDIT: Here is my (untested) bitstring equality test function:
#include <stdlib.h>
#include <stdint.h>
#define load_64bit(bits,first) (*(uint64_t*)bits<<first | *(bits+8)>>(8-first))
#define load_32bit(bits,first) (*(uint32_t*)bits<<first | *(bits+4)>>(8-first))
#define load_16bit(bits,first) (*(uint16_t*)bits<<first | *(bits+2)>>(8-first))
#define load_8bit( bits,first) ( *bits<<first | *(bits+1)>>(8-first))
static inline uint8_t last_bits( uint8_t *bits, size_t first, size_t size ){
return (first+size>8?load_8bit(bits,first):*bits<<first)>>(8-size);
}
int biteq( uint8_t *bitsA, size_t firstA,
uint8_t *bitsB, size_t firstB, size_t size ){
if( !size ) return 1;
bitsA+=firstA/8; firstA%=8;
bitsB+=firstB/8; firstB%=8;
for(; size>64;size-=64,bitsA+=8,bitsB+=8)
if(load_64bit(bitsA,firstA)!=load_64bit(bitsB,firstB)) return 0;
for(; size>32;size-=32,bitsA+=4,bitsB+=4)
if(load_32bit(bitsA,firstA)!=load_32bit(bitsB,firstB)) return 0;
for(; size>16;size-=16,bitsA+=2,bitsB+=2)
if(load_16bit(bitsA,firstA)!=load_16bit(bitsB,firstB)) return 0;
for(; size> 8;size-= 8,bitsA++, bitsB++ )
if(load_8bit( bitsA,firstA)!=load_8bit( bitsB,firstB)) return 0;
return !size ||
last_bits(bitsA,firstA,size)==last_bits(bitsB,firstB,size);
}
I made a simple measurement tool to see how fast is it:
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#define SIZE 1000000
uint8_t bitsC[SIZE];
volatile int end_loop;
void sigalrm_hnd( int sig ){ (void)sig; end_loop=1; }
int main(){
uint64_t loop_count; int cmp;
signal(SIGALRM,sigalrm_hnd);
loop_count=0; end_loop=0; alarm(10);
while( !end_loop ){
for( int i=1; i<7; i++ ){
loop_count++;
cmp = biteq( bitsC,i, bitsC,7-i,(SIZE-1)*8 );
if( !cmp ){ printf( "cmp: %i (==0)\n", cmp ); return -1; }
}
}
printf( "biteq: %.2f round/sec\n", loop_count/10.0 );
}
Result:
bitcmp: 8.40 round/sec
biteq: 363.60 round/sec
EDIT2: last_bits() changed.

Resources