How can I use Bit-Fields to save memory?

How can I use Bit-Fields to save memory? - c

This is about ANSI-C (C90). This is what I know:
I can directly tell the compiler how many bits I want for a specific variable.
If I want 1 bit which can have the values zero or one.
or 2 bits for the values 0,1,2,3, and so on...;
I'm familiar with the syntax.
I have problem concerning bitfields:
I want to define a SET structure.
It can have maximum 1024 elements (it can have less, but the maximum is 1024 elements).
The domain of the set is from 1 to 1024. So an element could have any value 1-1024.
I'm trying to create a structure for a SET, and it must be efficient as possible for the memory part.
I tried:
typedef struct set
{
unsigned int var: 1;
} SET;
//now define an array of SETS
SET array_of_sets[MAX_SIZE] //didn't define MAX_SIZE, but no more than 1024 elements in each set.
I know this isn't efficient; maybe it's even not good for what I want. That's why I'm looking for help.

As noted in extensive comments, using a bit field is not the way to go. You can use just 128 bytes of storage for your set containing values 1..1024. You will need to map the value N to bit N-1 (so you have bits 0..1023 to work with). You also need to decide on the operations you need for your set. This code supports 'create', 'destroy', 'insert', 'delete' and 'in_set'. It does not support iteration over the elements in the set; that can be added if you want it.
sets.h
#ifndef SETS_H_INCLUDED
#define SETS_H_INCLUDED
typedef struct Set Set;
enum { MAX_ELEMENTS = 1024 };
extern Set *create(void);
extern void destroy(Set *set);
extern void insert(Set *set, int value);
extern void delete(Set *set, int value);
extern int in_set(Set *set, int value);
#endif /* SETS_H_INCLUDED */
sets.c
#include "sets.h"
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
typedef unsigned long Bits;
#define BITS_C(n) ((Bits)(n))
enum { ARRAY_SIZE = MAX_ELEMENTS / (sizeof(Bits) * CHAR_BIT) };
struct Set
{
Bits set[ARRAY_SIZE];
};
Set *create(void)
{
Set *set = malloc(sizeof(*set));
if (set != 0)
memset(set, 0, sizeof(*set));
return set;
}
void destroy(Set *set)
{
free(set);
}
void insert(Set *set, int value)
{
assert(value >= 1 && value <= MAX_ELEMENTS);
value--; /* 0..1023 */
int index = value / (sizeof(Bits) * CHAR_BIT);
int bitnum = value % (sizeof(Bits) * CHAR_BIT);
Bits mask = BITS_C(1) << bitnum;
/* printf("I: %d (%d:%d:0x%.2lX)\n", value+1, index, bitnum, mask); */
set->set[index] |= mask;
}
void delete(Set *set, int value)
{
assert(value >= 1 && value <= MAX_ELEMENTS);
value--; /* 0..1023 */
int index = value / (sizeof(Bits) * CHAR_BIT);
int bitnum = value % (sizeof(Bits) * CHAR_BIT);
Bits mask = BITS_C(1) << bitnum;
/* printf("D: %d (%d:%d:0x%.2lX)\n", value+1, index, bitnum, mask); */
set->set[index] &= ~mask;
}
/* C90 does not support <stdbool.h> */
int in_set(Set *set, int value)
{
assert(value >= 1 && value <= MAX_ELEMENTS);
value--; /* 0..1023 */
int index = value / (sizeof(Bits) * CHAR_BIT);
int bitnum = value % (sizeof(Bits) * CHAR_BIT);
Bits mask = BITS_C(1) << bitnum;
/* printf("T: %d (%d:%d:0x%.2lX) = %d\n", value+1, index, bitnum, mask,
(set->set[index] & mask) != 0); */
return (set->set[index] & mask) != 0;
}
#include <stdio.h>
enum { NUMBERS_PER_LINE = 15 };
int main(void)
{
Set *set = create();
if (set != 0)
{
int i;
int n = 0;
for (i = 1; i <= MAX_ELEMENTS; i += 4)
insert(set, i);
for (i = 3; i <= MAX_ELEMENTS; i += 6)
delete(set, i);
for (i = 1; i <= MAX_ELEMENTS; i++)
{
if (in_set(set, i))
{
printf(" %4d", i);
if (++n % NUMBERS_PER_LINE == 0)
{
putchar('\n');
n = 0;
}
}
}
if (n % NUMBERS_PER_LINE != 0)
putchar('\n');
destroy(set);
}
return 0;
}
The functions should really be given a systematic prefix, such as set_. The BITS_C macro is based on the INT64_C macro (and the other related macros) defined in <stdint.h> in C99 and later, which is also not a part of C90.

As per my previous comments, here is an example of how you can pack eight 1-bit elements into one char physical element.
I have only implemented the function to get the value of a 1-bit element, I leave the function to set it to you (it's easy to do).
Note: you can easily change the type of the array element (unsigned char) and experiment with types which can hold more bits (e.g unsigned int) and test if they perform better in terms of speed.
You can also modify the code to make it handle elements bigger than one bit.
#include <stdio.h>
#include <limits.h>
unsigned int get_el(unsigned char* array, unsigned int index)
{
unsigned int bits_per_arr_el = sizeof(unsigned char)*CHAR_BIT;
unsigned int arr_index = index / bits_per_arr_el;
unsigned int bit_offset = index % bits_per_arr_el;
unsigned int bitmask = 1 << bit_offset;
unsigned int retval;
// printf("index=%u\n", index);
// printf("bits_per_arr_el=%u\n", bits_per_arr_el);
// printf("arr_index=%u\n", arr_index);
// printf("bit_offset=%u\n", bit_offset);
retval = array[arr_index] & bitmask ? 1 : 0; // can be simpler if only True/False is needed
return(retval);
}
#define MAX_SIZE 10
unsigned char bitarray[MAX_SIZE];
int main()
{
bitarray[1] = 3; // 00000011
printf("array[7]=%u, array[8]=%u, array[9]=%u, array[10]=%u\n",
get_el(bitarray, 7),
get_el(bitarray, 8),
get_el(bitarray, 9),
get_el(bitarray,10));
return 0;
}
outputs
array[7]=0, array[8]=1, array[9]=1, array[10]=0

typedef struct set
{
unsigned short var:10; // uint var:1 will be padded to 32 bits
} SET; // ushort var:10 (which is max<=1024) padded to 16 bits
As was commented by #Jonathan Leffler use array(unsigned short[])
and define bitmasks
#define bitZer 0x00 //(unsigned)(0 == 0)? true:true;
#define bitOne 0x10 // so from (both inclusive)0-1023 = 1024
... // added for clarification
#define bitTen 0x0A
to look into the bits of each element.
http://www.catb.org/esr/structure-packing/ detailed

To store a value from 0 to 1023 (or from 1 to 1024, which is essentially the same and only involves adding/subtracting 1) you need a minimum of 10 bits.
This means that for 32-bit (unsigned) integers, you can pack 3 values into 30 bits, which gives 2 bits of useless padding.
Example:
%define ELEMENTS 100
uint32_t myArray[ (ELEMENTS + 2) / 3 ];
void setValue(int n, int value) {
uint32_t temp;
uint32_t mask = (1 << 10) - 1;
if(n >= ELEMENTS) return;
value--; // Convert "1 to 1024" into "0 to 1023"
temp = myArray[n / 3];
mask = mask << (n % 3)*10;
temp = (temp & ~mask) | (value << (n % 3)*10);
myArray[n / 3] = temp;
}
int getValue(int n) {
uint32_t temp;
uint32_t mask = (1 << 10) - 1;
if(n >= ELEMENTS) return 0;
temp = myArray[n / 3];
temp >>= (n % 3)*10;
return (temp & ~mask) + 1;
}
You can do this with bitfields instead, but the code to get/set individual values will end up using branches (e.g. switch( n%3 )) which will be slower in practice.
Removing those 2 bits of padding will cost a little more complexity and a little more overhead. For example:
%define ELEMENTS 100
uint32_t myArray[ (ELEMENTS*10 + 31) / 32 ];
int getValue(int n) {
uint64_t temp;
uint64_t mask = (1 << 10) - 1;
if(n >= ELEMENTS) return 0;
temp = myArray[n*10/32 + 1];
temp = (temp << 32) | myArray[n*10/32];
temp >>= (n*10 % 32);
return (temp & ~mask) + 1;
}
This can't be done with bitfields. This is the most space efficient way to store an array of values that range from 1 to 1024.

If you are storing an "array of booleans" or setting flags, it can be useful. For instance, you can initialize or compare up to 64 values at a time.
These macros will work for unsigned char, short, int, long long ... but simplifies significantly if you just pick a type (so you can use a safer static inline function)
#define getbit(x,n) x[n/(sizeof(*x)*8)] & (typeof(*x))1 << (n&((sizeof(*x)*8)-1))
#define setbit(x,n) x[n/(sizeof(*x)*8)] |= (typeof(*x))1 << (n&((sizeof(*x)*8)-1))
#define flpbit(x,n) x[n/(sizeof(*x)*8)] ^= (typeof(*x))1 << (n&((sizeof(*x)*8)-1))
#define clrbit(x,n) x[n/(sizeof(*x)*8)] &= ~( (typeof(*x))1 << (n&((sizeof(*x)*8)-1)) )
to initialize a large array of booleans all you need to do is: char cbits[]={0,0xF,0,0xFF};
or for all zeroes char cbits[4]={0};
or an int example: int ibits[]={0xF0F0F0F0,~0};
//1111000011110000111100001111000011111111111111111111111111111111
If you will only be accessing 1 type of array, it may be better to make the macros into proper functions like:
static inline unsigned char getbit(unsigned char *x, unsigned n){
return x[n>>3] & 1 << (n&7);
}
//etc... similar for other types and functions from macros above
You can also compare multiple flags at a time by '|'ing the flags together and using '&'ed masks; however, it does get a bit more complex when you exceed the native types
For your particular instance you can initialize to all zeroes by:
unsigned char flags[128]={0};
or all 1's by:
uint64_t flags[128] = {~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0};
You can even use enums to name your flags
enum{
WHITE, //0
RED, //1
BLUE, //2
GREEN, //3
...
BLACK //1023
}
if (getbit(flags,WHITE) && getbit(flags,RED) && getbit(flags,BLUE))
printf("red, white and blue\n");

1) The proper solution for this question is to use Bit Array
The question provided the solution with Bit Fields with Struct. There are two typical ways to save memory space for bits related problem, another is to use Bit Array. For this specific case in the question, the better way is to use Bit Array (demoed as follows).
If it is the case like purely independent bit flags here, go
for the Bit Array
If there is a group of relevant bits , such as the IP address or Control Word definition, then it's better to combine them with a struct, that is to use Bit Fields with Sturct
2) Sample code just for demo Bit Array
#include<limits.h>
#define BITS_OF_INT (sizeof(int)*CHAR_BIT)
void SetBit(int A[], int k)
{
//Set the bit at the k-th position
A[k/BITS_OF_INT] |= 1 <<(k%BITS_OF_INT);
}
void ClearBit(int A[], int k)
{
//RESET the bit at the k-th position
A[k/BITS_OF_INT] &= ~(1 <<(k%BITS_OF_INT)) ;
}
int TestBit(int A[], int k)
{
// Return TRUE if bit set
return ((A[k/BITS_OF_INT] & (1 <<(k%BITS_OF_INT)))!= 0) ;
}
#define MAX_SIZE 1024
int main()
{
int A[MAX_SIZE/BITS_OF_INT];
int i;
int pos = 100; // position
for (i = 0; i < MAX_SIZE/BITS_OF_INT; i++)
A[i] = 0;
SetBit(A, pos);
if (TestBit(A, pos)){//do something}
ClearBit(A, pos);
}
3) Furthermore, a worthwhile discussing point from this question is,
How to choose a proper solution between "Bit Array" and "Bit fields with struct"?
Here are some references about this topic.
When to use bit-fields in C?
Readable and Maintainable Bitfields in C

Related

Bitshift on structures

I'm unsure if this is possible due to structure padding and alignment but, assuming you take care of that by aligning your structures to 4/8 bytes, is it possible to bit shift on a structure as if it was a single variable?
What I'd like to do is take a string (max 8 bytes) and shift it into the high order bits of a 64-bit variable.
Like if I do this:
#include <stdint.h>
#include <string.h>
void shiftstr(uint64_t* t,char* c,size_t len){
memcpy(t, c, len);
//now *t==0x000000617369616b
*t<<=(sizeof(uint64_t)-len)*8;
//now *t==0x617369616b000000
}
int main(){
uint64_t k = 0;
char n[] = "kaisa";
shiftstr(&k, n,strlen(n));
return 0;
}
This works just fine, but what if I had, instead of a uint64_t, two uint32_t, either as individual variables or a structure.
#include <stdint.h>
#include <string.h>
struct U64{
uint32_t x;
uint32_t y;
};
void shiftstrstruct(struct U64* t, char* c, size_t len){
memcpy(t, c, len);
/*
At this point I think
x == 0x7369616b
y == 0x00000061
But I could be wrong
*/
//but how can I perform the bit shift?
//Where
//x==0x0000006b
//y==0x61697361
}
int main(){
char n[] = "kaisa";
struct U64 m = {0};
shiftstrstruct(&m, n, strlen(n));
return 0;
}
Up to the memcpy part, it should be the same as if I were performing it on a single variable. I believe the values of x and y are correct in such situations. But, if that's the case that means the values need to be shifted away from x towards y.
I know I can cast but what if I wanted to deal with a 16 byte string that needed to be shifted into two 64 bit variables, or even larger?
Is shifting structures like this possible? Is there a better alternative?

Is shifting structures like this possible?
No, not really. Even if the x and y members are in adjacent memory locations, bit-shift operations on either are performed as integer operations on the individual variables. So, you can't shift bits "out of" one and "into" the other: bits that "fall off" during the shift will be lost.
Is there a better alternative?
You would have to implement such a multi-component bit-shift yourself – making copies of the bits that would otherwise be lost and somehow masking those back into the result, after shifting other bits internally to each 'component' variable. Exactly how to do this would largely depend on the use case.
Here's one possible implementation of a right-shift function for a structure comprising two uint64_t members (I have not added any error-checking for the count, and I assume that uint64_t is exactly 64 bits wide):
#include <stdio.h>
#include <stdint.h>
typedef struct {
uint64_t hi;
uint64_t lo;
} ui128;
void Rshift(ui128* data, int count)
{
uint64_t mask = (1uLL << count) - 1; // Set low "count" bits to 1
uint64_t save = data->hi & mask; // Save bits that fall off hi
data->hi >>= count; // Shift the hi component
data->lo >>= count; // Shift the lo component
data->lo |= save << (64 - count); // Mask in the bits from hi
return;
}
int main()
{
ui128 test = { 0xF001F002F003F004, 0xF005F006F007F008 };
printf("%016llx%016llx\n", test.hi, test.lo);
Rshift(&test, 16);
printf("%016llx%016llx\n", test.hi, test.lo);
return 0;
}
A similar logic could be used for a left-shift function, but you would then need to save the relevant upper (most significant) bits from the lo member and mask them into the shifted hi value:
void Lshift(ui128* data, int count)
{
uint64_t mask = ((1uLL << count) - 1) << (64 - count);
uint64_t save = data->lo & mask;
data->hi <<= count;
data->lo <<= count;
data->hi |= save >> (64 - count);
return;
}

union is your friend, this is what you want:
#include <stdint.h>
#include <stdio.h>
typedef union _shift_u64{
struct _u64{
uint32_t x;
uint32_t y;
} __attribute__((__packed__)) U64;
uint64_t x_and_y;
} SHIFT_U64;
int main(int argc, char* argv[]){
SHIFT_U64 test;
test.U64.x = 4;
test.U64.y = 8;
printf("test.U64.x=%d, test.U64.y=%d, test.x_and_y=%ld\n", test.U64.x, test.U64.y, test.x_and_y);
test.x_and_y<<=1;
printf("test.U64.x=%d, test.U64.y=%d, test.x_and_y=%ld\n", test.U64.x, test.U64.y, test.x_and_y);
test.x_and_y>>=1;
printf("test.U64.x=%d, test.U64.y=%d, test.x_and_y=%ld\n", test.U64.x, test.U64.y, test.x_and_y);
return 0;
}
EDIT: This simple program illustrates how to do it the other way, but you have to check for the carry over bit and shift overflow and shift underflow by yourself. union doesn't care about the data, you just have to make sure that the data makes sense. After compiling, redirect the output of the program to a file or hex-editor and read the errorlevel of the program.
Linux example: ./a.out > a.out.bin; echo "errorlevel=$?"; xxd a.out.bin
#include <stdio.h>
typedef union _shift_it{
struct _data{
unsigned long x : 64;
unsigned long y : 64;
} __attribute__((__packed__)) DATA;
unsigned char x_and_y[16];
} __attribute__((__packed__)) SHIFT_IT;
int main(int argc, char* argv[]){
SHIFT_IT test;
int errorlevel = 0;
//bitmask for shift operation
static const unsigned long LEFT_SHIFTMASK64 = 0x8000000000000000;
static const unsigned long RIGHT_SHIFTMASK64 = 0x0000000000000001;
//test data
test.DATA.x = 0x2468246824682468; //high bits
test.DATA.y = 0x1357135713571357; //low bits
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//left shift
if(test.DATA.x & LEFT_SHIFTMASK64) errorlevel += 1;
test.DATA.x <<= 1;
if(test.DATA.y & LEFT_SHIFTMASK64) errorlevel += 2;
test.DATA.y <<= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//right shift
if(test.DATA.y & RIGHT_SHIFTMASK64) errorlevel += 4;
test.DATA.y >>= 1;
if(test.DATA.x & RIGHT_SHIFTMASK64) errorlevel += 8;
test.DATA.x >>= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//right shift
if(test.DATA.y & RIGHT_SHIFTMASK64) errorlevel += 16;
test.DATA.y >>= 1;
if(test.DATA.x & RIGHT_SHIFTMASK64) errorlevel += 32;
test.DATA.x >>= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//left shift
if(test.DATA.x & LEFT_SHIFTMASK64) errorlevel += 64;
test.DATA.x <<= 1;
if(test.DATA.y & LEFT_SHIFTMASK64) errorlevel += 128;
test.DATA.y <<= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
return errorlevel;
}

Re-Indexing Bits Within a Char

I have an exercise where I have to encode and decode strings at the bit level that are given in by the command line.
The caveat for this is that I have to use a permutation mapping to re-order the bits.
Here's an Example:
The User Inputs The Character To Encode
H
The Binary for H is
01001000
However, that is the regular mapping of the 8 bits, through 0-7.
My program will have to permute the bits to whatever Mapping Patter I use.
For Example, If I use Mapping 64752031
The Bits for the Char 'H'
01001000
Turn To
01000001
When encoding the char, the 0th bit turns to the 6th bit, the 2nd bit turns to the 4th bit, the 3rd bit turns to the 7th bit, and so on. Whatever is based on for that mapping.
Is there a way that I can manipulate and change the order of bits based on the permutation map given?
Thank you.

If you need to process large strings, it is probably better to use a look-up table that will precompute the translation.
#include <stdio.h>
unsigned char perm[256]; // permutation table
unsigned mapping[8]={6,4,7,5,2,0,3,1};
// assumes 7 6 5 4 3 2 1 0
// => 6 4 7 5 2 0 3 1
void mkperm(unsigned char perm[256]) {
for (int i=0; i<256; i++)
perm[i]=0;
for (int i=0;i<256;i++) {
for (int j=7; j>=0; j--) {
int pos=mapping[7-j]; // at mapping[0] is the new position of bit 7
if (i & (1<<j)) // only considers set bits, the table is previously cleared
perm[i] |= (1<<pos) ;
}
}
}
int main() {
mkperm(perm);
printf("%.2x => %.2x\n",'H',perm['H']);
}
mkperm() computes the permutation table by scanning the successive bits of every char. If a bit is set in char i, we set at position i in the translation table a bit at one at a logical weight given by the mapping. Setting this one is done by oring the content of cell i with a 1 properly shifted.

Use bitwise operators.
Here's an example of how to move the second bit to the seventh bit:
x |= (x & 1<<1) << 6;
x &= ~(1<<1);
If my bit numbering bothers anybody, I'm sorry. This is just how I read binary numbers.
You can also put this into an inline function:
inline int bit_mode(int *x, int bit1, int bit2)
{
*x |= *x & (1<<(bit1-1)) << (bit2-1);
*x &= ~(1<<(bit1-1));
return *x;
}
int a;
bit_mode(&a, 2, 7);

Just shift the bits to proper positions. After some fun, I think I've got this:
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <stdint.h>
/**
* A little helper function
* get the bit number 'as' from the byte 'in'
* and put that bit as the number 'num' in the output
*/
static inline
uint8_t map_get_bit_as(uint8_t in,
uint8_t num, uint8_t as)
{
return (!!(in & (1 << as))) << num;
}
uint8_t map(unsigned long mapping, uint8_t in)
{
// static_assert(CHAR_BIT == 8, "are you insane?");
const int bit0 = mapping / 10000000 % 10;
const int bit1 = mapping / 1000000 % 10;
const int bit2 = mapping / 100000 % 10;
const int bit3 = mapping / 10000 % 10;
const int bit4 = mapping / 1000 % 10;
const int bit5 = mapping / 100 % 10;
const int bit6 = mapping / 10 % 10;
const int bit7 = mapping / 1 % 10;
return
map_get_bit_as(in, 0, bit0) |
map_get_bit_as(in, 1, bit1) |
map_get_bit_as(in, 2, bit2) |
map_get_bit_as(in, 3, bit3) |
map_get_bit_as(in, 4, bit4) |
map_get_bit_as(in, 5, bit5) |
map_get_bit_as(in, 6, bit6) |
map_get_bit_as(in, 7, bit7);
}
int main() {
printf("%#02x %#02x\n\n", 'H', map(64752031, 'H'));
}
will output:
0x48 0x41
tested on repl.

If I have correctly understood the order of bits as you are counting them then the corresponding function can look the following way as it is shown in the demonstrative program.
#include <stdio.h>
#include <limits.h>
#include <stdint.h>
char encode( char c, uint32_t mask )
{
unsigned char result = '\0';
for ( size_t i = 0; i < 2 * sizeof( mask ) ; i++ )
{
uint32_t bit = ( ( ( uint32_t )1 << ( CHAR_BIT - 1 - ( mask & 0xf ) ) ) & c ) != 0;
result |= bit << i;
mask >>= 4;
}
return ( char )result;
}
int main( void )
{
uint32_t mask = 0x64752031;
char c = 'H';
printf( "c = %hhx\n", c );
c = encode( c, mask );
printf( "c = %hhx\n", c );
}
The program output is
c = 48
c = 41

custom data type in c: 4x14bit + 1x8bit within a 64 bit 'container'

i am currently trying to figure out an elegant and convinient way to store 4 14-bit values and 1 8-bit value within a 64 bit boundary.
something like this:
typedef struct my64bit{
unsigned data1 : 14;
unsigned data2 : 14;
unsigned data3 : 14;
unsigned data4 : 14;
unsigned other : 8;
}tmy64Bit;
later I wan't to create an array of these 'containers'
tmy64Bit myArray[1000];
so that i have a pointer "myArray" wich points to 1000x64-bits of memory
this array is send via tcp to an embedded-linux SOCFPGA system where it should be copied (with correction of endianess and network byte order) into a specific memory (directly accessible from the fpga)
my problem is that the upper code doesn't create a 64-bit type
sizeof(tmy64Bit)
returns 12, so 12 bytes are allocated instead of 8
filling the struct with data and watching the memory (on my 64 bit linux system) returns this
tmy64Bit test;
memset(&test,0,sizeof(tmy64Bit));
test.data1 = 0x3fff;
...
test.other = 0xAA;
Memory View:
after d1 written = 0xFF3F0000 00000000 00000000
after d2 written = 0xFFFFFF0F 00000000 00000000
after d3 written = 0xFFFFFF0F FF3F0000 00000000
after d4 written = 0xFFFFFF0F FFFFFF0F 00000000
after o written = 0xFFFFFF0F FFFFFF0F AA000000
so the first 2 14 bit variables are stored correctly but then padding fills up the last half-byte and at the end the last byte needs to be stored in a new 64 bit cell
an other aproach would be
typedef struct my2nd64Bit{
uint8_t data[7];
uint8_t other;
}tmy2nd64Bit;
where a
sizeof(tmy2nd64Bit)
returns an 8 (which was expected)
This generates correctly padded structure, but storing the 14 bit always involves a lot of bitshifting and masking

Avoid bit-fields, they are so poorly defined by the C standard that they can barely be used in practice. Your bit-field struct code contains something around 5 to 10 different forms of poorly-specified behavior. C standard bit-fields is a dangerous and superfluous feature, simple as that.
Instead, simply use a raw array of binary values, something like this:
typedef union {
uint8_t array [sizeof(uint64_t)];
uint64_t val64;
}tmy64Bit;
(Note that the uint64_t in the union will be endianess-dependent)
The de facto way to set and clear bits in such a raw array is:
void set_bit (tmy64Bit* x, size_t bit)
{
x->array [bit / 8] |= 1 << (bit % 8);
}
void clear_bit (tmy64Bit* x, size_t bit)
{
x->array [bit / 8] &= ~(1 << (bit % 8));
}
Or if you will, a more readable version (equivalent):
void set_bit (tmy64Bit* x, size_t bit)
{
uint8_t byte_index = bit / 8;
uint8_t bit_index = bit % 8;
x->array[byte_index] |= 1 << bit_index;
}

This is what you want :
typedef struct my64bit{
uint64_t data1 : 14;
uint64_t data2 : 14;
uint64_t data3 : 14;
uint64_t data4 : 14;
uint64_t other : 8;
}tmy64Bit;
unsigned means unsigned int, and this type is 32-bit on most systems. This will cause padding because the individual fields won't be allowed to cross 32-bit boundaries. Using a 64-bit member type won't add padding for this case (you don't cross any 64-bit boundary).
As for any question about bit-fields, you need to remember that most of the bit-field mechanics are implementation defined, which means that if you want to use that, you should check that you actually get what you want. Also, if you plan to use another compiler, check that the behavior is the same (usually it is, but maybe not on exotic platforms). If you properly check, this is safe to use (not undefined behavior), but you might want to use a more portable way, using bit operations for example.

I agree with Lundin's answer, but for this particular situation, my implementation would be a bit different (no pun).
First, I would decide how to pack the fields into each 64-bit word. For example:
Bits Description
0-13 data[0]
14-27 data[1]
28-41 data[2]
42-55 data[3]
56-64 other
Second, I would use a dynamically allocated structure with the FPGA data in a C99 flexible array member, to describe the target device:
typedef struct {
/* Other FPGA-related fields, maybe
* a struct sockaddr_in or _in6
* to identify the FPGA */
size_t words;
uint64_t word[];
} fpga;
fpga *fpga_create(const size_t words)
{
fpga *f;
f = malloc(sizeof (fpga) + word * sizeof(f->word[0]));
if (!f)
return NULL;
f->words = words;
memset(f->word, 0, f->words * sizeof (f->word[0]));
return f;
}
Third, I would use static inline accessors to manipulate the data:
static inline unsigned int fpga_get_data(const fpga *f, const int w, const int i)
{
assert(f != NULL);
assert(w >= 0 && (size_t)w < f->words);
assert(i >= 0 && i < 4);
return (f->word[(size_t)w] >> (i * 14)) & 0x3FFFU;
}
static inline unsigned int fpga_get_other(const fpga *f, const int w)
{
assert(f != NULL);
assert(w >= 0 && (size_t)w < f->words);
return (f->word[(size_t)w] >> 56) & 0xFFU;
}
static inline void fpga_set_data(const fpga *f, const int w, const int i,
const unsigned int value)
{
assert(f != NULL);
assert(w >= 0 && (size_t)w < f->words);
assert(i >= 0 && i < 4);
f->word[(size_t)w] = (f->word[(size_t)w] & (~(0x3FFFU << (i*14))))
| ((value & 0x3FFFU) << (i*14));
}
static inline void fpga_set_other(const fpga *f, const int w, const unsigned int value)
{
assert(f != NULL);
assert(w >= 0 && (size_t)w < f->words);
f->word[(size_t)w] = (f->word[(size_t)w] & (uint64_t)0x00FFFFFFFFFFFFFFULL)
| ((value & 0xFFU) << 56);
}
Above, w is the index to the word, and i is the index of the data entry (0 to 3). If you want a continuous data array, you could use
static inline unsigned int fpga_get_data(const fpga *f, const int i)
{
assert(f != NULL);
assert(i >= 0 && (size_t)i < 4 * f->words);
return (f->word[(size_t)i / 4] >> ((i & 3) * 14)) & 0x3FFFU;
}
if (f != NULL &&
i >= 0 && i < (size_t)4 * f->words)
return (f->word[(size_t)i / 4] >> ((i & 3) * 14)) & 0x3FFFU;
else
return 0U; /* Or abort with an error */
}
static inline void fpga_set_data(const fpga *f, const int i, const unsigned int value)
{
assert(f != NULL);
assert(i >= 0 && (size_t)i / 4 < f->words);
f->word[(size_t)i / 4] = (f->word[(size_t)i / 4] & (~(0x3FFFU << ((i & 3) * 14))))
| ((value & 0x3FFFU) << ((i & 3) * 14));
}
The accessors should be defined in the header file that defines the structure.
Note that if other is actually some sort of checksum for the four data fields, I would calculate them just before sending.

Is there a more optimal way to approach some of these functions?

I completed some bit manipulation exercises out of a textbook recently and have grasped onto some of the core ideas behind manipulating bits firmly. My main concern with making this post is for optimizations to my current code. I get the hunch that there are some functions that I could approach better. Do you have any recommendations for the following code?
#include <stdio.h>
#include "funcs.h"
// basically sizeof(int) using bit manipulation
unsigned int int_size(){
int size = 0;
for(unsigned int i = ~00u; i > 0; i >>= 1, size++);
return size;
}
// get a bit at a specific nth index
// index starts with 0 on the most significant bit
unsigned int bit_get(unsigned int data, unsigned int n){
return (data >> (int_size() - n - 1)) & 1;
}
// set a bit at a specific nth index
// index starts with 0 on the most significant bit
unsigned int bit_set(unsigned int data, unsigned int n){
return data | (1 << (int_size() - n - 1));
}
// gets the bit width of the data (<32)
unsigned int bit_width(unsigned int data){
int width = int_size();
for(; width > 0; width--)
if((data & (1 << width)) != 0)
break;
return width + 1;
}
// print the data contained in an unsigned int
void print_data(unsigned int data){
printf("%016X = ",data);
for(int i = 0; i < int_size(); i++)
printf("%X",bit_get(data,i));
putchar('\n');
}
// search for pattern in source (where pattern is n wide)
unsigned int bitpat_search(unsigned int source, unsigned int pattern,
unsigned int n){
int right = int_size() - n;
unsigned int mask = 0;
for(int i = 0; i < n; i++)
mask |= 1 << i;
for(int i = 0; i < right; i++)
if(((source & (mask << (right - i))) >> (right - i) ^ pattern) == 0)
return i - bit_width(source);
return -1;
}
// extract {count} bits from data starting at {start}
unsigned int bitpat_get(unsigned int data, int start, int count){
if(start < 0 || count < 0 || int_size() <= start || int_size() <= count || bit_width(data) != count)
return -1;
unsigned int mask = 1;
for(int i = 0; i < count; i++)
mask |= 1 << i;
mask <<= int_size() - start - count;
return (data & mask) >> (int_size() - start - count);
}
// set {count} bits (basically width of {replace}) in {*data} starting at {start}
void bitpat_set(unsigned int *data, unsigned int replace, int start, int count){
if(start < 0 || count < 0 || int_size() <= start || int_size() <= count || bit_width(replace) != count)
return;
unsigned int mask = 1;
for(int i = 0; i < count; i++)
mask |= 1 << i;
*data = ((*data | (mask << (int_size() - start - count))) & ~(mask << (int_size() - start - count))) | (replace << (int_size() - start - count));
}

because your int_size() function returns the same value each time you could save some time there:
unsigned int int_size(){
static unsigned int size = 0;
if (size == 0)
for(unsigned int i = ~00u; i > 0; i >>= 1, size++);
return size;
}
so it will calculate the value only once.
But replacing all calls of this function by sizeof(int)*8 would be much better.

I looked through your code and there's nothing that jumps out at me.
Overall, don't sweat the small stuff. If the code runs and works fine, no worries. If you are really concerned about performance, go ahead and run your code through a profiler.
Overall, I will say that the one thing you might be dealing with is the "paranoia" I see in your code regarding the width of an int. I generally use the fixed-length types in stdint.h and give the caller some options regarding what length of ints (i.e. uint8_t, uint16_t, uint32_t, etc.) they want to deal with.
Also, in C99, there are bitfields, which allow for each bit to be addressed into.

unsigned int int_size(){
return __builtin_popcount((unsigned int) -1) / __builtin_popcount((unsigned char) -1);
}
This should be faster than looping.

Including int_size() in all the others seems like its going to kill performance unless the compiler is really good at optimizing that loop out.
You could use a uint32_t instead of an int and then you would know up front the size.
You could also use sizeof(int) to get the size in bytes of an int and multiply by 8. I haven't seen an environment that defined a byte to be other than 8 bits, but the standard does seem to allow for it in saying it is implementation defined.

implement the bit map in the following situation

My question is how to implement the bit map in the following situation?
If a vertex of a graph is in a minimum spanning tree (MST), mark the corresponding bit; later on check whether it is in the MST by checking the bit.
At beginning, I was thinking of using
typedef struct bit_t{
char bit0:1;
} bit;
bit bitmap[num_of_vertex];
And use bitmap array to record the bit;
But then I found the sizeof (bitmap[num_of_vertex]) is num_of_vertex byte, not num_of_vertex/8 byte. so it is not saving space like what I thought;
So far I use
long bit_record = 0;
...
bit_record |= 1<< u;//set vertex as in MST
...
then later on check whether vertex is in MST using:
static bool is_in_MST(int v, int bit_record){
int mask = 1 << v;
if (mask & bit_record)
return true;
else
return false;
}
Though the code works, it will not work if num_of_vertex is larger than 32.
How in general a bitmap in the above situation is implemented?

The situation is that you just can't have 1-bit types in C. The smallest addressable unit is a byte in C, so even if you declare a struct with a one-bit bitfield, it will be padded to a byte (at least). What you can do is create an array of bytes, then access the bits in the array using division and modulus.
unsigned char bitmap[0x100] = { 0 };
void set_nth_bit(unsigned char *bitmap, int idx)
{
bitmap[idx / CHAR_BIT] |= 1 << (idx % CHAR_BIT);
}
void clear_nth_bit(unsigned char *bitmap, int idx)
{
bitmap[idx / CHAR_BIT] &= ~(1 << (idx % CHAR_BIT));
}
int get_nth_bit(unsigned char *bitmap, int idx)
{
return (bitmap[idx / CHAR_BIT] >> (idx % CHAR_BIT)) & 1;
}

about bitmap,here is an example on Programming Pearls, and I added up some notes:
#define BITPERWORD 32 //bits of int，which depends on your computer
#define N 10000000 // number of your elements
#define SHIFT 5 // 32 = 2^5
#define MASK 0x1F // 11111 in binary
int a[N/BITPERWORD + 1]; //space for your bitmap
// i is the the bit you want to use
void set(int i) { a[i>>SHIFT] |= (1<<(i&MASK));}
void clr(int i) { a[i>>SHIFT] &= ~(1<<(i&MASK));}
int test(int i) { return a[i>>SHIFT] & (1<<(i&MASK));}
and don't forget the initialization at the beginning:
for(i=0;i<N;i++)
clr(i);

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

How can I use Bit-Fields to save memory? - c

Related

Bitshift on structures

Re-Indexing Bits Within a Char

custom data type in c: 4x14bit + 1x8bit within a 64 bit 'container'

Is there a more optimal way to approach some of these functions?

implement the bit map in the following situation

Categories

Resources