Bitshift on structures - c

I'm unsure if this is possible due to structure padding and alignment but, assuming you take care of that by aligning your structures to 4/8 bytes, is it possible to bit shift on a structure as if it was a single variable?
What I'd like to do is take a string (max 8 bytes) and shift it into the high order bits of a 64-bit variable.
Like if I do this:
#include <stdint.h>
#include <string.h>
void shiftstr(uint64_t* t,char* c,size_t len){
memcpy(t, c, len);
//now *t==0x000000617369616b
*t<<=(sizeof(uint64_t)-len)*8;
//now *t==0x617369616b000000
}
int main(){
uint64_t k = 0;
char n[] = "kaisa";
shiftstr(&k, n,strlen(n));
return 0;
}
This works just fine, but what if I had, instead of a uint64_t, two uint32_t, either as individual variables or a structure.
#include <stdint.h>
#include <string.h>
struct U64{
uint32_t x;
uint32_t y;
};
void shiftstrstruct(struct U64* t, char* c, size_t len){
memcpy(t, c, len);
/*
At this point I think
x == 0x7369616b
y == 0x00000061
But I could be wrong
*/
//but how can I perform the bit shift?
//Where
//x==0x0000006b
//y==0x61697361
}
int main(){
char n[] = "kaisa";
struct U64 m = {0};
shiftstrstruct(&m, n, strlen(n));
return 0;
}
Up to the memcpy part, it should be the same as if I were performing it on a single variable. I believe the values of x and y are correct in such situations. But, if that's the case that means the values need to be shifted away from x towards y.
I know I can cast but what if I wanted to deal with a 16 byte string that needed to be shifted into two 64 bit variables, or even larger?
Is shifting structures like this possible? Is there a better alternative?

Is shifting structures like this possible?
No, not really. Even if the x and y members are in adjacent memory locations, bit-shift operations on either are performed as integer operations on the individual variables. So, you can't shift bits "out of" one and "into" the other: bits that "fall off" during the shift will be lost.
Is there a better alternative?
You would have to implement such a multi-component bit-shift yourself – making copies of the bits that would otherwise be lost and somehow masking those back into the result, after shifting other bits internally to each 'component' variable. Exactly how to do this would largely depend on the use case.
Here's one possible implementation of a right-shift function for a structure comprising two uint64_t members (I have not added any error-checking for the count, and I assume that uint64_t is exactly 64 bits wide):
#include <stdio.h>
#include <stdint.h>
typedef struct {
uint64_t hi;
uint64_t lo;
} ui128;
void Rshift(ui128* data, int count)
{
uint64_t mask = (1uLL << count) - 1; // Set low "count" bits to 1
uint64_t save = data->hi & mask; // Save bits that fall off hi
data->hi >>= count; // Shift the hi component
data->lo >>= count; // Shift the lo component
data->lo |= save << (64 - count); // Mask in the bits from hi
return;
}
int main()
{
ui128 test = { 0xF001F002F003F004, 0xF005F006F007F008 };
printf("%016llx%016llx\n", test.hi, test.lo);
Rshift(&test, 16);
printf("%016llx%016llx\n", test.hi, test.lo);
return 0;
}
A similar logic could be used for a left-shift function, but you would then need to save the relevant upper (most significant) bits from the lo member and mask them into the shifted hi value:
void Lshift(ui128* data, int count)
{
uint64_t mask = ((1uLL << count) - 1) << (64 - count);
uint64_t save = data->lo & mask;
data->hi <<= count;
data->lo <<= count;
data->hi |= save >> (64 - count);
return;
}

union is your friend, this is what you want:
#include <stdint.h>
#include <stdio.h>
typedef union _shift_u64{
struct _u64{
uint32_t x;
uint32_t y;
} __attribute__((__packed__)) U64;
uint64_t x_and_y;
} SHIFT_U64;
int main(int argc, char* argv[]){
SHIFT_U64 test;
test.U64.x = 4;
test.U64.y = 8;
printf("test.U64.x=%d, test.U64.y=%d, test.x_and_y=%ld\n", test.U64.x, test.U64.y, test.x_and_y);
test.x_and_y<<=1;
printf("test.U64.x=%d, test.U64.y=%d, test.x_and_y=%ld\n", test.U64.x, test.U64.y, test.x_and_y);
test.x_and_y>>=1;
printf("test.U64.x=%d, test.U64.y=%d, test.x_and_y=%ld\n", test.U64.x, test.U64.y, test.x_and_y);
return 0;
}
EDIT: This simple program illustrates how to do it the other way, but you have to check for the carry over bit and shift overflow and shift underflow by yourself. union doesn't care about the data, you just have to make sure that the data makes sense. After compiling, redirect the output of the program to a file or hex-editor and read the errorlevel of the program.
Linux example: ./a.out > a.out.bin; echo "errorlevel=$?"; xxd a.out.bin
#include <stdio.h>
typedef union _shift_it{
struct _data{
unsigned long x : 64;
unsigned long y : 64;
} __attribute__((__packed__)) DATA;
unsigned char x_and_y[16];
} __attribute__((__packed__)) SHIFT_IT;
int main(int argc, char* argv[]){
SHIFT_IT test;
int errorlevel = 0;
//bitmask for shift operation
static const unsigned long LEFT_SHIFTMASK64 = 0x8000000000000000;
static const unsigned long RIGHT_SHIFTMASK64 = 0x0000000000000001;
//test data
test.DATA.x = 0x2468246824682468; //high bits
test.DATA.y = 0x1357135713571357; //low bits
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//left shift
if(test.DATA.x & LEFT_SHIFTMASK64) errorlevel += 1;
test.DATA.x <<= 1;
if(test.DATA.y & LEFT_SHIFTMASK64) errorlevel += 2;
test.DATA.y <<= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//right shift
if(test.DATA.y & RIGHT_SHIFTMASK64) errorlevel += 4;
test.DATA.y >>= 1;
if(test.DATA.x & RIGHT_SHIFTMASK64) errorlevel += 8;
test.DATA.x >>= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//right shift
if(test.DATA.y & RIGHT_SHIFTMASK64) errorlevel += 16;
test.DATA.y >>= 1;
if(test.DATA.x & RIGHT_SHIFTMASK64) errorlevel += 32;
test.DATA.x >>= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
//left shift
if(test.DATA.x & LEFT_SHIFTMASK64) errorlevel += 64;
test.DATA.x <<= 1;
if(test.DATA.y & LEFT_SHIFTMASK64) errorlevel += 128;
test.DATA.y <<= 1;
//binary output to stdout
for(int i=0; i<16; i++) putchar(test.x_and_y[i]);
return errorlevel;
}

Related

uint8 data to uint32 bits in EMBEDDED c

I am getting the input data from my microcontroller over uint8 data the are transmitted as 0xff, 0x2a.... the the two bits are subsequntly high and low values.
I need to convert this to uint_32 var where i can use memcpy
example
1C 1D 1E 1F 20 21 22 23
if this are the VALUES that are transferred how i can get them all in a single uint32 variable, with this below approach i can just get the last two bits which is just 23 and not the entire
void Func(const uint8_t * data){
uint32_t msgh =0;
uint32_t msgl=0;
uint32_t datah =0;
uint32_t datal=0;
for(int i= 0; i<dlc;i++){
msgh=*data >> 4;
msgl=*data & 0x0f;
// printf("DATA %x%x",msgh,msgl);
memcpy(&datah, &msgh, 4);
memcpy(&datal, &msgl, 4);
// printf("msgl=%x\n",msgl);
data++;
}
printf("DATA%x%x",datah,datal);
}
No need to memcpying all this stuff – extracting the high and low nibbles of the bytes can be done as follows:
uint32_t high = 0;
uint32_t low = 0;
unsigned int offset = 0;
for(int i = 0; i < 8; ++i)
{
high |= ((uint32_t)(*data) >> 4) << offset;
low |= ((uint32_t)(*data) & 0x0f) << offset;
offset += 4;
++data;
}
Note how you need to shift the offsets such that the nibbles get at their right positions within their respective 32-bit values.
Note, too that this assumes little endian byte order of transferred data (the byte order must be known and fix as part of the protocol definition for communication!).
Big endian order slightly differs:
uint32_t high = 0;
uint32_t low = 0;
unsigned int offset = 32;
for(int i = 0; i < 8; ++i)
{
offset -= 4;
high |= (uint32_t)(*data >> 4) << offset;
low |= (uint32_t)(*data & 0x0f) << offset;
++data;
}
(Well, could have started with 28 as well and subtracted 4 after nibble extraction, analogously to LE variant – this variant reflects my personal preference for constants of powers of two – number of operations doesn't differ anyway…).
Finally note that you might prefer to separate conversion and outputting into separate functions to achieve better reusability, e.g. like
void extract(uint8_t const* data, uint32_t* high, uint32_t* low)
{
// assign to *high and *low analogously to above
}
// assuming C code; if C++, prefer references:
void extract(uint8_t const* data, uint32_t& high, uint32_t& low);
// or alternatively:
typedef struct
{
uint32_t high;
uint32-t low;
} NibblesCombined;
NibblesCombined extract(uint8_t const* data)
{
// assign to the members of the struct analogously to above
}
// again assuming C; C++ could optionally return a std::pair instead!

How can I use Bit-Fields to save memory?

This is about ANSI-C (C90). This is what I know:
I can directly tell the compiler how many bits I want for a specific variable.
If I want 1 bit which can have the values zero or one.
or 2 bits for the values 0,1,2,3, and so on...;
I'm familiar with the syntax.
I have problem concerning bitfields:
I want to define a SET structure.
It can have maximum 1024 elements (it can have less, but the maximum is 1024 elements).
The domain of the set is from 1 to 1024. So an element could have any value 1-1024.
I'm trying to create a structure for a SET, and it must be efficient as possible for the memory part.
I tried:
typedef struct set
{
unsigned int var: 1;
} SET;
//now define an array of SETS
SET array_of_sets[MAX_SIZE] //didn't define MAX_SIZE, but no more than 1024 elements in each set.
I know this isn't efficient; maybe it's even not good for what I want. That's why I'm looking for help.
As noted in extensive comments, using a bit field is not the way to go. You can use just 128 bytes of storage for your set containing values 1..1024. You will need to map the value N to bit N-1 (so you have bits 0..1023 to work with). You also need to decide on the operations you need for your set. This code supports 'create', 'destroy', 'insert', 'delete' and 'in_set'. It does not support iteration over the elements in the set; that can be added if you want it.
sets.h
#ifndef SETS_H_INCLUDED
#define SETS_H_INCLUDED
typedef struct Set Set;
enum { MAX_ELEMENTS = 1024 };
extern Set *create(void);
extern void destroy(Set *set);
extern void insert(Set *set, int value);
extern void delete(Set *set, int value);
extern int in_set(Set *set, int value);
#endif /* SETS_H_INCLUDED */
sets.c
#include "sets.h"
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
typedef unsigned long Bits;
#define BITS_C(n) ((Bits)(n))
enum { ARRAY_SIZE = MAX_ELEMENTS / (sizeof(Bits) * CHAR_BIT) };
struct Set
{
Bits set[ARRAY_SIZE];
};
Set *create(void)
{
Set *set = malloc(sizeof(*set));
if (set != 0)
memset(set, 0, sizeof(*set));
return set;
}
void destroy(Set *set)
{
free(set);
}
void insert(Set *set, int value)
{
assert(value >= 1 && value <= MAX_ELEMENTS);
value--; /* 0..1023 */
int index = value / (sizeof(Bits) * CHAR_BIT);
int bitnum = value % (sizeof(Bits) * CHAR_BIT);
Bits mask = BITS_C(1) << bitnum;
/* printf("I: %d (%d:%d:0x%.2lX)\n", value+1, index, bitnum, mask); */
set->set[index] |= mask;
}
void delete(Set *set, int value)
{
assert(value >= 1 && value <= MAX_ELEMENTS);
value--; /* 0..1023 */
int index = value / (sizeof(Bits) * CHAR_BIT);
int bitnum = value % (sizeof(Bits) * CHAR_BIT);
Bits mask = BITS_C(1) << bitnum;
/* printf("D: %d (%d:%d:0x%.2lX)\n", value+1, index, bitnum, mask); */
set->set[index] &= ~mask;
}
/* C90 does not support <stdbool.h> */
int in_set(Set *set, int value)
{
assert(value >= 1 && value <= MAX_ELEMENTS);
value--; /* 0..1023 */
int index = value / (sizeof(Bits) * CHAR_BIT);
int bitnum = value % (sizeof(Bits) * CHAR_BIT);
Bits mask = BITS_C(1) << bitnum;
/* printf("T: %d (%d:%d:0x%.2lX) = %d\n", value+1, index, bitnum, mask,
(set->set[index] & mask) != 0); */
return (set->set[index] & mask) != 0;
}
#include <stdio.h>
enum { NUMBERS_PER_LINE = 15 };
int main(void)
{
Set *set = create();
if (set != 0)
{
int i;
int n = 0;
for (i = 1; i <= MAX_ELEMENTS; i += 4)
insert(set, i);
for (i = 3; i <= MAX_ELEMENTS; i += 6)
delete(set, i);
for (i = 1; i <= MAX_ELEMENTS; i++)
{
if (in_set(set, i))
{
printf(" %4d", i);
if (++n % NUMBERS_PER_LINE == 0)
{
putchar('\n');
n = 0;
}
}
}
if (n % NUMBERS_PER_LINE != 0)
putchar('\n');
destroy(set);
}
return 0;
}
The functions should really be given a systematic prefix, such as set_. The BITS_C macro is based on the INT64_C macro (and the other related macros) defined in <stdint.h> in C99 and later, which is also not a part of C90.
As per my previous comments, here is an example of how you can pack eight 1-bit elements into one char physical element.
I have only implemented the function to get the value of a 1-bit element, I leave the function to set it to you (it's easy to do).
Note: you can easily change the type of the array element (unsigned char) and experiment with types which can hold more bits (e.g unsigned int) and test if they perform better in terms of speed.
You can also modify the code to make it handle elements bigger than one bit.
#include <stdio.h>
#include <limits.h>
unsigned int get_el(unsigned char* array, unsigned int index)
{
unsigned int bits_per_arr_el = sizeof(unsigned char)*CHAR_BIT;
unsigned int arr_index = index / bits_per_arr_el;
unsigned int bit_offset = index % bits_per_arr_el;
unsigned int bitmask = 1 << bit_offset;
unsigned int retval;
// printf("index=%u\n", index);
// printf("bits_per_arr_el=%u\n", bits_per_arr_el);
// printf("arr_index=%u\n", arr_index);
// printf("bit_offset=%u\n", bit_offset);
retval = array[arr_index] & bitmask ? 1 : 0; // can be simpler if only True/False is needed
return(retval);
}
#define MAX_SIZE 10
unsigned char bitarray[MAX_SIZE];
int main()
{
bitarray[1] = 3; // 00000011
printf("array[7]=%u, array[8]=%u, array[9]=%u, array[10]=%u\n",
get_el(bitarray, 7),
get_el(bitarray, 8),
get_el(bitarray, 9),
get_el(bitarray,10));
return 0;
}
outputs
array[7]=0, array[8]=1, array[9]=1, array[10]=0
typedef struct set
{
unsigned short var:10; // uint var:1 will be padded to 32 bits
} SET; // ushort var:10 (which is max<=1024) padded to 16 bits
As was commented by #Jonathan Leffler use array(unsigned short[])
and define bitmasks
#define bitZer 0x00 //(unsigned)(0 == 0)? true:true;
#define bitOne 0x10 // so from (both inclusive)0-1023 = 1024
... // added for clarification
#define bitTen 0x0A
to look into the bits of each element.
http://www.catb.org/esr/structure-packing/ detailed
To store a value from 0 to 1023 (or from 1 to 1024, which is essentially the same and only involves adding/subtracting 1) you need a minimum of 10 bits.
This means that for 32-bit (unsigned) integers, you can pack 3 values into 30 bits, which gives 2 bits of useless padding.
Example:
%define ELEMENTS 100
uint32_t myArray[ (ELEMENTS + 2) / 3 ];
void setValue(int n, int value) {
uint32_t temp;
uint32_t mask = (1 << 10) - 1;
if(n >= ELEMENTS) return;
value--; // Convert "1 to 1024" into "0 to 1023"
temp = myArray[n / 3];
mask = mask << (n % 3)*10;
temp = (temp & ~mask) | (value << (n % 3)*10);
myArray[n / 3] = temp;
}
int getValue(int n) {
uint32_t temp;
uint32_t mask = (1 << 10) - 1;
if(n >= ELEMENTS) return 0;
temp = myArray[n / 3];
temp >>= (n % 3)*10;
return (temp & ~mask) + 1;
}
You can do this with bitfields instead, but the code to get/set individual values will end up using branches (e.g. switch( n%3 )) which will be slower in practice.
Removing those 2 bits of padding will cost a little more complexity and a little more overhead. For example:
%define ELEMENTS 100
uint32_t myArray[ (ELEMENTS*10 + 31) / 32 ];
int getValue(int n) {
uint64_t temp;
uint64_t mask = (1 << 10) - 1;
if(n >= ELEMENTS) return 0;
temp = myArray[n*10/32 + 1];
temp = (temp << 32) | myArray[n*10/32];
temp >>= (n*10 % 32);
return (temp & ~mask) + 1;
}
This can't be done with bitfields. This is the most space efficient way to store an array of values that range from 1 to 1024.
If you are storing an "array of booleans" or setting flags, it can be useful. For instance, you can initialize or compare up to 64 values at a time.
These macros will work for unsigned char, short, int, long long ... but simplifies significantly if you just pick a type (so you can use a safer static inline function)
#define getbit(x,n) x[n/(sizeof(*x)*8)] & (typeof(*x))1 << (n&((sizeof(*x)*8)-1))
#define setbit(x,n) x[n/(sizeof(*x)*8)] |= (typeof(*x))1 << (n&((sizeof(*x)*8)-1))
#define flpbit(x,n) x[n/(sizeof(*x)*8)] ^= (typeof(*x))1 << (n&((sizeof(*x)*8)-1))
#define clrbit(x,n) x[n/(sizeof(*x)*8)] &= ~( (typeof(*x))1 << (n&((sizeof(*x)*8)-1)) )
to initialize a large array of booleans all you need to do is: char cbits[]={0,0xF,0,0xFF};
or for all zeroes char cbits[4]={0};
or an int example: int ibits[]={0xF0F0F0F0,~0};
//1111000011110000111100001111000011111111111111111111111111111111
If you will only be accessing 1 type of array, it may be better to make the macros into proper functions like:
static inline unsigned char getbit(unsigned char *x, unsigned n){
return x[n>>3] & 1 << (n&7);
}
//etc... similar for other types and functions from macros above
You can also compare multiple flags at a time by '|'ing the flags together and using '&'ed masks; however, it does get a bit more complex when you exceed the native types
For your particular instance you can initialize to all zeroes by:
unsigned char flags[128]={0};
or all 1's by:
uint64_t flags[128] = {~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0};
You can even use enums to name your flags
enum{
WHITE, //0
RED, //1
BLUE, //2
GREEN, //3
...
BLACK //1023
}
if (getbit(flags,WHITE) && getbit(flags,RED) && getbit(flags,BLUE))
printf("red, white and blue\n");
1) The proper solution for this question is to use Bit Array
The question provided the solution with Bit Fields with Struct. There are two typical ways to save memory space for bits related problem, another is to use Bit Array. For this specific case in the question, the better way is to use Bit Array (demoed as follows).
If it is the case like purely independent bit flags here, go
for the Bit Array
If there is a group of relevant bits , such as the IP address or Control Word definition, then it's better to combine them with a struct, that is to use Bit Fields with Sturct
2) Sample code just for demo Bit Array
#include<limits.h>
#define BITS_OF_INT (sizeof(int)*CHAR_BIT)
void SetBit(int A[], int k)
{
//Set the bit at the k-th position
A[k/BITS_OF_INT] |= 1 <<(k%BITS_OF_INT);
}
void ClearBit(int A[], int k)
{
//RESET the bit at the k-th position
A[k/BITS_OF_INT] &= ~(1 <<(k%BITS_OF_INT)) ;
}
int TestBit(int A[], int k)
{
// Return TRUE if bit set
return ((A[k/BITS_OF_INT] & (1 <<(k%BITS_OF_INT)))!= 0) ;
}
#define MAX_SIZE 1024
int main()
{
int A[MAX_SIZE/BITS_OF_INT];
int i;
int pos = 100; // position
for (i = 0; i < MAX_SIZE/BITS_OF_INT; i++)
A[i] = 0;
SetBit(A, pos);
if (TestBit(A, pos)){//do something}
ClearBit(A, pos);
}
3) Furthermore, a worthwhile discussing point from this question is,
How to choose a proper solution between "Bit Array" and "Bit fields with struct"?
Here are some references about this topic.
When to use bit-fields in C?
Readable and Maintainable Bitfields in C

Unsigned Int Multiplication in C

So I am trying to implement this algorithm in C for multiplying 32-bit unsigned int in order to understand it better:
Step 1: Test multiplier-0
Step 2: if 1, add multiplicand to left half of product
and place the result in the left half of
the product register
Step 3: shift multiplier right 1 bit
Step 4: shift product register right 1 bit
What I am not getting is how to implement step 2. It says to add multiplicand to left half of product and store in left half of product register. I am confused on how to add only to the left half of the product. How do I go about this?
EDIT:
This is something I came with but it does not give me the right answer and I am not sure what is going wrong. Please help!
long unsigned UnsignedMult(unsigned multiplicand, unsigned multiplier){
unsigned int temp32a, temp32b;
unsigned long temp64;
unsigned long product;
int i;
product = multiplier;
temp32b = multiplicand;
for(i=0; i < 32; i++){
if((product & 1)==1){ //add
temp64 = product;
temp64 = temp64 >> 32;
temp32a = temp64;
product = BinaryAdd(temp32a, temp32b);
}
product = product >>= 1;
}
return product;
}
int BinaryAdd(int in1, int in2){
int sum, carry;
sum = in1 ^ in2; // x XOR y
carry = in1 & in2; // x AND y carry in
int i;
for (i = 0; i < 32; i++) {
carry = carry << 1;
in1 = sum;
in2 = carry;
sum = in1 ^ in2; //calculate sum
carry = in1 & in2; //find carry out
}
return sum;
}
Your product register needs to be 64 bits in length to allow two 32 bit integers to be multiplied. Hopefully you have uint64_t available in your compiler to represent this (stdint.h).
To do the addition, you could put your multiplicand into a 64 bit integer, shift it left 32 bits, and then add it to the 64 bit product register.
Something like:
uint64_t tmpMulti;
uint64_t productRegister = 0;
uint32_t multiplicand = 123;
tmpMulti = multiplicand;
tmpMulti <<= 32;
productRegister += tmpMulti;
(Apologies for any syntax mistakes, I haven't written C code in a long time)
Out of interest, I had a go at implementing it myself. This seems to work:
#include <stdio.h>
#include <stdint.h>
void main(int argc, char* argv[])
{
uint32_t multiplier = 17;
uint32_t multiplicand = 12;
uint64_t productRegister = multiplier;
for (int n = 0; n < 32; n++) {
if (productRegister & 1 == 1) {
productRegister += ((uint64_t)multiplicand) << 32;
}
productRegister >>= 1;
}
printf("Result: %d\n", productRegister);
}
The following code does not use <stdint.h>, and uses two 32 bit ints to represent the 64 bit product register. It doesn't attempt to handle overflow, and assumes the answer will fit in 32 bits.
#include <stdio.h>
void main(int argc, char* argv[])
{
unsigned int multiplier = 17;
unsigned int multiplicand = 12;
unsigned int productRegisterLower = multiplier;
unsigned int productRegisterUpper = 0;
for (int n = 0; n < 32; n++) {
if (productRegisterLower & 1 == 1) {
productRegisterUpper += multiplicand;
}
productRegisterLower >>= 1;
productRegisterLower |= productRegisterUpper << 31;
productRegisterUpper >>= 1;
}
printf("Result: %d\n", productRegisterLower);
}
In order to handle the right shift of the product register, it moves the least significant bit of the upper half into the most significant bit of the lower half. To do this, it:
Shifts the lower half to the right 1 bit.
Takes a copy of the upper half and shifts it left 31 bits, so that the least significant bit is now on the left, and the rest of the value is zero.
ORs this with the lower half, to copy the shifted bit across.
Shifts the upper half to the right 1 bit.

Is there a more optimal way to approach some of these functions?

I completed some bit manipulation exercises out of a textbook recently and have grasped onto some of the core ideas behind manipulating bits firmly. My main concern with making this post is for optimizations to my current code. I get the hunch that there are some functions that I could approach better. Do you have any recommendations for the following code?
#include <stdio.h>
#include "funcs.h"
// basically sizeof(int) using bit manipulation
unsigned int int_size(){
int size = 0;
for(unsigned int i = ~00u; i > 0; i >>= 1, size++);
return size;
}
// get a bit at a specific nth index
// index starts with 0 on the most significant bit
unsigned int bit_get(unsigned int data, unsigned int n){
return (data >> (int_size() - n - 1)) & 1;
}
// set a bit at a specific nth index
// index starts with 0 on the most significant bit
unsigned int bit_set(unsigned int data, unsigned int n){
return data | (1 << (int_size() - n - 1));
}
// gets the bit width of the data (<32)
unsigned int bit_width(unsigned int data){
int width = int_size();
for(; width > 0; width--)
if((data & (1 << width)) != 0)
break;
return width + 1;
}
// print the data contained in an unsigned int
void print_data(unsigned int data){
printf("%016X = ",data);
for(int i = 0; i < int_size(); i++)
printf("%X",bit_get(data,i));
putchar('\n');
}
// search for pattern in source (where pattern is n wide)
unsigned int bitpat_search(unsigned int source, unsigned int pattern,
unsigned int n){
int right = int_size() - n;
unsigned int mask = 0;
for(int i = 0; i < n; i++)
mask |= 1 << i;
for(int i = 0; i < right; i++)
if(((source & (mask << (right - i))) >> (right - i) ^ pattern) == 0)
return i - bit_width(source);
return -1;
}
// extract {count} bits from data starting at {start}
unsigned int bitpat_get(unsigned int data, int start, int count){
if(start < 0 || count < 0 || int_size() <= start || int_size() <= count || bit_width(data) != count)
return -1;
unsigned int mask = 1;
for(int i = 0; i < count; i++)
mask |= 1 << i;
mask <<= int_size() - start - count;
return (data & mask) >> (int_size() - start - count);
}
// set {count} bits (basically width of {replace}) in {*data} starting at {start}
void bitpat_set(unsigned int *data, unsigned int replace, int start, int count){
if(start < 0 || count < 0 || int_size() <= start || int_size() <= count || bit_width(replace) != count)
return;
unsigned int mask = 1;
for(int i = 0; i < count; i++)
mask |= 1 << i;
*data = ((*data | (mask << (int_size() - start - count))) & ~(mask << (int_size() - start - count))) | (replace << (int_size() - start - count));
}
because your int_size() function returns the same value each time you could save some time there:
unsigned int int_size(){
static unsigned int size = 0;
if (size == 0)
for(unsigned int i = ~00u; i > 0; i >>= 1, size++);
return size;
}
so it will calculate the value only once.
But replacing all calls of this function by sizeof(int)*8 would be much better.
I looked through your code and there's nothing that jumps out at me.
Overall, don't sweat the small stuff. If the code runs and works fine, no worries. If you are really concerned about performance, go ahead and run your code through a profiler.
Overall, I will say that the one thing you might be dealing with is the "paranoia" I see in your code regarding the width of an int. I generally use the fixed-length types in stdint.h and give the caller some options regarding what length of ints (i.e. uint8_t, uint16_t, uint32_t, etc.) they want to deal with.
Also, in C99, there are bitfields, which allow for each bit to be addressed into.
unsigned int int_size(){
return __builtin_popcount((unsigned int) -1) / __builtin_popcount((unsigned char) -1);
}
This should be faster than looping.
Including int_size() in all the others seems like its going to kill performance unless the compiler is really good at optimizing that loop out.
You could use a uint32_t instead of an int and then you would know up front the size.
You could also use sizeof(int) to get the size in bytes of an int and multiply by 8. I haven't seen an environment that defined a byte to be other than 8 bits, but the standard does seem to allow for it in saying it is implementation defined.

masking most significant bit

I wrote this function to remove the most significant bit in every byte. But this function doesn't seem to be working the way I wanted it to be.
The output file size is always '0', I don't understand why nothing's been written to the output file. Is there a better and simple way to remove the most significant bit in every byte??
In relation to shift operators, section 6.5.7 of the C standard says:
If the value of the right operand is negative or is greater than or
equal to the width of the promoted left operand, the behavior is
undefined.
So firstly, remove nBuffer << 8;. Even if it were well defined, it wouldn't be an assignment operator.
As people have mentioned, you'd be better off using CHAR_BIT than 8. I'm pretty sure, instead of 0x7f you mean UCHAR_MAX >> 1 and instead of 7 you meant CHAR_BIT - 1.
Let's just focus on nBuffer and bit_count, here. I shall comment out anything that doesn't use either of these.
bit_count += 7;
if (bit_count == 7*8)
{
*out_buf++ = nBuffer;
/*if((write(out_fd, bit_buf, sizeof(char))) == -1)
oops("Cannot write on the file", "");*/
nBuffer << 8;
bit_count -= 8;
}
nBuffer = 0;
bit_count = 0;
At the end of this code, what is the value of nBuffer? What about bit_count? What impact would that have on your second loop? while (bit_count > 0)
Now let's focus on the commented out code:
if((write(out_fd, bit_buf, sizeof(char))) == -1)
oops("Cannot write on the file", "");
Where are you assigning a value to bit_buf? Using an uninitialised variable is undefined behaviour.
Instead of going through all of the bits to find the high one, this goes through only the 1 bits. high() returns the high bit of the argument, or zero if the argument is zero.
inline int high(int n)
{
int k;
do {
k = n ^ (n - 1);
n &= ~k;
} while (n);
return (k + 1) >> 1;
}
inline int drop_high(int n)
{
return n ^ high(n);
}
unsigned char remove_most_significant_bit(unsigned char b)
{
int bit;
for(bit = 0; bit < 8; bit++)
{
unsigned char mask = (0x80 >> bit);
if( mask & b) return b & ~mask;
}
return b;
}
void remove_most_significant_bit_from_buffer(unsigned char* b, int length)
{
int i;
for(i=0; i<length;i++)
{
b[i] = remove_most_significant_bit(b[i]);
}
}
void test_it()
{
unsigned char data[8];
int i;
for(i = 0; i < 8; i++)
{
data[i] = (1 << i) + i;
}
for(i = 0; i < 8; i++)
{
printf("%d\r\n", data[i]);
}
remove_most_significant_bit_from_buffer(data, 8);
for(i = 0; i < 8; i++)
{
printf("%d\r\n", data[i]);
}
}
I won't go through your entire answer to provide your reworked code, but removing the most significant bit is easy. This comes from the fact that the most significant bit can easily be found by using log base 2 converted to an integer.
#include <stdio.h>
#include <math.h>
int RemoveMSB(int a)
{
return a ^ (1 << (int)log2(a));
}
int main(int argc, char const *argv[])
{
int a = 4387;
printf("MSB of %d is %d\n", a, (int)log2(a));
a = RemoveMSB(a);
printf("MSB of %d is %d\n", a, (int)log2(a));
return 0;
}
Output:
MSB of 4387 is 12
MSB of 291 is 8
As such, 4387 in binary is 1000100100011 with a most significant bit at 12.
Likewise, 291 in binary is 0000100100011 with a most significant bit at 8.

Resources