C decompress Bitmask source - c

This may be somewhat of an odd question as well as my first one ever on this site and a pretty complicated thing to ask basically I have this decompresser for a very specific archived file, I barely understand this but from what i can grasp its some sort of "bit mask" it reads the first 2 bytes out of target file, and stores them as a sequence.
The first for loop is where I get confused
Say for arguments sake mask is 2 bytes 10 04, or 1040(decimal) thats what it usually is in these files
for (t = 0; t<16; t++) {
if (mask & (1 << (15 - t))) {
This seems to be looping through all 16 bits of those 2 bytes and running an AND operation on mask (1040) on every bit?
The if statement is what I don't understand completely? Whats triggering the if? If the bit is greater then 0?
Because if mask is 1040, then really what were looking at is
if(1040 & 32768) index 15
if(1040 & 16384) index 14
if(1040 & 8192) index 13
if(1040 & 4096) index 12
if(1040 & 2048) index 11
if(1040 & 1024) index 10
if(1040 & 512) and so on.....
if(1040 & 256)
I just really need to know whats triggering this if statement? i think i might be over thinking it, but is it simply trigger if the current bit is greater then 0?
The only other thing i can do is compile this source myself, insert printfs on key variables and go hand in hand with a hex editor and try and figure out whats actually going on here, if anyone could give me a hand would be awesome.
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
uint8_t dest[1024 * 1024 * 4]; // holds the actual data
int main(int argc, char *argv[]) {
FILE *fi, *fo;
char fname[255];
uint16_t mask, tmp, offset, length;
uint16_t seq;
uint32_t dptr, sptr;
uint16_t l, ct;
uint16_t t, s;
int test_len;
int t_length, t_off;
// Print Usage if filename is missing
if (argc<3) {
printf("sld_unpack - Decompressor for .sld files ()\nsld_unpack <filename.sld> <filename.t2>\n");
return(-1);
}
// Open .SLD-file
if (!(fi = fopen(argv[1], "rb"))) {
printf("Error opening %s\n", argv[1]);
return(-1);
}
dptr = 0;
fread((uint16_t*)&seq, 1, 2, fi); // read 1st 2 bytes in file
test_len = ftell(fi);
printf("[Main Header sequence: %d]\n 'offset' : %d \n", seq, test_len);
sptr = 0;
fread((uint16_t*)&seq, 1, 2, fi);
while (!feof(fi)) { // while not at the end of the file set mask equal to sequence (first 2 bytes of header)
mask = seq;
// loop through 16 bit mask
for (t = 0; t<16; t++) {
if (mask & (1 << (15 - t))) { // check all bit fields and run AND check to if value greater then 0?
test_len = ftell(fi);
fread((uint16_t*)&seq, 1, 2, fi); // read
sptr = sptr + 2; // set from 0 to 2
tmp = seq; // set tmp to sequence
offset = ((uint32_t)tmp & 0x07ff) * 2;
length = ((tmp >> 11) & 0x1f) * 2; // 32 - 1?
if (length>0) {
for (l = 0; l<length; l++) {
dest[dptr] = dest[dptr - offset];
dptr++;
}
}
else { // if length == 0
t_length = ftell(fi);
fread((uint16_t*)&seq, 1, 2, fi);
sptr = sptr + 2;
length = seq * 2;
for (s = 0; s<length; s++) {
dest[dptr] = dest[dptr - offset];
dptr++;
}
}
}
else { // if sequence AND returns 0 (or less)?
fread((uint16_t*)&seq, 1, 2, fi);
t_length = ftell(fi);
sptr = sptr + 2;
dest[dptr++] = seq & 0xff;
dest[dptr++] = (seq >> 8) & 0xff;
}
}
fread((uint16_t*)&seq, 1, 2, fi);
}
fclose(fi);
sprintf(fname, "%s\0", argv[2]);
if (!(fo = fopen(fname, "wb"))) { // if file
printf("Error creating %s\n", fname);
return(-1);
}
fwrite((uint8_t*)&dest, 1, dptr, fo);
fclose(fo);
printf("Done.\n");
return(0);
}

Be careful here.
for arguments sake mask is 2 bytes 10 04, or 1040(decimal)
That assumption may be nowhere close to true. You need to show how mask is defined, but generally a mask of bytes 10 (00001010) and 40 (00101000) is binary 101000101000 or decimal (2600) not quite 1040.
The general mask of 2600 decimal will match when bits 4,6,10 & 12 are set. Remember a bit mask is nothing more than a number whose binary representation when anded or ored with a second number produces some desired result. Nothing magic about a bit mask, its just a number with the right bits set for your intended purpose.
When you and two numbers together and test, your are testing whether there are common bits set in both numbers. Using the for loop and shift, you are doing a bitwise test for which common bits are set. Using the mask of 2600 with the loop counter will test true when bits 4,6,10 & 12 are set. In other words when the test clause equals 8, 32, 512 or 2048.
The following is a short example of what is happening in the loop and if statements.
#include <stdio.h>
/* BUILD_64 */
#if defined(__LP64__) || defined(_LP64)
# define BUILD_64 1
#endif
/* BITS_PER_LONG */
#ifdef BUILD_64
# define BITS_PER_LONG 64
#else
# define BITS_PER_LONG 32
#endif
/* CHAR_BIT */
#ifndef CHAR_BIT
# define CHAR_BIT 8
#endif
char *binpad (unsigned long n, size_t sz);
int main (void) {
unsigned short t, mask;
mask = (10 << 8) | 40;
printf ("\n mask : %s (%hu)\n\n",
binpad (mask, sizeof mask * CHAR_BIT), mask);
for (t = 0; t<16; t++)
if (mask & (1 << (15 - t)))
printf (" t %2hu : %s (%hu)\n", t,
binpad (mask & (1 << (15 - t)), sizeof mask * CHAR_BIT),
mask & (1 << (15 - t)));
return 0;
}
/** returns pointer to binary representation of 'n' zero padded to 'sz'.
* returns pointer to string contianing binary representation of
* unsigned 64-bit (or less ) value zero padded to 'sz' digits.
*/
char *binpad (unsigned long n, size_t sz)
{
static char s[BITS_PER_LONG + 1] = {0};
char *p = s + BITS_PER_LONG;
register size_t i;
for (i = 0; i < sz; i++)
*--p = (n>>i & 1) ? '1' : '0';
return p;
}
Output
$ ./bin/bitmask1040
mask : 0000101000101000 (2600)
t 4 : 0000100000000000 (2048)
t 6 : 0000001000000000 (512)
t 10 : 0000000000100000 (32)
t 12 : 0000000000001000 (8)

The if statement is what I don't understand completely? Whats triggering the if? If the bit is greater then 0? ... I just really need to know whats triggering this if statement? i think i might be over thinking it, but is it simply trigger if the current bit is greater then 0?
The C (and C++) if statement "triggers" when the conditional statement evaluates to true, which is any non-zero value; zero equates to false.
Straight C doesn't have a Boolean type, it just use the convention of zero (0) is false, and any other value is true.
if (mask & (1 << (15 - t))) {...}
is the same as
if ((mask & (1 << (15 - t))) != 0) {...}
The expression you gave is only true (non-zero) when there is a bit in the mask in the same position that the 1 was shifted by. i.e. is the 15th bit in the mask set, etc.
N.b.
mask & (1 << (15 - t))
can only ever be 0 or 1 er... will only have one bit set.

Related

Word every 2 bits to symbol

I have a function that read a word, bit by bit and change to symbol:
I need help to change it to read every 2 bits and change to symbol.
I don't have an idea for it and I need your help guys
void PrintWeirdBits(word w , char* buf){
word mask = 1<<(BITS_IN_WORD-1);
int i;
for(i=0;i<BITS_IN_WORD;i++){
if(mask & w)
buf[i]='/';
else
buf[i]='.';
mask>>=1;
}
buf[i] = '\0';
}
Needed symbols:
00 - *
01 - #
10 - %
11 - !
Here is my proposal for your issue.
Using a lookup table for the symbol decoding will eliminate the need in if statements.
(I assumed word is an unsigned 16 bits data type)
#define BITS_PER_SIGN 2
#define BITS_PER_SIGN_MSK 3 // decimal 3 is 0b11 in binary --> two bits set
// General define could be:
// ((1u << BITS_PER_SIGN) - 1)
#define INIT_MASK (BITS_PER_SIGN_MSK << (BITS_IN_WORD - BITS_PER_SIGN))
void PrintWeirdBits(word w , char* buf)
{
static const char signs[] = {'*', '#', '%', '!'};
unsigned mask = INIT_MASK;
int i;
int sign_idx;
for(i=0; i < BITS_IN_WORD / BITS_PER_SIGN; i++)
{
// the bits of the sign represent the index in the signs array
// just need to align these bits to start from bit 0
sign_idx = (w & mask) >> (BITS_IN_WORD - (i + 1)*BITS_PER_SIGN);
// store the decoded sign in the buffer
buf[i] = signs[sign_idx];
// update the mask for the next symbol
mask >>= BITS_PER_SIGN;
}
buf[i] = '\0';
}
Here it seems to be working.
With small effort it can be updated to a generic code for any bit width of the symbol as long as it is power of two (1, 2, 4, 8) and smaller that BITS_IN_WORD.
Assuming word is unsigned int or an unsigned integer type.
void PrintWeirdBits(word w , char* buf){
word mask = 3 << (BITS_IN_WORD -2);
int i;
word cmp;
for(i=0;i<BITS_IN_WORD/2;i++){
cmp = (mask & w) >> (BITS_IN_WORD -2 -2i);
if(cmp == 0x00)
{
buf[i]='*';
}
else if (cmp == 0x01)
{
buf[i]='#';
}
else if (cmp == 0x02)
{
buf[i]='%';
}
else
{
buf[i]='!';
}
mask>>=2;
}
buf[i] = '\0';
}
The important part is
cmp = (mask & w) >> (BITS_IN_WORD -2 -2i);
Here mask and the input w is bitwise ANDed and the result is right shifted to get the value in the first two bits. These bits are compared to get the result.

c, obtaining a special random number

I have a algorithm problem that I need to speed up :)
I need a 32bit random number, with exact 10 bits set to 1. But in the same time, patterns like 101 (5 dec) and 11 (3 dec) to be considered illegal.
Now the MCU is a 8051 (8 bit) and I tested all this in Keil uVision. My first attempt completes, giving the solution
0x48891249
1001000100010010001001001001001 // correct, 10 bits 1, no 101 or 11
The problem is that it completes in 97 Seconds or 1165570706 CPU cycles which is ridiculous!!!
Here is my code
// returns 1 if number is not good. ie. contains at leats one 101 bit sequence
bool checkFive(unsigned long num)
{
unsigned char tmp;
do {
tmp = (unsigned char)num;
if(
(tmp & 7) == 5
|| (tmp & 3) == 3
) // illegal pattern 11 or 101
return true; // found
num >>= 1;
}while(num);
return false;
}
void main(void) {
unsigned long v,num; // count the number of bits set in v
unsigned long c; // c accumulates the total bits set in v
do {
num = (unsigned long)rand() << 16 | rand();
v = num;
// count all 1 bits, Kernigen style
for (c = 0; v; c++)
v &= v - 1; // clear the least significant bit set
}while(c != 10 || checkFive(num));
while(1);
}
The big question for a brilliant mind :)
Can be done faster? Seems that my approach is naive.
Thank you in advance,
Wow, I'm impressed, thanks all for suggestions. However, before accept, I need to test them these days.
Now with the first option (look-up) it's just not realistic, will complete blow my 4K RAM of entire 8051 micro controller :) As you can see in image bellow, I tested for all combinations in Code Blocks but there are way more than 300 and it's not finished yet until 5000 index...
The code I use to test
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
//#define bool bit
//#define true 1
//#define false 0
// returns 1 if number is not good. ie. contains at leats one 101 bit sequence
bool checkFive(uint32_t num)
{
uint8_t tmp;
do {
tmp = (unsigned char)num;
if(
(tmp & 7) == 5
|| (tmp & 3) == 3
) // illegal pattern 11 or 101
return true; // found
num >>= 1;
}while(num);
return false;
}
void main(void) {
uint32_t v,num; // count the number of bits set in v
uint32_t c, count=0; // c accumulates the total bits set in v
//printf("Program started \n");
num = 0;
printf("Program started \n");
for(num=0; num <= 0xFFFFFFFF; num++)
{
//do {
//num = (uint32_t)rand() << 16 | rand();
v = num;
// count all 1 bits, Kernigen style
for (c = 0; v; c++)
v &= v - 1; // clear the least significant bit set
//}while(c != 10 || checkFive(num));
if(c != 10 || checkFive(num))
continue;
count++;
printf("%d: %04X\n", count, num);
}
printf("Complete \n");
while(1);
}
Perhaps I can re-formulate the problem:
I need a number with:
precise (known) amount of 1 bits, 10 in my example
not having 11 or 101 patterns
remaining zeroes can be any
So somehow, shuffle only the 1 bits inside.
Or, take a 0x00000000 and add just 10 of 1 bits in random positions, except the illegal patterns.
Solution
Given a routine r(n) that returns a random integer from 0 (inclusive) to n (exclusive) with uniform distribution, the values described in the question may be generated with a uniform distribution by calls to P(10, 4) where P is:
static uint32_t P(int a, int b)
{
if (a == 0 && b == 0)
return 0;
else
return r(a+b) < a ? P(a-1, b) << 3 | 1 : P(a, b-1) << 1;
}
The required random number generator can be:
static int r(int a)
{
int q;
do
q = rand() / ((RAND_MAX+1u)/a);
while (a <= q);
return q;
}
(The purpose of dividing by (RAND_MAX+1u)/a and the do-while loop is to trim the range of rand to an even multiple of a so that bias due to a non-multiple range is eliminated.)
(The recursion in P may be converted to iteration. This is omitted as it is unnecessary to illustrate the algorithm.)
Discussion
If the number cannot contain consecutive bits 11 or 101, then the closest together two 1 bits can be is three bits apart, as in 1001. Fitting ten 1 bits in 32 bits then requires at least 28 bits, as in 1001001001001001001001001001. Therefore, to satisfy the constraints that there is no 11 or 101 and there are exactly 10 1 bits, the value must be 1001001001001001001001001001 with four 0 bits inserted in some positions (including possibly the beginning or the end).
Selecting such a value is equivalent to placing 10 instances of 001 and 4 instances of 0 in some order.1 There are 14! ways of ordering 14 items, but any of the 10! ways of rearranging the 10 001 instances with each other are identical, and any of the 4! ways of rearranging the 0 instances with each other are identical, so the number of distinct selections is 14! / 10! / 4!, also known as the number of combinations of selecting 10 things from 14. This is 1,001.
To perform such a selection with uniform distribution, we can use a recursive algorithm:
Select the first choice with probability distribution equal to the proportion of the choices in the possible orderings.
Select the remaining choices recursively.
When ordering a instances of one object and b of a second object, a/(a+b) of the potential orderings will start with the first object, and b/(a+b) will start with the second object. Thus, the design of the P routine is:
If there are no objects to put in order, return the empty bit string.
Select a random integer in [0, a+b). If it is less than a (which has probability a/(a+b)), insert the bit string 001 and then recurse to select an order for a-1 instances of 001 and b instances of 0.
Otherwise, insert the bit string 0 and then recurse to select an order for a instances of 001 and b-1 instances of 0.
(Since, once a is zero, only 0 instances are generated, if (a == 0 && b == 0) in P may be changed to if (a == 0). I left it in the former form as that shows the general form of a solution in case other strings are involved.)
Bonus
Here is a program to list all values (although not in ascending order).
#include <stdint.h>
#include <stdio.h>
static void P(uint32_t x, int a, int b)
{
if (a == 0 && b == 0)
printf("0x%x\n", x);
else
{
if (0 < a) P(x << 3 | 1, a-1, b);
if (0 < b) P(x << 1, a, b-1);
}
}
int main(void)
{
P(0, 10, 4);
}
Footnote
1 This formulation means we end up with a string starting 001… rather than 1…, but the resulting value, interpreted as binary, is equivalent, even if there are instances of 0 inserted ahead of it. So the strings with 10 001 and 4 0 are in one-to-one correspondence with the strings with 4 0 inserted into 1001001001001001001001001001.
One way to satisfy your criteria in a limited number of solutions is to utilize the fact that there can be no more that four groups of 000s within the bit population. This also means that there can one be one group of 0000 in the value. Knowing this, you can seed your value with a single 1 in bits 27-31 and then continue adding random bits checking that each bit added satisfies your 3 or 5 constraints.
When adding random bits to your value and satisfying your constraints, there can always be combinations that lead to a solution that can never satisfy all constraints. To protect against those cases, just keep an iteration count and reset/restart the value generation if iterations exceed that value. Here, if a solution is going to be found, it will be found in less than 100 iterations. And is generally found in 1-8 attempts. Meaning for each value you generate, you have on average no more than 800 iterations which will be a far cry less than "97 Seconds or 1165570706 CPU cycles" (I haven't counted cycles, but the return is almost instantaneous)
There are many ways to approach this problem, this is just one that worked in a reasonable amount of time:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <limits.h>
#define BPOP 10
#define NBITS 32
#define LIMIT 100
/** rand_int for use with shuffle */
static int rand_int (int n)
{
int limit = RAND_MAX - RAND_MAX % n, rnd;
rnd = rand();
for (; rnd >= limit; )
rnd = rand();
return rnd % n;
}
int main (void) {
int pop = 0;
unsigned v = 0, n = NBITS;
size_t its = 1;
srand (time (NULL));
/* one of first 5 bits must be set */
v |= 1u << (NBITS - 1 - rand_int (sizeof v + 1));
pop++; /* increment pop count */
while (pop < BPOP) { /* loop until pop count 10 */
if (++its >= LIMIT) { /* check iterations */
#ifdef DEBUG
fprintf (stderr, "failed solution.\n");
#endif
pop = its = 1; /* reset for next iteration */
v = 0;
v |= 1u << (NBITS - 1 - rand_int (sizeof v + 1));
}
unsigned shift = rand_int (NBITS); /* get random shift */
if (v & (1u << shift)) /* if bit already set */
continue;
/* protect against 5 (101) */
if ((shift + 2) < NBITS && v & (1u << (shift + 2)))
continue;
if ((int)(shift - 2) >= 0 && v & (1u << (shift - 2)))
continue;
/* protect against 3 (11) */
if ((shift + 1) < NBITS && v & (1u << (shift + 1)))
continue;
if ((int)(shift - 1) >= 0 && v & (1u << (shift - 1)))
continue;
v |= 1u << shift; /* add bit at shift */
pop++; /* increment pop count */
}
printf ("\nv : 0x%08x\n", v); /* output value */
while (n--) { /* output binary confirmation */
if (n+1 < NBITS && (n+1) % 4 == 0)
putchar ('-');
putchar ((v >> n & 1) ? '1' : '0');
}
putchar ('\n');
#ifdef DEBUG
printf ("\nits: %zu\n", its);
#endif
return 0;
}
(note: you will probably want a better random source like getrandom() or reading from /dev/urandom if you intend to generate multiple random solutions within a loop -- expecially if you are calling the executable in a loop from your shell)
I have also included a DEBUG define that you can enable by adding the -DDEBUG option to your compiler string to see the number of failed solutions and number of iterations on the final.
Example Use/Output
The results for 8 successive runs:
$ ./bin/randbits
v : 0x49124889
0100-1001-0001-0010-0100-1000-1000-1001
v : 0x49124492
0100-1001-0001-0010-0100-0100-1001-0010
v : 0x48492449
0100-1000-0100-1001-0010-0100-0100-1001
v : 0x91249092
1001-0001-0010-0100-1001-0000-1001-0010
v : 0x92488921
1001-0010-0100-1000-1000-1001-0010-0001
v : 0x89092489
1000-1001-0000-1001-0010-0100-1000-1001
v : 0x82491249
1000-0010-0100-1001-0001-0010-0100-1001
v : 0x92448922
1001-0010-0100-0100-1000-1001-0010-0010
As Eric mentioned in his answer, since each 1 but must be separated by at least two 0 bits, you basically start with the 28-bit pattern 1001001001001001001001001001. It's then a matter of placing the remaining four 0 bits within this bit pattern, and there are 11 distinct places to insert each zero.
This can be accomplished by first selecting a random number from 1 to 11 to determine where to place a bit. Then you left shift all the bits above the target bit by 1. Repeat 3 more times, and you have your value.
This can be done as follows:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
void binprint(uint32_t n)
{
int i;
for (i=0;i<32;i++) {
if ( n & (1u << (31 - i))) {
putchar('1');
} else {
putchar('0');
}
}
}
// inserts a 0 bit into val after pos "1" bits are found
uint32_t insert(uint32_t val, int pos)
{
int cnt = 0;
uint32_t mask = 1u << 31;
uint32_t upper, lower;
while (cnt < pos) {
if (val & mask) { // look for a set bit and count if you find one
cnt++;
}
mask >>= 1;
}
if (mask == (1u << 31)) {
return val; // insert at the start: no change
} else if (mask == 0) {
return val << 1; // insert at the end: shift the whole thing by 1
} else {
mask = (mask << 1) - 1; // mask has all bits below the target set
lower = val & mask; // extract the lower portion
upper = val & (~mask); // extract the upper portion
return (upper << 1) | lower; // recombine with the upper portion shifted 1 bit
}
}
int main()
{
int i;
uint32_t val = 01111111111; // hey look, a good use of octal!
srand(time(NULL));
for (i=0;i<4;i++) {
int p = rand() % 11;
printf("p=%d\n", p);
val = insert(val, p);
}
binprint(val);
printf("\n");
return 0;
}
Sample output for two runs:
p=3
p=10
p=9
p=0
01001001000100100100100100100010
...
p=3
p=9
p=3
p=1
10001001000010010010010010010001
Run time is negligible.
Since you don't want a lookup table here is the way:
Basically you have this number with 28 bits set to 0 and 1 in which you need to insert 4x 0 :
0b1001001001001001001001001001
Hence you can use the following algorithm:
int special_rng_nolookup(void)
{
int secret = 0b1001001001001001001001001001;
int low_secret;
int high_secret;
unsigned int i = 28; // len of secret
unsigned int rng;
int mask = 0xffff // equivalent to all bits set in integer
while (i < 32)
{
rng = __asm__ volatile(. // Pseudo code
"rdrand"
);
rng %= (i + 1); // will generate a number between 0 and 28 where you will add a 0. Then between 0 and 29, 30, 31 for the 3 next loop.
low_secret = secret & (mask >> (i - rng)); // locate where you will add your 0 and save the lower part of your number.
high_secret = (secret ^ low_secret) << (!(!rng)); // remove the lower part to your int and shift to insert a 0 between the higher part and the lower part. edit : if rng was 0 you want to add it at the very beginning (left part) so no shift.
secret = high_secret | low_secret; // put them together.
++i;
}
return secret;
}

Set first 10 bit of int

I have a 32-bit int and I want to set the first 10 bit to a specific number.
IE
The 32-bit int is:
11101010101010110101100100010010
I want the first 10 bit to be the number 123, which is
0001111011
So the result would be
00011110111010110101100100010010
Does anyone know the easiest way I would be able to do this? I know that we have to do bit-shifting but I'm not good at it so I'm not sure
Thank you!
uint32_t result = (input & 0x3fffff) | (newval << 22);
0x3fffff masks out the highest 10 bits (it has the lowest 22 bits set). You have to shift your new value for the highest 10 bits by 22 places.
Convert inputs to unsigned 32-bit integers
uint32_t num = strtoul("11101010101010110101100100010010", 0, 2);
uint32_t firstbits = 123;
Mask off the lower 32-10 bits. Create mask by shifting a unsigned long 1 22 places left making 100_0000_0000_0000_0000_0000 then decrementing to 11_1111_1111_1111_1111_1111
uint32_t mask = (1UL << (32-10)) - 1;
num &= mask;
Or in firstbits shifted left by 32-10
num |= firstbits << (32-10);
Or in 1 line:
(num & (1UL << (32-10)) - 1) | (firstbits*1UL << (32-10))
Detail about firstbits*1UL. The type of firstbits is not defined by OP and may only be a 16-bit int. To insure code can shift and form an answer that exceeds 16 bits (the minimum width of int), multiple by 1UL to insure the value is unsigned and has at least 32 bit width.
You can "erase" bits (set them to 0) by using a bit wise and ('&'); bits that are 0 in either value will be 0 in the result.
You can set bits to 1 by using a bit wise or ('|'); bits that are 1 in either value will be 1 in the result.
So: and your number with a value where the first 10 bits are 0 and the rest are 1; then 'or' it with the first 10 bits you want put in, and 0 for the other bits. If you need to calculate that value, then a left-shift would be the way to go.
You can also take a mask and replace approach where you zero the lower bits required to hold 123 and then simply | (OR) the value with 123 to gain the final result. You can accomplish the exact same thing with shifts as shown by several other answers, or you can accomplish it with masks:
#include <stdio.h>
#ifndef BITS_PER_LONG
#define BITS_PER_LONG 64
#endif
#ifndef CHAR_BIT
#define CHAR_BIT 8
#endif
char *binpad2 (unsigned long n, size_t sz);
int main (void) {
unsigned x = 0b11101010101010110101100100010010;
unsigned mask = 0xffffff00; /* mask to zero lower 8 bits */
unsigned y = 123; /* value to replace zero bits */
unsigned masked = x & mask; /* zero the lower bits */
/* show intermediate results */
printf ("\n x : %s\n", binpad2 (x, sizeof x * CHAR_BIT));
printf ("\n & mask : %s\n", binpad2 (mask, sizeof mask * CHAR_BIT));
printf ("\n masked : %s\n", binpad2 (masked, sizeof masked * CHAR_BIT));
printf ("\n | 123 : %s\n", binpad2 (y, sizeof y * CHAR_BIT));
masked |= y; /* apply the final or with 123 */
printf ("\n final : %s\n", binpad2 (masked, sizeof masked * CHAR_BIT));
return 0;
}
/** returns pointer to binary representation of 'n' zero padded to 'sz'.
* returns pointer to string contianing binary representation of
* unsigned 64-bit (or less ) value zero padded to 'sz' digits.
*/
char *binpad2 (unsigned long n, size_t sz)
{
static char s[BITS_PER_LONG + 1] = {0};
char *p = s + BITS_PER_LONG;
register size_t i;
for (i = 0; i < sz; i++)
*--p = (n>>i & 1) ? '1' : '0';
return p;
}
Output
$ ./bin/bitsset
x : 11101010101010110101100100010010
& mask : 11111111111111111111111100000000
masked : 11101010101010110101100100000000
| 123 : 00000000000000000000000001111011
final : 11101010101010110101100101111011
How about using bit fields in C combined with a union? The following structure lets you set the whole 32-bit value, the top 10 bits or the bottom 22 bits. It isn't as versatile as a generic function but you can't easily make a mistake when using it. Be aware this and most solutions may not work on all integer sizes and look out for endianness as well.
union uu {
struct {
uint32_t bottom22 : 22;
uint32_t top10 : 10;
} bits;
uint32_t value;
};
Here is an example usage:
int main(void) {
union uu myuu;
myuu.value = 999999999;
printf("value = 0x%08x\n", myuu.value);
myuu.bits.top10 = 0;
printf("value = 0x%08x\n", myuu.value);
myuu.bits.top10 = 0xfff;
printf("value = 0x%08x\n", myuu.value);
return 0;
}
The output is:
value = 0x3b9ac9ff
value = 0x001ac9ff
value = 0xffdac9ff

Understanding bit manipulation (set/clear) in C programming

I'm stuck understanding bit operations on integers in C.
Suppose I have the number 13. Its binary representation is 1101. How can I set the bit at its second position? How can I clear the bit?
Here is the function I wrote so far for setting the bit:
int setBit(int data, int pos, int val)
{
if (val==1)
data |= (1U << (pos - 1));
else
data ^= (1U << (pos-1));
return data;
}
Will this work correctly?
n = n & (~(1U <<x)) will reset the bit in position x.
Actually what we are doing suppose n=1101
We want to reset 3rd bit.
How does it work?
So 1U <<3=000....1000
~( 1U <<3)=111....0111
n=000..1101
& 111..0111
Result is 000..0101.
For inserting a bit y at position x:(position starts from 0)
1101---->11y01
Giving the example for position 2.
num= FFFF FFFF (in hex)(all 1's) //1111......1111
number=N // in which you will insert bit
num1=num<<x; //for x=2 as in this case
//num1=1111.....1100
num2=~(num1); //num2=0000.....0011
lowbits=N & num2; // =0000.....0001 (N=1101)
highbits= N &num1;// =0000.....1100
highbits<<=1; // =0000....11000
N= highbits | lowbits;//=0000....11001
Now set the x-th bit(here x=2) as you required using the method described below
Note: More generally changing the kth bit of number n to y (maybe 0 or 1) can be done this way
n^=(-y ^ n) & (1U <<k); (&- logical and)
Deletion of a bit is similar to insertion. Step by step perform the operation and you will get it.
EDIT: I have changed the use of 1 to 1U because in first case when using only 1 without any modifiers is defined to be an signed int. From K&R the right shifts of signed values are implementation defined. Also if you left-shift a signed number so that the sign bit is affected, the result is undefined.
These operations on unsigned value have well define behaviour: Vacated fields are filled with zeroes.
Setting, clearing and toggling the state of a bit is straightforward:
inline void bit_set (unsigned long *bf, unsigned char n)
{ *bf |= (1 << n); }
inline void bit_clear (unsigned long *bf, unsigned char n)
{ *bf &= ~(1 << n); }
inline void bit_toggle (unsigned long *bf, unsigned char n)
{ *bf ^= (1 << n); }
Note: bitfields, and the functions above, are zero based (i.e. the least significant bit is bit 0 not bit 1) So if you want to clear, set or toggle the second bit from the right (bit index 1, the 2's bit (binary), or bit 2 counting right-to-left), you pass a bit index of 1. n in the functions above is the bit index. The following is a quick reference:
+-----+-----+-----+-----+-----+-----+-----+-----+
bit index | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+-----+-----+-----+-----+-----+-----+-----+-----+
binary | 128 | 64 | 32 | 16 | 8 | 4 | 2 | 1 |
+-----+-----+-----+-----+-----+-----+-----+-----+
Here is a quick example of the use operating on bit 1, (the 2's bit in binary):
#include <stdio.h>
#include <stdlib.h>
#define WDSZ 64
/* bit functions */
inline void bit_set (unsigned long *bf, unsigned char n) { *bf |= (1 << n); }
inline void bit_clear (unsigned long *bf, unsigned char n) { *bf &= ~(1 << n); }
inline void bit_toggle (unsigned long *bf, unsigned char n) { *bf ^= (1 << n); }
/* simple return of binary string */
char *binstr (unsigned long n);
int main (int argc, char **argv) {
unsigned long bf = (argc > 1) ? strtoul (argv[1], NULL, 10) : 13;
printf ("\n original value : %3lu (%s)\n", bf, binstr (bf));
bit_set (&bf, 1);
printf (" set bit 1 : %3lu (%s)\n", bf, binstr (bf));
bit_clear (&bf, 1);
printf (" clear bit 1 : %3lu (%s)\n", bf, binstr (bf));
bit_toggle (&bf, 1);
printf (" toggle bit 1 : %3lu (%s)\n\n", bf, binstr (bf));
return 0;
}
/* simple return of binary string */
char *binstr (unsigned long n) {
static char s[WDSZ + 1] = {0};
char *p = s + WDSZ;
while (n) {
p--;
*p = (n & 1) ? '1' : '0';
n >>= 1;
}
return p;
}
Output
$ ./bin/bitsetcleartoggle
original value : 13 (1101)
set bit 1 : 15 (1111)
clear bit 1 : 13 (1101)
toggle bit 1 : 15 (1111)
Here is a simple answer for what I understand your problem to be:
int setBit(int data, int pos, int val) {
if (val)
return data | (1U << (pos - 1));
else
return data & ~(1U << (pos - 1));
}
But I think numbering the bits starting at 1 is not a good idea. The more common usage is to number the bits from 0 to sizeof(type) * CHAR_BIT - 1
whenever I have a problem like this I will break it down into smaller parts...
suppose i have no 13 binary of 13 is 1101
now how can i add extra bit at second position?
ok that is pretty straight forward... first let make a number with a bit in the second position, zero's everywhere else... we will use an int for convenience...
int mask = 2; // or 0x2 if you rather or 0b10 if your compiler supports that ...
well that isn't very special, I can't reuse that machinery as it were... so let try a different way...
int mask = 1 << 1; // 1 in the fist position moved one to the left...
ok now we have part, now there are 2 intuitive ways to set that on our 13...
int answer = 13 | mask; // binary OR
or
int answer = 13 + mask;
these 2 are the same for 13... but will give you different answers for 14... because + always adds the value, and | will only change the bits that aren't set on the left side... so you need to pick the semantics that are correct for you...
now your second question is a little trickier ... first we will pick the same mask...
//pick nth bit
int mask = 1 < n;
// now to toggle that on a number... XOR
int answer = q ^ mask;
I like using the n'th vs position because it makes more sense in the 0 case...
//For Inserting Bit
int insertbit(int data,int pos,int val)
{
int no1,no2;
no1=data;
no1=no1>>(pos-1);
no1=no1<<(pos-1);
no2=data-no1;
no1=no1<<1;
no1=no1 | no2;
if(val==1)
{
no1=setbit(no1,pos,val);
}
return no1;
}
//Setting Bits
int setbit(int data,int pos,int val)
{
int no=1;
no=no<<(pos-1);
if(val==0)
{
no=~no;
data=data&no;
}
else
{
data=no|data;
}
return data;
}
I Coded This Way But I Need Some Shortcut for code insert function

how to make a bit-set/byte-array conversion in c

Given an array,
unsigned char q[32]="1100111...",
how can I generate a 4-bytes bit-set, unsigned char p[4], such that, the bit of this bit-set, equals to value inside the array, e.g., the first byte p[0]= "q[0] ... q[7]"; 2nd byte p[1]="q[8] ... q[15]", etc.
and also how to do it in opposite, i.e., given bit-set, generate the array?
my own trial out for the first part.
unsigned char p[4]={0};
for (int j=0; j<N; j++)
{
if (q[j] == '1')
{
p [j / 8] |= 1 << (7-(j % 8));
}
}
Is the above right? any conditions to check? Is there any better way?
EDIT - 1
I wonder if above is efficient way? As the array size could be upto 4096 or even more.
First, Use strtoul to get a 32-bit value. Then convert the byte order to big-endian with htonl. Finally, store the result in your array:
#include <arpa/inet.h>
#include <stdlib.h>
/* ... */
unsigned char q[32] = "1100111...";
unsigned char result[4] = {0};
*(unsigned long*)result = htonl(strtoul(q, NULL, 2));
There are other ways as well.
But I lack <arpa/inet.h>!
Then you need to know what byte order your platform is. If it's big endian, then htonl does nothing and can be omitted. If it's little-endian, then htonl is just:
unsigned long htonl(unsigned long x)
{
x = (x & 0xFF00FF00) >> 8) | (x & 0x00FF00FF) << 8);
x = (x & 0xFFFF0000) >> 16) | (x & 0x0000FFFF) << 16);
return x;
}
If you're lucky, your optimizer might see what you're doing and make it into efficient code. If not, well, at least it's all implementable in registers and O(log N).
If you don't know what byte order your platform is, then you need to detect it:
typedef union {
char c[sizeof(int) / sizeof(char)];
int i;
} OrderTest;
unsigned long htonl(unsigned long x)
{
OrderTest test;
test.i = 1;
if(!test.c[0])
return x;
x = (x & 0xFF00FF00) >> 8) | (x & 0x00FF00FF) << 8);
x = (x & 0xFFFF0000) >> 16) | (x & 0x0000FFFF) << 16);
return x;
}
Maybe long is 8 bytes!
Well, the OP implied 4-byte inputs with their array size, but 8-byte long is doable:
#define kCharsPerLong (sizeof(long) / sizeof(char))
unsigned char q[8 * kCharsPerLong] = "1100111...";
unsigned char result[kCharsPerLong] = {0};
*(unsigned long*)result = htonl(strtoul(q, NULL, 2));
unsigned long htonl(unsigned long x)
{
#if kCharsPerLong == 4
x = (x & 0xFF00FF00UL) >> 8) | (x & 0x00FF00FFUL) << 8);
x = (x & 0xFFFF0000UL) >> 16) | (x & 0x0000FFFFUL) << 16);
#elif kCharsPerLong == 8
x = (x & 0xFF00FF00FF00FF00UL) >> 8) | (x & 0x00FF00FF00FF00FFUL) << 8);
x = (x & 0xFFFF0000FFFF0000UL) >> 16) | (x & 0x0000FFFF0000FFFFUL) << 16);
x = (x & 0xFFFFFFFF00000000UL) >> 32) | (x & 0x00000000FFFFFFFFUL) << 32);
#else
#error Unsupported word size.
#endif
return x;
}
For char that isn't 8 bits (DSPs like to do this), you're on your own. (This is why it was a Big Deal when the SHARC series of DSPs had 8-bit bytes; it made it a LOT easier to port existing code because, face it, C does a horrible job of portability support.)
What about arbitrary length buffers? No funny pointer typecasts, please.
The main thing that can be improved with the OP's version is to rethink the loop's internals. Instead of thinking of the output bytes as a fixed data register, think of it as a shift register, where each successive bit is shifted into the right (LSB) end. This will save you from all those divisions and mods (which, hopefully, are optimized away to bit shifts).
For sanity, I'm ditching unsigned char for uint8_t.
#include <stdint.h>
unsigned StringToBits(const char* inChars, uint8_t* outBytes, size_t numBytes,
size_t* bytesRead)
/* Converts the string of '1' and '0' characters in `inChars` to a buffer of
* bytes in `outBytes`. `numBytes` is the number of available bytes in the
* `outBytes` buffer. On exit, if `bytesRead` is not NULL, the value it points
* to is set to the number of bytes read (rounding up to the nearest full
* byte). If a multiple of 8 bits is not read, the last byte written will be
* padded with 0 bits to reach a multiple of 8 bits. This function returns the
* number of padding bits that were added. For example, an input of 11 bits
* will result `bytesRead` being set to 2 and the function will return 5. This
* means that if a nonzero value is returned, then a partial byte was read,
* which may be an error.
*/
{ size_t bytes = 0;
unsigned bits = 0;
uint8_t x = 0;
while(bytes < numBytes)
{ /* Parse a character. */
switch(*inChars++)
{ '0': x <<= 1; ++bits; break;
'1': x = (x << 1) | 1; ++bits; break;
default: numBytes = 0;
}
/* See if we filled a byte. */
if(bits == 8)
{ outBytes[bytes++] = x;
x = 0;
bits = 0;
}
}
/* Padding, if needed. */
if(bits)
{ bits = 8 - bits;
outBytes[bytes++] = x << bits;
}
/* Finish up. */
if(bytesRead)
*bytesRead = bytes;
return bits;
}
It's your responsibility to make sure inChars is null-terminated. The function will return on the first non-'0' or '1' character it sees or if it runs out of output buffer. Some example usage:
unsigned char q[32] = "1100111...";
uint8_t buf[4];
size_t bytesRead = 5;
if(StringToBits(q, buf, 4, &bytesRead) || bytesRead != 4)
{
/* Partial read; handle error here. */
}
This just reads 4 bytes, and traps the error if it can't.
unsigned char q[4096] = "1100111...";
uint8_t buf[512];
StringToBits(q, buf, 512, NULL);
This just converts what it can and sets the rest to 0 bits.
This function could be done better if C had the ability to break out of more than one level of loop or switch; as it stands, I'd have to add a flag value to get the same effect, which is clutter, or I'd have to add a goto, which I simply refuse.
I don't think that will quite work. You are comparing each "bit" to 1 when it should really be '1'. You can also make it a bit more efficient by getting rid of the if:
unsigned char p[4]={0};
for (int j=0; j<32; j++)
{
p [j / 8] |= (q[j] == `1`) << (7-(j % 8));
}
Going in reverse is pretty simple too. Just mask for each "bit" that you set earlier.
unsigned char q[32]={0};
for (int j=0; j<32; j++) {
q[j] = p[j / 8] & ( 1 << (7-(j % 8)) ) + '0';
}
You'll notice the creative use of (boolean) + '0' to convert between 1/0 and '1'/'0'.
According to your example it does not look like you are going for readability, and after a (late) refresh my solution looks very similar to Chriszuma except for the lack of parenthesis due to order of operations and the addition of the !! to enforce a 0 or 1.
const size_t N = 32; //N must be a multiple of 8
unsigned char q[N+1] = "11011101001001101001111110000111";
unsigned char p[N/8] = {0};
unsigned char r[N+1] = {0}; //reversed
for(size_t i = 0; i < N; ++i)
p[i / 8] |= (q[i] == '1') << 7 - i % 8;
for(size_t i = 0; i < N; ++i)
r[i] = '0' + !!(p[i / 8] & 1 << 7 - i % 8);
printf("%x %x %x %x\n", p[0], p[1], p[2], p[3]);
printf("%s\n%s\n", q,r);
If you are looking for extreme efficiency, try to use the following techniques:
Replace if by subtraction of '0' (seems like you can assume your input symbols can be only 0 or 1).
Also process the input from lower indices to higher ones.
for (int c = 0; c < N; c += 8)
{
int y = 0;
for (int b = 0; b < 8; ++b)
y = y * 2 + q[c + b] - '0';
p[c / 8] = y;
}
Replace array indices by auto-incrementing pointers:
const char* qptr = q;
unsigned char* pptr = p;
for (int c = 0; c < N; c += 8)
{
int y = 0;
for (int b = 0; b < 8; ++b)
y = y * 2 + *qptr++ - '0';
*pptr++ = y;
}
Unroll the inner loop:
const char* qptr = q;
unsigned char* pptr = p;
for (int c = 0; c < N; c += 8)
{
*pptr++ =
qptr[0] - '0' << 7 |
qptr[1] - '0' << 6 |
qptr[2] - '0' << 5 |
qptr[3] - '0' << 4 |
qptr[4] - '0' << 3 |
qptr[5] - '0' << 2 |
qptr[6] - '0' << 1 |
qptr[7] - '0' << 0;
qptr += 8;
}
Process several input characters simultaneously (using bit twiddling hacks or MMX instructions) - this has great speedup potential!

Resources