How to write only 12 bits to a char array in C? - c

I'm trying to implement a FAT12 file system in which there's a FAT table data structure which is an unsigned char array. I need to write a function which given an array index would write a value to the next 12 bits (because it's FAT12) which is quite tricky because part of the value needs to go to one byte and the other part needs to go the half of the second byte.
This is the get value function I came up with:
//FAT is the unsigned char array
int GetFatEntry(int FATindex, unsigned char * FAT) {
unsigned int FATEntryCode; // The return value
// Calculate the offset of the WORD to get
int FatOffset = ((FATindex * 3) / 2);
if (FATindex % 2 == 1){ // If the index is odd
FATEntryCode = ((unsigned char)(&FAT[FatOffset])[0] + (((unsigned char)(&FAT[FatOffset])[1]) << 8));
FATEntryCode >>= 4; // Extract the high-order 12 bits
}
else{ // If the index is even
FATEntryCode = ((unsigned char)(&FAT[FatOffset])[0] + (((unsigned char)(&FAT[FatOffset])[1]) << 8));
FATEntryCode &= 0x0fff; // Extract the low-order 12 bits
}
return FATEntryCode;
}
I'm struggling to come up with the function which would set a value given a FATindex. I would appreciate any suggestions.

This seems to work. The data that should be written should be in the first 12 bits of data
void WriteFatEntry(int FATindex, unsigned char * FAT, unsigned char data[2]) {
// Calculate the offset of the WORD to get
int FatOffset = ((FATindex * 3) / 2);
unsigned char d;
if (FATindex % 2 != 0){ // If the index is odd
// Copy from data to d and e, and shift everything so that second half of
// e contains first half of data[1], and first half of e contains second
// half of data[0], while second half of d contains first half of data[0].
// First half of d contains a copy of first four bits in FAT[FatOffset]
// so that nothing changes when it gets written
unsigned char e=data[1];
e>>=4;
d=data[0];
e|=(d<<4) & 0b11110000;
d>>=4;
d |= FAT[FatOffset] & 0b11110000;
FAT[FatOffset]=d;
FAT[FatOffset+1] = e;
}
else{ // If the index is even
d = data[1] & 0b11110000;
d |= FAT[FatOffset+1] & 0b00001111;
FAT[FatOffset] = data[0];
FAT[FatOffset+1] = d;
}
}

#include <stdio.h>
#if 1 /* assuming MSB first */
#define MSB (idx)
#define LSB (idx+1)
#else /* assuming LSB first */
#define MSB (idx+1)
#define LSB (idx)
#endif
unsigned fat_getval(unsigned char * tab, unsigned num)
{
unsigned idx;
unsigned val;
idx = num + num/2;
val = (tab[MSB] <<8 ) + (tab[idx+1] ) ;
if (num %2 ==0) val >>= 4;
return val & 0xfff;
}
void fat_putval(unsigned char * tab, unsigned slot, unsigned val)
{
unsigned idx;
idx = slot + slot/2;
if (slot %2 ==0) { /* xyz_ */
val <<= 4;
val |= tab[LSB] & 0xf;
}
else { /* _xyz */
val |= (tab[MSB] & 0xf0) << 8;
}
tab[MSB] = val >>8;
tab[LSB] = val &0xff;
}
#undef MSB
#undef LSB
unsigned char fattable[] = "\x01\x23\x45\x67\x89\xab"; // 12 nibbles
int main(void)
{
unsigned idx, ret;
for (idx = 0; idx < 6; idx++) { // 6 bytes -> 12 nibbles */
printf(" %02x", fattable[idx] );
}
printf("\n");
printf("Put(0,0xabc):\n");
fat_putval(fattable, 0, 0xabc);
for (idx = 0; idx < 6; idx++) {
printf(" %02x", fattable[idx] );
}
printf("\n");
printf("Put(3,0xdef):\n");
fat_putval(fattable, 3, 0xdef);
for (idx = 0; idx < 6; idx++) {
printf(" %02x", fattable[idx] );
}
printf("\n");
printf("Get(0 to 4):\n");
for (idx = 0; idx < 4; idx++) { // 12 / 3 ~> 4 * 12bit entries
ret = fat_getval( fattable, idx);
printf("%u := %x\n", idx, ret );
}
printf("\n");
return 0;
}

Related

Writing 9-bit values to byte array (or EEPROM) without wasting remaining bit in the following byte

I could not found an answer with google so i went for it and programmed quite a few hours.
I want to save 9-bit values to eeprom without wasting the other 7 bits.
I save values that would be up to 500 and i have not much EEPROM left.
The same principle can be applied to arrays, which i did just to not waer down the EEPROM.
So I made this little program:
/*
* Write only a certain number of bits to EEPROM.
*
* keeps the other bit in the byte of the eeprom as they are.
*
* Working version with 9 bits:
* 2019-10-03 15:57
* 2019-10-03 22:09 tested with chars too
* 2019-10-04 08:25 works with 7 bit chars also!
* 2019-10-04 12:27 fixed the combining of oldByte and new values in writeBitsToEEPROM(), because chars like 'รถ' altered previous bit (left side) that should not have been altered.
*
*/
#include "arduino.h"
#include "EEPROM.h"
#include "math.h"
#define BIT_BLOCKS_COUNT 15
#define BLOCK_BYTE_COUNT 17
#define ARRAY_SIZE BLOCK_BYTE_COUNT+2
//TODO: change back to original value
#define EEPROM_SIZE ARRAY_SIZE
byte fakeEEPROM[ARRAY_SIZE] = {0};
String byteToString(byte value){
char byteChar[9];
byteChar[8] = '\0'; //we need a terminator
for(int i=7; i>=0; i--){
byteChar[7-i] = (value & (1 << i)) ? '1' : '0';
}
return String(byteChar);
}
String byteToString(unsigned long value, byte bytesToRead){
String str1 = byteToString(value >> 8);
String str2 = byteToString(value & 0xFF);
return str1 + " " + str2;
}
int globBlockStartAdress = 0;
byte globNumberOfBits = 0;
int globBlockSizeBytes = 0;
bool initBitBlock(int blockStartAdress, int blockCount, byte numberOfBits) {
globBlockStartAdress = blockStartAdress;
globNumberOfBits = numberOfBits;
// calc needed number of bytes and roud up
int tempBlockSize = blockCount * numberOfBits / 8;
if(blockCount * numberOfBits % 8)
tempBlockSize++;
// make number of bytes even
if(tempBlockSize % 2)
tempBlockSize++;
globBlockSizeBytes = tempBlockSize;
if(blockStartAdress + globBlockSizeBytes > EEPROM_SIZE)
return false;
return true;
}
/*
* Writes 1 to 9 bits to "internalAdress" within a designated block in eeprom
*/
void writeBitsToEEPROM(unsigned int bitsToBeWritten, int internalAdress){
//TODO: check if value is not higher than what can be stored
// if(bitsToBeWritten){
//
// }
int trueEEPROMAdress = globBlockStartAdress + internalAdress * globNumberOfBits / 8;
if(trueEEPROMAdress + 1 >= ARRAY_SIZE || internalAdress * globNumberOfBits / 8 >= globBlockSizeBytes){
Serial.print("globBlockSizeBytes: ");
Serial.println(globBlockSizeBytes);
Serial.println("FEHLER writeBitsToEEPROMWTF: ");
Serial.println(trueEEPROMAdress + 1);
Serial.println(internalAdress * globNumberOfBits / 8 );
}
byte startBitOfEEPROMByte = (internalAdress * globNumberOfBits) % 8;
unsigned int oldIntFromEEPROM = (fakeEEPROM[trueEEPROMAdress] << 8) | fakeEEPROM[trueEEPROMAdress + 1];
//Todo: change to eeprom
//filter out only the bits that need to be kept.
//EEPROM.get(trueEEPROMAdress, oldEEPROMByteBits);
// there might be bits in the byte that we dont want to change. left side and right side
unsigned int mask1KeepFromEEPROM = (0xFFFF << (16 - startBitOfEEPROMByte));
unsigned int mask2KeepFromEEPROM = (0xFFFF >> (startBitOfEEPROMByte + globNumberOfBits));
//if(16 - startBitOfEEPROMByte - numberOfBits > 0)
//mask2KeepFromEEPROM= (0xFFFF >> (startBitOfEEPROMByte + numberOfBits));
// masks combined
unsigned int maskIntToKeepFromEEPROM = mask1KeepFromEEPROM | mask2KeepFromEEPROM;
int newEEPROMInt = (oldIntFromEEPROM & maskIntToKeepFromEEPROM) | ((bitsToBeWritten << (16 - globNumberOfBits - startBitOfEEPROMByte) & ~maskIntToKeepFromEEPROM));
//Todo: change to eeprom
//write
//EEPROM.update(trueEEPROMAdress, newEEPROMByteBitsA);
fakeEEPROM[trueEEPROMAdress] = (newEEPROMInt >> 8);
fakeEEPROM[trueEEPROMAdress + 1] = (byte) newEEPROMInt;
if(trueEEPROMAdress + 1 > BLOCK_BYTE_COUNT){
Serial.println("FEHLER writeBitsToEEPROM");
Serial.println(trueEEPROMAdress + 1);
Serial.println("blockStartAdress");
Serial.println(globBlockStartAdress);
Serial.println("internalAdress");
Serial.println(internalAdress);
Serial.println("numberOfBits");
Serial.println(globNumberOfBits);
}
// Serial.print("trueEEPROMAdress: ");
// Serial.println(trueEEPROMAdress);
//
// Serial.print("internalAdress: ");
// Serial.println(internalAdress);
//
// Serial.print("globNumberOfBits: ");
// Serial.println(globNumberOfBits);
//
// Serial.print("bitsToBeWritten: ");
// Serial.println(byteToString(bitsToBeWritten,2));
//
// Serial.print(" mask1KeepFromEEPROM: ");
// Serial.println(byteToString(mask1KeepFromEEPROM,2));
//
// Serial.print("mask2KeepFromEEPROM: ");
// Serial.println(byteToString(mask2KeepFromEEPROM,2));
//
// Serial.print("maskIntToKeepFromEEPROM: ");
// Serial.println(byteToString(maskIntToKeepFromEEPROM,2));
//
// Serial.print("oldIntFromEEPROM: ");
// Serial.println(byteToString(oldIntFromEEPROM,2));
//
// Serial.print("newEEPROMInt: ");
// Serial.println(byteToString(newEEPROMInt,2));
//
// Serial.print("512: ");
// Serial.println(byteToString(512, 2));
//
// Serial.print("65535: ");
// Serial.println(byteToString(65535, 2));
}
unsigned int ReadBitsFromEEPROM(int internalAdress){
int trueEEPROMAdress = globBlockStartAdress + internalAdress * globNumberOfBits / 8;
byte startBitOfEEPROMByte = (internalAdress * globNumberOfBits) % 8;
if(trueEEPROMAdress + 1 > BLOCK_BYTE_COUNT)
Serial.println("FEHLER readBits");
unsigned int oldIntFromEEPROM = (fakeEEPROM[trueEEPROMAdress] << 8) | fakeEEPROM[trueEEPROMAdress + 1];
//Todo: change to eeprom
//filter out only the bits that need to be kept.
//EEPROM.get(trueEEPROMAdress, oldEEPROMByteBits);
unsigned int mask1KeepFromEEPROM = (0xFFFF << (16 - startBitOfEEPROMByte));
unsigned int mask2KeepFromEEPROM = (0xFFFF >> (startBitOfEEPROMByte + globNumberOfBits));
unsigned int maskIntToKeepFromEEPROM = mask1KeepFromEEPROM | mask2KeepFromEEPROM;
unsigned int valueFromEEPROM = ~maskIntToKeepFromEEPROM & oldIntFromEEPROM;
// Serial.print("trueEEPROMAdress: ");
// Serial.println(trueEEPROMAdress);
//
// Serial.print("internalAdress: ");
// Serial.println(internalAdress);
//
// Serial.print("numberOfBits: ");
// Serial.println(numberOfBits);
//
// Serial.print(" mask1KeepFromEEPROM: ");
// Serial.println(byteToString(mask1KeepFromEEPROM,2));
//
// Serial.print("mask2KeepFromEEPROM: ");
// Serial.println(byteToString(mask2KeepFromEEPROM,2));
////
// Serial.print("maskIntToKeepFromEEPROM: ");
// Serial.println(byteToString(maskIntToKeepFromEEPROM,2));
////
// Serial.print("oldIntFromEEPROM: ");
// Serial.println(byteToString(oldIntFromEEPROM,2));
return (valueFromEEPROM >> (16 - globNumberOfBits - startBitOfEEPROMByte));
}
void setup() {
Serial.begin(57600);
Serial.print(F("\n# Programversion: "));
Serial.print(__TIME__);
Serial.print(" ");
Serial.println(__DATE__);
Serial.println("Setup finished");
delay(1000);
}
void printEEPROM(){
for(int i = 0; i < ARRAY_SIZE; i++){
byte b;
//Todo: change to eeprom
//EEPROM.get(i, b);
b = fakeEEPROM[i];
Serial.print(byteToString(b));
Serial.print(" ");
}
Serial.println();
}
void testNumbers() {
Serial.println("bits?");
while( ! Serial.available());
String input = Serial.readString();
unsigned int value = input.toInt();
initBitBlock(1, 15, 9);
// Serial.print("value: ");
// Serial.println(byteToString(value));
for(int i = 0; i < BIT_BLOCKS_COUNT;i++){
for(int j = 0; j < BLOCK_BYTE_COUNT; j++){
fakeEEPROM[j] = 0xFF;
if(j > BLOCK_BYTE_COUNT)
Serial.println("FEHLER testNumbers");
}
// Serial.print("EEPROM before: ");
// printEEPROM();
writeBitsToEEPROM(value, i);
Serial.print("Returned: ");
Serial.println(ReadBitsFromEEPROM(i));
// Serial.print("EEPROM after: ");
// printEEPROM();
// Serial.println();
}
delay(1000);
}
#define CHAR_COUNT 16
void testChars() {
// Serial.println("bits?");
// while( ! Serial.available());
// String input = Serial.readString();
//
// unsigned int value = input.toInt();
initBitBlock(1, CHAR_COUNT, 7);
Serial.println("string?");
while( ! Serial.available());
String input = Serial.readString();
Serial.println(input);
char testString[CHAR_COUNT] = {'\0'};
input.toCharArray(testString, CHAR_COUNT, 0);
for(int j = 0; j < ARRAY_SIZE; j++){
fakeEEPROM[j] = 0;//xFF;
}
for(int i = 0; i < CHAR_COUNT; i++){
Serial.print("EEPROM before: ");
printEEPROM();
writeBitsToEEPROM(testString[i], i);
Serial.print("EEPROM after: ");
printEEPROM();
Serial.println();
}
Serial.println("Returned: ");
for(int i = 0; i < CHAR_COUNT; i++){
Serial.print((char) ReadBitsFromEEPROM(i));
}
Serial.println();
delay(1000);
}
void loop(){
testChars();
testNumbers();
}
which of course it not complete. Its just for saving those 9-bit values.
My question is: Has anyone else programmed a function like this - or knows where to find this - that is not limited to 9 bits (10 bits will span over 3 bytes)?
This function should take the number of bits given by bitsPerVal from each value in the input array pVals and pack them into the byte array pointed to by pOutBytes:
#include <stdint.h>
void pack_bits(uint32_t *pVals, size_t numVals, int bitsPerVal, uint8_t *pOutBytes)
{
uint32_t mask = ~(UINT32_MAX << bitsPerVal);
int outBitsLeft = 8;
int inBitsLeft = bitsPerVal;
while(numVals > 0)
{
if(inBitsLeft > outBitsLeft)
{
inBitsLeft -= outBitsLeft;
*pOutBytes |= (*pVals & mask) >> inBitsLeft;
mask >>= outBitsLeft;
outBitsLeft = 0;
}
else
{
outBitsLeft -= inBitsLeft;
*pOutBytes |= (*pVals & mask) << outBitsLeft;
mask = ~(UINT32_MAX << bitsPerVal);
inBitsLeft = bitsPerVal;
--numVals;
++pVals;
}
if(0 == outBitsLeft)
{
outBitsLeft = 8;
++pOutBytes;
}
}
}
The array pointed to by pOutBytes must suitably sized (ie ((numVals*bitsPerVal) + 7) / 8) and initialised to zero before calling. You can write it to your EEPROM after.
Hopefully this works well, I have done much testing on it though.
Here is an example of how 10 bits (actually 16-bits when written...) from 2 different fields could write to 16-bits of output.
struct EEPROM_Output
{
uint16_t a : 9; // 0 - 511 can be stored here
uint16_t b : 1; // 0 or 1 here.
uint16_t pad : 6; // Future use - we place this here to make it obvious that there are bits remaining.
};
void foo()
{
struct EEPROM_Output save;
save.a = 100;
save.b = 1;
WriteToEEPROM(&save, sizeof(save));
}

Extract 14-bit values from an array of bytes in C

In an arbitrary-sized array of bytes in C, I want to store 14-bit numbers (0-16,383) tightly packed. In other words, in the sequence:
0000000000000100000000000001
there are two numbers that I wish to be able to arbitrarily store and retrieve into a 16-bit integer. (in this case, both of them are 1, but could be anything in the given range) If I were to have the functions uint16_t 14bitarr_get(unsigned char* arr, unsigned int index) and void 14bitarr_set(unsigned char* arr, unsigned int index, uint16_t value), how would I implement those functions?
This is not for a homework project, merely my own curiosity. I have a specific project that this would be used for, and it is the key/center of the entire project.
I do not want an array of structs that have 14-bit values in them, as that generates waste bits for every struct that is stored. I want to be able to tightly pack as many 14-bit values as I possibly can into an array of bytes. (e.g.: in a comment I made, putting as many 14-bit values into a chunk of 64 bytes is desirable, with no waste bits. the way those 64 bytes work is completely tightly packed for a specific use case, such that even a single bit of waste would take away the ability to store another 14 bit value)
Well, this is bit fiddling at its best. Doing it with an array of bytes makes it more complicated than it would be with larger elements because a single 14 bit quantity can span 3 bytes, where uint16_t or anything bigger would require no more than two. But I'll take you at your word that this is what you want (no pun intended). This code will actually work with the constant set to anything 8 or larger (but not over the size of an int; for that, additional type casts are needed). Of course the value type must be adjusted if larger than 16.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define W 14
uint16_t arr_get(unsigned char* arr, size_t index) {
size_t bit_index = W * index;
size_t byte_index = bit_index / 8;
unsigned bit_in_byte_index = bit_index % 8;
uint16_t result = arr[byte_index] >> bit_in_byte_index;
for (unsigned n_bits = 8 - bit_in_byte_index; n_bits < W; n_bits += 8)
result |= arr[++byte_index] << n_bits;
return result & ~(~0u << W);
}
void arr_set(unsigned char* arr, size_t index, uint16_t value) {
size_t bit_index = W * index;
size_t byte_index = bit_index / 8;
unsigned bit_in_byte_index = bit_index % 8;
arr[byte_index] &= ~(0xff << bit_in_byte_index);
arr[byte_index++] |= value << bit_in_byte_index;
unsigned n_bits = 8 - bit_in_byte_index;
value >>= n_bits;
while (n_bits < W - 8) {
arr[byte_index++] = value;
value >>= 8;
n_bits += 8;
}
arr[byte_index] &= 0xff << (W - n_bits);
arr[byte_index] |= value;
}
int main(void) {
int mod = 1 << W;
int n = 50000;
unsigned x[n];
unsigned char b[2 * n];
for (int tries = 0; tries < 10000; tries++) {
for (int i = 0; i < n; i++) {
x[i] = rand() % mod;
arr_set(b, i, x[i]);
}
for (int i = 0; i < n; i++)
if (arr_get(b, i) != x[i])
printf("Err #%d: %d should be %d\n", i, arr_get(b, i), x[i]);
}
return 0;
}
Faster versions Since you said in comments that performance is an issue: open coding the loops gives a roughly 10% speed improvement on my machine on the little test driver included in the original. This includes random number generation and testing, so perhaps the primitives are 20% faster. I'm confident that 16- or 32-bit array elements would give further improvements because byte access is expensive:
uint16_t arr_get(unsigned char* a, size_t i) {
size_t ib = 14 * i;
size_t iy = ib / 8;
switch (ib % 8) {
case 0:
return (a[iy] | (a[iy+1] << 8)) & 0x3fff;
case 2:
return ((a[iy] >> 2) | (a[iy+1] << 6)) & 0x3fff;
case 4:
return ((a[iy] >> 4) | (a[iy+1] << 4) | (a[iy+2] << 12)) & 0x3fff;
}
return ((a[iy] >> 6) | (a[iy+1] << 2) | (a[iy+2] << 10)) & 0x3fff;
}
#define M(IB) (~0u << (IB))
#define SETLO(IY, IB, V) a[IY] = (a[IY] & M(IB)) | ((V) >> (14 - (IB)))
#define SETHI(IY, IB, V) a[IY] = (a[IY] & ~M(IB)) | ((V) << (IB))
void arr_set(unsigned char* a, size_t i, uint16_t val) {
size_t ib = 14 * i;
size_t iy = ib / 8;
switch (ib % 8) {
case 0:
a[iy] = val;
SETLO(iy+1, 6, val);
return;
case 2:
SETHI(iy, 2, val);
a[iy+1] = val >> 6;
return;
case 4:
SETHI(iy, 4, val);
a[iy+1] = val >> 4;
SETLO(iy+2, 2, val);
return;
}
SETHI(iy, 6, val);
a[iy+1] = val >> 2;
SETLO(iy+2, 4, val);
}
Another variation
This is quite a bit faster yet on my machine, about 20% better than above:
uint16_t arr_get2(unsigned char* a, size_t i) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned buf = a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
return (buf >> (ib % 8)) & 0x3fff;
}
void arr_set2(unsigned char* a, size_t i, unsigned val) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned buf = a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
unsigned io = ib % 8;
buf = (buf & ~(0x3fff << io)) | (val << io);
a[iy] = buf;
a[iy+1] = buf >> 8;
a[iy+2] = buf >> 16;
}
Note that for this code to be safe you should allocate one extra byte at the end of the packed array. It always reads and writes 3 bytes even when the desired 14 bits are in the first 2.
One more variation Finally, this runs just a bit slower than the one above (again on my machine; YMMV), but you don't need the extra byte. It uses one comparison per operation:
uint16_t arr_get2(unsigned char* a, size_t i) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned io = ib % 8;
unsigned buf = ib % 8 <= 2
? a[iy] | (a[iy+1] << 8)
: a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
return (buf >> io) & 0x3fff;
}
void arr_set2(unsigned char* a, size_t i, unsigned val) {
size_t ib = i * 14;
size_t iy = ib / 8;
unsigned io = ib % 8;
if (io <= 2) {
unsigned buf = a[iy] | (a[iy+1] << 8);
buf = (buf & ~(0x3fff << io)) | (val << io);
a[iy] = buf;
a[iy+1] = buf >> 8;
} else {
unsigned buf = a[iy] | (a[iy+1] << 8) | (a[iy+2] << 16);
buf = (buf & ~(0x3fff << io)) | (val << io);
a[iy] = buf;
a[iy+1] = buf >> 8;
a[iy+2] = buf >> 16;
}
}
The easiest solution is to use a struct of eight bitfields:
typedef struct __attribute__((__packed__)) EightValues {
uint16_t v0 : 14,
v1 : 14,
v2 : 14,
v3 : 14,
v4 : 14,
v5 : 14,
v6 : 14,
v7 : 14;
} EightValues;
This struct has a size of 14*8 = 112 bits, which is 14 bytes (seven uint16_t). Now, all you need is to use the last three bits of the array index to select the right bitfield:
uint16_t 14bitarr_get(unsigned char* arr, unsigned int index) {
EightValues* accessPointer = (EightValues*)arr;
accessPointer += index >> 3; //select the right structure in the array
switch(index & 7) { //use the last three bits of the index to access the right bitfield
case 0: return accessPointer->v0;
case 1: return accessPointer->v1;
case 2: return accessPointer->v2;
case 3: return accessPointer->v3;
case 4: return accessPointer->v4;
case 5: return accessPointer->v5;
case 6: return accessPointer->v6;
case 7: return accessPointer->v7;
}
}
Your compiler will do the bit-fiddling for you.
The Basis for Storage Issue
The biggest issue you are facing is the fundamental question of "What is my basis for storage going to be?" You know the basics, what you have available to you is char, short, int, etc... The smallest being 8-bits. No matter how you slice your storage scheme, it will ultimately have to rest in memory in a unit of memory based on this 8 bit per byte layout.
The only optimal, no bits wasted, memory allocation would be to declare an array of char in the least common multiple of 14-bits. It is the full 112-bits in this case (7-shorts or 14-chars). This may be the best option. Here, declaring an array of 7-shorts or 14-chars, would allow the exact storage of 8 14-bit values. Of course if you have no need for 8 of them, then it wouldn't be of much use anyway as it would waste more than the 4-bits lost on a single unsigned value.
Let me know if this is something you would like to further explore. If it is, I'm happy to help with the implementation.
Bitfield Struct
The comments regarding bitfield packing or bit packing are exactly what you need to do. This can involve a structure alone or in combination with a union, or by manually right/left shifting values directly as needed.
A short example applicable to your situation (if I understood correctly you want 2 14-bit areas in memory) would be:
#include <stdio.h>
typedef struct bitarr14 {
unsigned n1 : 14,
n2 : 14;
} bitarr14;
char *binstr (unsigned long n, size_t sz);
int main (void) {
bitarr14 mybitfield;
mybitfield.n1 = 1;
mybitfield.n2 = 1;
printf ("\n mybitfield in memory : %s\n\n",
binstr (*(unsigned *)&mybitfield, 28));
return 0;
}
char *binstr (unsigned long n, size_t sz)
{
static char s[64 + 1] = {0};
char *p = s + 64;
register size_t i = 0;
for (i = 0; i < sz; i++) {
p--;
*p = (n >> i & 1) ? '1' : '0';
}
return p;
}
Output
$ ./bin/bitfield14
mybitfield in memory : 0000000000000100000000000001
Note: the dereference of mybitfield for purposes of printing the value in memory breaks strict aliasing and it is intentional just for the purpose of the output example.
The beauty, and purpose for using a struct in the manner provided is it will allow direct access to each 14-bit part of the struct directly, without having to manually shift, etc.
Update - assuming you want big endian bit packing. This is code meant for a fixed size code word. It's based on code I've used for data compression algorithms. The switch case and fixed logic helps with performance.
typedef unsigned short uint16_t;
void bit14arr_set(unsigned char* arr, unsigned int index, uint16_t value)
{
unsigned int bitofs = (index*14)%8;
arr += (index*14)/8;
switch(bitofs){
case 0: /* bit offset == 0 */
*arr++ = (unsigned char)(value >> 6);
*arr &= 0x03;
*arr |= (unsigned char)(value << 2);
break;
case 2: /* bit offset == 2 */
*arr &= 0xc0;
*arr++ |= (unsigned char)(value >> 8);
*arr = (unsigned char)(value << 0);
break;
case 4: /* bit offset == 4 */
*arr &= 0xf0;
*arr++ |= (unsigned char)(value >> 10);
*arr++ = (unsigned char)(value >> 2);
*arr &= 0x3f;
*arr |= (unsigned char)(value << 6);
break;
case 6: /* bit offset == 6 */
*arr &= 0xfc;
*arr++ |= (unsigned char)(value >> 12);
*arr++ = (unsigned char)(value >> 4);
*arr &= 0x0f;
*arr |= (unsigned char)(value << 4);
break;
}
}
uint16_t bit14arr_get(unsigned char* arr, unsigned int index)
{
unsigned int bitofs = (index*14)%8;
unsigned short value;
arr += (index*14)/8;
switch(bitofs){
case 0: /* bit offset == 0 */
value = ((unsigned int)(*arr++) ) << 6;
value |= ((unsigned int)(*arr ) ) >> 2;
break;
case 2: /* bit offset == 2 */
value = ((unsigned int)(*arr++)&0x3f) << 8;
value |= ((unsigned int)(*arr ) ) >> 0;
break;
case 4: /* bit offset == 4 */
value = ((unsigned int)(*arr++)&0x0f) << 10;
value |= ((unsigned int)(*arr++) ) << 2;
value |= ((unsigned int)(*arr ) ) >> 6;
break;
case 6: /* bit offset == 6 */
value = ((unsigned int)(*arr++)&0x03) << 12;
value |= ((unsigned int)(*arr++) ) << 4;
value |= ((unsigned int)(*arr ) ) >> 4;
break;
}
return value;
}
Here's my version (updated to fix bugs):
#define PACKWID 14 // number of bits in packed number
#define PACKMSK ((1 << PACKWID) - 1)
#ifndef ARCHBYTEALIGN
#define ARCHBYTEALIGN 1 // align to 1=bytes, 2=words
#endif
#define ARCHBITALIGN (ARCHBYTEALIGN * 8)
typedef unsigned char byte;
typedef unsigned short u16;
typedef unsigned int u32;
typedef long long s64;
typedef u16 pcknum_t; // container for packed number
typedef u32 acc_t; // working accumulator
#ifndef ARYOFF
#define ARYOFF long
#endif
#define PRT(_val) ((unsigned long) _val)
typedef unsigned ARYOFF aryoff_t; // bit offset
// packary -- access array of packed numbers
// RETURNS: old value
extern inline pcknum_t
packary(byte *ary,aryoff_t idx,int setflg,pcknum_t newval)
// ary -- byte array pointer
// idx -- index into array (packed number relative)
// setflg -- 1=set new value, 0=just get old value
// newval -- new value to set (if setflg set)
{
aryoff_t absbitoff;
aryoff_t bytoff;
aryoff_t absbitlhs;
acc_t acc;
acc_t nval;
int shf;
acc_t curmsk;
pcknum_t oldval;
// get the absolute bit number for the given array index
absbitoff = idx * PACKWID;
// get the byte offset of the lowest byte containing the number
bytoff = absbitoff / ARCHBITALIGN;
// get absolute bit offset of first containing byte
absbitlhs = bytoff * ARCHBITALIGN;
// get amount we need to shift things by:
// (1) our accumulator
// (2) values to set/get
shf = absbitoff - absbitlhs;
#ifdef MODSHOW
do {
static int modshow;
if (modshow > 50)
break;
++modshow;
printf("packary: MODSHOW idx=%ld shf=%d bytoff=%ld absbitlhs=%ld absbitoff=%ld\n",
PRT(idx),shf,PRT(bytoff),PRT(absbitlhs),PRT(absbitoff));
} while (0);
#endif
// adjust array pointer to the portion we want (guaranteed to span)
ary += bytoff * ARCHBYTEALIGN;
// fetch the number + some other bits
acc = *(acc_t *) ary;
// get the old value
oldval = (acc >> shf) & PACKMSK;
// set the new value
if (setflg) {
// get shifted mask for packed number
curmsk = PACKMSK << shf;
// remove the old value
acc &= ~curmsk;
// ensure caller doesn't pass us a bad value
nval = newval;
#if 0
nval &= PACKMSK;
#endif
nval <<= shf;
// add in the value
acc |= nval;
*(acc_t *) ary = acc;
}
return oldval;
}
pcknum_t
int_get(byte *ary,aryoff_t idx)
{
return packary(ary,idx,0,0);
}
void
int_set(byte *ary,aryoff_t idx,pcknum_t newval)
{
packary(ary,idx,1,newval);
}
Here are benchmarks:
set: 354740751 7.095 -- gene
set: 203407176 4.068 -- rcgldr
set: 298946533 5.979 -- craig
get: 268574627 5.371 -- gene
get: 166839767 3.337 -- rcgldr
get: 207764612 4.155 -- craig

Bitmask understanding in C program

I have a program that my professor gave me for a HW, and I want to see if any of y'all can explain me how bits work. Note: I don't want you guys to give me the answer; I want to learn so if you guys can explain me how this work would be awesome so I can go ahead an start on my hw.
Instructions:
a) unsigned setbits (unsigned x, int p, int n, unsigned y) that returns x with the n bits that begin at position p (right-adjusted) set to the rightmost n bits of y, leaving the other bits unchanged. Note: it does not change the values of x and y though.
b) unsigned invertbits (unsigned x, int p, int n) that returns x with the n bits that begin at position p (right-adjusted) inverted, i.e. 1 changed to 0 and vice versa, leaving the other bits unchanged. Note: it does not change the value of x though.
#include <stdio.h>
#include <limits.h>
void bit_print(int);
int pack(char, char, char, char);
char unpack(int, int);
unsigned getbits(unsigned, int, int);
void bit_print(int a){
int i;
int n = sizeof(int) * CHAR_BIT;
int mask = 1 << (n-1); // mask = 100...0
for (i=1; i<=n; i++){
putchar(((a & mask) == 0)? '0' : '1');
a <<= 1;
if (i % CHAR_BIT == 0 && i < n)
putchar(' ');
}
putchar('\n');
}
int pack(char a, char b, char c, char d){
int p=a;
p = (p << CHAR_BIT) | b;
p = (p << CHAR_BIT) | c;
p = (p << CHAR_BIT) | d;
return p;
}
char unpack(int p, int k){ // k=0, 1, 2, or 3
int n = k * CHAR_BIT; // n = 0, 8, 16, 24
unsigned mask = 255; // mask = low-order byte
mask <<= n;
return ((p & mask) >> n);
}
// getbits() extracts n bits from position p(start counting from the right-most bit) in x
unsigned getbits(unsigned x, int p, int n){
unsigned temp = x >> (p+1-n);
unsigned mask = 0;
mask = ~mask;
mask = mask << n;
mask = ~mask;
return temp & mask;
// return (x >> (p+1-n)) & ~(~0<<n);
}
int main(){
int x = 19;
printf("The binary rep. of %d is:\n", x);
bit_print(x);
int p=pack('w', 'x', 'y', 'z');
printf("\n'w', 'x', 'y', and 'z' packed together is equal to %d. Its binary rep. is:\n", p);
bit_print(p);
printf("calling unpack(p, 0) to extract the byte # 0 from the right:\n");
bit_print(unpack(p, 0));
printf("calling unpack(p, 1) to extract the byte # 1 from the right:\n");
bit_print(unpack(p, 1));
printf("calling unpack(p, 2) to extract the byte # 2 from the right:\n");
bit_print(unpack(p, 2));
printf("calling unpack(p, 3) to extract the byte # 3 from the right:\n");
bit_print(unpack(p, 3));
unsigned result = getbits(p, 20, 7);
printf("\ncalling getbits(p, 20, 7) to extract 7 bits from bit # 20 returns %d:\n", result);
bit_print(result);
return 0;
}
Using bitwise AND & , OR |, XOR ^, NOT ~ and a proper bit mask you can manipulate bits inside a variable. You will also need bit shifts >> and <<.
So let us have an example:
Let's take a 8bit var x = 0xff and try to invert its 3'rd bit:
unsigned char x = 0xff; // Our var
unsigned char mask = 1<<3; // Our mask
x = x & ~mask; // Invert mask so its value is b1111_0111
// and make a bitwise AND with x
Every bit in x keeps its value if there is 1 in a mask, and turns into 0 when masks bit value is 0. Now x value is x = 0xf7.
Using other operators you can do whatever you want with bits :)
So for example yours unpack function does:
char unpack(int p, int k){ // k - byte offset
int n = k * CHAR_BIT; // n - bit offset (k * 8)
unsigned mask = 255; // mask with all ones at first byte (0x000f)
mask <<= n; // move mask left n times;
// Now the ones are at the k'th byte
// if k = 2 => mask = 0x0f00
return ((p & mask) >> n); // Mask out k'th byte of p and remove all zeros
// from beginning.
}
When p = 0x3579 and k = 1:
n = k * CHAR_BIT; // n = 8
mask = 255; // mask = 0x000f
mask <<= n; // mask = 0x00f0
p &= mask; // p = 0x0070
p >>= n; // p = 0x0007
I hope it will help you!

Convert integer from (pure) binary to BCD

I'm to stupid right now to solve this problem...
I get a BCD number (every digit is an own 4Bit representation)
For example, what I want:
Input: 202 (hex) == 514 (dec)
Output: BCD 0x415
Input: 0x202
Bit-representation: 0010 0000 0010 = 514
What have I tried:
unsigned int uiValue = 0x202;
unsigned int uiResult = 0;
unsigned int uiMultiplier = 1;
unsigned int uiDigit = 0;
// get the dec bcd value
while ( uiValue > 0 )
{
uiDigit= uiValue & 0x0F;
uiValue >>= 4;
uiResult += uiMultiplier * uiDigit;
uiMultiplier *= 10;
}
But I know that's very wrong this would be 202 in Bit representation and then split into 5 nibbles and then represented as decimal number again
I can solve the problem on paper but I just cant get it in a simple C-Code
You got it the wrong way round. Your code is converting from BCD to binary, just as your question's (original) title says. But the input and output values you provided are correct only if you convert from binary to BCD. In that case, try:
#include <stdio.h>
int main(void) {
int binaryInput = 0x202;
int bcdResult = 0;
int shift = 0;
printf("Binary: 0x%x (dec: %d)\n", binaryInput , binaryInput );
while (binaryInput > 0) {
bcdResult |= (binaryInput % 10) << (shift++ << 2);
binaryInput /= 10;
}
printf("BCD: 0x%x (dec: %d)\n", bcdResult , bcdResult );
return 0;
}
Proof: http://ideone.com/R0reQh
Try the following.
unsigned long toPackedBcd (unsigned int val)
{
unsigned long bcdresult = 0; char i;
for (i = 0; val; i++)
{
((char*)&bcdresult)[i / 2] |= i & 1 ? (val % 10) << 4 : (val % 10) & 0xf;
val /= 10;
}
return bcdresult;
}
Also one may try the following variant (although maybe little inefficient)
/*
Copyright (c) 2016 enthusiasticgeek<enthusiasticgeek#gmail.com> Binary to Packed BCD
This code may be used (including commercial products) without warranties of any kind (use at your own risk)
as long as this copyright notice is retained.
Author, under no circumstances, shall not be responsible for any code crashes or bugs.
Exception to copyright code: 'reverse string function' which is taken from http://stackoverflow.com/questions/19853014/reversing-a-string-in-place-in-c-pointers#19853059
Double Dabble Algorithm for unsigned int explanation
255(binary) - base 10 -> 597(packed BCD) - base 16
H| T| U| (Keep shifting left)
11111111
1 1111111
11 111111
111 11111
1010 11111 <-----added 3 in unit's place (7+3 = 10)
1 0101 1111
1 1000 1111 <-----added 3 in unit's place (5+3 = 8)
11 0001 111
110 0011 11
1001 0011 11 <-----added 3 in ten's place (6+3 = 9)
1 0010 0111 1
1 0010 1010 1 <-----added 3 in unit's place (7+3 = 10)
10 0101 0101 -> binary 597 but bcd 255
^ ^ ^
| | |
2 5 5
*/
#include <stdio.h>
#include <string.h>
//Function Prototypes
unsigned int binaryToPackedBCD (unsigned int binary);
char * printPackedBCD(unsigned int bcd, char * bcd_string);
// For the following function see http://stackoverflow.com/questions/19853014/reversing-a-string-in-place-in-c-pointers#19853059
void reverse(char *str);
//Function Definitions
unsigned int binaryToPackedBCD (unsigned int binary) {
const unsigned int TOTAL_BITS = 32;
/*Place holder for bcd*/
unsigned int bcd = 0;
/*counters*/
unsigned int i,j = 0;
for (i=0; i<TOTAL_BITS; i++) {
/*
Identify the bit to append to LSB of 8 byte or 32 bit word -
First bitwise AND mask with 1.
Then shift to appropriate (nth shift) place.
Then shift the result back to the lsb position.
*/
unsigned int binary_bit_to_lsb = (1<<(TOTAL_BITS-1-i)&binary)>>(TOTAL_BITS-1-i);
/*shift by 1 place and append bit to lsb*/
bcd = ( bcd<<1 ) | binary_bit_to_lsb;
/*printf("=> %u\n",bcd);*/
/*Don't add 3 for last bit shift i.e. in this case 32nd bit*/
if( i >= TOTAL_BITS-1) {
break;
}
/*else continue*/
/* Now, check every nibble from LSB to MSB and if greater than or equal 5 - add 3 if so */
for (j=0; j<TOTAL_BITS; j+=4) {
unsigned int temp = (bcd & (0xf<<j))>>j;
if(temp >= 0x5) {
/*printf("[%u,%u], %u, bcd = %u\n",i,j, temp, bcd);*/
/*Now, add 3 at the appropriate nibble*/
bcd = bcd + (3<<j);
// printf("Now bcd = %u\n", bcd);
}
}
}
/*printf("The number is %u\n",bcd);*/
return bcd;
}
char * printPackedBCD(unsigned int bcd, char * bcd_string) {
const unsigned int TOTAL_BITS = 32;
printf("[LSB] =>\n");
/* Now, check every nibble from LSB to MSB and convert to char* */
for (unsigned int j=0; j<TOTAL_BITS; j+=4) {
//for (unsigned int j=TOTAL_BITS-1; j>=4; j-=4) {
unsigned int temp = (bcd & (0xf<<j))>>j;
if(temp==0){
bcd_string[j/4] = '0';
} else if(temp==1){
bcd_string[j/4] = '1';
} else if(temp==2){
bcd_string[j/4] = '2';
} else if(temp==3){
bcd_string[j/4] = '3';
} else if(temp==4){
bcd_string[j/4] = '4';
} else if(temp==5){
bcd_string[j/4] = '5';
} else if(temp==6){
bcd_string[j/4] = '6';
} else if(temp==7){
bcd_string[j/4] = '7';
} else if(temp==8){
bcd_string[j/4] = '8';
} else if(temp==9){
bcd_string[j/4] = '9';
} else {
bcd_string[j/4] = 'X';
}
printf ("[%u - nibble] => %c\n", j/4, bcd_string[j/4]);
}
printf("<= [MSB]\n");
reverse(bcd_string);
return bcd_string;
}
// For the following function see http://stackoverflow.com/questions/19853014/reversing-a-string-in-place-in-c-pointers#19853059
void reverse(char *str)
{
if (str != 0 && *str != '\0') // Non-null pointer; non-empty string
{
char *end = str + strlen(str) - 1;
while (str < end)
{
char tmp = *str;
*str++ = *end;
*end-- = tmp;
}
}
}
int main(int argc, char * argv[])
{
unsigned int number = 255;
unsigned int bcd = binaryToPackedBCD(number);
char bcd_string[8];
printPackedBCD(bcd, bcd_string);
printf("Binary (Base 10) = %u => Packed BCD (Base 16) = %u\n OR \nPacked BCD String = %s\n", number, bcd, bcd_string);
return 0;
}
The real problem here is confusion of bases and units
The 202 should be HEX which equates to 514 decimal... and therefore the BCD calcs are correct
Binary code decimal will convert the decimal (514) into three nibble sized fields:
- 5 = 0101
- 1 = 0001
- 4 = 0100
The bigger problem was that you have the title the wrong way around, and you are converting Uint to BCD, whereas the title asked for BCD to Unint
My 2 cents, I needed similar for a RTC chip which used BCD to encode the time and date info. Came up with the following macros that worked fine for the requirement:
#define MACRO_BCD_TO_HEX(x) ((BYTE) ((((x >> 4) & 0x0F) * 10) + (x & 0x0F)))
#define MACRO_HEX_TO_BCD(x) ((BYTE) (((x / 10 ) << 4) | ((x % 10))))
A naive but simple solution:
char buffer[16];
sprintf(buffer, "%d", var);
sscanf(buffer, "%x", &var);
This is the solution that I developed and works great for embedded systems, like Microchip PIC microcontrollers:
#include <stdio.h>
void main(){
unsigned int output = 0;
unsigned int input;
signed char a;
//enter any number from 0 to 9999 here:
input = 1265;
for(a = 13; a >= 0; a--){
if((output & 0xF) >= 5)
output += 3;
if(((output & 0xF0) >> 4) >= 5)
output += (3 << 4);
if(((output & 0xF00) >> 8) >= 5)
output += (3 << 8);
output = (output << 1) | ((input >> a) & 1);
}
printf("Input decimal or binary: %d\nOutput BCD: %X\nOutput decimal: %u\n", input, output, output);
}
This is my version for a n byte conversion:
//----------------------------------------------
// This function converts n bytes Binary (up to 8, but can be any size)
// value to n bytes BCD value or more.
//----------------------------------------------
void bin2bcdn(void * val, unsigned int8 cnt)
{
unsigned int8 sz, y, buff[20]; // buff = malloc((cnt+1)*2);
if(cnt > 8) sz = 64; // 8x8
else sz = cnt * 8 ; // Size in bits of the data we shift
memset(&buff , 0, sizeof(buff)); // Clears buffer
memcpy(&buff, val, cnt); // Copy the data to buffer
while(sz && !(buff[cnt-1] & 0x80)) // Do not waste time with null bytes,
{ // so search for first significative bit
rotate_left(&buff, sizeof(buff)); // Rotate until we find some data
sz--; // Done this one
}
while(sz--) // Anyting left?
{
for( y = 0; y < cnt+2; y++) // Here we fix the nibbles
{
if(((buff[cnt+y] + 0x03) & 0x08) != 0) buff[cnt+y] += 0x03;
if(((buff[cnt+y] + 0x30) & 0x80) != 0) buff[cnt+y] += 0x30;
}
rotate_left(&buff, sizeof(buff)); // Rotate the stuff
}
memcpy(val, &buff[cnt], cnt); // Copy the buffer to the data
// free(buff); //in case used malloc
} // :D Done
long bin2BCD(long binary) { // double dabble: 8 decimal digits in 32 bits BCD
if (!binary) return 0;
long bit = 0x4000000; // 99999999 max binary
while (!(binary & bit)) bit >>= 1; // skip to MSB
long bcd = 0;
long carry = 0;
while (1) {
bcd <<= 1;
bcd += carry; // carry 6s to next BCD digits (10 + 6 = 0x10 = LSB of next BCD digit)
if (bit & binary) bcd |= 1;
if (!(bit >>= 1)) return bcd;
carry = ((bcd + 0x33333333) & 0x88888888) >> 1; // carrys: 8s -> 4s
carry += carry >> 1; // carrys 6s
}
}
Simple solution
#include <stdio.h>
int main(void) {
int binaryInput = 514 ; //0x202
int bcdResult = 0;
int digit = 0;
int i=1;
printf("Binary: 0x%x (dec: %d)\n", binaryInput , binaryInput );
while (binaryInput > 0) {
digit = binaryInput %10; //pick digit
bcdResult = bcdResult+digit*i;
i=16*i;
binaryInput = binaryInput/ 10;
}
printf("BCD: 0x%x (dec: %d)\n", bcdResult , bcdResult );
return 0;
}
Binary: 0x202 (dec: 514)
BCD: 0x514 (dec: 1300)
You can also try the following:
In every iteration the remainder ( represented as a nibble ) is positioned in its corresponding place.
uint32_t bcd_converter(int num)
{
uint32_t temp=0;
int i=0;
while(num>0){
temp|=((num%10)<<i);
i+=4;
num/=10;
}
return temp;
}

What is a better method for packing 4 bytes into 3 than this?

I have an array of values all well within the range 0 - 63, and decided I could pack every 4 bytes into 3 because the values only require 6 bits and I could use the extra 2bits to store the first 2 bits of the next value and so on.
Having never done this before I used the switch statement and a nextbit variable (a state machine like device) to do the packing and keep track of the starting bit. I'm convinced however, there must be a better way.
Suggestions/clues please, but don't ruin my fun ;-)
Any portability problems regarding big/little endian?
btw: I have verified this code is working, by unpacking it again and comparing with the input. And no it ain't homework, just an exercise I've set myself.
/* build with gcc -std=c99 -Wconversion */
#define ASZ 400
typedef unsigned char uc_;
uc_ data[ASZ];
int i;
for (i = 0; i < ASZ; ++i) {
data[i] = (uc_)(i % 0x40);
}
size_t dl = sizeof(data);
printf("sizeof(data):%z\n",dl);
float fpl = ((float)dl / 4.0f) * 3.0f;
size_t pl = (size_t)(fpl > (float)((int)fpl) ? fpl + 1 : fpl);
printf("length of packed data:%z\n",pl);
for (i = 0; i < dl; ++i)
printf("%02d ", data[i]);
printf("\n");
uc_ * packeddata = calloc(pl, sizeof(uc_));
uc_ * byte = packeddata;
uc_ nextbit = 1;
for (int i = 0; i < dl; ++i) {
uc_ m = (uc_)(data[i] & 0x3f);
switch(nextbit) {
case 1:
/* all 6 bits of m into first 6 bits of byte: */
*byte = m;
nextbit = 7;
break;
case 3:
/* all 6 bits of m into last 6 bits of byte: */
*byte++ = (uc_)(*byte | (m << 2));
nextbit = 1;
break;
case 5:
/* 1st 4 bits of m into last 4 bits of byte: */
*byte++ = (uc_)(*byte | ((m & 0x0f) << 4));
/* 5th and 6th bits of m into 1st and 2nd bits of byte: */
*byte = (uc_)(*byte | ((m & 0x30) >> 4));
nextbit = 3;
break;
case 7:
/* 1st 2 bits of m into last 2 bits of byte: */
*byte++ = (uc_)(*byte | ((m & 0x03) << 6));
/* next (last) 4 bits of m into 1st 4 bits of byte: */
*byte = (uc_)((m & 0x3c) >> 2);
nextbit = 5;
break;
}
}
So, this is kinda like code-golf, right?
#include <stdlib.h>
#include <string.h>
static void pack2(unsigned char *r, unsigned char *n) {
unsigned v = n[0] + (n[1] << 6) + (n[2] << 12) + (n[3] << 18);
*r++ = v;
*r++ = v >> 8;
*r++ = v >> 16;
}
unsigned char *apack(const unsigned char *s, int len) {
unsigned char *s_end = s + len,
*r, *result = malloc(len/4*3+3),
lastones[4] = { 0 };
if (result == NULL)
return NULL;
for(r = result; s + 4 <= s_end; s += 4, r += 3)
pack2(r, s);
memcpy(lastones, s, s_end - s);
pack2(r, lastones);
return result;
}
Check out the IETF RFC 4648 for 'The Base16, Base32 and Base64 Data Encodings'.
Partial code critique:
size_t dl = sizeof(data);
printf("sizeof(data):%d\n",dl);
float fpl = ((float)dl / 4.0f) * 3.0f;
size_t pl = (size_t)(fpl > (float)((int)fpl) ? fpl + 1 : fpl);
printf("length of packed data:%d\n",pl);
Don't use the floating point stuff - just use integers. And use '%z' to print 'size_t' values - assuming you've got a C99 library.
size_t pl = ((dl + 3) / 4) * 3;
I think your loop could be simplified by dealing with 3-byte input units until you've got a partial unit left over, and then dealing with a remainder of 1 or 2 bytes as special cases. I note that the standard referenced says that you use one or two '=' signs to pad at the end.
I have a Base64 encoder and decode which does some of that. You are describing the 'decode' part of Base64 -- where the Base64 code has 4 bytes of data that should be stored in just 3 - as your packing code. The Base64 encoder corresponds to the unpacker you will need.
Base-64 Decoder
Note: base_64_inv is an array of 256 values, one for each possible input byte value; it defines the correct decoded value for each encoded byte. In the Base64 encoding, this is a sparse array - 3/4 zeroes. Similarly, base_64_map is the mapping between a value 0..63 and the corresponding storage value.
enum { DC_PAD = -1, DC_ERR = -2 };
static int decode_b64(int c)
{
int b64 = base_64_inv[c];
if (c == base64_pad)
b64 = DC_PAD;
else if (b64 == 0 && c != base_64_map[0])
b64 = DC_ERR;
return(b64);
}
/* Decode 4 bytes into 3 */
static int decode_quad(const char *b64_data, char *bin_data)
{
int b0 = decode_b64(b64_data[0]);
int b1 = decode_b64(b64_data[1]);
int b2 = decode_b64(b64_data[2]);
int b3 = decode_b64(b64_data[3]);
int bytes;
if (b0 < 0 || b1 < 0 || b2 == DC_ERR || b3 == DC_ERR || (b2 == DC_PAD && b3 != DC_PAD))
return(B64_ERR_INVALID_ENCODED_DATA);
if (b2 == DC_PAD && (b1 & 0x0F) != 0)
/* 3rd byte is '='; 2nd byte must end with 4 zero bits */
return(B64_ERR_INVALID_TRAILING_BYTE);
if (b2 >= 0 && b3 == DC_PAD && (b2 & 0x03) != 0)
/* 4th byte is '='; 3rd byte is not '=' and must end with 2 zero bits */
return(B64_ERR_INVALID_TRAILING_BYTE);
bin_data[0] = (b0 << 2) | (b1 >> 4);
bytes = 1;
if (b2 >= 0)
{
bin_data[1] = ((b1 & 0x0F) << 4) | (b2 >> 2);
bytes = 2;
}
if (b3 >= 0)
{
bin_data[2] = ((b2 & 0x03) << 6) | (b3);
bytes = 3;
}
return(bytes);
}
/* Decode input Base-64 string to original data. Output length returned, or negative error */
int base64_decode(const char *data, size_t datalen, char *buffer, size_t buflen)
{
size_t outlen = 0;
if (datalen % 4 != 0)
return(B64_ERR_INVALID_ENCODED_LENGTH);
if (BASE64_DECLENGTH(datalen) > buflen)
return(B64_ERR_OUTPUT_BUFFER_TOO_SMALL);
while (datalen >= 4)
{
int nbytes = decode_quad(data, buffer + outlen);
if (nbytes < 0)
return(nbytes);
outlen += nbytes;
data += 4;
datalen -= 4;
}
assert(datalen == 0); /* By virtue of the %4 check earlier */
return(outlen);
}
Base-64 Encoder
/* Encode 3 bytes of data into 4 */
static void encode_triplet(const char *triplet, char *quad)
{
quad[0] = base_64_map[(triplet[0] >> 2) & 0x3F];
quad[1] = base_64_map[((triplet[0] & 0x03) << 4) | ((triplet[1] >> 4) & 0x0F)];
quad[2] = base_64_map[((triplet[1] & 0x0F) << 2) | ((triplet[2] >> 6) & 0x03)];
quad[3] = base_64_map[triplet[2] & 0x3F];
}
/* Encode 2 bytes of data into 4 */
static void encode_doublet(const char *doublet, char *quad, char pad)
{
quad[0] = base_64_map[(doublet[0] >> 2) & 0x3F];
quad[1] = base_64_map[((doublet[0] & 0x03) << 4) | ((doublet[1] >> 4) & 0x0F)];
quad[2] = base_64_map[((doublet[1] & 0x0F) << 2)];
quad[3] = pad;
}
/* Encode 1 byte of data into 4 */
static void encode_singlet(const char *singlet, char *quad, char pad)
{
quad[0] = base_64_map[(singlet[0] >> 2) & 0x3F];
quad[1] = base_64_map[((singlet[0] & 0x03) << 4)];
quad[2] = pad;
quad[3] = pad;
}
/* Encode input data as Base-64 string. Output length returned, or negative error */
static int base64_encode_internal(const char *data, size_t datalen, char *buffer, size_t buflen, char pad)
{
size_t outlen = BASE64_ENCLENGTH(datalen);
const char *bin_data = (const void *)data;
char *b64_data = (void *)buffer;
if (outlen > buflen)
return(B64_ERR_OUTPUT_BUFFER_TOO_SMALL);
while (datalen >= 3)
{
encode_triplet(bin_data, b64_data);
bin_data += 3;
b64_data += 4;
datalen -= 3;
}
b64_data[0] = '\0';
if (datalen == 2)
encode_doublet(bin_data, b64_data, pad);
else if (datalen == 1)
encode_singlet(bin_data, b64_data, pad);
b64_data[4] = '\0';
return((b64_data - buffer) + strlen(b64_data));
}
I complicate life by having to deal with a product that uses a variant alphabet for the Base64 encoding, and also manages not to pad data - hence the 'pad' argument (which can be zero for 'null padding' or '=' for standard padding. The 'base_64_map' array contains the alphabet to use for 6-bit values in the range 0..63.
Another simpler way to do it would be to use bit fields. One of the lesser known corners of C struct syntax is the big field. Let's say you have the following structure:
struct packed_bytes {
byte chunk1 : 6;
byte chunk2 : 6;
byte chunk3 : 6;
byte chunk4 : 6;
};
This declares chunk1, chunk2, chunk3, and chunk4 to have the type byte but to only take up 6 bits in the structure. The result is that sizeof(struct packed_bytes) == 3. Now all you need is a little function to take your array and dump it into the structure like so:
void
dump_to_struct(byte *in, struct packed_bytes *out, int count)
{
int i, j;
for (i = 0; i < (count / 4); ++i) {
out[i].chunk1 = in[i * 4];
out[i].chunk2 = in[i * 4 + 1];
out[i].chunk3 = in[i * 4 + 2];
out[i].chunk4 = in[i * 4 + 3];
}
// Finish up
switch(struct % 4) {
case 3:
out[count / 4].chunk3 = in[(count / 4) * 4 + 2];
case 2:
out[count / 4].chunk2 = in[(count / 4) * 4 + 1];
case 1:
out[count / 4].chunk1 = in[(count / 4) * 4];
}
}
There you go, you now have an array of struct packed_bytes that you can easily read by using the above struct.
Instead of using a statemachine you can simply use a counter for how many bits are already used in the current byte, from which you can directly derive the shift-offsets and whether or not you overflow into the next byte.
Regarding the endianess: As long as you use only a single datatype (that is you don't reinterpret pointer to types of different size (e.g. int* a =...;short* b=(short*) a;) you shouldn't get problems with endianess in most cases
Taking elements of DigitalRoss's compact code, Grizzly's suggestion, and my own code, I have written my own answer at last. Although DigitalRoss provides a usable working answer, my usage of it without understanding, would not have provided the same satisfaction as to learning something. For this reason I have chosen to base my answer on my original code.
I have also chosen to ignore the advice Jonathon Leffler gives to avoid using floating point arithmetic for the calculation of the packed data length. Both the recommended method given - the same DigitalRoss also uses, increases the length of the packed data by as much as three bytes. Granted this is not much, but is also avoidable by the use of floating point math.
Here is the code, criticisms welcome:
/* built with gcc -std=c99 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
unsigned char *
pack(const unsigned char * data, size_t len, size_t * packedlen)
{
float fpl = ((float)len / 4.0f) * 3.0f;
*packedlen = (size_t)(fpl > (float)((int)fpl) ? fpl + 1 : fpl);
unsigned char * packed = malloc(*packedlen);
if (!packed)
return 0;
const unsigned char * in = data;
const unsigned char * in_end = in + len;
unsigned char * out;
for (out = packed; in + 4 <= in_end; in += 4) {
*out++ = in[0] | ((in[1] & 0x03) << 6);
*out++ = ((in[1] & 0x3c) >> 2) | ((in[2] & 0x0f) << 4);
*out++ = ((in[2] & 0x30) >> 4) | (in[3] << 2);
}
size_t lastlen = in_end - in;
if (lastlen > 0) {
*out = in[0];
if (lastlen > 1) {
*out++ |= ((in[1] & 0x03) << 6);
*out = ((in[1] & 0x3c) >> 2);
if (lastlen > 2) {
*out++ |= ((in[2] & 0x0f) << 4);
*out = ((in[2] & 0x30) >> 4);
if (lastlen > 3)
*out |= (in[3] << 2);
}
}
}
return packed;
}
int main()
{
size_t i;
unsigned char data[] = {
12, 15, 40, 18,
26, 32, 50, 3,
7, 19, 46, 10,
25, 37, 2, 39,
60, 59, 0, 17,
9, 29, 13, 54,
5, 6, 47, 32
};
size_t datalen = sizeof(data);
printf("unpacked datalen: %td\nunpacked data\n", datalen);
for (i = 0; i < datalen; ++i)
printf("%02d ", data[i]);
printf("\n");
size_t packedlen;
unsigned char * packed = pack(data, sizeof(data), &packedlen);
if (!packed) {
fprintf(stderr, "Packing failed!\n");
return EXIT_FAILURE;
}
printf("packedlen: %td\npacked data\n", packedlen);
for (i = 0; i < packedlen; ++i)
printf("0x%02x ", packed[i]);
printf("\n");
free(packed);
return EXIT_SUCCESS;
}

Resources