LZW Compression - c

The LZW compression algorithm is increasing the size in bits after compression:
Here is the code for Compression function:
// compression
void compress(FILE *inputFile, FILE *outputFile) {
int prefix;
int character;
int nextCode;
int index;
// LZW starts out with a dictionary of 256 characters (in the case of 8 codeLength) and uses those as the "standard"
// character set.
nextCode = 256; // next code is the next available string code
dictionaryInit();
// while (there is still data to be read)
while ((character = getc(inputFile)) != (unsigned)EOF) { // ch = read a character;
// if (dictionary contains prefix+character)
if ((index = dictionaryLookup(prefix, character)) != -1) prefix = index; // prefix = prefix+character
else { // ...no, try to add it
// encode s to output file
writeBinary(outputFile, prefix);
// add prefix+character to dictionary
if (nextCode < dictionarySize) dictionaryAdd(prefix, character, nextCode++);
// prefix = character
prefix = character; //... output the last string after adding the new one
}
}
// encode s to output file
writeBinary(outputFile, prefix); // output the last code
if (leftover > 0) fputc(leftoverBits << 4, outputFile);
// free the dictionary here
dictionaryDestroy();
}
Where the writeBinary (It acts like a buffer in the program) function is as follows:
void writeBinary(FILE * output, int code);
int leftover = 0;
int leftoverBits;
void writeBinary(FILE * output, int code) {
if (leftover > 0) {
int previousCode = (leftoverBits << 4) + (code >> 8);
fputc(previousCode, output);
fputc(code, output);
leftover = 0; // no leftover now
} else {
leftoverBits = code & 0xF; // save leftover, the last 00001111
leftover = 1;
fputc(code >> 4, output);
}
}
Can you spot the error, please? I'll be grateful!

chux already pointed you to the solution: You need to start out with 9-bit codes, and increase the code size up to 12 whenever the available codes for the current bit size are exhausted. If you're writing 12-bit codes from the beginning, there's no compression effect, of course.

Related

DES CBC mode not outputting correctly

I am working on a project in C to implement CBC mode on top of a skeleton code for DES with OpenSSL. We are not allowed to use a function that does the CBC mode automatically, in the sense that we must implement it ourselves. I am getting output but I have result files and my output is not matching up completely with the intended results. I also am stuck on figuring out how to pad the file to ensure all the blocks are of equal size, which is probably one of the reasons why I'm not receiving the correct output. Any help would be appreciated. Here's my modification of the skeleton code so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/des.h>
#include <sys/time.h>
#include <unistd.h>
#define ENC 1
#define DEC 0
DES_key_schedule key;
int append(char*s, size_t size, char c) {
if(strlen(s) + 1 >= size) {
return 1;
}
int len = strlen(s);
s[len] = c;
s[len+1] = '\0';
return 0;
}
int getSize (char * s) {
char * t;
for (t = s; *t != '\0'; t++)
;
return t - s;
}
void strToHex(const_DES_cblock input, unsigned char *output) {
int arSize = 8;
unsigned int byte;
for(int i=0; i<arSize; i++) {
if(sscanf(input, "%2x", &byte) != 1) {
break;
}
output[i] = byte;
input += 2;
}
}
void doBitwiseXor(DES_LONG *xorValue, DES_LONG* data, const_DES_cblock roundOutput) {
DES_LONG temp[2];
memcpy(temp, roundOutput, 8*sizeof(unsigned char));
for(int i=0; i<2; i++) {
xorValue[i] = temp[i] ^ data[i];
}
}
void doCBCenc(DES_LONG *data, const_DES_cblock roundOutput, FILE *outFile) {
DES_LONG in[2];
doBitwiseXor(in, data, roundOutput);
DES_encrypt1(in,&key,ENC);
printf("ENCRYPTED\n");
printvalueOfDES_LONG(in);
printf("%s","\n");
fwrite(in, 8, 1, outFile);
memcpy(roundOutput, in, 2*sizeof(DES_LONG));
}
int main(int argc, char** argv)
{
const_DES_cblock cbc_key = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
const_DES_cblock IV = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
// Initialize the timing function
struct timeval start, end;
gettimeofday(&start, NULL);
int l;
if ((l = DES_set_key_checked(&cbc_key,&key)) != 0)
printf("\nkey error\n");
FILE *inpFile;
FILE *outFile;
inpFile = fopen("test.txt", "r");
outFile = fopen("test_results.txt", "wb");
if(inpFile && outFile) {
unsigned char ch;
// A char array that will hold all 8 ch values.
// each ch value is appended to this.
unsigned char eight_bits[8];
// counter for the loop that ensures that only 8 chars are done at a time.
int count = 0;
while(!feof(inpFile)) {
// read in a character
ch = fgetc(inpFile);
// print the character
printf("%c",ch);
// append the character to eight_bits
append(eight_bits,1,ch);
// increment the count so that we only go to 8.
count++;
const_DES_cblock roundOutput;
// When count gets to 8
if(count == 8) {
// for formatting
printf("%s","\n");
// Encrypt the eight characters and store them back in the char array.
//DES_encrypt1(eight_bits,&key,ENC);
doCBCenc(eight_bits, roundOutput, outFile);
// prints out the encrypted string
int k;
for(k = 0; k < getSize(eight_bits); k++){
printf("%c", eight_bits[k]);
}
// Sets count back to 0 so that we can do another 8 characters.
count = 0;
// so we just do the first 8. When everything works REMOVE THE BREAK.
//break;
}
}
} else {
printf("Error in opening file\n");
}
fclose(inpFile);
fclose(outFile);
// End the timing
gettimeofday(&end, NULL);
// Initialize seconds and micros to hold values for the time output
long seconds = (end.tv_sec - start.tv_sec);
long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
// Output the time
printf("The elapsed time is %d seconds and %d microseconds\n", seconds, micros);
}
Your crypto is at least half correct, but you have a lot of actual or potential other errors.
As you identified, raw CBC mode can only encrypt data which is a multiple of the block size, for DES 64 bits or 8 bytes (on most modern computers and all where you could use OpenSSL). In some applications this is okay; for example if the data is (always) an MD5 or SHA-256 or SHA-512 hash, or a GUID, or an IPv6 (binary) address, then it is a block multiple. But most applications want to handle at least any length in bytes, so they need to use some scheme to pad on encrypt and unpad on decrypt the last block (all blocks before the last already have the correct size). Many different schemes have been developed for this, so you need to know which to use. I assume this is a school assignment (since no real customer would set such a stupid and wasteful combination of requirements) and this should either have been specified or clearly left as a choice. One padding scheme very common today (although not for single-DES, because that is broken, unsafe, obsolete, and not common) is the one defined by PKCS5 and generalized by PKCS7 and variously called PKCS5, PKCS7, or PKCS5/7 padding, so I used that as an example.
Other than that:
you try to test feof(inpFile) before doing fgetc(inpFile). This doesn't work in C. It results in your code treating the low 8 bits of EOF (255 aka 0xFF on practically all implementations) as a valid data character added to the characters that were actually in the file. The common idiom is to store the return of getchar/getc/fgetc in a signed int and compare to EOF, but that would have required more changes so I used an alternate.
you don't initialize eight_bits which is a local-scope automatic duration variable, so its contents are undefined and depending on the implementation are often garbage, which means trying to 'append' to it by using strlen() to look for the end won't work right and might even crash. Although on some implementations at least some times it might happen to contain zero bytes, and 'work'. In addition it is possible in C for a byte read from a file (and stored here) to be \0 which will also make this work wrong, although if this file contains text, as its name suggests, it probably doesn't contain any \0 bytes.
once you fill eight_bits you write 'off-the-end' into element [8] which doesn't exist. Technically this is Undefined Behavior and anything at all can happen, traditionally expressed on Usenet as nasal demons. Plus after main finishes the first block it doesn't change anything in eight_bits so all further calls to append find it full and discard the new character.
while you could fix the above points separately, a much simple solution is available: you are already using count to count the number of bytes in the current block, so just use it as the subscript.
roundOutput is also an uninitialized local/auto variable within the loop, which is then used as the previous block for the CBC step, possibly with garbage or wrong value(s). And you don't use the IV at all, as is needed. You should allocate this before the loop (so it retains its value through all iterations) and initialize it to the IV, and then for each block in the loop your doCBCenc can properly XOR it to the new block and then leave the encrypted new block to be used next time.
your code labelled 'prints out the encrypted string' prints plaintext not ciphertext -- which is binary and shouldn't be printed directly anyway -- and is not needed because your file-read loop already echoes each character read. But if you do want to print a (validly null-terminated) string it's easier to just use fputs(s) or [f]printf([f,]"%s",s) or even fwrite(s,1,strlen(s),f).
your doCBCenc has a reference to printvalueofDES_LONG which isn't defined anywhere, and which along with two surrounding printf is clearly not needed.
you should use a cast to convert the first argument to doCBCenc -- this isn't strictly required but is good style and a good compiler (like mine) complains if you don't
finally, when an error occurs you usually print a message but then continue running, which will never work right and may produce symptoms that disguise the problem and make it hard to fix.
The below code fixes the above except that last (which would have been more work for less benefit) plus I removed routines that are now superfluous, and the timing code which is just silly: Unix already has builtin tools to measure and display process time more easily and reliably than writing code. Code I 'removed' is under #if 0 for reference, and code I added under #else or #if 1 except for the cast. The logic for PKCS5/7 padding is under #if MAYBE so it can be either selected or not. Some consider it better style to use sizeof(DES_block) or define a macro instead of the magic 8's, but I didn't bother -- especially since it would have required changes that aren't really necessary.
// SO70209636
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/des.h>
#include <sys/time.h>
#include <unistd.h>
#define ENC 1
#define DEC 0
DES_key_schedule key;
#if 0
int append(char*s, size_t size, char c) {
if(strlen(s) + 1 >= size) {
return 1;
}
int len = strlen(s);
s[len] = c;
s[len+1] = '\0';
return 0;
}
int getSize (char * s) {
char * t;
for (t = s; *t != '\0'; t++)
;
return t - s;
}
void strToHex(const_DES_cblock input, unsigned char *output) {
int arSize = 8;
unsigned int byte;
for(int i=0; i<arSize; i++) {
if(sscanf(input, "%2x", &byte) != 1) {
break;
}
output[i] = byte;
input += 2;
}
}
#endif
void doBitwiseXor(DES_LONG *xorValue, DES_LONG* data, const_DES_cblock roundOutput) {
DES_LONG temp[2];
memcpy(temp, roundOutput, 8*sizeof(unsigned char));
for(int i=0; i<2; i++) {
xorValue[i] = temp[i] ^ data[i];
}
}
void doCBCenc(DES_LONG *data, const_DES_cblock roundOutput, FILE *outFile) {
DES_LONG in[2];
doBitwiseXor(in, data, roundOutput);
DES_encrypt1(in,&key,ENC);
#if 0
printf("ENCRYPTED\n");
printvalueOfDES_LONG(in);
printf("%s","\n");
#endif
fwrite(in, 8, 1, outFile);
memcpy(roundOutput, in, 2*sizeof(DES_LONG));
}
int main(int argc, char** argv)
{
const_DES_cblock cbc_key = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
const_DES_cblock IV = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
#if 0
// Initialize the timing function
struct timeval start, end;
gettimeofday(&start, NULL);
#endif
int l;
if ((l = DES_set_key_checked(&cbc_key,&key)) != 0)
printf("\nkey error\n");
#if 1
DES_cblock roundOutput; // must be outside the loop
memcpy (roundOutput, IV, 8); // and initialized
#endif
FILE *inpFile;
FILE *outFile;
inpFile = fopen("test.txt", "r");
outFile = fopen("test.encrypt", "wb");
if(inpFile && outFile) {
unsigned char ch;
// A char array that will hold all 8 ch values.
// each ch value is appended to this.
unsigned char eight_bits[8];
// counter for the loop that ensures that only 8 chars are done at a time.
int count = 0;
#if 0
while(!feof(inpFile)) {
// read in a character
ch = fgetc(inpFile);
#else
while( ch = fgetc(inpFile), !feof(inpFile) ){
#endif
// print the character
printf("%c",ch);
#if 0
// append the character to eight_bits
append(eight_bits,1,ch);
// increment the count so that we only go to 8.
count++;
#else
eight_bits[count++] = ch;
#endif
#if 0
const_DES_cblock roundOutput;
#endif
// When count gets to 8
if(count == 8) {
// for formatting
printf("%s","\n");
// Encrypt the eight characters and store them back in the char array.
//DES_encrypt1(eight_bits,&key,ENC);
doCBCenc((DES_LONG*)eight_bits, roundOutput, outFile);
#if 0
// prints out the encrypted string
int k;
for(k = 0; k < getSize(eight_bits); k++){
printf("%c", eight_bits[k]);
}
#endif
// Sets count back to 0 so that we can do another 8 characters.
count = 0;
// so we just do the first 8. When everything works REMOVE THE BREAK.
//break;
}
}
#if MAYBE
memset (eight_bits+count, 8-count, 8-count); // PKCS5/7 padding
doCBCenc((DES_LONG*)eight_bits, roundOutput, outFile);
#endif
} else {
printf("Error in opening file\n");
}
fclose(inpFile);
fclose(outFile);
#if 0
// End the timing
gettimeofday(&end, NULL);
// Initialize seconds and micros to hold values for the time output
long seconds = (end.tv_sec - start.tv_sec);
long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
// Output the time
printf("The elapsed time is %d seconds and %d microseconds\n", seconds, micros);
#endif
}
PS: personally I wouldn't put the fwrite in doCBCenc; I would only do the encryption and let the caller do whatever I/O is appropriate which might in some cases not be fwrite. But what you have is not wrong for the requirements you apparently have.

Building a PHP Extension using original rijndael, correct encryption/decrypt however trailing extra bytes

So I have just begun writing a PHP Extension, using the original Rijndael code (which is now formally AES).
However it seems there is a bug somewhere in the code, I know it encrypts/decrypts correctly, but on the output it adds a extra 6 bytes to the string, which I assume is something to do with the conversion from uint8_t array to char array.
I havent posted the code here as it would take up half the page, so I have posted it to github here :
https://github.com/Hect0rius/PHPEXT-Rijndael
The main code I am referring to is the following is in php_rijndael.c (2 functions:
/* {{{ proto resource rijndael_encrypt(string inData, string key)
encrypts a string with rijndael /
PHP_FUNCTION(rijndael_encrypt)
{
/ Inputs */
char *inData; // Data Ptr.
size_t inDataLen; // Data Length.
char *key; // Key Ptr.
size_t keyLen; // Key Length.
zend_ulong keyBits; // Bits, between 128/192/256.
/* Get Parameters from Zend */
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s|l", &inData, &inDataLen, &key, &keyLen, &keyBits) == FAILURE) {
return;
}
/* Since rijndael takes what it needs via key bits, then we just allow the overflow of the key. */
switch(keyBits) {
case 128:
if(keyLen < 16) { php_error_docref(NULL, E_WARNING, "Key length must be 16 characters long."); RETURN_FALSE; }
break;
case 192:
if(keyLen < 24) { php_error_docref(NULL, E_WARNING, "Key length must be 24 characters long."); RETURN_FALSE; }
break;
case 256:
if(keyLen < 32) { php_error_docref(NULL, E_WARNING, "Key length must be 32 characters long."); RETURN_FALSE; }
break;
}
/* Convert from original pointers to uin8_t arrays */
uint8_t dataU8[16];
uint8_t keyU8[16];
uint8_t output[16], i = 0;
do {
dataU8[i] = (uint8_t)inData[i];
keyU8[i] = (uint8_t)key[i];
i++;
}
while(i < 16);
/* Setup Rijndael stack */
uint32_t rk[4 * (MAXNR + 1)];
int32_t Nr = rijndaelKeySetupEnc(rk, keyU8, keyBits);
/* Decrypt Buffer. */
rijndaelEncrypt(rk, Nr, dataU8, output);
/* Now return data back into a char array*/
char outChar[16], *ptr = outChar;
i = 0;
do {
ptr[i] = (char)output[i];
i++;
}
while(i < 16);
RETURN_STRING(outChar);
}
/* }}} */
/* {{{ proto resource rijndael_decrypt(string inData, string key)
decrypts a string with rijndael /
PHP_FUNCTION(rijndael_decrypt)
{
/ Inputs */
char *inData; // Data Ptr.
size_t inDataLen; // Data Length.
char *key; // Key Ptr.
size_t keyLen; // Key Length.
zend_ulong keyBits; // Bits, between 128/192/256.
/* Get Parameters from Zend */
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s|l", &inData, &inDataLen, &key, &keyLen, &keyBits) == FAILURE) {
return;
}
/* Since rijndael takes what it needs via key bits, then we just allow the overflow of the key. */
switch(keyBits) {
case 128:
if(keyLen < 16) { php_error_docref(NULL, E_WARNING, "Key length must be 16 characters long."); RETURN_FALSE; }
break;
case 192:
if(keyLen < 24) { php_error_docref(NULL, E_WARNING, "Key length must be 24 characters long."); RETURN_FALSE; }
break;
case 256:
if(keyLen < 32) { php_error_docref(NULL, E_WARNING, "Key length must be 32 characters long."); RETURN_FALSE; }
break;
}
/* Convert from original pointers to uin8_t arrays */
uint8_t dataU8[16];
uint8_t keyU8[16];
uint8_t output[16], i = 0;
do {
dataU8[i] = (uint8_t)inData[i];
keyU8[i] = (uint8_t)key[i];
i++;
}
while(i < 16);
/* Setup Rijndael Stack */
uint32_t rk[4 * (MAXNR + 1)];
int32_t Nr = rijndaelKeySetupDec(rk, keyU8, keyBits);
/* Decrypt input uint8_t array */
rijndaelDecrypt(rk, Nr, dataU8, output);
/* Convert data back to a char */
char outChar[16], *ptr = outChar;
i = 0;
do {
ptr[i] = (char)output[i];
i++;
}
while(i < 16);
RETURN_STRING(ptr);
}
/* }}} */
#endif /* HAVE_RIJNDAEL */
I only guess it is correctly decrypting the encrypted buffer as it outputs back to all zero's, here is the test.php file:
You are seeing padding.
AES (and Rijndael) are block ciphers and as such process data in blocks, 16-bytes for AES.
If the input is not an exact multiple of the block size some padding needs to be added to the data to be encrypted and removed on decryption. The most common padding is PKCS#7 however some implementations are brain-dead and do not support PKCS#7, sometimes using null padding which can not support encrypting binary data.
Note: The PHP mcrypt implementation does not support standard PKCS#7 (née PKCS#5) padding, only non-standard null padding that can't even be used with binary data.

Understanding C Requirements of bits and offset [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 5 years ago.
Improve this question
I understand the general idea of C and how making a log file would go. Reading/writing to a file and such.
My concern is the following format that is desired:
[![enter image description here][1]][1]
I've gotten a good chunk done now but am concerned with how to append to my log file after the first record. I increment the file's record count (in the top 2 bytes) and write the first record after it. How would I then setup to add the 2nd/3rd/etc records to showup after each other?
//confirm a file exists in the directory
bool fileExists(const char* file)
{
struct stat buf;
return (stat(file, &buf) == 0);
}
int rightBitShift(int val, int space)
{
return ((val >> space) & 0xFF);
}
int leftBitShift(int val, int space)
{
return (val << space);
}
int determineRecordCount(char * logName)
{
unsigned char record[2];
FILE *fp = fopen(logName, "rb");
fread(record, sizeof(record), 1, fp);
//display the record number
int recordNum = (record[0] << 8) | record[1];
recordNum = recordNum +1;
return (recordNum);
}
void createRecord(int argc, char **argv)
{
int recordNum;
int aux = 0;
int dst;
char* logName;
char message[30];
memset(message,' ',30);
//check argument count and validation
if (argc == 7 && strcmp("-a", argv[2]) ==0 && strcmp("-f", argv[3]) ==0 && strcmp("-t", argv[5]) ==0)
{
//aux flag on
aux = 1;
logName = argv[4];
strncpy(message, argv[6],strlen(argv[6]));
}
else if (argc == 6 && strcmp("-f", argv[2]) ==0 && strcmp("-t", argv[4]) ==0)
{
logName = argv[3];
strncpy(message, argv[5],strlen(argv[5]));
}
else
{
printf("Invalid Arguments\n");
exit(0);
}
//check if log exists to get latest recordNum
if (fileExists(logName))
{
recordNum = determineRecordCount(logName);
printf("%i\n",recordNum);
}
else
{
printf("Logfile %s not found\n", logName);
recordNum = 1;
}
//Begin creating record
unsigned char record[40]; /* One record takes up 40 bytes of space */
memset(record, 0, sizeof(record));
//recordCount---------------------------------------------------------------------
record[0] = rightBitShift (recordNum, 8); /* Upper byte of sequence number */
record[1] = rightBitShift (recordNum, 0); /* Lower byte of sequence number */
//get aux/dst flags---------------------------------------------------------------
//get date and time
time_t timeStamp = time(NULL);
struct tm *date = localtime( &timeStamp );
if (date->tm_isdst)
dst = 1;
record[2] |= aux << 7; //set 7th bit
record[2] |= dst << 6; //set 6th
//timeStamp-----------------------------------------------------------------------
record[3] |= rightBitShift(timeStamp, 24);//high byte
record[4] |= rightBitShift(timeStamp, 16);
record[5] |= rightBitShift(timeStamp, 8);
record[6] |= rightBitShift(timeStamp, 0); //low byte
//leave bytes 7-8, set to 0 -----------------------------------------
record[7] = 0;
record[8] = 0;
//store message--------------------------------------------
strncpy(&record[9], message, strlen(message));
//write record to log-----------------------------------------------------------------
FILE *fp = fopen(logName, "w+");
unsigned char recordCount[4];
recordCount[0] = rightBitShift (recordNum, 8); /* Upper byte of sequence number */
recordCount[1] = rightBitShift (recordNum, 0); /* Lower byte of sequence number */
recordCount[2] = 0;
recordCount[3] = 0;
fwrite(recordCount, sizeof(recordCount), 1, fp);
fwrite(record, sizeof(record), 1, fp);
fclose(fp);
printf("Record saved successfully\n");
}
NOTE: I've never had to do this before in C, take it with a grain of salt.
This is a very specific binary formatting where each bit is precisely accounted for. It's using the Least-Significant-Bit numbering scheme (LSB 0) where the bits are numbered from 7 to 0.
Specifying that the "upper byte" comes first means this format is big-endian. The most significant bits come first. This is like how we write our numbers, four thousand, three hundred, and twenty one is 4321. 1234 would be little-endian. For example, the Number Of Records and Sequence are both 16 bit big-endian numbers.
Finally, the checksum is a number calculated from the rest of the record to verify there were no mistakes in transmission. The spec defines how to make the checksum.
Your job is to precisely reproduce this format, probably using the fixed-sized types found in stdint.h or unsigned char. For example, the sequence would be a uint16_t or unsigned char[2].
The function to produce a record might have a signature like this:
unsigned char *make_record( const char *message, bool aux );
The user only has to supply you with the message and the aux flag. The rest you can be figured out by the function. You might decide to let them pass in the timestamp and sequence. Point is, the function needs to be passed just the data, it takes care of the formatting.
This byte-ordering means you can't just write out integers, they might be the wrong size or the wrong byte order. That means any multi-byte integers must be serialized before you can write them to the record. This answer covers ways to do that and I'll be using the ones from this answer because they proved a bit more convenient.
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
unsigned char *make_record( const char *message, bool aux ) {
// Allocate and zero memory for the buffer.
// Zeroing means no risk of accidentally sending garbage.
unsigned char *buffer = calloc( 40, sizeof(unsigned char) );
// As we add to the buffer, pos will track the next byte to be written.
unsigned char *pos = buffer;
// I decided not make the user responsible for
// the sequence number. YMMV.
static uint16_t sequence = 1;
pos = serialize_uint16( pos, sequence );
// Get the timestamp and DST.
time_t timestamp = time(NULL);
struct tm *date = localtime( &timestamp );
// 2nd row is all flags and a bunch of 0s. Start with them all off.
uint8_t flags = 0;
if( aux ) {
// Flip the 7th bit on.
flags |= 0x80;
}
if( date->tm_isdst ) {
// Flip the 6th bit on.
flags |= 0x40;
}
// That an 8 bit integer has no endianness, this is to ensure
// pos is consistently incremented.
pos = serialize_uint8(pos, flags);
// I don't know what their timestamp format is.
// This is just a guess. It's probably wrong.
pos = serialize_uint32(pos, (uint32_t)timestamp);
// "Spare" is all zeros.
// The spec says this is 3 bytes, but only gives it bytes
// 7 and 8. I'm going with 2 bytes.
pos = serialize_uint16(pos, 0);
// Copy the message in, 30 bytes.
// strncpy() does not guarantee the message will be null
// terminated. This is probably fine as the field is fixed width.
// More info about the format would be necessary to know for sure.
strncpy( pos, message, 30 );
pos += 30;
// Checksum the first 39 bytes.
// Sorry, I don't know how to do 1's compliment sums.
pos = serialize_uint8( pos, record_checksum( buffer, 39 ) );
// pos has moved around, but buffer remains at the start
return buffer;
}
int main() {
unsigned char *record = make_record("Basset hounds got long ears", true);
fwrite(record, sizeof(unsigned char), 40, stdout);
}
At this point my expertise is exhausted, I've never had to do this before. I'd appreciate folks fixing up the little mistakes in edits and suggesting better ways to do it in the comments, like what to do with the timestamp. And maybe someone else can cover how to do 1's compliment checksums in another answer.
As a byte is composed by 8 bits (from 0 to 7) you can use bitwise operations to modify them as asked in your specifications. Take a look for general information (https://en.wikipedia.org/wiki/Bitwise_operations_in_C). As a preview, you can use >> or << operators to determine which bit to modify, and use logical operators | and & to set it's values.

How to Hash CPU ID in C

I'm trying to short the cpu id of my microcontroller (STM32F1).
The cpu id is composed by 3 word ( 3 x 4 bytes). This is the id string built from the 3 word: 980416578761680031125348904
I found a very useful library that do this.
The library is Hashids and there is a C code.
I try to build a test code on PC with "Code Blocks IDE" and the code works.
But when I move the code into the embedded side (Keil v5 IDE), I get an error on strdup() function: "strdup implicit declaration of function".
The problem is related to the strdup function isn't a standard library function and ins't included into string.h.
I will avoid to replace the strdup function with a custom function (that mimic the behaviour of strdup) to avoid memory leak because strdup copy strings using malloc.
Is there a different approach to compress long numbers?
Thanks for the help!
<---Appendix--->
This is the function that uses the strdup.
/* common init */
struct hashids_t *
hashids_init3(const char *salt, size_t min_hash_length, const char *alphabet)
{
struct hashids_t *result;
unsigned int i, j;
size_t len;
char ch, *p;
hashids_errno = HASHIDS_ERROR_OK;
/* allocate the structure */
result = _hashids_alloc(sizeof(struct hashids_t));
if (HASHIDS_UNLIKELY(!result)) {
hashids_errno = HASHIDS_ERROR_ALLOC;
return NULL;
}
/* allocate enough space for the alphabet and its copies */
len = strlen(alphabet) + 1;
result->alphabet = _hashids_alloc(len);
result->alphabet_copy_1 = _hashids_alloc(len);
result->alphabet_copy_2 = _hashids_alloc(len);
if (HASHIDS_UNLIKELY(!result->alphabet || !result->alphabet_copy_1
|| !result->alphabet_copy_2)) {
hashids_free(result);
hashids_errno = HASHIDS_ERROR_ALLOC;
return NULL;
}
/* extract only the unique characters */
result->alphabet[0] = '\0';
for (i = 0, j = 0; i < len; ++i) {
ch = alphabet[i];
if (!strchr(result->alphabet, ch)) {
result->alphabet[j++] = ch;
}
}
result->alphabet[j] = '\0';
/* store alphabet length */
result->alphabet_length = j;
/* check length and whitespace */
if (result->alphabet_length < HASHIDS_MIN_ALPHABET_LENGTH) {
hashids_free(result);
hashids_errno = HASHIDS_ERROR_ALPHABET_LENGTH;
return NULL;
}
if (strchr(result->alphabet, ' ')) {
hashids_free(result);
hashids_errno = HASHIDS_ERROR_ALPHABET_SPACE;
return NULL;
}
/* copy salt */
result->salt = strdup(salt ? salt : HASHIDS_DEFAULT_SALT);
result->salt_length = (unsigned int) strlen(result->salt);
/* allocate enough space for separators */
result->separators = _hashids_alloc((size_t)
(ceil((float)result->alphabet_length / HASHIDS_SEPARATOR_DIVISOR) + 1));
if (HASHIDS_UNLIKELY(!result->separators)) {
hashids_free(result);
hashids_errno = HASHIDS_ERROR_ALLOC;
return NULL;
}
/* non-alphabet characters cannot be separators */
for (i = 0, j = 0; i < strlen(HASHIDS_DEFAULT_SEPARATORS); ++i) {
ch = HASHIDS_DEFAULT_SEPARATORS[i];
if ((p = strchr(result->alphabet, ch))) {
result->separators[j++] = ch;
/* also remove separators from alphabet */
memmove(p, p + 1,
strlen(result->alphabet) - (p - result->alphabet));
}
}
/* store separators length */
result->separators_count = j;
/* subtract separators count from alphabet length */
result->alphabet_length -= result->separators_count;
/* shuffle the separators */
hashids_shuffle(result->separators, result->separators_count,
result->salt, result->salt_length);
/* check if we have any/enough separators */
if (!result->separators_count
|| (((float)result->alphabet_length / (float)result->separators_count)
> HASHIDS_SEPARATOR_DIVISOR)) {
unsigned int separators_count = (unsigned int)ceil(
(float)result->alphabet_length / HASHIDS_SEPARATOR_DIVISOR);
if (separators_count == 1) {
separators_count = 2;
}
if (separators_count > result->separators_count) {
/* we need more separators - get some from alphabet */
int diff = separators_count - result->separators_count;
strncat(result->separators, result->alphabet, diff);
memmove(result->alphabet, result->alphabet + diff,
result->alphabet_length - diff + 1);
result->separators_count += diff;
result->alphabet_length -= diff;
} else {
/* we have more than enough - truncate */
result->separators[separators_count] = '\0';
result->separators_count = separators_count;
}
}
/* shuffle alphabet */
hashids_shuffle(result->alphabet, result->alphabet_length,
result->salt, result->salt_length);
/* allocate guards */
result->guards_count = (unsigned int) ceil((float)result->alphabet_length
/ HASHIDS_GUARD_DIVISOR);
result->guards = _hashids_alloc(result->guards_count + 1);
if (HASHIDS_UNLIKELY(!result->guards)) {
hashids_free(result);
hashids_errno = HASHIDS_ERROR_ALLOC;
return NULL;
}
if (HASHIDS_UNLIKELY(result->alphabet_length < 3)) {
/* take some from separators */
strncpy(result->guards, result->separators, result->guards_count);
memmove(result->separators, result->separators + result->guards_count,
result->separators_count - result->guards_count + 1);
result->separators_count -= result->guards_count;
} else {
/* take them from alphabet */
strncpy(result->guards, result->alphabet, result->guards_count);
memmove(result->alphabet, result->alphabet + result->guards_count,
result->alphabet_length - result->guards_count + 1);
result->alphabet_length -= result->guards_count;
}
/* set min hash length */
result->min_hash_length = min_hash_length;
/* return result happily */
return result;
}
The true question seems to be
Is there a different approach to compress long numbers?
There are many. They differ in several respects, including which bits of the input contribute to the output, how many inputs map to the same output, and what manner of transformations of the input leave the output unchanged.
As a trivial examples, you can compress the input to a single bit by any of these approaches:
Choose the lowest-order bit of the input
Choose the highest-order bit of the input
The output is always 1
etc
Or you can compress to 7 bits by using using the number of 1 bits in the input as the output.
None of those particular options is likely to be of interest to you, of course.
Perhaps you would be more interested in producing 32-bit outputs for your 96-bit inputs. Do note that in that case on average there will be at least 264 possible inputs that map to each possible output. That depends only on the sizes of input and output, not on any details of the conversion.
For example, suppose that you have
uint32_t *cpuid = ...;
pointing to the hardware CPU ID. You can produce a 32-bit value from it that depends on all the bits of the input simply by doing this:
uint32_t cpuid32 = cpuid[0] ^ cpuid[1] ^ cpuid[2];
Whether that would suit your purpose depends on how you intend to use it.
You can easily implement strdup yourself like this:
char* strdup (const char* str)
{
size_t size = strlen(str);
char* result = malloc(size);
if(result != NULL)
{
memcpy(result, str, size+1);
}
return result;
}
That being said, using malloc or strdup on an embedded system is most likely just nonsense practice, see this. Nor would you use float numbers. Overall, that library seems to have been written by a desktop-minded person.
If you are implementing something like for example a chained hash table on an embedded system, you would use a statically allocated memory pool and not malloc. I'd probably go with a non-chained one for that reason (upon duplicates, pick next free spot in the buffer).
Unique device ID register (96 bits) is located under address 0x1FFFF7E8. It is factory programmed and is read-only. You can read it directly without using any other external library. For example:
unsigned int b = *(0x1FFFF7E8);
should give you the first 32 bits (31:0) of the unique device ID. If you want to retrieve a string as in case of the library mentioned, the following should work:
sprintf(id, "%08X%08X%08X", *(0x1FFFF7E8), *(0x1FFFF7E8 + 4), *(0x1FFFF7E8 + 8);
Some additional casting may be required, but generally that's what the library did. Please refer to STM32F1xx Reference Manual (RM0008), section 30.2 for more details. The exact memory location to read from is different in case of Cortex-M4 family of the MCUs.

comparing array of strings

#include<reg51.h>
#include<string.h>
#include"_LCD_R8C.c"
unsigned char c[12];
unsigned char chr[11];
void serial_int (void) interrupt 4
{
if (RI==1)
{
chr[11] = SBUF;
RI = 0;
TI = 0;
}
}
int main()
{
unsigned char a[2][11]={"$0016221826","$0123456789"};
int i,j;
lcd_init();
lcd_clear();
SCON = 0x50;
TMOD = 0x20;
TH1 = 0xFD;
ET0 = 0;
TR1 = 1;
RI = 1;
ES = 1;
EA = 1;
for(j=0;j<1;j++)
{
for(i=0;i<=10;i++)
{
c[i]=chr[i];
}
c[11]='\0';
}
for(i=0;i<=1;i++)
{
j=strcmp(a[i],c); /* !!! Here is the problem !!! */
if(j==0)
{
lcd_printxy(1,1,"yes");
}
else
{
lcd_printxy(1,6,"no");
}
}
}
I am getting the display as "no", please let me know what is the problem?
the problem might be
1) the received array of characters are not converted to string, or
2) the received array of characters are converted to string but not able to compare with the available string..
please go through the program
One obvious bug for starters - change:
unsigned char a[2][11]={"$0016221826","$0123456789"};
to:
unsigned char a[2][12]={"$0016221826","$0123456789"};
(You need to allow room for the terminating '\0' in each string - I'm surprised your compiler didn't complain about this ?)
Also, this line in your interrupt handler is wrong:
chr[11] = SBUF;
Several problems with this - char only has storage for 11 chars, not 12, and you probably want to be accumulating characters from index 0 and then bumping the index, otherwise you're just overwriting the same character each time.
Looking at the rest of the code there are so many other problems that I think you may need to take a step back here and start with a simpler program - get that working first and then add to it in stages.
You might also want to get a decent introductory book on C and study it as there are lots of very basic mistakes in the code, so you might benefit from a better understanding of the language itself.
You only assign a value to chr[11], the rest of the array is uninitialized and will contain random data. You then copy this array containing random data to c (you could use e.g. memcpy here instead of looping yourself), and finally you compare the complete contents of c (which is random data) with one of the entries in a. So it's kind of natural that the result of that comparison will be that the strings are not equal.
Edit: A redesign of the program in the question
Your program has too many problems to be easily fixed, so I decided to try and rewrite it:
#include <reg51.h>
#include <string.h>
#include "_LCD_R8C.c"
#define INPUT_LENGTH 11
#define ACCEPTABLE_INPUT_COUNT 2
char input[INPUT_LENGTH]; /* The input from the serial port */
int input_pos = 0; /* Current position to write in the input buffer */
int input_done = 0; /* 0 = not done yet, 1 = all input read */
void serial_int (void) interrupt 4
{
if (!input_done && RI == 1)
{
/* Put the input at next position in the input buffer */
/* Then increase the position */
input[input_pos++] = SBUF;
RI = 0;
TI = 0;
/* Check if we have received all input yet */
if (input_pos >= INPUT_LENGTH)
input_done = 1;
}
}
int main()
{
/* Array of data that this programs thinks is acceptable */
/* +1 to the input length, to fit the terminating '\0' character */
char acceptable_inputs[ACCEPTABLE_INPUT_COUNT][INPUT_LENGTH + 1] = {
"$0016221826", "$0123456789"
};
iny acceptable_found = 0; /* Acceptable input found? */
/* Initialization */
lcd_init();
lcd_clear();
SCON = 0x50;
TMOD = 0x20;
TH1 = 0xFD;
ET0 = 0;
TR1 = 1;
RI = 1;
ES = 1;
EA = 1;
/* Wait until we have received all input */
while (!input_done)
; /* Do nothing */
/* Check the received input for something we accept */
for (int i = 0; i < ACCEPTABLE_INPUT_COUNT; i++)
{
if (memcmp(acceptable_inputs[i], input, INPUT_LENGTH) == 0)
{
/* Yes, the data received is acceptable */
acceptable_found = 1;
break; /* Don't have to check any more */
}
}
if (acceptable_found)
lcd_printxy(1, 1, "Yes");
else
lcd_printxy(1, 1, "No");
return 0;
}

Resources