I am seeing a potential overflow: streamBuffer is a struct object (part of the FreeRTOS lib) and upon executing the following line in OutputToSerial(), I see streamBuffer.xHead's value is set to an extremely large value even though it's not being modified at the time.
LONG_TO_STR(strData, txStr);
Note that I didn't have any issues when I called nRF24_ReadReg() multiple times before.
Also, often times I see that printf doesn't print the entire text that's being printed (prior to calling the time when I see a potential overflow) - instead misses on some chars.
Any way to get a better understanding of the cause? I don't see any hardfaults or anything to look in the registers...
For a reference, the following is the struct's definition:
typedef struct StreamBufferDef_t /*lint !e9058 Style convention uses tag. */
{
volatile size_t xTail; /* Index to the next item to read within the buffer. */
volatile size_t xHead; /* Index to the next item to write within the buffer. */
size_t xLength; /* The length of the buffer pointed to by pucBuffer. */
size_t xTriggerLevelBytes; /* The number of bytes that must be in the stream buffer before a task that is waiting for data is unblocked. */
volatile TaskHandle_t xTaskWaitingToReceive; /* Holds the handle of a task waiting for data, or NULL if no tasks are waiting. */
volatile TaskHandle_t xTaskWaitingToSend; /* Holds the handle of a task waiting to send data to a message buffer that is full. */
uint8_t *pucBuffer; /* Points to the buffer itself - that is - the RAM that stores the data passed through the buffer. */
uint8_t ucFlags;
#if ( configUSE_TRACE_FACILITY == 1 )
UBaseType_t uxStreamBufferNumber; /* Used for tracing purposes. */
#endif
} StreamBuffer_t;
// file.c
#define PRI_UINT64_C_Val(value) ((unsigned long) (value>>32)), ((unsigned long)value)
#define LONG_TO_STR(STR, LONG_VAL) (sprintf(STR, "%lx%lx", PRI_UINT64_C_Val(LONG_VAL)))
unsigned long long concatData(uint8_t *arr, uint8_t size)
{
long long unsigned value = 0;
for (uint8_t i = 0; i < size; i++)
{
value <<= 8;
value |= arr[i];
}
return value;
}
void nRF24_ReadReg(nrfl2401 *nrf, uint8_t reg, const uint8_t rxSize, uint8_t *rxBuffer, char *text)
{
uint8_t txBuffer[1] = {0};
uint8_t spiRxSize = rxSize;
if (reg <= nRF24_CMD_W_REG)
{
txBuffer[0] = nRF24_CMD_R_REG | (reg & nRF24_R_W_MASK);
spiRxSize++;
}
else
{
txBuffer[0] = reg;
}
nRF24_SendCommand(nrf, txBuffer, rxBuffer, spiRxSize);
OutputToSerial(txBuffer, rxBuffer, spiRxSize, text);
}
void OutputToSerial(uint8_t *writeBuffer, uint8_t *readBuffer, uint8_t size, char *text)
{
char strData[100] = {0}, rxStrData[100] = {0};
long long unsigned txStr = concatData(writeBuffer, size);
long long unsigned rxStr = concatData(readBuffer, size);
LONG_TO_STR(strData, txStr); // POTENTIAL ERROR.....!
LONG_TO_STR(rxStrData, rxStr);
char outputMsg[60] = {0};
strcpy(outputMsg, text);
strcat(outputMsg, ": 0x%s ----------- 0x%s\n");
printf (outputMsg, strData, rxStrData);
}
// main.c
StreamBufferHandle_t streamBuffer;
Perhaps other issues, yet the LONG_TO_STR(x) is simply a mess.
Consider the value 0x123400005678 would print as "12345678". That ref code is broken.
Yes its too bad code has long long yet no "%llx". Easy enough to re-write it all into a clean function.
//#define PRI_UINT64_C_Val(value) ((unsigned long) (value>>32)), ((unsigned long)value)
//#define LONG_TO_STR(STR, LONG_VAL) (sprintf(STR, "%lx%lx", PRI_UINT64_C_Val(LONG_VAL)))
#include <stdio.h>
#include <string.h>
#include <limits.h>
// Good for base [2...16]
void ullong_to_string(char *dest, unsigned long long x, int base) {
char buf[sizeof x * CHAR_BIT + 1]; // Worst case size
char *p = &buf[sizeof buf - 1]; // set to last element
*p = '\0';
do {
p--;
*p = "0123456789ABCDEF"[x % (unsigned) base];
x /= (unsigned) base;
} while (x);
strcpy(dest, p);
}
int main(void) {
char buf[100];
ullong_to_string(buf, 0x123400005678, 16); puts(buf);
ullong_to_string(buf, 0, 16); puts(buf);
ullong_to_string(buf, ULLONG_MAX, 16); puts(buf);
ullong_to_string(buf, ULLONG_MAX, 10); puts(buf);
return 0;
}
Output
123400005678
0
FFFFFFFFFFFFFFFF
18446744073709551615
Related
this is my first post on this page, I just learned the C language a few days ago.
I have a problem with this C code, the idea is that the function "create_test_buffer" modifies the variable "test_ptr" adding a value of 100 in each position, but when I print "test_ptr" again after calling the function, nothing seems to have changed.
The checksum doesn't seem to be modified either, it is supposed that if you add a value of 100 in each position and in total the array has 512 positions you should get 512*100=51200, but instead it is showing 6400.
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#define TEST_BUFFER_LEN 512
#define NETWORK_SCAN_LIST_SIZE 8
typedef struct {
char ssid[33];
} wifi_ap_record_t;
wifi_ap_record_t ap_info_struct[NETWORK_SCAN_LIST_SIZE] =
{
{.ssid="SSID_3"},
{.ssid="SSID_1"},
{.ssid="SSID_1"},
{.ssid="SSID_1"},
{.ssid="SSID_1"},
{.ssid="SSID_2"},
{.ssid="SSID_1"},
{.ssid="SSID_1"},
};
uint16_t test_ptr[TEST_BUFFER_LEN];
uint16_t test_len = 0;
static void create_test_buffer(uint16_t *, uint16_t *);
static uint16_t calculate_checksum(uint16_t *, uint16_t);
int main()
{
printf("test_ptr before: %x\n", test_ptr);
printf("len before: %x\r\n", test_len);
create_test_buffer(test_ptr, &test_len);
printf("test_ptr after: %x\n", test_ptr);
printf("len after: %x\r\n", test_len);
uint16_t checksum = calculate_checksum(test_ptr, TEST_BUFFER_LEN);
printf("Checksum: %x\r\n", checksum);
return 0;
}
static void create_test_buffer(uint16_t *buff_ptr, uint16_t *len)
{
#define TEST_BUFFER_LEN 512
static uint16_t test_buffer[TEST_BUFFER_LEN];
for (uint16_t idx = 0 ; idx < TEST_BUFFER_LEN; ++idx) {
test_buffer[idx] = 0x64;
}
memcpy(buff_ptr, test_buffer, TEST_BUFFER_LEN);
// buff_ptr = test_buffer;
len = TEST_BUFFER_LEN;
}
static uint16_t calculate_checksum(uint16_t *data, uint16_t len)
{
uint16_t idx = 0;
uint16_t checksum = 0;
while (idx < len) {
checksum += data[idx];
idx++;
}
return checksum;
}
as for the functions I have tried to change the way the modification of the parameters is done, first I tried with "len = TEST_BUFFER_LEN;" and then with "memcpy(buff_ptr, test_buffer, TEST_BUFFER_LEN);" being this last one the one that at least allows me to see a change in the checksum.
You forgot to dereference the pointer to "len".
*len = TEST_BUFFER_LEN;
This means "Write to the variable that len points to". If you forget the *, you overwrite the pointer, and not the thing the pointer points to.
You've also got at least one more mistake in the code - memcpy takes the length in bytes, not 16-bit words, so you have to multiply the length with the size of each element before passing that to memcpy.
memcpy(buff_ptr, test_buffer, TEST_BUFFER_LEN * sizeof(uint16_t));
Your compiler likely warned you about both of these. You should take compiler warnings seriously, and ideally, enable all of them (i.e. -Wall -Wextra on GCC/Clang)
I am writing C code on STM32 (specifically the STM32756-EVAL) and I have created an enum which reads an incoming char array and assigns an enum to it. The value of this enum is then placed as the index for a function pointer array.
The reason why I have this code is to be able to decide on what function to call based on the receiving char array, without relying on a giant if-else stack reading through the char arrays one by one.
The relevant code:
enum cmd cmd_Converter(unsigned char* inputCmd){//Converts the input cmd from uint8_t to an enum.
switch (inputCmd[0]){ //Currently we are using a switch-case. I expect this list to expand to something like 50.
case 'F':
if (memcmp(inputCmd, "FIRMV", COMMAND_LENGTH) == 0) return FIRMV;
else return INVAL;
break;
case 'V':
if (memcmp(inputCmd, "VALCN", COMMAND_LENGTH) == 0) return VALCN;
else return INVAL;
break;
default:
return INVAL;
}
}
void process_Message(uint8_t* message, uint16_t Len){
unsigned char inputCmd[COMMAND_LENGTH];
unsigned char inputData[DATA_LENGTH];
unsigned char outputCmd[COMMAND_LENGTH];
unsigned char outputData[DATA_LENGTH];
//Function that separates message, inputCmd, and inputMessage.
memcpy((char*) inputCmd, (const char*)message + COMMAND_CHAR, COMMAND_LENGTH);
memcpy((char*) inputData, (const char*)message + DATA_CHAR, DATA_LENGTH);
enum cmd enumCmd = cmd_Converter(inputCmd);
void (*cmd_Function_Pointer[])(unsigned char* inputData) = {FIRMV_F, VALCN_F, INVAL_F}; //Is this even needed?
(*cmd_Function_Pointer[enumCmd])(inputData);
// message_Received(message, Len);
// send_Message(outputCmd, outputData);
}
void FIRMV_F(unsigned char *inputData){
//Do thing
}
void VALCN_F(unsigned char *inputData){
//Do thing
}
void INVAL_F(unsigned char *inputData){
//Do thing
}
The enum is there to improve code readability, so that anyone reading the code can see the enum and the function pointer and go "enum FIRMV will call FIRMV_F from (*cmd_Function_Pointer[enumCmd])(inputData)". One of the weaknesses I've identified is that it relies on the sequence of enum cmd and cmd_Function_Pointer[] to be identical, and if the list of enums gets too long it will be hard to maintain this identical sequence.
I am wondering whether there are any methods within C that would allow for "synchronizing" the identifiers in an enum and function names called by a function pointer?
The full code:
usbd_cdc_if.c
/**
* #brief Data received over USB OUT endpoint are sent over CDC interface
* through this function.
*
* #note
* This function will issue a NAK packet on any OUT packet received on
* USB endpoint until exiting this function. If you exit this function
* before transfer is complete on CDC interface (ie. using DMA controller)
* it will result in receiving more data while previous ones are still
* not sent.
*
* #param Buf: Buffer of data to be received
* #param Len: Number of data received (in bytes)
* #retval Result of the operation: USBD_OK if all operations are OK else USBD_FAIL
*/
static int8_t CDC_Receive_FS(uint8_t* Buf, uint32_t *Len)
{
/* USER CODE BEGIN 6 */
USBD_CDC_SetRxBuffer(&hUsbDeviceFS, &Buf[0]);
USBD_CDC_ReceivePacket(&hUsbDeviceFS);
memset (buffer, '\0', 64); // clear the buffer
uint8_t len = (uint8_t)*Len; //Converts Len as uint32_t to len as uint8_t
memcpy(buffer, Buf, len); // copy the data to the buffer
memset(Buf, '\0', len); // clear the Buf also
//Code used to send message back
process_Message(buffer, len);
return (USBD_OK);
/* USER CODE END 6 */
}
/**
* #brief CDC_Transmit_FS
* Data to send over USB IN endpoint are sent over CDC interface
* through this function.
* #note
*
*
* #param Buf: Buffer of data to be sent
* #param Len: Number of data to be sent (in bytes)
* #retval USBD_OK if all operations are OK else USBD_FAIL or USBD_BUSY
*/
uint8_t CDC_Transmit_FS(uint8_t* Buf, uint16_t Len)
{
uint8_t result = USBD_OK;
/* USER CODE BEGIN 7 */
USBD_CDC_HandleTypeDef *hcdc = (USBD_CDC_HandleTypeDef*)hUsbDeviceFS.pClassData;
if (hcdc->TxState != 0){ //If TxState in hcdc is not 0, return USBD_BUSY.
return USBD_BUSY;
}
USBD_CDC_SetTxBuffer(&hUsbDeviceFS, Buf, Len); //SetTxBuffer sets the size of the buffer, as well as the buffer itself.
result = USBD_CDC_TransmitPacket(&hUsbDeviceFS); //USBD_CDC_TransmitPacket(&hUsbDeviceFS) transmits
/* USER CODE END 7 */
return result;
}
messageprocesser.c
#include "messageprocesser.h"
#include "main.h"
#include "usbd_cdc_if.h"
#include <string.h>
#include "string.h"
//Sample cmd: TOARM_FIRMV_00000000_4C\r\n
#define MESSAGE_LENGTH 25
#define COMMAND_CHAR 6 //See SW Protocol or sample cmd
#define COMMAND_LENGTH 5
#define DATA_CHAR 12
#define DATA_LENGTH 8
#define CHECKSUM_CHAR 21
#define CHECKSUM_LENGTH 2
enum cmd {FIRMV, VALCN, INVAL};
enum cmd cmd_Converter(unsigned char* inputCmd){//Converts the input cmd from uint8_t to an enum.
switch (inputCmd[0]){
case 'F':
if (memcmp(inputCmd, "FIRMV", COMMAND_LENGTH) == 0) return FIRMV;
else return INVAL;
break;
case 'V':
if (memcmp(inputCmd, "VALCN", COMMAND_LENGTH) == 0) return VALCN;
else return INVAL;
break;
default:
return INVAL;
}
}
void process_Message(uint8_t* message, uint16_t Len){
//HAL_GPIO_TogglePin(GPIOF, GPIO_PIN_10);
unsigned char inputCmd[COMMAND_LENGTH]; //These are not null-terminated strings.
unsigned char inputData[DATA_LENGTH]; //These are just an array of chars.
unsigned char outputCmd[COMMAND_LENGTH];
unsigned char outputData[DATA_LENGTH];
//Function that separates message, inputCmd, and inputMessage.
memcpy((char*) inputCmd, (const char*)message + COMMAND_CHAR, COMMAND_LENGTH);
memcpy((char*) inputData, (const char*)message + DATA_CHAR, DATA_LENGTH);
enum cmd enumCmd = cmd_Converter(inputCmd);
void (*cmd_Function_Pointer[])(unsigned char* inputData) = {FIRMV_F, VALCN_F, INVAL_F};
(*cmd_Function_Pointer[enumCmd])(inputData);
}
void FIRMV_F(unsigned char *inputData){
//HAL_GPIO_TogglePin(GPIOF, GPIO_PIN_10);
unsigned char outputCmd[COMMAND_LENGTH];
unsigned char outputData[DATA_LENGTH];
memcpy(outputCmd, "FIRMV", COMMAND_LENGTH);
memcpy(outputData, "01050A00", DATA_LENGTH);
send_Message(outputCmd, outputData);
}
void VALCN_F(unsigned char *inputData){
//HAL_GPIO_TogglePin(GPIOF, GPIO_PIN_10);
unsigned char outputCmd[COMMAND_LENGTH];
unsigned char outputData[DATA_LENGTH];
memcpy(outputCmd, "VALCN", COMMAND_LENGTH);
memcpy(outputData, "00000000", DATA_LENGTH);
send_Message(outputCmd, outputData);
}
void INVAL_F(unsigned char *inputData){
HAL_GPIO_TogglePin(GPIOF, GPIO_PIN_10);
unsigned char outputCmd[COMMAND_LENGTH];
unsigned char outputData[DATA_LENGTH];
memcpy(outputCmd, "REEEE", COMMAND_LENGTH);
memcpy(outputData, "99999999", DATA_LENGTH);
send_Message(outputCmd, outputData);
}
void send_Message(uint8_t* cmd, uint8_t* data){
uint8_t outputMessage[MESSAGE_LENGTH] = "TOWST_";
memcpy((char*) outputMessage + COMMAND_CHAR, (const char*) cmd, COMMAND_LENGTH);
outputMessage[COMMAND_CHAR + COMMAND_LENGTH] = '_';
memcpy((char*) outputMessage + DATA_CHAR, (const char*) data, DATA_LENGTH);
outputMessage[DATA_CHAR + DATA_LENGTH] = '_';
//Deal with checksum
int outputCheckSum = checkSum_Generator(outputMessage);
char outputCheckSumHex[2] = {'0', '0'};
itoa (outputCheckSum, outputCheckSumHex, 16);
if (outputCheckSum < 16) { //Adds a 0 if CS has fewer than 2 numbers
outputCheckSumHex[1] = outputCheckSumHex[0];
outputCheckSumHex[0] = '0';
}
outputCheckSumHex[0] = toupper (outputCheckSumHex[0]);
outputCheckSumHex[1] = toupper (outputCheckSumHex[1]);
memcpy((char*) outputMessage + CHECKSUM_CHAR, (const char*) outputCheckSumHex, CHECKSUM_LENGTH);
outputMessage[23] = '\r';
outputMessage[24] = '\n';
//return a processed message array
CDC_Transmit_FS(outputMessage, sizeof(outputMessage));
}
int checkSum_Generator(uint8_t* message){
int checkSum = 0;
for (int i = 0; i < CHECKSUM_CHAR; i++){ //Gives the cs of TOARM_COMND_DATA0000_.
checkSum ^= message[i];
}
return checkSum;
}
The attempts at solving this issue
Another question that involved "linking function pointers and enum" that I have looked into, but the solution offered doesn't seem to fix the issue I have mentioned, only circumventing by having smaller code.
So far I have tried changing the names of the functions to become identical to their enum counterparts, renaming FIRMV_F() to FIRMV(). Unsurprisingly, I got this:
../Core/Src/messageprocesser.c:59:6: error: 'FIRMV' redeclared as different kind of symbol
I have also tried assigning the function pointer array in a way similar to conventional arrays:
void process_Message(uint8_t* message, uint16_t Len){
//HAL_GPIO_TogglePin(GPIOF, GPIO_PIN_10);
unsigned char inputCmd[COMMAND_LENGTH]; //These are not null-terminated strings.
unsigned char inputData[DATA_LENGTH]; //These are just an array of chars.
unsigned char outputCmd[COMMAND_LENGTH];
unsigned char outputData[DATA_LENGTH];
//Function that separates message, inputCmd, and inputMessage.
memcpy((char*) inputCmd, (const char*)message + COMMAND_CHAR, COMMAND_LENGTH);
memcpy((char*) inputData, (const char*)message + DATA_CHAR, DATA_LENGTH);
enum cmd enumCmd = cmd_Converter(inputCmd);
void (*cmd_Function_Pointer[INVAL + 1])(unsigned char* inputData);
(*cmd_Function_Pointer[FIRMV]) = FIRMV_F;
(*cmd_Function_Pointer[VALCN]) = VALCN_F;
(*cmd_Function_Pointer[INVAL]) = INVAL_F;
(*cmd_Function_Pointer[enumCmd])(inputData); //What is going on here?
}
I have gotten the following errors.
../Core/Src/messageprocesser.c:52:33: error: lvalue required as left operand of assignment
52 | (*cmd_Function_Pointer[FIRMV]) = FIRMV_F;
| ^
../Core/Src/messageprocesser.c:53:33: error: lvalue required as left operand of assignment
53 | (*cmd_Function_Pointer[VALCN]) = VALCN_F;
| ^
../Core/Src/messageprocesser.c:54:33: error: lvalue required as left operand of assignment
54 | (*cmd_Function_Pointer[INVAL]) = INVAL_F;
This makes sense since my understanding is that the functions FIRMV_F do not have an lvalue, but I do not know how to fix it, assuming it is possible.
Please let me know if more detail or clarity is needed.
To Less Determined Readers: Using enum as index in function pointer array for readability. In current code enum sequencing needs to be identical to function pointer sequence. This seems vulnerable. Want method of making sure enum sequencing stays identical to function pointer sequence.
There's a common recommended practice, assuming enum values are just sequential. Make an enum like this:
typedef enum
{
INVAL,
FIRMV,
VALCN,
CMD_N // number of items in the enum
} cmd_t;
And a function template like this:
typedef void cmd_func_t (unsigned char *inputData);
Then you can create an array of function pointers where each index corresponds to the relevant enum, by using designated initializers:
static cmd_func_t* const cmd_list[] = // size not specified on purpose
{
[INVAL] = INVAL_F,
[FIRMV] = FIRMV_F,
[VALCN] = VALCN_F,
};
Verify integrity with:
_Static_assert(sizeof cmd_list/sizeof cmd_list[0] == CMD_N,
"cmd_list has wrong size compared to cmd_t");
Function call usage, for example:
cmd_list[FIRMV](param);
Also, just for completeness, we can go completely loco with "do not repeat yourself" and generate a lot of this through X-macros: https://godbolt.org/z/zY1nh5M5T. Not really recommended since it makes the code look obscure, but quite powerful. For example strings like "FIRMV" could be generated at compile-time, as shown in that example.
You can have your cmd_Converter() function just return the function pointer that is required. You can still use a switch statement to remove the need to have a long if...else if... chain. This way you don't need any enums, nor arrays of function pointers, and there is nothing to keep in sync.
#include <string.h>
#include <stdlib.h>
#define COMMAND_LENGTH 8
typedef void (*handler_t)(const unsigned char *arg);
void FIRMV_F(const unsigned char *inputData);
handler_t cmd_Converter(const unsigned char *inputCmd)
{
handler_t result = NULL;
switch (inputCmd[0])
{
case 'F':
if (memcmp(inputCmd, "FIRMV", COMMAND_LENGTH) == 0)
result = FIRMV_F;
break;
// more cases here.
default:
break;
}
return result;
}
Edit: If you want a purely data-driven approach that avoids the switch statement altogether, you can have an array of structures mapping command names to function pointers. This still removes the need for enums - the mapping is explicit:
#include <string.h>
#include <stdlib.h>
#define COMMAND_LENGTH 8
typedef void (*handler_t)(const unsigned char *arg);
typedef struct
{
const char *name;
handler_t fn_p;
} cmd_t;
void FIRMV_F(const unsigned char *inputData);
void VALCN_F(const unsigned char *inputData);
static const cmd_t commands[] =
{
{"FIRMV", FIRMV_F},
{"VALCN", VALCN_F}
};
handler_t cmd_Converter(const unsigned char *inputCmd)
{
handler_t result = NULL;
for (size_t i = 0; i < sizeof commands / sizeof commands[0]; ++i)
{
if (strcmp((const char *)inputCmd, commands[i].name) == 0)
{
result = commands[0].fn_p;
break;
}
}
return result;
}
This is using a linear search. If you don't have many commands, this might be enough. If there are a lot, you could sort the array and do a binary search.
I am trying to print out byte array as one byte at the time in hexadecimal format within for loop like this:
int my_function(void *data)
{
obuf = (str*)data;
int i;
for (i = 0; i < obuf->len; i++)
{
printf("%02X:", obuf->s[i]);
}
return 0;
}
str in this case is structure from Kamailio - review at http://www.asipto.com/pub/kamailio-devel-guide/#c05str
The expected output:
80:70:0F:80:00:00:96:00:1D:54:7D:7C:36:9D:1B:9A:20:BF:F9:68:E8:E8:E8:F8:68:98:E8:EE:E8:B4:7C:3C:34:74:74:64:74:69:2C:5A:3A:3A:3A:3A:3A:3A:32:24:43:AD:19:1D:1D:1D:1D:13:1D:1B:3B:60:AB:AB:AB:AB:AB:0A:BA:BA:BA:BA:B0:AB:AB:AB:AB:AB:0A:BA:BA:BA:BA:B9:3B:61:88:43:
What I am getting:
FFFFFF80:70:0F:FFFFFF80:00:00:FFFFFF96:00:1D:54:7D:7C:36:FFFFFF9D:1B:FFFFFF9A:20:FFFFFFBF:FFFFFFF9:68:FFFFFFE8:FFFFFFE8:FFFFFFE8:FFFFFFF8:68:FFFFFF98:FFFFFFE8:FFFFFFEE:FFFFFFE8:FFFFFFB4:7C:3C:34:74:74:64:74:69:2C:5A:3A:3A:3A:3A:3A:3A:32:24:43:FFFFFFAD:19:1D:1D:1D:1D:13:1D:1B:3B:60:FFFFFFAB:FFFFFFAB:FFFFFFAB:FFFFFFAB:FFFFFFAB:0A:FFFFFFBA:FFFFFFBA:FFFFFFBA:FFFFFFBA:FFFFFFB0:FFFFFFAB:FFFFFFAB:FFFFFFAB:FFFFFFAB:FFFFFFAB:0A:FFFFFFBA:FFFFFFBA:FFFFFFBA:FFFFFFBA:FFFFFFB9:3B:61:FFFFFF88:43:
Could someone please help me understand why there are some of bytes prefixed with FFFFFF and other aren't?
Thanks in advance
Looks like obuf->s[i] returns a signed value
You would need to cast it to a unsigned value to get rid of the FFF.. at start.
printf("%02X:", (unsigned char)(obuf->s[i]));
The problem appears with chars that have the most significant bit set (which are out of the proper pure ASCII set range 0-127). The key point is to consider chars as unsigned.
printf("%02X:", (unsigned char)(obuf->s[i]));
See this simple compilable repro C code:
#include <stdio.h>
#include <string.h>
struct _str {
char* s; /* pointer to the beginning of string (char array) */
int len; /* string length */
};
typedef struct _str str;
int my_function(void *data)
{
str* obuf;
int i;
obuf = (str*)data;
for (i = 0; i < obuf->len; i++) {
printf("%02X:", (unsigned char)(obuf->s[i]));
}
return 0;
}
int main(void)
{
char buf[2];
str s;
/* Test with ordinary ASCII string */
s.s = "Hello";
s.len = strlen(s.s);
my_function(&s);
printf("\n");
/* Test with char values with most significant bit set */
buf[0] = 0xF1;
buf[1] = 0x00;
s.s = buf;
s.len = 1;
my_function(&s);
return 0;
}
With MSVC, I get this output:
48:65:6C:6C:6F:
F1:
I've found some md5 code that consists of the following prototypes...
I've been trying to find out where I have to put the string I want to hash, what functions I need to call, and where to find the string once it has been hashed. I'm confused with regards to what the uint32 buf[4] and uint32 bits[2] are in the struct.
struct MD5Context {
uint32 buf[4];
uint32 bits[2];
unsigned char in[64];
};
/*
* Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
* initialization constants.
*/
void MD5Init(struct MD5Context *context);
/*
* Update context to reflect the concatenation of another buffer full
* of bytes.
*/
void MD5Update(struct MD5Context *context, unsigned char const *buf, unsigned len);
/*
* Final wrapup - pad to 64-byte boundary with the bit pattern
* 1 0* (64-bit count of bits processed, MSB-first)
*/
void MD5Final(unsigned char digest[16], struct MD5Context *context);
/*
* The core of the MD5 algorithm, this alters an existing MD5 hash to
* reflect the addition of 16 longwords of new data. MD5Update blocks
* the data and converts bytes into longwords for this routine.
*/
void MD5Transform(uint32 buf[4], uint32 const in[16]);
I don't know this particular library, but I've used very similar calls. So this is my best guess:
unsigned char digest[16];
const char* string = "Hello World";
struct MD5Context context;
MD5Init(&context);
MD5Update(&context, string, strlen(string));
MD5Final(digest, &context);
This will give you back an integer representation of the hash. You can then turn this into a hex representation if you want to pass it around as a string.
char md5string[33];
for(int i = 0; i < 16; ++i)
sprintf(&md5string[i*2], "%02x", (unsigned int)digest[i]);
Here's a complete example:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(__APPLE__)
# define COMMON_DIGEST_FOR_OPENSSL
# include <CommonCrypto/CommonDigest.h>
# define SHA1 CC_SHA1
#else
# include <openssl/md5.h>
#endif
char *str2md5(const char *str, int length) {
int n;
MD5_CTX c;
unsigned char digest[16];
char *out = (char*)malloc(33);
MD5_Init(&c);
while (length > 0) {
if (length > 512) {
MD5_Update(&c, str, 512);
} else {
MD5_Update(&c, str, length);
}
length -= 512;
str += 512;
}
MD5_Final(digest, &c);
for (n = 0; n < 16; ++n) {
snprintf(&(out[n*2]), 16*2, "%02x", (unsigned int)digest[n]);
}
return out;
}
int main(int argc, char **argv) {
char *output = str2md5("hello", strlen("hello"));
printf("%s\n", output);
free(output);
return 0;
}
As other answers have mentioned, the following calls will compute the hash:
MD5Context md5;
MD5Init(&md5);
MD5Update(&md5, data, datalen);
MD5Final(digest, &md5);
The purpose of splitting it up into that many functions is to let you stream large datasets.
For example, if you're hashing a 10GB file and it doesn't fit into ram, here's how you would go about doing it. You would read the file in smaller chunks and call MD5Update on them.
MD5Context md5;
MD5Init(&md5);
fread(/* Read a block into data. */)
MD5Update(&md5, data, datalen);
fread(/* Read the next block into data. */)
MD5Update(&md5, data, datalen);
fread(/* Read the next block into data. */)
MD5Update(&md5, data, datalen);
...
// Now finish to get the final hash value.
MD5Final(digest, &md5);
To be honest, the comments accompanying the prototypes seem clear enough. Something like this should do the trick:
void compute_md5(char *str, unsigned char digest[16]) {
MD5Context ctx;
MD5Init(&ctx);
MD5Update(&ctx, str, strlen(str));
MD5Final(digest, &ctx);
}
where str is a C string you want the hash of, and digest is the resulting MD5 digest.
It would appear that you should
Create a struct MD5context and pass it to MD5Init to get it into a proper starting condition
Call MD5Update with the context and your data
Call MD5Final to get the resulting hash
These three functions and the structure definition make a nice abstract interface to the hash algorithm. I'm not sure why you were shown the core transform function in that header as you probably shouldn't interact with it directly.
The author could have done a little more implementation hiding by making the structure an abstract type, but then you would have been forced to allocate the structure on the heap every time (as opposed to now where you can put it on the stack if you so desire).
All of the existing answers use the deprecated MD5Init(), MD5Update(), and MD5Final().
Instead, use EVP_DigestInit_ex(), EVP_DigestUpdate(), and EVP_DigestFinal_ex(), e.g.
// example.c
//
// gcc example.c -lssl -lcrypto -o example
#include <openssl/evp.h>
#include <stdio.h>
#include <string.h>
void bytes2md5(const char *data, int len, char *md5buf) {
// Based on https://www.openssl.org/docs/manmaster/man3/EVP_DigestUpdate.html
EVP_MD_CTX *mdctx = EVP_MD_CTX_new();
const EVP_MD *md = EVP_md5();
unsigned char md_value[EVP_MAX_MD_SIZE];
unsigned int md_len, i;
EVP_DigestInit_ex(mdctx, md, NULL);
EVP_DigestUpdate(mdctx, data, len);
EVP_DigestFinal_ex(mdctx, md_value, &md_len);
EVP_MD_CTX_free(mdctx);
for (i = 0; i < md_len; i++) {
snprintf(&(md5buf[i * 2]), 16 * 2, "%02x", md_value[i]);
}
}
int main(void) {
const char *hello = "hello";
char md5[33]; // 32 characters + null terminator
bytes2md5(hello, strlen(hello), md5);
printf("%s\n", md5);
}
Basically I'm writing a printf function for an dedicated system so I want to pass an optional number of arguments without using VA_ARGS macros. I knocked up a simple example and this block of code works:
#include <stdio.h>
void func(int i, ...);
int main(int argc, char *argv);
int main(int argc, char *argv) {
unsigned long long f = 6799000015ULL;
unsigned long long *g;
//g points to f
g = &f;
printf("natural: %llu in hex: %llX address: %x\n", *g, *g, g);
//put pointer onto stack
func(6, g, g);
return 0;
}
void func(int i, ...) {
unsigned long long *f;
//pop value off
f = *(&i + 1);
printf("address: %x natural: %llu in hex: %llX\n", f, *f, *f);
}
However the larger example I'm trying to transfer this to doesn't work.
(in the main function):
unsigned long long f = 6799000015ULL;
unsigned long long *g;
g = &f;
kprintf("ull test: 1=%U 2=%X 3=%x 4= 5=\n", g, g, g);
(my dodgy printf function that I'm having trouble with. It maybe worth pointing out
this code DOES work with ints, char strings or anyother % flags which are passed by
value and not pointer. The only difference between what did work and the unsigned
long longs is one is bigger, so I pass by value instead to ensure I don't increment
the &format+ args part wrongly. Does that make sense?)
void kprintf(char *format, ...)
{
char buffer[KPRINTF_BUFFER_SIZE];
int bpos = 0; /* position to write to in buffer */
int fpos = 0; /* position of char to print in format string */
char ch; /* current character being processed*/
/*
* We have a variable number of paramters so we
* have to increment from the position of the format
* argument.
*/
int arg_offset = 1;
/*
* Think this through Phill. &format = address of format on stack.
* &(format + 1) = address of argument after format on stack.
* void *p = &(format + arg_offset);
* kprintf("xxx %i %s", 32, "hello");
* memory would look like = [ 3, 32, 5, "xxx", 32, "hello" ]
* get to 32 via p = &(format + 1); (int)p (because the int is copied, not a pointer)
* get to hello via p = &(format + 2); (char*)p;
*/
void *arg;
unsigned long long *llu;
arg = (void*) (&format + arg_offset);
llu = (unsigned long long*) *(&format + arg_offset);
while (1)
{
ch = format[fpos++];
if (ch == '\0')
break;
if (ch != '%')
buffer[bpos++] = ch;
else
{
ch = format[fpos++];
if (ch == 's')
bpos += strcpy(&buffer[bpos], KPRINTF_BUFFER_SIZE - bpos, (char*)arg);
else if (ch == '%')
buffer[bpos++] = '%';
else if (ch == 'i')
bpos += int_to_str(&buffer[bpos], KPRINTF_BUFFER_SIZE - bpos, *((int*)arg));
else if (ch == 'x')
bpos += int_to_hex_str(&buffer[bpos], KPRINTF_BUFFER_SIZE - bpos, *((int*)arg));
else if (ch == 'o')
bpos += int_to_oct_str(&buffer[bpos], KPRINTF_BUFFER_SIZE - bpos, *((int*)arg));
else if (ch == 'X') {
//arg is expected to be a pointer we need to further dereference.
bpos += unsigned_long_long_to_hex(&buffer[bpos], KPRINTF_BUFFER_SIZE - bpos, *llu);
} else if (ch == 'U') {
bpos += unsigned_long_long_to_str(&buffer[bpos], KPRINTF_BUFFER_SIZE - bpos, *llu);
} else
{
puts("invalid char ");
putch(ch);
puts(" passed to kprintf\n");
}
arg_offset++;
arg = (void *)(&format + arg_offset);
llu = (unsigned long long*) *(&format + arg_offset);
}
}
buffer[bpos] = '\0';
puts(buffer);
}
(and the unsigned long long functions it goes on to call):
int unsigned_long_long_to_hex(char *buffer, int max_size, unsigned long long number)
{
return ull_number_to_str(buffer, max_size, number, BASE_HEX);
}
int unsigned_long_long_to_str(char *buffer, int max_size, unsigned long long number) {
return ull_number_to_str(buffer, max_size, number, BASE_DECIMAL);
}
int ull_number_to_str(char *buffer, int max_size, unsigned long long number, int base) {
int bufpos = 0;
unsigned int lo_byte = (unsigned int) number;
unsigned int hi_byte = (unsigned int) (number >> 32);
bufpos = number_to_str(buffer, max_size, lo_byte, base);
bufpos += number_to_str(buffer + bufpos, max_size, hi_byte, base);
return bufpos;
}
#define NUMERIC_BUFF_SIZE (11 * (ADDRESS_SIZE / 32))
int number_to_str(char *buffer, int max_size, int number, int base)
{
char *char_map = "0123456789ABCDEF";
int remain = 0;
char buff_stack[NUMERIC_BUFF_SIZE];
int stk_pnt = 0;
int bpos = 0;
/* with this method of parsing, the digits come out backwards */
do
{
if (stk_pnt > NUMERIC_BUFF_SIZE)
{
puts("Number has too many digits to be printed. Increasse NUMBERIC_BUFF_SIZE\n");
return 0;
}
remain = number % base;
number = number / base;
buff_stack[stk_pnt++] = char_map[remain];
} while (number > 0);
/* before writing...ensure we have enough room */
if (stk_pnt > max_size)
{
//error. do something?
puts("number_to_str passed number with too many digits to go into buffer\n");
//printf("error. stk_pnt > max_size (%d > %d)\n", stk_pnt, max_size);
return 0;
}
/* reorder */
while (stk_pnt > 0)
buffer[bpos++] = buff_stack[--stk_pnt];
return bpos;
}
Sorry guys, I can't see what I've done wrong. I appreciate this is a "wall of code" type scenario but hopefully someone can see what I've done wrong. I appreciate you probably dislike not using VA_ARGS but I don't understand why this technique shouldn't just work? And also, I'm linking with -nostdlib too. If someone can help I'd really appreciate it. Also, this isn't meant to be production quality code so if I lack some C fundamentals feel free to be constructive about it :-)
It's a bad idea to code this way. Use stdarg.h.
On the off chance (I presume this based on the name kprintf) that you're working on a hobby kernel or embedded project and looking to avoid using standard libraries, I recommend at least writing your own (architecture and compiler specific) set of stdarg macros that conform to the well-known interfaces and code against that. That way your code doesn't look like such a WTF by dereferencing past the address of the last argument.
You can make a va_list type that stores the last-known address, and your va_arg macro could appropriately align the sizeof of the type it's passed and advance the pointer accordingly. For most conventions I have worked on for x86, every type is promoted to 32 bits...
You have to read on the calling conventions for your platform, i.e. how on your target processor/OS function arguments are passed, and how registers are saved. Not all parameters are passed on stack. Depending on number of parameters and their types, many complex situations can arise.
I should add: if you want to manipulate the stack by hand as you are doing above, you need to do it in assembler, not in C. The C language follows a defined standard, and what you are doing above it not legal code (i.e., its meaning is not well-defined). As such, the compiler is allowed to do anything it wants with it, such as optimize it in weird ways unsuitable to your needs.