Valgrind memcpy Invalid write of size 8 (uintptr_t *) - c

I have an issue with memcpy and valgrind, telling me about an Invalid write of size 8.
I got to the point of figuring out where the faulty code is, but I have no clue as to why it is faulty...
I'm aware that there are other questions regarding that, but they don't help me really.
The following is an excerpt of the most important bits of my approach on a somewhat "universal" stack, when my regular value would be of type uintptr_t.
Here are two defines that I used below:
// default stack batch size
#define STACK_BATCH_DEFAULT 8
// size of one value in the stack
#define STACK_SIZEOF_ONE sizeof(uintptr_t)
The structure of the stack is as follows:
typedef struct Stack
{
size_t count; // count of values in the stack
size_t size; // size of one value in bytes
size_t alloced; // allocated count
uintptr_t *value; // the values
int batch; // memory gets allocated in those batches
}
Stack;
I have an initialization function for the stack:
bool stack_init(Stack *stack, size_t size, int batch)
{
if(!stack) return false;
stack->batch = batch ? batch : STACK_BATCH_DEFAULT;
stack->size = size;
stack->count = 0;
stack->value = 0;
stack->alloced = 0;
return true;
}
Then the stack_push function, where valgrind throws the error Invalid write of size 8:
bool stack_push(Stack *stack, uintptr_t *value)
{
if(!stack || !value) return false;
// calculate required amount of elements
size_t required = stack->batch * (stack->count / stack->batch + 1);
// allocate more memory if we need to
if(required > stack->alloced)
{
uintptr_t *tmp = realloc(stack->value, required * stack->size);
if(!tmp) return false;
stack->value = tmp;
stack->alloced = required;
}
// set the value
if(stack->size > STACK_SIZEOF_ONE)
{
memcpy(stack->value + stack->size * stack->count, value, stack->size); // <--- valgrind throws the error here
}
else
{
stack->value[stack->count] = *value;
}
// increment count
stack->count++;
return true;
}
Then in my program I'm calling the functions as follows:
Stack stack = {0};
stack_init(&stack, sizeof(SomeStruct), 0);
/* ... */
SomeStruct push = { // this is a struct that is larger than STACK_SIZEOF_ONE
.int_a = 0,
.int_b = 0,
.int_c = 0,
.id = 0,
.pt = pointer_to_struct, // it is a pointer to some other struct that was allocated beforehand
};
stack_push(&stack, (uintptr_t *)&push);
And with universal I meant that I can also have a regular stack:
Stack stack = {0};
stack_init(&stack, sizeof(uintptr_t), 0);
/* ... */
uintptr_t a = 100;
stack_push(&stack, &a);
Also, I'm open to hear general tips and advices if there are any things that should/could be improved :)
Edit: Below is a runnable code.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
// default stack batch size
#define STACK_BATCH_DEFAULT 8
// size of one value in the stack
#define STACK_SIZEOF_ONE sizeof(uintptr_t)
#define TESTCOUNT 10
#define MAX_BUF 16
typedef struct Stack
{
size_t count; // count of values in the stack
size_t size; // size of one value in bytes
size_t alloced; // allocated count
uintptr_t *value; // the values
int batch; // memory gets allocated in those batches
}
Stack;
typedef struct SomeStruct
{
size_t a;
size_t b;
size_t c;
size_t id;
char *str;
}
SomeStruct;
bool stack_init(Stack *stack, size_t size, int batch)
{
if(!stack) return false;
stack->batch = batch ? batch : STACK_BATCH_DEFAULT;
stack->size = size;
stack->count = 0;
stack->value = 0;
stack->alloced = 0;
return true;
}
bool stack_push(Stack *stack, uintptr_t *value)
{
if(!stack || !value) return false;
// calculate required amount of elements
size_t required = stack->batch * (stack->count / stack->batch + 1);
// allocate more memory if we need to
if(required > stack->alloced)
{
uintptr_t *tmp = realloc(stack->value, required * stack->size);
if(!tmp) return false;
stack->value = tmp;
stack->alloced = required;
}
// set the value
if(stack->size > STACK_SIZEOF_ONE)
{
memcpy(stack->value + stack->size * stack->count, value, stack->size); // <--- valgrind throws the error here
}
else
{
stack->value[stack->count] = *value;
}
// increment count
stack->count++;
return true;
}
bool stack_pop(Stack *stack, uintptr_t *value)
{
if(!stack) return false;
if(!stack->count) return false;
// decrement count of elements
stack->count--;
// return the value if we have an address
if(value)
{
if(stack->size > STACK_SIZEOF_ONE)
{
memcpy(value, stack->value + stack->size * stack->count, stack->size);
}
else
{
*value = stack->value[stack->count];
}
}
int required = stack->batch * (stack->count / stack->batch + 1);
if(required < stack->alloced)
{
uintptr_t *tmp = realloc(stack->value, required * stack->size);
if(!tmp) return false;
stack->value = tmp;
stack->alloced = required;
}
if(!stack->value) return false;
return true;
}
int main(void)
{
// initialize variables
bool valid = false;
Stack default_stack = {0};
Stack some_stack = {0};
// initialize stacks
stack_init(&default_stack, sizeof(uintptr_t), 0);
stack_init(&some_stack, sizeof(SomeStruct), 0);
// test default case - push
printf("Testing the default case, pushing...\n");
for(int i = 0; i < TESTCOUNT; i++)
{
uintptr_t push = i;
valid = stack_push(&default_stack, &push);
if(!valid) return -1;
}
// ...now pop
printf("Testing the default case, popping...\n");
do
{
uintptr_t pop = 0;
valid = stack_pop(&default_stack, &pop);
if(valid) printf("%llu,", pop);
}
while(valid);
printf("\n");
// test some case - push
printf("Testing some case, pushing...\n");
for(int i = 0; i < TESTCOUNT; i++)
{
// generate the push struct
SomeStruct push = {
.a = i * 10,
.b = i * 100,
.c = i * 1000,
.id = i,
.str = 0,
};
// allocate a string
push.str = malloc(MAX_BUF + 1);
snprintf(push.str, MAX_BUF, "%d", i);
// push
valid = stack_push(&some_stack, (uintptr_t *)&push);
if(!valid) return -1;
}
// ...now pop
printf("Testing some case, popping...\n");
do
{
SomeStruct pop = {0};
valid = stack_pop(&some_stack, (uintptr_t *)&pop);
if(valid)
{
printf("a=%d,b=%d,c=%d,id=%d,str=%s\n", pop.a, pop.b, pop.c, pop.id, pop.str);
free(pop.str);
}
}
while(valid);
printf("\n");
/* leave out free functions for this example.... */
return 0;
}

After hours I figured it out :D The mistake happened because I very rarely do pointer arithmetic... In short, I was assuming that it would always calculate with a byte.
Let's take a look at the lines containing:
memcpy(stack->value + stack->size * stack->count, value, stack->size);
...and break it down, so it is more readable. And also, I'll even add a handy dandy comment in it:
size_t offset = stack->size * stack->count; // offset in bytes
void *dest = stack->value + offset;
void *src = value;
memcpy(dest, src, stack->size);
Now the pro C-programmer should instantly spot the problem. It is with the calculation of stack->value + offset, where it should add offset in bytes but it is not, because the stack->value is of type uintptr_t * and not of type uint8_t *.
So to fix it, I replaced it with this line:
void *dest = (uint8_t *)stack->value + offset;
And the code works.

Related

Why EXC_BAD_ACCESS (code=EXC_I386_GPFLT) when callback pointer to function?

The following code is trying to count word frequency in a document, by using hashset and vector.
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/** ==================================== VECTOR ======================================= */
typedef enum {
true, false
} bool;
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
/**
* Overwrites the element at the specified position.
*/
void VectorReplace(vector *v, const void *elemAddr, int position) {
assert(position >= 0 && position < v->elemNum);
void *posAddr = (char *)v->elems + position * v->elemSize;
/* free the memory of old element first */
if (v->freefn != NULL) v->freefn(posAddr);
memcpy(posAddr, elemAddr, v->elemSize);
}
/** ==================================== HASHSET ======================================= */
typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);
typedef struct {
int elemNum; //current element number
int bucketNum; //number of hash bucket
int elemSize; //how many byte each element has
vector *buckets; //array of vector
HashSetHashFunction hashfn;
HashSetCompareFunction compfn;
HashSetFreeFunction freefn;
} hashset;
void HashSetNew(hashset *h, int elemSize, int numBuckets,
HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
assert(h->buckets != NULL);
for (int i = 0; i < numBuckets; i++) {
vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
VectorNew(bucket, elemSize, freefn, 4);
}
h->bucketNum = numBuckets;
h->elemSize = elemSize;
h->elemNum = 0;
h->hashfn = hashfn;
h->compfn = comparefn;
h->freefn = freefn;
}
void HashSetEnter(hashset *h, const void *elemAddr) {
int hash = h->hashfn(elemAddr, h->bucketNum);
vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
// search in the hash set first
int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
if (pos != -1) {
// replace the old one if find a match
VectorReplace(bucket, elemAddr, pos);
} else {
// otherwise insert the new one
VectorAppend(bucket, elemAddr);
h->elemNum++;
}
}
/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */
/****************************************************************
*
* doc_freq is a key-value pair of [documentid, frequency]
* It's not supposed to be exposed to user or search engine.
* -----------------------------------------------------------
* It looks like:
* [1611742826915764000] [4 ]
* |-------------------| |-------|
* docid freq
***************************************************************/
typedef struct {
long docid;
int freq;
} doc_freq;
static void new_docfreq(doc_freq *df, long docid, int freq) {
df->docid = docid;
df->freq = freq;
}
/**
* HashSetHashFunction<doc_freq>
*/
static int hash_docfreq(const void *elemAddr, int numBuckets) {
doc_freq *df = (doc_freq *)elemAddr;
return (int)(df->docid % numBuckets);
}
/**
* HashSetCompareFunction<doc_freq>
*/
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
long id1 = ((doc_freq *)elemAddr1)->docid;
long id2 = ((doc_freq *)elemAddr2)->docid;
if (id1 < id2) {
return -1;
} else if (id1 > id2) {
return 1;
} else { // id1 == id2
return 0;
}
}
/**
* word_index is a index of a single word.
* ---------------------------------------
* A typical word_index looks like:
* [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
* |-----| |------------------------------|
* word freqs
*/
typedef struct {
char *word;
hashset *freqs; // hashset<doc_freq>
} word_index;
static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
size_t wordlen = strlen(word);
wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '\0';
}
/**
* Mainly used to build a word_index.
*/
void add_docfreq(word_index *wi, const long docid, const int frequency) {
doc_freq df;
new_docfreq(&df, docid, frequency);
HashSetEnter(wi->freqs, &df);
}
/** ==================================== UNIT-TEST ======================================= */
int main(void) {
/* apple: [1611742826915764000, 5][1611742826915538000, 10] */
word_index *apple = (word_index *)malloc(sizeof(word_index));
new_wordindex(apple, "apple");
add_docfreq(apple, 1611742826915764000L, 5);
add_docfreq(apple, 1611742826915538000L, 10);
}
It gave me a segmentation fault:
[1] 84309 segmentation fault testindexer
lldb find the problem occured when hashset try to callback the given pointer of function hashfn. I don't quite understand what is EXC_BAD_ACCESS (code=EXC_I386_GPFLT) here. I have done several unit test on hashset before, the HashSetEnter() function worked well with hashfn. Another unit test was conducted on hash_docfreq() function, it can also calculate correctly the hash number. I'm a little bit confused. Anyone can help? Thanks!
Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
127 }
128
129 void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130 int hash = h->hashfn(elemAddr, h->bucketNum);
131 vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
132 // search in the hash set first
133 int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)
Running under gdb, after the fault, doing a tb command to get a stack traceback, we see:
#0 0x00000005004016e6 in ?? ()
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
#2 0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
frequency=5) at orig.c:266
#3 0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
150 int hash = h->hashfn(elemAddr, h->bucketNum);
You are segfaulting in HashSetEnter, at the line:
int hash = h->hashfn(elemAddr, h->bucketNum);
This is because h is not valid at this point.
Examinining the source, the place that sets the value that is ultimately invalid, it is set in new_wordindex.
In new_wordindex, you are saving [and returning] the address of h.
h is a function scoped variable here, so it is no longer valid after the function returns.
You have to use malloc for this. And, later, you need to be able to free this pointer during cleanup.
Here's the refactored code for the incorrect function.
Note that to show old/original code vs. new/corrected code, I'm using preprocessor conditionals:
#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif
The code under #if 0 can be elided/removed, leaving just the #else code.
static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
#else
hashset *h = malloc(sizeof(*h));
HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = h;
#endif
size_t wordlen = strlen(word);
wi->word = (char *) malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '\0';
}

Will memory not freed cause segmentation fault in C?

I've just encountered a very strange bug. I was doing unit-test for a simple function as below.
UPDATE: Thanks #Bodo, here's the minimal working example. You can simply compile and run tokenizer.c.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
/* ============================= BOOL =============================== */
#ifndef _BOOL_
#define _BOOL_
typedef enum {
true, false
} bool;
#endif // _BOOL_
/* ============================= STACK =============================== */
#ifndef _STACK_
#define _STACK_
typedef void (*stack_freefn)(void *elemAddr);
typedef struct {
size_t size; // number of element allowed
int ite; // point to the current last element
size_t elemSize; // size of each element (how many bytes)
void *elems; // stockage of elements
stack_freefn freefn; // free memory allocated for each element if necessary
} stack;
/* constructor */
void new_stack(stack *s, const size_t size, const size_t elemSize, stack_freefn freefn) {
s->size = size;
s->ite = 0;
s->elemSize = elemSize;
s->elems = malloc(size * elemSize);
s->freefn = freefn;
}
/* free memory */
void dispose_stack(stack *s) {
if (s->freefn != NULL) {
while (s->ite > 0) {
void *elemAddr = (char *)s->elems + --s->ite * s->elemSize;
s->freefn(elemAddr);
}
}
free(s->elems);
s->elems = NULL;
}
/* push one new element on the top */
void push_stack(stack *s, const void *value, const size_t elemSize) {
if (s->ite == s->size) {
s->size *= 2;
s->elems = realloc(s->elems, s->size * s->elemSize);
}
void *elemAddr = (char *)s->elems + s->elemSize * s->ite++;
memcpy(elemAddr, value, s->elemSize);
}
/* pop our the element on the top */
void pop_stack(stack *s, void *res) {
if (s->ite > 0) {
void *elemAddr = (char *)s->elems + ((s->ite - 1) * s->elemSize);
memcpy(res, elemAddr, s->elemSize);
s->ite--;
}
}
void clear_stack(stack *s) {
if (s->freefn != NULL) {
while (s->ite > 0) {
void *elemAddr = (char *)s->elems + --s->ite * s->elemSize;
s->freefn(elemAddr);
}
} else {
s->ite = 0;
}
}
size_t stack_size(stack *s) {
return s->ite;
}
#endif // _STACK_
/* ============================= VECTOR =============================== */
#ifndef _VECTOR_
#define _VECTOR_
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Frees up all the memory of the specified vector.
*/
void VectorDispose(vector *v) {
if (v->freefn != NULL) {
for (; v->elemNum > 0; v->elemNum--) {
void *elemAddr = (char *)v->elems + (v->elemNum - 1) * v->elemSize;
v->freefn(elemAddr);
}
}
free(v->elems);
v->elems = NULL;
}
/**
* Returns the logical length of the vector.
*/
int VectorLength(const vector *v) {
return v->elemNum;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
#endif // _VECTOR_
/* ============================= TOKENIZER =============================== */
/**
* Dump current string into vector as a new word.
* Strings are null-terminated.
*/
static void dumpstack(stack *s, vector *v) {
size_t len = stack_size(s);
char *word = (char *)malloc((len + 1) * sizeof(char)); // +1 for null-terminator
for (int i = len - 1; i >= 0; i--) {
pop_stack(s, word + i * sizeof(char));
}
word[len] = '\0';
VectorAppend(v, &word);
clear_stack(s);
}
static const size_t kTokenStackDefaultSize = 64;
static void tokenize(vector *words, char *stream) {
stack s;
new_stack(&s, kTokenStackDefaultSize, sizeof(char), NULL);
size_t len = strlen(stream);
bool begin = false;
char c;
for (int i = 0; i < len; i++) {
c = stream[i];
/* =============================== My printf() is here ============================== */
// printf("char c = [%c]\n", c);
/* =============================== My printf() is here ============================== */
if (isalpha(c) || isdigit(c)) {
if (begin == false) begin = true;
char lower = tolower(c);
push_stack(&s, &lower, sizeof(char));
} else if (c == '-') {
if (begin == true) { // case: covid-19
push_stack(&s, &c, sizeof(char));
} else {
if (i < len - 1 && isdigit(stream[i + 1])) { // case: -9
begin = true;
push_stack(&s, &c, sizeof(char));
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
}
} else if (c == '.' && begin == true) { // case: 3.14
if (isdigit(stream[i - 1])) {
push_stack(&s, &c, sizeof(char));
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
}
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
dispose_stack(&s);
}
/* ============================= UNIT-TEST =============================== */
/**
* HashSetFreeFunction<char *>
*/
static void freestr(void *elemAddr) {
char *str = *(char **)elemAddr;
free(str);
}
/**
* HashSetCompareFunction<char *>
*/
static int compstr(const void *elemAddr1, const void *elemAddr2) {
char *str1 = *(char **)elemAddr1;
char *str2 = *(char **)elemAddr2;
return strcmp(str1, str2);
}
static void test_tokenize(void) {
printf("Testing Tokenizer.c::tokenize() ...\n");
char *sentence = "Covid-19: Top adviser warns France at 'emergency' virus moment - BBC News\nPi = 3.14\n-1 is negative.";
vector words;
VectorNew(&words, sizeof(char *), freestr, 256);
tokenize(&words, sentence);
char *musthave[] = {"covid-19", "top", "3.14", "-1"};
char *musthavenot[] = {"-", "1"};
assert(VectorLength(&words) == 16);
for (int i = 0; i < sizeof(musthave)/sizeof(char *); i++) {
assert(VectorSearch(&words, &musthave[i], compstr, 0, false) != -1);
}
for (int i = 0; i < sizeof(musthavenot)/sizeof(char *); i++) {
assert(VectorSearch(&words, &musthavenot[i], compstr, 0, false) == -1);
}
VectorDispose(&words);
printf("[ALL PASS]\n");
}
int main(void) {
test_tokenize();
}
I've got segmentation fault at first.
[1] 4685 segmentation fault testtokenizer
But when I add a printf() to debug, the segmentation fault was gone and the test passed. After comment out the printf, the function still works. I was so confused.
Just recall that before this test, I tested some memory dispose function, and perhaps had left some unfreed blocks in memory. Will that be the reason for fleeting segmentation fault? Thx bros.
UPDATE:
Now I can't even reproduce this bug myself. tokenizer.c above can pass the unit-test. I thought it might caused by makefile prerequisite rules. gcc didn't re-compile some object files when source code is changed.
Thanks #Steve Summit, you make it clear that unfreed memory will not cause segmentation fault.
Thanks #schwern for code review, it's really helpful.
But when I add a printf() to debug, the segmentation fault was gone and the test passed. After comment out the printf, the function still works. I was so confused.
They call it undefined behavior, because its behavior is undefined. Seemingly unrelated operations might nudge things just a bit to make the code "work" but they're only tangentially related to the problem.
I tested some memory dispose function, and perhaps had left some unfreed blocks in memory. Will that be the reason for fleeting segmentation fault?
No. It does mean the memory is unreferencable and "leaked". The memory will be freed to the operating system when the process exits.
The problem must lie elsewhere. Without seeing your whole program we can't say for sure, but two fishy things stand out.
You're defining a fixed sized stack, but you're pushing onto it an indeterminate number of times. Unless push_stack has protection against this, you will walk off your allocated memory.
You're storing references to variables on the stack. lower, c
char lower = tolower(c);
push_stack(&s, &lower, sizeof(char));
Once lower goes out of scope it will automatically be freed and the memory reused. &lower is invalid once tokenize returns. This seems to be fine if your stack only lasts the length of the function, but it's worth noting.
And it's possible new_stack, push_stack, or dumpstack are doing something incorrect.

Circular buffer does not give the correct size of the buffer after 6-th element

I have written the code for the circular buffer in C and it works well until some extent. I took the size of the buffer being equal to 10. When I fill the buffer till element 6 - it works fine. But at the moment when I fill the 7-th element - I get the result "The size of the buffer is equal to 767". For the element 8 - it does not work. I use "head" to write and "tail" to extract values. Could you please help me with this?
#include<stdio.h>
#include<stdint.h>
#include <stdbool.h>
typedef struct RingBuffer {
uint16_t* buffer;
size_t head;
size_t tail;
size_t max;
bool full;
}*cbuf_handle_t;
cbuf_handle_t init_RingBuffer (uint8_t* buffer, size_t size){
cbuf_handle_t cbuf = malloc (sizeof(cbuf_handle_t));
cbuf->buffer = buffer;
cbuf->max = size;
return cbuf;
}
void RingBuffer_free(cbuf_handle_t cbuf){
free(cbuf);
}
void RingBuffer_reset(cbuf_handle_t cbuf){
cbuf->head = 0;
cbuf->tail = 0;
cbuf->full = false;
}
bool RingBuffer_full (cbuf_handle_t cbuf){
return cbuf->full;
}
bool RingBuffer_empty(cbuf_handle_t cbuf){
return (!cbuf->full && (cbuf->tail == cbuf->head));
}
size_t RingBuffer_Capacity(cbuf_handle_t cbuf){
return cbuf->max;
}
size_t RingBuffer_size(cbuf_handle_t cbuf){
size_t size = cbuf->max;
if (!cbuf->full){
if (cbuf->head >= cbuf->tail)
{
size = (cbuf->head - cbuf->tail);}
else
{
size = (cbuf->head - cbuf->tail + cbuf->max);
}
}
return size;
}
void RingBuffer_AdvancePointer(cbuf_handle_t cbuf){
if (cbuf->full){
cbuf->tail = (cbuf->tail+1)%cbuf->max;
}
cbuf->head = (cbuf->head + 1)%cbuf->max;
cbuf->full = (cbuf->head == cbuf->tail);
}
void RingBuffer_retreatPointer (cbuf_handle_t cbuf){
cbuf->full = false;
cbuf->tail = (cbuf->tail + 1)%cbuf->max;
}
void RingBuffer_addValue (cbuf_handle_t cbuf, uint8_t data){
cbuf->buffer[cbuf->head] = data;
RingBuffer_AdvancePointer(cbuf);
}
int RingBuffer_Remove (cbuf_handle_t cbuf, uint8_t *data){
int r = -1;
if (!RingBuffer_empty(cbuf)){
*data = cbuf->buffer[cbuf->tail];
RingBuffer_retreatPointer(cbuf);
r = 0;
}
return r;
}
int main (){
uint8_t arr[10];
cbuf_handle_t cpt = init_RingBuffer(arr, 10);
//initialzie the buffer, tail and head and max
int i = 0;
RingBuffer_reset(cpt);
for ( i = 0 ; i< 6; i++){
RingBuffer_addValue(cpt, i);
}
size_t size = RingBuffer_size(cpt);
printf("The size of the buffer %d", size);
}
Thank you in advance!
Regards
Rostyslav
As said in comments, the declaration of the structure as a pointer is generally not recommended. However you can fix that bug by changing the way you allocate it using malloc :
cbuf_handle_t cbuf = malloc (sizeof(*cbuf));
This is because, cbuf being a pointer to the structure, if you dereference it you get the structure and thus its real size when you pass it to sizeof.

Hashmap implementation problem in C with void pointer as value

Hi I am attempting to implement a really simple hashmap in regular C with a string as key and a void pointer as value as I wish to use the map for multiple data types.
So far I have this
struct node{
void * value;
char * key;
};
unsigned long strhash(char *string)
{
unsigned long hash = 5381;
int c;
while ((c = *string++))
{
hash = ((hash << 5) + hash) + c;
}
return hash;
}
map_t *map_create(int maxSize){
map_t *map = malloc(sizeof(map_t));
map->curSize = 0;
map->maxSize = maxSize;
map->nodes = calloc(map->maxSize, sizeof(node_t *));
return map;
}
node_t *node_create(char *key, void *value){
node_t *node = malloc(sizeof(node_t));
node->key = key;
node->value = value;
return node;
}
void map_insert(map_t *map, char *key, void *value){
node_t *node = node_create(key, value);
int idx = strhash(key) % map->maxSize;
if(map->nodes[idx] == NULL){
map->nodes[idx] = node;
}else{
while(map->nodes[idx] != NULL){
idx++%map->maxSize;
}
map->nodes[idx] = node;
}
return;
}
void map_print(map_t *map){
for(int i = 0; i < map->maxSize; i++){
if(map->nodes[i] != NULL){
printf("index: %d\t value: %d\n",i, *(int*)map->nodes[i]->value);
}
}
return;
}
void map_destroy(map_t *map){
for(int i = 0; i < map->maxSize; i++){
if(map->nodes[i] != NULL){
free(map->nodes[i]);
}
}
free(map->nodes);
free(map);
return;
}
int main(){
map_t *map = map_create(32);
for(int i = 0; i < 30; i++){
map_insert(map, (char*)&i, &i);
}
map_print(map);
map_destroy(map);
return 0;
}
The problem is the output is not as I'd expect when the map gets printed all that is retrieved is the value "30" on all indexes which is the last number inserted into the map. If I change the value to type int the map works as expected, so is there must be something crucial I am missing in regards to pointers.
I am not the greatest at C so any light which could be shed on this would be most appreciated.
The problem is that you're using the same pointer every time you call map_insert(). It just stores the pointer, it doesn't copy the data. Each time through the loop you change the contents of that memory, so all the hash map elements point to that same value.
There are two ways you can fix it. One way is to always make a dynamically-allocated copy of the data before calling map_insert():
for (int i = 0; i < 30; i++) {
int *i_copy = malloc(sizeof *i_copy);
*i_copy = i;
map_insert(map, (char *)i_copy, (char *)i_copy);
}
The other option is to add the size of the value to the map_insert() and node_create() arguments. Then node_create call malloc() and memcpy() to copy the value to dynamic memory.
BTW, there's another problem. The key is supposed to be a null-terminated string (strhash() depends on this), but you're using &i, which is a pointer to an integer. Casting a pointer to an integer to char* doesn't return a string, it just returns a pointer to the same location with a different data type. I haven't fixed this above.
OP stores a reference to the same value, so of course all lookups yield the same value (which is not even a string, but whatever the storage representation of the value of the variable i happens to be).
I prefer chaining the hash map entries, and keeping a copy of the hash in the entry:
struct entry {
struct entry *next;
size_t hash;
void *data;
size_t data_size;
int data_type;
unsigned char name[];
};
typedef struct {
size_t size;
size_t used; /* Number of entries, total */
struct entry **slot; /* Array of entry pointers */
size_t (*hash)(const unsigned char *, size_t);
} hashmap;
int hashmap_new(hashmap *hmap, const size_t size,
size_t (*hash)(const unsigned char *, size_t))
{
if (!hmap)
return -1; /* No hashmap specified */
hmap->size = 0;
hmap->used = 0;
hmap->slot = NULL;
hmap->hash = NULL;
if (size < 1)
return -1; /* Invalid size */
if (!hash)
return -1; /* No hash function specified. */
hmap->slot = calloc(size, sizeof hmap->slot[0]);
if (!hmap->slot)
return -1; /* Not enough memory */
hmap->size = size;
hmap->hash = hash;
return 0;
}
void hashmap_free(hashmap *hmap)
{
if (hmap) {
size_t i = hmap->size;
while (i-->0) {
struct entry *next = hmap->slot[i];
struct entry *curr;
while (next) {
curr = next;
next = next->next;
free(curr->data);
/* Poison the entry, to help detect use-after-free bugs. */
curr->next = NULL;
curr->data = NULL;
curr->hash = 0;
curr->data_size = 0;
curr->data_type = 0;
curr->name[0] = '\0';
free(curr);
}
}
}
free(hmap->slot);
hmap->size = 0;
hmap->used = 0;
hmap->slot = NULL;
hmap->hash = NULL;
}
To insert a key-value pair, the function either uses the data specified as-is, in which case it's the caller's responsibility to ensure each key has their own unique data not overwritten later; or we copy the user data. In the above hashmap_free() function, you'll see free(curr->data);; it assumes we allocated memory dynamically, and copied the user data there. So:
int hashmap_add(hashmap *hmap, const unsigned char *name,
const void *data, const size_t data_size,
const int data_type)
{
const size_t namelen = (name) ? strlen(name) : 0;
struct entry *curr;
size_t i;
if (!hmap)
return -1; /* No hashmap specified. */
if (name_len < 1)
return -1; /* NULL or empty name. */
/* Allocate memory for the hashmap entry,
including enough room for the name, and end of string '\0'. */
curr = malloc(sizeof (struct entry) + namelen + 1;
if (!curr)
return -1; /* Out of memory. */
/* Copy data, if any. */
if (data_size > 0) {
curr->data = malloc(data_size);
if (!curr->data) {
free(curr);
return -1; /* Out of memory. */
}
memcpy(curr->data, data, data_size);
} else {
curr->data = NULL;
curr->data_size = 0;
}
curr->data_type = data_type;
/* Calculate the hash of the name. */
curr->hash = hmap->hash(name, namelen);
/* Copy name, including the trailing '\0'. */
memcpy(curr->name, name, namelen + 1);
/* Slot to prepend to. */
i = curr->hash % hmap->size;
curr->next = hmap->slot[i];
hmap->slot[i] = curr;
/* An additional node added. */
hmap->used++;
return 0;
}
The meaning of data_type is completely up to the user of the code.
Lookup can be made based on the hash and the data type:
/* Returns 0 if found. */
int hashmap_find(hashmap *hmap, const unsigned char *name,
const int data_type,
void **dataptr_to, size_t *size_to)
{
struct entry *curr;
size_t hash;
if (size_to)
*size_to = 0;
if (dataptr_to)
*dataptr_to = NULL;
if (!hmap)
return -1; /* No hashmap specified. */
if (!name || !*name)
return -1; /* NULL or empty name. */
hash = hmap->hash(name, strlen(name));
curr = hmap->slot[hash % hmap->size];
for (curr = hmap->slot[hash % hmap->size]; curr != NULL; curr = curr->next) {
if (curr->data_type == data_type && curr->hash == hash &&
!strcmp(curr->name, name)) {
/* Data type an name matches. Save size if requested. */
if (size_to)
*size_to = curr->data_size;
if (dataptr_to)
*dataptr_to = curr->data;
return 0; /* Found. */
}
}
return -1; /* Not found. */
}
The above lookup returns 0 if found, and nonzero if error or not found. (This way, even zero-size NULL data can be stored in the hash map.)
If the number of data types supported is small, say 32, then using an unsigned int with each bit (1U<<0 == 1, 1U<<1 == 2, 1U<<2 == 4, and so on) reserved for a specific type, you can do the lookup using a mask, allowing only the specified types. Similarly, the data_type can be a mask, describing which types the value can be interpreted as (almost always will have just one bit set).
This scheme also allows one to dynamically resize the hashmap, by allocating a new slot array of pointers, and moving each old entry to the new one. The keys don't need to be rehashed, because the original hash is stored in each entry. For lookup efficiency, the chains (hanging off each slot) should be as short as possible. A common "rule of thumb" is that hashmap->size should be between hashmap->used and 2 * hashmap->used.
When you call map_insert(map, (char*)&i, &i); the value inserted into hasmap is the pointer to i variable, i.e. its address in memory, and not the value of i.
So when you change i value inside the for loop there is the side-effect to all entries into the hashmap, and at the end of the loop you only see the last value assigned.

Memory comparison causes system halt

I am working on a kernel module and I need to compare two buffers to find out if they are equivalent. I am using the memcmp function defined in the Linux kernel to do so. My first buffer is like this:
cache_buffer = (unsigned char *)vmalloc(4097);
cache_buffer[4096] = '/0';
The second buffer is from a page using the page_address() function.
page = bio_page(bio);
kmap(page);
write_buffer = (char *)page_address(page);
kunmap(page);
I have printed the contents of both buffers before hand and not only to they print correctly, but they also have the same content. So next, I do this:
result = memcmp(write_buffer, cache_buffer, 2048); // only comparing up to 2048 positions
This causes the kernel to freeze up and I cannot figure out why. I checked the implementation of memcmp and saw nothing that would cause the freeze. Can anyone suggest a cause?
Here is the memcmp implementation:
int memcmp(const void *cs, const void *ct, size_t count)
{
const unsigned char *su1, *su2;
int res = 0;
for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
if ((res = *su1 - *su2) != 0)
break;
return res;
}
EDIT: The function causing the freeze is memcmp. When I commented it out, everything worked. Also, when I did I memcmp as follows
memcmp(write_buffer, write_buffer, 2048); //comparing two write_buffers
Everything worked as well. Only when I throw the cache_buffer into the mix is when I get the error. Also, above is a simplification of my actual code. Here is the entire function:
static int compare_data(sector_t location, struct bio * bio, struct cache_c * dmc)
{
struct dm_io_region where;
unsigned long bits;
int segno;
struct bio_vec * bvec;
struct page * page;
unsigned char * cache_data;
char * temp_data;
char * write_data;
int result, length, i;
cache_data = (unsigned char *)vmalloc((dmc->block_size * 512) + 1);
where.bdev = dmc->cache_dev->bdev;
where.count = dmc->block_size;
where.sector = location << dmc->block_shift;
printk(KERN_DEBUG "place: %llu\n", where.sector);
dm_io_sync_vm(1, &where, READ, cache_data, &bits, dmc);
length = 0;
bio_for_each_segment(bvec, bio, segno)
{
if(segno == 0)
{
page = bio_page(bio);
kmap(page);
write_data = (char *)page_address(page);
//kunmap(page);
length += bvec->bv_len;
}
else
{
page = bio_page(bio);
kmap(page);
temp_data = strcat(write_data, (char *)page_address(page));
//kunmap(page);
write_data = temp_data;
length += bvec->bv_len;
}
}
printk(KERN_INFO "length: %u\n", length);
cache_data[dmc->block_size * 512] = '\0';
for(i = 0; i < 2048; i++)
{
printk("%c", write_data[i]);
}
printk("\n");
for(i = 0; i < 2048; i++)
{
printk("%c", cache_data[i]);
}
printk("\n");
result = memcmp(write_data, cache_data, length);
return result;
}
EDIT #2: Sorry guys. The problem was not memcmp. It was the result of memcmp. When ever it returned a positive or negative number, the function that called my function would play with some pointers, one of which was uninitialized. I don't know why I didn't realize it before. Thanks for trying to help though!
I'm no kernel expert, but I would assume you need to keep this memory mapped while doing the comparison? In other words, don't call kunmap until after the memcmp is complete. I would presume that calling it before will result in write_buffer pointing to a page which is no longer mapped.
Taking your code in the other question, here is a rough attempt at incremental. Still needs some cleanup, I'm sure:
static int compare_data(sector_t location, struct bio * bio, struct cache_c * dmc)
{
struct dm_io_region where;
unsigned long bits;
int segno;
struct bio_vec * bvec;
struct page * page;
unsigned char * cache_data;
char * temp_data;
char * write_data;
int length, i;
int result = 0;
size_t position = 0;
size_t max_size = (dmc->block_size * 512) + 1;
cache_data = (unsigned char *)vmalloc(max_size);
where.bdev = dmc->cache_dev->bdev;
where.count = dmc->block_size;
where.sector = location << dmc->block_shift;
printk(KERN_DEBUG "place: %llu\n", where.sector);
dm_io_sync_vm(1, &where, READ, cache_data, &bits, dmc);
bio_for_each_segment(bvec, bio, segno)
{
// Map the page into memory
page = bio_page(bio);
write_data = (char *)kmap(page);
length = bvec->bv_len;
// Make sure we don't go past the end
if(position >= max_size)
break;
if(position + length > max_size)
length = max_size - position;
// Compare the data
result = memcmp(write_data, cache_data + position, length);
position += length;
kunmap(page);
// If the memory is not equal, bail out now and return the result
if(result != 0)
break;
}
cache_data[dmc->block_size * 512] = '\0';
return result;
}

Resources