Will memory not freed cause segmentation fault in C? - c

I've just encountered a very strange bug. I was doing unit-test for a simple function as below.
UPDATE: Thanks #Bodo, here's the minimal working example. You can simply compile and run tokenizer.c.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
/* ============================= BOOL =============================== */
#ifndef _BOOL_
#define _BOOL_
typedef enum {
true, false
} bool;
#endif // _BOOL_
/* ============================= STACK =============================== */
#ifndef _STACK_
#define _STACK_
typedef void (*stack_freefn)(void *elemAddr);
typedef struct {
size_t size; // number of element allowed
int ite; // point to the current last element
size_t elemSize; // size of each element (how many bytes)
void *elems; // stockage of elements
stack_freefn freefn; // free memory allocated for each element if necessary
} stack;
/* constructor */
void new_stack(stack *s, const size_t size, const size_t elemSize, stack_freefn freefn) {
s->size = size;
s->ite = 0;
s->elemSize = elemSize;
s->elems = malloc(size * elemSize);
s->freefn = freefn;
}
/* free memory */
void dispose_stack(stack *s) {
if (s->freefn != NULL) {
while (s->ite > 0) {
void *elemAddr = (char *)s->elems + --s->ite * s->elemSize;
s->freefn(elemAddr);
}
}
free(s->elems);
s->elems = NULL;
}
/* push one new element on the top */
void push_stack(stack *s, const void *value, const size_t elemSize) {
if (s->ite == s->size) {
s->size *= 2;
s->elems = realloc(s->elems, s->size * s->elemSize);
}
void *elemAddr = (char *)s->elems + s->elemSize * s->ite++;
memcpy(elemAddr, value, s->elemSize);
}
/* pop our the element on the top */
void pop_stack(stack *s, void *res) {
if (s->ite > 0) {
void *elemAddr = (char *)s->elems + ((s->ite - 1) * s->elemSize);
memcpy(res, elemAddr, s->elemSize);
s->ite--;
}
}
void clear_stack(stack *s) {
if (s->freefn != NULL) {
while (s->ite > 0) {
void *elemAddr = (char *)s->elems + --s->ite * s->elemSize;
s->freefn(elemAddr);
}
} else {
s->ite = 0;
}
}
size_t stack_size(stack *s) {
return s->ite;
}
#endif // _STACK_
/* ============================= VECTOR =============================== */
#ifndef _VECTOR_
#define _VECTOR_
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Frees up all the memory of the specified vector.
*/
void VectorDispose(vector *v) {
if (v->freefn != NULL) {
for (; v->elemNum > 0; v->elemNum--) {
void *elemAddr = (char *)v->elems + (v->elemNum - 1) * v->elemSize;
v->freefn(elemAddr);
}
}
free(v->elems);
v->elems = NULL;
}
/**
* Returns the logical length of the vector.
*/
int VectorLength(const vector *v) {
return v->elemNum;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
#endif // _VECTOR_
/* ============================= TOKENIZER =============================== */
/**
* Dump current string into vector as a new word.
* Strings are null-terminated.
*/
static void dumpstack(stack *s, vector *v) {
size_t len = stack_size(s);
char *word = (char *)malloc((len + 1) * sizeof(char)); // +1 for null-terminator
for (int i = len - 1; i >= 0; i--) {
pop_stack(s, word + i * sizeof(char));
}
word[len] = '\0';
VectorAppend(v, &word);
clear_stack(s);
}
static const size_t kTokenStackDefaultSize = 64;
static void tokenize(vector *words, char *stream) {
stack s;
new_stack(&s, kTokenStackDefaultSize, sizeof(char), NULL);
size_t len = strlen(stream);
bool begin = false;
char c;
for (int i = 0; i < len; i++) {
c = stream[i];
/* =============================== My printf() is here ============================== */
// printf("char c = [%c]\n", c);
/* =============================== My printf() is here ============================== */
if (isalpha(c) || isdigit(c)) {
if (begin == false) begin = true;
char lower = tolower(c);
push_stack(&s, &lower, sizeof(char));
} else if (c == '-') {
if (begin == true) { // case: covid-19
push_stack(&s, &c, sizeof(char));
} else {
if (i < len - 1 && isdigit(stream[i + 1])) { // case: -9
begin = true;
push_stack(&s, &c, sizeof(char));
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
}
} else if (c == '.' && begin == true) { // case: 3.14
if (isdigit(stream[i - 1])) {
push_stack(&s, &c, sizeof(char));
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
}
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
dispose_stack(&s);
}
/* ============================= UNIT-TEST =============================== */
/**
* HashSetFreeFunction<char *>
*/
static void freestr(void *elemAddr) {
char *str = *(char **)elemAddr;
free(str);
}
/**
* HashSetCompareFunction<char *>
*/
static int compstr(const void *elemAddr1, const void *elemAddr2) {
char *str1 = *(char **)elemAddr1;
char *str2 = *(char **)elemAddr2;
return strcmp(str1, str2);
}
static void test_tokenize(void) {
printf("Testing Tokenizer.c::tokenize() ...\n");
char *sentence = "Covid-19: Top adviser warns France at 'emergency' virus moment - BBC News\nPi = 3.14\n-1 is negative.";
vector words;
VectorNew(&words, sizeof(char *), freestr, 256);
tokenize(&words, sentence);
char *musthave[] = {"covid-19", "top", "3.14", "-1"};
char *musthavenot[] = {"-", "1"};
assert(VectorLength(&words) == 16);
for (int i = 0; i < sizeof(musthave)/sizeof(char *); i++) {
assert(VectorSearch(&words, &musthave[i], compstr, 0, false) != -1);
}
for (int i = 0; i < sizeof(musthavenot)/sizeof(char *); i++) {
assert(VectorSearch(&words, &musthavenot[i], compstr, 0, false) == -1);
}
VectorDispose(&words);
printf("[ALL PASS]\n");
}
int main(void) {
test_tokenize();
}
I've got segmentation fault at first.
[1] 4685 segmentation fault testtokenizer
But when I add a printf() to debug, the segmentation fault was gone and the test passed. After comment out the printf, the function still works. I was so confused.
Just recall that before this test, I tested some memory dispose function, and perhaps had left some unfreed blocks in memory. Will that be the reason for fleeting segmentation fault? Thx bros.
UPDATE:
Now I can't even reproduce this bug myself. tokenizer.c above can pass the unit-test. I thought it might caused by makefile prerequisite rules. gcc didn't re-compile some object files when source code is changed.
Thanks #Steve Summit, you make it clear that unfreed memory will not cause segmentation fault.
Thanks #schwern for code review, it's really helpful.

But when I add a printf() to debug, the segmentation fault was gone and the test passed. After comment out the printf, the function still works. I was so confused.
They call it undefined behavior, because its behavior is undefined. Seemingly unrelated operations might nudge things just a bit to make the code "work" but they're only tangentially related to the problem.
I tested some memory dispose function, and perhaps had left some unfreed blocks in memory. Will that be the reason for fleeting segmentation fault?
No. It does mean the memory is unreferencable and "leaked". The memory will be freed to the operating system when the process exits.
The problem must lie elsewhere. Without seeing your whole program we can't say for sure, but two fishy things stand out.
You're defining a fixed sized stack, but you're pushing onto it an indeterminate number of times. Unless push_stack has protection against this, you will walk off your allocated memory.
You're storing references to variables on the stack. lower, c
char lower = tolower(c);
push_stack(&s, &lower, sizeof(char));
Once lower goes out of scope it will automatically be freed and the memory reused. &lower is invalid once tokenize returns. This seems to be fine if your stack only lasts the length of the function, but it's worth noting.
And it's possible new_stack, push_stack, or dumpstack are doing something incorrect.

Related

Valgrind memcpy Invalid write of size 8 (uintptr_t *)

I have an issue with memcpy and valgrind, telling me about an Invalid write of size 8.
I got to the point of figuring out where the faulty code is, but I have no clue as to why it is faulty...
I'm aware that there are other questions regarding that, but they don't help me really.
The following is an excerpt of the most important bits of my approach on a somewhat "universal" stack, when my regular value would be of type uintptr_t.
Here are two defines that I used below:
// default stack batch size
#define STACK_BATCH_DEFAULT 8
// size of one value in the stack
#define STACK_SIZEOF_ONE sizeof(uintptr_t)
The structure of the stack is as follows:
typedef struct Stack
{
size_t count; // count of values in the stack
size_t size; // size of one value in bytes
size_t alloced; // allocated count
uintptr_t *value; // the values
int batch; // memory gets allocated in those batches
}
Stack;
I have an initialization function for the stack:
bool stack_init(Stack *stack, size_t size, int batch)
{
if(!stack) return false;
stack->batch = batch ? batch : STACK_BATCH_DEFAULT;
stack->size = size;
stack->count = 0;
stack->value = 0;
stack->alloced = 0;
return true;
}
Then the stack_push function, where valgrind throws the error Invalid write of size 8:
bool stack_push(Stack *stack, uintptr_t *value)
{
if(!stack || !value) return false;
// calculate required amount of elements
size_t required = stack->batch * (stack->count / stack->batch + 1);
// allocate more memory if we need to
if(required > stack->alloced)
{
uintptr_t *tmp = realloc(stack->value, required * stack->size);
if(!tmp) return false;
stack->value = tmp;
stack->alloced = required;
}
// set the value
if(stack->size > STACK_SIZEOF_ONE)
{
memcpy(stack->value + stack->size * stack->count, value, stack->size); // <--- valgrind throws the error here
}
else
{
stack->value[stack->count] = *value;
}
// increment count
stack->count++;
return true;
}
Then in my program I'm calling the functions as follows:
Stack stack = {0};
stack_init(&stack, sizeof(SomeStruct), 0);
/* ... */
SomeStruct push = { // this is a struct that is larger than STACK_SIZEOF_ONE
.int_a = 0,
.int_b = 0,
.int_c = 0,
.id = 0,
.pt = pointer_to_struct, // it is a pointer to some other struct that was allocated beforehand
};
stack_push(&stack, (uintptr_t *)&push);
And with universal I meant that I can also have a regular stack:
Stack stack = {0};
stack_init(&stack, sizeof(uintptr_t), 0);
/* ... */
uintptr_t a = 100;
stack_push(&stack, &a);
Also, I'm open to hear general tips and advices if there are any things that should/could be improved :)
Edit: Below is a runnable code.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
// default stack batch size
#define STACK_BATCH_DEFAULT 8
// size of one value in the stack
#define STACK_SIZEOF_ONE sizeof(uintptr_t)
#define TESTCOUNT 10
#define MAX_BUF 16
typedef struct Stack
{
size_t count; // count of values in the stack
size_t size; // size of one value in bytes
size_t alloced; // allocated count
uintptr_t *value; // the values
int batch; // memory gets allocated in those batches
}
Stack;
typedef struct SomeStruct
{
size_t a;
size_t b;
size_t c;
size_t id;
char *str;
}
SomeStruct;
bool stack_init(Stack *stack, size_t size, int batch)
{
if(!stack) return false;
stack->batch = batch ? batch : STACK_BATCH_DEFAULT;
stack->size = size;
stack->count = 0;
stack->value = 0;
stack->alloced = 0;
return true;
}
bool stack_push(Stack *stack, uintptr_t *value)
{
if(!stack || !value) return false;
// calculate required amount of elements
size_t required = stack->batch * (stack->count / stack->batch + 1);
// allocate more memory if we need to
if(required > stack->alloced)
{
uintptr_t *tmp = realloc(stack->value, required * stack->size);
if(!tmp) return false;
stack->value = tmp;
stack->alloced = required;
}
// set the value
if(stack->size > STACK_SIZEOF_ONE)
{
memcpy(stack->value + stack->size * stack->count, value, stack->size); // <--- valgrind throws the error here
}
else
{
stack->value[stack->count] = *value;
}
// increment count
stack->count++;
return true;
}
bool stack_pop(Stack *stack, uintptr_t *value)
{
if(!stack) return false;
if(!stack->count) return false;
// decrement count of elements
stack->count--;
// return the value if we have an address
if(value)
{
if(stack->size > STACK_SIZEOF_ONE)
{
memcpy(value, stack->value + stack->size * stack->count, stack->size);
}
else
{
*value = stack->value[stack->count];
}
}
int required = stack->batch * (stack->count / stack->batch + 1);
if(required < stack->alloced)
{
uintptr_t *tmp = realloc(stack->value, required * stack->size);
if(!tmp) return false;
stack->value = tmp;
stack->alloced = required;
}
if(!stack->value) return false;
return true;
}
int main(void)
{
// initialize variables
bool valid = false;
Stack default_stack = {0};
Stack some_stack = {0};
// initialize stacks
stack_init(&default_stack, sizeof(uintptr_t), 0);
stack_init(&some_stack, sizeof(SomeStruct), 0);
// test default case - push
printf("Testing the default case, pushing...\n");
for(int i = 0; i < TESTCOUNT; i++)
{
uintptr_t push = i;
valid = stack_push(&default_stack, &push);
if(!valid) return -1;
}
// ...now pop
printf("Testing the default case, popping...\n");
do
{
uintptr_t pop = 0;
valid = stack_pop(&default_stack, &pop);
if(valid) printf("%llu,", pop);
}
while(valid);
printf("\n");
// test some case - push
printf("Testing some case, pushing...\n");
for(int i = 0; i < TESTCOUNT; i++)
{
// generate the push struct
SomeStruct push = {
.a = i * 10,
.b = i * 100,
.c = i * 1000,
.id = i,
.str = 0,
};
// allocate a string
push.str = malloc(MAX_BUF + 1);
snprintf(push.str, MAX_BUF, "%d", i);
// push
valid = stack_push(&some_stack, (uintptr_t *)&push);
if(!valid) return -1;
}
// ...now pop
printf("Testing some case, popping...\n");
do
{
SomeStruct pop = {0};
valid = stack_pop(&some_stack, (uintptr_t *)&pop);
if(valid)
{
printf("a=%d,b=%d,c=%d,id=%d,str=%s\n", pop.a, pop.b, pop.c, pop.id, pop.str);
free(pop.str);
}
}
while(valid);
printf("\n");
/* leave out free functions for this example.... */
return 0;
}
After hours I figured it out :D The mistake happened because I very rarely do pointer arithmetic... In short, I was assuming that it would always calculate with a byte.
Let's take a look at the lines containing:
memcpy(stack->value + stack->size * stack->count, value, stack->size);
...and break it down, so it is more readable. And also, I'll even add a handy dandy comment in it:
size_t offset = stack->size * stack->count; // offset in bytes
void *dest = stack->value + offset;
void *src = value;
memcpy(dest, src, stack->size);
Now the pro C-programmer should instantly spot the problem. It is with the calculation of stack->value + offset, where it should add offset in bytes but it is not, because the stack->value is of type uintptr_t * and not of type uint8_t *.
So to fix it, I replaced it with this line:
void *dest = (uint8_t *)stack->value + offset;
And the code works.

Why EXC_BAD_ACCESS (code=EXC_I386_GPFLT) when callback pointer to function?

The following code is trying to count word frequency in a document, by using hashset and vector.
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/** ==================================== VECTOR ======================================= */
typedef enum {
true, false
} bool;
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
/**
* Overwrites the element at the specified position.
*/
void VectorReplace(vector *v, const void *elemAddr, int position) {
assert(position >= 0 && position < v->elemNum);
void *posAddr = (char *)v->elems + position * v->elemSize;
/* free the memory of old element first */
if (v->freefn != NULL) v->freefn(posAddr);
memcpy(posAddr, elemAddr, v->elemSize);
}
/** ==================================== HASHSET ======================================= */
typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);
typedef struct {
int elemNum; //current element number
int bucketNum; //number of hash bucket
int elemSize; //how many byte each element has
vector *buckets; //array of vector
HashSetHashFunction hashfn;
HashSetCompareFunction compfn;
HashSetFreeFunction freefn;
} hashset;
void HashSetNew(hashset *h, int elemSize, int numBuckets,
HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
assert(h->buckets != NULL);
for (int i = 0; i < numBuckets; i++) {
vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
VectorNew(bucket, elemSize, freefn, 4);
}
h->bucketNum = numBuckets;
h->elemSize = elemSize;
h->elemNum = 0;
h->hashfn = hashfn;
h->compfn = comparefn;
h->freefn = freefn;
}
void HashSetEnter(hashset *h, const void *elemAddr) {
int hash = h->hashfn(elemAddr, h->bucketNum);
vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
// search in the hash set first
int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
if (pos != -1) {
// replace the old one if find a match
VectorReplace(bucket, elemAddr, pos);
} else {
// otherwise insert the new one
VectorAppend(bucket, elemAddr);
h->elemNum++;
}
}
/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */
/****************************************************************
*
* doc_freq is a key-value pair of [documentid, frequency]
* It's not supposed to be exposed to user or search engine.
* -----------------------------------------------------------
* It looks like:
* [1611742826915764000] [4 ]
* |-------------------| |-------|
* docid freq
***************************************************************/
typedef struct {
long docid;
int freq;
} doc_freq;
static void new_docfreq(doc_freq *df, long docid, int freq) {
df->docid = docid;
df->freq = freq;
}
/**
* HashSetHashFunction<doc_freq>
*/
static int hash_docfreq(const void *elemAddr, int numBuckets) {
doc_freq *df = (doc_freq *)elemAddr;
return (int)(df->docid % numBuckets);
}
/**
* HashSetCompareFunction<doc_freq>
*/
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
long id1 = ((doc_freq *)elemAddr1)->docid;
long id2 = ((doc_freq *)elemAddr2)->docid;
if (id1 < id2) {
return -1;
} else if (id1 > id2) {
return 1;
} else { // id1 == id2
return 0;
}
}
/**
* word_index is a index of a single word.
* ---------------------------------------
* A typical word_index looks like:
* [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
* |-----| |------------------------------|
* word freqs
*/
typedef struct {
char *word;
hashset *freqs; // hashset<doc_freq>
} word_index;
static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
size_t wordlen = strlen(word);
wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '\0';
}
/**
* Mainly used to build a word_index.
*/
void add_docfreq(word_index *wi, const long docid, const int frequency) {
doc_freq df;
new_docfreq(&df, docid, frequency);
HashSetEnter(wi->freqs, &df);
}
/** ==================================== UNIT-TEST ======================================= */
int main(void) {
/* apple: [1611742826915764000, 5][1611742826915538000, 10] */
word_index *apple = (word_index *)malloc(sizeof(word_index));
new_wordindex(apple, "apple");
add_docfreq(apple, 1611742826915764000L, 5);
add_docfreq(apple, 1611742826915538000L, 10);
}
It gave me a segmentation fault:
[1] 84309 segmentation fault testindexer
lldb find the problem occured when hashset try to callback the given pointer of function hashfn. I don't quite understand what is EXC_BAD_ACCESS (code=EXC_I386_GPFLT) here. I have done several unit test on hashset before, the HashSetEnter() function worked well with hashfn. Another unit test was conducted on hash_docfreq() function, it can also calculate correctly the hash number. I'm a little bit confused. Anyone can help? Thanks!
Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
127 }
128
129 void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130 int hash = h->hashfn(elemAddr, h->bucketNum);
131 vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
132 // search in the hash set first
133 int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)
Running under gdb, after the fault, doing a tb command to get a stack traceback, we see:
#0 0x00000005004016e6 in ?? ()
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
#2 0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
frequency=5) at orig.c:266
#3 0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
150 int hash = h->hashfn(elemAddr, h->bucketNum);
You are segfaulting in HashSetEnter, at the line:
int hash = h->hashfn(elemAddr, h->bucketNum);
This is because h is not valid at this point.
Examinining the source, the place that sets the value that is ultimately invalid, it is set in new_wordindex.
In new_wordindex, you are saving [and returning] the address of h.
h is a function scoped variable here, so it is no longer valid after the function returns.
You have to use malloc for this. And, later, you need to be able to free this pointer during cleanup.
Here's the refactored code for the incorrect function.
Note that to show old/original code vs. new/corrected code, I'm using preprocessor conditionals:
#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif
The code under #if 0 can be elided/removed, leaving just the #else code.
static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
#else
hashset *h = malloc(sizeof(*h));
HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = h;
#endif
size_t wordlen = strlen(word);
wi->word = (char *) malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '\0';
}

Manipulating structs with a void function in C

so I've been set a task of creating a faux string struct and implementing all the usual string functions on my faux string struct. I'm stuck on the tests of my strcat implementation called append, with the first test failing (segfault) being the 5th line. My function for creating new structs should be OK because it passed all the tests, but I've included it just incase.
I've already been able to successfully implement length, get, set and copy functions for my faux string structs.
The struct:
struct text {
int capacity;
char *content;
};
typedef struct text text;
My function for creating new structs:
text *newText(char *s) {
printf("new Text from %s\n", s);
int sizeNeeded = (strlen(s)+1);
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot = sizeGot * 2;
}
text *out = malloc(sizeGot);
char *c = malloc(sizeGot);
strcpy(c, s);
out->content = c;
out->capacity = (sizeGot);
printf("the capacity is %d\n", sizeGot);
return out;
free(c);
}
My append function:
void append(text *t1, text *t2) {
printf("t1 content is %s, t2 content is %d\n", t1->content, *t2->content);
int sizeNeeded = (t1->capacity + t2->capacity);
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot = sizeGot * 2;
}
char *stringy = calloc(sizeGot, 32);
stringy = strcat(t1->content, t2->content);
free(t1);
t1 = newText(stringy);
}
and finally the tests:
void testAppend() {
text *t = newText("car");
text *t2 = newText("pet");
append(t, t2);
assert(like(t, "carpet"));
assert(t->capacity == 24);
text *t3 = newText("789012345678901234");
append(t, t3);
assert(like(t, "carpet789012345678901234"));
assert(t->capacity == 48);
freeText(t);
freeText(t2);
freeText(t3);
}
You are allocating memory in the wrong way. You could fix this by using a flexible array member like this:
typedef struct {
int capacity;
char content[];
} text;
text *out = malloc(sizeof(text) + sizeof(something));
strcpy(out->content, str);
...
And obviously code such as this is nonsense:
return out;
free(c);
}
Enable compiler warnings and listen to them.
Och, some errors you have:
Inside text_new you allocate memory for text *out using text *out = malloc(sizeGot); when sizeGot = 24 is a constant value. You should allocate sizeof(*out) or sizeof(text) bytes of memory for it.
I don't know what for int sizeGot = 24; while (sizeNeeded > sizeGot) the loop inside text_new and append is for. I guess the intention is to do allocations in power of 24. Also it mostly looks like the same code is in both functions, it does look like code duplication, which is a bad thing.
Inside append You pass a pointer to t1, not a double pointer, so if you modify the t1 pointer itself the modification will not be visible outside of function scope. t1 = newText(stringy); is just pointless and leaks memory. You could void append(text **t1, text *t2) and then *t1 = newText(stringy). But you can use a way better approach using realloc - I would expect append to "append" the string, not to create a new object. So first resize the buffer using realloc then strcat(&t1->content[oldcapacity - 1], string_to_copy_into_t1).
int sizeNeeded = (t1->capacity + t2->capacity); is off. You allocate capacity in power of 24, which does not really interact with string length. You need to have strlen(t1->content) + strlen(t2->content) + 1 bytes for both strings and the null terminator.
Try this:
size_t text_newsize(size_t sizeNeeded)
{
// I think this is just `return 24 << (sizeNeeded / 24);`, but not sure
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot *= 2;
}
return sizeGot;
}
text *newText(char *s) {
printf("new Text from %s\n", s);
if (s == NULL) return NULL;
int sizeNeeded = strlen(s) + 1;
int sizeGot = text_newsize(sizeNeeded);
text *out = malloc(sizeof(*out));
if (out == NULL) {
return NULL;
}
out->content = malloc(sizeGot);
if (out->content == NULL) {
free(out);
return NULL;
}
strcpy(out->content, s);
out->capacity = sizeGot;
printf("the capacity is %d\n", sizeGot);
return out;
}
and this:
int append(text *t1, text *t2) {
printf("t1 content is %s, t2 content is %s\n", t1->content, t2->content);
int sizeNeeded = strlen(t1->content) + strlen(t2->content) + 1;
if (t1->capacity < sizeNeeded) {
// this could a text_resize(text*, size_t) function
int sizeGot = text_newsize(sizeNeeded);
void *tmp = realloc(t1->content, sizeGot);
if (tmp == NULL) return -ENOMEM;
t1->content = tmp;
t1->capacity = sizeGot;
}
strcat(t1->content, t2->content);
return 0;
}
Some remarks:
Try to handle errors in your library. If you have a function like void append(text *t1, text *t2) let it be int append(text *t1, text *t2) and return 0 on success and negative number on *alloc errors.
Store the size of everything using size_t type. It's defined in stddef.h and should be used to represent a size of an object. strlen returns size_t and sizeof also returns size_t.
I like to put everything inside a single "namespace", I do that by prepending the functions with a string like text_.
I got some free time and decided to implement your library. Below is the code with a simple text object storing strings, I use 24 magic number as allocation chunk size.
// text.h file
#ifndef TEXT_H_
#define TEXT_H_
#include <stddef.h>
#include <stdbool.h>
struct text;
typedef struct text text;
text *text_new(const char content[]);
void text_free(text *t);
int text_resize(text *t, size_t newsize);
int text_append(text *to, const text *from);
int text_append_mem(text *to, const void *from, size_t from_len);
const char *text_get(const text *t);
int text_append_str(text *to, const char *from);
char *text_get_nonconst(text *t);
size_t text_getCapacity(const text *t);
bool text_equal(const text *t1, const text *t2);
#endif // TEXT_H_
// text.c file
//#include "text.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
struct text {
size_t capacity;
char *content;
};
text *text_new(const char content[])
{
text * const t = malloc(sizeof(*t));
if (t == NULL) goto MALLOC_ERR;
const struct text zero = {
.capacity = 0,
.content = NULL,
};
*t = zero;
if (content != NULL) {
const int ret = text_append_str(t, content);
if (ret) {
goto TEXT_APPEND_ERR;
}
}
return t;
TEXT_APPEND_ERR:
free(t);
MALLOC_ERR:
return NULL;
}
void text_free(text *t)
{
assert(t != NULL);
free(t->content);
free(t);
}
int text_resize(text *t, size_t newcapacity)
{
// printf("%s %d -> %d\n", __func__, t->capacity, newcapacity);
// we resize in chunks
const size_t chunksize = 24;
// clap the capacity into multiple of 24
newcapacity = (newcapacity + chunksize - 1) / chunksize * chunksize;
void * const tmp = realloc(t->content, newcapacity);
if (tmp == NULL) return -ENOMEM;
t->content = tmp;
t->capacity = newcapacity;
return 0;
}
int text_append_mem(text *to, const void *from, size_t from_len)
{
if (to == NULL || from == NULL) return -EINVAL;
if (from_len == 0) return 0;
const size_t oldcapacity = to->capacity == 0 ? 0 : strlen(to->content);
const size_t newcapacity = oldcapacity + from_len + 1;
int ret = text_resize(to, newcapacity);
if (ret) return ret;
memcpy(&to->content[newcapacity - from_len - 1], from, from_len);
to->content[newcapacity - 1] = '\0';
return 0;
}
int text_append_str(text *to, const char *from)
{
if (to == NULL || from == NULL) return -EINVAL;
return text_append_mem(to, from, strlen(from));
}
int text_append(text *to, const text *from)
{
if (to == NULL || from == NULL) return -EINVAL;
if (text_getCapacity(from) == 0) return 0;
return text_append_str(to, text_get(from));
}
const char *text_get(const text *t)
{
return t->content;
}
const size_t text_strlen(const text *t)
{
return t->capacity == 0 ? 0 : strlen(t->content);
}
size_t text_getCapacity(const text *t)
{
return t->capacity;
}
bool text_equal_str(const text *t, const char *str)
{
assert(t != NULL);
if (str == NULL && t->capacity == 0) return true;
const size_t strlength = strlen(str);
const size_t t_strlen = text_strlen(t);
if (t_strlen != strlength) return false;
if (memcmp(text_get(t), str, strlength) != 0) return false;
return true;
}
// main.c file
#include <stdio.h>
int text_testAppend(void) {
text *t = text_new("car");
if (t == NULL) return -1;
text *t2 = text_new("pet");
if (t2 == NULL) return -1;
if (text_append(t, t2)) return -1;
assert(text_equal_str(t, "carpet"));
assert(text_getCapacity(t) == 24);
text *t3 = text_new("789012345678901234");
if (t3 == NULL) return -1;
if (text_append(t, t3)) return -1;
assert(text_equal_str(t, "carpet789012345678901234"));
assert(text_getCapacity(t) == 48);
text_free(t);
text_free(t2);
text_free(t3);
return 0;
}
int main()
{
text *t1 = text_new("abc");
text_append_str(t1, "def");
printf("%s\n", text_get(t1));
text_free(t1);
printf("text_testAppend = %d\n", text_testAppend());
return 0;
}

Reallocating/resizing Lua 5.1 userdata in C

How do you resize a Lua 5.1 userdata object in C during runtime?
I would like to change the size of the NumArray structure described in Roberto Ierusalimschy's book Programming in Lua, 2nd edition, pp. 260, from the Lua 5.1 console.
My NumArray userdata can either store unsigned chars or lua_Numbers. I played around with calling the unmodified luaM_realloc_ function defined in lmem.c, but finally any call to C's realloc
(via l_alloc) function just returns NULL, so I get a not enough memory error message.
Would somebody please help me ?
--- lapi.c -------------------------------------------------------------------------------------------------
LUA_API void *agn_resizeud (lua_State *L, void *block, size_t osize, size_t nsize) {
Udata *u = (Udata *)luaM_realloc_(L, block, osize + sizeof(Udata), nsize + sizeof(Udata));
u->uv.len = nsize;
if (u == NULL)
luaL_error(L, "Error in " LUA_QS ": failed to allocate memory.", "API/agn_resizeud");
return u;
}
--- numarray.c ---------------------------------------------------------------------------------------------
#define checkarray(L, n) (NumArray *)luaL_checkudata(L, n, "numarray")
[...]
/* Auxiliary C function to finally call luaM_realloc_ via the Lua C API */
void *reallocud (lua_State *L, void *block, size_t o, size_t n, size_t sizeofelem, size_t sizeofnumarray) {
if (n + 1 > MAX_SIZET/sizeofelem)
luaL_error(L, "Error in " LUA_QS ": memory allocation error: block too big.", "(reallocud)", n);
else
return agn_resizeud(L, block, sizeofnumarray + o*sizeofelem, sizeofnumarray + n*sizeofelem);
}
/* function to resize a NumArray */
static int numarray_resize (lua_State *L) {
size_t i;
global_State *g = G(L);
NumArray *a = checkarray(L, 1);
int n = luaL_checkinteger(L, 2);
if (n < 1)
luaL_error(L, "Error in " LUA_QS ": new size %d is non-positive.", "numarray.resize", n);
if (n == a->size) { /* do nothing and do not complain */
lua_pushinteger(L, a->size);
return 1;
}
if (n > a->size) { /* extend */
a = reallocud(L, a, a->size, n, a->isnumber ? sizeof(lua_Number) : sizeof(char), sizeof(NumArray));
if (a->isnumber) {
for (i=a->size; i < n; i++) a->data.n[i] = 0;
} else {
for (i=a->size; i < n; i++) a->data.c[i] = 0;
}
} else { /* reduce */
if (a->isnumber) {
for (i=a->size - 1; i > n - 1; i--) a->data.n[i] = 0;
} else {
for (i=a->size - 1; i > n - 1; i--) a->data.c[i] = 0;
}
a = reallocud(L, a, a->size, n, a->isnumber ? sizeof(lua_Number) : sizeof(char), sizeof(NumArray));
}
a->size = n;
lua_pushnumber(L, a->size);
return 1;
}
[...]
typedef struct NumArray {
size_t size; /* number of slots */
char isnumber;
union data {
lua_Number n[1]; /* pointer to the various lua_Number values */
unsigned char c[1];
} data;
} NumArray;
There is no need to mess with the internals of Lua.
But you need to change how the userdata is created: instead of allocating the struct and the data in the same memory block, allocate them separately, using lua_newuserdata to allocate the struct and malloc to allocate the data.
Then, to resize the data part, just update the size in the struct and realloc the data part.
you have:
static int numarray_resize (lua_State *L) {
[...]
if (n > a->size) { /* extend */
a = realloc(L, a, a->size, n, a->isnumber ? sizeof(lua_Number) : sizeof(char), sizeof(NumArray));
if (a->isnumber) {
[...]
But realloc takes 2 arguments, you want reallocud.
reallocing L with size a is likely to be a giant number and L may not have ever been allocated by the alloc family so there a couple reasons to get NULL unexpectedly.

linux kernel module memory checker

I'm developing a kernel memory checker to find memory leaks in kernel space.
I have two functions profile_vmalloc and profile_vfree, profile_vmalloc uses vmalloc to allocate memory and adds memory allocated info to a list, profile_vfree frees the allocated memory and removes that info from the list. How to replace vmalloc and vfree so that when I compile and run my module any kernel module allocating and freeing memory will use my functions ?
How do I use sysfs to create a read only file that will allow the user to see a summary of memory leaks?
This is what my code looks like so far
// Linux Kernel headers
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/config.h>
#include <linux/vmalloc.h>
#define FILE_NAME_LENGTH 256
#define vmalloc(size) profile_malloc (block_size, __FILE__, __LINE__)
#define vfree(mem_ref) profile_free(mem_ref)
struct _MEM_INFO
{
const void *address;
size_t size;
char file_name[FILE_NAME_LENGTH];
size_t length;
};
typedef struct _MEM_INFO MEM_INFO;
struct _MEM_LEAK {
MEM_INFO mem_info;
struct _MEM_LEAK * next;
};
typedef struct _MEM_LEAK MEM_LEAK;
#undef vmalloc
#undef vfree
static MEM_PROFILER_LIST * ptr_start = NULL;
static MEM_PROFILER_LIST * ptr_next = NULL;
/*
* adds allocated memory info. into the list
*
*/
void add(MEM_INFO alloc_info)
{
MEM_PROFILER_LIST * mem_leak_info = NULL;
mem_leak_info = (MEM_PROFILER_LIST *) malloc (sizeof(MEM_PROFILER_LIST));
mem_leak_info->mem_info.address = alloc_info.address;
mem_leak_info->mem_info.size = alloc_info.size;
strcpy(mem_leak_info->mem_info.file_name, alloc_info.file_name);
mem_leak_info->mem_info.line = alloc_info.line;
mem_leak_info->next = NULL;
if (ptr_start == NULL)
{
ptr_start = mem_leak_info;
ptr_next = ptr_start;
}
else {
ptr_next->next = mem_leak_info;
ptr_next = ptr_next->next;
}
}
/*
* remove memory info. from the list
*
*/
void erase(unsigned pos)
{
unsigned int i = 0;
MEM_PROFILER_LIST * alloc_info, * temp;
if(pos == 0)
{
MEM_PROFILER_LIST * temp = ptr_start;
ptr_start = ptr_start->next;
free(temp);
}
else
{
for(i = 0, alloc_info = ptr_start; i < pos;
alloc_info = alloc_info->next, ++i)
{
if(pos == i + 1)
{
temp = alloc_info->next;
alloc_info->next = temp->next;
free(temp);
break;
}
}
}
}
/*
* deletes all the elements from the list
*/
void clear()
{
MEM_PROFILER_LIST * temp = ptr_start;
MEM_PROFILER_LIST * alloc_info = ptr_start;
while(alloc_info != NULL)
{
alloc_info = alloc_info->next;
free(temp);
temp = alloc_info;
}
}
/*
* profile of vmalloc
*/
void * profile_vmalloc (unsigned int size, const char * file, unsigned int line)
{
void * ptr = vmalloc (size);
if (ptr != NULL)
{
add_mem_info(ptr, size, file, line);
}
return ptr;
}
/*
* profile of free
*/
void profile_free(void * mem_ref)
{
remove_mem_info(mem_ref);
free(mem_ref);
}
/*
* gets the allocated memory info and adds it to a list
*
*/
void add_mem_info (void * mem_ref, unsigned int size, const char * file, unsigned int line)
{
MEM_INFO mem_alloc_info;
/* fill up the structure with all info */
memset( &mem_alloc_info, 0, sizeof ( mem_alloc_info ) );
mem_alloc_info.address = mem_ref;
mem_alloc_info.size = size;
strncpy(mem_alloc_info.file_name, file, FILE_NAME_LENGTH);
mem_alloc_info.line = line;
/* add the above info to a list */
add(mem_alloc_info);
}
/*
* if the allocated memory info is part of the list, removes it
*
*/
void remove_mem_info (void * mem_ref)
{
unsigned int i;
MEM_PROFILER_LIST * leak_info = ptr_start;
/* check if allocate memory is in our list */
for(i = 0; leak_info != NULL; ++i, leak_info = leak_info->next)
{
if ( leak_info->mem_info.address == mem_ref )
{
erase ( i );
break;
}
}
}
/*
* writes a memory leak summary to a file
*/
void mem_leak_summary(void)
{
unsigned int i;
MEM_PROFILER_LIST * mem_output;
FILE * fp_write = fopen (SUMMARY_FILE, "wt");
char info[1024];
if(fp_write != NULL)
{
fwrite(info, (strlen(info) + 1) , 1, fp_write);
sprintf(info, "%s\n", "-----------------------------------");
fwrite(info, (strlen(info) + 1) , 1, fp_write);
for(mem_output= ptr_start; mem_output!= NULL; mem_output= mem_output->next)
{
sprintf(info, "address : %d\n", leak_info->mem_output.address);
fwrite(info, (strlen(info) + 1) , 1, fp_write);
sprintf(info, "size : %d bytes\n", leak_info->mem_output.size);
fwrite(info, (strlen(info) + 1) , 1, fp_write);
sprintf(info, "line : %d\n", leak_info->mem_output.line);
fwrite(info, (strlen(info) + 1) , 1, fp_write);
sprintf(info, "%s\n", "-----------------------------------");
fwrite(info, (strlen(info) + 1) , 1, fp_write);
}
}
clear();
}
static int __init profiler_init(void)
{
return 0;
}
static void __exit profiler_cleanup(void)
{
printk("profiler module uninstalled\n");
}
module_init(profiler_init);
module_exit(profiler_cleanup);
MODULE_LICENSE("GPL");
How to replace vmalloc and vfree so
that when I compile and run my module any kernel module allocating and
freeing memory will use my functions ?
Step 1. In the files that call vmalloc & vfree, define macros:
#define vmalloc(x) profile_vmalloc(x)
#define vfree(x) profile_vfree(x)
This will ensure that your functions are called.
Step 2: In your definition of the profile_* functions, do you accounting and call the actual vmalloc/vfree functions.
You are already doing the step 2, so you just need to take care of step 1.

Resources