Cutting a substring from a radix tree of strings - c

my problem is this: I want to search through a radix tree of strings, each having a unique number, until I find the one with the number given as function parameter. While I search the tree recursively I need to update the string (concatenate current string with the new one as I go down and cut off the suffix as I get back from recurrence). Each tree node has a pointer to its child and its brother. The function is as follows and it's not working for some reason.
void prev(struct tree *t, int w, int start, int end) {
char *newWord = "";
bool f = false;
bool *found = &f;
void prevRec(struct tNode *t,
int w, int start, int end, char *soFar) {
if (t != NULL) {
char *updatedWord = malloc(strlen(soFar) + strlen(t->word) + 1);
strcpy(updatedWord,soFar);
strcat(updatedWord,t->word);
int length = strlen(t->word);
if (t->count == w) {
*found = true;
if ((start > -1) && (start <= end)) {
newWord = updatedWord;
} else newWord = "";
} else {
if (!*found) {
prevRec(t->child,w,start,end,updatedWord);
char *sub = substring(updatedWord, 0, strlen(updatedWord) - length);
free(updatedWord);
updatedWord = sub;
prevRec(t->brother,w,start,end,updatedWord);
sub = substring(updatedWord, 0, strlen(updatedWord) - length);
free(updatedWord);
updatedWord = sub;
}
}
}
} //prevRec
prevRec(t->root->child,w,start,end,newWord);
if (strlen(newWord) < 1 || end > strlen(newWord)) {
printf("ignored\n");
globalFound = 1;
} else {
char *tmp = substring(newWord,start,end - start + 1);
insert(t,tmp);
free(tmp);
}
}

Related

How do I debug a Segmentation fault when I nothing is wrong before the fault or after?

The particular problem I have is that in my main function, I have added a print statement before and after I call the "bad" function. It always shows the before statement, but never the after statement. I also added a print statement to the end of the "bad" function, and I can see that it runs properly to the very last line of the "bad" function, so it should return normally. After the functions last print and before the main function print, I get the segfault. Any ideas? Here is the code:
int main(int argc, char* argv[])
{
char myItem[100];
int i = 0;
while (i < 100) {
scanf("%[^\n]", myItem);
i++;
if (myItem == EOF) {
break;
}
int c;
while ((c = getchar()) != '\n' && c != EOF);
//printf("string read in from user typing: %s\n", myItem);
printf("i = %d\n", i);
emailFilter(myItem);
printf("done with email filter in main\n");
//printf("item from this pass is:%s\n\n", myItem);
}
return 0;
}
and the "bad" function:
void emailFilter(char* mySubject)
{
printf(" Just entered the emailFilter() .\n");
char * event_holder[5]; //holds five separate char ptrs
for (int i = 0; i < 5; i++)
{
event_holder[i] = ((char*)malloc(100 * sizeof(char*)));
}
char command_type = parseSubject(mySubject, event_holder); //parses subject line and fills event_holder. returns command type, from parsing
//call proper parsing result
if (command_type == 'C')
{
create(event_holder);
}
else if (command_type == 'X')
{
change(event_holder);
}
else if (command_type == 'D')
{
delete(event_holder);
}
printf("Leaving emailfilter()...\n");
}
and running this code provides me:
$:
i = 1
Just entered the emailFilter() .
C, Meeting ,01/12/2019,15:30,NEB202
Leaving emailfilter()...
done with email filter in main
i = 2
Just entered the emailFilter() .
Leaving emailfilter()...
Segmentation fault
This shows that I always make it through the function, but still don't return properly.
Here is my entire code to reproduce the error.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
struct node {
char * event_data[5];
struct node * next;
};
struct node *head = NULL;
struct node *current = NULL;
char* earliest = NULL;
char* substring (char* orig, char* dest, int offset, int len)
{
int input_len = strlen (orig);
if (offset + len > input_len)
{
return NULL;
}
strncpy (dest, orig + offset, len);
//add null char \0 to end
char * term = "\0";
strncpy (dest + len, term, 1);
return dest;
}
char * firstItem(char* shortenedSubject)
{
int i = 0;
int currentLength = 0;
int currentCharIndex = 0;
int eventIndex = 0;
char * toReturn = (char*)malloc(100);
while ((shortenedSubject[currentLength] != '\0') && (shortenedSubject[currentLength] != ',') )//50 is my safety num to make sure it exits eventually
{
currentLength++;
}
if (shortenedSubject[currentLength] == ',') {
substring(shortenedSubject, toReturn, 0, currentLength);
}
return toReturn;
}
char parseSubject(char* subject,char * eventDataToReturn[5]) //returns "what type of command called, or none"
{
char toReturn;
char * shortenedSubject = (char*)malloc(100);
substring(subject,shortenedSubject,9,strlen(subject)-9);//put substring into tempString
int currentCharIndex = 0;// well feed into index of substring()
int eventIndex = 0; //lets us know which event to fill in
int currentLength = 0;//lets us know length of current event
int i = 0; //which char in temp string were alooking at
char * action = firstItem(shortenedSubject);
if (strlen(action) == 1)
{
if ( action[0] == 'C')
{
toReturn = 'C';
}
else if (action[0] == 'X')
{
toReturn = 'X';
}
else if (action[0] == 'D')
{
toReturn = 'D';
}
else
{
toReturn = 'N'; //not valid
//invalid email command, do nothing
}
}
else
{
toReturn = 'N'; //not valid
//invalid email command, do nothing
}
char* debug2;
while ((shortenedSubject[i] != '\0') && (i <= 50) )//50 is my safety num to make sure it exits eventually
{
char debugvar = shortenedSubject[i];
currentLength++;
if (shortenedSubject[i] == ',')
{
//eventDataToReturn[i] = substring2(shortenedSubject,currentCharIndex,currentLength);
substring(shortenedSubject,eventDataToReturn[eventIndex],currentCharIndex,currentLength-1);
debug2 = eventDataToReturn[eventIndex];
currentCharIndex= i +1;
eventIndex++;
currentLength = 0;
//i++;
}
i++;
}
substring(shortenedSubject,eventDataToReturn[4],currentCharIndex,currentLength);
return toReturn;
}
void printEventData(char* my_event_data[])
{
//printf("\nPrinting event data...\n");
for (int i = 1; i < 4; i++)
{
printf("%s,",my_event_data[i]);
}
//print last entry, no comma
printf("%s",my_event_data[4]);
}
void printEventsInorder()
{
struct node * ptr = head;
while (ptr != NULL)//if not empty, check each one and add when ready
{
printEventData(ptr->event_data);
printf("\n");
ptr = ptr->next;
}
}
void insertFront(char* my_event_data[5])
{
struct node *link = (struct node*) malloc(sizeof(struct node));
link->next = NULL;
for (int i = 0; i < 5; i++)
{
link->event_data[i] = my_event_data[i];
}
head = link;
}
int isEarlier(char* event_data_L[5], char* event_data_R[5])
{// will be given like 12:30 12:45,turn timeL into timeL1 and timeL2, and time R1 and timeR2
//compare dates for earlier
int month_L,day_L,year_L;
int month_R,day_R,year_R;
char* char_holder;
substring(event_data_L[2],char_holder,0,2);//extract first half of time
month_L = atoi(char_holder); //convert first half of time to int
substring(event_data_L[2],char_holder,3,2);//extract first half of time
day_L = atoi(char_holder); //convert first half of time to int
substring(event_data_L[2],char_holder,6,4);//extract first half of time
year_L = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,0,2);//extract first half of time
month_R = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,3,2);//extract first half of time
day_R = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,6,4);//extract first half of time
year_R = atoi(char_holder); //convert first half of time to int
int time_L1,time_L2,time_R1,time_R2;
substring(event_data_L[3],char_holder,0,2);//extract first half of time
time_L1 = atoi(char_holder); //convert first half of time to int
substring(event_data_L[3],char_holder,3,2);//extract second half of time
time_L2 = atoi(char_holder); //convert second half of time to int
substring(event_data_R[3],char_holder,0,2);
time_R1 = atoi(char_holder);
substring(event_data_R[3],char_holder,3,2);
time_R2 = atoi(char_holder);
//convert to 2 ints, first compare left ints, then right ints
if(year_L < year_R)
{
return 1;
}
else if ( year_L == year_R)
{
if (month_L < month_L)
{
return 1;
}
else if (month_L == month_L)
{
if (day_L < day_R)
{
return 1;
}
else if (day_L == day_R)
{
if (time_L1 < time_R1)
{
return 1;
}
else if (time_L1 == time_R1)
{
if (time_L2 < time_R2)
{
return 1;
}
else if (time_L2 == time_R2)
{
return 2;
}
else//else, time is greater
{
return 3;
}
}
else //left time is greater, return 3
{
return 3;
}
}
else
{
return 3;
}
}
else
{
return 3;
}
}
else //its left is greater than right so return 3 to show that
{
return 3;
}
}
void create(char* my_event_data[5]) {
//print required sentence
char * debugvar2 = my_event_data[3];
if (head == NULL)//if empty calendar, just add it
{
insertFront(my_event_data);
//printf("EARLIEST bc empty list, \n");
printf("C, ");
printEventData(my_event_data);
printf("\n");
return;
}
else
{
struct node *link = (struct node*) malloc(sizeof(struct node));
link->next = NULL;
for (int i = 1; i < 5; i++)
{
link->event_data[i] = my_event_data[i];
}
struct node *ptr = head;
struct node *prev = NULL;
if (ptr->next == NULL) //if this is the last node to check against
{
if (isEarlier(my_event_data, ptr->event_data) == 1)
{ //check against it
printf("C, ");
printEventData(my_event_data);
printf("\n");
if (prev != NULL) //if this is first item in linked list...
{
link->next = head; //assign something before head
head = link; //move head to that thing
}
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr;
return;
}
else //else is equal to or later, so tack it on after:
{
ptr->next = link;
}
}
else
{
while (ptr->next != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if (isEarlier(my_event_data,ptr->event_data) == 1)
{
if (head == ptr) //if earlier than head... insert and print
{
//printf("earlier than head!");
printf("C, ");
printEventData(my_event_data);
printf("\n");
link->next = ptr;
head = link;
}
else //if earlier than non head, insert, but dont print
{
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr;
}
return;
}
else
{
prev = ptr;
ptr = ptr->next;
}
}
if (isEarlier(my_event_data,ptr->event_data) == 1) //while ptr-> is null now
{
printf("C, ");
printEventData(my_event_data);
printf("\n");
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr->next;
return;
}
else
{
prev = link;
link = ptr;
}
}
return;
}
//if it gets here, it is the latest meeting, tack it on the end
//prev->ptr = link;
}
void change(char* my_event_data[5]) {
//create a link
struct node *ptr = head;
while (ptr->next != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if (*ptr->event_data[1] == *my_event_data[1])
{
for (int i = 1; i < 5; i++)
{
ptr->event_data[i] = my_event_data[i];
}
printf("X, ");
printEventData(my_event_data);
printf("\n");
return;
}
ptr = ptr->next;
}
if (*ptr->event_data[1] == *my_event_data[1]) //check final node
{
for (int i = 0; i < 5; i++)
{
ptr->event_data[i] = my_event_data[i];
}
printf("X, ");
printEventData(my_event_data);
printf("\n");
return;
}
printf("event to change not found");
return;
//if it gets here, nothing matched the title to change
}
void delete(char* my_event_data[5])
{
struct node *ptr = head;
struct node *prev = NULL;
while (ptr != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if ( strcmp( ptr->event_data[1], my_event_data[1] ) == 0) // if title matches, delete it
{
if (prev != NULL)
{
prev->next = ptr->next;
}
if (ptr == head)
{
head = ptr->next;
}
free(ptr);
printf("D, ");
printEventData(my_event_data);
printf("\n");
return;
}
prev = ptr;
ptr = ptr->next;
}
}
void emailFilter(char* mySubject)
{
if (strlen(mySubject) < 9)
{
return;
}
char * event_holder[5]; //holds five separate char ptrs
for (int i = 0; i < 5; i++)
{
event_holder[i] = ((char*)malloc(100 * sizeof(char*)));
}
char command_type = parseSubject(mySubject, event_holder); //parses subject line and fills event_holder. returns command type, from parsing
//call proper parsing result
if (command_type == 'C')
{
create(event_holder);
}
else if (command_type == 'X')
{
change(event_holder);
}
else if (command_type == 'D')
{
delete(event_holder);
}
}
int main(int argc, char* argv[])
{
char myItem[100];
int i = 0;
while (i < 100)
{
scanf("%[^\n]", myItem);
i++;
if ( myItem == EOF )
{
break;
}
int c;
while ((c = getchar()) != '\n' && c != EOF);
printf("i = %d\n", i);
emailFilter(myItem);
}
return 0;
}
Also please note that this error happens when I use a txt file as STDIN via the ">" symbol on the command line. Here is the file I use:
Subject: C,Meeting ,01/12/2019,15:30,NEB202
Subject: C,Meeting ,01/12/2019,16:30,NEB202
Subject: C,Meeting ,01/12/2019,11:30,NEB202
Having tried to find something to contribute, there's this:
The code is dealing with the date/time. Below is the declaration and use of a "destination buffer" into which is copied fragments of the string:
int isEarlier(char* event_data_L[5], char* event_data_R[5])
{// will be given like 12:30 12:45 // ....
//compare dates for earlier
int month_L,day_L,year_L;
int month_R,day_R,year_R;
char* char_holder;
substring(event_data_L[2],char_holder,0,2);//extract first half of time
month_L = atoi(char_holder); //convert first half of time to int
//...
Notice that char_holder isn't pointing anywhere in particular. UB...
While it represents a beginner's approach, it is actually painful to see code like this. Below is a more concise version of isEarlier() (untested.)
int isEarlier( char *ed_L[5], char *ed_R[5] ) {
char l[16], r[16];
memcpy( l + 0, ed_L[2][6],4 ); // YYYY
memcpy( l + 4, ed_L[2][0],2 ); // MM
memcpy( l + 6, ed_L[2][3],2 ); // DD
memcpy( l + 8, ed_L[3][0],2 ); // hh
memcpy( l + 10, ed_L[3][3],2 ); // mm
memcpy( r + 0, ed_R[2][6],4 ); // YYYY
memcpy( r + 4, ed_R[2][0],2 ); // MM
memcpy( r + 6, ed_R[2][3],2 ); // DD
memcpy( r + 8, ed_R[3][0],2 ); // hh
memcpy( r + 10, ed_R[3][3],2 ); // mm
int res = memcmp( l, r, 12 );
return res < 0 ? 1 : res == 0 ? 2 : 3;
}
Note: The sample data provided indicates 2 digits for both month and day, and is ambiguous as to "mm/dd" or "dd/mm" format. The offset values used here come from the OP code.
One way to reduce the possibility of bugs in code is to both write less but more capable code, if you can, and to perform "unit testing" on code that you write. Focus on one function at a time and do not use global variables. Another is to become as familiar as you can with the proven capabilities of functions in the standard library.
EDIT: Looking at this answer, it occurs to me that this function should, itself, be refactored:
void reformatDateTime( char *d, char *s[5] ) {
memcpy( d + 0, s[2][6],4 ); // YYYY
memcpy( d + 4, s[2][0],2 ); // MM
memcpy( d + 6, s[2][3],2 ); // DD
memcpy( d + 8, s[3][0],2 ); // hh
memcpy( d + 10, s[3][3],2 ); // mm
}
int isEarlier( char *ed_L[5], char *ed_R[5] ) {
char l[16], r[16];
reformatDateTime( l, ed_L );
reformatDateTime( r, ed_R );
int res = memcmp( l, r, 12 );
res = res < 0 ? 1 : res == 0 ? 2 : 3;
printf( "isEarlier() '%.12s' vs '%.12s' result %d\n", l, r, res ); // debug
return res;
}
and I can see that it runs properly
You contradict yourself, you say that it sometimes or always seg faults. It's rather unlikely that some C code would crash at the point of leaving a function, since there's no "RAII" and in this case no multi-threading either. A stack corruption could have destroyed the function return address however.
The best way of debugging is not so much about focusing on the symptom, as it trying to pinpoint where something goes wrong. You've already done as much, so that's most of the debug effort already done.
One way of debugging from there is step #1: stare at the function for one minute. After around 5 seconds: event_holder[i] = ((char*)malloc(100 * sizeof(char*))); Well that's an obvious bug. After some 30 seconds more: wait, who cleans up this memory? The delete function perhaps but why is it then executed conditionally? (Turns out delete doesn't free() memory though.) The function leaks memory, another bug. Then after one full minute we realize that parseSubject does a whole lot of things and we'll need to dig through that one in detail if we want to weed out every possible chance of bugs. And it will take a lot more time to get to the bottom of that. But we already found 2 blatant bugs just by glancing at the code.
Fix the bugs, try again, is the problem gone?
At another glance there's a bug in main(), myItem == EOF is senseless and shouldn't compile. This suggests that you are compiling with way too lax warning levels or ignoring warnings, either is a very bad thing. What compiler options are recommended for beginners learning C?
We might note that extensive use of "magic numbers" make the code hard to read. It is also usually a sure sign of brittle code. Where do these 5 and 9 and so on come from? Use named constants. We will also fairly quickly note the lack of const correctness in something that's only supposed to parse, not change data. And so on.
I didn't read the code in detail, but the overall lack of following best practices and 3 bugs found just by brief glances suggests there's a whole lot more bugs in there.

How can I access the tree in my printpostroder function?

This is my tree struct :
typedef struct quad
{
struct quad *child[4];
char *names;
} quad;
and I need to build it then print it in postorder
but I can't access the memory for the tree in my printpostorder function :
void printpostorder(quad * tree)
{
if (tree->names[0] == 'G') {
printpostorder(tree->child[0]);
printpostorder(tree->child[1]);
printpostorder(tree->child[2]);
printpostorder(tree->child[3]);
printf("%s", tree->names);
}
else {
printf("%s", tree->names);
}
}
I can access it before calling this function in the main function.
int main(void){
int n = 0;
int size;
quad * t;
char * str1 = (char *)malloc(MAX * sizeof(char));
printf("Enter name: ");
scanf("%s", str1);
size = strlen(str1);
t = build_preorder_tree(str1,t,&n,size);
printpostorder(t);
}
here is the build tree function, basically we have to build from preorder, then print out the post order.
quad* build_preorder_tree(char *s_r, quad * tree, int * index_ptr,int size){
char c;
int s = 0;
int index = *index_ptr;
c = s_r[index];
char d = ']';
char * ptr = (char *)malloc(MAX * sizeof(char));
char * ptr1;
if(index == size){
return;
}
tree = malloc(sizeof(quad*));
tree -> names = (char *)malloc(MAX * sizeof(char));
if(c == 'G') {
tree->names = "G";
(*index_ptr)++;
tree->child[SW] = build_preorder_tree(s_r, tree->child[SW],index_ptr,size);
tree->child[SE] = build_preorder_tree(s_r, tree->child[SE],index_ptr,size);
tree->child[NW] = build_preorder_tree(s_r, tree->child[NW],index_ptr,size);
tree->child[NE] = build_preorder_tree(s_r, tree->child[NE],index_ptr,size);
}
if(c == 'W') {
tree->names = "W";
(*index_ptr)++;
}
if(c == 'B') {
strcpy(ptr,s_r);
ptr1 = strtok(ptr+index,"]");
strncat(ptr1,&d,1);
s = strlen(ptr1);
(*index_ptr)= (*index_ptr) + s;
tree->names = ptr1;
}
return tree;
}
And when I call the printpostorder(t), a seg fault occurred at first call of
if (tree->names[0] == 'G')
I tried to access the tree before the printpostorder is call, and I was able to access every element of the tree

Manipulating structs with a void function in C

so I've been set a task of creating a faux string struct and implementing all the usual string functions on my faux string struct. I'm stuck on the tests of my strcat implementation called append, with the first test failing (segfault) being the 5th line. My function for creating new structs should be OK because it passed all the tests, but I've included it just incase.
I've already been able to successfully implement length, get, set and copy functions for my faux string structs.
The struct:
struct text {
int capacity;
char *content;
};
typedef struct text text;
My function for creating new structs:
text *newText(char *s) {
printf("new Text from %s\n", s);
int sizeNeeded = (strlen(s)+1);
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot = sizeGot * 2;
}
text *out = malloc(sizeGot);
char *c = malloc(sizeGot);
strcpy(c, s);
out->content = c;
out->capacity = (sizeGot);
printf("the capacity is %d\n", sizeGot);
return out;
free(c);
}
My append function:
void append(text *t1, text *t2) {
printf("t1 content is %s, t2 content is %d\n", t1->content, *t2->content);
int sizeNeeded = (t1->capacity + t2->capacity);
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot = sizeGot * 2;
}
char *stringy = calloc(sizeGot, 32);
stringy = strcat(t1->content, t2->content);
free(t1);
t1 = newText(stringy);
}
and finally the tests:
void testAppend() {
text *t = newText("car");
text *t2 = newText("pet");
append(t, t2);
assert(like(t, "carpet"));
assert(t->capacity == 24);
text *t3 = newText("789012345678901234");
append(t, t3);
assert(like(t, "carpet789012345678901234"));
assert(t->capacity == 48);
freeText(t);
freeText(t2);
freeText(t3);
}
You are allocating memory in the wrong way. You could fix this by using a flexible array member like this:
typedef struct {
int capacity;
char content[];
} text;
text *out = malloc(sizeof(text) + sizeof(something));
strcpy(out->content, str);
...
And obviously code such as this is nonsense:
return out;
free(c);
}
Enable compiler warnings and listen to them.
Och, some errors you have:
Inside text_new you allocate memory for text *out using text *out = malloc(sizeGot); when sizeGot = 24 is a constant value. You should allocate sizeof(*out) or sizeof(text) bytes of memory for it.
I don't know what for int sizeGot = 24; while (sizeNeeded > sizeGot) the loop inside text_new and append is for. I guess the intention is to do allocations in power of 24. Also it mostly looks like the same code is in both functions, it does look like code duplication, which is a bad thing.
Inside append You pass a pointer to t1, not a double pointer, so if you modify the t1 pointer itself the modification will not be visible outside of function scope. t1 = newText(stringy); is just pointless and leaks memory. You could void append(text **t1, text *t2) and then *t1 = newText(stringy). But you can use a way better approach using realloc - I would expect append to "append" the string, not to create a new object. So first resize the buffer using realloc then strcat(&t1->content[oldcapacity - 1], string_to_copy_into_t1).
int sizeNeeded = (t1->capacity + t2->capacity); is off. You allocate capacity in power of 24, which does not really interact with string length. You need to have strlen(t1->content) + strlen(t2->content) + 1 bytes for both strings and the null terminator.
Try this:
size_t text_newsize(size_t sizeNeeded)
{
// I think this is just `return 24 << (sizeNeeded / 24);`, but not sure
int sizeGot = 24;
while (sizeNeeded > sizeGot) {
sizeGot *= 2;
}
return sizeGot;
}
text *newText(char *s) {
printf("new Text from %s\n", s);
if (s == NULL) return NULL;
int sizeNeeded = strlen(s) + 1;
int sizeGot = text_newsize(sizeNeeded);
text *out = malloc(sizeof(*out));
if (out == NULL) {
return NULL;
}
out->content = malloc(sizeGot);
if (out->content == NULL) {
free(out);
return NULL;
}
strcpy(out->content, s);
out->capacity = sizeGot;
printf("the capacity is %d\n", sizeGot);
return out;
}
and this:
int append(text *t1, text *t2) {
printf("t1 content is %s, t2 content is %s\n", t1->content, t2->content);
int sizeNeeded = strlen(t1->content) + strlen(t2->content) + 1;
if (t1->capacity < sizeNeeded) {
// this could a text_resize(text*, size_t) function
int sizeGot = text_newsize(sizeNeeded);
void *tmp = realloc(t1->content, sizeGot);
if (tmp == NULL) return -ENOMEM;
t1->content = tmp;
t1->capacity = sizeGot;
}
strcat(t1->content, t2->content);
return 0;
}
Some remarks:
Try to handle errors in your library. If you have a function like void append(text *t1, text *t2) let it be int append(text *t1, text *t2) and return 0 on success and negative number on *alloc errors.
Store the size of everything using size_t type. It's defined in stddef.h and should be used to represent a size of an object. strlen returns size_t and sizeof also returns size_t.
I like to put everything inside a single "namespace", I do that by prepending the functions with a string like text_.
I got some free time and decided to implement your library. Below is the code with a simple text object storing strings, I use 24 magic number as allocation chunk size.
// text.h file
#ifndef TEXT_H_
#define TEXT_H_
#include <stddef.h>
#include <stdbool.h>
struct text;
typedef struct text text;
text *text_new(const char content[]);
void text_free(text *t);
int text_resize(text *t, size_t newsize);
int text_append(text *to, const text *from);
int text_append_mem(text *to, const void *from, size_t from_len);
const char *text_get(const text *t);
int text_append_str(text *to, const char *from);
char *text_get_nonconst(text *t);
size_t text_getCapacity(const text *t);
bool text_equal(const text *t1, const text *t2);
#endif // TEXT_H_
// text.c file
//#include "text.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
struct text {
size_t capacity;
char *content;
};
text *text_new(const char content[])
{
text * const t = malloc(sizeof(*t));
if (t == NULL) goto MALLOC_ERR;
const struct text zero = {
.capacity = 0,
.content = NULL,
};
*t = zero;
if (content != NULL) {
const int ret = text_append_str(t, content);
if (ret) {
goto TEXT_APPEND_ERR;
}
}
return t;
TEXT_APPEND_ERR:
free(t);
MALLOC_ERR:
return NULL;
}
void text_free(text *t)
{
assert(t != NULL);
free(t->content);
free(t);
}
int text_resize(text *t, size_t newcapacity)
{
// printf("%s %d -> %d\n", __func__, t->capacity, newcapacity);
// we resize in chunks
const size_t chunksize = 24;
// clap the capacity into multiple of 24
newcapacity = (newcapacity + chunksize - 1) / chunksize * chunksize;
void * const tmp = realloc(t->content, newcapacity);
if (tmp == NULL) return -ENOMEM;
t->content = tmp;
t->capacity = newcapacity;
return 0;
}
int text_append_mem(text *to, const void *from, size_t from_len)
{
if (to == NULL || from == NULL) return -EINVAL;
if (from_len == 0) return 0;
const size_t oldcapacity = to->capacity == 0 ? 0 : strlen(to->content);
const size_t newcapacity = oldcapacity + from_len + 1;
int ret = text_resize(to, newcapacity);
if (ret) return ret;
memcpy(&to->content[newcapacity - from_len - 1], from, from_len);
to->content[newcapacity - 1] = '\0';
return 0;
}
int text_append_str(text *to, const char *from)
{
if (to == NULL || from == NULL) return -EINVAL;
return text_append_mem(to, from, strlen(from));
}
int text_append(text *to, const text *from)
{
if (to == NULL || from == NULL) return -EINVAL;
if (text_getCapacity(from) == 0) return 0;
return text_append_str(to, text_get(from));
}
const char *text_get(const text *t)
{
return t->content;
}
const size_t text_strlen(const text *t)
{
return t->capacity == 0 ? 0 : strlen(t->content);
}
size_t text_getCapacity(const text *t)
{
return t->capacity;
}
bool text_equal_str(const text *t, const char *str)
{
assert(t != NULL);
if (str == NULL && t->capacity == 0) return true;
const size_t strlength = strlen(str);
const size_t t_strlen = text_strlen(t);
if (t_strlen != strlength) return false;
if (memcmp(text_get(t), str, strlength) != 0) return false;
return true;
}
// main.c file
#include <stdio.h>
int text_testAppend(void) {
text *t = text_new("car");
if (t == NULL) return -1;
text *t2 = text_new("pet");
if (t2 == NULL) return -1;
if (text_append(t, t2)) return -1;
assert(text_equal_str(t, "carpet"));
assert(text_getCapacity(t) == 24);
text *t3 = text_new("789012345678901234");
if (t3 == NULL) return -1;
if (text_append(t, t3)) return -1;
assert(text_equal_str(t, "carpet789012345678901234"));
assert(text_getCapacity(t) == 48);
text_free(t);
text_free(t2);
text_free(t3);
return 0;
}
int main()
{
text *t1 = text_new("abc");
text_append_str(t1, "def");
printf("%s\n", text_get(t1));
text_free(t1);
printf("text_testAppend = %d\n", text_testAppend());
return 0;
}

Duplicate nodes when removing from kdtree

I am writing an algorithm that requires me to search nearest neighbors of points. I found the kdtree library from this post (Using Google's C KD Tree Library) but it does not have a function to delete individual nodes from the tree. So I started to implement my own using
www (dot) geeksforgeeks.org/k-dimensional-tree-set-3-delete/
as a template. It all runs through but unfortunately sometimes nodes get duplicated.
My test case is the following:
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <math.h>
#include <errno.h>
#include <string.h>
#include <stdarg.h>
#include "kdtree.h"
/* (hopefully) platform independent directory creation */
#if defined(_WIN32) || defined(WIN32) /* this should be defined under windows, regardless of 64 or 32 bit*/
#include <direct.h>
#include <sys/stat.h>
#define GetWorkingDir _getcwd
#define MakeDir(str) _mkdir(str)
#else /* unix based system */
#include <unistd.h>
#include <sys/stat.h>
#define GetWorkingDir getcwd
#define MakeDir(str) mkdir(str, 0777)
#endif
#ifndef MAX_PATH
#define MAX_PATH 260
#endif
void GetLogDir(char* strPath, int nBufSize)
{
if(GetWorkingDir(strPath, nBufSize))
{
strncat(strPath, "/log/", 5);
MakeDir(strPath);
}
else
{
fprintf(stderr, "Could not get working directory");
exit(ENOENT);
}
}
FILE* GetOpenFileHandle(const char* strFilenamePlusPath, const char* strOpenMode)
{
if(strOpenMode == NULL) // too bad we dont have default arguments in C :(
{
strOpenMode = "a+";
}
return(fopen(strFilenamePlusPath, strOpenMode));
}
int CloseFile(FILE* pFile)
{
if(pFile != NULL)
{
fprintf(pFile, "\r\n"); // append a new line before closing!
return(fclose(pFile));
}
fprintf(stderr, "Invalid file handle");
exit(EFAULT);
}
void NodeLabelToFile(FILE* pFile, kdnode* node, const char* strName)
{
fprintf(pFile, "%s [label=\"(%.3f, %.3f)\"] \n", strName, node->pos[0], node->pos[1]);
}
char* NodeToString(kdnode* node, int* num)
{
char* strName = (char*) malloc(MAX_PATH);
if(*num == 0)
{
sprintf(strName, "%s","root");
}
else
{
sprintf(strName, "node%d", *num);
}
return strName;
}
void NodesToFile(FILE* pFile, kdnode* node, const char* strParentname, int* num)
{
if(node && pFile)
{
char* strLeft = NULL;
char* strRight = NULL;
if(node->left)
{
(*num)++;
strLeft = NodeToString(node->left, num);
NodeLabelToFile(pFile, node->left, strLeft);
fprintf(pFile, "%s -> %s \n", strParentname, strLeft);
}
if(node->right)
{
(*num)++;
strRight = NodeToString(node->right, num); // name of the current node
NodeLabelToFile(pFile, node->right, strRight);
fprintf(pFile, "%s -> %s \n", strParentname, strRight);
}
if(strLeft)
{
NodesToFile(pFile, node->left, strLeft, num);
free(strLeft);
}
if(strRight)
{
// (*num)++;
NodesToFile(pFile, node->right, strRight, num);
free(strRight);
}
}
}
FILE* MakeOpenLogFile(const char* strFilename, const char* strOpenMode)
{
if(strOpenMode == NULL)
{
strOpenMode = "a+";
}
char* strFilenamePlusPath = (char*) malloc(MAX_PATH);
GetLogDir(strFilenamePlusPath, MAX_PATH);
strncat(strFilenamePlusPath, strFilename, strlen(strFilename));
FILE* pFile = GetOpenFileHandle(strFilenamePlusPath, strOpenMode);
free(strFilenamePlusPath);
return(pFile);
}
void KDTreeToDotFile(kdtree* Tree, const char* strFilename)
{
if(Tree)
{
FILE* pFile = MakeOpenLogFile(strFilename, "w");
fprintf(pFile, "%s", "digraph d { \n"); // print opening statement for the graph in dot language
// traverse the tree and print the nodes
int* num = (int*) malloc(sizeof(int)); // make this a unique location to make sure numbers can't occur twice
*num = 0;
char* strRoot = NodeToString(Tree->root, num);
NodeLabelToFile(pFile, Tree->root, strRoot);
NodesToFile(pFile, Tree->root, "root", num);
if(strRoot)
{
free(strRoot);
}
free(num);
fprintf(pFile,"%s", "}"); // close the digraph environment
CloseFile(pFile);
}
}
int main(int argc, const char * argv[])
{
int numel = 20;
int toRemove = 19;
double dMax = 3000;
int nNumDim = 2;
printf("init rng");
srand(1234); // seed the rng // srand((unsigned) time(&t));
printf("creating kdtree");
kdtree* TreeRoot = kd_create(nNumDim); // construct the kd tree for the nearest neighbor search
kd_data_destructor(TreeRoot, free); // set free as data destructor
double* pos = (double*) malloc(nNumDim * numel * sizeof(double));
int retval;
for (int ii = 0; ii < numel; ii++)
{
pos[nNumDim * ii] = floor((double)rand()/(double)(RAND_MAX/dMax));
pos[nNumDim * ii + 1] = floor((double)rand()/(double)(RAND_MAX/dMax));
int* randint = (int*) malloc(sizeof(int));
*randint = rand();
retval = kd_insert2(TreeRoot,
pos[nNumDim * ii],
pos[nNumDim * ii + 1],
randint, sizeof(int));
assert(retval == 0);
}
KDTreeToDotFile(TreeRoot, "original.dot");
double* dRemovePos = (double*) malloc(sizeof(double)*nNumDim);
for (int ii = 0; ii < toRemove; ii++)
{
dRemovePos[0] = pos[2*ii];
dRemovePos[1] = pos[2*ii + 1];
kd_remove(TreeRoot, dRemovePos);
}
KDTreeToDotFile(TreeRoot, "removed.dot");
kd_free(TreeRoot); // free kdtree
return 0;
}
and the functions to remove the nodes are implemented like this:
(I don't think if it is too much code, so I only will post my changes to the kd library. If I should add the rest of the code, which is more than 1000 lines unfortunately, just tell me in the comments.)
int kd_remove(kdtree* tree, const double* pos)
{
printf("removing node %.3f, %.3f \n", pos[0], pos[1]);
if(tree->root != NULL)
{
assert(tree->dim != 0); // prevent division by 0 (error code 136)
assert(pos != NULL); // make sure a valid position is passed
tree->root = remove_rec(tree->root, pos, tree->dim, tree->destr, 0);
}
return(0);
}
kdnode* remove_rec(kdnode* node, const double* pos, int dim, void (*destr)(void*), int depth)
{
if(node == NULL)
{
return(NULL);
}
int curdim = depth % dim;
if(same_pos(node->pos, pos, dim))
{
// we found the droid we're looking for
if(node->right)
{
// find the minimum in the right subtree
kdnode* node_min = find_min(node->right, curdim, dim);
if(node_min)
{
copy_node_data(node_min, node, dim);
node->right = remove_rec(node->right, node_min->pos, dim, destr, depth + 1);
}
}
else if(node->left)
{
// find the minimum in the left subtree
kdnode* node_min = find_min(node->left, curdim, dim);
if(node_min)
{
copy_node_data(node_min, node, dim);
node->left = remove_rec(node->left, node_min->pos, dim, destr, depth + 1);
}
}
else
{
// no subtrees -> delete the found node
clear_rec(node, destr);
return(NULL);
}
return node; // return the newly filled node to the recursion step one "above"
}
else
{
// points are not the same, look further
if(pos[curdim] < node->pos[curdim])
{
// position we're looking for is smaller -> go left
node->left = remove_rec(node->left, pos, dim, destr, depth + 1);
}
else
{
// go right, position we're looking for is greater
node->right = remove_rec(node->right, pos, dim, destr, depth + 1);
}
return node;
}
}
void copy_node_data(const kdnode* src, kdnode* dst, int dim)
{
if(src && dst)
{
int nNumBytes = dim * sizeof(double);
memcpy(dst->pos, src->pos, nNumBytes);
if(dst->data != NULL)
{
free(dst->data);
dst->data = malloc(src->databytes);
}
memcpy(dst->data, src->data, src->databytes);
dst->databytes = src->databytes;
}
}
int same_pos(const double* pos1, const double* pos2, int dim)
{
for (int i = 0; i < dim; ++i)
{
if(pos1[i] != pos2[i])
{
return 0; // false
}
}
return 1; // true
}
kdnode* find_min(kdnode* node, int dir, int numdim)
{
return find_min_rec(node, dir, 0, numdim);
}
kdnode* find_min_rec(kdnode* node, int dir, int depth, int numdim)
{
if(!node)
{
return NULL;
}
if(node->left == NULL && node->right == NULL)
{
return node; // is leaf node
}
int curdim = depth % numdim;
if(curdim == numdim)
{
if(node->left == NULL)
{
// no smaller node in tree
return node;
}
else
{
// left subtree is populated -> we need to go deeper
return find_min_rec(node->left, node->dir, depth + 1, numdim);;
}
}
// we have to search both subtrees and find the smallest value compared to the current node
return min_node(node, find_min_rec(node->left, node->dir, depth + 1, numdim),
find_min_rec(node->right, node->dir, depth + 1, numdim), node->dir);
}
kdnode* min_node(kdnode* a, kdnode* left, kdnode* right, int dir)
{
if(a == NULL)
{
// node a is the only one that can't be NULL!
fprintf(stderr, "Error: invalid node passed! \n");
exit(EFAULT);
}
kdnode* result = a;
if(left != NULL)
{
if(left->pos[dir] < result->pos[dir])
{
result = left;
}
}
if(right != NULL)
{
if(right->pos[dir] < result->pos[dir])
{
result = right;
}
}
return result;
}
original.dot looks like this and removed.dot like that.
I've been debugging this since yesterday and I have the feeling it is something really obvious that I am missing here...
Thanks in advance to anyone willing to help :)
You are creating 40 elements
int numel = 20;
int nNumDim = 2;
double* pos = (double*) malloc(nNumDim * numel * sizeof(double)); // Don't cast
but removing only 38
int toRemove = 19;
for (int ii = 0; ii < toRemove; ii++)
{
dRemovePos[0] = pos[nNumDim * ii];
dRemovePos[1] = pos[nNumDim * ii + 1];
kd_remove(TreeRoot, dRemovePos);
}
In the last iteration:
pos[nNumDim * ii]; = pos[2 * 18]; = pos[36];
pos[nNumDim * ii + 1]; = pos[2 * 18 + 1]; = pos[37];
pos[38] and pos[39] are still there.
Change to int toRemove = 20;.
Your code is obfuscated due to the flat array, why don't you declare some type like
struct data {
double el1;
double el2;
};
or
typedef double data[2];
and then
data *value = malloc(numel * sizeof(*value));
So, I know this probably won't be read by anyone but I found the bug after not touching the code for a while and for completeness here is how:
In the find_min() function I start the recursion with depth = 0.
This can cause the split dimension to get messed up and therefore not access all the nodes.
I modified the function to take depth as an argument and pass the recursion depth of remove_rec() like this:
kdnode* node_min = find_min(node->right, curdim, dim, depth + 1);
and
kdnode* node_min = find_min(node->left, curdim, dim, depth + 1);
respectively.

Memory leak in a recursive function in c

I need some help with memory leak in my C program. The following function searches a radix trie to find a word with a given number. It allocates some memory in every recursive call and I don't know how to sort it out so that the blocks allocated aren't lost. Please help.
char *substring(char *str, int position, int length) {
int c = 0;
char *sub = malloc(length*(sizeof(char))+1);
while (c < length) {
sub[c] = str[position + c];
c++;
}
return sub;
}
void prev(int w, int start, int end) {
char *newWord = "";
bool found = false;
void prevRec(struct tNode *t,
int w, int start, int end, char *soFar, int prevLength) {
if (t != NULL) {
char *updatedWord = malloc(strlen(soFar) + strlen(t->word));
strcpy(updatedWord,soFar);
strcat(updatedWord,t->word);
printf("%s\n", updatedWord);
int length = strlen(t->word);
if (t->count == w) {
found = true;
if ((start > -1) && (end <= strlen(updatedWord))) {
newWord = updatedWord;
} else {
newWord = "";
}
} else {
struct tNode *tmp = t->child;
struct tNode *tmp1 = NULL;
while ((tmp != NULL) && (!found)) {
prevRec(tmp,w,start,end,updatedWord,length);
tmp1 = tmp;
tmp = tmp->brother;
updatedWord = substring(updatedWord, 0, strlen(updatedWord) - prevLength);
}
}
}
}
prevRec(root,w,start,end,newWord,0);
printf("%s\n",newWord);
if (strlen(newWord) == 0) printf("ignored");
else {
char *tmp = substring(newWord,start,end - start + 1);
insert(tmp);
free(tmp);
}
You must free what you've allocated. In your case you could to sth. like that: replace
updatedWord = substring(updatedWord, 0, strlen(updatedWord) - prevLength);
by
char *sub = substring(updatedWord, 0, strlen(updatedWord) - prevLength);
free( updatedWord );
updatedWord = sub;
and add another
free( updatedWord );
as last line of your if( t != NULL ) block.
Besides as #Eregith has already mentioned in his comment, the '+1' for NULL is missing in the length you are allocating. And you should also add some error checking, as malloc() may return NULL

Resources