counting occurrences of word in binary search tree - c

I'm working on creating a function word_count that counts the number of occurrences of a given word in a binary search tree. I'm having some trouble with how the logic should work, and I'm stuck on the initial part. There's another function I created, word_observe, that checks whether a word is in the tree; if it is not, it adds the word and sets its count to one. If it is, it increments the count every time it comes across the word. That may be useful for understanding the full program.
/* One node of the word-frequency binary search tree. */
typedef struct tnode {
char *word;          /* the word stored at this node */
int count;           /* number of occurrences recorded for `word` */
struct tnode *left;  /* left subtree */
struct tnode *right; /* right subtree */
} tnode;
/*
 * Record one occurrence of `word` in the tree rooted at `node`.
 *
 * If the word is already present its count is incremented; otherwise a new
 * leaf holding a private copy of the word is created with count 1.
 *
 * Ordering used by this tree: when the node's word sorts BEFORE `word`
 * (strcmp(node->word, word) < 0) the search continues in the LEFT subtree,
 * otherwise in the right one. Any lookup must descend the same way.
 *
 * Returns the (possibly new) root of the subtree so callers can write
 * `root = word_observe(w, root)`. If memory for a new leaf cannot be
 * obtained, NULL is returned for that empty subtree and the existing tree is
 * left unchanged.
 */
tnode *word_observe(char *word, tnode *node) {
    if (node == NULL) {
        tnode *leaf = malloc(sizeof *leaf);
        if (leaf == NULL) {
            return NULL;
        }
        leaf->word = strdup(word);
        if (leaf->word == NULL) {
            /* Do not leave a half-built node behind. */
            free(leaf);
            return NULL;
        }
        leaf->count = 1;
        leaf->left = NULL;
        leaf->right = NULL;
        return leaf;
    }

    /* Compare once and branch on the result. */
    int cmp = strcmp(node->word, word);
    if (cmp == 0) {
        node->count++;
    } else if (cmp < 0) {
        node->left = word_observe(word, node->left);
    } else {
        node->right = word_observe(word, node->right);
    }
    return node;
}
/*
 * Look up `word` in the tree rooted at `node` and return its stored count.
 * NOTE(review): this is the version being asked about. As written, only a
 * match at the node passed in returns a count; see the inline notes.
 */
int word_count(char *word, tnode *node) {
if (node != NULL) {
if (strcmp(node->word, word) == 0) {
return node->count;
} else if (strcmp(word, node->word) < 0) { /* NOTE(review): operands are swapped relative to word_observe, which goes left when strcmp(node->word, word) < 0 */
word_count(word, node->left); /* NOTE(review): result of the recursive call is discarded */
} else {
word_count(word, node->right); /* NOTE(review): result discarded here as well */
}
}
return -1; /* reached for a NULL node and also after either recursive call */
}
/*
 * Demo driver: feeds a fixed sequence of words into the tree, then prints
 * the count reported for four of them.
 */
int main(void) {
    /* Observations are replayed in this exact order. */
    char *observed[] = {
        "dog", "dog",
        "apple",
        "hello",
        "pineapple", "pineapple", "pineapple",
        "zebra", "zebra", "zebra", "zebra",
    };
    tnode *counts = NULL;

    for (size_t i = 0; i < sizeof observed / sizeof observed[0]; i++) {
        counts = word_observe(observed[i], counts);
    }

    printf("apple: %d\n", word_count("apple", counts));
    printf("dog: %d\n", word_count("dog", counts));
    printf("pineapple: %d\n", word_count("pineapple", counts));
    printf("zebra: %d\n", word_count("zebra", counts));
    return 0;
}

You're kind of close. Your word_count function isn't searching correctly, which is puzzling since you are doing this correctly in word_observe. You need to search down the left side for < words and down the right side for > words. Since you're using recursion, you need to return those calls directly. When the word is found, simply return node->count, what you're doing invokes undefined behavior since count isn't initialized. Finally, you must return a value to indicate if the word isn't found.
/*
 * Return the count stored for `word` in the tree rooted at `node`,
 * or -1 when the word is not in the tree.
 *
 * Descends the same way word_observe does: left when the node's word
 * compares less than `word`, right otherwise.
 */
int word_count(char *word, tnode *node) {
    /* Ran off the tree: the word was never observed. */
    if (node == NULL) {
        return -1;
    }

    int order = strcmp(node->word, word);
    if (order == 0) {
        /* Found it; this value is passed back up through every caller. */
        return node->count;
    }

    /* node->word < word goes left, node->word > word goes right. */
    tnode *next = (order < 0) ? node->left : node->right;
    return word_count(word, next);
}
Working demo

Related

Data loss occurred in C program

I try to make a function to get numbers from a text file and count numbers that do not overlap and exceed a certain number. And I'm going to put it in the linked list. Calculating a number that doesn't overlap with creating a linked list is a good thing, but there is a problem with counting numbers that exceed a certain number. It comes out less than the original number. In Excel, there are numbers that should come out, but in the case of the C program function that I made, they are less than that number.
#include<stdio.h>
#include<stdlib.h>
/* Singly linked list node carrying one integer key. */
typedef struct Node {
int key; /* value stored in this node */
struct Node* link; /* next node in the list; NULL ends the list */
} listNode;
/* List handle: holds the pointer to the first node. */
typedef struct Head {
struct Node* head; /* first node of the list; initialize() sets it to NULL */
}headNode;
/* Running totals filled in by GetData() and printed by PrintResult(). */
int NodeCount = 0; /* incremented each time InsertNode() reports 1 */
int Data_morethan5000_Count = 0; /* incremented for every value read that is > 5000 */
/* Forward declarations for the functions defined below. */
headNode* initialize(headNode* rheadnode);
int DeleteList(headNode* rheadnode);
void GetData(headNode* rheadnode);
int InsertNode(headNode* rheadnode, int key);
void PrintResult(headNode* rheadnode);
/*
 * Program entry point: create the list handle, load input.txt into it,
 * print the totals and release the list.
 */
int main()
{
    headNode* headnode = initialize(NULL);
    if (headnode == NULL) {
        /* Without a list handle nothing below can run. */
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    GetData(headnode);
    PrintResult(headnode);
    DeleteList(headnode);
    return 0;
}
/*
 * Allocate a new, empty list handle.
 *
 * rheadnode is accepted for interface compatibility but is not used.
 * Returns the new handle, or NULL if the allocation fails. The caller owns
 * the handle and releases it with DeleteList().
 */
headNode* initialize(headNode* rheadnode) {
    (void)rheadnode;

    headNode* handle = malloc(sizeof *handle);
    if (handle == NULL) {
        return NULL;
    }
    handle->head = NULL;
    return handle;
}
/*
 * Free every node of the list and then the handle itself.
 *
 * A NULL handle is accepted and treated as an already-empty list.
 * Always returns 0.
 */
int DeleteList(headNode* rheadnode) {
    if (rheadnode == NULL) {
        return 0;
    }

    listNode* p = rheadnode->head;
    while (p != NULL) {
        /* Read the successor before the current node is released. */
        listNode* next = p->link;
        free(p);
        p = next;
    }
    free(rheadnode);
    return 0;
}
/*
 * Read whitespace-separated integers from "input.txt" and feed each one to
 * InsertNode().
 *
 * Side effects on the file-level counters:
 *   - Data_morethan5000_Count is incremented for every value read that is
 *     greater than 5000 (duplicates included);
 *   - NodeCount is incremented each time InsertNode() reports that a new
 *     node was added (return value 1).
 *
 * Reading stops at end of file or at the first token that is not an integer.
 */
void GetData(headNode* rheadnode)
{
    FILE* fp = fopen("input.txt", "r");
    if (fp == NULL) {
        perror("input.txt");
        return;
    }

    int newData;
    /*
     * fscanf returns the number of items converted. Testing "!= EOF" would
     * loop forever on a non-numeric token, because 0 is returned and the
     * offending characters are never consumed.
     */
    while (fscanf(fp, "%d", &newData) == 1) {
        int dataType = InsertNode(rheadnode, newData);
        if (newData > 5000) {
            Data_morethan5000_Count++;
        }
        if (dataType == 1) {
            NodeCount++;
        }
    }
    fclose(fp);
}
/*
 * Insert `key` into the list, which is kept in descending key order, unless
 * an equal key is already present.
 *
 * Returns 1 when a new node was linked in, 0 when the key was already in the
 * list (nothing is allocated in that case), and -1 if memory for the new
 * node could not be obtained.
 */
int InsertNode(headNode* rheadnode, int key) {
    listNode* previous = NULL;
    listNode* search = rheadnode->head;

    /* Skip every node whose key is larger than the one being inserted. */
    while (search != NULL && key < search->key) {
        previous = search;
        search = search->link;
    }

    /* Duplicate: report it before allocating, so nothing is leaked. */
    if (search != NULL && key == search->key) {
        return 0;
    }

    listNode* node = malloc(sizeof *node);
    if (node == NULL) {
        return -1;
    }
    node->key = key;
    node->link = search;

    if (previous == NULL) {
        rheadnode->head = node;
    } else {
        previous->link = node;
    }
    return 1;
}
/*
 * Print the summary line for each counter.
 *
 * Output the author is aiming for:
 *   The total number of nodes: 10011
 *   More than 5000 values: 45460
 *   Execution time: 1.234567 sec
 *
 * The execution time is not measured; only the label is printed.
 * rheadnode is not used.
 */
void PrintResult(headNode* rheadnode) {
    (void)rheadnode;

    printf("The total number of nodes: %d\n", NodeCount);
    printf("More than 5000 values: %d\n", Data_morethan5000_Count);
    fputs("Execution time: sec", stdout);
}
The code above is my program code. I am using a method to get a number from the input.txt file, put it into the newData variable, and increase the value of Data_morethan5000_Count if it is over 5000. So the result should be 45460. However, the C program outputs a result value of 45432. I want to know where data loss is taking place.

Can't returning a tree node when is found

How can I return a tree node when I found it?
So, I have a binary tree, and when I search the tree and find what I was looking for, I want to return a pointer to that node so I can use that node in another function. Here is my search function:
/*
 * Search the tree rooted at r for word, comparing case-insensitively with
 * strcasecmp, and print whether it was found.
 * NOTE(review): this is the version being asked about. Only a match at the
 * node passed in is returned to the caller; see the inline notes.
 */
Tnode *Tsearch(Tnode *r, char *word) {
if(r == NULL) {
printf("%s NOT FOUND\n", word);
return NULL;
}
int comp = strcasecmp(r->word, word);
if( comp == 0) {
printf("%s FOUND\n", r->word);
return r;
}
else if( comp > 0) {
Tsearch(r->left, word); /* NOTE(review): the node returned by the recursive call is dropped */
}
else if( comp < 0) {
Tsearch(r->right, word); /* NOTE(review): dropped here as well */
}
return 0; /* every call that recursed ends here and hands back a null pointer */
}
My problem is that when I try to use the return of the function Tsearch it doesn't work and I really can't understand why and how to solve it.
The function where I want to use that returned node from the search function is the following:
/*
 * Look up words[0] in the tree and copy the references from the matching
 * node's list into aux_arr, printing each title along the way.
 *
 * auxT, aux_arr and i are not declared in this function; they must come from
 * an enclosing scope that is not shown here.
 * NOTE(review): the author describes this function as unfinished.
 */
int Tsearch_ref(Tnode *r, char (*words)[30]) {
if(r == NULL) {
return 0;
}
printf("%s, %d", words[0], (int)strlen(words[0]));
Tsearch(r,words[0]); /* result unused; the same search is repeated on the next line */
auxT = Tsearch(r,words[0]);
Lnode *aux = auxT->head; /* NOTE(review): Tsearch can return NULL; auxT is dereferenced unchecked */
printf("Title: %s\n", ((Book *)aux->ref)->title); /* NOTE(review): aux is dereferenced before the NULL test below */
while(aux != NULL) {
aux_arr[i].ref=aux->ref;
printf("Title: %s\n", ((Book *)aux_arr[i].ref)->title);
printf("%p\n", &(aux_arr[i].ref));
aux = aux->next;
i++; /* NOTE(review): no bound on i is visible here; confirm aux_arr capacity */
}
/* NOTE(review): control reaches the end of a non-void function without a return value */
}
This function is not complete because I was trying to solve the return problem, but basically I want to take that tree node that have a list inside and put that list into a temporary array.
The structures are the following:
/* Bibliographic record for one book. */
typedef struct {
char *title;
char isbn13[ISBN13_SIZE]; /* ISBN13_SIZE is defined outside this excerpt */
char *authors;
char *publisher;
int year;
} Book;
/* Singly linked list node holding an untyped reference. */
typedef struct lnode {
struct lnode *next; /* following node in the list */
void *ref;          /* cast to Book * by Tsearch_ref */
} Lnode;
/* Binary tree node keyed by a word, with an attached list of references. */
typedef struct tnode {
struct tnode *left;  /* followed by Tsearch when strcasecmp(node word, key) > 0 */
struct tnode *right; /* followed by Tsearch when strcasecmp(node word, key) < 0 */
char *word;          /* key compared with strcasecmp */
Lnode *head;         /* first element of this word's list */
} Tnode;
It's my first question here in StackOverflow, so if you need any more info about anything I will obviously provide it.
Thanks in advance!
You need to change the recursive calls to Tsearch to actually return the found node. So, instead of this code:
else if( comp > 0) {
Tsearch(r->left, word);
}
else if( comp < 0) {
Tsearch(r->right, word);
}
do this:
else if( comp > 0) {
return Tsearch(r->left, word);
}
else if( comp < 0) {
return Tsearch(r->right, word);
}
Note that if your tree is very deep, you may use up the entire call stack and throw an exception.
Why not use while loop?
/*
 * Iterative, case-insensitive lookup of `word` in the tree rooted at `r`.
 *
 * Prints "<word> FOUND" and returns the matching node, or prints
 * "NOT FOUND" and returns NULL when the walk falls off the tree.
 */
Tnode *Tsearch(Tnode *r, char *word) {
    for (Tnode *node = r; node != NULL; ) {
        int order = strcasecmp(node->word, word);
        if (order == 0) {
            printf("%s FOUND\n", node->word);
            return node;
        }
        /* Node's word sorts after the key: go left; otherwise go right. */
        node = (order > 0) ? node->left : node->right;
    }

    printf("NOT FOUND\n");
    return NULL;
}

Unable to iterate the linked list in C

I am trying to print all the members of a linked list. I am traversing the list and counting the duplicate copies of the integers in the list if any. But when I traverse the list again to check for duplicate copies, my ipNext points to null terminating my previous traverse loop.
Inserting data function:
/*
 * Find the tree node whose data equals `user` and push a new IP entry
 * holding `ip` onto the front of that node's list. Does nothing when head is
 * NULL or no matching node is reached.
 * NOTE(review): this is the version being asked about; see the inline note
 * on the second branch of the walk.
 */
void insertIP(bstNode *head, char user[], int ip){
if(head != NULL){
bstNode* startList = head;
while ((startList) && (strcmp(startList->data, user) != 0) ){
if(strcmp(user, startList->data)<0)
{
startList=startList->left;
}
else if(strcmp(user, startList->data)>0)
{
startList=startList->left; /* NOTE(review): same direction as the branch above; insert() places larger keys to the right */
}
}
if (startList != NULL){
IP* new = (IP*)malloc(sizeof(IP)); /* NOTE(review): allocation result is used unchecked */
new->ip = ip;
//new->count = (new->count + 1);
new->ipNext=NULL;
/* Push onto the front: the old head becomes the new entry's successor. */
IP* temp = startList->ipHead;
startList->ipHead = new;
new->ipNext = temp;
}
}
}
Iteration function which looks for a specific data entry and count the occurences of it in the linked list if any.
/*
 * Find the node whose data equals `key` in the tree rooted at `root`.
 *
 * When the node is found, every entry of its IP list is printed together
 * with the number of times that IP occurs in the list (as reported by
 * count()). The list itself is only read, never modified, so it can be
 * traversed again afterwards.
 *
 * Returns the matching node, or NULL after printing "Not in tree" when the
 * key is absent.
 */
bstNode* search(char* key, bstNode* root)
{
    if (root == NULL) {
        printf("\nNot in tree\n");
        return NULL;
    }

    int res = strcmp(key, root->data);
    if (res < 0) {
        /* Hand the recursive result back instead of dropping it. */
        return search(key, root->left);
    }
    if (res > 0) {
        return search(key, root->right);
    }

    printf("\n'%s' found!\n", key);
    /* Walk with a separate cursor so root->ipHead keeps pointing at the list. */
    for (IP *entry = root->ipHead; entry != NULL; entry = entry->ipNext) {
        struct in_addr ip_addr;
        ip_addr.s_addr = (uint32_t)entry->ip;

        bstNode *cpy = root;
        int ipcount = count(&cpy, entry->ip);
        printf("The IP address is %s\n C:%d\n", inet_ntoa(ip_addr), ipcount);
    }
    return root;
}
Supporting function (This set the ipNext value to null which terminates the loop in search. Even though I am passing a copy of the pointer, I think that is my problem).
/*
 * Count how many entries in the IP list of node *start hold the value `item`.
 *
 * The list is traversed with a local cursor, so neither the node's ipHead nor
 * any ipNext link is changed. Every entry is examined, including the last
 * one. Returns 0 for a NULL argument, a NULL node or an empty list.
 */
int count(bstNode** start, int item)
{
    if (start == NULL || *start == NULL) {
        return 0;
    }

    int matches = 0;
    for (const IP *entry = (*start)->ipHead; entry != NULL; entry = entry->ipNext) {
        if (entry->ip == item) {
            matches++;
        }
    }
    return matches;
}
The data structure decleration:
/* One entry in a tree node's list of IP values. */
typedef struct ip{
int ip;            /* address value; search() copies it into in_addr.s_addr for printing */
struct ip *ipNext; /* next entry in the list */
}IP;
/* Binary search tree node keyed by a string, with an attached IP list. */
typedef struct bstNode
{
char data[32];          /* key, compared with strcmp */
struct bstNode* left;   /* insert() places keys that compare smaller here */
struct bstNode* right;  /* insert() places keys that compare larger here */
IP *ipHead;             /* first entry of this node's IP list */
}bstNode;
BST insert function:
/*
 * Insert (word, ip) into the tree rooted at `root`.
 *
 * A new node is created when `word` is not present yet; in both cases the IP
 * is then added to that node's list through insertIP(). Keys that compare
 * smaller go to the left subtree, larger ones to the right.
 *
 * `word` is copied into the node's fixed-size buffer and truncated if it
 * does not fit.
 *
 * Returns the root of the (possibly new) subtree, so callers write
 * `root = insert(root, word, ip)`. Returns NULL for an empty subtree when the
 * node allocation fails.
 */
bstNode *insert(bstNode *root, char *word, int ip)
{
    if (root == NULL) {
        bstNode *node = malloc(sizeof *node);
        if (node == NULL) {
            return NULL;
        }
        /* Bounded copy: data is a fixed 32-byte array. */
        snprintf(node->data, sizeof node->data, "%s", word);
        node->left = NULL;
        node->right = NULL;
        /* insertIP reads ipHead as the current list head, so it must start out NULL. */
        node->ipHead = NULL;
        /* Pass the stored key so the lookup inside insertIP matches this node. */
        insertIP(node, node->data, ip);
        return node;
    }

    int cmp = strcmp(word, root->data);
    if (cmp < 0) {
        root->left = insert(root->left, word, ip);
    } else if (cmp > 0) {
        root->right = insert(root->right, word, ip);
    } else {
        insertIP(root, word, ip);
    }
    return root;
}
I appreciate everyones help!
As pointed out in the comments, here is your problem:
/* Excerpt from insertIP as posted: the walk that is meant to locate `user`. */
while ((startList) && (strcmp(startList->data, user) != 0) ){
if(strcmp(user, startList->data)<0)
{
startList=startList->left;
}
else if(strcmp(user, startList->data)>0)
{
startList=startList->left; /* identical to the branch above: both comparisons move left */
}
}
You are taking the left path in both cases. I wouldn't normally post this as an answer (since it is already answered in the comments by Pras), but when I see this kind of non-optimized code, it bothers me: you may call the strcmp() function several times with the same data, and the result will always be the same. strcmp() may be a costly operation if long strings are compared, and besides, you are doing this inside a tree walk, so this operation could be performed A LOT of times. So why not assign the result to a variable the first time and then test that variable, instead of calling strcmp() again? Like:
/*
 * Compare once per node and reuse the result.
 * The operand order matches the original loop and insert():
 * strcmp(user, data) < 0 means `user` sorts before this node, so go left.
 */
int result;
while (startList && (result = strcmp(user, startList->data)) ) {
if (result < 0)
{
startList = startList->left;
}
else if (result > 0)
{
startList = startList->right;
}
}
Actually the second if is not needed, you may use a simple else because the test for zero is done in the condition of the while statement, so apparently if the result is not < 0, it will be > 0 for sure.

how to add a word in the link list while also checking and updating the frequency of the word in c

I'm trying to finish one of my assignments and I have some issues. I have to write a program that uses a struct to build a linked list to which I add words. If a word is already in the linked list, I just have to update its frequency.
I already have this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Node of the word-frequency linked list. */
typedef struct words Words;
struct words{
char *word;  /* the word; addWord below stores the pointer it is given, not a copy */
int freq;    /* occurrence count for the word */
Words *next; /* following node in the list */
};
/*
Inserts a copy of newWord into the list, in lexicographical order. If newWord is already
in the list, increment the freq member of the node. The function returns a pointer to
the list.
*/
/*
 * NOTE(review): this is the version being asked about; it does not yet do
 * what the description above says. The inline notes mark where.
 */
Words *addWord(Words *headPtr, char* newWord){
Words *current = headPtr;
if(headPtr == NULL)
{
current->word = newWord; /* NOTE(review): current is NULL on this path; no node has been allocated */
current->freq = 1;
}
else
{
while(current != NULL) /* NOTE(review): current is never advanced inside the loop */
if(strcmp(headPtr->word, newWord)) /* NOTE(review): nonzero means the strings differ; always compares against the head */
{
current->freq++;
return headPtr;
}
else
{
current->word = newWord; /* NOTE(review): stores the caller's pointer; main passes the same buffer on every call */
current->freq = 1;
}
}
return headPtr;
}
// Walks the list from headPtr and prints each word followed by its frequency.
void printWords(Words *headPtr){
    for (Words *node = headPtr; node != NULL; node = node->next) {
        printf("%s: %d", node->word, node->freq);
    }
}
// Releases every node of the list and resets the caller's head pointer to
// NULL, which is why the head is passed as Words **.
void deleteList(Words **headPtr){
    Words *node = *headPtr;
    while (node != NULL) {
        Words *doomed = node;
        node = node->next; // step forward before the node is released
        free(doomed);
    }
    *headPtr = NULL;
}
/*
 * Reads whitespace-separated words from standard input, adds each one to the
 * list, prints the list and frees it.
 */
int main(){
    char word[20];
    Words *list = NULL;

    /*
     * Let scanf itself decide when input is exhausted: it returns 1 only
     * when a word was read. The width limit keeps long tokens from
     * overflowing the 20-byte buffer (19 characters plus the terminator).
     */
    while (scanf("%19s", word) == 1) {
        list = addWord(list, word);
    }

    printWords(list);
    deleteList(&list);
    return 0;
}
There are some problems in your code. See comments embedded into your code:
// Annotated copy of an attempted addWord: the code is left as submitted and
// the comments point out what is wrong with it.
Words *addWord(Words *headPtr, char* newWord){
Words *current = (Words*) malloc(sizeof(Words)); // Don't malloc here.
// You don't know yet
// whether you need
// a new node or
// you just need to
// update freq
if(current == NULL) // If current is NULL you have
// serious problems, i.e. you
// are out of memory.
// Did you really intend to do:
// if (headPtr == NULL)
{
current->word = newWord;
*current->next = (*headPtr);
(*headPtr) = *current; // I'm not sure what you are trying here
// but it seems strange
}
else
{
while(current != NULL)
if(strcmp(headPtr->word, newWord)) // This is not the way to compare
// strings. Two strings compare
// equal when "strcmp" returns 0.
//
// Further, you don't want to
// use headPtr here.
{
current->freq++; // Use of uninitialized value
return; // Missing argument to return
}
else
{
current->word = newWord; // Use of uninitialized value
*current->next = (*headPtr); // Use of uninitialized value
(*headPtr) = *current;
}
}
// Missing return
}
Here is some code to start with:
#define WORD_SIZE 20 // capacity of the word buffer, terminator included
// List node that owns its own copy of the word.
struct words{
char word[WORD_SIZE]; // Use a char array
int freq; // number of times the word has been added
Words *next; // following node in the list
};
/*
 * Add `newWord` to the list, which is kept in ascending strcmp order.
 *
 * If the word is already present its freq is incremented; otherwise a new
 * node with freq 1 is linked in at the position that keeps the list sorted
 * (front, middle or end).
 *
 * Words longer than WORD_SIZE-1 characters are truncated to fit the node's
 * buffer; the truncated form is also what is compared, so repeated long
 * words still land on the same node.
 *
 * Returns the head of the list, which changes when the new node is inserted
 * at the front. Exits the program if memory cannot be allocated.
 */
Words *addWord(Words *headPtr, char* newWord)
{
    /* Work with exactly the text that would be stored in a node. */
    char key[WORD_SIZE];
    snprintf(key, sizeof key, "%s", newWord);

    /*
     * `link` points at the pointer that would have to change to insert a
     * node here: first the head itself, then each node's `next`.
     */
    Words **link = &headPtr;
    while (*link != NULL) {
        int cmp = strcmp((*link)->word, key);
        if (cmp == 0) {
            (*link)->freq++;
            return headPtr;
        }
        if (cmp > 0) {
            /* First node that sorts after the word: insert before it. */
            break;
        }
        link = &(*link)->next;
    }

    Words *node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "addWord: out of memory\n");
        exit(1);
    }
    snprintf(node->word, sizeof node->word, "%s", key);
    node->freq = 1;
    node->next = *link;
    *link = node;
    return headPtr;
}

Trouble with strcmp function in C

I am writing a function called check that compares the alphabetical string of a dictionary that is loaded in through the command line with a text that is also loaded in through the command line. The function is part of a larger function called speller that acts as a spell checker.
I ran several printf debugging tests to check the words being compared in the strcmp call. This is where the problem shows up: the function reports every word in the text as misspelled, even when the printf output shows that the string from the dictionary and the string from the text are the same.
Don't know where to go from this point so any help would be greatly appreciated. Thanks so much
Below is the code for the particular function. Thanks again.
/* Entry in one bucket's chain of dictionary words. */
typedef struct node {
char word[LENGTH + 1]; /* LENGTH is defined outside this excerpt */
struct node *next;     /* next entry in the same bucket */
} node;
node *hashtable[27]; /* bucket heads, indexed by the value hash_fun returns */
/* Returns true if word is in dictionary else false. */
int hash_fun (const char key); /* declared as taking a single character */
bool check (const char *word)
{
//case-desensitizing
char caseless[strlen (word)];
int i, length;
for (int head = 0; head < 26; head++) {
hashtable[head] = NULL;
}
for (i = 0, length = strlen (word); i < length; i++) {
//("%c\n",word[i]);
if (isupper (word[i])) {
caseless[i] = tolower (word[i]);
} else {
caseless[i] = word[i];
}
}
caseless[i] = '\0';
//printf("-%s %s- \n*",word, caseless);
int word_index = hash_fun (caseless);
//printf("%i", word_index);
node *new_node = malloc (sizeof (node));
if (new_node == NULL) {
return 2;
}
if (word_index >= 0) {
if (hashtable[word_index] == NULL) {
hashtable[word_index] = new_node;
new_node->next = NULL;
}
node *cursor = malloc (sizeof (node));
cursor = hashtable[word_index];
while (cursor != NULL) {
//printf("Dictionary:%s and Text:%s \n", cursor->word, caseless);
int found;
found = strcmp (caseless, cursor->word);
if (found == 0) {
return true;
}
cursor = cursor->next;
}
}
return false;
}

Resources