I'm trying to create a program that reads a dictionary and then stores the words into the hash table, then read another file checks every word of that file if it is in the hash table if it is not then it will be outputted as a misspelled word. I'm first trying to check if I can load the dictionary file into my hash table and then output the words in the hash table yet my code seems to crash whenever I try to run it. The hash function I use was taken from the Internet. I'm also still very new with data structures, and having a hard time understanding.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// file to read
#define dictionary "dictionary.txt"
// No. of buckets
const unsigned int N = 10;
// Singly linked list node used for the chains hanging off each table slot.
typedef struct node
{
// pointer to the word's characters; the struct itself holds no string storage
char* word;
// next node in the same chain, or NULL at the end
struct node *next;
}
node;
node *table[10];
// hash function
// djb2-style string hash: start at 5381, fold every byte in as
// h * 33 + byte, then reduce the result to a bucket index in [0, 10).
//
// word:    NUL-terminated string to hash (not modified).
// returns: bucket index; the empty string yields 5381 % 10.
unsigned int hash(const char *word)
{
    unsigned int h = 5381;
    int c;

    // This must be an assignment: a comparison (`c == *word++`) is false for
    // the first character of any non-empty word, so nothing would be hashed.
    // The unsigned char conversion keeps bytes >= 0x80 from going negative.
    while ((c = (unsigned char)*word++) != 0)
    {
        h = ((h << 5) + h) + (unsigned int)c; // h * 33 + c
    }
    return h % 10;
}
// Reads the dictionary file line by line into the hash table, prints every
// stored word, then releases the table and closes the file.
// Returns 1 if the file cannot be opened or an allocation fails.
int main(void)
{
// initialize array heads to NULL
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
// Open file to read
FILE *indata = fopen(dictionary, "r");
if (indata == NULL)
{
printf("cant open\n");
return 1;
}
// variable to store words read from the file
char *words = malloc(sizeof(char) * 20);
if (words == NULL)
{
printf("no memory\n");
return 1;
}
// While loop to read through the file
// fgets keeps the trailing '\n', so it is hashed and copied along with the word
while (fgets(words, 20, indata))
{
// get the index of the word using hash function
int index = hash(words);
// create new node
node *newNode = malloc(sizeof(node));
if (newNode == NULL)
{
printf("here\n");
return 1;
}
// make the new node the new head of the list
// NOTE(review): newNode->word has not been assigned anything yet, so this
// strcpy writes through an uninitialized pointer
strcpy(newNode->word, words);
newNode->next = table[index];
table[index] = newNode;
// free memory
// NOTE(review): the node was just linked into table[index]; freeing it here
// leaves the table holding a dangling pointer
free(newNode);
}
// free memory
free(words);
// loop to print out the values of the hash table
for (int i = 0; i < N; i++)
{
node *tmp = table[i];
// NOTE(review): tmp is NULL for an empty bucket, so tmp->next dereferences
// NULL; the condition also stops before printing the last node of a chain
while (tmp->next != NULL)
{
printf("%s\n", tmp->word);
tmp = tmp->next;
}
}
// loop to free all memory of the hash table
for (int i = 0; i < N; i++)
{
// NOTE(review): "if" rather than "while" -- only the first node of each
// bucket is released
if (table[i] != NULL)
{
node *tmp = table[i]->next;
free(table[i]);
table[i] = tmp;
}
}
// close the file
fclose(indata);
}
At least three bugs that independently caused a segfault:
First, newNode->word is used uninitialized, so it points to random memory and the strcpy can segfault. It is better to use strdup, which allocates the storage and copies the string in one step.
Also, after you put newNode in the table, you do free(newNode) making what it points to invalid. This causes the second loop to segfault
Third, in the second loop, if table[i] is null, the while (tmp->next != NULL) will segfault
I've annotated and corrected your code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// file to read
#define dictionary "dictionary.txt"
// No. of buckets
const unsigned int N = 10;
// Singly linked list node used for the chains hanging off each table slot.
typedef struct node {
// pointer to the word's characters; must point at valid storage before use
char *word;
// next node in the same chain, or NULL at the end
struct node *next;
} node;
node *table[10];
// hash function
// djb2-style string hash: start at 5381, fold every byte in as
// hash * 33 + byte, then reduce the result modulo the bucket count N.
unsigned int
hash(const char *word)
{
    unsigned int hash = 5381;
    int c = 0;

// NOTE/BUG: "==" compares c (0) with the first character instead of storing
// it, so the loop body never runs for a non-empty word and every word gets the
// same index -- assign instead (via unsigned char so bytes >= 0x80 stay
// non-negative)
#if 0
    while (c == *word++)
#else
    while ((c = (unsigned char) *word++) != 0)
#endif
        hash = ((hash << 5) + hash) + c;

// NOTE: not a bug but probably better
#if 0
    return hash % 10;
#else
    return hash % N;
#endif
}
// Loads every line of the dictionary file into the hash table, prints the
// stored words bucket by bucket, then releases everything.
// Returns 0 on success and 1 if the file cannot be opened or an allocation
// fails; the file, the line buffer and any nodes built so far are released on
// every path.
int
main(void)
{
    int status = 1;             // becomes 0 only after a complete run

    // initialize array heads to NULL
    for (unsigned int i = 0; i < N; i++) {
        table[i] = NULL;
    }

    // Open file to read
    FILE *indata = fopen(dictionary, "r");
    if (indata == NULL) {
        printf("cant open\n");
        return 1;
    }

    // variable to store words read from the file
    // (a line longer than 19 characters is delivered by fgets in pieces)
    char *words = malloc(sizeof(char) * 20);
    if (words == NULL) {
        printf("no memory\n");
        fclose(indata);
        return 1;
    }

    // While loop to read through the file
    while (fgets(words, 20, indata)) {
        // NOTE/BUG: fgets keeps the trailing newline; strip it so the word is
        // hashed and stored the way a later lookup will spell it
        words[strcspn(words, "\n")] = '\0';

        // get the index of the word using hash function
        unsigned int index = hash(words);

        // create new node
        node *newNode = malloc(sizeof(node));
        if (newNode == NULL) {
            printf("here\n");
            goto cleanup;
        }

        // make the new node the new head of the list
        // NOTE/BUG: word is never set to anything valid -- possible segfault here
#if 0
        strcpy(newNode->word, words);
#else
        newNode->word = strdup(words);
#endif
        // strdup allocates, so it can fail too
        if (newNode->word == NULL) {
            printf("here\n");
            free(newNode);
            goto cleanup;
        }
        newNode->next = table[index];
        table[index] = newNode;

        // NOTE/BUG: this will cause the _next_ loop to segfault -- don't deallocate
        // the node you just added to the table
#if 0
        free(newNode);
#endif
    }

    // loop to print out the values of the hash table
    for (unsigned int i = 0; i < N; i++) {
        node *tmp = table[i];

        // NOTE/BUG: this test fails if the tmp is originally NULL (i.e. no entries
        // in the given hash index)
#if 0
        while (tmp->next != NULL) {
#else
        while (tmp != NULL) {
#endif
            printf("%s\n", tmp->word);
            tmp = tmp->next;
        }
    }
    status = 0;

cleanup:
    // free memory
    free(words);

    // loop to free all memory of the hash table
    // NOTE/BUG: "if" released only the first node of each bucket, and the
    // strdup'ed word was never released -- walk the whole chain and free both
    for (unsigned int i = 0; i < N; i++) {
#if 0
        if (table[i] != NULL) {
#else
        while (table[i] != NULL) {
#endif
            node *tmp = table[i]->next;
            free(table[i]->word);
            free(table[i]);
            table[i] = tmp;
        }
    }

    // close the file
    fclose(indata);
    return status;
}
UPDATE:
I made a linked list program before that stores an integer in the list, int number; struct node *next; and I used newNode->number = 5; and it worked, why is it in this case it doesn't?? Is it because I am working with strings here??
The difference is that word is a pointer. It must be assigned a value before it can be used. strcpy does not assign a value to word. It tries to use the contents of word as the destination address of the copy.
But, the other two bugs happen regardless of word being a char * vs number being int.
If you had defined word not as a pointer, but as a fixed array [not as good in this usage], the strcpy would have worked. That is, instead of char *word;, if you had done (e.g.) char word[5];
But, what you did is better [with the strdup change] unless you can guarantee that the length of word can hold the input. strdup will guarantee that.
But, notice that I [deliberately] made word have only five chars to illustrate the problem. It means that the word to be added can only be 4 characters in length [we need one extra byte for the nul terminator character]. You'd need to use strncpy instead of strcpy but strncpy has issues [it does not guarantee to add the nul char at the end if the source length is too large].
Coincidentally, there is another question today with an answer that may help shed some more light on the two ways of declaring your word struct member: Difference between memory allocations of struct member (pointer vs. array) in C
From a cursory glance I can see two problems:
You don't allocate space for your word in the node; you simply strcpy the word into an uninitialized pointer. You might want to use strdup instead.
You free the memory of the node after you added it to the list. The table is an array of pointers, so you store the pointer in the table and then throw away the memory that it points to.
Oh, three: and in the final loop you free that already-freed memory again...
Related
I have been working on this problem set for quite a time and the code seems to be wrong but I couldn't find the solution. I have been comparing my code and other people's code but I still don't know where I got wrong. Really appreciate all your help if you can provide me with some ways to solve this problem. It keeps prompting me free(): double free detected in tcache 2 but I can't seem to find my mistake.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
// the word itself, stored inline: up to LENGTH characters plus the terminator
char word[LENGTH + 1];
// next node in the same bucket's chain, or NULL at the end
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 50;
// Hash table
node *table[N];
//word count
int count = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
bool found = false;
node *current = table[hash(word)];
while (current != NULL)
{
if (strcasecmp(current -> word, word) == 0)
{
found = true;
}
else if(current -> next != NULL)
{
current = current -> next;
}
else
{
return false;
}
}
return found;
}
// Hashes word to a number
// Case-insensitive djb2: each byte is upper-cased and folded in as
// hash * 33 + c; the result is reduced modulo N to a bucket index.
unsigned int hash(const char *word)
{
    unsigned long hash = 5381;
    int c;

    // toupper() is only defined for EOF and values representable as unsigned
    // char, so convert the (possibly negative) char before the call.
    while ((c = toupper((unsigned char)*word++)))
    {
        hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
    }
    return hash % N;
}
// Loads dictionary into memory, returning true if successful, else false
// Reads whitespace-separated words with fscanf and pushes one node per word
// onto the front of the bucket chosen by hash(); count is incremented per word.
bool load(const char *dictionary)
{
// the only failure this function reports is an unopenable file
FILE *infile = fopen(dictionary, "r");
if (infile == NULL)
{
return false;
}
char buffer[LENGTH+1];
// NOTE(review): "%s" carries no field width, so a token longer than LENGTH
// would overrun buffer
while (fscanf(infile, "%s", buffer) != EOF)
{
// NOTE(review): the malloc result is used without a NULL check
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
// push onto the front of the bucket's chain
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
// NOTE(review): n is the node that was just stored in the table; freeing it
// here leaves the table pointing at released memory
free(n);
}
fclose(infile);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// count is the file-level counter that load() increments once per word read
return count;
}
// Unloads dictionary from memory, returning true if successful, else false
// Every node of every bucket is freed; the call succeeds only when the number
// of nodes freed equals the number of words counted while loading.
bool unload(void)
{
    // start from the loaded total and tick it down once per freed node
    int remaining = count;

    for (int bucket = 0; bucket < N; bucket++)
    {
        node *cursor = table[bucket];
        while (cursor != NULL)
        {
            // grab the successor before the node is released
            node *following = cursor->next;
            free(cursor);
            cursor = following;
            remaining--;
        }
    }
    return remaining == 0;
}
The calls of free in this while loop
while (fscanf(infile, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
free(n);
}
does not make sense. You immediately deleted (an object of the type node, through the pointer n) what you were trying to add to the table (a valid address of an allocated object of the type node). As a result the element of the table at the position hash(buffer) that is set like
table[hash(buffer)] = n;
has an invalid value because it is the address of the already deleted node in this statement
free(n);
So in the function unload this invalid address will be again used to free already freed memory within the function load.
Pay attention to that you did not allocate memory as you wrote in a comment "for node n". n is just a pointer to the allocated unnamed object of the type node. So you are not freeing the pointer n itself in this statement
free(n);
You are freeing the allocated object of the type node using the pointer n. Thus all pointers that pointed to the allocated object of the type node become invalid.
This program works as a spell checker, it reads a dictionary file to load into the hash table, then reads another text file that will be read and will check every word if it is in the hash table, if not then it is considered a misspelled word. All of my functions seem to work except for my check function, when I run it the number of misspelled words is always the same as the number of words in text. This was working before but I changed the hash function because this hash function was said to be better to assign the values into unique index, but after changing just the hash function the check function doesn't work anymore.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
// the word itself, stored inline: up to LENGTH characters plus the terminator
char word[LENGTH + 1];
// next node in the same bucket's chain, or NULL at the end
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 200000;
// Hash table
node *table[N];
// Returns true if word is in dictionary else false
// Intended flow: build a lowercase copy of word, hash the copy, then scan that
// bucket's chain for an exact strcmp match.
bool check(const char *word)
{
int len = strlen(word);
// variable-length buffer for word plus terminator; its contents start out
// indeterminate
char copy[len + 1];
// change into lowercase the word
// NOTE(review): the condition compares the index i with '\0' (i.e. 0), which is
// already false on entry, so the body never runs; copy is left unfilled and
// unterminated
for (int i = 0; i != '\0'; i++)
{
copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);
// empty bucket: the word cannot be present
if (table[index] == NULL)
{
return false;
}
node *tmp = table[index];
// check if the word is in the hash table
while (tmp != NULL)
{
if (strcmp(tmp->word, copy) == 0)
{
return true;
}
tmp = tmp->next;
}
return false;
}
// Hashes word to a number
// Shift-and-xor over every character of word, reduced modulo N.
unsigned int hash(const char *word)
{
    /* scheme credited to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned int acc = 0;
    const int total = strlen(word);
    int pos = 0;

    while (pos < total)
    {
        acc = (acc << 2) ^ word[pos];
        pos++;
    }
    return acc % N;
}
// Loads dictionary into memory, returning true if successful else false
// Clears the table, then reads the file line by line with fgets and pushes one
// node per line onto the front of the bucket chosen by hash().
bool load(const char *dictionary)
{
// line buffer: LENGTH + 1 bytes
char *words = malloc(sizeof(char) * (LENGTH + 1));
if (words == NULL)
{
// NOTE(review): in a bool function 1 converts to true, so this failure is
// reported as success
return 1;
}
// initialize the hash table to NULL
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
// open dictionary file
// NOTE(review): the fopen result is passed to fgets without a NULL check
FILE *indata = fopen(dictionary, "r");
// 1 character for '\0' and another for '\n' because fgets takes a trailing new line
// when it reads 'man' the value of words will be "man\n\0" so meaning 2 extra characters
// NOTE(review): fgets is told the buffer holds LENGTH + 2 bytes, but only
// LENGTH + 1 were allocated above
while (fgets(words, LENGTH + 2, indata) != NULL)
{
// get the index by using the hash function
// NOTE(review): words still ends in '\n' here, so the index is computed from
// a different string than the one stored below
int index = hash(words);
// allocate memory for the newNode
node *newNode = malloc(sizeof(node));
if (newNode == NULL)
{
// NOTE(review): words and indata are not released on this path
return false;
}
// get rid of the trailing new line from fgets
// (this drops the last character even when the final line has no '\n')
words[strlen(words) - 1] = '\0';
strcpy(newNode->word, words);
// make the newNode the head of the list
newNode->next = table[index];
table[index] = newNode;
}
// free memory and close the opened file
free(words);
fclose(indata);
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
// The total is recomputed on every call by walking each bucket's chain.
unsigned int size(void)
{
    unsigned int total = 0;

    for (int bucket = 0; bucket < N; bucket++)
    {
        // one increment per node in this chain
        for (node *cur = table[bucket]; cur != NULL; cur = cur->next)
        {
            total++;
        }
    }
    return total;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
// TODO
// loop through the whole hash table
for (int i = 0; i < N; i++)
{
while (table[i] != NULL)
{
node *tmp = table[i]->next;
free(table[i]);
table[i] = tmp;
}
}
return true;
}
There are multiple problems in your code:
The definition node *table[N]; is invalid in C because N must be a compile time constant expression. const unsigned int N = 200000; fits this constraint in C++, but not in C. N must be a macro or an enum definition.
in check(), the loop to copy the string as lowercase is incorrect: for (int i = 0; i != '\0'; i++) should be for (int i = 0; word[i] != '\0'; i++)
in check() you do not null terminate the string you build in copy. copy is a variable-length array with automatic storage, so it is uninitialized and the null terminator must be set explicitly.
the char argument in tolower(word[i]) must be cast as tolower((unsigned char)word[i]) to avoid undefined behavior on negative char values, should the char be signed on your platform.
in load(), the words array is allocated with a length of LENGTH+1 bytes, but you pass LENGTH+2 to fgets as the buffer size, causing potential undefined behavior if the dictionary contains a line with LENGTH characters.
in load(), hash(words) is called before stripping the newline at the end of the line. Hence the hash code is incorrect and the word will not be found in the dictionary because it is stored in the wrong bucket.
Here is a modified version:
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node {
// next node in the same bucket's chain, or NULL at the end
struct node *next;
// the word itself, stored inline: up to LENGTH characters plus the terminator
char word[LENGTH + 1];
} node;
// Number of buckets in hash table
#define HASH_SIZE 200000
// Hash table
node *table[HASH_SIZE];
// Hashes word to a number
// Shift-and-xor over every character of word, reduced modulo HASH_SIZE.
unsigned int hash(const char *word) {
    /* scheme credited to...
     * https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned int acc = 0;
    const char *p = word;

    while (*p != '\0') {
        acc = (acc << 2) ^ *p;
        p++;
    }
    return acc % HASH_SIZE;
}
// Returns true if word is in dictionary else false
// The word is lowercased into a local buffer, hashed, and compared with strcmp
// against every node of the selected bucket.
bool check(const char *word) {
    // anything longer than LENGTH would not fit in the buffer below
    if (strlen(word) > LENGTH) {
        return false;
    }

    // lowercase copy of the word, terminated explicitly
    char lowered[LENGTH + 1];
    size_t n = 0;
    while (word[n] != '\0') {
        lowered[n] = (char)tolower((unsigned char)word[n]);
        n++;
    }
    lowered[n] = '\0';

    // scan the bucket the lowercase form hashes to
    node *cur = table[hash(lowered)];
    while (cur != NULL) {
        if (strcmp(cur->word, lowered) == 0) {
            return true;
        }
        cur = cur->next;
    }
    return false;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary) {
// 1 character for '\0' and another for '\n' because fgets takes a trailing new line
// when it reads 'man' the value of words will be "man\n\0" so meaning 2 extra bytes
char words[LENGTH + 2];
// open dictionary file
FILE *indata = fopen(dictionary, "r");
if (indata == NULL)
return false;
while (fgets(words, sizeof words, indata) != NULL) {
// get rid of the trailing new line from fgets
words[strcspn(words, "\n")] = '\0';
// allocate memory for the newNode
node *newNode = malloc(sizeof(node));
if (newNode == NULL) {
fclose(indata);
return false;
}
strcpy(newNode->word, words);
// get the index by using the hash function
int index = hash(words);
// make the newNode the head of the list
newNode->next = table[index];
table[index] = newNode;
}
// close the opened file
fclose(indata);
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
// The total is recomputed on every call by walking each bucket's chain.
unsigned int size(void) {
    unsigned int total = 0;
    int bucket = 0;

    while (bucket < HASH_SIZE) {
        node *cur = table[bucket];
        while (cur != NULL) {
            total++;
            cur = cur->next;
        }
        bucket++;
    }
    return total;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void) {
// loop through the whole hash table
for (int i = 0; i < HASH_SIZE; i++) {
while (table[i] != NULL) {
node *next = table[i]->next;
free(table[i]);
table[i] = next;
}
}
return true;
}
I am new to C programming. I am trying to do the pset5 in CS50 while trying to understand the concepts of memory, linked list and hashtable. I wrote the code and it compiled but there seems to be something wrong because every time I tried to execute the code it returns some garbage value. Could anyone please help me with that? Many thanks.
#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<string.h>
#include "dictionary.h"
#define DICTIONARY "dictionaries/small"
// List node holding one dictionary word.
typedef struct node
{
// the word itself, stored inline: up to LENGTH characters plus the terminator
char WORD[LENGTH + 1];
// next node in the same bucket's chain, or NULL at the end
struct node *next;
}
node;
int hash(char *word);
// Reads the dictionary one character at a time, builds a node for each line,
// files it under the bucket chosen by hash(), then prints and frees the table.
int main(void)
{
// NOTE(review): the table is used as 26 node pointers, but the size requested
// is 26 whole node structs, and malloc leaves the slots uninitialized
node **HASHTABLE = malloc(sizeof(node) * 26);
//open the dictionary
FILE *dic = fopen(DICTIONARY, "r");
if (dic == NULL)
{
fprintf(stderr, "Could not open the library\n");
return 1;
}
// index is the write position inside word for the line being assembled
int index = 0;
char word[LENGTH + 1];
for (int c = fgetc(dic); c != EOF; c = fgetc(dic))
{
// NOTE(review): index is never checked against LENGTH before this store
word[index] = c;
index++;
if (c == '\n')
{
int table = hash(word);
printf("%d\n", table);
//create a newnode
node *newnode = malloc(sizeof(node));
// NOTE(review): no '\0' has been written into word, so strcpy has no
// terminator to stop at; the '\n' is also part of what gets copied
strcpy(newnode->WORD, word);
newnode->next = NULL;
printf("Node: %s\n", newnode->WORD);
// start the next line at the beginning of word
index = 0;
//add new node to hash table
// NOTE(review): the slots were never set to NULL, so this test reads
// indeterminate pointers
if (HASHTABLE[table] == NULL)
{
HASHTABLE[table] = newnode;
}
else
{
// NOTE(review): this overwrites the head's next pointer, so whatever node was
// second in the chain is no longer reachable
HASHTABLE[table]->next = newnode;
}
}
}
// print every word, bucket by bucket
for(int i = 0; i < 26; i++)
{
node *p = HASHTABLE[i];
while (p != NULL)
{
printf("%s", p->WORD);
p = p->next;
}
}
//free memory
for(int i = 0; i < 26; i++)
{
node *p = HASHTABLE[i];
while (p != NULL)
{
// save the successor before the node is released
node *temp = p->next;
free(p);
p = temp;
}
}
free(HASHTABLE);
// NOTE(review): dic is never closed
}
// Maps a word to a bucket by its first character: a letter gives its offset
// from 'a' / 'A' (case-insensitive); any other first character, including the
// terminator of an empty string, gives 0.
int hash(char *word)
{
    // The <ctype.h> classifiers are only defined for EOF and values
    // representable as unsigned char, so convert before testing.
    unsigned char first = (unsigned char)word[0];

    if (islower(first))
    {
        return first - 'a';
    }
    if (isupper(first))
    {
        return first - 'A';
    }
    return 0;
}
Your code has serious problems that result in undefined behavior.
Two of them are the result of this line:
node **HASHTABLE = malloc(sizeof(node) * 26);
That allocates 26 node structures, but the HASHTABLE variable expects the address of a pointer to an array of node * pointers (that's the ** in the node **HASHTABLE declaration).
So, you should replace it with something like:
node **HASHTABLE = malloc( 26 * sizeof( *HASHTABLE ) );
Note that I used the dereferenced value of the variable being assigned to - HASHTABLE. In this case *HASHTABLE is a node * (one less * than in the declaration), so the allocation is sized for 26 pointers. And if the type of HASHTABLE changes, you don't need to make any other changes to the malloc() statement.
That first problem only allocates more memory than the 26 pointers need (a node is larger than a pointer), so by itself it likely wouldn't cause any problems.
However, there's still a problem with
node **HASHTABLE = malloc( 26 * sizeof( *HASHTABLE ) );
that will cause problems - and serious ones.
That array of 26 pointers isn't initialized - you don't know what's in them. They can point anywhere. So this won't work well, if at all:
if (HASHTABLE[table] == NULL)
Meaning this points off to somewhere unknown:
HASHTABLE[table]->next = newnode;
And that will cause all kinds of problems.
The simplest fix? Initialize the values all to zero by using calloc() instead of malloc():
node **HASHTABLE = calloc( 26, sizeof( *HASHTABLE ) );
Until that's fixed, any results from your entire program are questionable, at best.
The reason for the garbage is that you didn't null-terminate the string:
strcpy(newnode->WORD, word);
strcpy expects src to point to a null-terminated string, but nothing ever writes the terminator into word. Simply terminate it with
word[index] = 0;
before the strcpy.
Other than that, the ones in Andrew Henle's answer should be addressed too, but I am not going to repeat them here.
BTW, next you will notice that
HASHTABLE[table]->next = newnode;
wouldn't work properly - that code always inserts the node as the 2nd one. But you want to always insert the new node unconditionally as the head, with
newnode->next = HASHTABLE[table];
HASHTABLE[table] = newnode;
There need not be any special condition for inserting the first node to a bucket.
My question is connected with task from CS50, pset5. For ones who don't know any about that, I'll try to explain. Nothing very special. I just need to make function which will intake dictionary file (it was written before, all of the words in that file are uppercase), which contains more over 20K words, and sort them somehow. I've made simple and naive algorithm, building hash-table, which sort words, depending on the theirs first letters. And I've passed all checks by the CS50, so my program is working well. But comparing to the course's one - it is too slow. Time of executing for personnel's is 0.1s, but for mine - 5.0s - 7.0s. What can I improve in this code to make it faster? Or should I totally change everything? I have no experience in optimization, `cause just started learning. It would be great to study from any of you =) Thanks in advance!
// Some constant values, which are declared before the function
#define LENGTH 46
#define ALPHALENGTH 26
/* List node: one dictionary word plus the link to the next node of the same list. */
typedef struct node {
/* the word itself, stored inline: up to LENGTH characters plus the terminator */
char word[LENGTH +1];
/* next node in the same list, or NULL at the end */
struct node *next;
} node;
node *hashTable[ALPHALENGTH];
/*
 * Reads whitespace-separated words from the file `dictionary` and appends each
 * one to the END of the list selected by its first letter, so every list keeps
 * the words in file order. Returns false only if the file cannot be opened.
 */
bool load(const char *dictionary) {
FILE *f = fopen(dictionary, "r");
if (f == NULL) {
return false;
}
char word[LENGTH + 1];
int hash = 0;
/* start with every list empty */
for (int i = 0; i < ALPHALENGTH; i++) {
hashTable[i] = NULL;
}
// 46 - is LENGTH, but for some reason it is impossible
// to put variable`s name between quotation marks
while (fscanf(f, "%46s", word) == 1) {
// make every letter lowercase to get result from 0 - 25
// NOTE(review): a word whose first character is not a letter gives an index
// outside 0..25, and nothing checks for that before hashTable is indexed
hash = tolower(word[0]) - 'a';
// NOTE(review): the malloc result is used without a NULL check
node *new_node = malloc(sizeof(node));
strcpy(new_node->word, word);
// check if element is first in the list
if (hashTable[hash] == NULL) {
new_node->next = NULL;
hashTable[hash] = new_node;
} else {
/* walk to the last node of the list; this walk is repeated for every
   inserted word, so its cost grows with the length of the list */
node *ptr = hashTable[hash];
do {
if (ptr->next == NULL) {
break;
}
ptr = ptr->next;
} while (true);
ptr->next = new_node;
new_node->next = NULL;
}
}
fclose(f);
return true;
}
Your problem isn't your hash function; it's that your hash table is way too small.
From the sound of things, you have about 26 hash buckets for over 20,000 words. This places between 750 and 1000 words in each bucket. (Probably much more in some, as the hash function you're using is not uniform. There are very few words that start with x or q, for instance.)
Try expanding the hash table to 1000 entries (for instance), so that there are around 20 entries in each bucket. You will need a new hash function to do this; anything will work, but to work well it will need to generate values up to the size of the table. (Adding together the values of all the letters won't work, for instance, as it'll almost never reach 1000.)
The problem is not in your hash function, nor in the size of your hash table; it is in your list management: your method for appending words to the corresponding lists has a complexity of O(N^2), because every insertion walks the whole list to find its tail.
By the way, your hash function is not used for hashing, but for dispatching. You are sorting the table only on the first letter of each word, keeping the words with the same initial in the same order. If you meant to sort the dictionary completely, you would still need to sort each list.
You can drastically improve the performance while keeping the same semantics by prepending to the lists and reversing the lists at the end of the parsing phase.
For a dictionary with 20 thousand words, the code below runs 50 times faster (as expected by the CS50 site):
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LENGTH 46
#define ALPHALENGTH 26
/* List node: the link to the next node of the same list plus one dictionary word. */
typedef struct node {
/* next node in the same list, or NULL at the end */
struct node *next;
/* the word itself, stored inline: up to LENGTH characters plus the terminator */
char word[LENGTH +1];
} node;
node *hashTable[ALPHALENGTH];
/*
 * Loads the whitespace-separated words of the file `dictionary` into
 * hashTable, one list per initial letter (case-insensitive), keeping the
 * words of each list in file order.
 *
 * Returns true when the whole file was loaded. Returns false if the file
 * cannot be opened, if a node cannot be allocated, or if a word's first
 * character does not map to one of the ALPHALENGTH lists; in the last two
 * cases loading stops and the words read so far stay in hashTable.
 */
bool load(const char *dictionary) {
    FILE *f = fopen(dictionary, "r");
    if (f == NULL) {
        return false;
    }
    char word[LENGTH + 1];
    bool ok = true;

    for (int i = 0; i < ALPHALENGTH; i++) {
        hashTable[i] = NULL;
    }
    /* the field width in the format must stay equal to LENGTH */
    while (fscanf(f, "%46s", word) == 1) {
        /* make the first letter lowercase to get a result from 0 - 25;
           tolower() needs an unsigned char value, and the range check keeps
           a non-letter initial from indexing outside hashTable */
        int hash = tolower((unsigned char)word[0]) - 'a';
        if (hash < 0 || hash >= ALPHALENGTH) {
            ok = false;
            break;
        }
        node *new_node = malloc(sizeof(node));
        if (new_node == NULL) {
            ok = false;
            break;
        }
        strcpy(new_node->word, word);
        /* prepending to the list: constant time per word */
        new_node->next = hashTable[hash];
        hashTable[hash] = new_node;
    }
    for (int i = 0; i < ALPHALENGTH; i++) {
        node *n, *prev, *next;
        /* reverse list so the words end up in file order */
        for (prev = NULL, n = hashTable[i]; n != NULL; ) {
            next = n->next;
            n->next = prev;
            prev = n;
            n = next;
        }
        hashTable[i] = prev;
    }
    /* single exit: the file is closed on the failure paths as well */
    fclose(f);
    return ok;
}
/* Writes every stored word to stdout, one per line, list by list in index order. */
void save(void) {
    int bucket = 0;

    while (bucket < ALPHALENGTH) {
        node *cur = hashTable[bucket];
        while (cur != NULL) {
            puts(cur->word);
            cur = cur->next;
        }
        bucket++;
    }
}
/* Loads the dictionary named by the first argument and prints it back;
   does nothing when no argument is given or the load fails. */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        return 0;
    }
    if (!load(argv[1])) {
        return 0;
    }
    save();
    return 0;
}
Changing the fscanf() to a simpler fgets() might provide a marginal performance improvement, at the cost of more restrictive semantics for the dictionary format.
I am having some issues with dynamically allocating a string for a node in a tree. I have included my node structure below for reference.
// Binary tree node: one string plus links to the two subtrees.
struct node
{
// pointer to the node's text; the struct itself holds no string storage
char *string;
// left and right children
struct node *left;
struct node *right;
};
// shorthand so the type can be written as plain "node"
typedef struct node node;
I am supposed to read words from a text file and then store those words into a tree. I am able to store char arrays that have been defined, such as char string[20] without problems, but not strings that are supposed to be dynamically allocated.
I am only going to post the code I am using to read my file and try to create the dynamically allocated array. I have already created the file pointer and checked that it is not NULL. Every time I try to run the program, it simply crashes, do I need to try and read the words character by character?
//IN MAIN
node *p, *root ;
int i;
int u;
root = NULL;
char input[100];
while(fscanf(fp, "%s", &input) != EOF)
{
//Create the node to insert into the tree
p = (node *)malloc(sizeof(node));
p->left = p->right = NULL;
int p = strlen(input); //get the length of the read string
char *temp = (char*) malloc(sizeof(char)*p);
//malloc a dynamic string of only the length needed
strcpy(local, input);
strcpy(p->word,local);
insert(&root, p);
}
To be completely clear, I only want advice regarding the logic of my code, and only would like someone to help point me in the right direction.
You are invoking many undefined behaviors by
passing pointer to object having wrong type to scanf(). i.e. In fscanf(ifp, "%s", &input), char(*)[100] is passed where char* is expected
accessing out-of-range of the allocated buffer when storing the terminating null-character in strcpy(local, input);
using value of buffer allocated via malloc() and not initialized in strcpy(curr->word,local);
Your code should be like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Binary search tree node keyed by word. */
typedef struct node_t {
/* left and right children */
struct node_t* left, *right;
/* set to 1 when main creates the node */
int count;
/* pointer to the node's text, allocated separately from the node */
char* word;
} node;
void insert(node ** tree, node * item);
/* Reads whitespace-separated words from stdin, lowercases each one, and
 * inserts a freshly allocated node for it into the tree rooted at root.
 * Returns 1 after reporting a failed allocation, 0 at end of input. */
int main(void) {
    FILE *in = stdin;
    node *root = NULL;
    char token[100];

    /* the field width caps a read at 99 characters so the terminator still
     * fits in token[100] */
    for (;;) {
        if (fscanf(in, "%99s", token) == EOF) {
            break;
        }

        /* node for this word; it starts without children */
        node *fresh = malloc(sizeof(node));
        if (fresh == NULL) {
            perror("malloc 1");
            return 1;
        }
        fresh->right = NULL;
        fresh->left = NULL;
        fresh->count = 1;

        /* storage for exactly this word: its length plus one byte for the
         * terminating null-character */
        int len = strlen(token);
        char *copy = malloc(sizeof(char) * (len + 1));
        if (copy == NULL) {
            perror("malloc 2");
            return 1;
        }

        /* lowercase in place so Job and job count as the same word; the
         * length computed above is reused, and the unsigned char cast keeps
         * the tolower() argument in its defined range */
        for (int k = 0; k < len; k++) {
            token[k] = tolower((unsigned char)token[k]);
        }
        strcpy(copy, token);

        /* the node takes over the copy by plain pointer assignment */
        fresh->word = copy;
        insert(&root, fresh);
    }
    /* code to free what is allocated will be here */
    return 0;
}
//Separate insert function
/* Links item into the binary search tree at *tree, ordered by strcmp on word.
 * Smaller words go left, larger words go right; an item whose word is already
 * present is not linked. */
void insert(node ** tree, node * item)
{
    node **slot = tree;

    /* descend until an empty link is found */
    while (*slot != NULL)
    {
        int order = strcmp(item->word, (*slot)->word);
        if (order == 0)
        {
            /* note: memory leak may occur here, since an item equal to an
             * existing word is neither linked nor released */
            return;
        }
        slot = (order < 0) ? &(*slot)->left : &(*slot)->right;
    }
    *slot = item;
}