Memory Allocation error identified by valgrind - c

bool load(const char *dictionary)
{
// TODO
//create alphanumeric frequency trie from dictionary stored in temporary location
// open dictioary
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
//beggining of dictionary trie called 'root'
root = (trie*) malloc( sizeof(trie) );
if (root == NULL)
{
printf("error allocating memory to root for load");
return false;
}
//beggining of traversal node called 'current' and "attachment" to read/traverse root node
trie* current = NULL;
int a = (int)'a';
int z = (int)'z';
int cha = 0;
current = root;
//construct trie letter branches from ch (character) of single word-lines in dictionary
for ( char ch = fgetc(dict) ; EOF != ch ; ch = fgetc(dict) )
{
//set cursor letter to indexable value
if ( ch == '\'' )
{
cha = (z + 1) - a;
//printf("#%d ",cha);
}
else
{
cha = (ch - a);
//printf("%d ",cha);
}
//create or traverse existing letter branch for next letter ch in word-line
if( current->children[cha] == NULL && ) //(cha >= 0 && cha <=26) )
{
//printf("L");
current -> children[cha] = (trie*) malloc( sizeof(trie) );
current = current -> children[cha];
}
else //if ( cha >= 0 && cha <=26 )
{
current = current -> children[cha];
}
//for end of word-line in dictionary label as word and reset cursor node to root of dictionary trie (tree)
if ( ch == '\n' )
{
//printf("\n");
current->is_word = true;
wordcount++;
current = root;
//printf("%d", wordcount);
}
}
My program compiles and works exactly as specified for a problem I'm working on however i'm failing the valgrind test at the beginning of the if statement below. Valgrind Test returns "Invalid read of size 8." I'm hoping the code I provided below is enough to clarify where I'm insulting the system's memory.
if( (cha >= 0 && cha <=26) && current->children[cha] == NULL )
{
current -> children[cha] = (trie*) malloc( sizeof(trie) );
current = current -> children[cha];
}
else if ( cha >= 0 && cha <=26 )
{
current = current -> children[cha];
}
Also below is the structure of my trie node:
#define COUNT 27
typedef struct trie
{
bool is_word;
struct trie *children[COUNT];
}
trie;
//instantiation structures and variables
trie* root;
int wordcount = 0;
bool loaded;
//freetrie function prototype
void freetrie(trie* step);
Here's how I free malloc memory for the trie nodes:
void freetrie(trie* root)
{
for(int i = 0; i < 27; i++)
{
if (root -> children[i] != NULL)
{
freetrie(root -> children[i]);
}
}
free(root);
return;
}
bool unload(void)
{
// TODO
// free memory allocated by load for dictionary
trie* current = root;
freetrie(current);
return true;
}

The line if( current->children[cha] == NULL && (cha >= 0 && cha <=26) ) executes index boundary check only after accessing array, it should be rewritten to verify that index is valid before accessing array at that position. It is also a good idea to get rid of magic numbers:
#define TRIE_CHILDREN_COUNT 27
typedef struct trie
{
bool is_word;
struct trie *children[TRIE_CHILDREN_COUNT];
}
trie;
if((0 <= cha) && (cha < TRIE_CHILDREN_COUNT) && (NULL == current->children[cha]))

Related

i can't undestand why the checkcs50 are saing ":( speller compiles expected exit code 0, not 2" someone can explain?

I would be relly glad if anyone can help to find what I'm doing wrong. I alread read in other forum that maybe is because somo change in speller code.
But, only the "new" things that I add was the function and the global pointer. I can't undertand why checkcs50 are tell me;
:) dictionary.c, dictionary.h, and Makefile exist
:( speller compiles
expected exit code 0, not 2
:| handles most basic words properly
can't check until a frown turns upside down
:| handles min length (1-char) words
can't check until a frown turns upside down
:| handles max length (45-char) words
can't check until a frown turns upside down
:| handles words with apostrophes properly
can't check until a frown turns upside down
:| spell-checking is case-insensitive
can't check until a frown turns upside down
:| handles substrings properly
can't check until a frown turns upside down
:| program is free of memory errors
can't check until a frown turns upside down
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 26;
// Hash table
node *table[N];
// i created that
node *fre;
// i created that function
void unl(int i, node *tmp);
// Returns true if word is in dictionary else false
bool check(const char *word)
{
node *midle;
midle -> next = table[13];
// TODO
unsigned int w = hash(word);
node * tmp = table[w];
if (w > 12)
{
tmp = midle;
}
if (tmp == NULL)
{
return false;
}
while (tmp != NULL)
{
if (strcmp(tmp -> word, word) == 0)
{
return true;
}
else
{
tmp = tmp->next;
}
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
unsigned int index_hash = 0;
int c = word[0];
if ((90 - c) >= 0)
{
return (90 - c);
}
else
{
return (122 - c);
}
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
FILE * text;
text = fopen(dictionary,"r");
if (text == NULL)
{
fclose(text);
return false;
}
char buffer[46];
while (fscanf (text, "%s",buffer) != EOF)
{
char * word = buffer;
unsigned int index_hash = hash(word);
if (table[index_hash] == NULL)
{
node * n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
table[index_hash] = malloc(sizeof(node));
if (table[index_hash] == NULL)
{
return false;
}
// talvez ter que colocar o '&' no n
table [index_hash] -> next = n;
strcpy(n -> word, word);
n -> next = NULL;
}
else
{
node * n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
int c= 0;
while (n -> next != NULL)
{
c++;
}
for (int i = 0; i < c; i++)
{
if (i == c-1)
{
strcpy(n -> word,word);
n -> next = NULL;
}
}
}
}
return true;
// TODO
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
int size_loaded = 0;
int size_loaded_sp = 0;
node * tmp = table[0];
node * tmp_midle = table[22];
if (tmp == NULL)
{
return 0;
}
if (tmp_midle == NULL)
{
return 0;
}
while (tmp != NULL && tmp_midle != NULL)
{
if (tmp != NULL)
{
size_loaded++;
tmp = tmp->next;
}
if (tmp_midle != NULL)
{
size_loaded_sp++;
tmp_midle = tmp_midle-> next;
}
}
// TODO
return (size_loaded + size_loaded_sp);
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
node *tmp;
int j = 0;
//usar esse while pra recursividade usando esse fre
// talvez eu deva por uma funçao e usar recursividade
for (int i = 0; i < 46; i++)
{
tmp = table[i];
while (tmp != NULL)
{
tmp = tmp -> next;
j++;
}
for (int k = 0; k > j; k++)
{
if (fre == NULL)
{
unl(i,tmp);
}
else
{
tmp = fre;
unl(i,tmp);
}
}
}
// TODO
return false;
}
void unl(int i, node *tmp)
{
free(tmp);
tmp = table[i];
while (tmp != NULL)
{
fre = tmp;
tmp = tmp -> next;
}
}```
Make sure you didn't change anything in speller.c and in dictionary.h. I had made this mistake, and this was the cause of that error.

Insertion into AVL tree only replaces root node

I'm currently working on an assignment where the N most frequent words in a book (.txt) must be printed. The issue that I'm currently facing is that when I add a node to one of my trees, it simply replaces the root node and thus, the tree remains as a single node.
Code snippet which adds words from the file "stopwords.txt" to a tree named stopwords:
Dict stopwords = newDict();
if (!readFile("stopwords.txt"))
{
fprintf(stderr, "Can't open stopwords\n");
exit(EXIT_FAILURE);
}
FILE *fp = fopen("stopwords.txt", "r");
while (fgets(buf, MAXLINE, fp) != NULL)
{
token = strtok(buf, "\n");
DictInsert(stopwords, buf); //the root is replaced here
}
fclose(fp);
The data structures are defined as follows:
typedef struct _DictNode *Link;
typedef struct _DictNode
{
WFreq data;
Link left;
Link right;
int height;
} DictNode;
typedef struct _DictRep *Dict;
struct _DictRep
{
Link root;
};
typedef struct _WFreq {
char *word; // word buffer (dynamically allocated)
int freq; // count of number of occurences
} WFreq;
Code to insert and rebalance tree:
// create new empty Dictionary
Dict newDict(void)
{
Dict d = malloc(sizeof(*d));
if (d == NULL)
{
fprintf(stderr, "Insufficient memory!\n");
exit(EXIT_FAILURE);
}
d->root = NULL;
return d;
}
// insert new word into Dictionary
// return pointer to the (word,freq) pair for that word
WFreq *DictInsert(Dict d, char *w)
{
d->root = doInsert(d->root, w); //the root is replaced here before doInsert runs
return DictFind(d, w);
}
static int depth(Link n)
{
if (n == NULL)
return 0;
int ldepth = depth(n->left);
int rdepth = depth(n->right);
return 1 + ((ldepth > rdepth) ? ldepth : rdepth);
}
static Link doInsert(Link n, char *w)
{
if (n == NULL)
{
return newNode(w);
}
// insert recursively
int cmp = strcmp(w, n->data.word);
if (cmp < 0)
{
n->left = doInsert(n->left, w);
}
else if (cmp > 0)
{
n->right = doInsert(n->right, w);
}
else
{ // (cmp == 0)
// if time is already in the tree,
// we can return straight away
return n;
}
// insertion done
// correct the height of the current subtree
n->height = 1 + max(height(n->left), height(n->right));
// rebalance the tree
int dL = depth(n->left);
int dR = depth(n->right);
if ((dL - dR) > 1)
{
dL = depth(n->left->left);
dR = depth(n->left->right);
if ((dL - dR) > 0)
{
n = rotateRight(n);
}
else
{
n->left = rotateLeft(n->left);
n = rotateRight(n);
}
}
else if ((dR - dL) > 1)
{
dL = depth(n->right->left);
dR = depth(n->right->right);
if ((dR - dL) > 0)
{
n = rotateLeft(n);
}
else
{
n->right = rotateRight(n->right);
n = rotateLeft(n);
}
}
return n;
}
static Link newNode(char *w)
{
Link n = malloc(sizeof(*n));
if (n == NULL)
{
fprintf(stderr, "Insufficient memory!\n");
exit(EXIT_FAILURE);
}
n->data.word = w;
n->data.freq = 1;
n->height = 1;
n->left = NULL;
n->right = NULL;
return n;
}
// Rotates the given subtree left and returns the root of the updated
// subtree.
static Link rotateLeft(Link n)
{
if (n == NULL)
return n;
if (n->right == NULL)
return n;
Link rightNode = n->right;
n->right = rightNode->left;
rightNode->left = n;
n->height = max(height(n->left), height(n->right)) + 1;
rightNode->height = max(height(rightNode->right), n->height) + 1;
return rightNode;
}
// Rotates the given subtree right and returns the root of the updated
// subtree.
static Link rotateRight(Link n)
{
if (n == NULL)
return n;
if (n->left == NULL)
return n;
Link leftNode = n->left;
n->left = leftNode->right;
leftNode->right = n;
n->height = max(height(n->left), height(n->right)) + 1;
leftNode->height = max(height(leftNode->right), n->height) + 1;
return leftNode;
}
I believe that most of the code is functional and it is simply the insertion which fails. When I attempted to debug this with gdb, I had discovered that the root node (d->root) was replaced before the recursive insert function (doInsert) was run, causing the program to always return the node n which, as a result, already exists in the tree. For example, if the text file contained the following:
a
b
c
then the program would first insert "a" as stopwords->root, then "b" would replace "a" and become the new stopwords->root, finally "c" would replace "b" as the stopwords->root, resulting in a tree with one node, "c".
There are many inconsistencies in your code.
One mistake is here:
d->root = doInsert(d->root, w);
You reassign unconditionally the root each time when you insert a new node.
You are supposed to return the new node from the function doInsert and to reassign the root only if the new node had become a new root.
But other mistake that you make is that you return from doInsert a local variable n that was not newly allocated but that was initialized to point to the previous root.
Inside doInsert you need to allocate a new node NEW and use a variable x to walk down from the root until you find a place to insert a new allocated node NEW. If x stops at root then you reinitialize the d->root = NEW.
Your function newNode just stores the passed string pointer, so what is pointed at will change when you modify the original string.
To prevent that, you should copy the input string on node insertions.
To archive that,
n->data.word = w;
should be
n->data.word = malloc(strlen(w) + 1);
if (n->data.word == NULL)
{
fprintf(stderr, "Insufficient memory!\n");
exit(EXIT_FAILURE);
}
strcpy(n->data.word, w);
Add #include <string.h> to use strlen() and strcpy() if it isn't.

CS50 Pset5 check() counting too many words as misspelled

I have loaded the dictionary into a tree structure and successfully gotten speller.c to compile with the following implementations of load() and check().
However, when I run the program, an incorrect number of words are counted as misspelled by my check() function. (In the case of lalaland.txt, it's 17187 words out of 17756).
I can't figure out what's wrong with my code and would be extremely grateful to anyone who could help point me in the right direction.
typedef struct node
{
bool isword;
struct node *children[27];
}
node;
node *root = NULL;
// Function returns the position of any given letter in the alphabet e.g. a = 1, b = 2 etc. Returns 0 for an apostrophe.
int index(char letter)
{
if (isalpha(letter))
{
int i = letter - 96;
return i;
}
return 0;
}
// Keeps track of number of words loaded into dictionary.
unsigned int wordno = 0;
// Returns true if word is in dictionary else false
bool check(const char *word)
{
char newword[LENGTH + 1];
node *temp = root;
for (int j = 0; j < strlen(word); j++)
{
//Makes each letter of the input lowercase and inserts it into a new array.
newword[j] = tolower(word[j]);
}
for (int i = 0; i < strlen(word); i++)
{
//Finds the position of the character in the alphabet by making a call to index().
int letter = index(newword[i]);
if (temp->children[letter] == NULL)
{
return false;
}
else
{
temp = temp->children[letter];
}
}
if (temp->isword == true)
{
return true;
}
return false;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r");
root = calloc(1, sizeof(node));
node *temp = root;
if (dict == NULL)
{
fprintf(stderr, "Could not load dictionary.\n");
return false;
}
char word[LENGTH+1];
while (fscanf(dict, "%s", word) != EOF)
{
for (int i = 0; i < strlen(word); i++)
{
int letter = index(word[i]);
if (temp->children[letter] == NULL)
{
temp->children[letter] = calloc(1, sizeof(node));
if ((temp->children[letter]) == NULL)
{
unload();
return false;
}
}
temp = temp->children[letter];
}
temp->isword = true;
wordno++;
}
return true;
}
node *temp = root;
should be placed inside this while loop:
while (fscanf(dict, "%s", word) != EOF)
By doing this, you allow temp to go back and point to the root node each time the loop begins iterating over a new word in the file.

Pset 5 Speller Trie segmentation fault in unload

I have implemented load, check, and unload for dictionary.c using trie, and I am suffering a segmentation fault. The error happens inside load function, or, to be more precise, inside the function called un_node, a recursive function I call in unload. The line if(cursor -> children[a] != NULL) pops up when I run debug50. Although I did not malloc my root/head node as a global (I just wrote node *root;), isn't the memory allocated in load function extant?
I did check whether I null pointed every children of next_node and root after mallocking them.
Regarding the un_node function, I am not sure whether I should free(cursor); return; inside the for loop or outside the for loop.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
/**
* Returns true if word is in dictionary else false.
*/
//create a trie data type
typedef struct node
{
bool is_word;
struct node *children[27]; //this is a pointer too!
}node;
//function prototype
void un_node (node *node_name);
void nullpoint (node *node_name);
//initialize root
node *root;
bool check(const char *word)
{
//create a trav pointer
node *cursor = root;
int i = 0;
while(word[i] != '\0')
{
char ch = word[i];
int index = (tolower(ch) - 97);
if(index == -58)
{
index = 26;
}
//validate index
if(index < 0 || index > 26)
{
printf("Error: index\n");
return false;
}
if(cursor -> children[index] != NULL)
{
cursor = cursor -> children[index];
i++;
}
else
{
//if it is NULL then word is not in dictionary
return false;
}
}
//end of word, so check if there is a flag from load
if(cursor -> is_word == true)
{
return true;
}
else
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//malloc space for root node
root = malloc(sizeof(node));
//all of root's children point to NULL now
nullpoint (root);
//open dictionary
FILE *dptr = fopen(dictionary, "r");
if(dptr == NULL)
{
return false;
}
char *c = malloc(sizeof(char));
node *next_node;
//scan the file char by char until end and store it in c
while(fscanf(dptr,"%s",c) != EOF)
{
//in the beginning of every word, make a traversal pointer copy of root so we can always refer back to root
node *trav = root;
//when temp increments, it moves on to next character of word
char *temp;
temp = c;
//repeat for every word
while ((*temp) != '\0')
{
//convert char into array index
int alpha = (tolower(*temp) - 97);
//handle apostrophe
if(alpha == -58)
{
alpha = 26;
}
//validate alpha
if(alpha < 0 || alpha > 26)
{
printf("Error: alpha\n");
return false;
}
//if array element is pointing to NULL, i.e. it hasn't been open yet,
if(trav -> children[alpha] == NULL)
{
//then malloc next node and point it with the cursor.
next_node = malloc(sizeof(node));
//initialize children of newly allocated node
nullpoint(next_node);
//cursor points at the newly allocated memory
trav -> children[alpha] = next_node;
//cusor moves on
trav = trav -> children[alpha];//null?
//quit if malloc returns null
if(next_node == NULL)
{
printf("Could not open dictionary");
return false;
}
}
else
{
//if an already existing path, just go to it
trav = trav -> children[alpha];
}
//increment the address of temp variable
temp++;
}
//a word is loaded.
trav -> is_word = true;
}
//success
fclose(dptr);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// TODO
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
node *cursor = root;
un_node(cursor);
return true;
}
void un_node (node *cursor)
{
for(int a = 0; a<27; a++)
{
//if the children's pointee is not NULL, i.e. this is not a dead end
if(cursor -> children[a] != NULL)
{
//go back to the beginning (with cursor -> children[a] as new argument) restart this function
un_node(cursor -> children[a]);
}
}
//and when it is dead end, start to
free(cursor);
return;
}
//function that points all children of node to NULL
void nullpoint (node *node_name)
{
for(int t=0;t<27;t++)
{
node_name -> children[t] = NULL;
}
}

Segmentation Fault in Trie implementation in C

I'm trying to implement a trie data structure to spell-check a given text file. Currently, it seems to work for a couple words in the file, then it reaches a seg fault. I tried debugging to find the culprit, but all I found was that the value of "letter" is retaining seemingly random negative values (it should be between 1 and 27, inclusive). Normally the seg fault issue appears almost instantly after i start the program, so I'm not sure why the issue is popping up in the middle of the program.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
//create global root node
Trienode *root;
//create word counter for size() function
unsigned int wordcount = 0;
//creates an empty node
Trienode * newnode()
{
Trienode *nnode = NULL;
nnode = (Trienode *)malloc(sizeof(Trienode));
//initialize new node with null pointers and values
nnode -> parent = NULL;
for(int i = 0; i < 27; i++)
{
nnode -> children[i] = NULL;
}
return nnode;
}
void cleartrie(Trienode *head)
{
//if child node exists, free it, else continue with next iteration in for loop
if(head)
{
for(int i = 0; i < 27; i++)
{
cleartrie(head -> children[i]);
}
free(head);
head = NULL;
}
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int i = 0;
int letter;
Trienode *head = root;
while(word[i] != '\0')
{
if(isalpha(word[i]))
{
letter = word[i] - 'a';
}
else //it must be an apostrophe
{
letter = word[i] - 13;
}
if(!(head -> children[letter]))
{
return false;
}
else //a pointer must exist
{
head = head -> children[letter];
}
i++;
}
return true;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//open file
FILE *infile = fopen(dictionary, "r");
Trienode *parnode; //parent node
root = newnode();
Trienode *curnode = root; //current node
int letter = 0;
//while not end of file, read words
while(fgetc(infile) != EOF)
{
//while not end of word, read letters
for(;;)
{
int c;
//read current letter in file
c = fgetc(infile);
//convert input char to corresponding array location (a - z = 0-25, apostrophe = 26)
if(isalpha(c))
{
letter = c - 'a';
}
else if (c == '\'')
{
letter = c - 13;
}
//if end of string, exit loop
else if (c == '\0')
{
//end of word, so endofstring = true
wordcount++;
break;
}
//move to next letter if not either apostrophe or alphabetical
else
{
break;
}
//if pointer to letter of word doesn't exist, create new node
if(curnode -> children[letter] == NULL)
{
curnode -> children[letter] = newnode();
}
//child node is the new current node
parnode = curnode;
curnode = curnode -> children[letter];
curnode -> parent = parnode;
}
//return to root node
curnode = root;
}
fclose(infile);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordcount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
cleartrie(root);
if (root == NULL)
{
return true;
}
return false;
}
Sorry about the wall of text, but most of it is just there for context (I hope). The seg fault error is occurring on the if(!(head -> children[letter])) line of the check helper function.
Thanks in advance!
I suspect that your test file may contain some uppercase letters. If this is the case, then subtracting 'a' in an attempt to remap your letters will result in a negative number, since 'A' < 'a'. Have a look at the ASCII Table. Converting the letters to lowercase first should solve your problem.

Resources