I am going through the cs50x course, doing speller check program. In my fourth implementation of this program I've ran into malloc problem.
This time I decided to implement a binary tree.
I've read a lot of threads about this problem and checked my code for several times, but I still can't understand what I'm doing wrong.
Problem appears in the recursive function that loads dictionary into ram.
#include <stdbool.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "dictionary.h"
// standart node of the trie
typedef struct node
{
char word[LENGTH + 1];
struct node* less;
struct node* more;
}
node;
// Function definitions
void unload_node(node* pr_node);
void ld_bin_tree(int min, int max, node* node);
bool check_word(char* lword, node* parent);
// Global variables
// root of the tree
node* root;
FILE* dict;
//size of dictionary
int dict_size = 0;
bool load(const char* dictionary)
{
// open dictionary file
dict = fopen(dictionary, "r");
int nwords = 0;
int min = 0;
int max = 0;
root = malloc(sizeof(node));
//if file wasn't open
if(dict == NULL)
{
printf("Error opening ditionary file!");
return false;
}
// tmp storage for read word
char buffer[LENGTH + 1];
// count words in the dictionary
while(fscanf(dict, "%s", buffer) > 0)
{
nwords++;
}
max = nwords;
rewind(dict);
ld_bin_tree(min, max, root);
// close file
fclose(dict);
return false;
}
/*
* Recursion function to fill in binary tree
*/
void ld_bin_tree(int min, int max, node* node)
{
// tmp word holder
char buffer[LENGTH + 1];
// next mid value
int mid = (min + max) / 2;
// if mid == 0 then the bottom of the brunch reached, so return
if(max - min < 2)
{
if(min == 0)
{
fscanf(dict, "%s", node->word);
dict_size++;
return;
}
return;
}
// go through the dict to the mid string
for(int i = 0; i <= mid; i++)
{
fscanf(dict, "%s", buffer);
}
// fill in word
strcpy(node->word, buffer);
// go at the beginning of the dict
rewind(dict);
// fill in input node
// fill in new children nodes
struct node* new_node = malloc(sizeof(node));
node->less = new_node;
// send lesser side
ld_bin_tree(min, mid, node->less);
new_node = malloc(sizeof(node));
node->more = new_node;
// send greater side
ld_bin_tree(mid, max, node->more);
dict_size++;
return;
}
I've tried to get this error using valgrind but it gives me a lot of warnings about reading and writing in unappropriated memory blocks. But because I'm not very good with programming yet, this warnings didn't give me a clue of what's happening.
So I'm asking for more precise help, if it's possible. Thank you in advance.
Other parts of the speller program may be found here:
https://www.dropbox.com/sh/m1q1ui2g490fls7/AACnVhjjdFpv1J0mUUhY2uV2a?dl=0
In function ld_bin_tree() you have
struct node* new_node = malloc(sizeof(node));
Here node is a pointer not an object of type struct node.
You have
node *node;
So the global definition of node is being overwritten which makes it a pointer.
So you are not allocating memory for your whole structure. You should have
struct node* new_node = malloc(sizeof(struct node));
Related
I have been working on this problem set for quite a time and the code seems to be wrong but I couldn't find the solution. I have been comparing my code and other people's code but I still don't know where I got wrong. Really appreciate all your help if you can provide me with some ways to solve this problem. It keeps prompting me free(): double free detected in tcache 2 but I can't seem to find my mistake.
// Implements a dictionary's functionality
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 50;
// Hash table
node *table[N];
//word count
int count = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
bool found = false;
node *current = table[hash(word)];
while (current != NULL)
{
if (strcasecmp(current -> word, word) == 0)
{
found = true;
}
else if(current -> next != NULL)
{
current = current -> next;
}
else
{
return false;
}
}
return found;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO
unsigned long hash = 5381;
int c;
while ((c = toupper(*word++)))
{
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
return hash % N;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
FILE *infile = fopen(dictionary, "r");
if (infile == NULL)
{
return false;
}
char buffer[LENGTH+1];
while (fscanf(infile, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
free(n);
}
fclose(infile);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return count;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
int num = count;
for (int i = 0; i < N ; i++)
{
node *current = table[i];
while (current != NULL)
{
node *temp = current;
current = current -> next;
free(temp);
num--;
}
}
if (num == 0)
{
return true;
}
else
{
return false;
}
}
The calls of free in this while loop
while (fscanf(infile, "%s", buffer) != EOF)
{
node *n = malloc(sizeof(node));
strcpy(n -> word, buffer);
n -> next = table[hash(buffer)];
table[hash(buffer)] = n;
count++;
free(n);
}
does not make a sense. You deleted at once (an object of the type node using the pointer n) what you was trying to add to the table (a valid address to an allocated object of the type node). As a result the element of the table at the position hash(buffer) that is set like
table[hash(buffer)] = n;
has an invalid value because it is the address of the already deleted node in this statement
free(n);
So in the function unload this invalid address will be again used to free already freed memory within the function load.
Pay attention to that you did not allocate memory as you wrote in a comment "for node n". n is just a pointer to the allocated unnamed object of the type node. So you are not freeing the pointer n itself in this statement
free(n);
You are freeing the allocated object of the type node using the pointer n. Thus all pointers that pointed to the allocated object of the type node become invalid.
I'm adding words (character per node) on a trie data structure - that happens correctly based on a implementantion I found on the web -
http://www.techiedelight.com/trie-implementation-insert-search-delete/
Although I want to extend this and add a list containing some data based on the words, such term frequency etc.
Right now I'm facing an issue with the pointer of the list when adding the first element on a trie node - in the method append_posting_list - and getting a segmetation fault.
Here is the code so far.
main.h
#ifndef TRIE_H
#define TRIE_H
#define CHAR_SIZE 26
typedef struct posting_list {
int doc_id;
int tf;
int df;
struct posting_list *next;
} posting_list_node ;
struct Trie
{
posting_list_node *p_node; // this will be the head of the posting list for every word;
int isLeaf; // 1 when node is a leaf node
struct Trie* character[CHAR_SIZE];
};
struct Trie* getNewTrieNode();
void insert(struct Trie* *head, char* str, int doc_id);
int search(struct Trie* head, char* str);
#endif //TRIE_H
main.c
#include <stdio.h>
#include <stdlib.h>
#include "main.h"
int main(){
struct Trie* head = getNewTrieNode();
insert(&head, "hello", 1);
return 0;
}
// Function that returns a new Trie node
struct Trie* getNewTrieNode()
{
struct Trie* node = (struct Trie*)malloc(sizeof(struct Trie));
node->isLeaf = 0;
for (int i = 0; i < CHAR_SIZE; i++)
node->character[i] = NULL;
return node;
}
posting_list_node* get_mem(){
posting_list_node* p;
p = (posting_list_node *)malloc(sizeof(posting_list_node));
if (p == NULL){
printf("Memory allocation failed\n");
exit(EXIT_FAILURE);
}
return p;
}
void append_posting_list(int doc_id, posting_list_node **n){
posting_list_node *new, *q;
new = get_mem();
new->doc_id = doc_id;
new->tf = 1;
new->next = NULL;
// if new is the first element of the list
if(n == NULL) {
*n = new;
} else {
q = *n;
while( q->next!=NULL) {
q = q->next;
}
q->next = new;
}
}
// Iterative function to insert a string in Trie.
void insert(struct Trie* *head, char* str, int doc_id)
{
// start from root node
struct Trie* curr = *head;
while (*str)
{
// create a new node if path doesn't exists
if (curr->character[*str - 'a'] == NULL)
curr->character[*str - 'a'] = getNewTrieNode();
// go to next node
curr = curr->character[*str - 'a'];
// move to next character
str++;
}
// already found this word, increase frequency
if(curr->isLeaf) {
curr->p_node->tf += 1;
} else {
append_posting_list(doc_id, curr->p_node);
// mark current node as leaf
curr->isLeaf = 1;
}
}
// Iterative function to search a string in Trie. It returns 1
// if the string is found in the Trie, else it returns 0
int search(struct Trie* head, char* str)
{
// return 0 if Trie is empty
if (head == NULL)
return 0;
struct Trie* curr = head;
while (*str)
{
// go to next node
curr = curr->character[*str - 'a'];
// if string is invalid (reached end of path in Trie)
if (curr == NULL)
return 0;
// move to next character
str++;
}
// if current node is a leaf and we have reached the
// end of the string, return 1
return curr->isLeaf;
}
I'm really stuck here.
Any suggestions would be really appreciated.
I found a couple things that when fixed, got rid of your segmentation fault.
In getNewTrieNode() I think you need to set p_node to NULL
struct Trie* getNewTrieNode() {
struct Trie* node = (struct Trie*)malloc(sizeof(struct Trie));
node->isLeaf = 0;
for (int i = 0; i < CHAR_SIZE; i++)
node->character[i] = NULL;
node->p_node = NULL;
return node;
}
append_posting_list() takes post_list_node **, but in insert(), you are passing just post_list_node *
void append_posting_list(int doc_id, posting_list_node **n)
append_posting_list(doc_id, curr->p_node);
looks like it should be
append_posting_list(doc_id, &(curr->p_node));
In append_posting_list()
if (n == NULL) {
should be
if (*n == NULL) {
in order to see if a pointer to an empty list is being passed in.
You should really have some functions to print out your data structure while you are working on it, so you can test each piece as you develop it. Simply compiling and running code and not getting any errors is no gurantee the code is working correctly with complex data structures like this. Making sure that each piece works perfectly before going on to the next piece will save you hours in trying to track down segmentation faults and other errors like this.
Im trying to write a program that reads each word inputted by user and then sticks that word into a linked list. This is what I have tried so far but got seg faults but not too sure where I went wrong with mallocing/pointers. (Havent implemented printList yet).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LEN 20
typedef struct node{
char *word;
struct node *next;
}node_t;
node_t *read(node_t *node);
void printList(node_t *node);
node_t *insertNode(char *word, node_t *node, int size);
int main(int argc, char *argv[]) {
node_t *start = NULL;
printf("Enter a sentence:\n");
read(start);
return 0;
}
void *read(node_t *node){
int i, size = MAX_LEN;
char c, *word;
if(!(word=malloc(size))){
printf("Out of memory!\n");
exit(EXIT_FAILURE);
}
while((c=getchar())!='\n'){
for(i=0;c!=' ';i++){
word[i]=c;
if(i>size){
size=size*2;
if(!realloc(word, size)){
printf("Out of memory\n");
exit(EXIT_FAILURE);
}
}
}
node = insertNode(word,node,size);
}
return node;
}
node_t *insertNode(char *word, node_t *node, int size){
node_t *new_node, *current;
new_node = (node_t*)malloc(sizeof(node_t));
new_node->next = NULL;
if(!(new_node->word = malloc(size))){
printf("Out of memory\n");
exit(EXIT_FAILURE);
}
strcpy(new_node->word,word);
if (node == NULL){
node = new_node;
current = new_node;
}
else{
current->next = new_node;
current = new_node;
}
return node;
}
There are several issues:
Your prototype and the implementation of read don't match; make both return a node_t *.
You have two nested loops for input, one reading from stdinand another one cycling through the characters. The inner loop never updated its condition, because c can only be changed by the outer loop. There should be just one loop, which takes care of reading from the stream and writing to the string.
You don't keep tzhe result of realloc, which means that you don't reflect updates when the handle to the allocated memory changes. In these cases, you will access the old handle, which has become invalid.
You don't terminate your string with a null character.
You should reallocate before you access memory out of bounds. That usually means to check whether to enlarge the array before writing to it. Note that for an array of length n, n itself is already an illegal index.
The result of getchar should be an int, ot a char so that all valid input is distinct from EOF, for which you don't check.
Therer are probably more issues, the ones listed are the ones concerned with read. I haven't looked into the linked list insertion.
In order to properly terminate the string with a zero, I recommend to write an infinite loop and postpone the break condition after possible reallocation. Foe example:
node_t *read(node_t *node)
{
int size = MAX_LEN;
int i = 0;
char *word = malloc(size);
if(word == NULL) {
printf("Out of memory!\n");
exit(EXIT_FAILURE);
}
while (1) {
int c = getchar();
if(i >= size) {
size = size*2;
word = realloc(word, size);
if (word == NULL) {
printf("Out of memory\n");
exit(EXIT_FAILURE);
}
}
if (c == '\n' || c == EOF) {
word[i] = '\0';
break;
}
word[i++] = c;
}
node = insertNode(word, node, size);
return node;
}
I think the error is caused by the line
return node;
in insertNode. That should be
return new_node;
I am creating a program that will read a word from a text file in main.c, and send it over to list.c file to create a new node to store that word. The node will also store three ints: first (number of times this word appears in txt file 1), second(number of times this word appears in txt file 2), and dif (abs(first-second)). After adding all the new words to the file and counting the number of times each word exists in each txt file, the main.c will call a method that will calculate the difference between first and second for each node. This is difference (stored in dif for each node) will be used to sort the linked nodes in decreasing order.
EX. word: the, first: 2888, second: 2466, dif: 422.
red, 39 12 27
.....
However, when main calls the sort method, a infinite loop occurs. This infinite loop comes from the inner loop of the sorting algorithm, where the current node is assigned the node from the curr->next pointer. Somewhere during the sort method, the current node's next pointer points to the current node, not the actual next node in the linkedlist. If the sort method is dactivated, then all other functions work fine, including printAll which goes through the entire list and prints the data in each node (see my example above).
My issue is that I cannot find where in my sort method how current->next started to point to the current node. Any help is appreciated!
/*
* list.h
*/
#ifndef LIST_H_
#define LIST_H_
typedef struct node Node;
void findWord(char *word, int book);
void addWord(char *word, int book);
void editWord(Node **endPtr, int book);
void sort();
void swap(Node **a, Node **b);
void calculateDiff();
void printAll();
#endif /* LIST_H_ */
/*
* list.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"
typedef struct node{
int first;
int second;
int dif;
char name[20];
struct node *next;
}Node;
Node *front = NULL;
/*
* Sees if the current word exists in the
* linkedlist.
*/
void findWord(char *word, int book) {
Node *curr = front;
int boolean = 0;
while (curr != NULL) {
if(strcmp(curr->name, word) == 0) {
boolean = 1;
editWord(&curr, book);
break;
}
curr = curr->next;
}
if(!boolean) { //Add word if it does not exist.
addWord(word, book);
}
}
/*
* Creates a new node for the added word. Adds to front.
*/
void addWord(char *word, int book) {
Node *newNode = malloc (sizeof(Node));
/*
* Since this word is being added
* to the linkedlist with a newly
* created node, either the
* first or second int must be to 1
* while the other is set to 0. Based
* off of book int.
*/
if(book == 1) {
newNode->first = 1;
newNode->second = 0;
} else {
newNode->first = 0;
newNode->second = 1;
}
newNode->dif = 0;
strcpy(newNode->name, word);
newNode->next = front;
front = newNode;
}
/*
* Edits the data for an existing word.
* Only called if current word exists in
* the linkedlist.
*/
void editWord(Node **endPtr, int book) {
if (book == 1) {
(*endPtr)->first++;
} else {
(*endPtr)->second++;
}
}
/*
* Sorts the list in descending order based on
* difference value.
*/
void sort() {
Node *curr, *last = NULL;
curr = front;
while (curr != last) {
while (curr->next != last) {
if(curr->dif < curr->next->dif ) {
swap(&curr, &curr->next);
}
curr = curr->next;
}
last = curr;
curr = front;
}
}
/*
* Swaps the data in the current and next node in the list.
*/
void swap(Node **a, Node **b) {
int temp;
char nameTemp[20];
//Swap first
temp = (*a)->first;
(*a)->first = (*b)->first;
(*b)->first = temp;
//Swap second
temp = (*a)->second;
(*a)->second = (*b)->second;
(*b)->second = temp;
//Swap dif
temp = (*a)->dif;
(*a)->dif = (*b)->dif;
(*b)->dif = temp;
//Swap name
strcpy(nameTemp, (*a)->name);
strcpy((*a)->name, (*b)->name);
strcpy((*b)->name, nameTemp);
}
/*
* Calculates the difference between first and second
*/
void calculateDiff() {
Node *curr = front;
while(curr != NULL) {
curr->dif = abs((curr->first - curr->second));
curr = curr->next;
}
}
/*
* Prints all the data from the nodes.
*/
void printAll() {
printf("|| Word || RedBadge || LittleRegiment || Diff\n");
Node *curr = front;
while ( curr != NULL ) {
printf("%s, %d, %d, %d\n", curr->name, curr->first, curr->second, curr->dif);
curr = curr->next;
}
}
/*
* main.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "list.h"
void readBook(int book, FILE *infile);
void readLine(char *line, int book);
int main (void) {
setvbuf(stdout, NULL, _IONBF,0);
FILE *infile = fopen("RedBadge.txt", "r");
FILE *infile2 = fopen("LittleRegiment.txt", "r");
readBook(1, infile);
readBook(2, infile2);
fclose(infile);
fclose(infile2);
calculateDiff();
sort();
printAll();
return 0;
}
void readBook(int book, FILE *infile) {
char line[70];
//Read in each line
while (!feof(infile)) {
fgets(line, 70, infile);
readLine(line, book);
}
}
void readLine(char *line, int book) {
int i = 0, j = 0;
char word[20];
while (line[i]) {
line[i] = tolower(line[i]); //Convert line to lowercase
if((line[i] <= 'z' && line[i] >= 'a') || line[i] == 39 || line[i] == '-') {
word[j] = line[i];
j++;
} else if (j != 0) {
word[j] = '\0';
findWord(word, book);
j = 0;
}
i++;
}
}
I believe your error is actually a buffer overflow. There are words in those books that are longer than 19 characters (the max that will fit in your word variable). When your readline function tries to read those words it will write outside the boundaries of the word array, which is undefined behavior. It will then also use strcpy to copy the word into the node, which will also overflow the node's word array.
A quick fix is to just throw away the extra characters past 19 that won't fit in your word array. In readline add a test for how big j is:
if (j < sizeof word - 1) {
word[j] = line[i];
j++;
}
One of the words in question is "ain't--plundering----" (at least in the copy of the text i downloaded), which leads me to think maybe you also should split words on punctuation.
I coded a simple source. It contains a queue and some of the function a queue needs but for some reason malloc() only works once.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define QUEUE sizeof(Queue)
Definition of the Node, which is an element of the list, and the queue.
typedef struct node {
char * value;
struct node * next;
} Node;
typedef struct queue {
Node * head;
Node * tail;
} Queue;
int initialization(void ** list, int type){
int code = -1;
//create an empty list.
//if queue dynamically allocate memory and assign NULL to both properties head and tail.
return code;
}
enqueue() add one element in the queue at a time. but for some reason it can only add one element then the program crashes.
int enqueue(Queue * q, char * instruction){
int code = -1;
if(q != NULL){
printf("Prepare to enqueue!\n");
Node * n = NULL;
n = (Node*)malloc(sizeof(Node));
if(n != NULL){
printf("Node created!\n");
strcpy(n->value, instruction);
n->next = NULL;
//if first value
if(q->head == NULL){
q->head = n;
q->tail = n;
printf("Enqueue first Node\n");
}
else {
q->tail->next = n;
q->tail = n;
printf("Enqueue another Node\n");
}
code = 0;
printf("Node \"%s\" Enqueued\n", instruction);
}
}
return code;
}
int dequeue(Queue * q){
int code = -1;
//dequeuing code here.
return code;
}
int isEmpty(void * list, int type){
int code = 0;
//check if the list is empty
return code;
}
the for loop in the main() function never reaches 3
int main(int argc, char * argv[]){
Queue * queue = NULL;
initialization((void*)&queue, QUEUE);
int i = 0;
for(i = 0; i < 3; i++){
if(enqueue(queue, "some value") != 0){
printf("couldn't add more Node\n");
break;
}
}
while(!isEmpty(queue, QUEUE)){
dequeue(queue);
}
return 0;
}
The initialization function is written this way because it should also be able to initialize stacks (I removed the stack code to reduce the source but even without it the bug persist). I also put printfs to debug the code. And I have more than enough memory to make this simple code run how it should.
Thanks in Advance!
Running this, I crash with a segmentation fault, as I'd expect:
n = (Node*)malloc(sizeof(Node));
n is allocated, it's contents uninitialized and effectively random
if(n != NULL){
n is not NULL, so...
strcpy(n->value, instruction);
And we crash.
See the problem? n->value is a pointer to nowhere. Or, to somewhere, but nowhere known. Nowhere good. And we're just dumping a string into that space.
Either change the Node struct so that value is a char [SOME_SIZE], or use strdup() instead of strcpy(), to actually allocate some memory for the poor thing.
n->value = strdup(instruction);