Trouble with strcmp function in C - c

I am writing a function called check that compares the alphabetical string of a dictionary that is loaded in through the command line with a text that is also loaded in through the command line. The function is part of a larger function called speller that acts as a spell checker.
I ran several printf debugging tests to check if the words being compared in the strcmp function. The problem comes here. The function finds that all words in the text are incorrectly spelled even when the printf test shows that the strings from the dictionary and the text are the same.
Don't know where to go from this point so any help would be greatly appreciated. Thanks so much
Below is the code for the particular function. Thanks again.
typedef struct node {
char word[LENGTH + 1];
struct node *next;
} node;
node *hashtable[27];
/* Returns true if word is in dictionary else false. */
int hash_fun (const char key);
bool check (const char *word)
{
//case-desensitizing
char caseless[strlen (word)];
int i, length;
for (int head = 0; head < 26; head++) {
hashtable[head] = NULL;
}
for (i = 0, length = strlen (word); i < length; i++) {
//("%c\n",word[i]);
if (isupper (word[i])) {
caseless[i] = tolower (word[i]);
} else {
caseless[i] = word[i];
}
}
caseless[i] = '\0';
//printf("-%s %s- \n*",word, caseless);
int word_index = hash_fun (caseless);
//printf("%i", word_index);
node *new_node = malloc (sizeof (node));
if (new_node == NULL) {
return 2;
}
if (word_index >= 0) {
if (hashtable[word_index] == NULL) {
hashtable[word_index] = new_node;
new_node->next = NULL;
}
node *cursor = malloc (sizeof (node));
cursor = hashtable[word_index];
while (cursor != NULL) {
//printf("Dictionary:%s and Text:%s \n", cursor->word, caseless);
int found;
found = strcmp (caseless, cursor->word);
if (found == 0) {
return true;
}
cursor = cursor->next;
}
}
return false;
}

Related

What is wrong with my replace string with another string or character using linked list

I have a linked list with many chars which I input from my input (what is the weather today?), to be replaced with another string (for example what replaced with how, so I get how is the weather today?).
But if the given words are right next to each other for example whatwhat, it will change to howwhat, disregarding the second part.
I think the problem is in the compare function, but I have no clue how to fix it, but the logic of replace should go like this:
If the words from my list and the needed word are the same, then proceed to iterate to the position where the next node of the word that should be changed (unwanted word) should be (pretty much the end of the word), then I create a new linked list with character with the wanted word, and connect temp to the start of the list and the next of the list to the position where the next character of the word that needs to be changed (unwanted word), which I found in the first loop.
Also don't roast my input() function, I know it is unsafe I just want to see what unsafe means with my own eyes, while I still have nothing to lose.
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct node {
int value_c;
struct node *next_c;
struct node *prev_c;
};
typedef struct node string;
int compare(string *head, char *word) {
int counter = 0;
string *temp = head;
for (int i = 0; i < strlen(word); i++) {
if (temp->value_c == word[i]) {
temp = temp->next_c;
counter++;
}
}
if (counter == strlen(word))
return 1;
else
return 0;
}
void print_c(string *head) {
while (head != NULL) {
printf("%c", head->value_c);
head = head->next_c;
}
}
void append_c(string **head, char thing) {
string *newNode = (string *)malloc(sizeof(string));
newNode->value_c = thing;
newNode->next_c = NULL;
if (*head == NULL) {
*head = newNode;
newNode->prev_c = NULL;
return;
}
string *temp = *head;
while (temp->next_c != NULL)
temp = temp->next_c;
temp->next_c = newNode;
newNode->prev_c = temp;
}
string *replace_all1(string *head, char *what, char *with_what) {
string *temp = head;
while (temp != NULL) {
printf("%c ", temp->value_c);
if (compare(temp, what) == 1) {
printf("%i ", 1);
printf("%c ", temp->value_c);
string *new = temp;
for (int i = 0; i < strlen(what) - 1; i++) {
new = new->next_c;
}
string *word = NULL;
for (int i = 0; i < strlen(with_what); i++) {
append_c(&word, with_what[i]);
}
string *word_temp = word;
while (word_temp->next_c != NULL) {
word_temp = word_temp->next_c;
}
word_temp->next_c = new->next_c;
if (temp->prev_c != NULL) {
temp->prev_c->next_c = word;
} else {
head = word;
print_c(head);
temp = word;
print_c(temp);
word->prev_c = NULL;
}
}
temp = temp->next_c;
}
printf("\n");
return head;
}
string *String(char *str) {
string *st = NULL;
int i = 0;
while (str[i] != '\0') {
append_c(&st, str[i]);
i++;
}
return st;
}
string *input() {
char *a = (char *)malloc(sizeof(char));
scanf("%[^\n]", a); //maximum of 1408
string *stri = String(a);
return stri;
free(a);
}
int main() {
string *list = NULL;
string *big_boy_string = input();
//printf("%c", big_boy_string->value_c);
//print_c(big_boy_string);
//printf("\n");
//printf("%i", compare(big_boy_string, "what"));
//printf("%i ", len(big_boy_string));
//printf("\n");
//print_c(slice(big_boy_string, 1, 10));
//print_c(replace(big_boy_string, 'h', 'a'));
//printf("\n");
//print_c(reverse(big_boy_string));
print_c(replace_all1(big_boy_string, "a", "b"));
//getline();
}
char *a = (char*) malloc(sizeof(char));
scanf("%[^\n]",a); //maximum of 1408
The first statement allocates memory for just 1 byte. So the maximum is not 1408, but 1. It can store a single char, or the null-terminator if it's a string, but no more.
Next, scanf() will write to out of bounds memory, and invoke undefined behaviour. The subsequent functions all depend on this undefined behaviour, so I'm not going to look at them.
But then, you've a memory leak in the same function.
return stri;
free(a);
You return before freeing the allocated memory. The call to free() is never executed.
The return value of malloc() is also ignored. Code risks undefined behaviour if the subsequent dereferences are on a NULL pointer.
Aside: The cast is meaningless and may hide a bug. malloc() and family returns a void * that is implicitly converted to the right type.
Re: Also don't roast my input() function, I know its unsafe I just
want to see what unsafe means with my own eyes.
If you are already aware of this, then you shouldn't be asking why your code doesn't work. You are relying on undefined behaviour (playing with fire).
There is no need to look further than the input function: it has undefined behavior or the worst kind because you attempt to read the input string into a very small array, allocated for a single byte. You must fix this first. Since you know the maximum length of your input string, you can use this:
string *input(void) {
char a[1409];
if (scanf("%1408[^\n]", a) != 1) { //maximum of 1408
// invalid or missing input
return NULL;
}
scanf(%*[^\n]"); // consume any remaining characters on the input line
scanf(%*1[\n]"); // consume the newline if present
return String(a);
}
Here is an alternative using getchar() instead of scanf() which is quite tricky and error prone:
string *input(void) {
char a[1409];
int c;
size_t i = 0;
while ((c = getchar()) != EOF && c != '\n') {
if (i + 1 < sizeof(a))
a[i++] = (char)c;
}
if (c == EOF && i == 0) {
/* end of file without any input */
return NULL;
}
a[i] = '\0';
return String(a);
}
The compare function is incorrect: it should return false as soon as the comparison fails and it must test for the end of string (temp == NULL):
int compare(const string *head, const char *word) {
string *temp = head;
for (size_t i = 0; word[i] != '\0'; i++) {
if (temp == NULL || temp->value_c != word[i])
return 0;
temp = temp->next_c;
}
return 1;
}
The replace_all1() function has problems too:
for (int i = 0; i < strlen(what) - 1; i++) will cause undefined behavior if what is an empty string because strlen(what) - 1 is unsigned with the value SIZE_MAX in this case, causing the loop to proceed for a very long time, well beyond the end of the list pointed to by new.
while (word_temp->next_c != NULL) will cause a undefined behavior if the replaced word is empty as word_temp will be NULL.
once you replace the sublist, you do not update temp correctly to point to the node after the replaced one, which you could achieve by setting temp to word_temp.
the function does not free the replaced sublist.
Here is a modified version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct node {
int value_c;
struct node *next_c;
struct node *prev_c;
};
typedef struct node string;
void string_append_char(string **head, int c) {
string *node = malloc(sizeof(*node));
if (node == NULL) {
fprintf(stderr, "out of memory\n");
exit(1);
}
node->value_c = c;
node->next_c = NULL;
if (*head == NULL) {
node->prev_c = NULL;
*head = node;
} else {
string *temp = *head;
while (temp->next_c != NULL)
temp = temp->next_c;
node->prev_c = temp;
temp->next_c = node;
}
}
string *string_new(const char *str) {
string *st = NULL;
for (int i = 0; str[i] != '\0'; i++) {
string_append_char(&st, str[i]);
}
return st;
}
string *string_input(const char *prompt) {
string *st = NULL;
int c;
if (prompt) {
printf("%s", prompt);
}
while ((c = getchar()) != EOF && c != '\n') {
string_append_char(&st, c);
}
return st;
}
void string_print(const char *before, const string *head, const char *after) {
printf("%s", before);
while (head != NULL) {
putchar(head->value_c);
head = head->next_c;
}
printf("%s", after);
}
void string_free(string *head) {
while (head != NULL) {
string *next = head->next_c;
free(head);
head = next;
}
}
int string_compare(const string *head, const char *word) {
const string *temp = head;
for (size_t i = 0; word[i] != '\0'; i++) {
if (temp == NULL || temp->value_c != word[i])
return 0;
temp = temp->next_c;
}
return 1;
}
int string_replace(string **head, const char *what, const char *with_what) {
int count = 0;
if (*what == '\0')
return 0;
string *temp = *head;
while (temp != NULL) {
if (string_compare(temp, what)) {
count++;
// locate the last node of the substring
string *temp_end = temp;
for (size_t i = 0; what[i + 1] != '\0'; i++) {
temp_end = temp_end->next_c;
}
string *next = temp_end->next_c;
if (*with_what == '\0') {
// just delete the substring
if (temp->prev_c != NULL) {
temp->prev_c->next_c = next;
} else {
*head = next;
}
if (next) {
next->prev_c = temp->prev_c;
}
} else {
// create a string from the replacement
string *word = string_new(with_what);
// locate the last node of the new substring
string *word_end = word;
while (word_end->next_c != NULL) {
word_end = word_end->next_c;
}
word->prev_c = temp->prev_c;
if (temp->prev_c != NULL) {
temp->prev_c->next_c = word;
} else {
*head = word;
}
word_end->next_c = next;
if (next) {
next->prev_c = word_end;
}
}
temp_end->next_c = NULL;
string_free(temp);
temp = next;
} else {
temp = temp->next_c;
}
}
return count;
}
int main() {
string *list = string_input("enter string: ");
string_print("input: ", list, "\n");
printf("replacing 'what' to 'how': %d matches\n", string_replace(&list, "what", "how"));
string_print("rep1: ", list, "\n");
printf("replacing 'a' to 'b': %d matches\n", string_replace(&list, "a", "b"));
string_print("rep2: ", list, "\n");
printf("deleting 'h': %d matches\n", string_replace(&list, "h", ""));
string_print("rep3: ", list, "\n");
string_free(list);
return 0;
}
Sample session:
enter string: what is the weather today?
input: what is the weather today?
replacing 'what' to 'how': 1 matches
rep1: how is the weather today?
replacing 'a' to 'b': 2 matches
rep2: how is the webther todby?
deleting 'h': 3 matches
rep3: ow is te webter todby?

CS50 Pset5 Speller, Check50 says that my code is all correct, but its not right when I test my code

I have been working on the Speller assignment in pset5, and when I run the check50 command, everything seems to be fine.
:) dictionary.c exists
:) speller compiles
:) handles most basic words properly
:) handles min length (1-char) words
:) handles max length (45-char) words
:) handles words with apostrophes properly
:) spell-checking is case-insensitive
:) handles substrings properly
:) program is free of memory errors
But when I start testing the program myself by running "./speller texts/lalaland.txt " This happens
ps5/speller/ $ ./speller texts/lalaland.txt
Segmentation fault (core dumped)
Here is my code, code anyone help spot what is wrong with this.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 676;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
int number = hash(word);
node *current = table[number];
while (current != NULL)
{
if (strcasecmp(word, current -> word) == 0)
{
return true;
}
current = current -> next;
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
int a = toupper(word[0]) - 'A';
int b = toupper(word[1]) - 'A';
return (a * b);
}
int counter;
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
//Open the file and check if it actually exists
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("Unable to open file\n");
return false;
}
for (int i = 0; i < N; i++)
{
table [i] = malloc(sizeof(node));
table [i]-> next = NULL;
for (int u = 0; u < 48 ; u++)
{
table [i]-> word[u] = '0';
}
}
char buffer[LENGTH + 1];
//Read all the individual strings from the file
while (fscanf(file, "%s", buffer) != EOF)
{
//Create nodes and copy the words into them
node *current_node = malloc(sizeof(node));
if (current_node == NULL)
{
return false;
}
strcpy(current_node -> word, buffer);
int hashnumber = hash(buffer);
node *pointer = table[hashnumber];
if (pointer == NULL)
{
table[hashnumber] = current_node;
counter++;
}
else
{
current_node -> next = table[hashnumber];
table[hashnumber] = current_node;
counter++;
}
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return counter;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
for (int i = 0; i < 676; i++)
{
node *current = table[i];
while (current != NULL)
{
node *temp = current;
current = current -> next;
free(temp);
}
}
return true;
}
Try this:
node *current_node = malloc(sizeof(node));
if (current_node == NULL)
{
return false;
}
current_node->next = NULL; // Missing, and presumed NULL..
strcpy(current_node->word, buffer);
It seemed like once I initialized the variable as NULL and made changes to the hash function. I checked through the other files and it worked now. Thanks!

Using Trie to Store Word Counts

Goal is to read a web page, store all words in a trie with each node containing one letter and a count of the number of characters, print the words and number of occurrences. I keep getting a segmentation fault and I think the issue is in one of these functions. Thanks!
struct trieNode *indexPage(const char *url) {
if (url == NULL) {
return NULL;
printf("Web link must be provided.");
}
//get text from page and check return value
char *page = NULL;
int bytesRead = getText(url, page, MAX_BUFFER_SIZE);
if (page == NULL) {
printf("Page could not be indexed.");
return NULL;
}
//index buffer into separate words
int i = 0;
char *word = NULL;
struct trieNode *node = malloc(sizeof(struct trieNode));
if (node == NULL) {
printf("Node memory could not be allocated.");
return NULL;
}
while (i < bytesRead) {
while (isalpha(page[i])) {
word[i] = page[i];
}
addWordOccurrence(word, sizeof(word), i);
i++;
}
return node;
}
//Create space for node in heap and add to trie structure
int addWordOccurrence(const char* word, const int wordLength, int index) {
if (word == NULL)
return -1;
//allocate memory for new node
struct trieNode *node = malloc(sizeof(struct trieNode));
if (node == NULL) {
printf("Node memory could not be allocated.");
return -2;
}
//recursively add characters to trie and
//increase count
if (index < wordLength) {
setNodeData(node->child[index], word[index]);
node->count++;
}
addWordOccurrence(word, wordLength, index + 1);
return 0;
}
Using gdb I found the fault may be coming from the print function, possibly when trying to access pointers.
//Prints contents
void printTrieContents(struct trieNode *root) {
//if child is found with a non zero count
//add child character to string
char *word = NULL;
for (int i = 0; i < SIZE; i++) {
if ((root->count) && (root->child[i])) {
word[i] = i + 'a';
printTrieContents(root->child[i]);
}
}
if (root->child == NULL) {
printf("%s: %d", word, root->count);
}
}
There are multiple issues:
in indexPage, while (isalpha(page[i])) { word[i] = page[i]; } is potentially an infinite loop.
in printTrieContents, word[i] = i + 'a' dereferences a null pointer as word is never allocated.
addWordOccurrence always recurses, even after reaching the last character. There is no need for recursion, use a loop and a proper test.
more algorithmic issues: the code needs a lot a work.
superficially, it looks like addWordOccurrence(word, sizeof(word), i); should be addWordOccurrence(word, sizeof(word), 0); - the last parameter being the index of each letter that is handled in the recursion.

CS50 Pset5 check() counting too many words as misspelled

I have loaded the dictionary into a tree structure and successfully gotten speller.c to compile with the following implementations of load() and check().
However, when I run the program, an incorrect number of words are counted as misspelled by my check() function. (In the case of lalaland.txt, it's 17187 words out of 17756).
I can't figure out what's wrong with my code and would be extremely grateful to anyone who could help point me in the right direction.
typedef struct node
{
bool isword;
struct node *children[27];
}
node;
node *root = NULL;
// Function returns the position of any given letter in the alphabet e.g. a = 1, b = 2 etc. Returns 0 for an apostrophe.
int index(char letter)
{
if (isalpha(letter))
{
int i = letter - 96;
return i;
}
return 0;
}
// Keeps track of number of words loaded into dictionary.
unsigned int wordno = 0;
// Returns true if word is in dictionary else false
bool check(const char *word)
{
char newword[LENGTH + 1];
node *temp = root;
for (int j = 0; j < strlen(word); j++)
{
//Makes each letter of the input lowercase and inserts it into a new array.
newword[j] = tolower(word[j]);
}
for (int i = 0; i < strlen(word); i++)
{
//Finds the position of the character in the alphabet by making a call to index().
int letter = index(newword[i]);
if (temp->children[letter] == NULL)
{
return false;
}
else
{
temp = temp->children[letter];
}
}
if (temp->isword == true)
{
return true;
}
return false;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r");
root = calloc(1, sizeof(node));
node *temp = root;
if (dict == NULL)
{
fprintf(stderr, "Could not load dictionary.\n");
return false;
}
char word[LENGTH+1];
while (fscanf(dict, "%s", word) != EOF)
{
for (int i = 0; i < strlen(word); i++)
{
int letter = index(word[i]);
if (temp->children[letter] == NULL)
{
temp->children[letter] = calloc(1, sizeof(node));
if ((temp->children[letter]) == NULL)
{
unload();
return false;
}
}
temp = temp->children[letter];
}
temp->isword = true;
wordno++;
}
return true;
}
node *temp = root;
should be placed inside this while loop:
while (fscanf(dict, "%s", word) != EOF)
By doing this, you allow temp to go back and point to the root node each time the loop begins iterating over a new word in the file.

I have a segmentation fault and am unsure about what is wrong with my code

Any guidance would be appreciated. I personally believe the problem lies in the load method. Also, the basic functionality of each method is written in the comments. What could be the cause of my segmentation fault? and Is everything working as intended? Thank you for your time.
Any resources that may point in me in the proper direction would be appreciated too.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include "dictionary.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <cs50.h>
//Defining node:
typedef struct node
{ //Inner workings of each "element" in the linked lists
char word[LENGTH + 1]; //the word within the node is +1'd due to the memory after the word containing /0
struct node *next; //linked list
}node;
node *alphabetList[27]; //26 buckets that can contain variables of type node(of dynamic size)
//one bucket for each letter of the alphabet
node *cursor = NULL;
node *head = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int bucketIndex ;
//no need to malloc information b/c we are simply pointing to previously established nodes.
if(word[0] >= 65 && word[0] < 97){
bucketIndex = word[0] - 65;
}
else{
bucketIndex = word[0] - 97;
}
node *head = alphabetList[bucketIndex];
node *cursor = head;
while(cursor != NULL)
{
cursor = cursor -> next;
if(strcmp(cursor -> word, word) != 0)
{
return true;
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
{
//for every word we scan we want to malloc a node to ascertain we have sufficent memory
node *new_node = malloc(sizeof(node));
if(new_node == NULL) //error check(if you run out of memory malloc will return null)
{
unload();
return false;
}
//error check complete.
else{
strcpy(new_node -> word, word);
}
//not sure from here on
char first_letter = new_node[i].word[0]; //first letter of node word (confused on how to execute this properly)
first_letter = tolower(first_letter);
int index = first_letter - 97;
if(word){
for(node *ptr = alphabetList[index]; ptr!= NULL; ptr = ptr->next)
{
if(!ptr-> next){
ptr->next = new_node;
}
}
}
else
{
alphabetList[index] = new_node;
}
i++;
}
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return 0;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
for(int i = 0; i <= 26; i++)
{
node *head = alphabetList[i];
node *cursor = head;
while(cursor != NULL)
{
node *temp = cursor;
cursor = cursor -> next;
free(temp);
}
}
return true;
}
The problem is obvious now you've said on which line the code crashes. Consider these lines...
char *word = NULL;
int i = 0; //index
FILE *dictionaryTextFile;
dictionaryTextFile = fopen(dictionary, "r");
//scan for word
while(fscanf(dictionaryTextFile, "%s", word) != EOF)
You've got 2 problems there. Firstly, you don't check that the call to fopen worked. You should always check that the value returned is not NULL.
Secondly, and the cause of the crash, is that word is still NULL - you don't allocate any space to hold a string in it. You might as well declare it the same as you declare it inside node so replace
char *word = NULL;
with
char word[LENGTH+1];
Speaking of node and to save you coming back with another crash later, you should always make sure you initialise all attributes of a struct. In this case new_node->next should be set to NULL as otherwise you'll come to check it later in your for loop (which looks fine BTW) and it might appear to point to a node, but it's pointing at some random place in memory and the code will crash.

Resources