Spell-checker in C - c

I've been trying to implement a spell-checker using a large dictionary against some text file which contains around 2000 words. However, my spell-checker returns all words as being misspelled. I honestly have no idea why — could someone help me?
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
#define lenght 45
#define hashtable_size 65536
char word[lenght+1];
int count = 0;
/*
*
* Hash function. Thanks to Brenda from cs50 reddit.
*/
int hash_it(const char* needs_hashing)
{
unsigned int hash = 0;
for (int i=0, n=strlen(needs_hashing); i<n; i++)
hash = (hash << 2) ^ needs_hashing[i];
return hash % hashtable_size;
}
typedef struct node
{
char* word;
struct node* next;
}node;
node* previous;
node* hashtable[hashtable_size];
/*
*
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
char word[lenght+1];
FILE* dict = fopen(dictionary,"r");
for(int i = 0; i < 26;i++)
{
hashtable[i] = NULL;
for(int a = fgetc(dict); a != EOF; a = fgetc(dict))
{
count++;
int hashvalue = hash_it(word);
node* new = malloc(sizeof(node));
if(hashtable[hashvalue] == NULL)
{
hashtable[hashvalue] = new;
new -> next = NULL;
}
else
{
new -> next = hashtable[hashvalue];
hashtable[hashvalue] = new;
}
}
}
fclose(dict);
return true;
}
/*
*
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
char tmp[lenght + 1];
int lenghtw = strlen(word);
for (int i = 0; i < lenghtw; i++)
{
tmp[i] = tolower(word[i]);
}
int index = hash_it(tmp);
if (hashtable[index] == NULL)
{
return false;
}
node* cursor = hashtable[index];
while(cursor != NULL)
{
if(strcmp(tmp, cursor -> word) == 0)
{
return true;
}
cursor = cursor -> next;
}
return false;
}
/*
*
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return count;
}
/*
*
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
int index = 0;
while(index < hashtable_size)
{
if(hashtable[index] == NULL)
{
index++;
}
else
{
while(hashtable[index] != NULL)
{
node* cursor = hashtable[index];
hashtable[index] = cursor -> next;
free(cursor);
}
index++;
}
}
return true;
}
int main(int argc, char **argv)
{
if (argc != 2)
return 3;
if (!load("dictionary"))
return 1;
printf("loaded %d words\n", size());
printf("word '%s'%s found\n", argv[1], check(argv[1]) ? "" : " not");
unload();
return 0;
}

There are many problems in your code:
in the load function, you do not load words from the dictionary into the hash table. You read one character at a time with fgetc() and create a node from an uninitialized local buffer word.
the hash_it function only hashes the last 16 characters from the word. Furthermore, hashtable_size is a power of 2, a bad idea. Indeed only the last 8 characters participate in the hash value. This is not a bug, just an inefficient hashing method.
in the check function, you copy the word and convert it to lowercase, but you forget to set the final byte of the tmp array to '\0'.
Here is a corrected version of load that reads one word per dictionary line:
bool load(const char *dictionary) {
char line[256];
FILE *dict = fopen(dictionary, "r");
if (!dict)
return false;
while (fgets(line, sizeof line, dict) != NULL) {
char *p = line + strspn(line, " \t"); // skip blanks
p[strcspn(p, " \t\r\n")] = '\0'; // strip trailing blanks
if (*p == '\0' || *p == '#' || *p == ';')
continue; // ignore blank lines and comments
count++;
int hashvalue = hash_it(p);
node *np = malloc(sizeof(node));
np->word = strdup(p);
np->next = hashtable[hashvalue];
hashtable[hashvalue] = np;
}
fclose(dict);
return true;
}

Related

Printing top 10 recurring words in a file

Edited question:
Hi guys, my goal is to print the top 10 occurring words in a file, I have managed to get everything to work from reading the file to counting word occurrences and printing it, but when I implement my qsort I get a segfault. I looked over my pointers and they look okay to me, I would appreciate any feedback.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX 51
struct words
{
char *ch;
int index;
struct words *pNext;
};
struct words* createWordCounter(char *ch)
{
struct words *pCounter = NULL;
pCounter = (struct words*)malloc(sizeof(char));
pCounter->ch = (char*)malloc(strlen(ch)+1);
strcpy(pCounter->ch, ch);
pCounter->index = 1;
pCounter->pNext = NULL;
return pCounter;
}
struct words *pStart = NULL;
char* removePunc(struct words* ch)
{
char *src = ch, *dst = ch;
while (*src)
{
if (ispunct((unsigned char)*src))
{
src++;
}
else if (isupper((unsigned char)*src))
{
*dst++ = tolower((unsigned char)*src);
src++;
}
else if (src == dst)
{
src++;
dst++;
}
else
{
*dst++ = *src++;
}
}
*dst = 0;
}
void addWord(char *word)
{
struct words *pCounter = NULL;
struct words *pLast = NULL;
if(pStart == NULL)
{
pStart = createWordCounter(word);
return;
}
pCounter = pStart;
while(pCounter != NULL)
{
if(strcmp(word, pCounter->ch) == 0)
{
++pCounter->index;
return;
}
pLast = pCounter;
pCounter = pCounter->pNext;
}
pLast->pNext = createWordCounter(word);
}
void printWord(struct words *pCounter)
{
printf("\n%-30s %5d\n", pCounter->ch, pCounter->index);
}
//sort
int compare (const void * a, const void * b){
struct words *A1 = (struct words *)a;
struct words *B1 = (struct words *)b;
return B1->index - A1->index;
/*
if ((A1->count - B1->count) > 0)
return -1;
else if ((A1->count - B2->count) < 0)
return 1;
else
return 0;
*/
}
int main(int argc, char * argv[])
{
struct words *pCounter = NULL;
char temp[MAX];
FILE *fpt;
if(argc == 2)
{
printf("File name is: %s\n",argv[1]);
fpt = fopen(argv[1], "r");
//fail test
if(fpt == NULL)
{
printf("cannot open file, exiting program...\n");
exit(0);
}
//get the data out of the file and insert in struct
int wordCounter = 0;
int i = 0;
int lines = 0;
while((fscanf(fpt, "%s ", &temp)) == 1)
{
removePunc(temp);
addWord(temp);
if(temp == ' ')
i++;
if(temp == '\n')
lines++;
wordCounter++;
}
/*
pCounter = pStart;
while(pCounter != NULL)
{
printWord(pCounter);
pCounter = pCounter->pNext;
}
*/
//sort
qsort(pCounter, wordCounter, sizeof(struct words), compare);
for(int j = 0; i < 10; i++)
{
printWord(pCounter);
}
}
fclose(fpt);
return 0;
}
First temp is already a pointer, so do not include '&' before it in fscanf. Second, don't skimp on buffer size (e.g. #define MAX 1024). Third, protect your array bounds with the field-width modifier and don't put trailing whitespace in your format-string.
Putting it altogether (presuming you use 1024 as MAX, you can use
fscanf(fpt, "1023%s", temp))
Well done on checking the return of fscanf during your read.
Adding to the things that have already been mentioned.
In createWordCounter(...)
pCounter = (struct words*)malloc(sizeof(char));
you are allocating memory for a char. Even though the pointer to a struct is the pointer to its first member, the first element of words is a pointer to a char. It is better to be careful and write
struct words *pCounter = malloc(sizeof *pCounter);
Also, be mindful of operator precedence.
In addWord(...) you have
++pCounter->index;
What that does is increment the pointer pCounter before accessing index. If you are trying to increment index, it should be
++(pCounter->index);
or
pCounter->index++;
I recommend striping your program down to its bare essentials and test each part one at a time systematically to narrow down the cause of your errors.
I think the main problem is the size of temp array when you try to using fscanf.
while((fscanf(fpt, "%s ", temp)) == 1)
When the length of one line is bigger than MAX, segmentation fault occur.
You can change your code like this
#define SCANF_LEN2(x) #x
#define SCANF_LEN(x) SCANF_LEN2(x)
//...
//your original code
//...
while((fscanf(fpt, "%"SCANF_LEN(MAX)"s ", temp)) == 1)
By the way, you should check
(1) compile warning about type
char* removePunc(struct words* ch)
should be char* removePunc(char *ch)
if(temp == ' ') should be if(temp[0] == ' ')
if(temp == '\n') should be if(temp[0] == '\n')
(2) malloc size
pCounter = (struct words*)malloc(sizeof(char)); should be pCounter = (struct words*)malloc(sizeof(struct words));
(3) remember free after using malloc

Memory Leak using Tries - pset5 cs50 - dictionary.c

I wrote a code for pset5 in CS50 course and was wondering if anyone could help me solve the problem of memory leak.
There is a program called speller.c that gets words from a text and use functions in dictionary.c (below) to look at another file (dictionary) and if the word is mispelled or not in the dictionary, it will printf all those words and calculate the running time of the whole program.
My code is actually working fine and I do get the correct output, but when I use valgrind, it says that memory is leaking. Why is this hapenning?
/**
* CS50 - PSET5 - user: linhobru
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
#define ALPHABETSIZE 27
typedef struct node
{
bool is_word;
struct node *children[ALPHABETSIZE];
}
node;
node *root = NULL;
int num_of_words = 0;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
node* next = root;
int k = ALPHABETSIZE;
for (int i = 0, len = strlen(word); i < len ; i++) // until (char) word[i] == '\0'
{
// verify letter and transform in index from 0 to 26
if (word[i] == '\'')
{
k = ALPHABETSIZE - 1;
}
else if (isalpha(word[i]))
{
k = tolower(word[i]) - 97;
}
if (next->children[k] == NULL)
{
return false;
}
else
{
next = next->children[k];
}
}
return next->is_word;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
char word[LENGTH];
root = malloc(sizeof(node));
if (root == NULL)
{
unload();
fclose(dict);
return false;
}
while (fscanf(dict,"%s", word) != EOF)
{
node* now = root;
int k = ALPHABETSIZE;
for (int i = 0; word[i] != '\0'; i++)
{
// verify letter and transform in index from 0 to 26
if (word[i] == 39) // '\''
{
k = ALPHABETSIZE - 1;
}
else if (word[i] >= 97 && word[i] <= 122)
{
k = word[i] - 97;
}
/* else if (word[i] >= 65 && word[i] <= 90)
{
k = word[i] % 65;
}*/
else
{
fclose(dict);
return false;
}
if (now->children[k] == NULL)
{
node* new_node = malloc(sizeof(node));
now->children[k] = new_node;
}
now = now->children[k];
if (word[i + 1] == '\0')
{
now->is_word = true;
}
}
num_of_words++;
}
fclose(dict);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return num_of_words;
}
/**
* Receives root at first and then each children to free.
*/
void unloadNode(node* next)
{
// call unloadNode on every node in this node's children
for(int i = 0; i < ALPHABETSIZE; i++)
{
if(next->children[i] != NULL)
{
unloadNode(next->children[i]);
}
}
// once the children nodes are freed, free this node
free(next);
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
unloadNode(root);
return true;
}
access to files in: https://github.com/linhobru/cs50psets/tree/master/workspace/pset5/speller/

How to compare words if they are identical in C ?linked list [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
The code is given. I need to count number of words that are non identical. In order to do that i need to compare them using stcrmp. By looking the code below, how i need to construct while or if statements to compare words in a file by using double linked list? I suppose this condition should be in the main to print it then. My condition doesn't work. Also, can you give some advice where and how to sort words by their length here?
To understand the code some explanation:
This program holds a doubly linked list that will read a file that is entered as a command line argument, read each line from file, tokenize each word from line and for each word will place it into a Word Length structure depending on its length and then will place it into a word_count structure dependent on the word's string and count each word's occurrence in a file.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define DELIM " ,.+-=!?:;\t"
#define MAXLINE 25000
typedef struct word_count {
char *word;
int count;
struct word_count *next;
struct word_count *prev;
} WORD;
typedef struct word_length_count {
int length;
int count;
WORD *words;
struct word_length_count *next;
struct word_length_count *prev;
} WLENGTH;
int splitIntoWords(char line[]);
void processLength(char *word);
void processWord(char *word, WORD *wordCount);
void printWordLength();
WLENGTH *createWordLength(char *word);
WORD *createWordCount(char *word);
WLENGTH *wordLength = NULL;
int main(unsigned int argc, unsigned char *argv[]) {
FILE *fpin;
char line[MAXLINE];
int totalWordCount = 0;
if ((fpin = fopen(argv[1], "r")) == NULL) {
printf("Can't open input file.\n");
exit(-1);
}
printf("This is the words all tokenized from the input!\n");
while (fgets(line, MAXLINE, fpin) != NULL) {
line[strcspn(line, "\n")] = '\0';
if (line[0] == '\0')
continue;
totalWordCount += splitIntoWords(line);
}
printf("Total number of words is: %d\n", totalWordCount);
printWordLength();
printf("\nFINISHED!");
}
int splitIntoWords(char line[]) {
char *word;
int count=0;
word = strtok(line, DELIM);
for (;word != NULL;) {
count++;
printf("%s\n", word);
processLength(word);
word = strtok(NULL, DELIM);
}
return count;
}
void processLength(char *word)
{
WLENGTH *wLCounter = NULL;
WLENGTH *wLLast = NULL;
if (wordLength == NULL) {
wordLength = createWordLength(word);
return;
}
wLCounter = wordLength;
while (wLCounter != NULL) {
if (strlen(word) == wLCounter->length) {
++wLCounter->count;
processWord(word, wLCounter->words);
return;
}
wLLast = wLCounter;
wLCounter = wLCounter->next;
}
wLLast->next = createWordLength(word);
}
void processWord(char *word, WORD *wordCount) {
WORD *wCounter = NULL;
WORD *wLast = NULL;
if (wordCount == NULL) {
wordCount = createWordCount(word);
return;
}
wCounter = wordCount;
while (wCounter != NULL) {
if (strcmp(word, wCounter->word) == 0) {
++wCounter->count;
return;
}
wLast = wCounter;
wCounter = wCounter->next;
}
wLast->next = createWordCount(word);
}
WLENGTH *createWordLength(char *word) {
WLENGTH *wLCounter = NULL;
wLCounter = (WLENGTH*)malloc(sizeof(WLENGTH));
wLCounter->words = createWordCount(word);
wLCounter->count = 1;
wLCounter->length = strlen(word);
wLCounter->next = NULL;
return wLCounter;
}
WORD *createWordCount(char *word) {
WORD *wCount = NULL;
wCount = (WORD*)malloc(sizeof(WORD));
wCount->word = (char*)malloc(strlen(word+1));
strcpy(wCount->word, word);
wCount->count = 1;
wCount->next = NULL;
return wCount;
}
void printWordLength() {
WLENGTH *temp = wordLength;
WORD *tempWORD = wordLength->words;
while (temp != NULL) {
WORD *tempWORD = wordLength->words;
tempWORD = temp->words;
printf("\nFor Word Length: %d : There are: %d occurances!\n", temp->length, temp->count);
while (tempWORD != NULL) {
printf("\t%s\toccurs:%d\n", tempWORD->word, tempWORD->count);
tempWORD = tempWORD->next;
}
}
}
You're missing this at the bottom of the outermost while loop of printWordLength():
temp = temp->next;
That's why it goes into an infinite loop (which you didn't tell us).
Now, to count distinct words you just need to count every WORD* in every WORDLENGTH*, which you can do while you're printing them in printWordLength():
void printWordLength()
{
WLENGTH * temp = wordLength;
WORD * tempWORD = wordLength->words;
unsigned int unique_words = 0;
while(temp != NULL)
{
WORD * tempWORD = wordLength->words;
tempWORD = temp->words;
printf("\nFor Word Length: %d : There are: %d occurences!\n",
temp->length, temp->count);
while(tempWORD != NULL)
{
printf("\t%s\toccurs:%d\n", tempWORD->word, tempWORD->count);
unique_words++;
tempWORD = tempWORD->next;
}
temp = temp->next;
}
printf("\nThere are %u unique words\n", unique_words);
}

Load/fill a struct with char** array as a struct member, c

In the last two days i have asked a question to load struct, but i have a problem to access my struct out side my loop(a loop to load my struct). i have edited my question/and code this way:
myfile.txt
Biology,chemistry,maths,music
Mechanics,IT,Geology,music,Astronomy
football,vollyball,baseball
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define path "myfile.txt"
typedef struct student_info
{
char **cources_as_list;
} std_info;
std_info *myinfo; //a global var that will conatain student info
int line_count = 0, cource_count = 0;
char** load_file()
{
char *line = NULL;
size_t len = 0;
FILE *fp;
int indexq=0;
fp = fopen(path, "r");
if (fp == NULL)
{
perror("FILE OPEN ERROR[IN load_file]: ");
exit(1);
}
char **mydata = malloc (sizeof (char *) * 4);//aup to four elements
while (getline(&line, &len, fp) != -1)
{
strtok(line, "\n");
mydata[indexq]= strdup(line);
indexq++;
}
line_count = indexq;
return mydata;
}
char **return_cource_list(char *cources_string) {
char *token;
char **cource_list = malloc(sizeof(char *) * 10);
int index = 0;
//course_string is delimited by ",": (eg. Biology,chemistry,maths,music). parse this and add to my char ** variable.
token = strtok(cources_string, ",");
while (token != NULL)
{
cource_list[index] = strdup(token);
token = strtok(NULL, ",");
index++;
}
cource_count = index;
return cource_list;
}
int main()
{
int i, j;
char** mydata = load_file(); //returns lines as a list/char ** array from file
for (i = 0; i < line_count; i++) //line_count is the number of elements/lines in "mydata"
{
printf("line_data: %s\n",mydata[i]);//i can see all my lines!
char **std_cource_list = return_cource_list(mydata[i]);
for (j = 0; j < cource_count; j++)
{
printf("\tcourse[%d]: %s\n",j,std_cource_list[j]);//i have all my courses as a list from each line
}
//can i load my struct like this? or any option to load my struct?
myinfo[i].cources_as_list = std_cource_list;
}
// i want to see my structure elements here, (nested for loop required).
}
Am getting seg_fault error while loading my char array to my struct.
(i.e: this line: myinfo[i].cources_as_list = std_cource_list;)
You need to allocate the memory for your struct.
std_info *myinfo = malloc(sizeof(std_info));
Also don't make it global, since there is really no need for global variables in this task.
Try
std_info * myinfo = malloc(line_count * sizeof *myinfo);
This allocates memory to hold line_count objects of std_info, with myinfo pointing to the 1st.
You never allocate space for myinfo and I would suggest making it a local variable. There is almost no need for global variables except in very specific cases.
Also, you are using malloc() almost only for fixed size allocations which would be easier to manage and more efficient if you do statically in the sense that you can use arrays for that.
This might be what you're interested in
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
struct student_info
{
char **courses;
size_t size;
};
char **
load_file(const char *const path)
{
char *line;
FILE *file;
char **data;
size_t row;
size_t length;
size_t count;
file = fopen(path, "r");
if (file == NULL)
{
perror("FILE OPEN ERROR[IN load_file]: ");
return NULL; // Notify the caller that there was a problem
// but do not necessarily quit as you might
// retry with another path.
}
count = 0;
for (int chr = fgetc(file) ; chr != EOF ; chr = fgetc(file))
count += (chr == '\n') ? 1 : 0;
rewind(file);
data = malloc((count + 1) * sizeof(*data));
if (data == NULL)
{
// Perhaps notify the error
fclose(file);
return NULL;
}
data[count] = NULL; // Use as end of array delimiter
row = 0;
line = NULL;
length = 0;
while ((length = getline(&line, &length, file)) != -1)
{
// The last character is always `\n' so remove it
data[row] = malloc(length);
if (data == NULL)
{
fclose(file);
for (size_t i = row ; i >= 0 ; --i)
{
free(data[i]);
free(data);
return NULL;
}
}
data[row][length - 1] = '\0';
memcpy(data[row], line, length - 1);
++row;
}
fclose(file);
// You need to `free' this, read the documentation
free(line);
return data;
}
char **
extract_courses_as_list(const char *const input)
{
char **courses;
size_t index;
const char *tail;
const char *head;
size_t count;
head = input;
count = 0;
/* Count the number of fields to allocate memory */
while (head != NULL)
{
tail = strchr(head, ',');
if (tail != NULL)
head = tail + 1;
else
head = NULL;
count += 1;
}
index = 0;
/* Allocate memory for the list, and the sentinel */
courses = malloc((count + 1) * sizeof(*courses));
head = input;
while (head != NULL)
{
ptrdiff_t length;
/* find the next `,' in the input string */
tail = strchr(head, ',');
if (tail == NULL) /* if it's not there, it's the last one */
tail = strchr(head, '\0');
/* compute the number of characters of the field */
length = (ptrdiff_t) (tail - head);
/* allocate space to copy the string */
courses[index] = malloc(length + 1);
if (courses == NULL) /* always be safe and check */
{
for (size_t i = index ; i >= 0 ; --i)
free(courses[index]);
free(courses);
return NULL;
}
/* always remember to `null' terminate */
courses[index][length] = '\0';
/* finally, copy the string */
memcpy(courses[index], head, length);
/* check whehter it was the last field and
* update the pointer to the next one accordingly
*/
if ((tail != NULL) && (*tail != '\0'))
head = tail + 1;
else
head = NULL;
/* Don't forget the fields counter */
index++;
}
courses[count] = NULL;
return courses;
}
void
concatenate_lists(struct student_info *info, char **source)
{
char **temporary;
size_t length;
length = info->size;
for (size_t i = 0 ; source[i] != NULL ; ++i)
length++;
temporary = realloc(info->courses, length * sizeof(*temporary));
if (temporary == NULL)
return;
for (size_t i = 0 ; source[i] != NULL ; ++i)
temporary[i + info->size] = strdup(source[i]);
info->courses = temporary;
info->size = length;
}
void
free_list(char **lines)
{
if (lines == NULL)
return;
for (size_t i = 0 ; lines[i] != '\0' ; ++i)
free(lines[i]);
free(lines);
}
int
main()
{
struct student_info info;
char **lines;
lines = load_file("data.tx");
if (lines == NULL)
return -1;
info.courses = NULL;
info.size = 0;
for (size_t i = 0 ; lines[i] != NULL ; ++i)
{
char **courses;
courses = extract_courses_as_list(lines[i]);
if (courses == NULL)
continue;
concatenate_lists(&info, courses);
free_list(courses);
}
for (size_t i = 0 ; i < info.size ; ++i)
{
fprintf(stderr, "%s\n", info.courses[i]);
free(info.courses[i]);
}
free(info.courses);
free_list(lines);
return 0;
}
You will notice that I never used strdup(), the reason being that the length of the string that we want to copy is always known.

Outputting a string from a structure

I am trying to create a program that takes input from a file, puts each word into a "words" structure, and then outputs the results with the frequency of each word, but whenever I try to output the string it just prints something like ?k#?? where I would expect the string to be.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct s_words {
char *str; //stores the word; no pre-determined size
int count;
struct s_words* next;
} words;
words* create_words(char* word) {
//allocate space for the structure
words* newWord = malloc(strlen(word));
if (NULL != newWord){
//allocate space for storing the new word in "str"
//if str was array of fixed size, storage wud be wasted
newWord->str = (char *)malloc(strlen(word));
strcpy(newWord->str,word); //copy “word” into newWord->str
newWord->str[strlen(word)]='\0';
newWord->count = 1; //initialize count to 1;
newWord->next = NULL; //initialize next;
}
return newWord;
}
//If the word is in the list, add 1 to count.
words* add_word(words* wordList, char* word) {
int found=0;
words *temp=wordList;
// search if word exists in the list; if so, make found=1
while (temp!=NULL) {
// printf("looptest\n");
if (strcmp(word,temp->str) == 0) { //use strcmp command
//printf("looptest0\n");
found=1;
temp->count = temp->count + 1; //increment count;
return wordList;
//printf("looptest1\n");
}
else {
temp = temp -> next; //update temp
// printf("looptest2\n");
}
}
// printf("looptest3\n");
//new word
words* newWord = create_words(word);
// printf("looptest4\n");
if (NULL != newWord) {
// printf("looptest5\n");
newWord->next = wordList;
wordList = newWord;
//Insert new word at the head of the list
}
else{
// printf("looptest6\n");
temp = wordList;
while(temp->next != NULL){
// printf("looptest7\n");
temp = temp->next;
}
temp->next = newWord;
}
return newWord;
}
int main(int argc, char* argv[]) {
words *mywords; //head of linked list containing words
mywords=NULL;
FILE *myFile;
myFile = fopen(argv[1],"r"); //first parameter is input file
if (myFile==0) {
printf("file not opened\n");
return 1;
}
else {
printf("file opened\n");
}
//start reading file character by character;
//when word has been detected; call the add_word function
int ch, word = 0, k=0;
char thisword[100];
while ( (ch = fgetc(myFile)) != EOF )
{
// printf("%c",ch);
if (ch==' ' || ch==',' || ch==';' || ch==':' || ch == '.') //detect new word? Check if ch is a delimiter
{
// printf("\ncheck2\n");
if ( word ) //make sure previous character was not delimiter
{
// printf("check\n");
word = 0;
thisword[k] = '\0'; //make the kth character of thisword as \0
// printf("test2\n");
//now call add_word to add thisword into the list
mywords = add_word(mywords,thisword);
// printf("check3\n");
k=0;
}
// printf("test\n");
}
else
{
word = 1;
thisword[k] = ch; //make the kth character of thisword equal to ch
k++;
}
if(ch == EOF){
thisword[k] = '\0';
mywords = add_word(mywords,thisword);
}
}
printf("%s\n",mywords->str);
printf("printing list\n");
//Traverse list and print each word and its count to outputfile
//output file is second parameter being passed
//haven't started to deal with the output file
words* temp = mywords;
while(temp != NULL){
printf("%s\tcount: %i\n",temp->str,temp->count);
temp = temp->next;
}
printf("list complete\n");
return 0;
}
This is all my code, I can't figure out how to error test what the problem is since I can't figure out how to output the strings. I've only started programming in C this year so I assume there's something basic I'm missing.
newWord->str = (char *)malloc(strlen(word));
strcpy(newWord->str,word); //copy “word” into newWord->str
newWord->str[strlen(word)]='\0';
.. writes the null out-of-bounds.
Assuming that strlen() returns the desired value, you should malloc an extra char:
newWord->str = (char *)malloc(1+strlen(word));
Note Olaf comment re. casting in C. Also note that it's unlikely that this is your ONLY bug.

Resources