Equal strings produces different hash index - c

I have a program here that does replicate a memory filesystem (not finished yet), it has to read from a file its commands and they are pretty self explanatory here:
create /foo
create /foo/bar
create /foo/baz
create /foo/baz/qux
write /foo/bar "test"
read /foo/bar
read /foo/baz/qux
read /foo/baz/quux
create /foo/bar
create /dir
create /bar
create /dir/bar
find bar
delete /foo/bar
find wat
find foo
read /foo/bar
create /foo/bar
read /foo/bar
delete_r /foo
exit
I then have a function that given the string it manipulates it to insert folder names in an array strings, a command is a command string and the fullPath string is given by another function that does use the previously created array of strings to compose a new one. Here is the struct and the manipulation structure:
typedef struct _command {
unsigned char command[10];
unsigned char path[255][255];
unsigned char* fullPath;
int pathLevels;
} command;
This is the node structure that does implement the tree-like structure:
typedef struct _node {
int isRoot;
int isDir;
char* message;
int childNumber;
struct _node* childNodes[1024];
unsigned char fullPath[MAX_LEN_PATH];
unsigned char resName[255];
} node;
And the function that does manipulate the string:
command* createCommandMul(unsigned char* str) {
unsigned char* c = str;
command* commandPointer = (command*) malloc(sizeof(command));
//commandPointer->path[0][0] = '/';
//commandPointer->path[0][1] = '\0';
int commandIndex = 0;
int pathLevel = 0;
int pathIndex = 0;
/* Parte Comando */
while(*c != ' ' && commandIndex < 10) {
commandPointer->command[commandIndex] = *c++;
commandIndex++;
}
while(commandIndex<10) {
commandPointer->command[commandIndex] = '\0';
commandIndex++;
}
while(*c == ' ' || *c == '/') c++;
/* Parte Path*/
while(*c != '\0') {
if (*c == '/') {
commandPointer->path[pathLevel][pathIndex] = '\0';
pathLevel++;
pathIndex = 0;
c++;
} else {
commandPointer->path[pathLevel][pathIndex] = *c++;
pathIndex++;
}
}
commandPointer->path[pathLevel][pathIndex] = '\0';
commandPointer->pathLevels = pathLevel;
return commandPointer;
}
I have a createDir function that does check if the node* passed to the function is either a dir or the root (imagine this has a tree);
if it is it creates the node.
int createDir(node* fatherOfChildToCreate, unsigned char* fullPath, command* currentCommand) {
if ((fatherOfChildToCreate->isRoot == 1 || fatherOfChildToCreate->isDir == 1) && fatherOfChildToCreate->childNumber < 1024) {
node* dirToCreate = (node*) malloc(sizeof(node));
command* comando = (command*) currentCommand;
dirToCreate->isDir = 1;
dirToCreate->isRoot = 0;
dirToCreate->message = NULL;
dirToCreate->childNumber = 0;
strcmp(dirToCreate->fullPath, fullPath);
for (int i = 0; i < 1024; i++) dirToCreate->childNodes[i] = NULL;
int index = (int) hashCalc(comando->path[comando->pathLevels]);
printf("Hash di %s = %d", comando->path[comando->pathLevels], index);
fatherOfChildToCreate->childNodes[index] = dirToCreate;
fatherOfChildToCreate->childNumber += 1;
return 1;
} else return 0;
}
Note that this createDirfunctions is created with the purpose of creating a direct subDir of the node* fatherOfChildToCreate so basically the first command of the text file does create /foo using this function because its only parentDir is the root one, which is created in the main().
The second command will search for the /foo directory using this function down below, and since it is the parent directory of /foo/bar that pointer will be passed to the createDir function that will create a childNode in the /foo dir.
node* linearSearchUpper(node* rootNode, unsigned char* upperPath, command* currentCommand) {
command* comandoSearch = (command*) currentCommand;
node* curr = (node*) rootNode;
int counter = comandoSearch->pathLevels;
int index;
unsigned char* upperName = comandoSearch->path[comandoSearch->pathLevels - 1];
for (int i = 0; i < counter; i++) {
index = (int) hashCalc(comandoSearch->path[i]);
printf("Hash di %s = %d", comandoSearch->path[i], index);
if (curr->childNodes[index] == NULL) return NULL;
else curr = curr->childNodes[index];
}
if (strcmp(upperPath, curr->fullPath) == 1) return curr;
}
In all this I've used this hash function to search for the parentDir and inserting a new element in the node->childNodes[] array
unsigned long hashCalc(unsigned char* str) {
unsigned long hash = 5381;
int c;
while (c = *str++)
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
return hash % 1024;
}
Now, I'll paste the main() which is the last function to review.
int main() {
node* rootNode = (node*) createRoot();
command* comando = (command*) malloc(sizeof(command));
unsigned char* upPath = NULL;
unsigned char* allPath = NULL;
unsigned char* line = NULL;
FILE* fp;
size_t len = 0;
ssize_t read;
fp = fopen("/Users/mattiarighetti/Downloads/semplice.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, fp)) != -1) {
if (*line == 'f') {
//comandoFind = createCommandFind(line);
} if (*line == 'w') {
//comandoWrite = createCommandWrite(line);
} if (*line == 'c') {
comando = createCommandMul(line);
upPath = upperPath(comando);
allPath = fullPath(comando);
if (comando->pathLevels == 0) {
if (createDir(rootNode, allPath, comando) == 1) printf("ok\n\n");
else printf("no\n\n");
} else {
node* upperNode = (node*) linearSearchUpper(rootNode, upPath, comando);
if (upperNode == NULL) {
printf("no\n\n");
}
else {
if (createDir(upperNode, allPath, comando) == 1) printf("ok\n\n");
else printf("no\n\n");
}
}
}
}
fclose(fp);
if (line)
free(line);
return 0;
}
So, what this does is reading line to line from the file, creating and filling the command struct, it then creates an upPath which is the parent (to be found) and the fullPath. The problem I am getting is that the program uses createDir for the first line of this text file, and this is ok, but reading foo in the comando->path[I] for some strange reason, the hash function gives me 179 which is not correct. The in goes on, the second line it uses linearSearchUpper() to search for the parent folder /foo, so it gives comando->path[I] which is again foo but this time the hashCalc gives me 905 which should be the correct answer so in the end the linearSearchUpper can't find the /foo folder since it doesn't exist in the index 905. This thing happens every time I use a create command or create_dir with folders that are childs of the rootOne, so dirs like /foo, /dir, /bar will give me a strange hash index.
Do you have any idea on why this could happen?

I haven't tried to understand your whole program, but the strings for wich you get the different hashes really are different: One of them retains the new-line character at the end, probably from fgets.
The numerc value of the new-line character in ASCII is 10, so:
hash("foo") == 905;
hash("foo\n") == (33 * hash("foo") + '\n') % 1024
== (33 * 905 + 10) % 1024
== 179
The solution is to either remove trailing spaces from the string you receive from fgets or to use better tokenising, that will guarantee that your tokens don't have leading or trailing spaces.

Related

Why is this giving me a segmentation fault on one system but not another?

I'm running a program that creates a dictionary tree by reading in words from 'words.txt', and then can search to see if certain words are in the tree. Running this program on https://www.onlinegdb.com/online_c_compiler works perfectly, but I get a segmentation fault when I try to run it on my own Linux system. Any ideas as to why? Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Node structure of trie */
struct node
{
struct node *next[27]; // 26 for a-z and last one(26th index) is for apostrophe
int end; // value as 1 denotes the end of the word.
};
/* This method is for creating a new node */
struct node *createNode()
{
struct node *newNode = (struct node *)malloc(sizeof(struct node));
newNode->end = 0; // set end as false i.e. 0
for (int i = 0; i < 27; i++) // set all children as NULL
newNode->next[i] = NULL;
return newNode;
}
/* This method is for inserting a word in the trie */
void insert(struct node *root, char *word)
{
struct node *curr = root;
for (int i = 0; i < strlen(word); i++) // iterating character by character
{
int index;
if (word[i] == '\'') // if character is apostrophe index is 26
index = 26;
else
index = tolower(word[i]) - 'a'; // else the index as the alphabet sequence number starting from 0.
// for a - 0, b - 1, ..... z - 25
if (!curr->next[index])
curr->next[index] = createNode(); // create node of that character if not created yet
curr = curr->next[index]; // then go for next character
}
curr->end = 1; // mark end as 1 to denote the ending of the word
}
/* This method is for searching a word in the trie */
int search(struct node *root, char *word)
{
struct node *curr = root;
for (int i = 0; i < strlen(word); i++) // iterating character by character
{
/* Getting index same as insert function */
int index;
if (word[i] == '\'')
index = 26;
else
index = tolower(word[i]) - 'a';
if (!curr->next[index]) // if node of current character not found means the word doesn't exist in trie.
return 0;
curr = curr->next[index];
}
if (curr != NULL && curr->end) // if iterated all the characters and end is 1 then the word exists.
return 1;
else
return 0; // otherwise doesn't exist.
}
int main()
{
/* Reading the file line by line */
FILE *file;
size_t len = 1000;
char *word = (char *)malloc(len);
file = fopen("word.txt", "r");
struct node *root = createNode();
while (fgets(word, len, file) != NULL) // iterating line by line
{
int len = strlen(word);
if (word[len - 1] == '\n') // removing the newline which is at the end of the every line
word[len - 1] = '\0';
insert(root, word); // inserting every word
}
int ans;
word = (char *)("error's"); // checking the existence of the word "error's"
ans = search(root, word);
if (ans == 1)
printf("\"%s\" found!\n", word);
else
printf("\"%s\" not found!\n", word);
word = (char *)("hilli");// checking the existence of the word "hilli"
ans = search(root, word);
if (ans == 1)
printf("\"%s\" found!\n", word);
else
printf("\"%s\" not found!\n", word);
return 0;
}
Here's code that should work. It does work on macOS 10.15.2 Catalina using GCC 9.2.0 and XCode 11.3.1 with the compiler set fussy and a number of memory debugging options enabled. It does not attempt to free the trie that it builds; it should (it is a good exercise to be able to free the structures you build).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Node structure of trie */
struct node
{
struct node *next[27]; // 26 for a-z and last one(26th index) is for apostrophe
int end; // value as 1 denotes the end of the word.
};
/* This method is for creating a new node */
static struct node *createNode(void)
{
struct node *newNode = (struct node *)malloc(sizeof(struct node));
newNode->end = 0; // set end as false i.e. 0
for (int i = 0; i < 27; i++) // set all children as NULL
newNode->next[i] = NULL;
return newNode;
}
/* This method is for inserting a word in the trie */
static void insert(struct node *root, char *word)
{
struct node *curr = root;
int length = strlen(word);
for (int i = 0; i < length; i++) // iterating character by character
{
int index;
if (word[i] == '\'') // if character is apostrophe index is 26
index = 26;
else
index = tolower(word[i]) - 'a'; // else the index as the alphabet sequence number starting from 0.
// for a - 0, b - 1, ..... z - 25
if (!curr->next[index])
curr->next[index] = createNode(); // create node of that character if not created yet
curr = curr->next[index]; // then go for next character
}
curr->end = 1; // mark end as 1 to denote the ending of the word
}
/* This method is for searching a word in the trie */
static int search(struct node *root, char *word)
{
struct node *curr = root;
int length = strlen(word);
for (int i = 0; i < length; i++) // iterating character by character
{
/* Getting index same as insert function */
int index;
if (word[i] == '\'')
index = 26;
else
index = tolower(word[i]) - 'a';
if (!curr->next[index]) // if node of current character not found means the word doesn't exist in trie.
return 0;
curr = curr->next[index];
}
if (curr != NULL && curr->end) // if iterated all the characters and end is 1 then the word exists.
return 1;
else
return 0; // otherwise doesn't exist.
}
int main(void)
{
/* Reading the file line by line */
FILE *file;
size_t len = 1000;
char *word = (char *)malloc(len);
const char filename[] = "word.txt";
file = fopen(filename, "r");
if (file == 0)
{
fprintf(stderr, "Failed to open file '%s' for reading\n", filename);
exit(EXIT_FAILURE);
}
struct node *root = createNode();
while (fgets(word, len, file) != NULL) // iterating line by line
{
//int len = strlen(word);
//if (word[len - 1] == '\n') // removing the newline which is at the end of the every line
// word[len - 1] = '\0';
word[strcspn(word, "\r\n")] = '\0';
printf("Word: [%s]\n", word);
insert(root, word); // inserting every word
}
int ans;
word = (char *)("error's"); // checking the existence of the word "error's"
ans = search(root, word);
if (ans == 1)
printf("\"%s\" found!\n", word);
else
printf("\"%s\" not found!\n", word);
word = (char *)("hilli");// checking the existence of the word "hilli"
ans = search(root, word);
if (ans == 1)
printf("\"%s\" found!\n", word);
else
printf("\"%s\" not found!\n", word);
return 0;
}
The code runs correctly given a data file containing a suitable subset of these lines:
enough
abracadabra
acid
test
hilli
error's
tests
testing
tested
tester
testosterone
acidly
acidic
It was tested with both DOS (CRLF) and Unix (NL or LF) line endings and was safe with both because it uses strcspn() to zap either sort of line ending:
word[strcspn(word, "\r\n")] = '\0';
If you had old Mac-style line endings (CR only), then you'd have problems with fgets() not recognizing the ends of lines — but if you fixed that (using POSIX getdelim() for example), it would work correctly with such lines too.
The changes made to your code are basically cosmetic, but allow the code to compile cleanly (source trie79.c; program trie79) using fairly stringent options:
$ gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes \
> trie79.c -o trie79
$

Printing top 10 recurring words in a file

Edited question:
Hi guys, my goal is to print the top 10 occurring words in a file, I have managed to get everything to work from reading the file to counting word occurrences and printing it, but when I implement my qsort I get a segfault. I looked over my pointers and they look okay to me, I would appreciate any feedback.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX 51
struct words
{
char *ch;
int index;
struct words *pNext;
};
struct words* createWordCounter(char *ch)
{
struct words *pCounter = NULL;
pCounter = (struct words*)malloc(sizeof(char));
pCounter->ch = (char*)malloc(strlen(ch)+1);
strcpy(pCounter->ch, ch);
pCounter->index = 1;
pCounter->pNext = NULL;
return pCounter;
}
struct words *pStart = NULL;
char* removePunc(struct words* ch)
{
char *src = ch, *dst = ch;
while (*src)
{
if (ispunct((unsigned char)*src))
{
src++;
}
else if (isupper((unsigned char)*src))
{
*dst++ = tolower((unsigned char)*src);
src++;
}
else if (src == dst)
{
src++;
dst++;
}
else
{
*dst++ = *src++;
}
}
*dst = 0;
}
void addWord(char *word)
{
struct words *pCounter = NULL;
struct words *pLast = NULL;
if(pStart == NULL)
{
pStart = createWordCounter(word);
return;
}
pCounter = pStart;
while(pCounter != NULL)
{
if(strcmp(word, pCounter->ch) == 0)
{
++pCounter->index;
return;
}
pLast = pCounter;
pCounter = pCounter->pNext;
}
pLast->pNext = createWordCounter(word);
}
void printWord(struct words *pCounter)
{
printf("\n%-30s %5d\n", pCounter->ch, pCounter->index);
}
//sort
int compare (const void * a, const void * b){
struct words *A1 = (struct words *)a;
struct words *B1 = (struct words *)b;
return B1->index - A1->index;
/*
if ((A1->count - B1->count) > 0)
return -1;
else if ((A1->count - B2->count) < 0)
return 1;
else
return 0;
*/
}
int main(int argc, char * argv[])
{
struct words *pCounter = NULL;
char temp[MAX];
FILE *fpt;
if(argc == 2)
{
printf("File name is: %s\n",argv[1]);
fpt = fopen(argv[1], "r");
//fail test
if(fpt == NULL)
{
printf("cannot open file, exiting program...\n");
exit(0);
}
//get the data out of the file and insert in struct
int wordCounter = 0;
int i = 0;
int lines = 0;
while((fscanf(fpt, "%s ", &temp)) == 1)
{
removePunc(temp);
addWord(temp);
if(temp == ' ')
i++;
if(temp == '\n')
lines++;
wordCounter++;
}
/*
pCounter = pStart;
while(pCounter != NULL)
{
printWord(pCounter);
pCounter = pCounter->pNext;
}
*/
//sort
qsort(pCounter, wordCounter, sizeof(struct words), compare);
for(int j = 0; i < 10; i++)
{
printWord(pCounter);
}
}
fclose(fpt);
return 0;
}
First temp is already a pointer, so do not include '&' before it in fscanf. Second, don't skimp on buffer size (e.g. #define MAX 1024). Third, protect your array bounds with the field-width modifier and don't put trailing whitespace in your format-string.
Putting it altogether (presuming you use 1024 as MAX, you can use
fscanf(fpt, "1023%s", temp))
Well done on checking the return of fscanf during your read.
Adding to the things that have already been mentioned.
In createWordCounter(...)
pCounter = (struct words*)malloc(sizeof(char));
you are allocating memory for a char. Even though the pointer to a struct is the pointer to its first member, the first element of words is a pointer to a char. It is better to be careful and write
struct words *pCounter = malloc(sizeof *pCounter);
Also, be mindful of operator precedence.
In addWord(...) you have
++pCounter->index;
What that does is increment the pointer pCounter before accessing index. If you are trying to increment index, it should be
++(pCounter->index);
or
pCounter->index++;
I recommend striping your program down to its bare essentials and test each part one at a time systematically to narrow down the cause of your errors.
I think the main problem is the size of temp array when you try to using fscanf.
while((fscanf(fpt, "%s ", temp)) == 1)
When the length of one line is bigger than MAX, segmentation fault occur.
You can change your code like this
#define SCANF_LEN2(x) #x
#define SCANF_LEN(x) SCANF_LEN2(x)
//...
//your original code
//...
while((fscanf(fpt, "%"SCANF_LEN(MAX)"s ", temp)) == 1)
By the way, you should check
(1) compile warning about type
char* removePunc(struct words* ch)
should be char* removePunc(char *ch)
if(temp == ' ') should be if(temp[0] == ' ')
if(temp == '\n') should be if(temp[0] == '\n')
(2) malloc size
pCounter = (struct words*)malloc(sizeof(char)); should be pCounter = (struct words*)malloc(sizeof(struct words));
(3) remember free after using malloc

Load/fill a struct with char** array as a struct member, c

In the last two days i have asked a question to load struct, but i have a problem to access my struct out side my loop(a loop to load my struct). i have edited my question/and code this way:
myfile.txt
Biology,chemistry,maths,music
Mechanics,IT,Geology,music,Astronomy
football,vollyball,baseball
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define path "myfile.txt"
typedef struct student_info
{
char **cources_as_list;
} std_info;
std_info *myinfo; //a global var that will conatain student info
int line_count = 0, cource_count = 0;
char** load_file()
{
char *line = NULL;
size_t len = 0;
FILE *fp;
int indexq=0;
fp = fopen(path, "r");
if (fp == NULL)
{
perror("FILE OPEN ERROR[IN load_file]: ");
exit(1);
}
char **mydata = malloc (sizeof (char *) * 4);//aup to four elements
while (getline(&line, &len, fp) != -1)
{
strtok(line, "\n");
mydata[indexq]= strdup(line);
indexq++;
}
line_count = indexq;
return mydata;
}
char **return_cource_list(char *cources_string) {
char *token;
char **cource_list = malloc(sizeof(char *) * 10);
int index = 0;
//course_string is delimited by ",": (eg. Biology,chemistry,maths,music). parse this and add to my char ** variable.
token = strtok(cources_string, ",");
while (token != NULL)
{
cource_list[index] = strdup(token);
token = strtok(NULL, ",");
index++;
}
cource_count = index;
return cource_list;
}
int main()
{
int i, j;
char** mydata = load_file(); //returns lines as a list/char ** array from file
for (i = 0; i < line_count; i++) //line_count is the number of elements/lines in "mydata"
{
printf("line_data: %s\n",mydata[i]);//i can see all my lines!
char **std_cource_list = return_cource_list(mydata[i]);
for (j = 0; j < cource_count; j++)
{
printf("\tcourse[%d]: %s\n",j,std_cource_list[j]);//i have all my courses as a list from each line
}
//can i load my struct like this? or any option to load my struct?
myinfo[i].cources_as_list = std_cource_list;
}
// i want to see my structure elements here, (nested for loop required).
}
Am getting seg_fault error while loading my char array to my struct.
(i.e: this line: myinfo[i].cources_as_list = std_cource_list;)
You need to allocate the memory for your struct.
std_info *myinfo = malloc(sizeof(std_info));
Also don't make it global, since there is really no need for global variables in this task.
Try
std_info * myinfo = malloc(line_count * sizeof *myinfo);
This allocates memory to hold line_count objects of std_info, with myinfo pointing to the 1st.
You never allocate space for myinfo and I would suggest making it a local variable. There is almost no need for global variables except in very specific cases.
Also, you are using malloc() almost only for fixed size allocations which would be easier to manage and more efficient if you do statically in the sense that you can use arrays for that.
This might be what you're interested in
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
struct student_info
{
char **courses;
size_t size;
};
char **
load_file(const char *const path)
{
char *line;
FILE *file;
char **data;
size_t row;
size_t length;
size_t count;
file = fopen(path, "r");
if (file == NULL)
{
perror("FILE OPEN ERROR[IN load_file]: ");
return NULL; // Notify the caller that there was a problem
// but do not necessarily quit as you might
// retry with another path.
}
count = 0;
for (int chr = fgetc(file) ; chr != EOF ; chr = fgetc(file))
count += (chr == '\n') ? 1 : 0;
rewind(file);
data = malloc((count + 1) * sizeof(*data));
if (data == NULL)
{
// Perhaps notify the error
fclose(file);
return NULL;
}
data[count] = NULL; // Use as end of array delimiter
row = 0;
line = NULL;
length = 0;
while ((length = getline(&line, &length, file)) != -1)
{
// The last character is always `\n' so remove it
data[row] = malloc(length);
if (data == NULL)
{
fclose(file);
for (size_t i = row ; i >= 0 ; --i)
{
free(data[i]);
free(data);
return NULL;
}
}
data[row][length - 1] = '\0';
memcpy(data[row], line, length - 1);
++row;
}
fclose(file);
// You need to `free' this, read the documentation
free(line);
return data;
}
char **
extract_courses_as_list(const char *const input)
{
char **courses;
size_t index;
const char *tail;
const char *head;
size_t count;
head = input;
count = 0;
/* Count the number of fields to allocate memory */
while (head != NULL)
{
tail = strchr(head, ',');
if (tail != NULL)
head = tail + 1;
else
head = NULL;
count += 1;
}
index = 0;
/* Allocate memory for the list, and the sentinel */
courses = malloc((count + 1) * sizeof(*courses));
head = input;
while (head != NULL)
{
ptrdiff_t length;
/* find the next `,' in the input string */
tail = strchr(head, ',');
if (tail == NULL) /* if it's not there, it's the last one */
tail = strchr(head, '\0');
/* compute the number of characters of the field */
length = (ptrdiff_t) (tail - head);
/* allocate space to copy the string */
courses[index] = malloc(length + 1);
if (courses == NULL) /* always be safe and check */
{
for (size_t i = index ; i >= 0 ; --i)
free(courses[index]);
free(courses);
return NULL;
}
/* always remember to `null' terminate */
courses[index][length] = '\0';
/* finally, copy the string */
memcpy(courses[index], head, length);
/* check whehter it was the last field and
* update the pointer to the next one accordingly
*/
if ((tail != NULL) && (*tail != '\0'))
head = tail + 1;
else
head = NULL;
/* Don't forget the fields counter */
index++;
}
courses[count] = NULL;
return courses;
}
void
concatenate_lists(struct student_info *info, char **source)
{
char **temporary;
size_t length;
length = info->size;
for (size_t i = 0 ; source[i] != NULL ; ++i)
length++;
temporary = realloc(info->courses, length * sizeof(*temporary));
if (temporary == NULL)
return;
for (size_t i = 0 ; source[i] != NULL ; ++i)
temporary[i + info->size] = strdup(source[i]);
info->courses = temporary;
info->size = length;
}
void
free_list(char **lines)
{
if (lines == NULL)
return;
for (size_t i = 0 ; lines[i] != '\0' ; ++i)
free(lines[i]);
free(lines);
}
int
main()
{
struct student_info info;
char **lines;
lines = load_file("data.tx");
if (lines == NULL)
return -1;
info.courses = NULL;
info.size = 0;
for (size_t i = 0 ; lines[i] != NULL ; ++i)
{
char **courses;
courses = extract_courses_as_list(lines[i]);
if (courses == NULL)
continue;
concatenate_lists(&info, courses);
free_list(courses);
}
for (size_t i = 0 ; i < info.size ; ++i)
{
fprintf(stderr, "%s\n", info.courses[i]);
free(info.courses[i]);
}
free(info.courses);
free_list(lines);
return 0;
}
You will notice that I never used strdup(), the reason being that the length of the string that we want to copy is always known.

Spell-checker in C

I've been trying to implement a spell-checker using a large dictionary against some text file which contains around 2000 words. However, my spell-checker returns all words as being misspelled. I honestly have no idea why — could someone help me?
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
#define lenght 45
#define hashtable_size 65536
char word[lenght+1];
int count = 0;
/*
*
* Hash function. Thanks to Brenda from cs50 reddit.
*/
int hash_it(const char* needs_hashing)
{
unsigned int hash = 0;
for (int i=0, n=strlen(needs_hashing); i<n; i++)
hash = (hash << 2) ^ needs_hashing[i];
return hash % hashtable_size;
}
typedef struct node
{
char* word;
struct node* next;
}node;
node* previous;
node* hashtable[hashtable_size];
/*
*
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
char word[lenght+1];
FILE* dict = fopen(dictionary,"r");
for(int i = 0; i < 26;i++)
{
hashtable[i] = NULL;
for(int a = fgetc(dict); a != EOF; a = fgetc(dict))
{
count++;
int hashvalue = hash_it(word);
node* new = malloc(sizeof(node));
if(hashtable[hashvalue] == NULL)
{
hashtable[hashvalue] = new;
new -> next = NULL;
}
else
{
new -> next = hashtable[hashvalue];
hashtable[hashvalue] = new;
}
}
}
fclose(dict);
return true;
}
/*
*
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
char tmp[lenght + 1];
int lenghtw = strlen(word);
for (int i = 0; i < lenghtw; i++)
{
tmp[i] = tolower(word[i]);
}
int index = hash_it(tmp);
if (hashtable[index] == NULL)
{
return false;
}
node* cursor = hashtable[index];
while(cursor != NULL)
{
if(strcmp(tmp, cursor -> word) == 0)
{
return true;
}
cursor = cursor -> next;
}
return false;
}
/*
*
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return count;
}
/*
*
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
int index = 0;
while(index < hashtable_size)
{
if(hashtable[index] == NULL)
{
index++;
}
else
{
while(hashtable[index] != NULL)
{
node* cursor = hashtable[index];
hashtable[index] = cursor -> next;
free(cursor);
}
index++;
}
}
return true;
}
int main(int argc, char **argv)
{
if (argc != 2)
return 3;
if (!load("dictionary"))
return 1;
printf("loaded %d words\n", size());
printf("word '%s'%s found\n", argv[1], check(argv[1]) ? "" : " not");
unload();
return 0;
}
There are many problems in your code:
in the load function, you do not load words from the dictionary into the hash table. You read one character at a time with fgetc() and create a node from an uninitialized local buffer word.
the hash_it function only hashes the last 16 characters from the word. Furthermore, hashtable_size is a power of 2, a bad idea. Indeed only the last 8 characters participate in the hash value. This is not a bug, just an inefficient hashing method.
in the check function, you copy the word and convert it to lowercase, but you forget to set the final byte of the tmp array to '\0'.
Here is a corrected version of load that reads one word per dictionary line:
bool load(const char *dictionary) {
char line[256];
FILE *dict = fopen(dictionary, "r");
if (!dict)
return false;
while (fgets(line, sizeof line, dict) != NULL) {
char *p = line + strspn(line, " \t"); // skip blanks
p[strcspn(p, " \t\r\n")] = '\0'; // strip trailing blanks
if (*p == '\0' || *p == '#' || *p == ';')
continue; // ignore blank lines and comments
count++;
int hashvalue = hash_it(p);
node *np = malloc(sizeof(node));
np->word = strdup(p);
np->next = hashtable[hashvalue];
hashtable[hashvalue] = np;
}
fclose(dict);
return true;
}

Readline Completion Problem

I am trying to get command completion working but it seems like its not working properly..
Please have a look at my code and tell me how I can fix it..
Thanks in advance...
char store_commands() {
char *newEnv;
DIR * dir;
char *new ;
struct dirent * entry;
char *env = getenv("PATH");
do {
newEnv = strsep(&env, ":");
if(newEnv != NULL)
if(strlen(newEnv) > 0) {
dir = opendir(newEnv);
if( dir == NULL ) break;
if(flag == 1) {
flag = 0;
while((entry = readdir(dir)) != NULL) {
new = malloc(strlen(entry->d_name) + 1) ;
new = strcpy(new, entry->d_name);
commands[++count] = new; // add possible commands into an array
printf("---%i %s\n", count ,commands[count]);
}
}
closedir(dir); // close directory
}
} while(newEnv);
return **commands;
}
static char** my_completion( const char * text , int start, int end){
char **matches;
store_commands();
matches = (char **)NULL;
if (start == 0)
matches = rl_completion_matches ((char*)text, &my_generator);
return matches;
}
char * dupstr (char* s) {
char *r;
r = (char*) malloc ((strlen (s) + 1));
strcpy (r, s);
return (r);
}
char* my_generator(const char* text, int state) {
int index, len;
char *comm;
if (!state) {
index = 0;
len = (int)strlen (text);
}
while ( (comm = commands[index])) {
index++;
if (strncmp (comm, text, len) == 0)
return (dupstr(comm));
}
return NULL;
}
int main (int argc, char * argv[]) {
char *command;
using_history();
rl_readline_name = basename(argv[0]);
rl_attempted_completion_function = my_completion;
while ( (command = readline(" $ "))!= NULL ) { // scan stdin
rl_bind_key('\t',rl_complete);
if(strlen(command) > 0)
add_history(command);
}
return 0;
}
Some test cases
l (tab)
Display all 1281 possibilities? (y or n) // all possibilities come up when I put one letter *** all possibilities wrong actually what I meant was all commands including the ones dont start with l
ls (tab)
ls lsbom lsdistcc lsm lso lsvfs // seems alright here
however if I press enter
comm[0]: 'ls' and comm[1]: '(null)' // execution of the command fails!!! WHY????
Execution of the command is failed
: No such file or directory
If I use a static array like this one char *test[7] = {"ls","cat","lso", "mk", "mkd", "mkdir",""}; everything seems fine including execution of the command..
Where are the definitions/declarations for commands[] and count ?
Also: your style is incoherent, the program is barely readable.
Why do you cast the return from malloc() at one place and not at another place?
If (x == 0) {} and if ( !x ) {} are equivalent. Make your choice and stick with it.
The ugly do { ... } while ( ... ); loop can be replaced by a for( ... ; ... ; ...) {} loop, saving you two levels of indentation.

Resources