I am using a hash function from the internet and when I use a print function before the return statement, it makes my program correct, but if I remove it, it becomes bugged again... like literally frustrating because I can do printf("asfasfnasfnk\n"); and it would output correctly but the moment I delete the printf function its bugged again...
unsigned int hash(const char *word)
{
/* credits to...
*https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
*/
unsigned long hash = 0;
int n = strlen(word);
for (int i = 0; i < n; i++)
{
hash = (hash << 2) ^ word[i];
}
return hash % N;
}
Output:
MISSPELLED WORDS
A
is
not
a
caterpillar
WORDS MISSPELLED: 5
WORDS IN DICTIONARY: 2
WORDS IN TEXT: 6
TIME IN load: 0.00
TIME IN check: 0.00
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.00
unsigned int hash(const char *word)
{
/* credits to...
*https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
*/
unsigned long hash = 0;
int n = strlen(word);
for (int i = 0; i < n; i++)
{
hash = (hash << 2) ^ word[i];
}
printf("%s -> %lu\n", word, hash%N);
return hash % N;
}
Output:
cat -> 1984
caterpillar -> 109622
MISSPELLED WORDS
a -> 97
A
cat -> 1984
is -> 471
is
not -> 1832
not
a -> 97
a
caterpillar -> 109622
WORDS MISSPELLED: 4
WORDS IN DICTIONARY: 2
WORDS IN TEXT: 6
TIME IN load: 0.00
TIME IN check: 0.00
TIME IN size: 0.00
TIME IN unload: 0.00
TIME IN TOTAL: 0.00
The words in the dictionary is cat and caterpillar, the words in the text is "A cat is not a caterpillar"
Functions:
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Number of buckets in hash table
const unsigned int N = 200000;
// Hash table
node *table[N];
// Returns true if word is in dictionary else false
bool check(const char *word)
{
// TODO
int len = strlen(word);
char *copy = malloc(sizeof(char) * len + 1);
// change into lowercase the word
for (int i = 0; i < len; i++)
{
copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);
node *tmp = table[index];
// check if the word is in the hash table
while (tmp != NULL)
{
if (strcmp(tmp->word, copy) == 0)
{
free(copy);
return true;
}
tmp = tmp->next;
}
free(copy);
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
/* credits to...
*https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
*/
unsigned long hash = 0;
int n = strlen(word);
for (int i = 0; i < n; i++)
{
hash = (hash << 2) ^ word[i];
}
return hash % N;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
// TODO
char *words = malloc(sizeof(char) * (LENGTH + 1));
if (words == NULL)
{
return 1;
}
// initialize the hash table to NULL
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
// open dictionary file
FILE *indata = fopen(dictionary, "r");
// 1 character for '\0' and another for '\n' because fgets takes a trailing new line
// when it reads 'man' the value of words will be "man\n\0" so meaning 2 extra characters
while (fgets(words, LENGTH + 2, indata) != NULL)
{
// get rid of the trailing new line from fgets
words[strlen(words) - 1] = '\0';
// allocate memory for the newNode
node *newNode = malloc(sizeof(node));
if (newNode == NULL)
{
return false;
}
// get the index by using the hash function
int index = hash(words);
strcpy(newNode->word, words);
// make the newNode the head of the list
newNode->next = table[index];
table[index] = newNode;
}
// free memory and close the opened file
free(words);
fclose(indata);
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
// TODO
// counter of words loaded
unsigned int counter = 0;
// loop through the hash table
for (int i = 0; i < N; i++)
{
node *tmp = table[i];
while (tmp != NULL)
{
counter++;
tmp = tmp->next;
}
}
return counter;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
// TODO
// loop through the whole hash table
for (int i = 0; i < N; i++)
{
while (table[i] != NULL)
{
node *tmp = table[i]->next;
free(table[i]);
table[i] = tmp;
}
}
return true;
}
// TODO
int len = strlen(word);
char *copy = malloc(sizeof(char) * len + 1);
// change into lowercase the word
for (int i = 0; i < len; i++)
{
copy[i] = tolower(word[i]);
}
// get the index by using the hash function
int index = hash(copy);
Notice that + 1 in the malloc. Why is that there? It's to allow space for the terminating zero byte that marks the end of the string.
Say the string is "test". Then strlen will return 4. Your loop will iterate from 0 to 3 inclusive, copying the four letters in the word.
But you will not copy the terminating zero byte on the end of the string. When hash calls strlen on copy, who knows what value it will get since what you have passed it is not a legal string.
Change the condition in the for loop to i <= len.
Related
The goal of the following code segment is to take a sorted array of strings, and count how many of each word there is.
That information is then put into a struct called reduceNode that holds a string and a count for the given string.
The reduceNode structs are put into another array.
Once all of the words and their counts are found and put into the intermediate array, they are inserted into a global array of reduceNode structs.
This method is called by threads, which is why I am storing the results into a global array.
Anytime I run this part of the program, I am getting a segfault.
I assume I am accessing an array out of bounds, but I am having trouble narrowing down where I am doing so.
void* reduce(void* num) //Reduce function
{
int index = *(int*)num;
int curSize = 0; //Size of the current linked list
struct HashNode *head = HashTable[index]; //Get the head of the linked list from the hashtable
struct HashNode *linkedList = head; //Pointer to the head to traverse the linked list
while(linkedList != NULL) //Gets the size of the current linked list
{
curSize++;
linkedList = linkedList->next;
}
linkedList = head;
int linkedListTraverse = 0; //Array index for each linked list node
int numSort[curSize];
char* wordSort[curSize];
while(linkedList != NULL)
{
if(app == 1)
numSort[linkedListTraverse] = linkedList->num; //Copy the data from the linked list into an array
else
{
wordSort[linkedListTraverse] = (char*) malloc(sizeof(linkedList->string));
strcpy(wordSort[linkedListTraverse],linkedList->string); //Copy the data from the linked list into an array
}
linkedList = linkedList->next;
linkedListTraverse++;
}
if(app == 1)
{
qsort(numSort, curSize, sizeof(int), numCmpFunc); //Sort the current node
int i, j = 0;
reduceNode* numSortArray[curSize];
reduceNode* curNum;
for(i = 0; i < curSize; i++)
{
curNum = (reduceNode*) malloc(sizeof(reduceNode));
curNum->num = numSort[i];
numSortArray[i] = curNum;
}
i = 0;
while(sortedArray[i] != NULL)
{
i++;
}
for(j = 0; j < curSize; j++, i++)
{
sortedArray[i] = numSortArray[j];
}
return (void*) 0;
}
else
{
int i = 0;
while(i < curSize) //Convert all of the words to lowercase
{
char* str = wordSort[i];
char *p;
for (p = str; *p != '\0'; p++)
*p = (char)tolower(*p);
i++;
}
qsort(wordSort, curSize, sizeof(char*), stringCmpFunc); //Sort the current node
}
int curWordIndex = 0; //Exclusively for wordcount
int checkWordIndex = 1;
int curArrayIndex = 0;
reduceNode *curWord;
reduceNode* wordCountArray[curSize];
while(curWordIndex < curSize)
{
curWord = malloc(sizeof(reduceNode));
curWord->word = wordSort[curWordIndex]; //Set the word
curWord->count = 1; //Start the count out at 1
while(strcmp(wordSort[curWordIndex], wordSort[checkWordIndex]) == 0) //While the two words are equal
{
checkWordIndex++; //Advance the leading index check
curWord->count++;
if(checkWordIndex >= curSize) //If the leading index goes beyond the array bounds
{
break;
}
}
if(checkWordIndex <= curSize)
{
curWordIndex = checkWordIndex;
checkWordIndex = curWordIndex + 1;
}
else if(checkWordIndex >= curSize) //If the leading index goes beyond the array bounds
{
if(strcmp(curWord->word, wordSort[curWordIndex]) != 0)
{
curWord->word = wordSort[curWordIndex]; //Set the word
curWord->count = 1; //Start the count out at 1
curArrayIndex++;
wordCountArray[curArrayIndex] = curWord;
}
else
{
wordCountArray[curArrayIndex] = curWord;
curArrayIndex++;
}
break;
}
wordCountArray[curArrayIndex] = curWord;
curWord = NULL;
curArrayIndex++;
}
int i,j = 0;
while(sortedArray[i] != NULL)
{
i++;
}
for(j = 0; j < curSize; j++, i++)
{
sortedArray[i] = wordCountArray[j];
}
return (void*) 0;
}
reduceNode is defined as
typedef struct reduceNode
{
int count;
char *word;
int num;
} reduceNode;
sortedArray is declared globally as
reduceNode **sortedArray;
and later initialized as
sortedArray = (reduceNode **)calloc(1,sizeof(reduceNode*)*inputCount);
Input count is the number of words that are read into the program
An example input would be an array: [alpha, alpha, bravo, charlie, charlie, charlie, delta]. The expected result would be [alpha 2, bravo 1, charlie 3, delta 1].
1.
You checkWordIndex reaches exactly curSize and strcmp(wordSort[curWordIndex], wordSort[checkWordIndex] will go out of bounds. I recomment printing the indicies for debugging.
if(checkWordIndex < curSize)
{
curWordIndex = checkWordIndex;
checkWordIndex = curWordIndex + 1;
}
this code will still lead to checkWordIndex == curSize
2.
You allocate new memory, remember to free it.
3.
For thread safety lookup mutex in C.
I recommend using only one indicie and iterating like
while(index < cursize-1)
{
...
++index;
}
your fist indicie is index and your second is index+1.
I'm having issues to correct my code so that it works as I want it.
I have three arrays given in this example:
char arr[MAX_ELEMENTS][MAX_LENGTH] = {"ABS","ABS","ABS","ACT","ACT","PPB","PPB","QQQ","QQQ"};
char race[MAX_ELEMENTS][MAX_LENGTH] = {"PARI", "PARI", "LOND", "PARI", "PARI", "CYKA", "LOND", "CYKA", "PARI"};
int freq[MAX_ELEMENTS];
I wish to create a function that can count the amount of occurrences of string elements in arr[] and store them in freq[]. Apart from that I also wish to know in what race[] there have been the most occurrences of given arr[].
To demonstrate this here is an example of what output I wish to receive when the function works:
In Race [PARI] the highest occurence was [ABS] with 3 occurences!
In Race [LOND] the highest occurence was [ACT] with 1 occurences!
.....
Currently, I am able to count the occurrences of arr[] in freq[] but I can't associate them with their respective race[] and give that output..
for(i=0; i<size; i++)
{
count = 1;
for(j=i+1; j<size; j++)
{
/* If duplicate element is found */
if(strcmp(arr[i], arr[j])==0)
{
count++;
/* Make sure not to count frequency of same element again */
freq[j] = 0;
}
}
/* If frequency of current element is not counted */
if(freq[i] != 0)
{
freq[i] = count;
}
}
Giving me currently :
ABS occurs 3 times.
ACT occurs 2 times.
etc. etc...
But I don't know how I can associate them with the race[] and only count them if a given race.
You probably have to use struct here to format your data.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define true 1
#define len 100
#define elms 10
struct NODE;
#define Node struct NODE
struct NODE {
unsigned long int val;
int count;
char name[len];
Node *left;
Node *right;
};
Node * makeNode(char * str, unsigned long int val){
Node * tmp = (Node *)malloc(sizeof(Node));
strcpy(tmp->name, str);
tmp->val = val;
tmp->left = NULL;
tmp->right = NULL;
tmp->count = 1;
return tmp;
}
unsigned long int getHash(char * name){
int prime = 19;
int i = 0;
unsigned long int val = 0;
while(name[i]!='\0'){
val += (name[i] * pow(prime, i) );
++i;
}
return val;
}
void insert(Node * root, char * name){
Node * newnode;
int val = getHash(name);
Node * tmp = root;
while(tmp != NULL) {
if ( tmp->val == val){
tmp->count += 1;
break;
}
if (val > tmp->val){
if( tmp->right != NULL)
tmp = tmp->right;
else{
tmp->right = makeNode(name, val);
break;
}
}else {
if( tmp->left != NULL)
tmp = tmp->left;
else{
tmp -> left = makeNode(name, val);
break;
}
}
}
}
Node * find(Node * root, char * name){
int val = getHash(name);
Node * tmp = root;
while(tmp != NULL){
if(tmp -> val == val){
return tmp;
}else if (val > tmp->val){
tmp = tmp->right;
}else{
tmp = tmp->left;
}
}
return NULL;
}
struct Race {
char name[len];
char elements[elms][len];
};
char arr[elms][len] = {"ABS","ABS","ABS","ACT","ACT","PPB","PPB","QQQ","QQQ"};
char race[elms][len] = {"PARI", "PARI", "LOND", "PARI", "PARI", "CYKA", "LOND", "CYKA", "PARI"};
int freq[elms];
void copyArray(char dest[elms][len], char src[elms][len] ){
int i = 0;
while(strlen(src[i]) > 0){
strcpy(dest[i],src[i]);
++i;
}
}
int main(){
Node * root = makeNode("root", 0);
int i = 0;
while(strlen(arr[i]) > 0){
insert(root,arr[i]);
++i;
}
i = 0;
while(strlen(arr[i]) > 0){
Node * r = find(root,arr[i]);
printf("found %s, count = %ld\n", r->name, r->count);
++i;
}
// make representation of race
struct Race r1, r2;
strcpy(r1.name, "PARI");
{
char tmp[elms][len] = { "ABS", "PPB", "QQQ" };
copyArray(r1.elements, tmp);
}
strcpy(r2.name, "LOND");
{
char tmp[elms][len] = { "ACT" };
copyArray(r2.elements, tmp);
}
struct Race races[2] = {r1, r2};
i = 0;
while(i < 2){
struct Race * current = &races[i];
printf("for %s", current->name);
Node * max = NULL;
int m = -1;
int j = 0;
while(strlen(current->elements[j]) > 0){
Node * tmp = find(root, current->elements[j]);
if( tmp != NULL && tmp->count > m) {
max = tmp;
m = tmp->count;
}
++j;
}
if (max != NULL){
printf(" max is %s : %d\n", max->name, max->count);
}else{
printf(" max is None\n");
}
++i;
}
return 0;
}
Basically you have to format you data, and specify link between them. Here I used Binary tree and Rabin karp hashing technique to store data efficiently.
Binary tree is best way to solve counting problem, since the search operation fairly cheap. and Rabin karp hashing technique will avoid string comparison every time.
And I create a struct called Race to store all related elements of that race. so the algorithm is going to be.
let arr be array of elements
let races be array of races
for each race in races
define related element
#find occurrence now
#Binary tree will increment count if element already exist.
let binary_tree be a Binary Tree
for each element in arr
add element to binary_tree
# now we have all the elements with it's count
# let's iterate through races now
for each race in races
m = null
for element in race.elements
node = find_element_in_binary_tree(element)
if node is not null
m = max(m, node)
if m is not null then
print m
else
print not found
First, initializations, note the []s
char arr[][MAX_LENGTH] = {"ABS","ABS","ABS","ACT","ACT","PPB","PPB","QQQ","QQQ"};
char race[][MAX_LENGTH] = {"PARI","PARI","LOND","PARI","PARI","CYKA","LOND","CYKA","PARI"};
int freq[MAX_ELEMENTS];
int n = sizeof(arr)/sizeof(*arr); // get actual number of used items
int i,j;
int max = 0; // init max to 0
The main loop goes through arr and race, and whenever a dupe is found at [j] (after [i]), "invalidate" the dupe ("already processed") by setting its first char to 0 (empty string).
Note that j starts from i and not i+1 to ensure freq is at least 1, even for the first non-dupes items.
for(i=0 ; i<n ; i++) {
freq[i] = 0; // ensure freq is 0 for any item
if ( ! *arr[i]) continue; // skip already processed items
for(j=i ; j<n ; j++) { // j=i, not i+1!
if (!strcmp(arr[i],arr[j]) && !strcmp(race[i],race[j])) {
freq[i]++; // update max if necessary
if (freq[i] > max) max = freq[i];
if (j > i) *arr[j] = 0; // invalidate that arr element
}
}
}
Finally display the max appearances, including ties
printf("Items at max=%d:\n", max);
for(i=0 ; i<n ; i++) {
if (freq[i] == max) { // skipped items are never displayed (max cannot be 0)
printf("%s / %s\n", arr[i],race[i]);
}
}
(no need to check for "invalidation" as max will be >0, and all invalidated items have freq[i] == 0)
I've been working on a Trie program for practice and i'm running into some logic/coding issues when it comes to trying to create a method that removes or destroys words that are currently in the trie:
#include "trie.h"
/**************************************************************************/
/* Helper Functions
* trie_node_t * trie_new( void );
* Allocates memory for a new trie node
* Returns NULL if memory allocation was not possible or
* a memory address to a trie_node in the heap
/**************************************************************************/
trie_node_t * trie_new( void ){
trie_node_t * tmp = NULL;
int i;
if ( ( tmp = ( trie_node_t * ) malloc ( sizeof( trie_node_t ) ) ) == NULL )
return NULL;
for( i = 0; i < ALPHA_SIZE; i++ ) {
tmp->child[ i ] = NULL;
tmp->end = 1;
}
return tmp;
}
/**************************************************************************/
/* Functions functions
* int trie_size ( trie_node_t * root );
* Returns the number of words in the trie
* int trie_contains ( trie_node_t * root, char word[ ] );
* Returns 1 if a the word is in the trie
* 0 otherwise
* int trie_insert ( trie_node_t ** rootRef, char word[ ] );
* Returns 1 if the word is inserted in the trie
* 0 otherwise
* int trie_remove ( trie_node_t ** rootRef, char word[ ] );
* Returns 1 if the word is removed from the trie
* 0 otherwise
* int trie_destroy ( trie_node_t ** Tref );
* Returns 1 if the trie and all its node are destroyed
**************************************************************************/
int trie_size ( trie_node_t * root ) {
int i = 0;
int count = 0;
trie_node_t *temp = root; //Creates a temp variable (Because its being modified)
//if the reference does not exist(no more children)
if (temp == NULL){
return EXIT_FAILURE;
}
//loops through the child array
for (i = 0; i < ALPHA_SIZE; i++){
//if the reference to next child is not null
if(temp->child[i] != NULL){
//and the end character is '/0' (or not)
if(temp->child[i]->end == '/0'){
count += 1;
}
//add the return of trie_size to count, then run through the method again at the lower level.
count += trie_size(temp->child[i]);
}
}
return count;
}
/**************************************************************************/
int trie_contains ( trie_node_t * root, char word[ ] ){
trie_node_t *temp = root; //Create a temp variable, can't use the default root. (just a location).
int i = 0;
int length = strlen(word); //finds the length of array
int index = 0;
if (temp == NULL){
return EXIT_FAILURE;
}
if(!valid_word(word)){
return 0;
}
for (i = 0; i < length; i++){
index = charToInt(tlower(word[i]));
if (!temp->child[index]){
return 0;
}
temp = temp->child[index];
}
if (temp->end != '/0'){ //Checks if the end character at the last index is '/0' if it is not, it is not a word. Return 0 (not found)
return 0;
}
return 1;
}
/**************************************************************************/
int trie_insert ( trie_node_t ** rootRef, char word[ ] ){
trie_node_t *temp = *rootRef; //Create a temp variable, can't use the default rootRef. (just a location).
int i = 0;
int length = strlen(word); //finds the length of array
int index = 0;
if (temp == NULL){ //checks that the reference is not null
return EXIT_FAILURE;
}
if (!valid_word(word)){//checks if word is valid.
return 0;
}
for (i = 0; i < length; i++){
if (word[i] == '/0'){
index = 26;
} else if (word[i] >= 'A' || word[i] <= 'Z'){
index = charToInt(tolower(word[i])); //turns the word[i] into a usable integer.
}
else {
return EXIT_FAILURE;
}
if (!temp->child[index]){ //if there is not a child reference.
temp->child[index] = trie_new(); //create one.
}
temp = temp->child[index]; //move down to next level (next child)
}
temp->end = '\0';
return 1;
}
/**************************************************************************
* I'm pretty sure the majority of this method is incorrect, this was just an attempt I had. From what I understand I have to do is this:
* (1) Find the last character in the word (Found with the '/0' end char) and as long as there are no nodes connected at a lower level free the memory.
* (2) Move up (recursion is probably the easiest way to do this, but I haven't been able to figure out how) and check again.
* (3) Continue until the first letter of the word is reached, then exit.
* My question is how do I check to see if another node is connected? Also what would be the format of the recursive call?
**************************************************************************/
void trie_remove ( trie_node_t ** rootRef, char word[ ] ){
trie_node_t *temp = *rootRef;
int i = 0;
int index = 0;
int length = strlen(word);
if (temp == NULL){
return;
}
if (!valid_word(word)){
return;
}
//Code is incomplete, just an attempt.
for (i = 0; i < length; i++){
index = charToInt(tolower(word[i])); //not sure if this is necessary.
if (temp->child[i]->end == '/0'){
free(temp->child[i]);
}
}
return;
}
/**************************************************************************/
int trie_destroy ( trie_node_t ** rootRef ){
//issues with logic here, not exactly sure how to do this.
return 1;
}
/**************************************************************************/
int trie_init(trie_node_t **rootRef){
if ( *rootRef == NULL) {
*rootRef = trie_new();
}
return 1;
}
/**************************************************************************/
int valid_word (char word[]){
int length = 0;
int i = 0;
for (i = 0; i < length; i++){
if(charToInt(word[i]) > 26 || charToInt(word[i]) < 0){
return 0;
}
}
return 1;
}
/**************************************************************************/
int charToInt(char c){
return (int)c-(int)'a';
}
Trie.h: (Important stuff anyways) ALPHA_SIZE is 27.
typedef struct trie_node {
char end;
struct trie_node *child[ ALPHA_SIZE ]; //reference to trie node.
}trie_node_t;
I hope I formatted this question correctly, and I have looked on the site for possible solutions already but have yet to find any. If anyone can help me check my logic/ assist me in the coding of these methods that would be amazing. I don't just want the code I want to learn why it works!
Thanks in advance.
for Remove, you need to remove free'd pointers from the trie:
for (i = 0; i < length; i++){
index = charToInt(tolower(word[i])); //not sure if this is necessary.
if (temp->child[i]->end == '/0'){
free(temp->child[i]);
temp->child[i]=0; // lets not point to free'd elements
}
}
For destroy - try recursion, roughly:
for (int i=ALPHA_MIN; i < ALPHA_MAX; ++i)
{
if (rootRef->child[i])
{
trie_destroy(rootRef->child[i]);
}
}
free (rootRef);
I am having trouble implementing my insert function for my hash table.
So I implement some test calls where I just call the function separately. For actual use, I call the function inside a while loop. For testing purpose, I only run the loop 4 times.
I post some outputs below. The reason the table looks weird is because of my hash function. It hashes the words such that A = 1, B = 2, C = 3, and so on. The position of the letter in the word is irrelevant, since I will consider permutations of the word. Moreover, the case of the letter will be irrelevant in this problem as well, so the value of a = the value of A = 1.
And for strings, abc = 1 + 2 + 3 = 6, bc = 2 + 3 = 5, etc.
Overall, the hash function is fine. The problem is the insert function.
The first 4 words of my local dictionary are A, A's, AA's, AB's.
My expected output should be (I got the same output when I run the test calls):
0:
1: [W: A, Len:1]
2:
3:
...
18:
19:
20: [W: A's, Len:3]
21: [W: AA's, Len:4]
22: [W: AB's, Len:4]
But when I call the function inside a loop, whatever is last on the list will overwrite other entries. If I run the loop 100 times, then the last entry still replaces the previous ones (Notice how the lengths of the words are unchanged, but only the words are replaced):
0:
1: [W: AB's, L:1]
2:
3:
...
18:
19:
20: [W: AB's, Len:3]
21: [W: AB's, Len:4]
22: [W: AB's, Len:4]
Below is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int hash(char *word)
{
int h = 0;
while(*word != '\0')
{
if(*word >='A' && *word < 'A'+26) {
h=h+(*word -'A' + 1);
}
else if(*word >='a' && *word < 'a'+26) {
h=h+(*word -'a' + 1);
}
//else { // special characters
// return -1;
//}
word++;
}
return h;
}
typedef struct Entry {
char *word;
int len;
struct Entry *next;
} Entry;
#define TABLE_SIZE 1000 // random numbers for testing
Entry *table[TABLE_SIZE] = { NULL }; // an array of elements
void init() {
int i;
for (i = 0; i < TABLE_SIZE; i++) {
// initialize values
struct Entry *en = (struct Entry *)malloc(sizeof(struct Entry));
en->word = "";
en->len = 0;
en->next = table[i];
table[i] = en;
}
}
//Insert element
void insertElement(char *word, int len) {
int h = hash(word);
int i;
// because all words are different so there is no need to check for duplicates
struct Entry *en = (struct Entry *)malloc(sizeof(struct Entry));
en->word = word;
en->len = len;
en->next = table[h];
table[h] = en;
}
void cleanTable()
{
struct Entry *p, *q;
int i;
for( i=0; i<TABLE_SIZE; ++i )
{
for( p=table[i]; p!=NULL; p=q )
{
q = p->next;
free( p );
}
} // for each entry
}
int main() {
init(); // create hash table
// test calls produce correct output
//insertElement("A", (int)strlen("A"));
//insertElement("A's", (int)strlen("A's"));
//insertElement("AA's", (int)strlen("AA's"));
//insertElement("AB's", (int)strlen("AB's"));
int i;
i = 0;
FILE* dict = fopen("/usr/share/dict/words", "r"); //open the dictionary for read-only access
if(dict == NULL) {
return;
}
// Read each line of the file, and insert the word in hash table
char word[128];
while(i < 4 && fgets(word, sizeof(word), dict) != NULL) {
size_t len = strlen(word);
if (len > 0 && word[len - 1] == '\n') {
word[len - 1] = '\0'; // trim the \n
}
insertElement(word, (int)strlen(word));
i++;
}
for ( i=0; i < 50; i++)
{
printf("%d: ", i);
struct Entry *enTemp = table[i];
while (enTemp->next != NULL)
{
printf("[W: %s, Len:%d] ", enTemp->word, enTemp->len);
enTemp = enTemp->next;
}
printf("\n");
}
cleanTable();
return 0;
}
Try to reallocate the memory in each loop in this part of code:
char* word = malloc(sizeof(char)*128);
while(i < 4 && fgets(word, sizeof(word), dict) != NULL) {
size_t len = strlen(word);
if (len > 0 && word[len - 1] == '\n') {
word[len - 1] = '\0'; // trim the \n
}
insertElement(word, (int)strlen(word));
word = malloc(sizeof(char)*128);
i++;
}
You forgot to reallocate memory to every string which causes all pointers points at same point
Note: Not tested
notice that your insertElement get a pointer to a string, and assign that pointer to the current Entry, but its the main function, you pass the word argument(a pointer) that point the stack allocated string, and that string is changed after each read of a word. you must use malloc so that each word point to its own memory area
I am totally new to C so I am having troubles with hash table and linked list. I am making an anagram solver. There are many examples I found online but each person has done it differently and rather complicated so I'm really confused now.
I'm pretty okay with the most of the implementation of the program. But I'm actually stuck at the very beginning.
So I need to create a hash table where in each entry, the key is an int and the value is a linked list of words.
The way I get the key, or the hash value, is by converting a word to a number. For example, A is 1, B is 2, C is 3, AB is 3, BC is 5, ABC is 6, and so on. I guess the words should be case insensitive to make things easier.
Below is the code I'm working on. I'm pretty sure is not in the correct syntax. Right now I'm just working on the structure of the table.
typedef struct Entry {
int key;
char *word;
Entry *next;
} Entry;
typedef struct HashTable {
int size;
Entry *entry;
} HashTable;
// initialize table
HashTable* create(int size) {
HashTable *table = (HashTable *)malloc(sizeof(HashTable));
table->entry = (Entry *)malloc(sizeof(Entry) * size);
table->size = size;
int i;
for (i = 0; i < size; i++) {
table->entry[i].key = 0; // All entries to be 0
}
return table;
}
// hash the word
int getHash(char *word)
{
// How do I implement a loop here
}
void insert(HashTable *table, int key, char *word) {
int hash = getHash(word);
int i = 0;
// if key has already existed, find and add to linked list
while(table->entry[hash].key != 0 && (i < table->size)) {
if(table->entry[hash].key == key) {
table->entry[hash].word = word;
return; /* */
}
//hash = (hash + 1); // I'm also stuck with incrementing the hash value
i++; // increment loop index
}
// if key does not exist, find a '0 slot', and store the key and value
if(table->entry[hash].key == 0) {
table->entry[hash].key = key;
table->entry[hash].word = word;
}
}
I would suggest start from a rather simple way to find anagrams of a word from text.
int anagrams(char * word, char * text) {
int bin[256] = { 0 }, m = 0, found = 0, len = 0, c, i;
for (i = 0; word[i]; i++, bin[c]--, len++) {
c = word[i];
if(bin[c] == 0) m++;
}
for (i = 0; text[i]; i++) {
c = text[i];
if (bin[c] == 0) m++;
if (bin[c] == -1) m--;
bin[c]++;
if (i >= len) {
c = text[i - len];
if (bin[c] == 0) m++;
if (bin[c] == 1) m--;
bin[c]--;
}
if (m == 0) found++;
}
return found;
}