HashTable top 20 count ideas - c

I'm having a bit of trouble thinking of an idea to solve my problem.
I have a word counting program, which uses a hashtable to count
all of the words in any number of files, and print only the words
that are in all files, and their counts. I also store all of my used
hash indexs in a linked list.
Solved my own problem, I knew the answer would be simple. I just figured out the one with the lowest count and if my new value was greater than than that, but it at the index of the one of the lowest count in the array of the twenty word structs.
Thanks for all of your help everyone!
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <string.h>
/*Structures*///////////////////////////////////////////
//A struct to hold the words in the hash tables and their
//counts
struct counter{
int count;
int printed;
char word[51];
int allfiles[101];
struct counter * next;
};
//A struct to hold the hash indexes of the already visited
//index, for easy printing
struct index{
int used;
struct index * next;
};
//A simple struct to pass arguments to the work function for
//threading
struct arguments{
void * id;
int fileid;
};
////////////////////////////////////////////////////////
/*Functions*////////////////////////////////////////////
static int hash(char * word);
static void inHash(struct counter * newWord, int hash, int FILEID);
static void indexchain(int hash);
//static void hashprint(int NUMFILES);
static void * work(struct arguments *);
static void toptwenty(int NUMFILES);
static void print();
////////////////////////////////////////////////////////
/*Global Variables*/////////////////////////////////////
struct counter * top[20] = {0};
struct counter * hashtable[6222] = {0};
struct index * head;
////////////////////////////////////////////////////////
int main(int argc, char * argv[])
{
//check for valid number of arguments
if(argc < 2)
{
fprintf(stderr, "invalid number of arguments");
return 1;
}
//set up index chain starts with a null node
head = malloc(sizeof(struct index));
head->next = NULL;
head->used = -1;
//loop through any number of files
int arg;
for(arg = 1; arg < argc; arg++)
{
struct arguments * argum = malloc(sizeof(struct arguments));
argum->fileid = arg;
argum->id = ((void*)argv[arg]);
work(argum);
}
//hashprint(argc);
toptwenty(argc);
print();
return 0;
}
/*Function Definitions*/
//this function takes a file name and counts
//the words in the file
static void * work(struct arguments * argum)
{
int FILEID = argum->fileid;
void * in = argum->id;
int fd = open((char*)in, O_RDONLY);
if (fd == -1)
{
fprintf(stderr, "can't open %s for reading!\n", (char*)in);
exit(-1);
}
int BUFLEN = (int) lseek(fd, 0, SEEK_END);
lseek(fd, 0, 0);
//A few variable
char buf[BUFLEN + 1];
int lastRead;
lastRead = read(fd, buf, BUFLEN);
if (lastRead == -1)
{
fprintf(stderr, "error reading file %s!\n", (char*)in);
exit(-1);
}
//Parse the filebuffer for words.
char newword[51];
int c;
int curindex = 0;
buf[BUFLEN + 1] = ' ';
//not doing the last space because it is eof
for(c = 0; c < BUFLEN + 1; c++)
{
if((buf[c] >= 'A' && buf[c] <= 'Z'))
{
buf[c] += 32;
}
if(buf[c] >= 'a' && buf[c] <= 'z')
{
//add the next char to the string.
newword[curindex] = buf[c];
curindex++;
}
else
{
//make a new struct for the entry, and add it to the hashtable
//add its hash to the
if(strlen(newword) >= 6)
{
struct counter * temp = malloc(sizeof(struct counter));
strcpy(temp->word,newword);
int thishash = hash(temp->word);
//Only save hash indexes if they are in the first files
if(FILEID == 1)
{
indexchain(thishash);
}
inHash(temp, thishash, FILEID);
}
int wordlength = strlen(newword);
int i;
for(i = 0;i < wordlength; i++)
{
newword[i] = 0;
}
curindex = 0;
}
}
close(fd);
return in;
}
//Bad hash function by just adding ascii values of the
//characters
static int hash(char * word)
{
int loop = strlen(word);
int i;
int hashval = 0;
for(i = 0; i < loop; i++)
hashval += word[i];
return hashval;
}
//add a new word to the hash table
static void inHash(struct counter * newWord, int hash, int FILEID)
{
int eflag = 0;
if(hashtable[hash] == NULL)
{
//if the entry isnt in the table
if(FILEID == 1)
{
newWord->allfiles[FILEID] = 1; /*FILEID ARRAY TEST*/
newWord->count = 1;
newWord->next = NULL;
hashtable[hash] = newWord;
}
}
else
{
//if its not, but what if it is?
struct counter * cur = hashtable[hash];
if(strcmp(cur->word, newWord->word) == 0)
{
//is the word in the first slot?
cur->count += 1;
cur->allfiles[FILEID] = 1; /*FILEID ARRAY TEST*/
eflag = 1;
}
else
{
while(cur->next != NULL)
{
cur = cur->next;
if(strcmp(cur->word, newWord->word) == 0)
{
//if the word already exsists, update the count
cur->allfiles[FILEID] = 1; /*FILEID ARRAY TEST*/
cur->count += 1;
eflag = 1;
break;
}
}
}
//if its not in any bucket, make a new bucket
if(eflag == 0)
{
//Else add the new entry to the end of that list
if(FILEID == 1)
{
newWord->allfiles[FILEID] = 1; /*FILEID ARRAY TEST*/
newWord->count = 1;
newWord->next = NULL;
cur->next = newWord;
}
}
}
}
//adding a value to the linked list for printing
static void indexchain(int hash)
{
struct index * p = head;
int eflag = 0;
while(p->next != NULL)
{
if(p->used != hash)
p = p->next;
else
{
eflag = 1;
break;
}
}
if(eflag == 0)
{
struct index * newValue = malloc(sizeof(struct index));
newValue->used = hash;
newValue->next = NULL;
p->next = newValue;
}
}
/*
//This function will print the values in the hash tables and their counts
//Prints based on number of files to check if words are in all files
static void hashprint(int NUMFILES)
{
struct index * p;
p = head->next;
int hash;
int i;
int printbool = 1;
while(p != NULL)
{
hash = p->used;
struct counter * ptr = hashtable[hash];
while(ptr != NULL)
{
if(ptr->printed == 0)
{
for(i = 1; i < NUMFILES; i++)
{
if(ptr->allfiles[i] == 0)
{
printbool = 0;
break;
}
else
printbool = 1;
}
if(printbool == 1)
{
ptr->printed = 1;
printf("%s %d\n", ptr->word, ptr->count);
}
}
ptr = ptr->next;
}
p = p->next;
}
}
*/
//A function to see which numbers have the top twenty highest count
static void toptwenty(int NUMFILES)
{
struct index * p;
p = head->next;
int hash;
int i;
int printbool = 1;
while(p != NULL)
{
hash = p->used;
struct counter * ptr = hashtable[hash];
while(ptr != NULL)
{
if(ptr->printed == 0)
{
for(i = 1; i < NUMFILES; i++)
{
if(ptr->allfiles[i] == 0)
{
printbool = 0;
break;
}
else
printbool = 1;
}
if(printbool == 1)
{
for(i = 0; i < 20; i++)
{
if(top[i] == NULL)
{
top[i] = ptr;
break;
}
else if(ptr->count > top[i]->count)
{
top[i] = ptr;
break;
}
}
}
}
ptr = ptr->next;
}
p = p->next;
}
}
//print the top 20 count
static void print()
{
int i;
for(i = 0; i < 20; i++)
{
if(top[i] != NULL)
{
if(top[i]->printed == 0)
{
//printf("%s\n", top[i]->word);
printf("%s %d\n", top[i]->word, top[i]->count);
top[i]->printed = 1;
}
}
else
break;
}
}

Create an priority Queue that holds the 20 hash indexes that have the top counts and their corresponding counts.
When you are counting the lowest value is at the top of the queue if your new word beats it remove it from the queue O(1) and add your new one to the queue O(log(n)) which is only O(log(20)).

Related

cs50 speller stuck!! Please tell me why i am getting Signal 11(SIGSEGV): dumping core

I am completely stuck with this segmentation fault. Can't even test if my code will actually do what's intended. Anyone can please help?
I am getting segmentation fault at the line: table[hashed] = n;
(Process terminating with default action of signal 11 (SIGSEGV): dumping core.
Bad permissions for mapped region at address.....)
Here is my code.Many Thanks!
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 1327;
// Hash table
node *table[sizeof(node) * N];
int loadedsize = 0;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
int hashed = hash(word);
node *cursor = table[hashed]->next;
while (cursor != NULL)
{
if(strcasecmp(word, cursor->word) == 0)
{
return true;
}
else
{
cursor = cursor->next;
}
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
int sum = 0;
for (int i = 1; i <= strlen(word); i++)
{
sum += (tolower(word[i]) * (15 - i));
}
sum = sum % 1327;
return sum;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
//setup variable for word
char s[LENGTH + 1];
//NULL table
for (int i = 0; i < N; i++)
{
table[i] = NULL;
}
//setup new node
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
//open file
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
//read words and place node in table
while (fscanf(dict, "%s", s) != EOF)
{
int hashed = hash(s);
strcpy(n->word, s);
n->next = NULL;
if (table[hashed] == NULL)
{
table[hashed] = n;
}
else
{
n->next = table[hashed];
table[hashed] = n; <<<<THIS IS WHERE I GET DUMPING CORE>>>>
}
loadedsize += 1;
}
free(n);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return loadedsize;
}
```

Cs50 pset5 - program crashing

I finished writing my code for speller. It compiles fine but the only output being printed is "Misspelled words". The words misspelled , words in text, words in dictionary does not get printed. I'm assuming its because the program crashes before then? Here is my code. If only I knew in which function or area my problem lies I might be able to fix it. Also, my hash function is to base hash indexes base on the first two letters of the word.
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <strings.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "dictionary.h"
int dsize = 0;
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 676;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
// hash word
int hashnumber = hash(word);
node *cursor = table[hashnumber];
// traversing linked list at that hash number
while(cursor != NULL)
{
if((strcasecmp(cursor->word, word) == 0))
{
return true;
}
}
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
int hash;
// TODO: Improve this hash function
for( int i = 97; i < 123; i ++)
{
hash = (i - 97) * 26;
// check asciivalue both uppercase and lowercase for first letter
if(word[0] == (char)i || word[0] == (char)(i - 32) )
{
for ( int j = 97; j < 122; j++)
{
// check asciivalue both uppercase and lowercase for second letter
if(word[1] == (char)j || word[1] == (char)(j - 32))
{
hash = hash + (j - 97);
}
}
}
}
return hash;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
for( int i = 0; i < N; i++)
{
table[i] = NULL;
}
FILE *input = fopen(dictionary, "r");
if (dictionary== NULL)
{
return false;
}
node *temp;
char word[LENGTH + 1];
while((fscanf(input, "%s", word)) != EOF)
{
temp = malloc(sizeof(node));
if(temp == NULL)
{
return false;
}
strcpy(temp->word, word);
int hashnumber = hash(word);
if (table[hashnumber] == NULL)
{
table[hashnumber] = temp;
}
else
{
temp->next = table[hashnumber];
table[hashnumber] = temp;
}
dsize++;
}
fclose(input);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return dsize;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
node *temp;
node *cursor;
for ( int i = 0; i < N; i++ )
{
cursor = table[i];
while(table[i] != NULL)
{
temp = cursor;
cursor = cursor->next;
free(temp);
}
if(cursor == NULL && i == (N - 1))
{
return true;
}
}
return false;
}

trie data structure insert function is not working. Why?

I have implemented a trie data structure (reference). When I insert into the data structure, I get a segmentation fault. It could be a semantic error. Please help to correct it.
#include <stdio.h>
#include <stdlib.h>
#define maxlength 10
typedef struct node {
int isend;
struct node *branch[27];
} trinode;
int count, len;
trinode *createnode() {
trinode *new = (trinode *)malloc(sizeof(trinode));
int ch;
for (ch = 0; ch < 26; ch++) {
new->branch[ch] = NULL;
}
new->isend = 0;
}
trinode *insert_trie(trinode *root, char *newenty) {
int ind;
trinode *proot;
if (root == NULL)
root = createnode();
proot = root;
for (int i = 0; i < maxlength; i++) {
ind = newenty[i] - 'a';
if (newenty[i] == '\0')
break;
else {
if (root->branch[ind] == NULL)
root->branch[ind] = createnode();
root = root->branch[ind];
}
if (root->isend != 0)
printf("trying to insert duplicate");
else
root->isend = 1;
return proot;
}
}
void print_trie(trinode *cur) {
char word[40];
for (int i = 0; i < 26; i++) {
if (cur->branch[i] != NULL) {
word[count++] = (i + 'a');
if ((cur->branch[i]->isend) == 1) {
printf("\n");
for (int j = 0; j < count; j++) {
printf("%c", word[j]);
}
}
print_trie(cur->branch[i]);
}
}
count--;
}
int search_trie(trinode *root, char *target) {
int ind;
for (int i = 0; i < maxlength && root; i++) {
ind = target[i] - 'a';
if (target[i] == '\0')
break;
else
root = root->branch[ind];
}
if (root && root->isend == 1)
return root;
else
return 0;
}
int main() {
int ch;
trinode *root = NULL;
char *newenty;
char *target;
int check;
while (1) {
printf("\n enter option 1.insert_trie 2.display 3.search 4.exit");
scanf("%d", &ch);
switch (ch)
{
case 1:
printf("enter word");
scanf("%s", newenty);
root = insert_trie(root, newenty);
break;
case 2:
count = 0;
print_trie(root);
break;
case 3:
printf("enter elem you want to search");
scanf("%s", target);
check = search_trie(root, target);
if (check == 0)
printf("word not found");
else
printf("found");
break;
case 4:
exit(0);
break;
}
}
}
For starters the function createnode returns nothing
trinode *createnode()
{
trinode *new=(trinode *)malloc(sizeof(trinode));
int ch;
for(ch=0;ch<26;ch++)
{
new->branch[ch]=NULL;
}
new->isend=0;
}
Also it is unclear why the condition in the for loop is ch<26 instead of ch < 27 while the data member branch has 27 elements
struct node *branch[27];
This for in the function insert_trie
for(int i=0;i<maxlength;i++)
does not make a sense because within the loop there is the return statement
return proot;
So the loop has no more than one iteration.
The function print_trie depends on the global variable count that is a very bad design and it is unclear what the function does.
The function search_trie is declared like
int search_trie(trinode *root,char *target)
That is it has the return type int. However the function returns a pointer of the type trinode*:
if(root && root->isend==1)
return root;
In main the pointers
char *newenty;
char *target;
are not initialized and have indeterminate values. Thus these statements
scanf("%s",newenty)
and
scanf("%s",target);
invoke undefined behavior.
And you need to format the text of the program. A bad formatting is usually a reason of bugs.
char *newenty;
….
scanf("%s",newenty);
root=insert_trie(root,newenty);
newenty isn't pointing to valid memory, allocate memory to it like below.
char *newenty = malloc(maxLength);

Hashing, linked list, delete node

My task is to delete a node from a array of pointers which point to structure.
My code doesn't work and I just don't know why:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Jmena4.h"
#define LENGTH 101
#define P 127
#define Q 31
typedef struct node {
char *name;
struct uzel *next;
} NODE;
int hash(const char Name[]) {
int i;
int n = strlen(Name);
int result;
result = Name[0] * P + Name[1] * Q + Name[n - 1] + n;
return result % LENGTH;
}
void Insert(NODE *array[], const char *name) {
NODE *u;
int h;
u = (NODE*)malloc(sizeof(NODE));
u->name = name;
h = hash(name);
u->next = array[h];
array[h] = u;
}
int Search(NODE *array[], const char *name) {
NODE *u;
u = array[hash(name)];
while (u != NULL) {
if (strcmp(u->name, name) == 0) {
printf("%s\n", u->name);
return 1;
}
u = u->next;
}
printf("Name: %s wasn't found\n", name);
return 0;
}
int Delete(NODE *array[], const char *name) {
NODE *current;
NODE *previous;
int position = hash(name);
current = array[position];
previous = NULL;
while (current != NULL) {
if (strcmp(current->name, name) == 0) {
if (previous == NULL) {
array[position] = current->next;
return 1;
} else {
previous->next = current->next;
current = NULL;
return 1;
}
}
previous = current;
current = current->next;
}
return 0;
}
int main() {
int i;
NODE *array[LENGTH];
for (i = 0; i < LENGTH; i++) {
array[i] = NULL;
}
for (i = 0; i < Pocet; i++) {
Insert(array, Jmena[i]);
}
for (i = 0; i < PocetZ; i++) {
Delete(array, JmenaZ[i]);
}
Search(array, "Julie");
system("PAUSE");
return 0;
}
EDIT 1: I changed names of variables and instead of position = array[position] should be current = array[position], but it still doesn't work.
EDIT 2 : In array Jmena is string "Julie" and I can search it after Insert function, but after I delete strings from JmenaZ which not included "Julie" program output is: Name: Julie wasn't found.
For one thing, current isn't initialized before it gets tested in the while loop.

How to print a 2D array in C

I've been trying to get my program to print a barchart.
The issue is at the bottom, where I make a 2D array to hold the values and then attempt to
print the array. The problem is that is prints nothing. I've tried to solve it for a few hours with no luck. Any suggestions?
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define DELIM " " /* the delimiter */
#define MAX_CHANGE (10.0/86400.0) /* 10kg/day */
/* seconds in a day is 24 hours * 60 minutes * 60 seconds */
/* return 0 if the passed strings don't math, 1 otherwise */
/* defines the structure of Node */
struct Node{
char * id;
float weight;
int time;
int count;
struct Node * next;
} *head, *p, *t, *last;
/* Constructor which returns a pointer to a new node*/
struct Node *newNode(int *time, char * id, float *w)
{ /*note malloc returns a pointer */
struct Node *r = (struct Node *)malloc( sizeof(struct Node) );
r->time = *time;
r->id = strdup(id); //a duplicate id is made to prevent all the nodes from using the same userID
r->weight = *w;
r->count = 1;
r->next = NULL;
return r;
}
/* prints the list starting with head */
printList(struct Node * head)
{
while(head != NULL)
{
printf("%d %s %f\n",head->time,head->id,head->weight);
head = head->next;
}
return 0;
}
int main() {
char line[1024];
int lasttime = 0;
int success;
int timestamp;
int duration;
char userID[1000] = "";
char *token;
char temp[1000];
float weight;
float lastweight;
float change;
float changePerTime;
head = (struct Node*)malloc(sizeof(struct Node));
head->id = "";
head->weight = 0.0;
head->time = 0;
head->next = NULL;
last = head;
/*FILE * f = fopen("C:\\Users\\Chris\\Documents\\School\\York\\Computer Science\\2031 Software Tools\\Labs\\lab3\\testcases\\01.in","r"); */
/* last points to the last node in the list
head is always the same node
p is used to travers the list
t is a pointer the most recent occurrense of a user record
*/
while (fgets(line,1024,stdin) != NULL) {
userID[0] ='\0'; // resets userID
token = strtok(line, DELIM);
success = sscanf(token,"%d",&timestamp);
if (success < 1 || timestamp == 0)
{
printf("Invalid time\n");
continue;
}
while((token = strtok(NULL,DELIM) ) != NULL && token[0] != '.' && ! isdigit(token[0]) )
{
strcpy(temp,token); //
strcat(temp,DELIM ); // adds space between each token
strcat(userID, temp); // src temp must be a const string, not a pointer
temp[0] = '\0';
}
userID[strlen(userID)-1] = '\0'; //erases the tailing space.
if(strlen(userID) > 179 || !strlen(userID) )
{printf("Illegal userID\n"); continue; }
else if(token == NULL || sscanf(token,"%f", &weight) < 1 || weight < 30.0 || weight > 300.0)
{printf("Illegal weight\n"); continue; }
else if (lasttime >= timestamp)
{printf("Nonmonotonic timestamps\n"); continue; }
else {
/* sets t to last found user record and sets "last" to the last record*/
for(p = head, t = NULL; p != NULL; p = p->next)
{
if(strcmp(userID,p->id) == 0)
{
t=p;
}
last = p; // set last to last p.
}
if(t == NULL)
{
printf("OK newuser\n");
}
else if(t != NULL)
{
/* increments count of id's for this user */
(t->count)++;
duration = timestamp - t->time;
change = weight - t->weight;
changePerTime = change / duration;
if(changePerTime < -MAX_CHANGE || changePerTime > MAX_CHANGE)
printf("Suspiciously large weight change\n");
else
printf("OK\n");
}
/* adds node to end of list */
last->next = newNode(&timestamp,userID,&weight);
last = last->next;
/* adds time to last time */
lasttime = timestamp;
}
}
//fclose(f);
char bc[10][last->count];
int j, i, k, bh;
for(p = head; p != NULL, j <= last->count; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
{
for(i = 11, k=0, bh = (int)(p->weight / 30);i >= 0; i--)
{
if(k < bh)
{
bc[i][j] = '*';
k++;
}
else bc[i][j] = ' ';
}
j++;
}
}
//printf("%c", bc[9][1]);
int m=0, n=0;
for(m < 10; m++;)
{
for(n=0 ;n < last->count; n++)
{
printf("%c",bc[m][n]);
}
printf("%c",'\n');
}
}
Your outer for loop parts are incorrectly placed. Instead of:
for(m < 10; m++;)
You want:
for(m=0;m < 10; m++)
The condition, m<10, is the second part of the for loop, whereas you've mistakenly put it in the initialization part of the loop. Similarly, the increment statement, i++, was in your condition part, so you had no incrementing of the m variable happening.

Resources