Related
Why does this error occur when I run my code?
error: RUN FINISHED; Segmentation fault: 11; real time: 3s; user: 0ms; system: 0m
I'm creating 10 threads where each thread is a ticket seller. There is a 10by10 array that holds the seats of the tickets. Depending on the type of the ticket-seller a person will be sold that specific seat.
Is the issue with the pthreads?
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/*
* File: ticketsellers.c
* Author: iantheflyinghawaiian
*
* Created on July 4, 2016, 11:27 AM
*/
#include <stdio.h>
#include <pthread.h>
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
// seller thread to serve one time slice (1 minute)
int theatre[10][10] ;
struct node
{
int info;
struct node *ptr;
}*front,*rear,*temp,*front1;
int count = 0;
/* Create an empty queue */
void create()
{
front = rear = NULL;
}
/* Returns queue size */
void queuesize()
{
printf("\n Queue size : %d", count);
}
/* Enqueing the queue */
void enq(int data)
{
if (rear == NULL)
{
rear = (struct node *)malloc(1*sizeof(struct node));
rear->ptr = NULL;
rear->info = data;
front = rear;
}
else
{
temp=(struct node *)malloc(1*sizeof(struct node));
rear->ptr = temp;
temp->info = data;
temp->ptr = NULL;
rear = temp;
}
count++;
}
/* Displaying the queue elements */
void display()
{
front1 = front;
if ((front1 == NULL) && (rear == NULL))
{
printf("Queue is empty");
return;
}
while (front1 != rear)
{
printf("%d ", front1->info);
front1 = front1->ptr;
}
if (front1 == rear)
printf("%d", front1->info);
}
/* Dequeing the queue */
void deq()
{
front1 = front;
if (front1 == NULL)
{
printf("\n Error: Trying to display elements from empty queue");
return;
}
else
if (front1->ptr != NULL)
{
front1 = front1->ptr;
printf("\n Dequed value : %d", front->info);
free(front);
front = front1;
}
else
{
printf("\n Dequed value : %d", front->info);
free(front);
front = NULL;
rear = NULL;
}
count--;
}
/* Returns the front element of queue */
int frontelement()
{
if ((front != NULL) && (rear != NULL))
return(front->info);
else
return 0;
}
/* Display if queue is empty or not */
void empty()
{
if ((front == NULL) && (rear == NULL))
printf("\n Queue empty");
else
printf("Queue not empty");
}
//Ticket Seller
void * sell(char *seller_type)
{
char seller_type1;
seller_type1 = *seller_type;
int i;
i = 0;
while (i == 0);
{
pthread_mutex_lock(&mutex);
pthread_cond_wait(&cond, &mutex);
pthread_mutex_unlock(&mutex);
// Serve any buyer available in this seller queue that is ready
// now to buy ticket till done with all relevant buyers in their queue
//………………
// Case statements for seller_types
switch(seller_type1)
{
case 'H' :
printf("Seller type: H\n");
i = 1;
break;
case 'M' :
printf("Seller type: M\n");
i = 1;
break;
case 'L' :
printf("Seller type: L\n");
i = 1;
break;
}
}
return NULL; // thread exits
}
void wakeup_all_seller_threads()
{
pthread_mutex_lock(&mutex);
pthread_cond_broadcast(&cond);
pthread_mutex_unlock(&mutex);
}
int main()
{
int i, N;
pthread_t tids[10];
printf("Enter N value of Customers: ");
scanf("%d", &N);
printf("Number of Customers: %d", N);
char seller_type;
// Create necessary data structures for the simulator.
// Create buyers list for each seller ticket queue based on the
// N value within an hour and have them in the seller queue.
// Create 10 threads representing the 10 sellers.
seller_type = 'H';
pthread_create(&tids[i], NULL, sell, &seller_type);
seller_type = 'M';
for (i = 1; i < 4; i++)
pthread_create(&tids[i], NULL, sell, &seller_type);
seller_type = 'L';
for (i = 4; i < 10; i++)
pthread_create(&tids[i], NULL, sell, &seller_type);
// wakeup all seller threads
wakeup_all_seller_threads();
// wait for all seller threads to exit
for (i = 0 ; i < 10 ; i++)
pthread_join(&tids[i], NULL);
// Printout simulation results
//…………
exit(0);
}
As already assumed by #2501: the culprit causing the segfault was the uninitialized variable i in main.
I took the liberty to write a minimal example for your pthread-creating, with the addition of some printf`s.
It compiles without warnings with
gcc -W -Wall threadtest.c -o threadtest -pthread
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
struct seller_type {
char st;
int tid;
};
void *sell(void *arg)
{
struct seller_type *seller_type1 = arg;
int i;
i = 0;
printf("thread #%d: seller_type1->st = %c\n", seller_type1->tid,
seller_type1->st);
// no semicolon after while()
while (i == 0) {
switch (seller_type1->st) {
case 'H':
printf("Seller type: H\n");
i = 1;
break;
case 'M':
printf("Seller type: M\n");
i = 1;
break;
case 'L':
printf("Seller type: L\n");
i = 1;
break;
}
}
printf("thread #%d: Work done\n", seller_type1->tid);
return NULL;
}
int main()
{
int i = 0;
struct seller_type *seller_type1;
pthread_t tids[10];
seller_type1 = calloc(10, sizeof(struct seller_type));
// All error handling ommitted! Yes, ALL!
seller_type1[0].st = 'H';
seller_type1[0].tid = 0;
pthread_create(&tids[0], NULL, sell, &seller_type1[0]);
for (i = 1; i < 4; i++) {
seller_type1[i].st = 'M';
seller_type1[i].tid = i;
pthread_create(&tids[i], NULL, sell, &seller_type1[i]);
}
for (i = 4; i < 10; i++) {
seller_type1[i].st = 'L';
seller_type1[i].tid = i;
pthread_create(&tids[i], NULL, sell, &seller_type1[i]);
}
puts("All threads created");
// wait for all seller threads to exit
for (i = 0; i < 10; i++) {
pthread_join(tids[i], NULL);
printf("Thread %d joined\n", i);
}
puts("All threads joined");
exit(EXIT_SUCCESS);
}
I hope you can build upon that.
You have a data race when you pass seller_type to the newly created thread, while at the same time modify the variable in the main.
The type of the function passed to pthread_create must be: void*(*)(void*), not the type you're using.
Function pthread_join requires a type pthread_t as the first argument, not a pointer to that type, which is what you're passing to it.
The variable i in the main isn't initialized, and is used to index the array tids, in the first pthread_create call.
All of those four problems cause undefined behavior by themselves. I suspect the last one causes the crash.
I am getting a seg fault error at my pthread_join line. Below is my thread creation and joining code as well as my my_func thread function that I am calling. The program is supposed to create a variable amount of threads to execute a grep like function.
int
parallelSearchStatic(char **argv)
{
pthread_t worker_thread[NUM_THREADS];
ARGS_FOR_THREAD *args_for_thread;
queue_element_t *element;
int i;
int num_occurrences = 0;
int queue_count = 0;
queue_t *queue = createQueue(); /* Create and initialize the queue data structure. */
DIR *d;
struct dirent *dir;
d = opendir(argv[2]);
if (d)
{
while ((dir = readdir(d)) != NULL)
{
element = (queue_element_t *)malloc(sizeof(queue_element_t));
if(element == NULL){
perror("malloc");
exit(EXIT_FAILURE);
}
strcpy(element->path_name, argv[2]);
strcat(element->path_name, "/");
strcat(element->path_name, dir->d_name);
insertElement(queue, element);
queue_count++;
}
closedir(d);
}
int increment = queue_count/NUM_THREADS;
for(i=0;i<NUM_THREADS;i++){
args_for_thread = (ARGS_FOR_THREAD *)malloc(sizeof(ARGS_FOR_THREAD));
args_for_thread->threadID=i;
args_for_thread->queue=createQueue();
args_for_thread->args=argv;
for(i = 0; i < increment; i++)
{
insertElement(args_for_thread->queue, removeElement(queue));
queue_count--;
}
if(i == (NUM_THREADS - 1) && queue_count != 0)
{
for(i = 0; i < queue_count; i++)
{
insertElement(args_for_thread->queue, removeElement(queue));
}
}
if((pthread_create(&worker_thread[i], NULL, my_func, (void *)args_for_thread))!=0){
printf("Cannot create thread \n");
exit(0);
}
}
for(i=0;i<NUM_THREADS;i++)
{
pthread_join(worker_thread[i], NULL);
}
for(i = 0; i < NUM_THREADS; i++)
num_occurrences += countArray[i];
return num_occurrences;
}
void *my_func(void *this_arg)
{
ARGS_FOR_THREAD *args_for_me = (ARGS_FOR_THREAD *)this_arg; // Typecast the argument passed to this function to the appropriate type
int threadID = args_for_me->threadID;
queue_t *queue = args_for_me->queue;
char** args = args_for_me->args;
int count = 0;
while(queue->head != NULL)
{
queue_element_t *element = removeElement(queue);
char *a[5];
a[0] = args[0];
a[1] = args[1];
a[2] = element->path_name;
a[3] = args[3];
a[4] = args[4];
count += serialSearch(a);
}
countArray[threadID] = count;
free((void *)args_for_me); // Free up the structure
pthread_exit(NULL);
}
You are using re-using i in nested loops within an outer loop that also uses i as its counter variable:
for(i = 0; i < increment; i++)
and
for(i = 0; i < queue_count; i++)
This means that your pthread_create() (which follows these inner loops) is using the wrong value of i to access worker_thread[i], so some values of worker_thread[] remain uninitialised and then cause pthread_join() to crash.
Use a different variable (eg. j) for the inner loops.
I am trying to implement a generic hash structure that can support any type of data and any hash function.
A wrote the code and try to run it, it dosn't work, it breaks. I try to debug it and there it works well. I don't know where the problem is?
Here is the code that I used for implementing the structure:
The "hash.h" file:
typedef struct tip_hash_nod
{
void *info;
struct tip_hash_nod *urm;
}NOD_LISTA_HASH;
typedef struct
{
NOD_LISTA_HASH *Table;
int size;
int sizeMemory;
int (*hash)(const void *obiect,const int m);
void (*distruge)(void *obiect);
}*HASH;
void initializare_hash(HASH *h,int size,int (*hash_dat)(const void *obiect,const int m),void (*distruge)(void *obiect));
int hash_insert(HASH *h,void *obiect,int sizeOfObiect);
int hash_search(HASH h,void *obiect,int (*compara)(const void *a,const void *b));
void hash_delete(HASH *h);
And the "hash.c" file:
void initializare_hash(HASH *h,int size,int (*hash_dat)(const void *obiect,const int m),void (*distruge)(void *obiect))
{
int i;
(*h) = (HASH)malloc(sizeof(HASH));
(*h)->sizeMemory = size;
if(size != 0)
{
(*h)->Table = (NOD_LISTA_HASH *)malloc((*h)->sizeMemory * sizeof(NOD_LISTA_HASH));
for(i=0;i<(*h)->sizeMemory;i++)
{
(*h)->Table[i].info = NULL;
(*h)->Table[0].urm = NULL;
}
}
else
{
(*h)->Table = (NOD_LISTA_HASH *)malloc(sizeof(NOD_LISTA_HASH));
(*h)->Table[0].info = NULL;
(*h)->Table[0].urm = NULL;
(*h)->sizeMemory = 1;
}
(*h)->size = 0;
(*h)->hash = hash_dat;
(*h)->distruge = distruge;
}
int hash_insert(HASH *h,void *obiect,int sizeOfObiect)
{
int i,poz;
NOD_LISTA_HASH *p;
if((*h)->size == (*h)->sizeMemory)
{
HASH h1;
initializare_hash(&h1,2*(*h)->sizeMemory,(*h)->hash,(*h)->distruge);
for(i=0;i<(*h)->sizeMemory;i++)
{
if((*h)->Table[i].info != NULL)
hash_insert(&h1,(*h)->Table[i].info,sizeOfObiect);
p=(*h)->Table[i].urm;
while(p!=NULL)
{
hash_insert(&h1,p->info,sizeOfObiect);
p = p->urm;
}
}
hash_delete(h);
*h=h1;
return hash_insert(h,obiect,sizeOfObiect);
}
else
{
poz = (*h)->hash(obiect,(*h)->sizeMemory);
if((*h)->Table[poz].info == NULL)
{
(*h)->Table[poz].info = malloc(sizeOfObiect);
memcpy((*h)->Table[poz].info,obiect,sizeOfObiect);
(*h)->Table[poz].urm = NULL;
(*h)->size++;
}
else
{
p = &((*h)->Table[poz]);
while(p->urm!=NULL)
p = p->urm;
p->urm = (NOD_LISTA_HASH *)malloc(sizeof(NOD_LISTA_HASH));
p = p->urm;
p->info = malloc(sizeOfObiect);
memcpy(p->info,obiect,sizeOfObiect);
p->urm = NULL;
}
return poz;
}
}
int hash_search(HASH h,void *obiect,int (*compara)(const void *a,const void *b))
{
int poz;
NOD_LISTA_HASH *p;
poz = h->hash(obiect,h->sizeMemory);
if(h->Table[poz].info == NULL)
return -1;
else
if(compara(h->Table[poz].info,obiect)==0)
return poz;
else
{
p=h->Table[poz].urm;
while(p != NULL)
{
if(compara(p->info,obiect)==0)
return poz;
p = p->urm;
}
return -1;
}
}
static void distruge_lista(NOD_LISTA_HASH *p,void (*distruge_obiect)(void *obiect))
{
if(p->urm != NULL)
distruge_lista(p->urm,distruge_obiect);
else
{
if(p->info != NULL)
distruge_obiect(p->info);
free(p);
}
}
void hash_delete(HASH *h)
{
int i;
for(i=0;i<(*h)->sizeMemory;i++)
{
if((*h)->Table[i].info != NULL && (*h)->Table[i].urm != NULL)
{
distruge_lista((*h)->Table[i].urm,(*h)->distruge);
}
}
free((*h)->Table);
*h = NULL;
}
And this is my "main.c" file:
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include "hash.h"
int comparare(const void *a,const void *b)
{
return (*(int *)a - *(int *)b);
}
int hash(const void *obiect,int m)
{
return (*(int *)obiect) % m;
}
void distruge_obiect(void *obiect)
{
free((int *)obiect);
}
int main()
{
HASH h;
int val,error;
initializare_hash(&h,0,hash,distruge_obiect);
val = 20;
hash_insert(&h,&val,sizeof(int));
val = 800;
hash_insert(&h,&val,sizeof(int));
val = 2000;
hash_insert(&h,&val,sizeof(int));
val = 765;
hash_insert(&h,&val,sizeof(int));
val = 800;
error = hash_search(h,&val,comparare);
if(error == -1)
printf("Elementul %d nu se afla in hash.\n",val);
else
printf("Elementul %d se afla pe pozitia: %d.\n",val,error);
hash_delete(&h);
getch();
return 0;
}
How I already sad if I try to debug it works with no problem, but when I run it, it crashes. I can onely make an assumption that it can not dealocate the memory or something. My call stack loocks like this:
You've dropped a pretty big pile of code on us, without much to go on. I had a quick look anyway, and noticed this incorrect allocation:
(*h) = (HASH)malloc(sizeof(HASH));
HASH is a pointer type, so you are allocating only enough memory for one pointer. You want to allocate memory for the thing to which it points:
*h = malloc(sizeof(**h));
(The cast is not required in C, and some folks around here will be strident about not using one.)
That error would be entirely enough to cause all manner of bad behavior. In particular, the erroneous code might seem to work until you dynamically allocate more memory and write to that, so perhaps that explains why your tests crash on the second insertion.
The message queue simply stops working when dealing with many many threads. It only seems to work okay with 10 threads, for exmaple. GDB tells me
Program received signal SIGSEGV, Segmentation fault.
__GI_____strtol_l_internal (nptr=0x0, endptr=endptr#entry=0x0, base=base#entry=10, group=group#entry=0, loc=0x7ffff78b0060 <_nl_global_locale>)
at ../stdlib/strtol_l.c:298
298 ../stdlib/strtol_l.c: No such file or directory.
But I have no idea what this means. The same code on Windows works fine but on linux it doesn't, which confuses me more.
You can see below how this queue works. It is a singly linked list with locking while receiving messages. Please help me find where I messed up.
typedef struct Message {
unsigned type;
unsigned code;
void *data;
} Message;
typedef struct MessageQueueElement {
Message message;
struct MessageQueueElement *next;
} MessageQueueElement;
typedef struct MessageQueue {
MessageQueueElement *first;
MessageQueueElement *last;
} MessageQueue;
MessageQueue mq;
pthread_mutex_t emptyLock, sendLock;
pthread_cond_t emptyCond;
void init() {
mq.first = malloc(sizeof(MessageQueueElement));
mq.last = mq.first;
pthread_mutex_init(&emptyLock, NULL);
pthread_mutex_init(&sendLock, NULL);
pthread_cond_init(&emptyCond, NULL);
}
void clean() {
free(mq.first);
pthread_mutex_destroy(&emptyLock);
pthread_mutex_destroy(&sendLock);
pthread_cond_destroy(&emptyCond);
}
void sendMessage(MessageQueue *this, Message *message) {
pthread_mutex_lock(&sendLock);
if (this->first == this->last) {
pthread_mutex_lock(&emptyLock);
this->last->message = *message;
this->last = this->last->next = malloc(sizeof(MessageQueueElement));
pthread_cond_signal(&emptyCond);
pthread_mutex_unlock(&emptyLock);
} else {
this->last->message = *message;
this->last = this->last->next = malloc(sizeof(MessageQueueElement));
}
pthread_mutex_unlock(&sendLock);
}
int waitMessage(MessageQueue *this, int (*readMessage)(unsigned, unsigned, void *)) {
pthread_mutex_lock(&emptyLock);
if (this->first == this->last) {
pthread_cond_wait(&emptyCond, &emptyLock);
}
pthread_mutex_unlock(&emptyLock);
int n = readMessage(this->first->message.type, this->first->message.code, this->first->message.data);
MessageQueueElement *temp = this->first;
this->first = this->first->next;
free(temp);
return n;
}
some test code:
#define EXIT_MESSAGE 0
#define THREAD_MESSAGE 1
#define JUST_A_MESSAGE 2
#define EXIT 0
#define CONTINUE 1
int readMessage(unsigned type, unsigned code, void *data) {
if (type == THREAD_MESSAGE) {
printf("message from thread %d: %s\n", code, (char *)data);
free(data);
} else if (type == JUST_A_MESSAGE) {
puts((char *)data);
free(data);
} else if (type == EXIT_MESSAGE) {
puts("ending the program");
return EXIT;
}
return CONTINUE;
}
int nThreads;
int counter = 0;
void *worker(void *p) {
double pi = 0.0;
for (int i = 0; i < 1000000; i += 1) {
pi += (4.0 / (8.0 * i + 1.0) - 2.0 / (8.0 * i + 4.0) - 1.0 / (8.0 * i + 5.0) - 1.0 / (8.0 * i + 6.0)) / pow(16.0, i);
}
char *s = malloc(100);
sprintf(s, "pi equals %.8f", pi);
sendMessage(&mq, &(Message){.type = THREAD_MESSAGE, .code = (int)(intptr_t)p, .data = s});
counter += 1;
char *s2 = malloc(100);
sprintf(s2, "received %d message%s", counter, counter == 1 ? "" : "s");
sendMessage(&mq, &(Message){.type = JUST_A_MESSAGE, .data = s2});
if (counter == nThreads) {
sendMessage(&mq, &(Message){.type = EXIT_MESSAGE});
}
}
int main(int argc, char **argv) {
clock_t timer = clock();
init();
nThreads = atoi(argv[1]);
pthread_t threads[nThreads];
for (int i = 0; i < nThreads; i += 1) {
pthread_create(&threads[i], NULL, worker, (void *)(intptr_t)i);
}
while (waitMessage(&mq, readMessage));
for (int i = 0; i < nThreads; i += 1) {
pthread_join(threads[i], NULL);
}
clean();
timer = clock() - timer;
printf("%.2f\n", (double)timer / CLOCKS_PER_SEC);
return 0;
}
--- EDIT ---
Okay I managed to fix the problem by changing the program a bit using semaphores. The waitMessage function doesn't have to be locked since it is accessed by only one thread and the values that it modifies does not clash with sendMessage.
MessageQueue mq;
pthread_mutex_t mutex;
sem_t sem;
void init() {
mq.first = malloc(sizeof(MessageQueueElement));
mq.last = mq.first;
pthread_mutex_init(&mutex, NULL);
sem_init(&sem, 0, 0);
}
void clean() {
free(mq.first);
pthread_mutex_destroy(&mutex);
sem_destroy(&sem);
}
void sendMessage(MessageQueue *this, Message *message) {
pthread_mutex_lock(&mutex);
this->last->message = *message;
this->last = this->last->next = malloc(sizeof(MessageQueueElement));
pthread_mutex_unlock(&mutex);
sem_post(&sem);
}
int waitMessage(MessageQueue *this, int (*readMessage)(unsigned, unsigned, void *)) {
sem_wait(&sem);
int n = readMessage(this->first->message.type, this->first->message.code, this->first->message.data);
MessageQueueElement *temp = this->first;
this->first = this->first->next;
free(temp);
return n;
}
Your waitMessage function is modifying this->first outside of any locking. This is a bad thing.
It's often not worth recreating things that are already provided for you by an OS. You're effectively trying to set up a pipe of Message structures. You could simply use an anonymous pipe instead (see here for Linux, or here for Windows) and write/read Message structures to/from it. There's also POSIX message queues which are probably a bit more efficient.
In your case with multiple worker threads you'd have to have a supplementary mutex semaphore to control which worker is trying to read from the pipe or message queue.
Edit:
Hash.c is updated with revisions from the comments, I am still getting a Seg fault. I must be missing something here that you guys are saying
I have created a hash table ADT using C but I am encountering a segmentation fault when I try to call a function (find_hash) in the ADT.
I have posted all 3 files that I created parse.c, hash.c, and hash.h, so you can see all of the variables. We are reading from the file gettysburg.txt which is also attached
The seg fault is occuring in parse.c when I call find_hash. I cannot figure out for the life of me what is going on here. If you need anymore information I can surely provide it.
sorry for the long amount of code I have just been completely stumped for a week now on this. Thanks in advance
The way I run the program is first:
gcc -o parse parse.c hash.c
then: cat gettysburg.txt | parse
Parse.c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)malloc(sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
cur2 = next_hash_walk(t);
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
hash.c
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)malloc(sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)malloc(sizeof(hash_entry *)*size); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
t->sorted_array = NULL;
t->index=0;
t->sort_num=0;
for(i=0;i<size;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
hash_entry *cur;
for(i = 0; i<(table->size);i++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
free(cur->key); //Freeing memory for key and data
free(cur->data);
}
free(table->buckets[i]); //free the whole bucket
}}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)malloc(sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for(cur = table->buckets[i]; cur->next != NULL; cur = cur->next){
if(strcmp(table->buckets[i]->key, key) == 0)
return((table->buckets[i]->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size]; //creates an array, same size as table->size(# of buckets)
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next!=NULL; cur = cur->next){
count ++;}
node_num[i] = count;
count = 0;}
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) / (table->size);
}
void *start_hash_walk(Phash_table table){
Phash_table temp = table;
int i, j, k;
hash_entry *cur; //CHANGE IF NEEDED to HASH_TABLE *
if(table->sorted_array != NULL) free(table->sorted_array);
table->sorted_array = (void**)malloc(sizeof(void*)*(table->total));
for(i = 0; i < table->total; i++){
if(table->buckets[i]!=NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
table->sorted_array[i] = table->buckets[i]->data;
}}
}
for(j = (table->total) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp -> buckets[0]-> data = table->sorted_array[k-1];
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp->buckets[0] -> data;
}
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
table->sort_num ++;
return table->sorted_array[table->sort_num];
}
hash.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct hash_entry_ { //Linked List
void *data; //Generic pointer
char *key; //String-based key value
struct hash_entry_ *next; //Self-Referencing pointer
} hash_entry, *Phash_entry;
typedef struct hash_table_ {
hash_entry **buckets; //Pointer to a pointer to a Linked List of type hash_entry
int (*hash_func)(char *);
int (*cmp_func)(void *, void *);
int size;
void **sorted_array; //Array used to sort each hash entry
int index;
int total;
int largest;
float average;
int sort_num;
} hash_table, *Phash_table;
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);
Gettysburg.txt
Four score and seven years ago, our fathers brought forth upon this continent a new nation: conceived in liberty, and dedicated to the proposition that all men are created equal.
Now we are engaged in a great civil war. . .testing whether that nation, or any nation so conceived and so dedicated. . . can long endure. We are met on a great battlefield of that war.
We have come to dedicate a portion of that field as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.
But, in a larger sense, we cannot dedicate. . .we cannot consecrate. . . we cannot hallow this ground. The brave men, living and dead, who struggled here have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember, what we say here, but it can never forget what they did here.
It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us. . .that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion. . . that we here highly resolve that these dead shall not have died in vain. . . that this nation, under God, shall have a new birth of freedom. . . and that government of the people. . .by the people. . .for the people. . . shall not perish from the earth.
It's possible that one of several problems with this code are loops like:
for(table->buckets[i];
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
The initializing part of the for loop (table->buckets[i]) has no effect. If i is 0 and table->buckets[0] == NULL, then the condition on this loop (table->buckets[i]->next != NULL) will dereference a null pointer and crash.
That's where your code seemed to be crashing for on my box, at least. When I changed several of your loops to:
if (table->buckets[i] != NULL) {
for(;
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
}
...it kept crashing, but in a different place. Maybe that will help get you unstuck?
Edit: another potential problem is that those for loops are destructive. When you call find_hash, do you really want all of those buckets to be modified?
I'd suggest using something like:
hash_entry *cur;
// ...
if (table->buckets[i] != NULL) {
for (cur = table->buckets[i]; cur->next != NULL; cur = cur->next) {
// ...
}
}
When I do that and comment out your dump_dictionary function, your code runs without crashing.
Hmm,
here's hash.c
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)calloc(1, sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)calloc(size, sizeof(hash_entry *)); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
for(i=0;t->buckets[i] != NULL;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
for(i = 0; i<(table->size);i++){
if(table->buckets[i]!=NULL)
for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
free(table->buckets[i]->key); //Freeing memory for key and data
free(table->buckets[i]->data);
}
free(table->buckets[i]); //free the whole bucket
}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)calloc(1,sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
if(strcmp(cur->key, key) == 0)
return((cur->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size];
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++)
{
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
count ++;}
node_num[i] = count;
count = 0;
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) /(float) (table->size); //oook: i think you want a fp average
}
void *start_hash_walk(Phash_table table){
void* temp = 0; //oook: this was another way of overwriting your input table
int i, j, k;
int l=0; //oook: new counter for elements in your sorted_array
hash_entry *cur;
if(table->sorted_array !=NULL) free(table->sorted_array);
table->sorted_array = (void**)calloc((table->total), sizeof(void*));
for(i = 0; i < table->size; i ++){
//for(i = 0; i < table->total; i++){ //oook: i don't think you meant total ;)
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
table->sorted_array[l++] = cur->data;
}
}
//oook: sanity check/assert on expected values
if (l != table->total)
{
printf("oook: l[%d] != table->total[%d]\n",l,table->total);
}
for(j = (l) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if (table->sorted_array[k-1] && table->sorted_array[k])
{
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp = table->sorted_array[k-1]; //ook. changed temp to void* see assignment
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp;
}
}
else
printf("if (table->sorted_array[k-1] && table->sorted_array[k])\n");
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
/*oook: this was blowing up since you were incrementing past the size of sorted_array..
NB: *you **need** to implement some bounds checking here or you will endup with more seg-faults!!*/
//table->sort_num++
return table->sorted_array[table->sort_num++];
}
here's parse.c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h> //oook: added so you can assert ;)
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)calloc(1,sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
dictionary_size++;
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
if (!cur) //ook: do test or assert for null values
{
printf("oook: null== (cur = start_hash_walk)\n");
exit(-1);
}
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
{//oook: i think you needed these braces
cur2 = next_hash_walk(t);
if (!cur2) //ook: do test or assert for null values
{
printf("oook: null== (cur2 = next_hash_walk(t) at i[%d])\n",i);
}
else
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}//oook: i think you needed these braces
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
/* ooook: do assert on programmatic errors.
this function *requires non-null inputs. */
assert(cur1 && cur2);
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
follow the //ooks
Explanation:
There were one or two places this was going to blow up in.
The quick fix and answer to your question was in parse.c, circa L100:
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
..checking that cur is not null before calling printf fixes your immediate seg-fault.
But why would cur be null ? ~because of this bad-boy:
void *start_hash_walk(Phash_table table)
Your hash_func(char *string) can (& does) return non-unique values. This is of course ok except that you have not yet implemented your linked list chains. Hence you end up with table->sorted_array containing less than table->total elements ~or you would if you were iterating over all table->size buckets ;)
There are one or two other issues.
For now i hacked Nate Kohl's for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next) further, to be for(cur=table->buckets[i]; cur != NULL; cur=cur->next) since you have no chains. But this is *your TODO so enough said about that.
Finally. note that in next_hash_walk(Phash_table table) you have:
table->sort_num++
return table->sorted_array[table->sort_num];
Ouch! Do check those array bounds!
Notes
1) If you're function isn't designed to change input, then make the input const. That way the compiler may well tell you when you're inadvertently trashing something.
2) Do bound checking on your array indices.
3) Do test/assert for Null pointers before attempting to use them.
4) Do unit test each of your functions; never write too much code before compiling & testing.
5) Use minimal test-data; craft it such that it limit-tests your code & attempts to break it in cunning ways.
6) Do initialise you data structures!
7)Never use egyptian braces ! {
only joking ;)
}
PS Good job so far ~> pointers are tricky little things! & a well asked question with all the necessary details so +1 and gl ;)
(//oook: maybe add a homework tag)