Solving Maze Using BFS in C - Segmentation fault - c

I am attempting to solve a maze using BFS, but I am getting a segmentation fault. Can anyone help me figure out why this is happening? Also if you see anything else wrong with my code? my if statements are to check if the point exists in the maze and that there is a white space at the point. I also am solving the maze using DFS, but the seg fault happens during solve_bfs. Also, everything except solve.c was given to my professor, so the only thing I am suppose to edit is solve.c
Solve.c
#include <stdio.h>
#include <stdlib.h>
#include "maze.h"
#include "stack.h"
#include "queue.h"
int solve_bfs(maze * the_maze){
Queue Q= initQueue();
enqueue(Q, the_maze->entry);
while(!emptyQueue(Q)){
coord to_explore= dequeue(Q);
if(to_explore.row == the_maze->exit.row && to_explore.col == the_maze->exit.col){
print_maze(the_maze);
free(Q);
return 1;
}
else{
the_maze->data[to_explore.row][to_explore.col]= 'o';
if(to_explore.row-1 >= 0){
if(the_maze->data[to_explore.row-1][to_explore.col] == ' '){
coord new;
new.row= to_explore.row-1;
new.col= to_explore.col;
enqueue(Q, new);
}
}
if(to_explore.col+1 < the_maze->width){
if(the_maze->data[to_explore.row][to_explore.col+1]== ' '){
coord new;
new.row= to_explore.row;
new.col= to_explore.col+1;
enqueue(Q, new);
}
}
if(to_explore.row+1 < the_maze->height){
if(the_maze->data[to_explore.row+1][to_explore.col]== ' '){
coord new;
new.row= to_explore.row+1;
new.col= to_explore.col;
enqueue(Q, new);
}
}
if(to_explore.col-1 >= 0) {
if(the_maze->data[to_explore.row][to_explore.col-1]== ' '){
coord new;
new.row= to_explore.row;
new.col= to_explore.col-1;
enqueue(Q, new);
}
}
}
if(emptyQueue(Q)){
print_maze(the_maze);
free(Q);
return 0;
}
}
}
int solve_dfs(maze * the_maze){
Stack s= initStack();
push(s, the_maze->entry);
while(!emptyStack(s)){
coord to_explore= pop(s);
if(to_explore.row == the_maze->exit.row && to_explore.col == the_maze->exit.col){
print_maze(the_maze);
free(s);
return 1;
}
else{
if(to_explore.row-1 >= 0){
if(the_maze->data[to_explore.row-1][to_explore.col]== ' '){
coord new;
new.row= to_explore.row-1;
new.col= to_explore.col;
push(s, new);
}
}
if(to_explore.col+1 < the_maze->width){
if(the_maze->data[to_explore.row][to_explore.col+1]== ' '){
coord new;
new.row= to_explore.row;
new.col= to_explore.col+1;
push(s, new);
}
}
if(to_explore.row+1 < the_maze->height){
if(the_maze->data[to_explore.row+1][to_explore.col]== ' '){
coord new;
new.row= to_explore.row+1;
new.col= to_explore.col;
push(s, new);
}
}
if(to_explore.col-1 >= 0){
if(the_maze->data[to_explore.row][to_explore.col-1]== ' '){
coord new;
new.row= to_explore.row;
new.col= to_explore.col-1;
push(s, new);
}
}
}
if(emptyStack(s)){
print_maze(the_maze);
free(s);
return 0;
}
}
}
void print_maze(maze * the_maze){
the_maze->data[the_maze->entry.row][the_maze->entry.col]='S';
the_maze->data[the_maze->exit.row][the_maze->exit.col]='F';
for(int i=0; i<the_maze->width; i++){
for(int j=0; j<the_maze->height; j++){
printf("%s",the_maze->data[i][j]);
}
}
}
coord * make_coord(int r, int c){
coord * coord= malloc(sizeof(coord));
coord->row = r;
coord->col = c;
return coord;
}
void print_coord(coord c){
printf("(%d,%d)",c.row, c.col);
}
Maze.c
#include <stdio.h>
#include <stdlib.h>
#include "maze.h"
/**********************************************************
create_maze
creates a new maze from the input file characters
*********************************************************/
maze * create_maze(FILE *in) {
// create maze
maze * new_maze = (maze *) malloc(sizeof(maze));
// read first line of in
int start_row;
int start_col;
int end_row;
int end_col;
int num_rows;
int num_cols;
// start line
char * line = malloc(sizeof(char)*100); // start buffer out at 100
size_t num_read;
getline(&line, &num_read, in);
if(sscanf(line, "%d %d %d %d %d %d", &num_rows, &num_cols, &start_row, &start_col, &end_row, &end_col) != 6) {
fprintf(stderr, "Maze file format invalid. Top line must include 6 numbers.\n");
return NULL;
}
// if any are negative values, return NULL
if(start_row < 0 || start_col < 0 || end_row < 0 || end_col < 0 || num_rows < 0 || num_cols < 0) {
fprintf(stderr, "Maze file format invalid. Maze file numbers in first row must be non-negative.\n");
return NULL;
}
// make sure start_row is in bounds
if(start_row >= num_rows) {
fprintf(stderr, "Maze file format invalid. Start row must be < num rows in maze.\n");
return NULL;
}
// make sure end_row is in bounds
if(end_row >= num_rows) {
fprintf(stderr, "Maze file format invalid. End row must be < num rows in maze.\n");
return NULL;
}
// make sure start_col is in bounds
if(start_col >= num_cols) {
fprintf(stderr, "Maze file format invalid. Start col must be < num cols in maze.\n");
return NULL;
}
// make sure end_col is in bounds
if(end_col >= num_cols) {
fprintf(stderr, "Maze file format invalid. Start col must be < num cols in maze.\n");
return NULL;
}
// assign maze members
new_maze->entry.row = start_row;
new_maze->entry.col = start_col;
new_maze->exit.row = end_row;
new_maze->exit.col = end_col;
new_maze->width = num_cols;
new_maze->height = num_rows;
// allocate memory for maze data
new_maze->data = (char **) malloc(sizeof(char *)*num_rows);
int i;
for(i=0; i<num_rows; i++) {
new_maze->data[i] = (char *) malloc(sizeof(char)*num_cols);
}
// get characters from file, one line at a time
size_t num_vals_read = 0;
for(i=0; i<num_rows; i++) {
num_vals_read = getline(&line, &num_read, in);
if(num_vals_read != num_cols + 1) { //account for newline character
fprintf(stderr, "Maze file format invalid. Found %d chars on line %d and the width is %d.\n",
(num_vals_read-1), i, new_maze->width);
return NULL;
}
// parse out line
int j;
for(j=0; j<num_cols; j++) {
if(line[j] != ' ' && line[j] != '*') {
fprintf(stderr, "Maze file format invalid. Maze file data must contain spaces and stars. Read %c.\n", line[j]);
return NULL;
}
new_maze->data[i][j] = line[j];
}
}
// try to read more data
char c;
if((c = fgetc(in)) != EOF) {
fprintf(stderr, "Maze file format invalid. Too many characters past %d rows.\n", num_rows);
return NULL;
}
free(line);
return new_maze;
}
/******************************************************
free_maze
frees memory used by the_maze
******************************************************/
void free_maze(maze * the_maze) {
// first free the data
// need to free rows of data, then data
int i;
for(i = 0; i < the_maze->height; i++) {
free(the_maze->data[i]);
}
free(the_maze->data);
free(the_maze);
return;
}
Queue.c
#include <stdio.h>
#include <stdlib.h>
#include "queue.h"
/* initializes empty queue */
Queue initQueue() {
Queue q = malloc(sizeof(QueueType));
q->head = 0;
q->tail = 0;
return q;
}
/* returns 1 if queue is empty and 0 otherwise */
int emptyQueue(Queue Q) {
return (Q->head == Q->tail);
}
/* puts data item d into queue */
void enqueue(Queue Q, QueueData d) {
if(full(Q)) {
printf("Queue is full. Did not add item.\n");
return;
}
Q->tail++;
Q->tail = Q->tail % MAX_Q; // in case it goes off array
Q->data[Q->tail] = d;
}
/* removes data item from queue */
QueueData dequeue(Queue Q) {
if(emptyQueue(Q)) {
printf("Attempting to remove from empty queue\n");
exit(1);
}
Q->head++;
Q->head = Q->head % MAX_Q; // in case it goes off array
return Q->data[Q->head];
}
/* checks if queue is full */
int full(Queue Q) {
return (Q->tail + 1) % MAX_Q == Q->head;
}
/* freeQueue */
void freeQueue(Queue Q) {
free(Q);
}
Main.c
#include <stdio.h>
#include <stdlib.h>
#include "maze.h"
#define NUM_PARAMS 2
/* prototypes */
void usage(char * executable);
/***********************************************************************
* main
executable_name input_filename.txt
opens input_filename.txt for reading
creates maze object
runs the maze solver
frees maze
*********************************************************************/
// function completed for the CS 305 students: DO NOT MODIFY (Unless you find a bug)
int main(int argc, char * argv[]) {
if(argc != NUM_PARAMS) {
usage(argv[0]);
return EXIT_FAILURE;
}
// open file for reading
FILE *fp = NULL;
fp = fopen(argv[1], "r");
if(fp == NULL) {
fprintf(stderr, "Error opening input file %s. Exiting.\n", argv[1]);
return EXIT_FAILURE;
}
// create maze objects
// need 2 since we are running BFS on one (which modifies the
// maze with the path marker character)
// need clean copy of maze for DFS
maze * the_maze = create_maze(fp);
rewind(fp); // resets file pointer to beginning of file
maze * the_maze2 = create_maze(fp);
// done with file at this point
fclose(fp);
fp = NULL;
// check maze to see if it was created successfully
if(the_maze == NULL || the_maze2 == NULL) {
fprintf(stderr, "Error creating maze data structure\n");
return EXIT_FAILURE;
}
// run breadth-first-search on maze
printf("\nSolving using breadth-first search.\n");
int a = solve_bfs(the_maze);
// run depth-first-search on maze
printf("\nSolving using depth-first search:\n");
int b = solve_dfs(the_maze2);
printf("\ncan solve BFS: %d, can solve DFS: %d\n\n", a, b);
// free memory and exit
free_maze(the_maze);
free_maze(the_maze2);
return EXIT_SUCCESS;
}
/*********************************************************
usage
prints error message to user
**********************************************************/
void usage(char * executable) {
printf("Usage: \n%s maze_file.txt\n", executable);
}
stack.c
#include <stdio.h>
#include <stdlib.h>
#include "stack.h"
/* initializes a new stack */
Stack initStack() {
Stack s = (Stack) malloc(sizeof(StackType));
s->top = NULL;
return s;
}
/* empty returns 0 if S is empty and non-zero if S is not empty */
int emptyStack(Stack S) {
return (S->top == NULL);
}
/* pushes d to S */
void push(Stack S, StackData d) {
Node * n = (Node *)malloc(sizeof(Node));
n->data = d;
n->next = S->top;
S->top = n;
}
/* pops top item from S */
StackData pop(Stack S) {
if(emptyStack(S)) {
printf("Stack is empty. Attempting to pop an empty stack. Exiting program.\n");
exit(1); // exiting program
}
// there is data to pop
StackData toReturn = S->top->data;
Node * tmp = S->top; // in order to free this later
S->top = S->top->next; // move pointer to next item in stack
free(tmp);
return toReturn;
}
/* frees stack memory */
void freeStack(Stack S) {
while(!emptyStack(S)) {
pop(S);
}
free(S);
}
queue.h
#ifndef QUEUE_H
#define QUEUE_H
#include "maze.h"
#define MAX_Q 5000 // 1 more than what can be stored in the queue
// in this application, the mazes are on the small
// side
/* data to store into queue */
typedef coord QueueData; // putting coordinates into queue
/* queue data structure */
typedef struct QueueTag QueueType;
typedef struct QueueTag* Queue; // pointer to queue struct
// so when it is passed, the values
// can be updated in functions
struct QueueTag {
int head;
int tail;
QueueData data[MAX_Q]; // space for items in queue
};
/* function prototypes on queues */
Queue initQueue();
int emptyQueue(Queue Q);
void enqueue(Queue Q, QueueData d);
QueueData dequeue(Queue Q);
int full(Queue Q);
void freeQueue();
#endif
maze.h
#ifndef MAZE_H
#define MAZE_H
/* struct definition for coord */
typedef struct coord {
int row;
int col;
} coord;
/* struct definition for maze */
typedef struct maze {
coord entry;
coord exit;
int width;
int height;
char ** data;
} maze;
/* prototypes */
/* in maze.c */
maze * create_maze(FILE *in);
void free_maze(maze * the_maze);
/* in solve.c */
int solve_bfs(maze * the_maze);
int solve_dfs(maze * the_maze);
void print_maze(maze * the_maze);
coord * make_coord(int r, int c);
void print_coord(coord c);
#endif
stack.h
#ifndef STACK_H
#define STACK_H
#include "maze.h"
#define BAD {-1, -1} // coordinate off maze
/* data to store into stack */
typedef coord StackData;
/* stack data structures */
typedef struct NodeTag Node;
typedef struct StackTag StackType;
typedef struct StackTag* Stack;
/* linked list implementation of stacks */
struct NodeTag {
StackData data;
Node *next;
};
struct StackTag {
Node * top;
};
/* function prototypes on stacks */
Stack initStack();
int emptyStack(Stack S);
void push(Stack S, StackData d);
StackData pop(Stack S);
void freeStack(Stack S);
#endif

you need to fix your bound checking, assuming zero based indexing
# incorrect
to_explore.col+1 =< the_maze->width
# correct
to_explore.col+1 < the_maze->width
similar is the case for checking of to_explore.rowemphasized text

The error is in print_maze function.
void print_maze(maze * the_maze){
the_maze->data[the_maze->entry.row][the_maze->entry.col]='S';
the_maze->data[the_maze->exit.row][the_maze->exit.col]='F';
for(int i=0; i<the_maze->width; i++){
for(int j=0; j<the_maze->height; j++){
printf("%s",the_maze->data[i][j]);
}
}
data[i] represents ith row, so i should iterate over height.
data[i][j] represents jth element in ith row, so j should iterate over width. You need to swap height and width in the for loop.
void print_maze(maze * the_maze){
the_maze->data[the_maze->entry.row][the_maze->entry.col]='S';
the_maze->data[the_maze->exit.row][the_maze->exit.col]='F';
for(int i=0; i<the_maze->height; i++){
for(int j=0; j<the_maze->width; j++){
printf("%s",the_maze->data[i][j]);
}
}
}

Related

C program for dijiktras shortest path not working [duplicate]

This question already has answers here:
What is a debugger and how can it help me diagnose problems?
(2 answers)
How Can I debug a C program on Linux?
(4 answers)
Closed 1 year ago.
I have to build a program which takes in the vertices and edges from a csv file and uses an adjacency matrix to store the distance from one vertex to another and calls the function shortest_path which uses the dijkstra's algorithm to find the shortest path and calls the printpath function to print the information of all the vertices it goes through to get from the origin to the end.The information about the vertices is stored in the array of structures arr[].
The problem is that the program stops working when main() call the shortest_path() and the return value is 3221225725
The shortest_path function runs on its own in another program I made but is not called in the main when I execute this program
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_BUFFER 100
#define MAX_NB 8000
#define INFINITY 9999
typedef struct edges edges;
struct edges{
int from,to,weight;
};
typedef struct stops stops;
struct stops {
int id;
float lat,lont;
char title[MAX_BUFFER];
};
stops* arr[MAX_NB]={0};
typedef struct Graph{
int vertices;
// int visited;
} Graph;
int visited[MAX_NB];
int amatrix[MAX_NB][MAX_NB];
int n;
Graph* create_graph(int num_nodes){
Graph* g = (Graph *)malloc(sizeof(struct Graph));
g->vertices=num_nodes;
int i,j;
for(i=0;i<num_nodes;i++){
for(j=0;j<num_nodes;j++){
amatrix[i][j]=0;
}
}
n=num_nodes;
return g;
}
Graph *graph;
int next_field( FILE *f, char *buf, int max ) {
int i=0, end=0, quoted=0;
for(;;) {
// fetch the next character from file
buf[i] = fgetc(f);
// if we encounter quotes then flip our state and immediately fetch next char
if(buf[i]=='"') { quoted=!quoted; buf[i] = fgetc(f); }
// end of field on comma if we're not inside quotes
if(buf[i]==',' && !quoted) { break; }
// end record on newline or end of file
if(feof(f) || buf[i]=='\n') { end=1; break; }
// truncate fields that would overflow the buffer
if( i<max-1 ) { ++i; }
}
buf[i] = 0; // null terminate the string
return end; // flag stating whether or not this is end of the line
}
void fetch_stops ( FILE *csv, struct stops *p) {
char buf[MAX_BUFFER];
next_field( csv, buf, MAX_BUFFER );
p->id = atoi(buf);
next_field( csv, p->title, MAX_BUFFER );
next_field( csv, buf, MAX_BUFFER );
p->lat = atof(buf);
next_field( csv, buf, MAX_BUFFER );
p->lont = atof(buf);
}
void fetch_edges ( FILE *csv, struct edges *p) {
char buf[MAX_BUFFER];
next_field( csv, buf, MAX_BUFFER );
p->from = atoi(buf);
next_field( csv, buf, MAX_BUFFER );
p->to = atoi(buf);
next_field( csv, buf, MAX_BUFFER );
p->weight = atoi(buf);
}
void print_stops( struct stops *p ) {
printf("%d \t \t %s \t %f %f\n",
p->id,p->title, p->lat, p->lont);
}/*
void print_edges( struct edges *p ) {
printf("%d \t \t %d \t %d\n",
p->from,p->to, p->weight);
}
*/
int load_vertices(char *fname){
FILE *f;
struct stops pArray[MAX_NB];
struct stops p;
f=fopen(fname,"r");
if(!f) {
printf("unable to open file\n");
return 0;
}
fetch_stops( f, &p ); // discard the header data in the first line
int ngames = 0;
while(!feof(f)) {
fetch_stops( f, &pArray[ngames]);
arr[ngames]=&pArray[ngames];
ngames++;
}
printf("loaded %d vertices\n",ngames);
fclose(f);
graph = create_graph(ngames);
return 1;
}
void add_edge(int from, int to, int weight){
amatrix[from][to]=weight;
amatrix[to][from]=weight;
}
int load_edges(char *fname/*,Graph *g*/){
FILE *f;
struct edges pArray[MAX_NB];
struct edges p;
f=fopen(fname,"r");
if(!f) {
printf("unable to open file\n");
return 0;
}
fetch_edges( f, &p ); // discard the header data in the first line
int nedges = 0;
int from,to,weight;
while(!feof(f)) {
fetch_edges( f, &pArray[nedges]);
nedges++;
}
int i;
for(i=0;i<nedges;i++){
add_edge(pArray[i].from,pArray[i].to,pArray[i].weight);
}
printf("loaded %d edges\n",nedges);
fclose(f);
return 1;
}
void printpath(int parent[], int u){
// Base Case : If j is source
if (parent[u] == - 1)
return;
printpath(parent, parent[u]);
printf("%d %s\n", arr[u]->id, arr[u]->title);
}
void shortest_path(int origin, int end){
printf("Works1");
int distance[MAX_NB];
int pred[MAX_NB];
int cost[MAX_NB][MAX_NB];
int count,minD,nextn,i,j;
pred[0]=-1;
int n=MAX_NB;
printf("Works2");
for (i = 0; i < n; i++){
for (j = 0; j < n; j++){
if (amatrix[i][j] == 0)
cost[i][j] = INFINITY;
else
cost[i][j] = amatrix[i][j];
}
}
for (i = 0; i <n; i++) {
distance[i] = cost[origin][i];
}
printf("Works1");
distance[origin] = 0;
printf("Works2");
visited[origin] = 1;
count = 1;
while (count < n - 1) {
minD = INFINITY;
for (i = 0; i < n; i++){
if ((distance[i] < minD) && (visited[i])!=1) {
minD = distance[i];
nextn = i;
}}
visited[nextn] = 1;
for (i = 0; i < n; i++)
if (!(visited[i]))
if (minD + cost[nextn][i] < distance[i]) {
distance[i] = minD + cost[nextn][i];
pred[i]=nextn;
}
count++;
}
printf("Works");
printpath(pred,end);
}
int main () {
load_vertices("vertices.csv");
load_edges("edges.csv")
printf("%d",amatrix[300][7490]);
shortest_path(300,253);
return EXIT_SUCCESS;
}

simple patient managing program using queue

I'm making simple patient managing program using circular queue but q.rear always have "0" value while executing exit_hos()
I thought that addq() makes variable "rear" different, but It doesn't work.
is_empty() always return front and rear is same.
I think I'm misunderstanding some codes and memory concepts.
how can I fix these functions?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_SIZE 50
#define MAX_QUEUE_SIZE 6
typedef struct {
char** value;
int front;
int rear;
} Queue;
void init_queue(Queue* q) {
q->value = (char**)malloc(sizeof(char*) * MAX_QUEUE_SIZE);
q->front = 0;
q->rear = 0;
}
int is_full(Queue* q) {
if (((q->rear +1) % MAX_QUEUE_SIZE) == q->front)
return 1;
else
return 0;
}
int is_empty(Queue* q) {
if (q->front == q->rear)
return 1;
else
return 0;
}
void addq(Queue* q, char* value) {
q->rear = (q->rear+1) % MAX_QUEUE_SIZE;
q->value[q->rear] = value;
printf("addq: %s", value);
return;
}
char* deleteq(Queue* q) {
q->front = (q->front + 1) % MAX_QUEUE_SIZE;
return q->value[q->front];
}
void arrive(Queue q) {
int input;
char name[MAX_SIZE];
printf("\n");
printf("1. submit\n");
printf("2. cancel\n");
scanf("%d", &input);
if (input == 1) {
if (is_full(&q) == 1) {
printf("Service is not available\n");
}
else {
printf("name: ");
scanf("%s", name);
addq(&q, name);
}
}
else if (input == 2) {
return;
}
else {
printf("input error\n");
return;
}
return;
}
void exit_hos(Queue q) {
char patient[MAX_SIZE];
if (is_empty(&q) == 1)
{
printf("There is no patient waiting\n");
}
else {
strcpy(patient, deleteq(&q));
printf("patient: %s", patient);
}
return;
}
int main() {
int input;
Queue q;
init_queue(&q);
while (1)
{
printf("\nINPUT\n");
printf("1. Arrive hostpital\n");
printf("2. Exit hospital\n");
printf("3. service exit\n");
scanf("%d", &input);
if (input == 1)
arrive(q);
else if (input == 2) {
exit_hos(q);
}
else if (input == 3) {
printf("exit\n");
return 0;
}
else {
printf("input error\n");
}
}
free(q.value);
return 0;
}
I think that this line is wrong:
q->value = (char**)malloc(sizeof(char*) * MAX_QUEUE_SIZE);
I think that it should be:
char * _value = (char*)malloc(sizeof(char*) * MAX_QUEUE_SIZE);
q->value = &_value;
malloc is going to return a pointer to a char array. q->value is a pointer to a pointer to a char array. So you want to set it to the address of the char array that malloc is created for you.
Change you init_queue code to this and it will work:
void init_queue(Queue* q) {
char * _value = (char*)malloc(sizeof(char*) * MAX_QUEUE_SIZE);
q->value = &_value;
q->front = 0;
q->rear = 0;
}
Output:
Chris#DESKTOP-BCMC1RF ~
$ ./main.exe
INPUT
1. Arrive hostpital
2. Exit hospital
3. service exit
1
1. submit
2. cancel
1
name: fred
addq: fred
INPUT
1. Arrive hostpital
2. Exit hospital
3. service exit
2
If you already have a max queue size and a max size, you are better off pre-allocating the whole thing as an array, reducing memory headaches. As a general rule, avoid headaches unless they provide a feature you want.
Note: This method of keeping track of and re-using memory is called a circular buffer (not to be confused with the linked list types that are more commonly called queues).
#define MAX_SIZE 50
#define MAX_QUEUE_SIZE 6
typedef struct {
char value [MAX_QUEUE_SIZE][MAX_SIZE + 1]; //+1 to hold extra null termination
unsigned int front;
unsigned int size; //size is a clearer than rear, which could have meant end item or end+1 and needed special empty queue handling
} Queue;
void init_queue(Queue* q) {
memset(q,0,sizeof(Queue)); //just zero it all
//more info on this and some situation-dependent alternatives https://stackoverflow.com/questions/11152160/initializing-a-struct-to-0
}
int is_full(const Queue* q) {
return q->size >= MAX_QUEUE_SIZE;
}
int is_empty(const Queue* q) {
return q->size == 0;
}
//sometimes called a push operation
//return 0 if failed
int addq(Queue* q, const char* value) {
//error check, abort, error handling section:
//full queue -> abort
if(is_full(q)) return 0;
//long value -> truncate handled via strncpy
//actual operation
const unsigned int destination = (q->front + q->size) % MAX_QUEUE_SIZE;
strncpy(q->value[destination],value,MAX_SIZE);
q->size = q->size + 1;
printf("addq: %s", q->value[destination]);
return q->size;
}
//sometimes called a pop operation
//return value may not persist if addq is called, but fine for your use of copying on call
const char* deleteq(Queue* q) {
if(is_empty(q)) return 0;
const char * retval = q->value[q->front];
q->front = (q->front + 1) % MAX_QUEUE_SIZE;
q->size = q->size - 1;
return retval;
}
also remember to use either MAX_SIZE + 1 or strncpy with MAX_SIZE - 1 since "No null-character is implicitly appended at the end of destination if source is longer than num."
(and strcpy and scanf as you sling them onto arrays is unsafe)

Reading string from array of pointers

How can I read each individual character from a string that is accessed through an array of pointers? In the below code I currently have generated an array of pointers to strings called, symCodes, in my makeCodes function. I want to read the strings 8 characters at a time, I thought about concatenating each string together, then looping through that char by char but the strings in symCodes could be up to 255 characters each, so I feel like that could possibly be too much all to handle at once. Instead, I thought I could read each character from the strings, character by character.
I've tried scanf or just looping through and always end up with seg faults. At the end of headerEncode(), it's near the bottom. I malloc enough memory for each individual string, I try to loop through the array of pointers and print out each individual character but am ending up with a seg fault.
Any suggestions of a different way to read an array of pointers to strings, character by character, up to n amount of characters is appreciated.
EDIT 1: I've updated the program to no longer output warnings when using the -Wall and -W flags. I'm no longer getting a seg fault(yay!) but I'm still unsure of how to go about my question, how can I read an array of pointers to strings, character by character, up to n amount of characters?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
#define FAIL 0
#define SUCCESS 1
/* global 1 day arrays that hold chars and their freqs from file */
unsigned long globalFreqs[256] = {0};
unsigned char globalUsedCh[256] = {0};
char globalCodes[256] = {0};
unsigned char globalUniqueSymbols;
unsigned long totalCount = 0;
typedef struct HuffmanTreeNode* HTNode;
struct HuffmanTreeNode* globalSortedLL;
/*
struct has the input letter, the letters frequency, and the left and irght childs
*/
struct HuffmanTreeNode
{
char symbol;
unsigned long freq;
char *code;
struct HuffmanTreeNode *left, *right;
struct HuffmanTreeNode* next;
};
/* does it make sense to have a struct for the entire huffman tree to see its size? */
struct HuffmanTree
{
unsigned size;
};
/*generate new node with given symbol and freq */
struct HuffmanTreeNode* newNode(char symbol, int freq)
{
struct HuffmanTreeNode* newNode = malloc(sizeof(struct HuffmanTreeNode));
newNode->symbol = symbol;
newNode->freq = freq;
newNode->left = newNode->right = NULL;
return newNode;
}
/*current work in progress, i believe this is the way to insert it for a BST
/* will change for HuffmanTreenode once working
/*
*/
struct HuffmanTreeNode* insert(struct HuffmanTreeNode* node, struct HuffmanTreeNode* htnNew)
{
struct HuffmanTreeNode* currentNode = node;
if(currentNode == NULL || compareTwoNodes(htnNew, currentNode))
{
htnNew->next = currentNode;
return htnNew;
}
else
{
while(currentNode->next != NULL && compareTwoNodes(currentNode->next, htnNew))
{
currentNode = currentNode->next;
}
htnNew->next = currentNode->next;
currentNode->next = htnNew;
return node;
}
}
int compareTwoNodes(struct HuffmanTreeNode* a, struct HuffmanTreeNode* b)
{
if(b->freq < a->freq)
{
return 0;
}
if(a->freq == b->freq)
{
if(a->symbol > b->symbol)
return 1;
return 0;
}
if(b->freq > a->freq)
return 1;
}
struct HuffmanTreeNode* popNode(struct HuffmanTreeNode** head)
{
struct HuffmanTreeNode* node = *head;
*head = (*head)->next;
return node;
}
/*convert output to bytes from bits*/
/*use binary fileio to output */
/*put c for individual character byte*/
/*fwrite each individual byte for frequency of symbol(look at fileio slides) */
/*
#function:
#param:
#return:
*/
int listLength(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* current = node;
int length = 0;
while(current != NULL)
{
length++;
current = current->next;
}
return length;
}
/*
#function:
#param:
#return:
*/
void printList(struct HuffmanTreeNode* node)
{
struct HuffmanTreeNode* currentNode = node;
while(currentNode != NULL)
{
if(currentNode->symbol <= ' ' || currentNode->symbol > '~')
printf("=%d", currentNode->symbol);
else
printf("%c", currentNode->symbol);
printf("%lu ", currentNode->freq);
currentNode = currentNode->next;
}
printf("\n");
}
/*
#function:
#param:
#return:
*/
void buildSortedList()
{
int i;
for(i = 0; i < 256; i++)
{
if(!globalFreqs[i] == 0)
{
globalSortedLL = insert(globalSortedLL, newNode(i, globalFreqs[i]));
}
}
printf("Sorted freqs: ");
printList(globalSortedLL);
printf("listL: %d\n", listLength(globalSortedLL));
}
/*
#function: isLeaf()
will test to see if the current node is a leaf or not
#param:
#return
*/
int isLeaf(struct HuffmanTreeNode* node)
{
if((node->left == NULL) && (node->right == NULL))
return SUCCESS;
else
return FAIL;
}
/*where I plan to build the actual huffmantree */
/*
#function:
#param:
#return:
*/
struct HuffmanTreeNode* buildHuffmanTree(struct HuffmanTreeNode* node)
{
int top = 0;
struct HuffmanTreeNode *left, *right, *topNode, *huffmanTree;
struct HuffmanTreeNode* head = node;
struct HuffmanTreeNode *newChildNode, *firstNode, *secondNode;
while(head->next != NULL)
{
/*grab first two items from linkedL, and remove two items*/
firstNode = popNode(&head);
secondNode = popNode(&head);
/*combine sums, use higher symbol, create new node*/
newChildNode = newNode(secondNode->symbol, (firstNode->freq + secondNode->freq));
newChildNode->left = firstNode;
newChildNode->right = secondNode;
/*insert new node, decrement total symbols in use */
head = insert(head, newChildNode);
}
return head;
}
void printTable(char *codesArray[])
{
int i;
printf("Symbol\tFreq\tCode\n");
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
if(i <= ' ' || i > '~')
{
printf("=%d\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
else
{
printf("%c\t%lu\t%s\n", i, globalFreqs[i], codesArray[i]);
}
}
}
printf("Total chars = %lu\n", totalCount);
}
void makeCodes(
struct HuffmanTreeNode *node, /* Pointer to some tree node */
char *code, /* The *current* code in progress */
char *symCodes[256], /* The array to hold the codes for all the symbols */
int depth) /* How deep in the tree we are (code length) */
{
char *copiedCode;
int i = 0;
if(isLeaf(node))
{
code[depth] = '\0';
symCodes[node->symbol] = code;
return;
}
copiedCode = malloc(255*sizeof(char));
memcpy(copiedCode, code, 255*sizeof(char));
code[depth] = '0';
copiedCode[depth] = '1';
makeCodes(node->left, code, symCodes, depth+1);
makeCodes(node->right, copiedCode, symCodes, depth+1);
}
/*
#function: getFileFreq()
gets the frequencies of each character in the given
file from the command line, this function will also
create two global 1d arrays, one for the currently
used characters in the file, and then one with those
characters frequencies, the two arrays will line up
parallel
#param: FILE* in, FILE* out,
the current file being processed
#return: void
*/
void getFileFreq(FILE* in, FILE* out)
{
unsigned long freqs[256] = {0};
int i, t, fileCh;
while((fileCh = fgetc(in)) != EOF)
{
freqs[fileCh]++;
totalCount++;
}
for(i = 0; i < 256; i++)
{
if(freqs[i] != 0)
{
globalUsedCh[i] = i;
globalFreqs[i] = freqs[i];
if(i <= ' ' || i > '~')
{
globalUniqueSymbols++;
}
else
{
globalUniqueSymbols++;
}
}
}
/* below code until total count is for debugging purposes */
printf("Used Ch: ");
for(t = 0; t < 256; t++)
{
if(globalUsedCh[t] != 0)
{
if(t <= ' ' || t > '~')
{
printf("%d ", globalUsedCh[t]);
}
else
printf("%c ", globalUsedCh[t]);
}
}
printf("\n");
printf("Freq Ch: ");
for(t = 0; t < 256; t++)
{
if(globalFreqs[t] != 0)
{
printf("%lu ", globalFreqs[t]);
}
}
printf("\n");
/* end of code for debugging/vizualazation of arrays*/
printf("Total Count %lu\n", totalCount);
printf("globalArrayLength: %d\n", globalUniqueSymbols);
}
void headerEncode(FILE* in, FILE* out, char *symCodes[256])
{
char c;
int i, ch, t, q, b, z;
char *a;
char *fileIn;
unsigned char *uniqueSymbols;
unsigned char *byteStream;
unsigned char *tooManySym = 0;
unsigned long totalEncodedSym;
*uniqueSymbols = globalUniqueSymbols;
totalEncodedSym = ftell(in);
rewind(in);
fileIn = malloc((totalEncodedSym+1)*sizeof(char));
fread(fileIn, totalEncodedSym, 1, in);
if(globalUniqueSymbols == 256)
{
fwrite(tooManySym, 1, sizeof(char), out);
}
else
{
fwrite(uniqueSymbols, 1, sizeof(uniqueSymbols)-7, out);
}
for(i = 0; i < 256; i++)
{
if(globalFreqs[i] != 0)
{
fwrite(globalUsedCh+i, 1, sizeof(char), out);
fwrite(globalFreqs+i, 8, sizeof(char), out);
}
}
for(t = 0; t < totalEncodedSym; t++)
{
fwrite(symCodes[fileIn[t]], 8, sizeof(char), out);
}
for(q = 0; q < totalEncodedSym; q++)
{
symCodes[q] = malloc(255*sizeof(char));
a = symCodes[q];
while(*a != '\0')
printf("%c\n", *(a++));
}
printf("Total encoded symbols: %lu\n", totalEncodedSym);
printf("%s\n", fileIn);
}
void encodeFile(FILE* in, FILE* out)
{
int top = 0;
int i;
char *code;
char *symCodes[256] = {0};
int depth = 0;
code = malloc(255*sizeof(char));
getFileFreq(in, out);
buildSortedList();
makeCodes(buildHuffmanTree(globalSortedLL), code, symCodes, depth);
printTable(symCodes);
headerEncode(in, out, symCodes);
free(code);
}
/*
void decodeFile(FILE* in, FILE* out)
{
}*/
There are many problems in your code:
[major] function compareTwoNodes does not always return a value. The compiler can detect such problems if instructed to output more warnings.
[major] the member symbol in the HuffmanTreeNode should have type int. Type char is problematic as an index value because it can be signed or unsigned depending on compiler configuration and platform specificities. You assume that char has values from 0 to 255, which is incorrect for most platforms where char actually has a range of -128 .. 127. Use unsigned char or int but cast the char values to unsigned char to ensure proper promotion.
[major] comparison if (globalUniqueSymbols == 256) is always false because globalUniqueSymbols is an unsigned char. The maximum number of possible byte values is indeed 256 for 8-bit bytes, but it does not fit in an unsigned char, make globalUniqueSymbols an int.
[major] *uniqueSymbols = globalUniqueSymbols; in function headerEncode stores globalUniqueSymbols into an uninitialized pointer, definitely undefined behavior, probable segmentation fault.
[major] sizeof(uniqueSymbols) is the size of a pointer, not the size of the array not the size of the type. Instead of hacking it as sizeof(uniqueSymbols)-7, fputc(globalUniqueSymbols, out);
[major] fwrite(tooManySym, 1, sizeof(char), out); is incorrect too, since tooManySym is initialized to 0, ie: it is a NULL pointer. You need a special value to tell that all bytes values are used in the source stream, use 0 for that and write it with fputc(0, out);.
You have nested C style comments before function insert, this is not a bug but error prone and considered bad style.
function newNode should take type unsigned long for freq for consistency.
function buildHuffmanTree has unused local variables: right, top and topNode.
variable i is unused in function makeCodes.
many unused variables in headerEncode: byteStream, c, ch, b...
totalEncodedSym is an unsigned long, use an index of the proper type in the loops where you stop at totalEncodedSym.
unused variables un encodeFile: i, top...
Most of these can be detected by the compiler with the proper warning level: gcc -Wall -W or clang -Weverything...
There are probably also errors in the program logic, but you cannot see these until you fix the major problems above.

Duplicate nodes when removing from kdtree

I am writing an algorithm that requires me to search nearest neighbors of points. I found the kdtree library from this post (Using Google's C KD Tree Library) but it does not have a function to delete individual nodes from the tree. So I started to implement my own using
www (dot) geeksforgeeks.org/k-dimensional-tree-set-3-delete/
as a template. It all runs through but unfortunately sometimes nodes get duplicated.
My test case is the following:
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <math.h>
#include <errno.h>
#include <string.h>
#include <stdarg.h>
#include "kdtree.h"
/* (hopefully) platform independent directory creation */
#if defined(_WIN32) || defined(WIN32) /* this should be defined under windows, regardless of 64 or 32 bit*/
#include <direct.h>
#include <sys/stat.h>
#define GetWorkingDir _getcwd
#define MakeDir(str) _mkdir(str)
#else /* unix based system */
#include <unistd.h>
#include <sys/stat.h>
#define GetWorkingDir getcwd
#define MakeDir(str) mkdir(str, 0777)
#endif
#ifndef MAX_PATH
#define MAX_PATH 260
#endif
void GetLogDir(char* strPath, int nBufSize)
{
if(GetWorkingDir(strPath, nBufSize))
{
strncat(strPath, "/log/", 5);
MakeDir(strPath);
}
else
{
fprintf(stderr, "Could not get working directory");
exit(ENOENT);
}
}
FILE* GetOpenFileHandle(const char* strFilenamePlusPath, const char* strOpenMode)
{
if(strOpenMode == NULL) // too bad we dont have default arguments in C :(
{
strOpenMode = "a+";
}
return(fopen(strFilenamePlusPath, strOpenMode));
}
int CloseFile(FILE* pFile)
{
if(pFile != NULL)
{
fprintf(pFile, "\r\n"); // append a new line before closing!
return(fclose(pFile));
}
fprintf(stderr, "Invalid file handle");
exit(EFAULT);
}
void NodeLabelToFile(FILE* pFile, kdnode* node, const char* strName)
{
fprintf(pFile, "%s [label=\"(%.3f, %.3f)\"] \n", strName, node->pos[0], node->pos[1]);
}
char* NodeToString(kdnode* node, int* num)
{
char* strName = (char*) malloc(MAX_PATH);
if(*num == 0)
{
sprintf(strName, "%s","root");
}
else
{
sprintf(strName, "node%d", *num);
}
return strName;
}
void NodesToFile(FILE* pFile, kdnode* node, const char* strParentname, int* num)
{
if(node && pFile)
{
char* strLeft = NULL;
char* strRight = NULL;
if(node->left)
{
(*num)++;
strLeft = NodeToString(node->left, num);
NodeLabelToFile(pFile, node->left, strLeft);
fprintf(pFile, "%s -> %s \n", strParentname, strLeft);
}
if(node->right)
{
(*num)++;
strRight = NodeToString(node->right, num); // name of the current node
NodeLabelToFile(pFile, node->right, strRight);
fprintf(pFile, "%s -> %s \n", strParentname, strRight);
}
if(strLeft)
{
NodesToFile(pFile, node->left, strLeft, num);
free(strLeft);
}
if(strRight)
{
// (*num)++;
NodesToFile(pFile, node->right, strRight, num);
free(strRight);
}
}
}
FILE* MakeOpenLogFile(const char* strFilename, const char* strOpenMode)
{
if(strOpenMode == NULL)
{
strOpenMode = "a+";
}
char* strFilenamePlusPath = (char*) malloc(MAX_PATH);
GetLogDir(strFilenamePlusPath, MAX_PATH);
strncat(strFilenamePlusPath, strFilename, strlen(strFilename));
FILE* pFile = GetOpenFileHandle(strFilenamePlusPath, strOpenMode);
free(strFilenamePlusPath);
return(pFile);
}
void KDTreeToDotFile(kdtree* Tree, const char* strFilename)
{
if(Tree)
{
FILE* pFile = MakeOpenLogFile(strFilename, "w");
fprintf(pFile, "%s", "digraph d { \n"); // print opening statement for the graph in dot language
// traverse the tree and print the nodes
int* num = (int*) malloc(sizeof(int)); // make this a unique location to make sure numbers can't occur twice
*num = 0;
char* strRoot = NodeToString(Tree->root, num);
NodeLabelToFile(pFile, Tree->root, strRoot);
NodesToFile(pFile, Tree->root, "root", num);
if(strRoot)
{
free(strRoot);
}
free(num);
fprintf(pFile,"%s", "}"); // close the digraph environment
CloseFile(pFile);
}
}
int main(int argc, const char * argv[])
{
int numel = 20;
int toRemove = 19;
double dMax = 3000;
int nNumDim = 2;
printf("init rng");
srand(1234); // seed the rng // srand((unsigned) time(&t));
printf("creating kdtree");
kdtree* TreeRoot = kd_create(nNumDim); // construct the kd tree for the nearest neighbor search
kd_data_destructor(TreeRoot, free); // set free as data destructor
double* pos = (double*) malloc(nNumDim * numel * sizeof(double));
int retval;
for (int ii = 0; ii < numel; ii++)
{
pos[nNumDim * ii] = floor((double)rand()/(double)(RAND_MAX/dMax));
pos[nNumDim * ii + 1] = floor((double)rand()/(double)(RAND_MAX/dMax));
int* randint = (int*) malloc(sizeof(int));
*randint = rand();
retval = kd_insert2(TreeRoot,
pos[nNumDim * ii],
pos[nNumDim * ii + 1],
randint, sizeof(int));
assert(retval == 0);
}
KDTreeToDotFile(TreeRoot, "original.dot");
double* dRemovePos = (double*) malloc(sizeof(double)*nNumDim);
for (int ii = 0; ii < toRemove; ii++)
{
dRemovePos[0] = pos[2*ii];
dRemovePos[1] = pos[2*ii + 1];
kd_remove(TreeRoot, dRemovePos);
}
KDTreeToDotFile(TreeRoot, "removed.dot");
kd_free(TreeRoot); // free kdtree
return 0;
}
and the functions to remove the nodes are implemented like this:
(I don't think if it is too much code, so I only will post my changes to the kd library. If I should add the rest of the code, which is more than 1000 lines unfortunately, just tell me in the comments.)
int kd_remove(kdtree* tree, const double* pos)
{
printf("removing node %.3f, %.3f \n", pos[0], pos[1]);
if(tree->root != NULL)
{
assert(tree->dim != 0); // prevent division by 0 (error code 136)
assert(pos != NULL); // make sure a valid position is passed
tree->root = remove_rec(tree->root, pos, tree->dim, tree->destr, 0);
}
return(0);
}
kdnode* remove_rec(kdnode* node, const double* pos, int dim, void (*destr)(void*), int depth)
{
if(node == NULL)
{
return(NULL);
}
int curdim = depth % dim;
if(same_pos(node->pos, pos, dim))
{
// we found the droid we're looking for
if(node->right)
{
// find the minimum in the right subtree
kdnode* node_min = find_min(node->right, curdim, dim);
if(node_min)
{
copy_node_data(node_min, node, dim);
node->right = remove_rec(node->right, node_min->pos, dim, destr, depth + 1);
}
}
else if(node->left)
{
// find the minimum in the left subtree
kdnode* node_min = find_min(node->left, curdim, dim);
if(node_min)
{
copy_node_data(node_min, node, dim);
node->left = remove_rec(node->left, node_min->pos, dim, destr, depth + 1);
}
}
else
{
// no subtrees -> delete the found node
clear_rec(node, destr);
return(NULL);
}
return node; // return the newly filled node to the recursion step one "above"
}
else
{
// points are not the same, look further
if(pos[curdim] < node->pos[curdim])
{
// position we're looking for is smaller -> go left
node->left = remove_rec(node->left, pos, dim, destr, depth + 1);
}
else
{
// go right, position we're looking for is greater
node->right = remove_rec(node->right, pos, dim, destr, depth + 1);
}
return node;
}
}
void copy_node_data(const kdnode* src, kdnode* dst, int dim)
{
if(src && dst)
{
int nNumBytes = dim * sizeof(double);
memcpy(dst->pos, src->pos, nNumBytes);
if(dst->data != NULL)
{
free(dst->data);
dst->data = malloc(src->databytes);
}
memcpy(dst->data, src->data, src->databytes);
dst->databytes = src->databytes;
}
}
int same_pos(const double* pos1, const double* pos2, int dim)
{
for (int i = 0; i < dim; ++i)
{
if(pos1[i] != pos2[i])
{
return 0; // false
}
}
return 1; // true
}
kdnode* find_min(kdnode* node, int dir, int numdim)
{
return find_min_rec(node, dir, 0, numdim);
}
kdnode* find_min_rec(kdnode* node, int dir, int depth, int numdim)
{
if(!node)
{
return NULL;
}
if(node->left == NULL && node->right == NULL)
{
return node; // is leaf node
}
int curdim = depth % numdim;
if(curdim == numdim)
{
if(node->left == NULL)
{
// no smaller node in tree
return node;
}
else
{
// left subtree is populated -> we need to go deeper
return find_min_rec(node->left, node->dir, depth + 1, numdim);;
}
}
// we have to search both subtrees and find the smallest value compared to the current node
return min_node(node, find_min_rec(node->left, node->dir, depth + 1, numdim),
find_min_rec(node->right, node->dir, depth + 1, numdim), node->dir);
}
kdnode* min_node(kdnode* a, kdnode* left, kdnode* right, int dir)
{
if(a == NULL)
{
// node a is the only one that can't be NULL!
fprintf(stderr, "Error: invalid node passed! \n");
exit(EFAULT);
}
kdnode* result = a;
if(left != NULL)
{
if(left->pos[dir] < result->pos[dir])
{
result = left;
}
}
if(right != NULL)
{
if(right->pos[dir] < result->pos[dir])
{
result = right;
}
}
return result;
}
original.dot looks like this and removed.dot like that.
I've been debugging this since yesterday and I have the feeling it is something really obvious that I am missing here...
Thanks in advance to anyone willing to help :)
You are creating 40 elements
int numel = 20;
int nNumDim = 2;
double* pos = (double*) malloc(nNumDim * numel * sizeof(double)); // Don't cast
but removing only 38
int toRemove = 19;
for (int ii = 0; ii < toRemove; ii++)
{
dRemovePos[0] = pos[nNumDim * ii];
dRemovePos[1] = pos[nNumDim * ii + 1];
kd_remove(TreeRoot, dRemovePos);
}
In the last iteration:
pos[nNumDim * ii]; = pos[2 * 18]; = pos[36];
pos[nNumDim * ii + 1]; = pos[2 * 18 + 1]; = pos[37];
pos[38] and pos[39] are still there.
Change to int toRemove = 20;.
Your code is obfuscated due to the flat array, why don't you declare some type like
struct data {
double el1;
double el2;
};
or
typedef double data[2];
and then
data *value = malloc(numel * sizeof(*value));
So, I know this probably won't be read by anyone but I found the bug after not touching the code for a while and for completeness here is how:
In the find_min() function I start the recursion with depth = 0.
This can cause the split dimension to get messed up and therefore not access all the nodes.
I modified the function to take depth as an argument and pass the recursion depth of remove_rec() like this:
kdnode* node_min = find_min(node->right, curdim, dim, depth + 1);
and
kdnode* node_min = find_min(node->left, curdim, dim, depth + 1);
respectively.

Seg. Fault in Hash Table ADT - C

Edit:
Hash.c is updated with revisions from the comments, I am still getting a Seg fault. I must be missing something here that you guys are saying
I have created a hash table ADT using C but I am encountering a segmentation fault when I try to call a function (find_hash) in the ADT.
I have posted all 3 files that I created parse.c, hash.c, and hash.h, so you can see all of the variables. We are reading from the file gettysburg.txt which is also attached
The seg fault is occuring in parse.c when I call find_hash. I cannot figure out for the life of me what is going on here. If you need anymore information I can surely provide it.
sorry for the long amount of code I have just been completely stumped for a week now on this. Thanks in advance
The way I run the program is first:
gcc -o parse parse.c hash.c
then: cat gettysburg.txt | parse
Parse.c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)malloc(sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
cur2 = next_hash_walk(t);
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
hash.c
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)malloc(sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)malloc(sizeof(hash_entry *)*size); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
t->sorted_array = NULL;
t->index=0;
t->sort_num=0;
for(i=0;i<size;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
hash_entry *cur;
for(i = 0; i<(table->size);i++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
free(cur->key); //Freeing memory for key and data
free(cur->data);
}
free(table->buckets[i]); //free the whole bucket
}}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)malloc(sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for(cur = table->buckets[i]; cur->next != NULL; cur = cur->next){
if(strcmp(table->buckets[i]->key, key) == 0)
return((table->buckets[i]->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size]; //creates an array, same size as table->size(# of buckets)
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next!=NULL; cur = cur->next){
count ++;}
node_num[i] = count;
count = 0;}
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) / (table->size);
}
void *start_hash_walk(Phash_table table){
Phash_table temp = table;
int i, j, k;
hash_entry *cur; //CHANGE IF NEEDED to HASH_TABLE *
if(table->sorted_array != NULL) free(table->sorted_array);
table->sorted_array = (void**)malloc(sizeof(void*)*(table->total));
for(i = 0; i < table->total; i++){
if(table->buckets[i]!=NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
table->sorted_array[i] = table->buckets[i]->data;
}}
}
for(j = (table->total) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp -> buckets[0]-> data = table->sorted_array[k-1];
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp->buckets[0] -> data;
}
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
table->sort_num ++;
return table->sorted_array[table->sort_num];
}
hash.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct hash_entry_ { //Linked List
void *data; //Generic pointer
char *key; //String-based key value
struct hash_entry_ *next; //Self-Referencing pointer
} hash_entry, *Phash_entry;
typedef struct hash_table_ {
hash_entry **buckets; //Pointer to a pointer to a Linked List of type hash_entry
int (*hash_func)(char *);
int (*cmp_func)(void *, void *);
int size;
void **sorted_array; //Array used to sort each hash entry
int index;
int total;
int largest;
float average;
int sort_num;
} hash_table, *Phash_table;
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);
Gettysburg.txt
Four score and seven years ago, our fathers brought forth upon this continent a new nation: conceived in liberty, and dedicated to the proposition that all men are created equal.
Now we are engaged in a great civil war. . .testing whether that nation, or any nation so conceived and so dedicated. . . can long endure. We are met on a great battlefield of that war.
We have come to dedicate a portion of that field as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.
But, in a larger sense, we cannot dedicate. . .we cannot consecrate. . . we cannot hallow this ground. The brave men, living and dead, who struggled here have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember, what we say here, but it can never forget what they did here.
It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us. . .that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion. . . that we here highly resolve that these dead shall not have died in vain. . . that this nation, under God, shall have a new birth of freedom. . . and that government of the people. . .by the people. . .for the people. . . shall not perish from the earth.
It's possible that one of several problems with this code are loops like:
for(table->buckets[i];
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
The initializing part of the for loop (table->buckets[i]) has no effect. If i is 0 and table->buckets[0] == NULL, then the condition on this loop (table->buckets[i]->next != NULL) will dereference a null pointer and crash.
That's where your code seemed to be crashing for on my box, at least. When I changed several of your loops to:
if (table->buckets[i] != NULL) {
for(;
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
}
...it kept crashing, but in a different place. Maybe that will help get you unstuck?
Edit: another potential problem is that those for loops are destructive. When you call find_hash, do you really want all of those buckets to be modified?
I'd suggest using something like:
hash_entry *cur;
// ...
if (table->buckets[i] != NULL) {
for (cur = table->buckets[i]; cur->next != NULL; cur = cur->next) {
// ...
}
}
When I do that and comment out your dump_dictionary function, your code runs without crashing.
Hmm,
here's hash.c
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)calloc(1, sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)calloc(size, sizeof(hash_entry *)); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
for(i=0;t->buckets[i] != NULL;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
for(i = 0; i<(table->size);i++){
if(table->buckets[i]!=NULL)
for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
free(table->buckets[i]->key); //Freeing memory for key and data
free(table->buckets[i]->data);
}
free(table->buckets[i]); //free the whole bucket
}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)calloc(1,sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
if(strcmp(cur->key, key) == 0)
return((cur->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size];
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++)
{
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
count ++;}
node_num[i] = count;
count = 0;
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) /(float) (table->size); //oook: i think you want a fp average
}
void *start_hash_walk(Phash_table table){
void* temp = 0; //oook: this was another way of overwriting your input table
int i, j, k;
int l=0; //oook: new counter for elements in your sorted_array
hash_entry *cur;
if(table->sorted_array !=NULL) free(table->sorted_array);
table->sorted_array = (void**)calloc((table->total), sizeof(void*));
for(i = 0; i < table->size; i ++){
//for(i = 0; i < table->total; i++){ //oook: i don't think you meant total ;)
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
table->sorted_array[l++] = cur->data;
}
}
//oook: sanity check/assert on expected values
if (l != table->total)
{
printf("oook: l[%d] != table->total[%d]\n",l,table->total);
}
for(j = (l) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if (table->sorted_array[k-1] && table->sorted_array[k])
{
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp = table->sorted_array[k-1]; //ook. changed temp to void* see assignment
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp;
}
}
else
printf("if (table->sorted_array[k-1] && table->sorted_array[k])\n");
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
/*oook: this was blowing up since you were incrementing past the size of sorted_array..
NB: *you **need** to implement some bounds checking here or you will endup with more seg-faults!!*/
//table->sort_num++
return table->sorted_array[table->sort_num++];
}
here's parse.c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h> //oook: added so you can assert ;)
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)calloc(1,sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
dictionary_size++;
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
if (!cur) //ook: do test or assert for null values
{
printf("oook: null== (cur = start_hash_walk)\n");
exit(-1);
}
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
{//oook: i think you needed these braces
cur2 = next_hash_walk(t);
if (!cur2) //ook: do test or assert for null values
{
printf("oook: null== (cur2 = next_hash_walk(t) at i[%d])\n",i);
}
else
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}//oook: i think you needed these braces
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
/* ooook: do assert on programmatic errors.
this function *requires non-null inputs. */
assert(cur1 && cur2);
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
follow the //ooks
Explanation:
There were one or two places this was going to blow up in.
The quick fix and answer to your question was in parse.c, circa L100:
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
..checking that cur is not null before calling printf fixes your immediate seg-fault.
But why would cur be null ? ~because of this bad-boy:
void *start_hash_walk(Phash_table table)
Your hash_func(char *string) can (& does) return non-unique values. This is of course ok except that you have not yet implemented your linked list chains. Hence you end up with table->sorted_array containing less than table->total elements ~or you would if you were iterating over all table->size buckets ;)
There are one or two other issues.
For now i hacked Nate Kohl's for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next) further, to be for(cur=table->buckets[i]; cur != NULL; cur=cur->next) since you have no chains. But this is *your TODO so enough said about that.
Finally. note that in next_hash_walk(Phash_table table) you have:
table->sort_num++
return table->sorted_array[table->sort_num];
Ouch! Do check those array bounds!
Notes
1) If you're function isn't designed to change input, then make the input const. That way the compiler may well tell you when you're inadvertently trashing something.
2) Do bound checking on your array indices.
3) Do test/assert for Null pointers before attempting to use them.
4) Do unit test each of your functions; never write too much code before compiling & testing.
5) Use minimal test-data; craft it such that it limit-tests your code & attempts to break it in cunning ways.
6) Do initialise you data structures!
7)Never use egyptian braces ! {
only joking ;)
}
PS Good job so far ~> pointers are tricky little things! & a well asked question with all the necessary details so +1 and gl ;)
(//oook: maybe add a homework tag)

Resources