How can I resolve the collision in the hashing in this code I did? Currently cannot search for NG CHEA YEAT's ID only - c

I have the following text file
1171203258:HOSSAIN, MARUF
1181202660:KUHAN RAJ A/L TAMIL CHEL WAM
1181203465:PONG KAI SUN
1191102443:FAIZA OSAMA ABDALLA HASHIM
1201302289:LEE JIA WEI
1201302368:SHEIKH, AHNAF AZMAIN
1201100584:HI CHIA LING
1201101509:NG CHEA YEAT
1191103201:PHUAH CHEE HAOU
1201100879:MOSTAFA ARABY MADBOULY AHMED
1191103215:TONG JUN YANG
1191103119:ANG QIZHENG
1171302286:DARWIN KUMAR A/L MUNIAN
1181101192:HAIZUN NAJWA BINTI MOHD RIFIN
1201100926:NG XUE NIE
1191302417:ALMARHOON, ALI HUSSAIN A
1201100225:HEMAN RAO A/L SUBRAMANIAM
1181100823:LIM ZHEN BANG
1161202587:SOHEIL PRAKASAN SUPPAN
1201100603:AVINASH MURALI
1181101858:CHEAH KOK YEW
1191103071:GAN WEI TONG
1201100301:KEVIN THAM ZHENG YIT
1201100648:LIM CHER AIK
1201302222:SHIVAA RUTRAN A/L NAGATHEESAN
1201100779:TAN WEI XIANG
1191100919:WONG HONG WEI
The code I have for now, work well but have collision in the hashing I think
Here is what I have so far:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MDIR 27 //size of list
#define MBUFF 256
#define MHASH 109 //hash function is %109
#define MNAME 40
struct List{
char name[40];
int studID;
};
//function prototype
int comparator(const void* p, const void* q){
return strcmp(((struct List*)p)->name,((struct List*)q)->name);
}
int readData(struct List dir[]);
int hashfunc(char *name);
void hash(struct List dir[], int ndir,
int hashtable[]);
int search(char *key,
struct List s[], int hashtable[]);
//main function
int main(){
int ndir, result, hashtable[MHASH];
int count;
int i;
int j;
struct List s[27];
char temp[27];
char query[40];
FILE *fptr;
fptr = fopen("rec.txt", "r+");
if (fptr != NULL) {
printf("File created successfully!\n");
}
else {
printf("Failed to create the file.\n");
// exit status for OS that an error occurred
return -1;
}
for(count = 0; count < 27; count++){
fscanf(fptr,"%d", &s[count].studID);
fgets(s[count].name,40,fptr);
}
qsort
qsort(s,27,sizeof(struct List),comparator);
printing the sorted name then continue the hashing of searching
//printing sorted name
printf("Sorted Names\n");
for(i=0;i<27;i++){
printf("%d%s\n", i+1, s[i].name);
}
fclose(fptr);
hashing of searching part
ndir=readData(s);
hash(s,ndir,hashtable);
puts("\nName to search>>");
fgets(query,MNAME-1,stdin);
query[strlen(query)-1]='\0';
result=search(query,s,hashtable);
if(result==-1)
printf("Not Found");
else
printf("%s's ID is %d\n",
s[result].name, s[result].studID);
return 0;
}
read function
int readData(struct List dir[]){
FILE *fdir=fopen("rec.txt","r");
char buff[MBUFF];
int i=0;
while(i<MDIR && fgets(buff,MBUFF-1,fdir)){
dir[i].studID=atol(strtok(buff,":"));
strcpy(dir[i].name,strtok(NULL, "\n"));
i++;
}
return(i);
}
hash function
int hashfunc(char *name){
long sum=0;
int k=0;
while(name[k]){
sum+=name[k];
k++;
}
return( (int) (sum % MHASH) );
}
hash function
void hash(struct List dir[], int ndir,
int hashtable[]){
int k;
int index;
for(k=0;k<ndir;k++){
index = hashfunc(dir[k].name);
hashtable[index]=k;
}
}
search function
int search(char *key, struct List dir[],
int hashtable[]){
int index=hashfunc(key);
int k=hashtable[index];
if(strcmp(key,dir[k].name)==0)
return(k);
else
return(-1);
}
I am not sure for the hashing of searching part

Whenever faced with a need to separate fields in a line of data, the normal approach is to read an entire line of data as a string into a buffer (character array). Then you separate what you need from the buffer using whatever method fits the data the best. Either using a pair of pointers to bracket the text you need and then copying the characters between the pointers. You can automate the process using string functions like strchr() to locate the ':' in the buffer. You can also use string functions like strtok() to split the buffer into tokens on any given set of delimiters.
However here there is an even simpler method. Since you have a fixed format for the studID and name in the line, you can simply use sscanf(), e.g.
#include <stdio.h>
#include <stdlib.h>
#define MXSTUD 30 /* if you need a constant, #define one (or more) */
#define MXNAME 40
typedef struct list { /* adding typedef for convenience */
char name[MXNAME];
unsigned studID;
} list;
...
int main (int argc, char **argv) {
int count = 0; /* count of students */
char buf[MXNAME * 2]; /* temprorary storage for line */
list s[MXSTUD] = {{ .name = "" }}; /* list array initialized all 0 */
/* open filename given as 1st argument or "rec.text" if none given */
FILE *fptr = fopen (argc > 1 ? argv[1] : "rec.text", "r");
if (!fptr) { /* validate file open for reading */
fputs ("error: file open failed\n", stderr);
return 1;
}
while (fgets (buf, sizeof buf, fptr)) { /* read each line into buf */
/* separate studID and name using sscanf() */
if (sscanf (buf, "%u:%39[^\n]", &s[count].studID, s[count].name) == 2) {
count += 1; /* increment count on success */
}
}
...
That's all that is needed to read each line of data and separate the line into studID and name storing each in an element of the list array of struct.
Use qsort() For Sorting
Regardless of whether you have an array or allocated block of memory containing objects, qsort() provides a simple and efficient way to sort it. All you need to do is write a compare() function telling qsort() how to compare the elements. The declaration for the qsort() compare function is:
int compare (const void *a, const void *b);`
Where a and b are simple pointers-to elements of your array to be compared. So when writing the function, all you need to do is cast a and b to the proper type and write the logic to compare whatever you like in the two elements. A negative return means a sorts before b and a positive return means b sorts before a. A zero return means the elements are equal.
Casting the a and b to type const list * (you include const since the data isn't modified which allows the compiler freedom to optimize more fully), you simply loop over each name comparing characters and returning when two characters differ or the end of file is reached. Here, to sort your s[] array by name you can do:
/* qsort compare function lexagraphically sorts words */
int compare (const void *a, const void *b)
{
/* a & b are pointers to adjacent list elements, (pointers to list) */
const list *sa = (const list *)a,
*sb = (const list *)b;
const char *na = sa->name, /* pointers to name in each element */
*nb = sb->name;
/* loop advancing a character in each word per-iteration */
for (;; na++, nb++) {
/* if characters differ or at end of either */
if (*na != *nb || !*na)
break;
}
return (*na > *nb) - (*na < *nb); /* return sort order */
}
Then to sort your array of list (your s[] array) with qsort(), all that is needed is:
qsort (s, count, sizeof *s, compare); /* sort array by name */
Putting it all together in a short program that reads from the filename given as the first argument to the program (or from "rec.text" by default if no argument is given), you can do:
#include <stdio.h>
#include <stdlib.h>
#define MXSTUD 30 /* if you need a constant, #define one (or more) */
#define MXNAME 40
typedef struct list { /* adding typedef for convenience */
char name[MXNAME];
unsigned studID;
} list;
/* qsort compare function lexagraphically sorts words */
int compare (const void *a, const void *b)
{
/* a & b are pointers to adjacent list elements, (pointers to list) */
const list *sa = (const list *)a,
*sb = (const list *)b;
const char *na = sa->name, /* pointers to name in each element */
*nb = sb->name;
/* loop advancing a character in each word per-iteration */
for (;; na++, nb++) {
/* if characters differ or at end of either */
if (*na != *nb || !*na)
break;
}
return (*na > *nb) - (*na < *nb); /* return sort order */
}
int main (int argc, char **argv) {
int count = 0; /* count of students */
char buf[MXNAME * 2]; /* temprorary storage for line */
list s[MXSTUD] = {{ .name = "" }}; /* list array initialized all 0 */
/* open filename given as 1st argument or "rec.text" if none given */
FILE *fptr = fopen (argc > 1 ? argv[1] : "rec.text", "r");
if (!fptr) { /* validate file open for reading */
fputs ("error: file open failed\n", stderr);
return 1;
}
while (fgets (buf, sizeof buf, fptr)) { /* read each line into buf */
/* separate studID and name using sscanf() */
if (sscanf (buf, "%u:%39[^\n]", &s[count].studID, s[count].name) == 2) {
count += 1; /* increment count on success */
}
}
qsort (s, count, sizeof *s, compare); /* sort array by name */
for (int i = 0; i < count; i++) { /* output results */
printf ("%2d %10u %s\n", i + 1, s[i].studID, s[i].name);
}
}
(note: you simply need to open the file in read mode "r")
Example Use/Output
With your data in a file named dat/studIDlist.txt, for the 27 students in your data you would get:
$ ./bin/studIDlist dat/studIDlist.txt
1 1191302417 ALMARHOON, ALI HUSSAIN A
2 1191103119 ANG QIZHENG
3 1201100603 AVINASH MURALI
4 1181101858 CHEAH KOK YEW
5 1171302286 DARWIN KUMAR A/L MUNIAN
6 1191102443 FAIZA OSAMA ABDALLA HASHIM
7 1191103071 GAN WEI TONG
8 1181101192 HAIZUN NAJWA BINTI MOHD RIFIN
9 1201100225 HEMAN RAO A/L SUBRAMANIAM
10 1201100584 HI CHIA LING
11 1171203258 HOSSAIN, MARUF
12 1201100301 KEVIN THAM ZHENG YIT
13 1181202660 KUHAN RAJ A/L TAMIL CHEL WAM
14 1201302289 LEE JIA WEI
15 1201100648 LIM CHER AIK
16 1181100823 LIM ZHEN BANG
17 1201100879 MOSTAFA ARABY MADBOULY AHMED
18 1201101509 NG CHEA YEAT
19 1201100926 NG XUE NIE
20 1191103201 PHUAH CHEE HAOU
21 1181203465 PONG KAI SUN
22 1201302368 SHEIKH, AHNAF AZMAIN
23 1201302222 SHIVAA RUTRAN A/L NAGATHEESAN
24 1161202587 SOHEIL PRAKASAN SUPPAN
25 1201100779 TAN WEI XIANG
26 1191103215 TONG JUN YANG
27 1191100919 WONG HONG WEI

You will have to get line by line your file and store it in an array.
FILE *fp = fopen("lorem.txt", "r");
if(fp == NULL) {
perror("Unable to open file!");
exit(1);
}
char chunk[128];
while(fgets(chunk, sizeof(chunk), fp) != NULL) {
fputs(chunk, stdout);
fputs("|*\n", stdout); // marker string used to show where the content the chunk array has ended
}
fclose(fp);
To split each line use strtok() function:
char *token = strtok(line, ":"); // To separate the first block from the second like seen on your image.
char *token[1] = strtok(token, ","); // To separate the other part

Related

Sum count by grouping according to another column in C

I have a text file of the following form. Left column is names of players and right column is their score in games they played.
john 40
mary 50
john 30
kevin 88
kevin 29
joe 102
david 11
mary 134
I want to sum up the scores of the players. So, I want to print output of the form
john 70
mary 184
kevin 117
joe 102
david 11
I know that this can be easily done in R or Python. But I want to do this using C. So, I try to declare an array of structures in C and try to read each line from the file. struct is defined as a global variable, so by default, the struct members are initialized to zero values or null character in case of char array. Then, I try to read each row into the struct, which itself is an element of the array. But, while implementing this, I got stuck where new rows are to be read and then stored into structs. Is there any efficient way to do this ? Since R or 'pandas are based on C, their underlying code is probably written in C. How is it done there ?
Thanks
Typically you'd read a line, split it up on whitespace, see if an entry with that name already exists in a hash table or tree or other map data structure, and if so, add the current value to it, and if not, insert it using the current value. Then at the end traverse the map printing out the entries. Basically, the same approach you'd take with any language.
However, those other languages often have things like map data structures, high level abstractions for reading files and parsing text, etc., so a task like this can be done in a few lines (Shoot, awk can do it in one). With C, you have to write most of that stuff yourself, or use add-on libraries - the C standard, for example, has no hash table or trees. You basically have to do everything manually that languages like Python are doing for you under the hood.
Here's an example that uses the POSIX binary search tree functions (An awkward but portable API):
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <search.h>
struct record {
int num;
char name[];
};
struct record *make_record(const char *name, int num) {
size_t len = strlen(name);
struct record *r = malloc(sizeof *r + len + 1);
r->num = num;
memcpy(r->name, name, len);
r->name[len] = 0;
return r;
}
int reccmp(const void *va, const void *vb) {
const struct record *a = va, *b = vb;
return strcmp(a->name, b->name);
}
void print_rec(const void *nodep, VISIT which, int depth) {
(void)depth;
// Print records in sorted order.
if (which == postorder || which == leaf) {
const struct record *r = *(const struct record **)nodep;
printf("%s\t%d\n", r->name, r->num);
}
}
int main(int argc, char **argv) {
if (argc != 2) {
fprintf(stderr, "Usage: %s filename\n", argc > 0 ? argv[0] : "program");
return EXIT_FAILURE;
}
FILE *fp = fopen(argv[1], "r");
if (!fp) {
fprintf(stderr, "%s: Unable to open %s: %s\n", argv[0], argv[1],
strerror(errno));
return EXIT_FAILURE;
}
void *counts = NULL; // Opaque pointer to the root of the tree
int lineno = 0;
char *line = NULL;
size_t line_len = 0;
while (getline(&line, &line_len, fp) > 0) {
lineno += 1;
char *saveptr = NULL;
char *name = strtok_r(line, " ", &saveptr);
char *numstr = strtok_r(NULL, " ", &saveptr);
if (!name || !*name || !numstr || !*numstr) {
fprintf(stderr, "Line %d of input is malformed!\n", lineno);
continue;
}
int num = atoi(numstr);
struct record *new_rec = make_record(name, num);
// tsearch() either inserts a new node and returns a pointer to it,
// or returns a pointer to an existing matching node.
struct record *found_rec =
*(struct record **)tsearch(new_rec, &counts, reccmp);
if (new_rec != found_rec) {
// If it's the latter, update its number sum and free the struct used
// to look it up.
found_rec->num += num;
free(new_rec);
}
}
free(line);
fclose(fp);
twalk(counts, print_rec);
#ifdef __GLIBC__
// Prevent spurious warnings from tools like ASan and valgrind about
// memory leaks.
tdestroy(counts, free);
#endif
return 0;
}
Example usage:
$ gcc -g -O -Wall -Wextra group.c
$ ./a.out input.txt
david 11
joe 102
john 70
kevin 117
mary 184

Read from a csv file , separate every line and every field in C

I have a csv file in which I have to separate every line (\n) and every field (,) in this line.
My goal is to create an array of structs. Every struct in each "box" of the array must contains 4 fields of every line.
How can I write it in c?
I thought to use fscanf and fgets but I don't know how to use them together because with fgets I want to divide lines while with fscanf i want to divide fields .
Final situation :
| 0 , noto, 233460, 32209.073312 | 1, piangea, 4741192, 811.. | 2 ,spenti! , .... |
| position 0 in the array | position 1 in the array | position 2 in the array |
records.csv
0,noto,233460,32209.073312
1,piangea,4741192,81176.622633
2,spenti!,1014671, 4476.013614
3,misericordia,496325,61628.929334
4,quando,4476757,10838.641053
main.c
#include <stdlib.h>
#include<stdio.h>
#include <string.h>
struct _SortedArray {
int id;
char field1[12];
int field2;
float field3;
};
int main() {
FILE *fd;
int res;
struct _SortedArray files[101];
int n;
fd = fopen("records.csv", "r");
if (fd == NULL) {
perror("Error");
exit(1);
}
char r[100];
n = 0;
while (n<6) {
if(fgets(r,100,fd)!=NULL){
puts(r);
fscanf(r, "%[^,],%[^,],%[^,],%[^\n]\n", &files[n].id, &files[n].field1, &files[n].field2, &files[n].field3);
}
n++;
}
for(int i=0;i<6;i++){
printf(" INT:%c,CHAR:%s //",files[i].id, files[i].field1);
}
return 0;
}
Your code contains various little problems and a major inconsistency. The major inconsistency is that you should use sscanf instead of fscanf to process the line returned by fgets.
But that is not all:
misericordia has 12 characters. As C strings require a NULL terminator, field1 must have at least 13 as size
the format characters should be consistent with the type of the fields
when you read into a char array, the array decays to a pointer: you must not add th &
So the line could become:
sscanf(r, "%d,%[^,],%d,%f", &files[n].id, files[n].field1, &files[n].field2, &files[n].field3)
Other possible improvements:
identifiers starting with _ should be reserved for those that you do not use. Close to an opinion, but here you should better use SortedArray
replace plain magic values for sizes with the sizeof operator where you can. If you later change a size, you will have to change it in one single place in your code (best practice: Don't Repeat Yourself)
control the result of input functions (here [s]scanf to be robust against erroneous input data
eventually control that nothing is left at the end of line
only try to print as many lines as you could read
remove unused variables (a nice compiler should emit warnings)
always limit input of string to the size of the buffer (%12[^,])
The code could become:
#include <stdlib.h>
#include<stdio.h>
#include <string.h>
struct SortedArray {
int id;
char field1[13];
int field2;
float field3;
};
int main() {
FILE *fd;
// int res;
struct SortedArray files[101];
int n;
fd = fopen("records.csv", "r");
if (fd == NULL) {
perror("Error");
exit(1);
}
char r[100];
for (n=0; n<sizeof(files)/sizeof(files[0]); n++) {
if(fgets(r,sizeof(r),fd)==NULL){
break;
}
char dummy[2]; // to control nothing is left on end of line
//puts(r);
if (4 != sscanf(r, "%d,%12[^,],%d,%f%1s", &files[n].id, files[n].field1, &files[n].field2, &files[n].field3, dummy)) {
perror("Incorrect line");
fprintf(stderr, "Line %d : %s\n", n+1, r);
}
}
for(int i=0;i<n;i++){
printf(" INT:%d,CHAR:%s //",files[i].id, files[i].field1);
}
return 0;
}

Storing several string with struct in C

with following code I can store one string only.
Main problem is how to store several. If i want to enter another string after the first one it wont do it.
I didnt write it in code but when I type("KRAJ") it should get out of while loop.
typedef struct{
char Objekat[20+1];
char Mjesto[20+1];
char velicina [20];
int cijena;
char kn[3];
char stanje[20];
}Apartmani;
int main()
{
Apartmani *apartmani=(Apartmani*)malloc(sizeof(Apartmani)*50);
while(scanf("%[^,\n],%[^,],%[^,],%d%[^,],%[^\n]", &apartmani[i].Objekat,&apartmani[i].Mjesto,&apartmani[i].velicina,
&apartmani[i].cijena,&apartmani[i].kn, &apartmani[i].stanje )==6)
{
i++;
}
for(p=0;p<i;p++)
{
printf("%s %s %s %d %s %s",apartmani[p].Objekat,apartmani[p].Mjesto,apartmani[p].velicina,apartmani[p].cijena,
apartmani[p].kn, apartmani[p].stanje);
}
}
For example:
string 1: Apartman, Novalja, 100.00 m2, 750000kn, dobro ocuvano.
string 2: Kuca, Ivanbregovia, 20m2, Imtoski, 21252RH, vrijednost-neprocjenjiva.
You should use fgets() plus sscanf().
You should not cast malloc[Do I cast the result of malloc?][1]. Remember to check the return value of malloc, since it can be failed.
change the line of allocating apartmani to:
Apartmani *apartmani= malloc(sizeof(Apartmani)*50);
if(!apartmani) {return -1;}
Do not use & for the input of string.
Check the value of i because its value is limited to 50.
Your code is missing the declaration of i (should be: int i = 0), and the declaration of p also.
Your while loop can be as below:
int i = 0;
char line[100];
while(i < 50 && fgets(line,sizeof(line),stdin))
{
line[strcspn (line, "\n" )] = '\0'; // trip the enter character at the end of line.
int err = sscanf(line,"%20[^,],%20[^,],%19[^,],%d,%2[^,],%19[^\n]", apartmani[i].Objekat,apartmani[i].Mjesto,apartmani[i].velicina,&apartmani[i].cijena,
apartmani[i].kn, apartmani[i].stanje);
if(err != 6)
break;
i++;
}
If I understand you correctly, you want to store several 'Apartmani' structures.
In this case, you have 2 main possibilites :
Using array of structures (Fastest to write but less efficient)
Use linked-list (More efficient but more complex to use)
Examples
1: Using array of structures
#define MAX_APARTMANI 50
int main(void) {
int i = 0;
/* Create Apartmani array */
Apartmani *apartmani_tab[MAX_APARTMANI];
do {
/* loop by using malloc on a single element */
apartmani_tab[i] = (Apartmani *) malloc(sizeof(Apartmani));
/* While check using scanf */
} while (scanf("%[^,\n],%[^,],%[^,],%d%[^,],%[^\n]", apartmani_tab[i]->Objekat, apartmani_tab[i]->Mjesto, apartmani_tab[i]->velicina,
apartmani_tab[i]->cijena, apartmani_tab[i]->kn, apartmani_tab[i]->stanje) == 6 && ++i < MAX_APARTMANI)
/* good pratice: don't forget to free memory ! */
while (--i > 0) {
free(apartmani_tab[i]);
}
return (0);
}
2: Using linked-list
typedef struct Apartmani {
char Objekat[20+1];
char Mjesto[20+1];
char velicina [20];
int cijena;
char kn[3];
char stanje[20];
struct Apartmani *next;/* add pointer to next item in the list */
} Apartmani_t;
Apartmani_t *new_item(void) {
Apartmani_t *new_element = NULL;
new_element = (Apartmani_t *) malloc(sizeof(Apartmani));
if (!new_element)
return (NULL);
memset(new_element, 0, sizeof(*new_element));
new_element->next = NULL;
return (new_element);
}
int main(void) {
/* Initialize Apartmani list*/
Apartmani *apartmani_list = NULL, *current = NULL;
do {
if (!apartmani_list) { /* if empty list */
apartmani_list = new_item(); /* add first item */
if (!apartmani_list) /* prevent malloc errors */
break;
current = apartmani_list; /* link current pointer to list */
} else {
current->next = new_item();
if (!current->next) /* if malloc fails */
break;
current = current->next; /* update current pointer */
}
} while (scanf("%[^,\n],%[^,],%[^,],%d%[^,],%[^\n]", current->Objekat, current->Mjesto, current->velicina, current->cijena, current->kn, current->stanje) == 6) /* While check using scanf */
/* good pratice: don't forget to free memory ! */
while (apartmani_list) {
current = apartmani_list->next;
free(apartmani_list);
apartmani_list = current;
}
}
NB: I have not tried this code but the final version is probably very close to that.

Error trying to compile program from instructor (symbols expected in define line)

I was given a template from my instructor and then modified the code to frequency histogram of a given text. However i'm getting errors trying to compile the code. I believe the errors indicated by the compiler is at the beginning of the code. The errors have been taken a screenshot of and attached below. Thanks in advance
errors:
test.c:6:25: error: expected declaration specifiers or '...' before '\x20'
#define FIRST_PRINTABLE ' ' // Space character code 32, see Etter 2016 text, pp. 418-420
^
test.c:8:30: note: in expansion of macro 'FIRST_PRINTABLE'
#define NUM_PRINTABLE (int) (FIRST_PRINTABLE-LAST_PRINTABLE+1)
^~~~~~~~~~~~~~~
test.c:11:38: note: in expansion of macro 'NUM_PRINTABLE'
void init_array(int histogram[], int NUM_PRINTABLE);
^~~~~~~~~~~~~
code:
/*
* Comp120 - Lab 7: Starter project -- Complete this code
* Character Frequency analysis -- read a text file and display frequency
* analysis
* for all printable characters.
*
* Author: J. Fall
* Date: Feb. 2017
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
// Definition of printable character set
#define FIRST_PRINTABLE ' ' // Space character code 32, see Etter 2016 text, pp. 418-420
#define LAST_PRINTABLE '~'
#define NUM_PRINTABLE (int) (FIRST_PRINTABLE-LAST_PRINTABLE+1)
// Function prototypes:
void init_array(int histogram[], int NUM_PRINTABLE);
int sum_array(const int histogram[], int NUM_PRINTABLE);
bool isPrintable(char c);
void compute_frequency(char* filename, int histogram[], int NUM_PRINTABLE);
void write_histogram(int histogram[], int NUM_PRINTABLE);
FILE* openFileRead(char* filename);
int main( int argc, char* argv[] )
{
if (argc < 2) {
printf("Usage: freq inputFile \n");
exit(-1);
}
int histogram[NUM_PRINTABLE]; // Array of counters -- one for each printible character
compute_frequency(argv[1], histogram, NUM_PRINTABLE);
write_histogram(histogram, NUM_PRINTABLE);
printf( "Program complete. \n" );
return 0 ;
}
/*
* Initialize the array of integers of given length to all zeros
*/
void init_array(int histogram[], int NUM_PRINTABLE)
{
int i = 0;
for(i=0;i<NUM_PRINTABLE;i++){
histogram[i]=0;
} // TODO: write function to assign 0 too every array element.
}
/*
* Return the sum of all items in the given array
*/
int sum_array(const int array[], int NUM_PRINTABLE)
{
int i = 0;
int sum = 0;
for(i=0;i<NUM_PRINTABLE;i++){
sum = sum + histogram[i];
}
return sum; // TODO: write function to add up every element in the given array.
}
/*
* Return true iff the character is PRINTABLE
*/
bool isPrintable(char c)
{
if (c >= FIRST_PRINTABLE && <= LAST_PRINTABLE){
return true;
}
else
return false; // TODO: write function to return true iff c is a printable character
}
/*
* Compute the frequency histogram for all PRINTABLE characters in the given file
*/
void compute_frequency(char* filename, int histogram[], int NUM_PRINTABLE)
{
FILE* inputFile = openFileRead(filename);
init_array(histogram, NUM_PRINTABLE);
char c = getc(inputFile); // priming read -- read first character from file
while (c != EOF) {
if(isPrintable(c)){
int bin = c - FIRST_PRINTABLE;// TODO: write algorithm to count the number of times character c occurs.
histogram[bin]++;
}
// HINT: since array indexes start at zero, map the ASCII code for each
// printable charcter onto an index by subtracting FIRST_PRINTABLE
// After processing previous character, read next character from file to re-prime the loop
c = getc(inputFile);
}
}
/*
* Write the frequency histogram out to the given file
*/
void write_histogram(int histogram[], int NUM_PRINTABLE)
{
FILE* outputFile = stdout; // Simplifictaion: output is written to the console instead of to an output file.
int total_count = sum_array(histogram, NUM_PRINTABLE);
fprintf(outputFile, "Frequency Analysis Results. Input contained %d printable characters. \n", total_count);
fprintf(outputFile, "Char | Frequency \n");
fprintf(outputFile, "____ | _________ \n");
int i;
for (i=0; i<NUM_PRINTABLE; i++) {
char ch = (char) (i + FIRST_PRINTABLE);
double freq;
if (histogram[i] > 0) {
double freq = histogram[i]/(double)total_count * 100;
fprintf(outputFile, "%3c | %9.3f%% \n", ch, freq);
}
else
fprintf(outputFile, "%3c | %9d%% \n", ch, 0);
}
}
/*
* Attempt to open the file for read access.
* Peforms error check and exits if file is not accessible
*/
FILE* openFileRead(char* filename)
{
FILE* inFile = fopen(filename, "r" );
if( inFile == NULL) {
printf( "Error opening input file %s, program terminating!\n", filename);
exit(-1);
}
return inFile;
}
#define NUM_PRINTABLE (int) (FIRST_PRINTABLE-LAST_PRINTABLE+1)
// Function prototypes:
void init_array(int histogram[], int NUM_PRINTABLE);
when those both lines are expanded by preprocessor it translates as (you can use gcc -E on the source to see it in action):
void init_array(int histogram[], int (int) (' '-'~'+1));
which is obviously a syntax error. Just use NUM_PRINTABLE as a constant in your functions, not as a parameter.
Aside, the macro is functionnaly wrong, it should be
#define NUM_PRINTABLE (LAST_PRINTABLE-FIRST_PRINTABLE+1)
or the value would be negative. (and you don't need to cast to int since character literals are already int)

C: dictionary implemented from a txt file with linked lists and a hash table can't find words that exist in the file

Please excuse any mistakes made in this post, as this is my first post here, but do point them out.
I'm still quite new to C, but I'm trying to implement a dictionary from a txt file that has one word in each line through a hash table but whenever I try to search for a word it's never found, though it exists in the file.
Can you please help me figure out what I'm doing wrong?
Both following files:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define tab_size 29 /* table size */
#define word_len 24 /* max length of word */
.h file:
/* structure to be used for each word */
typedef struct list {
char *word;
struct list *next;
} WORD;
.c file:
/* create table */
WORD **create_tab(int size) {
int i = 0;
WORD **hash_tab;
hash_tab = malloc(sizeof(*hash_tab) * size); /*allocate memory */
for (; i<size; i++) /* initialize elements */
hash_tab[i]=NULL;
return hash_tab;
}
/* search for word in table; returns 1 if found, 0 otherwise */
int search(WORD **tb, char *w) {
WORD *htmp/*, *hprv*/;
unsigned long hash = (hash_f(w) % tab_size); /* hash_f is Dan Bernstein's hash function; modulo by tab_size to make sure it fits */
for (htmp = tb[hash]; (htmp != NULL) && (strcmp(htmp->word, w) != 0); / htmp = htmp->next) /* follow chained array of respective cell until word is found or until the end */
{
;
}
if (htmp == NULL) return 0;
return 1;
}
/* insert new WORD with word w at the beginning of the chained array of the respective cell */
void insert(WORD **ht, char *w) {
WORD *htmp;
unsigned long hash = (hash_f(w) % tab_size); /* hash_f is Dan Bernstein's hash function; modulo by tab_size to make sure it fits */
htmp = malloc( sizeof(*htmp) ); /* allocate memory for new WORD */
htmp->word = calloc(word_len+1,sizeof(char)); /* allocate memory for new word with max word length plus one character to make sure there's no buffer overflow in the next line*/
strncpy(htmp->word, w, word_len); /* copy w to word */
htmp->next = ht[hash]; /* new WORD now points to content of the respective table cell */
ht[hash] = htmp; /* table cell now points to new WORD */
}
/* receive empty table and create the whole dictionary in memory word by word */
WORD **make(WORD **dic;) {
char w[word_len];
FILE *dictionary;
dictionary = fopen("dictionary.txt","r");
while ((fgets( w, word_len, dictionary )) != NULL)
insert(dic, w);
fclose(dictionary);
return dic;
}
int main() {
WORD **dic;
char w[word_len];
dic = create_tab(tab_size); /* create the table */
dic = make(dic); /* insert all entrys of dictionary in table */
printf("Insert a word in lowercase: \n");
if ((scanf("%s",w)) == 0) return 0; /* if I didn't somehow verify the scanf it would return an error */
else if (search(dic, w) == 1) printf("The word %s exists in the dictionary \n",w);
else printf("The word %s does not exist in the dictionary",w);
return 0;
}
I've tried several aproaches, some based on the following links, but this always happens. This version is based on the last one.
Quick Way to Implement Dictionary in C
http://www.sparknotes.com/cs/searching/hashtables/section3.rhtml
http://www.seg.rmit.edu.au/code/zwh-ipl/hashtable.c
Problem 1
This is not right.
hash_tab = malloc(sizeof(WORD) * size); /*allocate memory */
It should be
hash_tab = malloc(sizeof(WORD*) * size); /*allocate memory */
To avoid errors like this, you should get in the habit of using
hash_tab = malloc(sizeof(*hash_tab) * size); /*allocate memory */
Problem 2
for (; i<tab_size; i++) /* initialize elements */
hash_tab[i]=NULL;
should be
for (; i<size; i++) /* initialize elements */
hash_tab[i]=NULL;
This is a potential problem. It may not be a real problem in your case if size and tab_size happens to be equal. But as a matter of style, you should use the second block of code.

Resources