I am trying to get words inserted into a hash table. When I run the code, it's supposed to give me a list of the frequency of each word, but it just gives me nothing.
I'm sure it's either to do with my print function, or my insert function, probably more my insert function. I know it's isn't mylib.h, but I'm just not sure where I'm going wrong.
It doesn't insert anything into my table or print it. I'm not really sure what's going on.
hashtable.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "htable.h"
struct htablerec {
char **key;
int *frequencies;
int num_keys;
int capacity;
};
void *emalloc(size_t s) {
void *result = malloc(s);
if (NULL == result) {
fprintf(stderr, "Memory allocation failed!\n");
exit(EXIT_FAILURE);
}
return result;
}
htable htable_new(int capacity) {
int i;
htable h = emalloc(sizeof * h);
h->capacity = capacity;
h->num_keys = 0;
h->frequencies = emalloc(h->capacity * sizeof h->frequencies[0]);
h->key = emalloc(h->capacity * sizeof h->key[0]);
for (i = 0; i < h->capacity; i++) {
h->frequencies[i] = 0;
h->key[i] = NULL;
}
return h;
}
void htable_free(htable h) {
free(h->frequencies);
free(h->key);
free(h);
}
static unsigned int htable_word_to_int(char *word) {
unsigned int result = 0;
while (*word != '\0') {
result = (*word++ + 31 * result);
}
return result;
}
int htable_insert(htable h, char *str) {
int i;
/*convert string to integer*/
unsigned int index = htable_word_to_int(str);
/*calculate index to insert into hash table*/
int remainder = index%h->capacity;
/*once calculated position in the hash table, 3 possibilities occur*/
/*no string in this positon, copy string to that position, increment number of keys, return 1*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
return 1;
}
/*the exact same string is at the position, increment frequency at that position, return frequency*/
if (strcmp(str, h->key[remainder]) == 0) {
h->frequencies[remainder]++;
return h->frequencies[remainder];
}/*a string is at that position, but it isnt the rightone, keep moving along the array
until you find either an open space or the string you are looking for*/
if (h->key[remainder] != NULL && strcmp(str, h->key[remainder]) != 0) {
/*you may need to wrap back around to the beginning of the table, so each time you add
to the position you should also mod by the table capacity.*/
for (i = 0; i <= h->capacity; i++) {
if (h->key[remainder] != NULL && h->capacity == i) {
i = 0;
}
/*no string in this positon, copy string to that position, increment number of keys*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
}
/*if you find the string you were looking for, increment the frequecny at the position
and return the frequency*/
if (strcmp(str, h->key[remainder]) == 0) {
h->frequencies[remainder]++;
return h->frequencies[remainder];
}
}
}
/*if you have kept looking for an open space but there isnt one, the hash table must be full so return 0*/
return 0;
}
void htable_print(htable h, FILE *stream) {
int i;
for(i = 0; i < h->capacity; i++) {
if(h->key[i] != NULL) {
fprintf(stream, "%d%s\n", h->frequencies[i], h->key[i]);
}
}
}
htable.h:
#ifndef HTABLE_H_
#define HTABLE_H_
#include <stdio.h>
typedef struct htablerec *htable;
extern void htable_free(htable h);
extern int htable_insert(htable h, char *str);
extern htable htable_new(int capacity);
extern void htable_print(htable h, FILE *stream);
extern int htable_search(htable h, char *str);
#endif
mylib.c:
#include <stdio.h>
#include <stdlib.h>
#include "mylib.h"
#include "htable.h"
int main(void) {
htable h = htable_new(18143);
char word[256];
while (getword(word, sizeof word, stdin) !=EOF) {
htable_insert(h, word);
}
htable_print(h, stdout);
htable_free(h);
return EXIT_SUCCESS;
}
mylib.h:
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
int getword(char *s, int limit, FILE *stream) {
int c;
char *w = s;
assert(limit > 0 && s != NULL && stream != NULL);
/*skip to the start fo the word */
while (!isalnum(c = getc(stream)) && EOF != c)
;
if(EOF == c) {
return EOF;
} else if (--limit > 0) { /*reduce limit by 1 to allow for the \0 */
*w++ = tolower(c);
}
while(--limit > 0) {
if(isalnum(c = getc(stream))) {
*w++ = tolower(c);
} else if ('\'' == c) {
limit++;
} else {
break;
}
}
*w = '\0';
return w - s;
}
You never set h->key[remainder] to anything in htable_insert, so h->key[i] is still NULL for all i when you call htable_print.
/*no string in this positon, copy string to that position, increment number of keys, return 1*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
return 1;
}
...
/*no string in this positon, copy string to that position, increment number of keys*/
if (h->key[remainder] == NULL) {
h->frequencies[remainder] = 1;
h->num_keys++;
}
You never copy the string. Try something like...
char *key = emalloc(strlen(str) + 1);
strcpy(str, key);
h->key[remainder] = key;
Related
I have a C program that will count the amount of words in each file given on the command line. I now need to make it count how many times each word appears. Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#define MAXWORDS 10000
#define MAXSTRING 100
/* structure holding word frequency information */
typedef struct _word {
char s[MAXSTRING]; /* the word */
int count; /* number of times word occurs */
} word;
static int *total_amount_of_words;
int countWords(FILE *file){
int count = 0;
char character;
while((character = fgetc(file)) != EOF){
if(character == '\n' || character == ' ')
count++;
}
return count;
}
void insert_word (word *words, int *n, char *s) {
int i;
/* linear search for the word */
for (i=0; i<*n; i++) if (strcmp (s, words[i].s) == 0) {
/* found it? increment and return. */
words[i].count++;
return;
}
/* error conditions... */
if (strlen (s) >= MAXSTRING) {
fprintf (stderr, "word too long!\n");
exit (1);
}
if (*n >= MAXWORDS) {
fprintf (stderr, "too many words!\n");
exit (1);
}
/* copy the word into the structure at the first available slot,
* i.e., *n
*/
strcpy (words[*n].s, s);
/* this word has occured once up to now, so count = 1 */
words[*n].count = 1;
/* one more word */
(*n)++;
}
int wordcmp (word *a, word *b) {
if (a->count < b->count) return +1;
if (a->count > b->count) return -1;
return 0;
}
/* return 1 if c is alphabetic (a..z or A..Z), 0 otherwise */
int is_alpha (char c) {
if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') return 1;
return 0;
}
void remove_char (char *s, int i) {
while (s[i]) {
i++;
s[i-1] = s[i];
}
s[i] = 0;
}
void remove_non_alpha (char *s) {
int i;
for (i=0; s[i]; i++) if (!is_alpha (s[i])) remove_char (s, i);
}
void make_lowercase (char *s) {
int i;
for (i=0; s[i]; i++) s[i] = tolower (s[i]);
}
int main(int argc, char *argv[])
{
word words[MAXWORDS];
char s[1000];
int i, j, n, m;
int pid;
FILE *current_file;
n = 0;
total_amount_of_words = mmap(NULL, sizeof *total_amount_of_words, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
//Create processes for each file on the command line
for(i = 1; i < argc; i++)
{
pid = fork();
if(pid == -1) //Error
{
exit(-1);
}
else if(pid == 0) //Children
{
current_file = fopen(argv[i], "r");
int current_word_amount = countWords(current_file);
*total_amount_of_words += current_word_amount;
printf("Child Process for File %s: number of words is: %i\n", argv[i], current_word_amount);
rewind(current_file);
/* read all the words in the file... */
while (!feof (current_file)) {
fscanf (current_file, "%s", s);
/* only insert the word if it's not punctuation */
if (is_alpha (s[0])) {
/* get rid of non-letters */
remove_non_alpha (s);
/* make all letters lowercase */
make_lowercase (s);
/* put this word in the list */
insert_word (words, &n, s);
}
}
fclose(current_file);
exit(0);
}
else //Parent
{
wait(NULL);
}
}
/* sort the list of words by descending frequency */
qsort((void *) words, n, sizeof (word),
(int (*) (const void *, const void *)) wordcmp);
/* print the words with their frequencies */
for (j=0; j<*total_amount_of_words; j++){
printf ("Word: %s\t", words[j].s);
printf ("Frequency: %%d\n", words[j].count);
}
printf("All %i files have been counted!\n Total Amount of Words: %d\n", (argc-1), *total_amount_of_words);
munmap(total_amount_of_words, sizeof *total_amount_of_words);
}
In the for loop at the bottom I cannot get it to print out each word and how many times it appeared. I cannot get anything to output, the struct need gets initialized. How do I share the struct with all processes?
I'm new here, so this is my first post. I've been struggling for 2 weeks to solve this problem. I'm trying to open a directory, capture and store the names of the files found, sort them in ascending order, and print the results. My issue is either qsort causes my program to crash entirely, or qsort doesn't sort the array at all because the files are alphanumeric. I even tried looping through a stored filename to output each character, just to see if I could eventually try comparing the characters between two array locations for sorting. But I noticed that it can't seem to see or recognize the numbers in the alphanumeric filename (for example: "f1.jpg" will only print "f", a blank, then "j", and that's it. I should note that I cannot change the file names because I don't know in advance the names or total files. I'm trying to make this to be dynamic. The following is the main code that I'm having problems with since it crashes at the 'qsort' keyword:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <conio.h>
#include <ctype.h>
#include <time.h>
#include <dirent.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
int compare(const void *a, const void *b);
void readInFilenames();
int main
{
readInFilenames();
system("pause");
}
int compare(const void *a, const void *b)
{
return strcmp(*(char **)a, *(char **)b);
}
void readInFilenames()
{
char cwd[1024];
DIR *dir = NULL;
struct dirent *pent = NULL;
struct stat info;
char file_path[50] = "files/";
int total_files = 0;
int file_size;
// Change directory to file location
chdir(file_path);
if((getcwd(cwd, sizeof(cwd))) != NULL)
{
printf("Current Directory: %s\n", cwd);
}
// Open directory and count the total number of files found
dir = opendir(cwd);
if(dir != NULL)
{
while((pent = readdir(dir)) != NULL)
{
if(stat(pent->d_name, &info))
{
printf("ERROR: stat%s: %s\n", pent->d_name, strerror(errno));
}
else
{
if(S_ISREG(info.st_mode))
{
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
total_files++;
file_size = sizeof(pent->d_name);
}
}
}
}
printf("# of files found: %d\n", total_files);
rewinddir(dir); //reset pointer back to beginning of file directory
// Create character array to store file names;
char *filenames_arr[total_files][file_size];
int size = sizeof(filenames_arr)/sizeof(filenames_arr[total_files]);
total_files = 0; //reset file counter back to 0;
// Read and store file names in the character array
while((pent = readdir(dir)) != NULL)
{
if(stat(pent->d_name, &info))
{
printf("ERROR: stat%s: %s\n", pent->d_name, strerror(errno));
}
else
{
if(S_ISREG(info.st_mode))
{
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
strcpy(filenames_arr[total_files], pent->d_name);
//printf("%s\n", filenames_arr[i]);
total_files++;
}
}
}
}
closedir(dir);
// Print original array contents
printf("Original List of Files\n");
printf("----------------------\n");
for(int i = 0; i < total_files; i++)
{
printf("%s\n", filenames_arr[i]);
}
// Sort array in ascending order
qsort(filenames_arr, total_files, size, compare);
//qsort(filenames_arr, total_files, sizeof(filenames_arr[0]), (char (*)(const void*, const void*))strcmp);
// Print organized array contents
printf("Sorted List of Files\n");
printf("----------------------\n");
for(int i = 0; i < total_files; i++)
{
printf("%s\n", filenames_arr[i]);
}
printf("\nFinished!\n");
}
}
This portion of code is when I was trying to print each individual characters. This was originally located where the final array printing takes place in the previous code:
int i = 0;
int j = 0;
while(i < total_files)
{
printf("File Name: %s\n", filenames_arr[i]);
printf("String Length: %d\n", strlen(filenames_arr[i]));
while(filenames_arr[i] != '\0')
{
printf("Checking filenames_arr[%d][%d]\n", i, j);
if(isalpha((unsigned char)filenames_arr[i][j]) != 0)
{
printf("In isalpha\n");
printf("Found: %c\n", filenames_arr[i][j]);
}
else if(isdigit((unsigned char)filenames_arr[i][j]) != 0)
{
printf("In isdigit\n");
printf("Found: %d\n", filenames_arr[i][j]);
}
j++;
}
printf("-------------------------------------------\n");
i++;
j = 0;
}
How do I sort a 2D array of alphanumeric character strings using qsort? What is it about qsort, or even my array setup that's causing my program to crash? Also, how does qsort work? I've tried searching forums and online course notes to find out whether or not qsort only sorts by looking just at the first character, all characters, or if it has problems with numbers. Thank you in advance!
UPDATE:
I made the following edits to my code. Its working much better, in that qsort no longer crashes program. But, qsort still isn't sorting. Here are the updates I made, followed by a screenshot of the results:
typedef struct{
char *filename;
}filedata;
int compare(const void *a, const void *b);
void readInFilenames();
int main(void){
readInFilenames();
system("pause");
}
int compare (const void *a, const void *b ) {
filedata *ia = (filedata *)a;
filedata *ib = (filedata *)b;
return strcmp(ia->filename, ib->filename);
}
readInFilenames(){
.
.
.
printf("# of files found: %d\n", total_files);
rewinddir(dir);
filedata fn_data[total_files];
total_files = 0;
printf("Original Array: \n");
while((pent = readdir(dir)) != NULL)
{
.
.
.
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
fn_data[total_files].filename = malloc(file_size + 1);
strcpy(fn_data[total_files].filename, pent->d_name);
printf("%s\n", fn_data[total_files].filename);
total_files++;
}
}
closedir(dir);
printf("\n");
qsort(fn_data, total_files, sizeof(filedata), compare);
printf("Sorted Array:\n");
for(int i = 0; i < total_files; i++)
printf("%s\n", fn_data[i].filename);
printf("Finished!\n");
}
Click here to see sorting results
The list should print: f0.dat, f1.dat, f2.dat, f3.dat,...,f20.dat. But instead it prints: f0.dat, f1.dat, f10.dat, f11.dat,...,f9.dat.
OP has fixed code to cope with "qsort dynamic 2d char array with filenames" by enabling warnings and using #Snohdo advice.
Yet code is still doing a compare with strcmp() which only treat digits as characters and not numerically to achieve f1.dat, f2.dat, f3.dat,...,f20.dat order.
Following is a compare functions that looks for digits to invoke an alternate compare for numeric sub-strings. Variations on this compare can be made by OP to suit detailed coding goals.
int AdamsOrder(const char *s1, const char *s2) {
// Compare as `unsigned char` as that is `strcmp()` behavior. C11 7.24.1 3
const unsigned char *us1 = (const unsigned char *) s1;
const unsigned char *us2 = (const unsigned char *) s2;
while (*us1 && *us2) {
if (isdigit(*us1) && isdigit(*us2)) {
char *end; // dummy
unsigned long long l1 = strtoull(us1, &end, 10); // Parse for a number
unsigned long long l2 = strtoull(us2, &end, 10);
if (l1 > l2) return 1;
if (l1 < l2) return -1;
// Continue on treating as text. OP needs to decide how to handle ties: "0001" vs "1"
}
if (*us1 > *us2) return 1;
if (*us1 < *us2) return -1;
us1++;
us2++;
}
// At this point, at least one string ended (i.e. points to '\0').
// The return statement below will behave as follows:
// If a string ended, *us1/2 will be 0. Let an unfinished one be X > 0.
// First string ended : ( 0 > X ) - ( 0 < X ) = false - true = 0 - 1 = -1
// Second string ended: ( X > 0 ) - ( X < 0 ) = true - false = 1 - 0 = 1
// Both strings ended : ( 0 > 0 ) - ( 0 < 0 ) = false - false = 0 - 0 = 0
return (*us1 > *us2) - (*us1 < *us2);
}
I am trying to read a file. I want to read each line from the file and check if there are any spelling error in that line.
For that I have added condition that data from file will store in buffer until it gets a new line characher '\n'. And after getting this line I want to empty the buffer and re insert the values in that.
Code I am using for the same is as follows:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define W_COUNT 23800
#define MAX_LEN 100
char *dict[W_COUNT];
char buffer[MAX_LEN];
int num_words; //No of Words
char *statement[W_COUNT];
char buffer1[MAX_LEN];
void read_dictionary();
void file_read(char *);
void spell_check();
int word_search(char*);
int main(int argc, char*argv[]){
int i;
if(argc < 2){
printf("Expected Filename.\n");
exit(0);
}
read_dictionary();
file_read(argv[1]);
// spell_check();
}
void read_dictionary(){
FILE *fd;
int i = 0;
fd = fopen("dictionary", "r");
while ( fscanf(fd,"%s",buffer) != EOF)
dict[i++] = strdup(buffer);
num_words = i;
fclose(fd);
}
void file_read(char *filename){
FILE *fd;
int i = 0;
char c;
fd = fopen(filename,"r");
/*while ( fscanf(fd,"%s",buffer1) != EOF)
{
word[i++] = strdup(buffer1);
printf("File : %s\n", buffer1);
}*/
while ( ( c = fgetc(fd)) != EOF )
{
buffer1[i++] = tolower(c);
if ( c == '\n')
{
//printf("New Line\n");
spell_check();
buffer1[i] = 0;
}
//buffer1[i] = 0;
}
printf("Statement : %s\n", buffer1);
fclose(fd);
}
void spell_check(){
char *str;
str = strtok(buffer1," .?,!-");
while( str != NULL){
if(!word_search(str))
printf("%s Not found.\n",str);
str = strtok(0," .?,!-");
}
}
int word_search(char *word){
int high, low, mid;
high = num_words - 1;
low = 0;
int found = 0;
while (found == 0){
mid = (low + high) / 2;
if(strcmp(word, dict[mid]) == 0)
return 1;
else if(strcmp(word,dict[mid]) < 0)
high = mid - 1;
else
low = mid + 1;
if ( low > high)
return 0;
}
}
Any suggestions will be appreciated.
Thank you in advance.
while ( ( c = fgetc(fd)) != EOF )
{
buffer1[i++] = tolower(c);
if ( c == '\n')
{
//printf("New Line\n");
spell_check();
i = 0;
buffer1[i] = 0;
}
//buffer1[i] = 0;
}
For each line reading you have to assign the 0 to the i. After that you have to assign the null to the 0th position in the buffer.
You can try the above code for loop it will work.
I'm making a program that reads a given dictionary into a trie tree, and then
performs auto complete on a string inputted by the user. When I use the dictionary file that I am required to use (~100,000 words) I get a segmentation fault. I can't seem to figure out what is causing the segmentation fault. Any help would be appreciated.
typedef struct trieTree {
int data;
struct trieTree *array[26];
}trieTree;
insert function:
trieTree* insert_tree(trieTree *t, char *s, int val)
{
int i;
trieTree *p;
if (strlen(s) == 0)
return t;
if (t == NULL)
t = new_tree(t);
p = t;
for (i = 0; i < strlen(s); ++i) {
if (p->array[s[i] - 'a'] == NULL)
p->array[s[i] - 'a'] = malloc(sizeof (trieTree));
p = p->array[s[i] - 'a'];
}
p->data = val;
return t;
}
Filling the tree:
trieTree* load_tree(trieTree *t, char *file)
{
char s[MAX];
FILE *f = fopen(file, "r");
if (f == NULL)
printf("Error! File not found.");
else
while (feof(f) == 0) {
fscanf(f, "%s", s);
t = insert_tree(t, s, 1);
}
return t;
}
Main function
int main()
{
trieTree t;
new_tree(&t);
load_tree(&t, "dict.txt");
char word[100];
printf("Enter word: ");
scanf("%s", word);
char dat[100] = "";
search_tree(&t, word, dat);
return 0;
}
trieTree* new_tree(trieTree *t)
{
int i;
t = malloc(sizeof (trieTree));
for (i = 0; i < 24; ++i)
t->array[i] = 0;
return t;
}
Your function new_tree() returns a pointer to allocated memory but the returned value is ignored. That's a memory leak, and your code continues to use an uninitialized variable. That's a problem!
int main()
{
trieTree t;
new_tree(&t);
load_tree(&t, "dict.txt");
…
trieTree* new_tree(trieTree *t)
{
int i;
t = malloc(sizeof(trieTree));
for (i = 0; i < 24; ++i)
t->array[i] = 0;
return t;
}
The 24 in the function should be 26, of course. But the function allocates memory and assigns it to the local pointer (original set to point to t in main(), but the malloc() zaps that value). That pointer is returned, but the return is ignored. The variable t in main() is still uninitialized, but it is passed to the load_tree() function.
Frankly, you need:
int main()
{
trieTree *tp = new_tree();
load_tree(&t, "dict.txt");
…
trieTree* new_tree(void)
{
int i;
trieTree *t = malloc(sizeof(trieTree));
if (t == 0)
{
fprintf(stderr, "memory allocation failure\n");
exit(EXIT_FAILURE);
}
for (i = 0; i < 26; ++i)
t->array[i] = 0;
return t;
}
Note that errors should be reported on the standard error channel; that is what it's for. And that every memory allocation should be checked, because if you don't check, it will fail and your program will crash.
There are probably a lot of other problems; I've not investigated them all. This should get you further before crashing.
This seems to work for me, though admittedly I only tested it on a 'dictionary' of 257 words.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAX = 1024 };
typedef struct trieTree
{
int data;
struct trieTree *array[26];
} trieTree;
static trieTree *new_tree(void)
{
int i;
trieTree *t = malloc(sizeof(trieTree));
if (t == 0)
{
fprintf(stderr, "malloc for %zu bytes failed\n", sizeof(trieTree));
exit(EXIT_FAILURE);
}
t->data = 0;
for (i = 0; i < 26; ++i)
t->array[i] = 0;
return t;
}
static trieTree *insert_tree(trieTree *t, char *s, int val)
{
int i;
trieTree *p;
if (strlen(s) == 0)
return t;
if (t == NULL)
t = new_tree();
p = t;
int len = strlen(s);
for (i = 0; i < len; ++i)
{
if (p->array[s[i] - 'a'] == NULL)
p->array[s[i] - 'a'] = new_tree();
p = p->array[s[i] - 'a'];
}
p->data = val;
return t;
}
static trieTree *load_tree(trieTree *t, char *file)
{
char s[MAX];
FILE *f = fopen(file, "r");
if (f == NULL)
{
fprintf(stderr, "Error! File not found.");
exit(EXIT_FAILURE);
}
else
{
while (fscanf(f, "%s", s) == 1)
t = insert_tree(t, s, 1);
fclose(f);
}
return t;
}
static void print_trie(trieTree *t, char *pad)
{
int len = strlen(pad);
char space[len + 3];
memset(space, ' ', len + 2);
space[len + 2] = '\0';
for (int i = 0; i < 26; i++)
{
if (t->array[i] != 0)
{
printf("%s%c\n", pad, i + 'a');
print_trie(t->array[i], space);
}
}
}
static void free_trie(trieTree *t)
{
if (t != 0)
{
for (int i = 0; i < 26; i++)
free_trie(t->array[i]);
free(t);
}
}
int main(void)
{
trieTree *tp = new_tree();
if (tp != 0)
{
tp = load_tree(tp, "dict.txt");
print_trie(tp, "");
free_trie(tp);
}
return 0;
}
I believe it is leak free, too.
Note that this code will crash and burn if any of the input words contains any upper-case letters, or digits, or punctuation. It only handles lower-case and white space; anything else is an unchecked disaster waiting to devastate your program. That's because I've not done any substantive work in the insert_tree() function. You need to worry about 'invalid' characters in that function, probably by case-converting upper-case letters to lower-case and ignoring anything that's not a letter.
I have a program that reads the words of two files (the first a wordlist, and the second an ebook from the Gutenberg project ) into two char *arrays.
I am trying to add all the unique words from the second char *array that don't appear in
the first char *array into a third char *array then print them.
This program adds the correct words, but is adding them more than once.
The error occurs in findOdds().
Note when I use a non-binary search method this program works correctly, but takes a long time.
What is the problem with my program? I apologize for my English.
#include <stdio.h>
#include <stdlib.h> /* for malloc() */
#include <ctype.h>
#include <string.h>
#define MAXCHAR 24
#define MAXLINES 150000
int add2array(FILE *fp, char *lineptr[]);
int findOdds(char *lineptr[], char *lineptr1[], int nlines, int nlines1);
int binsearch1(char *val, char *lineptr[], int nlines);
char *lineptr2[MAXLINES]; /* The unique words not in the word list */
int main(int argc, char *argv[])
{
FILE *my_stream, *my_stream1;
char *lineptr[MAXLINES], *lineptr1[MAXLINES];
int i, nlines, nlines1, nlines2;
/* Load the wordlist. */
my_stream = fopen("words.txt","r");
if(my_stream == NULL) {
printf("error: Couldn't open file\n");
return 2;
} else {
nlines = add2array(my_stream, lineptr);
fclose(my_stream);
}
if(nlines==-1) {
printf("error: Epic Failure to copy words to char *lineptr[]\n");
return -1;
}
/* Load the ebook. */
my_stream1 = fopen("horsemanship.txt","r");
if(my_stream1 == NULL) {
printf("error: Couldn't open file\n");
return 2;
} else {
nlines1 = add2array(my_stream1, lineptr1);
fclose(my_stream1);
}
if(nlines1==-1) {
printf("error: Epic Failure to copy words to char *lineptr[]\n");
return -1;
}
/* Find and print the unique words from the ebook not in the wordlist */
nlines2 = findOdds(lineptr, lineptr1, nlines, nlines1);
for(i=0; i<nlines2; i++)
printf("%s\n",lineptr2[i]);
return 0;
}
/* add2array: read the words from the file into char *lineptr[] */
int add2array(FILE *fp, char *lineptr[])
{
int nlines=0, c=0, pos=0;
char temp[MAXCHAR];
char *p;
while((c = getc(fp)) != EOF) {
if(isalpha(c))
temp[pos++] = tolower(c);
else if(!isalpha(c)) {
temp[pos] = '\0';
pos = 0;
if(isalpha(temp[0])){
if((p = malloc(sizeof(temp)))==NULL)
return -1;
strcpy(p, temp);
lineptr[nlines++] = p;
}
}
}
return nlines;
}
/* Add the unique words from lineptr1 not in lineptr to lineptr2 */
int findOdds(char *lineptr[], char *lineptr1[], int nlines, int nlines1)
{
char *p;
char temp[MAXCHAR];
int i, nlines2=0;
for(i=0; i<nlines1; i++) {
if(binsearch1(lineptr1[i], lineptr, nlines)==-1) {
if(binsearch1(lineptr1[i], lineptr2, nlines2)==-1) {
if((p = malloc(sizeof(temp)))==NULL)
return -1;
strcpy(p, lineptr1[i]);
lineptr2[nlines2++] = p;
}
}
}
return nlines2;
}
int binsearch1(char *val, char *lineptr[], int nlines)
{
int pos;
int start = 0;
int end = nlines-1;
int cond = 0;
while(start <= end){
pos=(start + end)/2;
if((cond = strcmp(lineptr[pos],val)) == 0)
return pos;
else if(cond < 0)
start = pos+1;
else
end = pos-1;
}
return -1;
}
Arrays must be sorted if you want to use binary search, as stated above by n.m.
in main() ...
shellsort1(lineptr1, nlines1);
/* Find and print the unique words from the ebook not in the wordlist */
nlines2 = findOdds(lineptr, lineptr1, nlines, nlines1);
...
int shellsort1(char *v[], int n)
{
int gap, i, j;
char temp[MAXCHAR];
char *p;
for(gap=n/2; gap>0; gap/=2)
for(i=gap; i<n; i++)
for(j=i-gap; j>=0 && strcmp(v[j],v[j+gap])>0; j-=gap) {
if((p = malloc(sizeof(temp)))==NULL)
return -1;
p = v[j];
v[j] = v[j+gap];
v[j+gap] = p;
}
return 0;
}