Working on a binary search. The code below should explain what I'm trying to do. The user inputs a word and then a binary search is implemented to search a wordlist. Problem is the binary search. It's running but it's not finding the word in the wordlist even though I know its there. I know the code could be better but it should work. Anyone shed any light?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char dictionary[400000][45];
int main(void)
{
FILE infile;
int i=0;
int num;
int index;
char buffer[45];
char userword[45];
fp1 = fopen("C:/Users/Aaron/ProgrammingAssignment/dictionary.txt","rb");
if (fp1 == NULL)
{
printf("The dictionary file did not open\n");
exit(0);
}
else
{
printf("Dictionary file is open\n");
}
while(fgets(buffer,45, fp1)!=NULL)
{
strcpy(wordlist[i],buffer);
//printf("Line %d: %s",i,wordlist[i]);
i++;
}
printf("Your wordlist is now in the dictionary array");
do
{
//fscanf(fp2,"%s", userword);
printf("Enter a word to be spell checked: ");
fgets(userword, 43, stdin);
//and do a binary search
index = BinarySearch(userword,0,i);
if(index > -1)
printf("%s was found in the wordlist", userword);
else
printf("%s was not found in the dictionary", wordcheck);
}
while(wordlist != NULL);
if(index>-1) //The word was found
{
printf("That is correctly spelled\n");
}
else
{
printf("That word is spelt wrong\n");
}
return 0;
}
int BinarySearch(const char userword[],int left,int right)
{ int high = 400000;
int low = 0;
int target;
int count = 0;
while (high >= low)
{ target = low + ((high - low) / 2);
// show tries for demonstration only
printf("%d, ",target);
if (strcmp(userword, wordlist[target]) < 0)
high = target -1;
else if (strcmp(userword, wordlist[target]) > 0)
low = target + 1;
else
return target;
}
return -1;
}
Your binary search function is ignoring the values left and right that are passed in.
It shouldn't.
It should probably start:
int BinarySearch(const char userword[], int left, int right)
{
int high = right;
int low = left;
You should close the dictionary after you finish reading it.
You need to consider whether right is the index of the last valid element or 'one after the index of the last element'. This might mean you need to pass i - 1 in the call to the function.
You should consider calling strcmp() once and capturing its return value; it is relatively expensive:
int rc = strcmp(userword, wordlist[target]);
if (rc == 0)
return target;
else if (rc < 0)
high = target - 1;
else
low = target - 1;
Related
I want to insert the data in ascending order based on the partNumber.
When the function is called in main, then the node is successfully added at the first position. But on calling the function second time, there is some problem in insertion and I am unable to figure it out. When I enter the values(in second call), I get the error
Process exited after 8.277 seconds with return value 3221225477
typedef struct part {
int partNumber;
char partName[200];
int partQuantity;
struct part *nextPart;
} Part;
Part *inventory = NULL;
void insertPart();
int
main(int argc, char *argv[])
{
insertPart();
insertPart();
insertPart();
insertPart();
return 0;
}
void
insertPart()
{
Part *tempPart,
*traversePart,
*swapPart;
int counter = 0;
traversePart = inventory;
tempPart = (Part *) malloc(sizeof(Part *));
printf("Enter the Part Number\n");
scanf("%d", &(tempPart->partNumber));
getchar();
printf("Enter the Part Name\n");
fgets(tempPart->partName, 200, stdin);
printf("Enter the Part Quantity\n");
scanf("%d", &(tempPart->partQuantity));
getchar();
if (inventory == NULL) {
inventory = tempPart;
printf("Part added at the first position.\n");
}
else {
while (traversePart->nextPart->partNumber < tempPart->partNumber) {
counter++;
traversePart = traversePart->nextPart;
if (traversePart->nextPart == NULL) {
break;
}
}
if (counter == 0) {
swapPart = inventory;
inventory = tempPart;
tempPart->nextPart = swapPart;
}
else if (traversePart->nextPart == NULL) {
traversePart->nextPart = tempPart;
}
else {
swapPart = traversePart->nextPart;
traversePart->nextPart = tempPart;
tempPart->nextPart = swapPart;
}
}
printf("Element added at position : %d", counter);
}
The problem is traversePart->nextPart->partNumber traversePart->nextPart is not referring to anything or it is not holding any of the address. When you insert first value if condition is true
if (inventory == NULL) {
inventory = tempPart;
printf("Part added at the first position.\n");
}
inventory now holding the address of tempPart but while assigning values of tempPart you never assign an address to its nextvalue and it's not there because you only inserted the first value. For the second position
else{
while(traversePart->nextPart!=NULL)
{
traversePart=traversePart->nextPart;
}
if(traversePart->partNumber < tempPart->partNumber){
//here you can verify conditions
traversePart->nextPart = tempPart
}
}
You're intermixing fgets and scanf [and getchar]. Better to use just fgets and then apply strtol for numbers [or sscanf].
You're linked list code is a bit convoluted. It can be simplified.
Here's the refactored code. I've pulled some helper functions that I had lying around to do the prompting.
And, I added list printing.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
typedef struct part {
int partNumber;
char partName[200];
int partQuantity;
struct part *nextPart;
} Part;
Part *inventory = NULL;
void insertPart();
int getstr(char *buf,int buflen,const char *prompt);
long getnum_strtol(const char *prompt);
int
main(int argc, char **argv)
{
insertPart();
insertPart();
insertPart();
insertPart();
for (Part *cur = inventory; cur != NULL; cur = cur->nextPart)
printf("partNumber=%d partQuantity=%d partName='%s'\n",
cur->partNumber,cur->partQuantity,cur->partName);
return 0;
}
void
insertPart(void)
{
Part *tempPart;
Part *cur;
Part *prev = NULL;
int counter = 0;
#if 0
tempPart = (Part *) malloc(sizeof(Part *));
#else
tempPart = malloc(sizeof(*tempPart));
#endif
tempPart->partNumber = getnum_strtol("Enter the Part Number");
getstr(tempPart->partName,sizeof(tempPart->partName),"Enter the Part Name");
tempPart->partQuantity = getnum_strtol("Enter the Part Quantity");
tempPart->nextPart = NULL;
// find the tail/end of the list
for (cur = inventory; cur != NULL; cur = cur->nextPart) {
++counter;
// insert in sorted part order
if (cur->partNumber > tempPart->partNumber)
break;
prev = cur;
}
do {
tempPart->nextPart = cur;
// insert in the middle or end of list
if (prev != NULL) {
prev->nextPart = tempPart;
break;
}
// insert in new list or before first element of existing list
tempPart->nextPart = inventory;
inventory = tempPart;
} while (0);
printf("\nElement added at position : %d\n", counter);
}
// getstr -- get a string with prompt
// RETURNS: length or (<0 -> error)
int
getstr(char *buf,int buflen,const char *prompt)
{
char *cp;
int ret = 0;
// NOTE: usage of the error codes in errno.h is arbitrary
while (ret <= 0) {
// ensure buffer has enough space
if (buflen < 2) {
ret = -ENOMEM;
break;
}
// output prompt
if (prompt != NULL) {
printf("%s: ",prompt);
fflush(stdout);
}
// get a line
cp = fgets(buf,buflen,stdin);
// EOF
if (cp == NULL) {
ret = -ENODATA;
break;
}
// get buffer length
ret = strlen(buf);
// empty string
if (ret <= 0)
continue;
// point to last char
cp = &buf[ret - 1];
// ensure we got a newline -- if not, fgets had to chop the line (i.e.)
// the line is too long to fit in the buffer
if (*cp != '\n') {
ret = -ENOSPC;
break;
}
// strip the newline -- we are done
*cp = 0;
--ret;
}
return ret;
}
// getnum_strtol -- get number using strtol
long
getnum_strtol(const char *prompt)
{
int len;
int readflg = 1;
char *cp;
char buf[100];
long num = 0;
while (readflg) {
len = getstr(buf,sizeof(buf),prompt);
if (len < 0)
exit(1);
num = strtol(buf,&cp,10);
// ensure we got a least one digit
if (cp <= buf)
continue;
switch (*cp) {
case ' ':
case '\t':
case 0:
readflg = 0;
break;
default:
printf("getnum_strtol: not a valid number -- buffer '%s', invalid '%s'\n",
buf,cp);
break;
}
}
return num;
}
Here's the input file I used to test:
37
Hex Bolt
12
28
Machine Screw
6
23
Brad Nail
1000
27
Lock Nut
300
Here's the program output:
Enter the Part Number: Enter the Part Name: Enter the Part Quantity:
Element added at position : 0
Enter the Part Number: Enter the Part Name: Enter the Part Quantity:
Element added at position : 1
Enter the Part Number: Enter the Part Name: Enter the Part Quantity:
Element added at position : 1
Enter the Part Number: Enter the Part Name: Enter the Part Quantity:
Element added at position : 2
partNumber=23 partQuantity=1000 partName='Brad Nail'
partNumber=27 partQuantity=300 partName='Lock Nut'
partNumber=28 partQuantity=6 partName='Machine Screw'
partNumber=37 partQuantity=12 partName='Hex Bolt'
I'm new here, so this is my first post. I've been struggling for 2 weeks to solve this problem. I'm trying to open a directory, capture and store the names of the files found, sort them in ascending order, and print the results. My issue is either qsort causes my program to crash entirely, or qsort doesn't sort the array at all because the files are alphanumeric. I even tried looping through a stored filename to output each character, just to see if I could eventually try comparing the characters between two array locations for sorting. But I noticed that it can't seem to see or recognize the numbers in the alphanumeric filename (for example: "f1.jpg" will only print "f", a blank, then "j", and that's it. I should note that I cannot change the file names because I don't know in advance the names or total files. I'm trying to make this to be dynamic. The following is the main code that I'm having problems with since it crashes at the 'qsort' keyword:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <conio.h>
#include <ctype.h>
#include <time.h>
#include <dirent.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
int compare(const void *a, const void *b);
void readInFilenames();
int main
{
readInFilenames();
system("pause");
}
int compare(const void *a, const void *b)
{
return strcmp(*(char **)a, *(char **)b);
}
void readInFilenames()
{
char cwd[1024];
DIR *dir = NULL;
struct dirent *pent = NULL;
struct stat info;
char file_path[50] = "files/";
int total_files = 0;
int file_size;
// Change directory to file location
chdir(file_path);
if((getcwd(cwd, sizeof(cwd))) != NULL)
{
printf("Current Directory: %s\n", cwd);
}
// Open directory and count the total number of files found
dir = opendir(cwd);
if(dir != NULL)
{
while((pent = readdir(dir)) != NULL)
{
if(stat(pent->d_name, &info))
{
printf("ERROR: stat%s: %s\n", pent->d_name, strerror(errno));
}
else
{
if(S_ISREG(info.st_mode))
{
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
total_files++;
file_size = sizeof(pent->d_name);
}
}
}
}
printf("# of files found: %d\n", total_files);
rewinddir(dir); //reset pointer back to beginning of file directory
// Create character array to store file names;
char *filenames_arr[total_files][file_size];
int size = sizeof(filenames_arr)/sizeof(filenames_arr[total_files]);
total_files = 0; //reset file counter back to 0;
// Read and store file names in the character array
while((pent = readdir(dir)) != NULL)
{
if(stat(pent->d_name, &info))
{
printf("ERROR: stat%s: %s\n", pent->d_name, strerror(errno));
}
else
{
if(S_ISREG(info.st_mode))
{
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
strcpy(filenames_arr[total_files], pent->d_name);
//printf("%s\n", filenames_arr[i]);
total_files++;
}
}
}
}
closedir(dir);
// Print original array contents
printf("Original List of Files\n");
printf("----------------------\n");
for(int i = 0; i < total_files; i++)
{
printf("%s\n", filenames_arr[i]);
}
// Sort array in ascending order
qsort(filenames_arr, total_files, size, compare);
//qsort(filenames_arr, total_files, sizeof(filenames_arr[0]), (char (*)(const void*, const void*))strcmp);
// Print organized array contents
printf("Sorted List of Files\n");
printf("----------------------\n");
for(int i = 0; i < total_files; i++)
{
printf("%s\n", filenames_arr[i]);
}
printf("\nFinished!\n");
}
}
This portion of code is when I was trying to print each individual characters. This was originally located where the final array printing takes place in the previous code:
int i = 0;
int j = 0;
while(i < total_files)
{
printf("File Name: %s\n", filenames_arr[i]);
printf("String Length: %d\n", strlen(filenames_arr[i]));
while(filenames_arr[i] != '\0')
{
printf("Checking filenames_arr[%d][%d]\n", i, j);
if(isalpha((unsigned char)filenames_arr[i][j]) != 0)
{
printf("In isalpha\n");
printf("Found: %c\n", filenames_arr[i][j]);
}
else if(isdigit((unsigned char)filenames_arr[i][j]) != 0)
{
printf("In isdigit\n");
printf("Found: %d\n", filenames_arr[i][j]);
}
j++;
}
printf("-------------------------------------------\n");
i++;
j = 0;
}
How do I sort a 2D array of alphanumeric character strings using qsort? What is it about qsort, or even my array setup that's causing my program to crash? Also, how does qsort work? I've tried searching forums and online course notes to find out whether or not qsort only sorts by looking just at the first character, all characters, or if it has problems with numbers. Thank you in advance!
UPDATE:
I made the following edits to my code. Its working much better, in that qsort no longer crashes program. But, qsort still isn't sorting. Here are the updates I made, followed by a screenshot of the results:
typedef struct{
char *filename;
}filedata;
int compare(const void *a, const void *b);
void readInFilenames();
int main(void){
readInFilenames();
system("pause");
}
int compare (const void *a, const void *b ) {
filedata *ia = (filedata *)a;
filedata *ib = (filedata *)b;
return strcmp(ia->filename, ib->filename);
}
readInFilenames(){
.
.
.
printf("# of files found: %d\n", total_files);
rewinddir(dir);
filedata fn_data[total_files];
total_files = 0;
printf("Original Array: \n");
while((pent = readdir(dir)) != NULL)
{
.
.
.
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
fn_data[total_files].filename = malloc(file_size + 1);
strcpy(fn_data[total_files].filename, pent->d_name);
printf("%s\n", fn_data[total_files].filename);
total_files++;
}
}
closedir(dir);
printf("\n");
qsort(fn_data, total_files, sizeof(filedata), compare);
printf("Sorted Array:\n");
for(int i = 0; i < total_files; i++)
printf("%s\n", fn_data[i].filename);
printf("Finished!\n");
}
Click here to see sorting results
The list should print: f0.dat, f1.dat, f2.dat, f3.dat,...,f20.dat. But instead it prints: f0.dat, f1.dat, f10.dat, f11.dat,...,f9.dat.
OP has fixed code to cope with "qsort dynamic 2d char array with filenames" by enabling warnings and using #Snohdo advice.
Yet code is still doing a compare with strcmp() which only treat digits as characters and not numerically to achieve f1.dat, f2.dat, f3.dat,...,f20.dat order.
Following is a compare functions that looks for digits to invoke an alternate compare for numeric sub-strings. Variations on this compare can be made by OP to suit detailed coding goals.
int AdamsOrder(const char *s1, const char *s2) {
// Compare as `unsigned char` as that is `strcmp()` behavior. C11 7.24.1 3
const unsigned char *us1 = (const unsigned char *) s1;
const unsigned char *us2 = (const unsigned char *) s2;
while (*us1 && *us2) {
if (isdigit(*us1) && isdigit(*us2)) {
char *end; // dummy
unsigned long long l1 = strtoull(us1, &end, 10); // Parse for a number
unsigned long long l2 = strtoull(us2, &end, 10);
if (l1 > l2) return 1;
if (l1 < l2) return -1;
// Continue on treating as text. OP needs to decide how to handle ties: "0001" vs "1"
}
if (*us1 > *us2) return 1;
if (*us1 < *us2) return -1;
us1++;
us2++;
}
// At this point, at least one string ended (i.e. points to '\0').
// The return statement below will behave as follows:
// If a string ended, *us1/2 will be 0. Let an unfinished one be X > 0.
// First string ended : ( 0 > X ) - ( 0 < X ) = false - true = 0 - 1 = -1
// Second string ended: ( X > 0 ) - ( X < 0 ) = true - false = 1 - 0 = 1
// Both strings ended : ( 0 > 0 ) - ( 0 < 0 ) = false - false = 0 - 0 = 0
return (*us1 > *us2) - (*us1 < *us2);
}
so I wrote this code for a project, and I thought it'd work, but it was only doing a comparison for the very first term from one of my files(the IkeaWords.txt file).
Where did I go wrong?
so this is what I wrote hopefully it's enough.
/*Display each IKEA product name that can be found in the English dictionary.
The full list of the 1764 unique IKEA product words is in IKEAwords.txt
To see if words exist in English, use the 40,444 word English dictionary dictionary.txt,
where the longest word has 21 characters. To lookup a word in the dictionary consider
using binary search,
Print out each word that is found.
*/
#define _CRT_SECURE_NO_WARNINGS
#define NumberOfWordsInDictionary 40437
#define MaxWordSize 21+1
#define NumberOfWordsInIkea 1764
#include <stdio.h>
#include <string.h> // for string length
#include <stdlib.h> // for exit()
// Maximum size of any word in the dictionary, + 1 for null
const char DictionaryFileName[] = "dictionary.txt"; // File name for where dictionary words are found
const char IkeaFileName[] = "IKEAwords.txt";
//--------------------------------------------------------------------------------------
// Use binary search to look up the word from the .txt file in the dictionary array,
//returning index if found, -1 otherwise
int binarySearch(const char ikeaWord[][MaxWordSize], // word to be looked up
const char dictionary[][MaxWordSize], // the dictionary of words
int numberOfDictionaryWords //number of words in the dictionary
)
{
int low, mid, high; // array indices for binary search
int searchResult = -1; // Stores index of word if search succeeded, else -1
// Binary search for word
low = 0;
high = numberOfDictionaryWords - 1;
int i = 0;
while (i < MaxWordSize)
{
while (low <= high)
{
mid = (low + high) / 2;
// searchResult negative value means word is to the left, positive value means
// word is to the right, value of 0 means word was found
searchResult = strcmp(ikeaWord[i], dictionary[mid]);
if (searchResult == 0) {
// Word IS in dictionary, so return the index where the word was found
return mid;
}
else if (searchResult < 0)
{
high = mid - 1; // word should be located prior to mid location
}
else
{
low = mid + 1; // word should be located after mid location
}
}
i++;
}
// Word was not found
return -1;
}//end binarySearch()
//--------------------------------------------------------------------------------------
// Read in the words from the dictionary file
void readWordsInFromDictionaryFile(FILE *pInputFile, char dictionary[][MaxWordSize])
{
int index = 0; // index of dictionary word being read
int maxWordLength = 0;
// Associate the actual file name with file pointer and try to open it
pInputFile = fopen(DictionaryFileName, "r");
// verify that file open worked
if (pInputFile == NULL) {
printf("Can't open %s. Verify it is in correct location\n", DictionaryFileName);
exit(-1);
}
// Keep reading words while there are any
while (fscanf(pInputFile, "%s", dictionary[index]) != EOF) {
int tempLength = (int)strlen(dictionary[index]);
if (tempLength > maxWordLength) {
maxWordLength = tempLength;
}
index++;
}
// uncomment out code test array dictionary[][]
//printf("There were %d words in the dictionary, with max length %d. \n", index, maxWordLength);
fclose(pInputFile); // close the dictionary file
printf("There were %d words read from the dictionary with max length %d.\n", index, maxWordLength);
}//end readInputFile()
void readWordsInFromIkeaFile(FILE *pInputFile2, char ikeaWord[][MaxWordSize])
{
int index2 = 0; // index of dictionary word being read
int maxIkeaWordLength = 0;
// Associate the actual file name with file pointer and try to open it
pInputFile2 = fopen(IkeaFileName, "r");
// verify that file open worked
if (pInputFile2 == NULL)
{
printf("Can't open %s. Verify it is in correct location\n", IkeaFileName);
exit(-1);
}
// Keep reading words while there are any
while (fscanf(pInputFile2, "%s", ikeaWord[index2]) != EOF)
{
int tempLength2 = (int)strlen(ikeaWord[index2]);
if (tempLength2 > maxIkeaWordLength)
{
maxIkeaWordLength = tempLength2;
}
index2++;
}
printf("There were %d words read from the Ikea file with max length %d.\n", index2,maxIkeaWordLength);
}
//--------------------------------------------------------------------------------------
int main()
{
char dictionary[NumberOfWordsInDictionary][MaxWordSize];
char ikeaWord[NumberOfWordsInIkea][MaxWordSize];
FILE *pInputFile = fopen(DictionaryFileName, "r"); // file pointer
FILE *pInputFile2 = fopen(IkeaFileName, "r");
readWordsInFromDictionaryFile(pInputFile, dictionary);
readWordsInFromIkeaFile(pInputFile2, ikeaWord); // used as input
// Find index of word in dictionary
int index = -1;
int j = 0; // counter
while(j<NumberOfWordsInIkea)
{
index = binarySearch(ikeaWord[j], dictionary, NumberOfWordsInDictionary);
// Display results
if (index != -1)
{
// word was found, so display it
printf("The word \"%s\" was found.\n", dictionary[index]);
}
j++;
}
system("pause");
return 0;
}
I wrote it in visual studio 2015 if you need to know that too.
Thanks for the help!
You have several errors and unnecessary things in your code. I took the liberty to change some things to make it work (you might have already found them if you followed the hints in the comments) and some things to make it a bit cleaner (non compiler warnings from GCC). Did not check with MSVS because of lack of MSVS.
#define _CRT_SECURE_NO_WARNINGS
// changed values to accomodate different data-files sizes
#define NumberOfWordsInDictionary 99172
#define MaxWordSize 64
#define NumberOfWordsInIkea 1393
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// from /usr/dict/words (put to lower case)
const char DictionaryFileName[] = "words.txt";
// scraped from http://lar5.com/ikea/ (put to lower case)
const char IkeaFileName[] = "ikea_names.txt";
// ripped 'const' and changed ikeaWord[][] to take a the single entry
int binarySearch(char *ikeaWord, char dictionary[][MaxWordSize],
int numberOfDictionaryWords)
{
int low, mid, high;
int searchResult = -1;
low = 0;
high = numberOfDictionaryWords - 1;
// ripped outer loop because we search for Ikea names one by one
while (low <= high) {
mid = (low + high) / 2;
searchResult = strcmp(ikeaWord, dictionary[mid]);
if (searchResult == 0) {
return mid;
} else if (searchResult < 0) {
high = mid - 1;
} else {
low = mid + 1;
}
}
return -1;
}
int readWordsInFromDictionaryFile(FILE * pInputFile,
char dictionary[][MaxWordSize])
{
int index = 0;
int maxWordLength = 0;
// ripped fopen() because that happened already in main()
// Changed from fscanf to fgets because the *scanf() family is a
// never ending source of problems, see stackoverflow et al. for endless examples
while (fgets(dictionary[index], MaxWordSize - 1, pInputFile)) {
int tempLength = (int) strlen(dictionary[index]);
// Because of the change from fscanf to fgets we need to snip the newline off
// (for "\r\n" endings snipp two)
dictionary[index][tempLength - 1] = '\0';
if (tempLength > maxWordLength) {
maxWordLength = tempLength;
}
index++;
}
// If fgets returns NULL it is either EOF or an error
if (ferror(pInputFile)) {
fprintf(stderr, "something bad happend while reading dictionary\n");
return 0;
}
fclose(pInputFile);
printf("There were %d words read from the dictionary with max length %d.\n",
index, maxWordLength);
return 1;
}
// snipped off the addition of "2" to the variable names, no need for that
int readWordsInFromIkeaFile(FILE * pInputFile, char ikeaWord[][MaxWordSize])
{
int index = 0;
int maxIkeaWordLength = 0;
while (fgets(ikeaWord[index], MaxWordSize - 1, pInputFile)) {
int tempLength = (int) strlen(ikeaWord[index]);
ikeaWord[index][tempLength - 1] = '\0';
if (tempLength > maxIkeaWordLength) {
maxIkeaWordLength = tempLength;
}
index++;
}
if (ferror(pInputFile)) {
fprintf(stderr, "something bad happend while reading ikeawords\n");
return 0;
}
printf("There were %d words read from the Ikea file with max length %d.\n",
index, maxIkeaWordLength);
return 1;
}
//--------------------------------------------------------------------------------------
int main()
{
char dictionary[NumberOfWordsInDictionary][MaxWordSize];
char ikeaWord[NumberOfWordsInIkea][MaxWordSize];
int res;
// added error-checks
FILE *pInputFile = fopen(DictionaryFileName, "r");
if (pInputFile == NULL) {
fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
DictionaryFileName);
exit(EXIT_FAILURE);
}
FILE *pInputFile2 = fopen(IkeaFileName, "r");
if (pInputFile2 == NULL) {
fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
IkeaFileName);
exit(EXIT_FAILURE);
}
if ((res = readWordsInFromDictionaryFile(pInputFile, dictionary)) == 0) {
fprintf(stderr, "Error in reading dictionary\n");
exit(EXIT_FAILURE);
}
if ((res = readWordsInFromIkeaFile(pInputFile2, ikeaWord)) == 0) {
fprintf(stderr, "Error in reading ikea-file\n");
exit(EXIT_FAILURE);
}
int index = -1;
int j = 0;
while (j < NumberOfWordsInIkea) {
index = binarySearch(ikeaWord[j], dictionary, NumberOfWordsInDictionary);
if (index != -1) {
printf("The word \"%s\" was found.\n", dictionary[index]);
}
j++;
}
// Seems to be useful when run in MS-Windows
#if defined _WIN32 || defined WIN32 || defined WIN64 || defined _WIN64
sytem("pause");
#endif
exit(EXIT_SUCCESS);
}
I did not polish every corner, it still needs some work. For example: the two functions reading the two files are actually doing the same, just for a different file and a different dictionary. That can be done with a single function. The name of the files, the lengths of the file, and the length of the entries of these files are fixed, they can be made dynamic to be able to use different input without recompiling.
But all over: not bad for start!
I am working with hashtables for the first time and I think I have a basic understanding of how they work. I am using a hashtable to check to see if a word exists in a file. The program takes in a "dictionary" file and a word check file. The program works fine when I have a small dictionary but when I use a very large one, the words get overwritten. I was hoping to get some insight as to why. Here is my code:
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <pthread.h>
#include <tgmath.h>
#include <ctype.h>
#include "hashtable_constants.h"
#define HASH_SIZE 500
#define MAX_WORD_SIZE 50
struct hashTable {
int collisions;
char** words;
};
struct hashTable hashTables[HASH_SIZE];
int hashKey(char * str)
{
int key = 0;
for(int j = 0; j <= 51; j++)
{
if(str[j] == '\0')
break;
key += (int)str[j];
}
key = key % HASH_SIZE;
return key;
}
int main(int argc, char** argv)
{
if(argc > 3)
{
fprintf(stderr, "Too many arguments!\n");
return -1;
}
else if(argc < 3)
{
fprintf(stderr, "Not enough arguments!\n");
return -1;
}
FILE *dictionary = fopen(argv[1], "r");
FILE *wordCheck = fopen(argv[2], "r");
if(dictionary == NULL || wordCheck == NULL ) //ensure input file exists
{
fprintf(stderr, "Error accessing input files\n");
return -1;
}
for(int i = 0; i < HASH_SIZE; i++)
{
hashTables[i].collisions = 0;
hashTables[i].words = malloc(HASH_SIZE * MAX_WORD_SIZE);
}
struct stat fileStat1;
struct stat fileStat2;
stat(argv[1], &fileStat1);
stat(argv[2], &fileStat2);
char* dictBuffer = (char*)malloc(fileStat1.st_size + 1);
char* wordCheckBuff = (char*)malloc(fileStat2.st_size + 1);
if (dictBuffer == NULL || wordCheckBuff == NULL)
{
fprintf (stderr, "Memory error");
return -1;
}
fread(dictBuffer, 1, (int)fileStat1.st_size, dictionary);
fread(wordCheckBuff, 1, (int)fileStat2.st_size, wordCheck);
char* word = malloc(MAX_WORD_SIZE + 1);
int count = 0;
for(int i = 0; i < (int)fileStat1.st_size; i++)
{
char c = dictBuffer[i];
if(isspace(c))
{
word[count] = '\0';
char* wordToAdd = word;
int key = hashKey(wordToAdd);
int collisionIndex = hashTables[key].collisions;
hashTables[key].words[collisionIndex] = wordToAdd;
hashTables[key].collisions++;
count = 0;
free(word);
word = malloc(MAX_WORD_SIZE + 1);
//printf("Added: %s to hashtable at key: %d\n",word,key);
}
else
{
word[count] = c;
count++;
}
}
count = 0;
for(int i = 0; i < (int)fileStat2.st_size; i++)
{
char c = wordCheckBuff[i];
if(isspace(c))
{
word[count] = '\0';
char* wordToCheck = word;
int key = hashKey(wordToCheck);
int collisionIndex = hashTables[key].collisions;
int foundWord = 0;
for(int j = 0; j < collisionIndex; j++)
{
if(hashTables[key].words[j] == wordToCheck)
{
printf("%s == %s\n",hashTables[key].words[j], wordToCheck);
foundWord = 1;
break;
}
}
if(foundWord == 0)
printf("Not a word: %s\n", wordToCheck);
/*else
printf("Key: %d -- Is a word: %s\n",key, word);*/
free(word);
word = malloc(MAX_WORD_SIZE + 1);
count = 0;
}
else
{
word[count] = c;
count++;
}
}
for(int i = 0; i < HASH_SIZE; i++)
free(hashTables[i].words);
free(word);
fclose(dictionary);
fclose(wordCheck);
printf("done\n");
return 0;
}
On problem is that in the line:
hashTables[key].words[collisionIndex] = wordToAdd;
You add 'wordToAdd' to the table.
But wordToAdd is equal to word. A few lines later you call
free(word);
So the hash table now holds a pointer to freed memory.
This will lead to all sorts of undefined behaviour in the program, quite possibly seg-faults too. Also it's very likely that since the memory is now 'free', a subsequent call to malloc might return this same pointer again - which you will then fill with another word. Hence you see the overwriting of strings.
You need to review how you use 'malloc' / 'free' generally in the program. If you want a pointer to refer to a valid string, you cannot call 'free' on that pointer during the intended lifetime of that string.
What you want to do is malloc each string, and add the pointers to the hashtable. Then when you've finished with the hashtable, and no longer need the string data, then call 'free' on all the pointers contained within it. In your case, this will probably need to be in your cleanup code at the end of your program's execution.
I'm having some trouble with the output of this program. I need to print the verbs on one line, and I need to print a separate statement in the case that there are no verbs. For ex.
"talk and walk" should print "The verbs are: talk walk"
while "hello there" should print "There are no verbs"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int binary_search(char *list_of_words[], int size, char *target){
int bottom= 0;
int mid;
int top = size - 1;
int found = 0;
while(bottom <= top && !found){
mid = (bottom + top)/2;
if (strcmp(list_of_words[mid], target) == 0){
//printf("%s found at location %d.\n", target, mid+1);
found = 1;
} else if (strcmp(list_of_words[mid], target) > 0){
top = mid - 1;
} else if (strcmp(list_of_words[mid], target) < 0){
bottom = mid + 1;
}
}
if (found == 1)
return mid;
else
return -1;
}
int main(int argc, char* argv[]){
char *input = strtok(argv[1], " \"\n");
char *verbs[5] = { "do", "make", "take", "talk", "walk" };
int position;
int check = 0;
while (input != NULL) {
//printf("%s\n", input);
position = binary_search(verbs, 5, input);
if (position != -1)
printf("The verbs are: %s\n", verbs[position]);
check = 1;
input = strtok(NULL, " ");
}
if (check == 0){
printf("There are no verbs\n");
}
return 0;
}
Any ideas?
It seems to be working fine, but you need to add parenthesis around
if (position != -1)
printf("The verbs are: %s\n", verbs[position]);
check = 1;
like in
if (position != -1) {
printf("The verbs are: %s\n", verbs[position]);
check = 1;
}
otherwise check is always set 1 in the loop.
And if you do not want to repeat "The verbs are:" , add a check for that
if (position != -1) {
if (first) {
printf("The verbs are:");
first = 0;
check = 1;
}
printf(" %s", verbs[position]);
}
int main(int argc, char* argv[]){
char *input = strtok(argv[1], " \"\n");
char *verbs[5] = { "do", "make", "take", "talk", "walk" };
char match[5] = {0};
int position;
int check = 0;
while (input != NULL) {
//printf("%s\n", input);
position = binary_search(verbs, 5, input);
if (position != -1){
//printf("The verbs are: %s\n", verbs[position]);
match[position]=1;//match[position] = check = 1;
check = 1;
}
input = strtok(NULL, " ");
}
if (check == 0){
printf("There are no verbs\n");
} else {
int i;
printf("The verbs are: ");
for(i=0;i<5;++i)
if(match[i])
printf("%s ", verbs[i]);
printf("\n");
}
return 0;
}
If you're more interested in just having the search done, rather than implementing it yourself (i.e., assuming "implement a search" is not your actual task), you should use the standard library's little-known hero, bsearch().
Note that this requires the input data (the array you're searching in) to be sorted, but yours seems to be since you're already working on a binary search.