some quick help in comparing 2 files using binary search

some quick help in comparing 2 files using binary search - c

I wrote this code for a project and expected it to work, but it only performs the comparison for the very first word from one of my files (the IKEAwords.txt file).
Where did I go wrong?
so this is what I wrote hopefully it's enough.
/*Display each IKEA product name that can be found in the English dictionary.
The full list of the 1764 unique IKEA product words is in IKEAwords.txt
To see if words exist in English, use the 40,444 word English dictionary dictionary.txt,
where the longest word has 21 characters. To lookup a word in the dictionary consider
using binary search,
Print out each word that is found.
*/
#define _CRT_SECURE_NO_WARNINGS
#define NumberOfWordsInDictionary 40437
#define MaxWordSize 21+1
#define NumberOfWordsInIkea 1764
#include <stdio.h>
#include <string.h> // for string length
#include <stdlib.h> // for exit()
// Maximum size of any word in the dictionary, + 1 for null
const char DictionaryFileName[] = "dictionary.txt"; // File name for where dictionary words are found
const char IkeaFileName[] = "IKEAwords.txt";
//--------------------------------------------------------------------------------------
// Use binary search to look up the word from the .txt file in the dictionary array,
//returning index if found, -1 otherwise
// Binary search over dictionary[0 .. numberOfDictionaryWords-1] using strcmp().
// Returns the index of the first match found, or -1 when nothing matched.
// NOTE(review): ikeaWord is declared as a 2-D array and indexed as ikeaWord[i]
// below, i.e. it is treated as a list of words rather than as a single word.
int binarySearch(const char ikeaWord[][MaxWordSize], // word to be looked up
const char dictionary[][MaxWordSize], // the dictionary of words
int numberOfDictionaryWords //number of words in the dictionary
)
{
int low, mid, high; // array indices for binary search
int searchResult = -1; // holds the strcmp() result of the latest comparison
// Binary search for word
low = 0;
high = numberOfDictionaryWords - 1;
int i = 0;
// Outer loop walks i from 0 to MaxWordSize-1 and uses ikeaWord[i] as the key.
// NOTE(review): low and high are set once above and never reset inside this
// loop, so once the inner loop ends with low > high every later outer
// iteration skips the inner loop entirely.
while (i < MaxWordSize)
{
while (low <= high)
{
mid = (low + high) / 2;
// searchResult negative value means word is to the left, positive value means
// word is to the right, value of 0 means word was found
searchResult = strcmp(ikeaWord[i], dictionary[mid]);
if (searchResult == 0) {
// Word IS in dictionary, so return the index where the word was found
return mid;
}
else if (searchResult < 0)
{
high = mid - 1; // word should be located prior to mid location
}
else
{
low = mid + 1; // word should be located after mid location
}
}
i++;
}
// Word was not found
return -1;
}//end binarySearch()
//--------------------------------------------------------------------------------------
// Read in the words from the dictionary file
// Opens DictionaryFileName, reads whitespace-separated words into dictionary[]
// one per row, tracks the longest word, closes the file and prints a summary.
// Exits the program if the file cannot be opened.
// NOTE(review): the pInputFile argument is overwritten by the fopen() below, so
// whatever stream the caller passed in is not used by this function.
void readWordsInFromDictionaryFile(FILE *pInputFile, char dictionary[][MaxWordSize])
{
int index = 0; // index of dictionary word being read
int maxWordLength = 0;
// Associate the actual file name with file pointer and try to open it
pInputFile = fopen(DictionaryFileName, "r");
// verify that file open worked
if (pInputFile == NULL) {
printf("Can't open %s. Verify it is in correct location\n", DictionaryFileName);
exit(-1);
}
// Keep reading words while there are any
// NOTE(review): "%s" has no field width and index is not checked against the
// array size, so neither the row length nor the row count is bounded here.
while (fscanf(pInputFile, "%s", dictionary[index]) != EOF) {
int tempLength = (int)strlen(dictionary[index]);
if (tempLength > maxWordLength) {
maxWordLength = tempLength;
}
index++;
}
// uncomment out code test array dictionary[][]
//printf("There were %d words in the dictionary, with max length %d. \n", index, maxWordLength);
fclose(pInputFile); // close the dictionary file
printf("There were %d words read from the dictionary with max length %d.\n", index, maxWordLength);
}//end readInputFile()
// Opens IkeaFileName, reads whitespace-separated words into ikeaWord[] one per
// row, tracks the longest word and prints a summary.
// Exits the program if the file cannot be opened.
// NOTE(review): the pInputFile2 argument is overwritten by the fopen() below,
// and the stream opened here is not closed before the function returns.
void readWordsInFromIkeaFile(FILE *pInputFile2, char ikeaWord[][MaxWordSize])
{
int index2 = 0; // index of Ikea word being read
int maxIkeaWordLength = 0;
// Associate the actual file name with file pointer and try to open it
pInputFile2 = fopen(IkeaFileName, "r");
// verify that file open worked
if (pInputFile2 == NULL)
{
printf("Can't open %s. Verify it is in correct location\n", IkeaFileName);
exit(-1);
}
// Keep reading words while there are any
// NOTE(review): "%s" has no field width and index2 is not checked against the
// array size, so neither the row length nor the row count is bounded here.
while (fscanf(pInputFile2, "%s", ikeaWord[index2]) != EOF)
{
int tempLength2 = (int)strlen(ikeaWord[index2]);
if (tempLength2 > maxIkeaWordLength)
{
maxIkeaWordLength = tempLength2;
}
index2++;
}
printf("There were %d words read from the Ikea file with max length %d.\n", index2,maxIkeaWordLength);
}
//--------------------------------------------------------------------------------------
// Loads both word lists, then looks up every Ikea word in the dictionary and
// prints each one that is found.
int main()
{
// Both tables are automatic (stack) arrays sized by the compile-time constants.
char dictionary[NumberOfWordsInDictionary][MaxWordSize];
char ikeaWord[NumberOfWordsInIkea][MaxWordSize];
// NOTE(review): the results of these two fopen() calls are not checked here.
FILE *pInputFile = fopen(DictionaryFileName, "r"); // file pointer
FILE *pInputFile2 = fopen(IkeaFileName, "r");
readWordsInFromDictionaryFile(pInputFile, dictionary);
readWordsInFromIkeaFile(pInputFile2, ikeaWord); // used as input
// Find index of word in dictionary
int index = -1;
int j = 0; // counter
// The loop always runs NumberOfWordsInIkea times, regardless of how many
// words the reader reported.
while(j<NumberOfWordsInIkea)
{
// NOTE(review): ikeaWord[j] is a single row (one word), while binarySearch
// declares its first parameter as a 2-D array.
index = binarySearch(ikeaWord[j], dictionary, NumberOfWordsInDictionary);
// Display results
if (index != -1)
{
// word was found, so display it
printf("The word \"%s\" was found.\n", dictionary[index]);
}
j++;
}
system("pause");
return 0;
}
I wrote it in visual studio 2015 if you need to know that too.
Thanks for the help!

You have several errors and unnecessary things in your code. I took the liberty of changing some things to make it work (you might have already found them if you followed the hints in the comments) and some things to make it a bit cleaner (no compiler warnings from GCC). I did not check with MSVS because I do not have it.
#define _CRT_SECURE_NO_WARNINGS
// changed values to accommodate different data-file sizes
#define NumberOfWordsInDictionary 99172
#define MaxWordSize 64
#define NumberOfWordsInIkea 1393
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// from /usr/dict/words (put to lower case)
const char DictionaryFileName[] = "words.txt";
// scraped from http://lar5.com/ikea/ (put to lower case)
const char IkeaFileName[] = "ikea_names.txt";
// ripped 'const' and changed ikeaWord[][] to take a single entry
// Binary-search the dictionary for a single word.
//   ikeaWord                - NUL-terminated word to look up
//   dictionary              - table of words; must be in ascending strcmp() order
//   numberOfDictionaryWords - number of valid rows in dictionary
// Returns the row index of the matching entry, or -1 if the word is absent
// (which is also the result for an empty table).
int binarySearch(char *ikeaWord, char dictionary[][MaxWordSize],
                 int numberOfDictionaryWords)
{
    int low = 0;
    int high = numberOfDictionaryWords - 1;

    // One Ikea name is searched per call, so a single loop is all that is needed
    while (low <= high) {
        // low + (high - low) / 2 cannot overflow, unlike (low + high) / 2
        int mid = low + (high - low) / 2;
        // < 0: word sorts before dictionary[mid]; > 0: after; 0: found
        int searchResult = strcmp(ikeaWord, dictionary[mid]);

        if (searchResult == 0) {
            return mid;
        } else if (searchResult < 0) {
            high = mid - 1;
        } else {
            low = mid + 1;
        }
    }
    return -1;
}
int readWordsInFromDictionaryFile(FILE * pInputFile,
char dictionary[][MaxWordSize])
{
int index = 0;
int maxWordLength = 0;
// ripped fopen() because that happened already in main()
// Changed from fscanf to fgets because the *scanf() family is a
// never ending source of problems, see stackoverflow et al. for endless examples
while (fgets(dictionary[index], MaxWordSize - 1, pInputFile)) {
int tempLength = (int) strlen(dictionary[index]);
// Because of the change from fscanf to fgets we need to snip the newline off
// (for "\r\n" endings snipp two)
dictionary[index][tempLength - 1] = '\0';
if (tempLength > maxWordLength) {
maxWordLength = tempLength;
}
index++;
}
// If fgets returns NULL it is either EOF or an error
if (ferror(pInputFile)) {
fprintf(stderr, "something bad happend while reading dictionary\n");
return 0;
}
fclose(pInputFile);
printf("There were %d words read from the dictionary with max length %d.\n",
index, maxWordLength);
return 1;
}
// snipped off the addition of "2" to the variable names, no need for that
int readWordsInFromIkeaFile(FILE * pInputFile, char ikeaWord[][MaxWordSize])
{
int index = 0;
int maxIkeaWordLength = 0;
while (fgets(ikeaWord[index], MaxWordSize - 1, pInputFile)) {
int tempLength = (int) strlen(ikeaWord[index]);
ikeaWord[index][tempLength - 1] = '\0';
if (tempLength > maxIkeaWordLength) {
maxIkeaWordLength = tempLength;
}
index++;
}
if (ferror(pInputFile)) {
fprintf(stderr, "something bad happend while reading ikeawords\n");
return 0;
}
printf("There were %d words read from the Ikea file with max length %d.\n",
index, maxIkeaWordLength);
return 1;
}
//--------------------------------------------------------------------------------------
// Load the dictionary and the Ikea names, then print every Ikea name that is
// also a dictionary word. Exits with EXIT_FAILURE if a file cannot be opened
// or read.
int main(void)
{
    // static: the dictionary table alone is about 6 MB, which can exceed the
    // default stack size (e.g. 1 MB with MSVC). Static storage is also
    // zero-initialised, so rows the readers never fill are empty strings.
    static char dictionary[NumberOfWordsInDictionary][MaxWordSize];
    static char ikeaWord[NumberOfWordsInIkea][MaxWordSize];

    // added error-checks
    FILE *pInputFile = fopen(DictionaryFileName, "r");
    if (pInputFile == NULL) {
        fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
                DictionaryFileName);
        exit(EXIT_FAILURE);
    }
    FILE *pInputFile2 = fopen(IkeaFileName, "r");
    if (pInputFile2 == NULL) {
        fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
                IkeaFileName);
        exit(EXIT_FAILURE);
    }
    if (readWordsInFromDictionaryFile(pInputFile, dictionary) == 0) {
        fprintf(stderr, "Error in reading dictionary\n");
        exit(EXIT_FAILURE);
    }
    if (readWordsInFromIkeaFile(pInputFile2, ikeaWord) == 0) {
        fprintf(stderr, "Error in reading ikea-file\n");
        exit(EXIT_FAILURE);
    }

    // The readers do not report how many rows they filled. Trailing rows that
    // are still empty were never written, and including them would break the
    // sorted order the binary search relies on, so trim them off.
    int dictionaryCount = NumberOfWordsInDictionary;
    while (dictionaryCount > 0 && dictionary[dictionaryCount - 1][0] == '\0') {
        dictionaryCount--;
    }

    for (int j = 0; j < NumberOfWordsInIkea; j++) {
        // Empty row: either never filled or a blank line in the file
        if (ikeaWord[j][0] == '\0') {
            continue;
        }
        int index = binarySearch(ikeaWord[j], dictionary, dictionaryCount);
        if (index != -1) {
            printf("The word \"%s\" was found.\n", dictionary[index]);
        }
    }
    // Seems to be useful when run in MS-Windows
#if defined _WIN32 || defined WIN32 || defined WIN64 || defined _WIN64
    system("pause");
#endif
    return EXIT_SUCCESS;
}
I did not polish every corner, it still needs some work. For example: the two functions reading the two files are actually doing the same, just for a different file and a different dictionary. That can be done with a single function. The name of the files, the lengths of the file, and the length of the entries of these files are fixed, they can be made dynamic to be able to use different input without recompiling.
But all over: not bad for start!

Related

C: converting string array to float array OR read file with floats as floats not strings

I want to read a file (10K float numbers, each on its own row) and find its maximum element (I'm not that far yet).
I managed to get the file into a char array, but I need it to be a float array to be able to find the max value.
Thanks for any help.
#include <stdio.h>
#include <string.h>
// Reads the lines of a text file into a table of strings and prints them.
// The file name is taken from argv[1] or, if absent, prompted for on stdin.
int main(int argc, char* argv[]){
char nameDatei[100];
if(argv[1] != NULL) {
strcpy(nameDatei, argv[1]);
} else {
printf("type in the name of the file: ");
scanf("%s", nameDatei);
}
//read file;
FILE *fPointer;
// NOTE(review): the name collected in nameDatei is not used; a fixed file
// name is opened instead, and the fopen() result is not checked.
fPointer = fopen("findMaxOfFloats.txt", "r");
//save
char singleLine[100]; // only used for its size in the fgets() call below
char content[10000][100];
int i = 0; // number of lines stored so far
while (!feof(fPointer)){
if (fgets(content[i], sizeof(singleLine), fPointer) != NULL)
i++;
}
fclose(fPointer);
//print the array
// NOTE(review): prints all 10000 rows, not just the i rows that were read.
for(int loop = 0; loop < 10000; loop++){
printf("%s", content[loop]);
}
//find max...
return 0;
}

A few issues ...
You read a line into singleLine but don't copy it into content
feof is bad--just check the fgets return value
You want content to be double and not just a char array.
After doing fgets, you can/should use strtod to convert the string in singleLine into a binary/floating point value.
You want to maintain a count of the number of lines read (i.e. the number of valid elements in content).
You get a filename from either argv[1] or scanf but then use a hardwired name instead.
You don't check the return value of fopen
Here is the refactored code. It is annotated:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXCONTENT 10000
// Read up to MAXCONTENT floating point numbers (one per line) from a file,
// print them, then print the minimum and maximum.
// The file name comes from argv[1] or, if absent, is prompted for on stdin.
// Exits with status 1 on a bad file name, an unreadable file, too many values
// or a line that is not a number; returns 1 if the file held no numbers.
int
main(int argc, char *argv[])
{
    char nameDatei[100];

    if (argc > 1) {
        // reject names that do not fit instead of overflowing nameDatei
        if (strlen(argv[1]) >= sizeof(nameDatei)) {
            fprintf(stderr, "file name too long\n");
            exit(1);
        }
        strcpy(nameDatei, argv[1]);
    }
    else {
        printf("type in the name of the file: ");
        // %99s keeps the read inside nameDatei; fail if nothing was entered
        if (scanf("%99s", nameDatei) != 1) {
            fprintf(stderr, "no file name given\n");
            exit(1);
        }
    }

    // read file;
    FILE *fPointer;

    // NOTE/BUG: You get a filename from either argv[1] or scanf but don't use it
#if 0
    fPointer = fopen("findMaxOfFloats.txt", "r");
#else
    fPointer = fopen(nameDatei, "r");
    if (fPointer == NULL) {
        perror(nameDatei);
        exit(1);
    }
#endif

    // save
    char singleLine[100];
#if 0
    char content[10000][100];
#else
    double content[MAXCONTENT];
#endif

#if 0
    int i = 0;
    while (!feof(fPointer)) {
        if (fgets(content[i], sizeof(singleLine), fPointer) != NULL)
            i++;
    }
#else
    int count = 0;

    // read in all lines until EOF
    while (fgets(singleLine,sizeof(singleLine),fPointer) != NULL) {
        // skip blank lines: strtod would convert nothing and yield 0
        if (singleLine[strspn(singleLine, " \t\r\n")] == '\0')
            continue;

        // don't overflow the array
        if (count >= MAXCONTENT) {
            printf("count too large\n");
            exit(1);
        }

        // decode the number
        char *cp;
        content[count] = strtod(singleLine,&cp);

        // check for syntax error: something must have been converted, and only
        // whitespace may follow it (covers "\n", "\r\n" and a final line that
        // has no newline at all)
        if (cp == singleLine || cp[strspn(cp, " \t\r\n")] != '\0') {
            printf("syntax error: %s",singleLine);
            exit(1);
        }

        ++count;
    }
#endif
    fclose(fPointer);

    // content[0] is only meaningful if at least one value was read
    if (count == 0) {
        fprintf(stderr, "no values read\n");
        return 1;
    }

    // print the array
    for (int idx = 0; idx < count; ++idx)
        printf("%g\n", content[idx]);

    // find max... [and min, too ;-)]
    double max = content[0];
    double min = content[0];
    for (int idx = 1; idx < count; ++idx) {
        // get the current value
        double cur = content[idx];

        // set new maximum
        if (cur > max)
            max = cur;

        // set new minimum
        if (cur < min)
            min = cur;
    }
    printf("min=%g max=%g\n",min,max);

    return 0;
}
In the above code, I've used cpp conditionals to denote old vs. new code:
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
Note: this can be cleaned up by running the file through unifdef -k

qsort dynamic 2d char array with alphanumeric filenames - C program

I'm new here, so this is my first post. I've been struggling for 2 weeks to solve this problem. I'm trying to open a directory, capture and store the names of the files found, sort them in ascending order, and print the results. My issue is either qsort causes my program to crash entirely, or qsort doesn't sort the array at all because the files are alphanumeric. I even tried looping through a stored filename to output each character, just to see if I could eventually try comparing the characters between two array locations for sorting. But I noticed that it can't seem to see or recognize the numbers in the alphanumeric filename (for example: "f1.jpg" will only print "f", a blank, then "j", and that's it. I should note that I cannot change the file names because I don't know in advance the names or total files. I'm trying to make this to be dynamic. The following is the main code that I'm having problems with since it crashes at the 'qsort' keyword:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <conio.h>
#include <ctype.h>
#include <time.h>
#include <dirent.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
int compare(const void *a, const void *b);
void readInFilenames();
// Program entry point: runs the directory scan, then issues system("pause").
// NOTE(review): `int main` has no parameter list (e.g. `(void)`), so this
// definition does not compile as written.
int main
{
readInFilenames();
system("pause");
}
// qsort() comparator for an array whose elements are char* strings.
// a and b each point at one element; the strings they refer to are ordered
// with strcmp().
int compare(const void *a, const void *b)
{
    const char *const *left = a;
    const char *const *right = b;

    return strcmp(*left, *right);
}
// Changes into "files/", counts the regular files there (skipping .cproject
// and .project), stores their names, prints them, sorts them with qsort() and
// prints them again. Everything after the getcwd() report happens only if the
// directory could be opened.
void readInFilenames()
{
char cwd[1024];
DIR *dir = NULL;
struct dirent *pent = NULL;
struct stat info;
char file_path[50] = "files/";
int total_files = 0;
int file_size; // NOTE(review): stays uninitialised if no file is counted below
// Change directory to file location
// NOTE(review): the chdir() result is not checked
chdir(file_path);
if((getcwd(cwd, sizeof(cwd))) != NULL)
{
printf("Current Directory: %s\n", cwd);
}
// Open directory and count the total number of files found
dir = opendir(cwd);
if(dir != NULL)
{
while((pent = readdir(dir)) != NULL)
{
if(stat(pent->d_name, &info))
{
printf("ERROR: stat%s: %s\n", pent->d_name, strerror(errno));
}
else
{
if(S_ISREG(info.st_mode))
{
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
total_files++;
// sizeof gives the size of the d_name member, not the length of this name
file_size = sizeof(pent->d_name);
}
}
}
}
printf("# of files found: %d\n", total_files);
rewinddir(dir); //reset pointer back to beginning of file directory
// Create character array to store file names;
// NOTE(review): the element type is char *, so this is a total_files x
// file_size table of pointers rather than of characters.
char *filenames_arr[total_files][file_size];
// whole-array size divided by one row's size, i.e. the number of rows
int size = sizeof(filenames_arr)/sizeof(filenames_arr[total_files]);
total_files = 0; //reset file counter back to 0;
// Read and store file names in the character array
while((pent = readdir(dir)) != NULL)
{
if(stat(pent->d_name, &info))
{
printf("ERROR: stat%s: %s\n", pent->d_name, strerror(errno));
}
else
{
if(S_ISREG(info.st_mode))
{
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
// NOTE(review): the destination is a row of char * elements, into which
// the characters of the name are copied.
strcpy(filenames_arr[total_files], pent->d_name);
//printf("%s\n", filenames_arr[i]);
total_files++;
}
}
}
}
closedir(dir);
// Print original array contents
printf("Original List of Files\n");
printf("----------------------\n");
for(int i = 0; i < total_files; i++)
{
printf("%s\n", filenames_arr[i]);
}
// Sort array in ascending order
// NOTE(review): the third argument (element width) is `size`, the row count
// computed above, and compare() reads each element as a char * -- neither
// matches how the names were stored by strcpy() above.
qsort(filenames_arr, total_files, size, compare);
//qsort(filenames_arr, total_files, sizeof(filenames_arr[0]), (char (*)(const void*, const void*))strcmp);
// Print organized array contents
printf("Sorted List of Files\n");
printf("----------------------\n");
for(int i = 0; i < total_files; i++)
{
printf("%s\n", filenames_arr[i]);
}
printf("\nFinished!\n");
}
}
This portion of code is when I was trying to print each individual characters. This was originally located where the final array printing takes place in the previous code:
int i = 0;
int j = 0;
while(i < total_files)
{
printf("File Name: %s\n", filenames_arr[i]);
printf("String Length: %d\n", strlen(filenames_arr[i]));
while(filenames_arr[i] != '\0')
{
printf("Checking filenames_arr[%d][%d]\n", i, j);
if(isalpha((unsigned char)filenames_arr[i][j]) != 0)
{
printf("In isalpha\n");
printf("Found: %c\n", filenames_arr[i][j]);
}
else if(isdigit((unsigned char)filenames_arr[i][j]) != 0)
{
printf("In isdigit\n");
printf("Found: %d\n", filenames_arr[i][j]);
}
j++;
}
printf("-------------------------------------------\n");
i++;
j = 0;
}
How do I sort a 2D array of alphanumeric character strings using qsort? What is it about qsort, or even my array setup that's causing my program to crash? Also, how does qsort work? I've tried searching forums and online course notes to find out whether or not qsort only sorts by looking just at the first character, all characters, or if it has problems with numbers. Thank you in advance!
UPDATE:
I made the following edits to my code. Its working much better, in that qsort no longer crashes program. But, qsort still isn't sorting. Here are the updates I made, followed by a screenshot of the results:
typedef struct{
char *filename;
}filedata;
int compare(const void *a, const void *b);
void readInFilenames();
// Entry point: run the directory listing, then issue system("pause").
int main(void)
{
    readInFilenames();
    system("pause");
}
// qsort() comparator for an array of filedata: a and b each point at one
// element, and the elements are ordered by strcmp() on their filename members.
int compare (const void *a, const void *b ) {
    const filedata *left = a;
    const filedata *right = b;

    return strcmp(left->filename, right->filename);
}
readInFilenames(){
.
.
.
printf("# of files found: %d\n", total_files);
rewinddir(dir);
filedata fn_data[total_files];
total_files = 0;
printf("Original Array: \n");
while((pent = readdir(dir)) != NULL)
{
.
.
.
if((strcmp(pent->d_name, ".cproject") == 0) || (strcmp(pent->d_name, ".project") == 0))
{
continue;
}
else
{
fn_data[total_files].filename = malloc(file_size + 1);
strcpy(fn_data[total_files].filename, pent->d_name);
printf("%s\n", fn_data[total_files].filename);
total_files++;
}
}
closedir(dir);
printf("\n");
qsort(fn_data, total_files, sizeof(filedata), compare);
printf("Sorted Array:\n");
for(int i = 0; i < total_files; i++)
printf("%s\n", fn_data[i].filename);
printf("Finished!\n");
}
Click here to see sorting results
The list should print: f0.dat, f1.dat, f2.dat, f3.dat,...,f20.dat. But instead it prints: f0.dat, f1.dat, f10.dat, f11.dat,...,f9.dat.

OP has fixed code to cope with "qsort dynamic 2d char array with filenames" by enabling warnings and using #Snohdo advice.
Yet the code still compares with strcmp(), which treats digits only as characters and not numerically, so it cannot achieve the f1.dat, f2.dat, f3.dat,...,f20.dat order.
Following is a compare functions that looks for digits to invoke an alternate compare for numeric sub-strings. Variations on this compare can be made by OP to suit detailed coding goals.
// Compare two strings like strcmp(), except that where both strings have a
// digit at the same position the two digit runs are compared by numeric value
// ("f2" sorts before "f10"). Returns -1, 0 or 1.
// Runs of equal value that are spelled differently ("0001" vs "1") fall back
// to the character comparison; OP needs to decide whether that tie-break suits.
int AdamsOrder(const char *s1, const char *s2) {
    // Compare as `unsigned char` as that is `strcmp()` behavior. C11 7.24.1 3
    const unsigned char *us1 = (const unsigned char *) s1;
    const unsigned char *us2 = (const unsigned char *) s2;
    while (*us1 && *us2) {
        if (isdigit(*us1) && isdigit(*us2)) {
            // Compare the digit runs without converting them to an integer, so
            // runs of any length work (no overflow as with strtoull()).
            const unsigned char *d1 = us1;
            const unsigned char *d2 = us2;
            // Leading zeros do not change the value
            while (*d1 == '0') d1++;
            while (*d2 == '0') d2++;
            size_t n1 = 0;
            size_t n2 = 0;
            while (isdigit(d1[n1])) n1++;
            while (isdigit(d2[n2])) n2++;
            // Without leading zeros, more digits means a larger number
            if (n1 != n2) return (n1 > n2) ? 1 : -1;
            // Same digit count: the first differing digit decides
            for (size_t k = 0; k < n1; k++) {
                if (d1[k] != d2[k]) return (d1[k] > d2[k]) ? 1 : -1;
            }
            // Equal values: continue on treating as text
        }
        if (*us1 > *us2) return 1;
        if (*us1 < *us2) return -1;
        us1++;
        us2++;
    }
    // At this point, at least one string ended (i.e. points to '\0').
    // The return statement below will behave as follows:
    // If a string ended, *us1/2 will be 0. Let an unfinished one be X > 0.
    // First string ended : ( 0 > X ) - ( 0 < X ) = false - true = 0 - 1 = -1
    // Second string ended: ( X > 0 ) - ( X < 0 ) = true - false = 1 - 0 = 1
    // Both strings ended : ( 0 > 0 ) - ( 0 < 0 ) = false - false = 0 - 0 = 0
    return (*us1 > *us2) - (*us1 < *us2);
}

C - Get random words from text a file

I have a text file which contains a list of words in a precise order.
I'm trying to create a function that return an array of words from this file. I managed to retrieve words in the same order as the file like this:
// Reads 100 whitespace-separated words from "francais.txt", printing each one
// with its index, and returns the first word.
// NOTE(review): the fileName parameter is not used; the file name is fixed.
char *readDict(char *fileName) {
int i;
char * lines[100]; // local table of the 100 allocated word buffers
FILE *pf = fopen ("francais.txt", "r");
if (pf == NULL) {
printf("Unable to open the file");
} else {
for (i = 0; i < 100; i++) {
// NOTE(review): neither the malloc() nor the fscanf() result is checked,
// and "%s" has no width limit for the 128-byte buffer.
lines[i] = malloc(128);
fscanf(pf, "%s", lines[i]);
printf("%d: %s\n", i, lines[i]);
}
fclose(pf);
// *lines is lines[0]: only the first word is handed back; the other 99
// buffers are not returned or freed here.
return *lines;
}
// On open failure the literal string "NULL" is returned, not a null pointer.
return "NULL";
}
My question is: How can I return an array with random words from the text file; Not as the file words order?
The file looks like this:
exemple1
exemple2
exemple3
exemple4

Reservoir sampling allows you to select a fixed number of elements uniformly at random from a stream of indeterminate size. Something like this could work (although untested):
// Pick `count` lines at random from a text file using reservoir sampling
// ("Algorithm R").
//   filename - path of the file to sample
//   count    - number of lines wanted
// Returns a calloc()ed array of `count` strings, each allocated with strdup()
// and still carrying its line ending; the caller frees every entry and then
// the array. Entries are NULL where the file had fewer than `count` lines or a
// copy could not be allocated.
// Returns NULL if count is not positive, the file cannot be opened, or the
// array cannot be allocated.
char **reservoir_sample(const char *filename, int count) {
    FILE *file;
    char **lines;
    char buf[LINE_MAX];
    int i, n;

    if (count <= 0) {
        return NULL;
    }
    file = fopen(filename, "r");
    if (file == NULL) {
        return NULL;
    }
    lines = calloc(count, sizeof(char *));
    if (lines == NULL) {
        fclose(file);
        return NULL;
    }
    for (n = 1; fgets(buf, LINE_MAX, file); n++) {
        char *copy;

        if (n <= count) {
            // the first `count` lines fill the reservoir in order
            i = n - 1;
        } else {
            // line n replaces a random slot with probability count / n
            i = random() % n;
            if (i >= count) {
                continue;
            }
        }
        // Copy first, then swap: if the copy fails the slot keeps its old line
        copy = strdup(buf);
        if (copy == NULL) {
            continue;
        }
        free(lines[i]);
        lines[i] = copy;
    }
    fclose(file);
    return lines;
}
This is "Algorithm R":
Read the first count lines into the sample array.
For each subsequent line, replace a random element of the sample array with probability count / n, where n is the line number.
At the end, the sample contains a set of random lines. (The order is not uniformly random, but you can fix that with a shuffle.)

If each line of the file contains one word, one possibility would be to open the file and count the number of lines first. Then rewind() the file stream and select a random number, sel, in the range of the number of words in the file. Next, call fgets() in a loop to read sel words into a buffer. The last word read can be copied into an array that stores the results. Rewind and repeat for each word desired.
Here is a program that uses the /usr/share/dict/words file that is typical on Linux systems. Note that if the number of lines in the file is greater than RAND_MAX (the largest number that can be returned by rand()), words with greater line numbers will be ignored. This number can be as small as 32767. In the GNU C Library RAND_MAX is 2147483647.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define MAX_WORD 100
#define NUM_WORDS 10
/* Pick NUM_WORDS random lines from /usr/share/dict/words and print them.
 * The file is scanned once to count its lines; each pick then rewinds the
 * file and reads forward to a randomly chosen line number.
 * Exits with EXIT_FAILURE if the file cannot be opened or is empty. */
int main(void)
{
    /* Open words file */
    FILE *fp = fopen("/usr/share/dict/words", "r");
    if (fp == NULL) {
        perror("Unable to locate word list");
        exit(EXIT_FAILURE);
    }

    /* Count words in file */
    char word[MAX_WORD];
    long wc = 0;
    while (fgets(word, sizeof word, fp) != NULL) {
        ++wc;
    }

    /* rand() % wc below would divide by zero for an empty file */
    if (wc == 0) {
        fprintf(stderr, "Word list is empty\n");
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    /* Store random words in array */
    char randwords[NUM_WORDS][MAX_WORD];
    srand((unsigned) time(NULL));
    for (size_t i = 0; i < NUM_WORDS; i++) {
        rewind(fp);
        /* long, like wc, so the line number is not narrowed to int */
        long sel = rand() % wc + 1;
        for (long j = 0; j < sel; j++) {
            if (fgets(word, sizeof word, fp) == NULL) {
                perror("Error in fgets()");
                /* nothing more can be read; keep the last line that was read */
                break;
            }
        }
        strcpy(randwords[i], word);
    }
    if (fclose(fp) != 0) {
        perror("Unable to close file");
    }

    /* Display results */
    for (size_t i = 0; i < NUM_WORDS; i++) {
        printf("%s", randwords[i]);
    }
    return 0;
}
Program output:
biology's
lists
revamping
slitter
loftiness's
concur
solemnity's
memories
winch's
boosting
If blank lines in input are a concern, the selection loop can test for them and reset to select another word when they occur:
/* Store random words in array */
char randwords[NUM_WORDS][MAX_WORD];
srand((unsigned) time(NULL));
for (size_t i = 0; i < NUM_WORDS; i++) {
rewind(fp);
int sel = rand() % wc + 1;
for (int j = 0; j < sel; j++) {
if (fgets(word, sizeof word, fp) == NULL) {
perror("Error in fgets()");
}
}
if (word[0] == '\n') { // if line is blank
--i; // reset counter
continue; // and select another one
}
strcpy(randwords[i], word);
}
Note that if a file contains only blank lines, with the above modification the program would loop forever; it may be safer to count the number of blank lines selected in a row and skip until some reasonable threshold is reached. Better yet to verify that at least one line of the input file is not blank during the initial line-count:
/* Count words in file */
char word[MAX_WORD];
long wc = 0;
long nonblanks = 0;
while (fgets(word, sizeof word, fp) != NULL) {
++wc;
if (word[0] != '\n') {
++nonblanks;
}
}
if (nonblanks == 0) {
fprintf(stderr, "Input file contains only blank lines\n");
exit(EXIT_FAILURE);
}

Can't eliminate one character in my array while parsing it even though I handle that character

So this is my second time adapting my code to fscanf to get what I want. I threw some comments next to the output. The main issue I am having is that the one null character or space is getting added into the array. I have tried to check for the null char and the space in the string variable and it does not catch it. I am a little stuck and would like to know why my code is letting that one null character through?
Part where it is slipping up "Pardon, O King," output:King -- 1; -- 1
so here it parses king a word and then ," goes through the strip function and becomes \0, then my check later down the road allows it through??
Input: a short story containing apostrophes and commas (the lion's rock. First, the lion woke up)
//Output: Every unique word that shows up with how many times it shows up.
//Lion -- 1
//s - 12
//lion -- 8
//tree -- 2
//-- 1 //this is the line that prints a null char?
//cub -- //3 it is not a space! I even check if it is \0 before entering
//it into the array. Any ideas (this is my 2nd time)?
//trying to rewrite my code around a fscanf function.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
//Remove non-alpha numeric characters
// Copies the alphanumeric characters of string into a zero-filled 80-byte
// scratch buffer, then copies the result back over string.
void strip_word(char* string)
{
char* string_two = calloc(80, sizeof(char)); // scratch buffer; result not checked
int i;
int c = 0; // number of characters kept so far
for(i = 0; i < strlen(string); i++)
{
if(isalnum(string[i]))
{
string_two[c] = string[i];
++c;
}
}
// NOTE(review): the terminator is written at index i (the input length), not
// at c (the number of characters kept); the calloc() zero fill is what
// terminates the shorter result, and i is not checked against the 80 bytes.
string_two[i] = '\0';
strcpy(string, string_two);
free(string_two);
}
//Parse through file
// Reads words from text_file and builds two parallel arrays: the distinct
// words (after strip_word) and how often each occurred.
//   word_array / count_array   - receive the two arrays on return
//   total_count / unique_count - incremented for every word / every new word
// The arrays start with 8 slots and are doubled whenever they fill up.
void file_parse(FILE* text_file, char*** word_array, int** count_array, int* total_count, int* unique_count)
{
int mem_Size = 8;
int is_unique = 1;
char** words = calloc(mem_Size, sizeof(char *)); //Dynamically allocate array of size 8 of char*
// NOTE(review): allocation failures are reported but execution continues
if (words == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}
int* counts = calloc(mem_Size, sizeof(int)); //Dynamically allocate array of size 8 of int
if (counts == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}
printf("Allocated initial parallel arrays of size 8.\n");
fflush(stdout);
char* string;
// 'A' is non-zero, so this loops until the feof() break at the bottom
while('A')
{
is_unique = 1;
// skip optional whitespace followed by at most one comma, then one apostrophe
fscanf(text_file, " ,");
fscanf(text_file, " '");
// with the m modifier fscanf allocates the buffer itself; it is freed below
while(fscanf(text_file, "%m[^,' \n]", &string) == 1) //%m length modifier
{
is_unique = 1;
strip_word(string);
// NOTE(review): string is a char *, so this compares the pointer with null
// rather than testing the first character; an empty word is not caught here.
if(string == '\0') continue; //if the string is empty move to next iteration
else
{
int i = 0;
++(*total_count);
// linear search through the words seen so far
for(i = 0; i < (*unique_count); i++)
{
if(strcmp(string, words[i]) == 0)
{
counts[i]++;
is_unique = 0;
break;
}
}
if(is_unique)
{
++(*unique_count);
// grow both arrays before storing into the last free slot
if((*unique_count) >= mem_Size)
{
mem_Size = mem_Size*2;
words = realloc(words, mem_Size * sizeof(char *));
counts = realloc(counts, mem_Size * sizeof(int));
if(words == NULL || counts == NULL)
{
fprintf(stderr, "ERROR: realloc() failed!");
}
printf("Re-allocated parallel arrays to be size %d.\n", mem_Size);
fflush(stdout);
}
words[(*unique_count)-1] = calloc(strlen(string) + 1, sizeof(char));
strcpy(words[(*unique_count)-1], string);
counts[(*unique_count) - 1] = 1;
}
}
free(string);
}
if(feof(text_file)) break;
}
printf("All done (successfully read %d words; %d unique words).\n", *total_count, *unique_count);
fflush(stdout);
*word_array = words;
*count_array = counts;
}
// Usage: program <text-file> [N]
// Counts the words in the file and prints every distinct word with its count,
// or only the first N of them when a second argument is given.
int main(int argc, char* argv[])
{
if(argc < 2 || argc > 3) //Checks if too little or too many args
{
fprintf(stderr, "ERROR: Invalid Arguements\n");
return EXIT_FAILURE;
}
FILE * text_file = fopen(argv[1], "r");
// NOTE(review): the failure is reported but execution continues with a null
// stream
if (text_file == NULL)
{
fprintf(stderr, "ERROR: Can't open file");
}
int total_count = 0;
int unique_count = 0;
char** word_array;
int* count_array;
file_parse(text_file, &word_array, &count_array, &total_count, &unique_count);
fclose(text_file);
int i;
if(argv[2] == NULL)
{
printf("All words (and corresponding counts) are:\n");
fflush(stdout);
for(i = 0; i < unique_count; i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}
else
{
printf("First %d words (and corresponding counts) are:\n", atoi(argv[2]));
fflush(stdout);
// NOTE(review): the loop bound is atoi(argv[2]) and is not limited to
// unique_count, the number of entries file_parse reported.
for(i = 0; i < atoi(argv[2]); i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}
// release every stored word, then the two arrays
for(i = 0; i < unique_count; i++)
{
free(word_array[i]);
}
free(word_array);
free(count_array);
return EXIT_SUCCESS;
}

I'm not sure quite what's going wrong with your code. I'm working on macOS Sierra 10.12.3 with GCC 6.3.0, and the local fscanf() does not support the m modifier. Consequently, I modified the code to use a fixed size string of 80 bytes. When I do that (and only that), your program runs without obvious problem (certainly on the input "the lion's rock. First, the lion woke up").
I also think that the while ('A') loop (which should be written conventionally while (1) if it is used at all) is undesirable. I wrote a function read_word() which gets the next 'word', including skipping blanks, commas and quotes, and use that to control the loop. I left your memory allocation in file_parse() unchanged. I did get rid of the memory allocation in strip_word() (eventually — it worked OK as written too).
That left me with:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
// Remove every non-alphanumeric character from string, in place.
// The result is never longer than the input, so no scratch buffer is needed
// and strings of any length are handled.
static void strip_word(char *string)
{
    char *out = string;

    for (const char *in = string; *in != '\0'; in++)
    {
        // <ctype.h> functions need an unsigned char value; a negative plain
        // char is undefined behaviour
        if (isalnum((unsigned char)*in))
            *out++ = *in;
    }
    *out = '\0';
}
// Read the next word from fp into string (at least 80 bytes).
// Commas, apostrophes and whitespace separate words; any run of them before
// the word is skipped. A word longer than 79 characters is split.
// Returns 0 when a word was stored, EOF when the input is exhausted.
static int read_word(FILE *fp, char *string)
{
    // Skip the whole run of separators. Skipping just one comma and one
    // apostrophe would leave input such as ",," or "'," in front of the next
    // word, make the word conversion fail and end parsing early.
    // %*[...] stores nothing; 0 only means there was nothing to skip.
    if (fscanf(fp, "%*[,' \t\r\n]") == EOF)
        return EOF;
    if (fscanf(fp, "%79[^,' \t\r\n]", string) != 1)
        return EOF;
    return 0;
}
// Read words from text_file and build two parallel arrays: the distinct words
// (after strip_word) and how often each occurred.
//   word_array / count_array   - receive the two arrays; the caller frees each
//                                word and then both arrays
//   total_count / unique_count - incremented for every word / every new word
// The arrays start with 8 slots and double whenever they fill up.
// Exits with EXIT_FAILURE if memory cannot be allocated.
static void file_parse(FILE *text_file, char ***word_array, int **count_array, int *total_count, int *unique_count)
{
    int mem_Size = 8;
    char **words = calloc(mem_Size, sizeof(char *));
    int *counts = calloc(mem_Size, sizeof(int));
    // Carrying on without the arrays would dereference a null pointer below
    if (words == NULL || counts == NULL)
    {
        fprintf(stderr, "ERROR: calloc() failed!");
        exit(EXIT_FAILURE);
    }
    printf("Allocated initial parallel arrays of size 8.\n");
    fflush(stdout);

    char string[80];
    while (read_word(text_file, string) != EOF)
    {
        printf("Got [%s]\n", string);
        strip_word(string);
        // Nothing alphanumeric in this token: not a word, do not count it
        if (string[0] == '\0')
            continue;
        ++(*total_count);

        // Linear search through the words seen so far
        int is_unique = 1;
        for (int i = 0; i < (*unique_count); i++)
        {
            if (strcmp(string, words[i]) == 0)
            {
                counts[i]++;
                is_unique = 0;
                break;
            }
        }
        if (!is_unique)
            continue;

        ++(*unique_count);
        // Grow both arrays before storing into the last free slot
        if ((*unique_count) >= mem_Size)
        {
            mem_Size = mem_Size * 2;
            words = realloc(words, mem_Size * sizeof(char *));
            counts = realloc(counts, mem_Size * sizeof(int));
            if (words == NULL || counts == NULL)
            {
                fprintf(stderr, "ERROR: realloc() failed!");
                exit(EXIT_FAILURE);
            }
            printf("Re-allocated parallel arrays to be size %d.\n", mem_Size);
            fflush(stdout);
        }
        char *copy = calloc(strlen(string) + 1, sizeof(char));
        if (copy == NULL)
        {
            fprintf(stderr, "ERROR: calloc() failed!");
            exit(EXIT_FAILURE);
        }
        strcpy(copy, string);
        words[(*unique_count) - 1] = copy;
        counts[(*unique_count) - 1] = 1;
    }
    printf("All done (successfully read %d words; %d unique words).\n", *total_count, *unique_count);
    fflush(stdout);
    *word_array = words;
    *count_array = counts;
}
/*
 * Count the words in a text file and print them with their frequencies.
 *
 * Usage: program FILE [N]
 *   FILE  text file to analyse.
 *   N     optional; print only the first N unique words.  If N exceeds the
 *         number of unique words found, all of them are printed.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad arguments or if FILE cannot
 * be opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 2 || argc > 3)
    {
        fprintf(stderr, "ERROR: Invalid Arguements\n");
        return EXIT_FAILURE;
    }
    FILE *text_file = fopen(argv[1], "r");
    if (text_file == NULL)
    {
        fprintf(stderr, "ERROR: Can't open file");
        return EXIT_FAILURE;
    }

    int total_count = 0;
    int unique_count = 0;
    char **word_array = 0;
    int *count_array = 0;
    file_parse(text_file, &word_array, &count_array, &total_count, &unique_count);
    fclose(text_file);

    /* argv[argc] is always NULL, so this is true exactly when N was omitted. */
    if (argv[2] == NULL)
    {
        printf("All words (and corresponding counts) are:\n");
        fflush(stdout);
        for (int i = 0; i < unique_count; i++)
        {
            printf("%s -- %d\n", word_array[i], count_array[i]);
            fflush(stdout);
        }
    }
    else
    {
        /* Convert once, and never index past the entries that exist. */
        int requested = atoi(argv[2]);
        int limit = (requested < unique_count) ? requested : unique_count;

        printf("First %d words (and corresponding counts) are:\n", requested);
        fflush(stdout);
        for (int i = 0; i < limit; i++)
        {
            printf("%s -- %d\n", word_array[i], count_array[i]);
            fflush(stdout);
        }
    }

    for (int i = 0; i < unique_count; i++)
        free(word_array[i]);
    free(word_array);
    free(count_array);
    return EXIT_SUCCESS;
}
When run on the data file:
the lion's rock. First, the lion woke up
the output was:
Allocated initial parallel arrays of size 8.
Got [the]
Got [lion]
Got [s]
Got [rock.]
Got [First]
Got [the]
Got [lion]
Got [woke]
Got [up]
All done (successfully read 9 words; 7 unique words).
All words (and corresponding counts) are:
the -- 2
lion -- 2
s -- 1
rock -- 1
First -- 1
woke -- 1
up -- 1
When the code was run on your text, including double quotes, like this:
$ echo '"Pardon, O King,"' | cw37 /dev/stdin
Allocated initial parallel arrays of size 8.
Got ["Pardon]
Got [O]
Got [King]
Got ["]
All done (successfully read 3 words; 3 unique words).
All words (and corresponding counts) are:
Pardon -- 1
O -- 1
King -- 1
$
It took a little finagling of the code. If there isn't an alphabetic character, your code still counts it (because of subtle problems in strip_word()). That would need to be handled by checking the result of strip_word() more carefully: you test if (string == '\0'), which checks (belatedly) whether memory was allocated, where you need if (string[0] == '\0') to test whether the string is empty.
Note that the code in read_word() would be confused into reporting EOF if there were two commas in a row, or an apostrophe followed by a comma (though it handles a comma followed by an apostrophe OK). Fixing that is fiddlier; you'd probably be better off using a loop with getc() to read a string of characters. You could even use that loop to strip non-alphabetic characters without needing a separate strip_word() function.
I am assuming you've not covered structures yet. If you had covered structures, you'd use an array of a structure such as struct Word { char *word; int count; }; and allocate the memory once, rather than needing two parallel arrays.

Search not working need some advice

I'm working on a binary search. The code below should explain what I'm trying to do. The user inputs a word and then a binary search is implemented to search a wordlist. The problem is the binary search: it runs, but it doesn't find the word in the wordlist even though I know it's there. I know the code could be better, but it should work. Can anyone shed any light?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char dictionary[400000][45];
/*
 * Spell-check driver: reads a dictionary file line by line into `wordlist`,
 * then repeatedly prompts for a word and looks it up with BinarySearch().
 *
 * NOTE(review): `fp1`, `wordlist` and `wordcheck` are used below, but no
 * declaration of them is visible in this listing (only the local `infile`
 * and the global `dictionary` are declared), and BinarySearch() is called
 * before any prototype for it appears.
 */
int main(void)
{
FILE infile;
int i=0;
/* `num` is not used anywhere in this function. */
int num;
int index;
char buffer[45];
char userword[45];
fp1 = fopen("C:/Users/Aaron/ProgrammingAssignment/dictionary.txt","rb");
if (fp1 == NULL)
{
printf("The dictionary file did not open\n");
exit(0);
}
else
{
printf("Dictionary file is open\n");
}
/* Copy each line into the next slot; `i` ends up as the number of lines read.
   fgets() keeps the trailing newline when it fits in the buffer, so the
   stored entries include it. */
while(fgets(buffer,45, fp1)!=NULL)
{
strcpy(wordlist[i],buffer);
//printf("Line %d: %s",i,wordlist[i]);
i++;
}
printf("Your wordlist is now in the dictionary array");
do
{
//fscanf(fp2,"%s", userword);
printf("Enter a word to be spell checked: ");
/* As above, the user's word keeps its trailing newline. */
fgets(userword, 43, stdin);
//and do a binary search
/* The upper bound passed here is the entry count, i.e. one past the last
   filled index. */
index = BinarySearch(userword,0,i);
if(index > -1)
printf("%s was found in the wordlist", userword);
else
printf("%s was not found in the dictionary", wordcheck);
}
/* NOTE(review): if `wordlist` is an array, this condition is always true,
   so the loop never ends and the code after it is never reached - confirm. */
while(wordlist != NULL);
if(index>-1) //The word was found
{
printf("That is correctly spelled\n");
}
else
{
printf("That word is spelt wrong\n");
}
return 0;
}
/*
 * Iterative binary search for `userword` in `wordlist`, comparing entries
 * with strcmp().
 *
 * Returns the index of the matching entry, or -1 when the search range
 * becomes empty without a match.
 *
 * The `left` and `right` parameters are never read: the search always runs
 * over the hard-coded range 0..400000, regardless of how many entries the
 * caller actually filled.
 * NOTE(review): presumably `wordlist` is sorted in strcmp() order and has
 * 400000 rows (making index 400000 one past the end) - its declaration is
 * not visible here, so confirm.
 */
int BinarySearch(const char userword[],int left,int right)
{ int high = 400000;
int low = 0;
int target;
/* `count` is not used anywhere in this function. */
int count = 0;
while (high >= low)
/* Midpoint computed as low + half the span, which avoids forming low + high. */
{ target = low + ((high - low) / 2);
// show tries for demonstration only
printf("%d, ",target);
/* strcmp() is evaluated up to twice per probe on the same pair of strings. */
if (strcmp(userword, wordlist[target]) < 0)
high = target -1;
else if (strcmp(userword, wordlist[target]) > 0)
low = target + 1;
else
return target;
}
return -1;
}

Your binary search function is ignoring the values left and right that are passed in.
It shouldn't.
It should probably start:
int BinarySearch(const char userword[], int left, int right)
{
int high = right;
int low = left;
You should close the dictionary after you finish reading it.
You need to consider whether right is the index of the last valid element or 'one after the index of the last element'. This might mean you need to pass i - 1 in the call to the function.
You should consider calling strcmp() once and capturing its return value; it is relatively expensive:
/* Compare once per probe and branch on the saved result. */
int rc = strcmp(userword, wordlist[target]);
if (rc == 0)
    return target;
else if (rc < 0)
    high = target - 1;   /* userword sorts before the probe: search the lower half */
else
    low = target + 1;    /* userword sorts after the probe: search the upper half */

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

some quick help in comparing 2 files using binary search - c

Related

C: converting string array to float array OR read file with floats as floats not strings

qsort dynamic 2d char array with alphanumeric filenames - C program

C - Get random words from text a file

Can't eliminate one character in my array while parsing it even though I handle that character

Search not working need some advice

Categories

Resources