I am working with hashtables for the first time and I think I have a basic understanding of how they work. I am using a hashtable to check to see if a word exists in a file. The program takes in a "dictionary" file and a word check file. The program works fine when I have a small dictionary but when I use a very large one, the words get overwritten. I was hoping to get some insight as to why. Here is my code:
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <pthread.h>
#include <tgmath.h>
#include <ctype.h>
#include "hashtable_constants.h"
#define HASH_SIZE 500
#define MAX_WORD_SIZE 50
struct hashTable {
int collisions;
char** words;
};
struct hashTable hashTables[HASH_SIZE];
int hashKey(char * str)
{
int key = 0;
for(int j = 0; j <= 51; j++)
{
if(str[j] == '\0')
break;
key += (int)str[j];
}
key = key % HASH_SIZE;
return key;
}
int main(int argc, char** argv)
{
if(argc > 3)
{
fprintf(stderr, "Too many arguments!\n");
return -1;
}
else if(argc < 3)
{
fprintf(stderr, "Not enough arguments!\n");
return -1;
}
FILE *dictionary = fopen(argv[1], "r");
FILE *wordCheck = fopen(argv[2], "r");
if(dictionary == NULL || wordCheck == NULL ) //ensure input file exists
{
fprintf(stderr, "Error accessing input files\n");
return -1;
}
for(int i = 0; i < HASH_SIZE; i++)
{
hashTables[i].collisions = 0;
hashTables[i].words = malloc(HASH_SIZE * MAX_WORD_SIZE);
}
struct stat fileStat1;
struct stat fileStat2;
stat(argv[1], &fileStat1);
stat(argv[2], &fileStat2);
char* dictBuffer = (char*)malloc(fileStat1.st_size + 1);
char* wordCheckBuff = (char*)malloc(fileStat2.st_size + 1);
if (dictBuffer == NULL || wordCheckBuff == NULL)
{
fprintf (stderr, "Memory error");
return -1;
}
fread(dictBuffer, 1, (int)fileStat1.st_size, dictionary);
fread(wordCheckBuff, 1, (int)fileStat2.st_size, wordCheck);
char* word = malloc(MAX_WORD_SIZE + 1);
int count = 0;
for(int i = 0; i < (int)fileStat1.st_size; i++)
{
char c = dictBuffer[i];
if(isspace(c))
{
word[count] = '\0';
char* wordToAdd = word;
int key = hashKey(wordToAdd);
int collisionIndex = hashTables[key].collisions;
hashTables[key].words[collisionIndex] = wordToAdd;
hashTables[key].collisions++;
count = 0;
free(word);
word = malloc(MAX_WORD_SIZE + 1);
//printf("Added: %s to hashtable at key: %d\n",word,key);
}
else
{
word[count] = c;
count++;
}
}
count = 0;
for(int i = 0; i < (int)fileStat2.st_size; i++)
{
char c = wordCheckBuff[i];
if(isspace(c))
{
word[count] = '\0';
char* wordToCheck = word;
int key = hashKey(wordToCheck);
int collisionIndex = hashTables[key].collisions;
int foundWord = 0;
for(int j = 0; j < collisionIndex; j++)
{
if(hashTables[key].words[j] == wordToCheck)
{
printf("%s == %s\n",hashTables[key].words[j], wordToCheck);
foundWord = 1;
break;
}
}
if(foundWord == 0)
printf("Not a word: %s\n", wordToCheck);
/*else
printf("Key: %d -- Is a word: %s\n",key, word);*/
free(word);
word = malloc(MAX_WORD_SIZE + 1);
count = 0;
}
else
{
word[count] = c;
count++;
}
}
for(int i = 0; i < HASH_SIZE; i++)
free(hashTables[i].words);
free(word);
fclose(dictionary);
fclose(wordCheck);
printf("done\n");
return 0;
}
On problem is that in the line:
hashTables[key].words[collisionIndex] = wordToAdd;
You add 'wordToAdd' to the table.
But wordToAdd is equal to word. A few lines later you call
free(word);
So the hash table now holds a pointer to freed memory.
This will lead to all sorts of undefined behaviour in the program, quite possibly seg-faults too. Also it's very likely that since the memory is now 'free', a subsequent call to malloc might return this same pointer again - which you will then fill with another word. Hence you see the overwriting of strings.
You need to review how you use 'malloc' / 'free' generally in the program. If you want a pointer to refer to a valid string, you cannot call 'free' on that pointer during the intended lifetime of that string.
What you want to do is malloc each string, and add the pointers to the hashtable. Then when you've finished with the hashtable, and no longer need the string data, then call 'free' on all the pointers contained within it. In your case, this will probably need to be in your cleanup code at the end of your program's execution.
Related
I want to read all .txt files in the directory and add those file names to an array. Catching the text files part is okay but I am having a problem storing those file names inside an array. What is the mistake I've done here? This is my code.
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <strings.h>
int main()
{
DIR *p;
struct dirent *pp;
p = opendir ("./");
char file_list[10][10];
char shades[10][10];
int i = 0;
if (p != NULL)
{
while ((pp = readdir (p))!=NULL) {
int length = strlen(pp->d_name);
if (strncmp(pp->d_name + length - 4, ".txt", 4) == 0) {
puts (pp->d_name);
strcpy(shades[i], pp->d_name);
}
}
i = i + 1;
(void) closedir (p);
for(int i=0; i<4; i++){
printf("\n %s", &shades[i]);
}
}
return(0);
}
What seems to be a problem here is that 'strings' in C works much different than in C++ or C# or Python.
To make it work you'll need to alloc memory for each string.
If I understood correctly what you want to do, here's an example:
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#define FILE_LIST_MAX 10
int main()
{
DIR *p = opendir("./");
struct dirent *pp;
char **file_list = malloc(FILE_LIST_MAX * sizeof(char*));
int i = 0;
if (p == NULL) {
printf("Could not open current directory" );
return 0;
}
while ((pp = readdir(p)) != NULL) {
int length = strlen(pp->d_name);
printf ("%s\n", pp->d_name);
if(length > 4 && memcmp(pp->d_name + length - 4, ".txt", 4) == 0) {
char filename[length];
sprintf(filename, "%s", pp->d_name);
file_list[i] = malloc(length * sizeof(char));
strcpy(file_list[i], filename);
i++;
}
}
(void) closedir(p);
printf("\n.txt filenames within array");
for(int j = 0; j < i; j++) {
printf("\n%s", file_list[j]);
}
for(int j = 0; j < i; j++) {
free(file_list[j]);
}
free(file_list);
return(0);
}
It's not perfect though as you must manually change file_list capacity in FILE_LIST_MAX
Here's an example output from executing this code:
Much better approach would be implementing a dynamic array of chars which will automatically resizes as necessary.
I'm trying to find all the words with capital letters in a string, but am unable to process my data structure. i seem to be able to print out fileContent, indicating that it is loading in successfully, but my second function is not working on the file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* loadFile(char* fileName)
{
FILE *inputFile;
inputFile = fopen(fileName, "r");
//finds the end of the file
fseek(inputFile, 0, SEEK_END);
//stores the size of the file
int size = ftell(inputFile);
//Sets the scan to the start of the file
fseek(inputFile, 0, SEEK_SET);
char *documentStore = (char*)malloc(size);
int i = 0, c;
while((c = fgetc(inputFile)) != EOF)
{
documentStore[i] = c;
i++;
}
return documentStore;
}
void countImportantWords(char* fileContent, char** importantWords, int* frequencyWords)
{
int uniqueWordCount = 0;
int lengthWordStore = 10;
int i = 0;
int recording = 0;
char wordBuffer[50];
int wordBufferCount = 0;
int isWordPresent = 0;
while(fileContent[i] != EOF)
{
//To allocate more memory incase the structure is full
if(uniqueWordCount == lengthWordStore)
{
lengthWordStore += 10;
char** newWordStore = realloc(importantWords, lengthWordStore * sizeof(char*));
int* newFrequencyStore = realloc(frequencyWords, sizeof(int));
importantWords = newWordStore;
frequencyWords = newFrequencyStore;
}
printf("%s", wordBuffer);
//Conditions to fill if its a word
if(fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 0)
{
wordBuffer[0] = fileContent[i];
recording = 1;
}else if(fileContent[i] >= 'a' && fileContent[i] <= 'z' && recording == 1)
{
//each if is to check if the end of word is reached. Any character that is non alphabetical is considered end of word
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else if (fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 1)
{
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else {
//Adding a terminating character so that it strcpy only copies until that point
wordBuffer[wordBufferCount + 1] = '\0';
recording = 0;
//check to see if that word is in the array already, and if it is, it will just increment the frequency
for(int j = 0; j < uniqueWordCount; j++){
if(strcmp(wordBuffer, importantWords[j]) == 0)
{
frequencyWords[j] += 1;
isWordPresent = 1;
}
}
//if its not present, it should assign it to the structure
if(isWordPresent == 0)
{
char* wordStore = (char*)malloc(wordBufferCount * sizeof(char));
strcpy(wordStore, wordBuffer);
uniqueWordCount += 1;
importantWords[uniqueWordCount] = wordStore;
frequencyWords[uniqueWordCount] = 1;
}
}
i++;
}
}
int main() {
char fileName[50];
char *fileContent;
char **importantWords = (char**)malloc(10*sizeof(char**));
int *frequencyWords = (int*)malloc(10*sizeof(int));
printf("Please input the full file path: ");
scanf("%s", fileName);
fileContent = loadFile(fileName);
countImportantWords(fileContent, importantWords, frequencyWords);
int i = 0;
while(importantWords[i] != '\0')
{
printf("%s %d", importantWords[i], frequencyWords[i]);
i++;
}
return 0;
}
I've put in the full file so you can see how the structure was created incase that it is the issue, but ideally what would happen is that the final loop would print out all the words that are important and they're frequency. Currently i'm getting exit code 11, which i'm not sure what it means, but may be worth mentioning. I'd really appreciate any help :)
You can simplify the process dramatically but utilising functions and learning to manage your memory. I wrote a short example which does not take punctuation into account. It just assumes every word is separated by a space, which you can customise to your discretion.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
char* readfile(char* filename){
char* data = NULL;
FILE* file = fopen(filename, "r");
if(file == NULL){
return NULL;
}
fseek(file, 0, SEEK_END);
long size = ftell(file)+1;
fseek(file, 0, SEEK_SET);
data = (char*)malloc(size);
if(data == NULL){
return NULL;
}
fgets(data, (int)size, file);
return data;
}
typedef struct uppercase_t{
char** word;
int count;
}uppercase;
void copy(uppercase* u,char* token){
size_t length = strlen(token);
u->word[u->count] = (char*)malloc(length+1);
if(u->word[u->count] == NULL){
return;
}
strcpy(u->word[u->count], token);
++u->count;
}
void createuppercasedata(uppercase* u, char* data){
const char delimeter[] = " ";
char* token = strtok(data, delimeter);
if(token == NULL){
return;
}
u->word = (char**)malloc(u->count+1);
if(u->word == NULL){
return;
}
if(isupper(token[0])){
copy(u,token);
}
while(token != NULL){
token = strtok(0, delimeter);
if(token != NULL)
if(isupper(token[0])) {
char** reallocated = (char**)realloc(u->word, u->count+1);
if(reallocated == NULL){
return;
}
u->word = reallocated;
copy(u, token);
}
}
}
void destroyuppercasedata(uppercase* u){
for(int index = 0; index < u->count; ++index){
free(u->word[index]);
}
free(u->word);
}
int main(){
char filename[] = "textfile";
char* data = readfile(filename);
if(data == NULL){
return -1;
}
uppercase u = {0};
createuppercasedata(&u, data);
printf("found %i uppercase words\n",u.count);
for(int index = 0; index < u.count; ++index){
printf("%s\n", u.word[index]);
}
destroyuppercasedata(&u);
free(data);
}
The code will allocate a new pointer for each uppercase and memory for the word to be copied too. It will free all the memory it allocated in the structure with destroyuppercasedata and it will free the initial data that was read from file. Error checking and memory management in C is really important. So utilise those properly.
This was the test file I used.
textfile
How many Uppercase words can Be Found In this text File the answer should be Seven
And this was the output to the terminal:
How
Uppercase
Be
Found
In
File
Seven
So this is my second time adapting my code to fscanf to get what I want. I threw some comments next to the output. The main issue I am having is that the one null character or space is getting added into the array. I have tried to check for the null char and the space in the string variable and it does not catch it. I am a little stuck and would like to know why my code is letting that one null character through?
Part where it is slipping up "Pardon, O King," output:King -- 1; -- 1
so here it parses king a word and then ," goes through the strip function and becomes \0, then my check later down the road allows it through??
Input: a short story containing apostrophes and commas (the lion's rock. First, the lion woke up)
//Output: Every unique word that shows up with how many times it shows up.
//Lion -- 1
//s - 12
//lion -- 8
//tree -- 2
//-- 1 //this is the line that prints a null char?
//cub -- //3 it is not a space! I even check if it is \0 before entering
//it into the array. Any ideas (this is my 2nd time)?
//trying to rewrite my code around a fscanf function.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
//Remove non-alpha numeric characters
void strip_word(char* string)
{
char* string_two = calloc(80, sizeof(char));
int i;
int c = 0;
for(i = 0; i < strlen(string); i++)
{
if(isalnum(string[i]))
{
string_two[c] = string[i];
++c;
}
}
string_two[i] = '\0';
strcpy(string, string_two);
free(string_two);
}
//Parse through file
void file_parse(FILE* text_file, char*** word_array, int** count_array, int* total_count, int* unique_count)
{
int mem_Size = 8;
int is_unique = 1;
char** words = calloc(mem_Size, sizeof(char *)); //Dynamically allocate array of size 8 of char*
if (words == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}
int* counts = calloc(mem_Size, sizeof(int)); //Dynamically allocate array of size 8 of int
if (counts == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}
printf("Allocated initial parallel arrays of size 8.\n");
fflush(stdout);
char* string;
while('A')
{
is_unique = 1;
fscanf(text_file, " ,");
fscanf(text_file, " '");
while(fscanf(text_file, "%m[^,' \n]", &string) == 1) //%m length modifier
{
is_unique = 1;
strip_word(string);
if(string == '\0') continue; //if the string is empty move to next iteration
else
{
int i = 0;
++(*total_count);
for(i = 0; i < (*unique_count); i++)
{
if(strcmp(string, words[i]) == 0)
{
counts[i]++;
is_unique = 0;
break;
}
}
if(is_unique)
{
++(*unique_count);
if((*unique_count) >= mem_Size)
{
mem_Size = mem_Size*2;
words = realloc(words, mem_Size * sizeof(char *));
counts = realloc(counts, mem_Size * sizeof(int));
if(words == NULL || counts == NULL)
{
fprintf(stderr, "ERROR: realloc() failed!");
}
printf("Re-allocated parallel arrays to be size %d.\n", mem_Size);
fflush(stdout);
}
words[(*unique_count)-1] = calloc(strlen(string) + 1, sizeof(char));
strcpy(words[(*unique_count)-1], string);
counts[(*unique_count) - 1] = 1;
}
}
free(string);
}
if(feof(text_file)) break;
}
printf("All done (successfully read %d words; %d unique words).\n", *total_count, *unique_count);
fflush(stdout);
*word_array = words;
*count_array = counts;
}
int main(int argc, char* argv[])
{
if(argc < 2 || argc > 3) //Checks if too little or too many args
{
fprintf(stderr, "ERROR: Invalid Arguements\n");
return EXIT_FAILURE;
}
FILE * text_file = fopen(argv[1], "r");
if (text_file == NULL)
{
fprintf(stderr, "ERROR: Can't open file");
}
int total_count = 0;
int unique_count = 0;
char** word_array;
int* count_array;
file_parse(text_file, &word_array, &count_array, &total_count, &unique_count);
fclose(text_file);
int i;
if(argv[2] == NULL)
{
printf("All words (and corresponding counts) are:\n");
fflush(stdout);
for(i = 0; i < unique_count; i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}
else
{
printf("First %d words (and corresponding counts) are:\n", atoi(argv[2]));
fflush(stdout);
for(i = 0; i < atoi(argv[2]); i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}
for(i = 0; i < unique_count; i++)
{
free(word_array[i]);
}
free(word_array);
free(count_array);
return EXIT_SUCCESS;
}
I'm not sure quite what's going wrong with your code. I'm working on macOS Sierra 10.12.3 with GCC 6.3.0, and the local fscanf() does not support the m modifier. Consequently, I modified the code to use a fixed size string of 80 bytes. When I do that (and only that), your program runs without obvious problem (certainly on the input "the lion's rock. First, the lion woke up").
I also think that the while ('A') loop (which should be written conventionally while (1) if it is used at all) is undesirable. I wrote a function read_word() which gets the next 'word', including skipping blanks, commas and quotes, and use that to control the loop. I left your memory allocation in file_parse() unchanged. I did get rid of the memory allocation in strip_word() (eventually — it worked OK as written too).
That left me with:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
static void strip_word(char *string)
{
char string_two[80];
int i;
int c = 0;
int len = strlen(string);
for (i = 0; i < len; i++)
{
if (isalnum(string[i]))
string_two[c++] = string[i];
}
string_two[c] = '\0';
strcpy(string, string_two);
}
static int read_word(FILE *fp, char *string)
{
if (fscanf(fp, " ,") == EOF ||
fscanf(fp, " '") == EOF ||
fscanf(fp, "%79[^,' \n]", string) != 1)
return EOF;
return 0;
}
static void file_parse(FILE *text_file, char ***word_array, int **count_array, int *total_count, int *unique_count)
{
int mem_Size = 8;
char **words = calloc(mem_Size, sizeof(char *));
if (words == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}
int *counts = calloc(mem_Size, sizeof(int));
if (counts == NULL)
{
fprintf(stderr, "ERROR: calloc() failed!");
}
printf("Allocated initial parallel arrays of size 8.\n");
fflush(stdout);
char string[80];
while (read_word(text_file, string) != EOF)
{
int is_unique = 1;
printf("Got [%s]\n", string);
strip_word(string);
if (string[0] == '\0')
continue;
else
{
int i = 0;
++(*total_count);
for (i = 0; i < (*unique_count); i++)
{
if (strcmp(string, words[i]) == 0)
{
counts[i]++;
is_unique = 0;
break;
}
}
if (is_unique)
{
++(*unique_count);
if ((*unique_count) >= mem_Size)
{
mem_Size = mem_Size * 2;
words = realloc(words, mem_Size * sizeof(char *));
counts = realloc(counts, mem_Size * sizeof(int));
if (words == NULL || counts == NULL)
{
fprintf(stderr, "ERROR: realloc() failed!");
exit(EXIT_FAILURE);
}
printf("Re-allocated parallel arrays to be size %d.\n", mem_Size);
fflush(stdout);
}
words[(*unique_count) - 1] = calloc(strlen(string) + 1, sizeof(char));
strcpy(words[(*unique_count) - 1], string);
counts[(*unique_count) - 1] = 1;
}
}
}
printf("All done (successfully read %d words; %d unique words).\n", *total_count, *unique_count);
fflush(stdout);
*word_array = words;
*count_array = counts;
}
int main(int argc, char *argv[])
{
if (argc < 2 || argc > 3)
{
fprintf(stderr, "ERROR: Invalid Arguements\n");
return EXIT_FAILURE;
}
FILE *text_file = fopen(argv[1], "r");
if (text_file == NULL)
{
fprintf(stderr, "ERROR: Can't open file");
return EXIT_FAILURE;
}
int total_count = 0;
int unique_count = 0;
char **word_array = 0;
int *count_array = 0;
file_parse(text_file, &word_array, &count_array, &total_count, &unique_count);
fclose(text_file);
if (argv[2] == NULL)
{
printf("All words (and corresponding counts) are:\n");
fflush(stdout);
for (int i = 0; i < unique_count; i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}
else
{
printf("First %d words (and corresponding counts) are:\n", atoi(argv[2]));
fflush(stdout);
for (int i = 0; i < atoi(argv[2]); i++)
{
printf("%s -- %d\n", word_array[i], count_array[i]);
fflush(stdout);
}
}
for (int i = 0; i < unique_count; i++)
free(word_array[i]);
free(word_array);
free(count_array);
return EXIT_SUCCESS;
}
When run on the data file:
the lion's rock. First, the lion woke up
the output was:
Allocated initial parallel arrays of size 8.
Got [the]
Got [lion]
Got [s]
Got [rock.]
Got [First]
Got [the]
Got [lion]
Got [woke]
Got [up]
All done (successfully read 9 words; 7 unique words).
All words (and corresponding counts) are:
the -- 2
lion -- 2
s -- 1
rock -- 1
First -- 1
woke -- 1
up -- 1
When the code was run on your text, including double quotes, like this:
$ echo '"Pardon, O King,"' | cw37 /dev/stdin
Allocated initial parallel arrays of size 8.
Got ["Pardon]
Got [O]
Got [King]
Got ["]
All done (successfully read 3 words; 3 unique words).
All words (and corresponding counts) are:
Pardon -- 1
O -- 1
King -- 1
$
It took a little finnagling of the code. If there isn't an alphabetic character, your code still counts it (because of subtle problems in strip_word()). That would need to be handled by checking strip_word() more carefully; you test if (string == '\0') which checks (belatedly) whether memory was allocated where you need if (string[0] == '\0') to test whether the string is empty.
Note that the code in read_word() would be confused into reporting EOF if there were two commas in a row, or an apostrophe followed by a comma (though it handles a comma followed by an apostrophe OK). Fixing that is fiddlier; you'd probably be better off using a loop with getc() to read a string of characters. You could even use that loop to strip non-alphabetic characters without needing a separate strip_word() function.
I am assuming you've not yet covered structures yet. If you had covered structures, you'd use an array of a structure such as struct Word { char *word; int count; }; and allocate the memory once, rather than needing two parallel arrays.
I'm currently writing a program that is doing what is a simple task; read a file line by line, parse it, and store the results into an array where the structure would be array[lineNumber][lineElement]. And for the most part, it's working, except for one odd issue that I've ran into.
In the code below, any access to the array that is housing the data outside of the while loop that's populating it, only returns the last entry. This occurs regardless of the key for lineNumber. Basically it acts like it's overwriting, even though within the while loop its accessible just fine. The only two items that I think could be at fault I've outlined in bold, although for char *processData[100];, it shouldn't be an issue as it's stored within an array that's declared outside the while loop (and if I remember right while loops shouldn't have scope?), and the other line **char **processArray[100];
**, it might be the double star for an array of pointers, but returning that to just one star introduces a whole wave of bugs, namely the aforementioned array structure breaks completely.
So in a nutshell, not being a C expert by any means and exhausting my resources for this issue, I wonder if the C coders here might have some advice as to what the heck is going on, and how I can get this to work as intended....if I even can.
As mentioned previously, the code.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void) {
FILE *ifp;
char line[80];
int returnValue = 0;
//Open file
ifp = fopen("dataFile", "rt");
if (ifp == NULL) {
fprintf(stderr, "Can't open input file!\n");
returnValue = 1;
}
int lineCounter = 0;
char **processArray[100];
while(fgets(line, 80, ifp) != NULL) {
char *processData[100];
char *p = strtok(line, " ,\n");
int keyCounter = 0;
while (p != NULL) {
processData[keyCounter] = p;
p = strtok(NULL, " ,\n");
keyCounter++;
}
processArray[lineCounter] = processData;
printf("%d\n", lineCounter);
printf("Inside -> %s\n", processArray[0][0]);
lineCounter++;
}
printf("Outside %s\n", processArray[0][0]);
fclose(ifp);
int i;
int j;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
printf("%d-%d => %s\n ", i, j, processArray[i][j]);
}
}
return returnValue;
}
[Just about] everything gets overwritten on the outer while loop, so only the last processed line remains. The intermediate results must be preserved
I've fixed the program with annotations as to the bugs. The style is #if 0 /* original code */ #else /* fixed code */ #endif
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
main(void)
{
FILE *ifp;
char line[80];
int returnValue = 0;
//Open file
ifp = fopen("dataFile", "rt");
if (ifp == NULL) {
fprintf(stderr, "Can't open input file!\n");
returnValue = 1;
}
int lineCounter = 0;
char **processArray[100];
// NOTE/BUG: things get lost on each iteration of this loop
while(fgets(line, 80, ifp) != NULL) {
char *processData[100];
char *p = strtok(line, " ,\n");
int keyCounter = 0;
while (p != NULL) {
// NOTE/BUG: p will get overwritten -- so we must save the string
#if 0
processData[keyCounter] = p;
#else
processData[keyCounter] = strdup(p);
#endif
p = strtok(NULL, " ,\n");
keyCounter++;
}
// NOTE/BUG: processData must be duplicated -- it is overwritten
// on the outer loop
#if 0
processArray[lineCounter] = processData;
#else
char **pA = malloc(sizeof(char *) * keyCounter);
processArray[lineCounter] = pA;
for (int copyidx = 0; copyidx < keyCounter; ++copyidx)
pA[copyidx] = processData[copyidx];
#endif
printf("%d\n", lineCounter);
printf("Inside -> %s\n", processArray[0][0]);
lineCounter++;
}
printf("Outside %s\n", processArray[0][0]);
fclose(ifp);
int i;
int j;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
printf("%d-%d => %s\n ", i, j, processArray[i][j]);
}
}
return returnValue;
}
processData is being allocated on the stack and the memory address is not valid after you leave the while loop, regardless of you storing it in processArray. You need to allocate from the heap instead (using malloc or some other memory allocation function)
I am novice to C programming and I have written a code to a requirement specification but I am consistently getting Segmentation Fault and unable to proceed ahead.
If the file name is 'code.c' and it runs with an error of not passing the argument (filename). But if the filename is passed, we land in Segmentation Fault.
Any help/suggestions will be appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
struct _data
{
char *firstName;
char *lastName;
long number;
};
// SCAN FILE
int SCAN(FILE *(*stream))
{
*stream = fopen("inputFile.data", "r");
int ch = 0, lines = 0;
while (!feof(*stream))
{
ch = fgetc(*stream);
if (ch == '\n')
{
lines++;
}
}
return lines;
}
// LOAD FILE
struct _data *LOAD(FILE *stream, int size)
{
int i;
size_t chrCount;
char *text, *number, *firstName, *lastName;
struct _data *BlackBox;
if ((BlackBox = (struct _data*)calloc(size, sizeof(struct _data))) == NULL)
{
printf("ERROR - Could not allocate memory.\n");
exit(0);
}
rewind(stream);
for (i = 0; i < size; i++)
{
getline(&text, &chrCount, stream);
firstName = strtok(text, " ");
lastName = strtok(text, " ");
number = strtok(NULL, "\n");
// Allocate memory for name part of struct.
if ((BlackBox[i].firstName = (char*)calloc(strlen(firstName), sizeof(char))) == NULL)
{
printf("ERROR - Could not allocate memory.\n");
exit(0);
}
if ((BlackBox[i].lastName = (char*)calloc(strlen(lastName), sizeof(char))) == NULL)
{
printf("ERROR - Could not allocate memory.\n");
exit(0);
}
strcpy(BlackBox[i].firstName, firstName);
strcpy(BlackBox[i].lastName, lastName);
BlackBox[i].number = atol(number);
}
fclose(stream);
return BlackBox;
}
void SEARCH(struct _data *BlackBox, char *name, int size, int inputs)
{
int i;
int found = 0;
char *search = " ";
char *firstName;
char *lastName;
if (inputs == 2)
{
firstName = strtok(name, search);
lastName = strtok(NULL, search);
}
printf("*******************************************\n");
if (inputs == 2)
{
for (i = 0; i < size; i++)
{
if (!strcasecmp(firstName, BlackBox[i].firstName) && !strcasecmp(firstName, BlackBox[i].firstName))
{
printf("The name was found at the %d entry.\n", i);
found = 1;
break;
}
}
}
else
{
for (i = 0; i < size; i++)
{
if (!strcasecmp(firstName, BlackBox[i].firstName) || !strcasecmp(firstName, BlackBox[i].firstName))
{
printf("The name was found at the %d entry.\n", i);
found = 1;
break;
}
}
}
if (found == 0)
{
printf("The name was NOT found.\n");
}
printf("*******************************************\n");
}
// FREE MEMORY
void FREE(struct _data *BlackBox, int size)
{
int i;
for (i = 0; i < size; i++)
{
free(BlackBox[i].firstName);
free(BlackBox[i].lastName);
}
free(BlackBox);
BlackBox = NULL;
}
// MAIN
int main(int argv, char **argc)
{
int size;
FILE *stream;
struct _data *BlackBox;
// argv == 1 WORKS, Below message is printed.
if (argv == 1)
{
printf("*******************************************\n");
printf("* You must include a name to search for. *\n");
printf("*******************************************\n");
}
// argv == 2 DOES NOT WORK, Segmentation Fault.
if (argv == 2)
{
size = SCAN (&stream);
BlackBox = LOAD(stream, size);
SEARCH(BlackBox, argc[1], size, 1);
}
if (argv == 3)
{
size = SCAN(&stream);
BlackBox = LOAD(stream, size);
SEARCH(BlackBox, argc[2], size, 2);
}
return 0;
}
You have a problem in this code:
firstName = strtok(text, " ");
lastName = strtok(text, " ");
number = strtok(NULL, "\n");
...
BlackBox[i].number = atol(number);
The second strtok() call should pass NULL as its first argument. As it is, the third strtok() call is certain to return NULL because the first call modifies text in such a way that the second consumes the whole thing (when tokenizing again from the beginning, as it erroneously does). You do not test for that, however, and as a result, atol() attempts to dereference a null pointer.
Update:
Additionally, as #chqrlie and later #JamesWilkins observed, you do not allocate sufficient space for BlackBox[i].firstName and BlackBox[i].lastName, as you need room for the string terminators as well. This is an entirely separate problem that could also produce a segfault. I like #chqrlie's suggestion to switch to strdup(), but it would be sufficient to just increase each allocation by one unit.
Update 2:
Furthermore, you have an issue with this line:
getline(&text, &chrCount, stream);
You do not initialize variable text before the first call, so it contains a junk value. The function allocates a buffer only when its first argument points to a NULL pointer; otherwise it writes the line to the buffer pointed to by the pointer obtained by dereferencing the first argument. Writing to a random location in memory certainly produces undefined behavior, which in practice often manifests as a segfault.
Moreover, unless you can rely on no line of the file being longer than the first, you also need to free the text pointer at the end of each loop iteration AND reset its value to NULL, so that getline() allocates a fresh buffer on the next iteration. If you do not free it on each iteration, then you need instead to free it after the end of the loop; else you will leak memory.
Try this (though I'm using Visual Studio on Windows). I added code to check for a missing '\n' on the last line, and also allowed for a variable number of search terms. I also increased the memory allocation for strings by 1 to account for the null terminating character. I noticed you are using getline(const char*..., which I think is GNU (Linux?), so I change that to fgets() just so I could compile and test it in VS (so you can change it back if you like). I put in various null checks as well, to be safer.
#include <iostream>
using namespace std;
struct _data
{
char *firstName;
char *lastName;
long number;
};
// SCAN FILE
int SCAN(FILE *(*stream))
{
*stream = fopen("inputFile.data", "r");
if (*stream == NULL)
{
perror("Error opening file");
return 0;
}
char ch = 0, lines = 0, linesize = 0;
while ((ch = fgetc(*stream)) != EOF)
{
if (ch == '\n')
{
lines++;
linesize = 0;
}
else linesize++;
}
if (linesize > 0)
lines++; // (last line doesn't have '\n')
return lines;
}
// LOAD FILE
struct _data *LOAD(FILE *stream, int lineCount)
{
int i;
size_t chrCount = 256;
char text[256], *result, *number, *firstName, *lastName;
struct _data *BlackBox;
if ((BlackBox = (struct _data*)calloc(lineCount, sizeof(struct _data))) == NULL)
{
printf("ERROR - Could not allocate memory.\n");
exit(0);
}
else memset(BlackBox, 0, sizeof(struct _data) * lineCount); // (make sure all data members are null to begin)
rewind(stream);
for (i = 0; i < lineCount; i++)
{
result = fgets(text, chrCount, stream);
if (result == NULL)
break; // (EOF)
firstName = strtok(text, " ");
lastName = strtok(NULL, " ");
number = strtok(NULL, "\n");
// Allocate memory for name part of struct.
if ((BlackBox[i].firstName = (char*)calloc(strlen(firstName) + 1, sizeof(char))) == NULL)
{
printf("ERROR - Could not allocate memory.\n");
exit(0);
}
if ((BlackBox[i].lastName = (char*)calloc(strlen(lastName) + 1, sizeof(char))) == NULL)
{
printf("ERROR - Could not allocate memory.\n");
exit(0);
}
strcpy(BlackBox[i].firstName, firstName);
strcpy(BlackBox[i].lastName, lastName);
BlackBox[i].number = atol(number);
}
fclose(stream);
return BlackBox;
}
void SEARCH(struct _data *BlackBox, char **names, int lineCount, int inputs)
{
int i, l;
int found = 0;
printf("*******************************************\n");
for (i = 0; i < inputs; ++i)
{
for (l = 0; l < lineCount; ++l)
{
if (BlackBox[l].firstName != NULL && !_stricmp(names[i], BlackBox[l].firstName)
|| BlackBox[l].lastName != NULL && !_stricmp(names[i], BlackBox[l].lastName))
{
printf("The name was found on line %d.\n", 1 + l);
found = 1;
break;
}
}
if (found) break;
}
if (!found)
printf("The name was NOT found.\n");
printf("*******************************************\n");
}
// FREE MEMORY
void FREE(struct _data *BlackBox, int lineCount)
{
int i;
for (i = 0; i < lineCount; i++)
{
if (BlackBox[i].firstName != NULL)
free(BlackBox[i].firstName);
if (BlackBox[i].lastName != NULL)
free(BlackBox[i].lastName);
}
free(BlackBox);
}
// MAIN
int main(int argc, char **argv)
{
int lineCount;
FILE *stream;
struct _data *BlackBox;
// argc == 1 WORKS, Below message is printed.
if (argc == 1)
{
printf("*******************************************\n");
printf("* You must include a name to search for. *\n");
printf("*******************************************\n");
}
// argc == 2 DOES NOT WORK, Segmentation Fault.
if (argc > 1)
{
lineCount = SCAN(&stream);
if (lineCount > 0)
{
BlackBox = LOAD(stream, lineCount);
SEARCH(BlackBox, argv + 1, lineCount, argc - 1);
FREE(BlackBox, lineCount);
}
}
return 0;
}
Tested it on the command line, and it works.
The problem is the argv and argc. argc is supposed to be an int (think argument count), while argv is meant to be char**. You have them mixed up in your main.