Large numbers of input files causing program to not function at all - c

I'm working on a word frequency program that is designed to handle any number of input files. It works fine for smaller numbers of files, even if those files have tens of thousands of words, but when attempting to run it with a larger number of files (24 in the case I'm testing), it barely even begins reading from the first file before segfaulting.
typedef struct {
int noInFiles, numFiles, numToPrint;
char** fileNames;
FILE** files;
Hash hash;
} Freq;
void openFiles(Freq* freq) {
int i;
char* str;
freq->files = calloc(1,sizeof(FILE**));
for(i = 0; i < freq-> numFiles; i++) {
freq->files[i] = fopen(freq->fileNames[i],"r");
if(freq->files[i] == NULL) {
str = malloc(strlen(freq->fileNames[i]) + 5);
sprintf(str,"wf: %s",freq->fileNames[i]);
perror(str);
free(str);
exit(EXIT_FAILURE);
}
}
}
void wordCount(Freq* freq) {
int i, totalWords = 0;
char *word = NULL;
unsigned wordLength = 0, memSize = 0;
for(i = 0; i < freq->numFiles; i++) {
fprintf(stderr,"Counting from file %d named %s\n", i,freq->fileNames[i]);
while(EOF != getWord(&word, &wordLength, &memSize, freq->files[i], "file"))
{
addEntry(&(freq->hash), word, 1);
totalWords++;
free(word);
word = NULL;
}
}
freq->totalWords = totalWords;
}
Valgrind says that openFiles has an Invalid write of size 4 but i have no idea what that means

This line is problematic:
freq->files = calloc(1,sizeof(FILE**));
According to the sturct, you will need a FILE* to each of the files, but this line just allocate one FILE**, change to this:
freq->files = calloc(freq->numFiles, sizeof(FILE*));

Related

Read binary file to struct array pointer

I have written a struct array to a binary file using this function:
int write_binary(const char* filename, const Product* shop)
{
FILE* OUT;
int jees = 0;
int i = 0;
OUT = fopen(filename, "wb");
if (!OUT) {
return 1;
}
while (jees == 0)
{
//the last element of the struct array has '\0' as the first char of its name
if (shop[i].name[0] == '\0')
{
jees = 1;
}
fwrite(&shop[i], sizeof (Product), 1, OUT) ;
i++;
}
fclose(OUT);
return 0;
}
Now I want to read it back into a struct array pointer. I have tried:
Product* read_binary(const char* filename)
{
FILE* IN = fopen(filename,"rb");
Product *shop;
for (int i = 0; i < 10; i++) {
fread(&shop[i], sizeof(Product), 1, IN);
}
fclose(IN);
return shop;
}
But this way doesn't seem to work. Is there a way to find out the how many structs are in the binary data?
Product *shop;
Here you are declaring a pointer, but you are not allocating memory for it. You should allocate with malloc() or do some static allocation.
To know the number of structs in the file, I'd seek to the end of it, count the bytes and divide by the size of the struct.
A side note: you don't need the jees variable. Just test the breaking condition after writing and break the loop explicitly:
for (i = 0; ; i++)
{
fwrite(&shop[i], sizeof (Product), 1, OUT);
if (shop[i].name[0] == '\0')
break;
}

How to find words with capital letters in a char using c?

I'm trying to find all the words with capital letters in a string, but am unable to process my data structure. i seem to be able to print out fileContent, indicating that it is loading in successfully, but my second function is not working on the file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* loadFile(char* fileName)
{
FILE *inputFile;
inputFile = fopen(fileName, "r");
//finds the end of the file
fseek(inputFile, 0, SEEK_END);
//stores the size of the file
int size = ftell(inputFile);
//Sets the scan to the start of the file
fseek(inputFile, 0, SEEK_SET);
char *documentStore = (char*)malloc(size);
int i = 0, c;
while((c = fgetc(inputFile)) != EOF)
{
documentStore[i] = c;
i++;
}
return documentStore;
}
void countImportantWords(char* fileContent, char** importantWords, int* frequencyWords)
{
int uniqueWordCount = 0;
int lengthWordStore = 10;
int i = 0;
int recording = 0;
char wordBuffer[50];
int wordBufferCount = 0;
int isWordPresent = 0;
while(fileContent[i] != EOF)
{
//To allocate more memory incase the structure is full
if(uniqueWordCount == lengthWordStore)
{
lengthWordStore += 10;
char** newWordStore = realloc(importantWords, lengthWordStore * sizeof(char*));
int* newFrequencyStore = realloc(frequencyWords, sizeof(int));
importantWords = newWordStore;
frequencyWords = newFrequencyStore;
}
printf("%s", wordBuffer);
//Conditions to fill if its a word
if(fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 0)
{
wordBuffer[0] = fileContent[i];
recording = 1;
}else if(fileContent[i] >= 'a' && fileContent[i] <= 'z' && recording == 1)
{
//each if is to check if the end of word is reached. Any character that is non alphabetical is considered end of word
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else if (fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 1)
{
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else {
//Adding a terminating character so that it strcpy only copies until that point
wordBuffer[wordBufferCount + 1] = '\0';
recording = 0;
//check to see if that word is in the array already, and if it is, it will just increment the frequency
for(int j = 0; j < uniqueWordCount; j++){
if(strcmp(wordBuffer, importantWords[j]) == 0)
{
frequencyWords[j] += 1;
isWordPresent = 1;
}
}
//if its not present, it should assign it to the structure
if(isWordPresent == 0)
{
char* wordStore = (char*)malloc(wordBufferCount * sizeof(char));
strcpy(wordStore, wordBuffer);
uniqueWordCount += 1;
importantWords[uniqueWordCount] = wordStore;
frequencyWords[uniqueWordCount] = 1;
}
}
i++;
}
}
int main() {
char fileName[50];
char *fileContent;
char **importantWords = (char**)malloc(10*sizeof(char**));
int *frequencyWords = (int*)malloc(10*sizeof(int));
printf("Please input the full file path: ");
scanf("%s", fileName);
fileContent = loadFile(fileName);
countImportantWords(fileContent, importantWords, frequencyWords);
int i = 0;
while(importantWords[i] != '\0')
{
printf("%s %d", importantWords[i], frequencyWords[i]);
i++;
}
return 0;
}
I've put in the full file so you can see how the structure was created incase that it is the issue, but ideally what would happen is that the final loop would print out all the words that are important and they're frequency. Currently i'm getting exit code 11, which i'm not sure what it means, but may be worth mentioning. I'd really appreciate any help :)
You can simplify the process dramatically but utilising functions and learning to manage your memory. I wrote a short example which does not take punctuation into account. It just assumes every word is separated by a space, which you can customise to your discretion.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
char* readfile(char* filename){
char* data = NULL;
FILE* file = fopen(filename, "r");
if(file == NULL){
return NULL;
}
fseek(file, 0, SEEK_END);
long size = ftell(file)+1;
fseek(file, 0, SEEK_SET);
data = (char*)malloc(size);
if(data == NULL){
return NULL;
}
fgets(data, (int)size, file);
return data;
}
typedef struct uppercase_t{
char** word;
int count;
}uppercase;
void copy(uppercase* u,char* token){
size_t length = strlen(token);
u->word[u->count] = (char*)malloc(length+1);
if(u->word[u->count] == NULL){
return;
}
strcpy(u->word[u->count], token);
++u->count;
}
void createuppercasedata(uppercase* u, char* data){
const char delimeter[] = " ";
char* token = strtok(data, delimeter);
if(token == NULL){
return;
}
u->word = (char**)malloc(u->count+1);
if(u->word == NULL){
return;
}
if(isupper(token[0])){
copy(u,token);
}
while(token != NULL){
token = strtok(0, delimeter);
if(token != NULL)
if(isupper(token[0])) {
char** reallocated = (char**)realloc(u->word, u->count+1);
if(reallocated == NULL){
return;
}
u->word = reallocated;
copy(u, token);
}
}
}
void destroyuppercasedata(uppercase* u){
for(int index = 0; index < u->count; ++index){
free(u->word[index]);
}
free(u->word);
}
int main(){
char filename[] = "textfile";
char* data = readfile(filename);
if(data == NULL){
return -1;
}
uppercase u = {0};
createuppercasedata(&u, data);
printf("found %i uppercase words\n",u.count);
for(int index = 0; index < u.count; ++index){
printf("%s\n", u.word[index]);
}
destroyuppercasedata(&u);
free(data);
}
The code will allocate a new pointer for each uppercase and memory for the word to be copied too. It will free all the memory it allocated in the structure with destroyuppercasedata and it will free the initial data that was read from file. Error checking and memory management in C is really important. So utilise those properly.
This was the test file I used.
textfile
How many Uppercase words can Be Found In this text File the answer should be Seven
And this was the output to the terminal:
How
Uppercase
Be
Found
In
File
Seven

Write structure to a binary file, read it from file, store in buffer, print the buffer

I am trying to write a structure to a binary file, the punctuation will be received as a parameter in the insertInFile function, (working with raspberryPi, and store the values of the time reaction with switches). Then I want to read from the file and and store the values on the heap and finally read those values I stored dynamically. I'm having problems with this because either it cannot read properly or it cannot write to the binary file.
typedef enum dificulty {
EASY, MEDIUM, HARD
} Dificulty;
typedef struct player {
char nickname[MAXSIZE];
enum dificulty Dificulty;
float pontuacion;
} Player;
#define LED_CYCLES 5
#define MAX_PLAYERS 2
int insertInFile(float const *const timeReceived, unsigned short *level, char *playerName) {
Player players[MAX_PLAYERS];
int count = 0, i;
FILE *fplayers;
//check if file can be opened
if ((fplayers = fopen("game.bin", "wb")) == NULL) {
fputs("Erro ao abrir ficheiro\n", stderror);
return (-1);
}
//go to the beginning of the file
rewind(fplayers);
//cycle that allows to save int the bin file the struct "Player"
for (i = 0; i < count; i++) {
Player[i].pontuacion = &timeReceived[count];
Player[i].Dificulty = &level;
Player[i].nickname = &playerName;
fwrite(&players, sizeof (Player), count, fplayers);
}
//close the bin file
fclose(fplayers);
return 0;
}
void obtainFromFile() {
Player players;
int count = 0;
FILE *fplayers;
size_t size;
unsigned char *buffer;
int i;
//open file
fp = fopen("game.bin", "rb");
fseek(fp, 0, SEEK_END);
size = ftell(fp); // calculate the size needed
fseek(fp, 0, SEEK_SET);
buffer = (unsigned char *)malloc(size);
if (fplayers == NULL) { // check some error if file == empty
printf("Error\n", stderr);
return (-1);
} else
if (fread(&buffer, sizeof (*buffer), size, fp) != size) // if count of read bytes != calculated size of .bin file -> ERROR
printf("Error\n", stderr);
return (-1);
} else {
for (i = 0; i < size; i++) {
printf("%02x", buffer[i]);
}
}
fclose(fp);
free(buffer);
}
In insertInFile() there is a misunderstanding of the structure use. Writing Player.pontuacion is not right.
To assign a value in an array of struct Player
players[MAX_PLAYERS];, use players[i].Dificulty = (Dificulty)level.
//cicle that allows to save int the bin file the struct "Player"
for (i = 0; i < count; i++) {
players[i].pontuacion = timeReceived[count];
players[i].Dificulty = (Dificulty)(level[count]);
strncpy(players[i].nickname,playerName,MAXSIZE-1);
players[i].nickname[MAXSIZE]='\0';
}
// write count players
fwrite(&(players[0]), sizeof (Player), count, fplayers);
Instead of:
for (i = 0; i < count; i++) {
Player.pontuacion = &timeReceived[count];
Player.Dificulty = &level;
Player.nickname = &playerName;
fwrite(&players, sizeof (Player), count, fplayers);
}

getline from file, put to array of struct, print content (Homework)

this is an assignment for my CS course,
im trying to write a code that reads a file line by line and put the input into a struct element.the struct looks like this:
typedef char* Name;
struct Room
{
int fStatus;
Name fGuest;
};
the status is 0 for available and 1 for booked. the name will be empty if the room is available.
there are 2 function, one to read and put the values to a struct element, and the other one to print it out.
int openRoomFile()
{
FILE *roomFile;
char *buffer = NULL;
size_t length = 0;
size_t count = 0;
roomFile = fopen("roomstatus.txt", "r+");
if (roomFile == NULL)
return 1;
while (getline(&buffer, &length, roomFile) != -1) {
if (count % 2 == 0) {
sscanf(buffer, "%d", &AllRooms[count].fStatus);
} else {
AllRooms[count].fGuest = buffer;
}
count++;
}
fclose(roomFile);
free(buffer);
return 0;
}
print function
void printLayout(const struct Room rooms[])
{
for (int i=0; i<3; i++) {
printf("%3d \t", rooms[i].fStatus);
puts(rooms[i].fGuest);
}
}
the output is not what i expected, given the input file is :
1
Johnson
0
1
Emilda
i will get the output :
1 (null)
0
0 (null)
i dont know what went wrong, am i using the right way to read the file? every code is adapted from different sources on the internet.
Here is a fixed version of the openRoomFile()
int openRoomFile(void)
{
FILE *roomFile;
char *buffer = NULL;
size_t length = 0;
size_t count = 0;
roomFile = fopen("roomstatus.txt", "r+");
if (roomFile == NULL)
return 1;
while (1) {
buffer = NULL;
if (getline(&buffer, &length, roomFile) == -1) {
break;
}
sscanf(buffer, "%d", &AllRooms[count].fStatus);
free(buffer);
buffer = NULL;
if (getline(&buffer, &length, roomFile) == -1) {
fprintf(stderr, "syntax error\n");
return 1;
}
AllRooms[count].fGuest = buffer;
count++;
}
fclose(roomFile);
return 0;
}
When you no longer need those fGuest anymore, you should call free on them.
If your input is guaranteed to be valid (as were many of my inputs in my CS classes), I'd use something like this for reading in the file.
while(!feof(ifp)){
fscanf(ifp,"%d%s",&AllRooms[i].fStatus, AllRooms[i].fGuest); //syntax might not be right here
//might need to play with the '&'s
//and maybe make the dots into
//arrows
//do work here
i++;
}
You are not allocating memory for Name. Check this. In the below example i'm not included free() calls to allocated memory. you need to call free from each pointer in AllRooms array, once you feel you are done with those and no more required.
#include<stdio.h>
#include<stdlib.h>
typedef char* Name;
struct Room
{
int fStatus;
Name fGuest;
}Room_t;
struct Room AllRooms[10];
int openRoomFile()
{
FILE *roomFile;
char *buffer = NULL;
size_t length = 0;
size_t count = 0;
size_t itemCount = 0;
roomFile = fopen("roomstatus.txt", "r+");
if (roomFile == NULL)
return 1;
buffer = (char *) malloc(16); // considering name size as 16 bytes
while (getline(&buffer, &length, roomFile) != -1) {
if (count % 2 == 0) {
sscanf(buffer, "%d", &AllRooms[itemCount].fStatus);
} else {
AllRooms[itemCount].fGuest = buffer;
itemCount++;
}
count++;
buffer = (char *) malloc(16); // considering name size as 16 bytes
}
fclose(roomFile);
free(buffer);
return 0;
}
void printLayout(const struct Room rooms[])
{
int i;
for (i=0; i<3; i++) {
printf("%3d \t", rooms[i].fStatus);
puts(rooms[i].fGuest);
}
}
int main(void)
{
openRoomFile();
printLayout(AllRooms);
// free all memory allocated using malloc()
return 0;
}

fgets + dynamic malloc/realloc with a .txt as stdin

i'm trying to read lines of a file. txt, but without knowing the size of each lines...First I used the getline instruction (and works fine), but my teacher does not let me use that instruction, he says I can only use the fgets statement with malloc and realloc...
This is an input example, with variable line sizes:
[9.3,1.2,87.9]
[1.0,1.0]
[0.0,0.0,1.0]
As shown, each line defines a different vector with no size limit
Someone could help me implement this method?
Thank you very much.
NOTE: I forgot to mention, to compile the program I use these commands:
g++ -Wall-Wextra-Werror-pedantic main.c-o metbasicos.c metintermedios.c eda.exe
./eda.exe <eda.txt
I would say do something similar to this
while(fgets(buf, LEN, stdin)){
z = strtok(buf, ",");
*(*(matrix + i)) = atof(z);
for(j = 1; j < col; ++j){
z = strtok(NULL, ",");
*(*(matrix + i) + j) = atof(z);
}
++i;
}
The only extra thing you would have to take care of is making sure that you strip the brackets off of the first and last element.
Of course, if you don't know the size of the final array, you might need something like this:
struct data_t {
int nval; /* current number of values in array */
int max; /* allocated number of vlaues */
char **words; /* the data array */
};
enum {INIT = 1, GROW = 2};
...
while (fgets(buf, LEN, stdin)) {
if (data->words == NULL)
data->words = malloc(sizeof(char *));
else if (data->nval > data->max) {
data->words = realloc(data->words, GROW * data->max *sizeof(char *));
data->max = GROW * data->max;
}
z = strtok(buf, "\n");
*(data->words + i) = malloc(sizeof(char) * (strlen(z) + 1));
strcpy(*(data->words + i), z);
i++;
data->nval++;
}
data->nval--;
If you combine both of those while loops into a single one, you should be all set. The first one reads in floats, the second one is good for dynamically allocating space on the fly.
If you can use multiple steps, then use one function to get the information you need to malloc memory. (for example determine number of lines, and longest line) This function will do that for you (given the file name and location)
[EDIT] LineCount - This function will get you the number of lines, and the longest line so you can dynamically allocate memory in char **strings; in which to read the lines of the input file.
int lineCount(char *file, int *nLines)
{
FILE *fp;
int cnt=0, longest=0, numLines=0;
char c;
fp = fopen(file, "r");
while ( (c = fgetc ( fp) ) != EOF )
{
if ( c != '\n' )
{
cnt++;
if (cnt > longest) longest = cnt;
}
else
{
numLines++;
cnt= 0;
}
}
*nLines = numLines+1;//add one more
fclose(fp);
return longest+1;
}
Here is the implementation to read the input file you provided, using the function above to get the unknown dimensions of the input file...
#include <ansi_c.h>
#include <stdio.h>
#define FILENAME "c:\\dev\\play\\in.txt" //put your own path here
#define DELIM "- ,:;//_*&[]\n" //change this line as needed for search criteria
int lineCount(char *file, int *cnt);
void allocMemory(int numStrings, int max);
void freeMemory(int numStrings);
char **strings;
int main()
{
int numLines, longest, cnt, i;
FILE *fp;
longest = lineCount(FILENAME, &numLines);
char wordKeep[longest];
allocMemory(numLines, longest);
//read file into string arrays
fp = fopen(FILENAME, "r");
cnt=0;
i=0;
for(i=0;i<numLines;i++)
{
fgets(strings[i], longest, fp);
}
fclose(fp);
freeMemory(numLines);
getchar();
return 0;
}
int lineCount(char *file, int *nLines)
{
FILE *fp;
int cnt=0, longest=0, numLines=0;
char c;
fp = fopen(file, "r");
while ( (c = fgetc ( fp) ) != EOF )
{
if ( c != '\n' )
{
cnt++;
if (cnt > longest) longest = cnt;
}
else
{
numLines++;
cnt= 0;
}
}
*nLines = numLines+1;//add one more
fclose(fp);
return longest+1;
}
void allocMemory(int numStrings, int max)
{
int i;
// need number of lines by longest line for string containers
strings = calloc(sizeof(char*)*(numStrings+1), sizeof(char*));
for(i=0;i<numStrings; i++)
{
strings[i] = calloc(sizeof(char)*max + 1, sizeof(char));
}
}
void freeMemory(int numStrings)
{
int i;
for(i=0;i<numStrings; i++)
if(strings[i]) free(strings[i]);
free(strings);
}

Resources