Reading a well-formatted text file - c

Given the well-formatted text file called input.txt below:
Yesterday snowed
Today is hot
Tomorrow will rain
Next week will earthquake
How can I read the text file line by line and also dynamically allocate memory to each English word as a character array if I do not know the length of each English word since I do not want to waste 1000 bytes on a short word. Should realloc be used in this case? The following is my code:
int main() {
FILE* pfile = fopen("input.txt", "r");
int i = 0;
while (i != 0) {
char* stringLiteral = (char*) malloc(1000 * sizeof(char));
i = fscanf(pfile, "%s", stringLiteral);
insertString(stringLiteral);
}
fclose("input.txt");
return 1;
}
void insertString(char* charArray) {
/*This function inserts a char array to a linked list*/
}

If you want you can use realloc, yes, in that case you would need to reallocate, smaller pieces of memory.
You can even reallocate char by char stretching the string as it's being populated and not waste a single byte.
Example with comments:
Live demo
#include <stdio.h>
#include <stdlib.h>
int main() {
FILE *pfile = fopen("input.txt", "r");
if (pfile == NULL) { //check for errors in opening file
perror("fopen");
}
else {
int c;
int i = 0; //string iterator
char *stringLiteral;
stringLiteral = malloc(1); //initial allocation
if(stringLiteral == NULL) {
perror("malloc");
return EXIT_FAILURE;
}
while ((c = fgetc(pfile)) != EOF) { //until the end of the file is reached
if (c != '\n') { //until the line ends
stringLiteral = realloc(stringLiteral, i + 1); //keep reallocating memory for each character
if(stringLiteral == NULL){
perror("malloc");
return EXIT_FAILURE;
}
stringLiteral[i] = c; //assing the read character to the char array
i++;
}
else { //'\n' was reached
stringLiteral[i] = '\0'; //terminate string
//insertString(stringLiteral); //your insertion function
printf("%s\n", stringLiteral); //test print
i = 0;
}
}
//insertString(stringLiteral); //last read line
printf("%s\n", stringLiteral); // test print
fclose(pfile);
}
return EXIT_SUCCESS;
}
The problem here is that memory allocation is an expensive process and can slow down your program.
You have to weigh what's more important, the space or the speed. Unless the strings are so huge that they cannot fit in the stack, in that case memory allocation is the way to go, though it can be more sensible to allocate blocks of bytes instead of byte by byte.

Related

How to return 2d char array (char double pointer) in C?

I am reading a file that contains several lines of strings(max length 50 characters). To store those strings I created a char double-pointer using calloc. The way my code works is as it finds a line in the file it adds one new row (char *) and 50 columns (char) and then stores the value.
My understanding is that I can call this method and get this pointer with values in return. However, I was not getting the values so I check where I am losing it and I found that the memory is not persisting after while loop. I am able to print strings using print 1 statement but print 2 gives me null.
Please let me know what I am doing wrong here.
char **read_file(char *file)
{
FILE *fp = fopen(file, "r");
char line[50] = {0};
char **values = NULL;
int index = 0;
if (fp == NULL)
{
perror("Unable to open file!");
exit(1);
}
// read both sequence
while (fgets(line, 50, fp))
{
values = (char **)calloc(index + 1, sizeof(char *));
values[index] = (char *)calloc(50, sizeof(char));
values[index] = line;
printf("%s",values[index]); // print 1
index++;
}
fclose(fp);
printf("%s", values[0]); // print 2
return values;
}
line content is overwritten on each loop iteration (by fgets()).
values is overwritten (data loss) and leaks memory on each iteration index > 1.
value[index] is allocated memory on each iteration which leaks as you overwrite it with the address of line on the following line.
line is a local variable so you cannot return it to caller where it will be out of scope.
caller has no way to tell how many entries values contain.
Here is a working implementation with a few changes. On error it closes the file and frees up memory allocated and return NULL instead of exiting. Moved printf() to caller:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define BUF_LEN 50
char **read_file(char *file) {
FILE *fp = fopen(file, "r");
if(!fp) {
perror("Unable to open file!");
return NULL;
}
char **values = NULL;
char line[BUF_LEN];
unsigned index;
for(index = 0;; index++) {
char **values2 = realloc(values, (index + 1) * sizeof(char *));
if(!values2) {
perror("realloc failed");
goto err;
}
values = values2;
if(!fgets(line, BUF_LEN, fp)) break;
values[index] = strdup(line);
}
fclose(fp);
values[index] = NULL;
return values;
err:
fclose(fp);
for(unsigned i = 0; i < index; i++) {
free(values[i]);
}
free(values);
return NULL;
}
int main() {
char **values = read_file("test.txt");
for(unsigned i = 0; values[i]; i++) {
printf("%s", values[i]);
free(values[i]);
}
free(values);
return 0;
}
fgets() returns line ending in '\n' or at most BUF_LEN - 1 of data. This means a given value[i] may or may not be ending with a \n. You may want this behavior, or you want value[i] to be consistent and not contain any trailing \n irregardless of the input.
strdup() is _POSIX_C_SOURCE >= 200809L and not standard c,
so if you build with --std=c11 the symbol would not be defined.

fscanf text file with dynamic allocated memorry

I have to do a function that reads a text file with characters. It is obligatory to use malloc and realloc. I made this code, whitout errors, but when I try to read file, I get runtime error. And I can't understand where is the problem in this code.
void openFile(FILE** file,char* filename)
{
int SIZE=10;
char* data,*data2;
int n = 0;
char c;
printf("filename:");
scanf("%s",&*filename);
if (!((*file) = fopen(filename, "r")))
perror("Error:");
else
{
if ((data = (char*)malloc(SIZE * sizeof(char))) == NULL)
{
perror("malloc:");
}
while (fscanf((*file),"%s",c) != EOF)
{
if (n < SIZE)
{
data[n++] = c;
}
else
{
if ((data2 = (char*)realloc(data, SIZE * sizeof(char))) != NULL)
{
data = data2;
data[n++] = c;
}
else
{
free(data);
}
}
}
}
}
There are some issues with your code, almost none of them fatal, deppending on what you pass to the function.
The catastrophic failure is probably because in your fscanf function you using "%s" specifier with a char variable, the correct specifier is %c, and you need to pass the address of the variable &c.
You should address scanf("%s", &*filename);, there is the danger of the data exceeding the storage capacity of the memory buffer, you should allways define a max size not larger than the capacity of the buffer, you can use "%99s" specifier with scanf for a memory buffer of 100 characters, or better yet, using fgets:
fgets(filename, sizeof(filename), stdin);
filename[strcspn(filename, "\n")] = '\0'; //to remove newline character
The way you are using the file pointer makes me suspect that you wouldn't need to pass it as an argument and much less as a double pointer, this would be useful if you need to keep the pointer to the file stream outside the function, if that's the case, you can leave it as is, otherwise you can use a local variable and close the stream when you are finished.
Here is he code with some corrections as mentioned, and some other minor ones:
void openFile(char *filename)
{
int SIZE = 10;
char *data, *data2;
int n = 0;
char c;
FILE *file; //local variable
printf("filename:");
scanf("%99s", filename);// &* is pointless, using %99s, assuming filename[100]
if (!(file = fopen(filename, "r")))
perror("Error:");
else
{
if ((data = malloc(SIZE)) == NULL) //char size is always 1 byte, no cast needed, include stdlib.h
{
perror("malloc:");
}
while (fscanf(file, "%c", &c) != EOF) //specifier for char is %c, furthermore you need & operator
{ //probably the source of your problems
if (n < SIZE)
{
data[n++] = c;
}
else
{
if ((data2 = realloc(data, SIZE)) != NULL) // again no cast, no sizeof(char)
{
data = data2;
data[n++] = c;
}
else
{
free(data);
}
}
}
fclose(file); //close file stream
}
}
Note that the only catastrophic problems are the ones with fscanf, you can fix those only and the code will likely work, I'd still advise the other fixes.

My code crashes at long texts? Reading from file character by character into a dynamic char array in C

I want to read a whole text from txt file, then print it.
It should be with dynamic memory managment and character by character.
My code works good with short texts (max. 40 characters), but crashes when I read more characters.
#include <stdio.h>
#include <stdlib.h>
int main()
{
char *s;
int n, i;
s = (char*) malloc(sizeof(char));
n=0;
FILE *f=fopen("input.txt","r");
while (fscanf(f, "%c", &s[n]) != EOF)
n++;
fclose(f);
free(s);
for (i=0;i<n;i++)
printf("%c",s[i]);
return 0;
}
Please don't cast malloc.
You allocating the wrong number of bytes, sizeof(char) returns you the size of
a single char, so you are allocating one byte only. That's not enough to hold
a string.
You should use malloc like this:
int *a = malloc(100 * sizeof *a);
if(a == NULL)
{
// error handling
// do not continue
}
Using sizeof *a is better than sizeof(int), because it will always return
the correct amount of bytes. sizeof(int) will do that as well, but the problem
here is the human factor, it's easy to make a mistake and write sizeof(*int)
instead. There are thousands of questions here with this problem.
Note that a char is defined to have the size of 1, that's why when you are
allocating memory for strings or char arrays, people usually don't write the
* sizeof *a:
char *s = malloc(100);
// instead of
char *s = malloc(100 * sizeof *s);
would be just fine. But again, this is only the case for char. For other types
you need to use sizeof operator.
You should always check the return value of malloc, because if it returns
NULL, you cannot access that memory.
while (fscanf(f, "%c", &s[n]) != EOF)
n++;
If you for example allocated 100 spaces, you have to check that you haven't
reached the limit. Otherwise you will overflow s:
char *s = malloc(100);
if(s == NULL)
{
fprintf(stderr, "not enough memory\n");
return 1;
}
int n = 0;
while ((fscanf(f, "%c", &s[n]) != EOF) && n < 100)
n++;
In this case you are not using the allocated memory to store a string, so it's
fine that it doesn't have the '\0'-terminating byte. However, if you want to
have a string, you need to write one:
while ((fscanf(f, "%c", &s[n]) != EOF) && n < 99)
n++;
s[n] = '\0';
Also you are doing this
free(s);
for (i=0;i<n;i++)
printf("%c",s[i]);
You are freeing the memory and then trying to access it. You have to do the
other way round, access then free.
Correct way:
for (i=0;i<n;i++)
printf("%c",s[i]);
free(s);
EDIT
If you want the contents of a whole file in a single string, then you have 2
options:
Calculate the length of the file beforehand and then use allocate the correct
amount of data
Read one fixed size chunk of bytes at a time and resize the memory every time
you read a new chunk.
The first one is easy, the second one is a little bit more complicated because
you have to read the contents, look how much you've read, resize the memory with
realloc, check that the resizing was successful. This is something you could
do later when you have for knowledge in simple memory managment.
I'll show you the first one, because it's much easier. The function fseek
allows you to advance your file pointer to the end of the file, with the
function ftell you can get the size of the file and with rewind
rewind the file pointer and set it to the beginning:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv)
{
if(argc != 2)
{
fprintf(stderr, "usage: %s file\n", argv[0]);
return 0;
}
FILE *fp = fopen(argv[1], "r");
if(fp == NULL)
{
fprintf(stderr, "Could not open %s for reading.\n", argv[1]);
return 1;
}
// calculating the size
// setting file pointer to the end of the file
if(fseek(fp, 0L, SEEK_END) < 0)
{
fprintf(stderr, "Could not set the file pointer to the end\n");
fclose(fp);
return 1;
}
// getting the size
long size = ftell(fp);
if(size < 0)
{
fprintf(stderr, "Could not calculate the size\n");
fclose(fp);
return 1;
}
printf("file size of %s: %ld\n", argv[1], size);
// rewinding the file pointer to the beginning of the file
rewind(fp);
char *s = malloc(size + 1); // +1 for the 0-terminating byte
if(s == NULL)
{
fprintf(stderr, "not enough memory\n");
fclose(fp);
return 1;
}
int n = 0;
// here the check && n < size is not needed
// you allocated enough memory already
while(fscanf(fp, "%c", &s[n]) != EOF)
n++;
s[n] = '\0'; // writing the 0-terminating byte
fclose(fp);
printf("Contents of file %s\n\n", argv[1]);
for(int i=0; i<n; i++)
printf("%c",s[i]);
free(s);
return 0;
}
You are only allocating space for one single char (sizeof(char)):
s = (char*) malloc(sizeof(char));
If you want to store more characters you have to allocate more space, for example:
s = (char*) malloc(sizeof(char) * 100);
To read the whole file you best first find out how large the file is, so that you know how much space you need to allocate for s.
Calculate size of the file with the help of fseek.
fseek(fp, 0L, SEEK_END); sz = ftell(fp);
You can then seek back,to the begining of file
fseek(fp, 0L, SEEK_SET);
Allocate that much memory through malloc.
S= (char*)malloc((sizeof(char)×sz)+1)
Now you can use this memory to copy bytes in while lopp

How to populate Dynamic array with Strings in C

I am doing a project where I have to read in text from a file and then extract every word that is 4 characters long and allocate it into dynamic array.My approach is to create int function that will get number of 4 letter words and return that number , then create another function that will grab that number and create dynamic array consisting of that many elements. The problem with this approach is how to populate that array with words that meet the requirement.
int func1(FILE *pFile){
int counter = 0;
int words = 0;
char inputWords[length];
while(fscanf(pFile,"%s",inputWords) != EOF){
if(strlen(inputWords)==4){
#counting 4 letter words
counter++;
}
}
}
return counter;
}
int main(){
#creating pointer to a textFile
FILE *pFile = fopen("smallDictionary.txt","r");
int line = 0;
#sending pointer into a function
func1(pFile);
fclose(pFile);
return 0;
}
I would suggest reading lines of input with fgets(), and breaking each line into tokens with strtok(). As each token is found, the length can be checked, and if the token is four characters long it can be saved to an array using strdup().
In the code below, storage is allocated for pointers to char which will store the addresses of four-letter words. num_words holds the number of four-letter words found, and max_words holds the maximum number of words that can currently be stored. When a new word needs to be added, num_words is incremented, and if there is not enough storage, more space is allocated. Then strdup() is used to duplicate the token, and the address is assigned to the next pointer in words.
Note that strdup() is not in the C Standard Library, but that it is POSIX. The feature test macro in the first line of the program may be needed to enable this function. Also note that strdup() allocates memory for the duplicated string which must be freed by the caller.
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
Update
OP has mentioned that nonstandard functions are not permitted in solving this problem. Though strdup() is POSIX, and both common and standard in this sense, it is not always available. In such circumstances it is common to simply implement strdup(), as it is straightforward to do so. Here is the above code, modified so that now the function my_strdup() is used in place of strdup(). The code is unchanged, except that the feature test macro has been removed, the call to strdup() has been changed to my_strdup(), and of course now there is a function prototype and a definition for my_strdup():
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
char * my_strdup(const char *);
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = my_strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
char * my_strdup(const char *str)
{
size_t sz = strlen(str) + 1;
char *dup = malloc(sizeof *dup * sz);
if (dup) {
strcpy(dup, str);
}
return dup;
}
Final Update
OP had not posted code in the question when the above solution was written. The posted code does not compile as is. In addition to missing #includes and various syntax errors (extra braces, incorrect comment syntax) there are a couple of more significant issues. In func1(), the length variable is used uninitialized. This should be large enough so that inputWords[] can hold any expected word. Also, width specifiers should be used with %s in scanf() format strings to avoid buffer overflow. And, OP code should be checking whether the file opened successfully. Finally, func1() returns a value, but the calling function does not even assign this value to a variable.
To complete the task, the value returned from func1() should be used to declare a 2d array to store the four-letter words. The file can be rewound, but this time as fscanf() retrieves words in a loop, if a word has length 4, strcpy() is used to copy the word into the array.
Here is a modified version of OP's code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD 100
int func1(FILE *pFile){
int counter = 0;
char inputWords[MAX_WORD];
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
counter++;
}
}
return counter;
}
int main(void)
{
FILE *pFile = fopen("filename.txt","r");
if (pFile == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char inputWords[MAX_WORD];
int num_4words = func1(pFile);
char words[num_4words][MAX_WORD];
int counter = 0;
rewind(pFile);
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
strcpy(words[counter], inputWords);
counter++;
}
}
if (fclose(pFile) != 0) {
perror("Unable to close file");
}
for (int i = 0; i < num_4words; i++) {
puts(words[i]);
}
return 0;
}

Generating array of word pointers in c

I have a problem where I have to read a text file made of 264064 words into a buffer and then create an array of word-pointers in a separate array. I am not sure how to go about creating the array of word-pointers which points to different amount of characters in the buffer. Any hints on how to approach this problem?
#include <stdlib.h>
#include <string.h>
int main()
{
int i,wordCount=0;
long bufsize;
char ch;
//Open File and get number of lines in file
FILE *fp = fopen("words2.txt", "r");
if (fp == NULL) {
printf("Error!");
exit(1);
}
do {
ch = fgetc(fp);
if (ch == '\n')
{
wordCount++;
}
} while (ch != EOF);
fclose(fp);
printf("%d\n",wordCount);
//Reading Words into buffer rawtext
char *rawtext;
fp = fopen("words2.txt", "rb");
if (fp != NULL)
{
if (fseek(fp, 0L, SEEK_END) == 0) {
bufsize = ftell(fp);
if (bufsize == -1) {
exit(1);
}
rawtext = malloc(sizeof(char) * (bufsize + 1));
if (fseek(fp, 0L, SEEK_SET) != 0) { exit(1); }
size_t newLen = fread(rawtext, sizeof(char), bufsize, fp);
if (ferror(fp) != 0) {
fputs("Error reading file", stderr);
} else {
rawtext[newLen++] = '\0';
}
}
//Print out buffer
printf("%s",rawtext);
fclose(fp);
free(rawtext);//Free allocated memory
char *ptr[wordCount];//Array for word-pointers
}
}
If you keep your rawtext (i.e. do not free it), you could use strchr('\n') to go through the content, store to the array the current position, detect every new line char, terminate the string at this new line character, and go ahead. Thereby, your ptr-array will point to each word inside rawtext at the end (that's why you should not free rawtext then, because the pointers would then point to invalid memory):
The following code should work:
char* currWord = rawtext;
int nrOfWords = 0;
char* newlinePos;
while ((newlinePos = strchr(currWord,'\n')) != NULL) {
*newlinePos = '\0';
ptr[nrOfWords++] = currWord;
currWord = newlinePos + 1;
}
if (*currWord) {
ptr[nrOfWords++] = currWord;
}
Side note: expression char *ptr[wordCount] might put your pointer array on the stack, which has limited space, at least less than the heap. This could get a problem if your file contains a lot of words. Use char *ptr = malloc((wordCount+1) * sizeof(char*)) to reserve memory on the heap. Note also the +1 after wordCount for the case that the last word is not terminated by a new line.

Resources