I am trying to read lines until EOF. If the line is longer than maxLength, the rest gets truncated.
If the number of rows exceed S, it should reallocate for double the amount.
Everything works well until the reallocation. When it gets to that part, the program crashes.
I have been sitting on this for 2 days.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define S 8
char ** readLines(int * wordCountp, int maxLength) {
char line[maxLength + 1];
int size = S;
char ** words = malloc(size * sizeof(char));
if (words == NULL) {
fprintf(stderr, "Memory allocation failed\n");
exit(1);
}
int i = 0;
while (fscanf(stdin, "%s", line) == 1) {
words[i] = malloc((maxLength + 1) * sizeof(char));
strncpy(words[i], line, maxLength);
i++;
if (i == size) {
size *= 2;
words = realloc(words, size * sizeof(char));
if (words == NULL) {
fprintf(stderr, "Memory allocation failed\n");
exit(1);
}
}
if (words == NULL) {
fprintf(stderr, "Memory allocation failed\n");
exit(1);
}
free(words[i]);
}
words[i] = NULL;
*wordCountp = i;
return words;
}
Allocating with sizeof(char*) instead of sizeof(char) messes up the array, but still doesn't solve the problem.
Creating a separate char**, reallocating that one and then copying the original into that does not work either.
At least these problems:
Avoid allocation size mistakes
Allocate to the size of the referenced object, rather than the size of the type, which was wrong here.
// v----------v Wrong type and too small
// char ** words = malloc(size * sizeof(char));
char ** words = malloc(sizeof words[0] * size);
// ^-------------^ Just right
// words = realloc(words, size * sizeof(char));
words = realloc(words, sizeof words[0] * size);
Copy the whole string
words[i] = malloc((maxLength + 1) * sizeof(char));
// v-------v 1 too short
// strncpy(words[i], line, maxLength);
strcpy(words[i], line);
Use fgets() to read a line
fscanf(stdin, "%s", line) will not read a line, more like it will read a word. Without a width, buffer overflow possible.
// fscanf(stdin, "%s", line)
while (fgets(line, sizeof line, stdin)) {
Related
I'm working on a crosswords program in which a word dictionary is necessary. I'm trying load a jspell dictionary file into an dynamic string array but i keep getting the
error malloc(): mismatching next->prev_size (unsorted)
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "dictionary.h"
void dict_init(Dictionary * dict, char * dict_dir, size_t w_len)
{
printf("dictionary.c (dict_init): initializing dictionary.\n");
/*Adjust this value to control the initial array size*/
size_t init_size = 1000;
/*initialize dictionary file directory*/
dict->dir = malloc(strlen(dict_dir) * sizeof(char) + 1);
strcpy(dict->dir, dict_dir);
/*create memory for words array*/
dict->words = malloc(init_size * sizeof(char *));
/*initialize array size*/
dict->size = init_size;
/*initilize word length*/
dict->w_len = w_len;
/*initialize word counter*/
dict->counter = 0;
/*load words into dictionary*/
dict_load(dict);
printf("dictionary.c (dict_init): dictionary initialized.\n");
}
void dict_add(Dictionary * dict, char * word)
{
char ** dictionary = dict->words;
/*check if word array is full*/
if(dict->counter == dict->size)
{
/*increrase size of dictionary*/
dict->size *= 1.5;
dict->words = realloc(dict->words, dict->size * sizeof(char *));
}
/*add word to dictionary*/
dictionary[dict->counter] = malloc(strlen(word) * sizeof(char) + 1);
strcpy(dictionary[dict->counter], word);
dict->counter++;
free(word);
}
void dict_free(Dictionary * dict)
{
free(dict->words);
}
void dict_load(Dictionary * dict)
{
FILE * fp;
char * line = NULL;
char * word = NULL;
size_t len = 0;
ssize_t read;
fp = fopen(dict->dir, "r");
/*check if file exists*/
if (fp == NULL)
{
perror("ERROR: File not found.");
exit(EXIT_FAILURE);
}
/*discard first line*/
if(strstr(dict->dir, ".dic"))
getline(&line, &len, fp);
/*read file lines*/
while ((read = getline(&line, &len, fp)) != -1)
{
if(((strstr(line, "[CAT=punct") == NULL) && (word = parse_line(line, dict->w_len)) != NULL)) {
dict_add(dict, word);
}
}
fclose(fp);
free(line);
printf("dictionary.c (dict_load): dictionary loaded %ld words.\n", dict->counter);
}
char * parse_line(char * line, size_t w_len)
{
int i;
char s_tmp[101] = "";
char * dlm_slash, * dlm_space, * dlm_tab , *substring;
/*get delimiter pointer*/
dlm_slash = strchr(line, '/');
dlm_space = strchr(line, ' ');
dlm_tab = strchr(line, '\t');
/*check if delimiter exists in line*/
if(dlm_slash != NULL)
i = (int)(dlm_slash - line);
else if(dlm_space != NULL)
i = (int)(dlm_space - line);
else if(dlm_tab != NULL)
i = (int)(dlm_tab - line);
else
{
/*replace '\n' with '\0'*/
line[strcspn(line, "\n")] = '\0';
i = strlen(line);
}
strncpy(s_tmp, line, i);
substring = malloc(sizeof(char) * strlen(s_tmp) + 1);
strncpy(substring, s_tmp, strlen(s_tmp));
/*lowercase word*/
lower_case(substring);
if((is_valid(substring) == 0) && (strlen(substring) <= w_len))
return substring;
free(substring);
return NULL;
}
Here's the basic problem, I think:
void dict_add(Dictionary * dict, char * word) {
char ** dictionary = dict->words; /* **** 1 **** */
/*check if word array is full*/
if(dict->counter == dict->size)
{
/*increrase size of dictionary*/
dict->size *= 1.5; /* **** 2 **** */
dict->words = realloc(dict->words, dict->size * sizeof(char *));
/* **** 3 **** */
}
/*add word to dictionary*/
This one is the problem:
dictionary[dict->counter] = malloc(strlen(word) * sizeof(char) + 1);
strcpy(dictionary[dict->counter], word);
dict->counter++;
free(word); /* **** 4 **** */
}
The problem is that dictionary was saved before you called realloc. realloc might make a brand-new memory allocation, in which case it will automatically free() the old one after copying its contents into the new one. So any copy of the pointer which you made before calling realloc might end up pointing to unallocated memory. Writing to unallocated memory is a big no-no; in this particular case, you're probably overwriting malloc's bookkeeping information about the unallocated block, which is why it detects the problem and complains. Count yourself lucky: lots of memory corruption problems go undetected for quite a while until the factory explodes.
Some other issues which I noticed while writing this, with numbered comments in the source:
There's actually no need for the variable dictionary at all.
dict->size is an integer. Forcing conversion to a floating point number and then truncating back to an integer is not very useful. Prefer dict->size += dict->size/2;. Even better would be to first make sure that dict->size isn't so big that increasing it will cause integer wraparound. (This is not undefined behaviour on unsigned types like size_t, but it's not going to produce correct results.)
Here you could actually use a temporary, because realloc might return NULL indicating a memory allocation failure. If that happens, the original allocation is not automatically freed, and you don't have a way to free it. (Actually you do, since you have a variable confusingly called dictionary, but in point 1 I recommended that you get rid of it.) A more idiomatic call would be:
if(dict->counter == dict->size) {
/*increrase size of dictionary*/
dict->size += dict->size / 2; /* See point 2, above */
char** new_words = realloc(dict->words, dict->size * sizeof(*new_words));
if (new_words == NULL) {
/* Report allocation error and free all the memory you've allocated */
/* Then probably exit(1) but if this were a library function, just
* return some kind of failure indication so that the caller can do
* their own clean-up.
*/
}
dict->words = new_words;
}
dict->words[dict->counter] = word; /* See point 4, below */
You're freeing word here because it was allocated in parse_line(). But if you know you're going to free it anyway, there wasn't much point making a copy of it first. You might as well just use it. (But you need to document the fact that this function takes ownership of the word passed as an argument.)
It might be considered cleaner to do the copy as you do but then not free the argument, leaving it for the caller to do that. That would have the advantage of allowing the caller to provide a word which hadn't been dynamically allocated, or use the word for some other purpose.
(Not indicated in this snippet, but nonetheless important). Every block of allocated memory must be freed. So your program should execute free exactly as many times as it executed malloc. But you don't do that; you just free the array of word pointers, and let the words pointed to in that array leak. You should fix that. (Note that you don't need an extra call to free for a call to realloc, since realloc itself frees the old block if it allocates a new one. You only need to match the initial malloc with a free.)
I am trying to learn C pointer passing. So please forgive my ignorance.
I want to allocate a 2 dimensional dynamically allocated string array in a function.
The function signature is void so the parameters are by reference.
The test file contains these two lines.
I am testing.
This is not an empty file.
Here is what I have done so far.
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void read_lines(FILE *fp, char** lines, int *num_lines) {
ssize_t read;
char * line = NULL;
size_t len = 0;
*num_lines = 0;
while ((read = getline(&line, &len, fp)) != -1) {
if (*num_lines == 0) {
// For the first time it holds only one char pointer
*lines = malloc(sizeof(char *));
} else {
// Every time a line is read, space for next pointer is allocated
*lines = realloc(*lines, (*num_lines) * sizeof(char *));
}
// allocate space where the current line can be stored
*(lines + (*num_lines)) = malloc(len * sizeof(char));
// Copy data
strcpy(*(lines + (*num_lines)), line);
printf("Retrieved line of length %zu:\n", read);
printf("%s\n", line);
(*num_lines)++;
// After first line subsequent lines get truncated if I free
// the storage here, then subsequent lines are not read completely
//if (line) {
// free(line);
//}
}
if (line) {
free(line);
}
}
int main(void)
{
FILE * fp;
char *array;
int num_lines;
fp = fopen("file.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
read_lines(fp, &array, &num_lines);
printf("After returning\n");
// Intend to access as array[0], array[1] etc
// That's not working
// If I access this way then I get seg violation after first line
printf("%s\n", &array[0]);
fclose(fp);
}
My questions are inline with code:
Why can't I free storage for line inside the while loop?
How do I access returned 2D array in main? array[0] array[1] doesn't seem to work? I want to do something similar.
Why seg fault is generated for the way I am doing it now?
Corrected code will help me understand. Also any good reference anybody can provide to get these concept clarified for C will be greatly appreciated.
If you free(line) inside the while loop, you have to reset line to NULL and len to 0, before the next calling of getline. Otherwise, getline will think line is a valid buffer of size len, and may try to write to it, which is actually a so called "dangling pointer" now.
In the realloc line, the size should be (*num_lines + 1) * sizeof(char *), one more element need to be allocate to hold the just read line.
And the array variable is char*, its address is taken and assiged to the parameter lines of read_lines. So lines is the address of array, and *lines is just array itself.
But
// allocate `char*[1]`
*lines = malloc(sizeof(char *));
and
// allocate `char*[N]` with N=`*num_lines`
*lines = realloc(*lines, (*num_lines) * sizeof(char *));
You assigned a char*[] to array, which is a char* in fact.
So, if you want your function return a array of strings (that is char*[] or char**) by parameter, you have to make the parameter a pointer to a array of strings (that is char***).
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
void read_lines(FILE * fp, char*** lines, int* num_lines) {
ssize_t read;
char* buffer = NULL;
size_t buffer_len = 0;
*num_lines = 0;
while ((read = getline(&buffer, &buffer_len, fp)) != -1) {
// `*lines` is actually `array`,
// modify `*lines` will effectively modify `array`
if (*num_lines == 0) {
// `array` now is `char*[1]`
*lines = (char**)malloc(sizeof(char*)); // A
}
else {
// `array` now is `char*[(*num_lines) + 1]`
*lines = (char**)realloc(*lines, (*num_lines + 1) * sizeof(char*)); // B
}
// *(x+n) is the same as x[n], this line is actually doing:
// `array[*num_lines] = malloc...
*(*lines + (*num_lines)) = (char*)malloc((read + 1) * sizeof(char)); // C
strcpy(*(*lines + (*num_lines)), buffer);
(*num_lines)++;
printf("Retrieved line of length %zu:\n", read);
printf("%s\n", buffer);
}
if (buffer) {
// `line` is `malloc`ed or `realloc`ed by `getline`,
// have to be `free`ed
free(buffer);
}
}
int main(void)
{
FILE* fp;
char** array;
int num_lines;
fp = fopen("file.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
read_lines(fp, &array, &num_lines);
printf("After returning\n");
for (int i = 0; i < *num_lines; i++) {
printf("%s\n", array[i]);
free(array[i]); // corresponding to C
}
free(array); // corresponding to A or B
fclose(fp);
}
FILE *file;
file = fopen(argv[1], "r");
char *match = argv[2];
if (file == NULL) {
printf("File does not exist\n");
return EXIT_FAILURE;
}
int numWords = 0, memLimit = 20;
char** words = (char**) calloc(memLimit, sizeof(char));
printf("Allocated initial array of 20 character pointers.\n");
char string[20];
while (fscanf(file, "%[a-zA-Z]%*[^a-zA-Z]", string) != EOF) {
words[numWords] = malloc(strlen(string) + 1 * sizeof(char));
strcpy(words[numWords], string);
printf("Words: %s\n", words[numWords]);
numWords++; /*keep track of indexes, to realloc*/
if (numWords == memLimit) {
memLimit = 2 * memLimit;
words = (char**) realloc(words, memLimit * sizeof(char*)); /*Fails here*/
printf("Reallocated array of %d character pointers.\n", memLimit);
}
}
Code should open and read a file containing words with punctuation, spaces etc and store in a string, but after 20 tries it throws an error, and I can't seem to get realloc() to work here, which I'm expecting to be the problem. The array is dynamically allocated 20 char pointers, at which when limit is reached, it should realloc by double. How can I get around this?
Two notes. First, you shouldn't ever cast the return value of calloc/malloc/realloc. See this for more information.
Second, as others have pointed out in comments, the first calloc statement uses sizeof(char) and not sizeof(char*) like it should.
words is a pointer to a pointer. The idea is to allocate an array of pointers.
The below is wrong as it allocates for memLimit characters rather than memLimit pointers.
This is the main issue
char** words = (char**) calloc(memLimit, sizeof(char)); // bad
So use an easy idiom: allocate memLimit groups of whatever words points to. It is easier to write, read and maintain.
char** words = calloc(memLimit, sizeof *words);
Avoid the while (scanf() != EOF) hole. Recall that various results can come from scanf() family. It returns the count of successfully scanned fields or EOF. That is typically 1 of at least 3 options. So do not test for one result you do not want, test for the one result you do want.
// while (fscanf(file, "%[a-zA-Z]%*[^a-zA-Z]", string) != EOF) {
while (fscanf(file, "%[a-zA-Z]%*[^a-zA-Z]", string) == 1) {
The above example may not every return 0, but the below easily could.
int d;
while (fscanf(file, "%d", &d) == 1) {
#Enzo Ferber rightly suggests using "%s". Further recommend to follow the above idiom and restrict input width to 1 less than the size of the buffer.
char string[20];
while (fscanf(file, "%19s", string) == 1) {
Suggest the habit of checking allocation result.
// better to use `size_t` rather than `int `for array sizes.
size_t newLimit = 2u * memLimit;
char** newptr = realloc(words, newLimit * sizeof *newptr);
if (newptr == NULL) {
puts("Out-of-memory");
// Code still can use old `words` pointer of size `memLimit * sizeof *words`
return -1;
}
memLimit = newLimit;
words = newptr;
}
Errors
Don't cast malloc/calloc returns. There's not need for it.
Your first sizeof is wrong. It should be sizeof(char*)
That scanf() format string. %s does the job just fine.
Code
The following code worked for me (printed one word per line):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
FILE *file;
file = fopen(argv[1], "r");
char *match = argv[2];
if (file == NULL) {
printf("File does not exist\n");
return EXIT_FAILURE;
}
int numWords = 0, memLimit = 20;
char **words = calloc(memLimit, sizeof(char*));
printf("Allocated initial array of 20 character pointers.\n");
char string[20];
while (fscanf(file, "%s", string) != EOF) {
words[numWords] =
malloc(strlen(string) + 1 * sizeof(char));
strcpy(words[numWords], string);
printf("Words: %s\n", words[numWords]);
numWords++; /*keep track of indexes, to realloc */
if (numWords == memLimit) {
memLimit = 2 * memLimit;
words = realloc(words, memLimit * sizeof(char *));
printf
("Reallocated array of %d character pointers.\n",
memLimit);
}
}
}
Called with ./realloc realloc.c
Hope it helps.
Your first allocation is the problem. You allocate 20 chars and treat them as 20 char pointers. You overrun the allocated buffer and corrupt your memory.
The second allocation fails because the heap is corrupted.
When I print char** surname and char** first, I get some strange outputs. I am not sure if I am doing the malloc correctly or if I'm doing something else incorrectly.
The Input -> names1.txt
The outputs
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main ()
{
int size, i;
char **surname, **first, *middle_init, dummy, str[80];
FILE *fp_input = fopen("names1.txt", "r");
fscanf(fp_input, "%d%c", &size, &dummy); // gets size of array from file
/* dynamic memory allocation */
middle_init = (char*)malloc(size * sizeof(char));
surname = (char**)malloc(size * sizeof(char*));
first = (char**)malloc(size * sizeof(char*));
for (i = 0; i < size; i++)
{
surname[i] = (char*)malloc(17 * sizeof(char));
first[i] = (char*)malloc(17 * sizeof(char));
} // for
/* reads from file and assigns value to arrays */
i = 0;
strcpy(middle_init, "");
while (fgets(str, 80, fp_input) != NULL)
{
surname[i] = strtok(str, ", \n");
first[i] = strtok(NULL, ". ");
strcat(middle_init, strtok(NULL, ". "));
i++;
} // while
/* prints arrays */
for (i = 0; i < size; i++)
printf("%s %s\n", surname[i], first[i]);
return 0;
} // main
A casual look at the code suggests:
You must use strcpy() or a variant on the theme to copy the string found by strtok() into the surname, etc.
The way you've written it, you throw away your allocated memory.
You get the repeated output because you're storing pointers to the string you use to hold the line in the surname and first arrays. That string only holds the last line when you do the printing. This and the previous point are corollaries of the first point.
You only allocate a single character for the middle initials. You then use strcat() to treat them as strings. I recommend treating middle initials as strings, much like the other names. Or, since you aren't required to print them, you might decide to ignore middle initials altogether.
Using 17 instead of enum { NAME_LENGTH = 17 }; or equivalent is not a good idea.
There are undoubtedly other issues too.
I guess you've not reached structures in your course of study yet. If you have covered structures, you should probably use a structure type to represent a complete name, and use a single array of names instead of parallel arrays. This will likely simplify memory management too; you'd use fixed size array elements in the structure, so you'd only have to make one allocation for each name.
The code below produces the output:
Ryan Elizabeth
McIntyre O
Cauble-Chantrenne Kristin
Larson Lois
Thorpe Trinity
Ruiz Pedro
In this code, the err_exit() function is vastly valuable because it makes error reporting into a one-line call, rather than a 4-line paragraph, which means you're more likely to do the error checking. It is a basic use of variable length argument lists, and you may not understand it yet, but it is extremely convenient and powerful. The only functions that could be error checked but aren't are the fclose() and printf(). If you're reading a file, there's little benefit to checking fclose(); if you're writing and fclose() fails, you may have run out of disk space or something like that and it is probably appropriate to report the error. You could add <errno.h> to the list of headers and report on errno and strerror(errno) if you wanted to improve the error reporting more. The code frees the allocated memory; valgrind gives it a clean bill of health.
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void err_exit(const char *fmt, ...);
int main(void)
{
enum { NAME_SIZE = 25 };
const char *file = "names1.txt";
int size, i;
char **surname, **first, str[80];
FILE *fp_input = fopen(file, "r");
if (fp_input == NULL)
err_exit("Failed to open file %s\n", file);
if (fgets(str, sizeof(str), fp_input) == 0)
err_exit("Unexpected EOF on file %s\n", file);
if (sscanf(str, "%d", &size) != 1)
err_exit("Did not find integer in line: %s\n", str);
if (size <= 0 || size > 1000)
err_exit("Integer %d out of range 1..1000\n", size);
if ((surname = (char**)malloc(size * sizeof(char*))) == 0 ||
(first = (char**)malloc(size * sizeof(char*))) == 0)
err_exit("Memory allocation failure\n");
for (i = 0; i < size; i++)
{
if ((surname[i] = (char*)malloc(NAME_SIZE * sizeof(char))) == 0 ||
(first[i] = (char*)malloc(NAME_SIZE * sizeof(char))) == 0)
err_exit("Memory allocation failure\n");
}
for (i = 0; i < size && fgets(str, sizeof(str), fp_input) != NULL; i++)
{
char *tok_s = strtok(str, ",. \n");
char *tok_f = strtok(NULL, ". ");
if (tok_s == 0 || tok_f == 0)
err_exit("Failed to read surname and first name from: %s\n", str);
if (strlen(tok_s) >= NAME_SIZE || strlen(tok_f) >= NAME_SIZE)
err_exit("Name(s) %s and %s are too long (max %d)\n", tok_s, tok_f, NAME_SIZE-1);
strcpy(surname[i], tok_s);
strcpy(first[i], tok_f);
}
if (i != size)
err_exit("Only read %d names\n", i);
fclose(fp_input);
/* prints arrays */
for (i = 0; i < size; i++)
printf("%s %s\n", surname[i], first[i]);
for (i = 0; i < size; i++)
{
free(surname[i]);
free(first[i]);
}
free(surname);
free(first);
return 0;
}
static void err_exit(const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
exit(1);
}
here:
surname[i] = (char*)malloc(17 * sizeof(char));
first[i] = (char*)malloc(17 * sizeof(char));
..
surname[i] = strtok(str, ", \n");
first[i] = strtok(NULL, ". ");
you allocate memory for surname and first and you don't use that memory because you assign to it the string returned from strtok which you should not do anyway because it points to a static buffer used by the function for parsing, you could use strdup instead:
while (fgets(str, 80, fp_input) != NULL) {
surname[i] = strdup(strtok(str, ", \n"));
first[i] = strdup(strtok(NULL, ". "));
middle_init[i] = strtok(NULL, ". ")[0];
i++;
} // while
/* prints arrays */
for (i = 0; i < size; i++)
printf("%s %s %c\n", surname[i], first[i], middle_init[i]);
strdup will allocate memory and copy the string, this way you avoid hard coding the string length too, you should free that memory when you're done, also note that middile_init is a char array, so I just assign 1 char.
I have been trying to take chars from a txt file(in which the words of the text that will become strings will be separated by spaces) and import them into strings in my code. I tried it but I only could print the words (that are separated by spaces). How can I input them into strings?
The code that prints the words is the following, but I also need it to save the string into arrays or pointers if possible.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(){
FILE *fp;
int i=0;
char *words=NULL,*word=NULL,c;
if ((fp=fopen("monologue.txt","r"))==NULL){ /*Where monologue txt is a normal file with plain text*/
printf("Error Opening File\n");
exit(1);}
while ((c = fgetc(fp))!= EOF){
if (c=='\n'){ c = ' '; }
words = (char *)realloc(words, ++i*sizeof(char));
words[i-1]=c;}
word=strtok(words," ");
while(word!= NULL){
printf("%s\n",word);
word = strtok(NULL," ");}
exit(0);
}
Your code is rather hard to read. Here is almost identical code that is (I submit) considerably more readable:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main()
{
const char filename[] = "monologue.txt";
FILE *fp;
int i = 0;
char *words = NULL;
char *word = NULL;
int c;
if ((fp = fopen(filename, "r")) == NULL)
{
/*Where monologue txt is a normal file with plain text*/
fprintf(stderr, "Error opening file %s\n", filename);
exit(1);
}
while ((c = fgetc(fp)) != EOF)
{
if (c == '\n')
c = ' ';
words = (char *)realloc(words, ++i * sizeof(char));
words[i-1] = c;
}
word = strtok(words, " ");
while (word != NULL)
{
printf("%s\n", word);
word = strtok(NULL, " ");
}
return(0);
}
This shows us that you are slurping the entire file into the string pointed to by words, but you are doing so rather inefficiently in that you are reallocating memory one byte at a time for each byte read. You should be looking to do things much more effectively, by reading bigger chunks of the file into memory. For example, you might allocate an initial buffer of 32 KiB; you could read into that buffer using fread(); if you don't encounter EOF, you could then reallocate the space, doubling the amount available to you. (For testing, you'd start with a much smaller block - maybe 16 bytes, maybe even as small as 4 bytes; this ensures you test the memory reallocation code, whereas 32 KiB would probably seldom exercise the reallocation code.)
You also need to ensure that your string is null terminated; as it stands, it is not. You would need to do a final realloc() to make space for the null terminator too.
You can avoid mapping newlines during input since strtok() can be given a list of characters on which to split, so you can add newline to that list.
To generate a list of words, you need to adapt the loop around strtok(). You might simply count the spaces and newlines and then allocate enough pointers to point to that many words; you might have an overestimate if there are adjacent spaces or newlines, but better over than under. Alternatively, you can can allocate, for sake of argument, 16 pointers. As you process the first 16 words, you use these pointers; when you run out of space, you double the number of pointers allocated, and use the new supply until that runs out. You can use any algorithm that allocates a significant number of pointers (meaning 'more than one' and 'increasing as the number already used goes up') instead of simple doubling, but doubling has its merits (notably, it is simple).
One word of caution: you should never assign the result of realloc() to the variable that is its first argument:
words = (char *)realloc(words, ++i * sizeof(char)); // Bad!
The trouble is that if realloc() fails, you've just wiped out the only pointer to the previously allocated memory, so you have leaked it all. Always assign to a new variable, test that it worked, then copy the result:
char *new_space = (char *)realloc(words, ++i * sizeof(char));
if (new_space == 0)
{
fprintf(stderr, "Memory allocation failed at size %d\n", i);
exit(1);
}
words = new_space;
I assembled this code yesterday. Notice that it uses functions to do repeated jobs - such as checking that memory allocation succeeded. There is room to improve it (there always is). It does character at a time input still (and newline mapping, therefore) but allocates increasingly large chunks of memory so that it does not do memory allocation on every character read. The err_exit() function is a useful skeleton; you can flesh it out into a much more complex system, but the basic idea of a function to report errors and exit (with a behaviour similar to fprintf() + exit() can simplify programs a lot (and error checking and reporting is important, but needs to be simple when it can be).
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void err_exit(const char *format, ...);
static void *emalloc(size_t nbytes);
static void *erealloc(void *old_space, size_t nbytes);
int main(void)
{
const char filename[] = "monologue.txt";
FILE *fp;
size_t i = 0;
size_t len_data = 4;
char *data = emalloc(len_data);
int c;
/* Read data from file */
if ((fp = fopen(filename, "r")) == NULL)
err_exit("Error opening file %s\n", filename);
while ((c = fgetc(fp)) != EOF)
{
if (c == '\n')
c = ' ';
if (i >= len_data)
{
assert(i == len_data);
data = realloc(data, 2 * len_data);
len_data *= 2;
}
data[i++] = c;
}
if (i >= len_data)
{
assert(i == len_data);
data = erealloc(data, len_data + 1);
len_data++;
}
data[i] = '\0';
fclose(fp);
/* Split file into words */
size_t len_wordlist = 16;
size_t num_words = 0;
char **wordlist = emalloc(len_wordlist * sizeof(char *));
char *location = data;
char *word;
for (num_words = 0; (word = strtok(location, " ")) != NULL; num_words++)
{
if (num_words >= len_wordlist)
{
assert(num_words == len_wordlist);
wordlist = erealloc(wordlist, 2 * len_wordlist * sizeof(char *));
len_wordlist *= 2;
}
wordlist[num_words] = word;
location = NULL;
}
/* Print the word list - one per line */
for (i = 0; i < num_words; i++)
printf("%zu: %s\n", i, wordlist[i]);
/* Release allocated space */
free(data);
free(wordlist);
return(0);
}
static void err_exit(const char *format, ...)
{
va_list args;
va_start(args, format);
vfprintf(stderr, format, args);
va_end(args);
exit(1);
}
static void *emalloc(size_t nbytes)
{
void *new_space = malloc(nbytes);
if (new_space == 0)
err_exit("Failed to allocate %zu bytes of memory\n", nbytes);
return(new_space);
}
static void *erealloc(void *old_space, size_t nbytes)
{
void *new_space = realloc(old_space, nbytes);
if (new_space == 0)
err_exit("Failed to reallocate %zu bytes of memory\n", nbytes);
return(new_space);
}
Try this. I've modified very little about your code, just to keep it close to your starting point. The main thing I did was add allwords which is an array of char * (this is where I store each string one by one). Then right after printing each version of word (what you were already doing), I also copied it into the next open slot in the allwords array. At the end I added another printing loop to display the contents of each string.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAXWORDS 999
int main(){
FILE *fp;
int i=0, j;
char *words=NULL,*word=NULL,c;
char *allwords[MAXWORDS];
if ((fp=fopen("monologue.txt","r"))==NULL){ /*Where monologue txt is a normal file with plain text*/
printf("Error Opening File\n");
exit(1);}
while ((c = fgetc(fp))!= EOF){
if (c=='\n'){ c = ' '; }
words = (char *)realloc(words, ++i*sizeof(char));
words[i-1]=c;}
word=strtok(words," ");
i=0;
while(word!= NULL && i < MAXWORDS){
printf("%s\n",word);
allwords[i] = malloc(strlen(word));
strcpy(allwords[i], word);
word = strtok(NULL," ");
i++;
}
printf("\nNow printing each saved string:\n");
for (j=0; j<i; j++)
printf("String %d: %s\n", j, allwords[j]);
exit(0);
}