Dynamic growing string array memory issues - c

I'm working on a crossword program in which a word dictionary is necessary. I'm trying to load a jspell dictionary file into a dynamic string array, but I keep getting the
error malloc(): mismatching next->prev_size (unsorted)
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "dictionary.h"
/*
 * Initialise a dictionary and load its words.
 *
 * dict     - dictionary to set up; its dir, words, size, w_len and counter
 *            fields are all overwritten
 * dict_dir - path of the dictionary file; a private copy is kept in dict->dir
 * w_len    - stored in dict->w_len (dict_load() passes it to parse_line())
 *
 * The program is terminated with EXIT_FAILURE if an allocation fails, the
 * same policy dict_load() applies when the file cannot be opened.
 */
void dict_init(Dictionary * dict, char * dict_dir, size_t w_len)
{
    /*Adjust this value to control the initial array size*/
    size_t init_size = 1000;

    printf("dictionary.c (dict_init): initializing dictionary.\n");

    /*initialize dictionary file directory (+1 for the terminating '\0')*/
    dict->dir = malloc(strlen(dict_dir) + 1);
    if (dict->dir == NULL)
    {
        perror("ERROR: Cannot allocate dictionary path.");
        exit(EXIT_FAILURE);
    }
    strcpy(dict->dir, dict_dir);

    /*create memory for words array*/
    dict->words = malloc(init_size * sizeof(char *));
    if (dict->words == NULL)
    {
        perror("ERROR: Cannot allocate words array.");
        free(dict->dir);
        exit(EXIT_FAILURE);
    }

    /*initialize array size, word length and word counter*/
    dict->size = init_size;
    dict->w_len = w_len;
    dict->counter = 0;

    /*load words into dictionary*/
    dict_load(dict);
    printf("dictionary.c (dict_init): dictionary initialized.\n");
}
/*
 * Append a copy of word to dict->words, growing the array by a factor of
 * 1.5 when dict->counter has reached dict->size.
 *
 * The function calls free(word) before returning, so the caller must pass a
 * heap-allocated string and must not use it afterwards.
 *
 * NOTE: the results of realloc() and malloc() are not checked.
 */
void dict_add(Dictionary * dict, char * word)
{
/* snapshot of the array pointer, taken BEFORE any reallocation below */
char ** dictionary = dict->words;
/*check if word array is full*/
if(dict->counter == dict->size)
{
/*increase size of dictionary (multiplication is done in floating point)*/
dict->size *= 1.5;
/* realloc may move the block; only dict->words is updated, not `dictionary` */
dict->words = realloc(dict->words, dict->size * sizeof(char *));
}
/*add word to dictionary*/
/* these writes go through `dictionary`, i.e. the pointer saved before realloc */
dictionary[dict->counter] = malloc(strlen(word) * sizeof(char) + 1);
strcpy(dictionary[dict->counter], word);
dict->counter++;
/* the argument itself is released here; only the copy stays in the array */
free(word);
}
/*
 * Release the array of word pointers held by dict.
 *
 * Only dict->words itself is passed to free(); the individual strings
 * allocated in dict_add() and the dict->dir copy made in dict_init() are
 * not released here.
 */
void dict_free(Dictionary * dict)
{
free(dict->words);
}
/*
 * Read the file named by dict->dir line by line and hand every word that
 * parse_line() accepts to dict_add().
 *
 * When the path contains ".dic" the first line of the file is skipped.
 * Lines containing "[CAT=punct" are ignored. The program exits with
 * EXIT_FAILURE if the file cannot be opened.
 */
void dict_load(Dictionary * dict)
{
    char * buf = NULL;   /* getline-managed line buffer */
    size_t cap = 0;      /* current capacity of buf */
    FILE * in = fopen(dict->dir, "r");

    /*check if file exists*/
    if (in == NULL)
    {
        perror("ERROR: File not found.");
        exit(EXIT_FAILURE);
    }

    /*discard first line*/
    if (strstr(dict->dir, ".dic") != NULL)
        getline(&buf, &cap, in);

    /*read file lines*/
    for (;;)
    {
        ssize_t n = getline(&buf, &cap, in);
        char * entry;

        if (n == -1)
            break;
        if (strstr(buf, "[CAT=punct") != NULL)
            continue;

        entry = parse_line(buf, dict->w_len);
        if (entry != NULL)
            dict_add(dict, entry);
    }

    fclose(in);
    free(buf);
    printf("dictionary.c (dict_load): dictionary loaded %ld words.\n", dict->counter);
}
/*
 * Extract the leading word of a dictionary line.
 *
 * The word ends at the first '/' if the line has one, otherwise at the first
 * ' ', otherwise at the first '\t'; with none of these it ends at the
 * newline (which is overwritten with '\0' in line). The word is copied to
 * the heap, passed through lower_case(), and returned only if is_valid()
 * yields 0 and its length does not exceed w_len.
 *
 * Returns a malloc'ed string the caller must free, or NULL when the word is
 * rejected or memory cannot be allocated.
 */
char * parse_line(char * line, size_t w_len)
{
    size_t len;
    char * dlm;
    char * substring;

    /*get delimiter pointer: '/' has priority over ' ', which has priority over '\t'*/
    if ((dlm = strchr(line, '/')) != NULL
        || (dlm = strchr(line, ' ')) != NULL
        || (dlm = strchr(line, '\t')) != NULL)
    {
        len = (size_t)(dlm - line);
    }
    else
    {
        /*replace '\n' with '\0'*/
        line[strcspn(line, "\n")] = '\0';
        len = strlen(line);
    }

    /* Size the copy from the actual word length: no fixed 100-byte scratch
     * buffer to overflow, and the terminator is written explicitly. */
    substring = malloc(len + 1);
    if (substring == NULL)
        return NULL;
    memcpy(substring, line, len);
    substring[len] = '\0';

    /*lowercase word*/
    lower_case(substring);
    if ((is_valid(substring) == 0) && (strlen(substring) <= w_len))
        return substring;

    free(substring);
    return NULL;
}

Here's the basic problem, I think:
void dict_add(Dictionary * dict, char * word) {
char ** dictionary = dict->words; /* **** 1 **** */
/*check if word array is full*/
if(dict->counter == dict->size)
{
/*increrase size of dictionary*/
dict->size *= 1.5; /* **** 2 **** */
dict->words = realloc(dict->words, dict->size * sizeof(char *));
/* **** 3 **** */
}
/*add word to dictionary*/
This one is the problem:
dictionary[dict->counter] = malloc(strlen(word) * sizeof(char) + 1);
strcpy(dictionary[dict->counter], word);
dict->counter++;
free(word); /* **** 4 **** */
}
The problem is that dictionary was saved before you called realloc. realloc might make a brand-new memory allocation, in which case it will automatically free() the old one after copying its contents into the new one. So any copy of the pointer which you made before calling realloc might end up pointing to unallocated memory. Writing to unallocated memory is a big no-no; in this particular case, you're probably overwriting malloc's bookkeeping information about the unallocated block, which is why it detects the problem and complains. Count yourself lucky: lots of memory corruption problems go undetected for quite a while until the factory explodes.
Some other issues which I noticed while writing this, with numbered comments in the source:
There's actually no need for the variable dictionary at all.
dict->size is an integer. Forcing conversion to a floating point number and then truncating back to an integer is not very useful. Prefer dict->size += dict->size/2;. Even better would be to first make sure that dict->size isn't so big that increasing it will cause integer wraparound. (This is not undefined behaviour on unsigned types like size_t, but it's not going to produce correct results.)
Here you could actually use a temporary, because realloc might return NULL indicating a memory allocation failure. If that happens, the original allocation is not automatically freed, and you don't have a way to free it. (Actually you do, since you have a variable confusingly called dictionary, but in point 1 I recommended that you get rid of it.) A more idiomatic call would be:
if(dict->counter == dict->size) {
/*increrase size of dictionary*/
dict->size += dict->size / 2; /* See point 2, above */
char** new_words = realloc(dict->words, dict->size * sizeof(*new_words));
if (new_words == NULL) {
/* Report allocation error and free all the memory you've allocated */
/* Then probably exit(1) but if this were a library function, just
* return some kind of failure indication so that the caller can do
* their own clean-up.
*/
}
dict->words = new_words;
}
dict->words[dict->counter] = word; /* See point 4, below */
You're freeing word here because it was allocated in parse_line(). But if you know you're going to free it anyway, there wasn't much point making a copy of it first. You might as well just use it. (But you need to document the fact that this function takes ownership of the word passed as an argument.)
It might be considered cleaner to do the copy as you do but then not free the argument, leaving it for the caller to do that. That would have the advantage of allowing the caller to provide a word which hadn't been dynamically allocated, or use the word for some other purpose.
(Not indicated in this snippet, but nonetheless important). Every block of allocated memory must be freed. So your program should execute free exactly as many times as it executed malloc. But you don't do that; you just free the array of word pointers, and let the words pointed to in that array leak. You should fix that. (Note that you don't need an extra call to free for a call to realloc, since realloc itself frees the old block if it allocates a new one. You only need to match the initial malloc with a free.)

Related

How to Delete Duplicate Elements from Dynamically Allocated String Array in C

I have created a program in C that reads in a word file and counts how many words are in that file, along with how many times each word occurs.
When I run it through Valgrind I either get too many bytes lost or a Segmentation Fault.
How can I remove a duplicate element from a dynamically allocated array and free the memory as well?
Gist: wordcount.c
/*
 * Split words on the characters in delim, copy every token into temp, then
 * collapse duplicates while filling *dictionary with one entry per word and
 * its occurrence count. Always returns 0.
 *
 * words is modified in place by strtok(). None of the malloc() results are
 * checked.
 */
int tokenize(Dictionary **dictionary, char *words, int total_words)
{
char *delim = " .,?!:;/\"\'\n\t";
/* temp is indexed as an array of char *, but the size requested here is
 * strlen(words) + 1 BYTES (sizeof(char) == 1), not pointers */
char **temp = malloc(sizeof(char) * strlen(words) + 1);
char *token = strtok(words, delim);
*dictionary = (Dictionary*)malloc(sizeof(Dictionary) * total_words);
int count = 1, index = 0;
/* copy each token; index ends up as the number of tokens actually found */
while (token != NULL)
{
temp[index] = (char*)malloc(sizeof(char) * strlen(token) + 1);
strcpy(temp[index], token);
token = strtok(NULL, delim);
index++;
}
/* the loops below are bounded by total_words, not by index */
for (int i = 0; i < total_words; ++i)
{
for (int j = i + 1; j < total_words; ++j)
{
if (strcmp(temp[i], temp[j]) == 0) // <------ segmentation fault occurs here
{
count++;
/* shift left over the duplicate; the last iteration reads temp[total_words],
 * and the pointer previously held in temp[j] is overwritten without free() */
for (int k = j; k < total_words; ++k) // <----- loop to remove duplicates
temp[k] = temp[k+1];
total_words--;
j--;
}
}
int length = strlen(temp[i]) + 1;
(*dictionary)[i].word = (char*)malloc(sizeof(char) * length);
strcpy((*dictionary)[i].word, temp[i]);
(*dictionary)[i].count = count;
count = 1;
}
/* only the pointer array is released; the strings in temp[i] are not */
free(temp);
return 0;
}
Thanks in advance.
Without A Minimal, Complete, and Verifiable example, there is no guarantee that additional problems do not originate elsewhere in your code, but the following need careful attention:
char **temp = malloc(sizeof(char) * strlen(words) + 1);
Above you are allocating pointers, not words, so your allocation is too small by a factor of sizeof (char*) / sizeof (char). To prevent such problems, use sizeof *thepointer and you will always have the correct size, e.g.
char **temp = malloc (sizeof *temp * strlen(words) + 1);
(unless you plan on providing a sentinel NULL as the final pointer, then + 1 is unnecessary. You must also validate the return (see below))
Next:
*dictionary = (Dictionary*)malloc(sizeof(Dictionary) * total_words);
There is no need to cast the return of malloc; it is unnecessary. See: Do I cast the result of malloc?. Further, if *dictionary was previously allocated elsewhere, the allocation above creates a memory leak because you lose the reference to the original pointer. If it has been previously allocated, you need realloc, not malloc. And if it wasn't allocated, a better way of writing it would be:
*dictionary = malloc (sizeof **dictionary * total_words);
You must also validate that the allocation succeeds before attempting to use the block of memory, e.g.
if (! *dictionary) {
perror ("malloc - *dictionary");
exit (EXIT_FAILURE);
}
In:
temp[index] = (char*)malloc(sizeof(char) * strlen(token) + 1);
sizeof(char) is always 1 and can be omitted. Better written as:
temp[index] = malloc (strlen(token) + 1);
or better, allocate and validate in a single block:
if (!(temp[index] = malloc (strlen(token) + 1))) {
perror ("malloc - temp[index]");
exit (EXIT_FAILURE);
}
then
strcpy(temp[index++], token);
Next, while total_words may be equal to the words in temp, you have only validated that you have index number of words. That combined with your original allocation times sizeof (char) instead of sizeof (char *), makes it no wonder there can be segfaults where you attempt to iterate over your list of pointers in temp. Better:
for (int i = 0; i < index; ++i)
{
for (int j = i + 1; j < index; ++j)
(The same applies to your k loop as well.) Additionally, since you have allocated each temp[index], when you shuffle pointers with temp[k] = temp[k+1]; you overwrite the pointer address in temp[k], causing a memory leak with every pointer you overwrite. Each temp[k] that is overwritten should be freed before the assignment is made.
While you are updating total_words--, there still to this point has never been a validation that index == total_words, and in the event they are not, you can have no confidence in total_words or that you won't segfault attempting to iterate over uninitialized pointers as the result.
The rest appears workable, but after the changes above are made, you should ensure that there are no additional changes needed. Look things over and let me know if you need additional help. (And with an MCVE, I'm happy to help further.)
Additional Problems
I apologize for the delay, real-world called -- and this took a lot longer than anticipated, because what you have is an awkward slow-motion logical train-wreck. First and foremost, while there is nothing wrong with reading an entire text-file file into a buffer with fread -- the buffer is NOT nul-terminated and therefore cannot be used with any functions expecting a string. Yes, strtok, strcpy or any string function will read past the end of word_data looking for the nul-terminating character (well out into memory you don't own) resulting in a SegFault.
Your various scattered +1 tacked onto your malloc allocations now make a little more sense, as it appears you were looking for where you needed to add an additional character to make sure you could nul-terminate word_data, but couldn't quite figure out where it went. (don't worry, I straightened that out for you, but it is a big hint that you are probably going about this in the wrong way -- reading with POSIX getline or fgets is probably a better approach than the file-at-once for this type of text processing)
That is literally just the tip of the iceberg in the problems encountered in your code. As hinted at earlier, in tokenize, you failed to validate that index equals total_words. This ends up being important given your choice of delim, which includes the ASCII apostrophe (or single-quote). This causes your index to exceed the word_count any time a plural-possessive or contraction is encountered in the buffer (e.g. "can't" is split into "can" and "t", "Peter's" is split into "Peter" and "s", etc.). You will have to decide how you want to resolve this; I have simply removed the single quote for now.
Your logic in both tokenize and count_words was difficult to follow, and just wrong in some aspects, and your return type (void) for read_file provided absolutely no way to indicate a success (or failure) within. Always choose a return type that provides meaningful information from which you can determine whether a critical function has succeeded or failed (reading your data qualifies as critical).
If it provides a return -- use it. This applies to all functions that can fail (including functions like fseek)
Returning 0 from tokenize misses the return of the number of words (allocated structs) in dictionary, leaving you unable to properly free the information and leaving you to guess at some number to display (e.g. for (int i = 0; i < 333; ++i) in main()). You need to track the number of dictionary structs and member word that are allocated in tokenize (keep an index, say dindex). Then returning dindex to main() (assigned to hello in your code) provides the information you need to iterate over the structs in main() to output your information, as well as to free each allocated word before freeing the pointers.
If you don't have an accurate count of the number of allocated dictionary structs back in main(), you have failed in the two responsibilities you have regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed. If you don't know how many blocks there are, then you haven't done (1) and can't do (2).
This is a nit about style, and while not an error, the standard coding style for C avoids the use of Initialcaps, camelCase or MixedCase variable names in favor of all lower-case while reserving upper-case names for use with macros and constants. It is a matter of style -- so it is completely up to you, but failing to follow it can lead to the wrong first impression in some circles.
Rather than carry on for another handful of paragraphs, I've reworked your example for you and added a few comments inline. Go through it; I haven't punishingly tested it for all corner-cases, but it should be a sound base to build from. You will note in going through it that your count_words and tokenize have been simplified. Try to understand why what was done was done, and ask if you have any questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
/* One distinct word together with the number of times it occurred. */
typedef struct {
    char *word;     /* the word's text; tokenize() stores a malloc'ed copy here */
    int   count;    /* occurrence count; set to 1 when the word is first stored */
} dictionary_t;
/*
 * Read the whole of file into a freshly allocated, nul-terminated buffer.
 *
 * file   - open, seekable stream; it is read from the beginning
 * words  - receives the buffer on success (caller frees); set to NULL if the
 *          buffer had to be released after a short read
 * length - receives the number of bytes read (0 on failure)
 *
 * Returns *words on success, NULL on any seek, tell, allocation or read
 * failure (a message is printed with perror).
 */
char *read_file (FILE *file, char **words, size_t *length)
{
    long end;
    size_t size;

    *length = 0;

    if (fseek (file, 0, SEEK_END) == -1) {
        perror ("fseek SEEK_END");
        return NULL;
    }

    /* ftell reports failure as -1L; check before converting to size_t,
     * otherwise the error becomes an enormous allocation request */
    end = ftell (file);
    if (end < 0) {
        perror ("ftell");
        return NULL;
    }
    size = (size_t)end;

    if (fseek (file, 0, SEEK_SET) == -1) {
        perror ("fseek SEEK_SET");
        return NULL;
    }

    /* +1 needed to nul-terminate buffer to pass to strtok */
    if (!(*words = malloc (size + 1))) {
        perror ("malloc - size");
        return NULL;
    }

    if (fread (*words, 1, size, file) != size) {
        perror ("fread words");
        free (*words);
        *words = NULL;          /* do not leave a dangling pointer behind */
        return NULL;
    }

    *length = size;
    (*words)[size] = 0;         /* nul-terminate buffer - critical */

    return *words;
}
/*
 * Split words into tokens and build an array holding each distinct token
 * once, together with its occurrence count.
 *
 * dictionary  - receives the malloc'ed array (NULL on failure); every .word
 *               member is itself malloc'ed and must be freed by the caller
 * words       - nul-terminated text; modified in place by strtok()
 * total_words - expected number of words; used only as the initial capacity,
 *               the array grows if more distinct tokens turn up
 *
 * Returns the number of entries stored in *dictionary, or -1 when memory
 * cannot be allocated (nothing is left allocated in that case).
 */
int tokenize (dictionary_t **dictionary, char *words, int total_words)
{
    // char *delim = " .,?!:;/\"\'\n\t";   /* don't split on apostrophes */
    char *delim = " .,?!:;/\"\n\t";
    size_t capacity = total_words > 0 ? (size_t)total_words : 1;
    size_t used = 0;
    dictionary_t *dict = malloc (capacity * sizeof *dict);

    *dictionary = NULL;
    if (!dict) {
        perror ("malloc - dictionary");
        return -1;
    }

    /* Tokens are compared straight out of the words buffer, so no temporary
     * copy of every token is needed and the token count cannot overrun a
     * buffer sized from total_words. */
    for (char *token = strtok (words, delim); token != NULL;
         token = strtok (NULL, delim)) {
        size_t j = 0;

        while (j < used && strcmp (dict[j].word, token) != 0)
            j++;

        if (j < used) {                 /* seen before: just count it */
            dict[j].count++;
            continue;
        }

        if (used == capacity) {         /* more distinct words than expected */
            dictionary_t *grown = realloc (dict, 2 * capacity * sizeof *grown);
            if (!grown) {
                perror ("realloc - dictionary");
                goto fail;
            }
            dict = grown;
            capacity *= 2;
        }

        if (!(dict[used].word = malloc (strlen (token) + 1))) {
            perror ("malloc (*dictionary)[dindex].word");
            goto fail;
        }
        strcpy (dict[used].word, token);
        dict[used++].count = 1;
    }

    *dictionary = dict;
    return (int)used;

fail:
    while (used > 0)
        free (dict[--used].word);       /* free storage for words */
    free (dict);                        /* before freeing the array */
    return -1;
}
/*
 * Count whitespace-separated words in the first length bytes of words.
 *
 * A word starts at every non-whitespace byte that is either the first byte
 * or follows a whitespace byte. The buffer need not be nul-terminated.
 */
int count_words (char *words, size_t length)
{
    int count = 0;
    int prev_is_space = 1;      /* treat start of buffer as following a space */

    for (size_t i = 0; i < length; i++) {
        /* <ctype.h> functions require a value representable as unsigned
         * char; a plain char holding a byte >= 0x80 may be negative */
        int is_space = isspace ((unsigned char)words[i]) != 0;

        if (prev_is_space && !is_space)
            count++;
        prev_is_space = is_space;
    }

    return count;
}
/*
 * Read a text file (argv[1], or stdin when no argument is given), print the
 * word count, then print every distinct word with its occurrence count.
 * Returns 0 on success, 1 on any failure.
 */
int main (int argc, char **argv)
{
    char *word_data = NULL;
    int word_count, hello;
    size_t length = 0;
    dictionary_t *dictionary = NULL;
    FILE *input = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!input) {   /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    if (!read_file (input, &word_data, &length)) {
        fprintf (stderr, "error: file_read failed.\n");
        if (input != stdin) fclose (input);     /* don't leak the stream */
        return 1;
    }
    if (input != stdin) fclose (input);         /* close file if not stdin */

    word_count = count_words (word_data, length);
    printf ("wordct: %d\n", word_count);

    /* number of dictionary words returned in hello */
    if ((hello = tokenize (&dictionary, word_data, word_count)) <= 0) {
        fprintf (stderr, "error: no words or tokenize failed.\n");
        free (dictionary);      /* may be allocated when 0 words were found */
        free (word_data);
        return 1;
    }

    for (int i = 0; i < hello; ++i) {
        printf("%-16s : %d\n", dictionary[i].word, dictionary[i].count);
        free (dictionary[i].word);  /* you must free word storage */
    }
    free (dictionary);          /* free pointers */
    free (word_data);           /* free buffer */

    return 0;
}
Let me know if you have further questions.
There are a few things that you need to do to make your code work:
Fix the memory allocation of temp by replacing sizeof(char) with sizeof(char *) like so:
char **temp = malloc(sizeof(char *) * strlen(words) + 1);
Fix the memory allocation of dictionary by replacing sizeof(Dictionary) with sizeof(Dictionary *):
*dictionary = (Dictionary*)malloc(sizeof(Dictionary *) * (*total_words));
Pass the address of address of word_count when calling tokenize:
int hello = tokenize(&dictionary, word_data, &word_count);
Replace all occurrences of total_words in tokenize function with (*total_words). In the tokenize function signature, you can replace int total_words with int *total_words.
You should also replace the hard-coded value of 333 in your for loop in the main function with word_count.
After you make these changes, your code should work as expected. I was able to run it successfully with these changes.

dynamically allocating my 2d array in c

Any hints on how I would dynamically allocate myArray so I can enter any amount of strings and it would store correctly.
/*
 * Question code: reads one line from stdin, splits it on spaces/newlines and
 * tries to store each word in myArray before printing them back.
 *
 * As posted it does not compile: counter is assigned without a declaration.
 */
int main()
{
char myArray[1][1]; //how to dynamically allocate the memory? (room for one 1-byte string only)
counter = 0;
char *readLine; // passed to getline() below without being initialised
char *word;
char *rest;
printf("\n enter: ");
ssize_t buffSize = 0; // getline() takes a size_t * for this argument
getline(&readLine, &buffSize, stdin);//get user input
//tokenize the strings
while(word = strtok_r(readLine, " \n", &rest )) {
strcpy(myArray[counter], word); // copies into the fixed 1x1 array declared above
counter++;
readLine= rest; // continue scanning from where the previous token ended
}
//print the elements user has entered
int i =0;
for(i = 0;i<counter;i++){
printf("%s ",myArray[i]);
}
printf("\n");
}
Use realloc like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read one line from stdin, split it on spaces/newlines and store a copy of
 * every word in a pointer array that grows by one slot per word.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if memory runs out.
 */
int main(void){
    char **myArray = NULL;
    char *readLine = NULL;
    size_t buffSize = 0;
    size_t counter = 0;
    char *word, *rest, *p;
    int status = EXIT_SUCCESS;

    printf("\n enter: ");
    /* on EOF/error the buffer contents are not usable: skip tokenizing */
    if (getline(&readLine, &buffSize, stdin) != -1) {
        /* strtok_r takes the buffer on the first call and NULL afterwards */
        for (p = readLine; (word = strtok_r(p, " \n", &rest)) != NULL; p = NULL) {
            /* grow through a temporary so the old block survives a failure */
            char **grown = realloc(myArray, (counter + 1) * sizeof(*grown));
            char *copy;

            if (grown == NULL) {
                perror("realloc");
                status = EXIT_FAILURE;
                break;
            }
            myArray = grown;

            copy = strdup(word);
            if (copy == NULL) {
                perror("strdup");
                status = EXIT_FAILURE;
                break;
            }
            myArray[counter++] = copy;
        }
    }
    free(readLine);

    for(size_t i = 0; i < counter; i++){
        printf("<%s> ", myArray[i]);
        free(myArray[i]);
    }
    printf("\n");
    free(myArray);
    return status;
}
Here is one way you might approach this problem. If you are going to dynamically allocate storage for an unknown number of words of unknown length, you can start with a buffSize that seems reasonable, allocate that much space for the readLine buffer, and grow this memory as needed. Similarly, you can choose a reasonable size for the number of words expected, and grow word storage as needed.
In the program below, myArray is a pointer to pointer to char. arrSize is initialized so that pointers to 100 words may be stored in myArray. First, readLine is filled with an input line. If more space than provided by the initial allocation is required, the memory is realloced to be twice as large. After reading in the line, the memory is again realloced to trim it to the size of the line (including space for the '\0').
strtok_r() breaks the line into tokens. The pointer store is used to hold the address of the memory allocated to hold the word, and then word is copied into this memory using strcpy(). If more space is needed to store words, the memory pointed to by myArray is realloced and doubled in size. After all words have been stored, myArray is realloced a final time to trim it to its minimum size.
When doing this much allocation, it is nice to write functions which allocate memory and check for errors, so that you don't have to do this manually every allocation. xmalloc() takes a size_t argument and an error message string. If an allocation error occurs, the message is printed to stderr and the program exits. Otherwise, a pointer to the allocated memory is returned. Similarly, xrealloc() takes a pointer to the memory to be reallocated, a size_t argument, and an error message string. Note here that realloc() can return a NULL pointer if there is an allocation error, so you need to assign the return value to a temporary pointer to avoid a memory leak. Moving realloc() into a separate function helps protect you from this issue. If you assigned the return value of realloc() directly to readLine, for example, and if there were an allocation error, readLine would no longer point to the previously allocated memory, which would be lost. This function prints the error message and exits if there is an error.
Also, you need to free all of these memory allocations, so this is done before the program exits.
This method is more efficient than reallocing memory for every added character in the line, and for every added pointer to a word in myArray. With generous starting values for buffSize and arrSize, you may only need the initial allocations, which are then trimmed to final size. Of course, there are still the individual allocations for each of the individual words. You could also use strdup() for this part, but you would still need to remember to free those allocations as well.Still, not nearly as many allocations will be needed as when readLine and myArray are grown one char or one pointer at a time.
#define _POSIX_C_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void * xmalloc(size_t size, char *msg);
void * xrealloc(void *ptr, size_t size, char *msg);
/*
 * Read one line from stdin into a buffer that doubles as needed, split it
 * into words, store a heap copy of each word in an array that also doubles
 * as needed, print the words and release everything.
 *
 * All allocations go through xmalloc()/xrealloc(), which terminate the
 * program on failure.
 */
int main(void)
{
    char **myArray;
    size_t buffSize = 1000;     /* current capacity of readLine in bytes */
    size_t arrSize = 100;       /* current capacity of myArray in pointers */
    size_t charIndex = 0;       /* number of characters stored in readLine */
    size_t wordIndex = 0;       /* number of words stored in myArray */
    char *readLine;
    char *inLine;
    char *word;
    char *rest;
    char *store;
    int c;

    /* Initial allocations */
    readLine = xmalloc(buffSize, "Allocation error: readLine");
    myArray = xmalloc(sizeof(*myArray) * arrSize,
                      "Allocation error: myArray\n");

    /* Get user input */
    printf("\n enter a line of input:\n");

    while ((c = getchar()) != '\n' && c != EOF) {
        if (charIndex + 1 >= buffSize) {      // keep room for '\0'
            buffSize *= 2;
            readLine = xrealloc(readLine, buffSize,
                                "Error in readLine realloc()\n");
        }
        readLine[charIndex++] = c;
    }
    readLine[charIndex] = '\0';             // add '\0' terminator

    /* If you must, trim the allocation now */
    readLine = xrealloc(readLine, strlen(readLine) + 1,
                       "Error in readLine trim\n");

    /* Tokenize readLine: first call gets the buffer, later calls get NULL */
    inLine = readLine;
    while((word = strtok_r(inLine, " \n", &rest)) != NULL) {
        store = xmalloc(strlen(word) + 1, "Error in word allocation\n");
        strcpy(store, word);

        if (wordIndex >= arrSize) {
            arrSize *= 2;
            myArray = xrealloc(myArray, sizeof(*myArray) * arrSize,
                               "Error in myArray realloc()\n");
        }
        myArray[wordIndex] = store;
        wordIndex++;
        inLine = NULL;
    }

    /* You can trim this allocation, too -- but only when there is something
     * to keep: with no words the requested size would be 0, and a zero-size
     * realloc may free the block and return NULL, which xrealloc() would
     * report as an allocation error. */
    if (wordIndex > 0) {
        myArray = xrealloc(myArray, sizeof(*myArray) * wordIndex,
                          "Error in myArray trim\n");
    }

    /* Print words */
    for(size_t i = 0; i < wordIndex; i++){
        printf("%s ",myArray[i]);
    }
    printf("\n");

    /* Free allocated memory */
    for (size_t i = 0; i < wordIndex; i++) {
        free(myArray[i]);
    }
    free(myArray);
    free(readLine);

    return 0;
}
/*
 * malloc() wrapper that never returns NULL.
 *
 * size - number of bytes requested; a request for 0 bytes is served as a
 *        1-byte allocation, because malloc(0) is allowed to return NULL
 *        without that being an error
 * msg  - message printed to stderr (followed by a newline) on failure
 *
 * Returns the allocated block; terminates the program with EXIT_FAILURE if
 * the allocation fails.
 */
void * xmalloc(size_t size, char *msg)
{
    void *temp = malloc(size != 0 ? size : 1);

    if (temp == NULL) {
        fprintf(stderr, "%s\n", msg);
        exit(EXIT_FAILURE);
    }
    return temp;
}
/*
 * realloc() wrapper that never returns NULL.
 *
 * ptr  - block to resize (or NULL to allocate a new one)
 * size - new size in bytes; a request for 0 bytes is served as a 1-byte
 *        block, because a zero-size realloc may free ptr and return NULL
 *        even though nothing went wrong
 * msg  - message printed to stderr (followed by a newline) on failure
 *
 * Returns the resized block; terminates the program with EXIT_FAILURE if
 * the reallocation fails. Going through the temporary means the caller's
 * pointer is never overwritten with NULL.
 */
void * xrealloc(void *ptr, size_t size, char *msg)
{
    void *temp = realloc(ptr, size != 0 ? size : 1);

    if (temp == NULL) {
        fprintf(stderr, "%s\n", msg);
        exit(EXIT_FAILURE);
    }
    return temp;
}
I suggest you first scan the data and then call malloc() with the appropriate size.
Otherwise, you can use realloc() to reallocate memory as you go through the data.

Cannot get realloc() to work

FILE *file;
file = fopen(argv[1], "r");
char *match = argv[2];
if (file == NULL) {
printf("File does not exist\n");
return EXIT_FAILURE;
}
int numWords = 0, memLimit = 20;
char** words = (char**) calloc(memLimit, sizeof(char));
printf("Allocated initial array of 20 character pointers.\n");
char string[20];
while (fscanf(file, "%[a-zA-Z]%*[^a-zA-Z]", string) != EOF) {
words[numWords] = malloc(strlen(string) + 1 * sizeof(char));
strcpy(words[numWords], string);
printf("Words: %s\n", words[numWords]);
numWords++; /*keep track of indexes, to realloc*/
if (numWords == memLimit) {
memLimit = 2 * memLimit;
words = (char**) realloc(words, memLimit * sizeof(char*)); /*Fails here*/
printf("Reallocated array of %d character pointers.\n", memLimit);
}
}
Code should open and read a file containing words with punctuation, spaces etc and store in a string, but after 20 tries it throws an error, and I can't seem to get realloc() to work here, which I'm expecting to be the problem. The array is dynamically allocated 20 char pointers, at which when limit is reached, it should realloc by double. How can I get around this?
Two notes. First, you shouldn't ever cast the return value of calloc/malloc/realloc. See this for more information.
Second, as others have pointed out in comments, the first calloc statement uses sizeof(char) and not sizeof(char*) like it should.
words is a pointer to a pointer. The idea is to allocate an array of pointers.
The below is wrong as it allocates for memLimit characters rather than memLimit pointers.
This is the main issue
char** words = (char**) calloc(memLimit, sizeof(char)); // bad
So use an easy idiom: allocate memLimit groups of whatever words points to. It is easier to write, read and maintain.
char** words = calloc(memLimit, sizeof *words);
Avoid the while (scanf() != EOF) hole. Recall that various results can come from scanf() family. It returns the count of successfully scanned fields or EOF. That is typically 1 of at least 3 options. So do not test for one result you do not want, test for the one result you do want.
// while (fscanf(file, "%[a-zA-Z]%*[^a-zA-Z]", string) != EOF) {
while (fscanf(file, "%[a-zA-Z]%*[^a-zA-Z]", string) == 1) {
The above example may never return 0, but the one below easily could.
int d;
while (fscanf(file, "%d", &d) == 1) {
@Enzo Ferber rightly suggests using "%s". I further recommend following the above idiom and restricting the input width to 1 less than the size of the buffer.
char string[20];
while (fscanf(file, "%19s", string) == 1) {
Suggest the habit of checking allocation result.
// better to use `size_t` rather than `int `for array sizes.
size_t newLimit = 2u * memLimit;
char** newptr = realloc(words, newLimit * sizeof *newptr);
if (newptr == NULL) {
puts("Out-of-memory");
// Code still can use old `words` pointer of size `memLimit * sizeof *words`
return -1;
}
memLimit = newLimit;
words = newptr;
}
Errors
Don't cast malloc/calloc returns. There's no need for it.
Your first sizeof is wrong. It should be sizeof(char*)
That scanf() format string. %s does the job just fine.
Code
The following code worked for me (printed one word per line):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read whitespace-separated words from the file named by argv[1] into a
 * pointer array that starts with room for 20 words and doubles when full,
 * printing each word as it is stored.
 *
 * Words longer than 19 characters are read in pieces of at most 19
 * characters, because the scan buffer holds 20 bytes.
 */
int main(int argc, char *argv[])
{
    FILE *file;
    size_t numWords = 0, memLimit = 20;
    char **words;
    char string[20];
    int status = EXIT_SUCCESS;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "realloc");
        return EXIT_FAILURE;
    }

    file = fopen(argv[1], "r");
    if (file == NULL) {
        printf("File does not exist\n");
        return EXIT_FAILURE;
    }

    words = calloc(memLimit, sizeof *words);
    if (words == NULL) {
        perror("calloc");
        fclose(file);
        return EXIT_FAILURE;
    }
    printf("Allocated initial array of 20 character pointers.\n");

    /* "%19s" leaves room for the terminator; test for the one result we
     * want (1 field converted) rather than for "not EOF" */
    while (fscanf(file, "%19s", string) == 1) {
        words[numWords] = malloc(strlen(string) + 1);
        if (words[numWords] == NULL) {
            perror("malloc");
            status = EXIT_FAILURE;
            break;
        }
        strcpy(words[numWords], string);
        printf("Words: %s\n", words[numWords]);
        numWords++;     /*keep track of indexes, to realloc */

        if (numWords == memLimit) {
            /* grow through a temporary so words stays valid on failure */
            char **grown = realloc(words, 2 * memLimit * sizeof *grown);
            if (grown == NULL) {
                perror("realloc");
                status = EXIT_FAILURE;
                break;
            }
            words = grown;
            memLimit *= 2;
            printf("Reallocated array of %zu character pointers.\n",
                   memLimit);
        }
    }

    while (numWords > 0)
        free(words[--numWords]);
    free(words);
    fclose(file);
    return status;
}
Called with ./realloc realloc.c
Hope it helps.
Your first allocation is the problem. You allocate 20 chars and treat them as 20 char pointers. You overrun the allocated buffer and corrupt your memory.
The second allocation fails because the heap is corrupted.

C reallocating to find end of line, but not getting entire line?

I have a function that is finding the number of lines in a text file and returning these lines. I have to dynamically resize the array. However, it is only displaying the last few letters in each line within the buffer when I print it. I'm new to C. This is the main part of the code:
char * foo( char **buffer, FILE * infile )
...
int buffSizer = 10;
*buffer = calloc( buffSizer, 1);
do {
char * result = fgets(*buffer, buffSizer, infile);
if (result == NULL){
free(*buffer);
return(NULL);
}
char * ptr = strchr(*buffer, '\n');
if (ptr){
return(*buffer);
}
buffSiz = buffSizer * 2;
*buffer = realloc(*buffer, buffSizer);
} while (1);
Every time you need to realloc, you immediately go back and overwrite everything you've read so far. You need to account for an offset into *buffer for characters you've already read.
For example, you could add:
int offset = 0;
Along with the buffSiz declaration, and then use it like:
char * result = fgets(*buffer + offset, buffSiz - offset, infile);
And then when you realloc, add:
offset = buffSiz - 1; // -1 to account for null character stored by fgets()
Before this line:
buffSiz = buffSiz * 2;
As a bit of an aside, it's dangerous to reassign *buffer at the same time you pass it to realloc. If an error occurs, you'll leak the original allocation and the data will be unrecoverable. This example is safer:
char *tmp = realloc(*buffer, buffSiz);
if (!tmp)
{
free(*buffer);
return NULL;
}
*buffer = tmp;

Using 2D array of char pointer in C

I want to read a file and write each line to array of char. As I don't know the amount of lines, therefore I thought the most efficient way is to use 2D array of char pointer. However I get segmentation fault.
My question might be duplicate of this one :
2D array of char pointers --> Segmentation fault?
But I couldn't figure the correct syntax for C so I couldn't try.
Here's my code:
FILE *file = fopen ( filename, "r" );
if ( file != NULL )
{
char line [ 128 ]; /* or other suitable maximum line size */
char **new_line;
int i = 0;
while ( fgets ( line, sizeof line, file ) != NULL ) /* read a line */
{
strcpy(new_line[i], line);
i++;
}
Memory is not allocated for new_line which causes the segmentation fault.
If you know the number of lines in advance, you can declare new_line as a local 2D array. In that case your way of accessing it will work fine.
#define MAX_LINES 20
#define MAX_CHARS 128
...
char new_line[MAX_LINES][MAX_CHARS] = {0};
...
Your problem here is that you don't know the maximum number of lines, so you have chosen a double pointer. In that case you first need to malloc space for some number n of line pointers, and then keep using realloc to grow the array whenever it fills up.
#define MAX_CHARS 128
#define N_NO_OF_LINES 10
...
/*
 * Read every line of `file` into a growable array of heap-allocated strings.
 * The array starts with room for N_NO_OF_LINES pointers and grows by
 * N_NO_OF_LINES each time it fills up. On any allocation failure the loop
 * stops early; new_line[0 .. lineCount-1] are always valid and must be freed
 * by the caller (each string, then the array itself).
 */
char line[MAX_CHARS] = {0};
char **new_line = malloc(sizeof *new_line * N_NO_OF_LINES);
int noOfLines = (new_line != NULL) ? N_NO_OF_LINES : 0; /* current capacity */
int lineCount = 0;                                      /* lines stored so far */
while (new_line != NULL && fgets (line, sizeof line, file) != NULL) /* read a line */
{
    if (lineCount >= noOfLines)
    {
        /* Grow through a temporary: if realloc fails, new_line is still
           valid and nothing read so far is lost or leaked. */
        char **tmp = realloc(new_line, sizeof *new_line * (noOfLines + N_NO_OF_LINES));
        if (tmp == NULL)
            break;
        new_line = tmp;
        noOfLines += N_NO_OF_LINES;
    }
    new_line[lineCount] = strdup(line); /* private copy; `line` is reused next iteration */
    if (new_line[lineCount] == NULL)
        break;
    lineCount++;
}
Note : Take care of null check for malloc and realloc
new_line is not initialized to a valid chunk of memory.
Roughly:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAXLINES 128
#define MAXLINELEN 100

/*
 * Read up to MAXLINES lines from "test.txt" into a heap-allocated array of
 * strings, print how many were read, then release every resource.
 *
 * A line longer than MAXLINELEN - 1 characters is split by fgets() across
 * several entries. The program exits with status 0 even when the file
 * cannot be opened, and with -1 when the pointer array cannot be allocated.
 */
int main() {
    FILE *file = fopen ( "test.txt", "r" );
    if ( file != NULL )
    {
        char line [ MAXLINELEN ]; /* or other suitable maximum line size */
        char **new_line = malloc(sizeof *new_line * MAXLINES);
        int i = 0;
        if (!new_line) {
            fclose(file);
            exit(-1);
        }
        while ( i < MAXLINES && (fgets ( line, sizeof line, file ) != NULL )) /* read a line */
        {
            new_line[i] = strdup(line);
            if (!new_line[i])
                break; /* out of memory: keep the lines read so far */
            i++;
        }
        printf("read %d lines\n", i);

        /* Release the copies, the pointer array and the file handle. */
        for (int j = 0; j < i; j++)
            free(new_line[j]);
        free(new_line);
        fclose(file);
    }
    exit(0);
}
You didn't allocate any memory for the new_line array. You need something like:
char **new_line = malloc(sizeof(char *) * MAX_LINES);
And then for each line, don't use strcpy, which will copy into a garbage pointer (the uninitialized new_line array). You probably want strdup(3):
new_line[i] = strdup(line);
You declare new_line as a pointer to char *, but it is never initialized so it points to some invalid memory address. You get the error when you write to that address.
You will probably want to allocate the memory, assign it to new_line, and then you can copy strings to it.
You need to alloc space for your strings.
Malloc returns a memory slot with the size you want but doesn't allow memory reallocation. For that you have realloc.
With malloc alone you would end up with a fixed size for your table, just as if you had declared it as a static array but initialized it later (well, I'm a bit against this sentence because malloc is much more than that, but for this purpose it is safe to say it).
Realloc does that: it reallocates memory, but it can be pretty dangerous if you don't use it correctly. And, in my opinion, it is not the most correct way to do this.
When you want to save something that you don't know the size, dynamic structures are the way to go.
You can use a linked-list-like data structure so that you can store as many words as you want, and then convert that list to an array.
I would go with something like this:
typedef struct _words{ //One node of the singly linked list used to collect words dynamically
    char *word;          //Heap-allocated copy of the word (NULL in the head node)
    struct _words *next; //Next node, or NULL at the end of the list
}words;

words *last; //Tail of the list. Must be set to the head node returned by init() before the first insertWord()

//Creates the empty head node that the list hangs off; the first real word is head->next.
//Returns the new node. Terminates the program with a failure status if memory runs out.
words *init(){
    words *new = malloc(sizeof(words));
    if(new == NULL) exit(EXIT_FAILURE);
    new->word = NULL; //The head node carries no word; never leave the pointer uninitialized
    new->next = NULL; //A good end/next reference
    return new;
}

//Appends a copy of `word` after the node pointed to by `last` and makes the new node the tail.
//`word` is copied, so the caller may reuse or free its buffer afterwards.
//Terminates the program with a failure status if memory runs out.
void insertWord(char *word){
    words *new = malloc(sizeof(words));
    if(new == NULL) exit(EXIT_FAILURE);
    new->word = malloc(strlen(word) + 1); //+1 for the terminating '\0'
    if(new->word == NULL) exit(EXIT_FAILURE);
    strcpy(new->word, word); //Actually store the word in the node
    new->next = NULL; //Or new->next = last->next; which is also NULL.
    last->next = new;
    last = new;
}
//Reads every line of `filename` into the linked list, then copies the words into a plain array.
//`filename`, init(), insertWord() and `last` are expected to be provided by the surrounding code.
int main(){ //Or the name of your function
    FILE *file = fopen ( filename, "r" );
    words *list = init();
    last = list;
    if ( file != NULL )
    {
        char line [ 128 ]; /* or other suitable maximum line size */
        int i = 0;
        while ( fgets ( line, sizeof line, file ) != NULL ) /* read a line */
        {
            insertWord(line);
            i++;
        }
        fclose(file); //Everything has been read; the file is no longer needed
        //Here, you already have all the words in your dynamic structure. You can now save them into an array
        char **array = malloc(i*sizeof(char*)); //i is the number of words.
        if(array == NULL) exit(EXIT_FAILURE);
        words *aux = list->next; //Skip the empty head node
        if(aux != NULL){
            for(int j=0;j<i && aux != NULL;j++){
                array[j] = malloc(strlen(aux->word) + 1); //+1 for the terminating '\0' that strcpy writes
                if(array[j] == NULL) exit(EXIT_FAILURE);
                strcpy(array[j], aux->word);
                aux = aux->next; // jump to the next word
            }
        }
        ...
    }
I think this might work, but I didn't try it. It is just meant to give you an idea of how to implement dynamic structures.
It is missing the calls to free and is not an actual stack, even if it is close to one.
Hope this helps.

Resources