I am trying to make a function that takes a string and a pointer to an array of strings and malloc() the array of char arrays and copies each individual word of the string. This is what I have so far, I think I'm close, I'm just struggling with using malloc() on an array of arrays.
int string_parser(char *inp, char **array_of_words_p[])
{
int CurrentChar = 0; //Variable Initialization
char *buffer; //Variable Initialization
/* Allocate memory and check for errors allocating memory */
//Allocate memory to buffer the size of the input string
buffer = (char*)malloc(strlen(inp));
if (buffer == NULL)
{
printf("Error allocating memory..\n");
return -1;
}
/* Move input string into buffer before processing */
for (CurrentChar = 0; CurrentChar < strlen(inp) + 1; CurrentChar++)
{ //For every character in input
if (inp != NULL)
{
//Move input character into buffer
buffer[CurrentChar] = inp[CurrentChar];
}
}
/* Convert string into array of words */
char ** stringbuffer = NULL;
//Convert string to array of words
char * CurrentWord = strtok_s(buffer, " ", *array_of_words_p);
//Variable Initialization
int numspaces = 0;
while (CurrentWord)
{
//Allocate memory for size of string
stringbuffer = (char**)realloc(stringbuffer, sizeof(char**) * ++numspaces);
if (stringbuffer == NULL)
{
return -1;
}
stringbuffer[numspaces - 1] = CurrentWord;
//Reset Current word to null
CurrentWord = strtok_s(NULL, " ", *array_of_words_p);
}
//Reallocate memory to include terminating character
stringbuffer = (char**)realloc(stringbuffer, sizeof(char**) * (numspaces + 1));
stringbuffer[numspaces] = 0;
/* Write processed data into returned argument */
*array_of_words_p = (char**)malloc(sizeof(char**) * (numspaces + 2));
memcpy(*array_of_words_p, stringbuffer, (sizeof(char*) * (numspaces + 2)));
free(stringbuffer);
return numspaces;
}
//Allocate memory to buffer the size of the input string
buffer = (char*)malloc(strlen(inp));
The size of the input string includes the terminating \0, so you need:
buffer = malloc(strlen(inp)+1);
//Convert string to array of words
char * CurrentWord = strtok_s(buffer, " ", *array_of_words_p);
It's unwise to abuse the *array_of_words_p for the context save variable, as this requires it to be initialized appropriately. Better:
char *context, *CurrentWord = strtok_s(buffer, " ", &context);
…
CurrentWord = strtok_s(NULL, " ", &context);
//Allocate memory for size of string
stringbuffer = (char**)realloc(stringbuffer, sizeof(char**) * ++numspaces);
It likely doesn't hurt (owing to equal pointer sizes), but sizeof(char**) is strictly speaking wrong, since the elements of the array of strings are of type char *. Correct:
stringbuffer = realloc(stringbuffer, sizeof (char *) * ++numspaces);
…
stringbuffer = realloc(stringbuffer, sizeof (char *) * (numspaces + 1));
/* Write processed data into returned argument */
*array_of_words_p = (char**)malloc(sizeof(char**) * (numspaces + 2));
memcpy(*array_of_words_p, stringbuffer, (sizeof(char*) * (numspaces + 2)));
free(stringbuffer);
You can spare this unnecessary copying and by the way avoid accessing the unallocated memory stringbuffer[numspaces+1] by replacing the above with just:
*array_of_words_p = stringbuffer;
Apart from that all, your function works and can be called like:
char **array_of_words;
int n = string_parser("this here is an example string", &array_of_words);
for (int i = 0; i < n; ++i) puts(array_of_words[i]);
Related
My program is using a main function to prompt the user to choose which function to use and then sending the reference to a char double pointer to the function they choose. In the function, I allocate memory dynamically for the number of strings. Then, for each string, I allocate memory depending on the incoming string length.
void readFile(char *** fileAsArray){
/* file name found, file opened, set to FILE *input */
int numWords = 0;
*fileAsArray = malloc(5000 * sizeof(**fileAsArray));
while(fgets(line, sizeof(line), input){
char *word = strtok(line, " \n");
while(word){
int wordSize = strlen(word);
int i;
(*fileAsArray)[numWords] = malloc(wordSize * sizeof((**fileAsArray)[numWords]));
(*fileAsArray)[numWords] = word;
printf("%s", (*fileAsArray)[numWords]); // CORRECT WHEN CALLED HERE
numWords++;
word = strtok(NULL, " \n");
}
}
printf("%s", (*fileAsArray)[0]); //INCORRECT WHEN CALLED HERE??
}
(*fileAsArray)[numWords] = malloc(wordSize * sizeof((**fileAsArray)[numWords]));
(*fileAsArray)[numWords] = word;
Is overwriting the pointer to allocated buffer by word and causing memory leak.
strcpy() should be used to copy strings.
Also you forgot to allocate for teminating null-character.
The part should be:
(*fileAsArray)[numWords] = malloc((wordSize + 1) * sizeof((**fileAsArray)[numWords]));
strcpy((*fileAsArray)[numWords], word);
I have a program, that splits strings based on the delimiter. I have also, 2 other functions, one that prints the returned array and another that frees the array.
My program prints the array and returns an error when the free array method is called. Below is the full code.
#include "stringsplit.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
unsigned long int getNofTokens(const char *string) {
char *stringCopy;
unsigned long int stringLength;
unsigned long int count = 0;
stringLength = (unsigned)strlen(string);
stringCopy = malloc((stringLength + 1) * sizeof(char));
strcpy(stringCopy, string);
if (strtok(stringCopy, " \t") != NULL) {
count++;
while (strtok(NULL, " \t") != NULL)
count++;
}
free(stringCopy);
return count;
}
char **split_string(const char *str, const char *split) {
unsigned long int count = getNofTokens(str);
char **result;
result = malloc(sizeof(char *) * count + 1);
char *tmp = malloc(sizeof(char) * strlen(str));
strcpy(tmp, str);
char *token = strtok(tmp, split);
int idx = 0;
while (token != NULL) {
result[idx++] = token;
token = strtok(NULL, split);
}
return result;
}
void print_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
char *currentPointer = split_string[i];
free(currentPointer);
}
free(split_string);
}
Also, do I need to explicitly add \0 at the end of the array or does strtok add it automatically?
There are some problems in your code:
[Major] the function getNofTokens() does not take the separator string as an argument, it counts the number of words separated by blanks, potentially returning an inconsistent count to its caller.
[Major] the size allocated in result = malloc(sizeof(char *) * count + 1); is incorrect: it should be:
result = malloc(sizeof(char *) * (count + 1));
Storing the trailing NULL pointer will write beyond the end of the allocated space.
[Major] storing the said NULL terminator at the end of the array is indeed necessary, as the block of memory returned by malloc() is uninitialized.
[Major] the copy of the string allocated and parsed by split_string cannot be safely freed because the pointer tmp is not saved anywhere. The pointer to the first token will be different from tmp in 2 cases: if the string contains only delimiters (no token found) or if the string starts with a delimiter (the initial delimiters will be skipped). In order to simplify the code and make it reliable, each token could be duplicated and tmp should be freed. In fact your free_split_string() function relies on this behavior. With the current implementation, the behavior is undefined.
[Minor] you use unsigned long and int inconsistently for strings lengths and array index variables. For consistency, you should use size_t for both.
[Remark] you should allocate string copies with strdup(). If this POSIX standard function is not available on your system, write a simple implementation.
[Major] you never test for memory allocation failure. This is OK for testing purposes and throw away code, but such potential failures should always be accounted for in production code.
[Remark] strtok() is a tricky function to use: it modifies the source string and keeps a hidden static state that makes it non-reentrant. You should avoid using this function although in this particular case it performs correctly, but if the caller of split_string or getNofTokens relied on this hidden state being preserved, it would get unexpected behavior.
Here is a modified version:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *string, const char *split) {
char *tmp = strdup(string);
size_t count = 0;
if (strtok(tmp, split) != NULL) {
count++;
while (strtok(NULL, split) != NULL)
count++;
}
free(tmp);
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
char *tmp = strdup(str);
char *token = strtok(tmp, split);
size_t idx = 0;
while (token != NULL && idx < count) {
result[idx++] = strdup(token);
token = strtok(NULL, split);
}
result[idx] = NULL;
free(tmp);
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
Here is an alternative without strtok() and without intermediary allocations:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
size_t getNofTokens(const char *str, const char *split) {
size_t count = 0;
size_t pos = 0, len;
for (pos = 0;; pos += len) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
count++;
}
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t pos, len, idx;
for (pos = 0, idx = 0; idx < count; pos += len, idx++) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
result[idx] = strndup(str + pos, len);
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
EDIT After re-reading the specification in your comment, there seems to be some potential confusion as to the semantics of the split argument:
if split is a set of delimiters, the above code does the job. And the examples will be split as expected.
if split is an actual string to match explicitly, the above code only works by coincidence on the examples given in the comment.
To implement the latter semantics, you should use strstr() to search for the split substring in both getNofTokens and split_string.
Here is an example:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *str, const char *split) {
const char *p;
size_t count = 1;
size_t len = strlen(split);
if (len == 0)
return strlen(str);
for (p = str; (p = strstr(p, split)) != NULL; p += len)
count++;
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t len = strlen(split);
size_t idx;
const char *p = str;
for (idx = 0; idx < count; idx++) {
const char *q = strstr(p, split);
if (q == NULL) {
q = p + strlen(p);
} else
if (q == p && *q != '\0') {
q++;
}
result[idx] = strndup(p, q - p);
p = q + len;
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
When debugging, take note of values that you got from malloc, strdup, etc. Let's call these values "the active set". It's just a name, so that we can refer to them. You get a pointer from those functions, you mentally add it to the active set. When you call free, you can only pass values from the active set, and after free returns, you mentally remove them from the set. Any other use of free is invalid and a bug.
You can easily find this out by putting breakpoints after all memory allocations, so that you can write down the pointer values, and then breakpoints on all frees, so that you can see if one of those pointer values got passed to free - since, again, to do otherwise is to misuse free.
This can be done also using "printf" debugging. Like this:
char *buf = malloc(...); // or strdup, or ...
fprintf(stderr, "+++ Alloc %8p\n", buf);
And then whenever you have free, do it again:
fprintf(stderr, "--- Free %8p\n", ptr);
free(ptr);
In the output of the program, you must be able to match every +++ with ---. If you see any --- with a value that wasn't earlier listed with a +++, there's your problem: that's the buggy invocation of free :)
I suggest using fprintf(stderr, ... instead of printf(..., since the former is typically unbuffered, so if your program crashes, you won't miss any output. printf is buffered on some architectures (and not buffered on others - so much for consistency).
Hello i am doing a project where i have to implement a hashTable that stores words based on a hash function. On a stress test i get malloc(): memory corruption
The initial declaration of the hashTable
hashTable = (char**)malloc(hashSize[0] * sizeof(char*));
This is the function i wrote to add word to hashTable of hashSize:
void addWord(char** hashTable, unsigned int hashSize, const char* word) {
int bucketIndex = hash(word, hashSize);
//printf("Word to add = %s, bucket = %d, hashTable size = %d\n", word, bucketIndex, hashSize);
if(hashTable[bucketIndex] == NULL) {
hashTable[bucketIndex] = (char*)malloc(strlen(word) * sizeof(char));
strcpy(hashTable[bucketIndex], word);
return;
}
/* checks for duplicats */
int exists = 0;
char* heyStack = (char*)malloc(strlen(hashTable[bucketIndex]));
memcpy(heyStack, hashTable[bucketIndex], strlen(hashTable[bucketIndex]));
char* token = strtok(heyStack, " ");
while(token) {
if(strcmp(token, word) == 0) {
exists = 1;
break;
}
token = strtok(NULL, " ");
}
/* end check for duplicates */
if(exists == 0) {
size_t bucketSize = strlen(hashTable[bucketIndex]);
hashTable[bucketIndex] = (char*)realloc(hashTable[bucketIndex], bucketSize + strlen(word) + 2);
memcpy(hashTable[bucketIndex] + bucketSize, " ", 1);
memcpy(hashTable[bucketIndex] + bucketSize + 1, word, strlen(word) + 1);
}
}
I have a stress test that adds 20k words to the table and it always breaks on the same word (no 10k something)
Any ideas on what i am doing wrong ?
Tyvm
You have to terminate the "strings" before passing it to functions that deal with strings, such as strlen() and strtok().
Allocate size of the string and one more byte for terminating null-character.
Terminate the "strings" by adding null character.
Notes:
They say you shouldn't cast the result of malloc() in C.
sizeof(char) will always be 1, so you don't need to multiply with it.
Corrected code:
void addWord(char** hashTable, unsigned int hashSize, const char* word) {
int bucketIndex = hash(word, hashSize);
//printf("Word to add = %s, bucket = %d, hashTable size = %d\n", word, bucketIndex, hashSize);
if(hashTable[bucketIndex] == NULL) {
size_t wordSize = strlen(word);
hashTable[bucketIndex] = malloc(wordSize + 1); /* size +1 */
memcpy(hashTable[bucketIndex], word, wordSize + 1); /* why did you use strcpy() only in here? */
return;
}
/* checks for duplicats */
int exists = 0;
size_t dataSize = strlen(hashTable[bucketIndex]);
char* heyStack = malloc(dataSize + 1); /* size +1 */
memcpy(heyStack, hashTable[bucketIndex], dataSize + 1); /* size +1 */
char* token = strtok(heyStack, " ");
while(token) {
if(strcmp(token, word) == 0) {
exists = 1;
break;
}
token = strtok(NULL, " ");
}
/* end check for duplicates */
if(exists == 0) {
size_t bucketSize = strlen(hashTable[bucketIndex]);
size_t wordSize = strlen(word);
hashTable[bucketIndex] = realloc(hashTable[bucketIndex], bucketSize + wordSize + 2);
memcpy(hashTable[bucketIndex] + bucketSize, " ", 1);
memcpy(hashTable[bucketIndex] + bucketSize + 1, word, wordSize + 1);
}
free(heyStack); /* do free what you allocated */
}
This code will be better if you add some code to check if malloc() and realloc() are successful.
I have buffer problem on this line
strcpy_s(*(pWords + word_count), word_length, pWord);
I'm trying to read a file from argv[1] and print out every single word in that file and their occurrence, but I can't figure out whats wrong..?!?
int main(int argc, char* argv[])
{
char *delimiters = argv[2]; // Prose delimiters
char buf[BUF_LEN]; // Buffer for a line of keyboard input
size_t str_size = INIT_STR_EXT; // Current memory to store prose
char* filePath = argv[1];
FILE *fP ;
char* pStr = malloc(str_size); // Pointer to prose to be tokenized
*pStr = '\0'; // Set 1st character to null
fopen_s(&fP, filePath, "r");
fread(buf, BUF_LEN, 10, fP);
size_t maxWords = 10; // Current maximum word count
int word_count = 0; // Current word count
size_t word_length = 0; // Current word length
char** pWords = calloc(maxWords, sizeof(char*)); // Stores pointers to the words
int* pnWord = calloc(maxWords, sizeof(int)); // Stores count for each word
size_t str_len = strnlen_s(buf, BUF_LEN); // Length used by strtok_s()
char* ptr = NULL; // Pointer used by strtok_s()
char* pWord = strtok_s(buf, delimiters, &ptr); // Find 1st word
if (!pWord)
{
printf("No words found. Ending program.\n");
return 1;
}
bool new_word = true; // False for an existing word
while (pWord)
{
// Check for existing word
for (int i = 0; i < word_count; ++i)
if (strcmp(*(pWords + i), pWord) == 0)
{
++*(pnWord + i);
new_word = false;
break;
}
if (new_word) // Not NULL if new word
{
//Check for sufficient memory
if (word_count == maxWords)
{ // Get more space for pointers to words
maxWords += WORDS_INCR;
pWords = realloc(pWords, maxWords*sizeof(char*));
// Get more space for word counts
pnWord = realloc(pnWord, maxWords*sizeof(int));
}
// Found a new word so get memory for it and copy it there
word_length = ptr - pWord; // Length of new word
*(pWords + word_count) = malloc(word_length);
strcpy_s(*(pWords + word_count), word_length, pWord); // Copy to array
*(pnWord + word_count++) = 1; // Increment word count
}
else
new_word = true; // Reset new word flag
pWord = strtok_s(NULL, delimiters, &ptr); // Find subsequent word
}
strcpy_s adds a null byte to the end of the string. You need to malloc(word_length+1).
There are two problems with this line:
fread(buf, BUF_LEN, 10, fP);
Firstly the buffer is too small by a factor of 10 as you read 10 elements.
Second, it does not read the file further than BUF_LEN (previously, *10).
Also the code does not take care of newline chars, as I cannot pass that in argv[2] delimiter spec, even as " \\n".
I suggest you replace fread() with a loop of fgets(), and redefine the word delimiters.
#define BUF_LEN 1000 // plenty of room
...
char buf[BUF_LEN+1]; // allow for 0 terminator
char delimiters[] = " \n\t"; // predefined
...
//size_t str_len = strnlen_s(buf, BUF_LEN); // unnecessary
while (fgets(buf, BUF_LEN, fP) != NULL) { // new outer loop
char* ptr = NULL; // carry on as you were
...
}
Next, as others commented, increase the string space allocation
*(pWords + word_count) = malloc(word_length+1);
In addition, although you have used the "safe" string functions, you did not check argc or the result of any of fopen_s(), fread(), malloc(), calloc(), realloc(), nor have you closed the file or released memory.
Looks to me like you forgot to get an additional byte for the 0 character.
Despite that: Instead of allocating a fixed buffer size for your file, you could get the filesize with fseek using SEEK_END and an offset of 0 to allocate that much memory+1 byte
I am trying to read in from stdin (passing in value from a file). I am reading each character from the string and storing it into a dynamically allocated string pointer. When needed I realloc the memory. I am trying to get as many characters as possible. Though I can limit it to 100,000 chars. But the realloc fails after some iteration. But if I specify a chunk size big, say 1048567 during the first initialization in malloc, I am able to read the string completely. Why is this?
Below is my program:
#include <stdio.h>
#include <stdlib.h>
int display_mem_alloc_error();
enum {
CHUNK_SIZE = 31 //31 fails. But 1048567 passes.
};
int display_mem_alloc_error() {
fprintf(stderr, "\nError allocating memory");
exit(1);
}
int main(int argc, char **argv) {
int numStr; //number of input strings
int curSize = CHUNK_SIZE; //currently allocated chunk size
int i = 0; //counter
int len = 0; //length of the current string
int c; //will contain a character
char *str = NULL; //will contain the input string
char *str_cp = NULL; //will point to str
char *str_tmp = NULL; //used for realloc
str = malloc(sizeof(*str) * CHUNK_SIZE);
if (str == NULL) {
display_mem_alloc_error();
}
str_cp = str; //store the reference to the allocated memory
scanf("%d\n", &numStr); //get the number of input strings
while (i != numStr) {
if (i >= 1) { //reset
str = str_cp;
len = 0;
curSize = CHUNK_SIZE;
}
c = getchar();
while (c != '\n' && c != '\r') {
*str = (char *) c;
//printf("\nlen: %d -> *str: %c", len, *str);
str = str + 1;
len = len + 1;
*str = '\0';
c = getchar();
if (curSize / len == 1) {
curSize = curSize + CHUNK_SIZE;
//printf("\nlen: %d", len);
printf("\n%d \n", curSize); //NB: If I comment this then the program simply exits. No message is displayed.
str_tmp = realloc(str_cp, sizeof(*str_cp) * curSize);
if (str_tmp == NULL) {
display_mem_alloc_error();
}
//printf("\nstr_tmp: %d", str_tmp);
//printf("\nstr: %d", str);
//printf("\nstr_cp: %d\n", str_cp);
str_cp = str_tmp;
str_tmp = NULL;
}
}
i = i + 1;
printf("\nlen: %d", len);
//printf("\nEntered string: %s\n", str_cp);
}
str = str_cp;
free(str_cp);
free(str);
str_cp = NULL;
str = NULL;
return 0;
}
Thanks.
When you realloc
str_tmp = realloc(str_cp, sizeof(*str_cp) * curSize);
if (str_tmp == NULL) {
display_mem_alloc_error();
}
//printf("\nstr_tmp: %d", str_tmp);
//printf("\nstr: %d", str);
//printf("\nstr_cp: %d\n", str_cp);
str_cp = str_tmp;
str_tmp = NULL;
you let str_cp point to the new block of memory, but str still points into the old, now freed block. Thus when you access what str points to in the next iteration, you invoke undefined behaviour.
You need to save the offset of str with respect to str_cp, and after the reallocation, letstr point into the new block at its old offset.
And *str = (char *) c; is wrong, although there is a nonzero chance of it being functionally equivalent to the correct *str = c;.
*str = (char *) c;
This line is wrong.
str is a pointer to char and *str is a char but you are assigning a pointer to char to a char. This cannot be done in C.
Moreover:
scanf("%d\n", &numStr);
The \n in scanf call probably does not what you expect:
http://c-faq.com/stdio/scanfhang.html
And also:
str = str_cp;
free(str_cp);
free(str);
You have a double free here. After the assignment str and str_cp will have the same value so doing:
free(str_cp);
free(str);
is as if you do:
free(str);
free(str);
which is undefined behavior (you cannot free twice).