C strcpy_s - Buffer is too small && 0 error - c

I have buffer problem on this line
strcpy_s(*(pWords + word_count), word_length, pWord);
I'm trying to read a file from argv[1] and print out every single word in that file and their occurrence, but I can't figure out whats wrong..?!?
int main(int argc, char* argv[])
{
char *delimiters = argv[2]; // Prose delimiters
char buf[BUF_LEN]; // Buffer for a line of keyboard input
size_t str_size = INIT_STR_EXT; // Current memory to store prose
char* filePath = argv[1];
FILE *fP ;
char* pStr = malloc(str_size); // Pointer to prose to be tokenized
*pStr = '\0'; // Set 1st character to null
fopen_s(&fP, filePath, "r");
fread(buf, BUF_LEN, 10, fP);
size_t maxWords = 10; // Current maximum word count
int word_count = 0; // Current word count
size_t word_length = 0; // Current word length
char** pWords = calloc(maxWords, sizeof(char*)); // Stores pointers to the words
int* pnWord = calloc(maxWords, sizeof(int)); // Stores count for each word
size_t str_len = strnlen_s(buf, BUF_LEN); // Length used by strtok_s()
char* ptr = NULL; // Pointer used by strtok_s()
char* pWord = strtok_s(buf, delimiters, &ptr); // Find 1st word
if (!pWord)
{
printf("No words found. Ending program.\n");
return 1;
}
bool new_word = true; // False for an existing word
while (pWord)
{
// Check for existing word
for (int i = 0; i < word_count; ++i)
if (strcmp(*(pWords + i), pWord) == 0)
{
++*(pnWord + i);
new_word = false;
break;
}
if (new_word) // Not NULL if new word
{
//Check for sufficient memory
if (word_count == maxWords)
{ // Get more space for pointers to words
maxWords += WORDS_INCR;
pWords = realloc(pWords, maxWords*sizeof(char*));
// Get more space for word counts
pnWord = realloc(pnWord, maxWords*sizeof(int));
}
// Found a new word so get memory for it and copy it there
word_length = ptr - pWord; // Length of new word
*(pWords + word_count) = malloc(word_length);
strcpy_s(*(pWords + word_count), word_length, pWord); // Copy to array
*(pnWord + word_count++) = 1; // Increment word count
}
else
new_word = true; // Reset new word flag
pWord = strtok_s(NULL, delimiters, &ptr); // Find subsequent word
}

strcpy_s adds a null byte to the end of the string. You need to malloc(word_length+1).

There are two problems with this line:
fread(buf, BUF_LEN, 10, fP);
Firstly the buffer is too small by a factor of 10 as you read 10 elements.
Second, it does not read the file further than BUF_LEN (previously, *10).
Also the code does not take care of newline chars, as I cannot pass that in argv[2] delimiter spec, even as " \\n".
I suggest you replace fread() with a loop of fgets(), and redefine the word delimiters.
#define BUF_LEN 1000 // plenty of room
...
char buf[BUF_LEN+1]; // allow for 0 terminator
char delimiters[] = " \n\t"; // predefined
...
//size_t str_len = strnlen_s(buf, BUF_LEN); // unnecessary
while (fgets(buf, BUF_LEN, fP) != NULL) { // new outer loop
char* ptr = NULL; // carry on as you were
...
}
Next, as others commented, increase the string space allocation
*(pWords + word_count) = malloc(word_length+1);
In addition, although you have used the "safe" string functions, you did not check argc or the result of any of fopen_s(), fread(), malloc(), calloc(), realloc(), nor have you closed the file or released memory.

Looks to me like you forgot to get an additional byte for the 0 character.
Despite that: Instead of allocating a fixed buffer size for your file, you could get the filesize with fseek using SEEK_END and an offset of 0 to allocate that much memory+1 byte

Related

Getting error when trying release heap-allocated memory

I am working on a program that reads text, line by line from input file. Once the line is read, the program reverses order of words in that string, prints it to the output file and starts reading next line. My program reads only specific number of characters from one line, meaning that if line contains more characters then that specific number, all of them have to skipped until next line is reached. My program seems to work fine.
One of the task requirements is to use dynamically allocated arrays. That is the part where my main problem lies. Once I try to free heap-allocated memory, the program fails with error message that says: HEAP CORRUPTION DETECTED. It must be that I messed up something while working with them. However, I am unable to find the real reason.
#include <stdio.h>
#include <stdlib.h>
#define BUFFER_SIZE 255
int readLine(FILE** stream, char** buffer, int* bufferSize);
void reverseString(char* buffer, char** reverse, int bufferSize, int lastLine);
int main(int argc, char** argv)
{
char* buffer = NULL;
char* reverse = NULL;
int bufferSize = 0;
int lastLine = 0;
FILE* intputStream = fopen(argv[1], "r");
FILE* outputStream = fopen(argv[2], "w");
if (intputStream == NULL || outputStream == NULL)
{
printf("Input or output file cannot be opened\n");
return 0;
}
while (!feof(intputStream))
{
lastLine = readLine(&intputStream, &buffer, &bufferSize);
reverse = (char*)malloc(sizeof(char) * bufferSize);
if (reverse != NULL)
{
reverseString(buffer, &reverse, bufferSize, lastLine);
fputs(reverse, outputStream);
}
}
fclose(intputStream);
fclose(outputStream);
free(buffer);
free(reverse);
return 0;
}
int readLine(FILE** stream, char** buffer, int* bufferSize)
{
char tempBuffer[BUFFER_SIZE] = { 0 };
int lastLine = 0;
if (*stream != NULL)
{
fgets(tempBuffer, BUFFER_SIZE, *stream);
char ignoredChar[100] = { 0 };
*bufferSize = strlen(tempBuffer);
// Ignoring in the same line left characters and checking if this is the last line
if (tempBuffer[(*bufferSize) - 1] != '\n')
{
fgets(ignoredChar, 100, *stream);
if (!feof(*stream))
lastLine = 1;
}
// Allocating memory and copying line to dynamically-allocated array
*buffer = (char*)malloc(sizeof(char) * (*bufferSize));
if (*buffer != NULL)
{
memcpy(*buffer, tempBuffer, (*bufferSize));
(*buffer)[(*bufferSize)] = '\0';
}
}
// Return whether or not the last line is read
return lastLine;
}
void reverseString(char* buffer, char** reverse, int bufferSize, int lastLine)
{
int startingValue = (lastLine ? bufferSize - 1 : bufferSize - 2);
int wordStart = startingValue, wordEnd = startingValue;
int index = 0;
while (wordStart > 0)
{
if (buffer[wordStart] == ' ')
{
int i = wordStart + 1;
while (i <= wordEnd)
(*reverse)[index++] = buffer[i++];
(*reverse)[index++] = ' ';
wordEnd = wordStart - 1;
}
wordStart--;
}
for (int i = 0; i <= wordEnd; i++)
{
(*reverse)[index] = buffer[i];
index++;
}
if (!lastLine)
(*reverse)[index++] = '\n';
(*reverse)[index] = '\0';
}
One of the problems is in readLine where you allocate and copy your string like this (code shortened to the relevant parts):
*bufferSize = strlen(tempBuffer);
*buffer = (char*)malloc(sizeof(char) * (*bufferSize));
(*buffer)[(*bufferSize)] = '\0';
This will not allocate space for the null-terminator. And you will write the null-terminator out of bounds of the allocated memory. That leads to undefined behavior.
You need to allocate an extra byte for the null-terminator:
*buffer = malloc(*bufferSize + 1); // +1 for null-terminator
[Note that I don't cast the result, and don't use sizeof(char) because it's specified to always be equal to 1.]
Another problem is because you don't include the null-terminator in the bufferSize the allocation for reverse in main will be wrong as well:
reverse = (char*)malloc(sizeof(char) * bufferSize);
Which should of course be changed to:
reverse = malloc(bufferSize + 1); // +1 for null-terminator

How do I collect chars into a string in C?

I need to collect some chars into the buffer for my lexer, but I don't know how. I've read some answers on stackoverflow, but those are different cases. I have a while loop that reads next char and I want to put logic in it so it append new char to the buffer in memory.
// init buffer with the first char 'h'
char *buffer = malloc(sizeof(char));
buffer[0] = 'h';
buffer[1] = '\0';
// go through input char by char
while(...)
{
char c = read_next_char();
buffer.append(c) // I whould do in JavaScript, but not in C :(
}
In your case you are allocating a single byte char *buffer = malloc(sizeof(char)); at the beginning and access buffer[1] or any other index is UB.
You can allocate a known number of bytes at beginning and use it until you see a point you need more buffer size.
something like this,
int buffersize = 100;
int index =0;
char *buffer = malloc(sizeof(char)*buffersize); //100bytes are allocated
if(!buffer)
return;
buffer[index++] = 'h';
buffer[index++] = '\0';
// go through input char by char
while(...)
{
char c = read_next_char();
if(index == buffersize ){
buffersize +=100;
buffer= realloc(buffer, buffersize );
//here buffer size is increased by 100
if(!buffer)
return;
}
buffer[index++] = c ;
}
Note: You must free the buffer once the usage is over else it would lead to resource leak.
You need simple to overwrite the null terminating character and add the new one.
char *append(char *buff, int ch)
{
size_t len = strlen(buff);
buff[len] = ch;
buff[len+1] = 0;
return buff;
}
The code assumes that buff is a valid pointer to long enough memory block to accommodate the new char and null terminating char. It has to contain a valid C string.
Unlike in java or javascipt there is no string type in C, you need to write your own.
This is a very simple example of how you could handle the building of strings in an efficient way.
It's pretty self explaining.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct DynamicString
{
char* string; // pointer to string
int length; // string length
int capacity; // capacity of the string buffer (= allocated size)
};
#define DS_CHUNKSIZE 100 // we increase the buffer size by DS_CHUNKSIZE
// change this as needed
// Initialize the structure
void InitDynamicString(struct DynamicString* ds)
{
ds->capacity = DS_CHUNKSIZE + 1;
ds->string = malloc(ds->capacity);
ds->string[0] = 0; // null terminator
ds->length = 0; // initial string length 0
};
// Increase the string buffer size if necessary
// (internal function)
void IncreaseSize(struct DynamicString* ds, int newsize)
{
if (ds->length + newsize + 1 > ds->capacity)
{
ds->capacity = ds->length + newsize + DS_CHUNKSIZE + 1;
ds->string = realloc(ds->string, ds->capacity); // reallocate a new larger buffer
}
}
// append a single character
void AppendChar(struct DynamicString* ds, char ch)
{
IncreaseSize(ds, sizeof(char)); // increase size by 1 if necessary
ds->string[ds->length++] = ch; // append char
ds->string[ds->length] = 0; // null terminator
}
// append a string
void AppendString(struct DynamicString* ds, const char *str)
{
IncreaseSize(ds, strlen(str)); // increase by length of string if necessary
strcat(ds->string, str); // concatenate
ds->length += strlen(str); // update string length
}
int main(int argc, char* argv[])
{
struct DynamicString ds;
InitDynamicString(&ds); // initialize ds
AppendChar(&ds, 'a'); // append chars
AppendChar(&ds, 'b');
AppendChar(&ds, 'c');
AppendString(&ds, "DE"); // append strings
AppendString(&ds, "xyz1234");
printf("string = \"%s\"", ds.string); // show result
}
You code could use it like this:
struct DynamicString buffer;
InitDynamicString(&buffer)
dAppendChar(&buffer, 'h');
while(...)
{
char c = read_next_char();
AppendChar(&buffer, c); // quite similar to buffer.append(c)
}
Disclaimer:
The code hasn't been thoroughly tested and there may be bugs.
There is no error checking whatsoever. malloc and realloc may fail.
Other useful functions such as SetString(struct DynamicString *ds, const char *string) need to be written.
There is room for optimisation, especially the strcat could be handled differently, read this article for more information. I leave this as a (very simple) exercise to the reader.
There a no standard function in C that can append a char to a string. You need to write the code from scratch.
Let's start here:
char *buffer = malloc(sizeof(char)); // This allocates memory for ONE char
buffer[0] = 'h'; // So this is fine
buffer[1] = '\0'; // but this is bad. It writes outside the allocated memory
Fix it by allocating memory for two chars
char *buffer = malloc(2); // sizeof(char) is always 1 so no need for it
buffer[0] = 'h';
buffer[1] = '\0';
When you want to append a new character to the string, you also need to allocate memory for it. In other words, you need to increase the size of the memory that buffer is pointing to. For that you can use the function realloc.
size_t buffer_size = 2;
char *buffer = malloc(buffer_size );
buffer[0] = 'h';
buffer[1] = '\0';
while(...)
{
char c = read_next_char();
char* tmp = realloc(buffer, buffer_size + 1);
if (tmp == NULL)
{
// realloc failed ! Add error handling here
... error handling ...
}
buffer = tmp;
buffer[buffer_size - 1] = c; // Add the new char
buffer[buffer_size] = '\0'; // Add the string termination
++buffer_size; // Update buffer size
}
The other answers can work but they are complex. I suggest a simpler solution. A string is an array of char's where the last char of that string is a '\0'-Byte. There can be more char's in the array after it but they are not part of the string.
The simpler solution is to create an array which is large enough for 98% of cases, use it to store the string and when the string gets too long you can exit with an error. Changing the buffer size when needed is a nice feature but when you are new to C you shouldn't start there.
#define BUFFER_SIZE 1024
// init buffer with the first char 'h'
char buffer[BUFFER_SIZE];
buffer[0] = 'h';
buffer[1] = '\0';
// go through input char by char Replace the ... with your condition of the while loop
for(size_t i=1;...;i++) //start at 1 so the 'h' is not overwritten
{
if(i==BUFFER_SIZE-1) //-1 for the '\0'-Byte
{
fputs("Input too long, exit\n",stderr);
exit(1);
}
//Are you sure you don't need error handling for read_next_char()?
buffer[i] = read_next_char();
buffer[i+1]='\0'; //End the string with a '\0'-Byte
}

Reading the words of a file into a dynamic 2D array

I am trying to read a file and store every word into a dynamically allocated 2D array. The size of the input file is unknown.
I am totally lost and don't know how I could "fix/finish" the program.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void) {
char filename[25];
printf("Input the filename");
scanf("%s", filename);
fileConverter(filename);
}
int fileConverter(char filename[25]) {
//int maxLines = 50000;
//int maxWordSize = 128;
//char words[maxLines][maxWordSize];
//char **words;
char **arr = (char**) calloc(num_elements, sizeof(char*));
for ( i = 0; i < num_elements; i++ ) {
arr[i] = (char*) calloc(num_elements_sub, sizeof(char));
}
FILE *file = NULL;
int amountOfWords = 0;
file = fopen(filename, "r");
if(file == NULL) {
exit(0);
}
while(fgets(words[amountOfWords], 10000, file)) {
words[amountOfWords][strlen(words[amountOfWords]) - 1] = "\0";
amountOfWords++;
}
for(int i = 0; i < amountOfWords; i++) {
printf("a[%d] = ", i);
printf("%s\n", words[i]);
}
printf("The file contains %d words and the same amount of lines.\n", amountOfWords);
return amountOfWords;
The main challenges for this kind of problem are
reallocating the array of strings as the program reads new words, and
handling words that are larger than the buffer used by fgets.
The general approach for these kind of parsing problems, is to design a state machine. The state machine here has two states:
The current character is whitespace. Action: Continue reading whitespace until we reach the end of the buffer, or until we land on a non-whitespace character, in which case we switch to state 2.
The current character is non-whitespace (i.e. a word). Action: Continue reading non-whitespace until we reach the end of the buffer, or until we land on a whitespace character, in which case we copy the word we just read to the array of strings and switch to state 1.
Particularly difficult is the case in which we are in state 2 and reach the end of the buffer. This means that this word spans multiple buffers. To accommodate for this, we deviate slightly from a direct state machine implementation. State 2 is slightly different, depending on if we are reading a new word or continuing one that was started in a previous buffer.
We now keep track of wordSize. If we start reading from the start of a buffer, but wordSize is not 0, then we know we are continuing a previous word and we know what size it was for the realloc we need.
Below is one possible implementation. All the work is done in the wordArrayRead function. Walking through it from the top of the function:
First we declare the variables that we need across lineBuffer reads: an index for the word itself and the length of the word we are currently reading, followed by the declaration of the buffer itself. The outside loop repeatedly reads using fgets until we have exhausted the input.
We start reading at index 0 and stop at the null-terminator. The first if-statement checks if we should be in state 2: either the current character is the start of a word or we were already reading a word.
State 2
The index wordStartIdx stays at the first character of the word (segment) and we walk the wordEndIdx to the end of the word (segment) or to the end of the buffer.
We then check if we need to increase the size of the array of strings. Here we increase it to 2 times + 1 the previous size to avoid frequent reallocations.
We set a boolean value, indication whether we have reached the end of a word. If we have, we need to allocate for and write the null-terminator at the end of the string.
If wordLength == 0 it means we are reading a new word and have to allocate memory for it for the first time. If wordLength != 0, we have to reallocate to append to an existing word.
We copy the word (segment) currently in the lineBuffer to the array of strings.
Now, we do some bookkeeping. If we reached the end of a word, we write the null-terminator, increment the index to point to the next word location and reset wordLength. If this wasn't the case, we only increment the wordLength with the length of the segment we just read. Finally, we update wordStartIdx, which still points to the start of the word, to point to the end of the word, so we can continue iterating over the buffer.
State 1
Having finishing the State 2 processing, we go into State 1 which has only two lines. It simply advances the index until we land at non-whitespace. Note that the null-terminator of the lineBuffer ('\0') does not count as whitespace, so this loop will not continue past the end of the buffer.
After all input has been processed, we shrink the array of strings to the actual size of its data. This "corrects" the allocation policy of increasing the size by 2n+1 each time it wasn't large enough.
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// BUFFER_SIZE must be >1U
#define BUFFER_SIZE 1024U
struct WordArray
{
char **words;
size_t numberOfWords;
};
static struct WordArray wordArrayConstruct(void);
static void wordArrayResize(struct WordArray *wordArray, size_t const newSize);
static void wordArrayDestruct(struct WordArray *wordArray);
static void wordArrayRead(FILE *restrict stream, struct WordArray *wordArray);
static char *reallocStringWrapper(char *restrict str, size_t const newSize);
static void wordArrayPrint(struct WordArray const *wordArray);
int main(void)
{
struct WordArray wordArray = wordArrayConstruct();
wordArrayRead(stdin, &wordArray);
wordArrayPrint(&wordArray);
wordArrayDestruct(&wordArray);
}
static void wordArrayRead(FILE *restrict stream, struct WordArray *wordArray)
{
size_t wordArrayIdx = 0U;
size_t wordLength = 0U;
char lineBuffer[BUFFER_SIZE];
while (fgets(lineBuffer, sizeof lineBuffer, stream) != NULL)
{
size_t wordStartIdx = 0U;
while (lineBuffer[wordStartIdx] != '\0')
{
if (!isspace(lineBuffer[wordStartIdx]) || wordLength != 0U)
{
size_t wordEndIdx = wordStartIdx;
while (!isspace(lineBuffer[wordEndIdx]) && wordEndIdx != BUFFER_SIZE - 1U)
++wordEndIdx;
if (wordArrayIdx >= wordArray->numberOfWords)
wordArrayResize(wordArray, wordArray->numberOfWords * 2U + 1U);
size_t wordSegmentLength = wordEndIdx - wordStartIdx;
size_t foundWordEnd = wordEndIdx != BUFFER_SIZE - 1U; // 0 or 1 bool
// Allocate for a new word, or reallocate for an existing word
// If a word end was found, add 1 to the size for the '\0' character
char *dest = wordLength == 0U ? NULL : wordArray->words[wordArrayIdx];
size_t allocSize = wordLength + wordSegmentLength + foundWordEnd;
wordArray->words[wordArrayIdx] = reallocStringWrapper(dest, allocSize);
memcpy(&(wordArray->words[wordArrayIdx][wordLength]),
&lineBuffer[wordStartIdx], wordSegmentLength);
if (foundWordEnd)
{
wordArray->words[wordArrayIdx][wordLength + wordSegmentLength] = '\0';
++wordArrayIdx;
wordLength = 0U;
}
else
{
wordLength += wordSegmentLength;
}
wordStartIdx = wordEndIdx;
}
while (isspace(lineBuffer[wordStartIdx]))
++wordStartIdx;
}
}
// All done. Shrink the words array to the size of the actual data
if (wordArray->numberOfWords != 0U)
wordArrayResize(wordArray, wordArrayIdx);
}
static struct WordArray wordArrayConstruct(void)
{
return (struct WordArray) {.words = NULL, .numberOfWords = 0U};
}
static void wordArrayResize(struct WordArray *wordArray, size_t const newSize)
{
assert(newSize > 0U);
char **tmp = (char**) realloc(wordArray->words, newSize * sizeof *wordArray->words);
if (tmp == NULL)
{
wordArrayDestruct(wordArray);
fprintf(stderr, "WordArray allocation error\n");
exit(EXIT_FAILURE);
}
wordArray->words = tmp;
wordArray->numberOfWords = newSize;
}
static void wordArrayDestruct(struct WordArray *wordArray)
{
for (size_t wordStartIdx = 0U; wordStartIdx < wordArray->numberOfWords; ++wordStartIdx)
{
free(wordArray->words[wordStartIdx]);
wordArray->words[wordStartIdx] = NULL;
}
free(wordArray->words);
}
static char *reallocStringWrapper(char *restrict str, size_t const newSize)
{
char *tmp = (char*) realloc(str, newSize);
if (tmp == NULL)
{
free(str);
fprintf(stderr, "Realloc string allocation error\n");
exit(EXIT_FAILURE);
}
return tmp;
}
static void wordArrayPrint(struct WordArray const *wordArray)
{
for (size_t wordStartIdx = 0U; wordStartIdx < wordArray->numberOfWords; ++wordStartIdx)
printf("%zu: %s\n", wordStartIdx, wordArray->words[wordStartIdx]);
}
Note: This program reads input from stdin, as Unix/Linux utilities typically do. Use input redirection to read from a file, or provide a file descriptor to the readWordArray function.
to allocate dynamic 2D array you need:
void allocChar2Darray(size_t rows, size_t columns, char (**array)[columns])
{
*array = malloc(rows * sizeof(**array));
}

Create 2D array of dynamic size in C

Let's say we have a string of words that are delimited by a comma.
I want to write a code in C to store these words in a variable.
Example
amazon, google, facebook, twitter, salesforce, sfb
We do not know how many words are present.
If I were to do this in C, I thought I need to do 2 iterations.
First iteration, I count how many words are present.
Then, in the next iteration, I store each words.
Step 1: 1st loop -- count number of words
....
....
//End 1st loop. num_words is set.
Step 2:
// Do malloc using num_words.
char **array = (char**)malloc(num_words* sizeof(char*));
Step 3: 2nd loop -- Store each word.
// First, walk until the delimiter and determine the length of the word
// Once len_word is determined, do malloc
*array= (char*)malloc(len_word * sizeof(char));
// And then store the word to it
// Do this for all words and then the 2nd loop terminates
Can this be done more efficiently?
I do not like having 2 loops. I think there must be a way to do it in 1 loop with just basic pointers.
The only restriction is that this needs to be done in C (due to constraints that are not in my control)
You don't need to do a separate pass to count the words. You can use realloc to enlarge the array on the fly as you read in the data on a single pass.
To parse an input line buffer, you can use strtok to tokenize the individual words.
When saving the parsed words into the word list array, you can use strdup to create a copy of the tokenized word. This is necessary for the word to persist. That is, whatever you were pointing to in the line buffer on the first line will get clobbered when you read the second line (and so on ...)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
char **words;
size_t wordmax;
size_t wordcount;
int
main(int argc,char **argv)
{
char *cp;
char *bp;
FILE *fi;
char buf[5000];
--argc;
++argv;
// get input file name
cp = *argv;
if (cp == NULL) {
printf("no file specified\n");
exit(1);
}
// open input file
fi = fopen(cp,"r");
if (fi == NULL) {
printf("unable to open file '%s' -- %s\n",cp,strerror(errno));
exit(1);
}
while (1) {
// read in next line -- bug out if EOF
cp = fgets(buf,sizeof(buf),fi);
if (cp == NULL)
break;
bp = buf;
while (1) {
// tokenize the word
cp = strtok(bp," \t,\n");
if (cp == NULL)
break;
bp = NULL;
// expand the space allocated for the word list [if necessary]
if (wordcount >= wordmax) {
// this is an expensive operation so don't do it too often
wordmax += 100;
words = realloc(words,(wordmax + 1) * sizeof(char *));
if (words == NULL) {
printf("out of memory\n");
exit(1);
}
}
// get a persistent copy of the word text
cp = strdup(cp);
if (cp == NULL) {
printf("out of memory\n");
exit(1);
}
// save the word into the word array
words[wordcount++] = cp;
}
}
// close the input file
fclose(fi);
// add a null terminator
words[wordcount] = NULL;
// trim the array to exactly what we need/used
words = realloc(words,(wordcount + 1) * sizeof(char *));
// NOTE: because we added the terminator, _either_ of these loops will
// print the word list
#if 1
for (size_t idx = 0; idx < wordcount; ++idx)
printf("%s\n",words[idx]);
#else
for (char **word = words; *word != NULL; ++word)
printf("%s\n",*word);
#endif
return 0;
}
What you're looking for is
http://manpagesfr.free.fr/man/man3/strtok.3.html
(From man page)
The strtok() function parses a string into a sequence of tokens. On the first call to strtok() the string to be parsed should be specified in str. In each subsequent call that should parse the same string, str should be NULL.
But this thread look like duplicate of Split string with delimiters in C
Unless you are forced to produce your own implementation ...
We do not know how many words are present.
We know num_words <= strlen(string) + 1. Only 1 "loop" needed. The cheat here is a quick run down s via strlen().
// *alloc() out-of-memory checking omitted for brevity
char **parse_csv(const char *s) {
size_t slen = strlen(s);
size_t num_words = 0;
char **words = malloc(sizeof *words * (slen + 1));
// find, allocate, copy the words
while (*s) {
size_t len = strcspn(s, ",");
words[num_words] = malloc(len + 1);
memcpy(words[num_words], s, len);
words[num_words][len] = '\0';
num_words++;
s += len; // skip word
if (*s) s++; // skip ,
}
// Only 1 realloc() needed.
realloc(words, sizeof *words *num_words); // right-size words list
return words;
}
It makes send to NULL terminate the list, so
char **words = malloc(sizeof *words * (slen + 1 + 1));
...
words[num_words++] = NULL;
realloc(words, sizeof *words *num_words);
return words;
In considering the worst case for the initial char **words = malloc(...);, I take a string like ",,," with its 3 ',' would make for 4 words "", "", "", "". Adjust code as needed for such pathological cases.

Scanf unknown number of string arguments C

I wanted to know if there was a way to use scanf so I can take in an unknown number of string arguments and put them into a char* array. I have seen it being done with int values, but can't find a way for it to be done with char arrays. Also the arguments are entered on the same line separated by spaces.
Example:
user enters hello goodbye yes, hello gets stored in array[0], goodbye in array[1] and yes in array[2]. Or the user could just enter hello and then the only thing in the array would be hello.
I do not really have any code to post, as I have no real idea how to do this.
You can do something like, read until the "\n" :
scanf("%[^\n]",buffer);
you need to allocate before hand a big enough buffer.
Now go through the buffer count the number of words, and allocate the necessary space char **array = ....(dynamic string allocation), go to the buffer and copy string by string into the array.
An example:
int words = 1;
char buffer[128];
int result = scanf("%127[^\n]",buffer);
if(result > 0)
{
char **array;
for(int i = 0; buffer[i]!='\0'; i++)
{
if(buffer[i]==' ' || buffer[i]=='\n' || buffer[i]=='\t')
{
words++;
}
}
array = malloc(words * sizeof(char*));
// Using RoadRunner suggestion
array[0] = strtok (buffer," ");
for(int w = 1; w < words; w++)
{
array[w] = strtok (NULL," ");
}
}
As mention in the comments you should use (if you can) fgets instead fgets(buffer,128,stdin);.
More about strtok
If you have an upper bound to the number of strings you may receive from the user, and to the number of characters in each string, and all strings are entered on a single line, you can do this with the following steps:
read the full line with fgets(),
parse the line with sscanf() with a format string with the maximum number of %s conversion specifiers.
Here is an example for up to 10 strings, each up to 32 characters:
char buf[400];
char s[10][32 + 1];
int n = 0;
if (fgets(buf, sizeof buf, sdtin)) {
n = sscanf("%32s%32s%32s%32s%32s%32s%32s%32s%32s%32s",
s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[8], s[9]));
}
// `n` contains the number of strings
// s[0], s[1]... contain the strings
If the maximum number is not known of if the maximum length of a single string is not fixed, or if the strings can be input on successive lines, you will need to iterate with a simple loop:
char buf[200];
char **s = NULL;
int n;
while (scanf("%199s", buf) == 1) {
char **s1 = realloc(s, (n + 1) * sizeof(*s));
if (s1 == NULL || (s1[n] = strdup(buf)) == NULL) {
printf("allocation error");
exit(1);
}
s = s1;
n++;
}
// `n` contains the number of strings
// s[0], s[1]... contain pointers to the strings
Aside from the error handling, this loop is comparable to the hard-coded example above but it still has a maximum length for each string. Unless you can use a scanf() extension to allocate the strings automatically (%as on GNU systems), the code will be more complicated to handle any number of strings with any possible length.
You can use:
fgets to read input from user. You have an easier time using this instead of scanf.
malloc to allocate memory for pointers on the heap. You can use a starting size, like in this example:
size_t currsize = 10
char **strings = malloc(currsize * sizeof(*strings)); /* always check
return value */
and when space is exceeded, then realloc more space as needed:
currsize *= 2;
strings = realloc(strings, currsize * sizeof(*strings)); /* always check
return value */
When finished using the requested memory from malloc() and realloc(), it's always to good to free the pointers at the end.
strtok to parse the input at every space. When copying over the char * pointer from strtok(), you must also allocate space for strings[i], using malloc() or strdup.
Here is an example I wrote a while ago which does something very similar to what you want:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INITSIZE 10
#define BUFFSIZE 100
int
main(void) {
char **strings;
size_t currsize = INITSIZE, str_count = 0, slen;
char buffer[BUFFSIZE];
char *word;
const char *delim = " ";
int i;
/* Allocate initial space for array */
strings = malloc(currsize * sizeof(*strings));
if(!strings) {
printf("Issue allocating memory for array of strings.\n");
exit(EXIT_FAILURE);
}
printf("Enter some words(Press enter again to end): ");
while (fgets(buffer, BUFFSIZE, stdin) != NULL && strlen(buffer) > 1) {
/* grow array as needed */
if (currsize == str_count) {
currsize *= 2;
strings = realloc(strings, currsize * sizeof(*strings));
if(!strings) {
printf("Issue reallocating memory for array of strings.\n");
exit(EXIT_FAILURE);
}
}
/* Remove newline from fgets(), and check for buffer overflow */
slen = strlen(buffer);
if (slen > 0) {
if (buffer[slen-1] == '\n') {
buffer[slen-1] = '\0';
} else {
printf("Exceeded buffer length of %d.\n", BUFFSIZE);
exit(EXIT_FAILURE);
}
}
/* Parsing of words from stdin */
word = strtok(buffer, delim);
while (word != NULL) {
/* allocate space for one word, including nullbyte */
strings[str_count] = malloc(strlen(word)+1);
if (!strings[str_count]) {
printf("Issue allocating space for word.\n");
exit(EXIT_FAILURE);
}
/* copy strings into array */
strcpy(strings[str_count], word);
str_count++;
word = strtok(NULL, delim);
}
}
/* print and free strings */
printf("Your array of strings:\n");
for (i = 0; i < str_count; i++) {
printf("strings[%d] = %s\n", i, strings[i]);
free(strings[i]);
strings[i] = NULL;
}
free(strings);
strings = NULL;
return 0;
}

Resources