My problem now is that I have taken space for different words,but I'm having problems storing this as an array. Even though there are some similar posts like this, nothing seems to work for me and I'm completely stuck here. I want to keep this format(i don't want to change the definition of the function). Grateful for all help and comments!
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int i, len = 0, counter = 0;
char ** p = 0;
for(i = 0; s[i] != '\0'; i++){
len++;
if(s[i] == ' ' || s[i+1] == '\0'){
counter ++;
for(i = 0; i < len; i++){
p[i] = s[i];
}
}
printf("%d\n", len);
printf("%d\n", counter);
return p;
}
int main() {
char *s = "This is a string";
int n;
int i;
for(i = 0; i < n*; i++){
//also not sure how to print this
}
}
I edited your code and it's now working correctly:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char** split(const char* s, int *n);
char** split(const char* s, int *n) {
int i, len = 0, counter = 0;
char ** p = 0;
for(int i = 0; ; ++i) {
if(s[i] == '\0') {
break;
}
if(s[i] == ' ') {
counter += 1;
}
}
++counter;
p = (char **) malloc(counter * sizeof(char*));
for(int i = 0, c = 0; ; ++i, ++c) {
if(s[i] == '\0') {
break;
}
len = 0;
while(s[len + i + 1] != ' ' && s[len + i + 1] != '\0') {
++len;
}
p[c] = (char *) malloc(len * sizeof(char) + 1);
int k = 0;
for(int j = i; j < i + len + 1; ++j) {
p[c][k++] = s[j];
}
p[c][k] = '\0';
i += len + 1;
}
*n = counter;
return p;
}
int main() {
char *s = "This is a string";
int n;
int i;
char** split_s = split(s, &n);
for(i = 0; i < n; i++) {
printf("%s\n", split_s[i]);
}
}
But I suggest you do a little bit clean-up.
Here is a solution using sscanf. scanf and sscanf considers space as an end of input. I have taken benefit of that to make it work for you.
char *str = (char*) "This is a string";
char buffer[50];
char ** p = (char**)malloc(1 * sizeof(*p));
for (int i = 0; str[0] != NULL; i++)
{
if (i > 0)
{
p = (char**)realloc(p, i * sizeof(p));
}
sscanf(str, "%s", buffer);
int read = strlen(buffer);
str += read + 1;
p[i] = (char*)malloc(sizeof(char)*read + 1);
strcpy(p[i], buffer);
printf("%s\n", p[i]);
}
Since this pointer is growing in both the dimensions, every time a new string is found we need to resize the p itself and then the new address that it contains should be resized too .
My problem now is that I have taken space for different words using malloc, but I'm having problems storing this as an array.
When addressable memory for a collection of strings is needed, then a collection of pointers, as well as memory for each pointer needed.
In your code:
p = (char**)malloc(counter*sizeof(char*));
You have created the collection of pointers, but you have not yet created memory at those locations to accommodate the strings. (By the way, the cast is not necessary)
Here are the essential steps to both create a collection of pointers, and memory for each:
//for illustration, pick sizes for count of strings needed,
//and length of longest string needed.
#define NUM_STRINGS 5
#define STR_LEN 80
char **stringArray = NULL;
stringArray = malloc(NUM_STRINGS*sizeof(char *));// create collection of pointers
if(stringArray)
{
for(int i=0;i<NUM_STRINGS;i++)
{
stringArray[i] = malloc(STR_LEN + 1);//create memory for each string
if(!stringArray[i]) //+1 room for nul terminator
{
//handle error
}
}
}
As a function it could look like this: (replacing malloc with calloc for initialized space)
char ** Create2DStr(size_t numStrings, size_t maxStrLen)
{
int i;
char **a = {0};
a = calloc(numStrings, sizeof(char *));
for(i=0;i<numStrings; i++)
{
a[i] = calloc(maxStrLen + 1, 1);
}
return a;
}
using this in your split() function:
char** split(const char* s, int *n){
int i, len = 0, counter = 0, lenLongest = 0
char ** p = 0;
//code to count words and longest word
p = Create2DStr(counter, longest + 1); //+1 for nul termination
if(p)
{
//your searching code
//...
// when finished, free memory
Let's start at the logic.
How does a string like A quick brown fox. get processed? I would suggest:
Count the number of words, and the amount of memory needed to store the words. (In C, each string ends with a terminating nul byte, \0.)
Allocate enough memory for the pointers and the words.
Copy each word from the source string.
We have a string as an input, and we want an array of strings as output. The simplest option is
char **split_words(const char *source);
where the return value is NULL if an error occurs, or an array of pointers terminated by a NULL pointer otherwise. All of it is dynamically allocated at once, so calling free() on the return value will free both the pointers and their contents.
Let's start implementing the logic according to the bullet points above.
#include <stdlib.h>
char **split_words(const char *source)
{
size_t num_chars = 0;
size_t num_words = 0;
size_t w = 0;
const char *src;
char **word, *data;
/* Sanity check. */
if (!source)
return NULL; /* split_words(NULL) will return NULL. */
/* Count the number of words in source (num_words),
and the number of chars needed to store
a copy of each word (num_chars). */
src = source;
while (1) {
/* Skip any leading whitespace (not just spaces). */
while (*src == '\t' || *src == '\n' || *src == '\v' ||
*src == '\f' || *src == '\r' || *src == ' ')
src++;
/* No more words? */
if (*src == '\0')
break;
/* We have one more word. Account for the pointer itself,
and the string-terminating nul char. */
num_words++;
num_chars++;
/* Count and skip the characters in this word. */
while (*src != '\0' && *src != '\t' && *src != '\n' &&
*src != '\v' && *src != '\f' && *src != '\r' &&
*src != ' ') {
src++;
num_chars++;
}
}
/* If the string has no words in it, return NULL. */
if (num_chars < 1)
return NULL;
/* Allocate memory for both the pointers and the data.
One extra pointer is needed for the array-terminating
NULL pointer. */
word = malloc((num_words + 1) * sizeof (char *) + num_chars);
if (!word)
return NULL; /* Not enough memory. */
/* Since 'word' is the return value, and we use
num_words + 1 pointers in it, the rest of the memory
we allocated we use for the string contents. */
data = (char *)(word + num_words + 1);
/* Now we must repeat the first loop, exactly,
but also copy the data as we do so. */
src = source;
while (1) {
/* Skip any leading whitespace (not just spaces). */
while (*src == '\t' || *src == '\n' || *src == '\v' ||
*src == '\f' || *src == '\r' || *src == ' ')
src++;
/* No more words? */
if (*src == '\0')
break;
/* We have one more word. Assign the pointer. */
word[w] = data;
w++;
/* Count and skip the characters in this word. */
while (*src != '\0' && *src != '\t' && *src != '\n' &&
*src != '\v' && *src != '\f' && *src != '\r' &&
*src != ' ') {
*(data++) = *(src++);
}
/* Terminate this word. */
*(data++) = '\0';
}
/* Terminate the word array. */
word[w] = NULL;
/* All done! */
return word;
}
We can test the above with a small test main():
#include <stdio.h>
int main(int argc, char *argv[])
{
char **all;
size_t i;
all = split_words(" foo Bar. BAZ!\tWoohoo\n More");
if (!all) {
fprintf(stderr, "split_words() failed.\n");
exit(EXIT_FAILURE);
}
for (i = 0; all[i] != NULL; i++)
printf("all[%zu] = \"%s\"\n", i, all[i]);
free(all);
return EXIT_SUCCESS;
}
If we compile and run the above, we get
all[0] = "foo"
all[1] = "Bar."
all[2] = "BAZ!"
all[3] = "Woohoo"
all[4] = "More"
The downside of this approach (of using one malloc() call to allocate memory for both the pointers and the data), is that we cannot easily grow the array; we can really just treat it as one big clump.
A better approach, especially if we intend to add new words dynamically, is to use a structure:
typedef struct {
size_t max_words; /* Number of pointers allocated */
size_t num_words; /* Number of words in array */
char **word; /* Array of pointers */
} wordarray;
Unfortunately, this time we need to allocate each word separately. However, if we use a structure to describe each word in a common allocation buffer, say
typedef struct {
size_t offset;
size_t length;
} wordref;
typedef struct {
size_t max_words;
size_t num_words;
wordref *word;
size_t max_data;
size_t num_data;
char *data;
} wordarray;
#define WORDARRAY_INIT { 0, 0, NULL, 0, 0, NULL }
static inline const char *wordarray_word_ptr(wordarray *wa, size_t i)
{
if (wa && i < wa->num_words)
return wa->data + wa->word[i].offset;
else
return "";
}
static inline size_t wordarray_word_len(wordarray *wa, size_t i)
{
if (wa && i < wa->num_words)
return wa->word[i].length;
else
return 0;
}
The idea is that if you declare
wordarray words = WORDARRAY_INIT;
you can use wordarray_word_ptr(&words, i) to get a pointer to the ith word, or a pointer to an empty string if ith word does not exist yet, and wordarray_word_len(&words, i) to get the length of that word (much faster than calling strlen(wordarray_word_ptr(&words, i))).
The underlying reason why we cannot use char * here, is that realloc()ing the data area (where the word pointers would point to) may change its address. If that were to happen, we'd have to adjust every pointer in our array. It is much easier to use offsets to the data area instead.
The only downside to this approach is that deleting words does not mean a corresponding shrinkage in the data area. However, it is possible to write a simple "compactor" function, that repacks the data to a new area, so that holes left by deleted words are "moved" to the end of the data area. Usually, this is not necessary, but you might wish to add a member to the wordarray structure, say the number of lost characters from word deletions, so that the compaction can be done heuristically the next time the data area would be otherwise resized.
Related
I'm learning C and I've created some small "challenges" for myself to solve. I have to create a program that reads an input string which consists of words separated by underscore and returns the last letter of each odd word followed by the number of chars of that word.
The input won't be empty. The words are separated by exactly 1 underscore. The first and last chars won't be underscores (so no _this_is_a_sentence or this_is_a_sentence_ or _this_is_a_sentence_
Example:
input: we_had_a_lot_of_rain_in_today
output: e2a1f2n2
Explanation:
We only consider words in an odd position, so we just need to consider: we, a, of and in. Now, for each of those words, we get the last char and append the total number of chars of the word: we has 2 chars, so it becomes e2. a has 1 char, so it becomes a1, of has 2 chars so it becomes f2 and in has 2 chars so it becomes n2.
This is my code so far
#include <stdio.h>
void str_dummy_encrypt(char *sentence)
{
int currentWord = 1;
int totalChars = 0;
for (int i = 0; sentence[i] != '\0'; i++)
{
if (sentence[i] == '_')
{
if (currentWord % 2 != 0)
{
// I know the last char of the word is on sentence[i-1]
// and the total chars for this word is totalChars
// but how to return it in order to be printed?
}
currentWord++;
totalChars = 0;
} else {
totalChars++;
}
}
}
int main()
{
char sentence[100];
while (scanf("%s", sentence) != EOF)
{
str_dummy_encrypt(sentence);
}
return 0;
}
I think I'm on the right path, but I don't have any clue on how to return the result to the main function so it can be printed.
Thanks in advance
... how to return the result (?)
You have a couple choices:
Pass in the destination
Caller provides an ample destination.
void str_dummy_encrypt(size_t dsize, char *destination, const char *sentence)
Allocate and return the destination
Caller should free the returned pointer when done.
char *str_dummy_encrypt(const char *sentence) {
...
char *destination = malloc()
...
return destination;
}
Over-write the source
This one is tricky as code needs to insure the destination does not get ahead of the source, but I think you are OK given the task requirements, as long as string length > 1.
void str_dummy_encrypt(char *sentence) {
char *destination = sentence;
...
}
Others
Let us go deeper with pass in the destination and return a flag indicating success/error.
Use snprintf() to form the letter-count.
// Return error flag
int str_dummy_encrypt(size_t dsize, char *destination, const char *sentence) {
...
if (currentWord % 2 != 0) {
int len = snprintf(destination, dsize, "%c%d", sentence[i-1], totalChars);
if (len < 0 || (unsigned) len >= dsize) {
// We ran out of room
return -1; // failure
}
// Adjust to append the next encoding.
dsize -= len;
destination += len;
}
...
return 0;
}
Usage
char sentence[100];
char destination[sizeof sentence + 1]; // I think worse case is 1 more than source.
...
if (str_dummy_encrypt(sizeof destination, destination, sentence)) {
puts("Error");
} else {
puts(destination);
}
Code has other issues:
Does not handle an odd number of words correctly like "abc".
Attempts sentence[i-1] with leading _ like "_abc".
Poor input:
No width limit, weak test.
char sentence[100];
// while(scanf("%s", sentence) != EOF)
while(scanf("%99s", sentence) == 1)
Perhaps other issues.
Consider a test like if(sentence[i+1] == '_' || sentence[i+1] == '\0') to detect end of word and avoid 2 issues mentioned above. (Count and other code will need adjusting too.)
As it follows from the description of the task the function should return a new string that is built based on the fornat of the passed source string.
It means that you need to allocated dynamically a character array within the function where the result string will be stored.
As the source string is not changed within the function then the function parameter should have qualifier const.
And you should always write more general functions. This restriction
The words are separated by exactly 1 underscore. The first and last
chars won't be underscores (so no this_is_a_sentence or
this_is_a_sentence or this_is_a_sentence
for the function does not make it general. The function should be able also to process strings like "_this_is_a_sentence_".
Here is a demonstration program that shows how the function can be implemented.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char * str_dummy_encrypt( const char *s )
{
size_t n = 0;
for (const char *p = s; *p; )
{
size_t length = 0;
while (length == 0 && *p)
{
length = strcspn( p, "_" );
if (length == 0) ++p;
}
if (length != 0)
{
p += length;
n += 1 + snprintf( NULL, 0, "%zu", length );
}
length = 0;
while (length == 0 && *p)
{
length = strcspn( p, "_" );
p += length == 0 ? 1 : length;
}
}
char *result = malloc( n + 1 );
if (result != NULL)
{
result[n] = '\0';
if (n != 0)
{
char *current = result;
for (const char *p = s; *p; )
{
size_t length = 0;
while (length == 0 && *p)
{
length = strcspn( p, "_" );
if (length == 0) ++p;
}
if (length != 0)
{
p += length;
*current++ = p[-1];
current += sprintf( current, "%zu", length );
}
length = 0;
while (length == 0 && *p)
{
length = strcspn( p, "_" );
p += length == 0 ? 1 : length;
}
}
}
}
return result;
}
int main( void )
{
const char *s = "_we__had___a_lot_of_rain_in_today___";
char *result = str_dummy_encrypt( s );
if (result != NULL) puts( result );
free( result );
}
The program output is
e2a1f2n2
The same output will be if to use the string showed in your question that is "we_had_a_lot_of_rain_in_today".
The function would be more general if to add one more parameter that will specify the delimiter as
char * str_dummy_encrypt( const char *s, char c );
Or as the shown function implementation uses the standard C string function strcspn then the function could accept a set of delimiters like
char * str_dummy_encrypt( const char *s, const char *delimiters );
You do not actually need to read the word into a buffer, you can just read one character at a time and keep track of the last char, the word number and its length:
#include <stdio.h>
int main() {
int c, lastc = ' ', n = 1, len = 0;
for (;;) {
c = getchar();
if (c == '_' || c == '\n' || c == EOF) {
if (n & 1) {
printf("%c%d", lastc, len);
}
n++;
len = 0;
if (c != '_')
break;
} else {
lastc = c;
len++;
}
}
printf("\n");
return 0;
}
I'm quite new to C and am trying to write a function, which will split a string into an array of strings at a specific delimiter. But strangely I can only write at the first index of my char** array of strings, which will be my result. For example if I want to split the following string "Hello;;world;;!" at ;; I get [ "Hello" ] instead of [ "Hello", "world", "!" ]. I can't find my mistake.
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "strings.h"
int split(char **dest, const char *src, const char *splitStr) {
char buffer[16384];
int counter = 0;
int len = strlen(splitStr);
int flag = 0;
int start = 0;
for (int i = 0; i < strlen(src); i++) {
flag = 0;
if (src[i] == splitStr[0]) {
for (int j = 1; j < len; j++) {
//check if all elements in delimiter are in string
if (src[i + j] == splitStr[j] && j != (len - 1)) {
continue;
}
else if(src[i + j] == splitStr[j] && j == (len - 1)) {
buffer[i] = '\0';
dest[counter] = malloc(sizeof(char) * (i - start + 1));
strncpy(dest[counter], buffer + start, (i - start));
start = i + (len-1)
flag = 1;
i += (len - 1);
counter++;
}
//if not break
else {
break;
}
}
}
if (i == (strlen(src) - 1)) {
buffer[i] = src[i];
buffer[i + 1] = '\0';
counter++;
break;
}
if (flag == 0) {
buffer[i] = src[i];
}
}
return counter;
}
A proper function call would look like this:
auto src = "Hello;;world;;!";
auto buffer = (char **)malloc(32);
int count = split(buffer, src, ";;");
The buffer should contain, all the splitted strings, more or less like this: [ "Hello", "world", "!" ].
Currently my result buffer looks like this in the debugger. It appears as only the first element is written into it.
There are multiple problems in your code:
you compute string lengths repeatedly, which may be very inefficient. Instead of testing i < strlen(src) you should write src[i] != '\0'.
your test for check a matching delimiter is too complicated. You should use strstr to locate the delimiter string in the remaining portion of the string.
strncpy does not do what you think: strncpy(dest[counter], buffer + start, (i - start)); should be replaced with memcpy(dest[counter], buffer + start, i - start); and you must set the null terminator explicitly: dest[counter][i - start] = '\0'; You should read why you should never use strncpy().
it is unclear why you use buffer at all.
Here is a modified version:
#include <stdlib.h>
#include <string.h>
/* if POSIX function strndup() is not defined on your system, use this */
char *strndup(const char *str, size_t n) {
size_t len;
for (len = 0; len < n && str[len] != '\0'; len++)
continue;
char *s = malloc(len + 1);
if (s != NULL) {
memcpy(s, str, len);
s[len] = '\0';
}
return s;
}
int split(char **dest, const char *src, const char *splitStr) {
const char *p = str;
const char *end;
int counter = 0;
size_t len = strlen(splitStr);
if (len == 0) {
/* special case */
while (*p != '\0') {
dest[counter++] = strndup(p++, 1);
}
} else {
while ((end = strstr(p, splitStr)) != NULL) {
dest[counter++] = strndup(p, end - p);
p = end + len;
}
dest[counter++] = strdup(p);
}
return counter;
}
First of all you are not updating the start variable after you have copied the first string.
For simple debugging I would recommend adding some printf statements to see what is going on.
Proper formatting is not to be underestimated to make the code easy to read and easier to debug.
Also it is not clear what the buffer is for, and I think you can do without it.
The tips in the comments are also good. Split the function into smaller pieces and structure your code so it is simple to read.
A suggestion is to write a function to find the index of the next split string and the end of the string. Then you can use that to get the index and length you need to copy.
First time asking a question here:
well I need to take the original string
and remove the spaces and numbers from the string
I need to use the exact amount of memory.
For some reason, the string is fine in the beginning
but then it prints garbage values:
original string: "abcd2 34fty 78 jurt#"
what needed to be done: abcdftyjurt#
My code:
#define _CRT_SECURE_NO_WARNINGS
#include <malloc.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/* Function declarations */
/*-------------------------------------------------------------*/
void Ex1();
char* clearDigitsAndSpaces(char*);
/*-------------------------------------------------------------*/
void Ex2();
/*-------------------------------------------------------------*/
void Ex3();
/*-------------------------------------------------------------*/
/* Declarations of other functions */
int main() {
int select = 0, i, all_Ex_in_loop = 0;
printf("Run menu once or cyclically?\n(Once - enter 0, cyclically - enter other number) ");
if (scanf("%d", &all_Ex_in_loop) == 1)
do {
for (i = 1; i <= 3; i++)
printf("Ex%d--->%d\n", i, i);
printf("EXIT-->0\n");
do {
select = 0;
printf("please select 0-3 : ");
scanf("%d", &select);
} while ((select < 0) || (select > 3));
switch (select) {
case 1: Ex1(); break;
case 2: Ex2(); break;
case 3: Ex3(); break;
}
} while (all_Ex_in_loop && select);
return 0;
}
/* Function definitions */
void Ex1() {
char input[] = "abcd2 34fty 78 jurt#";
char *temp = NULL;
temp = clearDigitsAndSpaces(input);
printf("%s\n ", temp);
free(temp);
}
char *clearDigitsAndSpaces(char *old_string) {
char *new_string;
int count = 0;
int i = 0;
int j = 0;
int size = strlen(old_string);
new_string = (char *)malloc(size * sizeof(char));
assert(new_string); /*Memory allocation check*/
while (old_string[i]) {
if (old_string[i] != ' ' && (old_string[i] > '9' || old_string[i] < '0')) {
new_string[j++] = old_string[i];
} else {
//size -= 1;
new_string = (char *)realloc(new_string, size - 1);
}
i++;
}
assert(new_string);
//printf("%s", new_string);
return new_string;
}
void Ex2() {
}
void Ex3() {
}
The problem in your code is you must allocate one extra byte for the null terminator.
You can avoid using realloc() by first scanning the source string to determine the allocation size and then use a separate loop to copy the contents:
char *clearDigitsAndSpaces(const char *src) {
char *new_string;
size_t size = 1; // 1 extra byte for the null terminator.
for (size_t i = 0; src[i] != '\0'; i++) {
if (src[i] != ' ' && !(src[i] >= '0' && src[i] <= '9'))
size++;
}
new_string = malloc(size);
if (new_string) {
size_t j = 0;
for (size_t i = 0; src[i] != '\0'; i++) {
if (src[i] != ' ' && !(src[i] >= '0' && src[i] <= '9'))
new_string[j++] = src[i];
}
new_string[j] = '\0'; // set the null terminator
}
return new_string;
}
Firstly: you need to understand the difference between the length of a C-string and the size of a C-string. The length does not include the null terminator. The size does. So this snippet:
int size = strlen(old_string);
new_string = (char*)malloc(size * sizeof(char));
needs to be
int size = strlen(old_string) + 1;
new_string = (char*)malloc(size * sizeof(char));
(note that if you're using Unicode in Windows, with wchar_t instead of char, then the size in bytes is twice the length, plus 2 - each character is two bytes, as well as the null terminator aka 'sentinel')
Secondly: I would suggest you use parenthesis to be explicit about intention. It may not be "absolutely necessary", but there would be no doubt about the intention when someone else reads your code. Also avoid indexing the same thing repeatedly. Change:
if (old_string[i]!=' ' && (old_string[i] > '9' || old_string[i]< '0'))
to:
char oldChar = old_string[i];
if ((oldChar != ' ')
&& ((oldChar > '9') || (oldChar < '0'))
)
Finally, you need to emplace a null character at the end. You don't need to realloc; just use not all of the buffer. Change:
new_string = (char*)realloc(new_string, size-1);
to:
new_string[j++] = '\0';
// PS: if you really want to realloc, then add "new_string = (char*)realloc(new_string, j);" after writing the null character.
Also - if you change the malloc to a calloc, you won't need to write a null terminator, since the entire buffer would be nulled before you copied anything to it.
Furthermore, I would add a defensive limit check to i in the while loop to ensure it cannot go on ad-infinitum.
I usually try hard and harder to solve myself any bugs I find in my code, but this one is totally out of any logic for me. It works really fine with whatever strings and char separators, but only with that useless printf inside the while of the function, otherwise it prints
-> Lorem
then
-> ▼
and crashes aftwerwards. Thanks in advance to anyone that could tell me what is happening.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
char **strsep_(char *str, char ch) {
// Sub-string length
uint8_t len = 0;
// The number of sub-strings found means the same as the position where it will be stored in the main pointer
// Obviously, the number tends to increase over time, and at the end of the algorithm, it means the main pointer length too
uint8_t pos = 0;
// Storage for any found sub-strings and one more byte as the pointer is null-terminated
char **arr = (char**)malloc(sizeof(char **) + 1);
while (*str) {
printf("Erase me and it will not work! :)\n");
if (*str == ch) {
// The allocated memory should be one step ahead of the current usage
arr = realloc(arr, sizeof(char **) * pos + 1);
// Allocates enough memory in the current main pointer position and the '\0' byte
arr[pos] = malloc(sizeof(char *) * len + 1);
// Copies the sub-string size (based in the length number) into the previously allocated space
memcpy(arr[pos], (str - len), len);
// `-_("")_-k
arr[pos][len] = '\0';
len = 0;
pos++;
} else {
len++;
}
*str++;
}
// Is not needed to reallocate additional memory if no separator character was found
if (pos > 0) arr = realloc(arr, sizeof(char **) * pos + 1);
// The last chunk of characters after the last separator character is properly allocated
arr[pos] = malloc(sizeof(char *) * len + 1);
memcpy(arr[pos], (str - len), len);
// To prevent undefined behavior while iterating over the pointer
arr[++pos] = NULL;
return arr;
}
void strsep_free_(char **arr) {
char **aux = arr;
while (*arr) {
free(*arr);
*arr = NULL;
arr++;
}
// One more time to fully deallocate the null-terminated pointer
free(*arr);
*arr = NULL;
arr++;
// Clearing The pointer itself
free(aux);
aux = NULL;
}
int main(void) {
char **s = strsep_("Lorem ipsum four words", ' ');
char **i = s;
while (*i != NULL) {
printf("-> %s\n", *i);
i++;
}
strsep_free_(s);
}
Your program has undefined behavior, which means it may behave in unexpected ways, but could by chance behave as expected. Adding the extra printf changes the behavior in a way the seems to correct the bug, but only by coincidence. On a different machine, or even on the same machine at a different time, the behavior may again change.
There are multiple bugs in your program that lead to undefined behavior:
You are not allocating the array with the proper size: it should have space fpr pos + 1 pointers, hence sizeof(char **) * (pos + 1). The faulty statements are: char **arr = (char**)malloc(sizeof(char **) + 1); and arr = realloc(arr, sizeof(char **) * pos + 1);.
Furthermore, the space allocated for each substring is incorrect too: arr[pos] = malloc(sizeof(char *) * len + 1); should read arr[pos] = malloc(sizeof(char) * len + 1);, which by definition is arr[pos] = malloc(len + 1);. This does not lead to undefined behavior, you just allocate too much memory. If your system supports it, allocation and copy can be combined in one call to strndup(str - len, len).
You never check for memory allocation failure, causing undefined behavior in case of memory allocation failure.
Using uint8_t for len and pos is risky: what if the number of substrings exceeds 255? pos and len would silently wrap back to 0, producing unexpected results and memory leaks. There is no advantage at using such a small type, use int or size_t instead.
Here is a corrected version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **strsep_(const char *str, char ch) {
// Sub-string length
int len = 0;
// The number of sub-strings found, index where to store the NULL at the end of the array.
int pos = 0;
// return value: array of pointers to substrings with an extra slot for a NULL terminator.
char **arr = (char**)malloc(sizeof(*arr) * (pos + 1));
if (arr == NULL)
return NULL;
for (;;) {
if (*str == ch || *str == '\0') {
// alocate the substring and reallocate the array
char *p = malloc(len + 1);
char **new_arr = realloc(arr, sizeof(*arr) * (pos + 2));
if (new_arr == NULL || p == NULL) {
// allocation failure: free the memory allocated so far
free(p);
if (new_arr)
arr = new_arr;
while (pos-- > 0)
free(arr[pos]);
free(arr);
return NULL;
}
arr = new_arr;
memcpy(p, str - len, len);
p[len] = '\0';
arr[pos] = p;
pos++;
len = 0;
if (*str == '\0')
break;
} else {
len++;
}
str++;
}
arr[pos] = NULL;
return arr;
}
void strsep_free_(char **arr) {
int i;
// Free the array elements
for (i = 0; arr[i] != NULL; i++) {
free(arr[i]);
arr[i] = NULL; // extra safety, not really needed
}
// Free The array itself
free(arr);
}
int main(void) {
char **s = strsep_("Lorem ipsum four words", ' ');
int i;
for (i = 0; s[i] != NULL; i++) {
printf("-> %s\n", s[i]);
}
strsep_free_(s);
return 0;
}
Output:
-> Lorem
-> ipsum
-> four
-> words
The probable reason for the crash is most likely this: realloc(arr, sizeof(char **) * pos + 1).
That is the same as realloc(arr, (sizeof(char **) * pos) + 1) which does not allocate enough space for your "array". You need to do realloc(arr, sizeof(char **) * (pos + 1)).
Same with the allocation for arr[pos], you need to use parentheses correctly there too.
Good answer from #chqrlie. From my side, I think it would be better to count everything before copy, it should help to avoid realloc.
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
int count_chars(const char *str, const char ch)
{
int i;
int count;
i = 0;
count = 0;
if (*str == ch)
str++;
while (str[i] != ch && str[i] != '\0')
{
count++;
i++;
}
return (count);
}
int count_delimeter(const char *str, const char ch)
{
int i = 0;
int count = 0;
while (str[i])
{
if (str[i] == ch && str[i + 1] != ch)
count++;
i++;
}
return count;
}
char** strsep_(const char *str, const char ch)
{
char **arr;
int index = 0;
int size = 0;
int i = 0;
size = count_delimeter(str, ch) + 1;
if ((arr = malloc(sizeof(char *) * (size + 1))) == NULL)
return (NULL);
arr[size] = NULL;
while (i < size)
{
if (str[index] == ch)
index++;
if (str[index] && str[index] == ch && str[index + 1] == ch)
{
while (str[index] && str[index] == ch && str[index + 1] == ch)
index++;
index++;
}
int len = count_chars(&str[index], ch);
if ((arr[i] = malloc(sizeof(char) * (len + 1))) == NULL)
return NULL;
memcpy(arr[i], &str[index], len);
index += len;
arr[i++][len] = '\0';
}
return arr;
}
int main(void)
{
char *str = "Lorem ipsum ipsum Lorem lipsum gorem insum";
char **s = strsep_(str, ' ');
/* char *str = "Lorem + Ipsum"; */
/* char **s = strsep_(str, '+'); */
/* char *str = "lorem, torem, horem, lorem"; */
/* char **s = strsep_(str, ','); */
while (*s != NULL) {
printf("-> [%s]\n", *s);
s++;
}
/* dont forget to free */
return 0;
}
I'm trying to build a string from f, being split at whitespace and read it into a struct.
f is the char array I'm iterating over.
I then copy the contents from tmp into ra1.callsign, and essentially empty the tmp char array.
What I want to do is have the the tmp variable start building from index 0 again, so that when I try to strcpy the second time round all the characters in tmp start from index 0.
The way I have it now, when it tries the line: strcpy(ra1.location, tmp) it doesn't copy anything, I think this is because at that point the first character in tmp doesn't appear until some time down the array.
char c;
char tmp[1000];
for (i = 0; i < len; ++i) {
c = f[i];
if (c != ' ') {
tmp[i] = c; //build string to be added
}
//add string to data structure
if (c == ' ') {
if (addTo == CALLSIGN) {
strncpy(ra1.callsign, tmp, strlen(tmp));
memset(tmp, '\0', strlen(tmp));
}
if (addTo == LOCATION) {
strcpy(ra1.location, tmp);
}
++addTo;
}
}
Hope this is clear enough, thanks.
You left out quite a few details in your code and I have made a number of assumptions.
So, using the assumptions that I have made (which you can see in the code below), I believe that this will do what you are trying to accomplish. There are much easier and cleaner ways to do this, but I am hoping that you can get a clear understanding of how it would work with your code.
I have basically added a terminating null character where it is required so the strlen() function will work correctly and utilized an extra variable called cur_size which can be used as an offset based on the current index i.
#include <string.h>
#include <stdio.h>
#define CALLSIGN 3U
#define LOCATION 5U
#define ARRAY_SIZE 50U
typedef struct
{
char callsign[ARRAY_SIZE];
char location[ARRAY_SIZE];
} MyStruct;
MyStruct ra1 = { .callsign = {0}, .location = {0} };
char f[] = "This is my character array. Let's see what happens.";
int main (void)
{
char c;
char tmp[ARRAY_SIZE];
unsigned char addTo = 0;
unsigned char i;
unsigned char cur_size = 0;
for(i = 0; i < sizeof(f); ++i)
{
c = f[i];
if(c != ' ')
{
tmp[i - cur_size] = c; //build string to be added
}
//add string to data structure
if(c == ' ')
{
tmp[i - cur_size] = '\0'; /* YOU NEED THIS FOR strlen(tmp) to work */
cur_size = i + 1;
if(addTo == CALLSIGN)
{
strncpy(ra1.callsign, tmp, strlen(tmp));
//memset(tmp, '\0', strlen(tmp));
}
else if (addTo == LOCATION)
{
strncpy(ra1.location, tmp, strlen(tmp));
}
++addTo;
}
}
for (i = 0; i < ARRAY_SIZE; i++)
{
printf("%c", ra1.callsign[i]);
}
printf("\r\n");
for (i = 0; i < ARRAY_SIZE; i++)
{
printf("%c", ra1.location[i]);
}
printf("\r\n");
return 0;
}