Related
I'm quite new to C and am trying to write a function, which will split a string into an array of strings at a specific delimiter. But strangely I can only write at the first index of my char** array of strings, which will be my result. For example if I want to split the following string "Hello;;world;;!" at ;; I get [ "Hello" ] instead of [ "Hello", "world", "!" ]. I can't find my mistake.
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "strings.h"
/*
 * Question listing: splits src at every occurrence of splitStr and stores
 * heap-allocated pieces in dest, returning the number of pieces counted.
 * Characters of src are mirrored into buffer at the same index, and each
 * piece is copied out of buffer when a full delimiter has been matched.
 */
int split(char **dest, const char *src, const char *splitStr) {
char buffer[16384];
int counter = 0;
int len = strlen(splitStr);
int flag = 0;
int start = 0;
// strlen(src) is re-evaluated on every iteration
for (int i = 0; i < strlen(src); i++) {
flag = 0;
if (src[i] == splitStr[0]) {
// j starts at 1, so this loop never runs for a one-character delimiter
for (int j = 1; j < len; j++) {
// delimiter still matching, but not yet at its last character
if (src[i + j] == splitStr[j] && j != (len - 1)) {
continue;
}
// last delimiter character matched: emit the piece buffer[start..i)
else if(src[i + j] == splitStr[j] && j == (len - 1)) {
buffer[i] = '\0';
dest[counter] = malloc(sizeof(char) * (i - start + 1));
// strncpy writes exactly i - start bytes; the extra allocated byte is never written
strncpy(dest[counter], buffer + start, (i - start));
// NOTE(review): this statement has no terminating ';'. It also sets start to the
// index of the delimiter's last character, not the first character after it.
start = i + (len-1)
flag = 1;
i += (len - 1);
counter++;
}
// mismatch inside the delimiter: give up on this candidate
else {
break;
}
}
}
// last character of src: it is counted as one more piece, but nothing is stored in dest
if (i == (strlen(src) - 1)) {
buffer[i] = src[i];
buffer[i + 1] = '\0';
counter++;
break;
}
if (flag == 0) {
buffer[i] = src[i];
}
}
return counter;
}
A proper function call would look like this:
// Example call from the question. `auto` without a type relies on type inference
// (C23 or C++); under earlier C standards these declarations are not valid.
auto src = "Hello;;world;;!";
// 32 is a byte count here, not a number of pointers.
auto buffer = (char **)malloc(32);
int count = split(buffer, src, ";;");
The buffer should contain all the split strings, more or less like this: [ "Hello", "world", "!" ].
Currently my result buffer looks like this in the debugger. It appears as only the first element is written into it.
There are multiple problems in your code:
you compute string lengths repeatedly, which may be very inefficient. Instead of testing i < strlen(src) you should write src[i] != '\0'.
your test for a matching delimiter is too complicated. You should use strstr to locate the delimiter string in the remaining portion of the string.
strncpy does not do what you think: strncpy(dest[counter], buffer + start, (i - start)); should be replaced with memcpy(dest[counter], buffer + start, i - start); and you must set the null terminator explicitly: dest[counter][i - start] = '\0'; You should read why you should never use strncpy().
it is unclear why you use buffer at all.
Here is a modified version:
#include <stdlib.h>
#include <string.h>
/* if POSIX function strndup() is not defined on your system, use this */
/*
 * Fallback for POSIX strndup(): returns a newly allocated, NUL-terminated
 * copy of at most n characters of str, or NULL if malloc() fails.
 * The caller owns the result and must free() it.
 */
char *strndup(const char *str, size_t n) {
    size_t count = 0;

    /* Length to copy: stop at n or at the terminator, whichever comes first. */
    while (count < n && str[count] != '\0')
        count++;

    char *copy = malloc(count + 1);
    if (copy == NULL)
        return NULL;

    memcpy(copy, str, count);
    copy[count] = '\0';
    return copy;
}
/* Return a newly allocated, NUL-terminated copy of the first n bytes of s (NULL on failure). */
static char *split_copy_range(const char *s, size_t n) {
    char *copy = malloc(n + 1);
    if (copy != NULL) {
        memcpy(copy, s, n);
        copy[n] = '\0';
    }
    return copy;
}

/*
 * Split src at every occurrence of splitStr.
 *
 * dest      caller-provided array with room for every piece produced
 * src       string to split (not modified)
 * splitStr  delimiter; when empty, src is split into single characters
 *
 * Each piece is stored in dest as a separately allocated string that the
 * caller must free(). Adjacent delimiters produce empty pieces, and a
 * non-empty delimiter always yields at least one piece.
 *
 * Returns the number of pieces stored, or -1 if an allocation failed
 * (pieces stored so far are freed in that case).
 */
int split(char **dest, const char *src, const char *splitStr) {
    const char *p = src;
    const char *end;
    int counter = 0;
    size_t len = strlen(splitStr);

    if (len == 0) {
        /* special case: an empty delimiter matches between every character */
        while (*p != '\0') {
            if ((dest[counter] = split_copy_range(p++, 1)) == NULL)
                goto fail;
            counter++;
        }
    } else {
        while ((end = strstr(p, splitStr)) != NULL) {
            if ((dest[counter] = split_copy_range(p, (size_t)(end - p))) == NULL)
                goto fail;
            counter++;
            p = end + len;
        }
        /* whatever follows the last delimiter (possibly empty) is the final piece */
        if ((dest[counter] = split_copy_range(p, strlen(p))) == NULL)
            goto fail;
        counter++;
    }
    return counter;

fail:
    while (counter > 0)
        free(dest[--counter]);
    return -1;
}
First of all you are not updating the start variable after you have copied the first string.
For simple debugging I would recommend adding some printf statements to see what is going on.
Proper formatting is not to be underestimated to make the code easy to read and easier to debug.
Also it is not clear what the buffer is for, and I think you can do without it.
The tips in the comments are also good. Split the function into smaller pieces and structure your code so it is simple to read.
A suggestion is to write a function to find the index of the next split string and the end of the string. Then you can use that to get the index and length you need to copy.
My problem now is that I have taken space for different words,but I'm having problems storing this as an array. Even though there are some similar posts like this, nothing seems to work for me and I'm completely stuck here. I want to keep this format(i don't want to change the definition of the function). Grateful for all help and comments!
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int i, len = 0, counter = 0;
char ** p = 0;
for(i = 0; s[i] != '\0'; i++){
len++;
if(s[i] == ' ' || s[i+1] == '\0'){
counter ++;
for(i = 0; i < len; i++){
p[i] = s[i];
}
}
printf("%d\n", len);
printf("%d\n", counter);
return p;
}
/* Question listing: unfinished driver. split() is never called and n is never assigned. */
int main() {
char *s = "This is a string";
int n;
int i;
// NOTE(review): `n*` is not a complete expression, so this line does not compile
for(i = 0; i < n*; i++){
//also not sure how to print this
}
}
I edited your code and it's now working correctly:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char** split(const char* s, int *n);
/*
 * Split s at every space character.
 *
 * s  string to split (not modified)
 * n  receives the number of pieces, which is always (number of spaces + 1)
 *
 * Returns a malloc'ed array of *n malloc'ed strings; consecutive, leading or
 * trailing spaces produce empty strings. The caller frees every element and
 * then the array. On allocation failure everything is released, *n is set to
 * 0 and NULL is returned.
 */
char** split(const char* s, int *n) {
    int counter = 1;
    for (const char *c = s; *c != '\0'; ++c) {
        if (*c == ' ') {
            counter += 1;
        }
    }

    char **p = malloc(counter * sizeof *p);
    if (p == NULL) {
        *n = 0;
        return NULL;
    }

    const char *start = s;
    for (int idx = 0; idx < counter; ++idx) {
        const char *end = strchr(start, ' ');
        size_t len = (end != NULL) ? (size_t)(end - start) : strlen(start);

        /* len characters plus the terminator */
        p[idx] = malloc(len + 1);
        if (p[idx] == NULL) {
            while (idx > 0) {
                free(p[--idx]);
            }
            free(p);
            *n = 0;
            return NULL;
        }
        memcpy(p[idx], start, len);
        p[idx][len] = '\0';

        /* step over the space; after the last piece stay on the terminator */
        start = (end != NULL) ? end + 1 : start + len;
    }

    *n = counter;
    return p;
}
/* Demo driver: splits a fixed sentence, prints one piece per line and releases the result. */
int main() {
    const char *s = "This is a string";
    int n = 0;
    char **split_s = split(s, &n);

    if (split_s == NULL) {
        fprintf(stderr, "split() failed\n");
        return 1;
    }
    for (int i = 0; i < n; i++) {
        printf("%s\n", split_s[i]);
        free(split_s[i]);
    }
    free(split_s);
    return 0;
}
But I suggest you do a little bit clean-up.
Here is a solution using sscanf. scanf and sscanf considers space as an end of input. I have taken benefit of that to make it work for you.
char *str = (char*) "This is a string";
char buffer[50];
char ** p = (char**)malloc(1 * sizeof(*p));
for (int i = 0; str[0] != NULL; i++)
{
if (i > 0)
{
p = (char**)realloc(p, i * sizeof(p));
}
sscanf(str, "%s", buffer);
int read = strlen(buffer);
str += read + 1;
p[i] = (char*)malloc(sizeof(char)*read + 1);
strcpy(p[i], buffer);
printf("%s\n", p[i]);
}
Since this pointer is growing in both the dimensions, every time a new string is found we need to resize the p itself and then the new address that it contains should be resized too .
My problem now is that I have taken space for different words using malloc, but I'm having problems storing this as an array.
When addressable memory for a collection of strings is needed, you need both a collection of pointers and memory for each of those pointers to point to.
In your code:
p = (char**)malloc(counter*sizeof(char*));
You have created the collection of pointers, but you have not yet created memory at those locations to accommodate the strings. (By the way, the cast is not necessary)
Here are the essential steps to both create a collection of pointers, and memory for each:
//for illustration, pick sizes for count of strings needed,
//and length of longest string needed.
#define NUM_STRINGS 5
#define STR_LEN 80
char **stringArray = NULL;
stringArray = malloc(NUM_STRINGS*sizeof(char *));// create collection of pointers
if(stringArray)
{
for(int i=0;i<NUM_STRINGS;i++)
{
stringArray[i] = malloc(STR_LEN + 1);//create memory for each string, +1 room for nul terminator
if(!stringArray[i]) // allocation of this string failed
{
//handle error
}
}
}
As a function it could look like this: (replacing malloc with calloc for initialized space)
/*
 * Allocate an array of numStrings zero-filled strings, each able to hold
 * maxStrLen characters plus the terminating NUL.
 *
 * Returns the array, or NULL if any allocation fails (nothing is leaked).
 * The caller frees each element and then the array itself.
 */
char ** Create2DStr(size_t numStrings, size_t maxStrLen)
{
    char **a = calloc(numStrings, sizeof(char *));
    if (a == NULL)
        return NULL;

    for (size_t i = 0; i < numStrings; i++)
    {
        a[i] = calloc(maxStrLen + 1, 1);
        if (a[i] == NULL)
        {
            /* undo the strings allocated so far */
            while (i > 0)
                free(a[--i]);
            free(a);
            return NULL;
        }
    }
    return a;
}
using this in your split() function:
char** split(const char* s, int *n){
int i, len = 0, counter = 0, lenLongest = 0
char ** p = 0;
//code to count words and longest word
p = Create2DStr(counter, longest + 1); //+1 for nul termination
if(p)
{
//your searching code
//...
// when finished, free memory
Let's start at the logic.
How does a string like A quick brown fox. get processed? I would suggest:
Count the number of words, and the amount of memory needed to store the words. (In C, each string ends with a terminating nul byte, \0.)
Allocate enough memory for the pointers and the words.
Copy each word from the source string.
We have a string as an input, and we want an array of strings as output. The simplest option is
char **split_words(const char *source);
where the return value is NULL if an error occurs, or an array of pointers terminated by a NULL pointer otherwise. All of it is dynamically allocated at once, so calling free() on the return value will free both the pointers and their contents.
Let's start implementing the logic according to the bullet points above.
#include <stdlib.h>
/* Nonzero when c is one of the whitespace characters that separate words. */
static int split_words_is_space(char c)
{
    return c == '\t' || c == '\n' || c == '\v' ||
           c == '\f' || c == '\r' || c == ' ';
}

/*
 * Split source into whitespace-separated words.
 *
 * Returns a NULL-terminated array of pointers to the words. The pointers
 * and the word contents live in one allocation, so a single free() on the
 * return value releases everything. Returns NULL when source is NULL,
 * contains no words, or memory cannot be allocated.
 */
char **split_words(const char *source)
{
    size_t total_chars = 0;   /* bytes needed for all words, terminators included */
    size_t total_words = 0;
    size_t next = 0;          /* index of the next pointer slot to fill */
    const char *cursor;
    char **table;
    char *store;

    if (source == NULL)
        return NULL;

    /* Pass 1: measure. */
    for (cursor = source; ; ) {
        while (split_words_is_space(*cursor))
            cursor++;
        if (*cursor == '\0')
            break;

        total_words++;
        total_chars++;        /* the word's terminating nul */
        while (*cursor != '\0' && !split_words_is_space(*cursor)) {
            cursor++;
            total_chars++;
        }
    }

    /* Nothing but whitespace (or an empty string). */
    if (total_chars < 1)
        return NULL;

    /* Pointer table (with one extra slot for the NULL sentinel) followed by the characters. */
    table = malloc((total_words + 1) * sizeof (char *) + total_chars);
    if (table == NULL)
        return NULL;
    store = (char *)(table + total_words + 1);

    /* Pass 2: same walk as pass 1, this time copying. */
    for (cursor = source; ; ) {
        while (split_words_is_space(*cursor))
            cursor++;
        if (*cursor == '\0')
            break;

        table[next++] = store;
        while (*cursor != '\0' && !split_words_is_space(*cursor))
            *store++ = *cursor++;
        *store++ = '\0';
    }

    table[next] = NULL;
    return table;
}
We can test the above with a small test main():
#include <stdio.h>
/* Small driver: splits a fixed sample string and prints each word with its index. */
int main(int argc, char *argv[])
{
    size_t idx = 0;
    char **words = split_words(" foo Bar. BAZ!\tWoohoo\n More");

    if (words == NULL) {
        fprintf(stderr, "split_words() failed.\n");
        exit(EXIT_FAILURE);
    }

    /* The array ends with a NULL pointer. */
    while (words[idx] != NULL) {
        printf("all[%zu] = \"%s\"\n", idx, words[idx]);
        idx++;
    }

    /* One free() releases the pointers and the words. */
    free(words);
    return EXIT_SUCCESS;
}
If we compile and run the above, we get
all[0] = "foo"
all[1] = "Bar."
all[2] = "BAZ!"
all[3] = "Woohoo"
all[4] = "More"
The downside of this approach (of using one malloc() call to allocate memory for both the pointers and the data), is that we cannot easily grow the array; we can really just treat it as one big clump.
A better approach, especially if we intend to add new words dynamically, is to use a structure:
/* Growable array of words, each word held through its own pointer. */
typedef struct {
size_t max_words; /* Number of pointers allocated */
size_t num_words; /* Number of words in array */
char **word; /* Array of pointers */
} wordarray;
Unfortunately, this time we need to allocate each word separately. However, if we use a structure to describe each word in a common allocation buffer, say
/* Describes one word by its position inside a shared character buffer rather than by pointer. */
typedef struct {
size_t offset; /* added to the buffer's base address to locate the word */
size_t length; /* length of the word */
} wordref;
/* Word array whose words all live in one data buffer and are addressed through wordref entries. */
typedef struct {
size_t max_words; /* presumably the number of wordref entries allocated -- TODO confirm */
size_t num_words; /* number of valid entries in word[] */
wordref *word;    /* per-word offset/length descriptors */
size_t max_data;  /* presumably the allocated size of data -- TODO confirm */
size_t num_data;  /* presumably the number of bytes of data in use -- TODO confirm */
char *data;       /* base of the shared character buffer */
} wordarray;
/* Initializer for an empty wordarray: zero counts and NULL pointers. */
#define WORDARRAY_INIT { 0, 0, NULL, 0, 0, NULL }
/*
 * Pointer to the i-th word of wa, or to an empty string when wa is NULL
 * or i is not below wa->num_words.
 */
static inline const char *wordarray_word_ptr(wordarray *wa, size_t i)
{
    if (wa == NULL || i >= wa->num_words)
        return "";

    /* Words are located by their offset from the start of the data buffer. */
    return wa->data + wa->word[i].offset;
}
/*
 * Stored length of the i-th word of wa, or 0 when wa is NULL or i is not
 * below wa->num_words.
 */
static inline size_t wordarray_word_len(wordarray *wa, size_t i)
{
    if (wa == NULL || i >= wa->num_words)
        return 0;

    return wa->word[i].length;
}
The idea is that if you declare
wordarray words = WORDARRAY_INIT;
you can use wordarray_word_ptr(&words, i) to get a pointer to the ith word, or a pointer to an empty string if ith word does not exist yet, and wordarray_word_len(&words, i) to get the length of that word (much faster than calling strlen(wordarray_word_ptr(&words, i))).
The underlying reason why we cannot use char * here, is that realloc()ing the data area (where the word pointers would point to) may change its address. If that were to happen, we'd have to adjust every pointer in our array. It is much easier to use offsets to the data area instead.
The only downside to this approach is that deleting words does not mean a corresponding shrinkage in the data area. However, it is possible to write a simple "compactor" function, that repacks the data to a new area, so that holes left by deleted words are "moved" to the end of the data area. Usually, this is not necessary, but you might wish to add a member to the wordarray structure, say the number of lost characters from word deletions, so that the compaction can be done heuristically the next time the data area would be otherwise resized.
I had an assignment for my class last week where I had to split a string using spaces, tabs and \n as separators and store every "word" in an array. I think I'm very close, but my output is very weird, so it would be nice if someone could tell me what I forgot. The only restriction is that I can only use malloc.
/*
 * Question listing: splits str at spaces, tabs and newlines and returns a
 * NULL-terminated array of separately allocated words.
 * Relies on ft_nb_words() and ft_len_word(), which are not shown here.
 */
char **ft_split_whitespaces(char *str)
{
int i;
int j;
int k;
char **tab;
i = 0;
j = 0;
k = 0;
/* one slot per word plus one for the NULL terminator; the result is not checked */
tab = (char**)malloc(sizeof(*tab) * (ft_nb_words(str) + 1));
while (str[i])
{
/* skip the separators in front of the next word */
while (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')
i++;
if (str[i])
{
if ((tab[j] = (char*)malloc(sizeof(char) * (ft_len_word(str + i) + 1))) == NULL)
return (NULL);
/* NOTE(review): i advances inside this loop, so ft_len_word(str + i) is evaluated
 * on a shorter and shorter tail of the word while k grows -- presumably this
 * ends the copy about half-way through the word; confirm against ft_len_word */
while (k < ft_len_word(str + i))
tab[j][k++] = str[i++];
tab[j++][k] = '\0';
k = 0;
}
}
tab[j] = NULL;
return (tab);
}
The functions returning the length of a word and the number of words work fine so I think the problem comes from the main function.
This can be easily handled if you keep a pointer to the position just after the last separator character (' ', '\n' or '\t') that was seen.
/*
 * Split str at spaces, tabs and newlines.
 *
 * Returns a NULL-terminated array of separately allocated, NUL-terminated
 * words; runs of separators are skipped, so no empty words are produced.
 * Returns NULL if an allocation fails (everything allocated so far is freed).
 *
 * The pointer array is sized with ft_nb_words(), which is defined elsewhere;
 * this assumes it returns the number of separator-delimited words in str.
 * The debugging printf calls of the earlier version were dropped because
 * they printed the words before they were terminated.
 */
char **ft_split_whitespaces(char *str)
{
    int j = 0;
    int k;
    int len;
    char **tab;
    char *prevToken = str;   /* start of the word currently being examined */

    tab = (char**)malloc(sizeof(*tab) * (ft_nb_words(str) + 1));
    if (tab == NULL)
        return (NULL);

    while (*prevToken != '\0')
    {
        /* skip separators in front of the next word */
        while (*prevToken == ' ' || *prevToken == '\t' || *prevToken == '\n')
            prevToken++;
        if (*prevToken == '\0')
            break;

        /* measure the word */
        len = 0;
        while (prevToken[len] != '\0' && prevToken[len] != ' ' &&
               prevToken[len] != '\t' && prevToken[len] != '\n')
            len++;

        if ((tab[j] = (char*)malloc(sizeof(char) * (len + 1))) == NULL)
        {
            while (j > 0)
                free(tab[--j]);
            free(tab);
            return (NULL);
        }
        for (k = 0; k < len; k++)
            tab[j][k] = prevToken[k];
        tab[j][len] = '\0';

        j++;
        prevToken += len;
    }
    /* terminate after the last word instead of overwriting it */
    tab[j] = NULL;
    return (tab);
}
The following code contains an implementation of some useful C functions.
The function you search is strtok(). In the code are implemented also the functions strspn() and strpbrk() because strtok() uses them.
The best way to solve this kind of problems is to study the implementation of C standard functions.
The code stores the copies of max 100 token (the extracted words).
You have to remember that the function strtok() modifies the content of the source string inserting '\0' to terminate the strings found.
The functions here implemented are:
mystrtok()
mystrspn()
mystrpbrk()
The code:
#include <stdio.h>
#include <string.h> /* for the use of strcpy fn */
#include <malloc.h>
char * mystrtok (char * s, char * delim);
size_t mystrspn (const char *s, const char *accept);
char * mystrpbrk (const char *s, const char *accept);
/*
 * Return a pointer to the first character of s that also occurs in accept,
 * or NULL when s contains none of them.
 */
char * mystrpbrk (const char *s, const char *accept)
{
    const char *pos;
    const char *cand;

    for (pos = s; *pos != '\0'; pos++)
        for (cand = accept; *cand != '\0'; cand++)
            if (*cand == *pos)
                return (char *) pos;

    return NULL;
}
/*
 * Return the length of the leading part of s that consists only of
 * characters found in accept.
 */
size_t mystrspn (const char *s, const char *accept)
{
    size_t matched = 0;

    while (s[matched] != '\0')
    {
        const char *cand = accept;

        /* look for the current character among the accepted ones */
        while (*cand != '\0' && *cand != s[matched])
            cand++;
        if (*cand == '\0')
            break;          /* not accepted: the span ends here */
        matched++;
    }
    return matched;
}
/*
 * strtok() work-alike.
 *
 * s      string to tokenize on the first call, NULL to continue with the
 *        string from the previous call
 * delim  set of delimiter characters
 *
 * Returns the next token, or NULL when no token is left. The source string
 * is modified: the delimiter that ends a token is overwritten with '\0'.
 * The position is kept in a static variable, so the function is not
 * reentrant.
 */
char * mystrtok (char *s, char *delim)
{
    char *token;
    static char *olds;

    if (s == NULL) {
        s = olds;
        /* continuation requested before any string was supplied */
        if (s == NULL)
            return NULL;
    }

    /* Scan leading delimiters. */
    s += mystrspn (s, delim);
    if (*s == '\0')
    {
        olds = s;
        return NULL;
    }

    /* Find the end of the token. */
    token = s;
    s = mystrpbrk (token, delim);
    if (s == NULL)
    {
        /* This token finishes the string: park OLDS on the terminator of
           this token, not on whatever OLDS pointed to before. */
        olds = token;
        while (*olds != '\0')
            olds++;
    }
    else
    {
        /* Terminate the token and make OLDS point past it. */
        *s = '\0';
        olds = s + 1;
    }
    return token;
}
/*
 * Demo driver: tokenizes a fixed text with mystrtok() and keeps a copy of
 * every token (at most 100) in one shared character pool.
 */
int main(void)
{
    enum { MAX_TOKENS = 100 };
    char str[] = "I have an orange\tYou have some bananas\nShe has three pineapples\n";
    char *x = NULL;
    int cnt = 0, i;
    char **store;
    char *pool;
    char *next;

    /* Stores a max of 100 strings */
    store = malloc(sizeof(char *) * MAX_TOKENS);
    /* The total space for the tokens is
       max the entire string + '\0' */
    pool = malloc(strlen(str) + 1);
    if (store == NULL || pool == NULL) {
        free(pool);
        free(store);
        return 1;
    }
    next = pool;

    /* Extract the first token (same delimiter set as the later calls) */
    x = mystrtok(str, " \n\t");
    while (x != NULL && cnt < MAX_TOKENS) {
        printf("Storing %s\n", x);
        /* Store a copy of the token right after the previous one */
        strcpy(next, x);
        store[cnt++] = next;
        next += strlen(x) + 1;
        /* extract the next token */
        x = mystrtok(NULL, " \n\t");
    }

    for (i = 0; i < cnt; i++)
        printf("Stored %s\n", store[i]);

    free(pool);
    free(store);
    return 0;
}
Your code is inefficient as you call ft_len_word far too many times, but it does not seem broken apart from the undefined behavior on malloc failures.
The problem might lie in your versions of ft_len_word or ft_nb_words. You should post a full program exhibiting the problem for a proper investigation.
Here is a modified version that does not use these functions:
#include <stdlib.h>
/* Returns 1 when c is a space, tab or newline, 0 otherwise. */
int ft_is_space(char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
/*
 * Split str at spaces, tabs and newlines (as classified by ft_is_space).
 *
 * Returns a NULL-terminated array of separately allocated words, or NULL
 * if an allocation fails; in that case everything allocated so far is
 * freed. The caller frees each word and then the array.
 */
char **ft_split_whitespaces(const char *str) {
    int i, j, k, len, in_space, nb_words;
    char **tab;

    /* Count the words: a word starts at every separator -> non-separator transition. */
    nb_words = 0;
    in_space = 1;
    for (i = 0; str[i]; i++) {
        if (ft_is_space(str[i])) {
            in_space = 1;
        } else {
            nb_words += in_space;
            in_space = 0;
        }
    }

    tab = malloc(sizeof(*tab) * (nb_words + 1));
    if (tab != NULL) {
        i = 0;
        j = 0;
        while (str[i]) {
            while (ft_is_space(str[i]))
                i++;
            if (str[i]) {
                /* str[i] is known to be part of a word, so start measuring at 1 */
                for (len = 1; str[i + len] && !ft_is_space(str[i + len]); len++)
                    continue;
                if ((tab[j] = malloc(sizeof(*tab[j]) * (len + 1))) == NULL) {
                    while (j > 0)
                        free(tab[--j]);
                    free(tab);
                    return NULL;
                }
                for (k = 0; k < len; k++)
                    tab[j][k] = str[i + k];
                tab[j++][len] = '\0';
                i += len;
            }
        }
        tab[j] = NULL;
    }
    return tab;
}
You need to implement your version of strtok() if you do not want to use the library function or need a different functionality than the one provided by strtok().
Below is a simple string tokenizer, which, unlike the standard library's strtok(), still returns a value in case of consecutive delimiters. I used this function to parse CSV files, which sometimes include empty cells, hence consecutive , characters. Standard library's strtok() did not work for me, so I had to implement my own function.
I used other helper functions, which are now part of a simple string library I maintain on GitHub, called zString.
Below is how it behaves
Example Usage
/* Tokenize "A,B,,,C"; the array is passed on the first call, NULL afterwards. */
char str[] = "A,B,,,C";
printf("1 %s\n",zstring_strtok(str,","));
printf("2 %s\n",zstring_strtok(NULL,","));
printf("3 %s\n",zstring_strtok(NULL,","));
printf("4 %s\n",zstring_strtok(NULL,","));
printf("5 %s\n",zstring_strtok(NULL,","));
/* NOTE(review): this call returns a null pointer; printing it with %s is not
 * defined by the C standard, and the "(null)" shown in the example output is
 * what some C libraries happen to print. */
printf("6 %s\n",zstring_strtok(NULL,","));
Example Output
1 A
2 B
3 ,
4 ,
5 C
6 (null)
and the code
/*
 * Tokenizer that reports consecutive delimiters instead of skipping them.
 * Only the first character of delim is used as the delimiter.
 *
 * str    string to tokenize on the first call (it is modified), or a null
 *        pointer to continue with the remainder saved by the previous call
 * delim  delimiter string; a null pointer makes the function return null
 *
 * Returns the next token; the delimiter string itself when the remaining
 * text starts with the delimiter; the whole remainder when it contains no
 * delimiter; and a null pointer once nothing is left.
 */
char *zstring_strtok(char *str, const char *delim) {
    static char *static_str = 0;   /* where the next call resumes */
    char *cursor;

    if (delim == 0 || (str == 0 && static_str == 0))
        return 0;

    if (str == 0)
        str = static_str;

    /* walk to the first delimiter or to the end of the string */
    for (cursor = str; *cursor != '\0' && *cursor != delim[0]; cursor++)
        ;

    /* no delimiter left: hand back the remainder and stop the sequence */
    if (*cursor == '\0') {
        static_str = 0;
        return str;
    }

    /* the text starts with a delimiter: report it and step over it */
    if (cursor == str) {
        static_str = str + 1;
        return (char *)delim;
    }

    /* cut the token off in place (str must be writable) and remember the rest */
    *cursor = '\0';
    static_str = cursor + 1;
    return str;
}
I usually try hard and harder to solve myself any bugs I find in my code, but this one is totally out of any logic for me. It works really fine with whatever strings and char separators, but only with that useless printf inside the while of the function, otherwise it prints
-> Lorem
then
-> ▼
and crashes afterwards. Thanks in advance to anyone who can tell me what is happening.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
/*
 * Question listing: splits str at every occurrence of ch and returns a
 * NULL-terminated array of separately allocated sub-strings.
 * The allocation sizes below are kept exactly as posted.
 */
char **strsep_(char *str, char ch) {
// Length of the sub-string currently being scanned (8-bit counter)
uint8_t len = 0;
// Number of sub-strings stored so far, which is also the index of the next free slot (8-bit counter)
uint8_t pos = 0;
// Initial storage: the size of one pointer plus one byte
char **arr = (char**)malloc(sizeof(char **) + 1);
while (*str) {
printf("Erase me and it will not work! :)\n");
if (*str == ch) {
// Resize to pos pointers plus one byte; the slot arr[pos] written below lies beyond pos pointers
arr = realloc(arr, sizeof(char **) * pos + 1);
// len pointer-sized units plus one byte for the sub-string
arr[pos] = malloc(sizeof(char *) * len + 1);
// Copy the len characters that precede the separator
memcpy(arr[pos], (str - len), len);
// Terminate the copied sub-string
arr[pos][len] = '\0';
len = 0;
pos++;
} else {
len++;
}
// Advance to the next character; the dereferenced value is discarded
*str++;
}
// Same sizing expression as inside the loop, skipped when no separator was found
if (pos > 0) arr = realloc(arr, sizeof(char **) * pos + 1);
// Last chunk after the final separator; no terminator is written after the copy
arr[pos] = malloc(sizeof(char *) * len + 1);
memcpy(arr[pos], (str - len), len);
// NULL sentinel stored one slot further on
arr[++pos] = NULL;
return arr;
}
/* Question listing: frees every string of a NULL-terminated array and then the array itself. */
void strsep_free_(char **arr) {
// Keep the start of the array; arr itself is advanced below
char **aux = arr;
while (*arr) {
free(*arr);
*arr = NULL;
arr++;
}
// The loop stopped on the NULL sentinel, so this is free(NULL), which does nothing
free(*arr);
*arr = NULL;
arr++;
// Release the pointer array itself
free(aux);
// Only clears the local copy
aux = NULL;
}
/* Question listing: splits a fixed sentence at spaces, prints every piece and frees the result. */
int main(void) {
char **s = strsep_("Lorem ipsum four words", ' ');
// Walk a second pointer so that s still points to the start for strsep_free_()
char **i = s;
while (*i != NULL) {
printf("-> %s\n", *i);
i++;
}
strsep_free_(s);
}
Your program has undefined behavior, which means it may behave in unexpected ways, but could by chance behave as expected. Adding the extra printf changes the behavior in a way the seems to correct the bug, but only by coincidence. On a different machine, or even on the same machine at a different time, the behavior may again change.
There are multiple bugs in your program that lead to undefined behavior:
You are not allocating the array with the proper size: it should have space for pos + 1 pointers, hence sizeof(char **) * (pos + 1). The faulty statements are: char **arr = (char**)malloc(sizeof(char **) + 1); and arr = realloc(arr, sizeof(char **) * pos + 1);.
Furthermore, the space allocated for each substring is incorrect too: arr[pos] = malloc(sizeof(char *) * len + 1); should read arr[pos] = malloc(sizeof(char) * len + 1);, which by definition is arr[pos] = malloc(len + 1);. This does not lead to undefined behavior, you just allocate too much memory. If your system supports it, allocation and copy can be combined in one call to strndup(str - len, len).
You never check for memory allocation failure, causing undefined behavior in case of memory allocation failure.
Using uint8_t for len and pos is risky: what if the number of substrings exceeds 255? pos and len would silently wrap back to 0, producing unexpected results and memory leaks. There is no advantage at using such a small type, use int or size_t instead.
Here is a corrected version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Split str at every occurrence of ch.
 *
 * Returns a NULL-terminated array of separately allocated sub-strings
 * (adjacent separators yield empty strings), or NULL if an allocation
 * fails, in which case everything allocated so far is released.
 * Free the result with strsep_free_().
 */
char **strsep_(const char *str, char ch) {
    int count = 0;                 /* sub-strings stored so far */
    const char *tok = str;         /* first character of the current sub-string */
    char **list = (char**)malloc(sizeof(*list) * (count + 1));

    if (list == NULL)
        return NULL;

    while (1) {
        /* ordinary character: keep scanning */
        if (*str != ch && *str != '\0') {
            str++;
            continue;
        }

        /* separator or end of input: store tok..str as a new sub-string */
        size_t n = (size_t)(str - tok);
        char *piece = malloc(n + 1);
        /* room for the new element and the trailing NULL */
        char **grown = realloc(list, sizeof(*list) * (count + 2));
        if (grown == NULL || piece == NULL) {
            free(piece);
            if (grown != NULL)
                list = grown;
            while (count-- > 0)
                free(list[count]);
            free(list);
            return NULL;
        }
        list = grown;
        memcpy(piece, tok, n);
        piece[n] = '\0';
        list[count++] = piece;

        if (*str == '\0')
            break;
        str++;
        tok = str;
    }

    list[count] = NULL;
    return list;
}
/* Release a NULL-terminated array returned by strsep_(): every element, then the array. */
void strsep_free_(char **arr) {
    char **slot = arr;

    while (*slot != NULL) {
        free(*slot);
        *slot = NULL;   /* not strictly needed; guards against accidental reuse */
        slot++;
    }

    free(arr);
}
/* Demo driver: splits a fixed sentence at spaces, prints every piece and frees the result. */
int main(void) {
    char **s = strsep_("Lorem ipsum four words", ' ');
    int i;

    /* strsep_() returns NULL when it runs out of memory */
    if (s == NULL) {
        fprintf(stderr, "strsep_() failed\n");
        return 1;
    }
    for (i = 0; s[i] != NULL; i++) {
        printf("-> %s\n", s[i]);
    }
    strsep_free_(s);
    return 0;
}
Output:
-> Lorem
-> ipsum
-> four
-> words
The probable reason for the crash is most likely this: realloc(arr, sizeof(char **) * pos + 1).
That is the same as realloc(arr, (sizeof(char **) * pos) + 1) which does not allocate enough space for your "array". You need to do realloc(arr, sizeof(char **) * (pos + 1)).
Same with the allocation for arr[pos], you need to use parentheses correctly there too.
Good answer from @chqrlie. From my side, I think it would be better to count everything before copying; that avoids the need for realloc.
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Count the characters of str up to the next ch or the end of the string,
 * after skipping a single leading ch if there is one.
 */
int count_chars(const char *str, const char ch)
{
    const char *scan;

    if (*str == ch)
        str++;

    for (scan = str; *scan != ch && *scan != '\0'; scan++)
        ;

    return (int)(scan - str);
}
/*
 * Count the runs of ch in str: every ch that is not directly followed by
 * another ch counts once.
 */
int count_delimeter(const char *str, const char ch)
{
    int runs = 0;
    const char *p;

    for (p = str; *p != '\0'; p++)
    {
        if (p[0] == ch && p[1] != ch)
            runs++;
    }
    return runs;
}
/*
 * Split str into the pieces that lie between runs of ch.
 *
 * Any number of consecutive separators counts as one; leading and trailing
 * separators are ignored, so no empty pieces are produced. The pieces are
 * counted first, so the pointer array is allocated once without realloc.
 *
 * Returns a NULL-terminated array of separately allocated strings (only
 * the NULL terminator when str contains no piece), or NULL when str is
 * NULL or an allocation fails. The caller frees every piece and the array.
 */
char** strsep_(const char *str, const char ch)
{
    size_t count = 0;
    size_t i;
    const char *p;
    char **arr;

    if (str == NULL)
        return (NULL);

    /* Pass 1: count the pieces. */
    p = str;
    while (*p != '\0')
    {
        while (*p != '\0' && *p == ch)
            p++;
        if (*p == '\0')
            break;
        count++;
        while (*p != '\0' && *p != ch)
            p++;
    }

    if ((arr = malloc(sizeof(char *) * (count + 1))) == NULL)
        return (NULL);

    /* Pass 2: copy each piece. */
    p = str;
    for (i = 0; i < count; i++)
    {
        const char *start;
        size_t len;

        while (*p != '\0' && *p == ch)
            p++;
        start = p;
        while (*p != '\0' && *p != ch)
            p++;
        len = (size_t)(p - start);

        if ((arr[i] = malloc(sizeof(char) * (len + 1))) == NULL)
        {
            while (i > 0)
                free(arr[--i]);
            free(arr);
            return (NULL);
        }
        memcpy(arr[i], start, len);
        arr[i][len] = '\0';
    }
    arr[count] = NULL;
    return arr;
}
/* Demo driver: splits a sample sentence at spaces, prints each piece in brackets and frees everything. */
int main(void)
{
    const char *str = "Lorem ipsum ipsum Lorem lipsum gorem insum";
    char **s = strsep_(str, ' ');
    size_t i;

    /* char *str = "Lorem + Ipsum"; */
    /* char **s = strsep_(str, '+'); */
    /* char *str = "lorem, torem, horem, lorem"; */
    /* char **s = strsep_(str, ','); */
    if (s == NULL)
        return 1;

    /* index the array instead of advancing s, so that s can still be freed */
    for (i = 0; s[i] != NULL; i++) {
        printf("-> [%s]\n", s[i]);
        free(s[i]);
    }
    free(s);
    return 0;
}
I tried to make a function which replace every word in a text with the word shifted to right by 'k' times.
the code look like this:
/*
 * Question listing: for every word of string (words are separated by the
 * characters in s), shift the word k times and substitute it in string.
 */
void operation_3(char *string, int k){
int len = 0, i;
// computed but not used afterwards
int string_len = strlen(string);
char *word;
char s[12] = " .,?!\"'";
char *dup;
// strtok() modifies its input, so it runs on a copy; the copy is not freed in this function
dup = strdup(string);
word = strtok(dup, s);
while (word != NULL) {
len = strlen(word);
// this allocation is replaced by the pointer assigned in the loop below
char *new_word = (char *)malloc(len * sizeof(char));
for (i = 0; i < k; i++) {
// NOTE(review): if shift_to_right() shifts in place and returns its argument,
// word and new_word refer to the same characters after this call
new_word = shift_to_right(word);
}
// only the local parameter is reassigned; the caller's pointer is unchanged
string = replace_word(string, word, new_word);
word = strtok(NULL, s);
}
}
shift_to_right is:
/*
 * Rotate string one position to the right in place: the last character
 * moves to the front and every other character moves up by one.
 * Returns string itself.
 */
char *shift_to_right(char *string){
    size_t n = strlen(string);

    /* strings of length 0 or 1 are unchanged by the rotation */
    if (n > 1) {
        char last = string[n - 1];
        memmove(string + 1, string, n - 1);
        string[0] = last;
    }
    return string;
}
replace_word is:
/*
 * Question listing: builds a new heap string in which occurrences of word
 * in string are replaced by new_word, and returns it.
 */
char *replace_word(char *string, char *word, char *new_word) {
int len = strlen(string) + 1;
char *temp = malloc(len * sizeof(char));
int temp_len = 0;
char *found;
while (found = strstr(string, word)) {
// strlen(found) is the length of everything from the match to the end of the string,
// so the first test is true unless the match is the final text of the string;
// *(found - 1) reads the byte before the string when the match is at its very start
if (strlen(found) != strlen(word) || isDelimitator(*(found - 1)) == 1) {
break;
}
// copy the text in front of the match
memcpy(temp + temp_len, string, found - string);
temp_len = temp_len + found - string;
// NOTE(review): the next statement has no terminating ';'
string = found + strlen(word)
len = len - strlen(word) + strlen(new_word);
temp = realloc(temp, len * sizeof(char));
memcpy(temp + temp_len, new_word, strlen(new_word));
temp_len = temp_len + strlen(new_word);
}
// append whatever is left of the input, including its terminator
strcpy(temp + temp_len, string);
return temp;
}
and isDelimitator is:
/*
 * Returns 0 when c is a delimiter (space, '.', ',', '?', '!', '"', '\'' or
 * the terminating NUL) and 1 for every other character. Note that 0 is the
 * "is a delimiter" answer.
 */
int isDelimitator(char c) {
    switch (c) {
    case ' ':
    case '.':
    case ',':
    case '?':
    case '!':
    case '"':
    case '\0':
    case '\'':
        return 0;
    default:
        return 1;
    }
}
I tested shift_to_right, replace_word and isDelimitator and work fine. But the final function, operation_3 doesn't work as expected. For example, for input: "Hi I am John" and for k = 1 the output is : "Hi I am John". Basically operation_3 doesn't modify the string. Any advice, corrections please?
There are a few things which I see are possibly the reason for error.
1) In operation_3 you do this : new_word = shift_to_right(word); And, in the definition of char *shift_to_right(char *string) you modify the string itself and return a pointer to it. So, if you called shift_to_right(word) and word = "Hi" then after the execution of shift_to_right both word and new_word are now pointing to the same string "iH", so in replace_word when you pass both the words and check for the substring word you will always get NULL, because, there is no substring "iH".
A possible solution, in shift_to_right add a statement,
char *new_string = strdup(string);
and instead of swapping the characters in string, swap the characters now in new_string and return the new_string from the function.
Your code shall look like this ::
/*
 * Return a newly allocated copy of string rotated one position to the
 * right (the last character moves to the front). The input is left
 * untouched. Returns NULL if memory cannot be allocated; the caller
 * frees the result.
 */
char *shift_to_right(char *string){
    size_t n = strlen(string);
    char *new_string = malloc(n + 1);

    if (new_string == NULL)
        return NULL;

    if (n > 0) {
        new_string[0] = string[n - 1];
        memcpy(new_string + 1, string, n - 1);
    }
    new_string[n] = '\0';
    return new_string;
}
2) In the function replace_word, for a moment let us consider that the above mentioned error does not occur and replace_word get called with the parameters :: replace_word(string, "Hi", "iH");.
So, when you perform found = strstr(string, word), it gives you a pointer to the first letter where Hi started. So, in this case, if your string was "Hi I am John", then you get a pointer to the first H, and when you perform strlen(found) you will get 12(length of string left starting from the pointer) as the output, and strlen(word) will always be less (unless found points to the last word in the string), so in most cases your if condition becomes true and you break from the loop, without any swapping.
Moreover, as you yourself pointed out in the comments that strstr will return Johns as well if you want a substring John the only solution for this would be to run a loop and check that in string after John if there is delimiter character or not, if there is no delimiter character, then this is not the substring that you needed.
replace_word shall look something like this ::
/*
 * Overwrite, in place, the first whole-word occurrence of word in string
 * with new_word.
 *
 * An occurrence counts as a whole word when it starts at the beginning of
 * string or right after a delimiter, and is followed by a delimiter or the
 * end of the string (isDelimitator() returns 0 for delimiters and for the
 * terminating NUL). At most strlen(word) characters are written, so
 * new_word is expected to have the same length as word.
 */
void replace_word(char *string, char *word, char *new_word) {
    size_t len = strlen(word);
    char *found;

    if (len == 0)
        return;

    for (found = strstr(string, word); found != NULL; found = strstr(found + 1, word)) {
        int starts_word = (found == string) || isDelimitator(found[-1]) == 0;
        int ends_word = isDelimitator(found[len]) == 0;

        if (starts_word && ends_word)
            break;
    }

    if (found != NULL) {
        /* stop early if new_word is shorter, so its terminator is never copied in */
        for (size_t i = 0; i < len && new_word[i] != '\0'; i++) {
            found[i] = new_word[i];
        }
    }
}
I think this replace_word shall work, you can directly modify the string, and there is no need to actually make a temp string and return it. This reduces the need of allocating new memory and saving that pointer.
I hope this could help!
EDIT :: Since we have been using strdup in the code, which dynamically allocates memory of the size of the string with an extra block for the \0 character, we shall take care of freeing it explicitly, so it will be a good idea according to me free the allocated memory in replace_word just before we exit the function since the new_word is useless after it.
Moreover, I saw a statement in your code::
1) char *new_word = (char *)malloc(len * sizeof(char));
Just before you start shifting the words, I hope you understand that you do not need to do it. new_word is just a pointer, and since memory is now allocated for it by strdup, the malloc is unnecessary. Even in the code as you had written it there was no reason to allocate memory for new_word: shift_to_right returned the address of word, which points into dup, the copy made by strdup, and that memory stays valid until it is freed. The malloc'ed block was simply overwritten by that assignment and leaked.
This code is simpler than what you have, and it prints all the word delimiters that were in the input string. And rather than looking for specific punctuation characters, it checks alphanumeric instead.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Demo: for 0..4 shifts, print the sample text with every alphanumeric run
 * rotated to the right by that many positions; all other characters are
 * printed unchanged.
 */
int main(void)
{
    char instr[] = "Hi! I am 'John' ;)";
    int lennin = strlen(instr);
    int shifts, i, len, index, start, next;

    printf("Working with %s\n", instr);
    for(shifts=0; shifts<5; shifts++) {                  // various examples
        printf("Shifts = %d ", shifts);
        start = 0;
        while(start < lennin) {
            // <ctype.h> functions need an unsigned char value
            while (start < lennin && !isalnum((unsigned char)instr[start])) { // find next alphanum
                printf("%c", instr[start]);              // output non-alphanum
                start++;
            }
            if (start >= lennin)                         // text ended with non-alphanum: no word left
                break;
            next = start + 1;
            while (next < lennin && isalnum((unsigned char)instr[next])) // find next non-alphanum
                next++;
            len = next - start;
            for(i=0; i<len; i++) {                       // shift the substring
                index = i - shifts;
                while(index < 0) index += len;           // get index in range
                printf("%c", instr[start + (index % len)]); // ditto
            }
            start = next;                                // next substring
        }
        printf("\n");
    }
    return 0;
}
Program output:
Working with Hi! I am 'John' ;)
Shifts = 0 Hi! I am 'John' ;)
Shifts = 1 iH! I ma 'nJoh' ;)
Shifts = 2 Hi! I am 'hnJo' ;)
Shifts = 3 iH! I ma 'ohnJ' ;)
Shifts = 4 Hi! I am 'John' ;)