Split and Join strings in C Language - c

I learnt C in uni but haven't used it for quite a few years. Recently I started working on a tool which uses C as the programming language. Now I'm stuck with some really basic functions. Among them are how to split and join strings using a delimiter? (I miss Python so much, even Java or C#!)
Below is the function I created to split a string, but it does not seem to work properly. Also, even this function works, the delimiter can only be a single character. How can I use a string as a delimiter?
Can someone please provide some help?
Ideally, I would like to have 2 functions:
// Split a string into a string array
char** fSplitStr(char *str, const char *delimiter);
// Join the elements of a string array to a single string
char* fJoinStr(char **str, const char *delimiter);
Thank you,
Allen
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
char** fSplitStr(char *str, const char *delimiters)
{
char * token;
char **tokenArray;
int count=0;
token = (char *)strtok(str, delimiters); // Get the first token
tokenArray = (char**)malloc(1 * sizeof(char*));
if (!token) {
return tokenArray;
}
while (token != NULL ) { // While valid tokens are returned
tokenArray[count] = (char*)malloc(sizeof(token));
tokenArray[count] = token;
printf ("%s", tokenArray[count]);
count++;
tokenArray = (char **)realloc(tokenArray, sizeof(char *) * count);
token = (char *)strtok(NULL, delimiters); // Get the next token
}
return tokenArray;
}
int main (void)
{
char str[] = "Split_The_String";
char ** splitArray = fSplitStr(str,"_");
printf ("%s", splitArray[0]);
printf ("%s", splitArray[1]);
printf ("%s", splitArray[2]);
return 0;
}
Answers: (Thanks to Moshbear, Joachim and sarnold):
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
char** fStrSplit(char *str, const char *delimiters)
{
char * token;
char **tokenArray;
int count=0;
token = (char *)strtok(str, delimiters); // Get the first token
tokenArray = (char**)malloc(1 * sizeof(char*));
tokenArray[0] = NULL;
if (!token) {
return tokenArray;
}
while (token != NULL) { // While valid tokens are returned
tokenArray[count] = (char*)strdup(token);
//printf ("%s", tokenArray[count]);
count++;
tokenArray = (char **)realloc(tokenArray, sizeof(char *) * (count + 1));
token = (char *)strtok(NULL, delimiters); // Get the next token
}
tokenArray[count] = NULL; /* Terminate the array */
return tokenArray;
}
char* fStrJoin(char **str, const char *delimiters)
{
char *joinedStr;
int i = 1;
joinedStr = realloc(NULL, strlen(str[0])+1);
strcpy(joinedStr, str[0]);
if (str[0] == NULL){
return joinedStr;
}
while (str[i] !=NULL){
joinedStr = (char*)realloc(joinedStr, strlen(joinedStr) + strlen(str[i]) + strlen(delimiters) + 1);
strcat(joinedStr, delimiters);
strcat(joinedStr, str[i]);
i++;
}
return joinedStr;
}
int main (void)
{
char str[] = "Split_The_String";
char ** splitArray = (char **)fStrSplit(str,"_");
char * joinedStr;
int i=0;
while (splitArray[i]!=NULL) {
printf ("%s", splitArray[i]);
i++;
}
joinedStr = fStrJoin(splitArray, "-");
printf ("%s", joinedStr);
return 0;
}

Use strpbrk instead of strtok, because strtok suffers from two weaknesses:
it's not re-entrant (i.e. thread-safe)
it modifies the string
For joining, use strncat for joining, and realloc for resizing.
The order of operations is very important.
Before doing the realloc;strncat loop, set the 0th element of the target string to '\0' so that strncat won't cause undefined behavior.

For starters, don't use sizeof to get the length of a string. strlen is the function to use. In this case strdup is better.
And you don't actually copy the string returned by strtok, you copy the pointer. Change you loop to this:
while (token != NULL) { // While valid tokens are returned
tokenArray[count] = strdup(token);
printf ("%s", tokenArray[count]);
count++;
tokenArray = (char **)realloc(tokenArray, sizeof(char *) * count);
token = (char *)strtok(NULL, delimiters); // Get the next token
}
tokenArray[count] = NULL; /* Terminate the array */
Also, don't forget to free the entries in the array, and the array itself when you're done with it.
Edit At the beginning of fSplitStr, wait with allocating the tokenArray until after you check that token is not NULL, and if token is NULL why not return NULL?

I'm not sure the best solution for you, but I do have a few notes:
token = (char *)strtok(str, delimiters); // Get the first token
tokenArray = (char**)malloc(1 * sizeof(char*));
if (!token) {
return tokenArray;
}
At this point, if you weren't able to find any tokens in the string, you return a pointer to an "array" that is large enough to hold a single character pointer. It is un-initialized, so it would not be a good idea to use the contents of this array in any way. C almost never initializes memory to 0x00 for you. (calloc(3) would do that for you, but since you need to overwrite every element anyway, it doesn't seem worth switching to calloc(3).)
Also, the (char **) case before the malloc(3) call indicates to me that you've probably forgotten the #include <stdlib.h> that would properly prototype malloc(3). (The cast was necessary before about 1989.)
Do note that your while() { } loop is setting pointers to the parts of the original input string to your tokenArray elements. (This is one of the cons that moshbear mentioned in his answer -- though it isn't always a weakness.) If you change tokenArray[1][1]='H', then your original input string also changes. (In addition to having each of the delimiter characters replaced with an ASCII NUL character.)

Related

Split string without using strtok() in C

I am trying to split a sentence into an array of the individual words.
I have recently realized that I cannot use strtok() in this function and am now looking for an alternative way to implement this function without using strtok(). Does anyone know how I can go about this?
Edit:
I need to return a pointer to an array of pointer pointing to the individual words in the sentence.
Copied from my answer to a now closed question at https://stackoverflow.com/a/63866151/13422
You would look through the list of functions provided by the C <string.h> header file in the C Standard Library and you'd find a lot of options.
I wrote something just for fun:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct token {
const char *str;
size_t len;
};
struct token next_token(const char *str, const char *sep) {
struct token tok;
/* skip leading separator characters */
str += strspn(str, sep);
tok.str = str;
tok.len = strcspn(str, sep);
return tok;
}
struct token *get_tokens(const char *str, const char *sep) {
size_t len = 0, cap = 8;
struct token *arr = malloc(cap * sizeof *arr);
for (struct token tok = next_token(str, sep); tok.len;
tok = next_token(tok.str + tok.len, sep)) {
arr[len] = tok;
++len;
if (len == cap) {
cap *= 2;
arr = realloc(arr, cap * sizeof *arr);
}
}
arr[len].str = NULL;
arr[len].len = 0;
return arr;
}
int main(int argc, char *argv[]) {
if (argc < 2)
exit(EXIT_FAILURE);
puts("Token array");
struct token *token_arr = get_tokens(argv[1], " \t\n");
for (size_t i = 0; token_arr[i].str; ++i) {
printf("\"%.*s\" ", (int)token_arr[i].len, token_arr[i].str);
}
putchar('\n');
free(token_arr);
puts("Next token loop");
for (struct token tok = next_token(argv[1], " \t\n"); tok.len;
tok = next_token(tok.str + tok.len, " \t\n")) {
printf("\"%.*s\" ", (int)tok.len, tok.str);
}
putchar('\n');
return EXIT_SUCCESS;
}
Use strchr() instead of strtok()
UPDATE
The OP needs to clarify what exactly he wants to get from this splitString function?
It returns an array of pointers to separated words. That's fine if he used strtok, as it would terminate each word with 0.
If those 0's are not inserted, the resulting array, while still pointing to the beginning of each word, would have no indication of their lengths. That should be handled by the user of this array.

How to replace characters by strtok function - C?

I really want to change all spaces ' ' in my char array for NULL -
#include <string.h>
void ReplaceCharactersInString(char *pcString, char *cOldChar, char *cNewChar) {
char *p = strtok(pcString, cOldChar);
strcpy(pcString, p);
while (p != NULL) {
strcat(pcString, p);
p = strtok(cNewChar, cOldChar);
}
}
int main() {
char pcString[] = "I am testing";
ReplaceCharactersInString(pcString, " ", NULL);
printf(pcString);
}
OUTPUT: Iamtesting
If I simply put the printf(p) function before:
p = strtok(cNewChar, cOldChar);
In the result I have what I need - but the problem is how to store it in pcString (directly)?
Or there is maybe a better solution to simply do it?
While some functions expect a [single] string to be pre-parsed to: I\0am\0testing, that is rare.
And, if you have multiple spaces/delimiters, you'll get (e.g.) foo\0\0bar, which you probably don't want.
And, your printf in main will only print the first token in the string because it will stop on the first EOS (i.e. '\0').
(i.e.) You probably don't want strcpy/strcat.
More likely, you want to fill an array of char * pointers to the tokens you parse.
So, you'd want to pass down char **argv, then do: argv[argc++] = strtok(...); and then do: return argc
Here's how I would refactor your code:
#include <stdio.h>
#include <string.h>
#define ARGMAX 100
int
ReplaceCharactersInString(int argmax,char **argv,char *pcString,
const char *delim)
{
char *p;
int argc;
// allow space for NULL termination
--argmax;
for (argc = 0; argc < argmax; ++argc, ++argv) {
// get next token
p = strtok(pcString,delim);
if (p == NULL)
break;
// zap the buffer pointer
pcString = NULL;
// store the token in the [returned] array
*argv = p;
}
*argv = NULL;
return argc;
}
int
main(void)
{
char pcString[] = "I am testing";
int argc;
char **av;
char *argv[ARGMAX];
argc = ReplaceCharactersInString(ARGMAX,argv,pcString," ");
printf("argc: %d\n",argc);
for (av = argv; *av != NULL; ++av)
printf("'%s'\n",*av);
return 0;
}
Here's the output:
argc: 3
'I'
'am'
'testing'
strcat strcpy should not be used when the source and destination overlap in memory.
Iterate through the array and replace the matching character with the desired character.
Since zeros are part of the string, printf will stop at the first zero and strlen can't be used for the length to print. sizeof can be used as pcString is defined in the same scope.
Note that ReplaceCharactersInString would not work a second time as it would stop at the first zero. The function could be written to accept a length parameter and loop using the length.
#include <stdio.h>
#include <stdlib.h>
void ReplaceCharactersInString(char *pcString, char cOldChar,char cNewChar){
while ( pcString && *pcString) {//not NULL and not zero
if ( *pcString == cOldChar) {//match
*pcString = cNewChar;//replace
}
++pcString;//advance to next character
}
}
int main ( void) {
char pcString[] = "I am testing";
ReplaceCharactersInString ( pcString, ' ', '\0');
for ( int each = 0; each < sizeof pcString; ++each) {
printf ( "pcString[%02d] = int:%-4d char:%c\n", each, pcString[each], pcString[each]);
}
return 0;
}
You want to split the string into individual tokens separated by spaces such as "I\0am\0testing\0". You can use strtok() for this but this function is error prone. I suggest you allocate an array of pointers and make them point to the words. Note that splitting the source string is sloppy and does not allow for tokens to be adjacent such as in 1+1. You could allocate the strings instead.
Here is an example:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **split_string(const char *str, char *delim) {
size_t i, len, count;
const char *p;
/* count tokens */
p = str;
p += strspn(p, delim); // skip initial delimiters
count = 0;
while (*p) {
count++;
p += strcspn(p, delim); // skip token
p += strspn(p, delim); // skip delimiters
}
/* allocate token array */
char **array = calloc(sizeof(*array, count + 1);
p = str;
p += strspn(p, delim); // skip initial delimiters
for (i = 0; i < count; i++) {
len = strcspn(p, delim); // token length
array[i] = strndup(p, len); // allocate a copy of the token
p += len; // skip token
p += strspn(p, delim); // skip delimiters
}
/* array ends with a null pointer */
array[count] = NULL;
return array;
}
int main() {
const char *pcString = "I am testing";
char **array = split_string(pcString, " \t\r\n");
for (size_t i = 0; array[i] != NULL; i++) {
printf("%zu: %s\n", i, array[i]);
}
return 0;
}
The strtok function pretty much does exactly what you want. It basically replaces the next delimiter with a '\0' character and returns the pointer to the current token. The next time you call strtok, you should pass a NULL argument (see the documentation for strtok) and it will point to the next token, which will again be delimited by '\0'. Read some more examples of correct strtok usage.

Cut string at a certain character

Im trying to make a function which cuts and prints/does something with parts of a string.
Say i have this string:
"strings.no.header"
And i would like to split it up so it prints something like this:
strings
no
header
Here is my lousy attempt, where i am first able to (without the else statement) print out "strings". My idea was to make the function recursive so it prints and removes the first part before ".", and then does the same with the rest of the string.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void cut(char string[]);
int main() {
char string[] = "strings.no.header";
cut(string);
}
void cut(char string[]) {
char *toString = malloc(sizeof(char));
int len = strlen(string);
for(int i = 0; i < len; i++) {
int count = 0;
if(string[i] != '.') {
toString[i] = string[i];
} else if(string[i] == '.') {
char *tmp = malloc(sizeof(char));
tmp = string + i;
cut(tmp);
}
}
printf("%s", toString);
}
Would be grateful if someone could point me in the right direction.
I have an idea:
check this web page: https://en.wikibooks.org/wiki/C_Programming/Strings
use strchr to find '.'then, once you know the position, copy the characters in other string, remember that you already know where the '.' appears, so you know exactly what to copy.
Strtok is your friend. No need to reinvent the wheel.
void cut (const char* str){
char * pch;
pch = strtok (str,".");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, ".");
}
}
The code is just a quick edit of the sample from the link.
A solution with strtok:
#include <stdio.h>
#include <string.h>
void cut(char string[]);
int main()
{
char string[] = "strings.no.header";
cut(string);
}
void cut(char string[])
{
char *pch;
pch = strtok (string, ".");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, ".");
}
}
As others have noted, for something like this, strtok will give you what you need. A word of warning, this function will wreck your string, so it is always advisable to make a copy first.
Basically, what strtok does is replace all characters in the string that match the second argument of strtok and replace them with the '\0' character. The function then returns the pointer to the first "token". Each subsequent call to strtok will return the pointer to the next token. Here is a basic tutorial.
To keep it simple, I kept all the code in the main. You can now use this technique to implement the logic that suits your particular problem.
int main()
{
char string[] = "strings.no.header";
char buffer[50] = { 0 };//make sure big enough
char* token = NULL;
char* nextToken = NULL;
//want to make a copy because strtok will wreck the string
strcpy(buffer, string);
token = strtok_s(buffer, ".", &nextToken);
while (token)
{
printf("%s\n", token);//or do whatever you like.
token = strtok_s(NULL, ".", &nextToken);//pass NULL on subsequent calls
}
getchar();
return 0;
}
You may find this interesting that:
After the first call of token = strtok_s(buffer, ".", &nextToken);,
buffer now contains: "strings\0no.header".
After the second call,
buffer now contains: "strings\0no\0header".
This is the reason why you want to make a copy of the original string before using strtok. If you do not care about the original string, it is ok to use it.

String and String array Manipulation in c

I'm trying to write a string spliter function in C.It uses space as delimiter to split a given string in two or more. It more like the split funtion in Python.Here is the code:-
#include <stdio.h>
#include <string.h>
void slice_input (char *t,char **out)
{
char *x,temp[10];
int i,j;
x = t;
j=0;
i=0;
for (;*x!='\0';x++){
if (*x!=' '){
temp[i] = *x;
i++;
}else if(*x==' '){
out[j] = temp;
j++;i=0;
}
}
}
int main()
{
char *out[2];
char inp[] = "HEllo World ";
slice_input(inp,out);
printf("%s\n%s",out[0],out[1]);
//printf("%d",strlen(out[1]));
return 0;
}
Expeted Output:-
HEllo
World
but it is showing :-
World
World
Can you help please?
out[j] = temp;
where temp is a local variable. It will go out of scope as soon as your function terminates, thus out[j] will point to garbage, invoking Undefined Behavior when being accessed.
A simple fix would be to use a 2D array for out, and use strcpy() to copy the temp string to out[j], like this:
#include <stdio.h>
#include <string.h>
void slice_input(char *t, char out[2][10]) {
char *x, temp[10];
int i,j;
x = t;
j=0;
i=0;
for (;*x!='\0';x++) {
if (*x!=' ') {
temp[i] = *x;
i++;
} else if(*x==' ') {
strcpy(out[j], temp);
j++;
i=0;
}
}
}
int main()
{
char out[2][10];
char inp[] = "HEllo World ";
slice_input(inp,out);
printf("%s\n%s",out[0],out[1]);
return 0;
}
Output:
HEllo
World
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
From the website:
char * strtok ( char * str, const char * delimiters ); On a first
call, the function expects a C string as argument for str, whose first
character is used as the starting location to scan for tokens. In
subsequent calls, the function expects a null pointer and uses the
position right after the end of last token as the new starting
location for scanning.
Once the terminating null character of str is found in a call to
strtok, all subsequent calls to this function (with a null pointer as
the first argument) return a null pointer.
Parameters
str C string to truncate. Notice that this string is modified by being
broken into smaller strings (tokens). Alternativelly [sic], a null
pointer may be specified, in which case the function continues
scanning where a previous successful call to the function ended.
delimiters C string containing the delimiter characters. These may
vary from one call to another. Return Value
A pointer to the last token found in string. A null pointer is
returned if there are no tokens left to retrieve.
Example
/* strtok example */
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
You can use this function to split string into tokens - there is no need to use some own functions. Your code looks like garbage, please format it.
Your source propably would look like this:
char *
strtok(s, delim)
char *s; /* string to search for tokens */
const char *delim; /* delimiting characters */
{
static char *lasts;
register int ch;
if (s == 0)
s = lasts;
do {
if ((ch = *s++) == '\0')
return 0;
} while (strchr(delim, ch));
--s;
lasts = s + strcspn(s, delim);
if (*lasts != 0)
*lasts++ = 0;
return s;
}

Do I need to free the strtok resulting string?

Or rather, how does strtok produce the string to which it's return value points? Does it allocate memory dynamically? I am asking because I am not sure if I need to free the token in the following code:
The STANDARD_INPUT variables is for exit procedure in case I run out of memory for allocation and the string is the tested subject.
int ValidTotal(STANDARD_INPUT, char *str)
{
char *cutout = NULL, *temp, delim = '#';
int i = 0; //Checks the number of ladders in a string, 3 is the required number
temp = (char*)calloc(strlen(str),sizeof(char));
if(NULL == temp)
Pexit(STANDARD_C); //Exit function, frees the memory given in STANDARD_INPUT(STANDARD_C is defined as the names given in STANDARD_INPUT)
strcpy(temp,str);//Do not want to touch the actual string, so copying it
cutout = strtok(temp,&delim);//Here is the lynchpin -
while(NULL != cutout)
{
if(cutout[strlen(cutout) - 1] == '_')
cutout[strlen(cutout) - 1] = '\0'; \\cutout the _ at the end of a token
if(Valid(cutout,i++) == INVALID) //Checks validity for substring, INVALID is -1
return INVALID;
cutout = strtok(NULL,&delim);
strcpy(cutout,cutout + 1); //cutout the _ at the beginning of a token
}
free(temp);
return VALID; // VALID is 1
}
strtok manipulates the string you pass in and returns a pointer to it,
so no memory is allocated.
Please consider using strsep or at least strtok_r to save you some headaches later.
The first parameter to the strtok(...) function is YOUR string:
str
C string to truncate. Notice that this string is modified by
being broken into smaller strings (tokens). Alternativelly, a null
pointer may be specified, in which case the function continues
scanning where a previous successful call to the function ended.
It puts '\0' characters into YOUR string and returns them as terminated strings. Yes, it mangles your original string. If you need it later, make a copy.
Further, it should not be a constant string (e.g. char* myStr = "constant string";). See here.
It could be allocated locally or by malloc/calloc.
If you allocated it locally on the stack (e.g. char myStr[100];), you don't have to free it.
If you allocated it by malloc (e.g. char* myStr = malloc(100*sizeof(char));), you need to free it.
Some example code:
#include <string.h>
#include <stdio.h>
int main()
{
const char str[80] = "This is an example string.";
const char s[2] = " ";
char *token;
/* get the first token */
token = strtok(str, s);
/* walk through other tokens */
while( token != NULL )
{
printf( " %s\n", token );
token = strtok(NULL, s);
}
return(0);
}
NOTE: This example shows how you iterate through the string...since your original string was mangled, strtok(...) remembers where you were last time and keeps working through the string.
According to the docs:
Return Value
A pointer to the last token found in string.
Since the return pointer just points to one of the bytes in your input string where the token starts, whether you need to free depends on whether you allocated the input string or not.
As others mentioned, strtok uses its first parameter, your input string, as the memory buffer. It doesn't allocate anything. It's stateful and non-thread safe; if strtok's first argument is null, it reuses the previously-provided buffer. During a call, strtok destroys the string, adding nulls into it and returning pointers to the tokens.
Here's an example:
#include <stdio.h>
#include <string.h>
int main() {
char s[] = "foo;bar;baz";
char *foo = strtok(s, ";");
char *bar = strtok(NULL, ";");
char *baz = strtok(NULL, ";");
printf("%s %s %s\n", foo, bar, baz); // => foo bar baz
printf("original: %s\n", s); // => original: foo
printf("%ssame memory loc\n", s == foo ? "" : "not "); // => same memory loc
return 0;
}
s started out as foo;bar;baz\0. Three calls to strtok turned it into foo\0bar\0baz\0. s is basically the same as the first chunk, foo.
Valgrind:
==89== HEAP SUMMARY:
==89== in use at exit: 0 bytes in 0 blocks
==89== total heap usage: 1 allocs, 1 frees, 1,024 bytes allocated
==89==
==89== All heap blocks were freed -- no leaks are possible
While the code below doesn't fix all of the problems with strtok, it might help get you moving in a pinch, preserving the original string with strdup:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main() {
const char s[] = "foo;bar_baz";
const char delims[] = ";_";
char *cpy = strdup(s);
char *foo = strtok(cpy, delims);
char *bar = strtok(NULL, delims);
char *baz = strtok(NULL, delims);
printf("%s %s %s\n", foo, bar, baz); // => foo bar baz
printf("original: %s\n", s); // => original: foo;bar_baz
printf("%ssame memory loc\n", s == foo ? "" : "not "); // => not same memory loc
free(cpy);
return 0;
}
Or a more full-fledged example (still not thread safe):
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void must(
bool predicate,
const char *msg,
const char *file,
const unsigned int line
) {
if (!predicate) {
fprintf(stderr, "%s:%d: %s\n", file, line, msg);
exit(1);
}
}
size_t split(
char ***tokens,
const size_t len,
const char *s,
const char *delims
) {
char temp[len+1];
temp[0] = '\0';
strcpy(temp, s);
*tokens = malloc(sizeof(**tokens) * 1);
must(*tokens, "malloc failed", __FILE__, __LINE__);
size_t chunks = 0;
for (;;) {
char *p = strtok(chunks == 0 ? temp : NULL, delims);
if (!p) {
break;
}
size_t sz = sizeof(**tokens) * (chunks + 1);
*tokens = realloc(*tokens, sz);
must(*tokens, "realloc failed", __FILE__, __LINE__);
(*tokens)[chunks++] = strdup(p);
}
return chunks;
}
int main() {
const char s[] = "foo;;bar_baz";
char **tokens;
size_t len = split(&tokens, strlen(s), s, ";_");
for (size_t i = 0; i < len; i++) {
printf("%s ", tokens[i]);
free(tokens[i]);
}
puts("");
free(tokens);
return 0;
}

Resources