Split string by a substring - c

I have following string:
char str[] = "A/USING=B)";
I want to split to get separate A and B values with /USING= as a delimiter
How can I do it? I known strtok() but it just split by one character as delimiter.

As others have pointed out, you can use strstr from <string.h> to find the delimiter in your string. Then either copy the substrings or modify the input string to split it.
Here's an implementation that returns the second part of a split string. If the string can't be split, it returns NULL and the original string is unchanged. If you need to split the string into more substrings, you can call the function on the tail repeatedly. The first part will be the input string, possibly shortened.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *split(char *str, const char *delim)
{
char *p = strstr(str, delim);
if (p == NULL) return NULL; // delimiter not found
*p = '\0'; // terminate string after head
return p + strlen(delim); // return tail substring
}
int main(void)
{
char str[] = "A/USING=B";
char *tail;
tail = split(str, "/USING=");
if (tail) {
printf("head: '%s'\n", str);
printf("tail: '%s'\n", tail);
}
return 0;
}

I known strtok() but it just split by one character as delimiter
Nopes, it's not.
As per the man page for strtok(), (emphasis mine)
char *strtok(char *str, const char *delim);
[...] The delim argument specifies a set of bytes that delimit the tokens in the parsed string. [...] A sequence of two or more contiguous delimiter bytes in the parsed string is considered to be a single delimiter. [...]
So, it need not be "one character" as you've mentioned. You can using a string, like in your case "/USING=" as the delimiter to get the job done.

Here is a little function to do this. It works exactly like strtok_r except that the delimiter is taken as a delimiting string, not a list of delimiting characters.
char *strtokstr_r(char *s, char *delim, char **save_ptr)
{
char *end;
if (s == NULL)
s = *save_ptr;
if (s == NULL || *s == '\0')
{
*save_ptr = s;
return NULL;
}
// Skip leading delimiters.
while (strstr(s,delim)==s) s+=strlen(delim);
if (*s == '\0')
{
*save_ptr = s;
return NULL;
}
// Find the end of the token.
end = strstr (s, delim);
if (end == NULL)
{
*save_ptr = s + strlen(s);
return s;
}
// Terminate the token and make *SAVE_PTR point past it.
memset(end, 0, strlen(delim));
*save_ptr = end + strlen(delim);
return s;
}

Dude, this answer is only valid if the input is this one, if were "abUcd/USING=efgh" your algorithm doesn't work.
This answer is the only valid for me:
char *split(char *str, const char *delim)
{
char *p = strstr(str, delim);
if (p == NULL) return NULL; // delimiter not found
*p = '\0'; // terminate string after head
return p + strlen(delim); // return tail substring
}
int main(void)
{
char str[] = "A/USING=B";
char *tail;
tail = split(str, "/USING=");
if (tail) {
printf("head: '%s'\n", str);
printf("tail: '%s'\n", tail);
}
return 0;
}

See this. I got this when I searched for your question on google.
In your case it will be:
#include <stdio.h>
#include <string.h>
int main (int argc, char* argv [])
{
char theString [16] = "abcd/USING=efgh";
char theCopy [16];
char *token;
strcpy (theCopy, theString);
token = strtok (theCopy, "/USING=");
while (token)
{
printf ("%s\n", token);
token = strtok (NULL, "/USING=");
}
return 0;
}
This uses /USING= as the delimiter.
The output of this was:
abcd
efgh
If you want to check, you can compile and run it online over here.

Related

How to replace characters by strtok function - C?

I really want to change all spaces ' ' in my char array for NULL -
#include <string.h>
void ReplaceCharactersInString(char *pcString, char *cOldChar, char *cNewChar) {
char *p = strtok(pcString, cOldChar);
strcpy(pcString, p);
while (p != NULL) {
strcat(pcString, p);
p = strtok(cNewChar, cOldChar);
}
}
int main() {
char pcString[] = "I am testing";
ReplaceCharactersInString(pcString, " ", NULL);
printf(pcString);
}
OUTPUT: Iamtesting
If I simply put the printf(p) function before:
p = strtok(cNewChar, cOldChar);
In the result I have what I need - but the problem is how to store it in pcString (directly)?
Or there is maybe a better solution to simply do it?
While some functions expect a [single] string to be pre-parsed to: I\0am\0testing, that is rare.
And, if you have multiple spaces/delimiters, you'll get (e.g.) foo\0\0bar, which you probably don't want.
And, your printf in main will only print the first token in the string because it will stop on the first EOS (i.e. '\0').
(i.e.) You probably don't want strcpy/strcat.
More likely, you want to fill an array of char * pointers to the tokens you parse.
So, you'd want to pass down char **argv, then do: argv[argc++] = strtok(...); and then do: return argc
Here's how I would refactor your code:
#include <stdio.h>
#include <string.h>
#define ARGMAX 100
int
ReplaceCharactersInString(int argmax,char **argv,char *pcString,
const char *delim)
{
char *p;
int argc;
// allow space for NULL termination
--argmax;
for (argc = 0; argc < argmax; ++argc, ++argv) {
// get next token
p = strtok(pcString,delim);
if (p == NULL)
break;
// zap the buffer pointer
pcString = NULL;
// store the token in the [returned] array
*argv = p;
}
*argv = NULL;
return argc;
}
int
main(void)
{
char pcString[] = "I am testing";
int argc;
char **av;
char *argv[ARGMAX];
argc = ReplaceCharactersInString(ARGMAX,argv,pcString," ");
printf("argc: %d\n",argc);
for (av = argv; *av != NULL; ++av)
printf("'%s'\n",*av);
return 0;
}
Here's the output:
argc: 3
'I'
'am'
'testing'
strcat strcpy should not be used when the source and destination overlap in memory.
Iterate through the array and replace the matching character with the desired character.
Since zeros are part of the string, printf will stop at the first zero and strlen can't be used for the length to print. sizeof can be used as pcString is defined in the same scope.
Note that ReplaceCharactersInString would not work a second time as it would stop at the first zero. The function could be written to accept a length parameter and loop using the length.
#include <stdio.h>
#include <stdlib.h>
void ReplaceCharactersInString(char *pcString, char cOldChar,char cNewChar){
while ( pcString && *pcString) {//not NULL and not zero
if ( *pcString == cOldChar) {//match
*pcString = cNewChar;//replace
}
++pcString;//advance to next character
}
}
int main ( void) {
char pcString[] = "I am testing";
ReplaceCharactersInString ( pcString, ' ', '\0');
for ( int each = 0; each < sizeof pcString; ++each) {
printf ( "pcString[%02d] = int:%-4d char:%c\n", each, pcString[each], pcString[each]);
}
return 0;
}
You want to split the string into individual tokens separated by spaces such as "I\0am\0testing\0". You can use strtok() for this but this function is error prone. I suggest you allocate an array of pointers and make them point to the words. Note that splitting the source string is sloppy and does not allow for tokens to be adjacent such as in 1+1. You could allocate the strings instead.
Here is an example:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **split_string(const char *str, char *delim) {
size_t i, len, count;
const char *p;
/* count tokens */
p = str;
p += strspn(p, delim); // skip initial delimiters
count = 0;
while (*p) {
count++;
p += strcspn(p, delim); // skip token
p += strspn(p, delim); // skip delimiters
}
/* allocate token array */
char **array = calloc(sizeof(*array, count + 1);
p = str;
p += strspn(p, delim); // skip initial delimiters
for (i = 0; i < count; i++) {
len = strcspn(p, delim); // token length
array[i] = strndup(p, len); // allocate a copy of the token
p += len; // skip token
p += strspn(p, delim); // skip delimiters
}
/* array ends with a null pointer */
array[count] = NULL;
return array;
}
int main() {
const char *pcString = "I am testing";
char **array = split_string(pcString, " \t\r\n");
for (size_t i = 0; array[i] != NULL; i++) {
printf("%zu: %s\n", i, array[i]);
}
return 0;
}
The strtok function pretty much does exactly what you want. It basically replaces the next delimiter with a '\0' character and returns the pointer to the current token. The next time you call strtok, you should pass a NULL argument (see the documentation for strtok) and it will point to the next token, which will again be delimited by '\0'. Read some more examples of correct strtok usage.

String and String array Manipulation in c

I'm trying to write a string spliter function in C.It uses space as delimiter to split a given string in two or more. It more like the split funtion in Python.Here is the code:-
#include <stdio.h>
#include <string.h>
void slice_input (char *t,char **out)
{
char *x,temp[10];
int i,j;
x = t;
j=0;
i=0;
for (;*x!='\0';x++){
if (*x!=' '){
temp[i] = *x;
i++;
}else if(*x==' '){
out[j] = temp;
j++;i=0;
}
}
}
int main()
{
char *out[2];
char inp[] = "HEllo World ";
slice_input(inp,out);
printf("%s\n%s",out[0],out[1]);
//printf("%d",strlen(out[1]));
return 0;
}
Expeted Output:-
HEllo
World
but it is showing :-
World
World
Can you help please?
out[j] = temp;
where temp is a local variable. It will go out of scope as soon as your function terminates, thus out[j] will point to garbage, invoking Undefined Behavior when being accessed.
A simple fix would be to use a 2D array for out, and use strcpy() to copy the temp string to out[j], like this:
#include <stdio.h>
#include <string.h>
void slice_input(char *t, char out[2][10]) {
char *x, temp[10];
int i,j;
x = t;
j=0;
i=0;
for (;*x!='\0';x++) {
if (*x!=' ') {
temp[i] = *x;
i++;
} else if(*x==' ') {
strcpy(out[j], temp);
j++;
i=0;
}
}
}
int main()
{
char out[2][10];
char inp[] = "HEllo World ";
slice_input(inp,out);
printf("%s\n%s",out[0],out[1]);
return 0;
}
Output:
HEllo
World
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
From the website:
char * strtok ( char * str, const char * delimiters ); On a first
call, the function expects a C string as argument for str, whose first
character is used as the starting location to scan for tokens. In
subsequent calls, the function expects a null pointer and uses the
position right after the end of last token as the new starting
location for scanning.
Once the terminating null character of str is found in a call to
strtok, all subsequent calls to this function (with a null pointer as
the first argument) return a null pointer.
Parameters
str C string to truncate. Notice that this string is modified by being
broken into smaller strings (tokens). Alternativelly [sic], a null
pointer may be specified, in which case the function continues
scanning where a previous successful call to the function ended.
delimiters C string containing the delimiter characters. These may
vary from one call to another. Return Value
A pointer to the last token found in string. A null pointer is
returned if there are no tokens left to retrieve.
Example
/* strtok example */
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
You can use this function to split string into tokens - there is no need to use some own functions. Your code looks like garbage, please format it.
Your source propably would look like this:
char *
strtok(s, delim)
char *s; /* string to search for tokens */
const char *delim; /* delimiting characters */
{
static char *lasts;
register int ch;
if (s == 0)
s = lasts;
do {
if ((ch = *s++) == '\0')
return 0;
} while (strchr(delim, ch));
--s;
lasts = s + strcspn(s, delim);
if (*lasts != 0)
*lasts++ = 0;
return s;
}

Substrings in the middle of a String in C

I need to extract substrings that are between Strings I know.
I have something like char string = "abcdefg";
I know what I need is between "c" and "f", then my return should be "de".
I know the strncpy() function but do not know how to apply it in the middle of a string.
Thank you.
Here's a full, working example:
#include <stdio.h>
#include <string.h>
int main(void) {
char string[] = "abcdefg";
char from[] = "c";
char to[] = "f";
char *first = strstr(string, from);
if (first == NULL) {
first = &string[0];
} else {
first += strlen(from);
}
char *last = strstr(first, to);
if (last == NULL) {
last = &string[strlen(string)];
}
char *sub = calloc(strlen(string) + 1, sizeof(char));
strncpy(sub, first, last - first);
printf("%s\n", sub);
free(sub);
return 0;
}
You can check it at this ideone.
Now, the explanation:
1.
char string[] = "abcdefg";
char from[] = "c";
char to[] = "f";
Declarations of strings: main string to be checked, beginning delimiter, ending delimiter. Note these are arrays as well, so from and to could be, for example, cd and fg, respectively.
2.
char *first = strstr(string, from);
Find occurence of the beginning delimiter in the main string. Note that it finds the first occurence - if you need to find the last one (for example, if you had the string abcabc, and you wanted a substring from the second a), it might need to be different.
3.
if (first == NULL) {
first = &string[0];
} else {
first += strlen(from);
}
Handle situation, in which the first delimiter doesn't appear in the string. In such a case, we will make a substring from the beginning of the entire string. If it does appear, however, we move the pointer by length of from string, as we need to extract the substring beginning after the first delimiter (correction thanks to #dau_sama).
Depending on your specifications, this may or may not be needed, or another result might be expected.
4.
char *last = strstr(first, to);
Find occurence of the ending delimiter in the main string. Note that it finds the first occurence.
As noted by #dau_sama, it's better to search for ending delimiter from the first, not from beginning of the entire string. This prevents situations, in which to would appear earlier than from.
5.
if (last == NULL) {
last = &string[strlen(string)];
}
Handle situation, in which the second delimiter doesn't appear in the string. In such a case, we will make a substring until end of the string, so we get a pointer to the last character.
Again, depending on your specifications, this may or may not be needed, or another result might be expected.
6.
char *sub = calloc(last - first + 1, sizeof(char));
strncpy(sub, first, last - first);
Allocate sufficient memory and extract substring based on pointers found earlier. We copy last - first (length of the substring) characters beginning from first character.
7.
printf("%s\n", sub);
Here's the result.
I hope it does present the problem with enough details. Depending on your exact specifications, you may need to alter this somehow. For example, if you needed to find all substrings, and not just the first one, you may want to make a loop for finding first and last.
TY guys, worked using the form below:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *between_substring(char *str, char from, char to){
while(*str && *str != from)
++str;//skip
if(*str == '\0')
return NULL;
else
++str;
char *ret = malloc(strlen(str)+1);
char *p = ret;
while(*str && *str != to){
*p++ = *str++;//To the end if `to` do not exist
}
*p = 0;
return ret;
}
int main (void){
char source[] = "abcdefg";
char *target;
target = between(source, 'c', 'f');
printf("%s", source);
printf("%s", target);
return 0;
}
Since people seemed to not understand my approach in the comments, here's a quick hacked together stub.
const char* string = "abcdefg";
const char* b = "c";
const char* e = "f";
//look for the first pattern
const char* begin = strstr(string, b);
if(!begin)
return NULL;
//look for the end pattern
const char* end = strstr(begin, e);
if(!end)
return NULL;
end -= strlen(e);
char result[MAXLENGTH];
strncpy(result, begin, end-begin);
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *between(const char *str, char from, char to){
while(*str && *str != from)
++str;//skip
if(*str == '\0')
return NULL;
else
++str;
char *ret = malloc(strlen(str)+1);
char *p = ret;
while(*str && *str != to){
*p++ = *str++;//To the end if `to` do not exist
}
*p = 0;
return ret;
}
int main(void){
const char* string = "abcdefg";
char *substr = between(string, 'c', 'f');
if(substr!=NULL){
puts(substr);
free(substr);
}
return 0;
}

Print delim used by strtok_r

I have this text for example:
I know,, more.- today, than yesterday!
And I'm extracting words with this code:
while(getline(&line, &len, fpSourceFile) > 0) {
last_word = NULL;
word = strtok_r(line, delim, &last_word);
while(word){
printf("%s ", word);
word = strtok_r(NULL, delim, &last_word);
// delim_used = ;
}
}
The output is:
I know more today than yesterday
But there is any way to get the delimiter used by strtok_r()? I want to replace same words by one integer, and do the same with delimiters. I can get one word with strtok_r(), but how get the delimiter used by that function?
Fortunately, strtok_r() is a pretty simple function - it's easy to create your own variant that does what you need:
#include <string.h>
/*
* public domain strtok_ex() based on a public domain
* strtok_r() by Charlie Gordon
*
* strtok_r from comp.lang.c 9/14/2007
*
* http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
*
* (Declaration that it's public domain):
* http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
*/
/*
strtok_ex() is an extended version of strtok_r() that optinally
returns the delimited that was used to terminate the token
the first 3 parameters are the same as for strtok_r(), the last
parameter:
char* delim_found
is an optional pointer to a character that will get the value of
the delimiter that was found to terminate the token.
*/
char* strtok_ex(
char *str,
const char *delim,
char **nextp,
char* delim_found)
{
char *ret;
char tmp;
if (!delim_found) delim_found = &tmp;
if (str == NULL)
{
str = *nextp;
}
str += strspn(str, delim);
if (*str == '\0')
{
*delim_found = '\0';
return NULL;
}
ret = str;
str += strcspn(str, delim);
*delim_found = *str;
if (*str)
{
*str++ = '\0';
}
*nextp = str;
return ret;
}
#include <stdio.h>
int main(void)
{
char delim[] = " ,.-!";
char line[] = "I know,, more.- today, than yesterday!";
char delim_used;
char* last_word = NULL;
char* word = strtok_ex(line, delim, &last_word, &delim_used);
while (word) {
printf("word: \"%s\" \tdelim: \'%c\'\n", word, delim_used);
word = strtok_ex(NULL, delim, &last_word, &delim_used);
}
return 0;
}
Getting any skipped delimiters would be a bit more work. I don't think it would be a lot of work, but I do think the interface would be unwieldy (strtok_ex()'s interface is already clunky), so you'd have to put some thought into that.
No, you cannot identify the delimiter (by means of the call to strtok_r() itself).
From man strtok_r:
BUGS
[...]
The identity of the delimiting byte is lost.

How to safety parse tab-delimited string ?

How to safety parse tab-delimiter string ? for example:
test\tbla-bla-bla\t2332 ?
strtok() is a standard function for parsing strings with arbitrary delimiters. It is, however, not thread-safe. Your C library of choice might have a thread-safe variant.
Another standard-compliant way (just wrote this up, it is not tested):
#include <string.h>
#include <stdio.h>
int main()
{
char string[] = "foo\tbar\tbaz";
char * start = string;
char * end;
while ( ( end = strchr( start, '\t' ) ) != NULL )
{
// %s prints a number of characters, * takes number from stack
// (your token is not zero-terminated!)
printf( "%.*s\n", end - start, start );
start = end + 1;
}
// start points to last token, zero-terminated
printf( "%s", start );
return 0;
}
Use strtok_r instead of strtok (if it is available). It has similar usage, except it is reentrant, and it does not modify the string like strtok does. [Edit: Actually, I misspoke. As Christoph points out, strtok_r does replace the delimiters by '\0'. So, you should operate on a copy of the string if you want to preserve the original string. But it is preferable to strtok because it is reentrant and thread safe]
strtok will leave your original string modified. It replaces the delimiter with '\0'. And if your string happens to be a constant, stored in a read only memory (some compilers will do that), you may actually get a access violation.
Using strtok() from string.h.
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] = "test\tbla-bla-bla\t2332";
char * pch;
pch = strtok (str," \t");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " \t");
}
return 0;
}
You can use any regex library or even the GLib GScanner, see here and here for more information.
Yet another version; this one separates the logic into a new function
#include <stdio.h>
static _Bool next_token(const char **start, const char **end)
{
if(!*end) *end = *start; // first call
else if(!**end) // check for terminating zero
return 0;
else *start = ++*end; // skip tab
// advance to terminating zero or next tab
while(**end && **end != '\t')
++*end;
return 1;
}
int main(void)
{
const char *string = "foo\tbar\tbaz";
const char *start = string;
const char *end = NULL; // NULL value indicates first call
while(next_token(&start, &end))
{
// print substring [start,end[
printf("%.*s\n", end - start, start);
}
return 0;
}
If you need a binary safe way to tokenize a given string:
#include <string.h>
#include <stdio.h>
void tokenize(const char *str, const char delim, const size_t size)
{
const char *start = str, *next;
const char *end = str + size;
while (start < end) {
if ((next = memchr(start, delim, end - start)) == NULL) {
next = end;
}
printf("%.*s\n", next - start, start);
start = next + 1;
}
}
int main(void)
{
char str[] = "test\tbla-bla-bla\t2332";
int len = strlen(str);
tokenize(str, '\t', len);
return 0;
}

Resources