I´m tying to split a string into tokens by ';'. But I have a problem that some tokens are empty/null, for example; 123;123132;;;232;232323;;;;1;
So i can´t use strtok becasuse merges adjacents delimiters. I see that you post this solution:
include <string.h>
char *data = "this&&that&other";
char *next;
char *curr = data;
while ((next = strchr(curr, '&')) != NULL) {
/* process curr to next-1 */
curr = next + 1;
}
/* process the remaining string (the last token) */
But I don´t understand because when I do next-1 to get the firts value i only get the firts word of the value not all the whole value.
Can you help me?, do you have any idea how to split this?
I´m programmig in C ansi. I see in another post that exists a strsep function thats seems exactly what i need, but in C ansi library this functions is not included.
Thanks and sorry for my english :)
I think this is want you want :-
#include <stddef.h>
#include <string.h>
#include <stdio.h>
char* mystrsep(char** input, const char* delim)
{
char* result = *input;
char* p;
p = (result != NULL) ? strpbrk(result, delim) : NULL;
if (p == NULL)
*input = NULL;
else
{
*p = '\0';
*input = p + 1;
}
return result;
}
int main()
{
char str[] = "123;123132;;;232;232323;;;;1;";
const char delimiters[] = ";";
char* ptr;
char* token;
ptr = str;
token = mystrsep(&ptr, delimiters);
while(token)
{
printf("%s\n",token);
token = mystrsep(&ptr, delimiters);
}
return 0;
}
#include <stdio.h>
#include <string.h>
char *strtok_r_noskip(char *str, const char *delims, char **store){
char *p, *wk;
if(str != NULL){
*store = str;
}
if(*store == NULL) return NULL;
//*store += strspn(*store, delims);//skip delimiter
if(**store == '\0') return NULL;
p=strpbrk(wk=*store, delims);
if(p != NULL){
*p='\0';
*store = p + 1;
} else {
*store = NULL;
}
return wk;
}
int main(void){
char data1[] = "this&&that&other";
char *store, *token = strtok_r_noskip(data1, "&", &store);
for(; token ; token = strtok_r_noskip(NULL, "&", &store)) {
printf("\"%s\"\n", token);
}
/* output
"this"
""
"that"
"other"
*/
char data2[] = "123;123132;;;232;232323;;;;1;";
token = strtok_r_noskip(data2, ";", &store);
for(; token ; token = strtok_r_noskip(NULL, ";", &store)) {
printf("\"%s\"\n", token);
}
/* output
"123"
"123132"
""
""
"232"
"232323"
""
""
""
"1"
*/
return 0;
}
Related
#include <stdio.h>
#include <string.h>
int main()
{
char buf[50] = "user/local/etc/bin/example.txt";
char* ptr;
ptr = strchr(buf, '/');
char path[20];
strncpy(path, buf, ptr-buf);
path[ptr-buf] =0;
printf("%s\n", path);
return 0;
}
I am able the get the substring before the first occurence of '/' i.e. I can get user but how can I get the substring after the second occurrence of '/' i.e. local and also the last occurence of '/' i.e. example without the extension .txt? How can it be done efficiently
I wouldn't use strchr for this. The problem is that strchr can only find one specific character but you care about both / and . Instead I would iterate the string using a pointer and check for both characters inside the loop..
Something like:
int main()
{
char buf[50] = "user/local/etc/bin/example.txt";
char* pStart = buf;
char* pCurrent = buf;
while(*pCurrent != '\0')
{
if (*pCurrent == '/' || *pCurrent == '.')
{
char str[20] = {0};
strncpy(str, pStart, pCurrent - pStart);
printf("%s\n", str);
pStart = pCurrent+1;
}
++pCurrent;
}
return 0;
}
Output:
user
local
etc
bin
example
If you really want to do this using strchr it could be like:
int main()
{
char buf[50] = "user/local/etc/bin/example.txt";
char* pStart = buf;
char* pCurrent = strchr(pStart, '/');
while(pCurrent != NULL)
{
char str[20] = {0};
strncpy(str, pStart, pCurrent - pStart);
printf("%s\n", str);
pStart = pCurrent+1;
pCurrent = strchr(pStart, '/');
}
pCurrent = strchr(pStart, '.');
if (pCurrent != NULL)
{
char str[20] = {0};
strncpy(str, pStart, pCurrent - pStart);
printf("%s\n", str);
}
return 0;
}
but as you can see, it requires a bit more code than the first example.
Use strtok() in this case For detailed description check this link ! Hope this helps!
Use this:
#include <stdio.h>
#include <string.h>
#define MAXLEN 100
int main(void)
{
char buf[] = "user/local/etc/bin/example.txt";
char substring[MAXLEN+1];
char *a, *b;
int len;
b = buf;
while ( (a = strchr(b, '/')) != NULL || (a = strchr(b, '.')) != NULL)
{
len = a - b;
if (len > MAXLEN)
return 0;
memcpy(substring, b, len);
substring[len] = 0;
printf("'%s'\n", substring);
b = a + 1;
}
return 0;
}
Here is the ouput:
'user'
'local'
'etc'
'bin'
'example'
Suppose my string is: haha "lol"
I want to extract only lol
#include<stdio.h>
int main() {
char a[20]={0};
char *s="haha \"lol\"";
if(sscanf(s,"%*[^\"]'%[^\"]\"",a)==1){
printf("Found stuff inside quotes");
}
}
By applying a proper parser for the source language that you are parsing.
One-liners for parsing input are often fragile and hard get right.
That said, you can try with something like
const char *input = "haha \"lol\"";
char quoted[32];
if(sscanf(input, "%*[^\"]\"%31[^\"]\"", quoted) == 1)
{
printf("got '%s'\n", quoted);
}
This should be hardened but is enough to get you started.
Sometimes a little code goes a long way. All that is need is 2 calls to strchr()
extract_quoted_string() changed to pseudo-code.
const char *extract_quoted_string(const char *s, size_t *sz) {
const char *left = look_for_quote_start_at_s;
if (failure?) {
return NULL;
}
Update_left_to_the_next_possible_position
const char *right = look_for_quote_start_at_updated_left;
if (failure?) {
return NULL;
}
Compute_and_save_size_based_on_left_and_right
return left;
}
Test harness
void test(const char *s) {
printf("<%s> --> ", s);
size_t sz;
const char *extract = extract_quoted_string(s, &sz);
if (extract) {
printf("<%.*s>\n", (int) sz, extract);
} else {
printf("None\n");
}
}
int main() {
test("");
test("123");
test("\"123");
test("123\"");
test("\"123\"");
test("abc\"123");
test("abc\"123\"");
test("123\"xyz");
test("\"123\"xyz");
test("abc\"123\"xyz");
}
Expected output
<> --> None
<123> --> None
<"123> --> None
<123"> --> None
<"123"> --> <123>
<abc"123> --> None
<abc"123"> --> <123>
<123"xyz> --> None
<"123"xyz> --> <123>
<abc"123"xyz> --> <123>
I think that it is enough in the answer by unwind (even if you add code according to requirements)
A question similar to this question already exists.
Split unquoted string in C
When using that approach you can write follows:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ESCAPE '\\' //ESCAPE CHARACTER
typedef struct token {
const char *top;
const char *end;//point to next character
} Token;
Token getToken(const char **sp, char sep){
const char *s = *sp;
const char *top, *end;
Token token = { NULL, NULL};
while(*s && *s == sep)//skip top separators
++s;
if(!*s){
*sp = s;
return token;
}
token.top = s;
while(*s && *s != sep){
if(*s == ESCAPE)
++s;
else if(*s == '"'){
char *p = strchr(s + 1, '"');//search end '"'
while(p && p[-1] == ESCAPE)
p = strchr(p + 1, '"');
if(p)
s = p;
}
++s;
}
token.end = s;
*sp = s;
return token;
}
char *remove_escape(char *s){
char *from, *to;
from = to = s;
while(*from){
if(*from != ESCAPE)
*to++ = *from;
++from;
}
*to = 0;
return s;
}
char *ft_strsub(Token token){
size_t len = token.end - token.top;
char *sub = malloc(len + 1);//check return value
if (sub){
memcpy(sub, token.top, len);
sub[len] = 0;
}
return sub;
}
int main(int argc, char **argv){
char *str = NULL;
const char *s="haha \"lol\"";
Token token = getToken(&s, ' ');
while(token.top != NULL){
str = ft_strsub(token);
remove_escape(str);
if(*str == '"')//find it!
break;
free(str);
token = getToken(&s, ' ');
}
if(str){
printf("Found stuff inside quotes: ");
//remove "
size_t len = strlen(str);
str[len-1] = 0;
printf("'%s'\n", str + 1);//ignore first character or use memmove
free(str);
}
return 0;
}
Code snippet:
char str[] = "String1::String2:String3:String4::String5";
char *deli = "::";
char *token = strtok(str,deli);
while(token != NULL)
{
printf("Token= \"%s\"\n", token);
token=strtok(NULL,deli);
}
The above code snippet produces the output:
Token="String1"
Token="String2"
Token="String3"
Token="String4"
Token="String5"
but I want the output to be:
Token="String1"
Token="String2:String3:String4"
Token="String5"
I know that I am not getting the expected output because each character in the second argument of strtok is considered as a delimiter.
To get the expected output, I've written a program that uses strstr(and other things) to split the given string into tokens such that I get the expected output. Here is the program:
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
int myStrtok(char* str,char* deli)
{
if(str==NULL || deli==NULL)
return -1;
int tokens=0;
char *token;
char *output=str;
while((token=strstr(output,deli))!=NULL)
{
bool print=true;
if(output != token)
{
printf("Token = \"");
tokens++;
print=false;
}
while(output != token)
{
putchar(*output);
output++;
}
if(print==false)
printf("\"\n");
output+=strlen(deli);
}
if(strlen(output)>0)
{
printf("Token = \"%s\"",output);
tokens++;
}
printf("\n\n");
return tokens;
}
int main(void)
{
char str[]="One:1:Two::Three::::";
char *deli="::";
int retval;
printf("Original string=\"%s\"\n\n",str);
if((retval=myStrtok(str,deli))==-1)
printf("The string or the delimeter is NULL\n");
else
printf("Number of tokens=%d\n", retval);
return(EXIT_SUCCESS);
}
The above program produces the expected output.
I'm wondering if there are any easier/simpler ways to do it. Are there any?
A string-delimiter function that uses strtok's prototype and mimicks its usage:
char *strtokm(char *str, const char *delim)
{
static char *tok;
static char *next;
char *m;
if (delim == NULL) return NULL;
tok = (str) ? str : next;
if (tok == NULL) return NULL;
m = strstr(tok, delim);
if (m) {
next = m + strlen(delim);
*m = '\0';
} else {
next = NULL;
}
return tok;
}
If you don't care about the same usage as strtok I would go with this:
// "String1::String2:String3:String4::String5" with delimiter "::" will produce
// "String1\0\0String2:String3:String4\0\0String5"
// And words should contain a pointer to the first S, the second S and the last S.
char **strToWordArray(char *str, const char *delimiter)
{
char **words;
int nwords = countWords(str, delimiter); //I let you decide how you want to do this
words = malloc(sizeof(*words) * (nwords + 1));
int w = 0;
int len = strlen(delimiter);
words[w++] = str;
while (*str != NULL)
{
if (strncmp(str, delimiter, len) == 0)
{
for (int i = 0; i < len; i++)
{
*(str++) = 0;
}
if (*str != 0)
words[w++] = str;
else
str--; //Anticipate wrong str++ down;
}
str++;
}
words[w] = NULL;
return words;
}
code derived from strsep https://code.woboq.org/userspace/glibc/string/strsep.c.html
char *strsepm( char **stringp, const char *delim ) {
char *begin, *end;
begin = *stringp;
if ( begin == NULL ) return NULL;
/* Find the end of the token. */
end = strstr( begin , delim );
if ( end != NULL ) {
/* Terminate the token and set *STRINGP past NUL character. */
*end = '\0';
end += strlen( delim );
*stringp = end;
} else {
/* No more delimiters; this is the last token. */
*stringp = NULL;
}
return begin;
}
int main( int argc , char *argv [] ) {
char *token_ptr;
char *token;
const char *delimiter = "&&";
char buffer [ 256 ];
strcpy( buffer , " && Hello && Bernd && waht's && going && on &&");
token_ptr = buffer;
while ( ( token = strsepm( &token_ptr , delimiter ) ) != NULL ) {
printf( "\'%s\'\n" , token );
}
}
Result:
' '
' Hello '
' Bernd '
' waht's '
' going '
' on '
''
Is there a better of parsing the below string instead of doing a strtok() to get each field.
"subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000"
Basically I want to retrieve the value for each field into another char buf's.
Here is my code. Just wanted to know if there is any other better way of doing it (any better string parsing algos)
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#define SUBJECT "subject="
#define CC_LIST "cc="
#define SERVER "server="
static void
get_value (const char *tok, char **rval_buf, size_t field_len)
{
size_t val_size = 0;
if (!tok || !rval_buf)
return;
val_size = strlen(tok + field_len) + 1;
*rval_buf = calloc(1, val_size);
if (*rval_buf) {
strlcpy(*rval_buf, tok + field_len, val_size);
}
}
int
main (int argc, char **argv)
{
/* hard coded buf for testing */
char buf[] = "subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000";
char *subject_text = NULL;
char *cc_list = NULL;
char *server_addr = NULL;
char *tok = NULL;
int field_len = 0;
int val_len = 0;
tok = strtok(buf, "&");
while(tok) {
/*
* Handle the token
*/
/* check if it is subject */
if (strstr(tok, SUBJECT)) {
get_value(tok, &subject_text, strlen(SUBJECT));
} else if (strstr(tok, CC_LIST)) { /* check if it is CC */
get_value(tok, &cc_list, strlen(CC_LIST));
} else if (strstr(tok, SERVER)) { /* check if it is server */
get_value(tok, &server_addr, strlen(SERVER));
}
tok = strtok(NULL, "&");
}
/* dump data */
fprintf(stdout, "\nSUBJECT: \"%s\"\nCC_LIST: \"%s\"\nSERVER: \"%s\" \n\n",
subject_text, cc_list, server_addr);
return EXIT_SUCCESS;
}
strstr searches for one string ("the needle") inside another ("the haystack"), but you really only want to know whether the needle is the beginning of the haystack.
Here's a small suggestion: (requires #include <stdbool> or change the booleans to ints. I like bools.)
static bool
getval(const char* haystack, const char** res, const char* needle, size_t len) {
if (haystack && 0 == strncmp(haystack, needle, len)) {
*res = strdup(haystack + len);
return true;
}
return false;
}
and later:
for (tok = strtok(buf, "&"); tok; tok = strtok(NULL, "&")) {
getval(tok, &subject_text, SUBJECT, strlen(SUBJECT)) ||
getval(tok, &cc_list, CC_LIST, strlen(CC_LIST)) ||
getval(tok, &server_addr, SERVER, strlen(SERVER));
}
You can actually get away with doing the strlen inside of getval, which cuts down a lot on the noise, because most modern compilers are clever enough to inline getval and constant-fold the length of a constant string.
Use strtok()
char *strtok(char *str, const char *delim)
You can put '&' as a delimeter
I wrote a quick-n-dirty splitter for you:
int split(char* input, char delim, char*** parts)
{
int count = 1;
char** result;
char* t = input;
while(*t != '\0')
{
if (*t++ == delim)
{
count++;
}
}
result = (char**)malloc(count * sizeof(char*));
t = input;
int i = 0;
result[i] = input;
while(*t != '\0')
{
if (*t == delim)
{
*t = '\0';
result[++i] = ++t;
}
else
{
t++;
}
}
*parts = result;
return count;
}
int main()
{
char raw[] = "subject=\"some text\"&cc=abcd&server=acd.com";
char* str = _strdup(raw);
char** parts;
char** keyval;
int cnt = split(str, '&', &parts);
for(int i=0; i<cnt; ++i)
{
split(parts[i], '=', &keyval);
printf("[%d]: %s <--> %s\n", i, keyval[0], keyval[1]);
free(keyval);
}
free(parts);
getchar();
return 0;
}
Output
[0]: subject <--> "some text"
[1]: cc <--> abcd
[2]: server <--> acd.com
i use strtok_r like:
char *the_sting = "a|b||e|f";
char *last;
char *current;
current = (char*)strtok_r(the_sting, "|", &last);
while(current != NULL)
{
printf(current);
printf("\n");
current = (char*)strtok_r(NULL, "|", &last);
}
i get:
>>a
>>b
>>e
>>f
the problem is, that i need 'blank' when there is nothing between the delimiters.
like:
>>a
>>b
>>
>>e
>>f
Compare the current current with the previous current. If the difference is more than strlen(previous_current) + 1 then one or more empty places was skipped.
Then strtok_r is not your function, furthermore, you can't use a string literal char *the_sting = "a|b||e|f"; because strtok_r modifies such string, use an array instead char the_sting[] = "a|b||e|f";
And finally, do not use printf in this way printf(current); (is dangerous), instead:
printf("%s", current);
This little function does what you want:
#include <stdio.h>
#include <string.h>
char *scan(char **pp, char c)
{
char *s, *p;
p = strchr(*pp, c);
if (p) *p++ = '\0';
s = *pp;
*pp = p;
return s;
}
int main(void)
{
char the_sting[] = "a|b||e|f"; /* I think you mean the_string here */
char *s, *p = the_sting;
while (p) {
s = scan(&p, '|');
printf("<%s>", s);
}
return 0;
}
Note that a simple char (not a string) is used as delimiter
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *my_strtok_r(char *str, const char *delims, char **store){
char *p, *wk;
if(str != NULL){
*store = str;
}
if(*store == NULL) return NULL;
//*store += strspn(*store, delims);//skip delimiter
if(**store == '\0') return NULL;
p=strpbrk(wk=*store, delims);
if(p != NULL){
*p='\0';
*store = p + 1;
} else {
*store = NULL;
}
return wk;
}
int main(void){
char the_sting[] = "a|b||e|f";
char *last;
char *current;
current = my_strtok_r(the_sting, "|", &last);
while(current != NULL)
{
printf(current);
printf("\n");
current = my_strtok_r(NULL, "|", &last);
}
return 0;
}
It's easy to roll your own, really:
#include <string.h>
typedef struct {
const unsigned char *data;
size_t len;
} buffer_t;
/* Use strpbrk() for multiple delimiters. */
buffer_t
memtok(const void *s, size_t length, const char *delim, buffer_t *save_ptr)
{
const unsigned char *stream,
*token;
size_t len = 0;
if (NULL == s) {
stream = save_ptr->data;
} else {
stream = s;
save_ptr->len = length;
}
token = stream;
/* Advance until either a token is found or the stream exhausted. */
while (save_ptr->len--) {
if (memchr(delim, *stream, strlen(delim))) {
/* Point save_ptr past the (non-existent) token. */
save_ptr->data = stream + 1;
return (buffer_t) { .data = token, .len = len };
}
++len;
++stream;
}
/* State : done. */
*save_ptr = (buffer_t) { .data = NULL, .len = 0 };
/* Stream exhausted but no delimiters terminate it. */
return (buffer_t){ .data = token, .len = len };
}
and for a short test:
int main(int argc, char **argv)
{
const char *the_sting = "a|b||e|f";
buffer_t kek = { .data = the_sting, .len = 8 },
token, state;
token = memtok(the_sting, 8, "|", &state);
while (token.data != NULL) {
char test[512];
memcpy(test, token.data, token.len);
test[token.len] = 0;
printf("%s\n", test);
token = memtok(NULL, 0, "|", &state);
}
return 0;
}
how about this:
char s[] = "1,2,,,,,,,3,4,5,6";
char *tok, *saved;
tok = strtok_r(s, ",", &saved);
do
{
fprintf(stderr, "tok = %s, saved = %s\n", tok, saved);;
if (',' == *saved)
{
while (',' == *saved++ )
{
fprintf(stderr, "saved = %s\n", saved);;
}
*saved--;
}
} while( (tok = (strtok_r(((void *)0), ",", &saved))));